mirror of
https://github.com/f-droid/fdroidclient.git
synced 2026-04-17 13:30:33 -04:00
[db] fix diff bug where zero-whitespace gets added more than once
This bug only affects CJK languages. Apart from DB growth, the symptom is that word filtering in app lists doesn't find affected apps, because we look for a single whitespace between tokens.
This commit is contained in:
@@ -14,6 +14,7 @@ import org.fdroid.test.TestDataMaxV2.PACKAGE_NAME_3
|
||||
import org.fdroid.test.TestDataMaxV2.app3
|
||||
import org.fdroid.test.TestDataMidV2
|
||||
import org.fdroid.test.TestDataMinV2
|
||||
import org.fdroid.test.TestDataMinV2.PACKAGE_NAME
|
||||
import org.fdroid.test.TestUtils.getRes
|
||||
import org.junit.Ignore
|
||||
import org.junit.Test
|
||||
@@ -31,52 +32,58 @@ internal class IndexV2DiffTest : DbTest() {
|
||||
}
|
||||
|
||||
@Test
|
||||
fun testEmptyToMin() =
|
||||
fun testEmptyToMin() {
|
||||
testDiff(
|
||||
startPath = "index-empty-v2.json",
|
||||
diffPath = "diff-empty-min/23.json",
|
||||
endIndex = TestDataMinV2.index,
|
||||
)
|
||||
}
|
||||
|
||||
@Test
|
||||
fun testEmptyToMid() =
|
||||
fun testEmptyToMid() {
|
||||
testDiff(
|
||||
startPath = "index-empty-v2.json",
|
||||
diffPath = "diff-empty-mid/23.json",
|
||||
endIndex = TestDataMidV2.index,
|
||||
)
|
||||
}
|
||||
|
||||
@Test
|
||||
fun testEmptyToMax() =
|
||||
fun testEmptyToMax() {
|
||||
testDiff(
|
||||
startPath = "index-empty-v2.json",
|
||||
diffPath = "diff-empty-max/23.json",
|
||||
endIndex = TestDataMaxV2.index,
|
||||
)
|
||||
}
|
||||
|
||||
@Test
|
||||
fun testMinToMid() =
|
||||
fun testMinToMid() {
|
||||
testDiff(
|
||||
startPath = "index-min-v2.json",
|
||||
diffPath = "diff-empty-mid/42.json",
|
||||
endIndex = TestDataMidV2.index,
|
||||
)
|
||||
}
|
||||
|
||||
@Test
|
||||
fun testMinToMax() =
|
||||
fun testMinToMax() {
|
||||
testDiff(
|
||||
startPath = "index-min-v2.json",
|
||||
diffPath = "diff-empty-max/42.json",
|
||||
endIndex = TestDataMaxV2.index,
|
||||
)
|
||||
}
|
||||
|
||||
@Test
|
||||
fun testMidToMax() =
|
||||
fun testMidToMax() {
|
||||
testDiff(
|
||||
startPath = "index-mid-v2.json",
|
||||
diffPath = "diff-empty-max/1337.json",
|
||||
endIndex = TestDataMaxV2.index,
|
||||
)
|
||||
}
|
||||
|
||||
@Test
|
||||
fun testMinRemoveApp() {
|
||||
@@ -450,20 +457,55 @@ internal class IndexV2DiffTest : DbTest() {
|
||||
}
|
||||
"""
|
||||
.trimIndent()
|
||||
testJsonDiff(
|
||||
startPath = "index-min-v2.json",
|
||||
diff = diffJson,
|
||||
endIndex =
|
||||
val metadata =
|
||||
TestDataMinV2.index.packages[PACKAGE_NAME]!!
|
||||
.metadata
|
||||
.copy(
|
||||
// zero whitespaces (to separate tokens) will be added in testJsonDiff()
|
||||
name = mapOf("zh-CN" to "自由软件仓库"),
|
||||
summary = mapOf("ja" to "这个仓库中的"),
|
||||
description = mapOf("ko-KR" to "切始终是从"),
|
||||
)
|
||||
val endIndex =
|
||||
TestDataMinV2.index.copy(
|
||||
packages = TestDataMinV2.index.packages.mapValues { it.value.copy(metadata = metadata) }
|
||||
)
|
||||
val repoId = testJsonDiff(startPath = "index-min-v2.json", diff = diffJson, endIndex = endIndex)
|
||||
|
||||
// now apply another diff to ensure we don't add zero whitespace multiple times
|
||||
val newDiffJson =
|
||||
"""
|
||||
{
|
||||
"packages": {
|
||||
"org.fdroid.min1": {
|
||||
"metadata": {
|
||||
"name": { "en-US": "foo bar" },
|
||||
"summary": { "en-US": "foo bar" },
|
||||
"description": { "en-US": "foo bar" }
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
"""
|
||||
.trimIndent()
|
||||
// apply diff stream to the DB
|
||||
val streamReceiver = DbV2DiffStreamReceiver(db, repoId) { true }
|
||||
val streamProcessor = IndexV2DiffStreamProcessor(streamReceiver)
|
||||
val diffStream = ByteArrayInputStream(newDiffJson.toByteArray())
|
||||
db.runInTransaction { streamProcessor.process(42, diffStream) {} }
|
||||
// assert that changed DB data is equal to given endIndex
|
||||
assertDbEquals(
|
||||
repoId = repoId,
|
||||
index =
|
||||
TestDataMinV2.index.copy(
|
||||
packages =
|
||||
TestDataMinV2.index.packages.mapValues {
|
||||
it.value.copy(
|
||||
metadata =
|
||||
it.value.metadata.copy(
|
||||
// zero whitespaces (to separate tokens) will be added in testJsonDiff()
|
||||
name = mapOf("zh-CN" to "自由软件仓库"),
|
||||
summary = mapOf("ja" to "这个仓库中的"),
|
||||
description = mapOf("ko-KR" to "切始终是从"),
|
||||
metadata.copy(
|
||||
name = mapOf("en-US" to "foo bar", "zh-CN" to "自由软件仓库"),
|
||||
summary = mapOf("en-US" to "foo bar", "ja" to "这个仓库中的"),
|
||||
description = mapOf("en-US" to "foo bar", "ko-KR" to "切始终是从"),
|
||||
)
|
||||
)
|
||||
}
|
||||
@@ -471,15 +513,15 @@ internal class IndexV2DiffTest : DbTest() {
|
||||
)
|
||||
}
|
||||
|
||||
private fun testJsonDiff(startPath: String, diff: String, endIndex: IndexV2) {
|
||||
testDiff(startPath, ByteArrayInputStream(diff.toByteArray()), endIndex)
|
||||
private fun testJsonDiff(startPath: String, diff: String, endIndex: IndexV2): Long {
|
||||
return testDiff(startPath, ByteArrayInputStream(diff.toByteArray()), endIndex)
|
||||
}
|
||||
|
||||
private fun testDiff(startPath: String, diffPath: String, endIndex: IndexV2) {
|
||||
testDiff(startPath, getRes(diffPath), endIndex)
|
||||
private fun testDiff(startPath: String, diffPath: String, endIndex: IndexV2): Long {
|
||||
return testDiff(startPath, getRes(diffPath), endIndex)
|
||||
}
|
||||
|
||||
private fun testDiff(startPath: String, diffStream: InputStream, endIndex: IndexV2) {
|
||||
private fun testDiff(startPath: String, diffStream: InputStream, endIndex: IndexV2): Long {
|
||||
// stream start index into the DB
|
||||
val repoId = streamIndexV2IntoDb(startPath)
|
||||
|
||||
@@ -489,5 +531,6 @@ internal class IndexV2DiffTest : DbTest() {
|
||||
db.runInTransaction { streamProcessor.process(42, diffStream) {} }
|
||||
// assert that changed DB data is equal to given endIndex
|
||||
assertDbEquals(repoId, endIndex)
|
||||
return repoId
|
||||
}
|
||||
}
|
||||
|
||||
@@ -140,9 +140,10 @@ internal fun MetadataV2.toAppMetadata(
|
||||
* the sqlite tokenizers available to us either handle those languages or do diacritics removals.
|
||||
* Since we can't remove diacritics here ourselves, we help the tokenizer for CJK languages instead.
|
||||
*/
|
||||
internal fun LocalizedTextV2?.zero(): LocalizedTextV2? {
|
||||
internal fun LocalizedTextV2?.zero(localeAllowList: Set<String>? = null): LocalizedTextV2? {
|
||||
if (this == null) return null
|
||||
return toMutableMap().mapValues { (locale, text) ->
|
||||
if (localeAllowList != null && locale !in localeAllowList) return@mapValues text
|
||||
if (locale.startsWith("zh") || locale.startsWith("ja") || locale.startsWith("ko")) {
|
||||
StringBuilder()
|
||||
.apply {
|
||||
|
||||
@@ -28,6 +28,7 @@ import kotlinx.serialization.SerializationException
|
||||
import kotlinx.serialization.json.JsonNull
|
||||
import kotlinx.serialization.json.JsonObject
|
||||
import kotlinx.serialization.json.decodeFromJsonElement
|
||||
import kotlinx.serialization.json.jsonObject
|
||||
import org.fdroid.LocaleChooser.getBestLocale
|
||||
import org.fdroid.database.AppListSortOrder.LAST_UPDATED
|
||||
import org.fdroid.database.AppListSortOrder.NAME
|
||||
@@ -280,16 +281,26 @@ internal interface AppDaoInt : AppDao {
|
||||
}
|
||||
// diff metadata
|
||||
val diffedApp = applyDiff(metadata, jsonObject)
|
||||
val containsName = jsonObject.containsKey("name")
|
||||
val containsSummary = jsonObject.containsKey("summary")
|
||||
val containsDescription = jsonObject.containsKey("description")
|
||||
val containsName = jsonObject["name"] is JsonObject
|
||||
val containsSummary = jsonObject["summary"] is JsonObject
|
||||
val containsDescription = jsonObject["description"] is JsonObject
|
||||
val updatedApp =
|
||||
if (containsName || containsSummary || containsDescription) {
|
||||
// applies zero whitespace hack (needed for Fts search) for new/changed locales only
|
||||
// also updates localizedName and localizedSummary cache
|
||||
diffedApp.copy(
|
||||
name = if (containsName) diffedApp.name.zero() else diffedApp.name,
|
||||
summary = if (containsSummary) diffedApp.summary.zero() else diffedApp.summary,
|
||||
name =
|
||||
if (containsName) {
|
||||
diffedApp.name.zero(jsonObject["name"]?.jsonObject?.keys)
|
||||
} else diffedApp.name,
|
||||
summary =
|
||||
if (containsSummary) {
|
||||
diffedApp.summary.zero(jsonObject["summary"]?.jsonObject?.keys)
|
||||
} else diffedApp.summary,
|
||||
description =
|
||||
if (containsDescription) diffedApp.description.zero() else diffedApp.description,
|
||||
if (containsDescription) {
|
||||
diffedApp.description.zero(jsonObject["description"]?.jsonObject?.keys)
|
||||
} else diffedApp.description,
|
||||
localizedName = diffedApp.name.getBestLocale(locales),
|
||||
localizedSummary = diffedApp.summary.getBestLocale(locales),
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user