From 34b3fa31f477e4ca9d06bffcbefdc4df56e7abfa Mon Sep 17 00:00:00 2001 From: Torsten Grote Date: Wed, 18 Mar 2026 15:49:00 -0300 Subject: [PATCH] [db] fix diff bug where zero-whitespace gets added more than once This bug only affects CJK languages and apart from DB growth, the symptom is that word filtering in app lists doesn't find affected apps because we look for a single whitespace between tokens --- .../org/fdroid/database/IndexV2DiffTest.kt | 83 ++++++++++++++----- .../src/main/java/org/fdroid/database/App.kt | 3 +- .../main/java/org/fdroid/database/AppDao.kt | 23 +++-- 3 files changed, 82 insertions(+), 27 deletions(-) diff --git a/libs/database/src/dbTest/java/org/fdroid/database/IndexV2DiffTest.kt b/libs/database/src/dbTest/java/org/fdroid/database/IndexV2DiffTest.kt index 695cd3c54..3b12f347b 100644 --- a/libs/database/src/dbTest/java/org/fdroid/database/IndexV2DiffTest.kt +++ b/libs/database/src/dbTest/java/org/fdroid/database/IndexV2DiffTest.kt @@ -14,6 +14,7 @@ import org.fdroid.test.TestDataMaxV2.PACKAGE_NAME_3 import org.fdroid.test.TestDataMaxV2.app3 import org.fdroid.test.TestDataMidV2 import org.fdroid.test.TestDataMinV2 +import org.fdroid.test.TestDataMinV2.PACKAGE_NAME import org.fdroid.test.TestUtils.getRes import org.junit.Ignore import org.junit.Test @@ -31,52 +32,58 @@ internal class IndexV2DiffTest : DbTest() { } @Test - fun testEmptyToMin() = + fun testEmptyToMin() { testDiff( startPath = "index-empty-v2.json", diffPath = "diff-empty-min/23.json", endIndex = TestDataMinV2.index, ) + } @Test - fun testEmptyToMid() = + fun testEmptyToMid() { testDiff( startPath = "index-empty-v2.json", diffPath = "diff-empty-mid/23.json", endIndex = TestDataMidV2.index, ) + } @Test - fun testEmptyToMax() = + fun testEmptyToMax() { testDiff( startPath = "index-empty-v2.json", diffPath = "diff-empty-max/23.json", endIndex = TestDataMaxV2.index, ) + } @Test - fun testMinToMid() = + fun testMinToMid() { testDiff( startPath = "index-min-v2.json", diffPath = 
"diff-empty-mid/42.json", endIndex = TestDataMidV2.index, ) + } @Test - fun testMinToMax() = + fun testMinToMax() { testDiff( startPath = "index-min-v2.json", diffPath = "diff-empty-max/42.json", endIndex = TestDataMaxV2.index, ) + } @Test - fun testMidToMax() = + fun testMidToMax() { testDiff( startPath = "index-mid-v2.json", diffPath = "diff-empty-max/1337.json", endIndex = TestDataMaxV2.index, ) + } @Test fun testMinRemoveApp() { @@ -450,20 +457,55 @@ internal class IndexV2DiffTest : DbTest() { } """ .trimIndent() - testJsonDiff( - startPath = "index-min-v2.json", - diff = diffJson, - endIndex = + val metadata = + TestDataMinV2.index.packages[PACKAGE_NAME]!! + .metadata + .copy( + // zero whitespaces (to separate tokens) will be added in testJsonDiff() + name = mapOf("zh-CN" to "自由软件仓库"), + summary = mapOf("ja" to "这个仓库中的"), + description = mapOf("ko-KR" to "切始终是从"), + ) + val endIndex = + TestDataMinV2.index.copy( + packages = TestDataMinV2.index.packages.mapValues { it.value.copy(metadata = metadata) } + ) + val repoId = testJsonDiff(startPath = "index-min-v2.json", diff = diffJson, endIndex = endIndex) + + // now apply another diff to ensure we don't add zero whitespace multiple times + val newDiffJson = + """ + { + "packages": { + "org.fdroid.min1": { + "metadata": { + "name": { "en-US": "foo bar" }, + "summary": { "en-US": "foo bar" }, + "description": { "en-US": "foo bar" } + } + } + } + } + """ + .trimIndent() + // apply diff stream to the DB + val streamReceiver = DbV2DiffStreamReceiver(db, repoId) { true } + val streamProcessor = IndexV2DiffStreamProcessor(streamReceiver) + val diffStream = ByteArrayInputStream(newDiffJson.toByteArray()) + db.runInTransaction { streamProcessor.process(42, diffStream) {} } + // assert that changed DB data is equal to given endIndex + assertDbEquals( + repoId = repoId, + index = TestDataMinV2.index.copy( packages = TestDataMinV2.index.packages.mapValues { it.value.copy( metadata = - it.value.metadata.copy( - // zero 
whitespaces (to separate tokens) will be added in testJsonDiff() - name = mapOf("zh-CN" to "自由软件仓库"), - summary = mapOf("ja" to "这个仓库中的"), - description = mapOf("ko-KR" to "切始终是从"), + metadata.copy( + name = mapOf("en-US" to "foo bar", "zh-CN" to "自由软件仓库"), + summary = mapOf("en-US" to "foo bar", "ja" to "这个仓库中的"), + description = mapOf("en-US" to "foo bar", "ko-KR" to "切始终是从"), ) ) } @@ -471,15 +513,15 @@ internal class IndexV2DiffTest : DbTest() { ) } - private fun testJsonDiff(startPath: String, diff: String, endIndex: IndexV2) { - testDiff(startPath, ByteArrayInputStream(diff.toByteArray()), endIndex) + private fun testJsonDiff(startPath: String, diff: String, endIndex: IndexV2): Long { + return testDiff(startPath, ByteArrayInputStream(diff.toByteArray()), endIndex) } - private fun testDiff(startPath: String, diffPath: String, endIndex: IndexV2) { - testDiff(startPath, getRes(diffPath), endIndex) + private fun testDiff(startPath: String, diffPath: String, endIndex: IndexV2): Long { + return testDiff(startPath, getRes(diffPath), endIndex) } - private fun testDiff(startPath: String, diffStream: InputStream, endIndex: IndexV2) { + private fun testDiff(startPath: String, diffStream: InputStream, endIndex: IndexV2): Long { // stream start index into the DB val repoId = streamIndexV2IntoDb(startPath) @@ -489,5 +531,6 @@ internal class IndexV2DiffTest : DbTest() { db.runInTransaction { streamProcessor.process(42, diffStream) {} } // assert that changed DB data is equal to given endIndex assertDbEquals(repoId, endIndex) + return repoId } } diff --git a/libs/database/src/main/java/org/fdroid/database/App.kt b/libs/database/src/main/java/org/fdroid/database/App.kt index c0f774908..ec88c2873 100644 --- a/libs/database/src/main/java/org/fdroid/database/App.kt +++ b/libs/database/src/main/java/org/fdroid/database/App.kt @@ -140,9 +140,10 @@ internal fun MetadataV2.toAppMetadata( * the sqlite tokenizers available to us either handle those languages or do diacritics removals. 
* Since we can't remove diacritics here ourselves, we help the tokenizer for CJK languages instead. */ -internal fun LocalizedTextV2?.zero(): LocalizedTextV2? { +internal fun LocalizedTextV2?.zero(localeAllowList: Set<String>? = null): LocalizedTextV2? { if (this == null) return null return toMutableMap().mapValues { (locale, text) -> + if (localeAllowList != null && locale !in localeAllowList) return@mapValues text if (locale.startsWith("zh") || locale.startsWith("ja") || locale.startsWith("ko")) { StringBuilder() .apply { diff --git a/libs/database/src/main/java/org/fdroid/database/AppDao.kt b/libs/database/src/main/java/org/fdroid/database/AppDao.kt index 30364c513..3c49756aa 100644 --- a/libs/database/src/main/java/org/fdroid/database/AppDao.kt +++ b/libs/database/src/main/java/org/fdroid/database/AppDao.kt @@ -28,6 +28,7 @@ import kotlinx.serialization.SerializationException import kotlinx.serialization.json.JsonNull import kotlinx.serialization.json.JsonObject import kotlinx.serialization.json.decodeFromJsonElement +import kotlinx.serialization.json.jsonObject import org.fdroid.LocaleChooser.getBestLocale import org.fdroid.database.AppListSortOrder.LAST_UPDATED import org.fdroid.database.AppListSortOrder.NAME @@ -280,16 +281,26 @@ internal interface AppDaoInt : AppDao { } // diff metadata val diffedApp = applyDiff(metadata, jsonObject) - val containsName = jsonObject.containsKey("name") - val containsSummary = jsonObject.containsKey("summary") - val containsDescription = jsonObject.containsKey("description") + val containsName = jsonObject["name"] is JsonObject + val containsSummary = jsonObject["summary"] is JsonObject + val containsDescription = jsonObject["description"] is JsonObject val updatedApp = if (containsName || containsSummary || containsDescription) { + // applies zero whitespace hack (needed for Fts search) for new/changed locales only + // also updates localizedName and localizedSummary cache diffedApp.copy( - name = if (containsName)
diffedApp.name.zero() else diffedApp.name, - summary = if (containsSummary) diffedApp.summary.zero() else diffedApp.summary, + name = + if (containsName) { + diffedApp.name.zero(jsonObject["name"]?.jsonObject?.keys) + } else diffedApp.name, + summary = + if (containsSummary) { + diffedApp.summary.zero(jsonObject["summary"]?.jsonObject?.keys) + } else diffedApp.summary, description = - if (containsDescription) diffedApp.description.zero() else diffedApp.description, + if (containsDescription) { + diffedApp.description.zero(jsonObject["description"]?.jsonObject?.keys) + } else diffedApp.description, localizedName = diffedApp.name.getBestLocale(locales), localizedSummary = diffedApp.summary.getBestLocale(locales), )