From 42314c8377463087ea1f514e1e079b2afd833cb7 Mon Sep 17 00:00:00 2001 From: Torsten Grote Date: Tue, 17 Mar 2026 11:12:16 -0300 Subject: [PATCH] [db] Add SearchQueryRewriter and use it in AppSearchItemsTest This code comes from SearchManager, but making it available in the DB library makes sense since the queries are specific to the DB implementation such as zero-whitespace hack. --- .../org/fdroid/database/AppSearchItemsTest.kt | 54 ++++++++--------- .../fdroid/database/SearchQueryRewriter.kt | 58 +++++++++++++++++++ .../database/SearchQueryRewriterTest.kt | 54 +++++++++++++++++ 3 files changed, 139 insertions(+), 27 deletions(-) create mode 100644 libs/database/src/main/java/org/fdroid/database/SearchQueryRewriter.kt create mode 100644 libs/database/src/test/java/org/fdroid/database/SearchQueryRewriterTest.kt diff --git a/libs/database/src/dbTest/java/org/fdroid/database/AppSearchItemsTest.kt b/libs/database/src/dbTest/java/org/fdroid/database/AppSearchItemsTest.kt index fef8ec8b7..d6d4be0b2 100644 --- a/libs/database/src/dbTest/java/org/fdroid/database/AppSearchItemsTest.kt +++ b/libs/database/src/dbTest/java/org/fdroid/database/AppSearchItemsTest.kt @@ -4,6 +4,7 @@ import androidx.test.ext.junit.runners.AndroidJUnit4 import kotlin.test.assertEquals import kotlin.test.assertTrue import kotlinx.coroutines.runBlocking +import org.fdroid.database.SearchQueryRewriter.rewriteQuery import org.fdroid.index.v2.MetadataV2 import org.fdroid.test.TestRepoUtils.getRandomRepo import org.junit.Test @@ -17,7 +18,7 @@ internal class AppSearchItemsTest : DbTest() { fun findsByName() = runBlocking { populateDbWithExtractedApps() assertSearchTopResult( - query = "duckduckgo* browser*", + query = "duckduckgo browser", packageName = "com.duckduckgo.mobile.android", ) assertSearchTopResult(query = "F-Droid*", packageName = "org.fdroid.fdroid") @@ -26,14 +27,14 @@ internal class AppSearchItemsTest : DbTest() { @Test fun findsBySummary() = runBlocking { populateDbWithExtractedApps() - 
assertSearchTopResult(query = "alternative* frontend*", packageName = "com.github.libretube") + assertSearchTopResult(query = "alternative frontend", packageName = "com.github.libretube") } @Test fun findsByDescription() = runBlocking { populateDbWithExtractedApps() assertSearchTopResult( - query = "privacy* essentials*", + query = "privacy essentials", packageName = "com.duckduckgo.mobile.android", ) } @@ -41,27 +42,26 @@ internal class AppSearchItemsTest : DbTest() { @Test fun findsByAuthor() = runBlocking { populateDbWithExtractedApps() - assertSearchTopResult(query = "rahul* patel*", packageName = "com.aurora.store") - assertSearchTopResult(query = "bitfire*", packageName = "at.bitfire.davdroid") - assertSearchTopResult(query = "艾*", packageName = "com.aistra.hail") + assertSearchTopResult(query = "rahul patel", packageName = "com.aurora.store") + assertSearchTopResult(query = "bitfire", packageName = "at.bitfire.davdroid") + assertSearchTopResult(query = "艾星", packageName = "com.aistra.hail") } @Test fun findsByPackageName() = runBlocking { populateDbWithExtractedApps() - assertSearchTopResult(query = "org* fdroid*", packageName = "org.fdroid.fdroid") + assertSearchTopResult(query = "org.fdroid.fdroid", packageName = "org.fdroid.fdroid") } @Test fun findsCamelCase() = runBlocking { populateDbWithExtractedApps() assertSearchTopResult( - // this is the actual query used by the search manager when searching for "game pad" - query = "game* pad* OR gamepad* OR \"game* pad*\"", + query = "game pad", packageName = "io.github.kitswas.virtualgamepadmobile", ) assertSearchTopResult( - query = "cal* dav* OR caldav* OR \"cal* dav*\"", + query = "cal dav", packageName = "at.bitfire.davdroid", ) } @@ -70,52 +70,52 @@ internal class AppSearchItemsTest : DbTest() { fun findsGermanText() = runBlocking { populateDbWithExtractedApps() assertSearchTopResult( - query = "privatsphäre* vereinfacht*", + query = "privatsphäre vereinfacht", packageName = 
"com.duckduckgo.mobile.android", ) - assertSearchTopResult(query = "installierbar*", packageName = "org.fdroid.fdroid") - assertSearchTopResult(query = "Synchronisierungs-App*", packageName = "at.bitfire.davdroid") + assertSearchTopResult(query = "installierbar", packageName = "org.fdroid.fdroid") + assertSearchTopResult(query = "Synchronisierungs-App", packageName = "at.bitfire.davdroid") } @Test fun findsPortugueseText() = runBlocking { populateDbWithExtractedApps() - assertSearchTopResult(query = "loja* privacidade*", packageName = "org.fdroid.fdroid") - assertSearchTopResult(query = "sincronização*", packageName = "at.bitfire.davdroid") - assertSearchTopResult(query = "catálogo* instalável*", packageName = "org.fdroid.fdroid") - assertSearchTopResult(query = "catalogo* instalavel*", packageName = "org.fdroid.fdroid") + assertSearchTopResult(query = "loja privacidade", packageName = "org.fdroid.fdroid") + assertSearchTopResult(query = "sincronização", packageName = "at.bitfire.davdroid") + assertSearchTopResult(query = "catálogo instalável", packageName = "org.fdroid.fdroid") + assertSearchTopResult(query = "catalogo instalavel", packageName = "org.fdroid.fdroid") } @Test fun findsChineseText() = runBlocking { populateDbWithExtractedApps() - assertSearchTopResult(query = "地* 图*", packageName = "app.organicmaps") - assertSearchResultsContain(query = "隐* 私*", packageName = "com.duckduckgo.mobile.android") - assertSearchResultsContain(query = "隐* 私*", packageName = "org.fdroid.fdroid") - assertSearchTopResult(query = "阅* 读*", packageName = "com.capyreader.app") - assertSearchResultsContain(query = "同* 步*", packageName = "com.nextcloud.android.beta") - assertSearchResultsContain(query = "同* 步*", packageName = "at.bitfire.davdroid") + assertSearchTopResult(query = "地图", packageName = "app.organicmaps") + assertSearchResultsContain(query = "隐私", packageName = "com.duckduckgo.mobile.android") + assertSearchResultsContain(query = "隐私", packageName = "org.fdroid.fdroid") 
+ assertSearchTopResult(query = "阅读", packageName = "com.capyreader.app") + assertSearchResultsContain(query = "同步", packageName = "com.nextcloud.android.beta") + assertSearchResultsContain(query = "同步", packageName = "at.bitfire.davdroid") } @Test fun findsJapaneseText() = runBlocking { populateDbWithExtractedApps() - assertSearchTopResult(query = "同* 期*", packageName = "at.bitfire.davdroid") + assertSearchTopResult(query = "同期", packageName = "at.bitfire.davdroid") } @Test fun findsKoreanText() = runBlocking { populateDbWithExtractedApps() - assertSearchTopResult(query = "동기* 클라이*", packageName = "at.bitfire.davdroid") + assertSearchTopResult(query = "동기 클라이", packageName = "at.bitfire.davdroid") } private suspend fun assertSearchTopResult(query: String, packageName: String) { - val items = appDao.getAppSearchItems(query) + val items = appDao.getAppSearchItems(rewriteQuery(query)) assertEquals(packageName, items.firstOrNull()?.packageName) } private suspend fun assertSearchResultsContain(query: String, packageName: String) { - val items = appDao.getAppSearchItems(query) + val items = appDao.getAppSearchItems(rewriteQuery(query)) assertTrue( items.any { it.packageName == packageName }, "Query '$query' did not find $packageName, but ${items.map { it.packageName }}", diff --git a/libs/database/src/main/java/org/fdroid/database/SearchQueryRewriter.kt b/libs/database/src/main/java/org/fdroid/database/SearchQueryRewriter.kt new file mode 100644 index 000000000..e9d10bf39 --- /dev/null +++ b/libs/database/src/main/java/org/fdroid/database/SearchQueryRewriter.kt @@ -0,0 +1,58 @@ +package org.fdroid.database + +/** + * Rewrites search queries so that best results with sqlite Fts4 are obtained. Uses prefix searches + * and camel case searches for latin chars, and separate character searches for CJK chars. Also + * employing our zero whitespace hack. + * + * Attention: Quotes should be removed from the query before passing it in. 
+ *
+ * see https://www.sqlite.org/fts3.html#full_text_index_queries
+ */
+public object SearchQueryRewriter {
+  /** Builds the FTS4 query for the user-entered [query]; a blank [query] yields "". */
+  public fun rewriteQuery(query: String): String {
+    // split on common whitespace, incl. tabs, line breaks and the ideographic space U+3000
+    val splits = query.split(' ', '\t', '\n', '\r', '\u3000').filter { it.isNotBlank() }
+    var hasAnyCjk = false
+    return splits
+      .joinToString(" ") { word ->
+        var isCjk = false
+        // go through word and separate CJK chars (if needed) so each one becomes its own prefix
+        val newString =
+          word.toList().joinToString("") {
+            if (Character.isIdeographic(it.code)) {
+              isCjk = true
+              hasAnyCjk = true
+              "$it* "
+            } else "$it"
+          }
+        // NOTE(review): latin chars that follow an ideograph inside one word get no * of their own
+        // add * to enable prefix matches (CJK words already carry one * per ideograph)
+        if (isCjk) newString.trimEnd() else "$newString*"
+      }
+      .let { firstPassQuery ->
+        // if we had more than one word, make a more complex query
+        if (splits.size > 1 && !hasAnyCjk) {
+          "$firstPassQuery " + // search* term* (implicit AND and prefix search)
+            "OR ${splits.joinToString("")}* " + // camel case prefix
+            "OR \"${splits.joinToString("* ")}*\"" // phrase query
+        } else if (hasAnyCjk) {
+          val zeroSplits =
+            splits.map { word ->
+              if (word.any { Character.isIdeographic(it.code) }) {
+                // separate CJK chars with zero-width
+                word.toList().joinToString("\u200B")
+              } else word
+            }
+          // query using zero-width concatenation needs to be quoted as a phrase query
+          val zeroQuery = zeroSplits.joinToString(" ") { "\"$it*\"" }
+          "$firstPassQuery " + // search* term* (implicit AND and prefix search)
+            "OR $zeroQuery " + // zero whitespace concat as in DB
+            "OR ${splits.joinToString("* ")}*" // verbatim prefix for authorName searches
+        } else {
+          firstPassQuery
+        }
+      }
+  }
+}
diff --git a/libs/database/src/test/java/org/fdroid/database/SearchQueryRewriterTest.kt b/libs/database/src/test/java/org/fdroid/database/SearchQueryRewriterTest.kt
new file mode 100644
index 000000000..cb8e4fcd5
--- /dev/null
+++ b/libs/database/src/test/java/org/fdroid/database/SearchQueryRewriterTest.kt
@@ -0,0 +1,54 @@
+package org.fdroid.database
+
+import kotlin.test.assertEquals
+import org.junit.Test
+
+internal class
SearchQueryRewriterTest {
+
+  /** Asserts that [SearchQueryRewriter.rewriteQuery] turns [input] into exactly [expected]. */
+  private fun assertRewrite(input: String, expected: String) {
+    assertEquals(expected, SearchQueryRewriter.rewriteQuery(input))
+  }
+
+  @Test
+  fun rewritesBlankQueryToBlank() {
+    // whitespace alone leaves no search terms
+    assertRewrite(input = " ", expected = "")
+  }
+
+  @Test
+  fun rewritesSingleLatinWordToPrefixQuery() {
+    assertRewrite(input = "foo", expected = "foo*")
+  }
+
+  @Test
+  fun rewritesMultipleLatinWordsWithCamelCaseAndPhrase() {
+    // expected parts: AND-ed prefixes, concatenated ("camel case") prefix, phrase query
+    assertRewrite(input = "foo bar", expected = "foo* bar* OR foobar* OR \"foo* bar*\"")
+  }
+
+  @Test
+  fun rewritesMultipleLatinWordsAndIgnoresExtraWhitespace() {
+    // leading and trailing spaces must not change the result
+    assertRewrite(input = " foo bar ", expected = "foo* bar* OR foobar* OR \"foo* bar*\"")
+  }
+
+  @Test
+  fun rewritesSingleCjkWordWithZeroWidthAndVerbatimAlternatives() {
+    // expected parts: per-character prefixes, U+200B-joined phrase, verbatim prefix
+    assertRewrite(input = "測試", expected = "測* 試* OR \"測\u200B試*\" OR 測試*")
+  }
+
+  @Test
+  fun rewritesMultiWordCjkQuery() {
+    val expected = "測* 試* 艾* 星* OR \"測\u200B試*\" \"艾\u200B星*\" OR 測試* 艾星*"
+    assertRewrite(input = "測試 艾星", expected = expected)
+  }
+
+  @Test
+  fun rewritesMixedLatinAndCjkWordsUsingCjkBranch() {
+    // one CJK word is enough to send the latin word through the CJK branch as well
+    val expected = "foo* 測* 試* OR \"foo*\" \"測\u200B試*\" OR foo* 測試*"
+    assertRewrite(input = "foo 測試", expected = expected)
+  }
+}