diff --git a/libs/database/src/dbTest/java/org/fdroid/database/AppSearchItemsTest.kt b/libs/database/src/dbTest/java/org/fdroid/database/AppSearchItemsTest.kt
index fef8ec8b7..d6d4be0b2 100644
--- a/libs/database/src/dbTest/java/org/fdroid/database/AppSearchItemsTest.kt
+++ b/libs/database/src/dbTest/java/org/fdroid/database/AppSearchItemsTest.kt
@@ -4,6 +4,7 @@ import androidx.test.ext.junit.runners.AndroidJUnit4
 import kotlin.test.assertEquals
 import kotlin.test.assertTrue
 import kotlinx.coroutines.runBlocking
+import org.fdroid.database.SearchQueryRewriter.rewriteQuery
 import org.fdroid.index.v2.MetadataV2
 import org.fdroid.test.TestRepoUtils.getRandomRepo
 import org.junit.Test
@@ -17,7 +18,7 @@ internal class AppSearchItemsTest : DbTest() {
   fun findsByName() = runBlocking {
     populateDbWithExtractedApps()
     assertSearchTopResult(
-      query = "duckduckgo* browser*",
+      query = "duckduckgo browser",
       packageName = "com.duckduckgo.mobile.android",
     )
-    assertSearchTopResult(query = "F-Droid*", packageName = "org.fdroid.fdroid")
+    assertSearchTopResult(query = "F-Droid", packageName = "org.fdroid.fdroid")
@@ -26,14 +27,14 @@ internal class AppSearchItemsTest : DbTest() {
   @Test
   fun findsBySummary() = runBlocking {
     populateDbWithExtractedApps()
-    assertSearchTopResult(query = "alternative* frontend*", packageName = "com.github.libretube")
+    assertSearchTopResult(query = "alternative frontend", packageName = "com.github.libretube")
   }
 
   @Test
   fun findsByDescription() = runBlocking {
     populateDbWithExtractedApps()
     assertSearchTopResult(
-      query = "privacy* essentials*",
+      query = "privacy essentials",
       packageName = "com.duckduckgo.mobile.android",
     )
   }
@@ -41,27 +42,26 @@ internal class AppSearchItemsTest : DbTest() {
   @Test
   fun findsByAuthor() = runBlocking {
     populateDbWithExtractedApps()
-    assertSearchTopResult(query = "rahul* patel*", packageName = "com.aurora.store")
-    assertSearchTopResult(query = "bitfire*", packageName = "at.bitfire.davdroid")
-    assertSearchTopResult(query = "艾*", packageName = "com.aistra.hail")
+    assertSearchTopResult(query = "rahul patel", packageName = "com.aurora.store")
+    assertSearchTopResult(query = "bitfire", packageName = "at.bitfire.davdroid")
+    assertSearchTopResult(query = "艾星", packageName = "com.aistra.hail")
   }
 
   @Test
   fun findsByPackageName() = runBlocking {
     populateDbWithExtractedApps()
-    assertSearchTopResult(query = "org* fdroid*", packageName = "org.fdroid.fdroid")
+    assertSearchTopResult(query = "org.fdroid.fdroid", packageName = "org.fdroid.fdroid")
   }
 
   @Test
   fun findsCamelCase() = runBlocking {
     populateDbWithExtractedApps()
     assertSearchTopResult(
-      // this is the actual query used by the search manager when searching for "game pad"
-      query = "game* pad* OR gamepad* OR \"game* pad*\"",
+      query = "game pad",
       packageName = "io.github.kitswas.virtualgamepadmobile",
     )
     assertSearchTopResult(
-      query = "cal* dav* OR caldav* OR \"cal* dav*\"",
+      query = "cal dav",
       packageName = "at.bitfire.davdroid",
     )
   }
@@ -70,52 +70,52 @@ internal class AppSearchItemsTest : DbTest() {
   fun findsGermanText() = runBlocking {
     populateDbWithExtractedApps()
     assertSearchTopResult(
-      query = "privatsphäre* vereinfacht*",
+      query = "privatsphäre vereinfacht",
       packageName = "com.duckduckgo.mobile.android",
     )
-    assertSearchTopResult(query = "installierbar*", packageName = "org.fdroid.fdroid")
-    assertSearchTopResult(query = "Synchronisierungs-App*", packageName = "at.bitfire.davdroid")
+    assertSearchTopResult(query = "installierbar", packageName = "org.fdroid.fdroid")
+    assertSearchTopResult(query = "Synchronisierungs-App", packageName = "at.bitfire.davdroid")
   }
 
   @Test
   fun findsPortugueseText() = runBlocking {
     populateDbWithExtractedApps()
-    assertSearchTopResult(query = "loja* privacidade*", packageName = "org.fdroid.fdroid")
-    assertSearchTopResult(query = "sincronização*", packageName = "at.bitfire.davdroid")
-    assertSearchTopResult(query = "catálogo* instalável*", packageName = "org.fdroid.fdroid")
-    assertSearchTopResult(query = "catalogo* instalavel*", packageName = "org.fdroid.fdroid")
+    assertSearchTopResult(query = "loja privacidade", packageName = "org.fdroid.fdroid")
+    assertSearchTopResult(query = "sincronização", packageName = "at.bitfire.davdroid")
+    assertSearchTopResult(query = "catálogo instalável", packageName = "org.fdroid.fdroid")
+    assertSearchTopResult(query = "catalogo instalavel", packageName = "org.fdroid.fdroid")
   }
 
   @Test
   fun findsChineseText() = runBlocking {
     populateDbWithExtractedApps()
-    assertSearchTopResult(query = "地* 图*", packageName = "app.organicmaps")
-    assertSearchResultsContain(query = "隐* 私*", packageName = "com.duckduckgo.mobile.android")
-    assertSearchResultsContain(query = "隐* 私*", packageName = "org.fdroid.fdroid")
-    assertSearchTopResult(query = "阅* 读*", packageName = "com.capyreader.app")
-    assertSearchResultsContain(query = "同* 步*", packageName = "com.nextcloud.android.beta")
-    assertSearchResultsContain(query = "同* 步*", packageName = "at.bitfire.davdroid")
+    assertSearchTopResult(query = "地图", packageName = "app.organicmaps")
+    assertSearchResultsContain(query = "隐私", packageName = "com.duckduckgo.mobile.android")
+    assertSearchResultsContain(query = "隐私", packageName = "org.fdroid.fdroid")
+    assertSearchTopResult(query = "阅读", packageName = "com.capyreader.app")
+    assertSearchResultsContain(query = "同步", packageName = "com.nextcloud.android.beta")
+    assertSearchResultsContain(query = "同步", packageName = "at.bitfire.davdroid")
   }
 
   @Test
   fun findsJapaneseText() = runBlocking {
     populateDbWithExtractedApps()
-    assertSearchTopResult(query = "同* 期*", packageName = "at.bitfire.davdroid")
+    assertSearchTopResult(query = "同期", packageName = "at.bitfire.davdroid")
   }
 
   @Test
   fun findsKoreanText() = runBlocking {
     populateDbWithExtractedApps()
-    assertSearchTopResult(query = "동기* 클라이*", packageName = "at.bitfire.davdroid")
+    assertSearchTopResult(query = "동기 클라이", packageName = "at.bitfire.davdroid")
   }
 
   private suspend fun assertSearchTopResult(query: String, packageName: String) {
-    val items = appDao.getAppSearchItems(query)
+    val items = appDao.getAppSearchItems(rewriteQuery(query))
     assertEquals(packageName, items.firstOrNull()?.packageName)
   }
 
   private suspend fun assertSearchResultsContain(query: String, packageName: String) {
-    val items = appDao.getAppSearchItems(query)
+    val items = appDao.getAppSearchItems(rewriteQuery(query))
     assertTrue(
       items.any { it.packageName == packageName },
       "Query '$query' did not find $packageName, but ${items.map { it.packageName }}",
diff --git a/libs/database/src/main/java/org/fdroid/database/SearchQueryRewriter.kt b/libs/database/src/main/java/org/fdroid/database/SearchQueryRewriter.kt
new file mode 100644
index 000000000..e9d10bf39
--- /dev/null
+++ b/libs/database/src/main/java/org/fdroid/database/SearchQueryRewriter.kt
@@ -0,0 +1,81 @@
+package org.fdroid.database
+
+/**
+ * Rewrites search queries into sqlite Fts4 `MATCH` expressions that give good results.
+ *
+ * Every word gets a trailing `*` to enable prefix matches. If there is more than one word and no
+ * CJK (ideographic) character, alternatives for the concatenated words (camel case) and for a
+ * phrase query are added. If there are CJK characters, each of them is searched separately and
+ * alternatives are added for our zero-width space hack and for the verbatim words.
+ *
+ * Double quotes are removed from the query, because they delimit phrases in the query syntax and
+ * an unbalanced quote makes the whole expression malformed.
+ *
+ * see https://www.sqlite.org/fts3.html#full_text_index_queries
+ */
+public object SearchQueryRewriter {
+
+  private const val ZERO_WIDTH_SPACE = "\u200B"
+
+  /** Separates words: any run of whitespace or Unicode separator characters. */
+  private val WORD_SEPARATOR = Regex("[\\s\\p{Z}]+")
+
+  /**
+   * Rewrites the given user [query] for use in a Fts4 `MATCH` expression.
+   *
+   * @return the rewritten query, or an empty string if [query] contains no words.
+   */
+  public fun rewriteQuery(query: String): String {
+    val words = query.replace("\"", "").split(WORD_SEPARATOR).filter { it.isNotBlank() }
+    // work on code points, so that characters made of surrogate pairs are never torn apart
+    val wordChars = words.map { codePointsOf(it) }
+    val hasAnyCjk = wordChars.any { chars -> chars.any { isIdeographic(it) } }
+    // search* term* (implicit AND and prefix search)
+    val prefixQuery = wordChars.joinToString(" ") { prefixTerms(it) }
+    val verbatimQuery = words.joinToString(" ") { "$it*" }
+    return when {
+      hasAnyCjk -> {
+        // query using zero-width concatenation needs to be quoted as a phrase query
+        val zeroQuery = wordChars.joinToString(" ") { "\"${joinWithZeroWidth(it)}*\"" }
+        "$prefixQuery " +
+          "OR $zeroQuery " + // zero-width space concat as in DB
+          "OR $verbatimQuery" // verbatim prefix for authorName searches
+      }
+      // if we had more than one word, make a more complex query
+      words.size > 1 ->
+        "$prefixQuery " +
+          "OR ${words.joinToString("")}* " + // camel case prefix
+          "OR \"$verbatimQuery\"" // phrase query
+      else -> prefixQuery
+    }
+  }
+
+  /** Returns the code points of [word], each as a string of its own. */
+  private fun codePointsOf(word: String): List<String> = buildList {
+    var index = 0
+    while (index < word.length) {
+      val end = index + Character.charCount(word.codePointAt(index))
+      add(word.substring(index, end))
+      index = end
+    }
+  }
+
+  /** Returns true if [char], a string holding a single code point, is a CJK ideograph. */
+  private fun isIdeographic(char: String): Boolean = Character.isIdeographic(char.codePointAt(0))
+
+  /** Returns the prefix search term(s) for the [chars] of a word, one term per CJK char. */
+  private fun prefixTerms(chars: List<String>): String {
+    val terms = chars.joinToString("") { if (isIdeographic(it)) "$it* " else it }
+    // a trailing CJK char has its * already, anything else still needs one for prefix matches
+    return if (isIdeographic(chars.last())) terms.trimEnd() else "$terms*"
+  }
+
+  /**
+   * Joins the [chars] of a word: with zero-width spaces if one is a CJK char, unchanged otherwise.
+   *
+   * NOTE(review): this also separates the non-CJK chars of a mixed word, confirm that this
+   * matches how the DB stores such words.
+   */
+  private fun joinWithZeroWidth(chars: List<String>): String =
+    chars.joinToString(if (chars.any { isIdeographic(it) }) ZERO_WIDTH_SPACE else "")
+}
diff --git a/libs/database/src/test/java/org/fdroid/database/SearchQueryRewriterTest.kt b/libs/database/src/test/java/org/fdroid/database/SearchQueryRewriterTest.kt
new file mode 100644
index 000000000..cb8e4fcd5
--- /dev/null
+++ b/libs/database/src/test/java/org/fdroid/database/SearchQueryRewriterTest.kt
@@ -0,0 +1,83 @@
+package org.fdroid.database
+
+import kotlin.test.assertEquals
+import org.junit.Test
+
+internal class SearchQueryRewriterTest {
+
+  @Test
+  fun rewritesBlankQueryToBlank() {
+    assertEquals("", SearchQueryRewriter.rewriteQuery(" "))
+  }
+
+  @Test
+  fun rewritesSingleLatinWordToPrefixQuery() {
+    assertEquals("foo*", SearchQueryRewriter.rewriteQuery("foo"))
+  }
+
+  @Test
+  fun rewritesMultipleLatinWordsWithCamelCaseAndPhrase() {
+    assertEquals(
+      "foo* bar* OR foobar* OR \"foo* bar*\"",
+      SearchQueryRewriter.rewriteQuery("foo bar"),
+    )
+  }
+
+  @Test
+  fun rewritesMultipleLatinWordsAndIgnoresExtraWhitespace() {
+    assertEquals(
+      "foo* bar* OR foobar* OR \"foo* bar*\"",
+      SearchQueryRewriter.rewriteQuery(" foo bar "),
+    )
+  }
+
+  @Test
+  fun rewritesSingleCjkWordWithZeroWidthAndVerbatimAlternatives() {
+    assertEquals("測* 試* OR \"測\u200B試*\" OR 測試*", SearchQueryRewriter.rewriteQuery("測試"))
+  }
+
+  @Test
+  fun rewritesMultiWordCjkQuery() {
+    assertEquals(
+      "測* 試* 艾* 星* OR \"測\u200B試*\" \"艾\u200B星*\" OR 測試* 艾星*",
+      SearchQueryRewriter.rewriteQuery("測試 艾星"),
+    )
+  }
+
+  @Test
+  fun rewritesMixedLatinAndCjkWordsUsingCjkBranch() {
+    assertEquals(
+      "foo* 測* 試* OR \"foo*\" \"測\u200B試*\" OR foo* 測試*",
+      SearchQueryRewriter.rewriteQuery("foo 測試"),
+    )
+  }
+
+  @Test
+  fun removesQuotesFromQuery() {
+    assertEquals("foo*", SearchQueryRewriter.rewriteQuery("\"foo"))
+  }
+
+  @Test
+  fun splitsWordsOnAnyWhitespace() {
+    assertEquals(
+      "foo* bar* baz* OR foobarbaz* OR \"foo* bar* baz*\"",
+      SearchQueryRewriter.rewriteQuery("foo\tbar\u3000baz"),
+    )
+  }
+
+  @Test
+  fun addsPrefixToLatinTailOfMixedWord() {
+    assertEquals(
+      "測* foo* OR \"測\u200Bf\u200Bo\u200Bo*\" OR 測foo*",
+      SearchQueryRewriter.rewriteQuery("測foo"),
+    )
+  }
+
+  @Test
+  fun keepsSupplementaryIdeographsIntact() {
+    assertEquals(
+      "\uD840\uDC00* 測* OR \"\uD840\uDC00\u200B測*\" OR \uD840\uDC00測*",
+      SearchQueryRewriter.rewriteQuery("\uD840\uDC00測"),
+    )
+  }
+}