From 6bb27980458104b5c623ccf5f183fa709a581de0 Mon Sep 17 00:00:00 2001 From: Ray c <20511786-ray2c@users.noreply.gitlab.com> Date: Tue, 4 Nov 2025 11:49:39 +0000 Subject: [PATCH] [libs] More robust handling of language and script in `getBestLocale` --- .../kotlin/org/fdroid/BestLocaleTest.kt | 204 +++++++++++++++++- .../kotlin/org/fdroid/LocaleChooser.kt | 95 ++++++-- 2 files changed, 272 insertions(+), 27 deletions(-) diff --git a/libs/index/src/androidInstrumentedTest/kotlin/org/fdroid/BestLocaleTest.kt b/libs/index/src/androidInstrumentedTest/kotlin/org/fdroid/BestLocaleTest.kt index e75ce41fa..18b4eb923 100644 --- a/libs/index/src/androidInstrumentedTest/kotlin/org/fdroid/BestLocaleTest.kt +++ b/libs/index/src/androidInstrumentedTest/kotlin/org/fdroid/BestLocaleTest.kt @@ -26,18 +26,210 @@ internal class BestLocaleTest { @Test fun testEmptyLocalesReturnsNull() { - assertNull(emptyMap().getBestLocale(getLocaleList("en-US, de-DE"))) - assertNull(getMap("en-US, de-DE").getBestLocale(getEmptyLocaleList())) + assertNull(emptyMap().getBestLocale(getLocaleList("en-US,de-DE"))) } @Test fun testFallbackToEn() { + assertEquals( + "en-US", + getMap("fr-FR", "en-US", "de-DE").getBestLocale(getEmptyLocaleList()) + ) + assertEquals( "en", getMap("de-AT", "de-DE", "en").getBestLocale(getLocaleList("fr-FR")), ) } + @Test + fun testFallbackToFirst() { + assertEquals( + "de-AT", + getMap("de-AT", "de-DE", "uk").getBestLocale(getLocaleList("fr-FR")), + ) + } + + @Test + fun testMatchLanguageAndScript() { + assertEquals( + "en", + getMap("en-Shaw", "en-Shaw-US", "en-GB", "en").getBestLocale(getLocaleList("en-NL")), + ) + + assertEquals( + "sr-Cyrl", + getMap("en", "sr-Cyrl", "sr-Latn").getBestLocale(getLocaleList("sr-RS")), + ) + + assertEquals( + "uz-Latn", + getMap("en", "uz-Cyrl", "uz-Latn").getBestLocale(getLocaleList("uz")), + ) + + assertEquals( + "zh-Hant", + getMap("en", "zh-Hans", "zh-Hant").getBestLocale(getLocaleList("zh-TW")), + ) + + assertEquals( + "sr-Latn", + getMap("en", "sr", "sr-RS", "sr-Latn").getBestLocale(getLocaleList("sr-Latn-RS")), + ) + + assertEquals( + "uz-Cyrl", + getMap("en", "uz", "uz-Cyrl").getBestLocale(getLocaleList("uz-Cyrl-UZ")), + ) + + assertEquals( + "zh-Hant", + getMap("en", "zh", "zh-Hant").getBestLocale(getLocaleList("zh-TW")), + ) + + assertEquals( + "zh-TW", + getMap("zh", "zh-CN", "zh-TW", "en").getBestLocale(getLocaleList("zh-HK,de")), + ) + + assertEquals( + "zh-Hans", + getMap("en", "zh-Hant", "zh-Hans").getBestLocale(getLocaleList("zh")), + ) + + assertEquals( + "zh-Hant", + getMap("en", "zh-Hans", "zh-Hant").getBestLocale(getLocaleList("zh-HK")), + ) + + assertEquals( + "de", + getMap("zh", "de", "en").getBestLocale(getLocaleList("zh-HK,de")), + ) + + assertEquals( + "zh-HK", + getMap("zh", "zh-CN", "zh-TW", "zh-HK").getBestLocale(getLocaleList("zh-Hant-HK")), + ) + + assertEquals( + "zh-Hant-HK", + getMap( + "zh", + "zh-Hans-CN", + "zh-Hant-TW", + "zh-Hant-HK" + ).getBestLocale(getLocaleList("zh-HK")), + ) + } + + @Test + fun testRankingPriority() { + // an exact match is the best match (and calls it a day) + assertEquals( + "en-US", + getMap( + "en-Shaw-US", + "en-Latn", + "en", + "en-US", + "en-Latn-US" + ).getBestLocale(getLocaleList("en-US")), + ) + + assertEquals( + "zh-TW", + getMap( + "zh", + "zh-CN", + "zh-Hant", + "zh-Hant-HK", + "zh-TW" + ).getBestLocale(getLocaleList("zh-TW")), + ) + + // else dive into the haystack in reverse order of specificity -- from specific to generic, + // starting from the most specific form: language-script-country + assertEquals( + "zh-Hant-HK", + getMap("zh", "zh-CN", "zh-Hant", "zh-Hant-HK").getBestLocale(getLocaleList("zh-HK")), + ) + + // followed by language-country and language-script + assertEquals( + "zh-TW", + getMap("zh", "zh-CN", "zh-Hant", "zh-TW").getBestLocale(getLocaleList("zh-Hant-TW")), + ) + + assertEquals( + "sr-RS", + getMap("en", "sr", "sr-Latn", "sr-RS").getBestLocale(getLocaleList("sr-Cyrl-RS")), + ) + + assertEquals( + "zh-MO", + getMap("en", "zh", "zh-Hant", "zh-MO").getBestLocale(getLocaleList("zh-Hant-MO")), + ) + + assertEquals( + "zh-Hans", + getMap("en", "zh", "zh-Hans", "zh-MO").getBestLocale(getLocaleList("zh-Hans-MO")), + ) + + assertEquals( + "zh-Hant", + getMap("zh", "zh-CN", "zh-Hant", "zh-Hant-HK").getBestLocale(getLocaleList("zh-TW")), + ) + + assertEquals( + "sr-Latn", + getMap("en", "sr", "sr-Latn", "sr-RS").getBestLocale(getLocaleList("sr-Latn-RS")), + ) + + assertEquals( + "en-Latn", + getMap( + "en-Shaw-US", + "en", + "en-US", + "en-Latn", + "en-Latn-US" + ).getBestLocale(getLocaleList("en-GB")), + ) + + // finally language only if script matches + assertEquals( + "de", + getMap("zh", "zh-CN", "en", "de").getBestLocale(getLocaleList("zh-HK,de")), + ) + + assertEquals( + "fr", + getMap("zh", "en", "fr").getBestLocale(getLocaleList("en-Shaw-GB,fr")), + ) + + assertEquals( + "en", + getMap("fr", "en", "sr").getBestLocale(getLocaleList("sr-Latn-RS,en")), + ) + + // failing which the first one with same script wins + assertEquals( + "en-GB", + getMap("en-Shaw-US", "en-GB", "en-US").getBestLocale(getLocaleList("en-NL")), + ) + + assertEquals( + "en-AR", + getMap("en-AR", "en-GB", "en-US").getBestLocale(getLocaleList("en-NL")), + ) + + assertEquals( + "zh-HK", + getMap("en", "zh", "zh-CN", "zh-HK", "zh-TW").getBestLocale(getLocaleList("zh-MO")), + ) + } + /** * Ported from old LocaleSelectionTest. */ @@ -46,7 +238,7 @@ internal class BestLocaleTest { // just select the matching en-US locale, nothing special here assertEquals( "en-US", - getMap("de-AT", "de-DE", "en-US").getBestLocale(getLocaleList("en-US, de-DE")), + getMap("de-AT", "de-DE", "en-US").getBestLocale(getLocaleList("en-US,de-DE")), ) // fall back to another en locale before de @@ -106,13 +298,13 @@ internal class BestLocaleTest { // underscores assertEquals( "en-US", - getMap("de_AT", "de_DE", "en-US").getBestLocale(getLocaleList("en-US, de-DE")), + getMap("de_AT", "de_DE", "en-US").getBestLocale(getLocaleList("en-US,de-DE")), ) // different case assertEquals( "en-US", - getMap("DE_at", "dE_De", "en-US").getBestLocale(getLocaleList("en-US, de-DE")), + getMap("DE_at", "dE_De", "en-US").getBestLocale(getLocaleList("en-US,de-DE")), ) // garbage in given locales @@ -125,7 +317,7 @@ internal class BestLocaleTest { "de_DE", "#$%#!$^#&^%#*", "en-US", - ).getBestLocale(getLocaleList("en-US, de-DE")), + ).getBestLocale(getLocaleList("en-US,de-DE")), ) } diff --git a/libs/index/src/androidMain/kotlin/org/fdroid/LocaleChooser.kt b/libs/index/src/androidMain/kotlin/org/fdroid/LocaleChooser.kt index 6faf7e84b..37d208cda 100644 --- a/libs/index/src/androidMain/kotlin/org/fdroid/LocaleChooser.kt +++ b/libs/index/src/androidMain/kotlin/org/fdroid/LocaleChooser.kt @@ -1,9 +1,11 @@ package org.fdroid import androidx.core.os.LocaleListCompat +import androidx.core.text.ICUCompat import org.fdroid.index.v2.LocalizedFileListV2 import org.fdroid.index.v2.LocalizedFileV2 import org.fdroid.index.v2.LocalizedTextV2 +import java.util.Locale public object LocaleChooser { @@ -13,34 +15,85 @@ public object LocaleChooser { */ public fun Map?.getBestLocale(localeList: LocaleListCompat): T? { if (isNullOrEmpty()) return null - val firstMatch = localeList.getFirstMatch(keys.toTypedArray()) ?: return null - val tag = firstMatch.toLanguageTag() - // try first matched tag first (usually has region tag, e.g. de-DE) - return get(tag) ?: run { - // split away stuff like script and try language and region only - val langCountryTag = "${firstMatch.language}-${firstMatch.country}" - getOrStartsWith(langCountryTag) ?: run { - // split away region tag and try language only - val langTag = firstMatch.language - // try language, then English and then just take the first of the list - getOrStartsWith(langTag) ?: get("en-US") ?: get("en") ?: values.first() + if (size == 1) return values.first() + return when (localeList.size()) { + 0 -> null + 1 -> localeList.get(0) + else -> localeList.getFirstMatch(keys.toTypedArray()) + }?.let { firstMatch -> + // try first matched tag first (usually has region tag, e.g. de-DE) + get(firstMatch.toLanguageTag()) ?: run { + // search by ranking priority if no exact match is found, + // determining its script if not supplied + val tried = (if (firstMatch.script.isNullOrEmpty()) 0 else 1) + + (if (firstMatch.country.isNullOrEmpty()) 0 else 2) + if (firstMatch.script.isNullOrEmpty()) { + ICUCompat.maximizeAndGetScript(firstMatch)?.takeUnless { it.isEmpty() } + ?.let { script -> getInRankingOrder(firstMatch, tried + 1, script, tried) } + } else if (tried > 1) { + getInRankingOrder(firstMatch, tried - 1, firstMatch.script, tried) + } else { + null + } + // then language and other countries if script matches + ?: if (tried == 0) null else get(firstMatch.language)?.takeIf { _ -> + LocaleListCompat.matchesLanguageAndScript( + getLocale(firstMatch.language), + firstMatch + ) + } ?: getFirstSameScript(firstMatch) } } + // or English and then just take the first of the list + ?: get("en-US") ?: get("en") ?: values.first() } - /** - * Returns the value from the map with the given key or if that key is not contained in the map, - * tries the first map key that starts with the given key. - * If nothing matches, null is returned. - * - * This is useful when looking for a language tag like `fr_CH` and falling back to `fr` - * in a map that has `fr_FR` as a key. - */ - private fun Map.getOrStartsWith(s: String): T? = get(s) ?: run { + private tailrec fun Map.getInRankingOrder( + locale: Locale, + rank: Int, + script: String?, + tried: Int + ): T? { + if (rank <= 0) return null + if (rank != tried) getRankingTag(locale, rank, script)?.let { get(it) }?.let { return it } + return getInRankingOrder(locale, rank - 1, script, tried) + } + + private fun Map.getFirstSameScript(locale: Locale): T? { + val langLen = locale.language.length entries.forEach { (key, value) -> - if (key.startsWith(s)) return value + if (key.length > langLen && + key.startsWith(locale.language) && + key[langLen] == '-' && + LocaleListCompat.matchesLanguageAndScript(Locale.forLanguageTag(key), locale) + ) return value } return null } + private fun getRankingTag(locale: Locale, rank: Int, script: String?): String? { + if (rank >= 2 && locale.country.isNullOrEmpty()) return null + if (rank != 2 && script.isNullOrEmpty()) return null + return when (rank) { + 3 -> "${locale.language}-$script-${locale.country}" + 2 -> if (script.isNullOrEmpty() || + script.equals( + ICUCompat.maximizeAndGetScript(getLocale(locale.language, locale.country)), + true + ) + ) "${locale.language}-${locale.country}" else null + + 1 -> "${locale.language}-$script" + else -> null + } + } + + private fun getLocale(language: String, script: String = "") = + if (android.os.Build.VERSION.SDK_INT >= 36) { + Locale.of(language, script) + } else { + @Suppress("DEPRECATION") + Locale(language, script) + } + }