[libs] More robust handling of language and script in getBestLocale

This commit is contained in:
Ray c
2025-11-04 11:49:39 +00:00
committed by Torsten Grote
parent f1ac12c6f5
commit 6bb2798045
2 changed files with 272 additions and 27 deletions

View File

@@ -26,18 +26,210 @@ internal class BestLocaleTest {
@Test
fun testEmptyLocalesReturnsNull() {
    // An empty map has nothing to offer, whatever the preferred locales are.
    // (A non-empty map with an empty locale list is no longer expected to yield null:
    // testFallbackToEn asserts that it falls back to an English entry instead.)
    assertNull(emptyMap<String, String>().getBestLocale(getLocaleList("en-US,de-DE")))
}
@Test
fun testFallbackToEn() {
    // no preferred locale at all: the en-US entry is chosen
    val withoutPreference = getMap("fr-FR", "en-US", "de-DE").getBestLocale(getEmptyLocaleList())
    assertEquals("en-US", withoutPreference)

    // the preferred locale has no entry: the plain "en" entry is chosen
    val withoutTranslation = getMap("de-AT", "de-DE", "en").getBestLocale(getLocaleList("fr-FR"))
    assertEquals("en", withoutTranslation)
}
@Test
fun testFallbackToFirst() {
    // neither the preferred locale nor any English entry is available: the first entry is chosen
    val available = getMap("de-AT", "de-DE", "uk")
    val preferred = getLocaleList("fr-FR")
    assertEquals("de-AT", available.getBestLocale(preferred))
}
@Test
fun testMatchLanguageAndScript() {
    // Asserts that [expected] is chosen from [available] for the comma-separated [preferred] tags.
    fun <T> expectBest(expected: T, available: Map<String, T>?, preferred: String) {
        assertEquals(expected, available.getBestLocale(getLocaleList(preferred)))
    }

    // preferred locale given without a script
    expectBest("en", getMap("en-Shaw", "en-Shaw-US", "en-GB", "en"), "en-NL")
    expectBest("sr-Cyrl", getMap("en", "sr-Cyrl", "sr-Latn"), "sr-RS")
    expectBest("uz-Latn", getMap("en", "uz-Cyrl", "uz-Latn"), "uz")
    expectBest("zh-Hant", getMap("en", "zh-Hans", "zh-Hant"), "zh-TW")

    // preferred locale given with an explicit script
    expectBest("sr-Latn", getMap("en", "sr", "sr-RS", "sr-Latn"), "sr-Latn-RS")
    expectBest("uz-Cyrl", getMap("en", "uz", "uz-Cyrl"), "uz-Cyrl-UZ")

    // Chinese variants
    expectBest("zh-Hant", getMap("en", "zh", "zh-Hant"), "zh-TW")
    expectBest("zh-TW", getMap("zh", "zh-CN", "zh-TW", "en"), "zh-HK,de")
    expectBest("zh-Hans", getMap("en", "zh-Hant", "zh-Hans"), "zh")
    expectBest("zh-Hant", getMap("en", "zh-Hans", "zh-Hant"), "zh-HK")
    expectBest("de", getMap("zh", "de", "en"), "zh-HK,de")
    expectBest("zh-HK", getMap("zh", "zh-CN", "zh-TW", "zh-HK"), "zh-Hant-HK")
    expectBest(
        "zh-Hant-HK",
        getMap("zh", "zh-Hans-CN", "zh-Hant-TW", "zh-Hant-HK"),
        "zh-HK",
    )
}
@Test
fun testRankingPriority() {
    // Asserts that [expected] is chosen from [available] for the comma-separated [preferred] tags.
    fun <T> expectBest(expected: T, available: Map<String, T>?, preferred: String) {
        assertEquals(expected, available.getBestLocale(getLocaleList(preferred)))
    }

    // an exact match always wins and ends the search
    expectBest(
        "en-US",
        getMap("en-Shaw-US", "en-Latn", "en", "en-US", "en-Latn-US"),
        "en-US",
    )
    expectBest(
        "zh-TW",
        getMap("zh", "zh-CN", "zh-Hant", "zh-Hant-HK", "zh-TW"),
        "zh-TW",
    )

    // otherwise candidates are tried from the most specific form to the most generic one,
    // starting with language-script-country
    expectBest("zh-Hant-HK", getMap("zh", "zh-CN", "zh-Hant", "zh-Hant-HK"), "zh-HK")

    // followed by language-country and language-script
    expectBest("zh-TW", getMap("zh", "zh-CN", "zh-Hant", "zh-TW"), "zh-Hant-TW")
    expectBest("sr-RS", getMap("en", "sr", "sr-Latn", "sr-RS"), "sr-Cyrl-RS")
    expectBest("zh-MO", getMap("en", "zh", "zh-Hant", "zh-MO"), "zh-Hant-MO")
    expectBest("zh-Hans", getMap("en", "zh", "zh-Hans", "zh-MO"), "zh-Hans-MO")
    expectBest("zh-Hant", getMap("zh", "zh-CN", "zh-Hant", "zh-Hant-HK"), "zh-TW")
    expectBest("sr-Latn", getMap("en", "sr", "sr-Latn", "sr-RS"), "sr-Latn-RS")
    expectBest(
        "en-Latn",
        getMap("en-Shaw-US", "en", "en-US", "en-Latn", "en-Latn-US"),
        "en-GB",
    )

    // then the bare language, but only if its script matches
    expectBest("de", getMap("zh", "zh-CN", "en", "de"), "zh-HK,de")
    expectBest("fr", getMap("zh", "en", "fr"), "en-Shaw-GB,fr")
    expectBest("en", getMap("fr", "en", "sr"), "sr-Latn-RS,en")

    // and finally the first entry of the same language that has the same script
    expectBest("en-GB", getMap("en-Shaw-US", "en-GB", "en-US"), "en-NL")
    expectBest("en-AR", getMap("en-AR", "en-GB", "en-US"), "en-NL")
    expectBest("zh-HK", getMap("en", "zh", "zh-CN", "zh-HK", "zh-TW"), "zh-MO")
}
/**
* Ported from old LocaleSelectionTest.
*/
@@ -46,7 +238,7 @@ internal class BestLocaleTest {
// just select the matching en-US locale, nothing special here
assertEquals(
"en-US",
getMap("de-AT", "de-DE", "en-US").getBestLocale(getLocaleList("en-US, de-DE")),
getMap("de-AT", "de-DE", "en-US").getBestLocale(getLocaleList("en-US,de-DE")),
)
// fall back to another en locale before de
@@ -106,13 +298,13 @@ internal class BestLocaleTest {
// underscores
assertEquals(
"en-US",
getMap("de_AT", "de_DE", "en-US").getBestLocale(getLocaleList("en-US, de-DE")),
getMap("de_AT", "de_DE", "en-US").getBestLocale(getLocaleList("en-US,de-DE")),
)
// different case
assertEquals(
"en-US",
getMap("DE_at", "dE_De", "en-US").getBestLocale(getLocaleList("en-US, de-DE")),
getMap("DE_at", "dE_De", "en-US").getBestLocale(getLocaleList("en-US,de-DE")),
)
// garbage in given locales
@@ -125,7 +317,7 @@ internal class BestLocaleTest {
"de_DE",
"#$%#!$^#&^%#*",
"en-US",
).getBestLocale(getLocaleList("en-US, de-DE")),
).getBestLocale(getLocaleList("en-US,de-DE")),
)
}

View File

@@ -1,9 +1,11 @@
package org.fdroid
import androidx.core.os.LocaleListCompat
import androidx.core.text.ICUCompat
import org.fdroid.index.v2.LocalizedFileListV2
import org.fdroid.index.v2.LocalizedFileV2
import org.fdroid.index.v2.LocalizedTextV2
import java.util.Locale
public object LocaleChooser {
@@ -13,34 +15,85 @@ public object LocaleChooser {
*/
public fun <T> Map<String, T>?.getBestLocale(localeList: LocaleListCompat): T? {
    if (isNullOrEmpty()) return null
    // with a single entry there is nothing to choose between
    if (size == 1) return values.first()

    // The locale to search for: none for an empty list, the only element of a one-element
    // list, otherwise whatever the list considers its first match among our keys.
    val firstMatch: Locale? = when (localeList.size()) {
        0 -> null
        1 -> localeList.get(0)
        else -> localeList.getFirstMatch(keys.toTypedArray())
    }

    val bestMatch: T? = firstMatch?.let { locale ->
        // try first matched tag first (usually has region tag, e.g. de-DE)
        get(locale.toLanguageTag()) ?: run {
            val hasScript = !locale.script.isNullOrEmpty()
            val hasCountry = !locale.country.isNullOrEmpty()
            // rank of the form that the exact lookup above already covered:
            // 0 = language, 1 = language-script, 2 = language-country, 3 = language-script-country
            val tried = (if (hasScript) 1 else 0) + (if (hasCountry) 2 else 0)

            // Search by ranking priority, determining the script if it was not supplied.
            // This is a `when` bound to a local (rather than an if/else-if chain continued
            // with `?:`) so that the fallbacks below are reached from every branch; a trailing
            // `?:` would only attach to the nested `else if` expression.
            val ranked: T? = when {
                !hasScript -> ICUCompat.maximizeAndGetScript(locale)
                    ?.takeUnless { it.isEmpty() }
                    ?.let { script -> getInRankingOrder(locale, tried + 1, script, tried) }
                tried > 1 -> getInRankingOrder(locale, tried - 1, locale.script, tried)
                else -> null
            }

            when {
                ranked != null -> ranked
                // a bare language tag was already covered by the exact lookup
                tried == 0 -> null
                // then language only, and other countries, if the script matches
                else -> get(locale.language)?.takeIf { _ ->
                    LocaleListCompat.matchesLanguageAndScript(
                        getLocale(locale.language),
                        locale,
                    )
                } ?: getFirstSameScript(locale)
            }
        }
    }

    // or English and then just take the first of the list
    return bestMatch ?: get("en-US") ?: get("en") ?: values.first()
}
/**
 * Looks up this map by candidate tags of decreasing specificity and returns the first value found.
 *
 * Candidates are built by [getRankingTag], starting at [rank] and counting down to 1
 * (3 = language-script-country, 2 = language-country, 1 = language-script).
 * The rank equal to [tried] is skipped because the caller has already looked that form up.
 *
 * @param locale the locale supplying language and country
 * @param rank the rank to start from; values of 0 or less yield null
 * @param script the script to use in candidate tags
 * @param tried the rank that must not be looked up again
 * @return the first value found, or null if no candidate tag is a key of this map
 */
private fun <T> Map<String, T>.getInRankingOrder(
    locale: Locale,
    rank: Int,
    script: String?,
    tried: Int,
): T? {
    for (candidate in rank downTo 1) {
        if (candidate == tried) continue
        // not every rank can be expressed for every locale (e.g. no country available)
        val tag = getRankingTag(locale, candidate, script) ?: continue
        get(tag)?.let { return it }
    }
    return null
}
/**
 * Returns the value of the first entry, in this map's iteration order, whose key starts with the
 * language of [locale] followed by `-` and whose parsed tag is accepted by
 * [LocaleListCompat.matchesLanguageAndScript] when compared with [locale].
 *
 * @return the value of the first such entry, or null if there is none
 */
private fun <T> Map<String, T>.getFirstSameScript(locale: Locale): T? {
    // requiring the separator keeps e.g. language "en" from matching an unrelated key "eng..."
    // NOTE(review): keys written with '_' (e.g. "de_AT") are not considered here — confirm intended
    val languagePrefix = locale.language + "-"
    return entries.firstOrNull { (key, _) ->
        key.startsWith(languagePrefix) &&
            LocaleListCompat.matchesLanguageAndScript(Locale.forLanguageTag(key), locale)
    }?.value
}
/**
 * Builds the candidate tag for the given [rank], or returns null if that rank cannot be
 * expressed with the available parts.
 *
 * - rank 3: `language-script-country`, needs both a country and a [script]
 * - rank 2: `language-country`, needs a country; if a [script] is given, the tag is only
 *   produced when the script that ICU derives for language + country equals it (ignoring case)
 * - rank 1: `language-script`, needs a [script]
 * - any other rank: null
 */
private fun getRankingTag(locale: Locale, rank: Int, script: String?): String? {
    val language = locale.language
    val country = locale.country
    val hasCountry = !country.isNullOrEmpty()
    val hasScript = !script.isNullOrEmpty()
    return when (rank) {
        3 -> if (hasCountry && hasScript) "$language-$script-$country" else null
        2 -> when {
            !hasCountry -> null
            !hasScript -> "$language-$country"
            // only usable if this country implies the requested script
            script.equals(
                ICUCompat.maximizeAndGetScript(getLocale(language, country)),
                true
            ) -> "$language-$country"
            else -> null
        }
        1 -> if (hasScript) "$language-$script" else null
        else -> null
    }
}
/**
 * Creates a [Locale] from [language] and an optional second subtag, using `Locale.of` on
 * SDK 36 and later and the deprecated two-argument constructor before that.
 *
 * NOTE(review): the second parameter is named [script], but it is passed in the position that
 * both `Locale.of(String, String)` and `Locale(String, String)` document as the country;
 * the two-argument call visible in this file passes `locale.country`.
 */
private fun getLocale(language: String, script: String = "") = when {
    android.os.Build.VERSION.SDK_INT >= 36 -> Locale.of(language, script)
    else ->
        @Suppress("DEPRECATION")
        Locale(language, script)
}
}