Files
libkiwix/src/tools/languageTools.cpp
Veloman Yunkan 04bf1be9d6 Deprived pseudolang mul of a self-name
ICU package contains a special code "mul" with a self-name of "multiple
languages". libkiwix now suppresses that. As a result the self-name of
"mul" (like for any other unknown language code) is the code itself
(i.e. "mul").

The most prominent user-visible effect of this change is that the
language filter in the library page no longer contains a "Multiple
languages" entry if there is a legacy ZIM file with the language set to
"mul" - that entry now shows up as "Mul".
2024-09-09 16:57:07 +04:00

87 lines
2.4 KiB
C++
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#include "tools.h"
#include "stringTools.h"
#include <mutex>
namespace kiwix
{
namespace
{
// Language names keyed by language code, for codes whose mapping is not
// provided by the ICU library. Any such missing mapping can be added here
// manually.
//
// The map is intentionally non-const: fillLanguagesMap() extends it at runtime
// with the languages known to ICU. It does so with insert(), which does not
// replace keys that are already present, so the entries listed here are kept.
std::map<std::string, std::string> iso639_3 = {
{"ami", "Amis"},
{"atj", "atikamekw"},
{"azb", "آذربایجان دیلی"},
{"bcl", "central bikol"},
{"bgs", "tagabawa"},
{"blk", "ပအိုဝ်ႏ"},
{"bxr", "буряад хэлэн"},
{"cbk", "chavacano"},
{"cdo", "閩東語"},
{"dag", "Dagbani"},
{"diq", "dimli"},
{"dty", "डोटेली"},
{"eml", "emiliân-rumagnōl"},
{"fbs", "српскохрватски"},
{"fon", "fɔ̀ngbè"},
{"gcr", "Kriyòl gwiyannen"},
{"guw", "Gungbe"},
{"hbs", "srpskohrvatski"},
{"hyw", "հայերէն/հայերեն"},
{"ido", "ido"},
{"kbp", "kabɩ"},
{"kld", "Gamilaraay"},
{"lbe", "лакку маз"},
{"lbj", "ལ་དྭགས་སྐད་"},
{"lld", "ladin"},
{"map", "Austronesian"},
{"mhr", "марий йылме"},
{"mnw", "ဘာသာမန်"},
{"myn", "mayan"},
{"nah", "nahuatl"},
{"nai", "north American Indian"},
{"nds", "plattdütsch"},
{"nrm", "bhasa narom"},
{"olo", "livvi"},
{"pih", "Pitcairn-Norfolk"},
{"pnb", "Western Panjabi"},
{"pwn", "Pinayuanan"},
{"rmr", "Caló"},
{"rmy", "romani shib"},
{"roa", "romance languages"},
{"skr", "سرائیکی"},
{"szy", "Sakizaya"},
{"tay", "Tayal"},
{"tgl", "Wikang Tagalog"},
{"twi", "Akwapem Twi"},
// ICU on Ubuntu versions <= focal (20.04) returns "" for the language code "",
// whereas later versions return "und". The empty code is mapped to
// "Undetermined" here to provide common ground.
{"", "Undetermined"},
};
// Ensures that fillLanguagesMap() is executed at most once; it is passed to
// std::call_once() by getLanguageSelfName().
std::once_flag fillLanguagesFlag;

// Extends iso639_3 with an (ISO3 code -> self-name) entry for every language
// listed by ICU.
//
// insert() leaves already-present keys untouched, so the manually maintained
// entries of iso639_3 are not overwritten by ICU data.
void fillLanguagesMap()
{
  // The list returned by ICU is walked as a null-terminated array of
  // language code strings.
  for (auto icuLangPtr = icu::Locale::getISOLanguages(); *icuLangPtr != nullptr; ++icuLangPtr) {
    const kiwix::ICULanguageInfo lang(*icuLangPtr);
    iso639_3.insert({lang.iso3Code(), lang.selfName()});
  }

  // ICU contains a special code "mul" with a self-name of "multiple
  // languages". Drop it so that "mul" behaves like any other unknown language
  // code, i.e. its self-name is the code itself.
  iso639_3.erase("mul");
}
} // unnamed namespace
// Returns the self-name registered for the language code `lang`.
//
// On first use the lookup table is completed with the languages known to ICU;
// std::call_once guarantees that this happens only once. If `lang` has no
// entry in the table, the code itself is returned unchanged.
std::string getLanguageSelfName(const std::string& lang)
{
  std::call_once(fillLanguagesFlag, fillLanguagesMap);

  const auto itr = iso639_3.find(lang);
  if (itr != iso639_3.end()) {
    return itr->second;
  }

  // Unknown language code: fall back to the code as its own name.
  return lang;
}
} // namespace kiwix