Compare commits

...

15 Commits

Author SHA1 Message Date
Veloman Yunkan
68c9702772 User affix file can be used for spelling correction
This is a temporary change to facilitate playing with different affix
rules.
2025-11-24 17:53:04 +04:00
Veloman Yunkan
88d8f2788e Use nuspell for spelling correction
This is the initial version of using nuspell for spelling correction,
which yet has to be tuned.

Note that libnuspell must be available as a dependency.

Xapian-based code for spelling correction is not deleted.
2025-11-21 18:32:46 +04:00
Kelson
33f22eb966 Merge pull request #1241 from vighnesh-sawant/mustache-tag-escaping
Avoid interpretation of content coming from zim by mustache
2025-11-10 20:10:16 +01:00
Vighnesh
55c13c3d24 Avoid interpretation of content coming from zim by mustache 2025-11-10 20:10:06 +01:00
Veloman Yunkan
2b1f556c20 Merge pull request #1239 from kiwix/translatewiki
Localisation updates from https://translatewiki.net.
2025-11-10 18:41:03 +04:00
translatewiki.net
e0cd5a1642 Localisation updates from https://translatewiki.net. 2025-11-10 13:13:07 +01:00
Kelson
0a9ba9b678 Merge pull request #1237 from kiwix/release-14.1.0
Release 14.1.0
2025-10-31 15:17:48 +01:00
Emmanuel Engelhart
db9607e55e 14.1.0 changelog 2025-10-31 15:12:00 +01:00
Emmanuel Engelhart
592e22732e Bump-up version to 14.1.0 2025-10-31 15:12:00 +01:00
Kelson
17f0ad2cf4 Merge pull request #1234 from vighnesh-sawant/standard-port-enhancment
Add functions which return displayable addresses
2025-10-31 14:55:35 +01:00
Vighnesh
4928509991 Implement a function which returns server access url 2025-10-31 18:11:22 +05:30
Vighnesh
c2df0a99fe Normalize m_root in Server itself 2025-10-31 18:11:22 +05:30
Vighnesh
cffca3ad85 Sync m_addr of Server and InternalServer 2025-10-31 18:11:22 +05:30
Kelson
0a2bebe7a3 Merge pull request #1218 from kiwix/translatewiki
Localisation updates from https://translatewiki.net.
2025-10-31 12:44:38 +01:00
translatewiki.net
bdb1f09884 Localisation updates from https://translatewiki.net. 2025-10-30 13:12:50 +01:00
14 changed files with 202 additions and 86 deletions

View File

@@ -1,3 +1,23 @@
libkiwix 14.1.0
===============
* Server:
- Viewer detects & tracks intrapage navigation anchors too (@veloman-yunkan #1213)
- Add support for catalog only mode (@veloman-yunkan #1219)
- Add API which returns server access url (@vighnesh-sawant #1234)
- Fix chrome searchbar placeholder text overflow (@aditii2712 #1185)
- Fix magnet link queryStyring (@rgaudin #1160)
- Improve chrome printing stylesheet (@kelson42 #1202)
- Default white background (@kelson42 #1205)
* Other:
- Switched to the new libzim illustrations API (@veloman-yunkan #1226)
- Stop building Windows with DEBUG symbols in CI (@kelson42 #1165)
- Update many things in the CI/CD (@kelson42 #1203 #1194 #1209 #1207 #1235)
- Requires now libzim 9.4.0 (@kelson42 #1231)
- Fix compilation for FreeBSD (@OICe2 #1173 #1174)
- Wait up to 1s to let aria2c to start before complaining (@kelson42 #1169)
libkiwix 14.0.0
===============

View File

@@ -69,6 +69,7 @@ namespace kiwix
int getPort() const;
IpAddress getAddress() const;
IpMode getIpMode() const;
std::vector<std::string> getServerAccessUrls() const;
protected:
std::shared_ptr<Library> mp_library;

View File

@@ -35,6 +35,14 @@ namespace Xapian
class Database;
}
namespace nuspell
{
inline namespace v5
{
class Dictionary;
}
}
namespace kiwix
{
@@ -51,6 +59,7 @@ public: // functions
private: // data
std::unique_ptr<Xapian::Database> impl_;
std::unique_ptr<nuspell::Dictionary> nuspell_;
};
} // namespace kiwix

View File

@@ -1,5 +1,5 @@
project('libkiwix', 'cpp',
version : '14.0.0',
version : '14.1.0',
license : 'GPLv3+',
default_options : ['c_std=c11', 'cpp_std=c++17', 'werror=true'])
@@ -61,6 +61,7 @@ libcurl_dep = dependency('libcurl', static:static_deps)
microhttpd_dep = dependency('libmicrohttpd', static:static_deps)
zlib_dep = dependency('zlib', static:static_deps)
xapian_dep = dependency('xapian-core', static:static_deps)
libnuspell_dep = dependency('libnuspell', static:static_deps)
if compiler.has_header('mustache.hpp')
extra_include = []
@@ -94,7 +95,7 @@ endif
# Dependencies as string
all_deps = [thread_dep, libzim_dep, pugixml_dep, libcurl_dep, microhttpd_dep, zlib_dep, xapian_dep]
all_deps = [thread_dep, libzim_dep, pugixml_dep, libcurl_dep, microhttpd_dep, zlib_dep, xapian_dep, libnuspell_dep]
# Dependencies as array
all_deps += libicu_deps

View File

@@ -29,6 +29,22 @@
namespace kiwix {
namespace
{
std::string makeServerUrl(std::string host, int port, std::string root)
{
const int httpDefaultPort = 80;
if (port == httpDefaultPort) {
return "http://" + host + root;
} else {
return "http://" + host + ":" + std::to_string(port) + root;
}
}
} // unnamed namespace
Server::Server(LibraryPtr library, std::shared_ptr<NameMapper> nameMapper) :
mp_library(library),
mp_nameMapper(nameMapper),
@@ -56,7 +72,13 @@ bool Server::start() {
m_ipConnectionLimit,
m_catalogOnlyMode,
m_contentServerUrl));
return mp_server->start();
if (mp_server->start()) {
// this syncs m_addr of InternalServer and Server as they may diverge
m_addr = mp_server->getAddress();
return true;
} else {
return false;
}
}
void Server::stop() {
@@ -69,12 +91,12 @@ void Server::stop() {
void Server::setRoot(const std::string& root)
{
m_root = root;
if (m_root[0] != '/') {
m_root = "/" + m_root;
}
if (m_root.back() == '/') {
m_root.erase(m_root.size() - 1);
}
while (!m_root.empty() && m_root.back() == '/')
m_root.pop_back();
while (!m_root.empty() && m_root.front() == '/')
m_root = m_root.substr(1);
m_root = m_root.empty() ? m_root : "/" + m_root;
}
void Server::setAddress(const std::string& addr)
@@ -93,12 +115,12 @@ void Server::setAddress(const std::string& addr)
int Server::getPort() const
{
return mp_server->getPort();
return m_port;
}
IpAddress Server::getAddress() const
{
return mp_server->getAddress();
return m_addr;
}
IpMode Server::getIpMode() const
@@ -106,4 +128,16 @@ IpMode Server::getIpMode() const
return mp_server->getIpMode();
}
std::vector<std::string> Server::getServerAccessUrls() const
{
std::vector<std::string> result;
if (!m_addr.addr.empty()) {
result.push_back(makeServerUrl(m_addr.addr, m_port, m_root));
}
if (!m_addr.addr6.empty()) {
result.push_back(makeServerUrl("[" + m_addr.addr6 + "]", m_port, m_root));
}
return result;
}
}

View File

@@ -99,16 +99,6 @@ bool ipAvailable(const std::string addr)
return false;
}
inline std::string normalizeRootUrl(std::string rootUrl)
{
while ( !rootUrl.empty() && rootUrl.back() == '/' )
rootUrl.pop_back();
while ( !rootUrl.empty() && rootUrl.front() == '/' )
rootUrl = rootUrl.substr(1);
return rootUrl.empty() ? rootUrl : "/" + rootUrl;
}
std::string
fullURL2LocalURL(const std::string& fullUrl, const std::string& rootLocation)
{
@@ -440,7 +430,7 @@ InternalServer::InternalServer(LibraryPtr library,
std::string contentServerUrl) :
m_addr(addr),
m_port(port),
m_root(normalizeRootUrl(root)),
m_root(root),
m_rootPrefixOfDecodedURL(m_root),
m_nbThreads(nbThreads),
m_multizimSearchLimit(multizimSearchLimit),

View File

@@ -20,10 +20,12 @@
#include "spelling_correction.h"
#include "zim/archive.h"
#include <fstream>
#include <sstream>
#include <stdexcept>
#include <xapian.h>
#include <nuspell/dictionary.hxx>
namespace kiwix
{
@@ -80,10 +82,39 @@ std::unique_ptr<Xapian::Database> openOrCreateXapianDB(std::filesystem::path cac
}
}
const char nuspellAffFileData[] = R"(
SET UTF-8
TRY qwertzuiopasdfghjklyxcvbnmQWERTZUIOPASDFGHJKLYXCVBNM
)";
std::unique_ptr<std::istream> getAffDataStream()
{
const char* const userAffFilePath = ::getenv("KIWIX_NUSPELL_AFF_FILE_PATH");
if ( userAffFilePath ) {
return std::make_unique<std::ifstream>(userAffFilePath);
}
return std::make_unique<std::istringstream>(nuspellAffFileData);
}
std::unique_ptr<nuspell::Dictionary> createNuspellDictionary(const zim::Archive& archive)
{
auto d = std::make_unique<nuspell::Dictionary>();
const auto& allTitles = getAllTitles(archive);
std::stringstream dicSS;
dicSS << allTitles.size() << "\n";
for ( const auto& t : allTitles ) {
dicSS << t << "\n";
}
d->load_aff_dic(*getAffDataStream(), dicSS);
return d;
}
} // unnamed namespace
SpellingsDB::SpellingsDB(const zim::Archive& archive, std::filesystem::path cacheDirPath)
: impl_(openOrCreateXapianDB(cacheDirPath, archive))
, nuspell_(createNuspellDictionary(archive))
{
}
@@ -93,14 +124,13 @@ SpellingsDB::~SpellingsDB()
std::vector<std::string> SpellingsDB::getSpellingCorrections(const std::string& word, uint32_t maxCount) const
{
if ( maxCount > 1 ) {
throw std::runtime_error("More than one spelling correction was requested");
}
std::vector<std::string> result;
const auto term = impl_->get_spelling_suggestion(word, 3);
if ( !term.empty() ) {
result.push_back(term);
nuspell_->suggest(word, result);
if ( result.size() > maxCount ) {
result.resize(maxCount);
}
if ( result.size() == 1 && result[0] == word ) {
result.clear();
}
return result;
}

View File

@@ -26,10 +26,21 @@
"new-404-page-heading": "Ах! Страницата не е пронајдена.",
"404-img-text": "Не е најдено!",
"path-was-not-found": "Не ја најдов побараната патека:",
"404-advice.p1": "Содржината што ја барате може сепак да е достапна, но може да се наоѓа на друго место во рамките на ZIM-податотеката.",
"404-advice.p2": "Ве молиме:",
"404-advice.p3": "Пробајте да ја употребите функцијата за пребарување за да ја најдете содржината што ви треба",
"404-advice.p4": "Барајте клучни зборови или наслови поврзани со информациите што ви требаат",
"404-advice.p5": "Овој приод треба да ви помогне да ја најдете саканата содржина, дури и ако изворната врска не работи правилно.",
"500-page-title": "Внатрешна грешка во опслужувачот",
"500-page-heading": "Внатрешна грешка во опслужувачот",
"500-page-text": "Настана внатрешна грешка во опслужувачот. Жал ни е :/",
"500-page-heading": "Страницата не работи.",
"500-page-text": "Побараната патека не може правилно да се достави:",
"500-img-text": "Страницата не работи.",
"external-link-detected": "Најдена е надворешна врска",
"caution-warning": "Внимание!",
"external-link-intro": "На пат сте да го напуштите ZIM-читачот на Кивикс за да појдете на",
"external-link-advice.p1": "Врската што пробувате да ја отворите не е дел од вашиот вонмрежен пакет и бара семрежна врска.",
"external-link-advice.p2": "Ако можете да се поврзете со семрежнето, пробајте да ја отворите врската.",
"external-link-advice.p3": "Во спротивно можете повторно да пробате да ја отворите вонмрежната содржина на вашиот ZIM стискајќи на копчето за враќање назад на вашиот прелистувач.",
"fulltext-search-unavailable": "Целотекстното пребарување е недостапно",
"no-search-results": "Погонот за целотекстно пребарување не е достапен за оваа содржина.",
"search-results-page-title": "Пребарување: {{SEARCH_PATTERN}}",
@@ -38,9 +49,9 @@
"search-result-book-info": "од {{BOOK_TITLE}}",
"word-count": "{{COUNT}} зборови",
"library-button-text": "Оди на воведната страница",
"home-button-text": "Оди на главната страница на „{{BOOK_TITLE}}“",
"home-button-text": "Оди на главната страница на „{{{BOOK_TITLE}}}“",
"random-page-button-text": "Оди на случајно избрана страница",
"searchbox-tooltip": "Пребарај го „{{BOOK_TITLE}}“",
"searchbox-tooltip": "Пребарај по „{{{BOOK_TITLE}}}“",
"confusion-of-tongues": "Во пребарувањето ќе учествуваат две или повеќе книги на различни јазици, што може да довете до збунувачки исход.",
"welcome-page-overzealous-filter": "Нема исход. Дали би сакале да го <a href=\"{{URL}}\">поништите филтерот</a>?",
"powered-by-kiwix-html": "Овозможено од&nbsp;<a href=\"https://kiwix.org\">Кивикс</a>",

View File

@@ -1,6 +1,7 @@
{
"@metadata": {
"authors": [
"Apq",
"Jopparn",
"Larsa",
"Rofiatmustapha12",
@@ -25,9 +26,25 @@
"400-page-heading": "Ogiltig begäran",
"404-page-title": "Innehållet hittades inte",
"404-page-heading": "Hittades inte",
"new-404-page-title": "Sidan kunde inte hittas",
"new-404-page-heading": "Hoppsan. Sidan hittades inte.",
"404-img-text": "Hittades ej!",
"path-was-not-found": "Den begärda sökvägen hittades ej:",
"404-advice.p1": "Innehållet du letar efter kan fortfarande vara tillgängligt, men det kan finnas på en annan plats i ZIM-filen.",
"404-advice.p2": "Vänligen:",
"404-advice.p3": "Försök att använda sökfunktionen för att hitta det innehåll du vill ha",
"404-advice.p4": "Leta efter nyckelord eller titlar relaterade till den information du söker",
"404-advice.p5": "Den här metoden bör hjälpa dig att hitta önskat innehåll, även om den ursprungliga länken inte fungerar korrekt.",
"500-page-title": "Internt serverfel",
"500-page-heading": "Internt serverfel",
"500-page-text": "Ett internt serverfel uppstod. Vi ber om ursäkt för det :/",
"500-page-heading": "Hoppsan. Sidan fungerar inte.",
"500-page-text": "Den begärda sökvägen kan inte levereras korrekt:",
"500-img-text": "Sidan fungerar ej",
"external-link-detected": "Extern länk upptäckt",
"caution-warning": "Varning!",
"external-link-intro": "Du är på väg att lämna Kiwix ZIM-läsare för att gå online till",
"external-link-advice.p1": "Länken du försöker komma åt är inte en del av ditt offlinepaket och kräver en internetanslutning.",
"external-link-advice.p2": "Om du kan gå online kan du försöka öppna länken.",
"external-link-advice.p3": "Du kan annars återgå till ditt ZIM-innehåll offline genom att använda webbläsarens bakåtknapp.",
"fulltext-search-unavailable": "Fulltextsökning är inte tillgänglig",
"no-search-results": "Sökmaskinen för fulltext är inte tillgänglig för detta innehåll.",
"search-results-page-title": "Sök: {{SEARCH_PATTERN}}",
@@ -36,9 +53,9 @@
"search-result-book-info": "från {{BOOK_TITLE}}",
"word-count": "{{COUNT}} ord",
"library-button-text": "Gå till hemsidan",
"home-button-text": "Gå till huvudsidan för \"{{BOOK_TITLE}}\"",
"home-button-text": "Gå till huvudsidan för '{{{BOOK_TITLE}}}'",
"random-page-button-text": "Gå till en slumpmässigt utvald sida",
"searchbox-tooltip": "Sök efter \"{{BOOK_TITLE}}\"",
"searchbox-tooltip": "Sök '{{{BOOK_TITLE}}}'",
"confusion-of-tongues": "Två eller fler böcker på olika språk skulle delta i sökningen, vilket kan ge förvirrande resultat.",
"welcome-page-overzealous-filter": "Inga resultat. Vill du <a href=\"{{URL}}\">återställa filtret</a>?",
"powered-by-kiwix-html": "Drivs av&nbsp;<a href=\"https://kiwix.org\">Kiwix</a>",
@@ -73,5 +90,6 @@
"book-category.wikiversity": "Wikiversity",
"book-category.wikivoyage": "Wikivoyage",
"book-category.wiktionary": "Wiktionary",
"book-category.other": "Övriga"
"book-category.other": "Övriga",
"text-loading-content": "Laddar innehåll"
}

View File

@@ -2,7 +2,8 @@
"@metadata": {
"authors": [
"Hedda",
"Rofiatmustapha12"
"Rofiatmustapha12",
"SaldırganSincap"
]
},
"name": "Türkçe",
@@ -23,8 +24,8 @@
"404-page-title": "içerik bulunamadı",
"404-page-heading": "Bulunamadı",
"500-page-title": "İç Sunucu Hatası",
"500-page-heading": "İç Sunucu Hatası",
"500-page-text": "Dahili bir sunucu hatası oluştu. Bunun için üzgünüz :/",
"500-page-heading": "Üzgünüz. Sayfa çalışmıyor.",
"500-page-text": "İstenen yol düzgün bir şekilde teslim edilemiyor:",
"fulltext-search-unavailable": "Tam metin araması kullanılamıyor",
"no-search-results": "Tam metin arama motoru bu içerik için kullanılamaz.",
"search-results-page-title": "Arama: {{SEARCH_PATTERN}}",
@@ -33,9 +34,9 @@
"search-result-book-info": "{{BOOK_TITLE}} adlı kitaptan",
"word-count": "{{COUNT}} kelime",
"library-button-text": "Karşılama sayfasına git",
"home-button-text": "'{{BOOK_TITLE}}' anasayfasına gidin",
"home-button-text": "'{{{BOOK_TITLE}}}' ana sayfasına git",
"random-page-button-text": "Rastgele seçilen bir sayfaya git",
"searchbox-tooltip": "'{{BOOK_TITLE}}' ara",
"searchbox-tooltip": "'{{{BOOK_TITLE}}}' ara",
"confusion-of-tongues": "Aramaya farklı dillerde iki veya daha fazla kitap katılacak ve bu da kafa karıştırıcı sonuçlara yol açabilecektir.",
"welcome-page-overzealous-filter": "Sonuç yok. <a href=\"{{URL}}\">Filtreyi sıfırlamak</a> ister misiniz?",
"powered-by-kiwix-html": "<a href=\"https://kiwix.org\">Kiwix</a> tarafından desteklenmektedir",
@@ -45,16 +46,16 @@
"count-of-matching-books": "{{COUNT}} kitap",
"download": "İndir",
"direct-download-link-text": "Doğrudan",
"direct-download-alt-text": "direkt indirme",
"hash-download-link-text": "Sha256 haşesi",
"hash-download-alt-text": "csv indir",
"direct-download-alt-text": "Doğrudan HTTP(S) üzerinden indir",
"hash-download-link-text": "SHA-256 sağlama toplamı",
"hash-download-alt-text": "SHA-256 dosya toplam kontrolünü görüntüle",
"magnet-link-text": "Mıknatıs bağlantısı",
"magnet-alt-text": "mıknatısı indir",
"torrent-download-link-text": "Hedef dosya",
"torrent-download-alt-text": "torrenti indir",
"magnet-alt-text": "Mıknatıs bağlantısıyla indir",
"torrent-download-link-text": "BitTorrent",
"torrent-download-alt-text": "BitTorrent üzerinden indir",
"library-opds-feed-all-entries": "Kütüphane OPDS Akışı - Tüm girişler",
"filter-by-tag": "\"{{TAG}}\" etiketine göre filtrele",
"stop-filtering-by-tag": "\"{{TAG}}\" etiketine göre filtrelemeyi durdur",
"filter-by-tag": "\"{{{TAG}}}\" etiketine göre filtrele",
"stop-filtering-by-tag": "\"{{{TAG}}}\" etiketine göre filtrelemeyi durdur",
"library-opds-feed-parameterised": "Kütüphane OPDS Özet Akışı - {{#LANG}}\nLanguage: {{LANG}} {{/LANG}}{{#CATEGORY}}\nCategory: {{CATEGORY}} {{/CATEGORY}} ile eşleşen girişler {{#TAG}}\nTag: {{TAG}} {{/TAG}}{{#Q}}\nQuery: {{Q}} {{/Q}}",
"welcome-to-kiwix-server": "Kiwix Sunucusuna Hoş Geldiniz",
"download-links-heading": "<b><i>{{BOOK_TITLE}}</i></b> için indirme bağlantıları",

View File

@@ -1,6 +1,7 @@
{
"@metadata": {
"authors": [
"Cyanjiang",
"GuoPC",
"IceButBin",
"Kichin",
@@ -29,9 +30,25 @@
"400-page-heading": "无效请求",
"404-page-title": "未找到内容",
"404-page-heading": "未找到",
"new-404-page-title": "找不到页面",
"new-404-page-heading": "哎呀。页面未找到。",
"404-img-text": "未找到!",
"path-was-not-found": "未找到请求的路径:",
"404-advice.p1": "您正在寻找的内容可能仍然可用,但它可能位于 ZIM 文件中的不同位置。",
"404-advice.p2": "请:",
"404-advice.p3": "尝试使用搜索功能来查找您想要的内容",
"404-advice.p4": "查找与你正在寻找的信息相关的关键字或标题",
"404-advice.p5": "即使原始链接无法正常工作,这种方法也应该可以帮助您找到所需的内容。",
"500-page-title": "内部服务器错误",
"500-page-heading": "内部服务器错误",
"500-page-text": "内部服务器出现错误。真的十分抱歉 (;⁠ŏ⁠﹏⁠ŏ~)",
"500-page-heading": "哎呀。页面无法正常工作。",
"500-page-text": "请求的路径无法正确传递:",
"500-img-text": "页面无法正常工作",
"external-link-detected": "检测到外部链接",
"caution-warning": "警告!",
"external-link-intro": "你即将离开Kiwix的ZIM阅读器并打开网页",
"external-link-advice.p1": "你试图访问的链接不在你的离线包中,需要联网才能访问。",
"external-link-advice.p2": "如果您可以上网,您可以尝试打开该链接。",
"external-link-advice.p3": "您也可以使用浏览器的后退按钮返回 ZIM 的离线内容。",
"fulltext-search-unavailable": "全文搜索不可用",
"no-search-results": "全文搜索引擎不适用于该内容。",
"search-results-page-title": "搜索:{{SEARCH_PATTERN}}",

View File

@@ -117,7 +117,7 @@
{{title}}
</a>
{{#snippet}}
<cite>{{>snippet}}...</cite>
<cite>{{{snippet}}}...</cite>
{{/snippet}}
{{#bookInfo}}
<div class="book-title">{{{bookInfo}}}</div>

View File

@@ -21,7 +21,7 @@
<title>{{title}}</title>
<link>{{absolutePath}}</link>
{{#snippet}}
<description>{{>snippet}}...</description>
<description>{{{snippet}}}...</description>
{{/snippet}}
{{#bookTitle}}
<book>

View File

@@ -78,21 +78,7 @@ void testSpellingCorrections(const kiwix::SpellingsDB& spellingsDB)
EXPECT_SPELLING_CORRECTION("beissen", 1, ({"beißen"}));
EXPECT_SPELLING_CORRECTION("Camera", 1, ({"Kamera"}));
EXPECT_SPELLING_CORRECTION("Kaos", 1, ({"Chaos"}));
// The spelling correction "Lax -> Lachs" is affected by commit
// https://github.com/xapian/xapian/commit/0cbe35de5c392623388946e6769aa03f912fdde4
// which caps the edit distance at (length(query_word) - 1). As a result, the
// max edit distance parameter that we pass into get_spelling_suggestion() is
// reduced from 3 to 2 and is below the edit distance of "Lachs" from "Lax".
const auto xapianVersion = std::make_tuple(Xapian::major_version(),
Xapian::minor_version(),
Xapian::revision());
if ( xapianVersion < std::make_tuple(1, 4, 19) ) {
EXPECT_SPELLING_CORRECTION("Lax", 1, ({"Lachs"}));
} else {
EXPECT_SPELLING_CORRECTION("Lax", 1, ({}));
}
EXPECT_SPELLING_CORRECTION("Lax", 1, ({"Lachs"}));
EXPECT_SPELLING_CORRECTION("Mont", 1, ({"Mond"}));
EXPECT_SPELLING_CORRECTION("Umweltstandart", 1, ({"Umweltstandard"}));
EXPECT_SPELLING_CORRECTION("seid", 1, ({"seit"}));
@@ -144,24 +130,26 @@ void testSpellingCorrections(const kiwix::SpellingsDB& spellingsDB)
// Exact match is not considered a spelling correction
EXPECT_SPELLING_CORRECTION("Führerschein", 1, ({}));
// Max edit distance is 3
// Max edit distance can be quite large
EXPECT_SPELLING_CORRECTION( "Führersch", 1, ({"Führerschein"}));
EXPECT_SPELLING_CORRECTION("Führersc", 1, ({}));
// Case matters in edit distance
EXPECT_SPELLING_CORRECTION("führersch", 1, ({}));
EXPECT_SPELLING_CORRECTION("Führersc", 1, ({"Führerschein"}));
EXPECT_SPELLING_CORRECTION("Führ", 1, ({"Führerschein"}));
EXPECT_SPELLING_CORRECTION("h", 1, ({}));
// Case doesn't matter in edit distance
EXPECT_SPELLING_CORRECTION("führ", 1, ({"Führerschein"}));
// Diacritics matters in edit distance
EXPECT_SPELLING_CORRECTION("Fuhrersch", 1, ({}));
// Mismatch in diacritics counts as 1 in edit distance (this is not trivial,
// because from the UTF-8 perspective it is a one-byte vs two-byte encoding
// of a Unicode codepoint).
EXPECT_SPELLING_CORRECTION("Führersche", 1, ({"Führerschein"}));
EXPECT_SPELLING_CORRECTION("Fuhr", 1, ({}));
EXPECT_SPELLING_CORRECTION("Führershine", 1, ({"Führerschein"}));
EXPECT_SPELLING_CORRECTION("Führershyne", 1, ({}));
EXPECT_SPELLING_CORRECTION("führershine", 1, ({}));
EXPECT_SPELLING_CORRECTION("Führershyne", 1, ({"Führerschein"}));
EXPECT_SPELLING_CORRECTION("führershine", 1, ({"Führerschein"}));
EXPECT_SPELLING_CORRECTION("Führerschrom", 1, ({"Führerschein"}));
EXPECT_SPELLING_CORRECTION("Führerscdrom", 1, ({}));
EXPECT_SPELLING_CORRECTION("Führerscdrom", 1, ({"Führerschein"}));
// More than one spelling correction can be requested
EXPECT_SPELLING_CORRECTION("Kung", 2, ({"King", "Kong"}));
EXPECT_SPELLING_CORRECTION("Kung", 3, ({"King", "Kong"}));
//////////////////////////////////////////////////////////////////////////////
// Shortcomings of the proof-of-concept implementation
@@ -171,10 +159,6 @@ void testSpellingCorrections(const kiwix::SpellingsDB& spellingsDB)
EXPECT_SPELLING_CORRECTION("Laurem", 1, ({}));
EXPECT_SPELLING_CORRECTION("ibsum", 1, ({}));
EXPECT_SPELLING_CORRECTION("Loremipsum", 1, ({"Lorem ipsum"}));
// Only one spelling correction can be requested
// EXPECT_SPELLING_CORRECTION("Kung", 2, ({"King", "Kong"}));
EXPECT_THROW(spellingsDB.getSpellingCorrections("Kung", 2), std::runtime_error);
}
using StrCollection = std::vector<std::string>;