mirror of
https://github.com/kiwix/libkiwix.git
synced 2025-12-24 06:57:59 -05:00
Compare commits
16 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
42a2ce2534 | ||
|
|
3945dda5d0 | ||
|
|
d65dd859da | ||
|
|
d94d2c1e8a | ||
|
|
a20b135f80 | ||
|
|
6d520a8aa7 | ||
|
|
f82bfc068f | ||
|
|
e6335be897 | ||
|
|
1074e833b7 | ||
|
|
9da5fbad1e | ||
|
|
1869fb4e8e | ||
|
|
536198fa38 | ||
|
|
ca808718f7 | ||
|
|
b65074f961 | ||
|
|
8b7d1ef9ec | ||
|
|
8b0f01fa9b |
@@ -1,3 +1,11 @@
|
||||
libkiwix 14.1.1
|
||||
===============
|
||||
|
||||
* Server:
|
||||
- Fix regression for kiwix-serve --nosearchbar (@veloman-yunkan #1250)
|
||||
- Avoid results content interpretation... crash in fulltext search (@vighnesh-sawant #1241)
|
||||
- Fix for intermittent /content/blank.html errors (@veloman-yunkan #1249)
|
||||
|
||||
libkiwix 14.1.0
|
||||
===============
|
||||
|
||||
|
||||
@@ -155,6 +155,15 @@ class Manager
|
||||
const std::string& url = "",
|
||||
const bool checkMetaData = false);
|
||||
|
||||
/**
|
||||
* Add all books from the directory tree into the library.
|
||||
*
|
||||
* @param path The path of the directory to scan.
|
||||
* @param verboseFlag Verbose logs flag.
|
||||
*/
|
||||
void addBooksFromDirectory(const std::string& path,
|
||||
const bool verboseFlag = false);
|
||||
|
||||
std::string writableLibraryPath;
|
||||
|
||||
bool m_hasSearchResult = false;
|
||||
|
||||
@@ -35,14 +35,6 @@ namespace Xapian
|
||||
class Database;
|
||||
}
|
||||
|
||||
namespace nuspell
|
||||
{
|
||||
inline namespace v5
|
||||
{
|
||||
class Dictionary;
|
||||
}
|
||||
}
|
||||
|
||||
namespace kiwix
|
||||
{
|
||||
|
||||
@@ -59,7 +51,6 @@ public: // functions
|
||||
|
||||
private: // data
|
||||
std::unique_ptr<Xapian::Database> impl_;
|
||||
std::unique_ptr<nuspell::Dictionary> nuspell_;
|
||||
};
|
||||
|
||||
} // namespace kiwix
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
project('libkiwix', 'cpp',
|
||||
version : '14.1.0',
|
||||
version : '14.1.1',
|
||||
license : 'GPLv3+',
|
||||
default_options : ['c_std=c11', 'cpp_std=c++17', 'werror=true'])
|
||||
|
||||
@@ -61,7 +61,6 @@ libcurl_dep = dependency('libcurl', static:static_deps)
|
||||
microhttpd_dep = dependency('libmicrohttpd', static:static_deps)
|
||||
zlib_dep = dependency('zlib', static:static_deps)
|
||||
xapian_dep = dependency('xapian-core', static:static_deps)
|
||||
libnuspell_dep = dependency('libnuspell', static:static_deps)
|
||||
|
||||
if compiler.has_header('mustache.hpp')
|
||||
extra_include = []
|
||||
@@ -95,7 +94,7 @@ endif
|
||||
|
||||
|
||||
# Dependencies as string
|
||||
all_deps = [thread_dep, libzim_dep, pugixml_dep, libcurl_dep, microhttpd_dep, zlib_dep, xapian_dep, libnuspell_dep]
|
||||
all_deps = [thread_dep, libzim_dep, pugixml_dep, libcurl_dep, microhttpd_dep, zlib_dep, xapian_dep]
|
||||
|
||||
# Dependencies as array
|
||||
all_deps += libicu_deps
|
||||
|
||||
@@ -23,6 +23,14 @@
|
||||
#include "tools/pathTools.h"
|
||||
|
||||
#include <pugixml.hpp>
|
||||
#include <filesystem>
|
||||
#include <iostream>
|
||||
#include <set>
|
||||
#include <queue>
|
||||
#include <cctype>
|
||||
#include <algorithm>
|
||||
|
||||
namespace fs = std::filesystem;
|
||||
|
||||
namespace kiwix
|
||||
{
|
||||
@@ -251,6 +259,58 @@ bool Manager::addBookFromPath(const std::string& pathToOpen,
|
||||
.empty());
|
||||
}
|
||||
|
||||
void Manager::addBooksFromDirectory(const std::string& path,
|
||||
const bool verboseFlag)
|
||||
{
|
||||
std::set<std::string> iteratedDirs;
|
||||
std::queue<std::string> dirQueue;
|
||||
dirQueue.push(fs::absolute(path).u8string());
|
||||
int totalBooksAdded = 0;
|
||||
if (verboseFlag)
|
||||
std::cout << "Adding books from the directory tree: " << dirQueue.front() << std::endl;
|
||||
|
||||
while (!dirQueue.empty()) {
|
||||
const auto currentPath = dirQueue.front();
|
||||
dirQueue.pop();
|
||||
if (verboseFlag)
|
||||
std::cout << "Visiting directory: " << currentPath << std::endl;
|
||||
for (const auto& dirEntry : fs::directory_iterator(currentPath)) {
|
||||
auto resolvedPath = dirEntry.path();
|
||||
if (fs::is_symlink(dirEntry)) {
|
||||
try {
|
||||
resolvedPath = fs::canonical(dirEntry.path());
|
||||
} catch (const std::exception& e) {
|
||||
std::cerr << "Could not resolve symlink " << resolvedPath.u8string() << " to a valid path. Skipping..." << std::endl;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
const std::string pathString = resolvedPath.u8string();
|
||||
std::string resolvedPathExtension = resolvedPath.extension();
|
||||
std::transform(resolvedPathExtension.begin(), resolvedPathExtension.end(), resolvedPathExtension.begin(),
|
||||
[](unsigned char c){ return std::tolower(c); });
|
||||
if (fs::is_directory(resolvedPath)) {
|
||||
if (iteratedDirs.find(pathString) == iteratedDirs.end())
|
||||
dirQueue.push(pathString);
|
||||
else if (verboseFlag)
|
||||
std::cout << "Already iterated over " << pathString << ". Skipping..." << std::endl;
|
||||
} else if (resolvedPathExtension == ".zim" || resolvedPathExtension == ".zimaa") {
|
||||
if (!this->addBookFromPath(pathString, pathString, "", false)) {
|
||||
std::cerr << "Could not add " << pathString << " into the library." << std::endl;
|
||||
} else if (verboseFlag) {
|
||||
std::cout << "Added " << pathString << " into the library." << std::endl;
|
||||
totalBooksAdded++;
|
||||
}
|
||||
} else if (verboseFlag) {
|
||||
std::cout << "Skipped " << pathString << " - unsupported file type or permission denied." << std::endl;
|
||||
}
|
||||
}
|
||||
iteratedDirs.insert(currentPath);
|
||||
}
|
||||
|
||||
if (verboseFlag)
|
||||
std::cout << "Traversal completed. Total books added: " << totalBooksAdded << std::endl;
|
||||
}
|
||||
|
||||
bool Manager::readBookFromPath(const std::string& path, kiwix::Book* book)
|
||||
{
|
||||
std::string tmp_path = path;
|
||||
|
||||
@@ -20,12 +20,10 @@
|
||||
#include "spelling_correction.h"
|
||||
#include "zim/archive.h"
|
||||
|
||||
#include <fstream>
|
||||
#include <sstream>
|
||||
#include <stdexcept>
|
||||
|
||||
#include <xapian.h>
|
||||
#include <nuspell/dictionary.hxx>
|
||||
|
||||
namespace kiwix
|
||||
{
|
||||
@@ -82,39 +80,10 @@ std::unique_ptr<Xapian::Database> openOrCreateXapianDB(std::filesystem::path cac
|
||||
}
|
||||
}
|
||||
|
||||
const char nuspellAffFileData[] = R"(
|
||||
SET UTF-8
|
||||
TRY qwertzuiopasdfghjklyxcvbnmQWERTZUIOPASDFGHJKLYXCVBNM
|
||||
)";
|
||||
|
||||
std::unique_ptr<std::istream> getAffDataStream()
|
||||
{
|
||||
const char* const userAffFilePath = ::getenv("KIWIX_NUSPELL_AFF_FILE_PATH");
|
||||
if ( userAffFilePath ) {
|
||||
return std::make_unique<std::ifstream>(userAffFilePath);
|
||||
}
|
||||
|
||||
return std::make_unique<std::istringstream>(nuspellAffFileData);
|
||||
}
|
||||
|
||||
std::unique_ptr<nuspell::Dictionary> createNuspellDictionary(const zim::Archive& archive)
|
||||
{
|
||||
auto d = std::make_unique<nuspell::Dictionary>();
|
||||
const auto& allTitles = getAllTitles(archive);
|
||||
std::stringstream dicSS;
|
||||
dicSS << allTitles.size() << "\n";
|
||||
for ( const auto& t : allTitles ) {
|
||||
dicSS << t << "\n";
|
||||
}
|
||||
d->load_aff_dic(*getAffDataStream(), dicSS);
|
||||
return d;
|
||||
}
|
||||
|
||||
} // unnamed namespace
|
||||
|
||||
SpellingsDB::SpellingsDB(const zim::Archive& archive, std::filesystem::path cacheDirPath)
|
||||
: impl_(openOrCreateXapianDB(cacheDirPath, archive))
|
||||
, nuspell_(createNuspellDictionary(archive))
|
||||
{
|
||||
}
|
||||
|
||||
@@ -124,13 +93,14 @@ SpellingsDB::~SpellingsDB()
|
||||
|
||||
std::vector<std::string> SpellingsDB::getSpellingCorrections(const std::string& word, uint32_t maxCount) const
|
||||
{
|
||||
std::vector<std::string> result;
|
||||
nuspell_->suggest(word, result);
|
||||
if ( result.size() > maxCount ) {
|
||||
result.resize(maxCount);
|
||||
if ( maxCount > 1 ) {
|
||||
throw std::runtime_error("More than one spelling correction was requested");
|
||||
}
|
||||
if ( result.size() == 1 && result[0] == word ) {
|
||||
result.clear();
|
||||
|
||||
std::vector<std::string> result;
|
||||
const auto term = impl_->get_spelling_suggestion(word, 3);
|
||||
if ( !term.empty() ) {
|
||||
result.push_back(term);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
@@ -1,13 +1,18 @@
|
||||
{
|
||||
"@metadata": {
|
||||
"authors": [
|
||||
"Jimkats",
|
||||
"Kelson",
|
||||
"Norhorn",
|
||||
"Ανώνυμος Βικιπαιδιστής"
|
||||
]
|
||||
},
|
||||
"name": "Αγγλικά",
|
||||
"suggest-full-text-search": "περιέχει '{{{SEARCH_TERMS}}}'...",
|
||||
"no-such-book": "Δεν υπάρχει τέτοιο βιβλίο: {{BOOK_NAME}}",
|
||||
"caution-warning": "Προσοχή!",
|
||||
"search-result-book-info": "από {{BOOK_TITLE}}",
|
||||
"word-count": "{{COUNT}} λέξεις",
|
||||
"welcome-page-overzealous-filter": "Κανένα αποτέλεσμα. Θέλετε να <a href=\"{{URL}}\">επαναφέρετε το φίλτρο</a>;",
|
||||
"powered-by-kiwix-html": "Με την υποστήριξη by <a href=\"https://kiwix.org\">Kiwix</a>",
|
||||
"search": "Αναζήτηση",
|
||||
@@ -19,10 +24,10 @@
|
||||
"direct-download-alt-text": "άμεση λήψη",
|
||||
"hash-download-alt-text": "λήψη αναγνωριστικού",
|
||||
"magnet-alt-text": "λήψη μαγνήτη",
|
||||
"torrent-download-link-text": "Αρχείο torrent",
|
||||
"torrent-download-alt-text": "λήψη torrent",
|
||||
"filter-by-tag": "Φίλτρο ανά ετικέτα \"{{TAG}}\"",
|
||||
"stop-filtering-by-tag": "Διακοπή φίλτρου ανά ετικέτα \"{{TAG}}\"",
|
||||
"torrent-download-link-text": "BitTorrent",
|
||||
"torrent-download-alt-text": "Λήψη μέσω BitTorrent",
|
||||
"filter-by-tag": "Φιλτράρισμα κατά ετικέτα \"{{{TAG}}}\"",
|
||||
"stop-filtering-by-tag": "Διακοπή φιλτραρίσματος κατά ετικέτα \"{{{TAG}}}\"",
|
||||
"welcome-to-kiwix-server": "Καλώς ορίσατε στον διακομιστή Kiwix",
|
||||
"download-links-heading": "Λήψη συνδέσμων για <b><i>{{BOOK_TITLE}}</i></b>",
|
||||
"download-links-title": "Κατεβάστε το βιβλίο",
|
||||
|
||||
@@ -271,10 +271,12 @@ function translateErrorPageIfNeeded() {
|
||||
let iframeLocationHref = null;
|
||||
|
||||
function handle_content_url_change() {
|
||||
if ( iframeLocationHref == contentIframe.contentWindow.location.href )
|
||||
const iframeLocation = contentIframe.contentWindow.location;
|
||||
|
||||
if ( iframeLocationHref == iframeLocation.href ||
|
||||
!iframeLocation.pathname.startsWith(root + '/content/') )
|
||||
return;
|
||||
|
||||
const iframeLocation = contentIframe.contentWindow.location;
|
||||
iframeLocationHref = iframeLocation.href;
|
||||
console.log('handle_content_url_change: ' + iframeLocation.href);
|
||||
document.title = contentIframe.contentDocument.title;
|
||||
@@ -431,8 +433,6 @@ function setup_chaperon_mode() {
|
||||
}
|
||||
}
|
||||
|
||||
let viewerSetupComplete = false;
|
||||
|
||||
function on_content_load() {
|
||||
const loader = document.getElementById("kiwix__loader");
|
||||
|
||||
@@ -588,6 +588,7 @@ function setupViewer() {
|
||||
|
||||
const kiwixToolBarWrapper = document.getElementById('kiwixtoolbarwrapper');
|
||||
if ( ! viewerSettings.toolbarEnabled ) {
|
||||
finishViewerSetup();
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -636,10 +637,13 @@ function updateUIText() {
|
||||
function finishViewerSetupOnceTranslationsAreLoaded()
|
||||
{
|
||||
updateUIText();
|
||||
finishViewerSetup();
|
||||
}
|
||||
|
||||
function finishViewerSetup()
|
||||
{
|
||||
handle_location_hash_change();
|
||||
|
||||
window.onhashchange = handle_location_hash_change;
|
||||
window.onpopstate = handle_history_state_change;
|
||||
|
||||
viewerSetupComplete = true;
|
||||
}
|
||||
|
||||
@@ -77,7 +77,7 @@ const ResourceCollection resources200Compressible{
|
||||
{ DYNAMIC_CONTENT, "/ROOT%23%3F/skin/taskbar.css" },
|
||||
{ STATIC_CONTENT, "/ROOT%23%3F/skin/taskbar.css?cacheid=42e90cb9" },
|
||||
{ DYNAMIC_CONTENT, "/ROOT%23%3F/skin/viewer.js" },
|
||||
{ STATIC_CONTENT, "/ROOT%23%3F/skin/viewer.js?cacheid=3208c3ed" },
|
||||
{ STATIC_CONTENT, "/ROOT%23%3F/skin/viewer.js?cacheid=00e0fdf3" },
|
||||
{ DYNAMIC_CONTENT, "/ROOT%23%3F/skin/fonts/Poppins.ttf" },
|
||||
{ STATIC_CONTENT, "/ROOT%23%3F/skin/fonts/Poppins.ttf?cacheid=af705837" },
|
||||
{ DYNAMIC_CONTENT, "/ROOT%23%3F/skin/fonts/Roboto.ttf" },
|
||||
@@ -338,7 +338,7 @@ R"EXPECTEDRESULT( <link type="text/css" href="./skin/kiwix.css?cacheid=b4e29e
|
||||
<script type="text/javascript" src="./skin/polyfills.js?cacheid=a0e0343d"></script>
|
||||
<script type="module" src="./skin/i18n.js?cacheid=e9a10ac1" defer></script>
|
||||
<script type="text/javascript" src="./skin/languages.js?cacheid=08955948" defer></script>
|
||||
<script type="text/javascript" src="./skin/viewer.js?cacheid=3208c3ed" defer></script>
|
||||
<script type="text/javascript" src="./skin/viewer.js?cacheid=00e0fdf3" defer></script>
|
||||
<script type="text/javascript" src="./skin/autoComplete/autoComplete.min.js?cacheid=1191aaaf"></script>
|
||||
const blankPageUrl = root + "/skin/blank.html?cacheid=6b1fa032";
|
||||
<label for="kiwix_button_show_toggle"><img src="./skin/caret.png?cacheid=22b942b4" alt=""></label>
|
||||
|
||||
@@ -78,7 +78,21 @@ void testSpellingCorrections(const kiwix::SpellingsDB& spellingsDB)
|
||||
EXPECT_SPELLING_CORRECTION("beissen", 1, ({"beißen"}));
|
||||
EXPECT_SPELLING_CORRECTION("Camera", 1, ({"Kamera"}));
|
||||
EXPECT_SPELLING_CORRECTION("Kaos", 1, ({"Chaos"}));
|
||||
EXPECT_SPELLING_CORRECTION("Lax", 1, ({"Lachs"}));
|
||||
|
||||
// The spelling correction "Lax -> Lachs" is affected by commit
|
||||
// https://github.com/xapian/xapian/commit/0cbe35de5c392623388946e6769aa03f912fdde4
|
||||
// which caps the edit distance at (length(query_word) - 1). As a result, the
|
||||
// max edit distance parameter that we pass into get_spelling_suggestion() is
|
||||
// reduced from 3 to 2 and is below the edit distance of "Lachs" from "Lax".
|
||||
const auto xapianVersion = std::make_tuple(Xapian::major_version(),
|
||||
Xapian::minor_version(),
|
||||
Xapian::revision());
|
||||
if ( xapianVersion < std::make_tuple(1, 4, 19) ) {
|
||||
EXPECT_SPELLING_CORRECTION("Lax", 1, ({"Lachs"}));
|
||||
} else {
|
||||
EXPECT_SPELLING_CORRECTION("Lax", 1, ({}));
|
||||
}
|
||||
|
||||
EXPECT_SPELLING_CORRECTION("Mont", 1, ({"Mond"}));
|
||||
EXPECT_SPELLING_CORRECTION("Umweltstandart", 1, ({"Umweltstandard"}));
|
||||
EXPECT_SPELLING_CORRECTION("seid", 1, ({"seit"}));
|
||||
@@ -130,26 +144,24 @@ void testSpellingCorrections(const kiwix::SpellingsDB& spellingsDB)
|
||||
// Exact match is not considered a spelling correction
|
||||
EXPECT_SPELLING_CORRECTION("Führerschein", 1, ({}));
|
||||
|
||||
// Max edit distance can be quite large
|
||||
// Max edit distance is 3
|
||||
EXPECT_SPELLING_CORRECTION( "Führersch", 1, ({"Führerschein"}));
|
||||
EXPECT_SPELLING_CORRECTION("Führersc", 1, ({"Führerschein"}));
|
||||
EXPECT_SPELLING_CORRECTION("Führ", 1, ({"Führerschein"}));
|
||||
EXPECT_SPELLING_CORRECTION("Füh", 1, ({}));
|
||||
// Case doesn't matter in edit distance
|
||||
EXPECT_SPELLING_CORRECTION("führ", 1, ({"Führerschein"}));
|
||||
EXPECT_SPELLING_CORRECTION("Führersc", 1, ({}));
|
||||
// Case matters in edit distance
|
||||
EXPECT_SPELLING_CORRECTION("führersch", 1, ({}));
|
||||
// Diacritics matters in edit distance
|
||||
EXPECT_SPELLING_CORRECTION("Fuhr", 1, ({}));
|
||||
EXPECT_SPELLING_CORRECTION("Fuhrersch", 1, ({}));
|
||||
// Mismatch in diacritics counts as 1 in edit distance (this is not trivial,
|
||||
// because from the UTF-8 perspective it is a one-byte vs two-byte encoding
|
||||
// of a Unicode codepoint).
|
||||
EXPECT_SPELLING_CORRECTION("Führersche", 1, ({"Führerschein"}));
|
||||
|
||||
EXPECT_SPELLING_CORRECTION("Führershine", 1, ({"Führerschein"}));
|
||||
EXPECT_SPELLING_CORRECTION("Führershyne", 1, ({"Führerschein"}));
|
||||
EXPECT_SPELLING_CORRECTION("führershine", 1, ({"Führerschein"}));
|
||||
EXPECT_SPELLING_CORRECTION("Führershyne", 1, ({}));
|
||||
EXPECT_SPELLING_CORRECTION("führershine", 1, ({}));
|
||||
|
||||
EXPECT_SPELLING_CORRECTION("Führerschrom", 1, ({"Führerschein"}));
|
||||
EXPECT_SPELLING_CORRECTION("Führerscdrom", 1, ({"Führerschein"}));
|
||||
|
||||
// More than one spelling correction can be requested
|
||||
EXPECT_SPELLING_CORRECTION("Kung", 2, ({"King", "Kong"}));
|
||||
EXPECT_SPELLING_CORRECTION("Kung", 3, ({"King", "Kong"}));
|
||||
EXPECT_SPELLING_CORRECTION("Führerscdrom", 1, ({}));
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
// Shortcomings of the proof-of-concept implementation
|
||||
@@ -159,6 +171,10 @@ void testSpellingCorrections(const kiwix::SpellingsDB& spellingsDB)
|
||||
EXPECT_SPELLING_CORRECTION("Laurem", 1, ({}));
|
||||
EXPECT_SPELLING_CORRECTION("ibsum", 1, ({}));
|
||||
EXPECT_SPELLING_CORRECTION("Loremipsum", 1, ({"Lorem ipsum"}));
|
||||
|
||||
// Only one spelling correction can be requested
|
||||
// EXPECT_SPELLING_CORRECTION("Kung", 2, ({"King", "Kong"}));
|
||||
EXPECT_THROW(spellingsDB.getSpellingCorrections("Kung", 2), std::runtime_error);
|
||||
}
|
||||
|
||||
using StrCollection = std::vector<std::string>;
|
||||
|
||||
Reference in New Issue
Block a user