Compare commits

..

6 Commits

Author SHA1 Message Date
Nikhil Tanwar
42a2ce2534 fixup! fixup! fixup! fixup! Introduce Manager::addBooksFromDirectory() 2025-11-30 20:05:44 +01:00
Nikhil Tanwar
3945dda5d0 fixup! fixup! fixup! Introduce Manager::addBooksFromDirectory() 2025-11-30 20:05:44 +01:00
Nikhil Tanwar
d65dd859da fixup! fixup! Introduce Manager::addBooksFromDirectory() 2025-11-30 20:05:44 +01:00
Nikhil Tanwar
d94d2c1e8a Remove skipInvalid flag
skipInvalid=false makes little sense: if an invalid book is found, we can simply ignore it rather than stopping the whole operation midway.
2025-11-30 20:05:44 +01:00
Nikhil Tanwar
a20b135f80 fixup! Introduce Manager::addBooksFromDirectory() 2025-11-30 20:05:44 +01:00
Nikhil Tanwar
6d520a8aa7 Introduce Manager::addBooksFromDirectory()
Added a function to load books from a directory. Requires rootPath to iterate over.
2025-11-30 20:05:44 +01:00
5 changed files with 95 additions and 32 deletions

View File

@@ -155,6 +155,15 @@ class Manager
const std::string& url = "",
const bool checkMetaData = false);
/**
* Add all books from the directory tree into the library.
*
* The tree rooted at `path` is walked directory by directory, including
* sub-directories that are reached through symbolic links. Every file whose
* extension is `.zim` or `.zimaa` (compared case-insensitively) is handed to
* addBookFromPath(); a file that cannot be added is reported on standard
* error and the scan continues with the next entry.
*
* @param path The path of the directory to scan. A relative path is made
*             absolute before the scan starts.
* @param verboseFlag When true, progress messages (visited directories,
*                    added and skipped entries, final count of added books)
*                    are printed to standard output.
*/
void addBooksFromDirectory(const std::string& path,
const bool verboseFlag = false);
std::string writableLibraryPath;
bool m_hasSearchResult = false;

View File

@@ -32,7 +32,7 @@ class Archive;
namespace Xapian
{
class WritableDatabase;
class Database;
}
namespace kiwix
@@ -50,7 +50,7 @@ public: // functions
std::vector<std::string> getSpellingCorrections(const std::string& word, uint32_t maxCount) const;
private: // data
std::unique_ptr<Xapian::WritableDatabase> impl_;
std::unique_ptr<Xapian::Database> impl_;
};
} // namespace kiwix

View File

@@ -23,6 +23,14 @@
#include "tools/pathTools.h"
#include <pugixml.hpp>
#include <algorithm>
#include <cctype>
#include <filesystem>
#include <iostream>
#include <queue>
#include <set>
#include <string>
#include <system_error>
namespace fs = std::filesystem;
namespace kiwix
{
@@ -251,6 +259,58 @@ bool Manager::addBookFromPath(const std::string& pathToOpen,
.empty());
}
void Manager::addBooksFromDirectory(const std::string& path,
const bool verboseFlag)
{
std::set<std::string> iteratedDirs;
std::queue<std::string> dirQueue;
dirQueue.push(fs::absolute(path).u8string());
int totalBooksAdded = 0;
if (verboseFlag)
std::cout << "Adding books from the directory tree: " << dirQueue.front() << std::endl;
while (!dirQueue.empty()) {
const auto currentPath = dirQueue.front();
dirQueue.pop();
if (verboseFlag)
std::cout << "Visiting directory: " << currentPath << std::endl;
for (const auto& dirEntry : fs::directory_iterator(currentPath)) {
auto resolvedPath = dirEntry.path();
if (fs::is_symlink(dirEntry)) {
try {
resolvedPath = fs::canonical(dirEntry.path());
} catch (const std::exception& e) {
std::cerr << "Could not resolve symlink " << resolvedPath.u8string() << " to a valid path. Skipping..." << std::endl;
continue;
}
}
const std::string pathString = resolvedPath.u8string();
std::string resolvedPathExtension = resolvedPath.extension();
std::transform(resolvedPathExtension.begin(), resolvedPathExtension.end(), resolvedPathExtension.begin(),
[](unsigned char c){ return std::tolower(c); });
if (fs::is_directory(resolvedPath)) {
if (iteratedDirs.find(pathString) == iteratedDirs.end())
dirQueue.push(pathString);
else if (verboseFlag)
std::cout << "Already iterated over " << pathString << ". Skipping..." << std::endl;
} else if (resolvedPathExtension == ".zim" || resolvedPathExtension == ".zimaa") {
if (!this->addBookFromPath(pathString, pathString, "", false)) {
std::cerr << "Could not add " << pathString << " into the library." << std::endl;
} else if (verboseFlag) {
std::cout << "Added " << pathString << " into the library." << std::endl;
totalBooksAdded++;
}
} else if (verboseFlag) {
std::cout << "Skipped " << pathString << " - unsupported file type or permission denied." << std::endl;
}
}
iteratedDirs.insert(currentPath);
}
if (verboseFlag)
std::cout << "Traversal completed. Total books added: " << totalBooksAdded << std::endl;
}
bool Manager::readBookFromPath(const std::string& path, kiwix::Book* book)
{
std::string tmp_path = path;

View File

@@ -43,11 +43,15 @@ std::vector<std::string> getAllTitles(const zim::Archive& a)
void createXapianDB(std::string path, const zim::Archive& archive)
{
const int flags = Xapian::DB_BACKEND_GLASS|Xapian::DB_CREATE;
Xapian::WritableDatabase db(path, flags);
const auto tmpDbPath = path + ".tmp";
Xapian::WritableDatabase db(tmpDbPath, flags);
for (const auto& t : getAllTitles(archive)) {
db.add_spelling(t);
}
db.commit();
db.compact(path, Xapian::DBCOMPACT_SINGLE_FILE);
db.close();
std::filesystem::remove_all(tmpDbPath);
}
std::string spellingsDBPathForZIMArchive(std::filesystem::path cacheDirPath, const zim::Archive& a)
@@ -55,27 +59,24 @@ std::string spellingsDBPathForZIMArchive(std::filesystem::path cacheDirPath, con
// The version of spellings DB must be updated each time an important change
// to the implementation is made that renders using the previous version
// impossible or undesirable.
const char SPELLINGS_DB_VERSION[] = "0.2";
const char SPELLINGS_DB_VERSION[] = "0.1";
std::ostringstream filename;
filename << a.getUuid() << ".spellingsdb.v" << SPELLINGS_DB_VERSION;
return (cacheDirPath / filename.str()).string();
}
std::unique_ptr<Xapian::WritableDatabase> openOrCreateXapianDB(std::filesystem::path cacheDirPath, const zim::Archive& archive)
std::unique_ptr<Xapian::Database> openOrCreateXapianDB(std::filesystem::path cacheDirPath, const zim::Archive& archive)
{
const auto path = spellingsDBPathForZIMArchive(cacheDirPath, archive);
try
{
{
Xapian::Database checkIfDbAlreadyExists(path);
}
return std::make_unique<Xapian::WritableDatabase>(path);
return std::make_unique<Xapian::Database>(path);
}
catch (const Xapian::DatabaseOpeningError& )
{
createXapianDB(path, archive);
return std::make_unique<Xapian::WritableDatabase>(path);
return std::make_unique<Xapian::Database>(path);
}
}
@@ -92,23 +93,15 @@ SpellingsDB::~SpellingsDB()
std::vector<std::string> SpellingsDB::getSpellingCorrections(const std::string& word, uint32_t maxCount) const
{
if ( maxCount > 1 ) {
throw std::runtime_error("More than one spelling correction was requested");
}
std::vector<std::string> result;
while ( result.size() < maxCount ) {
const auto term = impl_->get_spelling_suggestion(word, 3);
if ( term.empty() )
break;
const auto term = impl_->get_spelling_suggestion(word, 3);
if ( !term.empty() ) {
result.push_back(term);
// temporarily remove this term so that another spellings could be obtained
impl_->remove_spelling(term);
}
// restore temporarily removed terms
for (const auto& t : result) {
impl_->add_spelling(t);
}
return result;
}

View File

@@ -173,7 +173,8 @@ void testSpellingCorrections(const kiwix::SpellingsDB& spellingsDB)
EXPECT_SPELLING_CORRECTION("Loremipsum", 1, ({"Lorem ipsum"}));
// Only one spelling correction can be requested
EXPECT_SPELLING_CORRECTION("Kung", 2, ({"King", "Kong"}));
// EXPECT_SPELLING_CORRECTION("Kung", 2, ({"King", "Kong"}));
EXPECT_THROW(spellingsDB.getSpellingCorrections("Kung", 2), std::runtime_error);
}
using StrCollection = std::vector<std::string>;
@@ -189,21 +190,21 @@ StrCollection directoryEntries(std::filesystem::path dirPath)
TEST_F(SpellingCorrectionTest, allInOne)
{
//const auto tmpDirModTime0 = std::filesystem::last_write_time(tmpDirPath);
const auto tmpDirModTime0 = std::filesystem::last_write_time(tmpDirPath);
ASSERT_TRUE(directoryEntries(tmpDirPath).empty());
{
const kiwix::SpellingsDB spellingsDB(*archive, tmpDirPath);
testSpellingCorrections(spellingsDB);
}
//const auto tmpDirModTime1 = std::filesystem::last_write_time(tmpDirPath);
const auto tmpDirModTime1 = std::filesystem::last_write_time(tmpDirPath);
const auto spellingsDbPath = tmpDirPath / "554c9707-897e-097a-53ba-1b1306d8bb88.spellingsdb.v0.2";
const auto spellingsDbPath = tmpDirPath / "554c9707-897e-097a-53ba-1b1306d8bb88.spellingsdb.v0.1";
const StrCollection EXPECTED_DIR_CONTENT{ spellingsDbPath.string() };
ASSERT_EQ(directoryEntries(tmpDirPath), EXPECTED_DIR_CONTENT);
//ASSERT_LT(tmpDirModTime0, tmpDirModTime1);
//const auto fileModTime = std::filesystem::last_write_time(spellingsDbPath);
ASSERT_LT(tmpDirModTime0, tmpDirModTime1);
const auto fileModTime = std::filesystem::last_write_time(spellingsDbPath);
{
const kiwix::SpellingsDB spellingsDB(*archive, tmpDirPath);
@@ -211,6 +212,6 @@ TEST_F(SpellingCorrectionTest, allInOne)
}
ASSERT_EQ(directoryEntries(tmpDirPath), EXPECTED_DIR_CONTENT );
//ASSERT_EQ(tmpDirModTime1, std::filesystem::last_write_time(tmpDirPath));
//ASSERT_EQ(fileModTime, std::filesystem::last_write_time(spellingsDbPath));
ASSERT_EQ(tmpDirModTime1, std::filesystem::last_write_time(tmpDirPath));
ASSERT_EQ(fileModTime, std::filesystem::last_write_time(spellingsDbPath));
}