SpellingsDB reuses an existing database

Also, the underlying Xapian database is now stored in a single-file format.
This commit is contained in:
Veloman Yunkan
2025-10-04 18:17:17 +04:00
parent e0491adc85
commit 39672f0532
3 changed files with 74 additions and 17 deletions

View File

@@ -20,6 +20,7 @@
#ifndef KIWIX_SPELLING_CORRECTION_H
#define KIWIX_SPELLING_CORRECTION_H
#include <filesystem>
#include <memory>
#include <string>
#include <vector>
@@ -40,7 +41,7 @@ namespace kiwix
class SpellingsDB
{
public: // functions
explicit SpellingsDB(const zim::Archive& archive, std::string path);
SpellingsDB(const zim::Archive& archive, std::filesystem::path path);
~SpellingsDB();
SpellingsDB(const SpellingsDB& ) = delete;

View File

@@ -39,19 +39,37 @@ std::vector<std::string> getAllTitles(const zim::Archive& a)
return result;
}
// Builds the spellings database for `archive` and stores it at `path` as a
// single-file Xapian database.
//
// The titles returned by getAllTitles() are first added as spellings to a
// temporary glass database located at `path + ".tmp"`. That database is then
// compacted into `path` (single-file format) and the temporary database is
// deleted.
//
// path    - destination of the compacted, single-file database
// archive - ZIM archive whose titles populate the spelling dictionary
//
// Nothing is caught here: errors raised by Xapian or by std::filesystem
// propagate to the caller.
void createXapianDB(std::string path, const zim::Archive& archive)
{
  // DB_CREATE_OR_OVERWRITE (rather than DB_CREATE) so that a temporary
  // database left behind by an earlier interrupted or failed run is replaced
  // instead of making every subsequent creation attempt fail because a
  // database already exists at the temporary location.
  const int flags = Xapian::DB_BACKEND_GLASS|Xapian::DB_CREATE_OR_OVERWRITE;
  const auto tmpDbPath = path + ".tmp";

  Xapian::WritableDatabase db(tmpDbPath, flags);
  for (const auto& t : getAllTitles(archive)) {
    db.add_spelling(t);
  }
  db.commit();

  // Write the final database as one file at `path`.
  db.compact(path, Xapian::DBCOMPACT_SINGLE_FILE);

  // Close the temporary database before deleting its files.
  db.close();
  std::filesystem::remove_all(tmpDbPath);
}
// Returns a Xapian::Database opened on `path`; when the database cannot be
// opened it is first built from `archive` via createXapianDB().
//
// NOTE(review): this span comes from a rendered diff. Statements of the
// previous implementation (the WritableDatabase filled through add_spelling()
// and returned with std::move) appear interleaved with the try/catch
// implementation - confirm against the actual file.
std::unique_ptr<Xapian::Database> openOrCreateXapianDB(std::string path, const zim::Archive& archive)
{
auto db(std::make_unique<Xapian::WritableDatabase>(path, Xapian::DB_BACKEND_GLASS));
for (const auto& t : getAllTitles(archive)) {
db->add_spelling(t);
// Reuse the database if one can already be opened at `path`.
try
{
return std::make_unique<Xapian::Database>(path);
}
// Opening failed: build the database, then open the freshly created copy.
catch (const Xapian::DatabaseOpeningError& )
{
createXapianDB(path, archive);
return std::make_unique<Xapian::Database>(path);
}
return std::move(db);
}
} // unnamed namespace
// Constructs the spellings database for `archive`: `path` is handed to
// openOrCreateXapianDB() and the resulting Xapian database is stored in impl_.
//
// NOTE(review): two signatures are visible (std::string and
// std::filesystem::path) because this span comes from a rendered diff; the
// std::filesystem::path overload converts the path with path.string() before
// the call - confirm which one is current against the actual file.
SpellingsDB::SpellingsDB(const zim::Archive& archive, std::string path)
: impl_(openOrCreateXapianDB(path, archive))
SpellingsDB::SpellingsDB(const zim::Archive& archive, std::filesystem::path path)
: impl_(openOrCreateXapianDB(path.string(), archive))
{
}

View File

@@ -19,27 +19,45 @@
#include "gtest/gtest.h"
#include "../include/spelling_correction.h"
#include "../src/tools/pathTools.h"
#include "zim/archive.h"
#include <filesystem>
#include <xapian.h>
// Location of the spellings database used by the tests: it is deleted by
// removeDb() and passed to kiwix::SpellingsDB as the database path.
const std::string TEST_DB_PATH = "./spellings.db";
// Test fixture that gives each test a temporary directory (tmpDirPath) and
// the spelling-correction test archive (archive).
class SpellingCorrectionTest : public ::testing::Test
{
// Deletes whatever exists at TEST_DB_PATH.
void removeDb()
{
std::filesystem::remove_all(TEST_DB_PATH);
}
protected:
// Removes the database at TEST_DB_PATH, obtains a temporary directory from
// makeTmpDirectory() and opens the test ZIM archive.
void SetUp() override {
removeDb();
tmpDirPath = makeTmpDirectory();
archive = std::make_unique<zim::Archive>("./test/spelling_correction_test.zim");
}
// Removes the database at TEST_DB_PATH, then deletes the temporary directory.
void TearDown() override {
removeDb();
// Give the owner write permission back first: a test may have called
// makeTmpDirReadOnly() on the directory.
std::filesystem::permissions(
tmpDirPath,
std::filesystem::perms::owner_write,
std::filesystem::perm_options::add
);
std::filesystem::remove_all(tmpDirPath);
}
// Strips the write permission bits (owner, group and others) from the
// temporary directory.
void makeTmpDirReadOnly() {
using std::filesystem::perms;
std::filesystem::permissions(
tmpDirPath,
perms::owner_write | perms::group_write | perms::others_write,
std::filesystem::perm_options::remove
);
}
protected:
// Directory returned by makeTmpDirectory() in SetUp(); deleted in TearDown().
std::filesystem::path tmpDirPath;
// Archive opened in SetUp() from ./test/spelling_correction_test.zim.
std::unique_ptr<zim::Archive> archive;
};
void testSpellingCorrections(const kiwix::SpellingsDB& spellingsDB)
@@ -169,9 +187,29 @@ void testSpellingCorrections(const kiwix::SpellingsDB& spellingsDB)
EXPECT_THROW(spellingsDB.getSpellingCorrections("Kung", 2), std::runtime_error);
}
// Constructing a SpellingsDB whose database file lives in a directory that
// has been made read-only must fail with Xapian::DatabaseCreateError.
TEST_F(SpellingCorrectionTest, SpellingsDBCannotBeCreatedInAReadOnlyDirectory)
{
  const std::filesystem::path dbPath = tmpDirPath / "spellings.db";
  makeTmpDirReadOnly();

  EXPECT_THROW(
    const kiwix::SpellingsDB db(*archive, dbPath),
    Xapian::DatabaseCreateError
  );
}
// Runs testSpellingCorrections() against a SpellingsDB opened on a database
// inside the temporary directory, then once more on the same path after the
// directory has been made read-only.
//
// NOTE(review): this span comes from a rendered diff. The first three
// statements (the local `archive`, the SpellingsDB on TEST_DB_PATH and its
// check) appear to belong to the previous version of the test - confirm
// against the actual file.
TEST_F(SpellingCorrectionTest, allInOne)
{
const auto archive = zim::Archive("./test/spelling_correction_test.zim");
kiwix::SpellingsDB spellingsDB(archive, TEST_DB_PATH);
testSpellingCorrections(spellingsDB);
const auto spellingsDbPath = tmpDirPath / "spellings.db";
// First SpellingsDB on spellingsDbPath; it is destroyed at the end of the
// scope, before the directory is made read-only.
{
const kiwix::SpellingsDB spellingsDB(*archive, spellingsDbPath);
testSpellingCorrections(spellingsDB);
}
makeTmpDirReadOnly();
// Second SpellingsDB on the same path, now inside a read-only directory.
{
const kiwix::SpellingsDB spellingsDB(*archive, spellingsDbPath);
testSpellingCorrections(spellingsDB);
}
}