Compare commits

...

6 Commits

Author SHA1 Message Date
Nikhil Tanwar
2cd057941e Add test for aliasName filtering
Adds test to check if filtering by alias name works.
2022-09-24 20:16:20 +02:00
Nikhil Tanwar
8a3a0b08c2 fixup! Add catalog filtering using ZIM aliasname 2022-09-24 20:16:20 +02:00
Nikhil Tanwar
956c597e80 fixup! Extract parseQuery() 2022-09-24 20:16:20 +02:00
Nikhil Tanwar
bd38ea97f9 Multivalue support for book query
Adds support for passing multiple `book` query parameters.
2022-09-24 20:16:20 +02:00
Nikhil Tanwar
48a0b3bdc7 Add catalog filtering using ZIM aliasname
Adds mechanism to get a ZIM using its alias name.
To make a search, one needs to visit:
`TLD/?book=aliasNameHere`
2022-09-24 20:16:20 +02:00
Nikhil Tanwar
b84eaad748 Extract parseQuery()
Extracts the duplicate code from publisherQuery() and nameQuery() into a new function parseQuery().
2022-09-24 20:16:20 +02:00
6 changed files with 72 additions and 13 deletions

View File

@@ -54,6 +54,7 @@ enum supportedListMode {
class Filter {
public: // types
using Tags = std::vector<std::string>;
using AliasNames = std::vector<std::string>;
private: // data
uint64_t activeFilters;
@@ -67,6 +68,7 @@ class Filter {
std::string _query;
bool _queryIsPartial;
std::string _name;
AliasNames _aliasNames;
public: // functions
Filter();
@@ -112,6 +114,7 @@ class Filter {
Filter& maxSize(size_t size);
Filter& query(std::string query, bool partial=true);
Filter& name(std::string name);
Filter& aliasNames(const AliasNames& aliasNames);
bool hasQuery() const;
const std::string& getQuery() const { return _query; }
@@ -135,6 +138,8 @@ class Filter {
const Tags& getAcceptTags() const { return _acceptTags; }
const Tags& getRejectTags() const { return _rejectTags; }
const AliasNames& getAliasNames() const { return _aliasNames; }
private: // functions
friend class Library;

View File

@@ -54,6 +54,7 @@ class HumanReadableNameMapper : public NameMapper {
virtual ~HumanReadableNameMapper() = default;
virtual std::string getNameForId(const std::string& id) const;
virtual std::string getIdForName(const std::string& name) const;
static std::string removeDateFromBookId(const std::string& bookId);
};
class UpdatableNameMapper : public NameMapper {

View File

@@ -28,6 +28,7 @@
#include "tools/stringTools.h"
#include "tools/otherTools.h"
#include "tools/concurrent_cache.h"
#include "name_mapper.h"
#include <pugixml.hpp>
#include <algorithm>
@@ -461,6 +462,9 @@ void Library::updateBookDB(const Book& book)
indexer.index_text(normalizeText(book.getPublisher()), 1, "XP");
indexer.index_text(normalizeText(book.getName()), 1, "XN");
indexer.index_text(normalizeText(book.getCategory()), 1, "XC");
const auto bookName = book.getHumanReadableIdFromPath();
const auto aliasName = HumanReadableNameMapper::removeDateFromBookId(bookName);
indexer.index_text(normalizeText(aliasName), 1, "XF");
for ( const auto& tag : split(normalizeText(book.getTags()), ";") ) {
doc.add_boolean_term("XT" + tag);
@@ -505,6 +509,7 @@ Xapian::Query buildXapianQueryFromFilterQuery(const Filter& filter)
queryParser.add_prefix("publisher", "XP");
queryParser.add_prefix("creator", "A");
queryParser.add_prefix("tag", "XT");
queryParser.add_prefix("filename", "XF");
const auto partialQueryFlag = filter.queryIsPartial()
? Xapian::QueryParser::FLAG_PARTIAL
: 0;
@@ -521,6 +526,16 @@ Xapian::Query buildXapianQueryFromFilterQuery(const Filter& filter)
return queryParser.parse_query(normalizeText(filter.getQuery()), flags);
}
// Builds an OP_PHRASE query out of the terms found in `query`.
//
// `query` is passed through normalizeText() and parsed with stemming
// disabled and no parser flags, using `prefix` as the default term prefix.
// The parsed terms are then recombined as a phrase, with the parsed query's
// length supplied as the final (window) constructor argument.
Xapian::Query makePhraseQuery(const std::string& query, const std::string& prefix)
{
  Xapian::QueryParser parser;
  parser.set_default_op(Xapian::Query::OP_OR);
  parser.set_stemming_strategy(Xapian::QueryParser::STEM_NONE);

  // No optional parser features are enabled.
  const unsigned noFlags = 0;
  const Xapian::Query parsed = parser.parse_query(normalizeText(query), noFlags, prefix);

  return Xapian::Query(Xapian::Query::OP_PHRASE,
                       parsed.get_terms_begin(),
                       parsed.get_terms_end(),
                       parsed.get_length());
}
Xapian::Query nameQuery(const std::string& name)
{
return Xapian::Query("XN" + normalizeText(name));
@@ -531,6 +546,18 @@ Xapian::Query categoryQuery(const std::string& category)
return Xapian::Query("XC" + normalizeText(category));
}
// Builds a query selecting documents that match at least one of the given
// alias names.
//
// Each alias name becomes a phrase query on the "XF" prefix; the phrase
// queries are OR-ed together and then used as the right-hand side of an
// OP_FILTER whose left-hand side is the empty-term query (Xapian's
// match-all form).
Xapian::Query aliasNamesQuery(const Filter::AliasNames& aliasNames)
{
  std::vector<Xapian::Query> perAliasQueries;
  for (const auto& alias : aliasNames) {
    perAliasQueries.push_back(makePhraseQuery(alias, "XF"));
  }

  const Xapian::Query anyAlias(Xapian::Query::OP_OR,
                               perAliasQueries.begin(),
                               perAliasQueries.end());
  const Xapian::Query everything = Xapian::Query(std::string());
  return Xapian::Query(Xapian::Query::OP_FILTER, everything, anyAlias);
}
Xapian::Query langQuery(const std::string& lang)
{
return Xapian::Query("L" + normalizeText(lang));
@@ -538,22 +565,12 @@ Xapian::Query langQuery(const std::string& lang)
// Builds the phrase query used to filter books by publisher ("XP" prefix).
// NOTE(review): this is a diff rendering with the +/- markers stripped. The
// hunk header shrinks 22 lines to 12, so the QueryParser-based statements
// below appear to be the removed implementation and the final
// makePhraseQuery() call its replacement; both perform the same steps with
// the "XP" prefix. Confirm against the raw diff.
Xapian::Query publisherQuery(const std::string& publisher)
{
Xapian::QueryParser queryParser;
queryParser.set_default_op(Xapian::Query::OP_OR);
queryParser.set_stemming_strategy(Xapian::QueryParser::STEM_NONE);
const auto flags = 0;
const auto q = queryParser.parse_query(normalizeText(publisher), flags, "XP");
return Xapian::Query(Xapian::Query::OP_PHRASE, q.get_terms_begin(), q.get_terms_end(), q.get_length());
return makePhraseQuery(publisher, "XP");
}
// Builds the phrase query used to filter books by creator ("A" prefix).
// NOTE(review): this is a diff rendering with the +/- markers stripped. The
// hunk header shrinks 22 lines to 12, so the QueryParser-based statements
// below appear to be the removed implementation and the final
// makePhraseQuery() call its replacement; both perform the same steps with
// the "A" prefix. Confirm against the raw diff.
Xapian::Query creatorQuery(const std::string& creator)
{
Xapian::QueryParser queryParser;
queryParser.set_default_op(Xapian::Query::OP_OR);
queryParser.set_stemming_strategy(Xapian::QueryParser::STEM_NONE);
const auto flags = 0;
const auto q = queryParser.parse_query(normalizeText(creator), flags, "A");
return Xapian::Query(Xapian::Query::OP_PHRASE, q.get_terms_begin(), q.get_terms_end(), q.get_length());
return makePhraseQuery(creator, "A");
}
Xapian::Query tagsQuery(const Filter::Tags& acceptTags, const Filter::Tags& rejectTags)
@@ -593,6 +610,9 @@ Xapian::Query buildXapianQuery(const Filter& filter)
const auto tq = tagsQuery(filter.getAcceptTags(), filter.getRejectTags());
q = Xapian::Query(Xapian::Query::OP_AND, q, tq);;
}
if ( !filter.getAliasNames().empty() ) {
q = Xapian::Query(Xapian::Query::OP_AND, q, aliasNamesQuery(filter.getAliasNames()));
}
return q;
}
@@ -742,6 +762,7 @@ enum filterTypes {
QUERY = FLAG(12),
NAME = FLAG(13),
CATEGORY = FLAG(14),
ALIASNAMES = FLAG(15),
};
Filter& Filter::local(bool accept)
@@ -844,6 +865,13 @@ Filter& Filter::name(std::string name)
return *this;
}
// Stores the alias names to filter on and sets the ALIASNAMES bit in
// activeFilters. Returns *this so that setter calls can be chained.
Filter& Filter::aliasNames(const AliasNames& aliasNames)
{
  this->_aliasNames = aliasNames;
  this->activeFilters = this->activeFilters | ALIASNAMES;
  return *this;
}
#define ACTIVE(X) (activeFilters & (X))
#define FILTER(TAG, TEST) if (ACTIVE(TAG) && !(TEST)) { return false; }
bool Filter::hasQuery() const

View File

@@ -34,7 +34,7 @@ HumanReadableNameMapper::HumanReadableNameMapper(kiwix::Library& library, bool w
if (!withAlias)
continue;
auto aliasName = replaceRegex(bookName, "", "_[[:digit:]]{4}-[[:digit:]]{2}$");
auto aliasName = removeDateFromBookId(bookName);
if (aliasName == bookName) {
continue;
}
@@ -51,6 +51,10 @@ HumanReadableNameMapper::HumanReadableNameMapper(kiwix::Library& library, bool w
}
}
// Returns `bookId` after applying replaceRegex() with an empty replacement
// and a pattern anchored at the end of the string: an underscore, four
// digits, a hyphen and two digits (e.g. a trailing "_2022-09").
// NOTE(review): presumably an id without such a suffix is returned
// unchanged — the constructor's `aliasName == bookName` check relies on
// that; confirm against replaceRegex().
std::string HumanReadableNameMapper::removeDateFromBookId(const std::string& bookId) {
  const char* const noReplacement = "";
  const char* const trailingDatePattern = "_[[:digit:]]{4}-[[:digit:]]{2}$";
  return replaceRegex(bookId, noReplacement, trailingDatePattern);
}
// Returns the name stored for `id` in m_idToName.
// NOTE(review): the lookup uses at(); for standard associative containers
// that throws std::out_of_range on an unknown key instead of inserting one.
// The type of m_idToName is not visible here — confirm.
std::string HumanReadableNameMapper::getNameForId(const std::string& id) const {
return m_idToName.at(id);
}

View File

@@ -119,6 +119,9 @@ Filter get_search_filter(const RequestContext& request, const std::string& prefi
try {
filter.rejectTags(kiwix::split(request.get_argument(prefix+"notag"), ";"));
} catch (...) {}
try {
filter.aliasNames(request.get_arguments(prefix + "book"));
} catch (...) {}
return filter;
}

View File

@@ -500,6 +500,24 @@ TEST_F(LibraryTest, filterByTags)
);
}
// Checks Filter::aliasNames(): a single alias, several aliases at once, and
// an alias that matches no book.
TEST_F(LibraryTest, filterByAliasNames)
{
// filtering for one book
EXPECT_FILTER_RESULTS(kiwix::Filter().aliasNames({"zimfile"}),
"Ray Charles"
);
// filtering for more than one book
EXPECT_FILTER_RESULTS(kiwix::Filter().aliasNames({"zimfile", "example"}),
"An example ZIM archive",
"Ray Charles"
);
// filtering by alias name requires full text match
EXPECT_FILTER_RESULTS(kiwix::Filter().aliasNames({"wrong_name"}),
/* no results */
);
}
TEST_F(LibraryTest, filterByQuery)
{