mirror of
https://github.com/kiwix/libkiwix.git
synced 2026-02-19 15:44:12 -05:00
70 is a too small limit for the number of results. Users need at least 100. As the html rendering will fails with more than 144 results, explicitly limits the number of search to 140. Fixes kiwix/kiwix-tools#92
399 lines
10 KiB
C++
399 lines
10 KiB
C++
/*
|
|
* Copyright 2011 Emmanuel Engelhart <kelson@kiwix.org>
|
|
*
|
|
* This program is free software; you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License as published by
|
|
* the Free Software Foundation; either version 3 of the License, or
|
|
* any later version.
|
|
*
|
|
* This program is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License
|
|
* along with this program; if not, write to the Free Software
|
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
|
* MA 02110-1301, USA.
|
|
*/
|
|
|
|
#include "searcher.h"
|
|
#include "kiwixlib-resources.h"
|
|
#include "reader.h"
|
|
#include "xapianSearcher.h"
|
|
|
|
#include <zim/search.h>
|
|
|
|
#ifdef ENABLE_CTPP2
|
|
#include <ctpp2/CDT.hpp>
|
|
#include <ctpp2/CTPP2FileLogger.hpp>
|
|
#include <ctpp2/CTPP2SimpleVM.hpp>
|
|
#include "ctpp2/CTPP2VMStringLoader.hpp"
|
|
|
|
using namespace CTPP;
|
|
#endif
|
|
|
|
#define MAX_SEARCH_LEN 140
|
|
|
|
namespace kiwix
|
|
{
|
|
class _Result : public Result
|
|
{
|
|
public:
|
|
_Result(Searcher* searcher, zim::Search::iterator& iterator);
|
|
virtual ~_Result(){};
|
|
|
|
virtual std::string get_url();
|
|
virtual std::string get_title();
|
|
virtual int get_score();
|
|
virtual std::string get_snippet();
|
|
virtual std::string get_content();
|
|
virtual int get_wordCount();
|
|
virtual int get_size();
|
|
virtual int get_readerIndex();
|
|
|
|
private:
|
|
Searcher* searcher;
|
|
zim::Search::iterator iterator;
|
|
};
|
|
|
|
struct SearcherInternal {
|
|
const zim::Search* _search;
|
|
XapianSearcher* _xapianSearcher;
|
|
zim::Search::iterator current_iterator;
|
|
|
|
SearcherInternal() : _search(NULL), _xapianSearcher(NULL) {}
|
|
~SearcherInternal()
|
|
{
|
|
if (_search != NULL) {
|
|
delete _search;
|
|
}
|
|
if (_xapianSearcher != NULL) {
|
|
delete _xapianSearcher;
|
|
}
|
|
}
|
|
};
|
|
|
|
/* Constructor */
|
|
Searcher::Searcher(const string& xapianDirectoryPath,
|
|
Reader* reader,
|
|
const string& humanReadableName)
|
|
: internal(new SearcherInternal()),
|
|
searchPattern(""),
|
|
protocolPrefix("zim://"),
|
|
searchProtocolPrefix("search://?"),
|
|
resultCountPerPage(0),
|
|
estimatedResultCount(0),
|
|
resultStart(0),
|
|
resultEnd(0)
|
|
{
|
|
loadICUExternalTables();
|
|
if (!reader || !reader->hasFulltextIndex()) {
|
|
internal->_xapianSearcher = new XapianSearcher(xapianDirectoryPath, reader);
|
|
}
|
|
this->contentHumanReadableId = humanReadableName;
|
|
this->humanReaderNames.push_back(humanReadableName);
|
|
}
|
|
|
|
Searcher::Searcher()
|
|
: internal(new SearcherInternal()),
|
|
searchPattern(""),
|
|
protocolPrefix("zim://"),
|
|
searchProtocolPrefix("search://?"),
|
|
resultCountPerPage(0),
|
|
estimatedResultCount(0),
|
|
resultStart(0),
|
|
resultEnd(0)
|
|
{
|
|
loadICUExternalTables();
|
|
}
|
|
|
|
/* Destructor */
|
|
Searcher::~Searcher()
|
|
{
|
|
delete internal;
|
|
}
|
|
|
|
void Searcher::add_reader(Reader* reader, const std::string& humanReadableName)
|
|
{
|
|
this->readers.push_back(reader);
|
|
this->humanReaderNames.push_back(humanReadableName);
|
|
}
|
|
|
|
/* Search strings in the database */
|
|
void Searcher::search(std::string& search,
|
|
unsigned int resultStart,
|
|
unsigned int resultEnd,
|
|
const bool verbose)
|
|
{
|
|
this->reset();
|
|
|
|
if (verbose == true) {
|
|
cout << "Performing query `" << search << "'" << endl;
|
|
}
|
|
|
|
/* If resultEnd & resultStart inverted */
|
|
if (resultStart > resultEnd) {
|
|
resultEnd += resultStart;
|
|
resultStart = resultEnd - resultStart;
|
|
resultEnd -= resultStart;
|
|
}
|
|
|
|
/* Try to find results */
|
|
if (resultStart != resultEnd) {
|
|
/* Avoid big researches */
|
|
this->resultCountPerPage = resultEnd - resultStart;
|
|
if (this->resultCountPerPage > MAX_SEARCH_LEN) {
|
|
resultEnd = resultStart + MAX_SEARCH_LEN;
|
|
this->resultCountPerPage = MAX_SEARCH_LEN;
|
|
}
|
|
|
|
/* Perform the search */
|
|
this->searchPattern = search;
|
|
this->resultStart = resultStart;
|
|
this->resultEnd = resultEnd;
|
|
string unaccentedSearch = removeAccents(search);
|
|
if (internal->_xapianSearcher) {
|
|
internal->_xapianSearcher->searchInIndex(
|
|
unaccentedSearch, resultStart, resultEnd, verbose);
|
|
this->estimatedResultCount
|
|
= internal->_xapianSearcher->results.get_matches_estimated();
|
|
} else {
|
|
std::vector<const zim::File*> zims;
|
|
for (auto current = this->readers.begin(); current != this->readers.end();
|
|
current++) {
|
|
zims.push_back((*current)->getZimFileHandler());
|
|
}
|
|
zim::Search* search = new zim::Search(zims);
|
|
search->set_query(unaccentedSearch);
|
|
search->set_range(resultStart, resultEnd);
|
|
internal->_search = search;
|
|
internal->current_iterator = internal->_search->begin();
|
|
this->estimatedResultCount = internal->_search->get_matches_estimated();
|
|
}
|
|
}
|
|
|
|
return;
|
|
}
|
|
|
|
void Searcher::restart_search()
|
|
{
|
|
if (internal->_xapianSearcher) {
|
|
internal->_xapianSearcher->restart_search();
|
|
} else {
|
|
internal->current_iterator = internal->_search->begin();
|
|
}
|
|
}
|
|
|
|
Result* Searcher::getNextResult()
|
|
{
|
|
if (internal->_xapianSearcher) {
|
|
return internal->_xapianSearcher->getNextResult();
|
|
} else if (internal->current_iterator != internal->_search->end()) {
|
|
Result* result = new _Result(this, internal->current_iterator);
|
|
internal->current_iterator++;
|
|
return result;
|
|
}
|
|
return NULL;
|
|
}
|
|
|
|
/* Reset the results */
|
|
void Searcher::reset()
|
|
{
|
|
this->estimatedResultCount = 0;
|
|
this->searchPattern = "";
|
|
return;
|
|
}
|
|
|
|
void Searcher::suggestions(std::string& search, const bool verbose)
|
|
{
|
|
this->reset();
|
|
|
|
if (verbose == true) {
|
|
cout << "Performing suggestion query `" << search << "`" << endl;
|
|
}
|
|
|
|
this->searchPattern = search;
|
|
this->resultStart = 0;
|
|
this->resultEnd = 10;
|
|
string unaccentedSearch = removeAccents(search);
|
|
|
|
if (internal->_xapianSearcher) {
|
|
/* [TODO] Suggestion on a external database ?
|
|
* We do not support that. */
|
|
this->estimatedResultCount = 0;
|
|
} else {
|
|
std::vector<const zim::File*> zims;
|
|
for (auto current = this->readers.begin(); current != this->readers.end();
|
|
current++) {
|
|
zims.push_back((*current)->getZimFileHandler());
|
|
}
|
|
zim::Search* search = new zim::Search(zims);
|
|
search->set_query(unaccentedSearch);
|
|
search->set_range(resultStart, resultEnd);
|
|
search->set_suggestion_mode(true);
|
|
internal->_search = search;
|
|
internal->current_iterator = internal->_search->begin();
|
|
this->estimatedResultCount = internal->_search->get_matches_estimated();
|
|
}
|
|
}
|
|
|
|
/* Return the result count estimation */
|
|
unsigned int Searcher::getEstimatedResultCount()
|
|
{
|
|
return this->estimatedResultCount;
|
|
}
|
|
|
|
bool Searcher::setProtocolPrefix(const std::string prefix)
|
|
{
|
|
this->protocolPrefix = prefix;
|
|
return true;
|
|
}
|
|
|
|
bool Searcher::setSearchProtocolPrefix(const std::string prefix)
|
|
{
|
|
this->searchProtocolPrefix = prefix;
|
|
return true;
|
|
}
|
|
|
|
_Result::_Result(Searcher* searcher, zim::Search::iterator& iterator)
|
|
: searcher(searcher), iterator(iterator)
|
|
{
|
|
}
|
|
|
|
std::string _Result::get_url()
|
|
{
|
|
return iterator.get_url();
|
|
}
|
|
std::string _Result::get_title()
|
|
{
|
|
return iterator.get_title();
|
|
}
|
|
int _Result::get_score()
|
|
{
|
|
return iterator.get_score();
|
|
}
|
|
std::string _Result::get_snippet()
|
|
{
|
|
return iterator.get_snippet();
|
|
}
|
|
std::string _Result::get_content()
|
|
{
|
|
if (iterator->good()) {
|
|
return iterator->getData();
|
|
}
|
|
return "";
|
|
}
|
|
int _Result::get_size()
|
|
{
|
|
return iterator.get_size();
|
|
}
|
|
int _Result::get_wordCount()
|
|
{
|
|
return iterator.get_wordCount();
|
|
}
|
|
int _Result::get_readerIndex()
|
|
{
|
|
return iterator.get_fileIndex();
|
|
}
|
|
#ifdef ENABLE_CTPP2
|
|
|
|
string Searcher::getHtml()
|
|
{
|
|
SimpleVM oSimpleVM(
|
|
1024, //iIMaxFunctions (default value)
|
|
4096, //iIMaxArgStackSize (default value)
|
|
4096, //iIMaxCodeStackSize (default value)
|
|
10240 * 2 //iIMaxSteps (default*2)
|
|
);
|
|
|
|
// Fill data
|
|
CDT oData;
|
|
CDT resultsCDT(CDT::ARRAY_VAL);
|
|
|
|
this->restart_search();
|
|
Result* p_result = NULL;
|
|
while ((p_result = this->getNextResult())) {
|
|
CDT result;
|
|
result["title"] = p_result->get_title();
|
|
result["url"] = p_result->get_url();
|
|
result["snippet"] = p_result->get_snippet();
|
|
result["contentId"] = humanReaderNames[p_result->get_readerIndex()];
|
|
|
|
if (p_result->get_size() >= 0) {
|
|
result["size"] = kiwix::beautifyInteger(p_result->get_size());
|
|
}
|
|
|
|
if (p_result->get_wordCount() >= 0) {
|
|
result["wordCount"] = kiwix::beautifyInteger(p_result->get_wordCount());
|
|
}
|
|
|
|
resultsCDT.PushBack(result);
|
|
delete p_result;
|
|
}
|
|
this->restart_search();
|
|
oData["results"] = resultsCDT;
|
|
|
|
// pages
|
|
CDT pagesCDT(CDT::ARRAY_VAL);
|
|
|
|
unsigned int pageStart
|
|
= this->resultStart / this->resultCountPerPage >= 5
|
|
? this->resultStart / this->resultCountPerPage - 4
|
|
: 0;
|
|
unsigned int pageCount
|
|
= this->estimatedResultCount / this->resultCountPerPage + 1 - pageStart;
|
|
|
|
if (pageCount > 10) {
|
|
pageCount = 10;
|
|
} else if (pageCount == 1) {
|
|
pageCount = 0;
|
|
}
|
|
|
|
for (unsigned int i = pageStart; i < pageStart + pageCount; i++) {
|
|
CDT page;
|
|
page["label"] = i + 1;
|
|
page["start"] = i * this->resultCountPerPage;
|
|
page["end"] = (i + 1) * this->resultCountPerPage;
|
|
|
|
if (i * this->resultCountPerPage == this->resultStart) {
|
|
page["selected"] = true;
|
|
}
|
|
|
|
pagesCDT.PushBack(page);
|
|
}
|
|
oData["pages"] = pagesCDT;
|
|
|
|
oData["count"] = kiwix::beautifyInteger(this->estimatedResultCount);
|
|
oData["searchPattern"] = kiwix::encodeDiples(this->searchPattern);
|
|
oData["searchPatternEncoded"] = urlEncode(this->searchPattern);
|
|
oData["resultStart"] = this->resultStart + 1;
|
|
oData["resultEnd"] = (this->resultEnd > this->estimatedResultCount
|
|
? this->estimatedResultCount
|
|
: this->resultEnd);
|
|
oData["resultRange"] = this->resultCountPerPage;
|
|
oData["resultLastPageStart"]
|
|
= this->estimatedResultCount > this->resultCountPerPage
|
|
? this->estimatedResultCount - this->resultCountPerPage
|
|
: 0;
|
|
oData["protocolPrefix"] = this->protocolPrefix;
|
|
oData["searchProtocolPrefix"] = this->searchProtocolPrefix;
|
|
oData["contentId"] = this->contentHumanReadableId;
|
|
|
|
std::string template_ct2 = RESOURCE::results_ct2;
|
|
VMStringLoader oLoader(template_ct2.c_str(), template_ct2.size());
|
|
|
|
FileLogger oLogger(stderr);
|
|
|
|
// DEBUG only (write output to stdout)
|
|
// oSimpleVM.Run(oData, oLoader, stdout, oLogger);
|
|
|
|
std::string sResult;
|
|
oSimpleVM.Run(oData, oLoader, sResult, oLogger);
|
|
|
|
return sResult;
|
|
}
|
|
|
|
#endif
|
|
}
|