mirror of
https://github.com/kiwix/libkiwix.git
synced 2026-01-01 19:08:04 -05:00
Compare commits
42 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
a205ff00c8 | ||
|
|
96f199a327 | ||
|
|
0be3aa9d38 | ||
|
|
4f57e765e5 | ||
|
|
2bcd43af98 | ||
|
|
eb2c750431 | ||
|
|
7132775d67 | ||
|
|
c44b2acb56 | ||
|
|
0343c23f82 | ||
|
|
7005b65901 | ||
|
|
d360b9143c | ||
|
|
9963c73150 | ||
|
|
41d6f9884c | ||
|
|
8823880348 | ||
|
|
ac169558c4 | ||
|
|
2e43b7e82d | ||
|
|
4485cc8d0f | ||
|
|
3be4d92c53 | ||
|
|
44a77f5846 | ||
|
|
9abdc6ce02 | ||
|
|
5ca419bee7 | ||
|
|
37f29da63e | ||
|
|
94670847ef | ||
|
|
93b53cc6d0 | ||
|
|
cf273a06b4 | ||
|
|
43e9763091 | ||
|
|
ef661a2e25 | ||
|
|
7baa1b9e62 | ||
|
|
e28dbe7c7e | ||
|
|
2906202056 | ||
|
|
ce6c782b66 | ||
|
|
9771506985 | ||
|
|
b8d950c1a0 | ||
|
|
998db0eb2b | ||
|
|
46fab22a73 | ||
|
|
72e41082ca | ||
|
|
c06a041100 | ||
|
|
cecb65e314 | ||
|
|
62d26c27ff | ||
|
|
074c1bcffa | ||
|
|
9be2abedf3 | ||
|
|
83d27255cf |
13
.travis.yml
Normal file
13
.travis.yml
Normal file
@@ -0,0 +1,13 @@
|
||||
language: cpp
|
||||
dist: trusty
|
||||
sudo: required
|
||||
cache: ccache
|
||||
install: travis/install_deps.sh
|
||||
script: travis/compile.sh
|
||||
env:
|
||||
- PLATFORM="native_static"
|
||||
- PLATFORM="native_dyn"
|
||||
- PLATFORM="win32_static"
|
||||
- PLATFORM="win32_dyn"
|
||||
- PLATFORM="android_arm"
|
||||
- PLATFORM="android_arm64"
|
||||
16
ChangeLog
Normal file
16
ChangeLog
Normal file
@@ -0,0 +1,16 @@
|
||||
kiwix-lib 0.2.0
|
||||
===============
|
||||
|
||||
* Generate the snippet from the article content if the snippet is not
|
||||
directly in the database.
|
||||
This provides better snippets as they now depend on the query.
|
||||
* Use the stopwords and the language stored in the fulltext index database to
|
||||
parse the user query.
|
||||
* Remove the indexer functionality.
|
||||
* Move to C++11 standard.
|
||||
* Use the fulltext search of the zimlib.
|
||||
We still have the fulltext search code in kiwix-lib to be able to search in
|
||||
a fulltext index located beside a zim file. (To be removed in the future)
|
||||
* Few API changes
|
||||
* Change a lot of `Reader` methods to const methods.
|
||||
* Fix some crashes.
|
||||
@@ -20,13 +20,7 @@
|
||||
#ifndef KIWIX_STRINGTOOLS_H
|
||||
#define KIWIX_STRINGTOOLS_H
|
||||
|
||||
#include <unicode/translit.h>
|
||||
#include <unicode/normlzr.h>
|
||||
#include <unicode/unistr.h>
|
||||
#include <unicode/rep.h>
|
||||
#include <unicode/uniset.h>
|
||||
#include <unicode/ustring.h>
|
||||
#include <unicode/ucnv.h>
|
||||
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
|
||||
@@ -1,173 +0,0 @@
|
||||
/*
|
||||
* Copyright 2014 Emmanuel Engelhart <kelson@kiwix.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 3 of the License, or
|
||||
* any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
* MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#ifndef KIWIX_INDEXER_H
|
||||
#define KIWIX_INDEXER_H
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <stack>
|
||||
#include <queue>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
|
||||
#include <pthread.h>
|
||||
#include "common/stringTools.h"
|
||||
#include "common/otherTools.h"
|
||||
#include <zim/file.h>
|
||||
#include <zim/article.h>
|
||||
#include <zim/fileiterator.h>
|
||||
#include "reader.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace kiwix {
|
||||
|
||||
/**
 * Unit of work handed from one indexing stage to the next.
 *
 * The members are spelled std::string so that the struct does not depend on
 * a `using namespace std;` directive being in effect in this header.
 */
struct indexerToken {
  std::string url;            /* long URL of the article */
  std::string accentedTitle;  /* title as found in the article (accents kept) */
  std::string title;          /* title; replaced by its accent-stripped form once parsed */
  std::string keywords;       /* accent-stripped keywords reported by the HTML parser */
  std::string content;        /* raw article data, then the accent-stripped text dump once parsed */
  std::string snippet;        /* leading part of the text dump (at most 300 bytes) */
  std::string size;           /* content size divided by 1024, as a decimal string */
  std::string wordCount;      /* word count of the text dump, as a decimal string */
};
|
||||
|
||||
class Indexer {
|
||||
|
||||
typedef void (* ProgressCallback)(const unsigned int processedArticleCount, const unsigned int totalArticleCount);
|
||||
|
||||
public:
|
||||
Indexer();
|
||||
virtual ~Indexer();
|
||||
|
||||
bool start(const string zimPath, const string indexPath, ProgressCallback callback = NULL);
|
||||
bool stop();
|
||||
bool isRunning();
|
||||
unsigned int getProgression();
|
||||
void setVerboseFlag(const bool value);
|
||||
|
||||
protected:
|
||||
virtual void indexingPrelude(const string indexPath) = 0;
|
||||
virtual void index(const string &url,
|
||||
const string &title,
|
||||
const string &unaccentedTitle,
|
||||
const string &keywords,
|
||||
const string &content,
|
||||
const string &snippet,
|
||||
const string &size,
|
||||
const string &wordCount) = 0;
|
||||
virtual void flush() = 0;
|
||||
virtual void indexingPostlude(const string indexPath) = 0;
|
||||
|
||||
/* Stop words */
|
||||
std::vector<std::string> stopWords;
|
||||
void readStopWords(const string languageCode);
|
||||
|
||||
/* Others */
|
||||
unsigned int countWords(const string &text);
|
||||
|
||||
/* Boost factor */
|
||||
unsigned int keywordsBoostFactor;
|
||||
inline unsigned int getTitleBoostFactor(const unsigned int contentLength) {
|
||||
return contentLength / 500 + 1;
|
||||
}
|
||||
|
||||
/* Verbose */
|
||||
pthread_mutex_t verboseMutex;
|
||||
bool getVerboseFlag();
|
||||
bool verboseFlag;
|
||||
|
||||
private:
|
||||
ProgressCallback progressCallback;
|
||||
pthread_mutex_t threadIdsMutex;
|
||||
|
||||
/* Article extraction */
|
||||
pthread_t articleExtractor;
|
||||
pthread_mutex_t articleExtractorRunningMutex;
|
||||
static void *extractArticles(void *ptr);
|
||||
bool articleExtractorRunningFlag;
|
||||
bool isArticleExtractorRunning();
|
||||
void articleExtractorRunning(bool value);
|
||||
|
||||
/* Article parsing */
|
||||
pthread_t articleParser;
|
||||
pthread_mutex_t articleParserRunningMutex;
|
||||
static void *parseArticles(void *ptr);
|
||||
bool articleParserRunningFlag;
|
||||
bool isArticleParserRunning();
|
||||
void articleParserRunning(bool value);
|
||||
|
||||
/* Index writting */
|
||||
pthread_t articleIndexer;
|
||||
pthread_mutex_t articleIndexerRunningMutex;
|
||||
static void *indexArticles(void *ptr);
|
||||
bool articleIndexerRunningFlag;
|
||||
bool isArticleIndexerRunning();
|
||||
void articleIndexerRunning(bool value);
|
||||
|
||||
/* To parse queue */
|
||||
std::queue<indexerToken> toParseQueue;
|
||||
pthread_mutex_t toParseQueueMutex;
|
||||
void pushToParseQueue(indexerToken &token);
|
||||
bool popFromToParseQueue(indexerToken &token);
|
||||
bool isToParseQueueEmpty();
|
||||
|
||||
/* To index queue */
|
||||
std::queue<indexerToken> toIndexQueue;
|
||||
pthread_mutex_t toIndexQueueMutex;
|
||||
void pushToIndexQueue(indexerToken &token);
|
||||
bool popFromToIndexQueue(indexerToken &token);
|
||||
bool isToIndexQueueEmpty();
|
||||
|
||||
/* Article Count & Progression */
|
||||
unsigned int articleCount;
|
||||
pthread_mutex_t articleCountMutex;
|
||||
void setArticleCount(const unsigned int articleCount);
|
||||
unsigned int getArticleCount();
|
||||
|
||||
/* Progression */
|
||||
unsigned int progression;
|
||||
pthread_mutex_t progressionMutex;
|
||||
void setProgression(const unsigned int progression);
|
||||
/* getProgression() is public */
|
||||
|
||||
/* ZIM path */
|
||||
pthread_mutex_t zimPathMutex;
|
||||
string zimPath;
|
||||
void setZimPath(const string path);
|
||||
string getZimPath();
|
||||
|
||||
/* Index path */
|
||||
pthread_mutex_t indexPathMutex;
|
||||
string indexPath;
|
||||
void setIndexPath(const string path);
|
||||
string getIndexPath();
|
||||
|
||||
/* ZIM id */
|
||||
pthread_mutex_t zimIdMutex;
|
||||
string zimId;
|
||||
void setZimId(const string id);
|
||||
string getZimId();
|
||||
};
|
||||
}
|
||||
|
||||
#endif
|
||||
@@ -5,12 +5,8 @@ headers = [
|
||||
'searcher.h'
|
||||
]
|
||||
|
||||
if not get_option('android')
|
||||
headers += ['indexer.h']
|
||||
endif
|
||||
|
||||
if xapian_dep.found()
|
||||
headers += ['xapianIndexer.h', 'xapianSearcher.h']
|
||||
headers += ['xapianSearcher.h']
|
||||
endif
|
||||
|
||||
install_headers(headers, subdir:'kiwix')
|
||||
|
||||
@@ -43,45 +43,45 @@ namespace kiwix {
|
||||
~Reader();
|
||||
|
||||
void reset();
|
||||
unsigned int getArticleCount();
|
||||
unsigned int getMediaCount();
|
||||
unsigned int getGlobalCount();
|
||||
string getZimFilePath();
|
||||
string getId();
|
||||
string getRandomPageUrl();
|
||||
string getFirstPageUrl();
|
||||
string getMainPageUrl();
|
||||
bool getMetatag(const string &url, string &content);
|
||||
string getTitle();
|
||||
string getDescription();
|
||||
string getLanguage();
|
||||
string getName();
|
||||
string getTags();
|
||||
string getDate();
|
||||
string getCreator();
|
||||
string getPublisher();
|
||||
string getOrigId();
|
||||
bool getFavicon(string &content, string &mimeType);
|
||||
bool getPageUrlFromTitle(const string &title, string &url);
|
||||
bool getMimeTypeByUrl(const string &url, string &mimeType);
|
||||
bool getContentByUrl(const string &url, string &content, unsigned int &contentLength, string &contentType);
|
||||
bool getContentByEncodedUrl(const string &url, string &content, unsigned int &contentLength, string &contentType, string &baseUrl);
|
||||
bool getContentByEncodedUrl(const string &url, string &content, unsigned int &contentLength, string &contentType);
|
||||
bool getContentByDecodedUrl(const string &url, string &content, unsigned int &contentLength, string &contentType, string &baseUrl);
|
||||
bool getContentByDecodedUrl(const string &url, string &content, unsigned int &contentLength, string &contentType);
|
||||
unsigned int getArticleCount() const;
|
||||
unsigned int getMediaCount() const;
|
||||
unsigned int getGlobalCount() const;
|
||||
string getZimFilePath() const;
|
||||
string getId() const;
|
||||
string getRandomPageUrl() const;
|
||||
string getFirstPageUrl() const;
|
||||
string getMainPageUrl() const;
|
||||
bool getMetatag(const string &url, string &content) const;
|
||||
string getTitle() const;
|
||||
string getDescription() const;
|
||||
string getLanguage() const;
|
||||
string getName() const;
|
||||
string getTags() const;
|
||||
string getDate() const;
|
||||
string getCreator() const;
|
||||
string getPublisher() const;
|
||||
string getOrigId() const;
|
||||
bool getFavicon(string &content, string &mimeType) const;
|
||||
bool getPageUrlFromTitle(const string &title, string &url) const;
|
||||
bool getMimeTypeByUrl(const string &url, string &mimeType) const;
|
||||
bool getContentByUrl(const string &url, string &content, unsigned int &contentLength, string &contentType) const;
|
||||
bool getContentByEncodedUrl(const string &url, string &content, unsigned int &contentLength, string &contentType, string &baseUrl) const;
|
||||
bool getContentByEncodedUrl(const string &url, string &content, unsigned int &contentLength, string &contentType) const;
|
||||
bool getContentByDecodedUrl(const string &url, string &content, unsigned int &contentLength, string &contentType, string &baseUrl) const;
|
||||
bool getContentByDecodedUrl(const string &url, string &content, unsigned int &contentLength, string &contentType) const;
|
||||
bool searchSuggestions(const string &prefix, unsigned int suggestionsCount, const bool reset = true);
|
||||
bool searchSuggestionsSmart(const string &prefix, unsigned int suggestionsCount);
|
||||
bool urlExists(const string &url);
|
||||
bool hasFulltextIndex();
|
||||
std::vector<std::string> getTitleVariants(const std::string &title);
|
||||
bool urlExists(const string &url) const;
|
||||
bool hasFulltextIndex() const;
|
||||
std::vector<std::string> getTitleVariants(const std::string &title) const;
|
||||
bool getNextSuggestion(string &title);
|
||||
bool getNextSuggestion(string &title, string &url);
|
||||
bool canCheckIntegrity();
|
||||
bool isCorrupted();
|
||||
bool parseUrl(const string &url, char *ns, string &title);
|
||||
unsigned int getFileSize();
|
||||
zim::File* getZimFileHandler();
|
||||
bool getArticleObjectByDecodedUrl(const string &url, zim::Article &article);
|
||||
bool canCheckIntegrity() const;
|
||||
bool isCorrupted() const;
|
||||
bool parseUrl(const string &url, char *ns, string &title) const;
|
||||
unsigned int getFileSize() const;
|
||||
zim::File* getZimFileHandler() const;
|
||||
bool getArticleObjectByDecodedUrl(const string &url, zim::Article &article) const;
|
||||
|
||||
protected:
|
||||
zim::File* zimFileHandler;
|
||||
@@ -96,7 +96,7 @@ namespace kiwix {
|
||||
std::vector< std::vector<std::string> >::iterator suggestionsOffset;
|
||||
|
||||
private:
|
||||
std::map<std::string, unsigned int> parseCounterMetadata();
|
||||
std::map<const std::string, unsigned int> parseCounterMetadata() const;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
@@ -35,27 +35,31 @@
|
||||
|
||||
using namespace std;
|
||||
|
||||
struct Result
|
||||
{
|
||||
string url;
|
||||
string title;
|
||||
int score;
|
||||
string snippet;
|
||||
int wordCount;
|
||||
int size;
|
||||
};
|
||||
|
||||
namespace kiwix {
|
||||
class Reader;
|
||||
class Result {
|
||||
public:
|
||||
virtual ~Result() {};
|
||||
virtual std::string get_url() = 0;
|
||||
virtual std::string get_title() = 0;
|
||||
virtual int get_score() = 0;
|
||||
virtual std::string get_snippet() = 0;
|
||||
virtual int get_wordCount() = 0;
|
||||
virtual int get_size() = 0;
|
||||
};
|
||||
|
||||
|
||||
struct SearcherInternal;
|
||||
class Searcher {
|
||||
|
||||
public:
|
||||
Searcher();
|
||||
virtual ~Searcher();
|
||||
Searcher(const string &xapianDirectoryPath, Reader* reader);
|
||||
~Searcher();
|
||||
|
||||
void search(std::string &search, unsigned int resultStart,
|
||||
unsigned int resultEnd, const bool verbose=false);
|
||||
bool getNextResult(string &url, string &title, unsigned int &score);
|
||||
Result* getNextResult();
|
||||
void restart_search();
|
||||
unsigned int getEstimatedResultCount();
|
||||
bool setProtocolPrefix(const std::string prefix);
|
||||
bool setSearchProtocolPrefix(const std::string prefix);
|
||||
@@ -68,12 +72,12 @@ namespace kiwix {
|
||||
|
||||
protected:
|
||||
std::string beautifyInteger(const unsigned int number);
|
||||
virtual void closeIndex() = 0;
|
||||
virtual void searchInIndex(string &search, const unsigned int resultStart,
|
||||
const unsigned int resultEnd, const bool verbose=false) = 0;
|
||||
void closeIndex() ;
|
||||
void searchInIndex(string &search, const unsigned int resultStart,
|
||||
const unsigned int resultEnd, const bool verbose=false);
|
||||
|
||||
std::vector<Result> results;
|
||||
std::vector<Result>::iterator resultOffset;
|
||||
Reader* reader;
|
||||
SearcherInternal* internal;
|
||||
std::string searchPattern;
|
||||
std::string protocolPrefix;
|
||||
std::string searchProtocolPrefix;
|
||||
|
||||
@@ -1,56 +0,0 @@
|
||||
/*
|
||||
* Copyright 2011 Emmanuel Engelhart <kelson@kiwix.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 3 of the License, or
|
||||
* any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
* MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#ifndef KIWIX_XAPIAN_INDEXER_H
|
||||
#define KIWIX_XAPIAN_INDEXER_H
|
||||
|
||||
#include <xapian.h>
|
||||
#include "indexer.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace kiwix {
|
||||
|
||||
class XapianIndexer : public Indexer {
|
||||
|
||||
public:
|
||||
XapianIndexer();
|
||||
|
||||
protected:
|
||||
void indexingPrelude(const string indexPath);
|
||||
void index(const string &url,
|
||||
const string &title,
|
||||
const string &unaccentedTitle,
|
||||
const string &keywords,
|
||||
const string &content,
|
||||
const string &snippet,
|
||||
const string &size,
|
||||
const string &wordCount);
|
||||
void flush();
|
||||
void indexingPostlude(const string indexPath);
|
||||
|
||||
Xapian::WritableDatabase writableDatabase;
|
||||
Xapian::Stem stemmer;
|
||||
Xapian::SimpleStopper stopper;
|
||||
Xapian::TermGenerator indexer;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
@@ -22,31 +22,67 @@
|
||||
|
||||
#include <xapian.h>
|
||||
#include "searcher.h"
|
||||
#include "reader.h"
|
||||
|
||||
#include <map>
|
||||
#include <string>
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace kiwix {
|
||||
|
||||
class XapianSearcher;
|
||||
|
||||
class XapianResult : public Result {
|
||||
public:
|
||||
XapianResult(XapianSearcher* searcher, Xapian::MSetIterator& iterator);
|
||||
virtual ~XapianResult() {};
|
||||
|
||||
virtual std::string get_url();
|
||||
virtual std::string get_title();
|
||||
virtual int get_score();
|
||||
virtual std::string get_snippet();
|
||||
virtual int get_wordCount();
|
||||
virtual int get_size();
|
||||
|
||||
private:
|
||||
XapianSearcher* searcher;
|
||||
Xapian::MSetIterator iterator;
|
||||
Xapian::Document document;
|
||||
};
|
||||
|
||||
class NoXapianIndexInZim: public exception {
|
||||
virtual const char* what() const throw() {
|
||||
return "There is no fulltext index in the zim file";
|
||||
}
|
||||
};
|
||||
|
||||
class XapianSearcher : public Searcher {
|
||||
|
||||
class XapianSearcher {
|
||||
friend class XapianResult;
|
||||
public:
|
||||
XapianSearcher(const string &xapianDirectoryPath);
|
||||
XapianSearcher(const string &xapianDirectoryPath, Reader* reader);
|
||||
virtual ~XapianSearcher() {};
|
||||
void searchInIndex(string &search, const unsigned int resultStart, const unsigned int resultEnd,
|
||||
const bool verbose=false);
|
||||
virtual Result* getNextResult();
|
||||
void restart_search();
|
||||
|
||||
Xapian::MSet results;
|
||||
|
||||
protected:
|
||||
void closeIndex();
|
||||
void openIndex(const string &xapianDirectoryPath);
|
||||
void setup_queryParser();
|
||||
|
||||
Reader* reader;
|
||||
Xapian::Database readableDatabase;
|
||||
std::string language;
|
||||
std::string stopwords;
|
||||
Xapian::QueryParser queryParser;
|
||||
Xapian::Stem stemmer;
|
||||
Xapian::SimpleStopper stopper;
|
||||
Xapian::MSetIterator current_result;
|
||||
std::map<std::string, int> valuesmap;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
project('kiwixlib', 'cpp',
|
||||
version : '0.1.0',
|
||||
license : 'GPL')
|
||||
version : '0.2.0',
|
||||
license : 'GPL',
|
||||
default_options : ['c_std=c11', 'cpp_std=c++11'])
|
||||
|
||||
compiler = meson.get_compiler('cpp')
|
||||
find_library_in_compiler = meson.version().version_compare('>=0.31.0')
|
||||
@@ -10,7 +11,7 @@ libicu_dep = dependency('icu-i18n')
|
||||
libzim_dep = dependency('libzim')
|
||||
pugixml_dep = dependency('pugixml')
|
||||
|
||||
|
||||
ctpp2_include_path = ''
|
||||
has_ctpp2_dep = false
|
||||
ctpp2_prefix_install = get_option('ctpp2-install-prefix')
|
||||
ctpp2_link_args = []
|
||||
|
||||
13
src/android/AndroidManifest.xml
Normal file
13
src/android/AndroidManifest.xml
Normal file
@@ -0,0 +1,13 @@
|
||||
<manifest xmlns:android="http://schemas.android.com/apk/res/android"
|
||||
|
||||
package="kiwix.org.kiwixlib"
|
||||
>
|
||||
|
||||
<application android:allowBackup="true"
|
||||
android:label="@string/app_name"
|
||||
android:supportsRtl="true"
|
||||
>
|
||||
|
||||
</application>
|
||||
|
||||
</manifest>
|
||||
@@ -9,7 +9,7 @@
|
||||
|
||||
#include "unicode/putil.h"
|
||||
#include "reader.h"
|
||||
#include "xapianSearcher.h"
|
||||
#include "searcher.h"
|
||||
#include "common/base64.h"
|
||||
|
||||
#include <android/log.h>
|
||||
@@ -23,7 +23,7 @@
|
||||
|
||||
/* global variables */
|
||||
kiwix::Reader *reader = NULL;
|
||||
kiwix::XapianSearcher *searcher = NULL;
|
||||
kiwix::Searcher *searcher = NULL;
|
||||
|
||||
static pthread_mutex_t readerLock = PTHREAD_MUTEX_INITIALIZER;
|
||||
static pthread_mutex_t searcherLock = PTHREAD_MUTEX_INITIALIZER;
|
||||
@@ -445,7 +445,7 @@ JNIEXPORT jboolean JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_loadFulltextIndex(JN
|
||||
searcher = NULL;
|
||||
try {
|
||||
if (searcher != NULL) delete searcher;
|
||||
searcher = new kiwix::XapianSearcher(cPath);
|
||||
searcher = new kiwix::Searcher(cPath, reader);
|
||||
} catch (...) {
|
||||
searcher = NULL;
|
||||
retVal = JNI_FALSE;
|
||||
@@ -460,19 +460,18 @@ JNIEXPORT jstring JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_indexedQuery
|
||||
(JNIEnv *env, jclass obj, jstring query, jint count) {
|
||||
std::string cQuery = jni2c(query, env);
|
||||
unsigned int cCount = jni2c(count);
|
||||
std::string url;
|
||||
std::string title;
|
||||
kiwix::Result *p_result;
|
||||
std::string result;
|
||||
unsigned int score;
|
||||
|
||||
pthread_mutex_lock(&searcherLock);
|
||||
try {
|
||||
if (searcher != NULL) {
|
||||
searcher->search(cQuery, 0, count);
|
||||
while (searcher->getNextResult(url, title, score) &&
|
||||
!title.empty() &&
|
||||
!url.empty()) {
|
||||
result += title + "\n";
|
||||
while ( (p_result = searcher->getNextResult()) &&
|
||||
!(p_result->get_title().empty()) &&
|
||||
!(p_result->get_url().empty())) {
|
||||
result += p_result->get_title() + "\n";
|
||||
delete p_result;
|
||||
}
|
||||
}
|
||||
} catch (...) {
|
||||
|
||||
@@ -11,3 +11,7 @@ kiwix_jni = custom_target('jni',
|
||||
)
|
||||
|
||||
kiwix_sources += ['android/kiwix.cpp', kiwix_jni]
|
||||
|
||||
install_subdir('org', install_dir: 'kiwix-lib/java')
|
||||
install_subdir('res', install_dir: 'kiwix-lib')
|
||||
install_data('AndroidManifest.xml', install_dir: 'kiwix-lib')
|
||||
|
||||
3
src/android/res/values/strings.xml
Normal file
3
src/android/res/values/strings.xml
Normal file
@@ -0,0 +1,3 @@
|
||||
<resources>
|
||||
<string name="app_name">Kiwix Lib</string>
|
||||
</resources>
|
||||
@@ -85,7 +85,7 @@ std::map<std::string, std::string> kiwix::getNetworkInterfaces() {
|
||||
|
||||
/* some systems have ifr_addr.sa_len and adjust the length that
|
||||
* way, but not mine. weird */
|
||||
#ifndef linux
|
||||
#ifndef __linux__
|
||||
len=IFNAMSIZ + ifreq->ifr_addr.sa_len;
|
||||
#else
|
||||
len=sizeof *ifreq;
|
||||
|
||||
@@ -19,6 +19,13 @@
|
||||
|
||||
#include <common/stringTools.h>
|
||||
|
||||
#include <unicode/translit.h>
|
||||
#include <unicode/normlzr.h>
|
||||
#include <unicode/ustring.h>
|
||||
#include <unicode/rep.h>
|
||||
#include <unicode/uniset.h>
|
||||
#include <unicode/ucnv.h>
|
||||
|
||||
/* tell ICU where to find its dat file (tables) */
|
||||
void kiwix::loadICUExternalTables() {
|
||||
#ifdef __APPLE__
|
||||
|
||||
528
src/indexer.cpp
528
src/indexer.cpp
@@ -1,528 +0,0 @@
|
||||
/*
|
||||
* Copyright 2011-2014 Emmanuel Engelhart <kelson@kiwix.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 3 of the License, or
|
||||
* any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
* MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#include "indexer.h"
|
||||
#include "xapian/myhtmlparse.h"
|
||||
#include "kiwixlib-resources.h"
|
||||
|
||||
namespace kiwix {
|
||||
|
||||
/* Count word */
|
||||
unsigned int Indexer::countWords(const string &text) {
|
||||
unsigned int numWords = 1;
|
||||
unsigned int length = text.size();
|
||||
|
||||
for(unsigned int i=0; i<length;) {
|
||||
while(i<length && text[i] != ' ') {
|
||||
i++;
|
||||
}
|
||||
numWords++;
|
||||
i++;
|
||||
}
|
||||
|
||||
return numWords;
|
||||
}
|
||||
|
||||
/* Constructor */
|
||||
Indexer::Indexer() :
|
||||
keywordsBoostFactor(3),
|
||||
verboseFlag(false) {
|
||||
|
||||
/* Initialize mutex */
|
||||
pthread_mutex_init(&threadIdsMutex, NULL);
|
||||
pthread_mutex_init(&toParseQueueMutex, NULL);
|
||||
pthread_mutex_init(&toIndexQueueMutex, NULL);
|
||||
pthread_mutex_init(&articleExtractorRunningMutex, NULL);
|
||||
pthread_mutex_init(&articleParserRunningMutex, NULL);
|
||||
pthread_mutex_init(&articleIndexerRunningMutex, NULL);
|
||||
pthread_mutex_init(&articleCountMutex, NULL);
|
||||
pthread_mutex_init(&zimPathMutex, NULL);
|
||||
pthread_mutex_init(&zimIdMutex, NULL);
|
||||
pthread_mutex_init(&indexPathMutex, NULL);
|
||||
pthread_mutex_init(&progressionMutex, NULL);
|
||||
pthread_mutex_init(&verboseMutex, NULL);
|
||||
}
|
||||
|
||||
/* Destructor: the body is empty. NOTE(review): the pthread mutexes set up by
 * the constructor are not destroyed here - confirm that this is intended. */
|
||||
Indexer::~Indexer() {
|
||||
}
|
||||
|
||||
/* Read the stopwords */
|
||||
void Indexer::readStopWords(const string languageCode) {
|
||||
std::string stopWord;
|
||||
std::istringstream file(getResource("stopwords/" + languageCode));
|
||||
|
||||
this->stopWords.clear();
|
||||
|
||||
while (getline(file, stopWord, '\n')) {
|
||||
this->stopWords.push_back(stopWord);
|
||||
}
|
||||
|
||||
if (this->verboseFlag) {
|
||||
std::cout << "Read stop words, lang code:" << languageCode << ", count:" << this->stopWords.size() << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
#pragma mark - Extractor
|
||||
|
||||
/* Article extractor methods */
|
||||
void *Indexer::extractArticles(void *ptr) {
|
||||
pthread_setcanceltype(PTHREAD_CANCEL_DEFERRED, NULL);
|
||||
kiwix::Indexer *self = (kiwix::Indexer *)ptr;
|
||||
|
||||
/* Get the number of article to index and the ZIM id */
|
||||
kiwix::Reader reader(self->getZimPath());
|
||||
unsigned int articleCount = reader.getArticleCount();
|
||||
self->setArticleCount(articleCount);
|
||||
string zimId = reader.getId();
|
||||
self->setZimId(zimId);
|
||||
|
||||
/* Progression */
|
||||
unsigned int readArticleCount = 0;
|
||||
unsigned int currentProgression = 0;
|
||||
self->setProgression(currentProgression);
|
||||
unsigned int newProgress;
|
||||
|
||||
/* StopWords */
|
||||
self->readStopWords(reader.getLanguage());
|
||||
|
||||
/* Goes trough all articles */
|
||||
zim::File *zimHandler = reader.getZimFileHandler();
|
||||
unsigned int currentOffset = zimHandler->getNamespaceBeginOffset('A');
|
||||
unsigned int lastOffset = zimHandler->getNamespaceEndOffset('A');
|
||||
zim::Article currentArticle;
|
||||
|
||||
while (currentOffset < lastOffset) {
|
||||
currentArticle = zimHandler->getArticle(currentOffset);
|
||||
|
||||
if (!currentArticle.isRedirect()) {
|
||||
/* Add articles to the queue */
|
||||
indexerToken token;
|
||||
token.title = currentArticle.getTitle();
|
||||
token.url = currentArticle.getLongUrl();
|
||||
token.content = string(currentArticle.getData().data(), currentArticle.getData().size());
|
||||
self->pushToParseQueue(token);
|
||||
readArticleCount += 1;
|
||||
|
||||
/* Update progress */
|
||||
if (self->progressCallback) {
|
||||
self->progressCallback(readArticleCount, articleCount);
|
||||
}
|
||||
newProgress = (unsigned int)((float)readArticleCount / (float)articleCount * 100);
|
||||
if (newProgress != currentProgression) {
|
||||
self->setProgression(newProgress);
|
||||
}
|
||||
}
|
||||
|
||||
currentOffset += 1;
|
||||
|
||||
/* Test if the thread should be cancelled */
|
||||
pthread_testcancel();
|
||||
}
|
||||
|
||||
self->articleExtractorRunning(false);
|
||||
pthread_exit(NULL);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
void Indexer::articleExtractorRunning(bool value) {
|
||||
pthread_mutex_lock(&articleExtractorRunningMutex);
|
||||
this->articleExtractorRunningFlag = value;
|
||||
pthread_mutex_unlock(&articleExtractorRunningMutex);
|
||||
}
|
||||
|
||||
bool Indexer::isArticleExtractorRunning() {
|
||||
pthread_mutex_lock(&articleExtractorRunningMutex);
|
||||
bool retVal = this->articleExtractorRunningFlag;
|
||||
pthread_mutex_unlock(&articleExtractorRunningMutex);
|
||||
return retVal;
|
||||
}
|
||||
|
||||
#pragma mark - Parser
|
||||
|
||||
/* Article parser methods */
|
||||
void *Indexer::parseArticles(void *ptr) {
|
||||
pthread_setcanceltype(PTHREAD_CANCEL_DEFERRED, NULL);
|
||||
kiwix::Indexer *self = (kiwix::Indexer *)ptr;
|
||||
size_t found;
|
||||
indexerToken token;
|
||||
|
||||
while (self->popFromToParseQueue(token)) {
|
||||
MyHtmlParser htmlParser;
|
||||
|
||||
/* The parser generate a lot of exceptions which should be avoided */
|
||||
try {
|
||||
htmlParser.parse_html(token.content, "UTF-8", true);
|
||||
} catch (...) {
|
||||
}
|
||||
|
||||
/* If content does not have the noindex meta tag */
|
||||
/* Seems that the parser generates an exception in such case */
|
||||
found = htmlParser.dump.find("NOINDEX");
|
||||
|
||||
if (found == string::npos) {
|
||||
/* Get the accented title */
|
||||
token.accentedTitle = (htmlParser.title.empty() ? token.title : htmlParser.title);
|
||||
|
||||
/* count words */
|
||||
stringstream countWordStringStream;
|
||||
countWordStringStream << self->countWords(htmlParser.dump);
|
||||
token.wordCount = countWordStringStream.str();
|
||||
|
||||
/* snippet */
|
||||
std::string snippet = std::string(htmlParser.dump, 0, 300);
|
||||
std::string::size_type last = snippet.find_last_of('.');
|
||||
if (last == snippet.npos)
|
||||
last = snippet.find_last_of(' ');
|
||||
if (last != snippet.npos)
|
||||
snippet = snippet.substr(0, last);
|
||||
token.snippet = snippet;
|
||||
|
||||
/* size */
|
||||
stringstream sizeStringStream;
|
||||
sizeStringStream << token.content.size() / 1024;
|
||||
token.size = sizeStringStream.str();
|
||||
|
||||
/* Remove accent */
|
||||
token.title = kiwix::removeAccents(token.accentedTitle);
|
||||
token.keywords = kiwix::removeAccents(htmlParser.keywords);
|
||||
token.content = kiwix::removeAccents(htmlParser.dump);
|
||||
self->pushToIndexQueue(token);
|
||||
}
|
||||
|
||||
/* Test if the thread should be cancelled */
|
||||
pthread_testcancel();
|
||||
}
|
||||
|
||||
self->articleParserRunning(false);
|
||||
pthread_exit(NULL);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
void Indexer::articleParserRunning(bool value) {
|
||||
pthread_mutex_lock(&articleParserRunningMutex);
|
||||
this->articleParserRunningFlag = value;
|
||||
pthread_mutex_unlock(&articleParserRunningMutex);
|
||||
}
|
||||
|
||||
bool Indexer::isArticleParserRunning() {
|
||||
pthread_mutex_lock(&articleParserRunningMutex);
|
||||
bool retVal = this->articleParserRunningFlag;
|
||||
pthread_mutex_unlock(&articleParserRunningMutex);
|
||||
return retVal;
|
||||
}
|
||||
|
||||
#pragma mark - Indexer
|
||||
|
||||
/* Article indexer methods */
|
||||
void *Indexer::indexArticles(void *ptr) {
|
||||
pthread_setcanceltype(PTHREAD_CANCEL_DEFERRED, NULL);
|
||||
kiwix::Indexer *self = (kiwix::Indexer *)ptr;
|
||||
unsigned int indexedArticleCount = 0;
|
||||
indexerToken token;
|
||||
|
||||
self->indexingPrelude(self->getIndexPath());
|
||||
|
||||
while (self->popFromToIndexQueue(token)) {
|
||||
self->index(token.url,
|
||||
token.accentedTitle,
|
||||
token.title,
|
||||
token.keywords,
|
||||
token.content,
|
||||
token.snippet,
|
||||
token.size,
|
||||
token.wordCount
|
||||
);
|
||||
|
||||
indexedArticleCount += 1;
|
||||
|
||||
/* Make a hard-disk flush every 10.000 articles */
|
||||
if (indexedArticleCount % 5000 == 0) {
|
||||
self->flush();
|
||||
}
|
||||
|
||||
/* Test if the thread should be cancelled */
|
||||
pthread_testcancel();
|
||||
}
|
||||
self->indexingPostlude(self->getIndexPath());
|
||||
|
||||
/* Write content id file */
|
||||
string path = appendToDirectory(self->getIndexPath(), "content.id");
|
||||
writeTextFile(path, self->getZimId());
|
||||
|
||||
self->setProgression(100);
|
||||
kiwix::sleep(100);
|
||||
|
||||
self->articleIndexerRunning(false);
|
||||
pthread_exit(NULL);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
void Indexer::articleIndexerRunning(bool value) {
|
||||
pthread_mutex_lock(&articleIndexerRunningMutex);
|
||||
this->articleIndexerRunningFlag = value;
|
||||
pthread_mutex_unlock(&articleIndexerRunningMutex);
|
||||
}
|
||||
|
||||
bool Indexer::isArticleIndexerRunning() {
|
||||
pthread_mutex_lock(&articleIndexerRunningMutex);
|
||||
bool retVal = this->articleIndexerRunningFlag;
|
||||
pthread_mutex_unlock(&articleIndexerRunningMutex);
|
||||
return retVal;
|
||||
}
|
||||
|
||||
#pragma mark - Parse Queue
|
||||
|
||||
/* ToParseQueue methods */
|
||||
bool Indexer::isToParseQueueEmpty() {
|
||||
pthread_mutex_lock(&toParseQueueMutex);
|
||||
bool retVal = this->toParseQueue.empty();
|
||||
pthread_mutex_unlock(&toParseQueueMutex);
|
||||
return retVal;
|
||||
}
|
||||
|
||||
/* Append a token to the to-parse queue, then throttle the caller.
 *
 * The queue length used for throttling is captured while the mutex is
 * still held; reading it after the unlock would race with the thread
 * popping from the queue.
 */
void Indexer::pushToParseQueue(indexerToken &token) {
  pthread_mutex_lock(&toParseQueueMutex);
  this->toParseQueue.push(token);
  const size_t queuedCount = this->toParseQueue.size();
  pthread_mutex_unlock(&toParseQueueMutex);

  /* Back-pressure: the integer divisions give 0 below 2000 queued tokens
     and add 1000 to the sleep argument for every further 2000 tokens.
     NOTE(review): kiwix::sleep() presumably takes milliseconds - confirm */
  kiwix::sleep(int(queuedCount / 200) / 10 * 1000);
}
|
||||
|
||||
/* Take the next token out of the to-parse queue.
 *
 * Waits (polling every kiwix::sleep(500)) while the queue is empty and the
 * article extractor is still flagged as running. Returns true and fills
 * `token` when a token was available, false when the queue is empty and
 * the extractor has stopped.
 *
 * The final emptiness test and the pop are done inside one critical
 * section so that the queue cannot change between the check and the pop.
 */
bool Indexer::popFromToParseQueue(indexerToken &token) {
  while (this->isToParseQueueEmpty() && this->isArticleExtractorRunning()) {
    kiwix::sleep(500);
    if (this->getVerboseFlag()) {
      std::cout << "Waiting... ToParseQueue is empty for now..." << std::endl;
    }

    /* Allow stop() to cancel the thread while it is idle */
    pthread_testcancel();
  }

  pthread_mutex_lock(&toParseQueueMutex);
  const bool hasToken = !this->toParseQueue.empty();
  if (hasToken) {
    token = this->toParseQueue.front();
    this->toParseQueue.pop();
  }
  pthread_mutex_unlock(&toParseQueueMutex);

  return hasToken;
}
|
||||
|
||||
#pragma mark - Index Queue
|
||||
|
||||
/* ToIndexQueue methods */
|
||||
bool Indexer::isToIndexQueueEmpty() {
|
||||
pthread_mutex_lock(&toIndexQueueMutex);
|
||||
bool retVal = this->toIndexQueue.empty();
|
||||
pthread_mutex_unlock(&toIndexQueueMutex);
|
||||
return retVal;
|
||||
}
|
||||
|
||||
/* Append a token to the to-index queue, then throttle the caller.
 *
 * The queue length used for throttling is captured while the mutex is
 * still held; reading it after the unlock would race with the thread
 * popping from the queue.
 */
void Indexer::pushToIndexQueue(indexerToken &token) {
  pthread_mutex_lock(&toIndexQueueMutex);
  this->toIndexQueue.push(token);
  const size_t queuedCount = this->toIndexQueue.size();
  pthread_mutex_unlock(&toIndexQueueMutex);

  /* Back-pressure: the integer divisions give 0 below 2000 queued tokens
     and add 1000 to the sleep argument for every further 2000 tokens.
     NOTE(review): kiwix::sleep() presumably takes milliseconds - confirm */
  kiwix::sleep(int(queuedCount / 200) / 10 * 1000);
}
|
||||
|
||||
/* Take the next token out of the to-index queue.
 *
 * Waits (polling every kiwix::sleep(500)) while the queue is empty and the
 * article parser is still flagged as running. Returns true and fills
 * `token` when a token was available, false when the queue is empty and
 * the parser has stopped.
 *
 * The final emptiness test and the pop are done inside one critical
 * section so that the queue cannot change between the check and the pop.
 */
bool Indexer::popFromToIndexQueue(indexerToken &token) {
  while (this->isToIndexQueueEmpty() && this->isArticleParserRunning()) {
    kiwix::sleep(500);
    if (this->getVerboseFlag()) {
      std::cout << "Waiting... ToIndexQueue is empty for now..." << std::endl;
    }

    /* Allow stop() to cancel the thread while it is idle */
    pthread_testcancel();
  }

  pthread_mutex_lock(&toIndexQueueMutex);
  const bool hasToken = !this->toIndexQueue.empty();
  if (hasToken) {
    token = this->toIndexQueue.front();
    this->toIndexQueue.pop();
  }
  pthread_mutex_unlock(&toIndexQueueMutex);

  return hasToken;
}
|
||||
|
||||
#pragma mark - Properties Getter & Setter
|
||||
|
||||
/* ZIM & Index methods */
|
||||
void Indexer::setZimPath(const string path) {
|
||||
pthread_mutex_lock(&zimPathMutex);
|
||||
this->zimPath = path;
|
||||
pthread_mutex_unlock(&zimPathMutex);
|
||||
}
|
||||
|
||||
/* Return a copy of the ZIM file path, taken under zimPathMutex. */
string Indexer::getZimPath() {
  pthread_mutex_lock(&zimPathMutex);
  string pathCopy = zimPath;
  pthread_mutex_unlock(&zimPathMutex);
  return pathCopy;
}
|
||||
|
||||
void Indexer::setIndexPath(const string path) {
|
||||
pthread_mutex_lock(&indexPathMutex);
|
||||
this->indexPath = path;
|
||||
pthread_mutex_unlock(&indexPathMutex);
|
||||
}
|
||||
|
||||
/* Return a copy of the index path, taken under indexPathMutex. */
string Indexer::getIndexPath() {
  pthread_mutex_lock(&indexPathMutex);
  string pathCopy = indexPath;
  pthread_mutex_unlock(&indexPathMutex);
  return pathCopy;
}
|
||||
|
||||
/* Store the article count; written under articleCountMutex.
   `this->` is required because the parameter shadows the member. */
void Indexer::setArticleCount(const unsigned int articleCount) {
  pthread_mutex_lock(&articleCountMutex);
  this->articleCount = articleCount;
  pthread_mutex_unlock(&articleCountMutex);
}
|
||||
|
||||
unsigned int Indexer::getArticleCount() {
|
||||
pthread_mutex_lock(&articleCountMutex);
|
||||
unsigned int retVal = this->articleCount;
|
||||
pthread_mutex_unlock(&articleCountMutex);
|
||||
return retVal;
|
||||
}
|
||||
|
||||
/* Store the progression value; written under progressionMutex.
   `this->` is required because the parameter shadows the member. */
void Indexer::setProgression(const unsigned int progression) {
  pthread_mutex_lock(&progressionMutex);
  this->progression = progression;
  pthread_mutex_unlock(&progressionMutex);
}
|
||||
|
||||
unsigned int Indexer::getProgression() {
|
||||
pthread_mutex_lock(&progressionMutex);
|
||||
unsigned int retVal = this->progression;
|
||||
pthread_mutex_unlock(&progressionMutex);
|
||||
return retVal;
|
||||
}
|
||||
|
||||
void Indexer::setZimId(const string id) {
|
||||
pthread_mutex_lock(&zimIdMutex);
|
||||
this->zimId = id;
|
||||
pthread_mutex_unlock(&zimIdMutex);
|
||||
}
|
||||
|
||||
/* Return a copy of the ZIM id, taken under zimIdMutex. */
string Indexer::getZimId() {
  pthread_mutex_lock(&zimIdMutex);
  string idCopy = zimId;
  pthread_mutex_unlock(&zimIdMutex);
  return idCopy;
}
|
||||
|
||||
#pragma mark - Status Management
|
||||
|
||||
/* Manage */
|
||||
bool Indexer::start(const string zimPath, const string indexPath, ProgressCallback callback) {
|
||||
if (this->getVerboseFlag()) {
|
||||
std::cout << "Indexing of '" << zimPath << "' starting..." <<std::endl;
|
||||
}
|
||||
|
||||
if (callback) {
|
||||
this->progressCallback = callback;
|
||||
}
|
||||
|
||||
this->setArticleCount(0);
|
||||
this->setProgression(0);
|
||||
this->setZimPath(zimPath);
|
||||
this->setIndexPath(indexPath);
|
||||
|
||||
pthread_mutex_lock(&threadIdsMutex);
|
||||
this->articleExtractorRunning(true);
|
||||
pthread_create(&(this->articleExtractor), NULL, Indexer::extractArticles, (void*)this);
|
||||
pthread_detach(this->articleExtractor);
|
||||
|
||||
while(this->isArticleExtractorRunning() && this->getArticleCount() == 0) {
|
||||
kiwix::sleep(100);
|
||||
}
|
||||
|
||||
this->articleParserRunning(true);
|
||||
pthread_create(&(this->articleParser), NULL, Indexer::parseArticles, (void*)this);
|
||||
pthread_detach(this->articleParser);
|
||||
|
||||
this->articleIndexerRunning(true);
|
||||
pthread_create(&(this->articleIndexer), NULL, Indexer::indexArticles, (void*)this);
|
||||
pthread_detach(this->articleIndexer);
|
||||
pthread_mutex_unlock(&threadIdsMutex);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Indexer::isRunning() {
|
||||
if (this->getVerboseFlag()) {
|
||||
std::cout << "isArticleExtractor running: " << (this->isArticleExtractorRunning() ? "yes" : "no") << std::endl;
|
||||
std::cout << "isArticleParser running: " << (this->isArticleParserRunning() ? "yes" : "no") << std::endl;
|
||||
std::cout << "isArticleIndexer running: " << (this->isArticleIndexerRunning() ? "yes" : "no") << std::endl;
|
||||
}
|
||||
|
||||
return this->isArticleExtractorRunning() || this->isArticleIndexerRunning() || this->isArticleParserRunning();
|
||||
}
|
||||
|
||||
bool Indexer::stop() {
|
||||
if (this->isRunning()) {
|
||||
bool isArticleExtractorRunning = this->isArticleExtractorRunning();
|
||||
bool isArticleIndexerRunning = this->isArticleIndexerRunning();
|
||||
bool isArticleParserRunning = this->isArticleParserRunning();
|
||||
|
||||
pthread_mutex_lock(&threadIdsMutex);
|
||||
|
||||
if (isArticleIndexerRunning) {
|
||||
pthread_cancel(this->articleIndexer);
|
||||
this->articleIndexerRunning(false);
|
||||
}
|
||||
if (isArticleParserRunning) {
|
||||
pthread_cancel(this->articleParser);
|
||||
this->articleParserRunning(false);
|
||||
}
|
||||
if (isArticleExtractorRunning) {
|
||||
pthread_cancel(this->articleExtractor);
|
||||
this->articleExtractorRunning(false);
|
||||
}
|
||||
|
||||
pthread_mutex_unlock(&threadIdsMutex);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
#pragma mark - verbose
|
||||
|
||||
/* Manage the verboseFlag */
|
||||
void Indexer::setVerboseFlag(const bool value) {
|
||||
pthread_mutex_lock(&verboseMutex);
|
||||
this->verboseFlag = value;
|
||||
pthread_mutex_unlock(&verboseMutex);
|
||||
}
|
||||
|
||||
/* Return the verbose flag as read under verboseMutex. */
bool Indexer::getVerboseFlag() {
  pthread_mutex_lock(&verboseMutex);
  const bool verbose = this->verboseFlag;
  pthread_mutex_unlock(&verboseMutex);
  return verbose;
}
|
||||
|
||||
}
|
||||
@@ -16,17 +16,16 @@ kiwix_sources += lib_resources
|
||||
|
||||
if xapian_dep.found()
|
||||
kiwix_sources += ['xapianSearcher.cpp']
|
||||
if not get_option('android')
|
||||
kiwix_sources += ['xapianIndexer.cpp']
|
||||
endif
|
||||
endif
|
||||
|
||||
if not get_option('android')
|
||||
kiwix_sources += ['indexer.cpp']
|
||||
else
|
||||
if get_option('android')
|
||||
subdir('android')
|
||||
install_dir = 'kiwix-lib/jniLibs/' + meson.get_cross_property('android_abi')
|
||||
else
|
||||
install_dir = get_option('libdir')
|
||||
endif
|
||||
|
||||
|
||||
if has_ctpp2_dep
|
||||
kiwix_sources += ['ctpp2/CTPP2VMStringLoader.cpp']
|
||||
endif
|
||||
@@ -40,5 +39,6 @@ kiwixlib = library('kiwix',
|
||||
kiwix_sources,
|
||||
include_directories : inc,
|
||||
dependencies : all_deps,
|
||||
version: '1.0.0',
|
||||
install : true)
|
||||
version: meson.project_version(),
|
||||
install: true,
|
||||
install_dir: install_dir)
|
||||
|
||||
296
src/reader.cpp
296
src/reader.cpp
@@ -87,7 +87,7 @@ namespace kiwix {
|
||||
}
|
||||
}
|
||||
|
||||
zim::File* Reader::getZimFileHandler() {
|
||||
zim::File* Reader::getZimFileHandler() const {
|
||||
return this->zimFileHandler;
|
||||
}
|
||||
|
||||
@@ -96,22 +96,24 @@ namespace kiwix {
|
||||
this->currentArticleOffset = this->firstArticleOffset;
|
||||
}
|
||||
|
||||
std::map<std::string, unsigned int> Reader::parseCounterMetadata() {
|
||||
std::map<std::string, unsigned int> counters;
|
||||
string content, mimeType, item, counterString;
|
||||
unsigned int contentLength, counter;
|
||||
string counterUrl = "/M/Counter";
|
||||
std::map<const std::string, unsigned int> Reader::parseCounterMetadata() const {
|
||||
std::map<const std::string, unsigned int> counters;
|
||||
string mimeType, item, counterString;
|
||||
unsigned int counter;
|
||||
|
||||
this->getContentByUrl(counterUrl, content, contentLength, mimeType);
|
||||
stringstream ssContent(content);
|
||||
zim::Article article = this->zimFileHandler->getArticle('M',"Counter");
|
||||
|
||||
while(getline(ssContent, item, ';')) {
|
||||
stringstream ssItem(item);
|
||||
getline(ssItem, mimeType, '=');
|
||||
getline(ssItem, counterString, '=');
|
||||
if (!counterString.empty() && !mimeType.empty()) {
|
||||
sscanf(counterString.c_str(), "%u", &counter);
|
||||
counters.insert(pair<string, int>(mimeType, counter));
|
||||
if ( article.good() ) {
|
||||
stringstream ssContent(article.getData());
|
||||
|
||||
while(getline(ssContent, item, ';')) {
|
||||
stringstream ssItem(item);
|
||||
getline(ssItem, mimeType, '=');
|
||||
getline(ssItem, counterString, '=');
|
||||
if (!counterString.empty() && !mimeType.empty()) {
|
||||
sscanf(counterString.c_str(), "%u", &counter);
|
||||
counters.insert(pair<string, int>(mimeType, counter));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -119,14 +121,14 @@ namespace kiwix {
|
||||
}
|
||||
|
||||
/* Get the count of articles which can be indexed/displayed */
|
||||
unsigned int Reader::getArticleCount() {
|
||||
std::map<std::string, unsigned int> counterMap = this->parseCounterMetadata();
|
||||
unsigned int Reader::getArticleCount() const {
|
||||
std::map<const std::string, unsigned int> counterMap = this->parseCounterMetadata();
|
||||
unsigned int counter = 0;
|
||||
|
||||
if (counterMap.empty()) {
|
||||
counter = this->nsACount;
|
||||
} else {
|
||||
std::map<std::string, unsigned int>::const_iterator it = counterMap.find("text/html");
|
||||
auto it = counterMap.find("text/html");
|
||||
if (it != counterMap.end())
|
||||
counter = it->second;
|
||||
}
|
||||
@@ -135,16 +137,14 @@ namespace kiwix {
|
||||
}
|
||||
|
||||
/* Get the count of medias content in the ZIM file */
|
||||
unsigned int Reader::getMediaCount() {
|
||||
std::map<std::string, unsigned int> counterMap = this->parseCounterMetadata();
|
||||
unsigned int Reader::getMediaCount() const {
|
||||
std::map<const std::string, unsigned int> counterMap = this->parseCounterMetadata();
|
||||
unsigned int counter = 0;
|
||||
|
||||
if (counterMap.empty())
|
||||
counter = this->nsICount;
|
||||
else {
|
||||
std::map<std::string, unsigned int>::const_iterator it;
|
||||
|
||||
it = counterMap.find("image/jpeg");
|
||||
auto it = counterMap.find("image/jpeg");
|
||||
if (it != counterMap.end())
|
||||
counter += it->second;
|
||||
|
||||
@@ -161,43 +161,38 @@ namespace kiwix {
|
||||
}
|
||||
|
||||
/* Get the total of all items of a ZIM file, redirects included */
|
||||
unsigned int Reader::getGlobalCount() {
|
||||
unsigned int Reader::getGlobalCount() const {
|
||||
return this->zimFileHandler->getCountArticles();
|
||||
}
|
||||
|
||||
/* Return the UID of the ZIM file */
|
||||
string Reader::getId() {
|
||||
string Reader::getId() const {
|
||||
std::ostringstream s;
|
||||
s << this->zimFileHandler->getFileheader().getUuid();
|
||||
return s.str();
|
||||
}
|
||||
|
||||
/* Return a page url from a title */
|
||||
bool Reader::getPageUrlFromTitle(const string &title, string &url) {
|
||||
bool Reader::getPageUrlFromTitle(const string &title, string &url) const {
|
||||
/* Extract the content from the zim file */
|
||||
std::pair<bool, zim::File::const_iterator> resultPair = zimFileHandler->findxByTitle('A', title);
|
||||
zim::Article article = this->zimFileHandler->getArticleByTitle('A', title);
|
||||
|
||||
/* Test if the article was found */
|
||||
if (resultPair.first == true) {
|
||||
|
||||
/* Get the article */
|
||||
zim::Article article = *resultPair.second;
|
||||
|
||||
/* If redirect */
|
||||
unsigned int loopCounter = 0;
|
||||
while (article.isRedirect() && loopCounter++<42) {
|
||||
article = article.getRedirectArticle();
|
||||
}
|
||||
|
||||
url = article.getLongUrl();
|
||||
return true;
|
||||
if ( ! article.good() )
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
return false;
|
||||
unsigned int loopCounter = 0;
|
||||
while (article.isRedirect() && loopCounter++<42) {
|
||||
article = article.getRedirectArticle();
|
||||
}
|
||||
|
||||
url = article.getLongUrl();
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Return the URL of a random page (never the main page) */
|
||||
string Reader::getRandomPageUrl() {
|
||||
string Reader::getRandomPageUrl() const {
|
||||
zim::Article article;
|
||||
zim::size_type idx;
|
||||
std::string mainPageUrl = this->getMainPageUrl();
|
||||
@@ -208,11 +203,11 @@ namespace kiwix {
|
||||
article = zimFileHandler->getArticle(idx);
|
||||
} while (article.getLongUrl() == mainPageUrl);
|
||||
|
||||
return article.getLongUrl().c_str();
|
||||
return article.getLongUrl();
|
||||
}
|
||||
|
||||
/* Return the welcome page URL */
|
||||
string Reader::getMainPageUrl() {
|
||||
string Reader::getMainPageUrl() const {
|
||||
string url = "";
|
||||
|
||||
if (this->zimFileHandler->getFileheader().hasMainPage()) {
|
||||
@@ -229,7 +224,7 @@ namespace kiwix {
|
||||
return url;
|
||||
}
|
||||
|
||||
bool Reader::getFavicon(string &content, string &mimeType) {
|
||||
bool Reader::getFavicon(string &content, string &mimeType) const {
|
||||
unsigned int contentLength = 0;
|
||||
|
||||
this->getContentByUrl( "/-/favicon.png", content,
|
||||
@@ -254,12 +249,12 @@ namespace kiwix {
|
||||
return content.empty() ? false : true;
|
||||
}
|
||||
|
||||
string Reader::getZimFilePath() {
|
||||
string Reader::getZimFilePath() const {
|
||||
return this->zimFilePath;
|
||||
}
|
||||
|
||||
/* Return a metatag value */
|
||||
bool Reader::getMetatag(const string &name, string &value) {
|
||||
bool Reader::getMetatag(const string &name, string &value) const {
|
||||
unsigned int contentLength = 0;
|
||||
string contentType = "";
|
||||
|
||||
@@ -267,7 +262,7 @@ namespace kiwix {
|
||||
contentLength, contentType);
|
||||
}
|
||||
|
||||
string Reader::getTitle() {
|
||||
string Reader::getTitle() const {
|
||||
string value;
|
||||
this->getMetatag("Title", value);
|
||||
if (value.empty()) {
|
||||
@@ -279,19 +274,19 @@ namespace kiwix {
|
||||
return value;
|
||||
}
|
||||
|
||||
string Reader::getName() {
|
||||
string Reader::getName() const {
|
||||
string value;
|
||||
this->getMetatag("Name", value);
|
||||
return value;
|
||||
}
|
||||
|
||||
string Reader::getTags() {
|
||||
string Reader::getTags() const {
|
||||
string value;
|
||||
this->getMetatag("Tags", value);
|
||||
return value;
|
||||
}
|
||||
|
||||
string Reader::getDescription() {
|
||||
string Reader::getDescription() const{
|
||||
string value;
|
||||
this->getMetatag("Description", value);
|
||||
|
||||
@@ -303,31 +298,31 @@ namespace kiwix {
|
||||
return value;
|
||||
}
|
||||
|
||||
string Reader::getLanguage() {
|
||||
string Reader::getLanguage() const {
|
||||
string value;
|
||||
this->getMetatag("Language", value);
|
||||
return value;
|
||||
}
|
||||
|
||||
string Reader::getDate() {
|
||||
string Reader::getDate() const {
|
||||
string value;
|
||||
this->getMetatag("Date", value);
|
||||
return value;
|
||||
}
|
||||
|
||||
string Reader::getCreator() {
|
||||
string Reader::getCreator() const {
|
||||
string value;
|
||||
this->getMetatag("Creator", value);
|
||||
return value;
|
||||
}
|
||||
|
||||
string Reader::getPublisher() {
|
||||
string Reader::getPublisher() const {
|
||||
string value;
|
||||
this->getMetatag("Publisher", value);
|
||||
return value;
|
||||
}
|
||||
|
||||
string Reader::getOrigId() {
|
||||
string Reader::getOrigId() const {
|
||||
string value;
|
||||
this->getMetatag("startfileuid", value);
|
||||
if(value.empty())
|
||||
@@ -355,17 +350,13 @@ namespace kiwix {
|
||||
}
|
||||
|
||||
/* Return the first page URL */
|
||||
string Reader::getFirstPageUrl() {
|
||||
string url;
|
||||
|
||||
string Reader::getFirstPageUrl() const {
|
||||
zim::size_type firstPageOffset = zimFileHandler->getNamespaceBeginOffset('A');
|
||||
zim::Article article = zimFileHandler->getArticle(firstPageOffset);
|
||||
url = article.getLongUrl();
|
||||
|
||||
return url;
|
||||
return article.getLongUrl();
|
||||
}
|
||||
|
||||
bool Reader::parseUrl(const string &url, char *ns, string &title) {
|
||||
bool Reader::parseUrl(const string &url, char *ns, string &title) const {
|
||||
/* Offset to visit the url */
|
||||
unsigned int urlLength = url.size();
|
||||
unsigned int offset = 0;
|
||||
@@ -395,130 +386,113 @@ namespace kiwix {
|
||||
}
|
||||
|
||||
/* Return article by url */
|
||||
bool Reader::getArticleObjectByDecodedUrl(const string &url, zim::Article &article) {
|
||||
bool retVal = false;
|
||||
|
||||
if (this->zimFileHandler != NULL) {
|
||||
|
||||
/* Parse the url */
|
||||
char ns = 0;
|
||||
string titleStr;
|
||||
this->parseUrl(url, &ns, titleStr);
|
||||
|
||||
/* Main page */
|
||||
if (titleStr.empty() && ns == 0) {
|
||||
this->parseUrl(this->getMainPageUrl(), &ns, titleStr);
|
||||
}
|
||||
|
||||
/* Extract the content from the zim file */
|
||||
std::pair<bool, zim::File::const_iterator> resultPair = zimFileHandler->findx(ns, titleStr);
|
||||
|
||||
/* Test if the article was found */
|
||||
if (resultPair.first == true) {
|
||||
article = zimFileHandler->getArticle(resultPair.second.getIndex());
|
||||
retVal = true;
|
||||
}
|
||||
|
||||
bool Reader::getArticleObjectByDecodedUrl(const string &url, zim::Article &article) const {
|
||||
if (this->zimFileHandler == NULL) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return retVal;
|
||||
|
||||
/* Parse the url */
|
||||
char ns = 0;
|
||||
string urlStr;
|
||||
this->parseUrl(url, &ns, urlStr);
|
||||
|
||||
/* Main page */
|
||||
if (urlStr.empty() && ns == 0) {
|
||||
this->parseUrl(this->getMainPageUrl(), &ns, urlStr);
|
||||
}
|
||||
|
||||
/* Extract the content from the zim file */
|
||||
article = zimFileHandler->getArticle(ns, urlStr);
|
||||
return article.good();
|
||||
}
|
||||
|
||||
/* Return the mimeType without the content */
|
||||
bool Reader::getMimeTypeByUrl(const string &url, string &mimeType) {
|
||||
bool retVal = false;
|
||||
|
||||
if (this->zimFileHandler != NULL) {
|
||||
|
||||
zim::Article article;
|
||||
if (this->getArticleObjectByDecodedUrl(url, article)) {
|
||||
try {
|
||||
mimeType = string(article.getMimeType().data(), article.getMimeType().size());
|
||||
} catch (exception &e) {
|
||||
cerr << "Unable to get the mimetype for "<< url << ":" << e.what() << endl;
|
||||
mimeType = "application/octet-stream";
|
||||
}
|
||||
retVal = true;
|
||||
} else {
|
||||
mimeType = "";
|
||||
}
|
||||
|
||||
bool Reader::getMimeTypeByUrl(const string &url, string &mimeType) const {
|
||||
if (this->zimFileHandler == NULL) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return retVal;
|
||||
zim::Article article;
|
||||
if (this->getArticleObjectByDecodedUrl(url, article)) {
|
||||
try {
|
||||
mimeType = article.getMimeType();
|
||||
} catch (exception &e) {
|
||||
cerr << "Unable to get the mimetype for " << url << ":" << e.what() << endl;
|
||||
mimeType = "application/octet-stream";
|
||||
}
|
||||
return true;
|
||||
} else {
|
||||
mimeType = "";
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/* Get a content from a zim file */
|
||||
bool Reader::getContentByUrl(const string &url, string &content, unsigned int &contentLength, string &contentType) {
|
||||
bool Reader::getContentByUrl(const string &url, string &content, unsigned int &contentLength, string &contentType) const {
|
||||
return this->getContentByEncodedUrl(url, content, contentLength, contentType);
|
||||
}
|
||||
|
||||
bool Reader::getContentByEncodedUrl(const string &url, string &content, unsigned int &contentLength, string &contentType, string &baseUrl) {
|
||||
bool Reader::getContentByEncodedUrl(const string &url, string &content, unsigned int &contentLength, string &contentType, string &baseUrl) const {
|
||||
return this->getContentByDecodedUrl(kiwix::urlDecode(url), content, contentLength, contentType, baseUrl);
|
||||
}
|
||||
|
||||
bool Reader::getContentByEncodedUrl(const string &url, string &content, unsigned int &contentLength, string &contentType) {
|
||||
bool Reader::getContentByEncodedUrl(const string &url, string &content, unsigned int &contentLength, string &contentType) const {
|
||||
std::string stubRedirectUrl;
|
||||
return this->getContentByEncodedUrl(kiwix::urlDecode(url), content, contentLength, contentType, stubRedirectUrl);
|
||||
}
|
||||
|
||||
bool Reader::getContentByDecodedUrl(const string &url, string &content, unsigned int &contentLength, string &contentType) {
|
||||
bool Reader::getContentByDecodedUrl(const string &url, string &content, unsigned int &contentLength, string &contentType) const {
|
||||
std::string stubRedirectUrl;
|
||||
return this->getContentByDecodedUrl(kiwix::urlDecode(url), content, contentLength, contentType, stubRedirectUrl);
|
||||
}
|
||||
|
||||
bool Reader::getContentByDecodedUrl(const string &url, string &content, unsigned int &contentLength, string &contentType, string &baseUrl) {
|
||||
bool retVal = false;
|
||||
bool Reader::getContentByDecodedUrl(const string &url, string &content, unsigned int &contentLength, string &contentType, string &baseUrl) const {
|
||||
content="";
|
||||
contentType="";
|
||||
contentLength = 0;
|
||||
if (this->zimFileHandler != NULL) {
|
||||
|
||||
zim::Article article;
|
||||
if (this->getArticleObjectByDecodedUrl(url, article)) {
|
||||
|
||||
/* If redirect */
|
||||
unsigned int loopCounter = 0;
|
||||
while (article.isRedirect() && loopCounter++<42) {
|
||||
article = article.getRedirectArticle();
|
||||
}
|
||||
|
||||
if (loopCounter < 42) {
|
||||
/* Compute base url (might be different from the url if redirects */
|
||||
baseUrl = "/" + std::string(1, article.getNamespace()) + "/" + article.getUrl();
|
||||
|
||||
/* Get the content mime-type */
|
||||
try {
|
||||
contentType = string(article.getMimeType().data(), article.getMimeType().size());
|
||||
} catch (exception &e) {
|
||||
cerr << "Unable to get the mimetype for "<< baseUrl<< ":" << e.what() << endl;
|
||||
contentType = "application/octet-stream";
|
||||
}
|
||||
|
||||
/* Get the data */
|
||||
content = string(article.getData().data(), article.getArticleSize());
|
||||
}
|
||||
|
||||
/* Try to set a stub HTML header/footer if necesssary */
|
||||
if (contentType.find("text/html") != string::npos &&
|
||||
content.find("<body") == std::string::npos &&
|
||||
content.find("<BODY") == std::string::npos) {
|
||||
content = "<html><head><title>" + article.getTitle() + "</title><meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\" /></head><body>" + content + "</body></html>";
|
||||
}
|
||||
|
||||
/* Get the data length */
|
||||
contentLength = article.getArticleSize();
|
||||
|
||||
/* Set return value */
|
||||
retVal = true;
|
||||
}
|
||||
zim::Article article;
|
||||
if ( ! this->getArticleObjectByDecodedUrl(url, article)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return retVal;
|
||||
/* If redirect */
|
||||
unsigned int loopCounter = 0;
|
||||
while (article.isRedirect() && loopCounter++<42) {
|
||||
article = article.getRedirectArticle();
|
||||
}
|
||||
|
||||
if (loopCounter < 42) {
|
||||
/* Compute base url (might be different from the url if redirects */
|
||||
baseUrl = "/" + std::string(1, article.getNamespace()) + "/" + article.getUrl();
|
||||
|
||||
/* Get the content mime-type */
|
||||
try {
|
||||
contentType = string(article.getMimeType().data(), article.getMimeType().size());
|
||||
} catch (exception &e) {
|
||||
cerr << "Unable to get the mimetype for "<< baseUrl<< ":" << e.what() << endl;
|
||||
contentType = "application/octet-stream";
|
||||
}
|
||||
|
||||
/* Get the data */
|
||||
content = string(article.getData().data(), article.getArticleSize());
|
||||
}
|
||||
|
||||
/* Try to set a stub HTML header/footer if necesssary */
|
||||
if (contentType.find("text/html") != string::npos &&
|
||||
content.find("<body") == std::string::npos &&
|
||||
content.find("<BODY") == std::string::npos) {
|
||||
content = "<html><head><title>" + article.getTitle() + "</title><meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\" /></head><body>" + content + "</body></html>";
|
||||
}
|
||||
|
||||
/* Get the data length */
|
||||
contentLength = article.getArticleSize();
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Check if an article exists */
|
||||
bool Reader::urlExists(const string &url) {
|
||||
bool Reader::urlExists(const string &url) const {
|
||||
char ns = 0;
|
||||
string titleStr;
|
||||
this->parseUrl(url, &ns, titleStr);
|
||||
@@ -528,7 +502,7 @@ namespace kiwix {
|
||||
}
|
||||
|
||||
/* Does the ZIM file have a fulltext index? */
|
||||
bool Reader::hasFulltextIndex() {
|
||||
bool Reader::hasFulltextIndex() const {
|
||||
return this->urlExists("/Z/fulltextIndex/xapian");
|
||||
}
|
||||
|
||||
@@ -604,7 +578,7 @@ namespace kiwix {
|
||||
return retVal;
|
||||
}
|
||||
|
||||
std::vector<std::string> Reader::getTitleVariants(const std::string &title) {
|
||||
std::vector<std::string> Reader::getTitleVariants(const std::string &title) const {
|
||||
std::vector<std::string> variants;
|
||||
variants.push_back(title);
|
||||
variants.push_back(kiwix::ucFirst(title));
|
||||
@@ -660,12 +634,12 @@ namespace kiwix {
|
||||
}
|
||||
|
||||
/* Check if the file has a checksum */
|
||||
bool Reader::canCheckIntegrity() {
|
||||
bool Reader::canCheckIntegrity() const {
|
||||
return this->zimFileHandler->getChecksum() != "";
|
||||
}
|
||||
|
||||
/* Return true if corrupted, false otherwise */
|
||||
bool Reader::isCorrupted() {
|
||||
bool Reader::isCorrupted() const {
|
||||
try {
|
||||
if (this->zimFileHandler->verify() == true)
|
||||
return false;
|
||||
@@ -678,7 +652,7 @@ namespace kiwix {
|
||||
}
|
||||
|
||||
/* Return the file size; also works for split files */
|
||||
unsigned int Reader::getFileSize() {
|
||||
unsigned int Reader::getFileSize() const {
|
||||
zim::File *file = this->getZimFileHandler();
|
||||
zim::offset_type size = 0;
|
||||
|
||||
|
||||
160
src/searcher.cpp
160
src/searcher.cpp
@@ -18,8 +18,12 @@
|
||||
*/
|
||||
|
||||
#include "searcher.h"
|
||||
#include "xapianSearcher.h"
|
||||
#include "reader.h"
|
||||
#include "kiwixlib-resources.h"
|
||||
|
||||
#include <zim/search.h>
|
||||
|
||||
#ifdef ENABLE_CTPP2
|
||||
#include <ctpp2/CDT.hpp>
|
||||
#include <ctpp2/CTPP2FileLogger.hpp>
|
||||
@@ -32,8 +36,46 @@ using namespace CTPP;
|
||||
|
||||
namespace kiwix {
|
||||
|
||||
class _Result : public Result {
|
||||
public:
|
||||
_Result(Searcher* searcher, zim::Search::iterator& iterator);
|
||||
virtual ~_Result() {};
|
||||
|
||||
virtual std::string get_url();
|
||||
virtual std::string get_title();
|
||||
virtual int get_score();
|
||||
virtual std::string get_snippet();
|
||||
virtual int get_wordCount();
|
||||
virtual int get_size();
|
||||
|
||||
private:
|
||||
Searcher* searcher;
|
||||
zim::Search::iterator iterator;
|
||||
};
|
||||
|
||||
struct SearcherInternal {
|
||||
const zim::Search *_search;
|
||||
XapianSearcher *_xapianSearcher;
|
||||
zim::Search::iterator current_iterator;
|
||||
|
||||
|
||||
SearcherInternal() :
|
||||
_search(NULL),
|
||||
_xapianSearcher(NULL)
|
||||
{}
|
||||
~SearcherInternal() {
|
||||
if ( _search != NULL )
|
||||
delete _search;
|
||||
if ( _xapianSearcher != NULL )
|
||||
delete _xapianSearcher;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
/* Constructor */
|
||||
Searcher::Searcher() :
|
||||
Searcher::Searcher(const string &xapianDirectoryPath, Reader* reader) :
|
||||
reader(reader),
|
||||
internal(new SearcherInternal()),
|
||||
searchPattern(""),
|
||||
protocolPrefix("zim://"),
|
||||
searchProtocolPrefix("search://?"),
|
||||
@@ -44,10 +86,15 @@ namespace kiwix {
|
||||
{
|
||||
template_ct2 = RESOURCE::results_ct2;
|
||||
loadICUExternalTables();
|
||||
if ( !reader || !reader->hasFulltextIndex() ) {
|
||||
internal->_xapianSearcher = new XapianSearcher(xapianDirectoryPath, reader);
|
||||
}
|
||||
}
|
||||
|
||||
/* Destructor */
|
||||
Searcher::~Searcher() {}
|
||||
Searcher::~Searcher() {
|
||||
delete internal;
|
||||
}
|
||||
|
||||
/* Search strings in the database */
|
||||
void Searcher::search(std::string &search, unsigned int resultStart,
|
||||
@@ -80,17 +127,41 @@ namespace kiwix {
|
||||
this->resultStart = resultStart;
|
||||
this->resultEnd = resultEnd;
|
||||
string unaccentedSearch = removeAccents(search);
|
||||
searchInIndex(unaccentedSearch, resultStart, resultEnd, verbose);
|
||||
this->resultOffset = this->results.begin();
|
||||
if ( internal->_xapianSearcher ) {
|
||||
internal->_xapianSearcher->searchInIndex(unaccentedSearch, resultStart, resultEnd, verbose);
|
||||
this->estimatedResultCount = internal->_xapianSearcher->results.get_matches_estimated();
|
||||
} else {
|
||||
internal->_search = this->reader->getZimFileHandler()->search(unaccentedSearch, resultStart, resultEnd);
|
||||
internal->current_iterator = internal->_search->begin();
|
||||
this->estimatedResultCount = internal->_search->get_matches_estimated();
|
||||
}
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
/* Rewind the result cursor to the first result.
 *
 * Delegates to the XapianSearcher when there is one; otherwise resets the
 * zim iterator to the beginning of the stored zim search. Does nothing when
 * no zim search has been stored yet. */
void Searcher::restart_search() {
  if ( internal->_xapianSearcher ) {
    internal->_xapianSearcher->restart_search();
  } else if ( internal->_search ) {
    /* _search is NULL until search() stores a zim::Search; without this
       guard a call made before any search would dereference NULL. */
    internal->current_iterator = internal->_search->begin();
  }
}
|
||||
|
||||
/* Return the next result of the current search, or NULL when there is none.
 *
 * With a XapianSearcher the call is forwarded to it. Otherwise a new _Result
 * is allocated from the current zim iterator and the iterator is advanced;
 * the caller is responsible for deleting the returned object. NULL is also
 * returned when no zim search has been stored yet. */
Result* Searcher::getNextResult() {
  if ( internal->_xapianSearcher ) {
    return internal->_xapianSearcher->getNextResult();
  }

  /* _search is NULL until search() stores a zim::Search: nothing to
     iterate over, and dereferencing it would crash. */
  if ( !internal->_search ) {
    return NULL;
  }

  if ( internal->current_iterator != internal->_search->end() ) {
    Result* result = new _Result(this, internal->current_iterator);
    ++internal->current_iterator;
    return result;
  }
  return NULL;
}
|
||||
|
||||
|
||||
/* Reset the results */
|
||||
void Searcher::reset() {
|
||||
this->results.clear();
|
||||
this->resultOffset = this->results.begin();
|
||||
this->estimatedResultCount = 0;
|
||||
this->searchPattern = "";
|
||||
return;
|
||||
@@ -101,30 +172,6 @@ namespace kiwix {
|
||||
return this->estimatedResultCount;
|
||||
}
|
||||
|
||||
/* Get next result */
|
||||
bool Searcher::getNextResult(string &url, string &title, unsigned int &score) {
|
||||
bool retVal = false;
|
||||
|
||||
if (this->resultOffset != this->results.end()) {
|
||||
|
||||
/* url */
|
||||
url = this->resultOffset->url;
|
||||
|
||||
/* title */
|
||||
title = this->resultOffset->title;
|
||||
|
||||
/* score */
|
||||
score = this->resultOffset->score;
|
||||
|
||||
/* increment the cursor for the next call */
|
||||
this->resultOffset++;
|
||||
|
||||
retVal = true;
|
||||
}
|
||||
|
||||
return retVal;
|
||||
}
|
||||
|
||||
bool Searcher::setProtocolPrefix(const std::string prefix) {
|
||||
this->protocolPrefix = prefix;
|
||||
return true;
|
||||
@@ -139,6 +186,36 @@ namespace kiwix {
|
||||
this->contentHumanReadableId = contentHumanReadableId;
|
||||
}
|
||||
|
||||
_Result::_Result(Searcher* searcher, zim::Search::iterator& iterator):
|
||||
searcher(searcher),
|
||||
iterator(iterator)
|
||||
{
|
||||
}
|
||||
|
||||
std::string _Result::get_url() {
|
||||
return iterator.get_url();
|
||||
}
|
||||
|
||||
std::string _Result::get_title() {
|
||||
return iterator.get_title();
|
||||
}
|
||||
|
||||
int _Result::get_score() {
|
||||
return iterator.get_score();
|
||||
}
|
||||
|
||||
std::string _Result::get_snippet() {
|
||||
return iterator.get_snippet();
|
||||
}
|
||||
|
||||
int _Result::get_size() {
|
||||
return iterator.get_size();
|
||||
}
|
||||
|
||||
int _Result::get_wordCount() {
|
||||
return iterator.get_wordCount();
|
||||
}
|
||||
|
||||
#ifdef ENABLE_CTPP2
|
||||
|
||||
string Searcher::getHtml() {
|
||||
@@ -149,23 +226,24 @@ namespace kiwix {
|
||||
CDT oData;
|
||||
CDT resultsCDT(CDT::ARRAY_VAL);
|
||||
|
||||
this->resultOffset = this->results.begin();
|
||||
while (this->resultOffset != this->results.end()) {
|
||||
this->restart_search();
|
||||
Result * p_result = NULL;
|
||||
while ( (p_result = this->getNextResult()) ) {
|
||||
CDT result;
|
||||
result["title"] = this->resultOffset->title;
|
||||
result["url"] = this->resultOffset->url;
|
||||
result["snippet"] = this->resultOffset->snippet;
|
||||
result["title"] = p_result->get_title();
|
||||
result["url"] = p_result->get_url();
|
||||
result["snippet"] = p_result->get_snippet();
|
||||
|
||||
if (this->resultOffset->size >= 0)
|
||||
result["size"] = kiwix::beautifyInteger(this->resultOffset->size);
|
||||
if (p_result->get_size() >= 0)
|
||||
result["size"] = kiwix::beautifyInteger(p_result->get_size());
|
||||
|
||||
if (this->resultOffset->wordCount >= 0)
|
||||
result["wordCount"] = kiwix::beautifyInteger(this->resultOffset->wordCount);
|
||||
if (p_result->get_wordCount() >= 0)
|
||||
result["wordCount"] = kiwix::beautifyInteger(p_result->get_wordCount());
|
||||
|
||||
resultsCDT.PushBack(result);
|
||||
this->resultOffset++;
|
||||
delete p_result;
|
||||
}
|
||||
this->resultOffset = this->results.begin();
|
||||
this->restart_search();
|
||||
oData["results"] = resultsCDT;
|
||||
|
||||
// pages
|
||||
|
||||
@@ -1,111 +0,0 @@
|
||||
/*
|
||||
* Copyright 2011 Emmanuel Engelhart <kelson@kiwix.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 3 of the License, or
|
||||
* any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
* MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#include "xapianIndexer.h"
|
||||
|
||||
namespace kiwix {
|
||||
|
||||
/* Constructor */
|
||||
XapianIndexer::XapianIndexer() {
|
||||
/*
|
||||
stemmer(Xapian::Stem("french")) {
|
||||
this->indexer.set_stemmer(this->stemmer);
|
||||
*/
|
||||
}
|
||||
|
||||
void XapianIndexer::indexingPrelude(const string indexPath) {
|
||||
this->writableDatabase = Xapian::WritableDatabase(indexPath+".tmp", Xapian::DB_CREATE_OR_OVERWRITE | Xapian::DB_BACKEND_GLASS);
|
||||
this->writableDatabase.begin_transaction(true);
|
||||
|
||||
/* Insert the stopwords */
|
||||
if (!this->stopWords.empty()) {
|
||||
std::vector<std::string>::iterator it = this->stopWords.begin();
|
||||
for( ; it != this->stopWords.end(); ++it) {
|
||||
this->stopper.add(*it);
|
||||
}
|
||||
|
||||
this->indexer.set_stopper(&(this->stopper));
|
||||
}
|
||||
}
|
||||
|
||||
void XapianIndexer::index(const string &url,
|
||||
const string &title,
|
||||
const string &unaccentedTitle,
|
||||
const string &keywords,
|
||||
const string &content,
|
||||
const string &snippet,
|
||||
const string &size,
|
||||
const string &wordCount) {
|
||||
|
||||
/* Put the data in the document */
|
||||
Xapian::Document currentDocument;
|
||||
currentDocument.clear_values();
|
||||
currentDocument.add_value(0, title);
|
||||
currentDocument.add_value(1, snippet);
|
||||
currentDocument.add_value(2, size);
|
||||
currentDocument.add_value(3, wordCount);
|
||||
currentDocument.set_data(url);
|
||||
indexer.set_document(currentDocument);
|
||||
|
||||
/* Index the title */
|
||||
if (!unaccentedTitle.empty()) {
|
||||
this->indexer.index_text_without_positions(unaccentedTitle, this->getTitleBoostFactor(content.size()));
|
||||
}
|
||||
|
||||
/* Index the keywords */
|
||||
if (!keywords.empty()) {
|
||||
this->indexer.index_text_without_positions(keywords, keywordsBoostFactor);
|
||||
}
|
||||
|
||||
/* Index the content */
|
||||
if (!content.empty()) {
|
||||
this->indexer.index_text_without_positions(content);
|
||||
}
|
||||
|
||||
/* add to the database */
|
||||
this->writableDatabase.add_document(currentDocument);
|
||||
}
|
||||
|
||||
/* Commit the transaction in progress and immediately open a new one. */
void XapianIndexer::flush()
{
  this->writableDatabase.commit_transaction();
  this->writableDatabase.begin_transaction(true);
}
|
||||
|
||||
void XapianIndexer::indexingPostlude(const string indexPath) {
|
||||
this->flush();
|
||||
this->writableDatabase.commit_transaction();
|
||||
#ifdef _WIN32
|
||||
this->writableDatabase.close();
|
||||
#endif
|
||||
|
||||
/* Compacting the index */
|
||||
Xapian::Compactor compactor;
|
||||
try {
|
||||
Xapian::Database src;
|
||||
src.add_database(Xapian::Database(indexPath+".tmp"));
|
||||
src.compact(indexPath, Xapian::Compactor::FULL | Xapian::DBCOMPACT_SINGLE_FILE, 0, compactor);
|
||||
} catch (const Xapian::Error &error) {
|
||||
cerr << indexPath << ": " << error.get_description() << endl;
|
||||
exit(1);
|
||||
} catch (const char * msg) {
|
||||
cerr << indexPath << ": " << msg << endl;
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -18,49 +18,87 @@
|
||||
*/
|
||||
|
||||
#include "xapianSearcher.h"
|
||||
#include "xapian/myhtmlparse.h"
|
||||
#include <zim/zim.h>
|
||||
#include <zim/file.h>
|
||||
#include <zim/article.h>
|
||||
#include <zim/error.h>
|
||||
#include <sys/types.h>
|
||||
#include <unistd.h>
|
||||
#include <unicode/locid.h>
|
||||
|
||||
#include <vector>
|
||||
|
||||
namespace kiwix {
|
||||
|
||||
std::map<std::string, int> read_valuesmap(const std::string &s) {
|
||||
std::map<std::string, int> result;
|
||||
std::vector<std::string> elems = split(s, ";");
|
||||
for(std::vector<std::string>::iterator elem = elems.begin();
|
||||
elem != elems.end();
|
||||
elem++)
|
||||
{
|
||||
std::vector<std::string> tmp_elems = split(*elem, ":");
|
||||
result.insert( std::pair<std::string, int>(tmp_elems[0], atoi(tmp_elems[1].c_str())) );
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/* Constructor */
|
||||
XapianSearcher::XapianSearcher(const string &xapianDirectoryPath)
|
||||
: Searcher(),
|
||||
stemmer(Xapian::Stem("english")) {
|
||||
XapianSearcher::XapianSearcher(const string &xapianDirectoryPath, Reader* reader)
|
||||
: reader(reader)
|
||||
{
|
||||
this->openIndex(xapianDirectoryPath);
|
||||
}
|
||||
|
||||
/* Open Xapian readable database */
|
||||
void XapianSearcher::openIndex(const string &directoryPath) {
|
||||
try
|
||||
{
|
||||
zim::File zimFile = zim::File(directoryPath);
|
||||
zim::Article xapianArticle = zimFile.getArticle('Z', "/fulltextIndex/xapian");
|
||||
if (!xapianArticle.good())
|
||||
throw NoXapianIndexInZim();
|
||||
zim::offset_type dbOffset = xapianArticle.getOffset();
|
||||
int databasefd = open(directoryPath.c_str(), O_RDONLY);
|
||||
lseek(databasefd, dbOffset, SEEK_SET);
|
||||
this->readableDatabase = Xapian::Database(databasefd);
|
||||
} catch (...) {
|
||||
this->readableDatabase = Xapian::Database(directoryPath);
|
||||
}
|
||||
this->readableDatabase = Xapian::Database(directoryPath);
|
||||
this->valuesmap = read_valuesmap(this->readableDatabase.get_metadata("valuesmap"));
|
||||
this->language = this->readableDatabase.get_metadata("language");
|
||||
this->stopwords = this->readableDatabase.get_metadata("stopwords");
|
||||
setup_queryParser();
|
||||
}
|
||||
|
||||
/* Close Xapian writable database */
|
||||
void XapianSearcher::closeIndex() {
|
||||
return;
|
||||
}
|
||||
|
||||
void XapianSearcher::setup_queryParser()
|
||||
{
|
||||
queryParser.set_database(readableDatabase);
|
||||
if ( ! language.empty() )
|
||||
{
|
||||
/* Build ICU Local object to retrieve ISO-639 language code (from
|
||||
ISO-639-3) */
|
||||
icu::Locale languageLocale(language.c_str());
|
||||
|
||||
/* Configuring language base steemming */
|
||||
try {
|
||||
stemmer = Xapian::Stem(languageLocale.getLanguage());
|
||||
queryParser.set_stemmer(stemmer);
|
||||
queryParser.set_stemming_strategy(Xapian::QueryParser::STEM_ALL);
|
||||
} catch (...) {
|
||||
std::cout << "No steemming for language '" << languageLocale.getLanguage() << "'" << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
if ( ! stopwords.empty() )
|
||||
{
|
||||
std::string stopWord;
|
||||
std::istringstream file(this->stopwords);
|
||||
while (std::getline(file, stopWord, '\n')) {
|
||||
this->stopper.add(stopWord);
|
||||
}
|
||||
queryParser.set_stopper(&(this->stopper));
|
||||
}
|
||||
}
|
||||
|
||||
/* Search strings in the database */
|
||||
void XapianSearcher::searchInIndex(string &search, const unsigned int resultStart,
|
||||
const unsigned int resultEnd, const bool verbose) {
|
||||
/* Create the query */
|
||||
Xapian::QueryParser queryParser;
|
||||
Xapian::Query query = queryParser.parse_query(search);
|
||||
|
||||
/* Create the enquire object */
|
||||
@@ -68,32 +106,108 @@ namespace kiwix {
|
||||
enquire.set_query(query);
|
||||
|
||||
/* Get the results */
|
||||
Xapian::MSet matches = enquire.get_mset(resultStart, resultEnd - resultStart);
|
||||
|
||||
Xapian::MSetIterator i;
|
||||
for (i = matches.begin(); i != matches.end(); ++i) {
|
||||
Xapian::Document doc = i.get_document();
|
||||
|
||||
Result result;
|
||||
result.url = doc.get_data();
|
||||
result.title = doc.get_value(0);
|
||||
result.snippet = doc.get_value(1);
|
||||
result.size = (doc.get_value(2).empty() == true ? -1 : atoi(doc.get_value(2).c_str()));
|
||||
result.wordCount = (doc.get_value(3).empty() == true ? -1 : atoi(doc.get_value(3).c_str()));
|
||||
result.score = i.get_percent();
|
||||
|
||||
this->results.push_back(result);
|
||||
|
||||
if (verbose) {
|
||||
std::cout << "Document ID " << *i << " \t";
|
||||
std::cout << i.get_percent() << "% ";
|
||||
std::cout << "\t[" << doc.get_data() << "] - " << doc.get_value(0) << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
/* Update the global resultCount value*/
|
||||
this->estimatedResultCount = matches.get_matches_estimated();
|
||||
|
||||
return;
|
||||
this->results = enquire.get_mset(resultStart, resultEnd - resultStart);
|
||||
this->current_result = this->results.begin();
|
||||
}
|
||||
}
|
||||
|
||||
/* Get next result */
|
||||
Result* XapianSearcher::getNextResult() {
|
||||
if (this->current_result != this->results.end()) {
|
||||
XapianResult* result = new XapianResult(this, this->current_result);
|
||||
this->current_result++;
|
||||
return result;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Put the cursor back on the first match of the stored results. */
void XapianSearcher::restart_search()
{
  this->current_result = this->results.begin();
}
|
||||
|
||||
/* Keep the searcher, a copy of the match iterator and the document the
 * iterator points to. */
XapianResult::XapianResult(XapianSearcher* searcher, Xapian::MSetIterator& iterator)
  : searcher(searcher),
    iterator(iterator),
    document(iterator.get_document())
{}
|
||||
|
||||
std::string XapianResult::get_url() {
|
||||
return document.get_data();
|
||||
}
|
||||
|
||||
std::string XapianResult::get_title() {
|
||||
if ( searcher->valuesmap.empty() )
|
||||
{
|
||||
/* This is the old legacy version. Guess and try */
|
||||
return document.get_value(0);
|
||||
}
|
||||
else if ( searcher->valuesmap.find("title") != searcher->valuesmap.end() )
|
||||
{
|
||||
return document.get_value(searcher->valuesmap["title"]);
|
||||
}
|
||||
return "";
|
||||
}
|
||||
|
||||
int XapianResult::get_score() {
|
||||
return iterator.get_percent();
|
||||
}
|
||||
|
||||
std::string XapianResult::get_snippet() {
|
||||
if ( searcher->valuesmap.empty() )
|
||||
{
|
||||
/* This is the old legacy version. Guess and try */
|
||||
std::string stored_snippet = document.get_value(1);
|
||||
if ( ! stored_snippet.empty() )
|
||||
return stored_snippet;
|
||||
/* Let's continue here, and see if we can genenate one */
|
||||
}
|
||||
else if ( searcher->valuesmap.find("snippet") != searcher->valuesmap.end() )
|
||||
{
|
||||
return document.get_value(searcher->valuesmap["snippet"]);
|
||||
}
|
||||
/* No reader, no snippet */
|
||||
if ( ! searcher->reader )
|
||||
return "";
|
||||
/* Get the content of the article to generate a snippet.
|
||||
We parse it and use the html dump to avoid remove html tags in the
|
||||
content and be able to nicely cut the text at random place. */
|
||||
MyHtmlParser htmlParser;
|
||||
std::string content;
|
||||
unsigned int contentLength;
|
||||
std::string contentType;
|
||||
searcher->reader->getContentByUrl(get_url(), content, contentLength, contentType);
|
||||
try {
|
||||
htmlParser.parse_html(content, "UTF-8", true);
|
||||
} catch (...) {}
|
||||
return searcher->results.snippet(htmlParser.dump, 500);
|
||||
}
|
||||
|
||||
int XapianResult::get_size() {
|
||||
if ( searcher->valuesmap.empty() )
|
||||
{
|
||||
/* This is the old legacy version. Guess and try */
|
||||
return document.get_value(2).empty() == true ? -1 : atoi(document.get_value(2).c_str());
|
||||
}
|
||||
else if ( searcher->valuesmap.find("size") != searcher->valuesmap.end() )
|
||||
{
|
||||
return atoi(document.get_value(searcher->valuesmap["size"]).c_str());
|
||||
}
|
||||
/* The size is never used. Do we really want to get the content and
|
||||
calculate the size ? */
|
||||
return -1;
|
||||
}
|
||||
|
||||
int XapianResult::get_wordCount() {
|
||||
if ( searcher->valuesmap.empty() )
|
||||
{
|
||||
/* This is the old legacy version. Guess and try */
|
||||
return document.get_value(3).empty() == true ? -1 : atoi(document.get_value(3).c_str());
|
||||
}
|
||||
else if ( searcher->valuesmap.find("wordcount") != searcher->valuesmap.end() )
|
||||
{
|
||||
return atoi(document.get_value(searcher->valuesmap["wordcount"]).c_str());
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
} // Kiwix namespace
|
||||
|
||||
35
travis/compile.sh
Executable file
35
travis/compile.sh
Executable file
@@ -0,0 +1,35 @@
|
||||
#!/usr/bin/env bash
# Travis build step: configure the project with meson for ${PLATFORM} and
# build it with ninja. pkg-config files and ctpp2 are looked up under
# ${HOME}/BUILD_${PLATFORM}/INSTALL.

set -e

BUILD_DIR="${HOME}/BUILD_${PLATFORM}"
INSTALL_DIR="${BUILD_DIR}/INSTALL"

# Meson options for each supported platform.
case "${PLATFORM}" in
    "native_static")
         MESON_OPTION="--default-library=static"
         ;;
    "native_dyn")
         MESON_OPTION="--default-library=shared"
         ;;
    "win32_static")
         MESON_OPTION="--default-library=static --cross-file ${BUILD_DIR}/meson_cross_file.txt"
         ;;
    "win32_dyn")
         MESON_OPTION="--default-library=shared --cross-file ${BUILD_DIR}/meson_cross_file.txt"
         ;;
    "android_arm")
         MESON_OPTION="-Dandroid=true --default-library=shared --cross-file ${BUILD_DIR}/meson_cross_file.txt"
         ;;
    "android_arm64")
         MESON_OPTION="-Dandroid=true --default-library=shared --cross-file ${BUILD_DIR}/meson_cross_file.txt"
         ;;
    *)
         # Fail loudly instead of building with no options at all.
         echo "Unknown PLATFORM '${PLATFORM}'" >&2
         exit 1
         ;;
esac

cd "${TRAVIS_BUILD_DIR}"
export PKG_CONFIG_PATH="${INSTALL_DIR}/lib/x86_64-linux-gnu/pkgconfig"
# MESON_OPTION is left unquoted on purpose: it holds several arguments.
meson . build -Dctpp2-install-prefix="${INSTALL_DIR}" ${MESON_OPTION}
cd build
ninja
|
||||
47
travis/install_deps.sh
Executable file
47
travis/install_deps.sh
Executable file
@@ -0,0 +1,47 @@
|
||||
#!/usr/bin/env bash
# Travis install step: install the system packages needed for ${PLATFORM},
# meson and ninja, then download and unpack the prebuilt dependencies
# archive into ${HOME}/BUILD_${PLATFORM}.

set -e

# Repository name without the owner part of the slug.
REPO_NAME="${TRAVIS_REPO_SLUG#*/}"
ARCHIVE_NAME="deps_${PLATFORM}_${REPO_NAME}.tar.gz"

# Packages.
case "${PLATFORM}" in
    "native_static")
         PACKAGES="gcc cmake libbz2-dev ccache zlib1g-dev uuid-dev libctpp2-dev"
         ;;
    "native_dyn")
         PACKAGES="gcc cmake libbz2-dev ccache zlib1g-dev uuid-dev libctpp2-dev libmicrohttpd-dev"
         ;;
    "win32_static")
         PACKAGES="g++-mingw-w64-i686 gcc-mingw-w64-i686 gcc-mingw-w64-base mingw-w64-tools ccache"
         ;;
    "win32_dyn")
         PACKAGES="g++-mingw-w64-i686 gcc-mingw-w64-i686 gcc-mingw-w64-base mingw-w64-tools ccache"
         ;;
    "android_arm")
         PACKAGES="gcc cmake ccache"
         ;;
    "android_arm64")
         PACKAGES="gcc cmake ccache"
         ;;
esac

sudo apt-get update -qq
# PACKAGES is left unquoted on purpose: it is a space separated list.
sudo apt-get install -qq python3-pip ${PACKAGES}
pip3 install meson

# Ninja
cd "${HOME}"
# Clone over https: GitHub no longer serves the unauthenticated git:// protocol.
git clone https://github.com/ninja-build/ninja.git
cd ninja
git checkout release
./configure.py --bootstrap
sudo cp ninja /bin

# Dependencies coming from kiwix-build.
cd "${HOME}"
wget "http://tmp.kiwix.org/ci/${ARCHIVE_NAME}"
mkdir -p "BUILD_${PLATFORM}"
cd "BUILD_${PLATFORM}"
tar xf "${HOME}/${ARCHIVE_NAME}"
|
||||
Reference in New Issue
Block a user