Compare commits

..

1 Commits
1.1.1 ... 1.0.2

Author SHA1 Message Date
Matthieu Gautier
00c8f55cc4 Update name of the project in meson.build.
The right name is `kiwix-lib`, not `kiwixlib`.
Also update the version to be able to do a small fix release.
2018-02-01 16:10:54 +01:00
25 changed files with 674 additions and 1886 deletions

View File

@@ -2,24 +2,12 @@ language: cpp
dist: trusty
sudo: required
cache: ccache
before_install:
- eval "${MATRIX_EVAL}"
- ${CXX} --version
install: travis/install_deps.sh
script: travis/compile.sh
env:
global:
- MATRIX_EVAL="CC=gcc-5 && CXX=g++-5"
matrix:
- PLATFORM="native_static"
- PLATFORM="native_dyn"
- PLATFORM="win32_static"
- PLATFORM="win32_dyn"
- PLATFORM="android_arm"
- PLATFORM="android_arm64"
addons:
apt:
sources:
- ubuntu-toolchain-r-test
packages:
- g++-5
- PLATFORM="native_static"
- PLATFORM="native_dyn"
- PLATFORM="win32_static"
- PLATFORM="win32_dyn"
- PLATFORM="android_arm"
- PLATFORM="android_arm64"

View File

@@ -1,28 +1,3 @@
kiwix-lib 1.1.1
===============
* Correct the name of kiwix-lib (from `kiwixlib`) in meson.build to generate
dist archive with the correct name.
* Libzim version needs to be at least 3.2.0
kiwix-lib 1.1.0
===============
* Allow for more than 70 search results per page in html results rendering
(kiwix/kiwix-tools#92)
* Add a small api to do geo queries.
* Add multi-search support in the JNI (#67)
* Add an API to get only one part of an article.
* Add an API to get direct location of an article content in the zim file.
* Improve urlencoding
* Fix pagination in html results rendering.
* Compile using gcc-5 on Travis.
* Allow JNI to access search snippets.
* JNI throws an exception instead of returning an invalid object if something
goes wrong.
* Add doctext documentation. (#116)
* Various bug fixes.
kiwix-lib 1.0.0
===============

View File

@@ -68,7 +68,7 @@ Then install Meson itself:
```
virtualenv -p python3 ./ # Create virtualenv
source bin/activate # Activate the virtualenv
pip3 install meson # Install Meson
pip install meson # Install Meson
hash -r # Refresh bash paths
```

View File

@@ -23,7 +23,6 @@
#include <unicode/unistr.h>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <sstream>
#include <string>
@@ -37,6 +36,7 @@ namespace kiwix
std::string beautifyInteger(const unsigned int number);
std::string beautifyFileSize(const unsigned int number);
std::string urlEncode(const std::string& c);
void printStringInHexadecimal(const char* s);
void printStringInHexadecimal(UnicodeString s);
void stringReplacement(std::string& str,
@@ -48,9 +48,7 @@ std::string encodeDiples(const std::string& str);
std::string removeAccents(const std::string& text);
void loadICUExternalTables();
std::string urlEncode(const std::string& value, bool encodeReserved = false);
std::string urlDecode(const std::string& value, bool component = false);
std::string urlDecode(const std::string& c);
std::vector<std::string> split(const std::string&, const std::string&);
std::vector<std::string> split(const char*, const char*);

View File

@@ -38,10 +38,6 @@ namespace kiwix
{
enum supportedIndexType { UNKNOWN, XAPIAN };
/**
* A class to store information about a book (a zim file)
*/
class Book
{
public:
@@ -82,9 +78,6 @@ class Book
string faviconMimeType;
};
/**
* A Library store several books.
*/
class Library
{
public:
@@ -92,24 +85,7 @@ class Library
~Library();
string version;
/**
* Add a book to the library.
*
* If a book already exist in the library with the same id, update
* the existing book instead of adding a new one.
*
* @param book The book to add.
* @return True if the book has been added.
* False if a book has been updated.
*/
bool addBook(const Book& book);
/**
* Remove a book from the library.
*
* @param bookIndex the index of the book to remove.
* @return True
*/
bool removeBookByIndex(const unsigned int bookIndex);
vector<kiwix::Book> books;

View File

@@ -39,216 +39,43 @@ namespace kiwix
enum supportedListMode { LASTOPEN, REMOTE, LOCAL };
enum supportedListSortBy { TITLE, SIZE, DATE, CREATOR, PUBLISHER };
/**
* A tool to manage a `Library`.
*
* A `Manager` handles an internal `Library`.
* This `Library` can be retrieved with the `cloneLibrary` method.
*/
class Manager
{
public:
Manager();
~Manager();
/**
* Read a `library.xml` and add the books listed in the file to the library.
*
* @param path The path to the `library.xml`.
* @param readOnly Set if the library path could be overwritten later with
* updated content.
* @return True if the file has been properly parsed.
*/
bool readFile(const string path, const bool readOnly = true);
/**
* Read a `library.xml` and add the books listed in the file to the library.
*
* @param nativePath The path of the `library.xml`.
* @param UTF8Path The utf8 version (?) of the path. Also the path where the
* library will be written if readOnly is False.
* @param readOnly Set if the library path could be overwritten later with
* updated content.
* @return True if the file has been properly parsed.
*/
bool readFile(const string nativePath,
const string UTF8Path,
const bool readOnly = true);
/**
* Load a library content stored in the string.
*
* @param xml The content of the library xml.
* @param readOnly Set if the library path could be overwritten later with
* updated content.
* @param libraryPath The library path (used to resolve relative paths).
* @return True if the content has been properly parsed.
*/
bool readXml(const string xml,
const bool readOnly = true,
const string libraryPath = "");
/**
* Write the library to a file.
*
* @param path the path of the file to write.
* @return True.
*/
bool writeFile(const string path);
string write_OPDS_feed(const string& id, const string& title);
/**
* Remove a book from the library.
*
* @param bookIndex the index of the book to remove
* @return True
*/
bool removeBookByIndex(const unsigned int bookIndex);
/**
* Remove a book from the library.
*
* @param id the id of the book to remove.
* @return True if the book were in the library.
*/
bool removeBookById(const string id);
/**
* Set the current book.
*
* @param id The id to add to the stack of current books.
* If id is empty, remove the current book from the stack.
* @return True
*/
bool setCurrentBookId(const string id);
/**
* Get the current book id.
*
* @return The id of the current book (or empty string if no current book).
*/
string getCurrentBookId() const;
/**
* Set the path of the external fulltext index associated to a book.
*
* @param id The id of the book to set.
* @param path The path of the external fullext index.
* @param supportedIndexType The type of the fulltext index.
* @return True if the book is in the library.
*/
string getCurrentBookId();
bool setBookIndex(const string id,
const string path,
const supportedIndexType type = XAPIAN);
/**
* Set the path of the zim file associated to a book.
*
* @param id The id of the book to set.
* @param path The path of the zim file.
* @return True if the book is in the library.
*/
const supportedIndexType type);
bool setBookIndex(const string id, const string path);
bool setBookPath(const string id, const string path);
/**
* Add a book to the library.
*
* @param pathToOpen The path to the zim file to add.
* @param pathToSave The path to store in the library in place of pathToOpen.
* @param url The url of the book to store in the library.
* @param checkMetaData Tell if we check metadata before adding the book to
* the library.
* @return The id of the book if the book has been added to the library.
* Else, an empty string.
*/
string addBookFromPathAndGetId(const string pathToOpen,
const string pathToSave = "",
const string url = "",
const bool checkMetaData = false);
/**
* Add a book to the library.
*
* @param pathToOpen The path to the zim file to add.
* @param pathToSave The path to store in the library in place of pathToOpen.
* @param url The url of the book to store in the library.
* @param checkMetaData Tell if we check metadata before adding the book to
* the library.
* @return True if the book has been added to the library.
*/
bool addBookFromPath(const string pathToOpen,
const string pathToSave = "",
const string url = "",
const bool checkMetaData = false);
/**
* Clone and return the internal library.
*
* @return A clone of the library.
*/
Library cloneLibrary();
/**
* Get the book corresponding to an id.
*
* @param[in] id The id of the book
* @param[out] book The book corresponding to the id.
* @return True if the book has been found.
*/
bool getBookById(const string id, Book& book);
/**
* Get the current book.
*
* @param[out] The current book.
* @return True if there is a current book.
*/
bool getCurrentBook(Book& book);
/**
* Get the number of book in the library.
*
* @param localBooks If we must count local books (books with a path).
* @param remoteBooks If we must count remote books (books with an url)
* @return The number of books.
*/
unsigned int getBookCount(const bool localBooks, const bool remoteBooks);
/**
* Update the "last open date" of a book
*
* @param id the id of the book.
* @return True if the book is in the library.
*/
bool updateBookLastOpenDateById(const string id);
/**
* Remove (set to empty) paths of all books in the library.
*/
void removeBookPaths();
/**
* List books in the library.
*
* The books list will be available in public vector member `bookIdList`.
*
* @param mode The mode of listing :
* - LASTOPEN sort by last opened book.
* - LOCAL list only local file.
* - REMOTE list only remote file.
* @param sortBy Attribute to sort by the book list.
* @param maxSize Do not list book bigger than maxSize MiB.
* Set to 0 to cancel this filter.
* @param language List only books in this language.
* @param creator List only books of this creator.
* @param publisher List only books of this publisher.
* @param search List only books with search in the title, description or
* language.
* @return True
*/
bool listBooks(const supportedListMode mode,
const supportedListSortBy sortBy,
const unsigned int maxSize,
@@ -256,32 +83,9 @@ class Manager
const string creator,
const string publisher,
const string search);
/**
* Get all languages of the books in the library.
*
* @return A list of languages.
*/
vector<string> getBooksLanguages();
/**
* Get all book creators of the books in the library.
*
* @return A list of book creators.
*/
vector<string> getBooksCreators();
/**
* Get all book publishers of the books in the library.
*
* @return A list of book publishers.
*/
vector<string> getBooksPublishers();
/**
* Get all book ids of the books in the library.
*
* @return A list of book ids.
*/
vector<string> getBooksIds();
string writableLibraryPath;

View File

@@ -36,383 +36,76 @@ using namespace std;
namespace kiwix
{
/**
* The Reader class is the class that allows getting an article content from a
* zim file.
*/
class Reader
{
public:
/**
* Create a Reader to read a zim file specified by zimFilePath.
*
* @param zimFilePath The path to the zim file to read.
* The zim file can be split (.zimaa, .zimab, ...).
* In this case, the file path must still point to the
* unsplit path as if the file were not split
* (.zim extension).
*/
Reader(const string zimFilePath);
~Reader();
/**
* Get the number of "displayable" articles in the zim file.
*
* @return If the zim file has a /M/Counter metadata, return the number of
* articles with the 'text/html' MIMEtype specified in the metadata.
* Else return the number of articles in the 'A' namespace.
*/
void reset();
unsigned int getArticleCount() const;
/**
* Get the number of media in the zim file.
*
* @return If the zim file has a /M/Counter metadata, return the number of
* articles with the 'image/jpeg', 'image/gif' and 'image/png' in
* the metadata.
* Else return the number of articles in the 'I' namespace.
*/
unsigned int getMediaCount() const;
/**
* Get the number of all articles in the zim file.
*
* @return Return the number of all the articles, whatever their MIMEtype or
* their namespace.
*/
unsigned int getGlobalCount() const;
/**
* Get the path of the zim file.
*
* @return the path of the zim file as given in the constructor.
*/
string getZimFilePath() const;
/**
* Get the Id of the zim file.
*
* @return The uuid stored in the zim file.
*/
string getId() const;
/**
* Get the url of a random page.
*
* @return Url of a random page. The page is picked from all articles in
* the 'A' namespace.
* The main page is excluded from the potential results.
*/
string getRandomPageUrl() const;
/**
* Get the url of the first page.
*
* @return Url of the first article in the 'A' namespace.
*/
string getFirstPageUrl() const;
/**
* Get the url of the main page.
*
* @return Url of the main page as specified in the zim file.
*/
string getMainPageUrl() const;
/**
* Get the content of a metadata.
*
* @param[in] name The name of the metadata.
* @param[out] value The value will be set to the content of the metadata.
* @return True if it was possible to get the content of the metadata.
*/
bool getMetatag(const string& name, string& value) const;
/**
* Get the title of the zim file.
*
* @return The title of zim file as specified in the zim metadata.
* If no title has been set, return a title computed from the
* file path.
*/
bool getMetatag(const string& url, string& content) const;
string getTitle() const;
/**
* Get the description of the zim file.
*
* @return The description of the zim file as specified in the zim metadata.
* If no description has been set, return the subtitle.
*/
string getDescription() const;
/**
* Get the language of the zim file.
*
* @return The language of the zim file as specified in the zim metadata.
*/
string getLanguage() const;
/**
* Get the name of the zim file.
*
* @return The name of the zim file as specified in the zim metadata.
*/
string getName() const;
/**
* Get the tags of the zim file.
*
* @return The tags of the zim file as specified in the zim metadata.
*/
string getTags() const;
/**
* Get the date of the zim file.
*
* @return The date of the zim file as specified in the zim metadata.
*/
string getDate() const;
/**
* Get the creator of the zim file.
*
* @return The creator of the zim file as specified in the zim metadata.
*/
string getCreator() const;
/**
* Get the publisher of the zim file.
*
* @return The publisher of the zim file as specified in the zim metadata.
*/
string getPublisher() const;
/**
* Get the origId of the zim file.
*
* The origId is only used in the case of patch zim file and is the Id
* of the original zim file.
*
* @return The origId of the zim file as specified in the zim metadata.
*/
string getOrigId() const;
/**
* Get the favicon of the zim file.
*
* @param[out] content The content of the favicon.
* @param[out] mimeType The mimeType of the favicon.
* @return True if a favicon has been found.
*/
bool getFavicon(string& content, string& mimeType) const;
/**
* Get the url of a page specified by a title.
*
* @param[in] title the title of the page.
* @param[out] url the url of the page.
* @return True if the page can be found.
*/
bool getPageUrlFromTitle(const string& title, string& url) const;
/**
* Get the mimetype of a article specified by a url.
*
* @param[in] url the url of the article.
* @param[out] mimetype the mimeType of the article.
* @return True if the mimeType has been found.
*/
bool getMimeTypeByUrl(const string& url, string& mimeType) const;
/**
* Get the content of an article specified by a url.
*
* Alias to `getContentByEncodedUrl`
*/
bool getContentByUrl(const string& url,
string& content,
string& title,
unsigned int& contentLength,
string& contentType) const;
/**
* Get the content of an article specified by a url encoded url.
*
* Equivalent to getContentByDecodedUrl(urlDecode(url), ...).
*/
bool getContentByEncodedUrl(const string& url,
string& content,
string& title,
unsigned int& contentLength,
string& contentType,
string& baseUrl) const;
/**
* Get the content of an article specified by an url encoded url.
*
* Equivalent to getContentByEncodedUrl but without baseUrl.
*/
bool getContentByEncodedUrl(const string& url,
string& content,
string& title,
unsigned int& contentLength,
string& contentType) const;
/**
* Get the content of an article specified by a url.
*
* @param[in] url The url of the article.
* @param[out] content The content of the article.
* @param[out] title the title of the article.
* @param[out] contentLength The size of the article (size of content).
* @param[out] contentType The mimeType of the article.
* @param[out] baseUrl Return the true url of the article.
* If the specified article is a redirection, contains
* the url of the targeted article.
* @return True if the article has been found.
*/
bool getContentByDecodedUrl(const string& url,
string& content,
string& title,
unsigned int& contentLength,
string& contentType,
string& baseUrl) const;
/**
* Get the content of an article specified by a url.
*
* Equivalent to getContentByDecodedUrl but without the baseUrl.
*/
bool getContentByDecodedUrl(const string& url,
string& content,
string& title,
unsigned int& contentLength,
string& contentType) const;
/**
* Search for articles with title starting with prefix (case sensitive).
*
* Suggestions are stored in an internal vector and can be retrieved using
* `getNextSuggestion` method.
*
* @param prefix The prefix to search.
* @param suggestionCount How many suggestions to search for.
* @param reset If true, remove previous suggestions in the internal vector.
* If false, add suggestions to the internal vector
* (until internal vector size is suggestionCount (or no more
* suggestion))
* @return True if some suggestions where added to the internal vector.
*/
bool searchSuggestions(const string& prefix,
unsigned int suggestionsCount,
const bool reset = true);
/**
* Search for articles for the given prefix.
*
* If the zim file has a internal fulltext index, the suggestions will be
* searched using it.
* Else the suggestions will be search using `searchSuggestions` while trying
* to be smart about case sensitivity (using `getTitleVariants`).
*
* In any case, suggestions are stored in an internal vector and can be
* retrieved using `getNextSuggestion` method.
* The internal vector will be reset.
*
* @param prefix The prefix to search for.
* @param suggestionCount How many suggestions to search for.
*/
bool searchSuggestionsSmart(const string& prefix,
unsigned int suggestionsCount);
/**
* Check if the url exists in the zim file.
*
* @param url the url to check.
* @return True if the url exists in the zim file.
*/
bool urlExists(const string& url) const;
/**
* Check if the zim file has a embedded fulltext index.
*
* @return True if the zim file has a embedded fulltext index
* and is not split (else the fulltext is not accessible).
*/
bool hasFulltextIndex() const;
/**
* Get potential case title variations for a title.
*
* @param title a title.
* @return the list of variations.
*/
std::vector<std::string> getTitleVariants(const std::string& title) const;
/**
* Get the next suggestion title.
*
* @param[out] title the title of the suggestion.
* @return True if title has been set.
*/
bool getNextSuggestion(string& title);
/**
* Get the next suggestion title and url.
*
* @param[out] title the title of the suggestion.
* @param[out] url the url of the suggestion.
* @return True if title and url have been set.
*/
bool getNextSuggestion(string& title, string& url);
/**
* Get if we can check zim file integrity (has a checksum).
*
* @return True if zim file have a checksum.
*/
bool canCheckIntegrity() const;
/**
* Check if the zim file is corrupted.
*
* @return True if zim file is corrupted.
*/
bool isCorrupted() const;
/**
* Parse a full url into a namespace and url.
*
* @param[in] url The full url ("/N/url").
* @param[out] ns The namespace (N).
* @param[out] title The url (url).
* @return True
*/
bool parseUrl(const string& url, char* ns, string& title) const;
/**
* Return the total size of the zim file.
*
* If zim file is split, return the sum of all parts' size.
*
* @return Size of the zim file in KiB.
*/
unsigned int getFileSize() const;
/**
* Get the zim file handler.
*
* @return The libzim file handler.
*/
zim::File* getZimFileHandler() const;
/**
* Get the zim article object associated to a url.
*
* @param[in] url The url of the article.
* @param[out] article The libzim article object.
* @return True if the url is good (article.good()).
*/
bool getArticleObjectByDecodedUrl(const string& url,
zim::Article& article) const;
@@ -420,6 +113,7 @@ class Reader
zim::File* zimFileHandler;
zim::size_type firstArticleOffset;
zim::size_type lastArticleOffset;
zim::size_type currentArticleOffset;
zim::size_type nsACount;
zim::size_type nsICount;
std::string zimFilePath;

View File

@@ -53,149 +53,29 @@ class Result
};
struct SearcherInternal;
/**
* The Searcher class is responsible for doing different kinds of search using
* the fulltext index.
*
* Historically, there are two kinds of fulltext index:
* - The legacy one is the external fulltext index: a directory stored outside
* of the zim file.
* - The new one is an embedded fulltext index in the zim file.
*
* The legacy external fulltext index has to be considered an obsolete format
* with fewer functionalities:
* - No multi zim search ;
* - No geo_search ;
* - No suggestions search ;
*
* To reflect this, there are two Searcher creation "APIs":
* - One for the external fulltext index, using the constructor taking a
* xapianDirectoryPath ;
* - One for the embedded fulltext index, using an "empty" constructor and the
* `add_reader` method.
*
* On top of that, the Searcher may (if compiled with ctpp2) be used to
* generate a html page for the search result. This uses a template that needs
* a humanReadableName. This feature is only used by kiwix-serve and it should
* be moved outside of Searcher (and given a better API). If you don't use the
* html rendering (getHtml method), you should simply ignore the different
* humanReadableName attributes (or give an empty string).
*/
class Searcher
{
public:
/**
* The default constructor.
*
* @param humanReadableName The global zim's humanReadableName.
* Used to generate pagination links.
*/
Searcher(const string& humanReadableName = "");
/**
* The constructor for legacy external fulltext index.
*
* @param xapianDirectoryPath The path to the external index directory.
* @param reader The reader associated to the external index.
* It will be used retrive the article content or generate
* the snippet.
* @param humanReadableName The humanReadableName for the zim.
*/
Searcher();
Searcher(const string& xapianDirectoryPath,
Reader* reader,
const string& humanReadableName);
~Searcher();
/**
* Add a reader (containing embedded fulltext index) to the search.
*
* @param reader The Reader for the zim containing the fulltext index.
* @param humanReaderName The human readable name of the reader.
* @return true if the reader has been added.
* false if the reader cannot be added (no embedded fulltext index present)
*/
bool add_reader(Reader* reader, const std::string& humanReaderName);
/**
* Start a search on the zim associated to the Searcher.
*
* Search results should be retrieved using the getNextResult method.
*
* @param search The search query.
* @param resultStart the start offset of the search results (used for pagination).
* @param resultEnd the end offset of the search results (used for pagination).
* @param verbose print some info on stdout if true.
*/
void add_reader(Reader* reader, const std::string& humanReaderName);
void search(std::string& search,
unsigned int resultStart,
unsigned int resultEnd,
const bool verbose = false);
/**
* Start a geographic search.
* The search returns results for entries within a disc centered on
* latitude/longitude with a radius of distance.
*
* Search results should be retrieved using the getNextResult method.
*
* @param latitude The latitude of the center point.
* @param longitude The longitude of the center point.
* @param distance The radius of the disc.
* @param resultStart the start offset of the search results (used for pagination).
* @param resultEnd the end offset of the search results (used for pagination).
* @param verbose print some info on stdout if true.
*/
void geo_search(float latitude, float longitude, float distance,
unsigned int resultStart,
unsigned int resultEnd,
const bool verbose = false);
/**
* Start a suggestion search.
* The search made depend of the "version" of the embedded index.
* - If the index is new enough and has a title namespace, the search is
* made in the titles only.
* - Else the search is made on the whole article content.
* In any case, the search is made "partial" (as adding '*' at the end of the query)
*
* @param search The search query.
* @param verbose print some info on stdout if true.
*/
void suggestions(std::string& search, const bool verbose = false);
/**
* Get the next result of a started search.
* This is the method to use to loop over the search results.
*/
Result* getNextResult();
/**
* Restart the previous search.
* Next call to getNextResult will return the first result.
*/
void restart_search();
/**
* Get an estimation of the result count.
*/
unsigned int getEstimatedResultCount();
/**
* Set protocol prefix.
* Only used by getHtml.
*/
bool setProtocolPrefix(const std::string prefix);
/**
* Set search protocol prefix.
* Only used by getHtml.
*/
bool setSearchProtocolPrefix(const std::string prefix);
void reset();
#ifdef ENABLE_CTPP2
/**
* Generate the html page with the results of the search.
*/
string getHtml();
#endif
@@ -218,13 +98,7 @@ class Searcher
unsigned int resultStart;
unsigned int resultEnd;
std::string contentHumanReadableId;
private:
void reset();
};
}
#endif

View File

@@ -1,5 +1,5 @@
project('kiwix-lib', 'cpp',
version : '1.1.1',
version : '1.0.2',
license : 'GPL',
default_options : ['c_std=c11', 'cpp_std=c++11'])
@@ -10,7 +10,7 @@ static_deps = get_option('android') or get_option('default_library') == 'static'
thread_dep = dependency('threads')
libicu_dep = dependency('icu-i18n', static:static_deps)
libzim_dep = dependency('libzim', version : '>=3.2.0', static:static_deps)
libzim_dep = dependency('libzim', version : '>=3.0.0', static:static_deps)
pugixml_dep = dependency('pugixml', static:static_deps)
ctpp2_include_path = ''

View File

@@ -4,13 +4,8 @@ set -e
BUILD_PATH=$(pwd)
echo "javac -d $BUILD_PATH/src/android $@"
javac -d $BUILD_PATH/src/android/ "$@"
javac -d $BUILD_PATH/src/android $1 $2 $3 $4
cd $BUILD_PATH/src/android
echo "javah -jni org.kiwix.kiwixlib"
javah -jni org.kiwix.kiwixlib.JNIKiwix
javah -jni org.kiwix.kiwixlib.JNIKiwixReader
javah -jni org.kiwix.kiwixlib.JNIKiwixSearcher
cd $BUILD_PATH

View File

@@ -1,6 +1,5 @@
/*
* Copyright (C) 2013 Emmanuel Engelhart <kelson@kiwix.org>
* Copyright (C) 2017 Matthieu Gautier <mgautier@kymeria.fr>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -21,24 +20,540 @@
#include <jni.h>
#include "org_kiwix_kiwixlib_JNIKiwix.h"
#include <stdio.h>
#include <string.h>
#include <iostream>
#include <string>
#include "common/base64.h"
#include "reader.h"
#include "searcher.h"
#include "unicode/putil.h"
#include "utils.h"
#include <android/log.h>
#define LOGI(...) __android_log_print(ANDROID_LOG_INFO, "kiwix", __VA_ARGS__)
pthread_mutex_t globalLock = PTHREAD_RECURSIVE_MUTEX_INITIALIZER;
#include <xapian.h>
#include <zim/article.h>
#include <zim/error.h>
#include <zim/file.h>
#include <zim/zim.h>
/* global variables */
/* Reader for the currently loaded ZIM file; NULL until loadZIM succeeds and
 * reset to NULL when loading fails. */
kiwix::Reader* reader = NULL;
/* Global searcher instance; starts out NULL. */
kiwix::Searcher* searcher = NULL;
/* Mutex taken by the JNI entry points around their use of `reader`. */
static pthread_mutex_t readerLock = PTHREAD_MUTEX_INITIALIZER;
/* NOTE(review): presumably guards `searcher` the same way — confirm against
 * the searcher entry points. */
static pthread_mutex_t searcherLock = PTHREAD_MUTEX_INITIALIZER;
/* c2jni type conversion functions */
/* Map a C++ bool onto the matching JNI boolean constant. */
jboolean c2jni(const bool& val)
{
  if (val) {
    return JNI_TRUE;
  }
  return JNI_FALSE;
}
/* Create a new Java string from the bytes of a C++ string (NewStringUTF). */
jstring c2jni(const std::string& val, JNIEnv* env)
{
  const char* utfChars = val.c_str();
  jstring javaString = env->NewStringUTF(utfChars);
  return javaString;
}
/* Convert a C++ int to a JNI int. */
jint c2jni(const int val)
{
  const jint converted = static_cast<jint>(val);
  return converted;
}
/*
 * Convert a C++ unsigned int to a JNI int.
 *
 * The conversion to the (signed) return type is made explicit; the previous
 * cast targeted `unsigned`, i.e. the type the value already had, and left the
 * narrowing to an implicit conversion.
 */
jint c2jni(const unsigned val)
{
  return static_cast<jint>(val);
}
/* jni2c type conversion functions */
/* Convert a JNI boolean to a C++ bool: true only for exactly JNI_TRUE. */
bool jni2c(const jboolean& val)
{
  const bool isTrue = (JNI_TRUE == val);
  return isTrue;
}
std::string jni2c(const jstring& val, JNIEnv* env)
{
return std::string(env->GetStringUTFChars(val, 0));
}
/* Convert a JNI int to a C++ int. */
int jni2c(const jint val)
{
  const int converted = static_cast<int>(val);
  return converted;
}
/* Method to deal with variable passed by reference */
void setStringObjValue(const std::string& value, const jobject obj, JNIEnv* env)
{
jclass objClass = env->GetObjectClass(obj);
jfieldID objFid = env->GetFieldID(objClass, "value", "Ljava/lang/String;");
env->SetObjectField(obj, objFid, c2jni(value, env));
}
void setIntObjValue(const int value, const jobject obj, JNIEnv* env)
{
jclass objClass = env->GetObjectClass(obj);
jfieldID objFid = env->GetFieldID(objClass, "value", "I");
env->SetIntField(obj, objFid, value);
}
void setBoolObjValue(const bool value, const jobject obj, JNIEnv* env)
{
jclass objClass = env->GetObjectClass(obj);
jfieldID objFid = env->GetFieldID(objClass, "value", "Z");
env->SetIntField(obj, objFid, c2jni(value));
}
/* Kiwix library functions */
/*
 * Return the URL of the main page of the currently loaded ZIM file.
 *
 * Returns NULL (Java null) when no reader is loaded or when the lookup
 * throws; the failure is reported on stderr.
 */
JNIEXPORT jstring JNICALL
Java_org_kiwix_kiwixlib_JNIKiwix_getMainPage(JNIEnv* env, jobject obj)
{
  /* Must be initialised: it is returned untouched on the failure paths. */
  jstring url = NULL;

  pthread_mutex_lock(&readerLock);
  if (reader != NULL) {
    try {
      std::string cUrl = reader->getMainPageUrl();
      url = c2jni(cUrl, env);
    } catch (...) {
      std::cerr << "Unable to get ZIM main page" << std::endl;
    }
  }
  pthread_mutex_unlock(&readerLock);

  return url;
}
/*
 * Return the id of the currently loaded ZIM file.
 *
 * Returns NULL (Java null) when no reader is loaded or when the lookup
 * throws; the failure is reported on stderr.
 */
JNIEXPORT jstring JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getId(JNIEnv* env,
                                                                 jobject obj)
{
  /* Must be initialised: it is returned untouched on the failure paths. */
  jstring id = NULL;

  pthread_mutex_lock(&readerLock);
  if (reader != NULL) {
    try {
      std::string cId = reader->getId();
      id = c2jni(cId, env);
    } catch (...) {
      std::cerr << "Unable to get ZIM id" << std::endl;
    }
  }
  pthread_mutex_unlock(&readerLock);

  return id;
}
/*
 * Return the size of the currently loaded ZIM file, as reported by
 * Reader::getFileSize().
 *
 * Returns 0 when no reader is loaded or when the lookup throws; the failure
 * is reported on stderr.
 */
JNIEXPORT jint JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getFileSize(JNIEnv* env,
                                                                    jobject obj)
{
  /* Must be initialised: it is returned untouched on the failure paths. */
  jint size = 0;

  pthread_mutex_lock(&readerLock);
  if (reader != NULL) {
    try {
      int cSize = reader->getFileSize();
      size = c2jni(cSize);
    } catch (...) {
      std::cerr << "Unable to get ZIM file size" << std::endl;
    }
  }
  pthread_mutex_unlock(&readerLock);

  return size;
}
/*
 * Return the creator of the currently loaded ZIM file.
 *
 * Returns NULL (Java null) when no reader is loaded or when the lookup
 * throws; the failure is reported on stderr.
 */
JNIEXPORT jstring JNICALL
Java_org_kiwix_kiwixlib_JNIKiwix_getCreator(JNIEnv* env, jobject obj)
{
  /* Must be initialised: it is returned untouched on the failure paths. */
  jstring creator = NULL;

  pthread_mutex_lock(&readerLock);
  if (reader != NULL) {
    try {
      std::string cCreator = reader->getCreator();
      creator = c2jni(cCreator, env);
    } catch (...) {
      std::cerr << "Unable to get ZIM creator" << std::endl;
    }
  }
  pthread_mutex_unlock(&readerLock);

  return creator;
}
/*
 * Return the publisher of the currently loaded ZIM file.
 *
 * Returns NULL (Java null) when no reader is loaded or when the lookup
 * throws; the failure is reported on stderr.
 */
JNIEXPORT jstring JNICALL
Java_org_kiwix_kiwixlib_JNIKiwix_getPublisher(JNIEnv* env, jobject obj)
{
  /* Must be initialised: it is returned untouched on the failure paths. */
  jstring publisher = NULL;

  pthread_mutex_lock(&readerLock);
  if (reader != NULL) {
    try {
      std::string cPublisher = reader->getPublisher();
      publisher = c2jni(cPublisher, env);
    } catch (...) {
      /* The message previously said "creator" (copy/paste slip). */
      std::cerr << "Unable to get ZIM publisher" << std::endl;
    }
  }
  pthread_mutex_unlock(&readerLock);

  return publisher;
}
/*
 * Return the name of the currently loaded ZIM file.
 *
 * Returns NULL (Java null) when no reader is loaded or when the lookup
 * throws; the failure is reported on stderr.
 */
JNIEXPORT jstring JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getName(JNIEnv* env,
                                                                   jobject obj)
{
  /* Must be initialised: it is returned untouched on the failure paths. */
  jstring name = NULL;

  pthread_mutex_lock(&readerLock);
  if (reader != NULL) {
    try {
      std::string cName = reader->getName();
      name = c2jni(cName, env);
    } catch (...) {
      std::cerr << "Unable to get ZIM name" << std::endl;
    }
  }
  pthread_mutex_unlock(&readerLock);

  return name;
}
/*
 * Return the favicon of the currently loaded ZIM file, base64 encoded.
 *
 * The favicon mime type is fetched but not returned. Returns NULL (Java null)
 * when no reader is loaded or when the lookup throws; the failure is reported
 * on stderr.
 */
JNIEXPORT jstring JNICALL
Java_org_kiwix_kiwixlib_JNIKiwix_getFavicon(JNIEnv* env, jobject obj)
{
  /* Must be initialised: it is returned untouched on the failure paths. */
  jstring favicon = NULL;

  pthread_mutex_lock(&readerLock);
  if (reader != NULL) {
    try {
      std::string cContent;
      std::string cMime;
      reader->getFavicon(cContent, cMime);
      favicon
          = c2jni(base64_encode(
                      reinterpret_cast<const unsigned char*>(cContent.c_str()),
                      cContent.length()),
                  env);
    } catch (...) {
      std::cerr << "Unable to get ZIM favicon" << std::endl;
    }
  }
  pthread_mutex_unlock(&readerLock);

  return favicon;
}
/*
 * Return the date of the currently loaded ZIM file.
 *
 * Returns NULL (Java null) when no reader is loaded or when the lookup
 * throws; the failure is reported on stderr.
 */
JNIEXPORT jstring JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getDate(JNIEnv* env,
                                                                   jobject obj)
{
  /* Must be initialised: it is returned untouched on the failure paths. */
  jstring date = NULL;

  pthread_mutex_lock(&readerLock);
  if (reader != NULL) {
    try {
      std::string cDate = reader->getDate();
      date = c2jni(cDate, env);
    } catch (...) {
      std::cerr << "Unable to get ZIM date" << std::endl;
    }
  }
  pthread_mutex_unlock(&readerLock);

  return date;
}
/*
 * Return the language of the currently loaded ZIM file.
 *
 * Returns NULL (Java null) when no reader is loaded or when the lookup
 * throws; the failure is reported on stderr.
 */
JNIEXPORT jstring JNICALL
Java_org_kiwix_kiwixlib_JNIKiwix_getLanguage(JNIEnv* env, jobject obj)
{
  /* Must be initialised: it is returned untouched on the failure paths. */
  jstring language = NULL;

  pthread_mutex_lock(&readerLock);
  if (reader != NULL) {
    try {
      std::string cLanguage = reader->getLanguage();
      language = c2jni(cLanguage, env);
    } catch (...) {
      std::cerr << "Unable to get ZIM language" << std::endl;
    }
  }
  pthread_mutex_unlock(&readerLock);

  return language;
}
/*
 * Return the mime type of the article at `url` in the currently loaded ZIM
 * file.
 *
 * Returns NULL (Java null) when no reader is loaded or when the lookup
 * throws; the failure is reported on stderr.
 */
JNIEXPORT jstring JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getMimeType(
    JNIEnv* env, jobject obj, jstring url)
{
  /* Must be initialised: it is returned untouched on the failure paths. */
  jstring mimeType = NULL;

  pthread_mutex_lock(&readerLock);
  if (reader != NULL) {
    std::string cUrl = jni2c(url, env);
    try {
      std::string cMimeType;
      reader->getMimeTypeByUrl(cUrl, cMimeType);
      mimeType = c2jni(cMimeType, env);
    } catch (...) {
      std::cerr << "Unable to get mime-type for url " << cUrl << std::endl;
    }
  }
  pthread_mutex_unlock(&readerLock);

  return mimeType;
}
/*
 * Replace the global reader with a new one opened on `path`.
 *
 * Any previously loaded reader is deleted first. Returns JNI_TRUE on success;
 * on any exception the global reader is left NULL, an error is printed on
 * stderr and JNI_FALSE is returned.
 */
JNIEXPORT jboolean JNICALL
Java_org_kiwix_kiwixlib_JNIKiwix_loadZIM(JNIEnv* env, jobject obj, jstring path)
{
  const std::string cPath = jni2c(path, env);
  jboolean loaded = JNI_FALSE;

  pthread_mutex_lock(&readerLock);
  try {
    /* Deleting a null pointer is a no-op, so no explicit check is needed. */
    delete reader;
    reader = new kiwix::Reader(cPath);
    loaded = JNI_TRUE;
  } catch (...) {
    std::cerr << "Unable to load ZIM " << cPath << std::endl;
    reader = NULL;
  }
  pthread_mutex_unlock(&readerLock);

  return loaded;
}
/*
 * Fetch the content of the article at `url` from the currently loaded ZIM
 * file.
 *
 * The holder objects are first reset to "", "" and 0. When the article is
 * found they receive its title, mime type and size, and the content is
 * returned as a byte array. Otherwise (no reader, article not found, or an
 * exception, which is reported on stderr) an empty byte array is returned.
 */
JNIEXPORT jbyteArray JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getContent(
    JNIEnv* env, jobject obj, jstring url, jobject titleObj, jobject mimeTypeObj, jobject sizeObj)
{
  /* Default values */
  setStringObjValue("", titleObj, env);
  setStringObjValue("", mimeTypeObj, env);
  setIntObjValue(0, sizeObj, env);
  jbyteArray data = env->NewByteArray(0);

  /* Retrieve the content.
   * The lock is taken before `reader` is tested so that a concurrent loadZIM
   * cannot delete the reader between the NULL check and its use. */
  pthread_mutex_lock(&readerLock);
  if (reader != NULL) {
    std::string cUrl = jni2c(url, env);
    std::string cData;
    std::string cTitle;
    std::string cMimeType;
    unsigned int cSize = 0;

    try {
      if (reader->getContentByUrl(cUrl, cData, cTitle, cSize, cMimeType)) {
        data = env->NewByteArray(cSize);
        env->SetByteArrayRegion(
            data, 0, cSize, reinterpret_cast<const jbyte*>(cData.c_str()));
        setStringObjValue(cMimeType, mimeTypeObj, env);
        setStringObjValue(cTitle, titleObj, env);
        setIntObjValue(cSize, sizeObj, env);
      }
    } catch (...) {
      std::cerr << "Unable to get content for url " << cUrl << std::endl;
    }
  }
  pthread_mutex_unlock(&readerLock);

  return data;
}
/**
 * JNI binding for JNIKiwix.searchSuggestions(prefix, count).
 *
 * Runs searchSuggestionsSmart() on the global reader under readerLock.
 * Returns JNI_TRUE only when a reader is loaded and the call returns true;
 * exceptions are logged and reported as JNI_FALSE.
 */
JNIEXPORT jboolean JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_searchSuggestions(
    JNIEnv* env, jobject obj, jstring prefix, jint count)
{
  std::string suggestionPrefix = jni2c(prefix, env);
  unsigned int maxSuggestions = jni2c(count);
  jboolean found = JNI_FALSE;

  pthread_mutex_lock(&readerLock);
  try {
    if (reader != NULL
        && reader->searchSuggestionsSmart(suggestionPrefix, maxSuggestions)) {
      found = JNI_TRUE;
    }
  } catch (...) {
    std::cerr << "Unable to search suggestions for pattern " << suggestionPrefix
              << std::endl;
  }
  pthread_mutex_unlock(&readerLock);

  return found;
}
/**
 * JNI binding for JNIKiwix.getNextSuggestion(title).
 *
 * Fetches the next suggestion from the global reader and stores it in
 * `titleObj`. Returns JNI_TRUE when a suggestion was stored, JNI_FALSE when
 * no reader is loaded, getNextSuggestion() returns false, or it throws.
 */
JNIEXPORT jboolean JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getNextSuggestion(
    JNIEnv* env, jobject obj, jobject titleObj)
{
  std::string suggestion;
  jboolean delivered = JNI_FALSE;

  pthread_mutex_lock(&readerLock);
  try {
    if (reader != NULL && reader->getNextSuggestion(suggestion)) {
      setStringObjValue(suggestion, titleObj, env);
      delivered = JNI_TRUE;
    }
  } catch (...) {
    std::cerr << "Unable to get next suggestion" << std::endl;
  }
  pthread_mutex_unlock(&readerLock);

  return delivered;
}
/**
 * JNI binding for JNIKiwix.getPageUrlFromTitle(title, url).
 *
 * Resolves `title` through the global reader and writes the resulting URL
 * into `urlObj`. Returns JNI_TRUE when the URL was written; JNI_FALSE when no
 * reader is loaded, the lookup returns false, or it throws.
 */
JNIEXPORT jboolean JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getPageUrlFromTitle(
    JNIEnv* env, jobject obj, jstring title, jobject urlObj)
{
  std::string pageTitle = jni2c(title, env);
  std::string pageUrl;
  jboolean resolved = JNI_FALSE;

  pthread_mutex_lock(&readerLock);
  try {
    if (reader != NULL && reader->getPageUrlFromTitle(pageTitle, pageUrl)) {
      setStringObjValue(pageUrl, urlObj, env);
      resolved = JNI_TRUE;
    }
  } catch (...) {
    std::cerr << "Unable to get URL for title " << pageTitle << std::endl;
  }
  pthread_mutex_unlock(&readerLock);

  return resolved;
}
/**
 * JNI binding for JNIKiwix.getTitle(title).
 *
 * Stores reader->getTitle() in `titleObj` and returns JNI_TRUE. Returns
 * JNI_FALSE (leaving `titleObj` untouched) when no reader is loaded or the
 * call throws.
 */
JNIEXPORT jboolean JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getTitle(
    JNIEnv* env, jobject obj, jobject titleObj)
{
  jboolean retVal = JNI_FALSE;

  pthread_mutex_lock(&readerLock);
  try {
    if (reader != NULL) {
      // Single local: the former function-scope `cTitle` was shadowed by this
      // one and never used.
      std::string cTitle = reader->getTitle();
      setStringObjValue(cTitle, titleObj, env);
      retVal = JNI_TRUE;
    }
  } catch (...) {
    std::cerr << "Unable to get ZIM title" << std::endl;
  }
  pthread_mutex_unlock(&readerLock);

  return retVal;
}
/**
 * JNI binding for JNIKiwix.getDescription().
 *
 * Returns reader->getDescription() as a Java string, or NULL when no reader
 * is loaded or the call throws.
 */
JNIEXPORT jstring JNICALL
Java_org_kiwix_kiwixlib_JNIKiwix_getDescription(JNIEnv* env, jobject obj)
{
  // Initialised so the failure paths return a well-defined null reference.
  jstring description = NULL;

  pthread_mutex_lock(&readerLock);
  if (reader != NULL) {
    try {
      std::string cDescription = reader->getDescription();
      description = c2jni(cDescription, env);
    } catch (...) {
      std::cerr << "Unable to get ZIM description" << std::endl;
    }
  }
  pthread_mutex_unlock(&readerLock);

  return description;
}
/**
 * JNI binding for JNIKiwix.getRandomPage(url).
 *
 * Stores reader->getRandomPageUrl() in `urlObj` and returns JNI_TRUE.
 * Returns JNI_FALSE when no reader is loaded or the call throws.
 */
JNIEXPORT jboolean JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getRandomPage(
    JNIEnv* env, jobject obj, jobject urlObj)
{
  jboolean retVal = JNI_FALSE;

  pthread_mutex_lock(&readerLock);
  try {
    if (reader != NULL) {
      // Single local: the former function-scope `cUrl` was shadowed by this
      // one and never used.
      std::string cUrl = reader->getRandomPageUrl();
      setStringObjValue(cUrl, urlObj, env);
      retVal = JNI_TRUE;
    }
  } catch (...) {
    std::cerr << "Unable to get random page" << std::endl;
  }
  pthread_mutex_unlock(&readerLock);

  return retVal;
}
/**
 * JNI binding for JNIKiwix.setDataDirectory(icuDataDir).
 *
 * Converts `dirStr` to a C++ string and passes it to u_setDataDirectory().
 * Any exception is logged to std::cerr and swallowed.
 */
JNIEXPORT void JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_setDataDirectory(
JNIEnv* env, jobject obj, jstring dirStr)
{
std::string cPath = jni2c(dirStr, env);
// NOTE(review): as shown, this block takes both a scoped `Lock` and
// readerLock. This looks like the two sides of a diff merged together —
// confirm which locking scheme is actually intended here.
Lock l;
pthread_mutex_lock(&readerLock);
try {
u_setDataDirectory(cPath.c_str());
} catch (...) {
std::cerr << "Unable to set data directory " << cPath << std::endl;
}
pthread_mutex_unlock(&readerLock);
}
/**
 * JNI binding for JNIKiwix.loadFulltextIndex(path).
 *
 * Replaces the global searcher under searcherLock. When the current reader
 * reports an embedded full-text index the searcher is built from the reader;
 * otherwise it is built from the external index at `path`.
 * Returns JNI_TRUE on success; on any exception the global searcher is left
 * NULL and JNI_FALSE is returned.
 */
JNIEXPORT jboolean JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_loadFulltextIndex(
    JNIEnv* env, jobject obj, jstring path)
{
  std::string indexPath = jni2c(path, env);
  jboolean loaded = JNI_TRUE;

  pthread_mutex_lock(&searcherLock);
  try {
    // Deleting a null pointer is a no-op, so no explicit guard is needed.
    delete searcher;

    const bool embeddedIndex = reader && reader->hasFulltextIndex();
    if (embeddedIndex) {
      // New API. The human readable name is not used on Android, so it is
      // left empty.
      searcher = new kiwix::Searcher();
      searcher->add_reader(reader, "");
    } else {
      // Old API (no embedded full text index).
      searcher = new kiwix::Searcher(indexPath, reader, "");
    }
  } catch (...) {
    searcher = NULL;
    loaded = JNI_FALSE;
    std::cerr << "Unable to load full text index " << indexPath << std::endl;
  }
  pthread_mutex_unlock(&searcherLock);

  return loaded;
}
/**
 * JNI binding for JNIKiwix.indexedQuery(query, count).
 *
 * Runs `query` on the global searcher (results 0..count) and returns the
 * titles of the results, each followed by "\n". Iteration stops at the first
 * missing result or the first result with an empty title or url.
 * Returns an empty string when no searcher is loaded; exceptions are logged
 * and whatever was collected so far is returned.
 */
JNIEXPORT jstring JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_indexedQuery(
    JNIEnv* env, jclass obj, jstring query, jint count)
{
  std::string cQuery = jni2c(query, env);
  unsigned int cCount = jni2c(count);
  kiwix::Result* p_result = NULL;
  std::string result;

  pthread_mutex_lock(&searcherLock);
  try {
    if (searcher != NULL) {
      searcher->search(cQuery, 0, cCount);
      while ((p_result = searcher->getNextResult()) != NULL) {
        const bool usable = !(p_result->get_title().empty())
                            && !(p_result->get_url().empty());
        if (usable) {
          result += p_result->get_title() + "\n";
        }
        // Every result handed out by getNextResult() is released here,
        // including the one that terminates the loop (previously leaked).
        kiwix::Result* finished = p_result;
        p_result = NULL;
        delete finished;
        if (!usable) {
          break;
        }
      }
    }
  } catch (...) {
    // Release a result that was still in flight when the exception occurred.
    delete p_result;
    std::cerr << "Unable to make indexed query " << cQuery << std::endl;
  }
  pthread_mutex_unlock(&searcherLock);

  return env->NewStringUTF(result.c_str());
}

View File

@@ -1,421 +0,0 @@
/*
* Copyright (C) 2013 Emmanuel Engelhart <kelson@kiwix.org>
* Copyright (C) 2017 Matthieu Gautier <mgautier@kymeria.fr>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*/
#include <jni.h>
#include <zim/file.h>
#include "org_kiwix_kiwixlib_JNIKiwixReader.h"
#include "common/base64.h"
#include "reader.h"
#include "utils.h"
/* Kiwix Reader JNI functions */
/**
 * JNI binding for JNIKiwixReader.getNativeReader(filename).
 *
 * Opens `filename` as a kiwix::Reader and returns a pointer to a newly
 * allocated Handle wrapping it, encoded as a jlong. Returns 0 when the reader
 * cannot be created, which is the value the Java constructor checks before
 * throwing JNIKiwixException.
 */
JNIEXPORT jlong JNICALL Java_org_kiwix_kiwixlib_JNIKiwixReader_getNativeReader(
    JNIEnv* env, jobject obj, jstring filename)
{
  std::string cPath = jni2c(filename, env);

  Lock l;
  kiwix::Reader* reader = nullptr;
  try {
    reader = new kiwix::Reader(cPath);
    return reinterpret_cast<jlong>(new Handle<kiwix::Reader>(reader));
  } catch (...) {
    // Covers both a failed open (reader still null) and a failed Handle
    // allocation (reader must be released).
    delete reader;
    std::cerr << "Unable to load ZIM " << cPath << std::endl;
    return 0;
  }
}
/**
 * JNI binding for JNIKiwixReader.dispose().
 *
 * Delegates to Handle<kiwix::Reader>::dispose(), which deletes the native
 * reader and the handle referenced by obj's `nativeHandle` field.
 */
JNIEXPORT void JNICALL
Java_org_kiwix_kiwixlib_JNIKiwixReader_dispose(JNIEnv* env, jobject obj)
{
Handle<kiwix::Reader>::dispose(env, obj);
}
// READER yields the LockedHandle for the kiwix::Reader referenced by obj's
// `nativeHandle` field; it requires `env` and `obj` to be in scope.
// Per utils.h, LockedHandle derives from Lock (locks globalLock on
// construction, unlocks on destruction), so in `READER->f()` the lock is held
// until the end of that full expression.
#define READER (Handle<kiwix::Reader>::getHandle(env, obj))
/* Kiwix library functions */
/**
 * JNI binding for JNIKiwixReader.getMainPage().
 *
 * Returns the reader's main page URL as a Java string, or null when the
 * lookup throws.
 */
JNIEXPORT jstring JNICALL
Java_org_kiwix_kiwixlib_JNIKiwixReader_getMainPage(JNIEnv* env, jobject obj)
{
  // Initialised so the exception path returns null, not an indeterminate value.
  jstring url = nullptr;

  try {
    std::string cUrl = READER->getMainPageUrl();
    url = c2jni(cUrl, env);
  } catch (...) {
    std::cerr << "Unable to get ZIM main page" << std::endl;
  }

  return url;
}
/**
 * JNI binding for JNIKiwixReader.getId().
 *
 * Returns the reader's id as a Java string, or null when the lookup throws.
 */
JNIEXPORT jstring JNICALL
Java_org_kiwix_kiwixlib_JNIKiwixReader_getId(JNIEnv* env, jobject obj)
{
  // Initialised so the exception path returns null, not an indeterminate value.
  jstring id = nullptr;

  try {
    std::string cId = READER->getId();
    id = c2jni(cId, env);
  } catch (...) {
    std::cerr << "Unable to get ZIM id" << std::endl;
  }

  return id;
}
/**
 * JNI binding for JNIKiwixReader.getFileSize().
 *
 * Returns the value of the reader's getFileSize(), or 0 when the call throws.
 */
JNIEXPORT jint JNICALL
Java_org_kiwix_kiwixlib_JNIKiwixReader_getFileSize(JNIEnv* env, jobject obj)
{
  // Initialised so the exception path returns 0, not an indeterminate value.
  jint size = 0;

  try {
    int cSize = READER->getFileSize();
    size = c2jni(cSize);
  } catch (...) {
    std::cerr << "Unable to get ZIM file size" << std::endl;
  }

  return size;
}
/**
 * JNI binding for JNIKiwixReader.getCreator().
 *
 * Returns the reader's creator as a Java string, or null when the lookup
 * throws.
 */
JNIEXPORT jstring JNICALL
Java_org_kiwix_kiwixlib_JNIKiwixReader_getCreator(JNIEnv* env, jobject obj)
{
  // Initialised so the exception path returns null, not an indeterminate value.
  jstring creator = nullptr;

  try {
    std::string cCreator = READER->getCreator();
    creator = c2jni(cCreator, env);
  } catch (...) {
    std::cerr << "Unable to get ZIM creator" << std::endl;
  }

  return creator;
}
/**
 * JNI binding for JNIKiwixReader.getPublisher().
 *
 * Returns the reader's publisher as a Java string, or null when the lookup
 * throws.
 */
JNIEXPORT jstring JNICALL
Java_org_kiwix_kiwixlib_JNIKiwixReader_getPublisher(JNIEnv* env, jobject obj)
{
  // Initialised so the exception path returns null, not an indeterminate value.
  jstring publisher = nullptr;

  try {
    std::string cPublisher = READER->getPublisher();
    publisher = c2jni(cPublisher, env);
  } catch (...) {
    // The message previously said "creator" (copy/paste from getCreator).
    std::cerr << "Unable to get ZIM publisher" << std::endl;
  }

  return publisher;
}
/**
 * JNI binding for JNIKiwixReader.getName().
 *
 * Returns the reader's name as a Java string, or null when the lookup throws.
 */
JNIEXPORT jstring JNICALL
Java_org_kiwix_kiwixlib_JNIKiwixReader_getName(JNIEnv* env, jobject obj)
{
  // Initialised so the exception path returns null, not an indeterminate value.
  jstring name = nullptr;

  try {
    std::string cName = READER->getName();
    name = c2jni(cName, env);
  } catch (...) {
    std::cerr << "Unable to get ZIM name" << std::endl;
  }

  return name;
}
/**
 * JNI binding for JNIKiwixReader.getFavicon().
 *
 * Fetches the favicon content from the reader and returns it base64-encoded
 * as a Java string (the mime-type reported by the reader is discarded).
 * Returns null when the lookup throws.
 */
JNIEXPORT jstring JNICALL
Java_org_kiwix_kiwixlib_JNIKiwixReader_getFavicon(JNIEnv* env, jobject obj)
{
  // Initialised so the exception path returns null, not an indeterminate value.
  jstring favicon = nullptr;

  try {
    std::string cContent;
    std::string cMime;
    READER->getFavicon(cContent, cMime);
    favicon = c2jni(
        base64_encode(reinterpret_cast<const unsigned char*>(cContent.c_str()),
                      cContent.length()),
        env);
  } catch (...) {
    std::cerr << "Unable to get ZIM favicon" << std::endl;
  }

  return favicon;
}
/**
 * JNI binding for JNIKiwixReader.getDate().
 *
 * Returns the reader's date as a Java string, or null when the lookup throws.
 */
JNIEXPORT jstring JNICALL
Java_org_kiwix_kiwixlib_JNIKiwixReader_getDate(JNIEnv* env, jobject obj)
{
  // Initialised so the exception path returns null, not an indeterminate value.
  jstring date = nullptr;

  try {
    std::string cDate = READER->getDate();
    date = c2jni(cDate, env);
  } catch (...) {
    std::cerr << "Unable to get ZIM date" << std::endl;
  }

  return date;
}
/**
 * JNI binding for JNIKiwixReader.getLanguage().
 *
 * Returns the reader's language as a Java string, or null when the lookup
 * throws.
 */
JNIEXPORT jstring JNICALL
Java_org_kiwix_kiwixlib_JNIKiwixReader_getLanguage(JNIEnv* env, jobject obj)
{
  // Initialised so the exception path returns null, not an indeterminate value.
  jstring language = nullptr;

  try {
    std::string cLanguage = READER->getLanguage();
    language = c2jni(cLanguage, env);
  } catch (...) {
    std::cerr << "Unable to get ZIM language" << std::endl;
  }

  return language;
}
/**
 * JNI binding for JNIKiwixReader.getMimeType(url).
 *
 * Returns the mime-type the reader reports for `url` (getMimeTypeByUrl) as a
 * Java string, or null when the lookup throws.
 */
JNIEXPORT jstring JNICALL Java_org_kiwix_kiwixlib_JNIKiwixReader_getMimeType(
    JNIEnv* env, jobject obj, jstring url)
{
  // Initialised so the exception path returns null, not an indeterminate value.
  jstring mimeType = nullptr;

  std::string cUrl = jni2c(url, env);
  try {
    std::string cMimeType;
    READER->getMimeTypeByUrl(cUrl, cMimeType);
    mimeType = c2jni(cMimeType, env);
  } catch (...) {
    std::cerr << "Unable to get mime-type for url " << cUrl << std::endl;
  }

  return mimeType;
}
/**
 * JNI binding for JNIKiwixReader.getContent(url, title, mimeType, size).
 *
 * The out-parameter objects are first reset to "", "" and 0. When
 * getContentByUrl() succeeds, the article bytes are returned and the
 * out-parameters receive the reported title, mime-type and size. Otherwise
 * (not found or exception) an empty byte array is returned.
 */
JNIEXPORT jbyteArray JNICALL Java_org_kiwix_kiwixlib_JNIKiwixReader_getContent(
    JNIEnv* env, jobject obj, jstring url, jobject titleObj, jobject mimeTypeObj, jobject sizeObj)
{
  /* Neutral values reported on failure */
  setStringObjValue("", titleObj, env);
  setStringObjValue("", mimeTypeObj, env);
  setIntObjValue(0, sizeObj, env);
  jbyteArray payload = env->NewByteArray(0);

  /* Look the article up */
  std::string articleUrl = jni2c(url, env);
  std::string body;
  std::string title;
  std::string mime;
  unsigned int byteCount = 0;

  try {
    const bool found
        = READER->getContentByUrl(articleUrl, body, title, byteCount, mime);
    if (found) {
      payload = env->NewByteArray(byteCount);
      env->SetByteArrayRegion(
          payload, 0, byteCount, reinterpret_cast<const jbyte*>(body.c_str()));
      setStringObjValue(mime, mimeTypeObj, env);
      setStringObjValue(title, titleObj, env);
      setIntObjValue(byteCount, sizeObj, env);
    }
  } catch (...) {
    std::cerr << "Unable to get content for url " << articleUrl << std::endl;
  }

  return payload;
}
/**
 * JNI binding for JNIKiwixReader.getContentPart(url, offset, len, size).
 *
 * Follows redirects (at most 42 hops) from the article at `url`, then:
 *  - if `len` is 0, reads nothing and sets `sizeObj` to the article size;
 *  - if [offset, offset + len) lies inside the article, returns those bytes
 *    and sets `sizeObj` to `len`;
 *  - otherwise returns an empty array and leaves `sizeObj` at 0.
 * An empty array with size 0 is also returned when the article is not found,
 * the redirect limit is hit, or an exception is thrown.
 */
JNIEXPORT jbyteArray JNICALL Java_org_kiwix_kiwixlib_JNIKiwixReader_getContentPart(
    JNIEnv* env, jobject obj, jstring url, jint offset, jint len, jobject sizeObj)
{
  /* Default values */
  jbyteArray data = env->NewByteArray(0);
  setIntObjValue(0, sizeObj, env);

  /* Retrieve the content */
  std::string cUrl = jni2c(url, env);
  // Negative Java ints become very large unsigned values here and are then
  // rejected by the range check below.
  unsigned int cOffset = jni2c(offset);
  unsigned int cLen = jni2c(len);
  try {
    zim::Article article;
    READER->getArticleObjectByDecodedUrl(kiwix::urlDecode(cUrl), article);
    if (!article.good()) {
      return data;
    }

    int loopCounter = 0;
    while (article.isRedirect() && ++loopCounter < 42) {
      article = article.getRedirectArticle();
    }
    if (loopCounter == 42) {
      return data;
    }

    const auto articleSize = article.getArticleSize();
    if (cLen == 0) {
      setIntObjValue(articleSize, sizeObj, env);
    } else if (cOffset <= articleSize && cLen <= articleSize - cOffset) {
      // The request is served only when it fits inside the article. The test
      // used to be inverted (`offset + len > size`), and is written without
      // `offset + len` so it cannot wrap around.
      auto blob = article.getData(cOffset, cLen);
      data = env->NewByteArray(cLen);
      env->SetByteArrayRegion(
          data, 0, cLen, reinterpret_cast<const jbyte*>(blob.data()));
      setIntObjValue(cLen, sizeObj, env);
    }
  } catch (...) {
    std::cerr << "Unable to get partial content for url " << cUrl
              << "(" << cOffset << ":" << cLen << ")" << std::endl;
  }

  return data;
}
/**
 * JNI binding for JNIKiwixReader.getDirectAccessInformation(url).
 *
 * Builds an org.kiwix.kiwixlib.Pair initialised to ("", 0). After following
 * redirects (at most 42 hops) the pair is filled with the values returned by
 * the article's getDirectAccessInformation(). The default pair is returned
 * when the article is not found, the redirect limit is hit, or an exception
 * is thrown.
 */
JNIEXPORT jobject JNICALL
Java_org_kiwix_kiwixlib_JNIKiwixReader_getDirectAccessInformation(
    JNIEnv* env, jobject obj, jstring url)
{
  jclass pairClass = env->FindClass("org/kiwix/kiwixlib/Pair");
  jmethodID pairCtor = env->GetMethodID(pairClass, "<init>", "()V");
  jobject location = env->NewObject(pairClass, pairCtor);
  setPairObjValue("", 0, location, env);

  std::string articleUrl = jni2c(url, env);
  try {
    zim::Article target;
    READER->getArticleObjectByDecodedUrl(kiwix::urlDecode(articleUrl), target);
    if (!target.good()) {
      return location;
    }

    int hops = 0;
    while (target.isRedirect() && ++hops < 42) {
      target = target.getRedirectArticle();
    }
    if (hops == 42) {
      return location;
    }

    auto access = target.getDirectAccessInformation();
    setPairObjValue(access.first, access.second, location, env);
  } catch (...) {
    std::cerr << "Unable to locate direct access information for url " << articleUrl
              << std::endl;
  }

  return location;
}
/**
 * JNI binding for JNIKiwixReader.searchSuggestions(prefix, count).
 *
 * Returns JNI_TRUE when the reader's searchSuggestionsSmart() returns true;
 * JNI_FALSE otherwise, including when it throws.
 */
JNIEXPORT jboolean JNICALL
Java_org_kiwix_kiwixlib_JNIKiwixReader_searchSuggestions(JNIEnv* env,
                                                         jobject obj,
                                                         jstring prefix,
                                                         jint count)
{
  std::string suggestionPrefix = jni2c(prefix, env);
  unsigned int maxSuggestions = jni2c(count);
  jboolean found = JNI_FALSE;

  try {
    const bool hasSuggestions
        = READER->searchSuggestionsSmart(suggestionPrefix, maxSuggestions);
    found = hasSuggestions ? JNI_TRUE : JNI_FALSE;
  } catch (...) {
    std::cerr << "Unable to search suggestions for pattern " << suggestionPrefix
              << std::endl;
  }

  return found;
}
/**
 * JNI binding for JNIKiwixReader.getNextSuggestion(title).
 *
 * When the reader yields another suggestion it is written into `titleObj`
 * and JNI_TRUE is returned; otherwise (no suggestion or exception) JNI_FALSE.
 */
JNIEXPORT jboolean JNICALL
Java_org_kiwix_kiwixlib_JNIKiwixReader_getNextSuggestion(JNIEnv* env,
                                                         jobject obj,
                                                         jobject titleObj)
{
  std::string suggestion;
  jboolean delivered = JNI_FALSE;

  try {
    const bool available = READER->getNextSuggestion(suggestion);
    if (available) {
      setStringObjValue(suggestion, titleObj, env);
      delivered = JNI_TRUE;
    }
  } catch (...) {
    std::cerr << "Unable to get next suggestion" << std::endl;
  }

  return delivered;
}
/**
 * JNI binding for JNIKiwixReader.getPageUrlFromTitle(title, url).
 *
 * Resolves `title` via the reader; on success the URL is written into
 * `urlObj` and JNI_TRUE is returned. Returns JNI_FALSE when the lookup
 * returns false or throws.
 */
JNIEXPORT jboolean JNICALL
Java_org_kiwix_kiwixlib_JNIKiwixReader_getPageUrlFromTitle(JNIEnv* env,
                                                           jobject obj,
                                                           jstring title,
                                                           jobject urlObj)
{
  std::string pageTitle = jni2c(title, env);
  std::string pageUrl;
  jboolean resolved = JNI_FALSE;

  try {
    const bool known = READER->getPageUrlFromTitle(pageTitle, pageUrl);
    if (known) {
      setStringObjValue(pageUrl, urlObj, env);
      resolved = JNI_TRUE;
    }
  } catch (...) {
    std::cerr << "Unable to get URL for title " << pageTitle << std::endl;
  }

  return resolved;
}
/**
 * JNI binding for JNIKiwixReader.getTitle().
 *
 * Returns the reader's title as a Java string, or null when the lookup
 * throws.
 */
JNIEXPORT jstring JNICALL Java_org_kiwix_kiwixlib_JNIKiwixReader_getTitle(
    JNIEnv* env, jobject obj)
{
  // Initialised so the exception path returns null, not an indeterminate value.
  jstring title = nullptr;

  try {
    std::string cTitle = READER->getTitle();
    title = c2jni(cTitle, env);
  } catch (...) {
    std::cerr << "Unable to get ZIM title" << std::endl;
  }

  return title;
}
/**
 * JNI binding for JNIKiwixReader.getDescription().
 *
 * Returns the reader's description as a Java string, or null when the lookup
 * throws.
 */
JNIEXPORT jstring JNICALL
Java_org_kiwix_kiwixlib_JNIKiwixReader_getDescription(JNIEnv* env, jobject obj)
{
  // Initialised so the exception path returns null, not an indeterminate value.
  jstring description = nullptr;

  try {
    std::string cDescription = READER->getDescription();
    description = c2jni(cDescription, env);
  } catch (...) {
    std::cerr << "Unable to get ZIM description" << std::endl;
  }

  return description;
}
/**
 * JNI binding for JNIKiwixReader.getRandomPage(url).
 *
 * Writes the reader's getRandomPageUrl() into `urlObj` and returns JNI_TRUE;
 * returns JNI_FALSE when the call throws.
 */
JNIEXPORT jboolean JNICALL Java_org_kiwix_kiwixlib_JNIKiwixReader_getRandomPage(
    JNIEnv* env, jobject obj, jobject urlObj)
{
  jboolean retVal = JNI_FALSE;

  try {
    // Single local: the former function-scope `cUrl` was shadowed by this one
    // and never used.
    std::string cUrl = READER->getRandomPageUrl();
    setStringObjValue(cUrl, urlObj, env);
    retVal = JNI_TRUE;
  } catch (...) {
    std::cerr << "Unable to get random page" << std::endl;
  }

  return retVal;
}

View File

@@ -1,124 +0,0 @@
/*
* Copyright (C) 2013 Emmanuel Engelhart <kelson@kiwix.org>
* Copyright (C) 2017 Matthieu Gautier <mgautier@kymeria.fr>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*/
#include <zim/file.h>
#include "org_kiwix_kiwixlib_JNIKiwixSearcher.h"
#include "org_kiwix_kiwixlib_JNIKiwixSearcher_Result.h"
#include "reader.h"
#include "searcher.h"
#include "utils.h"
// SEARCHER / RESULT yield the LockedHandle for the native kiwix::Searcher /
// kiwix::Result referenced by obj's `nativeHandle` field; both require `env`
// and `obj` to be in scope.
// Per utils.h, a LockedHandle holds globalLock for as long as it exists, so in
// `SEARCHER->f()` the lock is held until the end of that full expression.
#define SEARCHER (Handle<kiwix::Searcher>::getHandle(env, obj))
#define RESULT (Handle<kiwix::Result>::getHandle(env, obj))
/**
 * JNI binding for JNIKiwixSearcher.dispose().
 *
 * Delegates to Handle<kiwix::Searcher>::dispose(), which deletes the native
 * searcher and the handle referenced by obj's `nativeHandle` field.
 */
JNIEXPORT void JNICALL
Java_org_kiwix_kiwixlib_JNIKiwixSearcher_dispose(JNIEnv* env, jobject obj)
{
Handle<kiwix::Searcher>::dispose(env, obj);
}
/* Kiwix Searcher JNI functions */
/**
 * JNI binding for JNIKiwixSearcher.getNativeHandle().
 *
 * Allocates a default-constructed kiwix::Searcher wrapped in a Handle and
 * returns the Handle's address encoded as a jlong.
 */
JNIEXPORT jlong JNICALL
Java_org_kiwix_kiwixlib_JNIKiwixSearcher_getNativeHandle(JNIEnv* env,
                                                         jobject obj)
{
  kiwix::Searcher* nativeSearcher = new kiwix::Searcher();
  Handle<kiwix::Searcher>* wrapper = new Handle<kiwix::Searcher>(nativeSearcher);
  return reinterpret_cast<jlong>(wrapper);
}
/* Kiwix library functions */
/**
 * JNI binding for JNIKiwixSearcher.addReader(reader).
 *
 * Registers the native kiwix::Reader behind the Java `reader` object with the
 * native searcher, using an empty name.
 */
JNIEXPORT void JNICALL Java_org_kiwix_kiwixlib_JNIKiwixSearcher_addReader(
JNIEnv* env, jobject obj, jobject reader)
{
// NOTE(review): `searcher` is a LockedHandle and therefore keeps globalLock
// for the rest of this function; the getHandle() call below constructs a
// second LockedHandle and locks globalLock again. Confirm that globalLock is
// safe to re-lock from the same thread (its definition is not visible here).
auto searcher = SEARCHER;
searcher->add_reader(*(Handle<kiwix::Reader>::getHandle(env, reader)), "");
}
/**
 * JNI binding for JNIKiwixSearcher.search(query, count).
 *
 * Starts a search for `query` on the native searcher, requesting results
 * 0..count.
 */
JNIEXPORT void JNICALL Java_org_kiwix_kiwixlib_JNIKiwixSearcher_search(
    JNIEnv* env, jobject obj, jstring query, jint count)
{
  unsigned int resultEnd = jni2c(count);
  std::string queryText = jni2c(query, env);

  SEARCHER->search(queryText, 0, resultEnd);
}
/**
 * JNI binding for JNIKiwixSearcher.getNextResult().
 *
 * Returns a new JNIKiwixSearcher.Result wrapping the next native result, or
 * null when the native searcher has no further result.
 */
JNIEXPORT jobject JNICALL
Java_org_kiwix_kiwixlib_JNIKiwixSearcher_getNextResult(JNIEnv* env,
                                                       jobject obj)
{
  kiwix::Result* nativeResult = SEARCHER->getNextResult();
  if (nativeResult == nullptr) {
    return nullptr;
  }

  jclass resultClass
      = env->FindClass("org/kiwix/kiwixlib/JNIKiwixSearcher$Result");
  // Result is an inner class, so its constructor takes the enclosing
  // JNIKiwixSearcher as an implicit first argument.
  jmethodID resultCtor = env->GetMethodID(
      resultClass, "<init>", "(Lorg/kiwix/kiwixlib/JNIKiwixSearcher;JLorg/kiwix/kiwixlib/JNIKiwixSearcher;)V");
  jlong resultHandle
      = reinterpret_cast<jlong>(new Handle<kiwix::Result>(nativeResult));
  return env->NewObject(resultClass, resultCtor, obj, resultHandle, obj);
}
/**
 * JNI binding for JNIKiwixSearcher.Result.dispose().
 *
 * Delegates to Handle<kiwix::Result>::dispose(), which deletes the native
 * result and the handle referenced by obj's `nativeHandle` field.
 */
JNIEXPORT void JNICALL Java_org_kiwix_kiwixlib_JNIKiwixSearcher_00024Result_dispose(
JNIEnv* env, jobject obj)
{
Handle<kiwix::Result>::dispose(env, obj);
}
/**
 * JNI binding for JNIKiwixSearcher.Result.getUrl().
 *
 * Returns the native result's url as a Java string, or null when the lookup
 * throws.
 */
JNIEXPORT jstring JNICALL
Java_org_kiwix_kiwixlib_JNIKiwixSearcher_00024Result_getUrl(JNIEnv* env,
                                                            jobject obj)
{
  jstring url = nullptr;
  try {
    url = c2jni(RESULT->get_url(), env);
  } catch (...) {
    url = nullptr;
  }
  return url;
}
/**
 * JNI binding for JNIKiwixSearcher.Result.getTitle().
 *
 * Returns the native result's title as a Java string, or null when the
 * lookup throws.
 */
JNIEXPORT jstring JNICALL
Java_org_kiwix_kiwixlib_JNIKiwixSearcher_00024Result_getTitle(JNIEnv* env,
                                                              jobject obj)
{
  jstring title = nullptr;
  try {
    title = c2jni(RESULT->get_title(), env);
  } catch (...) {
    title = nullptr;
  }
  return title;
}
/**
 * JNI binding for JNIKiwixSearcher.Result.getSnippet().
 *
 * Returns the native result's snippet as a Java string, or null when the
 * lookup throws (same convention as getUrl/getTitle).
 */
JNIEXPORT jstring JNICALL
Java_org_kiwix_kiwixlib_JNIKiwixSearcher_00024Result_getSnippet(JNIEnv* env,
                                                                jobject obj)
{
  // A C++ exception must not propagate through the JNI boundary.
  try {
    return c2jni(RESULT->get_snippet(), env);
  } catch (...) {
    return nullptr;
  }
}
/**
 * JNI binding for JNIKiwixSearcher.Result.getContent().
 *
 * Returns the native result's content as a Java string, or null when the
 * lookup throws (same convention as getUrl/getTitle).
 */
JNIEXPORT jstring JNICALL
Java_org_kiwix_kiwixlib_JNIKiwixSearcher_00024Result_getContent(JNIEnv* env,
                                                                jobject obj)
{
  // A C++ exception must not propagate through the JNI boundary.
  try {
    return c2jni(RESULT->get_content(), env);
  } catch (...) {
    return nullptr;
  }
}

View File

@@ -3,25 +3,14 @@ jni_generator = find_program('gen_kiwix.sh')
kiwix_jni = custom_target('jni',
input: ['org/kiwix/kiwixlib/JNIKiwix.java',
'org/kiwix/kiwixlib/JNIKiwixReader.java',
'org/kiwix/kiwixlib/JNIKiwixSearcher.java',
'org/kiwix/kiwixlib/JNIKiwixInt.java',
'org/kiwix/kiwixlib/JNIKiwixString.java',
'org/kiwix/kiwixlib/JNIKiwixBool.java',
'org/kiwix/kiwixlib/JNIKiwixException.java',
'org/kiwix/kiwixlib/Pair.java'],
output: ['org_kiwix_kiwixlib_JNIKiwix.h',
'org_kiwix_kiwixlib_JNIKiwixReader.h',
'org_kiwix_kiwixlib_JNIKiwixSearcher.h',
'org_kiwix_kiwixlib_JNIKiwixSearcher_Result.h'],
'org/kiwix/kiwixlib/JNIKiwixBool.java'],
output: ['org_kiwix_kiwixlib_JNIKiwix.h'],
command:[jni_generator, '@INPUT@']
)
kiwix_sources += [
'android/kiwix.cpp',
'android/kiwixreader.cpp',
'android/kiwixsearcher.cpp',
kiwix_jni]
kiwix_sources += ['android/kiwix.cpp', kiwix_jni]
install_subdir('org', install_dir: 'kiwix-lib/java')
install_subdir('res', install_dir: 'kiwix-lib')

View File

@@ -1,6 +1,5 @@
/*
* Copyright (C) 2013 Emmanuel Engelhart <kelson@kiwix.org>
* Copyright (C) 2017 Matthieu Gautier <mgautier@kymeria.fr>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -20,12 +19,56 @@
package org.kiwix.kiwixlib;
import org.kiwix.kiwixlib.JNIKiwixReader;
import org.kiwix.kiwixlib.JNIKiwixString;
import org.kiwix.kiwixlib.JNIKiwixBool;
import org.kiwix.kiwixlib.JNIKiwixInt;
/**
 * Java side of the legacy kiwix JNI API.
 *
 * All instance methods operate on a single ZIM reader held by the native
 * library (set up with {@link #loadZIM}); the native "kiwix" library is loaded
 * when this class is initialised.
 */
public class JNIKiwix
{
static { System.loadLibrary("kiwix"); }
public native String getMainPage();
public native String getId();
public native String getLanguage();
public native String getMimeType(String url);
/** Opens the ZIM file at `path`; returns false if it could not be loaded. */
public native boolean loadZIM(String path);
/** Sets up the native searcher; returns false if that failed. */
public native boolean loadFulltextIndex(String path);
/**
 * Returns the content stored at `url`. `title`, `mimeType` and `size` are
 * out-parameters filled by the native code; on failure they are set to
 * "", "" and 0 and an empty array is returned.
 */
public native byte[] getContent(String url, JNIKiwixString title, JNIKiwixString mimeType, JNIKiwixInt size);
public native boolean searchSuggestions(String prefix, int count);
/** Stores the next suggestion in `title`; returns false when there is none. */
public native boolean getNextSuggestion(JNIKiwixString title);
/** Stores the URL for `title` in `url`; returns false if it was not resolved. */
public native boolean getPageUrlFromTitle(String title, JNIKiwixString url);
/** Stores the ZIM title in `title`; returns false on failure. */
public native boolean getTitle(JNIKiwixString title);
public native String getDescription();
public native String getDate();
public native String getFavicon();
public native String getCreator();
public native String getPublisher();
public native String getName();
public native int getFileSize();
public native int getArticleCount();
public native int getMediaCount();
/** Stores a random page URL in `url`; returns false on failure. */
public native boolean getRandomPage(JNIKiwixString url);
/** Passes `icuDataDir` to ICU's u_setDataDirectory() on the native side. */
public native void setDataDirectory(String icuDataDir);
/**
 * Runs a full-text query and returns the titles of the results, each
 * followed by a newline. Note: the native implementation treats the first
 * argument as the query string, despite it being named `db` here.
 */
public static native String indexedQuery(String db, int count);
}

View File

@@ -1,27 +0,0 @@
/*
* Copyright (C) 2017 Matthieu Gautier <mgautier@kymeria.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*/
package org.kiwix.kiwixlib;
/**
 * Checked exception used by the kiwix JNI wrappers to report a failure
 * (JNIKiwixReader throws it when a ZIM file cannot be opened).
 */
public class JNIKiwixException extends Exception
{
/** Creates the exception with the given detail message. */
public JNIKiwixException(String message) {
super(message);
}
}

View File

@@ -1,127 +0,0 @@
/*
* Copyright (C) 2013 Emmanuel Engelhart <kelson@kiwix.org>
* Copyright (C) 2017 Matthieu Gautier <mgautier@kymeria.fr>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*/
package org.kiwix.kiwixlib;
import org.kiwix.kiwixlib.JNIKiwixException;
import org.kiwix.kiwixlib.JNIKiwixString;
import org.kiwix.kiwixlib.JNIKiwixInt;
import org.kiwix.kiwixlib.JNIKiwixSearcher;
import org.kiwix.kiwixlib.Pair;
/**
 * Java wrapper around a native kiwix reader for one ZIM file.
 *
 * The native object is referenced through {@link #nativeHandle}; call
 * {@link #dispose()} to release it.
 */
public class JNIKiwixReader
{
public native String getMainPage();
public native String getTitle();
public native String getId();
public native String getLanguage();
public native String getMimeType(String url);
/**
 * Returns the content stored at `url`. `title`, `mimeType` and `size` are
 * out-parameters filled by the native code; on failure they are set to
 * "", "" and 0 and an empty array is returned.
 */
public native byte[] getContent(String url,
JNIKiwixString title,
JNIKiwixString mimeType,
JNIKiwixInt size);
/**
* getContentPart.
*
* Get only a part of the content of the article.
* Return a byte array of `len` size starting from offset `offset`.
* Set `size` to the number of bytes read
* (`len` if everything is ok, 0 in case of error).
* If `len` == 0, no bytes are read but `size` is set to the total size of the
* article.
*/
public native byte[] getContentPart(String url,
int offest,
int len,
JNIKiwixInt size);
/**
* getDirectAccessInformation.
*
* Return information giving where the content is located in the zim file.
*
* Some contents (binary content) are stored uncompressed in the zim file.
* Knowing this information, it could be interesting to directly open
* the zim file (or zim part) and directly read the content from it (and so
* bypassing the libzim).
*
* Return a `Pair` (filename, offset) where the content is located.
*
* If the content cannot be directly accessed (content is compressed or zim
* file is cut in the middle of the content), the filename is an empty string
* and offset is zero.
*/
public native Pair getDirectAccessInformation(String url);
public native boolean searchSuggestions(String prefix, int count);
/** Stores the next suggestion in `title`; returns false when there is none. */
public native boolean getNextSuggestion(JNIKiwixString title);
/** Stores the URL for `title` in `url`; returns false if it was not resolved. */
public native boolean getPageUrlFromTitle(String title, JNIKiwixString url);
public native String getDescription();
public native String getDate();
public native String getFavicon();
public native String getCreator();
public native String getPublisher();
public native String getName();
public native int getFileSize();
public native int getArticleCount();
public native int getMediaCount();
/** Stores a random page URL in `url`; returns false on failure. */
public native boolean getRandomPage(JNIKiwixString url);
/**
 * Convenience helper: creates a new searcher bound to this reader, starts a
 * search for `query` limited to `count` results, and returns the searcher.
 */
public JNIKiwixSearcher search(String query, int count)
{
JNIKiwixSearcher searcher = new JNIKiwixSearcher();
searcher.addKiwixReader(this);
searcher.search(query, count);
return searcher;
}
/**
 * Opens the ZIM file `filename`.
 *
 * @throws JNIKiwixException if the native side returns a zero handle.
 */
public JNIKiwixReader(String filename) throws JNIKiwixException
{
nativeHandle = getNativeReader(filename);
if (nativeHandle == 0) {
throw new JNIKiwixException("Cannot open zimfile "+filename);
}
}
// NOTE(review): this constructor leaves nativeHandle at 0, so the native
// methods have no reader to work on — confirm who is expected to use it.
public JNIKiwixReader() {
}
/** Releases the native reader referenced by nativeHandle. */
public native void dispose();
private native long getNativeReader(String filename);
// Opaque pointer to the native handle; read from native code by field name.
private long nativeHandle;
}

View File

@@ -1,67 +0,0 @@
/*
* Copyright (C) 2013 Emmanuel Engelhart <kelson@kiwix.org>
* Copyright (C) 2017 Matthieu Gautier <mgautier@kymeria.fr>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*/
package org.kiwix.kiwixlib;
import org.kiwix.kiwixlib.JNIKiwixReader;
import java.util.Vector;
/**
 * Java wrapper around a native kiwix searcher that can search across one or
 * more JNIKiwixReader instances.
 */
public class JNIKiwixSearcher
{
/** One search hit, backed by a native result object. */
public class Result
{
// Opaque pointer to the native handle; read from native code by field name.
private long nativeHandle;
private JNIKiwixSearcher searcher;
public Result(long handle, JNIKiwixSearcher _searcher)
{
nativeHandle = handle;
searcher = _searcher;
}
public native String getUrl();
public native String getTitle();
public native String getContent();
public native String getSnippet();
/** Releases the native result referenced by nativeHandle. */
public native void dispose();
}
/** Creates the native searcher and an empty list of attached readers. */
public JNIKiwixSearcher()
{
nativeHandle = getNativeHandle();
usedReaders = new Vector();
}
/** Releases the native searcher referenced by nativeHandle. */
public native void dispose();
private native long getNativeHandle();
// Opaque pointer to the native handle; read from native code by field name.
private long nativeHandle;
// Readers added through addKiwixReader(); presumably kept so they stay
// reachable while the native searcher uses them — TODO confirm.
private Vector usedReaders;
public native void addReader(JNIKiwixReader reader);
/** Adds `reader` to the native searcher and remembers it in usedReaders. */
public void addKiwixReader(JNIKiwixReader reader)
{
addReader(reader);
usedReaders.addElement(reader);
};
public native void search(String query, int count);
/** Returns the next result, or null when there is none. */
public native Result getNextResult();
// NOTE(review): no native implementation of hasMoreResult is visible in
// kiwixsearcher.cpp — confirm it exists before calling this.
public native boolean hasMoreResult();
}

View File

@@ -1,26 +0,0 @@
/*
* Copyright (C) 2017 Matthieu Gautier <mgautier@kymeria.fr>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*/
package org.kiwix.kiwixlib;
/**
 * Simple (filename, offset) holder returned by
 * JNIKiwixReader.getDirectAccessInformation(); both fields are written from
 * native code by name.
 */
public class Pair
{
// File in which the content is located ("" when not directly accessible).
public String filename;
// Offset of the content inside that file (0 when not directly accessible).
public int offset;
}

View File

@@ -1,150 +0,0 @@
/*
* Copyright (C) 2013 Emmanuel Engelhart <kelson@kiwix.org>
* Copyright (C) 2017 Matthieu Gautier <mgautier@kymeria.fr>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*/
#ifndef _ANDROID_JNI_UTILS_H
#define _ANDROID_JNI_UTILS_H
#include <jni.h>
#include <pthread.h>
#include <string>
extern pthread_mutex_t globalLock;
/**
 * Returns the field id of the `long nativeHandle` field declared by the
 * class of `obj`.
 */
inline jfieldID getHandleField(JNIEnv* env, jobject obj)
{
  jclass ownerClass = env->GetObjectClass(obj);
  // "J" is the JNI type signature for a Java long.
  jfieldID handleField = env->GetFieldID(ownerClass, "nativeHandle", "J");
  return handleField;
}
/**
 * Scoped owner of globalLock: the default constructor locks the mutex and the
 * destructor unlocks it. Ownership can be moved but not copied; a moved-from
 * Lock owns nothing and its destructor does not unlock.
 */
class Lock
{
 protected:
  pthread_mutex_t* lock;  // nullptr once ownership has been moved away

 public:
  Lock() : lock(&globalLock) { pthread_mutex_lock(lock); }
  Lock(const Lock&) = delete;
  Lock& operator=(const Lock&) = delete;
  // Take over exactly what `other` owns. Previously this always claimed
  // &globalLock, so moving from an already moved-from Lock produced an
  // object that would unlock a mutex it never held.
  Lock(Lock&& other) : lock(other.lock) { other.lock = nullptr; }
  virtual ~Lock()
  {
    if (lock) {
      pthread_mutex_unlock(lock);
    }
  }
};
template <class T>
class LockedHandle;
template <class T>
class Handle
{
protected:
T* h;
public:
Handle(T* h) : h(h){};
// No destructor. This must and will be handled by dispose method.
static LockedHandle<T> getHandle(JNIEnv* env, jobject obj)
{
jlong handle = env->GetLongField(obj, getHandleField(env, obj));
return LockedHandle<T>(reinterpret_cast<Handle<T>*>(handle));
}
static void dispose(JNIEnv* env, jobject obj)
{
auto lHandle = getHandle(env, obj);
auto handle = lHandle.h;
delete handle->h;
delete handle;
}
friend class LockedHandle<T>;
};
/**
 * A Handle pointer paired with ownership of globalLock (through the Lock base
 * class): the mutex is locked on construction and released on destruction.
 * operator-> and operator* give access to the wrapped T*; they require a
 * non-null handle.
 */
template <class T>
struct LockedHandle : public Lock {
  Handle<T>* h;
  LockedHandle(Handle<T>* h) : h(h) {}
  T* operator->() { return h->h; }
  T* operator*() { return h->h; }
  // True only when there is a handle and it wraps an object. The handle itself
  // is checked too: it is null when the Java field holds 0.
  operator bool() const { return (h != nullptr) && (h->h != nullptr); }
};
/* c2jni type conversion functions */
/** Converts a C++ bool to JNI_TRUE / JNI_FALSE. */
inline jboolean c2jni(const bool& val) { return val ? JNI_TRUE : JNI_FALSE; }
/** Creates a new Java string from `val`, passed to NewStringUTF() as-is. */
inline jstring c2jni(const std::string& val, JNIEnv* env)
{
  return env->NewStringUTF(val.c_str());
}
/** Converts a C++ int to jint. */
inline jint c2jni(const int val) { return static_cast<jint>(val); }
/**
 * Converts a C++ unsigned to jint. The cast now names the actual target type
 * (it used to cast to `unsigned` and rely on the implicit conversion).
 */
inline jint c2jni(const unsigned val) { return static_cast<jint>(val); }
/* jni2c type conversion functions */
inline bool jni2c(const jboolean& val) { return val == JNI_TRUE; }
inline std::string jni2c(const jstring& val, JNIEnv* env)
{
const char* chars = env->GetStringUTFChars(val, 0);
std::string ret(chars);
env->ReleaseStringUTFChars(val, chars);
return ret;
}
inline int jni2c(const jint val) { return (int)val; }
/* Method to deal with variable passed by reference */
inline void setStringObjValue(const std::string& value,
const jobject obj,
JNIEnv* env)
{
jclass objClass = env->GetObjectClass(obj);
jfieldID objFid = env->GetFieldID(objClass, "value", "Ljava/lang/String;");
env->SetObjectField(obj, objFid, c2jni(value, env));
}
inline void setIntObjValue(const int value, const jobject obj, JNIEnv* env)
{
jclass objClass = env->GetObjectClass(obj);
jfieldID objFid = env->GetFieldID(objClass, "value", "I");
env->SetIntField(obj, objFid, value);
}
inline void setBoolObjValue(const bool value, const jobject obj, JNIEnv* env)
{
jclass objClass = env->GetObjectClass(obj);
jfieldID objFid = env->GetFieldID(objClass, "value", "Z");
env->SetIntField(obj, objFid, c2jni(value));
}
inline void setPairObjValue(const std::string& filename, const int offset,
const jobject obj, JNIEnv* env)
{
jclass objClass = env->GetObjectClass(obj);
jfieldID filenameFid = env->GetFieldID(objClass, "filename", "Ljava/lang/String;");
env->SetObjectField(obj, filenameFid, c2jni(filename, env));
jfieldID offsetFid = env->GetFieldID(objClass, "offset", "I");
env->SetIntField(obj, offsetFid, offset);
}
#endif // _ANDROID_JNI_UTILS_H

View File

@@ -133,130 +133,73 @@ std::string kiwix::encodeDiples(const std::string& str)
return result;
}
// Urlencode
// based on javascript encodeURIComponent()
/* Return the two lowercase hexadecimal digits of the byte `dec`
   (high nibble first), e.g. ' ' gives "20". */
std::string char2hex(char dec)
{
  static const char kDigits[] = "0123456789abcdef";

  // Work on the unsigned byte so that the nibbles are always in 0..15.
  const unsigned char byte = static_cast<unsigned char>(dec);

  std::string hex;
  hex += kDigits[byte >> 4];
  hex += kDigits[byte & 0x0F];
  return hex;
}
/* Percent-encode `c`: ASCII digits, ASCII letters and the characters
   ~ ! * ( ) ' are copied as is, every other byte becomes '%' followed by
   the two hexadecimal digits returned by char2hex(). */
std::string kiwix::urlEncode(const std::string& c)
{
  std::string escaped = "";
  const int length = c.length();
  for (int pos = 0; pos < length; pos++) {
    const char ch = c[pos];
    const bool isDigit = (48 <= ch && ch <= 57);       // 0-9
    const bool isUpperCase = (65 <= ch && ch <= 90);   // ABC...XYZ
    const bool isLowerCase = (97 <= ch && ch <= 122);  // abc...xyz
    const bool isUnescapedMark = (ch == '~' || ch == '!' || ch == '*'
                                  || ch == '(' || ch == ')' || ch == '\'');

    if (isDigit || isUpperCase || isLowerCase || isUnescapedMark) {
      escaped += ch;
      continue;
    }

    escaped += "%";
    escaped += char2hex(ch);  // converts char 255 to string "ff"
  }
  return escaped;
}
#endif
/* urlEncode() based on javascript encodeURI() &
encodeURIComponent(). Mostly code from rstudio/httpuv (GPLv3) */
bool isReservedUrlChar(char c)
static char charFromHex(std::string a)
{
switch (c) {
case ';':
case ',':
case '/':
case '?':
case ':':
case '@':
case '&':
case '=':
case '+':
case '$':
return true;
default:
return false;
}
std::istringstream Blat(a);
int Z;
Blat >> std::hex >> Z;
return char(Z);
}
bool needsEscape(char c, bool encodeReserved)
std::string kiwix::urlDecode(const std::string& originalUrl)
{
if (c >= 'a' && c <= 'z')
return false;
if (c >= 'A' && c <= 'Z')
return false;
if (c >= '0' && c <= '9')
return false;
if (isReservedUrlChar(c))
return encodeReserved;
switch (c) {
case '-':
case '_':
case '.':
case '!':
case '~':
case '*':
case '\'':
case '(':
case ')':
return false;
std::string url = originalUrl;
std::string::size_type pos = 0;
while ((pos = url.find('%', pos)) != std::string::npos
&& pos + 2 < url.length()) {
url.replace(pos, 3, 1, charFromHex(url.substr(pos + 1, 2)));
++pos;
}
return true;
}
/* Return the numeric value (0-15) of the hexadecimal digit `c`, accepting
   both upper and lower case letters, or -1 if `c` is not a hex digit. */
int hexToInt(char c)
{
  if (c >= '0' && c <= '9') {
    return c - '0';
  }
  if (c >= 'a' && c <= 'f') {
    return 10 + (c - 'a');
  }
  if (c >= 'A' && c <= 'F') {
    return 10 + (c - 'A');
  }
  return -1;
}
/* Percent-encode `value`.
 *
 * Every character for which needsEscape(c, encodeReserved) is true is
 * replaced by '%' followed by its two-digit uppercase hexadecimal byte
 * value; all other characters are copied unchanged.
 */
std::string kiwix::urlEncode(const std::string& value, bool encodeReserved)
{
  std::ostringstream os;
  // Zero fill is required: with the default space fill, a byte below 0x10
  // (e.g. '\n') would be written as "% A" instead of "%0A".
  os << std::hex << std::uppercase << std::setfill('0');
  for (std::string::const_iterator it = value.begin();
       it != value.end();
       ++it) {
    if (!needsEscape(*it, encodeReserved)) {
      os << *it;
    } else {
      // Go through unsigned char first so that bytes >= 0x80 are not
      // sign-extended before being printed as a number.
      os << '%' << std::setw(2)
         << static_cast<unsigned int>(static_cast<unsigned char>(*it));
    }
  }
  return os.str();
}
/* Decode the %XX escape sequences found in `value`.
 *
 * - A '%' followed by two hexadecimal digits is replaced by the byte they
 *   encode.
 * - When `component` is false, an escape that decodes to a character for
 *   which isReservedUrlChar() is true is kept in its encoded form.
 * - An escape whose digits are not valid hexadecimal is copied verbatim.
 */
std::string kiwix::urlDecode(const std::string& value, bool component)
{
std::ostringstream os;
for (std::string::const_iterator it = value.begin();
it != value.end();
it++) {
// If there aren't enough characters left for this to be a
// valid escape code, just use the character and move on
// NOTE(review): for a `value` shorter than 3 characters, `value.end() - 3`
// is an iterator before begin() - confirm this is acceptable.
if (it > value.end() - 3) {
os << *it;
continue;
}
if (*it == '%') {
// Consume the two characters following '%'; the loop increment then moves
// past the second one.
char hi = *(++it);
char lo = *(++it);
int iHi = hexToInt(hi);
int iLo = hexToInt(lo);
if (iHi < 0 || iLo < 0) {
// Invalid escape sequence
os << '%' << hi << lo;
continue;
}
// Combine the two nibbles into the decoded byte.
char c = (char)(iHi << 4 | iLo);
if (!component && isReservedUrlChar(c)) {
// Reserved characters stay encoded when not decoding a component.
os << '%' << hi << lo;
} else {
os << c;
}
} else {
os << *it;
}
}
return os.str();
// NOTE(review): the statement below is unreachable and `url` is not declared
// in this function as shown; it looks like residue from another revision of
// urlDecode - confirm and remove.
return url;
}
/* Split string in a token array */

View File

@@ -243,7 +243,7 @@ bool Manager::setCurrentBookId(const string id)
return true;
}
string Manager::getCurrentBookId() const
string Manager::getCurrentBookId()
{
return library.current.empty() ? "" : library.current.top();
}
@@ -495,6 +495,11 @@ bool Manager::setBookIndex(const string id,
return false;
}
/* Two-argument overload: forwards to the three-argument setBookIndex(),
   passing XAPIAN as the third argument. */
bool Manager::setBookIndex(const string id, const string path)
{
  const bool result = setBookIndex(id, path, XAPIAN);
  return result;
}
bool Manager::setBookPath(const string id, const string path)
{
std::vector<kiwix::Book>::iterator itr;

View File

@@ -80,6 +80,7 @@ Reader::Reader(const string zimFilePath) : zimFileHandler(NULL)
this->firstArticleOffset
= this->zimFileHandler->getNamespaceBeginOffset('A');
this->lastArticleOffset = this->zimFileHandler->getNamespaceEndOffset('A');
this->currentArticleOffset = this->firstArticleOffset;
this->nsACount = this->zimFileHandler->getNamespaceCount('A');
this->nsICount = this->zimFileHandler->getNamespaceCount('I');
this->zimFilePath = zimFilePath;
@@ -101,6 +102,11 @@ zim::File* Reader::getZimFileHandler() const
{
return this->zimFileHandler;
}
/* Reset the cursor for GetNextArticle() */
void Reader::reset()
{
this->currentArticleOffset = this->firstArticleOffset;
}
std::map<const std::string, unsigned int> Reader::parseCounterMetadata() const
{
std::map<const std::string, unsigned int> counters;

View File

@@ -17,9 +17,6 @@
* MA 02110-1301, USA.
*/
#include <cmath>
#include "searcher.h"
#include "kiwixlib-resources.h"
#include "reader.h"
@@ -36,8 +33,6 @@
using namespace CTPP;
#endif
#define MAX_SEARCH_LEN 140
namespace kiwix
{
class _Result : public Result
@@ -88,17 +83,17 @@ Searcher::Searcher(const string& xapianDirectoryPath,
resultCountPerPage(0),
estimatedResultCount(0),
resultStart(0),
resultEnd(0),
contentHumanReadableId(humanReadableName)
resultEnd(0)
{
loadICUExternalTables();
if (!reader || !reader->hasFulltextIndex()) {
internal->_xapianSearcher = new XapianSearcher(xapianDirectoryPath, reader);
}
this->contentHumanReadableId = humanReadableName;
this->humanReaderNames.push_back(humanReadableName);
}
Searcher::Searcher(const std::string& humanReadableName)
Searcher::Searcher()
: internal(new SearcherInternal()),
searchPattern(""),
protocolPrefix("zim://"),
@@ -106,8 +101,7 @@ Searcher::Searcher(const std::string& humanReadableName)
resultCountPerPage(0),
estimatedResultCount(0),
resultStart(0),
resultEnd(0),
contentHumanReadableId(humanReadableName)
resultEnd(0)
{
loadICUExternalTables();
}
@@ -118,14 +112,10 @@ Searcher::~Searcher()
delete internal;
}
bool Searcher::add_reader(Reader* reader, const std::string& humanReadableName)
void Searcher::add_reader(Reader* reader, const std::string& humanReadableName)
{
if (!reader->hasFulltextIndex()) {
return false;
}
this->readers.push_back(reader);
this->humanReaderNames.push_back(humanReadableName);
return true;
}
/* Search strings in the database */
@@ -151,9 +141,9 @@ void Searcher::search(std::string& search,
if (resultStart != resultEnd) {
/* Avoid big researches */
this->resultCountPerPage = resultEnd - resultStart;
if (this->resultCountPerPage > MAX_SEARCH_LEN) {
resultEnd = resultStart + MAX_SEARCH_LEN;
this->resultCountPerPage = MAX_SEARCH_LEN;
if (this->resultCountPerPage > 70) {
resultEnd = resultStart + 70;
this->resultCountPerPage = 70;
}
/* Perform the search */
@@ -170,9 +160,7 @@ void Searcher::search(std::string& search,
std::vector<const zim::File*> zims;
for (auto current = this->readers.begin(); current != this->readers.end();
current++) {
if ( (*current)->hasFulltextIndex() ) {
zims.push_back((*current)->getZimFileHandler());
}
zims.push_back((*current)->getZimFileHandler());
}
zim::Search* search = new zim::Search(zims);
search->set_query(unaccentedSearch);
@@ -186,68 +174,11 @@ void Searcher::search(std::string& search,
return;
}
void Searcher::geo_search(float latitude, float longitude, float distance,
unsigned int resultStart,
unsigned int resultEnd,
const bool verbose)
{
this->reset();
if (verbose == true) {
cout << "Performing geo query `" << distance << "&(" << latitude << ";" << longitude << ")'" << endl;
}
/* If resultEnd & resultStart inverted */
if (resultStart > resultEnd) {
resultEnd += resultStart;
resultStart = resultEnd - resultStart;
resultEnd -= resultStart;
}
/* Try to find results */
if (resultStart == resultEnd) {
return;
}
if (internal->_xapianSearcher) {
return;
}
/* Avoid big researches */
this->resultCountPerPage = resultEnd - resultStart;
if (this->resultCountPerPage > MAX_SEARCH_LEN) {
resultEnd = resultStart + MAX_SEARCH_LEN;
this->resultCountPerPage = MAX_SEARCH_LEN;
}
/* Perform the search */
std::ostringstream oss;
oss << "Articles located less than " << distance << " meters of " << latitude << ";" << longitude;
this->searchPattern = oss.str();
this->resultStart = resultStart;
this->resultEnd = resultEnd;
std::vector<const zim::File*> zims;
for (auto current = this->readers.begin(); current != this->readers.end();
current++) {
zims.push_back((*current)->getZimFileHandler());
}
zim::Search* search = new zim::Search(zims);
search->set_query("");
search->set_georange(latitude, longitude, distance);
search->set_range(resultStart, resultEnd);
internal->_search = search;
internal->current_iterator = internal->_search->begin();
this->estimatedResultCount = internal->_search->get_matches_estimated();
}
void Searcher::restart_search()
{
if (internal->_xapianSearcher) {
internal->_xapianSearcher->restart_search();
} else if (internal->_search) {
} else {
internal->current_iterator = internal->_search->begin();
}
}
@@ -256,8 +187,7 @@ Result* Searcher::getNextResult()
{
if (internal->_xapianSearcher) {
return internal->_xapianSearcher->getNextResult();
} else if (internal->_search &&
internal->current_iterator != internal->_search->end()) {
} else if (internal->current_iterator != internal->_search->end()) {
Result* result = new _Result(this, internal->current_iterator);
internal->current_iterator++;
return result;
@@ -368,12 +298,7 @@ int _Result::get_readerIndex()
string Searcher::getHtml()
{
SimpleVM oSimpleVM(
1024, //iIMaxFunctions (default value)
4096, //iIMaxArgStackSize (default value)
4096, //iIMaxCodeStackSize (default value)
10240 * 2 //iIMaxSteps (default*2)
);
SimpleVM oSimpleVM;
// Fill data
CDT oData;
@@ -442,7 +367,7 @@ string Searcher::getHtml()
oData["resultRange"] = this->resultCountPerPage;
oData["resultLastPageStart"]
= this->estimatedResultCount > this->resultCountPerPage
? std::round(this->estimatedResultCount / this->resultCountPerPage) * this->resultCountPerPage
? this->estimatedResultCount - this->resultCountPerPage
: 0;
oData["protocolPrefix"] = this->protocolPrefix;
oData["searchProtocolPrefix"] = this->searchProtocolPrefix;

View File

@@ -29,7 +29,7 @@ esac
sudo apt-get update -qq
sudo apt-get install -qq python3-pip ${PACKAGES}
sudo pip3 install meson==0.43.0
sudo pip3 install meson
# Ninja
cd $HOME