Compare commits

..

1 Commits

Author SHA1 Message Date
Matthieu Gautier
29c0551aaf Run the CI on pull request.
This is needed to have the CI run on pull requests created from a forked
repository.

We don't need to launch the CI on push :
- It will also be built on pull_request.
- The CI does check code only (no publication,...)
2020-08-12 11:17:08 +02:00
261 changed files with 33655 additions and 19428 deletions

27
.github/move.yml vendored Normal file
View File

@@ -0,0 +1,27 @@
# Configuration for Move Issues - https://github.com/dessant/move-issues
# Delete the command comment when it contains no other content
deleteCommand: true
# Close the source issue after moving
closeSourceIssue: true
# Lock the source issue after moving (disabled here: the source issue stays unlocked)
lockSourceIssue: false
# Mention issue and comment authors
mentionAuthors: true
# Preserve mentions in the issue content
keepContentMentions: true
# Move labels that also exist on the target repository
moveLabels: true
# Set custom aliases for targets (example left commented out, so no aliases are defined)
# aliases:
#   r: repo
#   or: owner/repo
# Repository to extend settings from (commented out, so nothing is extended)
# _extends: repo

View File

@@ -1,31 +1,27 @@
name: CI
name: Check PR
on:
push:
branches:
- main
pull_request:
on: [pull_request]
jobs:
Macos:
runs-on: macos-latest
steps:
- name: Checkout code
uses: actions/checkout@v2
- name: Setup python 3.9
uses: actions/checkout@v1
- name: Setup python 3.5
uses: actions/setup-python@v1
with:
python-version: '3.9'
python-version: '3.5'
- name: Install packages
run: |
brew update
brew install gcovr pkg-config ninja || brew link --overwrite python
uses: mstksg/get-package@v1
with:
brew: gcovr pkg-config ninja
- name: Install python modules
run: pip3 install meson==0.49.2 pytest
- name: Install deps
shell: bash
run: |
ARCHIVE_NAME=deps2_osx_native_dyn_libkiwix.tar.xz
ARCHIVE_NAME=deps2_osx_native_dyn_kiwix-lib.tar.xz
wget -O- http://tmp.kiwix.org/ci/${ARCHIVE_NAME} | tar -xJ -C $HOME
- name: Compile
shell: bash
@@ -41,8 +37,17 @@ jobs:
export LD_LIBRARY_PATH=$HOME/BUILD_native_dyn/INSTALL/lib:$HOME/BUILD_native_dyn/INSTALL/lib64
cd build
meson test --verbose
ninja coverage
env:
SKIP_BIG_MEMORY_TEST: 1
- name: Publish coverage
shell: bash
run: |
curl https://codecov.io/bash -o codecov.sh
bash codecov.sh -n osx_native_dyn -Z
rm codecov.sh
env:
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
Linux:
strategy:
@@ -51,6 +56,7 @@ jobs:
name:
- native_static
- native_dyn
- native_dyn_bionic
- android_arm
- android_arm64
- win32_static
@@ -58,34 +64,42 @@ jobs:
include:
- name: native_static
target: native_static
image_variant: bionic
image_variant: xenial
lib_postfix: '/x86_64-linux-gnu'
- name: native_dyn
target: native_dyn
image_variant: xenial
lib_postfix: '/x86_64-linux-gnu'
- name: native_dyn_bionic
target: native_dyn
image_variant: bionic
lib_postfix: '/x86_64-linux-gnu'
- name: android_arm
target: android_arm
image_variant: bionic
lib_postfix: '/arm-linux-androideabi'
image_variant: xenial
lib_postfix: '/x86_64-linux-gnu'
- name: android_arm64
target: android_arm64
image_variant: bionic
lib_postfix: '/aarch64-linux-android'
image_variant: xenial
lib_postfix: '/x86_64-linux-gnu'
- name: win32_static
target: win32_static
image_variant: f35
image_variant: f31
lib_postfix: '64'
- name: win32_dyn
target: win32_dyn
image_variant: f35
image_variant: f31
lib_postfix: '64'
env:
HOME: /home/runner
runs-on: ubuntu-latest
container:
image: "kiwix/kiwix-build_ci:${{matrix.image_variant}}-31"
image: "kiwix/kiwix-build_ci:${{matrix.image_variant}}-26"
steps:
- name: Extract branch name
shell: bash
run: echo "##[set-output name=branch;]$(echo ${GITHUB_REF#refs/heads/})"
id: extract_branch
- name: Checkout code
shell: python
run: |
@@ -95,13 +109,13 @@ jobs:
'git', 'clone',
'https://github.com/${{github.repository}}',
'--depth=1',
'--branch', '${{ github.head_ref || github.ref_name }}'
'--branch', '${{steps.extract_branch.outputs.branch}}'
]
check_call(command, cwd=environ['HOME'])
- name: Install deps
shell: bash
run: |
ARCHIVE_NAME=deps2_${OS_NAME}_${{matrix.target}}_libkiwix.tar.xz
ARCHIVE_NAME=deps2_${OS_NAME}_${{matrix.target}}_kiwix-lib.tar.xz
wget -O- http://tmp.kiwix.org/ci/${ARCHIVE_NAME} | tar -xJ -C /home/runner
- name: Compile
shell: bash
@@ -118,9 +132,9 @@ jobs:
MESON_OPTION="$MESON_OPTION --cross-file $HOME/BUILD_${{matrix.target}}/meson_cross_file.txt"
fi
if [[ "${{matrix.target}}" =~ android_.* ]]; then
MESON_OPTION="$MESON_OPTION -Dstatic-linkage=true"
MESON_OPTION="$MESON_OPTION -Dandroid=true"
fi
cd $HOME/libkiwix
cd $HOME/kiwix-lib
meson . build ${MESON_OPTION}
cd build
ninja
@@ -131,7 +145,7 @@ jobs:
if: startsWith(matrix.target, 'native_')
shell: bash
run: |
cd $HOME/libkiwix/build
cd $HOME/kiwix-lib/build
meson test --verbose
ninja coverage
env:
@@ -140,10 +154,10 @@ jobs:
- name: Publish coverage
shell: bash
run: |
cd $HOME/libkiwix
cd $HOME/kiwix-lib
curl https://codecov.io/bash -o codecov.sh
bash codecov.sh -n "${OS_NAME}_${{matrix.target}}" -Z
rm codecov.sh
if: startsWith(matrix.target, 'native_')
if: startsWith(matrix.target, 'native_') && matrix.image_variant == 'xenial'
env:
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}

View File

@@ -7,11 +7,7 @@ jobs:
strategy:
fail-fast: false
matrix:
distro:
- ubuntu-kinetic
- ubuntu-jammy
- ubuntu-focal
- ubuntu-bionic
distro: [ubuntu-groovy, ubuntu-focal]
steps:
- uses: actions/checkout@v2
@@ -34,18 +30,10 @@ jobs:
email: release+launchpad@kiwix.org
distro: ${{ matrix.distro }}
- uses: legoktm/gh-action-build-deb@ubuntu-kinetic
if: matrix.distro == 'ubuntu-kinetic'
name: Build package for ubuntu-kinetic
id: build-ubuntu-kinetic
with:
args: --no-sign
ppa: ${{ steps.ppa.outputs.ppa }}
- uses: legoktm/gh-action-build-deb@ubuntu-jammy
if: matrix.distro == 'ubuntu-jammy'
name: Build package for ubuntu-jammy
id: build-ubuntu-jammy
- uses: legoktm/gh-action-build-deb@ubuntu-groovy
if: matrix.distro == 'ubuntu-groovy'
name: Build package for ubuntu-groovy
id: build-ubuntu-groovy
with:
args: --no-sign
ppa: ${{ steps.ppa.outputs.ppa }}
@@ -58,14 +46,6 @@ jobs:
args: --no-sign
ppa: ${{ steps.ppa.outputs.ppa }}
- uses: legoktm/gh-action-build-deb@ubuntu-bionic
if: matrix.distro == 'ubuntu-bionic'
name: Build package for ubuntu-bionic
id: build-ubuntu-bionic
with:
args: --no-sign
ppa: ${{ steps.ppa.outputs.ppa }}
- uses: actions/upload-artifact@v2
with:
name: Packages for ${{ matrix.distro }}
@@ -73,8 +53,8 @@ jobs:
- uses: legoktm/gh-action-dput@master
name: Upload dev package
# Only upload on pushes to git default branch
if: github.event_name == 'push' && github.event.ref == 'refs/heads/main' && startswith(matrix.distro, 'ubuntu-')
# Only upload on pushes to master
if: github.event_name == 'push' && github.event.ref == 'refs/heads/master' && startswith(matrix.distro, 'ubuntu-')
with:
gpg_key: ${{ secrets.LAUNCHPAD_GPG }}
repository: ppa:kiwixteam/dev

3
.gitignore vendored
View File

@@ -4,6 +4,3 @@ subprojects/googletest-release*
*.class
build/
.vscode/
builddir/
.cache/
.clangd/

190
ChangeLog
View File

@@ -1,193 +1,3 @@
libkiwix 12.0.0
===============
* [API Break] Remove wrapper around libzim (@mgautierfr #789)
* Allow kiwix-serve to use custom resource files (@veloman-yunkan #779)
* Properly handle searchProtocolPrefix when rendering search result (@veloman-yunkan #823)
* Prevent search on multi language content (@veloman-yunkan #838)
* Use new `zim::Archive::getMediaCount` from libzim (@mgautierfr #836)
* Catalog:
- Include tags in free text catalog search (@veloman-yunkan #802)
- Illustration's url is based on book's uuid (@veloman-yunkan #804)
- Cleanup of the opds-dumper (@veloman-yunkan #829)
- Allow filtering of catalog content using multiple languages (@veloman-yunkan #841)
- Make opds-dumper respect the namemapper (@mgautierfr #837)
* Server:
- Correctly handle `\` in suggestion json generation (@veloman-yunkan #843)
- Better http caching (@veloman-yunkan #833)
- Make `/suggest` endpoint thread-safe (@veloman-yunkan #834)
- Better redirection of main page (@veloman-yunkan #827)
- Remove jquery (@mgautierfr @juuz0 #796)
- Better Viewer of zim content :
. Introduce `/content` endpoints (@veloman-yunkan #806)
. Switch to iframe based content viewer (@veloman-yunkan #716)
- Optimised design of the welcome page:
. Alignment (@juuz0 @kelson42 #786)
. Exit download modal on pressing escape key (@juzz0 #800)
. Add favicon for different devices (@juzz0 #805)
. Fix auto-hiding of the toolbar (@veloman-yunkan #821)
. Allow user to filter books by tags in the front page (@juuz0 #711)
* CI :
- Trigger CI on pull_request (@kelson42 #791)
- Drop Ubuntu Impish packaging (@legoktm #825)
- Add Ubuntu Kinetic packaging (@legoktm #801)
* Testing:
- Test ICULanguageInfo (@veloman-yunkan #795)
- Introduce fake `test` language to test i18n (@veloman-yunkan #848)
* Fix documentation (@kelson42 #816)
* Update translation (#787 #839 #847)
libkiwix 11.0.0
===============
* [server] Add support for internationalization (@veloman-yunkan #679)
* [server] Use gzip compression instead of deflate (@mgautierfr #757)
* [server] Version the static resources. This allows better invalidation of the
browser cache when resources are changed (@veloman-yunkan #712)
* [server|front] Use integer to query the host for page length (@juuz0 #772)
* [server] Improve multizim search API:
- Improvement of the cache system
- Better API to select on which books to search in.
- SysAdmin is now able to limit the number of book we search in for a multizim search
* [server] Introduce a opensearch API for multizim fulltext search
* [wrapper] Remove java wrapper
* Testing:
- Testing of search result pages content (@veloman-yunkan #765)
- Better testing structure of xml search result (@veloman-yunkan #780)
libkiwix 10.1.1
===============
* Correctly detect the number of article for older zims (<=6) (@mgautier #743)
* [server] Fix fulltext search (@mgautierfr #724)
* [server][internal] New way to build Error message (@veloman-yunkan #732 #738 #744)
* Fix CI (@mgautierfr #736)
libkiwix 10.1.0
===============
This release is an important one as it fixes a Xss vulnerability introduced
in libkiwix 10.0.0
* [SECURITY] Fix a Xss attack vulnerability (introduced in 10.0.0) (@juuz0 #721)
* [server] Add an option to set a limit on the number of connections per IP (@kelson42 #700)
* [server] Do not display a lang tag in the UI if the book has no language (@juuz0 #706)
* [server] Add the book title associated to a search results (@thavelick #705, @mgautierfr #718)
* Add `dc:issued` to opds output stream (@veloman-yunkan #715)
* Add handling of several languages not provided by ICU (@juuz0 #701)
* [server] Add a caching system for search and suggestion (@maneeshpm #620)
* Fix cross-compilation (@kelson42 #703)
* Add unit-testing of suggestions and error pages (@veloman-yunkan #709 #710 #727)
* Better testing system of html response (@veloman-yunkan #725)
libkiwix 10.0.1
===============
* [server] The catalog search interpret `count=0` as no limit.
This was the case for a long time. This was changed unintentionally
(@veloman-yunkan #686)
* [server] Correctly generate a human friendly title in the server frontend.
(@juuz0 #687, @kelson42 #689)
* [server] Fix download button if there is no url to download from.
(@juuz0 #691)
* Add non-minified isotope.pkdg.js
Needed for debian packaging as we need the source and minified version is
not the source (@legoktm #693)
* [server] Add a tooltip with the full language for the lang tag.
* CI fixes (@kelson42 @legoktm)
libkiwix 10.0.0
===============
This release is a huge release.
The project has been renamed to libkiwix, it is more coherent with the library name.
* Server front page :
- Use js in the front page to display the available book,
using the OPDS stream as source. The front page is now populated only with
the visible books and user can search for books. (@MananJethwany #530, #541, #534)
(@kelson42 #628)
- Revamp css (@MananJethwany #559)
- Correctly Convert 3iso language code to 2iso (@juuz0 #672)
* Server suggestions search :
- Add pagination for suggestion search (@maneeshpm #591)
- Fix suggestion system (@MananJethwany #498)
- Provide the kind and path (when adapted) to the suggestion answer (@MananJethwany #464)
- The displayed suggestions now highlight the searched terms (@maneeshpm #505)
- Properly handle html encoding of suggestions (@veloman-yunkan #458)
* Server improvements :
- Remove meta endpoints (@mgautier #669)
- Add raw endpoints to get the raw content of a zim (@mgautierfr #646)
- Add details on 404 error pages (@soumyankar #490)
- Fix headbar insertion when `<head>` tag has attributes (@kelson42 #440)
- Better headbar insertion (after charset definition) (@kelson42 #442)
* New OPDS Stream v2 :
- Add a list of categories (@veloman-yunkan)
- Support for partial entries (@veloman-yunkan #602)
- Support multiple icons size in the OPDS stream (@veloman-yunkan #577 #630)
- Add language endpoint to catalog (@veloman-yunkan #553)
- Add illustration API to get the illustration of a book (@mgautierfr #645)
- OPDS search can now filter books by category (@veloman-yunkan #459)
* Library improvements :
- Allow the library to be live reloaded when the library.xml changes (@veloman-yunkan #636)
- Properly handle removing of book from the library (@veloman-yunkan #485)
- Use xapian to search for books in the library (@veloman-yunkan #460, #488)
* Added methods/functions :
- Fix `fileExist` and introduce `fileReadable` (@juuz0 #668)
- Add `getVersions` and `printVersions` functions (@kelson42 #665)
- Add `getNetworkInterfaces()` and `getBestPublicIP()` functions (@juuz0 #622)
- Add `get_zimid()` method to the search result (@maneeshpm #510)
* Various improvements :
- Better secret value for aria2c rpc (@juuz0 #666)
- Avoid duplicated Archive/Reader in the Searcher (@veloman-yunkan #648)
- Add basic documentation (@mgautierfr #640)
- Do not use Reader internally (@maneeshpm #536 #576)
- Remove dependency headers from our public headers (@mgautierfr #574)
- Downloader now doesn't write metalink on the filesystem (@kelson42 #502)
- Support opening a zim file using a fd (@veloman-yukan #429)
- Use C++11 std::thread instead of pthread (@mgautierfr #445)
- [READER] Do not crash if zim file has no `Counter` metadata (@mgautierfr #449)
- Ensure libzim dependency is compiled with xapian (@mgautierfr #434)
- Support video and audio mimetype in `getMediaCount` (@kelson42 #439)
- Better parsing of the counterMap (@kelson42 #437)
- Adapt libkiwix to libzim 7.0.0 (@mgautierfr #428)
- Remove deprecated methods (@mgautierfr)
- CI: Build package for Ubuntu Hirsute, Impish and Jammy (@legoktm #431 #568) and remove Groovy
- Fix compilation for FreeBSD (@swills g#432)
- Many fixes and improvement (@MananJethwany, @maneeshpm, @veloman-yunkan, @mgautierfr)
kiwix-lib 9.4.1
===============
* Fix `M/Counter` parsing.
* [SERVER] Adjust body padding-top for taskbar
* Fix potential crash when stopping a server not started.
* Various fix in build system and the CI.
kiwix-lib 9.4.0
===============
* [SERVER] Make the headers handling case insensitive.
* [SERVER] Make server answer 204 http status code for empty search
* [PACKAGING] Made CI build deb packages.
* [SERVER] Add a way to prevent taskbar and external link blocker at article
level.
* Fix meson file to be compatible with meson 0.45
* [SERVER] Update search requests to use pageStart/pageLength instead of
pageStart/pageEnd arguments.
* [SERVER] Set a fixed favicon size in the main page.
* [SERVER] Refactor the response system code to better handling future new
libzim api.
* Fix segmentation fault around exchange with aria2 process making
kiwix-desktop crash at exit.
kiwix-lib 9.3.1
===============

148
README.md
View File

@@ -1,16 +1,15 @@
Libkiwix
========
Kiwix library
=============
The Libkiwix provides the [Kiwix](https://kiwix.org) software suite
core. It contains the code shared by all Kiwix ports (Windows,
The Kiwix library provides the [Kiwix](https://kiwix.org) software
suite core. It contains the code shared by all Kiwix ports (Windows,
GNU/Linux, macOS, Android, iOS, ...).
[![Release](https://img.shields.io/github/v/tag/kiwix/libkiwix?label=release&sort=semver)](https://download.kiwix.org/release/libkiwix/)
[![Repositories](https://img.shields.io/repology/repositories/libkiwix?label=repositories)](https://github.com/kiwix/libkiwix/wiki/Repology)
[![Build Status](https://github.com/kiwix/libkiwix/workflows/CI/badge.svg?query=branch%3Amain)](https://github.com/kiwix/libkiwix/actions?query=branch%3Amain)
[![Doc](https://readthedocs.org/projects/libkiwix/badge/?style=flat)](https://libkiwix.readthedocs.org/en/latest/?badge=latest)
[![CodeFactor](https://www.codefactor.io/repository/github/kiwix/libkiwix/badge)](https://www.codefactor.io/repository/github/kiwix/libkiwix)
[![Codecov](https://codecov.io/gh/kiwix/libkiwix/branch/main/graph/badge.svg)](https://codecov.io/gh/kiwix/libkiwix)
[![Download](https://api.bintray.com/packages/kiwix/kiwix/kiwixlib/images/download.svg)](https://bintray.com/kiwix/kiwix/kiwixlib/_latestVersion)
[![AUR version](https://img.shields.io/aur/version/kiwix-lib)](https://aur.archlinux.org/packages/kiwix-lib/)
[![Build Status](https://github.com/kiwix/kiwix-lib/workflows/CI/badge.svg?query=branch%3Amaster)](https://github.com/kiwix/kiwix-lib/actions?query=branch%3Amaster)
[![CodeFactor](https://www.codefactor.io/repository/github/kiwix/kiwix-lib/badge)](https://www.codefactor.io/repository/github/kiwix/kiwix-lib)
[![Codecov](https://codecov.io/gh/kiwix/kiwix-lib/branch/master/graph/badge.svg)](https://codecov.io/gh/kiwix/kiwix-lib)
[![License: GPL v3](https://img.shields.io/badge/License-GPLv3-blue.svg)](https://www.gnu.org/licenses/gpl-3.0)
Disclaimer
@@ -18,35 +17,30 @@ Disclaimer
This document assumes you have a little knowledge about software
compilation. If you experience difficulties with the dependencies or
with the Libkiwix compilation itself, we recommend to have a look to
[kiwix-build](https://github.com/kiwix/kiwix-build).
with the Kiwix library compilation itself, we recommend having a look
at [kiwix-build](https://github.com/kiwix/kiwix-build).
Preamble
--------
Although the Libkiwix can be (cross-)compiled on/for many systems, the
following documentation explains how to do it on POSIX ones. It is
primarily intended for GNU/Linux systems and has been tested on recent
releases of Ubuntu and Fedora.
Although the Kiwix library can be (cross-)compiled on/for many
systems, the following documentation explains how to do it on POSIX
ones. It is primarily intended for GNU/Linux systems and has been tested
on recent releases of Ubuntu and Fedora.
Dependencies
------------
The Libkiwix relies on many third party software libraries. They are
prerequisites to the Libkiwix compilation. Following libraries need to
be available:
The Kiwix library relies on many third-party software libraries. They
are prerequisites to the Kiwix library compilation. The following
libraries need to be available:
* [ICU](https://site.icu-project.org/) (package `libicu-dev` on Ubuntu)
* [ZIM](https://openzim.org/) (package `libzim-dev` on Ubuntu)
* [Pugixml](https://pugixml.org/) (package `libpugixml-dev` on Ubuntu)
* [Mustache](https://github.com/kainjow/Mustache) (Just copy the
header `mustache.hpp` somewhere it can be found by the compiler and/or
set CPPFLAGS with correct `-I` option). Use Mustache version 4.1 or above.
* [Libcurl](https://curl.se/libcurl) (`libcurl4-gnutls-dev`, `libcurl4-nss-dev` or `libcurl4-openssl-dev` on Ubuntu)
* [Microhttpd](https://www.gnu.org/software/libmicrohttpd) (package `libmicrohttpd-dev` on Ubuntu)
* [Zlib](https://zlib.net/) (package `zlib1g-dev` on Ubuntu)
To test the code:
* [Google Test](https://github.com/google/googletest) (package `googletest` on Ubuntu)
set CPPFLAGS with correct `-I` option). Use Mustache version 3 only.
The following dependency needs to be available at runtime:
* [Aria2](https://aria2.github.io/) (package `aria2` on Ubuntu)
@@ -58,12 +52,12 @@ In the worse case, you will have to download and compile bleeding edge
version by hand.
If you want to install these dependencies locally, then use the
`libkiwix` directory as install prefix.
`kiwix-lib` directory as install prefix.
Environment
-------------
The Libkiwix builds using [Meson](https://mesonbuild.com/) version
The Kiwix library builds using [Meson](https://mesonbuild.com/) version
0.45 or higher. Meson relies itself on Ninja, pkg-config and few other
compilation tools.
@@ -79,7 +73,7 @@ section.
Compilation
-----------
Once all dependencies are installed, you can compile the Libkiwix
Once all dependencies are installed, you can compile the Kiwix library
with:
```bash
meson . build
@@ -87,47 +81,12 @@ ninja -C build
```
By default, it will compile dynamic linked libraries. All binary files
will be created in the `build` directory created automatically by
will be created in the "build" directory created automatically by
Meson. If you want statically linked libraries, you can add
`--default-library=static` option to the Meson command.
Depending on your system, `ninja` may be called `ninja-build`.
The android wrapper uses deprecated methods of libkiwix so it cannot be compiled
with `werror=true` (the default). So you must pass `-Dwerror=false` to meson:
```bash
meson . build -Dwrapper=android -Dwerror=false
ninja -C build
```
Static files compilation
------------------------
Libkiwix has a few static files 'compiled' within the binary
code. This is mostly Javascript/HTML/pictures necessary for the HTTP
daemon.
These static files are available in the `static` directory and are
compiled by custom Python code available in this repository `scripts`
directory. This happens automatically at compilation time without any
additional command to run.
To avoid HTTP caching issues, the URLs (to the static content) are
appended with a `cacheid` parameter (this is called "cache
busting"). This `cacheid` value is derived from the
[sha1sum](https://en.wikipedia.org/wiki/Sha1sum) of each targeted
static file. As a consequence, each time you change a static file, the
corresponding `cacheid` value will change.
To properly test this feature, this `cacheid` needs to be added
manually to the automated tests and has to be committed. After
modifying the needed static file, [run the automated
tests](#Testing). They will fail, but the inspection of the testing
log will give you the new `cacheid` value(s). Finally update
`test/server.cpp` with the appropriate `cacheid` value(s) which have
changed.
Testing
-------
@@ -140,7 +99,7 @@ meson test
Installation
------------
If you want to install the Libkiwix and the headers you just have
If you want to install the Kiwix library and the headers you just have
compiled on your system, here we go:
```bash
ninja -C build install
@@ -151,7 +110,7 @@ where you want to install the libraries. After the installation
succeeded, you may need to run `ldconfig` (as `root`).
Uninstallation
--------------
------------
If you want to uninstall the Kiwix library:
```bash
@@ -161,55 +120,6 @@ ninja -C build uninstall
Like for the installation, you might need to run the command as `root`
(or using `sudo`).
Custom Index Page
-----------------
To use a custom welcome page, pass the `customIndexPage` argument to `kiwix::internalServer()` or use `kiwix::server->setCustomIndexTemplate()`.
(Note: when using a custom HTML file, write all external links as absolute paths.)
to create a HTML template with custom JS you need to have a look at various OPDS based endpoints as mentioned [here](https://wiki.kiwix.org/wiki/OPDS) to load books.
To use JS provided by kiwix-serve you can use the following template to start with ->
```
<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width,initial-scale=1" />
<title><-- Custom Tittle --></title>
<script src="{{root}}/skin/isotope.pkgd.min.js" defer></script>
<script src="{{root}}/skin/iso6391To3.js"></script>
<script type="text/javascript" src="{{root}}/skin/index.js" defer></script>
</head>
<body>
</body>
</html>
```
- To get books listed using `index.js` add - `<div class="book__list"></div>` under body tag.
- To get number of books listed add - `<h3 class="kiwixHomeBody__results"></h3>` under body tag.
- To add language select box add - `<select id="languageFilter"></select>` under body tag.
- To add category select box add - `<select id="categoryFilter"></select>` under body tag.
- To add search box for books use following form -
```
<form id='kiwixSearchForm'>
<input type="text" name="q" placeholder="Search" id="searchFilter" class='kiwixSearch filter'>
<input type="submit" class="kiwixButton" value="Search"/>
</form>
```
If you compile Libmicrohttpd manually, you might need to compile it
without GNU TLS; otherwise a bug will prevent further
compilation.
If the compilation still fails, you might need to get a more recent
version of a dependency than the one packaged by your Linux
distribution. Try then with a source tarball distributed by the
problematic upstream project or even directly from the source code
repository.
Troubleshooting
---------------
@@ -232,6 +142,12 @@ cp ninja ../bin
cd ..
```
If the compilation still fails, you might need to get a more recent
version of a dependency than the one packaged by your Linux
distribution. Try then with a source tarball distributed by the
problematic upstream project or even directly from the source code
repository.
License
-------

View File

@@ -26,10 +26,10 @@ task writePom {
project {
groupId 'org.kiwix.kiwixlib'
artifactId 'kiwixlib'
version '10.1.1' + (System.env.KIWIXLIB_BUILDVERSION == null ? '' : '-'+System.env.KIWIXLIB_BUILDVERSION)
version '9.3.1' + (System.env.KIWIXLIB_BUILDVERSION == null ? '' : '-'+System.env.KIWIXLIB_BUILDVERSION)
packaging 'aar'
name 'kiwixlib'
url 'https://github.com/kiwix/libkiwix'
url 'https://github.com/kiwix/kiwix-lib'
licenses {
license {
name 'GPLv3'
@@ -44,9 +44,9 @@ task writePom {
}
}
scm {
connection 'https://github.com/kiwix/libkiwix.git'
developerConnection 'https://github.com/kiwix/libkiwix.git'
url 'https://github.com/kiwix/libkiwix'
connection 'https://github.com/kiwix/kiwix-lib.git'
developerConnection 'https://github.com/kiwix/kiwix-lib.git'
url 'https://github.com/kiwix/kiwix-lib'
}
}
}.withXml {

15
debian/control vendored
View File

@@ -4,7 +4,7 @@ Maintainer: Kiwix team <kiwix@kiwix.org>
Build-Depends: debhelper-compat (= 13),
meson,
pkg-config,
libzim-dev (>= 7.2.0~),
libzim-dev (>= 6.1.8),
libcurl4-gnutls-dev,
libicu-dev,
libgtest-dev,
@@ -15,19 +15,18 @@ Build-Depends: debhelper-compat (= 13),
zlib1g-dev
Standards-Version: 4.5.0
Section: libs
Homepage: https://github.com/kiwix/libkiwix
Homepage: https://github.com/kiwix/kiwix-lib
Rules-Requires-Root: no
Package: libkiwix-dev
Section: libdevel
Architecture: any
Multi-Arch: same
Depends: libkiwix10 (= ${binary:Version}), ${misc:Depends}, python3,
libzim-dev (>= 7.2.0~),
Depends: libkiwix9 (= ${binary:Version}), ${misc:Depends}, python3,
libzim-dev (>= 6.0.0),
libicu-dev,
libpugixml-dev,
libcurl4-gnutls-dev,
libmicrohttpd-dev
libcurl4-gnutls-dev
Description: library of common code for Kiwix (development)
Kiwix is an offline Wikipedia reader. libkiwix provides the
software core for Kiwix, and contains the code shared by all
@@ -35,11 +34,11 @@ Description: library of common code for Kiwix (development)
.
This package contains development files.
Package: libkiwix10
Package: libkiwix9
Architecture: any
Multi-Arch: same
Depends: ${shlibs:Depends}, ${misc:Depends}, aria2
Conflicts: libkiwix0, libkiwix3, libkiwix9
Conflicts: libkiwix0, libkiwix3
Description: library of common code for Kiwix
Kiwix is an offline Wikipedia reader. libkiwix provides the
software core for Kiwix, and contains the code shared by all

View File

@@ -1,2 +1 @@
usr/share/man/man1/kiwix-compile-resources.1*
usr/share/man/man1/kiwix-compile-i18n.1*
usr/share/man/man1/kiwix-compile-resources.1

View File

2
docs/.gitignore vendored
View File

@@ -1,2 +0,0 @@
api
xml

View File

@@ -1,72 +0,0 @@
# Configuration file for the Sphinx documentation builder.
#
# This file only contains a selection of the most common options. For a full
# list see the documentation:
# https://www.sphinx-doc.org/en/master/usage/configuration.html
# -- Path setup --------------------------------------------------------------
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#
import os
# import sys
# sys.path.insert(0, os.path.abspath('.'))
# -- Project information -----------------------------------------------------
project = 'libkiwix'
copyright = '2022, libkiwix-team'
author = 'libkiwix-team'
# -- General configuration ---------------------------------------------------
on_rtd = os.environ.get('READTHEDOCS', None) == 'True'
# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
'breathe',
'exhale'
]
# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path.
exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
if not on_rtd:
html_theme = 'sphinx_rtd_theme'
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']
# Breathe project table: maps the project name to the directory holding its
# Doxygen XML output (path is relative to this docs directory).
breathe_projects = {
"libkiwix": "./xml"
}
# Project used by Breathe when a directive does not name one explicitly.
breathe_default_project = 'libkiwix'
# Settings for the Exhale extension (see the Exhale documentation for the
# exact meaning of each key).
exhale_args = {
# Generated API pages go under ./api, with ref_api.rst as the root document.
"containmentFolder": "./api",
"rootFileName": "ref_api.rst",
"rootFileTitle": "Reference API",
# Prefix removed from file paths shown in the generated pages.
"doxygenStripFromPath":"..",
"treeViewIsBootstrap": True,
"createTreeView" : True,
# Let Exhale invoke Doxygen itself, feeding it the configuration below
# (the public headers in ../include are the only input).
"exhaleExecutesDoxygen": True,
"exhaleDoxygenStdin": "INPUT = ../include"
}
primary_domain = 'cpp'
highlight_language = 'cpp'

View File

@@ -1,14 +0,0 @@
.. libkiwix documentation master file, created by
sphinx-quickstart on Fri Jul 24 15:40:50 2020.
You can adapt this file completely to your liking, but it should at least
contain the root `toctree` directive.
Welcome to libkiwix's documentation!
====================================
.. toctree::
:maxdepth: 2
:caption: Contents:
usage
api/ref_api

View File

@@ -1,7 +0,0 @@
# Locate the `sphinx-build` executable for the build machine (native: true).
sphinx = find_program('sphinx-build', native:true)
# `doc` run target: invoke sphinx-build with the HTML builder (-bhtml),
# reading from this source directory and writing into the matching build
# directory.
sphinx_target = run_target('doc',
command: [sphinx, '-bhtml',
meson.current_source_dir(),
meson.current_build_dir()])

View File

@@ -1,2 +0,0 @@
breathe
exhale

View File

@@ -1,15 +0,0 @@
Libkiwix programming
====================
Introduction
------------
libkiwix is written in C++. To use the library, you need the include files of libkiwix and have
to link against libzim.
Errors are handled with exceptions. When something goes wrong, libkiwix throws an error,
which is always derived from std::exception.
All classes are defined in the namespace kiwix.
libkiwix is a set of tools to manage zim files and provide some common functionality.

36
format_code.sh Executable file
View File

@@ -0,0 +1,36 @@
#!/usr/bin/env bash
#
# Reformat a fixed list of C++ and Java sources in place with clang-format.
# `-style=file` makes clang-format take its style from a .clang-format file
# found alongside (or above) each formatted file.
#
# The paths below are relative, so run this from the directory that contains
# include/ and src/ (presumably the repository root).
#
# The shebang goes through `env` because bash is not installed at
# /usr/bin/bash on every system.

# Files to format; each path is printed before it is rewritten.
files=(
  "include/library.h"
  "include/common/stringTools.h"
  "include/common/pathTools.h"
  "include/common/otherTools.h"
  "include/common/regexTools.h"
  "include/common/networkTools.h"
  "include/manager.h"
  "include/reader.h"
  "include/kiwix.h"
  "include/xapianSearcher.h"
  "include/searcher.h"
  "src/library.cpp"
  "src/android/kiwix.cpp"
  "src/android/org/kiwix/kiwixlib/JNIKiwixBool.java"
  "src/android/org/kiwix/kiwixlib/JNIKiwix.java"
  "src/android/org/kiwix/kiwixlib/JNIKiwixString.java"
  "src/android/org/kiwix/kiwixlib/JNIKiwixInt.java"
  "src/searcher.cpp"
  "src/common/pathTools.cpp"
  "src/common/regexTools.cpp"
  "src/common/otherTools.cpp"
  "src/common/networkTools.cpp"
  "src/common/stringTools.cpp"
  "src/xapianSearcher.cpp"
  "src/manager.cpp"
  "src/reader.cpp"
)

for file in "${files[@]}"
do
  # Quote the expansion so each path reaches the command as a single
  # argument, with no word splitting or glob expansion.
  echo "$file"
  clang-format -i -style=file "$file"
done

View File

@@ -21,54 +21,28 @@
#define KIWIX_BOOK_H
#include <string>
#include <vector>
#include <memory>
#include <mutex>
#include "common.h"
namespace pugi {
class xml_node;
}
namespace zim {
class Archive;
}
namespace kiwix
{
class OPDSDumper;
class Reader;
/**
* A class to store information about a book (a zim file)
*/
class Book
{
public: // types
class Illustration
{
friend class Book;
public:
uint16_t width = 48;
uint16_t height = 48;
std::string mimeType;
std::string url;
const std::string& getData() const;
private:
mutable std::string data;
mutable std::mutex mutex;
};
typedef std::vector<std::shared_ptr<const Illustration>> Illustrations;
public: // functions
public:
Book();
~Book();
bool update(const Book& other);
void update(const zim::Archive& archive);
void update(const Reader& reader);
void updateFromXml(const pugi::xml_node& node, const std::string& baseDir);
void updateFromOpds(const pugi::xml_node& node, const std::string& urlHost);
std::string getHumanReadableIdFromPath() const;
@@ -85,7 +59,6 @@ class Book
const std::string& getDate() const { return m_date; }
const std::string& getUrl() const { return m_url; }
const std::string& getName() const { return m_name; }
std::string getCategory() const;
const std::string& getTags() const { return m_tags; }
std::string getTagStr(const std::string& tagName) const;
bool getTagBool(const std::string& tagName) const;
@@ -94,13 +67,9 @@ class Book
const uint64_t& getArticleCount() const { return m_articleCount; }
const uint64_t& getMediaCount() const { return m_mediaCount; }
const uint64_t& getSize() const { return m_size; }
DEPRECATED const std::string& getFavicon() const;
DEPRECATED const std::string& getFaviconUrl() const;
DEPRECATED const std::string& getFaviconMimeType() const;
Illustrations getIllustrations() const;
std::shared_ptr<const Illustration> getIllustration(unsigned int size) const;
const std::string& getFavicon() const;
const std::string& getFaviconUrl() const { return m_faviconUrl; }
const std::string& getFaviconMimeType() const { return m_faviconMimeType; }
const std::string& getDownloadId() const { return m_downloadId; }
void setReadOnly(bool readOnly) { m_readOnly = readOnly; }
@@ -121,20 +90,17 @@ class Book
void setArticleCount(uint64_t articleCount) { m_articleCount = articleCount; }
void setMediaCount(uint64_t mediaCount) { m_mediaCount = mediaCount; }
void setSize(uint64_t size) { m_size = size; }
void setFavicon(const std::string& favicon) { m_favicon = favicon; }
void setFaviconMimeType(const std::string& faviconMimeType) { m_faviconMimeType = faviconMimeType; }
void setDownloadId(const std::string& downloadId) { m_downloadId = downloadId; }
private: // functions
std::string getCategoryFromTags() const;
const Illustration& getDefaultIllustration() const;
protected: // data
protected:
std::string m_id;
std::string m_downloadId;
std::string m_path;
bool m_pathValid = false;
std::string m_title;
std::string m_description;
std::string m_category;
std::string m_language;
std::string m_creator;
std::string m_publisher;
@@ -148,11 +114,9 @@ class Book
uint64_t m_mediaCount = 0;
bool m_readOnly = false;
uint64_t m_size = 0;
Illustrations m_illustrations;
// Used as the return value of getDefaultIllustration() when no default
// illustration is found in the book
static const Illustration missingDefaultIllustration;
mutable std::string m_favicon;
std::string m_faviconUrl;
std::string m_faviconMimeType;
};
}

View File

@@ -23,6 +23,7 @@
#include <string>
#include <vector>
#include <map>
#include <pthread.h>
#include <memory>
#include <stdexcept>

192
include/entry.h Normal file
View File

@@ -0,0 +1,192 @@
/*
* Copyright 2018 Matthieu Gautier <mgautier@kymeria.fr>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*/
#ifndef KIWIX_ENTRY_H
#define KIWIX_ENTRY_H
#include <stdio.h>
#include <zim/article.h>
#include <exception>
#include <string>
#include "common.h"
using namespace std;
namespace kiwix
{
/**
* Exception thrown when a requested entry does not exist
* (e.g. by Entry::getRedirectEntry() on an entry that is not a redirect).
*/
class NoEntry : public std::exception {};
/**
* An Entry represents an entry in a zim file.
*
* It wraps a zim::Article. A default-constructed Entry is invalid; validity
* can be tested through the explicit conversion to bool.
*/
class Entry
{
public:
/**
* Default constructor.
*
* Construct an invalid entry.
*/
Entry() = default;
/**
* Construct an entry making reference to a zim article.
*
* @param article a zim::Article object
*/
Entry(zim::Article article);
virtual ~Entry() = default;
/**
* Get the path of the entry.
*
* The path is the "key" of an entry.
*
* @return the path of the entry.
*/
std::string getPath() const;
/**
* Get the title of the entry.
*
* @return the title of the entry.
*/
std::string getTitle() const;
/**
* Get the content of the entry.
*
* The string is a copy of the content.
* If you don't want to make a copy, use getBlob().
*
* @return the content of the entry.
*/
std::string getContent() const;
/**
* Get the blob of the entry.
*
* A blob makes reference to the content without copying it.
*
* @param offset The starting offset of the blob.
* @return the blob of the entry.
*/
zim::Blob getBlob(offset_type offset = 0) const;
/**
* Get the blob of the entry.
*
* A blob makes reference to the content without copying it.
*
* @param offset The starting offset of the blob.
* @param size The size of the blob.
* @return the blob of the entry.
*/
zim::Blob getBlob(offset_type offset, size_type size) const;
/**
* Get the info for direct access to the content of the entry.
*
* Some entries (i.e. binary ones) have their content stored plain
* in the zim file. Knowing the offset where the content is stored,
* a user can directly read the content in the zim file, bypassing
* kiwix-lib/libzim.
*
* @return A pair specifying where to read the content.
* The string is the real file to read (may be different from the .zim
* file if the zim is split).
* The offset is the offset to read in the file.
* Return <"",0> if it is not possible to read directly.
*/
std::pair<std::string, offset_type> getDirectAccessInfo() const;
/**
* Get the size of the entry.
*
* @return the size of the entry.
*/
size_type getSize() const;
/**
* Get the mime_type of the entry.
*
* @return the mime_type of the entry.
*/
std::string getMimetype() const;
/**
* Tell whether the entry is a redirect entry.
*
* @return True if the entry is a redirect.
*/
bool isRedirect() const;
/**
* Tell whether the entry is a link target entry.
*
* @return True if the entry is a link target.
*/
bool isLinkTarget() const;
/**
* Tell whether the entry is a deleted entry.
*
* @return True if the entry is a deleted entry.
*/
bool isDeleted() const;
/**
* Get the entry pointed to by this entry.
*
* @return the entry pointed to.
* @throw NoEntry if the entry is not a redirect entry.
*/
Entry getRedirectEntry() const;
/**
* Get the final entry pointed to by this entry.
*
* Follow the redirections until a "not redirecting" entry is found.
* If the entry is not a redirect entry, return the entry itself.
*
* @return the final entry.
*/
Entry getFinalEntry() const;
/**
* Convert the entry to a boolean value.
*
* @return True if the entry is valid.
*/
explicit operator bool() const { return good(); }
private:
// The zim article wrapped by this entry.
zim::Article article;
// NOTE(review): presumably caches the article resolved by getFinalEntry();
// it is mutable so const methods can update it -- confirm in entry.cpp.
mutable zim::Article final_article;
// An entry is valid exactly when the wrapped article reports good().
bool good() const { return article.good(); }
};
}
#endif // KIWIX_ENTRY_H

View File

@@ -22,4 +22,4 @@
#include "library.h"
#endif
#endif

View File

@@ -24,9 +24,6 @@
#include <vector>
#include <map>
#include <memory>
#include <mutex>
#include <zim/archive.h>
#include <zim/search.h>
#include "book.h"
#include "bookmark.h"
@@ -38,7 +35,6 @@ namespace kiwix
{
class OPDSDumper;
class Library;
enum supportedListSortBy { UNSORTED, TITLE, SIZE, DATE, CREATOR, PUBLISHER };
enum supportedListMode {
@@ -52,23 +48,18 @@ enum supportedListMode {
};
class Filter {
public: // types
using Tags = std::vector<std::string>;
private: // data
private:
uint64_t activeFilters;
Tags _acceptTags;
Tags _rejectTags;
std::string _category;
std::vector<std::string> _acceptTags;
std::vector<std::string> _rejectTags;
std::string _lang;
std::string _publisher;
std::string _creator;
size_t _maxSize;
std::string _query;
bool _queryIsPartial;
std::string _name;
public: // functions
public:
Filter();
~Filter() = default;
@@ -102,95 +93,33 @@ class Filter {
/**
* Set the filter to only accept books with the corresponding tags.
*/
Filter& acceptTags(const Tags& tags);
Filter& rejectTags(const Tags& tags);
Filter& acceptTags(std::vector<std::string> tags);
Filter& rejectTags(std::vector<std::string> tags);
Filter& category(std::string category);
/**
* Set the filter to only accept books in the specified language.
*
* Multiple languages can be specified as a comma-separated list (in
* which case a book in any of those languages will match).
*/
Filter& lang(std::string lang);
Filter& publisher(std::string publisher);
Filter& creator(std::string creator);
Filter& maxSize(size_t size);
Filter& query(std::string query, bool partial=true);
Filter& query(std::string query);
Filter& name(std::string name);
bool hasQuery() const;
const std::string& getQuery() const { return _query; }
bool queryIsPartial() const { return _queryIsPartial; }
bool hasName() const;
const std::string& getName() const { return _name; }
bool hasCategory() const;
const std::string& getCategory() const { return _category; }
bool hasLang() const;
const std::string& getLang() const { return _lang; }
bool hasPublisher() const;
const std::string& getPublisher() const { return _publisher; }
bool hasCreator() const;
const std::string& getCreator() const { return _creator; }
const Tags& getAcceptTags() const { return _acceptTags; }
const Tags& getRejectTags() const { return _rejectTags; }
private: // functions
friend class Library;
bool accept(const Book& book) const;
};
class ZimSearcher : public zim::Searcher
{
public:
explicit ZimSearcher(zim::Searcher&& searcher)
: zim::Searcher(searcher)
{}
std::unique_lock<std::mutex> getLock() {
return std::unique_lock<std::mutex>(m_mutex);
}
virtual ~ZimSearcher() = default;
private:
std::mutex m_mutex;
};
/**
* A Library store several books.
*/
class Library
{
// all data fields must be added in LibraryBase
mutable std::mutex m_mutex;
public:
typedef uint64_t Revision;
typedef std::vector<std::string> BookIdCollection;
typedef std::map<std::string, int> AttributeCounts;
typedef std::set<std::string> BookIdSet;
std::map<std::string, kiwix::Book> m_books;
std::map<std::string, std::shared_ptr<Reader>> m_readers;
std::vector<kiwix::Bookmark> m_bookmarks;
public:
Library();
~Library();
/**
* Library is not a copiable object. However it can be moved.
*/
Library(const Library& ) = delete;
Library(Library&& );
void operator=(const Library& ) = delete;
Library& operator=(Library&& );
/**
* Add a book to the library.
*
@@ -203,11 +132,6 @@ class Library
*/
bool addBook(const Book& book);
/**
* A self-explanatory alias for addBook()
*/
bool addOrUpdateBook(const Book& book) { return addBook(book); }
/**
* Add a bookmark to the library.
*
@@ -224,18 +148,9 @@ class Library
*/
bool removeBookmark(const std::string& zimId, const std::string& url);
// XXX: This is a non-thread-safe operation
const Book& getBookById(const std::string& id) const;
// XXX: This is a non-thread-safe operation
const Book& getBookByPath(const std::string& path) const;
Book getBookByIdThreadSafe(const std::string& id) const;
std::shared_ptr<zim::Archive> getArchiveById(const std::string& id);
std::shared_ptr<ZimSearcher> getSearcherById(const std::string& id) {
return getSearcherByIds(BookIdSet{id});
}
std::shared_ptr<ZimSearcher> getSearcherByIds(const BookIdSet& ids);
Book& getBookById(const std::string& id);
Book& getBookByPath(const std::string& path);
std::shared_ptr<Reader> getReaderById(const std::string& id);
/**
* Remove a book from the library.
@@ -251,7 +166,7 @@ class Library
* @param path the path of the file to write to.
* @return True if the library has been correctly saved.
*/
bool writeToFile(const std::string& path) const;
bool writeToFile(const std::string& path);
/**
* Write the library bookmarks to a file.
@@ -259,7 +174,7 @@ class Library
* @param path the path of the file to write to.
* @return True if the library has been correctly saved.
*/
bool writeBookmarksToFile(const std::string& path) const;
bool writeBookmarksToFile(const std::string& path);
/**
* Get the number of book in the library.
@@ -268,56 +183,53 @@ class Library
* @param remoteBooks If we must count remote books (books with an url)
* @return The number of books.
*/
unsigned int getBookCount(const bool localBooks, const bool remoteBooks) const;
unsigned int getBookCount(const bool localBooks, const bool remoteBooks);
/**
* Get all languages of the books in the library.
* Get all languages of the books in the library.
*
* @return A list of languages.
*/
std::vector<std::string> getBooksLanguages() const;
/**
* Get all languages of the books in the library with counts.
*
* @return A list of languages with the count of books in each language.
*/
AttributeCounts getBooksLanguagesWithCounts() const;
/**
* Get all categories of the books in the library.
*
* @return A list of categories.
*/
std::vector<std::string> getBooksCategories() const;
std::vector<std::string> getBooksLanguages();
/**
* Get all book creators of the books in the library.
*
* @return A list of book creators.
*/
std::vector<std::string> getBooksCreators() const;
std::vector<std::string> getBooksCreators();
/**
* Get all book publishers of the books in the library.
*
* @return A list of book publishers.
*/
std::vector<std::string> getBooksPublishers() const;
std::vector<std::string> getBooksPublishers();
/**
* Get all bookmarks.
*
* @return A list of bookmarks
*/
const std::vector<kiwix::Bookmark> getBookmarks(bool onlyValidBookmarks = true) const;
const std::vector<kiwix::Bookmark> getBookmarks(bool onlyValidBookmarks = true);
/**
* Get all book ids of the books in the library.
*
* @return A list of book ids.
*/
BookIdCollection getBooksIds() const;
std::vector<std::string> getBooksIds();
/**
* Filter the library and generate a new one with the keep elements.
*
* This is equivalent to `listBookIds(ALL, UNSORTED, search)`.
*
* @param search List only books with search in the title or description.
* @return The list of bookIds corresponding to the query.
*/
DEPRECATED std::vector<std::string> filter(const std::string& search);
/**
* Filter the library and return the ids of the kept elements.
@@ -325,7 +237,7 @@ class Library
* @param filter The filter to use.
* @return The list of bookIds corresponding to the filter.
*/
BookIdCollection filter(const Filter& filter) const;
std::vector<std::string> filter(const Filter& filter);
/**
@@ -335,44 +247,43 @@ class Library
* @param comparator how to sort the books
* @return The sorted list of books
*/
void sort(BookIdCollection& bookIds, supportedListSortBy sortBy, bool ascending) const;
void sort(std::vector<std::string>& bookIds, supportedListSortBy sortBy, bool ascending);
/**
* Return the current revision of the library.
* List books in the library.
*
* The revision of the library is updated (incremented by one) by
* the addBook() and removeBookById() operations.
*
* @return Current revision of the library.
* @param mode The mode of listing :
* - LOCAL  : list only local books (with a path).
* - REMOTE : list only remote books (with an url).
* - VALID  : list only valid books (without a path or with a
* path pointing to a valid zim file).
* - NOLOCAL : list only books without valid path.
* - NOREMOTE : list only books without url.
* - NOVALID : list only books not valid.
* - ALL : Do not do any filter (LOCAL or REMOTE)
* - Flags can be combined.
* @param sortBy Attribute to sort by the book list.
* @param search List only books with search in the title, description.
* @param language List only books in this language.
* @param creator List only books of this creator.
* @param publisher List only books of this publisher.
* @param maxSize Do not list book bigger than maxSize.
* Set to 0 to cancel this filter.
* @return The list of bookIds corresponding to the query.
*/
Revision getRevision() const;
/**
* Remove books that have not been updated since the specified revision.
*
* @param rev the library revision to use
* @return Count of books that were removed by this operation.
*/
uint32_t removeBooksNotUpdatedSince(Revision rev);
DEPRECATED std::vector<std::string> listBooksIds(
int supportedListMode = ALL,
supportedListSortBy sortBy = UNSORTED,
const std::string& search = "",
const std::string& language = "",
const std::string& creator = "",
const std::string& publisher = "",
const std::vector<std::string>& tags = {},
size_t maxSize = 0);
friend class OPDSDumper;
friend class libXMLDumper;
private: // types
typedef const std::string& (Book::*BookStrPropMemFn)() const;
struct Impl;
private: // functions
AttributeCounts getBookAttributeCounts(BookStrPropMemFn p) const;
std::vector<std::string> getBookPropValueSet(BookStrPropMemFn p) const;
BookIdCollection filterViaBookDB(const Filter& filter) const;
void updateBookDB(const Book& book);
void dropCache(const std::string& bookId);
private: //data
std::unique_ptr<Impl> mp_impl;
};
}
#endif

View File

@@ -38,7 +38,7 @@ class LibXMLDumper
{
public:
LibXMLDumper() = default;
LibXMLDumper(const Library* library);
LibXMLDumper(Library* library);
~LibXMLDumper();
/**
@@ -69,10 +69,10 @@ class LibXMLDumper
*
* @param library The library to dump.
*/
void setLibrary(const Library* library) { this->library = library; }
void setLibrary(Library* library) { this->library = library; }
protected:
const kiwix::Library* library;
kiwix::Library* library;
std::string baseDir;
private:
void handleBook(Book book, pugi::xml_node root_node);

View File

@@ -22,10 +22,10 @@
#include "book.h"
#include "library.h"
#include "reader.h"
#include <string>
#include <vector>
#include <memory>
namespace pugi {
class xml_document;
@@ -34,25 +34,26 @@ class xml_document;
namespace kiwix
{
class LibraryManipulator
{
public: // functions
explicit LibraryManipulator(Library* library);
virtual ~LibraryManipulator();
class LibraryManipulator {
public:
virtual ~LibraryManipulator() {}
virtual bool addBookToLibrary(Book book) = 0;
virtual void addBookmarkToLibrary(Bookmark bookmark) = 0;
};
Library& getLibrary() const { return library; }
bool addBookToLibrary(const Book& book);
void addBookmarkToLibrary(const Bookmark& bookmark);
uint32_t removeBooksNotUpdatedSince(Library::Revision rev);
protected: // overrides
virtual void bookWasAddedToLibrary(const Book& book);
virtual void bookmarkWasAddedToLibrary(const Bookmark& bookmark);
virtual void booksWereRemovedFromLibrary();
private: // data
kiwix::Library& library;
class DefaultLibraryManipulator : public LibraryManipulator {
public:
DefaultLibraryManipulator(Library* library) :
library(library) {}
virtual ~DefaultLibraryManipulator() {}
bool addBookToLibrary(Book book) {
return library->addBook(book);
}
void addBookmarkToLibrary(Bookmark bookmark) {
library->addBookmark(bookmark);
}
private:
kiwix::Library* library;
};
/**
@@ -60,12 +61,10 @@ class LibraryManipulator
*/
class Manager
{
public: // types
typedef std::vector<std::string> Paths;
public: // functions
explicit Manager(LibraryManipulator* manipulator);
explicit Manager(Library* library);
public:
Manager(LibraryManipulator* manipulator);
Manager(Library* library);
~Manager();
/**
* Read a `library.xml` and add book in the file to the library.
@@ -73,22 +72,10 @@ class Manager
* @param path The (utf8) path to the `library.xml`.
* @param readOnly Set if the library path could be overwritten later with
* updated content.
* @param trustLibrary use book metadata coming from XML.
* @return True if file has been properly parsed.
*/
bool readFile(const std::string& path, bool readOnly = true, bool trustLibrary = true);
/**
* Sync the contents of the library with one or more `library.xml` files.
*
* The metadata of the library files is trusted unconditionally.
* Any books not present in the input library.xml files are removed
* from the library.
*
* @param paths The (utf8) paths to the `library.xml` files.
*/
void reload(const Paths& paths);
/**
* Load a library content store in the string.
*
@@ -163,7 +150,8 @@ class Manager
uint64_t m_itemsPerPage = 0;
protected:
std::shared_ptr<kiwix::LibraryManipulator> manipulator;
kiwix::LibraryManipulator* manipulator;
bool mustDeleteManipulator;
bool readBookFromPath(const std::string& path, Book* book);
bool parseXmlDom(const pugi::xml_document& doc,

View File

@@ -7,12 +7,25 @@ headers = [
'libxml_dumper.h',
'opds_dumper.h',
'downloader.h',
'reader.h',
'entry.h',
'searcher.h',
'search_renderer.h',
'server.h',
'kiwixserve.h',
'name_mapper.h',
'tools.h',
'version.h'
'name_mapper.h'
]
install_headers(headers, subdir:'kiwix')
install_headers(
'tools/base64.h',
'tools/networkTools.h',
'tools/otherTools.h',
'tools/pathTools.h',
'tools/regexTools.h',
'tools/stringTools.h',
'tools/lock.h',
subdir:'kiwix/tools'
)

View File

@@ -22,8 +22,6 @@
#include <string>
#include <map>
#include <memory>
#include <mutex>
namespace kiwix
{
@@ -33,15 +31,15 @@ class Library;
class NameMapper {
public:
virtual ~NameMapper() = default;
virtual std::string getNameForId(const std::string& id) const = 0;
virtual std::string getIdForName(const std::string& name) const = 0;
virtual std::string getNameForId(const std::string& id) = 0;
virtual std::string getIdForName(const std::string& name) = 0;
};
class IdNameMapper : public NameMapper {
public:
virtual std::string getNameForId(const std::string& id) const { return id; };
virtual std::string getIdForName(const std::string& name) const { return name; };
virtual std::string getNameForId(const std::string& id) { return id; };
virtual std::string getIdForName(const std::string& name) { return name; };
};
class HumanReadableNameMapper : public NameMapper {
@@ -52,29 +50,11 @@ class HumanReadableNameMapper : public NameMapper {
public:
HumanReadableNameMapper(kiwix::Library& library, bool withAlias);
virtual ~HumanReadableNameMapper() = default;
virtual std::string getNameForId(const std::string& id) const;
virtual std::string getIdForName(const std::string& name) const;
virtual std::string getNameForId(const std::string& id);
virtual std::string getIdForName(const std::string& name);
};
class UpdatableNameMapper : public NameMapper {
typedef std::shared_ptr<NameMapper> NameMapperHandle;
public:
UpdatableNameMapper(Library& library, bool withAlias);
virtual std::string getNameForId(const std::string& id) const;
virtual std::string getIdForName(const std::string& name) const;
void update();
private:
NameMapperHandle currentNameMapper() const;
private:
mutable std::mutex mutex;
Library& library;
NameMapperHandle nameMapper;
const bool withAlias;
};
}

View File

@@ -26,8 +26,11 @@
#include <pugixml.hpp>
#include "tools/base64.h"
#include "tools/pathTools.h"
#include "tools/regexTools.h"
#include "library.h"
#include "name_mapper.h"
#include "reader.h"
using namespace std;
@@ -42,56 +45,30 @@ class OPDSDumper
{
public:
OPDSDumper() = default;
OPDSDumper(Library* library, NameMapper* NameMapper);
OPDSDumper(Library* library);
~OPDSDumper();
/**
* Dump the OPDS feed.
*
* @param bookIds the ids of the books to include in the feed
* @param query the query used to obtain the list of book ids
* @param id The id of the library.
* @return The OPDS feed.
*/
std::string dumpOPDSFeed(const std::vector<std::string>& bookIds, const std::string& query) const;
std::string dumpOPDSFeed(const std::vector<std::string>& bookIds);
/**
* Dump the OPDS feed.
*
* @param bookIds the ids of the books to include in the feed
* @param query the query used to obtain the list of book ids
* @param partial whether the feed should include partial or complete entries
* @return The OPDS feed.
*/
std::string dumpOPDSFeedV2(const std::vector<std::string>& bookIds, const std::string& query, bool partial) const;
/**
* Dump the OPDS complete entry document.
*
* @param bookId the id of the book
* @return The OPDS complete entry document.
*/
std::string dumpOPDSCompleteEntry(const std::string& bookId) const;
/**
* Dump the categories OPDS feed.
*
* @return The OPDS feed.
*/
std::string categoriesOPDSFeed() const;
/**
* Dump the languages OPDS feed.
*
* @return The OPDS feed.
*/
std::string languagesOPDSFeed() const;
/**
* Set the id of the library.
* Set the id of the opds stream.
*
* @param id the id to use.
*/
void setLibraryId(const std::string& id) { this->libraryId = id;}
void setId(const std::string& id) { this->id = id;}
/**
* Set the title of the opds stream.
*
* @param title the title to use.
*/
void setTitle(const std::string& title) { this->title = title; }
/**
* Set the root location used when generating url.
@@ -100,6 +77,13 @@ class OPDSDumper
*/
void setRootLocation(const std::string& rootLocation) { this->rootLocation = rootLocation; }
/**
* Set the search description url.
*
* @param searchDescriptionUrl the search description url to use.
*/
void setSearchDescriptionUrl(const std::string& searchDescriptionUrl) { this->searchDescriptionUrl = searchDescriptionUrl; }
/**
* Set some informations about the search results.
*
@@ -109,14 +93,27 @@ class OPDSDumper
*/
void setOpenSearchInfo(int totalResult, int startIndex, int count);
/**
* Set the library to dump.
*
* @param library The library to dump.
*/
void setLibrary(Library* library) { this->library = library; }
protected:
kiwix::Library* library;
kiwix::NameMapper* nameMapper;
std::string libraryId;
std::string id;
std::string title;
std::string date;
std::string rootLocation;
std::string searchDescriptionUrl;
int m_totalResults;
int m_startIndex;
int m_count;
bool m_isSearchResult = false;
private:
pugi::xml_node handleBook(Book book, pugi::xml_node root_node);
};
}

617
include/reader.h Normal file
View File

@@ -0,0 +1,617 @@
/*
* Copyright 2011 Emmanuel Engelhart <kelson@kiwix.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*/
#ifndef KIWIX_READER_H
#define KIWIX_READER_H
#include <stdio.h>
#include <zim/article.h>
#include <zim/file.h>
#include <zim/fileiterator.h>
#include <zim/zim.h>
#include <exception>
#include <map>
#include <sstream>
#include <string>
#include "common.h"
#include "entry.h"
#include "tools/pathTools.h"
#include "tools/stringTools.h"
using namespace std;
namespace kiwix
{
/**
* A list of suggestions; each suggestion is itself a list of strings.
*/
using SuggestionsList_t = std::vector<std::vector<std::string>>;
/**
* The Reader class allows getting the content of entries from a zim file.
*/
class Reader
{
public:
/**
* Create a Reader to read a zim file specified by zimFilePath.
*
* @param zimFilePath The path to the zim file to read.
* The zim file can be split (.zimaa, .zimab, ...).
* In this case, the file path must still point to the
* unsplit path, as if the file were not split
* (.zim extension).
*/
Reader(const string zimFilePath);
~Reader();
/**
* Get the number of "displayable" entries in the zim file.
*
* @return If the zim file has a /M/Counter metadata, return the number of
* entries with the 'text/html' MIMEtype specified in the metadata.
* Else return the number of entries in the 'A' namespace.
*/
unsigned int getArticleCount() const;
/**
* Get the number of media in the zim file.
*
* @return If the zim file has a /M/Counter metadata, return the number of
* entries with the 'image/jpeg', 'image/gif' and 'image/png' in
* the metadata.
* Else return the number of entries in the 'I' namespace.
*/
unsigned int getMediaCount() const;
/**
* Get the number of all entries in the zim file.
*
* @return Return the number of all the entries, whatever their MIMEtype or
* their namespace.
*/
unsigned int getGlobalCount() const;
/**
* Get the path of the zim file.
*
* @return the path of the zim file as given in the constructor.
*/
string getZimFilePath() const;
/**
* Get the Id of the zim file.
*
* @return The uuid stored in the zim file.
*/
string getId() const;
/**
* Get the url of a random page.
*
* Deprecated : Use `getRandomPage` instead.
*
* @return Url of a random page. The page is picked from all entries in
* the 'A' namespace.
* The main page is excluded from the potential results.
*/
DEPRECATED string getRandomPageUrl() const;
/**
* Get a random page.
*
* @return A random Entry. The entry is picked from all entries in
* the 'A' namespace.
* The main entry is excluded from the potential results.
*/
Entry getRandomPage() const;
/**
* Get the url of the first page.
*
* Deprecated : Use `getFirstPage` instead.
*
* @return Url of the first entry in the 'A' namespace.
*/
DEPRECATED string getFirstPageUrl() const;
/**
* Get the entry of the first page.
*
* @return The first entry in the 'A' namespace.
*/
Entry getFirstPage() const;
/**
* Get the url of the main page.
*
* Deprecated : Use `getMainPage` instead.
*
* @return Url of the main page as specified in the zim file.
*/
DEPRECATED string getMainPageUrl() const;
/**
* Get the entry of the main page.
*
* @return Entry of the main page as specified in the zim file.
*/
Entry getMainPage() const;
/**
* Get the content of a metadata.
*
* @param[in] name The name of the metadata.
* @param[out] value The value will be set to the content of the metadata.
* @return True if it was possible to get the content of the metadata.
*/
bool getMetadata(const string& name, string& value) const;
/**
* Get the name of the zim file.
*
* @return The name of the zim file as specified in the zim metadata.
*/
string getName() const;
/**
* Get the title of the zim file.
*
* @return The title of zim file as specified in the zim metadata.
* If no title has been set, return a title computed from the
* file path.
*/
string getTitle() const;
/**
* Get the creator of the zim file.
*
* @return The creator of the zim file as specified in the zim metadata.
*/
string getCreator() const;
/**
* Get the publisher of the zim file.
*
* @return The publisher of the zim file as specified in the zim metadata.
*/
string getPublisher() const;
/**
* Get the date of the zim file.
*
* @return The date of the zim file as specified in the zim metadata.
*/
string getDate() const;
/**
* Get the description of the zim file.
*
* @return The description of the zim file as specified in the zim metadata.
* If no description has been set, return the subtitle.
*/
string getDescription() const;
/**
* Get the long description of the zim file.
*
* @return The long description of the zim file as specified in the zim metadata.
*/
string getLongDescription() const;
/**
* Get the language of the zim file.
*
* @return The language of the zim file as specified in the zim metadata.
*/
string getLanguage() const;
/**
* Get the license of the zim file.
*
* @return The license of the zim file as specified in the zim metadata.
*/
string getLicense() const;
/**
* Get the tags of the zim file.
*
* @param original If true, return the original tags as specified in the zim metadata.
* Else, try to convert it to the new 'normalized' format.
* @return The tags of the zim file.
*/
string getTags(bool original=false) const;
/**
* Get the value (as a string) of a specific tag.
*
* According to https://wiki.openzim.org/wiki/Tags
*
* @return The value of the specified tag.
* @throw std::out_of_range if the specified tag is not found.
*/
string getTagStr(const std::string& tagName) const;
/**
* Get the boolean value of a specific tag.
*
* According to https://wiki.openzim.org/wiki/Tags
*
* @return The boolean value of the specified tag.
* @throw std::out_of_range if the specified tag is not found.
* std::domain_error if the value of the tag cannot be convert to bool.
*/
bool getTagBool(const std::string& tagName) const;
/**
* Get the relations of the zim file.
*
* @return The relation of the zim file as specified in the zim metadata.
*/
string getRelation() const;
/**
* Get the flavour of the zim file.
*
* @return The flavour of the zim file as specified in the zim metadata.
*/
string getFlavour() const;
/**
* Get the source of the zim file.
*
* @return The source of the zim file as specified in the zim metadata.
*/
string getSource() const;
/**
* Get the scraper of the zim file.
*
* @return The scraper of the zim file as specified in the zim metadata.
*/
string getScraper() const;
/**
* Get the origId of the zim file.
*
* The origId is only used in the case of patch zim file and is the Id
* of the original zim file.
*
* @return The origId of the zim file as specified in the zim metadata.
*/
string getOrigId() const;
/**
* Get the favicon of the zim file.
*
* @param[out] content The content of the favicon.
* @param[out] mimeType The mimeType of the favicon.
* @return True if a favicon has been found.
*/
bool getFavicon(string& content, string& mimeType) const;
/**
* Get an entry associated to a path.
*
* @param path The path of the entry.
* @return The entry.
* @throw NoEntry If no entry correspond to the path.
*/
Entry getEntryFromPath(const std::string& path) const;
/**
* Get an entry associated to an url encoded path.
*
* Equivalent to `getEntryFromPath(urlDecode(path));`
*
* @param path The url encoded path.
* @return The entry.
* @throw NoEntry If no entry correspond to the path.
*/
Entry getEntryFromEncodedPath(const std::string& path) const;
/**
* Get an entry associated to a title.
*
* @param title The title.
* @return The entry
* @throw NoEntry If no entry corresponds to the title.
*/
Entry getEntryFromTitle(const std::string& title) const;
/**
* Get the url of a page specified by a title.
*
* @param[in] title the title of the page.
* @param[out] url the url of the page.
* @return True if the page can be found.
*/
DEPRECATED bool getPageUrlFromTitle(const string& title, string& url) const;
/**
* Get the mimetype of an entry specified by a url.
*
* @param[in] url the url of the entry.
* @param[out] mimeType the mimeType of the entry.
* @return True if the mimeType has been found.
*/
DEPRECATED bool getMimeTypeByUrl(const string& url, string& mimeType) const;
/**
* Get the content of an entry specified by a url.
*
* Alias to `getContentByEncodedUrl`
*/
DEPRECATED bool getContentByUrl(const string& url,
string& content,
string& title,
unsigned int& contentLength,
string& contentType) const;
/**
* Get the content of an entry specified by a url encoded url.
*
* Equivalent to getContentByDecodedUrl(urlDecode(url), ...).
*/
DEPRECATED bool getContentByEncodedUrl(const string& url,
string& content,
string& title,
unsigned int& contentLength,
string& contentType,
string& baseUrl) const;
/**
* Get the content of an entry specified by an url encoded url.
*
* Equivalent to getContentByEncodedUrl but without baseUrl.
*/
DEPRECATED bool getContentByEncodedUrl(const string& url,
string& content,
string& title,
unsigned int& contentLength,
string& contentType) const;
/**
* Get the content of an entry specified by a url.
*
* @param[in] url The url of the entry.
* @param[out] content The content of the entry.
* @param[out] title the title of the entry.
* @param[out] contentLength The size of the entry (size of content).
* @param[out] contentType The mimeType of the entry.
* @param[out] baseUrl Return the true url of the entry.
* If the specified entry is a redirection, contains
* the url of the targeted entry.
* @return True if the entry has been found.
*/
DEPRECATED bool getContentByDecodedUrl(const string& url,
string& content,
string& title,
unsigned int& contentLength,
string& contentType,
string& baseUrl) const;
/**
* Get the content of an entry specified by a url.
*
* Equivalent to getContentByDecodedUrl but without the baseUrl.
*/
DEPRECATED bool getContentByDecodedUrl(const string& url,
string& content,
string& title,
unsigned int& contentLength,
string& contentType) const;
/**
* Search for entries with title starting with prefix (case sensitive).
*
* Suggestions are stored in an internal vector and can be retrieved using
* `getNextSuggestion` method.
* This method is not thread safe and is deprecated. Use :
* bool searchSuggestions(const string& prefix,
* unsigned int suggestionsCount,
* SuggestionsList_t& results);
*
* @param prefix The prefix to search.
* @param suggestionsCount How many suggestions to search for.
* @param reset If true, remove previous suggestions in the internal vector.
* If false, add suggestions to the internal vector
* (until internal vector size is suggestionCount (or no more
* suggestion))
* @return True if some suggestions have been added to the internal vector.
*/
DEPRECATED bool searchSuggestions(const string& prefix,
unsigned int suggestionsCount,
const bool reset = true);
/**
* Search for entries with title starting with prefix (case sensitive).
*
* Suggestions are added to the `results` vector.
*
* @param prefix The prefix to search.
* @param suggestionsCount How many suggestions to search for.
* @param results The vector where to store the suggestions.
* @return True if some suggestions have been added to the vector.
*/
bool searchSuggestions(const string& prefix,
unsigned int suggestionsCount,
SuggestionsList_t& results);
/**
* Search for entries for the given prefix.
*
* If the zim file has a internal fulltext index, the suggestions will be
* searched using it.
* Else the suggestions will be search using `searchSuggestions` while trying
* to be smart about case sensitivity (using `getTitleVariants`).
*
* In any case, suggestions are stored in an internal vector and can be
* retrieved using `getNextSuggestion` method.
* The internal vector will be reset.
* This method is not thread safe and is deprecated. Use :
* bool searchSuggestionsSmart(const string& prefix,
* unsigned int suggestionsCount,
* SuggestionsList_t& results);
*
* @param prefix The prefix to search for.
* @param suggestionsCount How many suggestions to search for.
*/
DEPRECATED bool searchSuggestionsSmart(const string& prefix,
unsigned int suggestionsCount);
/**
* Search for entries for the given prefix.
*
* If the zim file has a internal fulltext index, the suggestions will be
* searched using it.
* Else the suggestions will be search using `searchSuggestions` while trying
* to be smart about case sensitivity (using `getTitleVariants`).
*
* In any case, suggestions are added to the `results` vector
* (see the `results` parameter below).
*
* @param prefix The prefix to search for.
* @param suggestionsCount How many suggestions to search for.
* @param results The vector where to store the suggestions
* @return True if some suggestions have been added to the results.
*/
bool searchSuggestionsSmart(const string& prefix,
unsigned int suggestionsCount,
SuggestionsList_t& results);
/**
* Check if the url exists in the zim file.
*
* Deprecated : Use `pathExists` instead.
*
* @param url the url to check.
* @return True if the url exists in the zim file.
*/
DEPRECATED bool urlExists(const string& url) const;
/**
* Check if the path exists in the zim file.
*
* @param path the path to check.
* @return True if the path exists in the zim file.
*/
bool pathExists(const string& path) const;
/**
* Check if the zim file has a embedded fulltext index.
*
* @return True if the zim file has a embedded fulltext index
* and is not split (else the fulltext is not accessible).
*/
bool hasFulltextIndex() const;
/**
* Get potential case title variations for a title.
*
* @param title a title.
* @return the list of variations.
*/
std::vector<std::string> getTitleVariants(const std::string& title) const;
/**
* Get the next suggestion title.
*
* @param[out] title the title of the suggestion.
* @return True if title has been set.
*/
DEPRECATED bool getNextSuggestion(string& title);
/**
* Get the next suggestion title and url.
*
* @param[out] title the title of the suggestion.
* @param[out] url the url of the suggestion.
* @return True if title and url have been set.
*/
DEPRECATED bool getNextSuggestion(string& title, string& url);
/**
* Get if we can check zim file integrity (has a checksum).
*
* @return True if the zim file has a checksum.
*/
bool canCheckIntegrity() const;
/**
* Check if the zim file is corrupted.
*
* @return True if zim file is corrupted.
*/
bool isCorrupted() const;
/**
* Parse a full url into a namespace and url.
*
* @param[in] url The full url ("/N/url").
* @param[out] ns The namespace (N).
* @param[out] title The url (url).
* @return True
*/
DEPRECATED bool parseUrl(const string& url, char* ns, string& title) const;
/**
* Return the total size of the zim file.
*
* If zim file is split, return the sum of all parts' size.
*
* @return Size of the zim file in KiB.
*/
unsigned int getFileSize() const;
/**
* Get the zim file handler.
*
* @return The libzim file handler.
*/
zim::File* getZimFileHandler() const;
/**
* Get the zim article object associated to a url.
*
* @param[in] url The url of the article.
* @param[out] article The libzim article object.
* @return True if the url is good (article.good()).
*/
DEPRECATED bool getArticleObjectByDecodedUrl(const string& url,
zim::Article& article) const;
protected:
zim::File* zimFileHandler;
zim::size_type firstArticleOffset;
zim::size_type lastArticleOffset;
zim::size_type nsACount;
zim::size_type nsICount;
std::string zimFilePath;
SuggestionsList_t suggestions;
SuggestionsList_t::iterator suggestionsOffset;
private:
std::map<const std::string, unsigned int> parseCounterMetadata() const;
};
}
#endif

View File

@@ -21,12 +21,11 @@
#define KIWIX_SEARCH_RENDERER_H
#include <string>
#include <zim/search.h>
#include "library.h"
namespace kiwix
{
class Searcher;
class NameMapper;
/**
* The SearchRenderer class is used to render a search result to a html page.
@@ -35,43 +34,21 @@ class SearchRenderer
{
public:
/**
* Construct a SearchRenderer from a SearchResultSet.
* The default constructor.
*
* The constructed version of the SearchRenderer will not introduce
* the book name for each result. It is better to use the other constructor
* with a Library pointer to have a better html page.
*
* @param srs The `SearchResultSet` to render.
* @param mapper The `NameMapper` to use to do the rendering.
* @param start The start offset used for the srs.
* @param estimatedResultCount The estimatedResultCount of the whole search
* @param humanReadableName The global zim's humanReadableName.
* Used to generate pagination links.
*/
SearchRenderer(zim::SearchResultSet srs, NameMapper* mapper,
unsigned int start, unsigned int estimatedResultCount);
/**
* Construct a SearchRenderer from a SearchResultSet.
*
* @param srs The `SearchResultSet` to render.
* @param mapper The `NameMapper` to use to do the rendering.
* @param library The `Library` to use to look up book details for search results.
* @param start The start offset used for the srs.
* @param estimatedResultCount The estimatedResultCount of the whole search
*/
SearchRenderer(zim::SearchResultSet srs, NameMapper* mapper, Library* library,
unsigned int start, unsigned int estimatedResultCount);
SearchRenderer(Searcher* searcher, NameMapper* mapper);
~SearchRenderer();
/**
* Set the search pattern used to do the search
*/
void setSearchPattern(const std::string& pattern);
/**
* Set the querystring used to select books
* Set the search content id.
*/
void setSearchBookQuery(const std::string& bookQuery);
void setSearchContent(const std::string& name);
/**
* Set protocol prefix.
@@ -90,25 +67,16 @@ class SearchRenderer
this->pageLength = pageLength;
}
std::string renderTemplate(const std::string& tmpl_str);
/**
* Generate the html page with the results of the search.
*/
std::string getHtml();
/**
* Generate the xml page with the results of the search.
*/
std::string getXml();
protected:
std::string beautifyInteger(const unsigned int number);
zim::SearchResultSet m_srs;
Searcher* mp_searcher;
NameMapper* mp_nameMapper;
Library* mp_library;
std::string searchBookQuery;
std::string searchContent;
std::string searchPattern;
std::string protocolPrefix;
std::string searchProtocolPrefix;

171
include/searcher.h Normal file
View File

@@ -0,0 +1,171 @@
/*
* Copyright 2011 Emmanuel Engelhart <kelson@kiwix.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*/
#ifndef KIWIX_SEARCHER_H
#define KIWIX_SEARCHER_H
#include <stdio.h>
#include <stdlib.h>
#include <unicode/putil.h>
#include <algorithm>
#include <cctype>
#include <locale>
#include <string>
#include <vector>
#include <vector>
#include "tools/pathTools.h"
#include "tools/stringTools.h"
using namespace std;
namespace kiwix
{
class Reader;
/**
 * Abstract interface for a single search result.
 *
 * Every accessor is pure virtual, so this class cannot be instantiated
 * directly; concrete result types must override all of them.
 * The accessors are deliberately left non-const and in their original order
 * so that existing overriders and the vtable layout are unaffected.
 */
class Result
{
 public:
  // Virtual so that a concrete result can be destroyed through a Result*.
  virtual ~Result() = default;

  virtual std::string get_url() = 0;
  virtual std::string get_title() = 0;
  virtual int get_score() = 0;
  virtual std::string get_snippet() = 0;
  virtual std::string get_content() = 0;
  virtual int get_wordCount() = 0;
  virtual int get_size() = 0;
  virtual int get_readerIndex() = 0;
};
struct SearcherInternal;
/**
* The Searcher class is responsible for doing different kinds of search
* using the fulltext index.
*
* A search is started with `search`, `geo_search` or `suggestions`; the
* results are then read one by one with `getNextResult`.
*/
class Searcher
{
public:
/**
* The default constructor.
*/
Searcher();
~Searcher();
/**
* Add a reader (containing embedded fulltext index) to the search.
*
* @param reader The Reader for the zim containing the fulltext index.
* @return true if the reader has been added.
* false if the reader cannot be added (no embedded fulltext index present)
*/
bool add_reader(Reader* reader);
/**
* Get a previously added reader.
*
* NOTE(review): presumably `index` is a position in the `readers` vector
* (the value returned by Result::get_readerIndex) - confirm against the
* implementation, which is not visible from this header.
*
* @param index The index of the reader.
* @return The corresponding Reader.
*/
Reader* get_reader(int index);
/**
* Start a search on the zim associated to the Searcher.
*
* Search results should be retrieved using the getNextResult method.
*
* @param search The search query.
* @param resultStart the start offset of the search results (used for pagination).
* @param resultEnd the end offset of the search results (used for pagination).
* @param verbose print some info on stdout if true.
*/
void search(const std::string& search,
unsigned int resultStart,
unsigned int resultEnd,
const bool verbose = false);
/**
* Start a geographic search.
* The search returns results for entries located in a disc of center
* latitude/longitude and radius distance.
*
* Search results should be retrieved using the getNextResult method.
*
* @param latitude The latitude of the center point.
* @param longitude The longitude of the center point.
* @param distance The radius of the disc.
* @param resultStart the start offset of the search results (used for pagination).
* @param resultEnd the end offset of the search results (used for pagination).
* @param verbose print some info on stdout if true.
*/
void geo_search(float latitude, float longitude, float distance,
unsigned int resultStart,
unsigned int resultEnd,
const bool verbose = false);
/**
* Start a suggestion search.
* The search made depends on the "version" of the embedded index.
* - If the index is recent enough and has a title namespace, the search is
* made in the titles only.
* - Else the search is made on the whole article content.
* In any case, the search is made "partial" (as if adding '*' at the end of the query)
*
* @param search The search query (taken by non-const reference).
* @param verbose print some info on stdout if true.
*/
void suggestions(std::string& search, const bool verbose = false);
/**
* Get the next result of a started search.
* This is the method to use to loop over the search results.
*/
Result* getNextResult();
/**
* Restart the previous search.
* Next call to getNextResult will return the first result.
*/
void restart_search();
/**
* Get an estimation of the result count.
*/
unsigned int getEstimatedResultCount();
// Return the value currently stored in the `resultStart` member.
unsigned int getResultStart() { return resultStart; }
// Return the value currently stored in the `resultEnd` member.
unsigned int getResultEnd() { return resultEnd; }
protected:
// NOTE(review): the three helpers below are implemented outside this
// header; their exact behaviour cannot be confirmed from here.
std::string beautifyInteger(const unsigned int number);
void closeIndex();
// Note: `string` is unqualified here and relies on the file-level
// `using namespace std;`.
void searchInIndex(string& search,
const unsigned int resultStart,
const unsigned int resultEnd,
const bool verbose = false);
// Readers handed to the searcher (see add_reader / get_reader).
std::vector<Reader*> readers;
// Opaque implementation state (SearcherInternal is only forward-declared).
SearcherInternal* internal;
std::string searchPattern;
unsigned int estimatedResultCount;
// Values exposed by getResultStart() / getResultEnd().
unsigned int resultStart;
unsigned int resultEnd;
private:
void reset();
};
}
#endif

View File

@@ -54,31 +54,23 @@ namespace kiwix
void setAddress(const std::string& addr) { m_addr = addr; }
void setPort(int port) { m_port = port; }
void setNbThreads(int threads) { m_nbThreads = threads; }
void setMultiZimSearchLimit(unsigned int limit) { m_multizimSearchLimit = limit; }
void setIpConnectionLimit(int limit) { m_ipConnectionLimit = limit; }
void setVerbose(bool verbose) { m_verbose = verbose; }
void setIndexTemplateString(const std::string& indexTemplateString) { m_indexTemplateString = indexTemplateString; }
void setTaskbar(bool withTaskbar, bool withLibraryButton)
{ m_withTaskbar = withTaskbar; m_withLibraryButton = withLibraryButton; }
void setBlockExternalLinks(bool blockExternalLinks)
{ m_blockExternalLinks = blockExternalLinks; }
int getPort();
std::string getAddress();
protected:
Library* mp_library;
NameMapper* mp_nameMapper;
std::string m_root = "";
std::string m_addr = "";
std::string m_indexTemplateString = "";
int m_port = 80;
int m_nbThreads = 1;
unsigned int m_multizimSearchLimit = 0;
bool m_verbose = false;
bool m_withTaskbar = true;
bool m_withLibraryButton = true;
bool m_blockExternalLinks = false;
int m_ipConnectionLimit = 0;
std::unique_ptr<InternalServer> mp_server;
};
}

View File

@@ -1,220 +0,0 @@
/*
* Copyright 2021 Matthieu Gautier <mgautier@kymeria.fr>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*/
#ifndef KIWIX_TOOLS_H
#define KIWIX_TOOLS_H
#include <string>
#include <vector>
#include <map>
namespace kiwix {
/**
* Return the current directory.
*
* @return the current directory (utf8 encoded)
*/
std::string getCurrentDirectory();
/**
* Return the data directory.
*
* The data directory is a directory where to put data (zim files, ...)
* It depends on the platform and may be changed by the user through environment variables.
*
* The resolution order is :
* - `KIWIX_DATA_DIR` env variable (if set).
* - On Windows :
* . `$APPDATA/kiwix` if $APPDATA is set
* . `$USERPROFILE/kiwix` if $USERPROFILE is set
* - Else :
* . `$XDG_DATA_HOME/kiwix` if $XDG_DATA_HOME is set
* . `$HOME/.local/share/kiwix` if $HOME is set
* - current directory
*
* @return the path of the data directory (utf8 encoded)
*/
std::string getDataDirectory();
/** Return the path of the executable
*
* Some applications may be packaged in a self-extracting archive (AppImage), in which
* case the path of the real executable differs from the path of the archive.
* If `realPathOnly` is true, return the path of the real executable instead of the
* archive launched by the user.
*
* @param realPathOnly If we must return the real path of the executable.
* @return the path of the executable (utf8 encoded)
*/
std::string getExecutablePath(bool realPathOnly = false);
/** Tell if the path is a relative path.
*
* This function is provided as a small helper. It is probably better to use native tools
* to manipulate paths.
*
* @param path A utf8 encoded path.
* @return true if the path is relative.
*/
bool isRelativePath(const std::string& path);
/** Append a path to another one.
*
* This function is provided as a small helper. It is probably better to use native tools
* to manipulate paths.
*
* @param basePath the base path.
* @param relativePath a path to add to the base path, must be a relative path.
* @return The concatenation of the paths, using the right separator.
*/
std::string appendToDirectory(const std::string& basePath, const std::string& relativePath);
/** Remove the last element of a path.
*
* This function is provided as a small helper. It is probably better to use native tools
* to manipulate paths.
*
* @param path a path.
* @return The parent directory (or empty string if none).
*/
std::string removeLastPathElement(const std::string& path);
/** Get the last element of a path.
*
* This function is provided as a small helper. It is probably better to use native tools
* to manipulate paths.
*
* @param path a path.
* @return The base name of the path or empty string if none (ending with a separator).
*/
std::string getLastPathElement(const std::string& path);
/** Compute the absolute path of a relative path based on another one
*
* Equivalent to appendToDirectory followed by a normalization of the path.
*
* This function is provided as a small helper. It is probably better to use native tools
* to manipulate paths.
*
* @param path the base path (if empty, current directory is taken).
* @param relativePath the relative path.
* @return an absolute path.
*/
std::string computeAbsolutePath(const std::string& path, const std::string& relativePath);
/** Compute the relative path of a path relative to another one
*
* This function is provided as a small helper. It is probably better to use native tools
* to manipulate paths.
*
* @param path the base path.
* @param absolutePath the absolute path to find the relative path for.
* @return a relative path (pointing to absolutePath, relative to path).
*/
std::string computeRelativePath(const std::string& path, const std::string& absolutePath);
/** Sleep the current thread.
*
* This function is provided as a small helper. It is probably better to use native tools.
*
* @param milliseconds The number of milliseconds to wait for.
*/
void sleep(unsigned int milliseconds);
/** Split a string
*
* This function is provided as a small helper. It is probably better to use native tools.
*
* Assuming text = "foo:;bar;baz,oups;"
*
* split(text, ":;", true, true) => ["foo", ":", ";", "bar", ";", "baz,oups", ";"]
* split(text, ":;", true, false) => ["foo", "bar", "baz,oups"] (default)
* split(text, ":;", false, true) => ["foo", ":", "", ";", "bar", ";", "baz,oups", ";", ""]
* split(text, ":;", false, false) => ["foo", "", "bar", "baz,oups", ""]
*
* @param str The string to split.
* @param delims A string of potential delimiters.
* Each character in the string is an individual delimiter.
* @param dropEmpty true if empty part must be dropped from the result.
* @param keepDelim true if delimiters must be included in the result.
* @return a list of part (potentially containing delimiters)
*/
std::vector<std::string> split(const std::string& str, const std::string& delims, bool dropEmpty=true, bool keepDelim = false);
/** Convert language code from iso2 code to iso3
*
* This function is provided as a small helper. It is probably better to use native tools
* to manipulate locales.
*
* @param a2code a iso2 code string.
* @return the corresponding iso3 code.
* @throw std::out_of_range if iso2 code is not known.
*/
std::string converta2toa3(const std::string& a2code);
/** Extracts content from given file.
*
* This function provides the content of a file given its path.
*
* @param path The absolute path provided in string format.
* @return Content of corresponding file in string format.
*/
std::string getFileContent(const std::string& path);
/** Checks if file exists.
*
* This function returns boolean stating if file exists.
*
* @param path The absolute path provided in string format.
* @return Boolean representing if file exists or not.
*/
bool fileExists(const std::string& path);
/** Checks if file is readable.
*
* This function returns boolean stating if file is readable.
*
* @param path The absolute path provided in string format.
* @return Boolean representing if file is readable or not.
*/
bool fileReadable(const std::string& path);
/** Provides mimetype from filename.
*
* This function provides mimetype from file-name.
*
* @param filename string containing filename.
* @return mimetype from filename in string format.
*/
std::string getMimeTypeForFile(const std::string& filename);
/** Provides all available network interfaces
*
* This function provides the available IPv4 network interfaces
*/
std::map<std::string, std::string> getNetworkInterfaces();
/** Provides the best IP address
* This function provides the best IP address from the list given by getNetworkInterfaces
*/
std::string getBestPublicIp();
}
#endif // KIWIX_TOOLS_H

46
include/tools/lock.h Normal file
View File

@@ -0,0 +1,46 @@
#ifndef KIWIXLIB_TOOL_LOCK_H
#define KIWIXLIB_TOOL_LOCK_H
#include <pthread.h>
namespace kiwix {
/**
 * Scoped owner of a locked pthread mutex.
 *
 * The constructor locks the given mutex and the destructor unlocks it.
 * Ownership of the locked mutex can be transferred with the move constructor
 * or move assignment; a moved-from Lock owns nothing and its destructor is a
 * no-op. Copying is disabled.
 */
class Lock
{
 public:
  /**
   * Lock `mutex` and take ownership of the lock.
   *
   * @param mutex The mutex to lock. Passed directly to pthread_mutex_lock.
   */
  explicit Lock(pthread_mutex_t* mutex) :
    mp_mutex(mutex)
  {
    pthread_mutex_lock(mp_mutex);
  }

  /** Unlock the owned mutex, if any. */
  ~Lock() {
    release();
  }

  /** Take over the mutex held by `other`; `other` no longer owns it. */
  Lock(Lock && other) :
    mp_mutex(other.mp_mutex)
  {
    other.mp_mutex = nullptr;
  }

  /**
   * Take over the mutex held by `other`.
   *
   * The mutex currently held by this Lock (if any) is unlocked first;
   * otherwise it would stay locked forever once its pointer is overwritten.
   * Self-assignment is a no-op and keeps the mutex locked and owned.
   */
  Lock & operator=(Lock && other)
  {
    if (this != &other) {
      release();
      mp_mutex = other.mp_mutex;
      other.mp_mutex = nullptr;
    }
    return *this;
  }

 private:
  /** Unlock the owned mutex (if any) and forget it. */
  void release()
  {
    if (mp_mutex != nullptr) {
      pthread_mutex_unlock(mp_mutex);
      mp_mutex = nullptr;
    }
  }

  pthread_mutex_t* mp_mutex;
  Lock(Lock const &) = delete;
  Lock & operator=(Lock const &) = delete;
};
}
#endif //KIWIXLIB_TOOL_LOCK_H

View File

@@ -22,24 +22,16 @@
#include <string>
#include <vector>
#include <map>
#include <cstdlib>
#include <zim/zim.h>
#include <mustache.hpp>
#include "stringTools.h"
namespace pugi {
class xml_node;
}
namespace zim {
class SuggestionItem;
}
namespace kiwix
{
void sleep(unsigned int milliseconds);
std::string nodeToString(const pugi::xml_node& node);
std::string converta2toa3(const std::string& a2code);
/*
* Convert all format tag string to new format
@@ -49,44 +41,6 @@ namespace kiwix
const std::string& tagName);
bool convertStrToBool(const std::string& value);
std::string gen_date_str();
std::string gen_uuid(const std::string& s);
// if s is empty then returns kainjow::mustache::data(false)
// otherwise kainjow::mustache::data(value)
kainjow::mustache::data onlyAsNonEmptyMustacheValue(const std::string& s);
std::string render_template(const std::string& template_str, kainjow::mustache::data data);
/**
 * Read an environment variable and convert it to `T`.
 *
 * @param name The name of the environment variable.
 * @param defaultValue The value returned when the variable is not set or
 *                     when `extractFromString<T>` throws while converting it.
 * @return The converted value of the variable, or `defaultValue`.
 */
template<typename T>
T getEnvVar(const char* name, const T& defaultValue)
{
  const char* const rawValue = std::getenv(name);
  if (rawValue == nullptr) {
    // Variable not set: nothing to convert.
    return defaultValue;
  }
  try {
    return extractFromString<T>(rawValue);
  } catch (...) {
    // Any conversion failure falls back to the default.
    return defaultValue;
  }
}
// Collection of suggestions that can be exported as a string via getJSON().
// The state is held in a kainjow::mustache::data object (`m_data`).
// NOTE(review): the member functions are implemented outside this header;
// the notes below are based on the signatures only - confirm.
class Suggestions
{
public:
Suggestions();
// Add a libzim suggestion item.
void add(const zim::SuggestionItem& suggestion);
// Presumably adds an entry offering a fulltext search for `query`,
// localized for `uiLang` - TODO confirm against the implementation.
void addFTSearchSuggestion(const std::string& uiLang,
const std::string& query);
// Return the suggestions as a string (JSON, going by the name).
std::string getJSON() const;
private:
kainjow::mustache::data m_data;
};
}
#endif

View File

@@ -26,13 +26,23 @@
std::string WideToUtf8(const std::wstring& wstr);
std::wstring Utf8ToWide(const std::string& str);
#endif
bool isRelativePath(const std::string& path);
std::string computeAbsolutePath(const std::string& path, const std::string& relativePath);
std::string computeRelativePath(const std::string& path, const std::string& absolutePath);
std::string removeLastPathElement(const std::string& path);
std::string appendToDirectory(const std::string& directoryPath, const std::string& filename);
unsigned int getFileSize(const std::string& path);
std::string getFileSizeAsString(const std::string& path);
std::string getFileContent(const std::string& path);
bool fileExists(const std::string& path);
bool makeDirectory(const std::string& path);
std::string makeTmpDirectory();
bool copyFile(const std::string& sourcePath, const std::string& destPath);
std::string getLastPathElement(const std::string& path);
std::string getExecutablePath(bool realPathOnly = false);
std::string getCurrentDirectory();
std::string getDataDirectory();
bool writeTextFile(const std::string& path, const std::string& content);
std::string getMimeTypeForFile(const std::string& filename);
#endif

View File

@@ -26,5 +26,8 @@ bool matchRegex(const std::string& content, const std::string& regex);
std::string replaceRegex(const std::string& content,
const std::string& replacement,
const std::string& regex);
std::string appendToFirstOccurence(const std::string& content,
const std::string& regex,
const std::string& replacement);
#endif

View File

@@ -21,12 +21,10 @@
#define KIWIX_STRINGTOOLS_H
#include <unicode/unistr.h>
#include <unicode/locid.h>
#include <string>
#include <vector>
#include <sstream>
#include <stdexcept>
namespace kiwix
{
@@ -42,35 +40,10 @@ std::string encodeDiples(const std::string& str);
std::string removeAccents(const std::string& text);
void loadICUExternalTables();
class ICULanguageInfo
{
public:
explicit ICULanguageInfo(const std::string& langCode);
std::string iso3Code() const;
std::string selfName() const;
private:
const icu::Locale locale;
};
/* urlEncode() is the equivalent of JS encodeURIComponent(), with the only
* difference that the slash (/) symbol is NOT encoded. */
std::string urlEncode(const std::string& value);
std::string urlEncode(const std::string& value, bool encodeReserved = false);
std::string urlDecode(const std::string& value, bool component = false);
// Only URI components that are of interest to libkiwix
// are included in the below enumeration type
enum class URIComponentKind
{
PATH,
QUERY
};
// Encode 'value' for usage in a URI component specified by 'target'
std::string uriEncode(URIComponentKind target, const std::string& value);
std::vector<std::string> split(const std::string&, const std::string&, bool trimEmpty = true);
std::string join(const std::vector<std::string>& list, const std::string& sep);
std::string ucAll(const std::string& word);
@@ -93,17 +66,9 @@ T extractFromString(const std::string& str) {
std::istringstream iss(str);
T ret;
iss >> ret;
if(iss.fail() || !iss.eof()) {
throw std::invalid_argument("no conversion");
}
return ret;
}
template<>
std::string extractFromString(const std::string& str);
bool startsWith(const std::string& base, const std::string& start);
std::vector<std::string> getTitleVariants(const std::string& title);
} //namespace kiwix
#endif

View File

@@ -1,31 +1,34 @@
project('libkiwix', 'cpp',
version : '12.0.0',
project('kiwix-lib', 'cpp',
version : '9.3.1', # Also change this in android-kiwix-lib-publisher/kiwixLibAndroid/build.gradle
license : 'GPLv3+',
default_options : ['c_std=c11', 'cpp_std=c++11', 'werror=true'])
compiler = meson.get_compiler('cpp')
static_deps = get_option('static-linkage') or get_option('default_library') == 'static'
wrapper = get_option('wrapper')
# See https://github.com/kiwix/libkiwix/issues/371
if ['arm', 'mips', 'm68k', 'ppc', 'sh4'].contains(host_machine.cpu_family())
extra_libs = ['-latomic']
static_deps = wrapper.contains('android') or wrapper.contains('java') or get_option('default_library') == 'static'
if wrapper.contains('android')
extra_libs = ['-llog']
else
extra_libs = []
endif
if (compiler.get_id() == 'gcc' and build_machine.system() == 'linux') or host_machine.system() == 'freebsd'
# C++ std::thread is implemented using pthread on linux by gcc
thread_dep = dependency('threads')
else
thread_dep = dependency('', required:false)
if wrapper.contains('java')
add_languages('java')
endif
# See https://github.com/kiwix/kiwix-lib/issues/371
if ['arm', 'mips', 'm68k', 'ppc', 'sh4'].contains(target_machine.cpu_family())
extra_libs += '-latomic'
endif
thread_dep = dependency('threads')
libicu_dep = dependency('icu-i18n', static:static_deps)
libzim_dep = dependency('libzim', version : '>=6.1.8', static:static_deps)
pugixml_dep = dependency('pugixml', static:static_deps)
libcurl_dep = dependency('libcurl', static:static_deps)
microhttpd_dep = dependency('libmicrohttpd', static:static_deps)
zlib_dep = dependency('zlib', static:static_deps)
xapian_dep = dependency('xapian-core', static:static_deps)
if compiler.has_header('mustache.hpp')
extra_include = []
@@ -35,28 +38,18 @@ else
error('Cannot found header mustache.hpp')
endif
libzim_dep = dependency('libzim', version : '>=8.1.0', static:static_deps)
if not compiler.has_header_symbol('zim/zim.h', 'LIBZIM_WITH_XAPIAN')
error('Libzim seems to be compiled without xapian. Xapian support is mandatory.')
endif
extra_cflags = ''
if host_machine.system() == 'windows' and static_deps
if target_machine.system() == 'windows' and static_deps
add_project_arguments('-DCURL_STATICLIB', language : 'cpp')
extra_cflags += '-DCURL_STATICLIB'
endif
if host_machine.system() == 'windows'
add_project_arguments('-DNOMINMAX', language: 'cpp')
endif
all_deps = [thread_dep, libicu_dep, libzim_dep, pugixml_dep, libcurl_dep, microhttpd_dep, zlib_dep, xapian_dep]
all_deps = [thread_dep, libicu_dep, libzim_dep, pugixml_dep, libcurl_dep, microhttpd_dep]
inc = include_directories('include', extra_include)
conf = configuration_data()
conf.set('LIBKIWIX_VERSION', '"@0@"'.format(meson.project_version()))
conf.set('VERSION', '"@0@"'.format(meson.project_version()))
if build_machine.system() == 'windows'
extra_link_args = ['-lshlwapi', '-lwinmm']
@@ -69,11 +62,8 @@ subdir('scripts')
subdir('static')
subdir('src')
subdir('test')
if get_option('doc')
subdir('docs')
endif
pkg_requires = ['libzim', 'icu-i18n', 'pugixml', 'libcurl', 'libmicrohttpd', 'xapian-core']
pkg_requires = ['libzim', 'icu-i18n', 'pugixml', 'libcurl']
pkg_conf = configuration_data()
pkg_conf.set('prefix', get_option('prefix'))

View File

@@ -1,4 +1,2 @@
option('static-linkage', type : 'boolean', value : false,
description : 'Link statically with the dependencies.')
option('doc', type : 'boolean', value : false,
description : 'Build the documentations.')
option('wrapper', type:'array', choices:['java', 'android'], value:[],
description: 'The wrapper to generate.')

View File

@@ -1,14 +0,0 @@
#!/usr/bin/env bash
# Reformat, in place, every *.h and *.cpp file found under the "src" and
# "include" directories of the repository, using clang-format with
# -style=file.

# Compute the repository root: the parent of the directory holding this script
SCRIPT_DIR=$(dirname "$0")
REPO_DIR=$(readlink -f "$SCRIPT_DIR"/..)
DIRS="src include"

# Apply formatting to all *.cpp and *.h files.
# Abort if the repository root cannot be entered, otherwise find/clang-format
# would operate relative to whatever the current directory happens to be.
cd "$REPO_DIR" || exit 1

# $DIRS is left unquoted on purpose so that it expands to one argument per
# directory. File names are passed NUL-separated so that paths containing
# whitespace or glob characters reach clang-format intact.
find $DIRS \( -name '*.h' -o -name '*.cpp' \) -print0 |
while IFS= read -r -d '' FILE
do
  echo "$FILE"
  clang-format -i -style=file "$FILE"
done

View File

@@ -1,148 +0,0 @@
#!/usr/bin/env python3
'''
Copyright 2022 Veloman Yunkan <veloman.yunkan@gmail.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or any
later version.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA.
'''
import argparse
import os.path
import re
import json
def to_identifier(name):
    """Turn an arbitrary string into a valid C/C++ identifier.

    Every character outside ``[0-9a-zA-Z]`` is replaced by ``_``. A leading
    ``_`` is added when the result is empty or starts with a digit, because
    neither is a legal identifier.
    """
    ident = re.sub(r'[^0-9a-zA-Z]', '_', name)
    # `not ident` covers the empty string, for which ident[0] would raise
    # IndexError.
    if not ident or ident[0].isnumeric():
        return "_" + ident
    return ident
def lang_code(filename):
    """Derive the language identifier from an i18n resource file path.

    The directory part and the extension are dropped and the remaining stem
    is passed through to_identifier(). The mapping is also printed to stdout.
    """
    base = os.path.basename(filename)
    stem, _extension = os.path.splitext(base)
    code = to_identifier(stem)
    print(base, '->', code)
    return code
from string import Template
def expand_cxx_template(t, **kwargs):
    """Expand the ``$NAME`` placeholders of template text `t`.

    Each keyword argument supplies the value of the placeholder with the same
    name. string.Template.substitute() raises KeyError when a placeholder has
    no matching keyword.
    """
    template = Template(t)
    expanded = template.substitute(**kwargs)
    return expanded
def cxx_string_literal(s):
    """Return `s` as the source text of a C++ ``u8"..."`` string literal."""
    # The JSON string escape rules match those of C++, so the JSON encoding
    # of the string is reused as the body of the literal.
    json_encoded = json.dumps(s)
    return 'u8{}'.format(json_encoded)
# string.Template text for one per-language table of I18nString entries.
# $TABLE_NAME is the C++ array name, $TABLE_ENTRIES the comma-separated
# initializers.
string_table_cxx_template = '''
const I18nString $TABLE_NAME[] = {
$TABLE_ENTRIES
};
'''
# string.Template text for one initializer of the language table: the
# language string literal, the element count of its string table, and the
# string table itself.
lang_table_entry_cxx_template = '''
{
$LANG_STRING_LITERAL,
ARRAY_ELEMENT_COUNT($STRING_TABLE_NAME),
$STRING_TABLE_NAME
}'''
# string.Template text for the whole generated C++ file. $STRING_DATA receives
# the per-language string tables (placed in an unnamed namespace), $LANG_TABLE
# the language table initializers and $LANG_COUNT the number of languages.
cxxfile_template = '''// This file is automatically generated. Do not modify it.
#include "server/i18n.h"
namespace kiwix {
namespace i18n {
namespace
{
$STRING_DATA
} // unnamed namespace
#define ARRAY_ELEMENT_COUNT(a) (sizeof(a)/sizeof(a[0]))
extern const I18nStringTable stringTables[] = {
$LANG_TABLE
};
extern const size_t langCount = $LANG_COUNT;
} // namespace i18n
} // namespace kiwix
'''
class Resource:
    """One i18n resource file (JSON) and the C++ snippets generated from it."""

    def __init__(self, filename):
        """Read `filename` (surrounding whitespace stripped) as UTF-8 text."""
        path = filename.strip()
        self.filename = path
        self.lang_code = lang_code(path)
        with open(path, 'r', encoding='utf-8') as stream:
            self.data = stream.read()

    def get_string_table_name(self):
        """Name of the C++ array holding the strings of this language."""
        return "string_table_for_{}".format(self.lang_code)

    def get_string_table(self):
        """C++ definition of the string table of this language."""
        joined_entries = ",\n ".join(self.get_string_table_entries())
        table_name = self.get_string_table_name()
        return expand_cxx_template(string_table_cxx_template,
                                   TABLE_NAME=table_name,
                                   TABLE_ENTRIES=joined_entries)

    def get_string_table_entries(self):
        """Yield one ``{ key, value }`` initializer per message, sorted by key.

        The "@metadata" key of the JSON document is skipped.
        """
        messages = json.loads(self.data)
        for key in sorted(messages):
            if key == "@metadata":
                continue
            key_literal = cxx_string_literal(key)
            value_literal = cxx_string_literal(messages[key])
            yield '{ ' + key_literal + ', ' + value_literal + ' }'

    def get_lang_table_entry(self):
        """C++ initializer describing this language in the language table."""
        return expand_cxx_template(
            lang_table_entry_cxx_template,
            LANG_STRING_LITERAL=cxx_string_literal(self.lang_code),
            STRING_TABLE_NAME=self.get_string_table_name())
def gen_c_file(resources):
    """Return the text of the generated C++ file for `resources`.

    `resources` is a sequence of Resource objects; the tables appear in the
    generated file in the order of that sequence.
    """
    # One (string table, language table entry) pair per resource, produced in
    # a single pass over the resources.
    snippets = [(res.get_string_table(), res.get_lang_table_entry())
                for res in resources]
    string_data = "\n".join(table for table, _entry in snippets)
    lang_table = ",\n ".join(entry for _table, entry in snippets)
    return expand_cxx_template(cxxfile_template,
                               STRING_DATA=string_data,
                               LANG_TABLE=lang_table,
                               LANG_COUNT=len(resources))
if __name__ == "__main__":
    # Command line: --cxxfile OUTPUT.cpp RESOURCE.json [RESOURCE.json ...]
    parser = argparse.ArgumentParser()
    parser.add_argument('--cxxfile',
                        required=True,
                        help='The Cpp file name to generate')
    parser.add_argument('i18n_resource_files', nargs='+',
                        help='The list of resources to compile.')
    args = parser.parse_args()

    # Load every resource first, then write the generated file in one go.
    resources = []
    for filename in args.i18n_resource_files:
        resources.append(Resource(filename))
    generated_source = gen_c_file(resources)
    with open(args.cxxfile, 'w') as f:
        f.write(generated_source)

View File

@@ -1,18 +0,0 @@
.TH KIWIX-COMPILE-I18N "1" "January 2022" "Kiwix" "User Commands"
.SH NAME
kiwix-compile-i18n \- helper to compile Kiwix i18n (internationalization) data
.SH SYNOPSIS
\fBkiwix\-compile\-i18n\fR [\-h] \-\-cxxfile CXXFILE i18n_resource_files ...\fR
.SH DESCRIPTION
.TP
i18n_resource_files ...
The list of i18n resources to compile.
.TP
\fB\-h\fR, \fB\-\-help\fR
show a help message and exit
.TP
\fB\-\-cxxfile\fR CXXFILE
The Cpp file name to generate
.TP
.SH AUTHOR
Veloman Yunkan <veloman.yunkan@gmail.com>

View File

@@ -52,21 +52,15 @@ resource_getter_template = """
return RESOURCE::{identifier};
"""
resource_cacheid_getter_template = """
if (name == "{common_name}")
return "{cacheid}";
"""
resource_decl_template = """{namespaces_open}
extern const std::string {identifier};
{namespaces_close}"""
class Resource:
def __init__(self, base_dirs, filename, cacheid=None):
filename = filename
def __init__(self, base_dirs, filename):
filename = filename.strip()
self.filename = filename
self.identifier = full_identifier(filename)
self.cacheid = cacheid
found = False
for base_dir in base_dirs:
try:
@@ -77,7 +71,7 @@ class Resource:
except FileNotFoundError:
continue
if not found:
raise Exception("Resource not found: {}".format(filename))
raise Exception("Impossible to found {}".format(filename))
def dump_impl(self):
nb_row = len(self.data)//16 + (1 if len(self.data) % 16 else 0)
@@ -99,12 +93,6 @@ class Resource:
identifier="::".join(self.identifier)
)
def dump_cacheid_getter(self):
return resource_cacheid_getter_template.format(
common_name=self.filename,
cacheid=self.cacheid
)
def dump_decl(self):
return resource_decl_template.format(
namespaces_open=" ".join("namespace {} {{".format(id) for id in self.identifier[:-1]),
@@ -114,7 +102,7 @@ class Resource:
master_c_template = """//This file is automatically generated. Do not modify it.
master_c_template = """//This file is automaically generated. Do not modify it.
#include <stdlib.h>
#include <fstream>
@@ -135,12 +123,7 @@ static std::string init_resource(const char* name, const unsigned char* content,
const std::string& getResource_{basename}(const std::string& name) {{
{RESOURCES_GETTER}
throw ResourceNotFound("Resource not found: " + name);
}}
const char* getResourceCacheId_{basename}(const std::string& name) {{
{RESOURCE_CACHEID_GETTER}
return nullptr;
throw ResourceNotFound("Resource not found.");
}}
{RESOURCES}
@@ -151,7 +134,6 @@ def gen_c_file(resources, basename):
return master_c_template.format(
RESOURCES="\n\n".join(r.dump_impl() for r in resources),
RESOURCES_GETTER="\n\n".join(r.dump_getter() for r in resources),
RESOURCE_CACHEID_GETTER="\n\n".join(r.dump_cacheid_getter() for r in resources if r.cacheid is not None),
include_file=basename,
basename=to_identifier(basename)
)
@@ -177,10 +159,8 @@ class ResourceNotFound : public std::runtime_error {{
}};
const std::string& getResource_{basename}(const std::string& name);
const char* getResourceCacheId_{basename}(const std::string& name);
#define getResource(a) (getResource_{basename}(a))
#define getResourceCacheId(a) (getResourceCacheId_{basename}(a))
#endif // KIWIX_{BASENAME}
@@ -209,8 +189,8 @@ if __name__ == "__main__":
base_dir = os.path.dirname(os.path.realpath(args.resource_file))
source_dir = args.source_dir or []
with open(args.resource_file, 'r') as f:
resources = [Resource([base_dir]+source_dir, *line.strip().split())
for line in f.readlines()]
resources = [Resource([base_dir]+source_dir, filename)
for filename in f.readlines()]
h_identifier = to_identifier(os.path.basename(args.hfile))
with open(args.hfile, 'w') as f:

View File

@@ -1,135 +0,0 @@
#!/usr/bin/env python3
'''
Copyright 2022 Veloman Yunkan <veloman.yunkan@gmail.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or any
later version.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA.
'''
import argparse
import hashlib
import os.path
import re
def read_resource_file(resource_file_path):
    """Return the lines of the resource list file, each stripped of
    surrounding whitespace (blank lines are kept as empty strings)."""
    entries = []
    with open(resource_file_path, 'r') as listing:
        for raw_line in listing:
            entries.append(raw_line.strip())
    return entries
def list_resources(resource_file_path):
    """Print every entry of the resource list file, one per line."""
    entries = read_resource_file(resource_file_path)
    for entry in entries:
        print(entry)
def compute_resource_revision(resource_path):
    """Return the first 8 hex digits of the SHA-1 of the resource as it is
    found under OUT_DIR."""
    built_path = os.path.join(OUT_DIR, resource_path)
    with open(built_path, 'rb') as stream:
        digest = hashlib.sha1(stream.read())
    return digest.hexdigest()[:8]
resource_revisions = {}
def get_resource_revision(res):
    """Return the revision of resource `res`, computing it on first use.

    On a cache miss the resource is preprocessed first, then its revision is
    computed and remembered in resource_revisions.
    """
    if res in resource_revisions:
        return resource_revisions[res]
    preprocess_resource(res)
    resource_revisions[res] = compute_resource_revision(res)
    return resource_revisions[res]
RESOURCE_WITH_CACHEID_URL_PATTERN=r'(?P<pre>.*/(?P<resource>skin/[^"?]+)\?)KIWIXCACHEID(?P<post>[^"]*)'
def set_cacheid(resource_matchobj):
    """re.sub() callback: rebuild the matched text with the KIWIXCACHEID
    placeholder replaced by ``cacheid=<revision of the matched resource>``.

    Expects a match object with the named groups 'pre', 'resource' and 'post'.
    """
    parts = resource_matchobj.groupdict()
    revision = get_resource_revision(parts['resource'])
    return parts['pre'] + 'cacheid=' + revision + parts['post']
def preprocess_text(s):
    """Return `s` with every KIWIXCACHEID placeholder substituted.

    Text without the placeholder is returned unchanged. After substitution no
    placeholder may remain; this is checked with an assertion.
    """
    if 'KIWIXCACHEID' not in s:
        return s
    substituted = re.sub(RESOURCE_WITH_CACHEID_URL_PATTERN, set_cacheid, s)
    assert not 'KIWIXCACHEID' in substituted
    return substituted
def get_preprocessed_resource(srcpath):
    """Return the preprocessed content of the resource at `srcpath`.

    Returns None when preprocessing leaves the content unchanged, or when the
    file cannot be decoded as text (it is then treated as a binary resource).
    """
    try:
        with open(srcpath, 'r') as resource_file:
            original = resource_file.read()
            transformed = preprocess_text(original)
            if transformed == original:
                return None
            return transformed
    except UnicodeDecodeError:
        # It was a binary resource
        return None
def symlink_resource(src, resource_path):
    """Make `resource_path` a symbolic link pointing at `src`.

    Nothing is done when the link already exists with that exact target;
    anything else found at `resource_path` is removed first.
    """
    # lexists() is also true for a dangling symlink, where exists() follows
    # the link and reports False; os.symlink() below would then fail with
    # FileExistsError.
    if os.path.lexists(resource_path):
        if os.path.islink(resource_path) and os.readlink(resource_path) == src:
            return
        os.remove(resource_path)
    os.symlink(src, resource_path)
def preprocess_resource(resource_path):
    """Produce the build-tree version of one resource.

    `resource_path` is relative to BASE_DIR (input) and OUT_DIR (output).
    When preprocessing changes the content, the transformed text is written
    to the output path; otherwise the output path becomes a symlink to the
    source file.
    """
    print('Preprocessing', resource_path, '...')
    resource_dir = os.path.dirname(resource_path)
    if resource_dir != '':
        os.makedirs(os.path.join(OUT_DIR, resource_dir), exist_ok=True)
    srcpath = os.path.join(BASE_DIR, resource_path)
    outpath = os.path.join(OUT_DIR, resource_path)
    # lexists() rather than exists(): a dangling symlink left by a previous
    # run must be removed too, otherwise open(outpath, 'w') below would write
    # through it to the stale target and os.symlink() would fail.
    if os.path.lexists(outpath):
        os.remove(outpath)
    preprocessed_content = get_preprocessed_resource(srcpath)
    if preprocessed_content is None:
        symlink_resource(srcpath, outpath)
    else:
        with open(outpath, 'w') as target:
            print(preprocessed_content, end='', file=target)
def copy_resource_list_file(src_path, dst_path):
    """Copy the resource list file, annotating known skin resources.

    A line starting with "skin/" whose stripped text has an entry in
    resource_revisions is rewritten as "<resource> <revision>"; every other
    line is copied verbatim.
    """
    with open(src_path, 'r') as src, open(dst_path, 'w') as dst:
        for line in src:
            res = line.strip()
            has_revision = line.startswith("skin/") and res in resource_revisions
            if not has_revision:
                dst.write(line)
                continue
            dst.write(res + " " + resource_revisions[res] + "\n")
def preprocess_resources(resource_file_path):
    """Preprocess every resource named in the resource list file, then write
    the list file itself into OUT_DIR."""
    for resource in read_resource_file(resource_file_path):
        # Skin resources go through get_resource_revision(), which also
        # preprocesses them on first use.
        if resource.startswith('skin/'):
            handle = get_resource_revision
        else:
            handle = preprocess_resource
        handle(resource)
    list_name = os.path.basename(resource_file_path)
    copy_resource_list_file(resource_file_path, os.path.join(OUT_DIR, list_name))
if __name__ == "__main__":
    # Command line: [--list-all | --preprocess] [--outdir DIR] RESOURCE_FILE
    parser = argparse.ArgumentParser()
    commands = parser.add_mutually_exclusive_group()
    commands.add_argument('--list-all', action='store_true')
    commands.add_argument('--preprocess', action='store_true')
    parser.add_argument('--outdir')
    parser.add_argument('resource_file')
    args = parser.parse_args()
    # Preprocessing joins every output path onto OUT_DIR; report a missing
    # --outdir as a usage error instead of failing later with a TypeError
    # from os.path.join(None, ...).
    if args.preprocess and args.outdir is None:
        parser.error('--preprocess requires --outdir')
    # Resources are looked up relative to the directory of the list file.
    BASE_DIR = os.path.dirname(os.path.realpath(args.resource_file))
    OUT_DIR = args.outdir
    if args.list_all:
        list_resources(args.resource_file)
    elif args.preprocess:
        preprocess_resources(args.resource_file)

View File

@@ -1,13 +1,6 @@
res_manager = find_program('kiwix-resources')
res_compiler = find_program('kiwix-compile-resources')
install_data(res_compiler.path(), install_dir:get_option('bindir'))
install_man('kiwix-compile-resources.1')
i18n_compiler = find_program('kiwix-compile-i18n')
install_data(i18n_compiler.path(), install_dir:get_option('bindir'))
install_man('kiwix-compile-i18n.1')

View File

@@ -3,15 +3,13 @@
#include "aria2.h"
#include "xmlrpc.h"
#include <iostream>
#include <algorithm>
#include <sstream>
#include <thread>
#include <chrono>
#include "tools.h"
#include "tools/pathTools.h"
#include "tools/stringTools.h"
#include "tools/otherTools.h"
#include "downloader.h" // For AriaError
#include <tools/otherTools.h>
#include <tools/pathTools.h>
#include <tools/stringTools.h>
#include <downloader.h> // For AriaError
#ifdef _WIN32
# define ARIA2_CMD "aria2c.exe"
@@ -21,20 +19,15 @@
#endif
#define LOG_ARIA_ERROR() \
{ \
std::cerr << "ERROR: aria2 RPC request failed. (" << res << ")." << std::endl; \
std::cerr << (m_curlErrorBuffer[0] ? m_curlErrorBuffer.get() : curl_easy_strerror(res)) << std::endl; \
}
namespace kiwix {
Aria2::Aria2():
mp_aria(nullptr),
m_port(42042),
m_secret(getNewRpcSecret()),
m_curlErrorBuffer(new char[CURL_ERROR_SIZE]),
mp_curl(nullptr)
m_secret("kiwixariarpc"),
mp_curl(nullptr),
m_lock(PTHREAD_MUTEX_INITIALIZER)
{
m_downloadDir = getDataDirectory();
makeDirectory(m_downloadDir);
@@ -65,12 +58,11 @@ Aria2::Aria2():
// Try to use a potential installed aria2c.
callCmd.push_back(ARIA2_CMD);
}
callCmd.push_back("--follow-metalink=mem");
callCmd.push_back("--enable-rpc");
callCmd.push_back(rpc_secret.c_str());
callCmd.push_back(rpc_port.c_str());
callCmd.push_back(download_dir.c_str());
if (fileReadable(session_file)) {
if (fileExists(session_file)) {
callCmd.push_back(inputFile.c_str());
}
callCmd.push_back(session.c_str());
@@ -92,21 +84,27 @@ Aria2::Aria2():
}
mp_aria = Subprocess::run(callCmd);
mp_curl = curl_easy_init();
char errbuf[CURL_ERROR_SIZE];
curl_easy_setopt(mp_curl, CURLOPT_URL, "http://localhost/rpc");
curl_easy_setopt(mp_curl, CURLOPT_PORT, m_port);
curl_easy_setopt(mp_curl, CURLOPT_POST, 1L);
curl_easy_setopt(mp_curl, CURLOPT_ERRORBUFFER, m_curlErrorBuffer.get());
curl_easy_setopt(mp_curl, CURLOPT_ERRORBUFFER, errbuf);
int watchdog = 50;
while(--watchdog) {
sleep(10);
m_curlErrorBuffer[0] = 0;
errbuf[0] = 0;
auto res = curl_easy_perform(mp_curl);
if (res == CURLE_OK) {
break;
} else if (watchdog == 1) {
LOG_ARIA_ERROR();
std::cerr <<" curl_easy_perform() failed." << std::endl;
fprintf(stderr, "\nlibcurl: (%d) ", res);
if (errbuf[0] != 0) {
std::cerr << errbuf << std::endl;
} else {
std::cerr << curl_easy_strerror(res) << std::endl;
}
}
}
if (!watchdog) {
@@ -117,7 +115,6 @@ Aria2::Aria2():
Aria2::~Aria2()
{
std::unique_lock<std::mutex> lock(m_lock);
curl_easy_cleanup(mp_curl);
}
@@ -129,44 +126,38 @@ void Aria2::close()
size_t write_callback_to_iss(char* ptr, size_t size, size_t nmemb, void* userdata)
{
auto outStream = static_cast<std::stringstream*>(userdata);
outStream->write(ptr, nmemb);
auto str = static_cast<std::stringstream*>(userdata);
str->write(ptr, nmemb);
return nmemb;
}
std::string Aria2::doRequest(const MethodCall& methodCall)
{
pthread_mutex_lock(&m_lock);
auto requestContent = methodCall.toString();
std::stringstream outStream;
std::stringstream stringstream;
CURLcode res;
long response_code;
{
std::unique_lock<std::mutex> lock(m_lock);
curl_easy_setopt(mp_curl, CURLOPT_POSTFIELDSIZE, requestContent.size());
curl_easy_setopt(mp_curl, CURLOPT_POSTFIELDS, requestContent.c_str());
curl_easy_setopt(mp_curl, CURLOPT_WRITEFUNCTION, &write_callback_to_iss);
curl_easy_setopt(mp_curl, CURLOPT_WRITEDATA, &outStream);
m_curlErrorBuffer[0] = 0;
res = curl_easy_perform(mp_curl);
if (res != CURLE_OK) {
LOG_ARIA_ERROR();
throw std::runtime_error("Cannot perform request");
}
curl_easy_setopt(mp_curl, CURLOPT_POSTFIELDSIZE, requestContent.size());
curl_easy_setopt(mp_curl, CURLOPT_POSTFIELDS, requestContent.c_str());
curl_easy_setopt(mp_curl, CURLOPT_WRITEFUNCTION, &write_callback_to_iss);
curl_easy_setopt(mp_curl, CURLOPT_WRITEDATA, &stringstream);
res = curl_easy_perform(mp_curl);
if (res == CURLE_OK) {
long response_code;
curl_easy_getinfo(mp_curl, CURLINFO_RESPONSE_CODE, &response_code);
pthread_mutex_unlock(&m_lock);
if (response_code != 200) {
throw std::runtime_error("Invalid return code from aria");
}
auto responseContent = stringstream.str();
MethodResponse response(responseContent);
if (response.isFault()) {
throw AriaError(response.getFault().getFaultString());
}
return responseContent;
}
auto responseContent = outStream.str();
if (response_code != 200) {
std::cerr << "ERROR: Invalid return code (" << response_code << ") from aria :" << std::endl;
std::cerr << responseContent << std::endl;
throw std::runtime_error("Invalid return code from aria");
}
MethodResponse response(responseContent);
if (response.isFault()) {
throw AriaError(response.getFault().getFaultString());
}
return responseContent;
pthread_mutex_unlock(&m_lock);
throw std::runtime_error("Cannot perform request");
}
std::string Aria2::addUri(const std::vector<std::string>& uris, const std::vector<std::pair<std::string, std::string>>& options)
@@ -197,13 +188,6 @@ std::string Aria2::tellStatus(const std::string& gid, const std::vector<std::str
return doRequest(methodCall);
}
std::string Aria2::getNewRpcSecret()
{
std::string uuid = gen_uuid("");
uuid.erase(std::remove(uuid.begin(), uuid.end(), '-'));
return uuid.substr(0, 9);
}
std::vector<std::string> Aria2::tellActive()
{
MethodCall methodCall("aria2.tellActive", m_secret);

View File

@@ -12,8 +12,8 @@
#include "xmlrpc.h"
#include <memory>
#include <mutex>
#include <curl/curl.h>
#include <pthread.h>
namespace kiwix {
@@ -24,9 +24,8 @@ class Aria2
int m_port;
std::string m_secret;
std::string m_downloadDir;
std::unique_ptr<char[]> m_curlErrorBuffer;
CURL* mp_curl;
std::mutex m_lock;
pthread_mutex_t m_lock;
std::string doRequest(const MethodCall& methodCall);
@@ -37,7 +36,6 @@ class Aria2
std::string addUri(const std::vector<std::string>& uri, const std::vector<std::pair<std::string, std::string>>& options = {});
std::string tellStatus(const std::string& gid, const std::vector<std::string>& statusKey);
static std::string getNewRpcSecret();
std::vector<std::string> tellActive();
std::vector<std::string> tellWaiting();
void saveSession();

View File

@@ -18,18 +18,13 @@
*/
#include "book.h"
#include "reader.h"
#include "tools.h"
#include "tools/base64.h"
#include "tools/regexTools.h"
#include "tools/networkTools.h"
#include "tools/otherTools.h"
#include "tools/stringTools.h"
#include "tools/pathTools.h"
#include "tools/archiveTools.h"
#include <zim/archive.h>
#include <zim/item.h>
#include <pugixml.hpp>
namespace kiwix
@@ -40,17 +35,11 @@ Book::Book() :
m_readOnly(false)
{
}
/* Destructor */
Book::~Book()
{
}
Book::Illustrations Book::getIllustrations() const
{
return m_illustrations;
}
bool Book::update(const kiwix::Book& other)
{
if (m_readOnly)
@@ -59,38 +48,53 @@ bool Book::update(const kiwix::Book& other)
if (m_id != other.m_id)
return false;
*this = other;
m_readOnly = other.m_readOnly;
m_path = other.m_path;
m_pathValid = other.m_pathValid;
m_title = other.m_title;
m_description = other.m_description;
m_language = other.m_language;
m_creator = other.m_creator;
m_publisher = other.m_publisher;
m_date = other.m_date;
m_url = other.m_url;
m_name = other.m_name;
m_flavour = other.m_flavour;
m_tags = other.m_tags;
m_origId = other.m_origId;
m_articleCount = other.m_articleCount;
m_mediaCount = other.m_mediaCount;
m_size = other.m_size;
m_favicon = other.m_favicon;
m_faviconMimeType = other.m_faviconMimeType;
m_faviconUrl = other.m_faviconUrl;
m_downloadId = other.m_downloadId;
return true;
}
void Book::update(const zim::Archive& archive) {
m_path = archive.getFilename();
void Book::update(const kiwix::Reader& reader)
{
m_path = reader.getZimFilePath();
m_pathValid = true;
m_id = reader.getId();
m_title = reader.getTitle();
m_description = reader.getDescription();
m_language = reader.getLanguage();
m_creator = reader.getCreator();
m_publisher = reader.getPublisher();
m_date = reader.getDate();
m_name = reader.getName();
m_flavour = reader.getFlavour();
m_tags = reader.getTags();
m_origId = reader.getOrigId();
m_articleCount = reader.getArticleCount();
m_mediaCount = reader.getMediaCount();
m_size = static_cast<uint64_t>(reader.getFileSize()) << 10;
m_pathValid = true;
m_id = std::string(archive.getUuid());
m_title = getArchiveTitle(archive);
m_description = getMetaDescription(archive);
m_language = getMetaLanguage(archive);
m_creator = getMetaCreator(archive);
m_publisher = getMetaPublisher(archive);
m_date = getMetaDate(archive);
m_name = getMetaName(archive);
m_flavour = getMetaFlavour(archive);
m_tags = getMetaTags(archive);
m_category = getCategoryFromTags();
m_articleCount = archive.getArticleCount();
m_mediaCount = archive.getMediaCount();
m_size = static_cast<uint64_t>(getArchiveFileSize(archive)) << 10;
m_illustrations.clear();
for ( const auto illustrationSize : archive.getIllustrationSizes() ) {
const auto illustration = std::make_shared<Illustration>();
const zim::Item illustrationItem = archive.getIllustrationItem(illustrationSize);
illustration->width = illustration->height = illustrationSize;
illustration->mimeType = illustrationItem.getMimetype();
illustration->data = illustrationItem.getData();
// NOTE: illustration->url is left uninitialized
m_illustrations.push_back(illustration);
}
reader.getFavicon(m_favicon, m_faviconMimeType);
}
#define ATTR(name) node.attribute(name).value()
@@ -102,7 +106,7 @@ void Book::updateFromXml(const pugi::xml_node& node, const std::string& baseDir)
path = computeAbsolutePath(baseDir, path);
}
m_path = path;
m_pathValid = fileReadable(path);
m_pathValid = fileExists(path);
m_title = ATTR("title");
m_description = ATTR("description");
m_language = ATTR("language");
@@ -117,19 +121,12 @@ void Book::updateFromXml(const pugi::xml_node& node, const std::string& baseDir)
m_articleCount = strtoull(ATTR("articleCount"), 0, 0);
m_mediaCount = strtoull(ATTR("mediaCount"), 0, 0);
m_size = strtoull(ATTR("size"), 0, 0) << 10;
std::string favicon_mimetype = ATTR("faviconMimeType");
if (! favicon_mimetype.empty()) {
const auto favicon = std::make_shared<Illustration>();
favicon->data = base64_decode(ATTR("favicon"));
favicon->mimeType = favicon_mimetype;
favicon->url = ATTR("faviconUrl");
m_illustrations.assign(1, favicon);
}
m_favicon = base64_decode(ATTR("favicon"));
m_faviconMimeType = ATTR("faviconMimeType");
m_faviconUrl = ATTR("faviconUrl");
try {
m_downloadId = ATTR("downloadId");
} catch(...) {}
const auto catattr = node.attribute("category");
m_category = catattr.empty() ? getCategoryFromTags() : catattr.value();
}
#undef ATTR
@@ -155,14 +152,10 @@ void Book::updateFromOpds(const pugi::xml_node& node, const std::string& urlHost
m_language = VALUE("language");
m_creator = node.child("author").child("name").child_value();
m_publisher = node.child("publisher").child("name").child_value();
const std::string dcIssuedDate = VALUE("dc:issued");
m_date = dcIssuedDate.empty() ? VALUE("updated") : dcIssuedDate;
m_date = fromOpdsDate(m_date);
m_date = fromOpdsDate(VALUE("updated"));
m_name = VALUE("name");
m_flavour = VALUE("flavour");
m_tags = VALUE("tags");
const auto catnode = node.child("category");
m_category = catnode.empty() ? getCategoryFromTags() : catnode.child_value();
m_articleCount = strtoull(VALUE("articleCount"), 0, 0);
m_mediaCount = strtoull(VALUE("mediaCount"), 0, 0);
for(auto linkNode = node.child("link"); linkNode;
@@ -174,11 +167,8 @@ void Book::updateFromOpds(const pugi::xml_node& node, const std::string& urlHost
m_size = strtoull(linkNode.attribute("length").value(), 0, 0);
}
if (rel == "http://opds-spec.org/image/thumbnail") {
const auto favicon = std::make_shared<Illustration>();
favicon->data.clear();
favicon->url = urlHost + linkNode.attribute("href").value();
favicon->mimeType = linkNode.attribute("type").value();
m_illustrations.assign(1, favicon);
m_faviconUrl = urlHost + linkNode.attribute("href").value();
m_faviconMimeType = linkNode.attribute("type").value();
}
}
@@ -189,7 +179,7 @@ std::string Book::getHumanReadableIdFromPath() const
{
std::string id = m_path;
if (!id.empty()) {
id = kiwix::removeAccents(id);
kiwix::removeAccents(id);
#ifdef _WIN32
id = replaceRegex(id, "", "^.*\\\\");
@@ -211,54 +201,15 @@ void Book::setPath(const std::string& path)
: path;
}
const Book::Illustration Book::missingDefaultIllustration;
std::shared_ptr<const Book::Illustration> Book::getIllustration(unsigned int size) const
{
for ( const auto& ilPtr : m_illustrations ) {
if (ilPtr->width == size && ilPtr->height == size) {
return ilPtr;
}
}
throw std::runtime_error("Cannot find illustration");
}
const Book::Illustration& Book::getDefaultIllustration() const
{
try {
return *getIllustration(48);
} catch (...) {
return missingDefaultIllustration;
}
}
const std::string& Book::Illustration::getData() const
{
if (data.empty() && !url.empty()) {
const std::lock_guard<std::mutex> l(mutex);
if ( data.empty() ) {
try {
data = download(url);
} catch(...) {
std::cerr << "Cannot download favicon from " << url;
}
}
}
return data;
}
const std::string& Book::getFavicon() const {
return getDefaultIllustration().getData();
}
const std::string& Book::getFaviconUrl() const
{
return getDefaultIllustration().url;
}
const std::string& Book::getFaviconMimeType() const
{
return getDefaultIllustration().mimeType;
if (m_favicon.empty() && !m_faviconUrl.empty()) {
try {
m_favicon = download(m_faviconUrl);
} catch(...) {
std::cerr << "Cannot download favicon from " << m_faviconUrl;
}
}
return m_favicon;
}
std::string Book::getTagStr(const std::string& tagName) const {
@@ -269,21 +220,4 @@ bool Book::getTagBool(const std::string& tagName) const {
return convertStrToBool(getTagStr(tagName));
}
std::string Book::getCategory() const
{
return m_category;
}
std::string Book::getCategoryFromTags() const
{
try
{
return getTagStr("category");
}
catch ( const std::out_of_range& )
{
return "";
}
}
}

View File

@@ -1,3 +1,3 @@
#mesondefine LIBKIWIX_VERSION
#mesondefine VERSION

View File

@@ -133,7 +133,7 @@ Downloader::Downloader() :
m_knownDownloads[gid]->updateStatus();
}
} catch (std::exception& e) {
std::cerr << "aria2 tellActive failed : " << e.what() << std::endl;
std::cerr << "aria2 tellActive failed : " << e.what();
}
try {
for (auto gid : mp_aria->tellWaiting()) {
@@ -141,7 +141,7 @@ Downloader::Downloader() :
m_knownDownloads[gid]->updateStatus();
}
} catch (std::exception& e) {
std::cerr << "aria2 tellWaiting failed : " << e.what() << std::endl;
std::cerr << "aria2 tellWaiting failed : " << e.what();
}
}

140
src/entry.cpp Normal file
View File

@@ -0,0 +1,140 @@
/*
* Copyright 2011 Emmanuel Engelhart <kelson@kiwix.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*/
#include "reader.h"
#include <time.h>
#include <zim/search.h>
namespace kiwix
{
/** Build an Entry wrapping the given zim article. */
Entry::Entry(zim::Article article)
  : article(article)
{
}

// Guard placed at the top of every accessor: when good() is false the
// accessor returns WHAT instead of querying the wrapped article.
#define RETURN_IF_INVALID(WHAT) if(!good()) { return (WHAT); }

/** Long url of the article ("" when the entry is not good()). */
std::string Entry::getPath() const
{
  RETURN_IF_INVALID("");
  return article.getLongUrl();
}

/** Title of the article ("" when the entry is not good()). */
std::string Entry::getTitle() const
{
  RETURN_IF_INVALID("");
  return article.getTitle();
}

/** Whole data of the article as a string ("" when the entry is not good()). */
std::string Entry::getContent() const
{
  RETURN_IF_INVALID("");
  return article.getData();
}

/** Data of the article starting at `offset` (empty blob when not good()). */
zim::Blob Entry::getBlob(offset_type offset) const
{
  RETURN_IF_INVALID(zim::Blob());
  return article.getData(offset);
}

/** `size` bytes of article data starting at `offset` (empty blob when not good()). */
zim::Blob Entry::getBlob(offset_type offset, size_type size) const
{
  RETURN_IF_INVALID(zim::Blob());
  return article.getData(offset, size);
}

/**
 * Direct access information of the article, as reported by
 * zim::Article::getDirectAccessInformation() (("", 0) when not good()).
 */
std::pair<std::string, offset_type> Entry::getDirectAccessInfo() const
{
  RETURN_IF_INVALID(std::make_pair("", 0));
  return article.getDirectAccessInformation();
}

/** Size of the article (0 when the entry is not good()). */
size_type Entry::getSize() const
{
  RETURN_IF_INVALID(0);
  return article.getArticleSize();
}

/**
 * Mime type of the article ("" when the entry is not good()).
 * Falls back to "application/octet-stream" when the article cannot
 * report its mime type.
 */
std::string Entry::getMimetype() const
{
  RETURN_IF_INVALID("");
  try {
    return article.getMimeType();
  } catch (const std::exception&) {
    return "application/octet-stream";
  }
}

/** Whether the article is a redirect (false when the entry is not good()). */
bool Entry::isRedirect() const
{
  RETURN_IF_INVALID(false);
  return article.isRedirect();
}

/** Whether the article is a link target (false when the entry is not good()). */
bool Entry::isLinkTarget() const
{
  RETURN_IF_INVALID(false);
  return article.isLinktarget();
}

/** Whether the article is marked deleted (false when the entry is not good()). */
bool Entry::isDeleted() const
{
  RETURN_IF_INVALID(false);
  return article.isDeleted();
}

/**
 * Entry this redirect points to (one hop only).
 *
 * Returns a default-constructed Entry when this entry is not good().
 * Throws NoEntry when the article is not a redirect or when its target is
 * not good.
 */
Entry Entry::getRedirectEntry() const
{
  RETURN_IF_INVALID(Entry());
  if ( !article.isRedirect() ) {
    throw NoEntry();
  }

  auto targeted_article = article.getRedirectArticle();
  if ( !targeted_article.good()) {
    throw NoEntry();
  }
  return targeted_article;
}

/**
 * Entry found at the end of the redirect chain starting at this entry.
 *
 * Returns a default-constructed Entry when this entry is not good().
 * The resolved article is remembered in final_article and reused by later
 * calls. Throws NoEntry when a redirect target is not good or when the
 * chain is still a redirect after 42 hops.
 */
Entry Entry::getFinalEntry() const
{
  RETURN_IF_INVALID(Entry());
  if (final_article.good()) {
    return final_article;
  }

  // Walk the chain in a local variable and publish it to final_article only
  // on success. Resolving directly in final_article would leave an
  // intermediate redirect cached when NoEntry is thrown, and the next call
  // would return that redirect as the "final" entry.
  auto current = article;
  int loopCounter = 42;
  while (current.isRedirect() && loopCounter--) {
    current = current.getRedirectArticle();
    if ( !current.good()) {
      throw NoEntry();
    }
  }
  // Prevent infinite loops.
  if (current.isRedirect()) {
    throw NoEntry();
  }
  final_article = current;
  return final_article;
}

#undef RETURN_IF_INVALID

}

View File

@@ -9,7 +9,6 @@
# include <unistd.h>
#endif
#include "tools.h"
#include "tools/pathTools.h"
#include "tools/stringTools.h"

View File

@@ -19,184 +19,54 @@
#include "library.h"
#include "book.h"
#include "reader.h"
#include "libxml_dumper.h"
#include "tools.h"
#include "tools/base64.h"
#include "tools/regexTools.h"
#include "tools/pathTools.h"
#include "tools/stringTools.h"
#include "tools/otherTools.h"
#include "tools/concurrent_cache.h"
#include <pugixml.hpp>
#include <algorithm>
#include <set>
#include <cmath>
#include <unicode/locid.h>
#include <xapian.h>
namespace kiwix
{
namespace
{
std::string iso639_3ToXapian(const std::string& lang) {
return icu::Locale(lang.c_str()).getLanguage();
};
// Normalisation applied to text before it is indexed in / queried against the
// book database; currently just removeAccents() (defined elsewhere).
std::string normalizeText(const std::string& text)
{
return removeAccents(text);
}
// Two books refer to the same archive only when both have a valid path and
// those paths compare equal.
bool booksReferToTheSameArchive(const Book& book1, const Book& book2)
{
  if (!book1.isPathValid() || !book2.isPathValid()) {
    return false;
  }
  return book1.getPath() == book2.getPath();
}
// A ConcurrentCache whose cache keys are *sets* of Key.
// drop(key) evicts every cached item whose key set contains `key`.
template<typename Key, typename Value>
class MultiKeyCache: public ConcurrentCache<std::set<Key>, Value>
{
  using Base = ConcurrentCache<std::set<Key>, Value>;

public:
  explicit MultiKeyCache(size_t maxEntries)
    : Base(maxEntries)
  {}

  // Removes all entries whose composite key mentions `key`.
  // Returns true if at least one entry was actually dropped.
  bool drop(const Key& key)
  {
    std::unique_lock<std::mutex> guard(this->lock_);
    bool anyDropped = false;
    for (auto& compositeKey : this->impl_.keys()) {
      const bool mentionsKey = compositeKey.find(key) != compositeKey.end();
      if (mentionsKey && this->impl_.drop(compositeKey)) {
        anyDropped = true;
      }
    }
    return anyDropped;
  }
};
} // unnamed namespace
// Private state of Library (pimpl).
struct Library::Impl
{
  // A Book together with the library revision at which it was last
  // added or updated.
  struct Entry : Book
  {
    Library::Revision lastUpdatedRevision = 0;
  };

  // Library revision counter. It is incremented and read by Library, and the
  // constructor's initializer list does not mention it, so it must be
  // initialised here; otherwise the first increment reads an indeterminate
  // value.
  Library::Revision m_revision = 0;

  // Books, keyed by book id.
  std::map<std::string, Entry> m_books;

  // Cache of opened archives, keyed by book id.
  using ArchiveCache = ConcurrentCache<std::string, std::shared_ptr<zim::Archive>>;
  std::unique_ptr<ArchiveCache> mp_archiveCache;

  // Cache of searchers, keyed by the set of book ids they search.
  using SearcherCache = MultiKeyCache<std::string, std::shared_ptr<ZimSearcher>>;
  std::unique_ptr<SearcherCache> mp_searcherCache;

  std::vector<kiwix::Bookmark> m_bookmarks;

  // Xapian database indexing the books' metadata.
  Xapian::WritableDatabase m_bookDB;

  // Counts books having a path (if localBooks) or a URL (if remoteBooks).
  unsigned int getBookCount(const bool localBooks, const bool remoteBooks) const;

  Impl();
  ~Impl();

  Impl(Impl&& );
  Impl& operator=(Impl&& );
};
// Creates the archive and searcher caches and an in-memory Xapian book DB.
// Each cache's initial capacity comes from the corresponding environment
// variable (defaulting to 1) and is clamped to be at least 1.
Library::Impl::Impl()
: mp_archiveCache(new ArchiveCache(std::max(getEnvVar<int>("KIWIX_ARCHIVE_CACHE_SIZE", 1), 1))),
mp_searcherCache(new SearcherCache(std::max(getEnvVar<int>("KIWIX_SEARCHER_CACHE_SIZE", 1), 1))),
m_bookDB("", Xapian::DB_BACKEND_INMEMORY)
{
}
// Destructor: nothing to do beyond destroying the members.
Library::Impl::~Impl()
{
}
// Move construction and move assignment use the compiler-generated
// member-wise implementations.
Library::Impl::Impl(Library::Impl&& ) = default;
Library::Impl& Library::Impl::operator=(Library::Impl&& ) = default;
unsigned int
Library::Impl::getBookCount(const bool localBooks, const bool remoteBooks) const
{
unsigned int result = 0;
for (auto& pair: m_books) {
auto& book = pair.second;
if ((!book.getPath().empty() && localBooks)
|| (!book.getUrl().empty() && remoteBooks)) {
result++;
}
}
return result;
}
/* Constructor */
// Creates an empty library with freshly constructed private state.
Library::Library()
: mp_impl(new Library::Impl)
{
}
// Move constructor: takes over `other`'s private state (mp_impl).
// Only mp_impl is transferred here.
Library::Library(Library&& other)
: mp_impl(std::move(other.mp_impl))
{
}
// Move assignment: replaces this library's private state with `other`'s.
// Only mp_impl is transferred here.
Library& Library::operator=(Library&& other)
{
mp_impl = std::move(other.mp_impl);
return *this;
}
/* Destructor */
Library::~Library() = default;
Library::~Library()
{
}
bool Library::addBook(const Book& book)
{
std::lock_guard<std::mutex> lock(m_mutex);
++mp_impl->m_revision;
/* Try to find it */
updateBookDB(book);
try {
auto& oldbook = mp_impl->m_books.at(book.getId());
if ( ! booksReferToTheSameArchive(oldbook, book) ) {
dropCache(book.getId());
}
oldbook.update(book); // XXX: This may have no effect if oldbook is readonly
// XXX: Then m_bookDB will become out-of-sync with
// XXX: the real contents of the library.
oldbook.lastUpdatedRevision = mp_impl->m_revision;
auto& oldbook = m_books.at(book.getId());
oldbook.update(book);
return false;
} catch (std::out_of_range&) {
auto& newEntry = mp_impl->m_books[book.getId()];
static_cast<Book&>(newEntry) = book;
newEntry.lastUpdatedRevision = mp_impl->m_revision;
size_t new_cache_size = static_cast<size_t>(std::ceil(mp_impl->getBookCount(true, true)*0.1));
if (getEnvVar<int>("KIWIX_ARCHIVE_CACHE_SIZE", -1) <= 0) {
mp_impl->mp_archiveCache->setMaxSize(new_cache_size);
}
if (getEnvVar<int>("KIWIX_SEARCHER_CACHE_SIZE", -1) <= 0) {
mp_impl->mp_searcherCache->setMaxSize(new_cache_size);
}
m_books[book.getId()] = book;
return true;
}
}
void Library::addBookmark(const Bookmark& bookmark)
{
std::lock_guard<std::mutex> lock(m_mutex);
mp_impl->m_bookmarks.push_back(bookmark);
m_bookmarks.push_back(bookmark);
}
bool Library::removeBookmark(const std::string& zimId, const std::string& url)
{
std::lock_guard<std::mutex> lock(m_mutex);
for(auto it=mp_impl->m_bookmarks.begin(); it!=mp_impl->m_bookmarks.end(); it++) {
for(auto it=m_bookmarks.begin(); it!=m_bookmarks.end(); it++) {
if (it->getBookId() == zimId && it->getUrl() == url) {
mp_impl->m_bookmarks.erase(it);
m_bookmarks.erase(it);
return true;
}
}
@@ -204,74 +74,20 @@ bool Library::removeBookmark(const std::string& zimId, const std::string& url)
}
// Evicts everything cached for book `id`: its entry in the archive cache and
// the searcher-cache entries dropped by mp_searcherCache->drop(id).
void Library::dropCache(const std::string& id)
{
mp_impl->mp_archiveCache->drop(id);
mp_impl->mp_searcherCache->drop(id);
}
bool Library::removeBookById(const std::string& id)
{
std::lock_guard<std::mutex> lock(m_mutex);
mp_impl->m_bookDB.delete_document("Q" + id);
dropCache(id);
// We do not change the cache size here
// Most of the time, the book is remove in case of library refresh, it is
// often associated with addBook calls (which will properly set the cache size)
// Having a too big cache is not a problem here (or it would have been before)
// (And setMaxSize doesn't actually reduce the cache size, extra cached items
// will be removed in put or getOrPut).
const bool bookWasRemoved = mp_impl->m_books.erase(id) == 1;
if ( bookWasRemoved ) {
++mp_impl->m_revision;
}
return bookWasRemoved;
return m_books.erase(id) == 1;
}
Library::Revision Library::getRevision() const
Book& Library::getBookById(const std::string& id)
{
std::lock_guard<std::mutex> lock(m_mutex);
return mp_impl->m_revision;
return m_books.at(id);
}
uint32_t Library::removeBooksNotUpdatedSince(Revision libraryRevision)
Book& Library::getBookByPath(const std::string& path)
{
BookIdCollection booksToRemove;
{
std::lock_guard<std::mutex> lock(m_mutex);
for ( const auto& entry : mp_impl->m_books) {
if ( entry.second.lastUpdatedRevision <= libraryRevision ) {
booksToRemove.push_back(entry.first);
}
}
}
uint32_t countOfRemovedBooks = 0;
for ( const auto& id : booksToRemove ) {
if ( removeBookById(id) )
++countOfRemovedBooks;
}
return countOfRemovedBooks;
}
const Book& Library::getBookById(const std::string& id) const
{
// XXX: Doesn't make sense to lock this operation since it cannot
// XXX: guarantee thread-safety because of its return type
return mp_impl->m_books.at(id);
}
Book Library::getBookByIdThreadSafe(const std::string& id) const
{
std::lock_guard<std::mutex> lock(m_mutex);
return getBookById(id);
}
const Book& Library::getBookByPath(const std::string& path) const
{
// XXX: Doesn't make sense to lock this operation since it cannot
// XXX: guarantee thread-safety because of its return type
for(auto& it: mp_impl->m_books) {
for(auto& it: m_books) {
auto& book = it.second;
if (book.getPath() == path)
return book;
@@ -281,141 +97,113 @@ const Book& Library::getBookByPath(const std::string& path) const
throw std::out_of_range(ss.str());
}
std::shared_ptr<zim::Archive> Library::getArchiveById(const std::string& id)
std::shared_ptr<Reader> Library::getReaderById(const std::string& id)
{
try {
return mp_impl->mp_archiveCache->getOrPut(id,
[&](){
auto book = getBookById(id);
if (!book.isPathValid()) {
throw std::invalid_argument("");
}
return std::make_shared<zim::Archive>(book.getPath());
});
} catch (std::invalid_argument&) {
return nullptr;
}
}
return m_readers.at(id);
} catch (std::out_of_range& e) {}
std::shared_ptr<ZimSearcher> Library::getSearcherByIds(const BookIdSet& ids)
{
assert(!ids.empty());
try {
return mp_impl->mp_searcherCache->getOrPut(ids,
[&](){
std::vector<zim::Archive> archives;
for(auto& id:ids) {
auto archive = getArchiveById(id);
if(!archive) {
throw std::invalid_argument("");
}
archives.push_back(*archive);
}
return std::make_shared<ZimSearcher>(zim::Searcher(archives));
});
} catch (std::invalid_argument&) {
auto book = getBookById(id);
if (!book.isPathValid())
return nullptr;
}
auto sptr = make_shared<Reader>(book.getPath());
m_readers[id] = sptr;
return sptr;
}
unsigned int Library::getBookCount(const bool localBooks,
const bool remoteBooks) const
const bool remoteBooks)
{
std::lock_guard<std::mutex> lock(m_mutex);
return mp_impl->getBookCount(localBooks, remoteBooks);
}
bool Library::writeToFile(const std::string& path) const
{
const auto allBookIds = getBooksIds();
auto baseDir = removeLastPathElement(path);
LibXMLDumper dumper(this);
dumper.setBaseDir(baseDir);
std::string xml;
{
std::lock_guard<std::mutex> lock(m_mutex);
xml = dumper.dumpLibXMLContent(allBookIds);
};
return writeTextFile(path, xml);
}
bool Library::writeBookmarksToFile(const std::string& path) const
{
LibXMLDumper dumper(this);
// NOTE: LibXMLDumper::dumpLibXMLBookmark uses Library in a thread-safe way
const std::string xml = dumper.dumpLibXMLBookmark();
return writeTextFile(path, xml);
}
Library::AttributeCounts Library::getBookAttributeCounts(BookStrPropMemFn p) const
{
std::lock_guard<std::mutex> lock(m_mutex);
AttributeCounts propValueCounts;
for (const auto& pair: mp_impl->m_books) {
const auto& book = pair.second;
if (book.getOrigId().empty()) {
propValueCounts[(book.*p)()] += 1;
unsigned int result = 0;
for (auto& pair: m_books) {
auto& book = pair.second;
if ((!book.getPath().empty() && localBooks)
|| (book.getPath().empty() && remoteBooks)) {
result++;
}
}
return propValueCounts;
}
std::vector<std::string> Library::getBookPropValueSet(BookStrPropMemFn p) const
{
std::vector<std::string> result;
for ( const auto& kv : getBookAttributeCounts(p) ) {
result.push_back(kv.first);
}
return result;
}
std::vector<std::string> Library::getBooksLanguages() const
bool Library::writeToFile(const std::string& path)
{
return getBookPropValueSet(&Book::getLanguage);
auto baseDir = removeLastPathElement(path);
LibXMLDumper dumper(this);
dumper.setBaseDir(baseDir);
return writeTextFile(path, dumper.dumpLibXMLContent(getBooksIds()));
}
Library::AttributeCounts Library::getBooksLanguagesWithCounts() const
bool Library::writeBookmarksToFile(const std::string& path)
{
return getBookAttributeCounts(&Book::getLanguage);
LibXMLDumper dumper(this);
return writeTextFile(path, dumper.dumpLibXMLBookmark());
}
std::vector<std::string> Library::getBooksCategories() const
std::vector<std::string> Library::getBooksLanguages()
{
std::lock_guard<std::mutex> lock(m_mutex);
std::set<std::string> categories;
std::vector<std::string> booksLanguages;
std::map<std::string, bool> booksLanguagesMap;
for (const auto& pair: mp_impl->m_books) {
const auto& book = pair.second;
const auto& c = book.getCategory();
if ( !c.empty() ) {
categories.insert(c);
for (auto& pair: m_books) {
auto& book = pair.second;
auto& language = book.getLanguage();
if (booksLanguagesMap.find(language) == booksLanguagesMap.end()) {
if (book.getOrigId().empty()) {
booksLanguagesMap[language] = true;
booksLanguages.push_back(language);
}
}
}
return std::vector<std::string>(categories.begin(), categories.end());
return booksLanguages;
}
std::vector<std::string> Library::getBooksCreators() const
std::vector<std::string> Library::getBooksCreators()
{
return getBookPropValueSet(&Book::getCreator);
std::vector<std::string> booksCreators;
std::map<std::string, bool> booksCreatorsMap;
for (auto& pair: m_books) {
auto& book = pair.second;
auto& creator = book.getCreator();
if (booksCreatorsMap.find(creator) == booksCreatorsMap.end()) {
if (book.getOrigId().empty()) {
booksCreatorsMap[creator] = true;
booksCreators.push_back(creator);
}
}
}
return booksCreators;
}
std::vector<std::string> Library::getBooksPublishers() const
std::vector<std::string> Library::getBooksPublishers()
{
return getBookPropValueSet(&Book::getPublisher);
std::vector<std::string> booksPublishers;
std::map<std::string, bool> booksPublishersMap;
for (auto& pair:m_books) {
auto& book = pair.second;
auto& publisher = book.getPublisher();
if (booksPublishersMap.find(publisher) == booksPublishersMap.end()) {
if (book.getOrigId().empty()) {
booksPublishersMap[publisher] = true;
booksPublishers.push_back(publisher);
}
}
}
return booksPublishers;
}
const std::vector<kiwix::Bookmark> Library::getBookmarks(bool onlyValidBookmarks) const
const std::vector<kiwix::Bookmark> Library::getBookmarks(bool onlyValidBookmarks)
{
if (!onlyValidBookmarks) {
return mp_impl->m_bookmarks;
return m_bookmarks;
}
std::vector<kiwix::Bookmark> validBookmarks;
auto booksId = getBooksIds();
std::lock_guard<std::mutex> lock(m_mutex);
for(auto& bookmark:mp_impl->m_bookmarks) {
for(auto& bookmark:m_bookmarks) {
if (std::find(booksId.begin(), booksId.end(), bookmark.getBookId()) != booksId.end()) {
validBookmarks.push_back(bookmark);
}
@@ -423,227 +211,37 @@ const std::vector<kiwix::Bookmark> Library::getBookmarks(bool onlyValidBookmarks
return validBookmarks;
}
Library::BookIdCollection Library::getBooksIds() const
std::vector<std::string> Library::getBooksIds()
{
std::lock_guard<std::mutex> lock(m_mutex);
BookIdCollection bookIds;
std::vector<std::string> bookIds;
for (auto& pair: mp_impl->m_books) {
for (auto& pair: m_books) {
bookIds.push_back(pair.first);
}
return bookIds;
}
void Library::updateBookDB(const Book& book)
std::vector<std::string> Library::filter(const std::string& search)
{
Xapian::Stem stemmer;
Xapian::TermGenerator indexer;
const std::string lang = book.getLanguage();
try {
stemmer = Xapian::Stem(iso639_3ToXapian(lang));
indexer.set_stemmer(stemmer);
indexer.set_stemming_strategy(Xapian::TermGenerator::STEM_SOME);
} catch (...) {}
Xapian::Document doc;
indexer.set_document(doc);
const std::string title = normalizeText(book.getTitle());
const std::string desc = normalizeText(book.getDescription());
// Index title and description without prefixes for general search
indexer.index_text(title);
indexer.increase_termpos();
indexer.index_text(desc);
// Index all fields for field-based search
indexer.index_text(title, 1, "S");
indexer.index_text(desc, 1, "XD");
indexer.index_text(lang, 1, "L");
indexer.index_text(normalizeText(book.getCreator()), 1, "A");
indexer.index_text(normalizeText(book.getPublisher()), 1, "XP");
indexer.index_text(normalizeText(book.getName()), 1, "XN");
indexer.index_text(normalizeText(book.getCategory()), 1, "XC");
for ( const auto& tag : split(normalizeText(book.getTags()), ";") ) {
doc.add_boolean_term("XT" + tag);
if ( tag[0] != '_' ) {
indexer.increase_termpos();
indexer.index_text(tag);
}
}
const std::string idterm = "Q" + book.getId();
doc.add_boolean_term(idterm);
doc.set_data(book.getId());
mp_impl->m_bookDB.replace_document(idterm, doc);
}
namespace
{
// True when `query` is a match-all query (type LEAF_MATCH_ALL), i.e. running
// it would not narrow the set of books at all.
bool willSelectEverything(const Xapian::Query& query)
{
return query.get_type() == Xapian::Query::LEAF_MATCH_ALL;
}
// Translates the free-text part of `filter` into a Xapian query.
// Without a (non-empty) text query the result matches everything.
Xapian::Query buildXapianQueryFromFilterQuery(const Filter& filter)
{
  const bool nothingToParse = !filter.hasQuery() || filter.getQuery().empty();
  if ( nothingToParse ) {
    // This is a thread-safe way to construct an equivalent of
    // a Xapian::Query::MatchAll query
    return Xapian::Query(std::string());
  }

  // User-visible field name -> term prefix used in the book database.
  const char* const fieldPrefixes[][2] = {
    {"title",       "S"},
    {"description", "XD"},
    {"name",        "XN"},
    {"category",    "XC"},
    {"lang",        "L"},
    {"publisher",   "XP"},
    {"creator",     "A"},
    {"tag",         "XT"},
  };

  Xapian::QueryParser parser;
  parser.set_default_op(Xapian::Query::OP_AND);
  for ( const auto& fieldPrefix : fieldPrefixes ) {
    parser.add_prefix(fieldPrefix[0], fieldPrefix[1]);
  }

  // Language assumed for the query is not known for sure so stemming
  // is not applied
  //parser.set_stemmer(Xapian::Stem(iso639_3ToXapian(???)));
  //parser.set_stemming_strategy(Xapian::QueryParser::STEM_SOME);

  unsigned flags = Xapian::QueryParser::FLAG_PHRASE
                 | Xapian::QueryParser::FLAG_BOOLEAN
                 | Xapian::QueryParser::FLAG_BOOLEAN_ANY_CASE
                 | Xapian::QueryParser::FLAG_LOVEHATE
                 | Xapian::QueryParser::FLAG_WILDCARD;
  if ( filter.queryIsPartial() ) {
    flags |= Xapian::QueryParser::FLAG_PARTIAL;
  }

  return parser.parse_query(normalizeText(filter.getQuery()), flags);
}
// Single-term query on the "XN" prefix with the normalised book name.
Xapian::Query nameQuery(const std::string& name)
{
return Xapian::Query("XN" + normalizeText(name));
}
// Single-term query on the "XC" prefix with the normalised category.
Xapian::Query categoryQuery(const std::string& category)
{
return Xapian::Query("XC" + normalizeText(category));
}
// Builds a query matching any of the languages in a comma-separated list:
// one "L"-prefixed term per language, OR-ed together left to right.
// If the split yields no language, a default-constructed query is returned.
Xapian::Query langQuery(const std::string& commaSeparatedLanguageList)
{
  const auto languages = kiwix::split(commaSeparatedLanguageList, ",");

  Xapian::Query anyOfLanguages;
  for ( auto it = languages.begin(); it != languages.end(); ++it ) {
    const Xapian::Query oneLanguage("L" + normalizeText(*it));
    anyOfLanguages = (it == languages.begin())
                   ? oneLanguage
                   : Xapian::Query(Xapian::Query::OP_OR, anyOfLanguages, oneLanguage);
  }
  return anyOfLanguages;
}
// Shared implementation of publisherQuery() and creatorQuery():
// tokenises the normalised `text` into terms carrying `prefix` (no stemming,
// no parser flags) and requires those terms to occur as a phrase.
Xapian::Query phraseQueryForPrefix(const std::string& text, const char* prefix)
{
  Xapian::QueryParser parser;
  parser.set_default_op(Xapian::Query::OP_OR);
  parser.set_stemming_strategy(Xapian::QueryParser::STEM_NONE);

  const Xapian::Query terms = parser.parse_query(normalizeText(text), 0, prefix);
  return Xapian::Query(Xapian::Query::OP_PHRASE,
                       terms.get_terms_begin(),
                       terms.get_terms_end(),
                       terms.get_length());
}

// Phrase query on the publisher field (prefix "XP").
Xapian::Query publisherQuery(const std::string& publisher)
{
  return phraseQueryForPrefix(publisher, "XP");
}

// Phrase query on the creator field (prefix "A").
Xapian::Query creatorQuery(const std::string& creator)
{
  return phraseQueryForPrefix(creator, "A");
}
// Builds the tag constraint: starts from a match-all query, AND-s in one
// "XT" term per accepted tag, then excludes (AND_NOT) each rejected tag.
Xapian::Query tagsQuery(const Filter::Tags& acceptTags, const Filter::Tags& rejectTags)
{
  Xapian::Query constraint = Xapian::Query(std::string());

  // Iterating an empty collection is a no-op, so no emptiness checks needed.
  for ( const auto& accepted : acceptTags ) {
    constraint &= Xapian::Query("XT" + normalizeText(accepted));
  }
  for ( const auto& rejected : rejectTags ) {
    constraint = Xapian::Query(Xapian::Query::OP_AND_NOT, constraint, "XT" + normalizeText(rejected));
  }
  return constraint;
}
// Combines every active criterion of `filter` into a single Xapian query:
// the free-text query AND-ed with one sub-query per active field
// (name, category, language, publisher, creator, tags), in that order.
Xapian::Query buildXapianQuery(const Filter& filter)
{
  Xapian::Query combined = buildXapianQueryFromFilterQuery(filter);
  const auto restrictWith = [&combined](const Xapian::Query& constraint) {
    combined = Xapian::Query(Xapian::Query::OP_AND, combined, constraint);
  };

  if ( filter.hasName() ) {
    restrictWith(nameQuery(filter.getName()));
  }
  if ( filter.hasCategory() ) {
    restrictWith(categoryQuery(filter.getCategory()));
  }
  if ( filter.hasLang() ) {
    restrictWith(langQuery(filter.getLang()));
  }
  if ( filter.hasPublisher() ) {
    restrictWith(publisherQuery(filter.getPublisher()));
  }
  if ( filter.hasCreator() ) {
    restrictWith(creatorQuery(filter.getCreator()));
  }

  const bool hasTagConstraint = !filter.getAcceptTags().empty()
                             || !filter.getRejectTags().empty();
  if ( hasTagConstraint ) {
    restrictWith(tagsQuery(filter.getAcceptTags(), filter.getRejectTags()));
  }
  return combined;
}
} // unnamed namespace
Library::BookIdCollection Library::filterViaBookDB(const Filter& filter) const
{
const auto query = buildXapianQuery(filter);
if ( willSelectEverything(query) )
if (search.empty()) {
return getBooksIds();
BookIdCollection bookIds;
std::lock_guard<std::mutex> lock(m_mutex);
Xapian::Enquire enquire(mp_impl->m_bookDB);
enquire.set_query(query);
const auto results = enquire.get_mset(0, mp_impl->m_books.size());
for ( auto it = results.begin(); it != results.end(); ++it ) {
bookIds.push_back(it.get_document().get_data());
}
return bookIds;
return filter(Filter().query(search));
}
Library::BookIdCollection Library::filter(const Filter& filter) const
std::vector<std::string> Library::filter(const Filter& filter)
{
BookIdCollection result;
const auto preliminaryResult = filterViaBookDB(filter);
std::lock_guard<std::mutex> lock(m_mutex);
for(auto id : preliminaryResult) {
if(filter.accept(mp_impl->m_books.at(id))) {
result.push_back(id);
std::vector<std::string> bookIds;
for(auto& pair:m_books) {
auto book = pair.second;
if(filter.accept(book)) {
bookIds.push_back(pair.first);
}
}
return result;
return bookIds;
}
template<supportedListSortBy SORT>
@@ -659,13 +257,13 @@ struct KEY_TYPE<SIZE> {
template<supportedListSortBy sort>
class Comparator {
private:
const Library* const lib;
const bool ascending;
Library* lib;
bool ascending;
inline typename KEY_TYPE<sort>::TYPE get_key(const std::string& id);
public:
Comparator(const Library* lib, bool ascending) : lib(lib), ascending(ascending) {}
Comparator(Library* lib, bool ascending) : lib(lib), ascending(ascending) {}
inline bool operator() (const std::string& id1, const std::string& id2) {
if (ascending) {
return get_key(id1) < get_key(id2);
@@ -705,13 +303,8 @@ std::string Comparator<PUBLISHER>::get_key(const std::string& id)
return lib->getBookById(id).getPublisher();
}
void Library::sort(BookIdCollection& bookIds, supportedListSortBy sort, bool ascending) const
void Library::sort(std::vector<std::string>& bookIds, supportedListSortBy sort, bool ascending)
{
// NOTE: Can reimplement this method in a way that doesn't require locking
// NOTE: for the entire duration of the sort. Will need to obtain (under a
// NOTE: lock) the required atributes from the books once, and then the
// NOTE: sorting will run on a copy of data without locking.
std::lock_guard<std::mutex> lock(m_mutex);
switch(sort) {
case TITLE:
std::sort(bookIds.begin(), bookIds.end(), Comparator<TITLE>(this, ascending));
@@ -734,6 +327,48 @@ void Library::sort(BookIdCollection& bookIds, supportedListSortBy sort, bool asc
}
std::vector<std::string> Library::listBooksIds(
int mode,
supportedListSortBy sortBy,
const std::string& search,
const std::string& language,
const std::string& creator,
const std::string& publisher,
const std::vector<std::string>& tags,
size_t maxSize) {
Filter _filter;
if (mode & LOCAL)
_filter.local(true);
if (mode & NOLOCAL)
_filter.local(false);
if (mode & VALID)
_filter.valid(true);
if (mode & NOVALID)
_filter.valid(false);
if (mode & REMOTE)
_filter.remote(true);
if (mode & NOREMOTE)
_filter.remote(false);
if (!tags.empty())
_filter.acceptTags(tags);
if (maxSize != 0)
_filter.maxSize(maxSize);
if (!language.empty())
_filter.lang(language);
if (!publisher.empty())
_filter.publisher(publisher);
if (!creator.empty())
_filter.creator(creator);
if (!search.empty())
_filter.query(search);
auto bookIds = filter(_filter);
sort(bookIds, sortBy, true);
return bookIds;
}
Filter::Filter()
: activeFilters(0),
_maxSize(0)
@@ -756,7 +391,6 @@ enum filterTypes {
MAXSIZE = FLAG(11),
QUERY = FLAG(12),
NAME = FLAG(13),
CATEGORY = FLAG(14),
};
Filter& Filter::local(bool accept)
@@ -795,27 +429,20 @@ Filter& Filter::valid(bool accept)
return *this;
}
Filter& Filter::acceptTags(const Tags& tags)
Filter& Filter::acceptTags(std::vector<std::string> tags)
{
_acceptTags = tags;
activeFilters |= ACCEPTTAGS;
return *this;
}
Filter& Filter::rejectTags(const Tags& tags)
Filter& Filter::rejectTags(std::vector<std::string> tags)
{
_rejectTags = tags;
activeFilters |= REJECTTAGS;
return *this;
}
// Restricts the filter to the given category and marks the CATEGORY
// criterion as active. Returns *this so that calls can be chained.
Filter& Filter::category(std::string category)
{
_category = category;
activeFilters |= CATEGORY;
return *this;
}
Filter& Filter::lang(std::string lang)
{
_lang = lang;
@@ -844,10 +471,9 @@ Filter& Filter::maxSize(size_t maxSize)
return *this;
}
Filter& Filter::query(std::string query, bool partial)
Filter& Filter::query(std::string query)
{
_query = query;
_queryIsPartial = partial;
activeFilters |= QUERY;
return *this;
}
@@ -861,36 +487,6 @@ Filter& Filter::name(std::string name)
#define ACTIVE(X) (activeFilters & (X))
#define FILTER(TAG, TEST) if (ACTIVE(TAG) && !(TEST)) { return false; }
// The has*() predicates below report whether the corresponding criterion has
// been activated on this filter, i.e. whether its bit is set in
// activeFilters (tested through the ACTIVE macro).

// True when a free-text query criterion is active.
bool Filter::hasQuery() const
{
return ACTIVE(QUERY);
}
// True when a book-name criterion is active.
bool Filter::hasName() const
{
return ACTIVE(NAME);
}
// True when a category criterion is active.
bool Filter::hasCategory() const
{
return ACTIVE(CATEGORY);
}
// True when a language criterion is active.
bool Filter::hasLang() const
{
return ACTIVE(LANG);
}
// True when a publisher criterion is active.
bool Filter::hasPublisher() const
{
return ACTIVE(_PUBLISHER);
}
// True when a creator criterion is active.
bool Filter::hasCreator() const
{
return ACTIVE(_CREATOR);
}
bool Filter::accept(const Book& book) const
{
auto local = !book.getPath().empty();
@@ -906,8 +502,40 @@ bool Filter::accept(const Book& book) const
FILTER(_NOREMOTE, !remote)
FILTER(MAXSIZE, book.getSize() <= _maxSize)
FILTER(LANG, book.getLanguage() == _lang)
FILTER(_PUBLISHER, book.getPublisher() == _publisher)
FILTER(_CREATOR, book.getCreator() == _creator)
FILTER(NAME, book.getName() == _name)
if (ACTIVE(ACCEPTTAGS)) {
if (!_acceptTags.empty()) {
auto vBookTags = split(book.getTags(), ";");
std::set<std::string> sBookTags(vBookTags.begin(), vBookTags.end());
for (auto& t: _acceptTags) {
if (sBookTags.find(t) == sBookTags.end()) {
return false;
}
}
}
}
if (ACTIVE(REJECTTAGS)) {
if (!_rejectTags.empty()) {
auto vBookTags = split(book.getTags(), ";");
std::set<std::string> sBookTags(vBookTags.begin(), vBookTags.end());
for (auto& t: _rejectTags) {
if (sBookTags.find(t) != sBookTags.end()) {
return false;
}
}
}
}
if ( ACTIVE(QUERY)
&& !(matchRegex(book.getTitle(), "\\Q" + _query + "\\E")
|| matchRegex(book.getDescription(), "\\Q" + _query + "\\E")))
return false;
return true;
}
}

View File

@@ -20,15 +20,15 @@
#include "libxml_dumper.h"
#include "book.h"
#include "tools.h"
#include "tools/base64.h"
#include "tools/stringTools.h"
#include "tools/otherTools.h"
#include "tools/pathTools.h"
namespace kiwix
{
/* Constructor */
LibXMLDumper::LibXMLDumper(const Library* library)
LibXMLDumper::LibXMLDumper(Library* library)
: library(library)
{
}
@@ -60,13 +60,10 @@ void LibXMLDumper::handleBook(Book book, pugi::xml_node root_node) {
ADD_ATTR_NOT_EMPTY(entry_node, "name", book.getName());
ADD_ATTR_NOT_EMPTY(entry_node, "flavour", book.getFlavour());
ADD_ATTR_NOT_EMPTY(entry_node, "tags", book.getTags());
try {
auto defaultIllustration = book.getIllustration(48);
ADD_ATTR_NOT_EMPTY(entry_node, "faviconMimeType", defaultIllustration->mimeType);
ADD_ATTR_NOT_EMPTY(entry_node, "faviconUrl", defaultIllustration->url);
if (!defaultIllustration->getData().empty())
ADD_ATTRIBUTE(entry_node, "favicon", base64_encode(defaultIllustration->getData()));
} catch(...) {}
ADD_ATTR_NOT_EMPTY(entry_node, "faviconMimeType", book.getFaviconMimeType());
ADD_ATTR_NOT_EMPTY(entry_node, "faviconUrl", book.getFaviconUrl());
if (!book.getFavicon().empty())
ADD_ATTRIBUTE(entry_node, "favicon", base64_encode(book.getFavicon()));
} else {
ADD_ATTRIBUTE(entry_node, "origId", book.getOrigId());
}
@@ -94,7 +91,7 @@ void LibXMLDumper::handleBookmark(Bookmark bookmark, pugi::xml_node root_node) {
auto book_node = entry_node.append_child("book");
try {
auto book = library->getBookByIdThreadSafe(bookmark.getBookId());
auto book = library->getBookById(bookmark.getBookId());
ADD_TEXT_ENTRY(book_node, "id", book.getId());
ADD_TEXT_ENTRY(book_node, "title", book.getTitle());
ADD_TEXT_ENTRY(book_node, "language", book.getLanguage());

View File

@@ -19,88 +19,34 @@
#include "manager.h"
#include "tools.h"
#include "tools/pathTools.h"
#include <pugixml.hpp>
namespace kiwix
{
namespace
{
// Deleter that does nothing: calling it leaves the pointee untouched.
// Used where a smart pointer must wrap an object it does not own.
struct NoDelete
{
template<class T> void operator()(T*) {}
};
} // unnamed namespace
////////////////////////////////////////////////////////////////////////////////
// LibraryManipulator
////////////////////////////////////////////////////////////////////////////////
// Binds the manipulator to `*library`. The pointer is dereferenced
// immediately, so it must not be null.
LibraryManipulator::LibraryManipulator(Library* library)
: library(*library)
{}
// Nothing to release.
LibraryManipulator::~LibraryManipulator()
{}
// Adds `book` to the library and returns Library::addBook()'s result.
// The bookWasAddedToLibrary() hook fires only when that result is true.
bool LibraryManipulator::addBookToLibrary(const Book& book)
{
  if ( !library.addBook(book) ) {
    return false;
  }
  bookWasAddedToLibrary(book);
  return true;
}
// Adds `bookmark` to the library, then fires the
// bookmarkWasAddedToLibrary() hook unconditionally.
void LibraryManipulator::addBookmarkToLibrary(const Bookmark& bookmark)
{
library.addBookmark(bookmark);
bookmarkWasAddedToLibrary(bookmark);
}
// Forwards to Library::removeBooksNotUpdatedSince(rev) and returns the number
// of books removed. The booksWereRemovedFromLibrary() hook fires only when
// at least one book was removed.
uint32_t LibraryManipulator::removeBooksNotUpdatedSince(Library::Revision rev)
{
  const auto removedCount = library.removeBooksNotUpdatedSince(rev);
  const bool somethingWasRemoved = (removedCount != 0);
  if ( somethingWasRemoved ) {
    booksWereRemovedFromLibrary();
  }
  return removedCount;
}
// Notification hooks invoked by the add*/remove* methods of this class.
// The implementations here are intentionally empty.

// Called after a book has been added to the library.
void LibraryManipulator::bookWasAddedToLibrary(const Book& book)
{
}
// Called after a bookmark has been added to the library.
void LibraryManipulator::bookmarkWasAddedToLibrary(const Bookmark& bookmark)
{
}
// Called after one or more books have been removed from the library.
void LibraryManipulator::booksWereRemovedFromLibrary()
{
}
////////////////////////////////////////////////////////////////////////////////
// Manager
////////////////////////////////////////////////////////////////////////////////
/* Constructor */
Manager::Manager(LibraryManipulator* manipulator):
writableLibraryPath(""),
manipulator(manipulator, NoDelete())
manipulator(manipulator),
mustDeleteManipulator(false)
{
}
Manager::Manager(Library* library) :
writableLibraryPath(""),
manipulator(new LibraryManipulator(library))
manipulator(new DefaultLibraryManipulator(library)),
mustDeleteManipulator(true)
{
}
/* Destructor */
// Deletes the manipulator only when mustDeleteManipulator is set; otherwise
// the manipulator is left alone.
Manager::~Manager()
{
if (mustDeleteManipulator) {
delete manipulator;
}
}
bool Manager::parseXmlDom(const pugi::xml_document& doc,
bool readOnly,
const std::string& libraryPath,
@@ -134,7 +80,7 @@ bool Manager::readXml(const std::string& xml,
{
pugi::xml_document doc;
pugi::xml_parse_result result
= doc.load_buffer((void*)xml.data(), xml.size());
= doc.load_buffer_inplace((void*)xml.data(), xml.size());
if (result) {
this->parseXmlDom(doc, readOnly, libraryPath, trustLibrary);
@@ -178,7 +124,7 @@ bool Manager::readOpds(const std::string& content, const std::string& urlHost)
{
pugi::xml_document doc;
pugi::xml_parse_result result
= doc.load_buffer((void*)content.data(), content.size());
= doc.load_buffer_inplace((void*)content.data(), content.size());
if (result) {
this->parseOpdsDom(doc, urlHost);
@@ -268,8 +214,8 @@ bool Manager::readBookFromPath(const std::string& path, kiwix::Book* book)
tmp_path = computeAbsolutePath(getCurrentDirectory(), path);
}
try {
zim::Archive archive(tmp_path);
book->update(archive);
kiwix::Reader reader(tmp_path);
book->update(reader);
book->setPathValid(true);
} catch (const std::exception& e) {
book->setPathValid(false);
@@ -302,21 +248,4 @@ bool Manager::readBookmarkFile(const std::string& path)
return true;
}
// Reloads the library from the given XML library files.
// Every non-empty path is read (relative paths are resolved against the
// current directory); afterwards, books that were not updated since the
// revision recorded before the reload are removed.
// Throws std::runtime_error as soon as one file fails to load; in that case
// no books are removed.
void Manager::reload(const Paths& paths)
{
  const auto revisionBeforeReload = manipulator->getLibrary().getRevision();

  for (const std::string& requestedPath : paths) {
    if (requestedPath.empty()) {
      continue;
    }

    std::string path = kiwix::isRelativePath(requestedPath)
                     ? kiwix::computeAbsolutePath(kiwix::getCurrentDirectory(), requestedPath)
                     : requestedPath;

    const bool loaded = readFile(path, false, true);
    if (!loaded) {
      throw std::runtime_error("Failed to load the XML library file '" + path + "'.");
    }
  }

  manipulator->removeBooksNotUpdatedSince(revisionBeforeReload);
}
}

View File

@@ -6,7 +6,10 @@ kiwix_sources = [
'libxml_dumper.cpp',
'opds_dumper.cpp',
'downloader.cpp',
'reader.cpp',
'entry.cpp',
'server.cpp',
'searcher.cpp',
'search_renderer.cpp',
'subprocess.cpp',
'aria2.cpp',
@@ -16,21 +19,14 @@ kiwix_sources = [
'tools/stringTools.cpp',
'tools/networkTools.cpp',
'tools/otherTools.cpp',
'tools/archiveTools.cpp',
'kiwixserve.cpp',
'name_mapper.cpp',
'server/byte_range.cpp',
'server/etag.cpp',
'server/request_context.cpp',
'server/response.cpp',
'server/internalServer.cpp',
'server/internalServer_catalog_v2.cpp',
'server/i18n.cpp',
'opds_catalog.cpp',
'version.cpp'
'server/response.cpp'
]
kiwix_sources += lib_resources
kiwix_sources += i18n_resources
if host_machine.system() == 'windows'
kiwix_sources += 'subprocess_windows.cpp'
@@ -38,14 +34,22 @@ else
kiwix_sources += 'subprocess_unix.cpp'
endif
install_dir = get_option('libdir')
if wrapper.contains('android')
install_dir = 'kiwix-lib/jniLibs/' + meson.get_cross_property('android_abi')
else
install_dir = get_option('libdir')
endif
if wrapper.contains('android') or wrapper.contains('java')
subdir('wrapper/java')
endif
config_h = configure_file(output : 'kiwix_config.h',
configuration : conf,
input : 'config.h.in')
install_headers(config_h, subdir:'kiwix')
libkiwix = library('kiwix',
kiwixlib = library('kiwix',
kiwix_sources,
include_directories : inc,
dependencies : all_deps,

View File

@@ -51,54 +51,12 @@ HumanReadableNameMapper::HumanReadableNameMapper(kiwix::Library& library, bool w
}
}
std::string HumanReadableNameMapper::getNameForId(const std::string& id) const {
std::string HumanReadableNameMapper::getNameForId(const std::string& id) {
return m_idToName.at(id);
}
std::string HumanReadableNameMapper::getIdForName(const std::string& name) const {
std::string HumanReadableNameMapper::getIdForName(const std::string& name) {
return m_nameToId.at(name);
}
////////////////////////////////////////////////////////////////////////////////
// UpdatableNameMapper
////////////////////////////////////////////////////////////////////////////////
// Remembers the library and the alias setting, then builds the initial
// name mapper via update().
UpdatableNameMapper::UpdatableNameMapper(Library& lib, bool withAlias)
: library(lib)
, withAlias(withAlias)
{
update();
}
// Rebuilds the name mapping from the library and installs it as the current
// one.
void UpdatableNameMapper::update()
{
  // The replacement mapper is built before the mutex is taken, so the lock
  // only covers the handle swap.
  auto* const freshMapper = new HumanReadableNameMapper(library, withAlias);

  const std::lock_guard<std::mutex> guard(mutex);
  nameMapper.reset(freshMapper);
}
// Returns a copy of the handle to the mapper that is current at call time.
// Holding the copy keeps that mapper alive even if update() installs a new
// one before the caller has finished using it.
UpdatableNameMapper::NameMapperHandle
UpdatableNameMapper::currentNameMapper() const
{
  const std::lock_guard<std::mutex> guard(mutex);
  NameMapperHandle snapshot = nameMapper;
  return snapshot;
}
std::string UpdatableNameMapper::getNameForId(const std::string& id) const
{
// Ensure that the current nameMapper object survives a concurrent call
// to UpdatableNameMapper::update()
return currentNameMapper()->getNameForId(id);
}
std::string UpdatableNameMapper::getIdForName(const std::string& name) const
{
// Ensure that the current nameMapper object survives a concurrent call
// to UpdatableNameMapper::update()
return currentNameMapper()->getIdForName(name);
}
}

View File

@@ -1,74 +0,0 @@
/*
* Copyright 2021 Veloman Yunkan <veloman.yunkan@gmail.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*/
#include "opds_catalog.h"
#include "tools/stringTools.h"
#include <sstream>
namespace kiwix
{
namespace
{
const char opdsSearchEndpoint[] = "/catalog/v2/entries";
// Marker type: streaming AMP into an std::ostringstream emits "&" only when
// something has already been written to the stream.
enum Separator { AMP };

// The `sep` argument only selects this overload; its value is not inspected.
std::ostringstream& operator<<(std::ostringstream& oss, Separator sep)
{
  const bool streamHasContent = oss.tellp() > 0;
  if ( streamHasContent ) {
    oss << "&";
  }
  return oss;
}
// Serialises the criteria set on the filter as an URL query string
// ("key=value" pairs joined with '&', values URL-encoded). Criteria that are
// not set are left out; an empty string is returned when none is set.
std::string buildSearchString(const Filter& f)
{
  std::ostringstream query;

  if ( f.hasQuery() ) {
    query << AMP << "q=" << urlEncode(f.getQuery());
  }

  if ( f.hasCategory() ) {
    query << AMP << "category=" << urlEncode(f.getCategory());
  }

  if ( f.hasLang() ) {
    query << AMP << "lang=" << urlEncode(f.getLang());
  }

  if ( f.hasName() ) {
    query << AMP << "name=" << urlEncode(f.getName());
  }

  // Accepted tags are sent as a single ';'-separated value.
  const bool hasAcceptTags = !f.getAcceptTags().empty();
  if ( hasAcceptTags ) {
    query << AMP << "tag=" << urlEncode(join(f.getAcceptTags(), ";"));
  }

  return query.str();
}
} // unnamed namespace
std::string getSearchUrl(const Filter& f)
{
const std::string searchString = buildSearchString(f);
if ( searchString.empty() )
return opdsSearchEndpoint;
else
return opdsSearchEndpoint + ("?" + searchString);
}
} // namespace kiwix

View File

@@ -20,19 +20,14 @@
#include "opds_dumper.h"
#include "book.h"
#include "libkiwix-resources.h"
#include <mustache.hpp>
#include "tools/stringTools.h"
#include "tools/otherTools.h"
#include <iomanip>
namespace kiwix
{
/* Constructor */
OPDSDumper::OPDSDumper(Library* library, NameMapper* nameMapper)
: library(library),
nameMapper(nameMapper)
OPDSDumper::OPDSDumper(Library* library)
: library(library)
{
}
/* Destructor */
@@ -40,254 +35,120 @@ OPDSDumper::~OPDSDumper()
{
}
// Returns the current time formatted as "YYYY-MM-DDTHH:MM:SSZ".
//
// The trailing 'Z' denotes UTC, so the broken-down time is taken from
// gmtime() rather than localtime(). `tm_mon` counts months from 0, hence
// the +1 to obtain the calendar month.
std::string gen_date_str()
{
  const time_t now = time(0);
  const struct tm* const utc = gmtime(&now);

  std::stringstream is;
  // The fill character is sticky; the width has to be set before each field.
  is << std::setfill('0')
     << std::setw(4) << 1900 + utc->tm_year << "-"
     << std::setw(2) << utc->tm_mon + 1 << "-"
     << std::setw(2) << utc->tm_mday << "T"
     << std::setw(2) << utc->tm_hour << ":"
     << std::setw(2) << utc->tm_min << ":"
     << std::setw(2) << utc->tm_sec << "Z";
  return is.str();
}
// Turns a "YYYY-MM-DD" date into a timestamp at midnight UTC
// ("YYYY-MM-DDT00:00:00Z"). The input is not validated; it is copied as is.
static std::string gen_date_from_yyyy_mm_dd(const std::string& date)
{
  // The previous suffix "T00:00::00:Z" had a doubled ':' and a stray ':'
  // before 'Z', which does not form a valid timestamp.
  return date + "T00:00:00Z";
}
// Records the OpenSearch paging values and marks the dumper as producing a
// search result.
void OPDSDumper::setOpenSearchInfo(int totalResults, int startIndex, int count)
{
  m_totalResults = totalResults;
  // This line used to end with ',' (comma operator) instead of ';'.
  m_startIndex = startIndex;
  m_count = count;
  m_isSearchResult = true;
}
namespace
{
#define ADD_TEXT_ENTRY(node, child, value) (node).append_child((child)).append_child(pugi::node_pcdata).set_value((value).c_str())
const std::string XML_HEADER(R"(<?xml version="1.0" encoding="UTF-8"?>)");
pugi::xml_node OPDSDumper::handleBook(Book book, pugi::xml_node root_node) {
auto entry_node = root_node.append_child("entry");
ADD_TEXT_ENTRY(entry_node, "id", "urn:uuid:"+book.getId());
ADD_TEXT_ENTRY(entry_node, "title", book.getTitle());
ADD_TEXT_ENTRY(entry_node, "summary", book.getDescription());
ADD_TEXT_ENTRY(entry_node, "language", book.getLanguage());
ADD_TEXT_ENTRY(entry_node, "updated", gen_date_from_yyyy_mm_dd(book.getDate()));
ADD_TEXT_ENTRY(entry_node, "name", book.getName());
ADD_TEXT_ENTRY(entry_node, "flavour", book.getFlavour());
ADD_TEXT_ENTRY(entry_node, "tags", book.getTags());
ADD_TEXT_ENTRY(entry_node, "articleCount", to_string(book.getArticleCount()));
ADD_TEXT_ENTRY(entry_node, "mediaCount", to_string(book.getMediaCount()));
ADD_TEXT_ENTRY(entry_node, "icon", rootLocation + "/meta?name=favicon&content=" + book.getHumanReadableIdFromPath());
typedef kainjow::mustache::data MustacheData;
typedef kainjow::mustache::list BooksData;
typedef kainjow::mustache::list IllustrationInfo;
auto content_node = entry_node.append_child("link");
content_node.append_attribute("type") = "text/html";
content_node.append_attribute("href") = (rootLocation + "/" + book.getHumanReadableIdFromPath()).c_str();
IllustrationInfo getBookIllustrationInfo(const Book& book)
{
kainjow::mustache::list illustrations;
if ( book.isPathValid() ) {
for ( const auto& illustration : book.getIllustrations() ) {
// For now, we are handling only sizexsize@1 illustration.
// So we can simply pass one size to mustache.
illustrations.push_back(kainjow::mustache::object{
{"icon_size", to_string(illustration->width)},
{"icon_mimetype", illustration->mimeType}
});
}
}
return illustrations;
auto author_node = entry_node.append_child("author");
ADD_TEXT_ENTRY(author_node, "name", book.getCreator());
auto publisher_node = entry_node.append_child("publisher");
ADD_TEXT_ENTRY(publisher_node, "name", book.getPublisher());
if (! book.getUrl().empty()) {
auto acquisition_link = entry_node.append_child("link");
acquisition_link.append_attribute("rel") = "http://opds-spec.org/acquisition/open-access";
acquisition_link.append_attribute("type") = "application/x-zim";
acquisition_link.append_attribute("href") = book.getUrl().c_str();
acquisition_link.append_attribute("length") = to_string(book.getSize()).c_str();
}
if (! book.getFaviconMimeType().empty() ) {
auto image_link = entry_node.append_child("link");
image_link.append_attribute("rel") = "http://opds-spec.org/image/thumbnail";
image_link.append_attribute("type") = book.getFaviconMimeType().c_str();
image_link.append_attribute("href") = (rootLocation + "/meta?name=favicon&content=" + book.getHumanReadableIdFromPath()).c_str();
}
return entry_node;
}
std::string fullEntryXML(const Book& book, const std::string& rootLocation, const std::string& contentId)
string OPDSDumper::dumpOPDSFeed(const std::vector<std::string>& bookIds)
{
const auto bookDate = book.getDate() + "T00:00:00Z";
const kainjow::mustache::object data{
{"root", rootLocation},
{"id", book.getId()},
{"name", book.getName()},
{"title", book.getTitle()},
{"description", book.getDescription()},
{"language", book.getLanguage()},
{"content_id", urlEncode(contentId)},
{"updated", bookDate}, // XXX: this should be the entry update datetime
{"book_date", bookDate},
{"category", book.getCategory()},
{"flavour", book.getFlavour()},
{"tags", book.getTags()},
{"article_count", to_string(book.getArticleCount())},
{"media_count", to_string(book.getMediaCount())},
{"author_name", book.getCreator()},
{"publisher_name", book.getPublisher()},
{"url", onlyAsNonEmptyMustacheValue(book.getUrl())},
{"size", to_string(book.getSize())},
{"icons", getBookIllustrationInfo(book)},
};
return render_template(RESOURCE::templates::catalog_v2_entry_xml, data);
}
date = gen_date_str();
pugi::xml_document doc;
std::string partialEntryXML(const Book& book, const std::string& rootLocation)
{
const auto bookDate = book.getDate() + "T00:00:00Z";
const kainjow::mustache::object data{
{"root", rootLocation},
{"endpoint_root", rootLocation + "/catalog/v2"},
{"id", book.getId()},
{"title", book.getTitle()},
{"updated", bookDate}, // XXX: this should be the entry update datetime
};
const auto xmlTemplate = RESOURCE::templates::catalog_v2_partial_entry_xml;
return render_template(xmlTemplate, data);
}
auto root_node = doc.append_child("feed");
root_node.append_attribute("xmlns") = "http://www.w3.org/2005/Atom";
root_node.append_attribute("xmlns:opds") = "http://opds-spec.org/2010/catalog";
BooksData getBooksData(const Library* library, const NameMapper* nameMapper, const std::vector<std::string>& bookIds, const std::string& rootLocation, bool partial)
{
BooksData booksData;
for ( const auto& bookId : bookIds ) {
try {
const Book book = library->getBookByIdThreadSafe(bookId);
const std::string contentId = nameMapper->getNameForId(bookId);
const auto entryXML = partial
? partialEntryXML(book, rootLocation)
: fullEntryXML(book, rootLocation, contentId);
booksData.push_back(kainjow::mustache::object{ {"entry", entryXML} });
} catch ( const std::out_of_range& ) {
// the book was removed from the library since its id was obtained
// ignore it
ADD_TEXT_ENTRY(root_node, "id", id);
ADD_TEXT_ENTRY(root_node, "title", title);
ADD_TEXT_ENTRY(root_node, "updated", date);
if (m_isSearchResult) {
ADD_TEXT_ENTRY(root_node, "totalResults", to_string(m_totalResults));
ADD_TEXT_ENTRY(root_node, "startIndex", to_string(m_startIndex));
ADD_TEXT_ENTRY(root_node, "itemsPerPage", to_string(m_count));
}
auto self_link_node = root_node.append_child("link");
self_link_node.append_attribute("rel") = "self";
self_link_node.append_attribute("href") = "";
self_link_node.append_attribute("type") = "application/atom+xml";
if (!searchDescriptionUrl.empty() ) {
auto search_link = root_node.append_child("link");
search_link.append_attribute("rel") = "search";
search_link.append_attribute("type") = "application/opensearchdescription+xml";
search_link.append_attribute("href") = searchDescriptionUrl.c_str();
}
if (library) {
for (auto& bookId: bookIds) {
handleBook(library->getBookById(bookId), root_node);
}
}
return booksData;
}
std::map<std::string, std::string> iso639_3 = {
{"atj", "atikamekw"},
{"azb", "آذربایجان دیلی"},
{"bcl", "central bikol"},
{"bgs", "tagabawa"},
{"bxr", "буряад хэлэн"},
{"cbk", "chavacano"},
{"cdo", "閩東語"},
{"dag", "Dagbani"},
{"diq", "dimli"},
{"dty", "डोटेली"},
{"eml", "emiliân-rumagnōl"},
{"fbs", "српскохрватски"},
{"ido", "ido"},
{"kbp", "kabɩ"},
{"kld", "Gamilaraay"},
{"lbe", "лакку маз"},
{"lbj", "ལ་དྭགས་སྐད་"},
{"map", "Austronesian"},
{"mhr", "марий йылме"},
{"mnw", "ဘာသာမန်"},
{"myn", "mayan"},
{"nah", "nahuatl"},
{"nai", "north American Indian"},
{"nds", "plattdütsch"},
{"nrm", "bhasa narom"},
{"olo", "livvi"},
{"pih", "Pitcairn-Norfolk"},
{"pnb", "Western Panjabi"},
{"rmr", "Caló"},
{"rmy", "romani shib"},
{"roa", "romance languages"},
{"twi", "twi"}
};
std::once_flag fillLanguagesFlag;
void fillLanguagesMap()
{
for (auto icuLangPtr = icu::Locale::getISOLanguages(); *icuLangPtr != NULL; ++icuLangPtr) {
const ICULanguageInfo lang(*icuLangPtr);
iso639_3.insert({lang.iso3Code(), lang.selfName()});
}
}
std::string getLanguageSelfName(const std::string& lang) {
const auto itr = iso639_3.find(lang);
if (itr != iso639_3.end()) {
return itr->second;
}
return lang;
};
} // unnamed namespace
string OPDSDumper::dumpOPDSFeed(const std::vector<std::string>& bookIds, const std::string& query) const
{
const auto booksData = getBooksData(library, nameMapper, bookIds, rootLocation, false);
const kainjow::mustache::object template_data{
{"date", gen_date_str()},
{"root", rootLocation},
{"feed_id", gen_uuid(libraryId + "/catalog/search?"+query)},
{"filter", onlyAsNonEmptyMustacheValue(query)},
{"totalResults", to_string(m_totalResults)},
{"startIndex", to_string(m_startIndex)},
{"itemsPerPage", to_string(m_count)},
{"books", booksData }
};
return render_template(RESOURCE::templates::catalog_entries_xml, template_data);
}
string OPDSDumper::dumpOPDSFeedV2(const std::vector<std::string>& bookIds, const std::string& query, bool partial) const
{
const auto endpointRoot = rootLocation + "/catalog/v2";
const auto booksData = getBooksData(library, nameMapper, bookIds, rootLocation, partial);
const char* const endpoint = partial ? "/partial_entries" : "/entries";
const kainjow::mustache::object template_data{
{"date", gen_date_str()},
{"endpoint_root", endpointRoot},
{"feed_id", gen_uuid(libraryId + endpoint + "?" + query)},
{"filter", onlyAsNonEmptyMustacheValue(query)},
{"query", query.empty() ? "" : "?" + query},
{"totalResults", to_string(m_totalResults)},
{"startIndex", to_string(m_startIndex)},
{"itemsPerPage", to_string(m_count)},
{"books", booksData },
{"dump_partial_entries", MustacheData(partial)}
};
return render_template(RESOURCE::templates::catalog_v2_entries_xml, template_data);
}
std::string OPDSDumper::dumpOPDSCompleteEntry(const std::string& bookId) const
{
const auto book = library->getBookById(bookId);
const std::string contentId = nameMapper->getNameForId(bookId);
return XML_HEADER
+ "\n"
+ fullEntryXML(book, rootLocation, contentId);
}
std::string OPDSDumper::categoriesOPDSFeed() const
{
const auto now = gen_date_str();
kainjow::mustache::list categoryData;
for ( const auto& category : library->getBooksCategories() ) {
const auto urlencodedCategoryName = urlEncode(category);
categoryData.push_back(kainjow::mustache::object{
{"name", category},
{"urlencoded_name", urlencodedCategoryName},
{"updated", now},
{"id", gen_uuid(libraryId + "/categories/" + urlencodedCategoryName)}
});
}
return render_template(
RESOURCE::templates::catalog_v2_categories_xml,
kainjow::mustache::object{
{"date", now},
{"endpoint_root", rootLocation + "/catalog/v2"},
{"feed_id", gen_uuid(libraryId + "/categories")},
{"categories", categoryData }
}
);
}
std::string OPDSDumper::languagesOPDSFeed() const
{
const auto now = gen_date_str();
kainjow::mustache::list languageData;
std::call_once(fillLanguagesFlag, fillLanguagesMap);
for ( const auto& langAndBookCount : library->getBooksLanguagesWithCounts() ) {
const std::string languageCode = langAndBookCount.first;
const int bookCount = langAndBookCount.second;
const auto languageSelfName = getLanguageSelfName(languageCode);
languageData.push_back(kainjow::mustache::object{
{"lang_code", languageCode},
{"lang_self_name", languageSelfName},
{"book_count", to_string(bookCount)},
{"updated", now},
{"id", gen_uuid(libraryId + "/languages/" + languageCode)}
});
}
return render_template(
RESOURCE::templates::catalog_v2_languages_xml,
kainjow::mustache::object{
{"date", now},
{"endpoint_root", rootLocation + "/catalog/v2"},
{"feed_id", gen_uuid(libraryId + "/languages")},
{"languages", languageData }
}
);
return nodeToString(root_node);
}
}

928
src/reader.cpp Normal file
View File

@@ -0,0 +1,928 @@
/*
* Copyright 2011 Emmanuel Engelhart <kelson@kiwix.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*/
#include "reader.h"
#include <time.h>
#include <zim/search.h>
#include "tools/otherTools.h"
/* Return the lowercase hexadecimal digit for the upper 4 bits of `v`. */
inline char hi(char v)
{
  static const char hex[] = "0123456789abcdef";
  return hex[(v >> 4) & 0xf];
}

/* Return the lowercase hexadecimal digit for the lower 4 bits of `v`. */
inline char lo(char v)
{
  static const char hex[] = "0123456789abcdef";
  return hex[v & 0xf];
}

/* Format the first 16 bytes of `in` as a textual UUID
 * ("xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx", lowercase hex).
 *
 * Input shorter than 16 bytes is padded with zero bytes, so indexing never
 * goes past the end of the string; extra bytes are ignored. */
std::string hexUUID(std::string in)
{
  in.resize(16, '\0');

  /* Index one past the last byte of each dash-separated group. */
  static const unsigned groupEnds[] = {4, 6, 8, 10, 16};
  const unsigned groupCount = sizeof(groupEnds) / sizeof(groupEnds[0]);

  std::ostringstream out;
  unsigned n = 0;
  for (unsigned group = 0; group < groupCount; ++group) {
    if (group != 0) {
      out << '-';
    }
    for (; n < groupEnds[group]; ++n) {
      out << hi(in[n]) << lo(in[n]);
    }
  }
  return out.str();
}
namespace kiwix
{
/* Constructor: opens the ZIM file and caches the bounds and counts of the
 * 'A' and 'I' namespaces. */
Reader::Reader(const string zimFilePath) : zimFileHandler(NULL)
{
  /* A path ending in "zimaa" is opened through the same path without its
   * last two characters. */
  string pathToOpen(zimFilePath);
  const size_t suffixPos = pathToOpen.rfind("zimaa");
  const bool endsWithZimaa = suffixPos != string::npos
                          && pathToOpen.size() > 5
                          && suffixPos == pathToOpen.size() - 5;
  if (endsWithZimaa) {
    pathToOpen.resize(pathToOpen.size() - 2);
  }

  this->zimFileHandler = new zim::File(pathToOpen);

  if (this->zimFileHandler != NULL) {
    zim::File* const file = this->zimFileHandler;
    this->firstArticleOffset = file->getNamespaceBeginOffset('A');
    this->lastArticleOffset = file->getNamespaceEndOffset('A');
    this->nsACount = file->getNamespaceCount('A');
    this->nsICount = file->getNamespaceCount('I');
    /* The path is stored exactly as given by the caller. */
    this->zimFilePath = zimFilePath;
  }

  /* Seed rand(), which getRandomPage() relies on. */
  srand(time(NULL));
}
/* Destructor: releases the ZIM file handle. */
Reader::~Reader()
{
  /* Deleting a null pointer is a no-op, so no explicit check is needed. */
  delete this->zimFileHandler;
}
/* Return the underlying zim::File pointer held by this reader. */
zim::File* Reader::getZimFileHandler() const
{
  zim::File* const handler = this->zimFileHandler;
  return handler;
}
std::map<const std::string, unsigned int> Reader::parseCounterMetadata() const
{
std::map<const std::string, unsigned int> counters;
string mimeType, item, counterString;
unsigned int counter;
zim::Article article = this->zimFileHandler->getArticle('M', "Counter");
if (article.good()) {
stringstream ssContent(article.getData());
while (getline(ssContent, item, ';')) {
stringstream ssItem(item);
getline(ssItem, mimeType, '=');
getline(ssItem, counterString, '=');
if (!counterString.empty() && !mimeType.empty()) {
sscanf(counterString.c_str(), "%u", &counter);
counters.insert(pair<string, int>(mimeType, counter));
}
}
}
return counters;
}
/* Get the count of articles which can be indexed/displayed.
 *
 * Sums the counters whose mimetype starts with "text/html"; falls back to
 * the number of entries in the 'A' namespace when no counter is available. */
unsigned int Reader::getArticleCount() const
{
  const std::map<const std::string, unsigned int> counters
      = this->parseCounterMetadata();
  if (counters.empty()) {
    return this->nsACount;
  }

  unsigned int total = 0;
  for (auto it = counters.begin(); it != counters.end(); ++it) {
    if (startsWith(it->first, "text/html")) {
      total += it->second;
    }
  }
  return total;
}
/* Get the count of media items in the ZIM file.
 *
 * Sums the JPEG, GIF and PNG counters; falls back to the number of entries
 * in the 'I' namespace when no counter is available. */
unsigned int Reader::getMediaCount() const
{
  const std::map<const std::string, unsigned int> counters
      = this->parseCounterMetadata();
  if (counters.empty()) {
    return this->nsICount;
  }

  static const char* const imageMimeTypes[]
      = {"image/jpeg", "image/gif", "image/png"};

  unsigned int total = 0;
  for (const char* mimeType : imageMimeTypes) {
    const auto found = counters.find(mimeType);
    if (found != counters.end()) {
      total += found->second;
    }
  }
  return total;
}
/* Get the total number of items in the ZIM file, redirects included. */
unsigned int Reader::getGlobalCount() const
{
  const unsigned int total = this->zimFileHandler->getCountArticles();
  return total;
}
/* Return the UID of the ZIM file, as produced by streaming the UUID stored
 * in the file header. */
string Reader::getId() const
{
  std::ostringstream formatted;
  formatted << this->zimFileHandler->getFileheader().getUuid();
  return formatted.str();
}
/* Look up the entry with the given title, follow it to its final entry and
 * store that entry's path in `url`.
 * Returns false (leaving `url` untouched) when NoEntry is raised. */
bool Reader::getPageUrlFromTitle(const string& title, string& url) const
{
  try {
    url = getEntryFromTitle(title).getFinalEntry().getPath();
  } catch (NoEntry&) {
    return false;
  }
  return true;
}
/* Return the path of a randomly picked page (see getRandomPage()). */
string Reader::getRandomPageUrl() const
{
  Entry randomPage = getRandomPage();
  return randomPage.getPath();
}
/* Pick a random entry from the 'A' namespace, avoiding the main page.
 *
 * Draws are retried while the drawn article is invalid or is the main page.
 * After the retry budget is exhausted, a valid article is returned even if
 * it is the main page (e.g. a file with a single article); NoEntry is
 * thrown only when no valid article could be drawn, or when there is no
 * open ZIM file. */
Entry Reader::getRandomPage() const
{
  if (!this->zimFileHandler) {
    throw NoEntry();
  }

  const std::string mainPagePath = this->getMainPage().getPath();

  zim::Article article;
  int watchdog = 42;
  do {
    auto idx = this->firstArticleOffset
               + (zim::size_type)((double)rand() / ((double)RAND_MAX + 1)
                                  * this->nsACount);
    article = zimFileHandler->getArticle(idx);
    if (!watchdog--) {
      if (article.good()) {
        break;
      }
      throw NoEntry();
    }
    /* '||' is required here: with '&&' the loop stopped as soon as either
     * test failed, so an invalid article could be returned and the main
     * page was never skipped. Short-circuiting also keeps getLongUrl()
     * from being called on an invalid article. */
  } while (!article.good() || article.getLongUrl() == mainPagePath);

  return article;
}
/* Return the path of the welcome (main) page. */
string Reader::getMainPageUrl() const
{
  Entry mainPage = getMainPage();
  return mainPage.getPath();
}
/* Return the main page declared in the file header. Falls back to
 * getFirstPage() when the header declares none or the declared article is
 * not valid. Throws NoEntry when there is no open ZIM file. */
Entry Reader::getMainPage() const
{
  if (!this->zimFileHandler) {
    throw NoEntry();
  }

  zim::Article mainArticle;
  const bool declaresMainPage
      = this->zimFileHandler->getFileheader().hasMainPage();
  if (declaresMainPage) {
    mainArticle = zimFileHandler->getArticle(
        this->zimFileHandler->getFileheader().getMainPage());
  }

  if (mainArticle.good()) {
    return mainArticle;
  }
  return getFirstPage();
}
/* Fetch the favicon content and mimetype.
 * The known favicon paths are tried in order; the first one that resolves
 * without raising NoEntry wins. Returns false when none of them does. */
bool Reader::getFavicon(string& content, string& mimeType) const
{
  static const char* const candidates[]
      = {"-/favicon", "-/favicon.png", "I/favicon.png", "I/favicon"};
  const size_t candidateCount = sizeof(candidates) / sizeof(candidates[0]);

  for (size_t i = 0; i < candidateCount; ++i) {
    try {
      auto entry = getEntryFromPath(candidates[i]).getFinalEntry();
      content = entry.getContent();
      mimeType = entry.getMimetype();
      return true;
    } catch (NoEntry&) {
      /* Not available under this path: try the next candidate. */
    }
  }

  return false;
}
/* Return the stored ZIM file path. */
string Reader::getZimFilePath() const
{
  const string& storedPath = this->zimFilePath;
  return storedPath;
}
/* Read the content of the metadata entry "M/<name>" into `value`.
 * Returns false (leaving `value` untouched) when NoEntry is raised. */
bool Reader::getMetadata(const string& name, string& value) const
{
  try {
    value = getEntryFromPath("M/" + name).getContent();
  } catch (NoEntry&) {
    return false;
  }
  return true;
}
#define METADATA(NAME) std::string v; getMetadata(NAME, v); return v;
/* Return the "Name" metadata, or an empty string when it is missing. */
string Reader::getName() const
{
  std::string name;
  this->getMetadata("Name", name);
  return name;
}
/* Return the "Title" metadata. When it is empty, a title is derived from
 * the file name: last path element, '_' replaced by ' ', cut at the first
 * ".zim". */
string Reader::getTitle() const
{
  string title;
  this->getMetadata("Title", title);
  if (!title.empty()) {
    return title;
  }

  title = getLastPathElement(zimFileHandler->getFilename());
  std::replace(title.begin(), title.end(), '_', ' ');
  const size_t extensionPos = title.find(".zim");
  return title.substr(0, extensionPos);
}
/* Return the "Creator" metadata, or an empty string when it is missing. */
string Reader::getCreator() const
{
  std::string creator;
  this->getMetadata("Creator", creator);
  return creator;
}
/* Return the "Publisher" metadata, or an empty string when it is missing. */
string Reader::getPublisher() const
{
  std::string publisher;
  this->getMetadata("Publisher", publisher);
  return publisher;
}
/* Return the "Date" metadata, or an empty string when it is missing. */
string Reader::getDate() const
{
  std::string date;
  this->getMetadata("Date", date);
  return date;
}
/* Return the "Description" metadata, falling back to "Subtitle" when the
 * description is empty. */
string Reader::getDescription() const
{
  string description;
  this->getMetadata("Description", description);

  /* Mediawiki Collection tends to use the "Subtitle" name */
  const bool needsFallback = description.empty();
  if (needsFallback) {
    this->getMetadata("Subtitle", description);
  }

  return description;
}
/* Return the "LongDescription" metadata, or an empty string when it is
 * missing. */
string Reader::getLongDescription() const
{
  std::string longDescription;
  this->getMetadata("LongDescription", longDescription);
  return longDescription;
}
/* Return the "Language" metadata, or an empty string when it is missing. */
string Reader::getLanguage() const
{
  std::string language;
  this->getMetadata("Language", language);
  return language;
}
/* Return the "License" metadata, or an empty string when it is missing. */
string Reader::getLicense() const
{
  std::string license;
  this->getMetadata("License", license);
  return license;
}
/* Return the "Tags" metadata.
 * With `original` set, the raw metadata string is returned; otherwise the
 * tags are passed through convertTags() and re-joined with ';'. */
string Reader::getTags(bool original) const
{
  string rawTags;
  getMetadata("Tags", rawTags);

  if (!original) {
    auto convertedTags = convertTags(rawTags);
    return join(convertedTags, ";");
  }
  return rawTags;
}
/* Return the value of the tag `tagName`, looked up in the converted "Tags"
 * metadata through getTagValueFromTagList(). */
string Reader::getTagStr(const std::string& tagName) const
{
  string rawTags;
  getMetadata("Tags", rawTags);
  const auto tagList = convertTags(rawTags);
  return getTagValueFromTagList(tagList, tagName);
}
/* Return the value of the tag `tagName` converted with convertStrToBool(). */
bool Reader::getTagBool(const std::string& tagName) const
{
  const std::string tagValue = getTagStr(tagName);
  return convertStrToBool(tagValue);
}
/* Return the "Relation" metadata, or an empty string when it is missing. */
string Reader::getRelation() const
{
  std::string relation;
  this->getMetadata("Relation", relation);
  return relation;
}
/* Return the "Flavour" metadata, or an empty string when it is missing. */
string Reader::getFlavour() const
{
  std::string flavour;
  this->getMetadata("Flavour", flavour);
  return flavour;
}
/* Return the "Source" metadata, or an empty string when it is missing. */
string Reader::getSource() const
{
  std::string source;
  this->getMetadata("Source", source);
  return source;
}
/* Return the "Scraper" metadata, or an empty string when it is missing. */
string Reader::getScraper() const
{
  std::string scraper;
  this->getMetadata("Scraper", scraper);
  return scraper;
}
#undef METADATA
/* Return the original file UUID built from the "startfileuid" metadata, or
 * an empty string when that metadata is missing or empty.
 *
 * The metadata is read as decimal byte values, each terminated by '\n'; up
 * to 16 of them are collected and formatted with hexUUID(). Text after the
 * last '\n' is not used.
 * NOTE(review): presumably every value, including the last, is newline
 * terminated - confirm against the writer of this metadata. */
string Reader::getOrigId() const
{
  string value;
  this->getMetadata("startfileuid", value);
  if (value.empty()) {
    return "";
  }

  char uuidBytes[16] = {0};
  const unsigned int capacity = sizeof(uuidBytes);
  unsigned int byteCount = 0;
  std::string field;

  /* Stop once the buffer is full: further values used to be written past
   * the end of the array. */
  for (std::string::size_type i = 0;
       i < value.size() && byteCount < capacity;
       ++i) {
    if (value[i] == '\n') {
      uuidBytes[byteCount++] = atoi(field.c_str());
      field.clear();
    } else {
      field += value[i];
    }
  }

  /* Pass all 16 bytes explicitly. Converting the array as a C string would
   * stop at the first zero byte, or run past the array when none of the 16
   * bytes is zero. */
  return hexUUID(std::string(uuidBytes, capacity));
}
/* Return the path of the first page (see getFirstPage()). */
string Reader::getFirstPageUrl() const
{
  Entry firstPage = getFirstPage();
  return firstPage.getPath();
}
/* Return the article located at the beginning of the 'A' namespace.
 * Throws NoEntry when there is no open ZIM file or that article is not
 * valid. */
Entry Reader::getFirstPage() const
{
  if (!this->zimFileHandler) {
    throw NoEntry();
  }

  const auto namespaceStart = zimFileHandler->getNamespaceBeginOffset('A');
  auto firstArticle = zimFileHandler->getArticle(namespaceStart);
  if (firstArticle.good()) {
    return firstArticle;
  }
  throw NoEntry();
}
/* Split an url of the form "[/]<ns>/<title>" into its namespace character
 * and title.
 *
 * Returns true when both parts were found. On failure `title` is left
 * untouched; `*ns` is written as soon as a namespace character has been
 * read, even if the rest of the url turns out to be malformed. */
bool _parseUrl(const string& url, char* ns, string& title)
{
  const string::size_type length = url.size();
  string::size_type pos = 0;

  /* Ignore the first '/' */
  if (pos < length && url[pos] == '/') {
    ++pos;
  }

  /* A namespace character, which cannot be '/', must come next */
  if (pos >= length || url[pos] == '/') {
    return false;
  }
  *ns = url[pos];
  ++pos;

  /* ... followed by the '/' separating it from the title */
  if (pos >= length || url[pos] != '/') {
    return false;
  }
  ++pos;

  /* The title is whatever remains, and must not be empty */
  if (pos >= length) {
    return false;
  }
  title = url.substr(pos);
  return true;
}
/* Member wrapper forwarding to the free function _parseUrl(). */
bool Reader::parseUrl(const string& url, char* ns, string& title) const
{
  const bool parsed = _parseUrl(url, ns, title);
  return parsed;
}
/* Return the entry stored under `path` ("[/]<ns>/<url>").
 * When the path yields neither a namespace nor an url, the main page is
 * returned. Throws NoEntry when there is no open ZIM file or the article is
 * not valid. */
Entry Reader::getEntryFromPath(const std::string& path) const
{
  if (!this->zimFileHandler) {
    throw NoEntry();
  }

  char ns = 0;
  std::string shortUrl;
  _parseUrl(path, &ns, shortUrl);

  const bool nothingRequested = (ns == 0) && shortUrl.empty();
  if (nothingRequested) {
    return getMainPage();
  }

  auto article = zimFileHandler->getArticle(ns, shortUrl);
  if (article.good()) {
    return article;
  }
  throw NoEntry();
}
/* Same as getEntryFromPath(), for a path that is still url-encoded. */
Entry Reader::getEntryFromEncodedPath(const std::string& path) const
{
  const std::string decodedPath = urlDecode(path, true);
  return getEntryFromPath(decodedPath);
}
/* Return the 'A' namespace entry with the given title.
 * Throws NoEntry when there is no open ZIM file or the article is not
 * valid. */
Entry Reader::getEntryFromTitle(const std::string& title) const
{
  if (!this->zimFileHandler) {
    throw NoEntry();
  }

  auto byTitle = this->zimFileHandler->getArticleByTitle('A', title);
  if (byTitle.good()) {
    return byTitle;
  }
  throw NoEntry();
}
/* Return article by url */
bool Reader::getArticleObjectByDecodedUrl(const string& url,
zim::Article& article) const
{
if (this->zimFileHandler == NULL) {
return false;
}
/* Parse the url */
char ns = 0;
string urlStr;
_parseUrl(url, &ns, urlStr);
/* Main page */
if (urlStr.empty() && ns == 0) {
_parseUrl(this->getMainPage().getPath(), &ns, urlStr);
}
/* Extract the content from the zim file */
article = zimFileHandler->getArticle(ns, urlStr);
return article.good();
}
/* Store the mimetype of the entry found under `url` in `mimeType`, without
 * fetching its content. On NoEntry, `mimeType` is set to "" and false is
 * returned. */
bool Reader::getMimeTypeByUrl(const string& url, string& mimeType) const
{
  try {
    auto found = getEntryFromPath(url);
    mimeType = found.getMimetype();
  } catch (NoEntry&) {
    mimeType = "";
    return false;
  }
  return true;
}
bool get_content_by_decoded_url(const Reader& reader,
const string& url,
string& content,
string& title,
unsigned int& contentLength,
string& contentType,
string& baseUrl)
{
content = "";
contentType = "";
contentLength = 0;
try {
auto entry = reader.getEntryFromPath(url);
entry = entry.getFinalEntry();
baseUrl = entry.getPath();
contentType = entry.getMimetype();
content = entry.getContent();
contentLength = entry.getSize();
title = entry.getTitle();
/* Try to set a stub HTML header/footer if necesssary */
if (contentType.find("text/html") != string::npos
&& content.find("<body") == std::string::npos
&& content.find("<BODY") == std::string::npos) {
content = "<html><head><title>" + title +
"</title><meta http-equiv=\"Content-Type\" content=\"text/html; "
"charset=utf-8\" /></head><body>" +
content + "</body></html>";
}
return true;
} catch (NoEntry& e) {
return false;
}
}
/* Get a content from the zim file. The url is decoded with
 * kiwix::urlDecode() before the lookup; the base url of the final entry is
 * discarded. */
bool Reader::getContentByUrl(const string& url,
                             string& content,
                             string& title,
                             unsigned int& contentLength,
                             string& contentType) const
{
  std::string discardedBaseUrl;
  const std::string decodedUrl = kiwix::urlDecode(url);
  return get_content_by_decoded_url(
      *this, decodedUrl, content, title, contentLength, contentType,
      discardedBaseUrl);
}
/* Get a content from an url-encoded url; `baseUrl` receives the path of the
 * final entry. */
bool Reader::getContentByEncodedUrl(const string& url,
                                    string& content,
                                    string& title,
                                    unsigned int& contentLength,
                                    string& contentType,
                                    string& baseUrl) const
{
  const std::string decodedUrl = kiwix::urlDecode(url);
  return get_content_by_decoded_url(
      *this, decodedUrl, content, title, contentLength, contentType, baseUrl);
}
/* Get a content from an url-encoded url; the base url of the final entry is
 * discarded. */
bool Reader::getContentByEncodedUrl(const string& url,
                                    string& content,
                                    string& title,
                                    unsigned int& contentLength,
                                    string& contentType) const
{
  std::string discardedBaseUrl;
  const std::string decodedUrl = kiwix::urlDecode(url);
  return get_content_by_decoded_url(
      *this, decodedUrl, content, title, contentLength, contentType,
      discardedBaseUrl);
}
/* Get a content from an already decoded url; the base url of the final
 * entry is discarded. */
bool Reader::getContentByDecodedUrl(const string& url,
                                    string& content,
                                    string& title,
                                    unsigned int& contentLength,
                                    string& contentType) const
{
  std::string discardedBaseUrl;
  const bool found = get_content_by_decoded_url(
      *this, url, content, title, contentLength, contentType,
      discardedBaseUrl);
  return found;
}
/* Get a content from an already decoded url; `baseUrl` receives the path of
 * the final entry. */
bool Reader::getContentByDecodedUrl(const string& url,
                                    string& content,
                                    string& title,
                                    unsigned int& contentLength,
                                    string& contentType,
                                    string& baseUrl) const
{
  const bool found = get_content_by_decoded_url(
      *this, url, content, title, contentLength, contentType, baseUrl);
  return found;
}
/* Check if an article exists; forwards to pathExists(). */
bool Reader::urlExists(const string& url) const
{
  const bool exists = pathExists(url);
  return exists;
}
bool Reader::pathExists(const string& path) const
{
if (!zimFileHandler)
{
return false;
}
char ns = 0;
string titleStr;
_parseUrl(path, &ns, titleStr);
zim::File::const_iterator findItr = zimFileHandler->find(ns, titleStr);
return findItr != zimFileHandler->end() && findItr->getUrl() == titleStr;
}
/* Does the ZIM file have a fulltext index?
 * Always false when there is no open file or the file is multi-part;
 * otherwise true when either of the two known index paths exists. */
bool Reader::hasFulltextIndex() const
{
  if (!zimFileHandler) {
    return false;
  }
  if (zimFileHandler->is_multiPart()) {
    return false;
  }

  if (pathExists("Z//fulltextIndex/xapian")) {
    return true;
  }
  return pathExists("X/fulltext/xapian");
}
/* Search titles by prefix, collecting the results in the reader's own
 * suggestion list.
 *
 * With `reset` set the list is emptied first. Without it, the search is
 * skipped (returning false) when the list already holds more than
 * `suggestionsCount` items. The suggestion cursor is rewound to the
 * beginning of the list after the search. */
bool Reader::searchSuggestions(const string& prefix,
                               unsigned int suggestionsCount,
                               const bool reset)
{
  if (reset) {
    this->suggestions.clear();
    this->suggestionsOffset = this->suggestions.begin();
  } else if (this->suggestions.size() > suggestionsCount) {
    return false;
  }

  const bool found
      = searchSuggestions(prefix, suggestionsCount, this->suggestions);

  /* Set the cursor to the beginning */
  this->suggestionsOffset = this->suggestions.begin();

  return found;
}
/* Search titles of the 'A' namespace starting with `prefix` and merge them
 * into `results`.
 *
 * Each suggestion is a vector of three strings: [0] the article title,
 * [1] the url "/A/<url>" (of the redirect target for redirects) and [2] the
 * normalized title. New items are inserted in front of the first existing
 * item whose normalized title compares greater.
 *
 * Returns true when at least one matching title was visited, even if it was
 * not inserted because it was already listed. Returns false for an empty
 * prefix. */
bool Reader::searchSuggestions(const string& prefix,
unsigned int suggestionsCount,
SuggestionsList_t& results)
{
bool retVal = false;
/* Return if no prefix */
if (prefix.size() == 0) {
return false;
}
/* Walk the titles from the position returned by findByTitle() for as long
as they still start with the prefix and the result list is not full */
for (auto articleItr = zimFileHandler->findByTitle('A', prefix);
articleItr != zimFileHandler->end()
&& articleItr->getTitle().compare(0, prefix.size(), prefix) == 0
&& results.size() < suggestionsCount;
++articleItr) {
/* Extract the interesting part of article title & url */
std::string normalizedArticleTitle
= kiwix::normalize(articleItr->getTitle());
std::string articleFinalUrl = "/A/" + articleItr->getUrl();
if (articleItr->isRedirect()) {
zim::Article article = *articleItr;
/* Follow the redirect chain, giving up after 42 hops */
unsigned int loopCounter = 0;
while (article.isRedirect() && loopCounter++ < 42) {
article = article.getRedirectArticle();
}
articleFinalUrl = "/A/" + article.getUrl();
}
/* Go through all already found suggestions and skip if this
article is already in the suggestions list (with another
title) */
bool insert = true;
/* Declared outside of the loop: the position where the scan stops is the
insertion point used below */
std::vector<std::vector<std::string>>::iterator suggestionItr;
for (suggestionItr = results.begin();
suggestionItr != results.end();
suggestionItr++) {
int result = normalizedArticleTitle.compare((*suggestionItr)[2]);
if (result == 0 && articleFinalUrl.compare((*suggestionItr)[1]) == 0) {
/* Same normalized title and same final url: duplicate */
insert = false;
break;
} else if (result < 0) {
/* First item comparing greater than the new title: insert before it */
break;
}
}
/* Insert if possible */
if (insert) {
std::vector<std::string> suggestion;
suggestion.push_back(articleItr->getTitle());
suggestion.push_back(articleFinalUrl);
suggestion.push_back(normalizedArticleTitle);
results.insert(suggestionItr, suggestion);
}
/* Suggestions were found */
retVal = true;
}
return retVal;
}
/* Return the title followed by its ucFirst(), lcFirst() and toTitle()
 * variants, in that order. Duplicates are not removed. */
std::vector<std::string> Reader::getTitleVariants(
    const std::string& title) const
{
  std::vector<std::string> variants;
  variants.reserve(4);
  variants.push_back(title);
  variants.push_back(kiwix::ucFirst(title));
  variants.push_back(kiwix::lcFirst(title));
  variants.push_back(kiwix::toTitle(title));
  return variants;
}
/* Run the "smart" suggestion search into the reader's own suggestion list.
 * The list is emptied first; the suggestion cursor is rewound to the
 * beginning of the list after the search. */
bool Reader::searchSuggestionsSmart(const string& prefix,
                                    unsigned int suggestionsCount)
{
  this->suggestions.clear();
  this->suggestionsOffset = this->suggestions.begin();

  const bool found
      = searchSuggestionsSmart(prefix, suggestionsCount, this->suggestions);

  this->suggestionsOffset = this->suggestions.begin();
  return found;
}
/* Try also a few variations of the prefix to have better results */
bool Reader::searchSuggestionsSmart(const string& prefix,
unsigned int suggestionsCount,
SuggestionsList_t& results)
{
std::vector<std::string> variants = this->getTitleVariants(prefix);
bool retVal = false;
/* Try to search in the title using fulltext search database */
const auto suggestionSearch
= this->getZimFileHandler()->suggestions(prefix, 0, suggestionsCount);
if (suggestionSearch->get_matches_estimated()) {
for (auto current = suggestionSearch->begin();
current != suggestionSearch->end();
current++) {
if (!current->good()) {
continue;
}
std::vector<std::string> suggestion;
suggestion.push_back(current->getTitle());
suggestion.push_back("/A/" + current->getUrl());
suggestion.push_back(kiwix::normalize(current->getTitle()));
results.push_back(suggestion);
}
retVal = true;
} else {
for (std::vector<std::string>::iterator variantsItr = variants.begin();
variantsItr != variants.end();
variantsItr++) {
retVal = this->searchSuggestions(*variantsItr, suggestionsCount, results)
|| retVal;
}
}
return retVal;
}
/* Hand out the title of the suggestion under the cursor and advance the
   cursor. Returns false, leaving `title` untouched, once the list is
   exhausted. */
bool Reader::getNextSuggestion(string& title)
{
  if (suggestionsOffset == suggestions.end()) {
    return false;
  }

  /* element 0 of a suggestion is its title */
  title = (*suggestionsOffset)[0];

  /* move the cursor on for the next call */
  ++suggestionsOffset;
  return true;
}
/* Same as the single-argument overload, but also reports the suggestion's
   url (element 1). Returns false, leaving both outputs untouched, once the
   list is exhausted. */
bool Reader::getNextSuggestion(string& title, string& url)
{
  if (suggestionsOffset == suggestions.end()) {
    return false;
  }

  /* element 0 is the title, element 1 the url */
  title = (*suggestionsOffset)[0];
  url = (*suggestionsOffset)[1];

  /* move the cursor on for the next call */
  ++suggestionsOffset;
  return true;
}
/* Check if the file has as checksum */
bool Reader::canCheckIntegrity() const
{
return this->zimFileHandler->getChecksum() != "";
}
/* Return true if corrupted, false otherwise */
bool Reader::isCorrupted() const
{
try {
if (this->zimFileHandler->verify() == true) {
return false;
}
} catch (exception& e) {
cerr << e.what() << endl;
return true;
}
return true;
}
/* Return the file size, works also for splitted files */
unsigned int Reader::getFileSize() const
{
zim::File* file = this->getZimFileHandler();
zim::size_type size = 0;
if (file != NULL) {
size = file->getFilesize();
}
return (size / 1024);
}
}

View File

@@ -21,35 +21,26 @@
#include <cmath>
#include "search_renderer.h"
#include "searcher.h"
#include "reader.h"
#include "library.h"
#include "name_mapper.h"
#include "tools/archiveTools.h"
#include <zim/search.h>
#include <mustache.hpp>
#include "libkiwix-resources.h"
#include "tools/stringTools.h"
#include "kiwixlib-resources.h"
namespace kiwix
{
/* Constructor */
SearchRenderer::SearchRenderer(zim::SearchResultSet srs, NameMapper* mapper,
unsigned int start, unsigned int estimatedResultCount)
: SearchRenderer(srs, mapper, nullptr, start, estimatedResultCount)
{}
SearchRenderer::SearchRenderer(zim::SearchResultSet srs, NameMapper* mapper, Library* library,
unsigned int start, unsigned int estimatedResultCount)
: m_srs(srs),
SearchRenderer::SearchRenderer(Searcher* searcher, NameMapper* mapper)
: mp_searcher(searcher),
mp_nameMapper(mapper),
mp_library(library),
protocolPrefix("zim://"),
searchProtocolPrefix("search://"),
estimatedResultCount(estimatedResultCount),
resultStart(start)
searchProtocolPrefix("search://?")
{}
/* Destructor */
@@ -57,12 +48,12 @@ SearchRenderer::~SearchRenderer() = default;
void SearchRenderer::setSearchPattern(const std::string& pattern)
{
searchPattern = pattern;
this->searchPattern = pattern;
}
void SearchRenderer::setSearchBookQuery(const std::string& bookQuery)
void SearchRenderer::setSearchContent(const std::string& name)
{
searchBookQuery = bookQuery;
this->searchContent = name;
}
void SearchRenderer::setProtocolPrefix(const std::string& prefix)
@@ -75,162 +66,86 @@ void SearchRenderer::setSearchProtocolPrefix(const std::string& prefix)
this->searchProtocolPrefix = prefix;
}
std::string extractValueFromQuery(const std::string& query, const std::string& key) {
const std::string p = key + "=";
const size_t i = query.find(p);
if (i == std::string::npos) {
return "";
}
std::string r = query.substr(i + p.size());
return r.substr(0, r.find("&"));
}
kainjow::mustache::data buildQueryData
(
const std::string& searchProtocolPrefix,
const std::string& pattern,
const std::string& bookQuery
) {
kainjow::mustache::data query;
query.set("pattern", kiwix::encodeDiples(pattern));
std::ostringstream ss;
ss << searchProtocolPrefix << "?pattern=" << urlEncode(pattern);
ss << "&" << bookQuery;
query.set("unpaginatedQuery", ss.str());
auto lang = extractValueFromQuery(bookQuery, "books.filter.lang");
if(!lang.empty()) {
query.set("lang", lang);
}
return query;
}
kainjow::mustache::data buildPagination(
unsigned int pageLength,
unsigned int resultsCount,
unsigned int resultsStart
)
std::string SearchRenderer::getHtml()
{
assert(pageLength!=0);
kainjow::mustache::data pagination;
kainjow::mustache::data results{kainjow::mustache::data::type::list};
mp_searcher->restart_search();
Result* p_result = NULL;
while ((p_result = mp_searcher->getNextResult())) {
kainjow::mustache::data result;
result.set("title", p_result->get_title());
result.set("url", p_result->get_url());
result.set("snippet", p_result->get_snippet());
auto readerIndex = p_result->get_readerIndex();
auto reader = mp_searcher->get_reader(readerIndex);
result.set("resultContentId", mp_nameMapper->getNameForId(reader->getId()));
if (p_result->get_wordCount() >= 0) {
result.set("wordCount", kiwix::beautifyInteger(p_result->get_wordCount()));
}
results.push_back(result);
delete p_result;
}
// pages
kainjow::mustache::data pages{kainjow::mustache::data::type::list};
if (resultsCount == 0) {
// Easy case
pagination.set("itemsPerPage", to_string(pageLength));
pagination.set("hasPages", false);
pagination.set("pages", pages);
return pagination;
}
// First we want to display pages starting at a multiple of `pageLength`
// so, let's calculate the start index of the current page.
auto currentPage = resultsStart/pageLength;
auto lastPage = ((resultsCount-1)/pageLength);
auto lastPageStart = lastPage*pageLength;
auto nbPages = lastPage + 1;
auto firstPageGenerated = currentPage > 4 ? currentPage-4 : 0;
auto lastPageGenerated = std::min(currentPage+4, lastPage);
if (nbPages != 1) {
if (firstPageGenerated!=0) {
kainjow::mustache::data page;
page.set("label", "");
page.set("start", to_string(0));
page.set("current", false);
pages.push_back(page);
auto resultStart = mp_searcher->getResultStart();
auto resultEnd = 0U;
auto estimatedResultCount = mp_searcher->getEstimatedResultCount();
auto currentPage = 0U;
auto pageStart = 0U;
auto pageEnd = 0U;
auto lastPageStart = 0U;
if (pageLength) {
currentPage = resultStart/pageLength;
pageStart = currentPage > 4 ? currentPage-4 : 0;
pageEnd = currentPage + 5;
if (pageEnd > estimatedResultCount / pageLength) {
pageEnd = (estimatedResultCount + pageLength - 1) / pageLength;
}
for (auto i=firstPageGenerated; i<=lastPageGenerated; i++) {
kainjow::mustache::data page;
page.set("label", to_string(i+1));
page.set("start", to_string(i*pageLength));
page.set("current", bool(i == currentPage));
pages.push_back(page);
}
if (lastPageGenerated!=lastPage) {
kainjow::mustache::data page;
page.set("label", "");
page.set("start", to_string(lastPageStart));
page.set("current", false);
pages.push_back(page);
if (estimatedResultCount > pageLength) {
lastPageStart = ((estimatedResultCount-1)/pageLength)*pageLength;
}
}
pagination.set("itemsPerPage", to_string(pageLength));
pagination.set("hasPages", firstPageGenerated < lastPageGenerated);
pagination.set("pages", pages);
return pagination;
}
resultEnd = resultStart+pageLength; //setting result end
std::string SearchRenderer::renderTemplate(const std::string& tmpl_str)
{
const std::string absPathPrefix = protocolPrefix;
// Build the results list
kainjow::mustache::data items{kainjow::mustache::data::type::list};
for (auto it = m_srs.begin(); it != m_srs.end(); it++) {
kainjow::mustache::data result;
const std::string zim_id(it.getZimId());
const auto path = mp_nameMapper->getNameForId(zim_id) + "/" + it.getPath();
result.set("title", it.getTitle());
result.set("absolutePath", absPathPrefix + urlEncode(path));
result.set("snippet", it.getSnippet());
if (mp_library) {
result.set("bookTitle", mp_library->getBookById(zim_id).getTitle());
}
if (it.getWordCount() >= 0) {
result.set("wordCount", kiwix::beautifyInteger(it.getWordCount()));
}
for (unsigned int i = pageStart; i < pageEnd; i++) {
kainjow::mustache::data page;
page.set("label", to_string(i + 1));
page.set("start", to_string(i * pageLength));
items.push_back(result);
if (i == currentPage) {
page.set("selected", true);
}
pages.push_back(page);
}
kainjow::mustache::data results;
results.set("items", items);
results.set("count", kiwix::beautifyInteger(estimatedResultCount));
results.set("hasResults", estimatedResultCount != 0);
results.set("start", kiwix::beautifyInteger(resultStart));
results.set("end", kiwix::beautifyInteger(std::min(resultStart+pageLength-1, estimatedResultCount)));
// pagination
auto pagination = buildPagination(
pageLength,
estimatedResultCount,
resultStart
);
kainjow::mustache::data query = buildQueryData(
searchProtocolPrefix,
searchPattern,
searchBookQuery
);
std::string template_str = RESOURCE::templates::search_result_html;
kainjow::mustache::mustache tmpl(template_str);
kainjow::mustache::data allData;
allData.set("searchProtocolPrefix", searchProtocolPrefix);
allData.set("results", results);
allData.set("pagination", pagination);
allData.set("query", query);
kainjow::mustache::mustache tmpl(tmpl_str);
allData.set("pages", pages);
allData.set("hasResults", estimatedResultCount != 0);
allData.set("hasPages", pageStart != pageEnd);
allData.set("count", kiwix::beautifyInteger(estimatedResultCount));
allData.set("searchPattern", kiwix::encodeDiples(this->searchPattern));
allData.set("searchPatternEncoded", urlEncode(this->searchPattern));
allData.set("resultStart", to_string(resultStart + 1));
allData.set("resultEnd", to_string(min(resultEnd, estimatedResultCount)));
allData.set("pageLength", to_string(pageLength));
allData.set("resultLastPageStart", to_string(lastPageStart));
allData.set("protocolPrefix", this->protocolPrefix);
allData.set("searchProtocolPrefix", this->searchProtocolPrefix);
allData.set("contentId", this->searchContent);
std::stringstream ss;
tmpl.render(allData, [&ss](const std::string& str) { ss << str; });
if (!tmpl.is_valid()) {
throw std::runtime_error("Error while rendering search results: " + tmpl.error_message());
}
return ss.str();
}
std::string SearchRenderer::getHtml()
{
return renderTemplate(RESOURCE::templates::search_result_html);
}
std::string SearchRenderer::getXml()
{
return renderTemplate(RESOURCE::templates::search_result_xml);
}
}
}

279
src/searcher.cpp Normal file
View File

@@ -0,0 +1,279 @@
/*
* Copyright 2011 Emmanuel Engelhart <kelson@kiwix.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*/
#include <cmath>
#include "searcher.h"
#include "reader.h"
#include <zim/search.h>
#include <mustache.hpp>
#include "kiwixlib-resources.h"
#define MAX_SEARCH_LEN 140
namespace kiwix
{
/* Result implementation backed by a zim::Search::iterator.
   The iterator is held by value, so each _Result keeps its own position
   in the result set. */
class _Result : public Result
{
public:
// Builds a result from `iterator`; the member below stores a copy of it.
_Result(zim::Search::iterator& iterator);
virtual ~_Result(){};
// Accessors overriding the Result interface.
virtual std::string get_url();
virtual std::string get_title();
virtual int get_score();
virtual std::string get_snippet();
virtual std::string get_content();
virtual int get_wordCount();
virtual int get_size();
virtual int get_readerIndex();
private:
// Position inside the search results this object describes.
zim::Search::iterator iterator;
};
/* Private state of a Searcher: the zim search currently in progress (owned,
   NULL until a search is started) and the cursor used to walk its results. */
struct SearcherInternal {
  const zim::Search* _search;
  zim::Search::iterator current_iterator;

  SearcherInternal() : _search(NULL) {}

  // `delete` on a null pointer is a no-op, so no explicit guard is needed.
  ~SearcherInternal() { delete _search; }
};
/* Constructor: allocates the private SearcherInternal state (released in
   the destructor) and starts with an empty pattern, zero estimated results
   and an empty [resultStart, resultEnd) range. */
Searcher::Searcher()
: internal(new SearcherInternal()),
searchPattern(""),
estimatedResultCount(0),
resultStart(0),
resultEnd(0)
{
// NOTE(review): presumably makes the ICU data tables available before any
// query is run -- confirm against loadICUExternalTables().
loadICUExternalTables();
}
/* Destructor: releases the private state allocated by the constructor;
   SearcherInternal's own destructor deletes the zim search it holds. */
Searcher::~Searcher()
{
delete internal;
}
/* Register `reader` as a search source.
   Only readers with a full-text index are accepted; returns whether the
   reader was added. */
bool Searcher::add_reader(Reader* reader)
{
  const bool searchable = reader->hasFulltextIndex();
  if (searchable) {
    readers.push_back(reader);
  }
  return searchable;
}
/* Return the reader registered at position `readerIndex`.
   Uses the container's bounds-checked at(), so an invalid index raises the
   container's out-of-range error instead of reading past the end. */
Reader* Searcher::get_reader(int readerIndex)
{
return readers.at(readerIndex);
}
/* Search strings in the database */
void Searcher::search(const std::string& search,
unsigned int resultStart,
unsigned int resultEnd,
const bool verbose)
{
this->reset();
if (verbose == true) {
cout << "Performing query `" << search << "'" << endl;
}
this->searchPattern = search;
this->resultStart = resultStart;
this->resultEnd = resultEnd;
/* Try to find results */
if (resultStart != resultEnd) {
/* Perform the search */
string unaccentedSearch = removeAccents(search);
std::vector<const zim::File*> zims;
for (auto current = this->readers.begin(); current != this->readers.end();
current++) {
if ( (*current)->hasFulltextIndex() ) {
zims.push_back((*current)->getZimFileHandler());
}
}
zim::Search* search = new zim::Search(zims);
search->set_verbose(verbose);
search->set_query(unaccentedSearch);
search->set_range(resultStart, resultEnd);
internal->_search = search;
internal->current_iterator = internal->_search->begin();
this->estimatedResultCount = internal->_search->get_matches_estimated();
}
return;
}
void Searcher::geo_search(float latitude, float longitude, float distance,
unsigned int resultStart,
unsigned int resultEnd,
const bool verbose)
{
this->reset();
if (verbose == true) {
cout << "Performing geo query `" << distance << "&(" << latitude << ";" << longitude << ")'" << endl;
}
/* Perform the search */
std::ostringstream oss;
oss << "Articles located less than " << distance << " meters of " << latitude << ";" << longitude;
this->searchPattern = oss.str();
this->resultStart = resultStart;
this->resultEnd = resultEnd;
/* Try to find results */
if (resultStart == resultEnd) {
return;
}
std::vector<const zim::File*> zims;
for (auto current = this->readers.begin(); current != this->readers.end();
current++) {
zims.push_back((*current)->getZimFileHandler());
}
zim::Search* search = new zim::Search(zims);
search->set_verbose(verbose);
search->set_query("");
search->set_georange(latitude, longitude, distance);
search->set_range(resultStart, resultEnd);
internal->_search = search;
internal->current_iterator = internal->_search->begin();
this->estimatedResultCount = internal->_search->get_matches_estimated();
}
/* Rewind the result cursor to the first result of the current search.
   Does nothing when no search has been started. */
void Searcher::restart_search()
{
  if (internal->_search == NULL) {
    return;
  }
  internal->current_iterator = internal->_search->begin();
}
/* Return the result under the cursor and advance the cursor.
   The returned object is heap-allocated and must be deleted by the caller.
   Returns NULL when there is no search or its results are exhausted. */
Result* Searcher::getNextResult()
{
  if (!internal->_search) {
    return NULL;
  }
  if (internal->current_iterator == internal->_search->end()) {
    return NULL;
  }

  // The result copies the iterator, so advancing the cursor afterwards
  // does not affect it.
  Result* next = new _Result(internal->current_iterator);
  internal->current_iterator++;
  return next;
}
/* Reset the results */
void Searcher::reset()
{
this->estimatedResultCount = 0;
this->searchPattern = "";
return;
}
void Searcher::suggestions(std::string& searchPattern, const bool verbose)
{
this->reset();
if (verbose == true) {
cout << "Performing suggestion query `" << searchPattern << "`" << endl;
}
this->searchPattern = searchPattern;
this->resultStart = 0;
this->resultEnd = 10;
string unaccentedSearch = removeAccents(searchPattern);
std::vector<const zim::File*> zims;
for (auto current = this->readers.begin(); current != this->readers.end();
current++) {
zims.push_back((*current)->getZimFileHandler());
}
zim::Search* search = new zim::Search(zims);
search->set_verbose(verbose);
search->set_query(unaccentedSearch);
search->set_range(resultStart, resultEnd);
search->set_suggestion_mode(true);
internal->_search = search;
internal->current_iterator = internal->_search->begin();
this->estimatedResultCount = internal->_search->get_matches_estimated();
}
/* Return the result count estimation recorded by the last search,
   geo_search or suggestions call (0 after a reset). */
unsigned int Searcher::getEstimatedResultCount()
{
return this->estimatedResultCount;
}
/* Store a copy of the search iterator; all accessors below read from it. */
_Result::_Result(zim::Search::iterator& iterator)
: iterator(iterator)
{
}
/* The accessors below forward to the iterator method of the matching name. */
std::string _Result::get_url()
{
return iterator.get_url();
}
std::string _Result::get_title()
{
return iterator.get_title();
}
int _Result::get_score()
{
return iterator.get_score();
}
std::string _Result::get_snippet()
{
return iterator.get_snippet();
}
/* Return the data of the entry the iterator points to, or an empty string
   when that entry does not report good(). */
std::string _Result::get_content()
{
if (iterator->good()) {
return iterator->getData();
}
return "";
}
int _Result::get_size()
{
return iterator.get_size();
}
int _Result::get_wordCount()
{
return iterator.get_wordCount();
}
/* The reader index is the iterator's file index, i.e. the value expected by
   Searcher::get_reader(). */
int _Result::get_readerIndex()
{
return iterator.get_fileIndex();
}
}

View File

@@ -19,16 +19,140 @@
#include "server.h"
#ifdef _WIN32
# if !defined(__MINGW32__) && (_MSC_VER < 1600)
# include "stdint4win.h"
# endif
# include <winsock2.h>
# include <ws2tcpip.h>
# ifdef __GNUC__
// inet_pton is not declared in mingw, even if the function exists.
extern "C" {
WINSOCK_API_LINKAGE INT WSAAPI inet_pton( INT Family, PCSTR pszAddrString, PVOID pAddrBuf);
}
# endif
typedef UINT64 uint64_t;
typedef UINT16 uint16_t;
#endif
extern "C" {
#include "microhttpd_wrapper.h"
}
#include "tools/otherTools.h"
#include "tools/pathTools.h"
#include "tools/regexTools.h"
#include "tools/stringTools.h"
#include "library.h"
#include "name_mapper.h"
#include "entry.h"
#include "searcher.h"
#include "search_renderer.h"
#include "opds_dumper.h"
#include <zim/uuid.h>
#include <mustache.hpp>
#include <pthread.h>
#include <atomic>
#include <string>
#include <vector>
#include <chrono>
#include "kiwixlib-resources.h"
#include <zim/item.h>
#include "server/internalServer.h"
#ifndef _WIN32
# include <arpa/inet.h>
#endif
#include "server/request_context.h"
#include "server/response.h"
#define MAX_SEARCH_LEN 140
#define KIWIX_MIN_CONTENT_SIZE_TO_DEFLATE 100
namespace kiwix {
static IdNameMapper defaultNameMapper;
typedef kainjow::mustache::data MustacheData;
/* Forward declaration of the free-function request callback handed to
   libmicrohttpd. `cls` carries the InternalServer instance the request is
   forwarded to; the definition appears further down in this file. */
static MHD_Result staticHandlerCallback(void* cls,
struct MHD_Connection* connection,
const char* url,
const char* method,
const char* version,
const char* upload_data,
size_t* upload_data_size,
void** cont_cls);
/* HTTP server implementation built on libmicrohttpd.
   Owns the daemon handle and the serving options, and turns each incoming
   request into a Response through the handle_* / build_* helpers. */
class InternalServer {
public:
// Stores the configuration only; the daemon is created by start().
InternalServer(Library* library,
NameMapper* nameMapper,
std::string addr,
int port,
std::string root,
int nbThreads,
bool verbose,
bool withTaskbar,
bool withLibraryButton,
bool blockExternalLinks);
virtual ~InternalServer() = default;
// Per-request entry point, reached through staticHandlerCallback.
MHD_Result handlerCallback(struct MHD_Connection* connection,
const char* url,
const char* method,
const char* version,
const char* upload_data,
size_t* upload_data_size,
void** cont_cls);
// Starts the HTTP daemon; returns false on failure.
bool start();
// Stops the HTTP daemon.
void stop();
private: // functions
// Routes a request to the matching handler below.
Response handle_request(const RequestContext& request);
// Builders for error, not-modified, redirect and home page responses.
Response build_500(const std::string& msg);
Response build_404(const RequestContext& request, const std::string& zimName);
Response build_304(const RequestContext& request, const ETag& etag) const;
Response build_redirect(const std::string& bookName, const kiwix::Entry& entry) const;
Response build_homepage(const RequestContext& request);
// One handler per endpoint family.
Response handle_skin(const RequestContext& request);
Response handle_catalog(const RequestContext& request);
Response handle_meta(const RequestContext& request);
Response handle_search(const RequestContext& request);
Response handle_suggest(const RequestContext& request);
Response handle_random(const RequestContext& request);
Response handle_captured_external(const RequestContext& request);
Response handle_content(const RequestContext& request);
// Template data and response pre-filled from the server configuration.
MustacheData get_default_data() const;
MustacheData homepage_data() const;
Response get_default_response() const;
std::shared_ptr<Reader> get_reader(const std::string& bookName) const;
// ETag helpers: which urls skip the ETag, and If-None-Match matching.
bool etag_not_needed(const RequestContext& r) const;
ETag get_matching_if_none_match_etag(const RequestContext& request) const;
private: // data
std::string m_addr;
int m_port;
std::string m_root;
int m_nbThreads;
// Atomic: read with load() on every request.
std::atomic_bool m_verbose;
bool m_withTaskbar;
bool m_withLibraryButton;
bool m_blockExternalLinks;
// libmicrohttpd daemon handle; nullptr until start() succeeds.
struct MHD_Daemon* mp_daemon;
Library* mp_library;
NameMapper* mp_nameMapper;
// Identifier derived from the start time; used when matching ETags.
std::string m_server_id;
};
Server::Server(Library* library, NameMapper* nameMapper) :
mp_library(library),
mp_nameMapper(nameMapper),
@@ -46,21 +170,16 @@ bool Server::start() {
m_port,
m_root,
m_nbThreads,
m_multizimSearchLimit,
m_verbose,
m_withTaskbar,
m_withLibraryButton,
m_blockExternalLinks,
m_indexTemplateString,
m_ipConnectionLimit));
m_blockExternalLinks));
return mp_server->start();
}
void Server::stop() {
if (mp_server) {
mp_server->stop();
mp_server.reset(nullptr);
}
mp_server->stop();
mp_server.reset(nullptr);
}
void Server::setRoot(const std::string& root)
@@ -74,14 +193,780 @@ void Server::setRoot(const std::string& root)
}
}
int Server::getPort()
{
return mp_server->getPort();
/* Store the serving configuration. No daemon is created here (mp_daemon
   starts as nullptr); when `nameMapper` is null the file-level
   defaultNameMapper is used instead. */
InternalServer::InternalServer(Library* library,
NameMapper* nameMapper,
std::string addr,
int port,
std::string root,
int nbThreads,
bool verbose,
bool withTaskbar,
bool withLibraryButton,
bool blockExternalLinks) :
m_addr(addr),
m_port(port),
m_root(root),
m_nbThreads(nbThreads),
m_verbose(verbose),
m_withTaskbar(withTaskbar),
m_withLibraryButton(withLibraryButton),
m_blockExternalLinks(blockExternalLinks),
mp_daemon(nullptr),
mp_library(library),
mp_nameMapper(nameMapper ? nameMapper : &defaultNameMapper)
{}
/* Start the HTTP daemon on the configured address and port.
 *
 * Binds to m_addr when set (it must be a valid IPv4 address), otherwise to
 * any address. Returns true when the daemon is running; on failure an
 * explanation is printed to stderr and false is returned.
 */
bool InternalServer::start() {
#ifdef _WIN32
  int flags = MHD_USE_SELECT_INTERNALLY;
#else
  int flags = MHD_USE_POLL_INTERNALLY;
#endif
  if (m_verbose.load())
    flags |= MHD_USE_DEBUG;

  struct sockaddr_in sockAddr;
  memset(&sockAddr, 0, sizeof(sockAddr));
  sockAddr.sin_family = AF_INET;
  sockAddr.sin_port = htons(m_port);
  if (m_addr.empty()) {
    // The structure is already zeroed; only write when INADDR_ANY is not 0.
    if (0 != INADDR_ANY)
      sockAddr.sin_addr.s_addr = htonl(INADDR_ANY);
  } else {
    // inet_pton() returns 1 on success, 0 for an unparsable address and -1
    // on error; anything but 1 leaves the address unusable.
    if (inet_pton(AF_INET, m_addr.c_str(), &(sockAddr.sin_addr.s_addr)) != 1) {
      std::cerr << "Ip address " << m_addr << " is not a valid ip address" << std::endl;
      return false;
    }
  }

  // Set the server id before the daemon exists: once MHD_start_daemon()
  // returns, its threads may already be serving requests that read
  // m_server_id, so writing it afterwards would race with them.
  auto server_start_time = std::chrono::system_clock::now().time_since_epoch();
  m_server_id = kiwix::to_string(server_start_time.count());

  mp_daemon = MHD_start_daemon(flags,
                               m_port,
                               NULL,
                               NULL,
                               &staticHandlerCallback,
                               this,
                               MHD_OPTION_SOCK_ADDR, &sockAddr,
                               MHD_OPTION_THREAD_POOL_SIZE, m_nbThreads,
                               MHD_OPTION_END);
  if (mp_daemon == nullptr) {
    std::cerr << "Unable to instantiate the HTTP daemon. The port " << m_port
              << " is maybe already occupied or need more permissions to be open. "
                 "Please try as root or with a port number higher or equal to 1024."
              << std::endl;
    return false;
  }
  return true;
}
std::string Server::getAddress()
void InternalServer::stop()
{
return mp_server->getAddress();
MHD_stop_daemon(mp_daemon);
}
/* Free-function callback registered with libmicrohttpd.
   `cls` is the InternalServer passed at daemon creation; the request is
   forwarded unchanged to its handlerCallback() member. */
static MHD_Result staticHandlerCallback(void* cls,
                                        struct MHD_Connection* connection,
                                        const char* url,
                                        const char* method,
                                        const char* version,
                                        const char* upload_data,
                                        size_t* upload_data_size,
                                        void** cont_cls)
{
  InternalServer* const server = static_cast<InternalServer*>(cls);
  return server->handlerCallback(
      connection, url, method, version, upload_data, upload_data_size, cont_cls);
}
/* Serve one HTTP request.
 *
 * Builds a RequestContext, rejects (MHD_NO) any method other than GET, POST
 * and HEAD, lets handle_request() produce the response, tags 200 responses
 * with the server id unless the url is exempt from ETags, and sends the
 * response. In verbose mode the request and its duration are logged; a 500
 * response always dumps the request details.
 */
MHD_Result InternalServer::handlerCallback(struct MHD_Connection* connection,
                                           const char* url,
                                           const char* method,
                                           const char* version,
                                           const char* upload_data,
                                           size_t* upload_data_size,
                                           void** cont_cls)
{
  const auto receivedAt = std::chrono::steady_clock::now();

  if (m_verbose.load()) {
    printf("======================\n");
    printf("Requesting : \n");
    printf("full_url : %s\n", url);
  }

  RequestContext request(connection, m_root, url, method, version);
  if (m_verbose.load()) {
    request.print_debug_info();
  }

  /* Unexpected method */
  const bool supportedMethod = request.get_method() == RequestMethod::GET
                            || request.get_method() == RequestMethod::POST
                            || request.get_method() == RequestMethod::HEAD;
  if (!supportedMethod) {
    printf("Reject request because of unhandled request method.\n");
    printf("----------------------\n");
    return MHD_NO;
  }

  auto response = handle_request(request);

  if (response.getReturnCode() == MHD_HTTP_INTERNAL_SERVER_ERROR) {
    printf("========== INTERNAL ERROR !! ============\n");
    // In verbose mode the request was already dumped above.
    if (!m_verbose.load()) {
      printf("Requesting : \n");
      printf("full_url : %s\n", url);
      request.print_debug_info();
    }
  }

  if (response.getReturnCode() == MHD_HTTP_OK && !etag_not_needed(request)) {
    response.set_server_id(m_server_id);
  }

  auto sendStatus = response.send(request, connection);

  const auto finishedAt = std::chrono::steady_clock::now();
  const auto elapsed
      = std::chrono::duration_cast<std::chrono::duration<double>>(finishedAt - receivedAt);
  if (m_verbose.load()) {
    printf("Request time : %fs\n", elapsed.count());
    printf("----------------------\n");
  }
  return sendStatus;
}
/* Build a "304 Not Modified" answer: a default response with the matched
   ETag and an empty body. `request` is not consulted. */
Response InternalServer::build_304(const RequestContext& request, const ETag& etag) const
{
  auto notModified = get_default_response();
  notModified.set_code(MHD_HTTP_NOT_MODIFIED);
  notModified.set_etag(etag);
  notModified.set_content("");
  return notModified;
}
/* Route a request to its handler.
 *
 * Invalid urls give a 404 and a matching If-None-Match ETag gives a 304.
 * Otherwise the url selects the handler, with handle_content() as the
 * fallback. Any exception escaping a handler is logged to stderr and
 * turned into a 500 response.
 */
Response InternalServer::handle_request(const RequestContext& request)
{
  try {
    if (!request.is_valid_url()) {
      return build_404(request, "");
    }

    const ETag matchedEtag = get_matching_if_none_match_etag(request);
    if (matchedEtag) {
      return build_304(request, matchedEtag);
    }

    const std::string url = request.get_url();

    /* Prefix-matched endpoints */
    if (kiwix::startsWith(url, "/skin/")) {
      return handle_skin(request);
    }
    if (startsWith(url, "/catalog")) {
      return handle_catalog(request);
    }

    /* Exact-match endpoints */
    if (url == "/meta") {
      return handle_meta(request);
    }
    if (url == "/search") {
      return handle_search(request);
    }
    if (url == "/suggest") {
      return handle_suggest(request);
    }
    if (url == "/random") {
      return handle_random(request);
    }
    if (url == "/catch/external") {
      return handle_captured_external(request);
    }

    /* Everything else is ZIM content */
    return handle_content(request);
  } catch (std::exception& e) {
    fprintf(stderr, "===== Unhandled error : %s\n", e.what());
    return build_500(e.what());
  } catch (...) {
    fprintf(stderr, "===== Unhandled unknown error\n");
    return build_500("Unknown error");
  }
}
/* Return the template data common to all pages: currently only "root",
   set to the server's url root (m_root). */
MustacheData InternalServer::get_default_data() const
{
MustacheData data;
data.set("root", m_root);
return data;
}
/* Return a Response pre-configured from the server options: url root,
   current verbosity, taskbar, library button and external-link blocking. */
Response InternalServer::get_default_response() const
{
return Response(m_root, m_verbose.load(), m_withTaskbar, m_withLibraryButton, m_blockExternalLinks);
}
/* Build the "404 Not Found" HTML page for `request`.
   The page shows the full requested url; `bookName` is passed to the
   taskbar setup (it may be empty). */
Response InternalServer::build_404(const RequestContext& request,
                                   const std::string& bookName)
{
  MustacheData pageData;
  pageData.set("url", request.get_full_url());

  auto notFound = get_default_response();
  notFound.set_template(RESOURCE::templates::_404_html, pageData);
  notFound.set_mimeType("text/html");
  notFound.set_code(MHD_HTTP_NOT_FOUND);
  notFound.set_compress(true);
  notFound.set_taskbar(bookName, "");
  return notFound;
}
/* Build the "500 Internal Server Error" HTML page showing `msg`.
   The response is built directly with fixed options (second constructor
   argument true, the three after it false) rather than from
   get_default_response(). */
Response InternalServer::build_500(const std::string& msg)
{
  MustacheData pageData;
  pageData.set("error", msg);

  Response serverError(m_root, true, false, false, false);
  serverError.set_template(RESOURCE::templates::_500_html, pageData);
  serverError.set_mimeType("text/html");
  serverError.set_code(MHD_HTTP_INTERNAL_SERVER_ERROR);
  return serverError;
}
/* Build the template data for the home page: the default data plus a
   "books" list with one entry (name, title, description, article and media
   counts) per library book selected by the local+valid filter. */
MustacheData InternalServer::homepage_data() const
{
  MustacheData bookList{MustacheData::type::list};

  auto pageData = get_default_data();
  for (auto& id : mp_library->filter(kiwix::Filter().local(true).valid(true))) {
    auto& libraryBook = mp_library->getBookById(id);

    MustacheData entry;
    entry.set("name", mp_nameMapper->getNameForId(id));
    entry.set("title", libraryBook.getTitle());
    entry.set("description", libraryBook.getDescription());
    entry.set("articleCount", beautifyInteger(libraryBook.getArticleCount()));
    entry.set("mediaCount", beautifyInteger(libraryBook.getMediaCount()));
    bookList.push_back(entry);
  }

  pageData.set("books", bookList);
  return pageData;
}
/* Tell whether responses for this request's url are exempt from ETags:
   anything under "/catalog", plus the "/search", "/suggest", "/random" and
   "/catch/external" endpoints. */
bool InternalServer::etag_not_needed(const RequestContext& request) const
{
  const std::string url = request.get_url();

  if (kiwix::startsWith(url, "/catalog")) {
    return true;
  }
  if (url == "/search" || url == "/suggest") {
    return true;
  }
  return url == "/random" || url == "/catch/external";
}
/* Match the request's If-None-Match header against this server's id.
   Returns the result of ETag::match(), or a default-constructed ETag when
   std::out_of_range is raised while reading or matching the header. */
ETag
InternalServer::get_matching_if_none_match_etag(const RequestContext& r) const
{
  try {
    return ETag::match(r.get_header(MHD_HTTP_HEADER_IF_NONE_MATCH), m_server_id);
  } catch (const std::out_of_range&) {
    return ETag();
  }
}
/* Build the home page: the index template rendered with homepage_data(),
   served as compressed UTF-8 HTML. `request` is not consulted. */
Response InternalServer::build_homepage(const RequestContext& request)
{
  auto homepage = get_default_response();
  homepage.set_template(RESOURCE::templates::index_html, homepage_data());
  homepage.set_mimeType("text/html; charset=utf-8");
  homepage.set_compress(true);
  return homepage;
}
/* Handle `/meta`: return one metadata item of a book.
 *
 * Reads the `content` (book name) and `name` (metadata key) arguments.
 * Answers 404 when an argument or the book is missing, when no reader is
 * available, or when the key is not one of: title, description, language,
 * name, tags, date, creator, publisher, favicon. Otherwise the value is
 * returned uncompressed and cacheable; the mime type is "text", except for
 * the favicon where the reader provides it.
 */
Response InternalServer::handle_meta(const RequestContext& request)
{
  std::string bookName;
  std::string bookId;
  std::string metaName;
  std::shared_ptr<Reader> reader;
  try {
    bookName = request.get_argument("content");
    bookId = mp_nameMapper->getIdForName(bookName);
    metaName = request.get_argument("name");
    reader = mp_library->getReaderById(bookId);
  } catch (const std::out_of_range&) {
    return build_404(request, bookName);
  }

  if (!reader) {
    return build_404(request, bookName);
  }

  std::string payload;
  std::string payloadMimeType = "text";

  if (metaName == "title") {
    payload = reader->getTitle();
  } else if (metaName == "description") {
    payload = reader->getDescription();
  } else if (metaName == "language") {
    payload = reader->getLanguage();
  } else if (metaName == "name") {
    payload = reader->getName();
  } else if (metaName == "tags") {
    payload = reader->getTags();
  } else if (metaName == "date") {
    payload = reader->getDate();
  } else if (metaName == "creator") {
    payload = reader->getCreator();
  } else if (metaName == "publisher") {
    payload = reader->getPublisher();
  } else if (metaName == "favicon") {
    // Fills both the image data and its mime type.
    reader->getFavicon(payload, payloadMimeType);
  } else {
    return build_404(request, bookName);
  }

  auto metaResponse = get_default_response();
  metaResponse.set_content(payload);
  metaResponse.set_mimeType(payloadMimeType);
  metaResponse.set_compress(false);
  metaResponse.set_cacheable();
  return metaResponse;
}
/* Handle `/suggest`: return title suggestions for a search term as JSON.
 *
 * Reads the `content` (book name) and `term` arguments; a missing argument
 * or unknown book gives a 404. Up to 10 suggestions are listed and, when
 * the book has a full-text index, a final "containing '<term>'..." entry
 * proposes a full-text search. If the library has no reader for the book,
 * an empty suggestion list is returned.
 */
Response InternalServer::handle_suggest(const RequestContext& request)
{
  if (m_verbose.load()) {
    printf("** running handle_suggest\n");
  }

  const unsigned int maxSuggestionCount = 10;

  std::string bookName;
  std::string bookId;
  std::string term;
  std::shared_ptr<Reader> reader;
  try {
    bookName = request.get_argument("content");
    bookId = mp_nameMapper->getIdForName(bookName);
    term = request.get_argument("term");
    reader = mp_library->getReaderById(bookId);
  } catch (const std::out_of_range&) {
    return build_404(request, bookName);
  }

  if (m_verbose.load()) {
    printf("Searching suggestions for: \"%s\"\n", term.c_str());
  }

  MustacheData results{MustacheData::type::list};

  // `first` marks the first entry of the list for the template.
  bool first = true;
  if (reader != nullptr) {
    /* Get the suggestions */
    SuggestionsList_t suggestions;
    reader->searchSuggestionsSmart(term, maxSuggestionCount, suggestions);
    for (auto& suggestion : suggestions) {
      MustacheData result;
      result.set("label", suggestion[0]);
      result.set("value", suggestion[0]);
      result.set("first", first);
      first = false;
      results.push_back(result);
    }
  }

  /* Propose the fulltext search if possible.
     The reader may be null here, so it must be checked before use. */
  if (reader != nullptr && reader->hasFulltextIndex()) {
    MustacheData result;
    result.set("label", "containing '" + term + "'...");
    result.set("value", term + " ");
    result.set("first", first);
    results.push_back(result);
  }

  auto data = get_default_data();
  data.set("suggestions", results);

  auto response = get_default_response();
  response.set_template(RESOURCE::templates::suggestion_json, data);
  response.set_mimeType("application/json; charset=utf-8");
  response.set_compress(true);
  return response;
}
/* Handle `/skin/...`: serve an embedded resource.
   The resource name is the url without its leading '/'. An unknown resource
   gives a 404; otherwise the content is sent compressed and cacheable, with
   the mime type derived from the resource name. */
Response InternalServer::handle_skin(const RequestContext& request)
{
  if (m_verbose.load()) {
    printf("** running handle_skin\n");
  }

  auto skinResponse = get_default_response();
  const auto resourceName = request.get_url().substr(1);

  try {
    skinResponse.set_content(getResource(resourceName));
  } catch (const ResourceNotFound&) {
    return build_404(request, "");
  }

  skinResponse.set_mimeType(getMimeTypeForFile(resourceName));
  skinResponse.set_compress(true);
  skinResponse.set_cacheable();
  return skinResponse;
}
/**
 * Handle a search request.
 *
 * Request arguments (all optional):
 *  - "content": name of the book to search in; when absent every local,
 *    valid book of the library is searched.
 *  - "pattern": full-text pattern. When a book is selected and one of the
 *    title variants of the pattern matches an entry, the client is
 *    redirected straight to that entry.
 *  - "latitude", "longitude", "distance": geo query, used when no pattern
 *    is given.
 *  - "start", "pageLength": result window. pageLength defaults to 25 and is
 *    capped at MAX_SEARCH_LEN.
 *
 * Responds with the "no search result" page (404) when the requested book is
 * unknown or when neither a pattern nor a geo query was supplied, and sets
 * the status to 204 when the search yields no result.
 */
Response InternalServer::handle_search(const RequestContext& request)
{
  if (m_verbose.load()) {
    printf("** running handle_search\n");
  }

  std::string bookName;
  std::string bookId;
  try {
    bookName = request.get_argument("content");
    bookId = mp_nameMapper->getIdForName(bookName);
  } catch (const std::out_of_range&) {}

  std::string patternString;
  try {
    patternString = request.get_argument("pattern");
  } catch (const std::out_of_range&) {}

  /* Retrieve geo search */
  bool has_geo_query = false;
  float latitude = 0;
  float longitude = 0;
  float distance = 0;
  try {
    latitude = request.get_argument<float>("latitude");
    longitude = request.get_argument<float>("longitude");
    distance = request.get_argument<float>("distance");
    has_geo_query = true;
  } catch(const std::out_of_range&) {}
    catch(const std::invalid_argument&) {}

  std::shared_ptr<Reader> reader(nullptr);
  try {
    reader = mp_library->getReaderById(bookId);
  } catch (const std::out_of_range&) {}

  /* Try first to load directly the article */
  if (reader != nullptr && !patternString.empty()) {
    std::string patternCorrespondingUrl;
    auto variants = reader->getTitleVariants(patternString);
    auto variantsItr = variants.begin();

    while (patternCorrespondingUrl.empty() && variantsItr != variants.end()) {
      try {
        auto entry = reader->getEntryFromTitle(*variantsItr);
        entry = entry.getFinalEntry();
        patternCorrespondingUrl = entry.getPath();
        break;
      } catch(kiwix::NoEntry&) {
        variantsItr++;
      }
    }

    /* If article found then redirect directly to it */
    if (!patternCorrespondingUrl.empty()) {
      auto response = get_default_response();
      // The entry path is URL-encoded, exactly as build_redirect() does, so
      // that paths containing reserved characters give a valid Location.
      response.set_redirection(m_root + "/" + bookName + "/" +
                               kiwix::urlEncode(patternCorrespondingUrl));
      return response;
    }
  }

  /* Make the search */
  auto response = get_default_response();
  response.set_mimeType("text/html; charset=utf-8");
  response.set_taskbar(bookName, reader ? reader->getTitle() : "");
  response.set_compress(true);

  if ( (!reader && !bookName.empty())
    || (patternString.empty() && ! has_geo_query) ) {
    auto data = get_default_data();
    data.set("pattern", encodeDiples(patternString));
    response.set_template(RESOURCE::templates::no_search_result_html, data);
    response.set_code(MHD_HTTP_NOT_FOUND);
    return response;
  }

  Searcher searcher;
  if (reader) {
    searcher.add_reader(reader.get());
  } else {
    for (auto& localBookId: mp_library->filter(kiwix::Filter().local(true).valid(true))) {
      auto currentReader = mp_library->getReaderById(localBookId);
      if (currentReader) {
        searcher.add_reader(currentReader.get());
      }
    }
  }

  // start and pageLength are kept unsigned, matching the type they are
  // parsed as; a very large client-supplied value therefore cannot wrap to a
  // negative number and escape the MAX_SEARCH_LEN clamp below.
  unsigned int start = 0;
  try {
    start = request.get_argument<unsigned int>("start");
  } catch (const std::exception&) {}

  unsigned int pageLength = 25;
  try {
    pageLength = request.get_argument<unsigned int>("pageLength");
  } catch (const std::exception&) {}
  if (pageLength > MAX_SEARCH_LEN) {
    pageLength = MAX_SEARCH_LEN;
  }
  if (pageLength == 0) {
    pageLength = 25;
  }

  const unsigned int end = start + pageLength;

  /* Get the results */
  try {
    if (patternString.empty()) {
      searcher.geo_search(latitude, longitude, distance,
                          start, end, m_verbose.load());
    } else {
      searcher.search(patternString,
                      start, end, m_verbose.load());
    }
    SearchRenderer renderer(&searcher, mp_nameMapper);
    renderer.setSearchPattern(patternString);
    renderer.setSearchContent(bookName);
    renderer.setProtocolPrefix(m_root + "/");
    renderer.setSearchProtocolPrefix(m_root + "/search?");
    renderer.setPageLength(pageLength);
    response.set_content(renderer.getHtml());
  } catch (const std::exception& e) {
    // Best effort: the failure is logged and the (possibly empty) response
    // is still returned to the client.
    std::cerr << e.what() << std::endl;
  }

  // Change the status code if no result was obtained
  if (searcher.getEstimatedResultCount() == 0) {
    response.set_code(MHD_HTTP_NO_CONTENT);
  }

  return response;
}
/**
 * Redirect the client to a random page of the book named by the "content"
 * request argument.
 *
 * A 404 response is returned when the argument is missing, the book cannot
 * be resolved to a reader, or the reader has no entry to offer.
 */
Response InternalServer::handle_random(const RequestContext& request)
{
  if (m_verbose.load()) {
    printf("** running handle_random\n");
  }

  std::string requestedBook;
  std::shared_ptr<Reader> bookReader;
  try {
    requestedBook = request.get_argument("content");
    const std::string resolvedId = mp_nameMapper->getIdForName(requestedBook);
    bookReader = mp_library->getReaderById(resolvedId);
  } catch (const std::out_of_range&) {
    return build_404(request, requestedBook);
  }

  if (!bookReader) {
    return build_404(request, requestedBook);
  }

  try {
    auto randomEntry = bookReader->getRandomPage();
    return build_redirect(requestedBook, randomEntry.getFinalEntry());
  } catch (kiwix::NoEntry&) {
    return build_404(request, requestedBook);
  }
}
/**
 * Render the "captured external link" page for the URL passed in the
 * "source" request argument (URL-decoded before use).
 *
 * A missing or empty "source" yields a 404 response.
 */
Response InternalServer::handle_captured_external(const RequestContext& request)
{
  std::string externalUrl;
  try {
    externalUrl = kiwix::urlDecode(request.get_argument("source"));
  } catch (const std::out_of_range&) {
    // No "source" argument: externalUrl stays empty and is rejected below.
  }

  if (externalUrl.empty()) {
    return build_404(request, "");
  }

  auto templateData = get_default_data();
  templateData.set("source", externalUrl);

  auto page = get_default_response();
  page.set_template(RESOURCE::templates::captured_external_html, templateData);
  page.set_mimeType("text/html; charset=utf-8");
  page.set_compress(true);
  return page;
}
/**
 * Serve the OPDS catalog endpoints.
 *
 * The second URL part selects what is served:
 *  - "searchdescription.xml": the OpenSearch description document;
 *  - "root.xml": an OPDS feed of all valid books of the library;
 *  - "search": an OPDS feed of the books matching the request arguments
 *    "q", "maxsize", "name", "lang", "tag" and "notag", windowed by "start"
 *    (default 0) and "count" (default 10; 0 disables the limit).
 *
 * Any other URL, or a request without a "Host" header, yields a 404
 * response.
 */
Response InternalServer::handle_catalog(const RequestContext& request)
{
  if (m_verbose.load()) {
    printf("** running handle_catalog\n");
  }

  std::string host;
  std::string url;
  try {
    host = request.get_header("Host");
    url = request.get_url_part(1);
  } catch (const std::out_of_range&) {
    return build_404(request, "");
  }

  if (url != "searchdescription.xml" && url != "root.xml" && url != "search") {
    return build_404(request, "");
  }

  auto response = get_default_response();
  response.set_compress(true);

  if (url == "searchdescription.xml") {
    response.set_template(RESOURCE::opensearchdescription_xml, get_default_data());
    response.set_mimeType("application/opensearchdescription+xml");
    return response;
  }

  zim::Uuid uuid;
  kiwix::OPDSDumper opdsDumper;
  opdsDumper.setRootLocation(m_root);
  opdsDumper.setSearchDescriptionUrl("catalog/searchdescription.xml");
  opdsDumper.setLibrary(mp_library);
  response.set_mimeType("application/atom+xml; profile=opds-catalog; kind=acquisition; charset=utf-8");
  std::vector<std::string> bookIdsToDump;

  if (url == "root.xml") {
    opdsDumper.setTitle("All zims");
    uuid = zim::Uuid::generate(host);
    bookIdsToDump = mp_library->filter(kiwix::Filter().valid(true).local(true).remote(true));
  } else if (url == "search") {
    auto filter = kiwix::Filter().valid(true).local(true).remote(true);
    std::string query("<Empty query>");
    size_t count(10);
    size_t startIndex(0);

    // Every argument is optional: a missing or unparsable value simply
    // leaves the corresponding default / filter criterion untouched.
    try {
      query = request.get_argument("q");
      filter.query(query);
    } catch (const std::out_of_range&) {}
    try {
      filter.maxSize(extractFromString<unsigned long>(request.get_argument("maxsize")));
    } catch (...) {}
    try {
      filter.name(request.get_argument("name"));
    } catch (const std::out_of_range&) {}
    try {
      filter.lang(request.get_argument("lang"));
    } catch (const std::out_of_range&) {}
    try {
      count = extractFromString<unsigned long>(request.get_argument("count"));
    } catch (...) {}
    try {
      startIndex = extractFromString<unsigned long>(request.get_argument("start"));
    } catch (...) {}
    try {
      filter.acceptTags(kiwix::split(request.get_argument("tag"), ";"));
    } catch (...) {}
    try {
      filter.rejectTags(kiwix::split(request.get_argument("notag"), ";"));
    } catch (...) {}

    opdsDumper.setTitle("Search result for " + query);
    uuid = zim::Uuid::generate();
    bookIdsToDump = mp_library->filter(filter);
    const auto totalResults = bookIdsToDump.size();

    // "start" comes from the client and may exceed the number of results;
    // erasing past end() is undefined behaviour, so the number of skipped
    // entries is clamped to what is actually available.
    const auto skipCount = startIndex < totalResults ? startIndex : totalResults;
    bookIdsToDump.erase(bookIdsToDump.begin(), bookIdsToDump.begin() + skipCount);

    if (count > 0 && bookIdsToDump.size() > count) {
      bookIdsToDump.resize(count);
    }
    opdsDumper.setOpenSearchInfo(totalResults, startIndex, bookIdsToDump.size());
  }

  opdsDumper.setId(kiwix::to_string(uuid));
  response.set_content(opdsDumper.dumpOPDSFeed(bookIdsToDump));
  return response;
}
namespace
{

// Returns the first part of the request URL (used as the book name by
// handle_content), or an empty string when the URL has no such part.
std::string get_book_name(const RequestContext& request)
{
  std::string firstUrlPart;
  try {
    firstUrlPart = request.get_url_part(0);
  } catch (const std::out_of_range&) {
    firstUrlPart.clear();
  }
  return firstUrlPart;
}

} // unnamed namespace
// Resolves a book name to its reader through the name mapper and the
// library. Returns a null pointer when the lookup throws std::out_of_range.
std::shared_ptr<Reader>
InternalServer::get_reader(const std::string& bookName) const
{
  try {
    const std::string resolvedId = mp_nameMapper->getIdForName(bookName);
    return mp_library->getReaderById(resolvedId);
  } catch (const std::out_of_range&) {
    return std::shared_ptr<Reader>();
  }
}
// Builds a redirection response pointing at
// "<root>/<bookName>/<url-encoded entry path>".
Response
InternalServer::build_redirect(const std::string& bookName, const kiwix::Entry& entry) const
{
  auto redirect = get_default_response();
  const std::string bookPrefix = m_root + "/" + bookName + "/";
  redirect.set_redirection(bookPrefix + kiwix::urlEncode(entry.getPath()));
  return redirect;
}
/**
 * Serve an entry of a book.
 *
 * The first URL part is the book name and the remainder is the entry path.
 * An empty book name produces the homepage, an unknown book or entry a 404
 * response, and a redirect entry (or an empty path) a redirection to the
 * final entry.
 */
Response InternalServer::handle_content(const RequestContext& request)
{
  const bool verbose = m_verbose.load();
  if (verbose) {
    printf("** running handle_content\n");
  }

  const std::string bookName = get_book_name(request);
  if (bookName.empty()) {
    return build_homepage(request);
  }

  const std::shared_ptr<Reader> reader = get_reader(bookName);
  if (!reader) {
    return build_404(request, bookName);
  }

  // Strip the first character and the book name from the URL, then one
  // more '/' if the remainder starts with it.
  auto entryPath = request.get_url().substr(bookName.size() + 1);
  if (!entryPath.empty() && entryPath.front() == '/') {
    entryPath.erase(0, 1);
  }

  kiwix::Entry entry;
  try {
    entry = reader->getEntryFromPath(entryPath);
    // An empty path designates the main page, which is served through a
    // redirection to the real page, just like any redirect entry.
    const bool mustRedirect = entry.isRedirect() || entryPath.empty();
    if (mustRedirect) {
      return build_redirect(bookName, entry.getFinalEntry());
    }
  } catch (kiwix::NoEntry&) {
    if (m_verbose.load()) {
      printf("Failed to find %s\n", entryPath.c_str());
    }
    return build_404(request, bookName);
  }

  auto response = get_default_response();
  response.set_entry(entry, request);
  response.set_taskbar(bookName, reader->getTitle());

  if (m_verbose.load()) {
    printf("Found %s\n", entry.getPath().c_str());
    printf("mimeType: %s\n", response.get_mimeType().c_str());
  }

  return response;
}
}

View File

@@ -37,11 +37,11 @@ namespace {
// into the ETag for ETag::Option opt.
// IMPORTANT: The characters in all_options must come in sorted order (so that
// IMPORTANT: isValidOptionsString() works correctly).
const char all_options[] = "Zz";
const char all_options[] = "cz";
static_assert(ETag::OPTION_COUNT == sizeof(all_options) - 1, "");
bool isValidETagBody(const std::string& s)
bool isValidServerId(const std::string& s)
{
return !s.empty() && s.find_first_of("\"/") == std::string::npos;
}
@@ -83,17 +83,17 @@ bool ETag::get_option(Option opt) const
std::string ETag::get_etag() const
{
if ( m_body.empty() )
if ( m_serverId.empty() )
return std::string();
return "\"" + m_body + "/" + m_options + "\"";
return "\"" + m_serverId + "/" + m_options + "\"";
}
ETag::ETag(const std::string& body, const std::string& options)
ETag::ETag(const std::string& serverId, const std::string& options)
{
if ( isValidETagBody(body) && isValidOptionsString(options) )
if ( isValidServerId(serverId) && isValidOptionsString(options) )
{
m_body = body;
m_serverId = serverId;
m_options = options;
}
}
@@ -115,7 +115,7 @@ ETag ETag::parse(std::string s)
return ETag(s.substr(0, i), s.substr(i+1));
}
ETag ETag::match(const std::string& etags, const std::string& body)
ETag ETag::match(const std::string& etags, const std::string& server_id)
{
std::istringstream ss(etags);
std::string etag_str;
@@ -125,7 +125,7 @@ ETag ETag::match(const std::string& etags, const std::string& body)
etag_str.pop_back();
const ETag etag = parse(etag_str);
if ( etag && etag.m_body == body )
if ( etag && etag.m_serverId == server_id )
return etag;
}

View File

@@ -28,11 +28,10 @@ namespace kiwix {
// The ETag string used by Kiwix server (more precisely, its value inside the
// double quotes) consists of two parts:
//
// 1. Body - A string uniquely identifying the object or state from which
// the resource has been obtained.
// 1. ServerId - The string obtained on server start up
//
// 2. Options - Zero or more characters encoding the type of the ETag and/or
// the values of some of the headers of the response
// 2. Options - Zero or more characters encoding the values of some of the
// headers of the response
//
// The two parts are separated with a slash (/) symbol (which is always present,
// even when the options part is empty). Neither portion of a Kiwix ETag
@@ -41,7 +40,7 @@ namespace kiwix {
//
// "abcdefghijklmn/"
// "1234567890/z"
// "6f1d19d0-633f-087b-fb55-7ac324ff9baf/Zz"
// "1234567890/cz"
//
// The options part of the Kiwix ETag allows to correctly set the required
// headers when responding to a conditional If-None-Match request with a 304
@@ -52,7 +51,7 @@ class ETag
{
public: // types
enum Option {
ZIM_CONTENT,
CACHEABLE_ENTITY,
COMPRESSED_CONTENT,
OPTION_COUNT
};
@@ -60,10 +59,10 @@ class ETag
public: // functions
ETag() {}
void set_body(const std::string& s) { m_body = s; }
void set_server_id(const std::string& id) { m_serverId = id; }
void set_option(Option opt);
explicit operator bool() const { return !m_body.empty(); }
explicit operator bool() const { return !m_serverId.empty(); }
bool get_option(Option opt) const;
std::string get_etag() const;
@@ -77,7 +76,7 @@ class ETag
static ETag parse(std::string s);
private: // data
std::string m_body;
std::string m_serverId;
std::string m_options;
};

View File

@@ -1,192 +0,0 @@
/*
* Copyright 2022 Veloman Yunkan <veloman.yunkan@gmail.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*/
#include "i18n.h"
#include "tools/otherTools.h"
#include <algorithm>
#include <map>
namespace kiwix
{
const char* I18nStringTable::get(const std::string& key) const
{
const I18nString* const begin = entries;
const I18nString* const end = begin + entryCount;
const I18nString* found = std::lower_bound(begin, end, key,
[](const I18nString& a, const std::string& k) {
return a.key < k;
});
return (found == end || found->key != key) ? nullptr : found->value;
}
namespace i18n
{
// this data is generated by the i18n resource compiler
// stringTables holds one string table per language and langCount is the
// number of tables in it (I18nStringDB iterates over indices [0, langCount)).
extern const I18nStringTable stringTables[];
extern const size_t langCount;
}
namespace
{
class I18nStringDB
{
public: // functions
I18nStringDB() {
for ( size_t i = 0; i < kiwix::i18n::langCount; ++i ) {
const auto& t = kiwix::i18n::stringTables[i];
lang2TableMap[t.lang] = &t;
}
enStrings = lang2TableMap.at("en");
};
std::string get(const std::string& lang, const std::string& key) const {
const char* s = getStringsFor(lang)->get(key);
if ( s == nullptr ) {
s = enStrings->get(key);
if ( s == nullptr ) {
throw std::runtime_error("Invalid message id");
}
}
return s;
}
size_t getStringCount(const std::string& lang) const {
try {
return lang2TableMap.at(lang)->entryCount;
} catch(const std::out_of_range&) {
return 0;
}
}
private: // functions
const I18nStringTable* getStringsFor(const std::string& lang) const {
try {
return lang2TableMap.at(lang);
} catch(const std::out_of_range&) {
return enStrings;
}
}
private: // data
std::map<std::string, const I18nStringTable*> lang2TableMap;
const I18nStringTable* enStrings;
};
// Accessor for the single I18nStringDB instance, which is a function-local
// static and is therefore built on the first call.
const I18nStringDB& getStringDb()
{
  static const I18nStringDB sharedDb;
  const I18nStringDB& dbRef = sharedDb;
  return dbRef;
}
} // unnamed namespace
std::string getTranslatedString(const std::string& lang, const std::string& key)
{
return getStringDb().get(lang, key);
}
namespace i18n
{

// Fetches the translated string for `key` in `lang` and renders it as a
// template with `params`.
std::string expandParameterizedString(const std::string& lang,
                                      const std::string& key,
                                      const Parameters& params)
{
  return render_template(getTranslatedString(lang, key), params);
}

} // namespace i18n
// Renders this message in language `lang`, using the stored message id and
// parameters.
std::string ParameterizedMessage::getText(const std::string& lang) const
{
  const std::string rendered = i18n::expandParameterizedString(lang, msgId, params);
  return rendered;
}
namespace
{

// Parses one comma-separated item of a language preference list, i.e.
// "<lang>" or "<lang>;q=<weight>". Leading blanks are skipped. A language
// without a weight gets preference 1. An item that is blank, or whose part
// after ';' is not exactly "q=<float>", is reported as {"", 0}.
LangPreference parseSingleLanguagePreference(const std::string& s)
{
  const auto firstNonBlank = s.find_first_not_of(" \t\n");
  if ( firstNonBlank == std::string::npos ) {
    return {"", 0};
  }

  const auto semicolon = s.find(';', firstNonBlank);
  if ( semicolon == std::string::npos ) {
    return {s.substr(firstNonBlank), 1};
  }

  // semicolon == firstNonBlank gives an empty language name; that case is
  // left to parseUserLanguagePreferences(), which discards it.
  const char* const weightSpec = s.c_str() + semicolon + 1;
  float weight = 1.0;
  int consumed;
  const bool weightOk = sscanf(weightSpec, "q=%f%n", &weight, &consumed) == 1
                     && semicolon + 1 + consumed == s.size();
  if ( !weightOk ) {
    return {"", 0};
  }

  return {s.substr(firstNonBlank, semicolon - firstNonBlank), weight};
}

} // unnamed namespace
// Splits `s` on ',' and parses every item as a language preference. Items
// with an empty language or a preference that is not strictly positive are
// dropped; the remaining ones are returned in their original order.
UserLangPreferences parseUserLanguagePreferences(const std::string& s)
{
  UserLangPreferences accepted;
  std::istringstream input(s);

  for ( std::string item; std::getline(input, item, ','); ) {
    const auto pref = parseSingleLanguagePreference(item);
    // Written as !(x > 0) so that a NaN weight is rejected as well.
    if ( pref.lang.empty() || !(pref.preference > 0) ) {
      continue;
    }
    accepted.push_back(pref);
  }

  return accepted;
}
std::string selectMostSuitableLanguage(const UserLangPreferences& prefs)
{
if ( prefs.empty() ) {
return "en";
}
std::string bestLangSoFar("en");
float bestScoreSoFar = 0;
const auto& stringDb = getStringDb();
for ( const auto& entry : prefs ) {
const float score = entry.preference * stringDb.getStringCount(entry.lang);
if ( score > bestScoreSoFar ) {
bestScoreSoFar = score;
bestLangSoFar = entry.lang;
}
}
return bestLangSoFar;
}
} // namespace kiwix

View File

@@ -1,106 +0,0 @@
/*
* Copyright 2022 Veloman Yunkan <veloman.yunkan@gmail.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*/
#ifndef KIWIX_SERVER_I18N
#define KIWIX_SERVER_I18N
#include <string>
#include <mustache.hpp>
namespace kiwix
{
// A single translation: a message key and its text in one language.
struct I18nString {
const char* const key;
const char* const value;
};
// All the translations available for one language.
struct I18nStringTable {
// Language code of the table (used as the lookup key by the string database)
const char* const lang;
// Number of elements in `entries`
const size_t entryCount;
// The translations. get() looks keys up with std::lower_bound, so the
// entries have to be sorted by key.
const I18nString* const entries;
// Returns the text stored under `key`, or nullptr if there is none.
const char* get(const std::string& key) const;
};
std::string getTranslatedString(const std::string& lang, const std::string& key);
namespace i18n
{
// Parameters substituted into a translated template string
typedef kainjow::mustache::object Parameters;
// Returns the translation of `key` in `lang`, rendered as a template with
// `params`.
std::string expandParameterizedString(const std::string& lang,
const std::string& key,
const Parameters& params);
// Function object bound to one language: calling it with a key (and,
// optionally, parameters) returns the corresponding string in that language.
class GetTranslatedString
{
public:
explicit GetTranslatedString(const std::string& lang) : m_lang(lang) {}
// Plain lookup of `key` in the bound language
std::string operator()(const std::string& key) const
{
return getTranslatedString(m_lang, key);
}
// Lookup of `key` followed by expansion with `params`
std::string operator()(const std::string& key, const Parameters& params) const
{
return expandParameterizedString(m_lang, key, params);
}
private:
const std::string m_lang;
};
} // namespace i18n
// A message identified by its id together with the parameters to expand it
// with; the language is only chosen when the text is requested.
struct ParameterizedMessage
{
public: // types
typedef kainjow::mustache::object Parameters;
public: // functions
ParameterizedMessage(const std::string& msgId, const Parameters& params)
: msgId(msgId)
, params(params)
{}
// Returns the message text in language `lang`.
std::string getText(const std::string& lang) const;
private: // data
const std::string msgId;
const Parameters params;
};
// One language of a user's language preference list.
struct LangPreference
{
// Language name as it appeared in the preference string
const std::string lang;
// Weight of the language: the value parsed from "q=<float>", or 1 when the
// item carries no weight
const float preference;
};
typedef std::vector<LangPreference> UserLangPreferences;
UserLangPreferences parseUserLanguagePreferences(const std::string& s);
std::string selectMostSuitableLanguage(const UserLangPreferences& prefs);
} // namespace kiwix
#endif // KIWIX_SERVER_I18N

View File

File diff suppressed because it is too large Load Diff

View File

@@ -1,195 +0,0 @@
/*
* Copyright 2019 Matthieu Gautier <mgautier@kymeria.fr>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*/
#ifndef KIWIXLIB_SERVER_INTERNALSERVER_H
#define KIWIXLIB_SERVER_INTERNALSERVER_H
extern "C" {
#include "microhttpd_wrapper.h"
}
#include "library.h"
#include "name_mapper.h"
#include <zim/search.h>
#include <zim/suggestion.h>
#include <mustache.hpp>
#include <atomic>
#include <string>
#include "server/request_context.h"
#include "server/response.h"
#include "tools/concurrent_cache.h"
namespace kiwix {
// A geographical search criterion: a point and a distance around it.
struct GeoQuery {
// Builds an unset query (distance -1, which converts to false).
GeoQuery()
: GeoQuery(0, 0, -1)
{}
GeoQuery(float latitude, float longitude, float distance)
: latitude(latitude), longitude(longitude), distance(distance)
{}
float latitude;
float longitude;
float distance;
// A query is considered set when its distance is not negative.
explicit operator bool() const {
return distance >= 0;
}
// Lexicographic ordering on (latitude, longitude, distance)
friend bool operator<(const GeoQuery& l, const GeoQuery& r)
{
return std::tie(l.latitude, l.longitude, l.distance)
< std::tie(r.latitude, r.longitude, r.distance); // keep the same order
}
};
// Description of a search: what is searched for (text pattern and/or geo
// query) and in which books. It is the key type of the search cache declared
// in InternalServer.
class SearchInfo {
public:
SearchInfo(const std::string& pattern, GeoQuery geoQuery, const Library::BookIdSet& bookIds, const std::string& bookFilterString);
zim::Query getZimQuery(bool verbose) const;
const Library::BookIdSet& getBookIds() const { return bookIds; }
// Lexicographic ordering on (bookIds, pattern, geoQuery).
// Note that bookFilterQuery does not take part in the comparison.
friend bool operator<(const SearchInfo& l, const SearchInfo& r)
{
return std::tie(l.bookIds, l.pattern, l.geoQuery)
< std::tie(r.bookIds, r.pattern, r.geoQuery); // keep the same order
}
public: //data
std::string pattern;
GeoQuery geoQuery;
Library::BookIdSet bookIds;
std::string bookFilterQuery;
};
typedef kainjow::mustache::data MustacheData;
class OPDSDumper;
// HTTP server implementation built on libmicrohttpd (see the MHD_* types
// below). It serves the content of a Library, resolving book names through a
// NameMapper, and dispatches every request to one of the handle_* methods.
class InternalServer {
public:
InternalServer(Library* library,
NameMapper* nameMapper,
std::string addr,
int port,
std::string root,
int nbThreads,
unsigned int multizimSearchLimit,
bool verbose,
bool withTaskbar,
bool withLibraryButton,
bool blockExternalLinks,
std::string indexTemplateString,
int ipConnectionLimit);
virtual ~InternalServer();
// Per-request callback; its parameters mirror those of a libmicrohttpd
// access handler.
MHD_Result handlerCallback(struct MHD_Connection* connection,
const char* url,
const char* method,
const char* version,
const char* upload_data,
size_t* upload_data_size,
void** cont_cls);
bool start();
void stop();
std::string getAddress() { return m_addr; }
int getPort() { return m_port; }
private: // functions
// Request handlers. Each one builds the response for one family of URLs.
std::unique_ptr<Response> handle_request(const RequestContext& request);
std::unique_ptr<Response> build_redirect(const std::string& bookName, const zim::Item& item) const;
std::unique_ptr<Response> build_homepage(const RequestContext& request);
std::unique_ptr<Response> handle_viewer_settings(const RequestContext& request);
std::unique_ptr<Response> handle_skin(const RequestContext& request);
std::unique_ptr<Response> handle_catalog(const RequestContext& request);
std::unique_ptr<Response> handle_catalog_v2(const RequestContext& request);
std::unique_ptr<Response> handle_catalog_v2_root(const RequestContext& request);
std::unique_ptr<Response> handle_catalog_v2_entries(const RequestContext& request, bool partial);
std::unique_ptr<Response> handle_catalog_v2_complete_entry(const RequestContext& request, const std::string& entryId);
std::unique_ptr<Response> handle_catalog_v2_categories(const RequestContext& request);
std::unique_ptr<Response> handle_catalog_v2_languages(const RequestContext& request);
std::unique_ptr<Response> handle_catalog_v2_illustration(const RequestContext& request);
std::unique_ptr<Response> handle_search(const RequestContext& request);
std::unique_ptr<Response> handle_search_request(const RequestContext& request);
std::unique_ptr<Response> handle_suggest(const RequestContext& request);
std::unique_ptr<Response> handle_random(const RequestContext& request);
std::unique_ptr<Response> handle_catch(const RequestContext& request);
std::unique_ptr<Response> handle_captured_external(const RequestContext& request);
std::unique_ptr<Response> handle_content(const RequestContext& request);
std::unique_ptr<Response> handle_raw(const RequestContext& request);
std::unique_ptr<Response> handle_locally_customized_resource(const RequestContext& request);
// Helpers shared by the handlers
std::vector<std::string> search_catalog(const RequestContext& request,
kiwix::OPDSDumper& opdsDumper);
MustacheData get_default_data() const;
std::pair<std::string, Library::BookIdSet> selectBooks(const RequestContext& r) const;
SearchInfo getSearchInfo(const RequestContext& r) const;
bool isLocallyCustomizedResource(const std::string& url) const;
std::string getLibraryId() const;
private: // types
class LockableSuggestionSearcher;
// Caches keyed by the search description and by a string, respectively
typedef ConcurrentCache<SearchInfo, std::shared_ptr<zim::Search>> SearchCache;
typedef ConcurrentCache<std::string, std::shared_ptr<LockableSuggestionSearcher>> SuggestionSearcherCache;
private: // data
// Configuration received through the constructor
std::string m_addr;
int m_port;
std::string m_root;
int m_nbThreads;
unsigned int m_multizimSearchLimit;
std::atomic_bool m_verbose;
bool m_withTaskbar;
bool m_withLibraryButton;
bool m_blockExternalLinks;
std::string m_indexTemplateString;
int m_ipConnectionLimit;
struct MHD_Daemon* mp_daemon;
// Raw pointers received through the constructor.
// NOTE(review): presumably not owned by the server - confirm.
Library* mp_library;
NameMapper* mp_nameMapper;
SearchCache searchCache;
SuggestionSearcherCache suggestionSearcherCache;
std::string m_server_id;
class CustomizedResources;
std::unique_ptr<CustomizedResources> m_customizedResources;
// The response builders are friends so that they can read the private
// state of the server they are given.
friend std::unique_ptr<Response> Response::build(const InternalServer& server);
friend std::unique_ptr<ContentResponse> ContentResponse::build(const InternalServer& server, const std::string& content, const std::string& mimetype);
friend std::unique_ptr<Response> ItemResponse::build(const InternalServer& server, const RequestContext& request, const zim::Item& item);
};
}
#endif //KIWIXLIB_SERVER_INTERNALSERVER_H

View File

@@ -1,173 +0,0 @@
/*
* Copyright 2021 Veloman Yunkan <veloman.yunkan@gmail.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*/
#include "internalServer.h"
#include "library.h"
#include "opds_dumper.h"
#include "request_context.h"
#include "response.h"
#include "tools/otherTools.h"
#include "libkiwix-resources.h"
#include <mustache.hpp>
#include <string>
#include <vector>
namespace kiwix {
/**
 * Dispatch a request for the v2 OPDS catalog.
 *
 * The third URL part selects the endpoint: "root.xml",
 * "searchdescription.xml", "entry" (followed by the entry id), "entries",
 * "partial_entries", "categories", "languages" or "illustration". A missing
 * or unknown endpoint, as well as an "entry" request without an id, is
 * answered with a 404 response.
 */
std::unique_ptr<Response> InternalServer::handle_catalog_v2(const RequestContext& request)
{
  if (m_verbose.load()) {
    printf("** running handle_catalog_v2\n");
  }

  std::string url;
  try {
    url = request.get_url_part(2);
  } catch (const std::out_of_range&) {
    return HTTP404Response(*this, request)
           + urlNotFoundMsg;
  }

  if (url == "root.xml") {
    return handle_catalog_v2_root(request);
  } else if (url == "searchdescription.xml") {
    const std::string endpoint_root = m_root + "/catalog/v2";
    return ContentResponse::build(*this,
        RESOURCE::catalog_v2_searchdescription_xml,
        kainjow::mustache::object({{"endpoint_root", endpoint_root}}),
        "application/opensearchdescription+xml"
    );
  } else if (url == "entry") {
    // get_url_part() throws std::out_of_range when the URL has no such
    // part; a request lacking the entry id is a "not found", not a failure.
    std::string entryId;
    try {
      entryId = request.get_url_part(3);
    } catch (const std::out_of_range&) {
      return HTTP404Response(*this, request)
             + urlNotFoundMsg;
    }
    return handle_catalog_v2_complete_entry(request, entryId);
  } else if (url == "entries") {
    return handle_catalog_v2_entries(request, /*partial=*/false);
  } else if (url == "partial_entries") {
    return handle_catalog_v2_entries(request, /*partial=*/true);
  } else if (url == "categories") {
    return handle_catalog_v2_categories(request);
  } else if (url == "languages") {
    return handle_catalog_v2_languages(request);
  } else if (url == "illustration") {
    return handle_catalog_v2_illustration(request);
  } else {
    return HTTP404Response(*this, request)
           + urlNotFoundMsg;
  }
}
std::unique_ptr<Response> InternalServer::handle_catalog_v2_root(const RequestContext& request)
{
const std::string libraryId = getLibraryId();
return ContentResponse::build(
*this,
RESOURCE::templates::catalog_v2_root_xml,
kainjow::mustache::object{
{"date", gen_date_str()},
{"endpoint_root", m_root + "/catalog/v2"},
{"feed_id", gen_uuid(libraryId)},
{"all_entries_feed_id", gen_uuid(libraryId + "/entries")},
{"partial_entries_feed_id", gen_uuid(libraryId + "/partial_entries")},
{"category_list_feed_id", gen_uuid(libraryId + "/categories")},
{"language_list_feed_id", gen_uuid(libraryId + "/languages")}
},
"application/atom+xml;profile=opds-catalog;kind=navigation"
);
}
std::unique_ptr<Response> InternalServer::handle_catalog_v2_entries(const RequestContext& request, bool partial)
{
OPDSDumper opdsDumper(mp_library, mp_nameMapper);
opdsDumper.setRootLocation(m_root);
opdsDumper.setLibraryId(getLibraryId());
const auto bookIds = search_catalog(request, opdsDumper);
const auto opdsFeed = opdsDumper.dumpOPDSFeedV2(bookIds, request.get_query(), partial);
return ContentResponse::build(
*this,
opdsFeed,
"application/atom+xml;profile=opds-catalog;kind=acquisition"
);
}
std::unique_ptr<Response> InternalServer::handle_catalog_v2_complete_entry(const RequestContext& request, const std::string& entryId)
{
try {
mp_library->getBookById(entryId);
} catch (const std::out_of_range&) {
return HTTP404Response(*this, request)
+ urlNotFoundMsg;
}
OPDSDumper opdsDumper(mp_library, mp_nameMapper);
opdsDumper.setRootLocation(m_root);
opdsDumper.setLibraryId(getLibraryId());
const auto opdsFeed = opdsDumper.dumpOPDSCompleteEntry(entryId);
return ContentResponse::build(
*this,
opdsFeed,
"application/atom+xml;type=entry;profile=opds-catalog"
);
}
std::unique_ptr<Response> InternalServer::handle_catalog_v2_categories(const RequestContext& request)
{
OPDSDumper opdsDumper(mp_library, mp_nameMapper);
opdsDumper.setRootLocation(m_root);
opdsDumper.setLibraryId(getLibraryId());
return ContentResponse::build(
*this,
opdsDumper.categoriesOPDSFeed(),
"application/atom+xml;profile=opds-catalog;kind=navigation"
);
}
std::unique_ptr<Response> InternalServer::handle_catalog_v2_languages(const RequestContext& request)
{
OPDSDumper opdsDumper(mp_library, mp_nameMapper);
opdsDumper.setRootLocation(m_root);
opdsDumper.setLibraryId(getLibraryId());
return ContentResponse::build(
*this,
opdsDumper.languagesOPDSFeed(),
"application/atom+xml;profile=opds-catalog;kind=navigation"
);
}
// Serves the illustration of a book. The book id is the fourth URL part and
// the illustration size comes from the "size" argument. Any failure along
// the way (missing id or size, unknown book, ...) results in a 404 response.
std::unique_ptr<Response> InternalServer::handle_catalog_v2_illustration(const RequestContext& request)
{
  try {
    const auto requestedBookId = request.get_url_part(3);
    auto requestedBook = mp_library->getBookByIdThreadSafe(requestedBookId);
    const auto requestedSize = request.get_argument<unsigned int>("size");
    auto picture = requestedBook.getIllustration(requestedSize);
    return ContentResponse::build(*this, picture->getData(), picture->mimeType);
  } catch(...) {
    return HTTP404Response(*this, request)
           + urlNotFoundMsg;
  }
}
} // namespace kiwix

View File

@@ -25,10 +25,8 @@
#include <sstream>
#include <cstdio>
#include <atomic>
#include <cctype>
#include "tools/stringTools.h"
#include "i18n.h"
namespace kiwix {
@@ -68,33 +66,29 @@ fullURL2LocalURL(const std::string& full_url, const std::string& rootLocation)
} // unnamed namespace
RequestContext::RequestContext(struct MHD_Connection* connection,
std::string _rootLocation,
std::string rootLocation,
const std::string& _url,
const std::string& _method,
const std::string& version) :
rootLocation(_rootLocation),
full_url(_url),
url(fullURL2LocalURL(_url, _rootLocation)),
url(fullURL2LocalURL(_url, rootLocation)),
method(str2RequestMethod(_method)),
version(version),
requestIndex(s_requestIndex++),
acceptEncodingGzip(false),
acceptEncodingDeflate(false),
byteRange_()
{
MHD_get_connection_values(connection, MHD_HEADER_KIND, &RequestContext::fill_header, this);
MHD_get_connection_values(connection, MHD_GET_ARGUMENT_KIND, &RequestContext::fill_argument, this);
MHD_get_connection_values(connection, MHD_COOKIE_KIND, &RequestContext::fill_cookie, this);
try {
acceptEncodingGzip =
(get_header(MHD_HTTP_HEADER_ACCEPT_ENCODING).find("gzip") != std::string::npos);
acceptEncodingDeflate =
(get_header(MHD_HTTP_HEADER_ACCEPT_ENCODING).find("deflate") != std::string::npos);
} catch (const std::out_of_range&) {}
try {
byteRange_ = ByteRange::parse(get_header(MHD_HTTP_HEADER_RANGE));
} catch (const std::out_of_range&) {}
userlang = determine_user_language();
}
RequestContext::~RequestContext()
@@ -112,23 +106,7 @@ MHD_Result RequestContext::fill_argument(void *__this, enum MHD_ValueKind kind,
const char *key, const char* value)
{
RequestContext *_this = static_cast<RequestContext*>(__this);
_this->arguments[key].push_back(value == nullptr ? "" : value);
if ( ! _this->queryString.empty() ) {
_this->queryString += "&";
}
_this->queryString += urlEncode(key);
if ( value ) {
_this->queryString += "=";
_this->queryString += urlEncode(value);
}
return MHD_YES;
}
MHD_Result RequestContext::fill_cookie(void *__this, enum MHD_ValueKind kind,
const char *key, const char* value)
{
RequestContext *_this = static_cast<RequestContext*>(__this);
_this->cookies[key] = value == nullptr ? "" : value;
_this->arguments[key] = value == nullptr ? "" : value;
return MHD_YES;
}
@@ -143,19 +121,13 @@ void RequestContext::print_debug_info() const {
printf(" - %s : '%s'\n", it->first.c_str(), it->second.c_str());
}
printf("arguments :\n");
for (auto& pair:arguments) {
printf(" - %s :", pair.first.c_str());
bool first = true;
for (auto& v: pair.second) {
printf("%s %s", first?"":",", v.c_str());
first = false;
}
printf("\n");
for (auto it=arguments.begin(); it!=arguments.end(); it++) {
printf(" - %s : '%s'\n", it->first.c_str(), it->second.c_str());
}
printf("Parsed : \n");
printf("full_url: %s\n", full_url.c_str());
printf("url : %s\n", url.c_str());
printf("acceptEncodingGzip : %d\n", acceptEncodingGzip);
printf("acceptEncodingDeflate : %d\n", acceptEncodingDeflate);
printf("has_range : %d\n", byteRange_.kind() != ByteRange::NONE);
printf("is_valid_url : %d\n", is_valid_url());
printf(".............\n");
@@ -194,10 +166,6 @@ std::string RequestContext::get_full_url() const {
return full_url;
}
// Returns the root location, falling back to "/" when it is empty.
std::string RequestContext::get_root_path() const {
  if ( rootLocation.empty() ) {
    return "/";
  }
  return rootLocation;
}
// A request is considered valid when its local url is non-empty.
bool RequestContext::is_valid_url() const {
  const bool hasLocalUrl = !url.empty();
  return hasLocalUrl;
}
@@ -208,46 +176,11 @@ ByteRange RequestContext::get_range() const {
template<>
std::string RequestContext::get_argument(const std::string& name) const {
return arguments.at(name)[0];
return arguments.at(name);
}
// Returns the value stored for header `name`; the lookup key is
// lcAll(name).  Throws std::out_of_range (from std::map::at) when the
// header is absent.
std::string RequestContext::get_header(const std::string& name) const {
  const std::string key = lcAll(name);
  return headers.at(key);
}
// Returns the language code stored in `userlang`.
std::string RequestContext::get_user_language() const
{
  const std::string& lang = userlang.lang;
  return lang;
}
// True when the stored user language was selected by the cookie.
bool RequestContext::user_language_comes_from_cookie() const
{
  const auto origin = userlang.selectedBy;
  return origin == UserLanguage::SelectorKind::COOKIE;
}
// Picks the user language from the first source that provides one:
//   1. the "userlang" query argument,
//   2. the "userlang" cookie,
//   3. the Accept-Language header (best match chosen by
//      selectMostSuitableLanguage),
//   4. the default "en".
// A source counts as absent when its lookup throws std::out_of_range.
RequestContext::UserLanguage RequestContext::determine_user_language() const
{
  try {
    return {UserLanguage::SelectorKind::QUERY_PARAM, get_argument("userlang")};
  } catch(const std::out_of_range&) {
    // no such query argument - try the cookie
  }

  try {
    return {UserLanguage::SelectorKind::COOKIE, cookies.at("userlang")};
  } catch(const std::out_of_range&) {
    // no such cookie - try the Accept-Language header
  }

  try {
    const auto prefs = parseUserLanguagePreferences(get_header("Accept-Language"));
    return {UserLanguage::SelectorKind::ACCEPT_LANGUAGE_HEADER,
            selectMostSuitableLanguage(prefs)};
  } catch(const std::out_of_range&) {
    // nothing usable - fall through to the default
  }

  return {UserLanguage::SelectorKind::DEFAULT, "en"};
}
// Returns the "format" query argument, or "html" when it cannot be read.
std::string RequestContext::get_requested_format() const
{
  const std::string fallback = "html";
  return get_optional_param<std::string>("format", fallback);
}
}

View File

@@ -25,11 +25,9 @@
#include <string>
#include <sstream>
#include <map>
#include <vector>
#include <stdexcept>
#include "byte_range.h"
#include "tools/stringTools.h"
extern "C" {
#include "microhttpd_wrapper.h"
@@ -70,20 +68,10 @@ class RequestContext {
std::string get_header(const std::string& name) const;
template<typename T=std::string>
T get_argument(const std::string& name) const {
return extractFromString<T>(get_argument(name));
}
// Returns a copy of every value recorded for argument `name`.
// Throws std::out_of_range (from std::map::at) when there is none.
std::vector<std::string> get_arguments(const std::string& name) const {
  const auto& values = arguments.at(name);
  return values;
}
template<class T>
T get_optional_param(const std::string& name, T default_value) const
{
try {
return get_argument<T>(name);
} catch (...) {}
return default_value;
std::istringstream stream(arguments.at(name));
T v;
stream >> v;
return v;
}
@@ -91,72 +79,26 @@ class RequestContext {
std::string get_url() const;
std::string get_url_part(int part) const;
std::string get_full_url() const;
std::string get_root_path() const;
// Returns the stored query string.
std::string get_query() const {
  return queryString;
}
// Rebuilds a query string from the stored arguments, keeping only the
// arguments whose name is accepted by `filter` (a callable taking the
// argument name).  Every kept value is emitted as
// urlEncode(name) '=' urlEncode(value), joined with '&'.
template<class F>
std::string get_query(F filter) const {
  std::string result;
  for ( const auto& arg : arguments ) {
    if ( filter(arg.first) ) {
      for ( const auto& value : arg.second ) {
        // Each appended item contains at least '=', so a non-empty
        // result means an item precedes this one.
        if ( !result.empty() ) {
          result += "&";
        }
        result += urlEncode(arg.first) + '=' + urlEncode(value);
      }
    }
  }
  return result;
}
ByteRange get_range() const;
bool can_compress() const { return acceptEncodingGzip; }
std::string get_user_language() const;
std::string get_requested_format() const;
bool user_language_comes_from_cookie() const;
private: // types
struct UserLanguage
{
enum SelectorKind
{
QUERY_PARAM,
COOKIE,
ACCEPT_LANGUAGE_HEADER,
DEFAULT
};
SelectorKind selectedBy;
std::string lang;
};
bool can_compress() const { return acceptEncodingDeflate; }
private: // data
std::string rootLocation;
std::string full_url;
std::string url;
RequestMethod method;
std::string version;
unsigned long long requestIndex;
bool acceptEncodingGzip;
bool acceptEncodingDeflate;
ByteRange byteRange_;
std::map<std::string, std::string> headers;
std::map<std::string, std::vector<std::string>> arguments;
std::map<std::string, std::string> cookies;
std::string queryString;
UserLanguage userlang;
std::map<std::string, std::string> arguments;
private: // functions
UserLanguage determine_user_language() const;
static MHD_Result fill_header(void *, enum MHD_ValueKind, const char*, const char*);
static MHD_Result fill_cookie(void *, enum MHD_ValueKind, const char*, const char*);
static MHD_Result fill_argument(void *, enum MHD_ValueKind, const char*, const char*);
};

View File

@@ -1,47 +1,19 @@
/*
* Copyright 2019 Matthieu Gautier <mgautier@kymeria.fr>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*/
#include "response.h"
#include "request_context.h"
#include "internalServer.h"
#include "libkiwix-resources.h"
#include "kiwixlib-resources.h"
#include "tools/regexTools.h"
#include "tools/stringTools.h"
#include "tools/otherTools.h"
#include "tools/archiveTools.h"
#include "string.h"
#include <mustache.hpp>
#include <zlib.h>
#include <array>
// This is somewhat of a magic value.
// If this value is too small, we will compress (and lose CPU time on) too
// much content.
// If this value is too big, we will not compress enough content and send too
// much data.
// If we assume that the MTU is 1500 bytes, it is useless to compress
// smaller content, as it will be sent in one packet anyway.
// 1400 bytes seems to be a commonly accepted limit.
#define KIWIX_MIN_CONTENT_SIZE_TO_COMPRESS 1400
#define KIWIX_MIN_CONTENT_SIZE_TO_DEFLATE 100
namespace kiwix {
@@ -49,241 +21,41 @@ namespace
{
// some utilities
std::string get_mime_type(const zim::Item& item)
std::string get_mime_type(const kiwix::Entry& entry)
{
try {
return item.getMimetype();
} catch (std::exception& e) {
return entry.getMimetype();
} catch (exception& e) {
return "application/octet-stream";
}
}
bool is_compressible_mime_type(const std::string& mimeType)
{
return mimeType.find("text/") != std::string::npos
|| mimeType.find("application/javascript") != std::string::npos
|| mimeType.find("application/atom") != std::string::npos
|| mimeType.find("application/opensearchdescription") != std::string::npos
|| mimeType.find("application/json") != std::string::npos
// Web fonts
|| mimeType.find("application/font-") != std::string::npos
|| mimeType.find("application/x-font-") != std::string::npos
|| mimeType.find("application/vnd.ms-fontobject") != std::string::npos
|| mimeType.find("font/") != std::string::npos;
return mimeType.find("text/") != string::npos
|| mimeType.find("application/javascript") != string::npos
|| mimeType.find("application/atom") != string::npos
|| mimeType.find("application/opensearchdescription") != string::npos
|| mimeType.find("application/json") != string::npos;
}
// Compresses `content` in place with zlib; windowBits 31 asks zlib for
// the gzip wrapper.
//
// Returns true and replaces `content` with the compressed bytes on
// success.  Returns false and leaves `content` untouched when zlib cannot
// be initialised or the stream does not finish cleanly.
//
// Failures are detected with runtime checks rather than assert(), so a
// build with NDEBUG cannot replace the content with truncated data.
bool compress(std::string &content) {
  z_stream strm;
  strm.zalloc = Z_NULL;
  strm.zfree = Z_NULL;
  strm.opaque = Z_NULL;
  if (deflateInit2(&strm, Z_DEFAULT_COMPRESSION, Z_DEFLATED, 31, 8,
                   Z_DEFAULT_STRATEGY) != Z_OK) {
    return false;
  }

  strm.avail_in = static_cast<decltype(strm.avail_in)>(content.size());
  strm.next_in =
      const_cast<Bytef *>(reinterpret_cast<const Bytef *>(content.data()));

  std::string compressed;
  std::array<char, 16384> buff{};
  int ret = Z_OK;
  do {
    // Hand zlib a fresh output window on every round.
    strm.avail_out = buff.size();
    strm.next_out = reinterpret_cast<Bytef *>(buff.data());
    ret = deflate(&strm, Z_FINISH);
    if (ret == Z_STREAM_ERROR) {
      break;
    }
    compressed.append(buff.data(), buff.size() - strm.avail_out);
  } while (strm.avail_out == 0);  // a full window means more output may follow

  const bool finished = (ret == Z_STREAM_END) && (strm.avail_in == 0);
  deflateEnd(&strm);  // always release zlib's internal state
  if (!finished) {
    return false;
  }

  content.swap(compressed);
  return true;
}
// Maps a response kind to the Cache-Control header value sent with it.
// Kinds other than STATIC_RESOURCE and ZIM_CONTENT get
// "max-age=0, must-revalidate".
const char* getCacheControlHeader(Response::Kind k)
{
  if ( k == Response::STATIC_RESOURCE ) {
    return "max-age=31536000, immutable";
  }
  if ( k == Response::ZIM_CONTENT ) {
    return "max-age=3600, must-revalidate";
  }
  return "max-age=0, must-revalidate";
}
} // unnamed namespace
Response::Response(bool verbose)
Response::Response(const std::string& root, bool verbose, bool withTaskbar, bool withLibraryButton, bool blockExternalLinks)
: m_verbose(verbose),
m_returnCode(MHD_HTTP_OK)
{
add_header(MHD_HTTP_HEADER_ACCESS_CONTROL_ALLOW_ORIGIN, "*");
}
// Records the response kind; for ZIM_CONTENT the matching ETag option is
// set as well.
void Response::set_kind(Kind k)
{
  m_kind = k;
  if ( m_kind != ZIM_CONTENT ) {
    return;
  }
  m_etag.set_option(ETag::ZIM_CONTENT);
}
std::unique_ptr<Response> Response::build(const InternalServer& server)
{
return std::unique_ptr<Response>(new Response(server.m_verbose.load()));
}
// Creates a 304 (Not Modified) response carrying `etag`.
//
// The ETag options drive two extras: ZIM_CONTENT switches the response
// kind to ZIM_CONTENT, and COMPRESSED_CONTENT adds "Vary: Accept-Encoding".
std::unique_ptr<Response> Response::build_304(const InternalServer& server, const ETag& etag)
{
  auto notModified = Response::build(server);
  notModified->set_code(MHD_HTTP_NOT_MODIFIED);
  notModified->m_etag = etag;

  const bool isZimContent = etag.get_option(ETag::ZIM_CONTENT);
  if ( isZimContent ) {
    notModified->set_kind(Response::ZIM_CONTENT);
  }

  const bool wasCompressed = etag.get_option(ETag::COMPRESSED_CONTENT);
  if ( wasCompressed ) {
    notModified->add_header(MHD_HTTP_HEADER_VARY, "Accept-Encoding");
  }

  return notModified;
}
const UrlNotFoundMsg urlNotFoundMsg;
const InvalidUrlMsg invalidUrlMsg;
// Returns the string for message `msgId`, translated into the language of
// the request being answered.
std::string ContentResponseBlueprint::getMessage(const std::string& msgId) const
{
  const std::string lang = m_request.get_user_language();
  return getTranslatedString(lang, msgId);
}
// Turns the blueprint into a ContentResponse: renders the template with
// the collected data and applies the blueprint's HTTP status code.
std::unique_ptr<ContentResponse> ContentResponseBlueprint::generateResponseObject() const
{
  auto response = ContentResponse::build(m_server, m_template, m_data, m_mimeType);
  response->set_code(m_httpStatusCode);
  return response;
}
// Prepares an error page blueprint.
//
// httpStatusCode  - status code of the generated response.
// pageTitleMsgId  - message id of the page title.
// headingMsgId    - message id of the page heading.
// cssUrl          - stylesheet URL, passed through
//                   onlyAsNonEmptyMustacheValue().
//
// The requested format selects both the MIME type and the template:
// "html" uses the HTML error template, anything else the XML one.
// The "details" list starts empty.
HTTPErrorResponse::HTTPErrorResponse(const InternalServer& server,
                                     const RequestContext& request,
                                     int httpStatusCode,
                                     const std::string& pageTitleMsgId,
                                     const std::string& headingMsgId,
                                     const std::string& cssUrl)
  : ContentResponseBlueprint(
      &server,
      &request,
      httpStatusCode,
      request.get_requested_format() == "html" ? "text/html; charset=utf-8" : "application/xml; charset=utf-8",
      request.get_requested_format() == "html" ? RESOURCE::templates::error_html : RESOURCE::templates::error_xml)
{
  kainjow::mustache::list noDetails;
  this->m_data = kainjow::mustache::object{
    {"CSS_URL", onlyAsNonEmptyMustacheValue(cssUrl) },
    {"PAGE_TITLE", getMessage(pageTitleMsgId)},
    {"PAGE_HEADING", getMessage(headingMsgId)},
    {"details", noDetails}
  };
}
HTTP404Response::HTTP404Response(const InternalServer& server,
const RequestContext& request)
: HTTPErrorResponse(server,
request,
MHD_HTTP_NOT_FOUND,
"404-page-title",
"404-page-heading")
m_root(root),
m_content(""),
m_mimeType(""),
m_returnCode(MHD_HTTP_OK),
m_withTaskbar(withTaskbar),
m_withLibraryButton(withLibraryButton),
m_blockExternalLinks(blockExternalLinks),
m_bookName(""),
m_bookTitle("")
{
}
// Appends the "url-not-found" message, parameterised with the full URL of
// the request, to the error details.
HTTPErrorResponse& HTTP404Response::operator+(UrlNotFoundMsg /*unused*/)
{
  const std::string url = m_request.get_full_url();
  const ParameterizedMessage notFound("url-not-found", {{"url", url}});
  return *this + notFound;
}
// Appends `msg` to the "details" list of the template data as a {"p", msg}
// item and returns *this so that further details can be chained.
// Note: despite the operator's name this modifies the object.
HTTPErrorResponse& HTTPErrorResponse::operator+(const std::string& msg)
{
m_data["details"].push_back({"p", msg});
return *this;
}
// Appends `details`, rendered in the language of the request, to the
// error details.
HTTPErrorResponse& HTTPErrorResponse::operator+(const ParameterizedMessage& details)
{
  const std::string lang = m_request.get_user_language();
  return *this + details.getText(lang);
}
// Same effect as operator+(details): operator+ already modifies the
// object in place, so += simply forwards to it.
HTTPErrorResponse& HTTPErrorResponse::operator+=(const ParameterizedMessage& details)
{
  HTTPErrorResponse& self = *this + details;
  return self;
}
// Error page blueprint for 400 (Bad Request), using the "400-page-title"
// and "400-page-heading" messages.
HTTP400Response::HTTP400Response(const InternalServer& server,
                                 const RequestContext& request)
  : HTTPErrorResponse(
      server,
      request,
      MHD_HTTP_BAD_REQUEST,
      "400-page-title",
      "400-page-heading")
{
}
// Appends an "invalid URL" explanation to the error details.  The URL
// shown is the full request URL, followed by '?' and the query string
// (passed through encodeDiples()) when the query is not empty.
HTTPErrorResponse& HTTP400Response::operator+(InvalidUrlMsg /*unused*/)
{
  const std::string baseUrl = m_request.get_full_url();
  const auto query = m_request.get_query();
  const std::string requestUrl =
      query.empty() ? baseUrl : baseUrl + ("?" + encodeDiples(query));

  kainjow::mustache::mustache msgTmpl(R"(The requested URL "{{{url}}}" is not a valid request.)");
  return *this + msgTmpl.render({"url", requestUrl});
}
// Error page blueprint for 500 (Internal Server Error), using the
// "500-page-title" and "500-page-heading" messages.  A fixed apology is
// added to the details right away.
HTTP500Response::HTTP500Response(const InternalServer& server,
                                 const RequestContext& request)
  : HTTPErrorResponse(
      server,
      request,
      MHD_HTTP_INTERNAL_SERVER_ERROR,
      "500-page-title",
      "500-page-heading")
{
  // operator+() modifies *this (it behaves like operator+=), so the
  // result does not need to be kept.
  const std::string apology = "An internal server error occured. We are sorry about that :/";
  *this + apology;
}
// Builds the 500 response.  Unlike the base implementation, the MIME type
// is always "text/html;charset=utf-8" instead of m_mimeType.
std::unique_ptr<ContentResponse> HTTP500Response::generateResponseObject() const
{
  auto response = ContentResponse::build(
      m_server, m_template, m_data, std::string("text/html;charset=utf-8"));
  response->set_code(m_httpStatusCode);
  return response;
}
// Creates a 416 (Range Not Satisfiable) response whose Content-Range
// header is "bytes */<resourceLength>".
std::unique_ptr<Response> Response::build_416(const InternalServer& server, size_t resourceLength)
{
  std::ostringstream contentRange;
  contentRange << "bytes */" << resourceLength;

  auto unsatisfiable = Response::build(server);
  // [FIXME] (compile with recent enough version of libmicrohttpd)
  // unsatisfiable->set_code(MHD_HTTP_RANGE_NOT_SATISFIABLE);
  unsatisfiable->set_code(416);
  unsatisfiable->add_header(MHD_HTTP_HEADER_CONTENT_RANGE, contentRange.str());
  return unsatisfiable;
}
// Creates a 302 (Found) response whose Location header is `redirectUrl`.
std::unique_ptr<Response> Response::build_redirect(const InternalServer& server, const std::string& redirectUrl)
{
  auto redirect = Response::build(server);
  redirect->m_returnCode = MHD_HTTP_FOUND;
  redirect->add_header(MHD_HTTP_HEADER_LOCATION, redirectUrl);
  return redirect;
}
static MHD_Result print_key_value (void *cls, enum MHD_ValueKind kind,
const char *key, const char *value)
@@ -294,32 +66,32 @@ static MHD_Result print_key_value (void *cls, enum MHD_ValueKind kind,
struct RunningResponse {
zim::Item item;
kiwix::Entry entry;
int range_start;
RunningResponse(zim::Item item,
RunningResponse(kiwix::Entry entry,
int range_start) :
item(item),
entry(entry),
range_start(range_start)
{}
};
static ssize_t callback_reader_from_item(void* cls,
static ssize_t callback_reader_from_entry(void* cls,
uint64_t pos,
char* buf,
size_t max)
{
RunningResponse* response = static_cast<RunningResponse*>(cls);
size_t max_size_to_set = std::min<size_t>(
size_t max_size_to_set = min<size_t>(
max,
response->item.getSize() - pos - response->range_start);
response->entry.getSize() - pos - response->range_start);
if (max_size_to_set <= 0) {
return MHD_CONTENT_READER_END_WITH_ERROR;
}
zim::Blob blob = response->item.getData(response->range_start+pos, max_size_to_set);
zim::Blob blob = response->entry.getBlob(response->range_start+pos, max_size_to_set);
memcpy(buf, blob.data(), max_size_to_set);
return max_size_to_set;
}
@@ -341,57 +113,198 @@ void print_response_info(int retCode, MHD_Response* response)
}
std::string render_template(const std::string& template_str, kainjow::mustache::data data)
{
kainjow::mustache::mustache tmpl(template_str);
kainjow::mustache::data urlencode{kainjow::mustache::lambda2{
[](const std::string& str,const kainjow::mustache::renderer& r) { return urlEncode(r(str), true); }}};
data.set("urlencoded", urlencode);
std::stringstream ss;
tmpl.render(data, [&ss](const std::string& str) { ss << str; });
return ss.str();
}
void Response::introduce_taskbar()
{
kainjow::mustache::data data;
data.set("root", m_root);
data.set("content", m_bookName);
data.set("hascontent", !m_bookName.empty());
data.set("title", m_bookTitle);
data.set("withlibrarybutton", m_withLibraryButton);
auto head_content = render_template(RESOURCE::templates::head_part_html, data);
m_content = appendToFirstOccurence(
m_content,
"<head>",
head_content);
auto taskbar_part = render_template(RESOURCE::templates::taskbar_part_html, data);
m_content = appendToFirstOccurence(
m_content,
"<body[^>]*>",
taskbar_part);
}
void Response::inject_externallinks_blocker()
{
kainjow::mustache::data data;
data.set("root", m_root);
auto script_tag = render_template(RESOURCE::templates::external_blocker_part_html, data);
m_content = appendToFirstOccurence(
m_content,
"<head>",
script_tag);
}
bool
ContentResponse::can_compress(const RequestContext& request) const
Response::can_compress(const RequestContext& request) const
{
return request.can_compress()
&& is_compressible_mime_type(m_mimeType)
&& (m_content.size() > KIWIX_MIN_CONTENT_SIZE_TO_COMPRESS);
&& (m_content.size() > KIWIX_MIN_CONTENT_SIZE_TO_DEFLATE);
}
// Content may be decorated only when its MIME type starts with
// "text/html" and does not contain ";raw=true".
bool
Response::contentDecorationAllowed() const
{
  if ( !startsWith(m_mimeType, "text/html") ) {
    return false;
  }
  const bool rawRequested = m_mimeType.find(";raw=true") != std::string::npos;
  return !rawRequested;
}
// Creates an empty-bodied MHD response for the error mode.  For a 416
// return code a "Content-Range: bytes */<length>" header is added, with
// the length taken from m_byteRange.  `request` is not consulted.
MHD_Response*
Response::create_error_response(const RequestContext& request) const
{
  MHD_Response* emptyResponse = MHD_create_response_from_buffer(0, NULL, MHD_RESPMEM_PERSISTENT);
  if ( m_returnCode != 416 ) {
    return emptyResponse;
  }

  std::ostringstream contentRange;
  contentRange << "bytes */" << m_byteRange.length();
  MHD_add_response_header(emptyResponse,
                          MHD_HTTP_HEADER_CONTENT_RANGE,
                          contentRange.str().c_str());
  return emptyResponse;
}
// Creates the MHD response for raw (in-memory) content.
//
// Steps, in order:
//   1. when decoration is allowed, inject the taskbar and/or the
//      external-links blocker into m_content;
//   2. when compression is enabled and possible for this request, deflate
//      m_content, keeping the compressed form only when zlib succeeds and
//      the size check below passes;
//   3. wrap m_content in an MHD response (MHD copies the buffer) and add
//      the Vary, Content-Encoding and Content-Type headers.
//
// Note: m_content (and possibly m_etag) are modified by this call.
MHD_Response*
Response::create_raw_content_mhd_response(const RequestContext& request)
{
  if (contentDecorationAllowed()) {
    if (m_withTaskbar) {
      introduce_taskbar();
    }
    if (m_blockExternalLinks) {
      inject_externallinks_blocker();
    }
  }

  bool shouldCompress = m_compress && can_compress(request);
  if (shouldCompress) {
    std::vector<Bytef> compr_buffer(compressBound(m_content.size()));
    // Use size(), not capacity(): only the first size() elements are
    // valid storage that zlib may write to.
    uLongf comprLen = compr_buffer.size();
    int err = compress(compr_buffer.data(),
                       &comprLen,
                       reinterpret_cast<const Bytef*>(m_content.data()),
                       m_content.size());
    if (err == Z_OK && comprLen > 2 && comprLen < (m_content.size() + 2)) {
      /* /!\ Internet Explorer has a bug with deflate compression.
         It can not handle the first two bytes (compression headers)
         We need to chunk them off (move the content 2bytes)
         It has no incidence on other browsers
         See http://www.subbu.org/blog/2008/03/ie7-deflate-or-not and comments */
      m_content = std::string(reinterpret_cast<const char*>(&compr_buffer[2]), comprLen - 2);
      m_etag.set_option(ETag::COMPRESSED_CONTENT);
    } else {
      shouldCompress = false;
    }
  }

  MHD_Response* response = MHD_create_response_from_buffer(
    m_content.size(), const_cast<char*>(m_content.data()), MHD_RESPMEM_MUST_COPY);

  // At this point m_etag.get_option(ETag::COMPRESSED_CONTENT) and
  // shouldCompress can have different values. This can happen for a 304 (Not
  // Modified) response generated while handling a conditional If-None-Match
  // request. In that case the m_etag (together with its COMPRESSED_CONTENT
  // option) is obtained from the ETag list of the If-None-Match header and the
  // response has no body (which shouldn't be compressed).
  if ( m_etag.get_option(ETag::COMPRESSED_CONTENT) ) {
    MHD_add_response_header(
      response, MHD_HTTP_HEADER_VARY, "Accept-Encoding");
  }
  if (shouldCompress) {
    MHD_add_response_header(
      response, MHD_HTTP_HEADER_CONTENT_ENCODING, "deflate");
  }
  MHD_add_response_header(response, MHD_HTTP_HEADER_CONTENT_TYPE, m_mimeType.c_str());
  return response;
}
// Creates an empty-bodied MHD response whose Location header is
// m_content (which holds the redirection target in this mode).
MHD_Response*
Response::create_redirection_mhd_response() const
{
  MHD_Response* redirect = MHD_create_response_from_buffer(0, nullptr, MHD_RESPMEM_MUST_COPY);
  const char* target = m_content.c_str();
  MHD_add_response_header(redirect, MHD_HTTP_HEADER_LOCATION, target);
  return redirect;
}
// Creates a callback-driven MHD response that serves m_entry in blocks of
// 16384 bytes, starting at the first byte of m_byteRange.
//
// Headers added, in order: Content-Type, Accept-Ranges, Content-Range
// (only for a resolved partial range) and Content-Length.  The
// RunningResponse allocated here is handed to MHD together with
// callback_free_response.
MHD_Response*
Response::create_entry_mhd_response() const
{
  const auto bodyLength = m_byteRange.length();
  RunningResponse* readerState = new RunningResponse(m_entry, m_byteRange.first());
  MHD_Response* response = MHD_create_response_from_callback(
      bodyLength, 16384, callback_reader_from_entry, readerState, callback_free_response);

  MHD_add_response_header(response, MHD_HTTP_HEADER_CONTENT_TYPE, m_mimeType.c_str());
  MHD_add_response_header(response, MHD_HTTP_HEADER_ACCEPT_RANGES, "bytes");

  const bool isPartial = m_byteRange.kind() == ByteRange::RESOLVED_PARTIAL_CONTENT;
  if ( isPartial ) {
    std::ostringstream contentRange;
    contentRange << "bytes " << m_byteRange.first() << "-" << m_byteRange.last()
                 << "/" << m_entry.getSize();
    MHD_add_response_header(response,
                            MHD_HTTP_HEADER_CONTENT_RANGE,
                            contentRange.str().c_str());
  }

  MHD_add_response_header(response,
                          MHD_HTTP_HEADER_CONTENT_LENGTH,
                          kiwix::to_string(bodyLength).c_str());
  return response;
}
MHD_Response*
Response::create_mhd_response(const RequestContext& request)
{
MHD_Response* response = MHD_create_response_from_buffer(0, nullptr, MHD_RESPMEM_PERSISTENT);
return response;
}
switch (m_mode) {
case ResponseMode::ERROR_RESPONSE:
return create_error_response(request);
MHD_Response*
ContentResponse::create_mhd_response(const RequestContext& request)
{
const bool isCompressed = can_compress(request) && compress(m_content);
case ResponseMode::RAW_CONTENT :
return create_raw_content_mhd_response(request);
MHD_Response* response = MHD_create_response_from_buffer(
m_content.size(), const_cast<char*>(m_content.data()), MHD_RESPMEM_MUST_COPY);
case ResponseMode::REDIRECTION :
return create_redirection_mhd_response();
if (isCompressed) {
m_etag.set_option(ETag::COMPRESSED_CONTENT);
MHD_add_response_header(
response, MHD_HTTP_HEADER_VARY, "Accept-Encoding");
MHD_add_response_header(
response, MHD_HTTP_HEADER_CONTENT_ENCODING, "gzip");
case ResponseMode::ENTRY :
return create_entry_mhd_response();
}
return response;
return nullptr;
}
MHD_Result Response::send(const RequestContext& request, MHD_Connection* connection)
{
MHD_Response* response = create_mhd_response(request);
MHD_add_response_header(response, MHD_HTTP_HEADER_CACHE_CONTROL,
getCacheControlHeader(m_kind));
const std::string etag = m_etag.get_etag();
if ( ! etag.empty() )
MHD_add_response_header(response, MHD_HTTP_HEADER_ETAG, etag.c_str());
for(auto& p: m_customHeaders) {
MHD_add_response_header(response, p.first.c_str(), p.second.c_str());
}
if ( ! request.user_language_comes_from_cookie() ) {
const std::string cookie = "userlang=" + request.get_user_language()
+ ";Path=" + request.get_root_path()
+ ";Max-Age=31536000";
MHD_add_response_header(response, MHD_HTTP_HEADER_SET_COOKIE, cookie.c_str());
if ( m_mode != ResponseMode::ERROR_RESPONSE ) {
MHD_add_response_header(response, "Access-Control-Allow-Origin", "*");
MHD_add_response_header(response, MHD_HTTP_HEADER_CACHE_CONTROL,
m_etag.get_option(ETag::CACHEABLE_ENTITY) ? "max-age=2723040, public" : "no-cache, no-store, must-revalidate");
const std::string etag = m_etag.get_etag();
if ( ! etag.empty() )
MHD_add_response_header(response, MHD_HTTP_HEADER_ETAG, etag.c_str());
}
if (m_returnCode == MHD_HTTP_OK && m_byteRange.kind() == ByteRange::RESOLVED_PARTIAL_CONTENT)
@@ -405,95 +318,48 @@ MHD_Result Response::send(const RequestContext& request, MHD_Connection* connect
return ret;
}
ContentResponse::ContentResponse(const std::string& root, bool verbose, const std::string& content, const std::string& mimetype) :
Response(verbose),
m_root(root),
m_content(content),
m_mimeType(mimetype)
{
add_header(MHD_HTTP_HEADER_CONTENT_TYPE, m_mimeType);
void Response::set_template(const std::string& template_str, kainjow::mustache::data data) {
set_content(render_template(template_str, data));
}
std::unique_ptr<ContentResponse> ContentResponse::build(
const InternalServer& server,
const std::string& content,
const std::string& mimetype)
{
return std::unique_ptr<ContentResponse>(new ContentResponse(
server.m_root,
server.m_verbose.load(),
content,
mimetype));
void Response::set_content(const std::string& content) {
m_content = content;
m_mode = ResponseMode::RAW_CONTENT;
}
std::unique_ptr<ContentResponse> ContentResponse::build(
const InternalServer& server,
const std::string& template_str,
kainjow::mustache::data data,
const std::string& mimetype)
{
auto content = render_template(template_str, data);
return ContentResponse::build(server, content, mimetype);
void Response::set_redirection(const std::string& url) {
m_content = url;
m_mode = ResponseMode::REDIRECTION;
m_returnCode = MHD_HTTP_FOUND;
}
ItemResponse::ItemResponse(bool verbose, const zim::Item& item, const std::string& mimetype, const ByteRange& byterange) :
Response(verbose),
m_item(item),
m_mimeType(mimetype)
{
m_byteRange = byterange;
set_kind(Response::ZIM_CONTENT);
add_header(MHD_HTTP_HEADER_CONTENT_TYPE, m_mimeType);
}
void Response::set_entry(const Entry& entry, const RequestContext& request) {
m_entry = entry;
m_mode = ResponseMode::ENTRY;
std::unique_ptr<Response> ItemResponse::build(const InternalServer& server, const RequestContext& request, const zim::Item& item)
{
const std::string mimetype = get_mime_type(item);
auto byteRange = request.get_range().resolve(item.getSize());
const bool noRange = byteRange.kind() == ByteRange::RESOLVED_FULL_CONTENT;
if (noRange && is_compressible_mime_type(mimetype)) {
// Return a contentResponse
auto response = ContentResponse::build(server, item.getData(), mimetype);
response->set_kind(Response::ZIM_CONTENT);
response->m_byteRange = byteRange;
return std::move(response);
const std::string mimeType = get_mime_type(entry);
set_mimeType(mimeType);
set_cacheable();
m_byteRange = request.get_range().resolve(entry.getSize());
const bool noRange = m_byteRange.kind() == ByteRange::RESOLVED_FULL_CONTENT;
if ( noRange && is_compressible_mime_type(mimeType) ) {
zim::Blob raw_content = entry.getBlob();
const std::string content = string(raw_content.data(), raw_content.size());
set_content(content);
set_compress(true);
} else if ( m_byteRange.kind() == ByteRange::RESOLVED_UNSATISFIABLE ) {
set_code(416);
set_content("");
m_mode = ResponseMode::ERROR_RESPONSE;
}
if (byteRange.kind() == ByteRange::RESOLVED_UNSATISFIABLE) {
auto response = Response::build_416(server, item.getSize());
response->set_kind(Response::ZIM_CONTENT);
return response;
}
return std::unique_ptr<Response>(new ItemResponse(
server.m_verbose.load(),
item,
mimetype,
byteRange));
}
MHD_Response*
ItemResponse::create_mhd_response(const RequestContext& request)
void Response::set_taskbar(const std::string& bookName, const std::string& bookTitle)
{
const auto content_length = m_byteRange.length();
MHD_Response* response = MHD_create_response_from_callback(content_length,
16384,
callback_reader_from_item,
new RunningResponse(m_item, m_byteRange.first()),
callback_free_response);
MHD_add_response_header(response, MHD_HTTP_HEADER_ACCEPT_RANGES, "bytes");
if ( m_byteRange.kind() == ByteRange::RESOLVED_PARTIAL_CONTENT ) {
std::ostringstream oss;
oss << "bytes " << m_byteRange.first() << "-" << m_byteRange.last()
<< "/" << m_item.getSize();
MHD_add_response_header(response,
MHD_HTTP_HEADER_CONTENT_RANGE, oss.str().c_str());
}
MHD_add_response_header(response,
MHD_HTTP_HEADER_CONTENT_LENGTH, kiwix::to_string(content_length).c_str());
return response;
m_bookName = bookName;
m_bookTitle = bookTitle;
}

View File

@@ -22,205 +22,80 @@
#define KIWIXLIB_SERVER_RESPONSE_H
#include <string>
#include <map>
#include <mustache.hpp>
#include "byte_range.h"
#include "entry.h"
#include "etag.h"
#include "i18n.h"
#include <zim/item.h>
extern "C" {
#include "microhttpd_wrapper.h"
}
namespace zim {
class Archive;
} // namespace zim
namespace kiwix {
class InternalServer;
enum class ResponseMode {
ERROR_RESPONSE,
RAW_CONTENT,
REDIRECTION,
ENTRY
};
class RequestContext;
class Response {
public:
enum Kind
{
STATIC_RESOURCE,
ZIM_CONTENT,
DYNAMIC_CONTENT
};
public:
Response(bool verbose);
virtual ~Response() = default;
static std::unique_ptr<Response> build(const InternalServer& server);
static std::unique_ptr<Response> build_304(const InternalServer& server, const ETag& etag);
static std::unique_ptr<Response> build_416(const InternalServer& server, size_t resourceLength);
static std::unique_ptr<Response> build_redirect(const InternalServer& server, const std::string& redirectUrl);
Response(const std::string& root, bool verbose, bool withTaskbar, bool withLibraryButton, bool blockExternalLinks);
~Response() = default;
MHD_Result send(const RequestContext& request, MHD_Connection* connection);
void set_template(const std::string& template_str, kainjow::mustache::data data);
void set_content(const std::string& content);
void set_redirection(const std::string& url);
void set_entry(const Entry& entry, const RequestContext& request);
void set_mimeType(const std::string& mimeType) { m_mimeType = mimeType; }
void set_code(int code) { m_returnCode = code; }
void set_kind(Kind k);
Kind get_kind() const { return m_kind; }
void set_etag_body(const std::string& id) { m_etag.set_body(id); }
void add_header(const std::string& name, const std::string& value) { m_customHeaders[name] = value; }
void set_cacheable() { m_etag.set_option(ETag::CACHEABLE_ENTITY); }
void set_server_id(const std::string& id) { m_etag.set_server_id(id); }
void set_etag(const ETag& etag) { m_etag = etag; }
void set_compress(bool compress) { m_compress = compress; }
void set_taskbar(const std::string& bookName, const std::string& bookTitle);
int getReturnCode() const { return m_returnCode; }
std::string get_mimeType() const { return m_mimeType; }
private: // functions
virtual MHD_Response* create_mhd_response(const RequestContext& request);
MHD_Response* create_error_response(const RequestContext& request) const;
protected: // data
Kind m_kind = DYNAMIC_CONTENT;
bool m_verbose;
int m_returnCode;
ByteRange m_byteRange;
ETag m_etag;
std::map<std::string, std::string> m_customHeaders;
friend class ItemResponse;
};
class ContentResponse : public Response {
public:
ContentResponse(
const std::string& root,
bool verbose,
const std::string& content,
const std::string& mimetype);
static std::unique_ptr<ContentResponse> build(
const InternalServer& server,
const std::string& content,
const std::string& mimetype);
static std::unique_ptr<ContentResponse> build(
const InternalServer& server,
const std::string& template_str,
kainjow::mustache::data data,
const std::string& mimetype);
private:
MHD_Response* create_mhd_response(const RequestContext& request);
void introduce_taskbar();
void inject_externallinks_blocker();
bool can_compress(const RequestContext& request) const;
bool contentDecorationAllowed() const;
private: // functions
MHD_Response* create_mhd_response(const RequestContext& request);
MHD_Response* create_error_response(const RequestContext& request) const;
MHD_Response* create_raw_content_mhd_response(const RequestContext& request);
MHD_Response* create_redirection_mhd_response() const;
MHD_Response* create_entry_mhd_response() const;
private:
private: // data
bool m_verbose;
ResponseMode m_mode;
std::string m_root;
std::string m_content;
Entry m_entry;
std::string m_mimeType;
};
// Describes a ContentResponse that has not been built yet: it captures the
// server, the request, the HTTP status code, the MIME type and a template
// string, and produces the actual response object when converted to a
// std::unique_ptr (see the conversion operators below).
class ContentResponseBlueprint
{
public: // functions
// Both pointers are dereferenced right away and kept as references, so they
// must be non-null and the pointed-to objects must outlive this blueprint.
ContentResponseBlueprint(const InternalServer* server,
const RequestContext* request,
int httpStatusCode,
const std::string& mimeType,
const std::string& templateStr)
: m_server(*server)
, m_request(*request)
, m_httpStatusCode(httpStatusCode)
, m_mimeType(mimeType)
, m_template(templateStr)
{}
virtual ~ContentResponseBlueprint() = default;
// Builds the response through the (virtual) generateResponseObject() hook.
operator std::unique_ptr<ContentResponse>() const
{
return generateResponseObject();
}
// Same as above, with the result converted to the base Response type.
operator std::unique_ptr<Response>() const
{
return operator std::unique_ptr<ContentResponse>();
}
protected: // functions
// NOTE(review): presumably resolves msgId to a message string for the
// request; the definition is not visible here - confirm.
std::string getMessage(const std::string& msgId) const;
// Customisation point used by the conversion operators; subclasses may
// override it to change how the response object is produced.
virtual std::unique_ptr<ContentResponse> generateResponseObject() const;
public: //data
const InternalServer& m_server;
const RequestContext& m_request;
const int m_httpStatusCode;
const std::string m_mimeType;
const std::string m_template;
// The only non-const member: mustache data that can still be modified after
// construction.
kainjow::mustache::data m_data;
};
// Blueprint for an HTTP error page. The constructor takes the status code,
// message ids for the page title and heading, and an optional CSS URL.
// The operator+ / operator+= overloads return a reference to the blueprint
// so that calls can be chained.
// NOTE(review): presumably each overload appends an error detail to the
// page; the definitions are not visible here - confirm.
struct HTTPErrorResponse : ContentResponseBlueprint
{
HTTPErrorResponse(const InternalServer& server,
const RequestContext& request,
int httpStatusCode,
const std::string& pageTitleMsgId,
const std::string& headingMsgId,
const std::string& cssUrl = "");
HTTPErrorResponse& operator+(const std::string& msg);
HTTPErrorResponse& operator+(const ParameterizedMessage& errorDetails);
HTTPErrorResponse& operator+=(const ParameterizedMessage& errorDetails);
};
// Empty tag type; the urlNotFoundMsg constant is passed to
// HTTP404Response::operator+ to select that overload.
class UrlNotFoundMsg {};
extern const UrlNotFoundMsg urlNotFoundMsg;
// Error-response blueprint constructed from just the server and the request.
// NOTE(review): presumably fixes the status code to 404 - confirm in the
// constructor definition, which is not visible here.
struct HTTP404Response : HTTPErrorResponse
{
HTTP404Response(const InternalServer& server,
const RequestContext& request);
// Keep the base-class operator+ overloads visible next to the one added below.
using HTTPErrorResponse::operator+;
HTTPErrorResponse& operator+(UrlNotFoundMsg /*unused*/);
};
// Empty tag type; the invalidUrlMsg constant is passed to
// HTTP400Response::operator+ to select that overload.
class InvalidUrlMsg {};
extern const InvalidUrlMsg invalidUrlMsg;
// Error-response blueprint constructed from just the server and the request.
// NOTE(review): presumably fixes the status code to 400 - confirm in the
// constructor definition, which is not visible here.
struct HTTP400Response : HTTPErrorResponse
{
HTTP400Response(const InternalServer& server,
const RequestContext& request);
// Keep the base-class operator+ overloads visible next to the one added below.
using HTTPErrorResponse::operator+;
HTTPErrorResponse& operator+(InvalidUrlMsg /*unused*/);
};
// Error-response blueprint constructed from just the server and the request.
// NOTE(review): presumably fixes the status code to 500 - confirm in the
// constructor definition, which is not visible here.
struct HTTP500Response : HTTPErrorResponse
{
HTTP500Response(const InternalServer& server,
const RequestContext& request);
private: // overrides
// generateResponseObject() is overridden in order to produce a minimal
// response without any need for additional resources from the server
std::unique_ptr<ContentResponse> generateResponseObject() const override;
};
class ItemResponse : public Response {
public:
ItemResponse(bool verbose, const zim::Item& item, const std::string& mimetype, const ByteRange& byterange);
static std::unique_ptr<Response> build(const InternalServer& server, const RequestContext& request, const zim::Item& item);
private:
MHD_Response* create_mhd_response(const RequestContext& request);
zim::Item m_item;
std::string m_mimeType;
int m_returnCode;
bool m_withTaskbar;
bool m_withLibraryButton;
bool m_blockExternalLinks;
bool m_compress;
std::string m_bookName;
std::string m_bookTitle;
ByteRange m_byteRange;
ETag m_etag;
};
}

View File

@@ -12,31 +12,45 @@
UnixImpl::UnixImpl():
m_pid(0),
m_running(false),
m_shouldQuit(false)
m_mutex(PTHREAD_MUTEX_INITIALIZER),
m_waitingThread()
{
}
UnixImpl::~UnixImpl()
{
kill();
m_shouldQuit = true;
m_waitingThread.join();
// Android has no pthread_cancel :(
#ifdef __ANDROID__
pthread_kill(m_waitingThread, SIGUSR1);
#else
pthread_cancel(m_waitingThread);
#endif
}
#ifdef __ANDROID__
void thread_exit_handler(int sig) {
pthread_exit(0);
}
#endif
void* UnixImpl::waitForPID(void* _self)
{
UnixImpl* self = static_cast<UnixImpl*>(_self);
while (true) {
if (!waitpid(self->m_pid, NULL, WNOHANG)) {
break;
}
if (self->m_shouldQuit) {
return nullptr;
}
std::this_thread::sleep_for(std::chrono::milliseconds(100));
}
#ifdef __ANDROID__
struct sigaction actions;
memset(&actions, 0, sizeof(actions));
sigemptyset(&actions.sa_mask);
actions.sa_flags = 0;
actions.sa_handler = thread_exit_handler;
sigaction(SIGUSR1, &actions, NULL);
#endif
UnixImpl* self = static_cast<UnixImpl*>(_self);
waitpid(self->m_pid, NULL, 0);
pthread_mutex_lock(&self->m_mutex);
self->m_running = false;
pthread_mutex_unlock(&self->m_mutex);
return self;
}
@@ -60,7 +74,7 @@ void UnixImpl::run(commandLine_t& commandLine)
default:
m_pid = pid;
m_running = true;
m_waitingThread = std::thread(waitForPID, this);
pthread_create(&m_waitingThread, NULL, waitForPID, this);
break;
}
}
@@ -72,5 +86,8 @@ bool UnixImpl::kill()
bool UnixImpl::isRunning()
{
return m_running;
pthread_mutex_lock(&m_mutex);
bool ret = m_running;
pthread_mutex_unlock(&m_mutex);
return ret;
}

View File

@@ -3,16 +3,16 @@
#include "subprocess.h"
#include <atomic>
#include <thread>
#include <pthread.h>
class UnixImpl : public SubprocessImpl
{
private:
int m_pid;
std::atomic<bool> m_running;
std::atomic<bool> m_shouldQuit;
std::thread m_waitingThread;
bool m_running;
pthread_mutex_t m_mutex;
pthread_t m_waitingThread;
public:
UnixImpl();

View File

@@ -11,8 +11,7 @@
WinImpl::WinImpl():
m_pid(0),
m_running(false),
m_subprocessHandle(INVALID_HANDLE_VALUE),
m_waitingThreadHandle(INVALID_HANDLE_VALUE)
m_handle(INVALID_HANDLE_VALUE)
{
InitializeCriticalSection(&m_criticalSection);
}
@@ -20,15 +19,14 @@ WinImpl::WinImpl():
WinImpl::~WinImpl()
{
kill();
WaitForSingleObject(m_waitingThreadHandle, INFINITE);
CloseHandle(m_subprocessHandle);
CloseHandle(m_handle);
DeleteCriticalSection(&m_criticalSection);
}
DWORD WINAPI WinImpl::waitForPID(void* _self)
{
WinImpl* self = static_cast<WinImpl*>(_self);
WaitForSingleObject(self->m_subprocessHandle, INFINITE);
WaitForSingleObject(self->m_handle, INFINITE);
EnterCriticalSection(&self->m_criticalSection);
self->m_running = false;
@@ -81,16 +79,16 @@ void WinImpl::run(commandLine_t& commandLine)
&procInfo))
{
m_pid = procInfo.dwProcessId;
m_subprocessHandle = procInfo.hProcess;
m_handle = procInfo.hProcess;
CloseHandle(procInfo.hThread);
m_running = true;
m_waitingThreadHandle = CreateThread(NULL, 0, &waitForPID, this, 0, NULL);
CreateThread(NULL, 0, &waitForPID, this, 0, NULL );
}
}
bool WinImpl::kill()
{
return TerminateProcess(m_subprocessHandle, 0);
return TerminateProcess(m_handle, 0);
}
bool WinImpl::isRunning()

View File

@@ -11,8 +11,7 @@ class WinImpl : public SubprocessImpl
private:
int m_pid;
bool m_running;
HANDLE m_subprocessHandle;
HANDLE m_waitingThreadHandle;
HANDLE m_handle;
CRITICAL_SECTION m_criticalSection;
public:

View File

@@ -1,128 +0,0 @@
/*
* Copyright 2021 Maneesh P M <manu.pm55@gmail.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*/
#include "archiveTools.h"
#include "tools.h"
#include "pathTools.h"
#include "otherTools.h"
#include "stringTools.h"
#include <zim/error.h>
#include <zim/item.h>
namespace kiwix
{
// Returns the metadata value stored under `name` in the archive, or an empty
// string when the archive has no such metadata entry.
std::string getMetadata(const zim::Archive& archive, const std::string& name) {
  std::string metadata;
  try {
    metadata = archive.getMetadata(name);
  } catch (zim::EntryNotFound&) {
    // A missing metadata entry is reported as an empty value.
    metadata.clear();
  }
  return metadata;
}
// Returns the archive's "Title" metadata. When it is empty, a title is derived
// from the archive's file name instead: underscores become spaces and
// everything from the first ".zim" onwards is cut off.
std::string getArchiveTitle(const zim::Archive& archive) {
  const std::string title = getMetadata(archive, "Title");
  if (!title.empty()) {
    return title;
  }

  std::string fromFilename = getLastPathElement(archive.getFilename());
  std::replace(fromFilename.begin(), fromFilename.end(), '_', ' ');
  // find() yields npos when ".zim" is absent, in which case substr() keeps
  // the whole string.
  const size_t extensionStart = fromFilename.find(".zim");
  return fromFilename.substr(0, extensionStart);
}
// Returns the archive's "Description" metadata, falling back to "Subtitle"
// when the description is empty.
std::string getMetaDescription(const zim::Archive& archive) {
  const std::string description = getMetadata(archive, "Description");
  if (!description.empty()) {
    return description;
  }
  /* Mediawiki Collection tends to use the "Subtitle" name */
  return getMetadata(archive, "Subtitle");
}
// Returns the archive's "Tags" metadata. With `original` set the raw value is
// returned untouched; otherwise the tags are passed through convertTags() and
// re-joined with ';'.
std::string getMetaTags(const zim::Archive& archive, bool original) {
  const std::string rawTags = getMetadata(archive, "Tags");
  if (!original) {
    auto convertedTags = convertTags(rawTags);
    return join(convertedTags, ";");
  }
  return rawTags;
}
// The getters below are thin wrappers around getMetadata(): each one returns
// the value of a single, fixed metadata name.

// Returns the archive's "Language" metadata.
std::string getMetaLanguage(const zim::Archive& archive) {
return getMetadata(archive, "Language");
}
// Returns the archive's "Name" metadata.
std::string getMetaName(const zim::Archive& archive) {
return getMetadata(archive, "Name");
}
// Returns the archive's "Date" metadata.
std::string getMetaDate(const zim::Archive& archive) {
return getMetadata(archive, "Date");
}
// Returns the archive's "Creator" metadata.
std::string getMetaCreator(const zim::Archive& archive) {
return getMetadata(archive, "Creator");
}
// Returns the archive's "Publisher" metadata.
std::string getMetaPublisher(const zim::Archive& archive) {
return getMetadata(archive, "Publisher");
}
// Returns the archive's "Flavour" metadata.
std::string getMetaFlavour(const zim::Archive& archive) {
return getMetadata(archive, "Flavour");
}
// Fetches the archive's illustration of the requested size.
// On success `content` and `mimeType` are filled in and true is returned;
// false is returned when zim::EntryNotFound is raised while doing so.
bool getArchiveFavicon(const zim::Archive& archive, unsigned size,
                       std::string& content, std::string& mimeType) {
  try {
    auto illustration = archive.getIllustrationItem(size);
    content = illustration.getData();
    mimeType = illustration.getMimetype();
  } catch (zim::EntryNotFound&) {
    return false;
  }
  return true;
}
// Returns the archive's file size divided by 1024 (integer division).
unsigned int getArchiveFileSize(const zim::Archive& archive) {
  const auto fileSize = archive.getFilesize();
  return fileSize / 1024;
}
// Returns the item of `entry`, obtained with entry.getItem(true).
// The `archive` argument is not used.
zim::Item getFinalItem(const zim::Archive& archive, const zim::Entry& entry)
{
  const bool getItemArgument = true;
  return entry.getItem(getItemArgument);
}
// Looks up the entry stored at `path`.
// When the lookup fails and the path is empty or "/", the archive's main
// entry is returned instead; any other failed lookup raises
// zim::EntryNotFound.
zim::Entry getEntryFromPath(const zim::Archive& archive, const std::string& path)
{
  try {
    return archive.getEntryByPath(path);
  } catch (zim::EntryNotFound&) {
    const bool isRootPath = path.empty() || path == "/";
    if (isRootPath) {
      return archive.getMainEntry();
    }
  }

  throw zim::EntryNotFound("Cannot find entry for non empty path");
}
} // kiwix

View File

@@ -1,56 +0,0 @@
/*
* Copyright 2021 Maneesh P M <manu.pm55@gmail.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*/
#ifndef KIWIX_ARCHIVETOOLS_H
#define KIWIX_ARCHIVETOOLS_H
#include <zim/archive.h>
#include <tools/otherTools.h>
/**
* This file contains all the functions that would make handling data related to
* an archive easier.
**/
namespace kiwix
{
// Value of the metadata entry `name`.
std::string getMetadata(const zim::Archive& archive, const std::string& name);
// Title of the archive.
std::string getArchiveTitle(const zim::Archive& archive);
// Description of the archive.
std::string getMetaDescription(const zim::Archive& archive);
// Tags of the archive; `original` selects the raw, unconverted value.
std::string getMetaTags(const zim::Archive& archive, bool original = false);
// Single-value metadata getters.
std::string getMetaLanguage(const zim::Archive& archive);
std::string getMetaName(const zim::Archive& archive);
std::string getMetaDate(const zim::Archive& archive);
std::string getMetaCreator(const zim::Archive& archive);
std::string getMetaPublisher(const zim::Archive& archive);
std::string getMetaFlavour(const zim::Archive& archive);
// Fills `content` and `mimeType` with the archive's illustration of the given
// size; the boolean result tells whether that succeeded.
bool getArchiveFavicon(const zim::Archive& archive, unsigned size,
std::string& content, std::string& mimeType);
// NOTE(review): no definition of the two counters below was found next to the
// other functions of this header - confirm they are implemented somewhere.
unsigned int getArchiveMediaCount(const zim::Archive& archive);
unsigned int getArchiveArticleCount(const zim::Archive& archive);
// Size of the archive file.
unsigned int getArchiveFileSize(const zim::Archive& archive);
// Item associated with `entry`.
zim::Item getFinalItem(const zim::Archive& archive, const zim::Entry& entry);
// Entry found at `path` in the archive.
zim::Entry getEntryFromPath(const zim::Archive& archive, const std::string& path);
}
#endif

View File

@@ -1,208 +0,0 @@
/*
* Copyright (C) 2021 Matthieu Gautier <mgautier@kymeria.fr>
* Copyright (C) 2020 Veloman Yunkan
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
* warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
* NON-INFRINGEMENT. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
#ifndef ZIM_CONCURRENT_CACHE_H
#define ZIM_CONCURRENT_CACHE_H
#include "lrucache.h"
#include <future>
#include <mutex>
namespace kiwix
{
/**
ConcurrentCache implements a concurrent thread-safe cache
Compared to kiwix::lru_cache, each access operation is slightly more expensive.
However, different slots of the cache can be safely accessed concurrently
with minimal blocking. Concurrent access to the same element is also
safe, and, in case of a cache miss, will block until that element becomes
available.
*/
template <typename Key, typename Value>
class ConcurrentCache
{
private: // types
  typedef std::shared_future<Value> ValuePlaceholder;
  typedef lru_cache<Key, ValuePlaceholder> Impl;

public: // functions
  // Creates a cache holding at most maxEntries entries.
  explicit ConcurrentCache(size_t maxEntries)
    : impl_(maxEntries)
  {}

  // Gets the entry corresponding to the given key. If the entry is not in the
  // cache, it is obtained by calling f() (without any arguments) and the
  // result is put into the cache.
  //
  // The cache as a whole is locked only for the duration of accessing
  // the respective slot. If, in the case of a cache miss, the generation
  // of the missing element takes a long time, only attempts to access that
  // element will block - the rest of the cache remains open to concurrent
  // access.
  //
  // If f() throws, the placeholder is removed from the cache again and the
  // exception is propagated to the caller.
  template<class F>
  Value getOrPut(const Key& key, F f)
  {
    std::promise<Value> valuePromise;
    std::unique_lock<std::mutex> l(lock_);
    const auto x = impl_.getOrPut(key, valuePromise.get_future().share());
    l.unlock();
    if ( x.miss() ) {
      try {
        valuePromise.set_value(f());
      } catch (...) {
        // Catch everything, not only std::exception: a placeholder whose
        // promise is never fulfilled must not stay in the cache, otherwise
        // every later access to this key would fail with a broken promise.
        drop(key);
        throw;
      }
    }

    return x.value().get();
  }

  // Removes the entry for key; returns whether such an entry existed.
  bool drop(const Key& key)
  {
    std::unique_lock<std::mutex> l(lock_);
    return impl_.drop(key);
  }

  // Changes the maximum number of entries and returns the previous limit.
  size_t setMaxSize(size_t new_size) {
    std::unique_lock<std::mutex> l(lock_);
    return impl_.setMaxSize(new_size);
  }

protected: // data
  Impl impl_;
  std::mutex lock_;
};
/**
WeakStore represent a thread safe store (map) of weak ptr.
It allows to store weak_ptr from shared_ptr and retrieve shared_ptr from
potential non expired weak_ptr.
It is not limited in size.
*/
template<typename Key, typename Value>
class WeakStore {
private: // types
  typedef std::weak_ptr<Value> WeakValue;

public:
  explicit WeakStore() = default;

  // Returns a shared_ptr to the object registered under key.
  // Throws std::runtime_error when the key is unknown or when the registered
  // object has already been destroyed (the stale record is then removed).
  std::shared_ptr<Value> get(const Key& key)
  {
    std::lock_guard<std::mutex> guard(m_lock);
    const auto found = m_weakMap.find(key);
    if (found == m_weakMap.end()) {
      throw std::runtime_error("No weak ptr");
    }

    if (std::shared_ptr<Value> alive = found->second.lock()) {
      return alive;
    }

    // The object is gone: forget the expired record before reporting failure.
    m_weakMap.erase(found);
    throw std::runtime_error("No weak ptr");
  }

  // Registers (or replaces) the weak reference kept for key.
  void add(const Key& key, std::shared_ptr<Value> shared)
  {
    std::lock_guard<std::mutex> guard(m_lock);
    WeakValue& slot = m_weakMap[key];
    slot = WeakValue(shared);
  }

private: //data
  std::map<Key, WeakValue> m_weakMap;
  std::mutex m_lock;
};
// Specialization for values held through std::shared_ptr: besides the LRU
// cache, a WeakStore remembers every value ever created, so that a value
// evicted from the cache but still alive elsewhere is reused instead of being
// created a second time.
template <typename Key, typename RawValue>
class ConcurrentCache<Key, std::shared_ptr<RawValue>>
{
private: // types
  typedef std::shared_ptr<RawValue> Value;
  typedef std::shared_future<Value> ValuePlaceholder;
  typedef lru_cache<Key, ValuePlaceholder> Impl;

public: // functions
  // Creates a cache holding at most maxEntries entries.
  explicit ConcurrentCache(size_t maxEntries)
    : impl_(maxEntries)
  {}

  // Gets the entry corresponding to the given key. If the entry is not in the
  // cache, it is obtained by calling f() (without any arguments) and the
  // result is put into the cache.
  //
  // The cache as a whole is locked only for the duration of accessing
  // the respective slot. If, in the case of a cache miss, the generation
  // of the missing element takes a long time, only attempts to access that
  // element will block - the rest of the cache remains open to concurrent
  // access.
  //
  // If f() throws, the placeholder is removed from the cache again and the
  // exception is propagated to the caller.
  template<class F>
  Value getOrPut(const Key& key, F f)
  {
    std::promise<Value> valuePromise;
    std::unique_lock<std::mutex> l(lock_);
    const auto x = impl_.getOrPut(key, valuePromise.get_future().share());
    l.unlock();
    if ( x.miss() ) {
      // Try to get back the shared_ptr from the weak_ptr first.
      try {
        valuePromise.set_value(m_weakStore.get(key));
      } catch(const std::runtime_error&) {
        // Nothing alive in the weak store: build the value.
        try {
          const auto value = f();
          valuePromise.set_value(value);
          m_weakStore.add(key, value);
        } catch (...) {
          // Catch everything, not only std::exception: a placeholder whose
          // promise is never fulfilled must not stay in the cache, otherwise
          // every later access to this key would fail with a broken promise.
          drop(key);
          throw;
        }
      }
    }

    return x.value().get();
  }

  // Removes the entry for key; returns whether such an entry existed.
  bool drop(const Key& key)
  {
    std::unique_lock<std::mutex> l(lock_);
    return impl_.drop(key);
  }

  // Changes the maximum number of entries and returns the previous limit.
  size_t setMaxSize(size_t new_size) {
    std::unique_lock<std::mutex> l(lock_);
    return impl_.setMaxSize(new_size);
  }

protected: // data
  std::mutex lock_;
  Impl impl_;
  WeakStore<Key, RawValue> m_weakStore;
};
} // namespace kiwix
#endif // ZIM_CONCURRENT_CACHE_H

View File

@@ -1,175 +0,0 @@
/*
* Copyright (c) 2021, Matthieu Gautier <mgautier@kymeria.fr>
* Copyright (c) 2020, Veloman Yunkan
* Copyright (c) 2014, lamerman
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* * Neither the name of lamerman nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* File: lrucache.hpp
* Author: Alexander Ponomarev
*
* Created on June 20, 2013, 5:09 PM
*/
#ifndef _LRUCACHE_HPP_INCLUDED_
#define _LRUCACHE_HPP_INCLUDED_
#include <map>
#include <list>
#include <set>
#include <cstddef>
#include <stdexcept>
#include <cassert>
namespace kiwix {
// A least-recently-used cache: a list keeps the entries ordered from most to
// least recently used, and a map gives direct access to the list node of each
// key. Not thread-safe by itself.
template<typename key_t, typename value_t>
class lru_cache {
public: // types
  typedef typename std::pair<key_t, value_t> key_value_pair_t;
  typedef typename std::list<key_value_pair_t>::iterator list_iterator_t;

  enum AccessStatus {
    HIT, // key was found in the cache
    PUT, // key was not in the cache but was created by the getOrPut() access
    MISS // key was not in the cache; get() access failed
  };

  // Outcome of a cache access: the status plus (unless MISS) a copy of the
  // value.
  class AccessResult
  {
    const AccessStatus status_;
    const value_t val_;
  public:
    AccessResult(const value_t& val, AccessStatus status)
      : status_(status), val_(val)
    {}
    AccessResult() : status_(MISS), val_() {}

    bool hit() const { return status_ == HIT; }
    // Note that a PUT access also counts as a miss.
    bool miss() const { return !hit(); }

    // Throws std::range_error when the access was a MISS.
    const value_t& value() const
    {
      if ( status_ == MISS )
        throw std::range_error("There is no such key in cache");
      return val_;
    }

    operator const value_t& () const { return value(); }
  };

public: // functions
  explicit lru_cache(size_t max_size) :
    _max_size(max_size) {
  }

  // If 'key' is present in the cache, returns the associated value,
  // otherwise puts the given value into the cache (and returns it with
  // a status of a cache miss).
  AccessResult getOrPut(const key_t& key, const value_t& value) {
    auto it = _cache_items_map.find(key);
    if (it != _cache_items_map.end()) {
      _cache_items_list.splice(_cache_items_list.begin(), _cache_items_list, it->second);
      return AccessResult(it->second->second, HIT);
    } else {
      putMissing(key, value);
      return AccessResult(value, PUT);
    }
  }

  // Inserts the value, or overwrites the existing one; either way the entry
  // becomes the most recently used one.
  void put(const key_t& key, const value_t& value) {
    auto it = _cache_items_map.find(key);
    if (it != _cache_items_map.end()) {
      _cache_items_list.splice(_cache_items_list.begin(), _cache_items_list, it->second);
      it->second->second = value;
    } else {
      putMissing(key, value);
    }
  }

  // Looks the key up; a hit makes the entry the most recently used one.
  AccessResult get(const key_t& key) {
    auto it = _cache_items_map.find(key);
    if (it == _cache_items_map.end()) {
      return AccessResult();
    } else {
      _cache_items_list.splice(_cache_items_list.begin(), _cache_items_list, it->second);
      return AccessResult(it->second->second, HIT);
    }
  }

  // Removes the entry for key; returns whether such an entry existed.
  bool drop(const key_t& key) {
    const auto it = _cache_items_map.find(key);
    if (it == _cache_items_map.end()) {
      return false;
    }
    _cache_items_list.erase(it->second);
    _cache_items_map.erase(it);
    return true;
  }

  bool exists(const key_t& key) const {
    return _cache_items_map.find(key) != _cache_items_map.end();
  }

  size_t size() const {
    return _cache_items_map.size();
  }

  // Changes the maximum number of entries and returns the previous limit.
  // When the limit shrinks, the least recently used entries are evicted right
  // away so that the cache never stays above its configured size.
  size_t setMaxSize(size_t new_size) {
    size_t previous = _max_size;
    _max_size = new_size;
    evictOverflow();
    return previous;
  }

  // Returns the set of keys currently held in the cache.
  std::set<key_t> keys() const {
    std::set<key_t> keys;
    for(auto& item:_cache_items_map) {
      keys.insert(item.first);
    }
    return keys;
  }

private: // functions
  // Inserts a key known to be absent, then restores the size limit.
  void putMissing(const key_t& key, const value_t& value) {
    assert(_cache_items_map.find(key) == _cache_items_map.end());
    _cache_items_list.push_front(key_value_pair_t(key, value));
    _cache_items_map[key] = _cache_items_list.begin();
    evictOverflow();
  }

  // Drops least recently used entries until the size limit is respected.
  void evictOverflow() {
    while (_cache_items_map.size() > _max_size) {
      _cache_items_map.erase(_cache_items_list.back().first);
      _cache_items_list.pop_back();
    }
  }

private: // data
  std::list<key_value_pair_t> _cache_items_list;
  std::map<key_t, list_iterator_t> _cache_items_map;
  size_t _max_size;
};
} // namespace kiwix
#endif /* _LRUCACHE_HPP_INCLUDED_ */

View File

@@ -1,6 +1,5 @@
/*
* Copyright 2012 Emmanuel Engelhart <kelson@kiwix.org>
* Copyright 2021 Nikhil Tanwar <2002nikhiltanwar@gmail.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -18,7 +17,6 @@
* MA 02110-1301, USA.
*/
#include "tools.h"
#include <tools/networkTools.h>
#include <stdio.h>
@@ -31,17 +29,6 @@
#include <iostream>
#include <stdexcept>
#ifdef _WIN32
#include <winsock2.h>
#include <ws2tcpip.h>
#include <iostream>
#else
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>
#include <netdb.h>
#endif
size_t write_callback_to_iss(char* ptr, size_t size, size_t nmemb, void* userdata)
{
@@ -70,104 +57,3 @@ std::string kiwix::download(const std::string& url) {
}
return ss.str();
}
std::map<std::string, std::string> kiwix::getNetworkInterfaces() {
std::map<std::string, std::string> interfaces;
#ifdef _WIN32
SOCKET sd = WSASocket(AF_INET, SOCK_DGRAM, 0, 0, 0, 0);
if (sd == INVALID_SOCKET) {
std::cerr << "Failed to get a socket. Error " << WSAGetLastError() << std::endl;
return interfaces;
}
INTERFACE_INFO InterfaceList[20];
unsigned long nBytesReturned;
if (WSAIoctl(sd, SIO_GET_INTERFACE_LIST, 0, 0, &InterfaceList,
sizeof(InterfaceList), &nBytesReturned, 0, 0) == SOCKET_ERROR) {
std::cerr << "Failed calling WSAIoctl: error " << WSAGetLastError() << std::endl;
return interfaces;
}
int nNumInterfaces = nBytesReturned / sizeof(INTERFACE_INFO);
for (int i = 0; i < nNumInterfaces; ++i) {
sockaddr_in *pAddress;
pAddress = (sockaddr_in *) & (InterfaceList[i].iiAddress.AddressIn);
if(pAddress->sin_family == AF_INET) {
/* Add to the map */
std::string interfaceName = std::string(inet_ntoa(pAddress->sin_addr));
interfaces[interfaceName] = interfaceName;
}
}
#else
/* Get Network interfaces information */
char buf[16384];
struct ifconf ifconf;
int fd = socket(PF_INET, SOCK_DGRAM, 0); /* Only IPV4 */
ifconf.ifc_len = sizeof(buf);
ifconf.ifc_buf=buf;
if(ioctl(fd, SIOCGIFCONF, &ifconf)!=0) {
perror("ioctl(SIOCGIFCONF)");
}
/* Go through each interface */
struct ifreq *ifreq;
ifreq = ifconf.ifc_req;
for (int i = 0; i < ifconf.ifc_len; ) {
if (ifreq->ifr_addr.sa_family == AF_INET) {
/* Get the network interface ip */
char host[128] = { 0 };
const int error = getnameinfo(&(ifreq->ifr_addr), sizeof(ifreq->ifr_addr),
host, sizeof(host),
0, 0, NI_NUMERICHOST);
if (!error) {
std::string interfaceName = std::string(ifreq->ifr_name);
std::string interfaceIp = std::string(host);
/* Add to the map */
interfaces[interfaceName] = interfaceIp;
} else {
perror("getnameinfo()");
}
}
/* some systems have ifr_addr.sa_len and adjust the length that
* way, but not mine. weird */
size_t len;
#ifndef __linux__
len = IFNAMSIZ + ifreq->ifr_addr.sa_len;
#else
len = sizeof(*ifreq);
#endif
ifreq = (struct ifreq*)((char*)ifreq+len);
i += len;
}
#endif
return interfaces;
}
// Picks one IP address out of the machine's network interfaces.
// Outside Windows, the first interface found among a fixed list of names wins.
// Otherwise the first address starting with one of the listed prefixes is
// used (prefixes are tried in order). "127.0.0.1" is the last resort.
std::string kiwix::getBestPublicIp() {
  const auto interfaces = getNetworkInterfaces();

#ifndef _WIN32
  const char* const prioritizedNames[] =
      { "eth0", "eth1", "wlan0", "wlan1", "en0", "en1" };
  for (const char* candidateName : prioritizedNames) {
    const auto found = interfaces.find(candidateName);
    if (found != interfaces.end()) {
      return found->second;
    }
  }
#endif

  const char* const prefixes[] = { "192.168", "172.16.", "10.0" };
  for (const char* prefix : prefixes) {
    for (const auto& nameAndIp : interfaces) {
      const std::string& ip = nameAndIp.second;
      const bool startsWithPrefix = (ip.find(prefix) == 0);
      if (startsWithPrefix) {
        return ip;
      }
    }
  }

  return "127.0.0.1";
}

View File

@@ -17,14 +17,8 @@
* MA 02110-1301, USA.
*/
// Implement function declared in tools.h and tools/otherTools.h
#include "tools.h"
#include "tools/otherTools.h"
#include <algorithm>
#include <iomanip>
#ifdef _WIN32
#include <windows.h>
#else
@@ -32,16 +26,11 @@
#endif
#include "tools/stringTools.h"
#include "server/i18n.h"
#include "libkiwix-resources.h"
#include <map>
#include <sstream>
#include <pugixml.hpp>
#include <zim/uuid.h>
#include <zim/suggestion_iterator.h>
static std::map<std::string, std::string> codeisomapping {
{ "aa", "aar" },
@@ -291,107 +280,3 @@ bool kiwix::convertStrToBool(const std::string& value)
throw std::domain_error(ss.str());
}
// Returns the current date and time formatted as "YYYY-MM-DDTHH:MM:SSZ".
std::string kiwix::gen_date_str()
{
  const auto now = std::time(0);
  // The trailing 'Z' declares the timestamp to be UTC, so the time has to be
  // broken down with gmtime(); localtime() would yield a wrong instant on any
  // machine whose time zone is not UTC.
  const auto tm = std::gmtime(&now);

  std::stringstream is;
  is << std::setfill('0')
     << std::setw(4) << 1900+tm->tm_year << "-"
     << std::setw(2) << tm->tm_mon+1 << "-"
     << std::setw(2) << tm->tm_mday << "T"
     << std::setw(2) << tm->tm_hour << ":"
     << std::setw(2) << tm->tm_min << ":"
     << std::setw(2) << tm->tm_sec << "Z";
  return is.str();
}
// Returns the string form of the UUID produced by zim::Uuid::generate(s).
std::string kiwix::gen_uuid(const std::string& s)
{
  const auto uuid = zim::Uuid::generate(s);
  return kiwix::to_string(uuid);
}
// Wraps s into a mustache value; an empty string is turned into the boolean
// false instead.
kainjow::mustache::data kiwix::onlyAsNonEmptyMustacheValue(const std::string& s)
{
  if (s.empty()) {
    return kainjow::mustache::data(false);
  }
  return kainjow::mustache::data(s);
}
std::string kiwix::render_template(const std::string& template_str, kainjow::mustache::data data)
{
kainjow::mustache::mustache tmpl(template_str);
std::stringstream ss;
tmpl.render(data, [&ss](const std::string& str) { ss << str; });
return ss.str();
}
namespace
{
// Returns a copy of s in which every backslash is doubled.
std::string escapeBackslashes(const std::string& s)
{
  std::string escaped;
  escaped.reserve(s.size());

  std::string::size_type copiedUpTo = 0;
  for (std::string::size_type at = s.find('\\');
       at != std::string::npos;
       at = s.find('\\', copiedUpTo)) {
    // Copy the backslash-free run, then the backslash twice.
    escaped.append(s, copiedUpTo, at - copiedUpTo);
    escaped.append(2, '\\');
    copiedUpTo = at + 1;
  }
  escaped.append(s, copiedUpTo, std::string::npos);
  return escaped;
}
// Builds the text of the "full text search" suggestion: expands the
// parameterized string "suggest-full-text-search" for the language lang, with
// queryString supplied as the SEARCH_TERMS parameter.
std::string makeFulltextSearchSuggestion(const std::string& lang,
const std::string& queryString)
{
return kiwix::i18n::expandParameterizedString(lang, "suggest-full-text-search",
{
{"SEARCH_TERMS", queryString}
}
);
}
} // unnamed namespace
// Starts with an empty mustache list; add() and addFTSearchSuggestion()
// append to it.
kiwix::Suggestions::Suggestions()
: m_data(kainjow::mustache::data::type::list)
{
}
// Appends a "path" suggestion built from a zim suggestion item. The label is
// the item's snippet when it has one and its title otherwise; backslashes are
// doubled in every text field.
void kiwix::Suggestions::add(const zim::SuggestionItem& suggestion)
{
  std::string label;
  if (suggestion.hasSnippet()) {
    label = suggestion.getSnippet();
  } else {
    label = suggestion.getTitle();
  }

  kainjow::mustache::data entry;
  entry.set("label", escapeBackslashes(label));
  entry.set("value", escapeBackslashes(suggestion.getTitle()));
  entry.set("kind", "path");
  entry.set("path", escapeBackslashes(suggestion.getPath()));
  // Evaluated before the entry is appended, so it is true only for the very
  // first suggestion of the list.
  entry.set("first", m_data.is_empty_list());
  m_data.push_back(entry);
}
void kiwix::Suggestions::addFTSearchSuggestion(const std::string& uiLang,
const std::string& queryString)
{
kainjow::mustache::data result;
const std::string label = makeFulltextSearchSuggestion(uiLang, queryString);
result.set("label", escapeBackslashes(label));
result.set("value", escapeBackslashes(queryString + " "));
result.set("kind", "pattern");
result.set("first", m_data.is_empty_list());
m_data.push_back(result);
}
std::string kiwix::Suggestions::getJSON() const
{
kainjow::mustache::data data;
data.set("suggestions", m_data);
return render_template(RESOURCE::templates::suggestion_json, data);
}

View File

@@ -17,10 +17,7 @@
* MA 02110-1301, USA.
*/
// Implement method defined in <kiwix/tools.h> and "tools/pathTools.h"
#include "tools.h"
#include "tools/pathTools.h"
#include <stdexcept>
#ifdef __APPLE__
@@ -62,6 +59,7 @@
#define PATH_MAX 1024
#endif
#ifdef _WIN32
std::string WideToUtf8(const std::wstring& wstr)
{
@@ -80,7 +78,7 @@ std::wstring Utf8ToWide(const std::string& str)
}
#endif
bool kiwix::isRelativePath(const std::string& path)
bool isRelativePath(const std::string& path)
{
#ifdef _WIN32
if (path.size() < 3 ) {
@@ -175,7 +173,7 @@ std::vector<std::string> normalizeParts(std::vector<std::string>& parts, bool ab
return ret;
}
std::string kiwix::computeRelativePath(const std::string& path, const std::string& absolutePath)
std::string computeRelativePath(const std::string& path, const std::string& absolutePath)
{
auto parts = kiwix::split(path, SEPARATOR, false);
auto pathParts = normalizeParts(parts, false);
@@ -200,11 +198,11 @@ std::string kiwix::computeRelativePath(const std::string& path, const std::strin
return ret;
}
std::string kiwix::computeAbsolutePath(const std::string& path, const std::string& relativePath)
std::string computeAbsolutePath(const std::string& path, const std::string& relativePath)
{
std::string absolutePath = path;
if (path.empty()) {
absolutePath = kiwix::getCurrentDirectory();
absolutePath = getCurrentDirectory();
}
auto parts = kiwix::split(absolutePath, SEPARATOR, false);
@@ -217,7 +215,7 @@ std::string kiwix::computeAbsolutePath(const std::string& path, const std::strin
return ret;
}
std::string kiwix::removeLastPathElement(const std::string& path)
std::string removeLastPathElement(const std::string& path)
{
auto parts_ = kiwix::split(path, SEPARATOR, false);
auto parts = normalizeParts(parts_, false);
@@ -228,7 +226,7 @@ std::string kiwix::removeLastPathElement(const std::string& path)
return ret;
}
std::string kiwix::appendToDirectory(const std::string& directoryPath, const std::string& filename)
std::string appendToDirectory(const std::string& directoryPath, const std::string& filename)
{
std::string newPath = directoryPath;
if (!directoryPath.empty() && directoryPath.back() != SEPARATOR[0]) {
@@ -238,7 +236,7 @@ std::string kiwix::appendToDirectory(const std::string& directoryPath, const std
return newPath;
}
std::string kiwix::getLastPathElement(const std::string& path)
std::string getLastPathElement(const std::string& path)
{
auto parts_ = kiwix::split(path, SEPARATOR);
auto parts = normalizeParts(parts_, false);
@@ -269,7 +267,7 @@ std::string getFileSizeAsString(const std::string& path)
return convert.str();
}
std::string kiwix::getFileContent(const std::string& path)
std::string getFileContent(const std::string& path)
{
#ifdef _WIN32
auto wpath = Utf8ToWide(path);
@@ -302,21 +300,19 @@ std::string kiwix::getFileContent(const std::string& path)
return content;
}
bool kiwix::fileExists(const std::string& path)
bool fileExists(const std::string& path)
{
#ifdef _WIN32
return (_waccess_s(Utf8ToWide(path).c_str(), 0) == 0);
return PathFileExistsW(Utf8ToWide(path).c_str());
#else
return (access(path.c_str(), F_OK) == 0);
#endif
}
bool kiwix::fileReadable(const std::string& path)
{
#ifdef _WIN32
return (_waccess_s(Utf8ToWide(path).c_str(), 4) == 0);
#else
return (access(path.c_str(), R_OK) == 0);
bool flag = false;
std::fstream fin;
fin.open(path.c_str(), std::ios::in);
if (fin.is_open()) {
flag = true;
}
fin.close();
return flag;
#endif
}
@@ -343,7 +339,7 @@ std::string makeTmpDirectory()
_wmkdir(ctmp);
return WideToUtf8(ctmp);
#else
char _template_array[] = {"/tmp/libkiwix_XXXXXX"};
char _template_array[] = {"/tmp/kiwix-lib_XXXXXX"};
std::string dir = mkdtemp(_template_array);
return dir;
#endif
@@ -370,7 +366,7 @@ bool copyFile(const std::string& sourcePath, const std::string& destPath)
#endif
}
std::string kiwix::getExecutablePath(bool realPathOnly)
std::string getExecutablePath(bool realPathOnly)
{
if (!realPathOnly) {
char* cAppImage = ::getenv("APPIMAGE");
@@ -424,7 +420,7 @@ bool writeTextFile(const std::string& path, const std::string& content)
return true;
}
std::string kiwix::getCurrentDirectory()
std::string getCurrentDirectory()
{
#ifdef _WIN32
wchar_t* a_cwd = _wgetcwd(NULL, 0);
@@ -438,7 +434,7 @@ std::string kiwix::getCurrentDirectory()
return ret;
}
std::string kiwix::getDataDirectory()
std::string getDataDirectory()
{
// Try to get the dataDir from the `KIWIX_DATA_DIR` env var
#ifdef _WIN32
@@ -507,7 +503,7 @@ static std::map<std::string, std::string> extMimeTypes = {
};
/* Try to get the mimeType from the file extension */
std::string kiwix::getMimeTypeForFile(const std::string& filename)
std::string getMimeTypeForFile(const std::string& filename)
{
std::string mimeType = "text/plain";
auto pos = filename.find_last_of(".");
@@ -528,3 +524,4 @@ std::string kiwix::getMimeTypeForFile(const std::string& filename)
return mimeType;
}

View File

@@ -18,6 +18,7 @@
*/
#include <tools/regexTools.h>
#include <tools/lock.h>
#include <unicode/regex.h>
#include <unicode/ucnv.h>
@@ -25,10 +26,10 @@
#include <memory>
#include <map>
#include <stdexcept>
#include <mutex>
#include <pthread.h>
std::map<std::string, std::shared_ptr<icu::RegexPattern>> regexCache;
static std::mutex regexLock;
static pthread_mutex_t regexLock = PTHREAD_MUTEX_INITIALIZER;
std::unique_ptr<icu::RegexMatcher> buildMatcher(const std::string& regex, icu::UnicodeString& content)
{
@@ -38,7 +39,7 @@ std::unique_ptr<icu::RegexMatcher> buildMatcher(const std::string& regex, icu::U
pattern = regexCache.at(regex);
} catch (std::out_of_range&) {
// Redo the search with a lock to avoid race condition.
std::lock_guard<std::mutex> l(regexLock);
kiwix::Lock l(&regexLock);
try {
pattern = regexCache.at(regex);
} catch (std::out_of_range&) {
@@ -75,3 +76,22 @@ std::string replaceRegex(const std::string& content,
uresult.toUTF8String(tmp);
return tmp;
}
/**
 * Insert `replacement` immediately after the first match of `regex` in
 * `content`.
 *
 * @param content      text to search
 * @param regex        pattern handed to buildMatcher()
 * @param replacement  text inserted at the end offset of the first match
 * @return the modified text, or `content` unchanged when nothing matches
 */
std::string appendToFirstOccurence(const std::string& content,
                                   const std::string& regex,
                                   const std::string& replacement)
{
  ucnv_setDefaultName("UTF-8");
  icu::UnicodeString ucontent(content.c_str());
  icu::UnicodeString ureplacement(replacement.c_str());
  auto matcher = buildMatcher(regex, ucontent);

  // Nothing matched: hand the input back untouched.
  if (!matcher->find()) {
    return content;
  }

  UErrorCode status = U_ZERO_ERROR;
  const auto matchEnd = matcher->end(status);
  ucontent.insert(matchEnd, ureplacement);

  std::string utf8Result;
  ucontent.toUTF8String(utf8Result);
  return utf8Result;
}

View File

@@ -17,11 +17,9 @@
* MA 02110-1301, USA.
*/
// Implement function declared in tools.h and tools/stringTools.h
#include "tools.h"
#include "tools/stringTools.h"
#include <tools/stringTools.h>
#include "tools/pathTools.h"
#include <tools/pathTools.h>
#include <unicode/normlzr.h>
#include <unicode/rep.h>
#include <unicode/translit.h>
@@ -49,24 +47,6 @@ void kiwix::loadICUExternalTables()
#endif
}
kiwix::ICULanguageInfo::ICULanguageInfo(const std::string& langCode)
: locale(langCode.c_str())
{}
std::string kiwix::ICULanguageInfo::iso3Code() const
{
return locale.getISO3Language();
}
std::string kiwix::ICULanguageInfo::selfName() const
{
icu::UnicodeString langSelfNameICUString;
locale.getDisplayLanguage(locale, langSelfNameICUString);
std::string langSelfName;
langSelfNameICUString.toUTF8String(langSelfName);
return langSelfName;
}
std::string kiwix::removeAccents(const std::string& text)
{
loadICUExternalTables();
@@ -161,14 +141,15 @@ std::string kiwix::encodeDiples(const std::string& str)
return result;
}
namespace
{
/* urlEncode() based on javascript encodeURI() &
encodeURIComponent(). Mostly code from rstudio/httpuv (GPLv3) */
bool isReservedUrlChar(char c)
{
switch (c) {
case ';':
case ',':
case '/':
case '?':
case ':':
case '@':
@@ -176,22 +157,22 @@ bool isReservedUrlChar(char c)
case '=':
case '+':
case '$':
case '#':
return true;
default:
return false;
}
}
bool isHarmlessUriChar(char c)
bool needsEscape(char c, bool encodeReserved)
{
if (c >= 'a' && c <= 'z')
return true;
return false;
if (c >= 'A' && c <= 'Z')
return true;
return false;
if (c >= '0' && c <= '9')
return true;
return false;
if (isReservedUrlChar(c))
return encodeReserved;
switch (c) {
case '-':
case '_':
@@ -202,46 +183,8 @@ bool isHarmlessUriChar(char c)
case '\'':
case '(':
case ')':
case '/':
return true;
}
return false;
}
// Decide whether character `c` has to be percent-encoded when it appears in
// the URI component kind `target`.
bool mustBeUriEncodedFor(kiwix::URIComponentKind target, char c)
{
  if (isHarmlessUriChar(c)) {
    return false;
  }

  const bool inPath  = (target == kiwix::URIComponentKind::PATH);
  const bool inQuery = (target == kiwix::URIComponentKind::QUERY);

  switch (c) {
    // There is no reason to encode the path separator in the general case.
    // It must be encoded only in a path component when its semantics of a
    // path separator has to be suppressed.
    case '/':
      return false;

    // '@' and ':' : in a relative URL of the form abc@def/xyz or abc:def/xyz
    // (with no / in abc) the non-encoded character would make "abc" and
    // "def" be interpreted as username/host or host/port respectively.
    // '?' : a non-encoded '?' separates the path and query components.
    case '@':
    case ':':
    case '?':
      return inPath;

    case '&':
    case '=':
    case '+':
      return inQuery;

    // Everything else is encoded. That includes '#', which in either a path
    // or a query component would mark the beginning of the fragment.
    default:
      return true;
  }
}
@@ -267,43 +210,23 @@ int hexToInt(char c) {
}
}
} // unnamed namespace
std::string kiwix::urlEncode(const std::string& value)
std::string kiwix::urlEncode(const std::string& value, bool encodeReserved)
{
std::ostringstream os;
os << std::hex << std::uppercase;
for (const char c : value) {
if (isHarmlessUriChar(c)) {
os << c;
for (std::string::const_iterator it = value.begin();
it != value.end();
it++) {
if (!needsEscape(*it, encodeReserved)) {
os << *it;
} else {
const unsigned int charVal = static_cast<unsigned char>(c);
os << '%' << std::setw(2) << std::setfill('0') << charVal;
os << '%' << std::setw(2) << static_cast<unsigned int>(static_cast<unsigned char>(*it));
}
}
return os.str();
}
namespace kiwix
{

// Percent-encode `value` for use inside the URI component kind `target`.
// Characters for which mustBeUriEncodedFor() is false are copied verbatim;
// the others are written as '%' followed by two uppercase hex digits.
std::string uriEncode(URIComponentKind target, const std::string& value)
{
  std::ostringstream encoded;
  encoded << std::hex << std::uppercase;

  for (auto it = value.begin(); it != value.end(); ++it) {
    const char ch = *it;
    if (!mustBeUriEncodedFor(target, ch)) {
      encoded << ch;
      continue;
    }
    // Go through unsigned char so that bytes >= 0x80 are not sign-extended.
    encoded << '%' << std::setw(2) << std::setfill('0')
            << static_cast<unsigned int>(static_cast<unsigned char>(ch));
  }

  return encoded.str();
}

} // namespace kiwix
std::string kiwix::urlDecode(const std::string& value, bool component)
{
std::ostringstream os;
@@ -324,15 +247,15 @@ std::string kiwix::urlDecode(const std::string& value, bool component)
int iHi = hexToInt(hi);
int iLo = hexToInt(lo);
if (iHi < 0 || iLo < 0) {
// Invalid escape sequence
os << '%' << hi << lo;
continue;
// Invalid escape sequence
os << '%' << hi << lo;
continue;
}
char c = (char)(iHi << 4 | iLo);
if (!component && isReservedUrlChar(c)) {
os << '%' << hi << lo;
os << '%' << hi << lo;
} else {
os << c;
os << c;
}
} else {
os << *it;
@@ -345,8 +268,7 @@ std::string kiwix::urlDecode(const std::string& value, bool component)
/* Split string in a token array */
std::vector<std::string> kiwix::split(const std::string& str,
const std::string& delims,
bool dropEmpty,
bool keepDelim)
bool trimEmpty)
{
std::string::size_type lastPos = 0;
std::string::size_type pos = 0;
@@ -354,17 +276,14 @@ std::vector<std::string> kiwix::split(const std::string& str,
while( (pos = str.find_first_of(delims, lastPos)) < str.length() )
{
auto token = str.substr(lastPos, pos - lastPos);
if (!dropEmpty || !token.empty()) {
if (!trimEmpty || !token.empty()) {
tokens.push_back(token);
}
if (keepDelim) {
tokens.push_back(str.substr(pos, 1));
}
lastPos = pos + 1;
}
auto token = str.substr(lastPos);
if (!dropEmpty || !token.empty()) {
if (!trimEmpty || !token.empty()) {
tokens.push_back(token);
}
return tokens;
@@ -472,16 +391,3 @@ bool kiwix::startsWith(const std::string& base, const std::string& start)
&& std::equal(start.begin(), start.end(), base.begin());
}
std::vector<std::string> kiwix::getTitleVariants(const std::string& title) {
std::vector<std::string> variants;
variants.push_back(title);
variants.push_back(kiwix::ucFirst(title));
variants.push_back(kiwix::lcFirst(title));
variants.push_back(kiwix::toTitle(title));
return variants;
}
// Specialization of extractFromString returning std::string: the input is
// returned as-is, with no parsing or conversion.
template<>
std::string kiwix::extractFromString(const std::string& str) {
return str;
}

View File

@@ -1,77 +0,0 @@
/*
* Copyright 2021 Emmanuel Engelhart <kelson@kiwix.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*/
#include <iostream>
#include <sstream>
#include <version.h>
#include <zim/zim.h>
#include <kiwix_config.h>
#include <unicode/uversion.h>
#include <pugixml.hpp>
#include <curl/curl.h>
#include <microhttpd.h>
#include <xapian.h>
#include <mustache.hpp>
#include <zlib.h>
namespace kiwix
{
// Collect the name/version pairs of libkiwix and of the libraries it is
// built against. The libmustache entry is only added when the mustache
// header exposes its version macros.
LibVersions getVersions() {
  LibVersions versions = {
    { "libkiwix",       LIBKIWIX_VERSION  },
    { "libzim",         LIBZIM_VERSION    },
    { "libxapian",      XAPIAN_VERSION    },
    { "libcurl",        LIBCURL_VERSION   },
    { "libmicrohttpd",  MHD_get_version() },
    { "libz",           ZLIB_VERSION      }
  };

  // U_ICU_VERSION does not include the patch level if 0
  std::ostringstream icuStream;
  icuStream << U_ICU_VERSION_MAJOR_NUM << "."
            << U_ICU_VERSION_MINOR_NUM << "."
            << U_ICU_VERSION_PATCHLEVEL_NUM;
  versions.push_back({ "libicu", icuStream.str() });

  // No human readable version string for pugixml: split the numeric macro
  // into thousands (major), tens (minor) and units (patch).
  const unsigned pugiMajor = PUGIXML_VERSION / 1000;
  const unsigned pugiMinor = (PUGIXML_VERSION % 1000) / 10;
  const unsigned pugiPatch = PUGIXML_VERSION % 10;
  std::ostringstream pugiStream;
  pugiStream << pugiMajor << "." << pugiMinor << "." << pugiPatch;
  versions.push_back({ "libpugixml", pugiStream.str() });

  // Needs version 5.0 of Mustache
#ifdef KAINJOW_MUSTACHE_VERSION_MAJOR
  std::ostringstream mustacheStream;
  mustacheStream << KAINJOW_MUSTACHE_VERSION_MAJOR << "."
                 << KAINJOW_MUSTACHE_VERSION_MINOR << "."
                 << KAINJOW_MUSTACHE_VERSION_PATCH;
  versions.push_back({ "libmustache", mustacheStream.str() });
#endif

  return versions;
}
// Write one "name version" line per entry of getVersions() to `out`.
// Every entry that does not compare equal to the first one is prefixed
// with "+ ".
void printVersions(std::ostream& out) {
  LibVersions versions = getVersions();
  for (const auto& entry : versions) {
    const char* prefix = "";
    if (entry != versions.front()) {
      prefix = "+ ";
    }
    out << prefix << entry.first << " " << entry.second << std::endl;
  }
}
} //namespace kiwix

View File

@@ -0,0 +1,13 @@
<manifest xmlns:android="http://schemas.android.com/apk/res/android"
package="kiwix.org.kiwixlib"
>
<application android:allowBackup="true"
android:label="@string/app_name"
android:supportsRtl="true"
>
</application>
</manifest>

87
src/wrapper/java/book.cpp Normal file
View File

@@ -0,0 +1,87 @@
/*
* Copyright (C) 2020 Matthieu Gautier <mgautier@kymeria.fr>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*/
#include <jni.h>
#include "org_kiwix_kiwixlib_Book.h"
#include "utils.h"
#include "book.h"
// JNI entry point for Book.allocate(): forwards to the allocate<kiwix::Book>
// helper with the calling Java object.
// NOTE(review): presumably creates a native kiwix::Book and attaches it to
// thisObj -- confirm against the helper's definition (not visible here).
JNIEXPORT void JNICALL
Java_org_kiwix_kiwixlib_Book_allocate(
JNIEnv* env, jobject thisObj)
{
allocate<kiwix::Book>(env, thisObj);
}
// JNI entry point for Book.dispose(): forwards to the dispose<kiwix::Book>
// helper, using the same native type as Book_allocate.
// NOTE(review): presumably releases the native object attached to thisObj --
// confirm against the helper's definition (not visible here).
JNIEXPORT void JNICALL
Java_org_kiwix_kiwixlib_Book_dispose(JNIEnv* env, jobject thisObj)
{
dispose<kiwix::Book>(env, thisObj);
}
#define BOOK (getPtr<kiwix::Book>(env, thisObj))
// Overload Book.update(Book): updates this native book from the native book
// behind `otherBook`.
METHOD(void, Book, update__Lorg_kiwix_kiwixlib_Book_2, jobject otherBook)
{
BOOK->update(*getPtr<kiwix::Book>(env, otherBook));
}
// Overload Book.update(JNIKiwixReader): updates this native book from the
// native reader behind `reader`. The handle returned by getHandle is
// dereferenced twice to reach the kiwix::Reader passed to update().
METHOD(void, Book, update__Lorg_kiwix_kiwixlib_JNIKiwixReader_2, jobject reader)
{
BOOK->update(**Handle<kiwix::Reader>::getHandle(env, reader));
}
// GETTER(retType, name) generates the JNI entry point
// Java_org_kiwix_kiwixlib_Book_<name>: it calls the kiwix::Book method of the
// same name on the native book, converts the result with c2jni and returns it
// as `retType`. No exception handling is generated.
#define GETTER(retType, name) JNIEXPORT retType JNICALL \
Java_org_kiwix_kiwixlib_Book_##name (JNIEnv* env, jobject thisObj) \
{ \
auto cRet = BOOK->name(); \
retType ret = c2jni(cRet, env); \
return ret; \
}
// One generated getter per exposed Book property.
GETTER(jstring, getId)
GETTER(jstring, getPath)
GETTER(jboolean, isPathValid)
GETTER(jstring, getTitle)
GETTER(jstring, getDescription)
GETTER(jstring, getLanguage)
GETTER(jstring, getCreator)
GETTER(jstring, getPublisher)
GETTER(jstring, getDate)
GETTER(jstring, getUrl)
GETTER(jstring, getName)
GETTER(jstring, getFlavour)
GETTER(jstring, getTags)
GETTER(jlong, getArticleCount)
GETTER(jlong, getMediaCount)
GETTER(jlong, getSize)
GETTER(jstring, getFavicon)
GETTER(jstring, getFaviconUrl)
GETTER(jstring, getFaviconMimeType)
// Book.getTagStr(tagName): return the value of the tag `tagName` of the
// native book, or an empty Java string if anything throws.
METHOD(jstring, Book, getTagStr, jstring tagName)
{
  try {
    auto tagValue = BOOK->getTagStr(jni2c(tagName, env));
    return c2jni(tagValue, env);
  } catch(...) {
    return c2jni<std::string>("", env);
  }
}
#undef GETTER

View File

@@ -0,0 +1,63 @@
/*
* Copyright (C) 2019-2020 Matthieu Gautier <mgautier@kymeria.fr>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*/
#include <jni.h>
#include "org_kiwix_kiwixlib_Filter.h"
#include "library.h"
#include "utils.h"
/* Kiwix Filter JNI functions */
// Filter.allocate(): forwards to the allocate<kiwix::Filter> helper with the
// calling Java object.
// NOTE(review): presumably creates the native kiwix::Filter attached to
// thisObj -- confirm against the helper's definition (not visible here).
METHOD0(void, Filter, allocate) {
allocate<kiwix::Filter>(env, thisObj);
}
// Filter.dispose(): release the native object attached to thisObj.
// The type must match the one used in Filter.allocate (kiwix::Filter);
// disposing it as a kiwix::Library would destroy the object through the
// wrong type.
METHOD0(void, Filter, dispose) {
  dispose<kiwix::Filter>(env, thisObj);
}
#define FILTER (getPtr<kiwix::Filter>(env, thisObj))
// FORWARD(name, args_type) generates the JNI method Filter.<name>(value):
// it converts `value` with jni2c, calls the kiwix::Filter method of the same
// name, and returns thisObj so that calls can be chained from Java.
#define FORWARD(name, args_type) \
METHOD(jobject, Filter, name, args_type value) { \
FILTER->name(jni2c(value, env)); \
return thisObj; \
}
// FORWARDA is the array flavour: `value` is a jobjectArray converted with
// jni2c<args_type>, where args_type is the JNI type of the array elements.
#define FORWARDA(name, args_type) \
METHOD(jobject, Filter, name, jobjectArray value) { \
FILTER->name(jni2c<args_type>(value, env)); \
return thisObj; \
}
// One generated forwarder per filter criterion.
FORWARD(local, jboolean)
FORWARD(remote, jboolean)
FORWARD(valid, jboolean)
FORWARDA(acceptTags, jstring)
FORWARDA(rejectTags, jstring)
FORWARD(lang, jstring)
FORWARD(publisher, jstring)
FORWARD(creator, jstring)
FORWARD(maxSize, jlong)
FORWARD(query, jstring)

View File

@@ -0,0 +1,48 @@
/*
* Copyright (C) 2013 Emmanuel Engelhart <kelson@kiwix.org>
* Copyright (C) 2017 Matthieu Gautier <mgautier@kymeria.fr>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*/
#include <jni.h>
#include "org_kiwix_kiwixlib_JNIICU.h"
#include <iostream>
#include <string>
#include "unicode/putil.h"
#include "utils.h"
// Global recursive mutex. The static initializer macro is spelled differently
// on Android than elsewhere (the _NP suffix is used off Android).
// NOTE(review): presumably this is the mutex acquired by the `Lock` helper
// from utils.h -- confirm there.
#if __ANDROID__
pthread_mutex_t globalLock = PTHREAD_RECURSIVE_MUTEX_INITIALIZER;
#else
pthread_mutex_t globalLock = PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP;
#endif
// JNIICU.setDataDirectory(dirStr): point ICU at the directory holding its
// data files by calling u_setDataDirectory with the converted path.
// The path is converted before the Lock is taken; any exception is reported
// on stderr and swallowed.
JNIEXPORT void JNICALL Java_org_kiwix_kiwixlib_JNIICU_setDataDirectory(
JNIEnv* env, jclass kclass, jstring dirStr)
{
std::string cPath = jni2c(dirStr, env);
Lock l;
try {
u_setDataDirectory(cPath.c_str());
} catch (...) {
std::cerr << "Unable to set data directory " << cPath << std::endl;
}
}

View File

@@ -0,0 +1,496 @@
/*
* Copyright (C) 2013 Emmanuel Engelhart <kelson@kiwix.org>
* Copyright (C) 2017 Matthieu Gautier <mgautier@kymeria.fr>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*/
#include <jni.h>
#include <zim/file.h>
#include <exception>
#include "org_kiwix_kiwixlib_JNIKiwixReader.h"
#include "tools/base64.h"
#include "reader.h"
#include "utils.h"
/* Kiwix Reader JNI functions */
/**
 * Create a kiwix::Reader for the file `filename` and wrap it in a native
 * handle.
 *
 * @return the handle pointer encoded as a jlong, or 0 when a std::exception
 *         is thrown while creating the reader.
 */
JNIEXPORT jlong JNICALL Java_org_kiwix_kiwixlib_JNIKiwixReader_getNativeReader(
    JNIEnv* env, jobject obj, jstring filename)
{
  std::string cPath = jni2c(filename, env);

  LOG("Attempting to create reader with: %s", cPath.c_str());
  Lock l;
  try {
    kiwix::Reader* reader = new kiwix::Reader(cPath);
    return reinterpret_cast<jlong>(new Handle<kiwix::Reader>(reader));
  } catch (const std::exception& e) {
    LOG("Error opening ZIM file");
    // LOG takes a printf-style format (see the call above): the exception
    // text is data and must never be used as the format string itself.
    LOG("%s", e.what());
    return 0;
  }
}
// JNIKiwixReader.dispose(): forwards to Handle<kiwix::Reader>::dispose for
// the calling Java object.
// NOTE(review): presumably releases the native handle created by
// getNativeReader -- confirm in utils.h.
JNIEXPORT void JNICALL
Java_org_kiwix_kiwixlib_JNIKiwixReader_dispose(JNIEnv* env, jobject obj)
{
Handle<kiwix::Reader>::dispose(env, obj);
}
#define READER (Handle<kiwix::Reader>::getHandle(env, obj))
/* Kiwix library functions */
/**
 * Return the path of the ZIM main page.
 *
 * @return the path as a Java string, or NULL when a std::exception is thrown.
 */
JNIEXPORT jstring JNICALL
Java_org_kiwix_kiwixlib_JNIKiwixReader_getMainPage(JNIEnv* env, jobject obj)
{
  jstring url;

  try {
    std::string cUrl = READER->getMainPage().getPath();
    url = c2jni(cUrl, env);
  } catch (const std::exception& e) {
    LOG("Unable to get ZIM main page");
    // Log the message as data: it must not be interpreted as a format string.
    LOG("%s", e.what());
    url = NULL;
  }
  return url;
}
/**
 * Return the id reported by the reader.
 *
 * @return the id as a Java string, or NULL when a std::exception is thrown.
 */
JNIEXPORT jstring JNICALL
Java_org_kiwix_kiwixlib_JNIKiwixReader_getId(JNIEnv* env, jobject obj)
{
  jstring id;

  try {
    std::string cId = READER->getId();
    id = c2jni(cId, env);
  } catch (const std::exception& e) {
    LOG("Unable to get ZIM id");
    // Log the message as data: it must not be interpreted as a format string.
    LOG("%s", e.what());
    id = NULL;
  }

  return id;
}
/**
 * Return the file size reported by the reader.
 *
 * @return the size as a jint, or 0 when a std::exception is thrown.
 */
JNIEXPORT jint JNICALL
Java_org_kiwix_kiwixlib_JNIKiwixReader_getFileSize(JNIEnv* env, jobject obj)
{
  jint size = 0;

  try {
    int cSize = READER->getFileSize();
    size = c2jni(cSize, env);
  } catch (const std::exception& e) {
    LOG("Unable to get ZIM file size");
    // Log the message as data: it must not be interpreted as a format string.
    LOG("%s", e.what());
  }

  return size;
}
/**
 * Return the creator reported by the reader.
 *
 * @return the creator as a Java string, or NULL when a std::exception is
 *         thrown.
 */
JNIEXPORT jstring JNICALL
Java_org_kiwix_kiwixlib_JNIKiwixReader_getCreator(JNIEnv* env, jobject obj)
{
  jstring creator;

  try {
    std::string cCreator = READER->getCreator();
    creator = c2jni(cCreator, env);
  } catch (const std::exception& e) {
    LOG("Unable to get ZIM creator");
    // Log the message as data: it must not be interpreted as a format string.
    LOG("%s", e.what());
    creator = NULL;
  }

  return creator;
}
/**
 * Return the publisher reported by the reader.
 *
 * @return the publisher as a Java string, or NULL when a std::exception is
 *         thrown.
 */
JNIEXPORT jstring JNICALL
Java_org_kiwix_kiwixlib_JNIKiwixReader_getPublisher(JNIEnv* env, jobject obj)
{
  jstring publisher;

  try {
    std::string cPublisher = READER->getPublisher();
    publisher = c2jni(cPublisher, env);
  } catch (const std::exception& e) {
    // The message used to read "publish" (truncated word).
    LOG("Unable to get ZIM publisher");
    // Log the message as data: it must not be interpreted as a format string.
    LOG("%s", e.what());
    publisher = NULL;
  }

  return publisher;
}
/**
 * Return the name reported by the reader.
 *
 * @return the name as a Java string, or NULL when a std::exception is thrown.
 */
JNIEXPORT jstring JNICALL
Java_org_kiwix_kiwixlib_JNIKiwixReader_getName(JNIEnv* env, jobject obj)
{
  jstring name;

  try {
    std::string cName = READER->getName();
    name = c2jni(cName, env);
  } catch (const std::exception& e) {
    LOG("Unable to get ZIM name");
    // Log the message as data: it must not be interpreted as a format string.
    LOG("%s", e.what());
    name = NULL;
  }
  return name;
}
/**
 * Return the favicon content, base64-encoded.
 *
 * The mime type filled in by the reader is discarded, and the value returned
 * by Reader::getFavicon is not checked.
 *
 * @return the encoded content as a Java string, or NULL when a
 *         std::exception is thrown.
 */
JNIEXPORT jstring JNICALL
Java_org_kiwix_kiwixlib_JNIKiwixReader_getFavicon(JNIEnv* env, jobject obj)
{
  jstring favicon;

  try {
    std::string cContent;
    std::string cMime;
    READER->getFavicon(cContent, cMime);
    favicon = c2jni(
        base64_encode(cContent),
        env);
  } catch (const std::exception& e) {
    LOG("Unable to get ZIM favicon");
    // Log the message as data: it must not be interpreted as a format string.
    LOG("%s", e.what());
    favicon = NULL;
  }
  return favicon;
}
/**
 * Return the date reported by the reader.
 *
 * @return the date as a Java string, or NULL when a std::exception is thrown.
 */
JNIEXPORT jstring JNICALL
Java_org_kiwix_kiwixlib_JNIKiwixReader_getDate(JNIEnv* env, jobject obj)
{
  jstring date;

  try {
    std::string cDate = READER->getDate();
    date = c2jni(cDate, env);
  } catch (const std::exception& e) {
    LOG("Unable to get ZIM date");
    // Log the message as data: it must not be interpreted as a format string.
    LOG("%s", e.what());
    date = NULL;
  }
  return date;
}
/**
 * Return the language reported by the reader.
 *
 * @return the language as a Java string, or NULL when a std::exception is
 *         thrown.
 */
JNIEXPORT jstring JNICALL
Java_org_kiwix_kiwixlib_JNIKiwixReader_getLanguage(JNIEnv* env, jobject obj)
{
  jstring language;

  try {
    std::string cLanguage = READER->getLanguage();
    language = c2jni(cLanguage, env);
  } catch (const std::exception& e) {
    LOG("Unable to get ZIM language");
    // Log the message as data: it must not be interpreted as a format string.
    LOG("%s", e.what());
    language = NULL;
  }

  return language;
}
/**
 * Return the mime type of the entry found at the encoded path `url`.
 * Redirects are not followed here.
 *
 * @return the mime type as a Java string, or NULL when a std::exception is
 *         thrown (for instance by the entry lookup).
 */
JNIEXPORT jstring JNICALL Java_org_kiwix_kiwixlib_JNIKiwixReader_getMimeType(
    JNIEnv* env, jobject obj, jstring url)
{
  jstring mimeType;

  std::string cUrl = jni2c(url, env);
  try {
    auto entry = READER->getEntryFromEncodedPath(cUrl);
    auto cMimeType = entry.getMimetype();
    mimeType = c2jni(cMimeType, env);
  } catch (const std::exception& e) {
    LOG("Unable to get mime-type for url: %s", cUrl.c_str());
    // Log the message as data: it must not be interpreted as a format string.
    LOG("%s", e.what());
    mimeType = NULL;
  }
  return mimeType;
}
/**
 * Resolve the encoded path `url` to the path of its final entry (the
 * result of getFinalEntry()).
 *
 * @return the final path as a Java string, or an empty Java string when a
 *         std::exception is thrown.
 */
JNIEXPORT jstring JNICALL Java_org_kiwix_kiwixlib_JNIKiwixReader_checkUrl(
    JNIEnv* env, jobject obj, jstring url)
{
  const std::string requestedUrl = jni2c(url, env);

  try {
    auto resolved = READER->getEntryFromEncodedPath(requestedUrl);
    resolved = resolved.getFinalEntry();
    return c2jni(resolved.getPath(), env);
  } catch (std::exception&) {
    return c2jni(std::string(), env);
  }
}
/**
 * Fetch the content of the entry at the encoded path held by `url`.
 *
 * The out-parameters are first reset ("" / "" / 0). On success the title,
 * mime type and size of the final entry are written to `titleObj`,
 * `mimeTypeObj` and `sizeObj`. When the requested entry is a redirect, the
 * final path is written back into `url` and no content is copied.
 *
 * @return the entry bytes, or an empty array for a redirect or when a
 *         std::exception is thrown.
 */
JNIEXPORT jbyteArray JNICALL Java_org_kiwix_kiwixlib_JNIKiwixReader_getContent(
    JNIEnv* env, jobject obj, jobject url, jobject titleObj, jobject mimeTypeObj, jobject sizeObj)
{
  /* Default values */
  setStringObjValue("", titleObj, env);
  setStringObjValue("", mimeTypeObj, env);
  setIntObjValue(0, sizeObj, env);
  jbyteArray data = env->NewByteArray(0);

  /* Retrieve the content */
  std::string cUrl = getStringObjValue(url, env);
  unsigned int cSize = 0;

  try {
    auto entry = READER->getEntryFromEncodedPath(cUrl);
    bool isRedirect = entry.isRedirect();
    entry = entry.getFinalEntry();
    cSize = entry.getSize();
    setIntObjValue(cSize, sizeObj, env);
    setStringObjValue(entry.getMimetype(), mimeTypeObj, env);
    setStringObjValue(entry.getTitle(), titleObj, env);
    if (isRedirect) {
      setStringObjValue(entry.getPath(), url, env);
    } else {
      data = env->NewByteArray(cSize);
      env->SetByteArrayRegion(
          data, 0, cSize, reinterpret_cast<const jbyte*>(entry.getBlob().data()));
    }
  } catch (const std::exception& e) {
    LOG("Unable to get content for url: %s", cUrl.c_str());
    // Log the message as data: it must not be interpreted as a format string.
    LOG("%s", e.what());
  }

  return data;
}
/**
 * Fetch `len` bytes starting at `offset` from the final entry at the encoded
 * path `url`.
 *
 * - len == 0: nothing is copied; `sizeObj` receives the full entry size.
 * - the range [offset, offset+len) lies inside the entry: the bytes are
 *   returned and `sizeObj` receives `len`.
 * - otherwise (range out of bounds, or a std::exception is thrown): an empty
 *   array is returned and `sizeObj` stays 0.
 */
JNIEXPORT jbyteArray JNICALL Java_org_kiwix_kiwixlib_JNIKiwixReader_getContentPart(
    JNIEnv* env, jobject obj, jstring url, jint offset, jint len, jobject sizeObj)
{
  /* Default values */
  jbyteArray data = env->NewByteArray(0);
  setIntObjValue(0, sizeObj, env);

  /* Retrieve the content */
  std::string cUrl = jni2c(url, env);
  unsigned int cOffset = jni2c(offset, env);
  unsigned int cLen = jni2c(len, env);
  try {
    auto entry = READER->getEntryFromEncodedPath(cUrl);
    entry = entry.getFinalEntry();
    const auto entrySize = entry.getSize();

    if (cLen == 0) {
      setIntObjValue(entrySize, sizeObj, env);
    } else if (cOffset <= entrySize && cLen <= entrySize - cOffset) {
      // Written as two comparisons so that a range ending exactly at the end
      // of the entry is accepted and offset+len can never wrap around.
      auto blob = entry.getBlob(cOffset, cLen);
      data = env->NewByteArray(cLen);
      env->SetByteArrayRegion(
          data, 0, cLen, reinterpret_cast<const jbyte*>(blob.data()));
      setIntObjValue(cLen, sizeObj, env);
    }
  } catch (const std::exception& e) {
    LOG("Unable to get partial content for url: %s (%u : %u)", cUrl.c_str(), cOffset, cLen);
    // Log the message as data: it must not be interpreted as a format string.
    LOG("%s", e.what());
  }
  return data;
}
/**
 * Return the size of the final entry at the encoded path `url`.
 *
 * @return the size, or 0 when a std::exception is thrown.
 */
JNIEXPORT jlong JNICALL
Java_org_kiwix_kiwixlib_JNIKiwixReader_getArticleSize(
    JNIEnv* env, jobject obj, jstring url)
{
  std::string cUrl = jni2c(url, env);
  try {
    auto entry = READER->getEntryFromEncodedPath(cUrl);
    entry = entry.getFinalEntry();
    return c2jni(entry.getSize(), env);
  } catch (const std::exception& e) {
    LOG("Unable to get size for url : %s", cUrl.c_str());
    // Log the message as data: it must not be interpreted as a format string.
    LOG("%s", e.what());
  }
  return c2jni(0, env);
}
/**
 * Return the direct-access information of the final entry at the encoded
 * path `url` as an org.kiwix.kiwixlib.Pair.
 *
 * The pair is initialised to ("", 0) and keeps those values when a
 * std::exception is thrown.
 */
JNIEXPORT jobject JNICALL
Java_org_kiwix_kiwixlib_JNIKiwixReader_getDirectAccessInformation(
    JNIEnv* env, jobject obj, jstring url)
{
  jclass classPair = env->FindClass("org/kiwix/kiwixlib/Pair");
  jmethodID midPairinit = env->GetMethodID(classPair, "<init>", "()V");
  jobject pair = env->NewObject(classPair, midPairinit);
  setPairObjValue("", 0, pair, env);

  std::string cUrl = jni2c(url, env);
  try {
    auto entry = READER->getEntryFromEncodedPath(cUrl);
    entry = entry.getFinalEntry();
    auto part_info = entry.getDirectAccessInfo();
    setPairObjValue(part_info.first, part_info.second, pair, env);
  } catch (const std::exception& e) {
    LOG("Unable to get direct access info for url: %s", cUrl.c_str());
    // Log the message as data: it must not be interpreted as a format string.
    LOG("%s", e.what());
  }
  return pair;
}
/**
 * Start a suggestion search for `prefix`, asking for `count` results, via
 * Reader::searchSuggestionsSmart (deprecation warnings are silenced around
 * the call).
 *
 * @return JNI_TRUE when the reader call returns true; JNI_FALSE otherwise or
 *         when a std::exception is thrown.
 */
JNIEXPORT jboolean JNICALL
Java_org_kiwix_kiwixlib_JNIKiwixReader_searchSuggestions(JNIEnv* env,
                                                         jobject obj,
                                                         jstring prefix,
                                                         jint count)
{
  jboolean retVal = JNI_FALSE;
  std::string cPrefix = jni2c(prefix, env);
  unsigned int cCount = jni2c(count, env);

  try {
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
    if (READER->searchSuggestionsSmart(cPrefix, cCount)) {
      retVal = JNI_TRUE;
    }
#pragma GCC diagnostic pop
  } catch (const std::exception& e) {
    LOG("Unable to get search results for pattern: %s", cPrefix.c_str());
    // Log the message as data: it must not be interpreted as a format string.
    LOG("%s", e.what());
  }

  return retVal;
}
/**
 * Fetch the next suggestion via Reader::getNextSuggestion (deprecation
 * warnings are silenced around the call).
 *
 * On success the suggestion's title and url are written to `titleObj` and
 * `urlObj`.
 *
 * @return JNI_TRUE when the reader call returns true; JNI_FALSE otherwise or
 *         when a std::exception is thrown.
 */
JNIEXPORT jboolean JNICALL
Java_org_kiwix_kiwixlib_JNIKiwixReader_getNextSuggestion(JNIEnv* env,
                                                         jobject obj,
                                                         jobject titleObj,
                                                         jobject urlObj)
{
  jboolean retVal = JNI_FALSE;
  std::string cTitle;
  std::string cUrl;

  try {
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
    if (READER->getNextSuggestion(cTitle, cUrl)) {
      setStringObjValue(cTitle, titleObj, env);
      setStringObjValue(cUrl, urlObj, env);
      retVal = JNI_TRUE;
    }
#pragma GCC diagnostic pop
  } catch (const std::exception& e) {
    LOG("Unable to get next suggestion");
    // Log the message as data: it must not be interpreted as a format string.
    LOG("%s", e.what());
  }

  return retVal;
}
/**
 * Look up the entry titled `title` and write the path of its final entry
 * to `urlObj`.
 *
 * @return JNI_TRUE on success, JNI_FALSE when a std::exception is thrown.
 */
JNIEXPORT jboolean JNICALL
Java_org_kiwix_kiwixlib_JNIKiwixReader_getPageUrlFromTitle(JNIEnv* env,
                                                           jobject obj,
                                                           jstring title,
                                                           jobject urlObj)
{
  std::string cTitle = jni2c(title, env);

  try {
    auto entry = READER->getEntryFromTitle(cTitle);
    entry = entry.getFinalEntry();
    setStringObjValue(entry.getPath(), urlObj, env);
    return JNI_TRUE;
  } catch (const std::exception& e) {
    LOG("Unable to get url for title %s: ", cTitle.c_str());
    // Log the message as data: it must not be interpreted as a format string.
    LOG("%s", e.what());
  }

  return JNI_FALSE;
}
/**
 * Return the title reported by the reader.
 *
 * @return the title as a Java string, or NULL when a std::exception is
 *         thrown.
 */
JNIEXPORT jstring JNICALL Java_org_kiwix_kiwixlib_JNIKiwixReader_getTitle(
    JNIEnv* env, jobject obj)
{
  jstring title;

  try {
    std::string cTitle = READER->getTitle();
    title = c2jni(cTitle, env);
  } catch (const std::exception& e) {
    LOG("Unable to get zim title");
    // Log the message as data: it must not be interpreted as a format string.
    LOG("%s", e.what());
    title = NULL;
  }
  return title;
}
/**
 * Return the description reported by the reader.
 *
 * @return the description as a Java string, or NULL when a std::exception is
 *         thrown.
 */
JNIEXPORT jstring JNICALL
Java_org_kiwix_kiwixlib_JNIKiwixReader_getDescription(JNIEnv* env, jobject obj)
{
  jstring description;

  try {
    std::string cDescription = READER->getDescription();
    description = c2jni(cDescription, env);
  } catch (const std::exception& e) {
    LOG("Unable to get zim description");
    // Log the message as data: it must not be interpreted as a format string.
    LOG("%s", e.what());
    description = NULL;
  }
  return description;
}
/**
 * Return the article count reported by the reader.
 *
 * @return the count as a jint, or 0 when a std::exception is thrown.
 */
JNIEXPORT jint JNICALL
Java_org_kiwix_kiwixlib_JNIKiwixReader_getArticleCount(JNIEnv* env, jobject obj)
{
  jint articleCount = 0;
  try {
    auto cArticleCount = READER->getArticleCount();
    articleCount = c2jni(cArticleCount, env);
  } catch (const std::exception& e) {
    LOG("Unable to get article count.");
    // Log the message as data: it must not be interpreted as a format string.
    LOG("%s", e.what());
  }
  return articleCount;
}
/**
 * Return the media count reported by the reader.
 *
 * @return the count as a jint, or 0 when a std::exception is thrown.
 */
JNIEXPORT jint JNICALL
Java_org_kiwix_kiwixlib_JNIKiwixReader_getMediaCount(JNIEnv* env, jobject obj)
{
  jint mediaCount = 0;
  try {
    auto cMediaCount = READER->getMediaCount();
    mediaCount = c2jni(cMediaCount, env);
  } catch (const std::exception& e) {
    LOG("Unable to get media count.");
    // Log the message as data: it must not be interpreted as a format string.
    LOG("%s", e.what());
  }
  return mediaCount;
}
/**
 * Pick a random page and write its path to `urlObj`.
 *
 * @return JNI_TRUE on success, JNI_FALSE when a std::exception is thrown
 *         (`urlObj` is then left untouched).
 */
JNIEXPORT jboolean JNICALL Java_org_kiwix_kiwixlib_JNIKiwixReader_getRandomPage(
    JNIEnv* env, jobject obj, jobject urlObj)
{
  jboolean retVal = JNI_FALSE;

  try {
    // Single declaration: an outer `cUrl` used to be declared as well but
    // was shadowed by this one and never read.
    std::string cUrl = READER->getRandomPage().getPath();
    setStringObjValue(cUrl, urlObj, env);
    retVal = JNI_TRUE;
  } catch (const std::exception& e) {
    LOG("Unable to get random page");
    // Log the message as data: it must not be interpreted as a format string.
    LOG("%s", e.what());
  }
  return retVal;
}

View File

@@ -0,0 +1,124 @@
/*
* Copyright (C) 2013 Emmanuel Engelhart <kelson@kiwix.org>
* Copyright (C) 2017 Matthieu Gautier <mgautier@kymeria.fr>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*/
#include <zim/file.h>
#include "org_kiwix_kiwixlib_JNIKiwixSearcher.h"
#include "org_kiwix_kiwixlib_JNIKiwixSearcher_Result.h"
#include "reader.h"
#include "searcher.h"
#include "utils.h"
/* Native kiwix::Searcher attached to the current Java object.
 * Expands to a Handle lookup, so `env` and `obj` must be in scope. */
#define SEARCHER (Handle<kiwix::Searcher>::getHandle(env, obj))
/* Native kiwix::Result attached to the current Java object.
 * Expands to a Handle lookup, so `env` and `obj` must be in scope. */
#define RESULT (Handle<kiwix::Result>::getHandle(env, obj))
/* Forwards to Handle<kiwix::Searcher>::dispose for this Java object. */
JNIEXPORT void JNICALL
Java_org_kiwix_kiwixlib_JNIKiwixSearcher_dispose(JNIEnv* env, jobject obj)
{
  using SearcherHandle = Handle<kiwix::Searcher>;
  SearcherHandle::dispose(env, obj);
}
/* Kiwix Searcher JNI functions */
/*
 * Creates a new kiwix::Searcher, wraps it in a Handle and returns the
 * address of that Handle as a jlong.
 */
JNIEXPORT jlong JNICALL
Java_org_kiwix_kiwixlib_JNIKiwixSearcher_getNativeHandle(JNIEnv* env,
                                                         jobject obj)
{
  kiwix::Searcher* nativeSearcher = new kiwix::Searcher();
  auto* wrapped = new Handle<kiwix::Searcher>(nativeSearcher);
  return reinterpret_cast<jlong>(wrapped);
}
/* Kiwix library functions */
/*
 * Registers the native reader attached to the Java object `reader` with the
 * native searcher attached to `obj`.
 */
JNIEXPORT void JNICALL Java_org_kiwix_kiwixlib_JNIKiwixSearcher_addReader(
    JNIEnv* env, jobject obj, jobject reader)
{
  auto target = SEARCHER;
  auto source = Handle<kiwix::Reader>::getHandle(env, reader);
  target->add_reader(*source);
}
/*
 * Starts a search on the native searcher for `query`, passing 0 and the
 * converted `count` as the second and third arguments of
 * kiwix::Searcher::search.
 */
JNIEXPORT void JNICALL Java_org_kiwix_kiwixlib_JNIKiwixSearcher_search(
    JNIEnv* env, jobject obj, jstring query, jint count)
{
  std::string queryText = jni2c(query, env);
  unsigned int upperBound = jni2c(count, env);
  auto searcher = SEARCHER;
  searcher->search(queryText, 0, upperBound);
}
/*
 * Returns the next search result wrapped in a Java
 * org.kiwix.kiwixlib.JNIKiwixSearcher$Result object.
 *
 * Returns nullptr when the searcher has no further result, or when the Java
 * wrapper class or its constructor cannot be resolved (in that case the JNI
 * lookup has left a Java exception pending for the caller).
 */
JNIEXPORT jobject JNICALL
Java_org_kiwix_kiwixlib_JNIKiwixSearcher_getNextResult(JNIEnv* env,
                                                       jobject obj)
{
  /* Resolve the wrapper class and constructor before touching the searcher:
   * passing a null class or method ID to NewObject is undefined behaviour,
   * and a result fetched before a failed lookup could never be handed back
   * to the caller. */
  jclass resultclass
      = env->FindClass("org/kiwix/kiwixlib/JNIKiwixSearcher$Result");
  if (resultclass == nullptr) {
    return nullptr;
  }
  jmethodID ctor = env->GetMethodID(
      resultclass, "<init>", "(Lorg/kiwix/kiwixlib/JNIKiwixSearcher;JLorg/kiwix/kiwixlib/JNIKiwixSearcher;)V");
  if (ctor == nullptr) {
    return nullptr;
  }

  kiwix::Result* cresult = SEARCHER->getNextResult();
  if (cresult == nullptr) {
    return nullptr;
  }
  /* `obj` is passed twice: once as the enclosing instance of the inner class
   * and once as the explicit searcher argument of the constructor. */
  return env->NewObject(resultclass, ctor, obj, reinterpret_cast<jlong>(new Handle<kiwix::Result>(cresult)), obj);
}
/* Forwards to Handle<kiwix::Result>::dispose for this Java Result object. */
JNIEXPORT void JNICALL Java_org_kiwix_kiwixlib_JNIKiwixSearcher_00024Result_dispose(
    JNIEnv* env, jobject obj)
{
  using ResultHandle = Handle<kiwix::Result>;
  ResultHandle::dispose(env, obj);
}
/*
 * Returns the URL of the native result as a Java string, or nullptr if
 * anything throws while fetching or converting it.
 */
JNIEXPORT jstring JNICALL
Java_org_kiwix_kiwixlib_JNIKiwixSearcher_00024Result_getUrl(JNIEnv* env,
                                                            jobject obj)
{
  jstring url = nullptr;
  try {
    url = c2jni(RESULT->get_url(), env);
  } catch (...) {
    url = nullptr;
  }
  return url;
}
/*
 * Returns the title of the native result as a Java string, or nullptr if
 * anything throws while fetching or converting it.
 */
JNIEXPORT jstring JNICALL
Java_org_kiwix_kiwixlib_JNIKiwixSearcher_00024Result_getTitle(JNIEnv* env,
                                                              jobject obj)
{
  jstring title = nullptr;
  try {
    title = c2jni(RESULT->get_title(), env);
  } catch (...) {
    title = nullptr;
  }
  return title;
}
/*
 * Returns the snippet of the native result as a Java string, or nullptr if
 * anything throws while fetching or converting it.
 *
 * The guard mirrors getUrl/getTitle: a C++ exception must not propagate out
 * of a JNI entry point into the JVM.
 */
JNIEXPORT jstring JNICALL
Java_org_kiwix_kiwixlib_JNIKiwixSearcher_00024Result_getSnippet(JNIEnv* env,
                                                                jobject obj)
{
  try {
    return c2jni(RESULT->get_snippet(), env);
  } catch (...) {
    return nullptr;
  }
}
/*
 * Returns the content of the native result as a Java string, or nullptr if
 * anything throws while fetching or converting it.
 *
 * The guard mirrors getUrl/getTitle: a C++ exception must not propagate out
 * of a JNI entry point into the JVM.
 */
JNIEXPORT jstring JNICALL
Java_org_kiwix_kiwixlib_JNIKiwixSearcher_00024Result_getContent(JNIEnv* env,
                                                                jobject obj)
{
  try {
    return c2jni(RESULT->get_content(), env);
  } catch (...) {
    return nullptr;
  }
}

View File

@@ -0,0 +1,104 @@
/*
* Copyright (C) 2013 Emmanuel Engelhart <kelson@kiwix.org>
* Copyright (C) 2017 Matthieu Gautier <mgautier@kymeria.fr>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*/
#include <jni.h>
#include <zim/file.h>
#include "org_kiwix_kiwixlib_JNIKiwixServer.h"
#include "tools/base64.h"
#include "server.h"
#include "utils.h"
/* Kiwix Server JNI functions */
/*
 * Creates a kiwix::Server for the native library attached to `jLibrary`,
 * wraps it in a Handle and returns the Handle address as a jlong.
 *
 * Returns 0 when construction throws a std::exception (the error is logged).
 */
JNIEXPORT jlong JNICALL Java_org_kiwix_kiwixlib_JNIKiwixServer_getNativeServer(
    JNIEnv* env, jobject obj, jobject jLibrary)
{
  LOG("Attempting to create server");
  Lock l;
  jlong nativeHandle = 0;
  try {
    auto library = getPtr<kiwix::Library>(env, jLibrary);
    kiwix::Server* server = new kiwix::Server(library);
    nativeHandle = reinterpret_cast<jlong>(new Handle<kiwix::Server>(server));
  } catch (std::exception& e) {
    LOG("Error creating the server");
    LOG(e.what());
    nativeHandle = 0;
  }
  return nativeHandle;
}
/* Forwards to Handle<kiwix::Server>::dispose for this Java object. */
JNIEXPORT void JNICALL
Java_org_kiwix_kiwixlib_JNIKiwixServer_dispose(JNIEnv* env, jobject obj)
{
  using ServerHandle = Handle<kiwix::Server>;
  ServerHandle::dispose(env, obj);
}
/* Native kiwix::Server attached to the current Java object.
 * Expands to a Handle lookup, so `env` and `obj` must be in scope. */
#define SERVER (Handle<kiwix::Server>::getHandle(env, obj))
/* Kiwix library functions */
/* Converts `jRoot` to a std::string and passes it to Server::setRoot. */
JNIEXPORT void JNICALL
Java_org_kiwix_kiwixlib_JNIKiwixServer_setRoot(JNIEnv* env, jobject obj, jstring jRoot)
{
  std::string rootValue = jni2c(jRoot, env);
  auto server = SERVER;
  server->setRoot(rootValue);
}
/* Converts `jAddress` to a std::string and passes it to Server::setAddress. */
JNIEXPORT void JNICALL
Java_org_kiwix_kiwixlib_JNIKiwixServer_setAddress(JNIEnv* env, jobject obj, jstring jAddress)
{
  std::string addressValue = jni2c(jAddress, env);
  auto server = SERVER;
  server->setAddress(addressValue);
}
/*
 * Passes `port` to Server::setPort.
 *
 * The parameter is declared as `jint`, the JNI type for a Java `int`, so the
 * definition matches the extern "C" prototype in the javac-generated header
 * even on platforms where `jint` is not a typedef of `int`.
 */
JNIEXPORT void JNICALL
Java_org_kiwix_kiwixlib_JNIKiwixServer_setPort(JNIEnv* env, jobject obj, jint port)
{
  SERVER->setPort(port);
}
/*
 * Passes `threads` to Server::setNbThreads.
 *
 * The parameter is declared as `jint`, the JNI type for a Java `int`, so the
 * definition matches the extern "C" prototype in the javac-generated header
 * even on platforms where `jint` is not a typedef of `int`.
 */
JNIEXPORT void JNICALL
Java_org_kiwix_kiwixlib_JNIKiwixServer_setNbThreads(JNIEnv* env, jobject obj, jint threads)
{
  SERVER->setNbThreads(threads);
}
/* Passes both flags, unchanged, to Server::setTaskbar. */
JNIEXPORT void JNICALL
Java_org_kiwix_kiwixlib_JNIKiwixServer_setTaskbar(JNIEnv* env, jobject obj, jboolean withTaskbar, jboolean withLibraryButton)
{
  auto server = SERVER;
  server->setTaskbar(withTaskbar, withLibraryButton);
}
/* Passes the flag, unchanged, to Server::setBlockExternalLinks. */
JNIEXPORT void JNICALL
Java_org_kiwix_kiwixlib_JNIKiwixServer_setBlockExternalLinks(JNIEnv* env, jobject obj, jboolean blockExternalLinks)
{
  auto server = SERVER;
  server->setBlockExternalLinks(blockExternalLinks);
}
/* Calls Server::start on the native server and returns its result. */
JNIEXPORT jboolean JNICALL
Java_org_kiwix_kiwixlib_JNIKiwixServer_start(JNIEnv* env, jobject obj)
{
  auto server = SERVER;
  auto started = server->start();
  return started;
}
/* Calls Server::stop on the native server. */
JNIEXPORT void JNICALL
Java_org_kiwix_kiwixlib_JNIKiwixServer_stop(JNIEnv* env, jobject obj)
{
  auto server = SERVER;
  server->stop();
}

View File

@@ -0,0 +1,96 @@
/*
* Copyright (C) 2019-2020 Matthieu Gautier <mgautier@kymeria.fr>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*/
#include <jni.h>
#include "org_kiwix_kiwixlib_Library.h"
#include "library.h"
#include "reader.h"
#include "utils.h"
/* Kiwix Library JNI functions */
/* Forwards to allocate<kiwix::Library> for the Java object `thisObj`. */
JNIEXPORT void JNICALL
Java_org_kiwix_kiwixlib_Library_allocate(JNIEnv* env, jobject thisObj)
{
  allocate<kiwix::Library>(env, thisObj);
}
/* Forwards to dispose<kiwix::Library> for the Java object `thisObj`. */
JNIEXPORT void JNICALL Java_org_kiwix_kiwixlib_Library_dispose(
    JNIEnv* env, jobject thisObj)
{
  dispose<kiwix::Library>(env, thisObj);
}
/* Native kiwix::Library attached to the current Java object.
 * Expands to a getPtr lookup, so `env` and `thisObj` must be in scope. */
#define LIBRARY (getPtr<kiwix::Library>(env, thisObj))
/* Kiwix library functions */
/*
 * Opens the file at `path` with a kiwix::Reader, fills a kiwix::Book from it
 * and adds that book to the native library.
 *
 * Returns the result of Library::addBook, or false when a std::exception is
 * thrown along the way (the error is logged).
 */
JNIEXPORT jboolean JNICALL
Java_org_kiwix_kiwixlib_Library_addBook(
    JNIEnv* env, jobject thisObj, jstring path)
{
  auto cPath = jni2c(path, env);
  jboolean added = false;
  try {
    kiwix::Reader reader(cPath);
    kiwix::Book book;
    book.update(reader);
    added = LIBRARY->addBook(book);
  } catch (std::exception& e) {
    LOG("Unable to add the book");
    LOG(e.what());
  }
  return added;
}
/*
 * Returns a new Java org.kiwix.kiwixlib.Book whose native pointer is a heap
 * copy of the library's book with the given id.
 *
 * Returns nullptr when the library lookup throws a std::exception (logged),
 * or when the Java Book object cannot be created (a Java exception is then
 * pending from the failing JNI call).
 */
METHOD(jobject, Library, getBookById, jstring id) {
  auto cId = jni2c(id, env);

  /* Guard the native lookup like addBook does: a C++ exception must not
   * escape a JNI entry point. */
  kiwix::Book* cBook = nullptr;
  try {
    cBook = new kiwix::Book(LIBRARY->getBookById(cId));
  } catch (std::exception& e) {
    LOG("Unable to get the book");
    LOG(e.what());
    return nullptr;
  }

  /* Each JNI step is only attempted if the previous one succeeded; passing a
   * null class or method ID to the next call is undefined behaviour. */
  jclass cls = env->FindClass("org/kiwix/kiwixlib/Book");
  jmethodID constructorId
      = (cls != nullptr) ? env->GetMethodID(cls, "<init>", "()V") : nullptr;
  jobject book
      = (constructorId != nullptr) ? env->NewObject(cls, constructorId) : nullptr;
  if (book == nullptr) {
    /* No Java object took ownership of the copy, so free it here. */
    delete cBook;
    return nullptr;
  }

  setPtr(env, book, cBook);
  return book;
}
/* Returns Library::getBookCount for the given local/remote selection. */
METHOD(jint, Library, getBookCount, jboolean localBooks, jboolean remoteBooks) {
  auto library = LIBRARY;
  return library->getBookCount(localBooks, remoteBooks);
}
/* Returns the library's book ids converted to a Java array via c2jni. */
METHOD0(jobjectArray, Library, getBooksIds) {
  auto ids = LIBRARY->getBooksIds();
  return c2jni(ids, env);
}
/*
 * Applies the native kiwix::Filter attached to `filterObj` to the library
 * and returns the result converted to a Java array via c2jni.
 */
METHOD(jobjectArray, Library, filter, jobject filterObj) {
  auto nativeFilter = getPtr<kiwix::Filter>(env, filterObj);
  auto matches = LIBRARY->filter(*nativeFilter);
  return c2jni(matches, env);
}
/* Returns Library::getBooksLanguages converted to a Java array via c2jni. */
METHOD0(jobjectArray, Library, getBooksLanguages) {
  auto languages = LIBRARY->getBooksLanguages();
  return c2jni(languages, env);
}
/* Returns Library::getBooksCreators converted to a Java array via c2jni. */
METHOD0(jobjectArray, Library, getBooksCreators) {
  auto creators = LIBRARY->getBooksCreators();
  return c2jni(creators, env);
}
/*
 * Returns Library::getBooksPublishers converted to a Java array via c2jni.
 * Note the JNI method name is singular while the native call is plural.
 */
METHOD0(jobjectArray, Library, getBooksPublisher) {
  auto publishers = LIBRARY->getBooksPublishers();
  return c2jni(publishers, env);
}

View File

@@ -0,0 +1,132 @@
/*
* Copyright (C) 2020 Matthieu Gautier <mgautier@kymeria.fr>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*/
#include <jni.h>
#include <zim/file.h>
#include "org_kiwix_kiwixlib_Manager.h"
#include "manager.h"
#include "utils.h"
/*
 * Forwards to allocate<kiwix::Manager> for `thisObj`, passing the native
 * library attached to `libraryObj` as the extra argument.
 */
JNIEXPORT void JNICALL Java_org_kiwix_kiwixlib_Manager_allocate(
    JNIEnv* env, jobject thisObj, jobject libraryObj)
{
  auto nativeLibrary = getPtr<kiwix::Library>(env, libraryObj);
  allocate<kiwix::Manager>(env, thisObj, nativeLibrary);
}
/* Forwards to dispose<kiwix::Manager> for the Java object `thisObj`. */
JNIEXPORT void JNICALL Java_org_kiwix_kiwixlib_Manager_dispose(
    JNIEnv* env, jobject thisObj)
{
  dispose<kiwix::Manager>(env, thisObj);
}
/* Native kiwix::Manager attached to the current Java object.
 * Expands to a getPtr lookup, so `env` and `thisObj` must be in scope. */
#define MANAGER (getPtr<kiwix::Manager>(env, thisObj))
/* Kiwix manager functions */
/*
 * Passes the file at `path` to Manager::readFile.
 *
 * Returns the manager's result, or false when a std::exception is thrown
 * (the error is logged).
 */
JNIEXPORT jboolean JNICALL
Java_org_kiwix_kiwixlib_Manager_readFile(
    JNIEnv* env, jobject thisObj, jstring path)
{
  auto cPath = jni2c(path, env);
  try {
    return MANAGER->readFile(cPath);
  } catch (std::exception& e) {
    /* Message reworded: the former text ("Unable to get readFile") was
     * garbled and did not describe the failed operation. */
    LOG("Unable to read the file");
    LOG(e.what());
  }
  return false;
}
/*
 * Passes the XML text `content` to Manager::readXml, together with `false`
 * and `libraryPath` as its second and third arguments.
 *
 * Returns the manager's result, or false when a std::exception is thrown
 * (the error is logged).
 */
JNIEXPORT jboolean JNICALL
Java_org_kiwix_kiwixlib_Manager_readXml(
    JNIEnv* env, jobject thisObj, jstring content, jstring libraryPath)
{
  auto cContent = jni2c(content, env);
  auto cPath = jni2c(libraryPath, env);
  try {
    return MANAGER->readXml(cContent, false, cPath);
  } catch (std::exception& e) {
    /* Message corrected: it previously read "Unable to get ZIM id", copied
     * from an unrelated function. */
    LOG("Unable to read the XML content");
    LOG(e.what());
  }
  return false;
}
/*
 * Passes the OPDS text `content` and `urlHost` to Manager::readOpds.
 *
 * Returns the manager's result, or false when a std::exception is thrown
 * (the error is logged).
 */
JNIEXPORT jboolean JNICALL
Java_org_kiwix_kiwixlib_Manager_readOpds(
    JNIEnv* env, jobject thisObj, jstring content, jstring urlHost)
{
  auto cContent = jni2c(content, env);
  auto cUrl = jni2c(urlHost, env);
  try {
    return MANAGER->readOpds(cContent, cUrl);
  } catch (std::exception& e) {
    /* Message corrected: it previously read "Unable to get ZIM id", copied
     * from an unrelated function. */
    LOG("Unable to read the OPDS content");
    LOG(e.what());
  }
  return false;
}
/*
 * Passes the file at `path` to Manager::readBookmarkFile.
 *
 * Returns the manager's result, or false when a std::exception is thrown
 * (the error is logged).
 */
JNIEXPORT jboolean JNICALL
Java_org_kiwix_kiwixlib_Manager_readBookmarkFile(
    JNIEnv* env, jobject thisObj, jstring path)
{
  auto cPath = jni2c(path, env);
  try {
    return MANAGER->readBookmarkFile(cPath);
  } catch (std::exception& e) {
    /* Message corrected: it previously read "Unable to get ZIM id", copied
     * from an unrelated function. */
    LOG("Unable to read the bookmark file");
    LOG(e.what());
  }
  return false;
}
/*
 * Calls Manager::addBookFromPathAndGetId with the converted arguments and
 * returns the resulting id as a Java string.
 *
 * Returns NULL when the manager returns an empty id or when a std::exception
 * is thrown (the error is logged).
 */
JNIEXPORT jstring JNICALL
Java_org_kiwix_kiwixlib_Manager_addBookFromPath(
    JNIEnv* env, jobject thisObj,
    jstring pathToOpen, jstring pathToSave, jstring url, jboolean checkMetaData)
{
  auto cPathToOpen = jni2c(pathToOpen, env);
  auto cPathToSave = jni2c(pathToSave, env);
  auto cUrl = jni2c(url, env);
  jstring id = NULL;
  try {
    auto cId = MANAGER->addBookFromPathAndGetId(cPathToOpen, cPathToSave, cUrl, checkMetaData);
    if ( !cId.empty() ) {
      id = c2jni(cId, env);
    }
  } catch (std::exception& e) {
    /* Message corrected: it previously read "Unable to get ZIM file size",
     * copied from an unrelated function. */
    LOG("Unable to add the book from path");
    LOG(e.what());
  }
  return id;
}

View File

@@ -0,0 +1,55 @@
# Java side of the wrapper. This list is the input of the 'jni' custom target
# (javac header generation) and of the 'kiwixlib' jar target below.
java_sources = files([
'org/kiwix/kiwixlib/JNIICU.java',
'org/kiwix/kiwixlib/Book.java',
'org/kiwix/kiwixlib/JNIKiwixReader.java',
'org/kiwix/kiwixlib/Library.java',
'org/kiwix/kiwixlib/Manager.java',
'org/kiwix/kiwixlib/Filter.java',
'org/kiwix/kiwixlib/JNIKiwixSearcher.java',
'org/kiwix/kiwixlib/JNIKiwixServer.java',
'org/kiwix/kiwixlib/JNIKiwixInt.java',
'org/kiwix/kiwixlib/JNIKiwixString.java',
'org/kiwix/kiwixlib/JNIKiwixBool.java',
'org/kiwix/kiwixlib/JNIKiwixException.java',
'org/kiwix/kiwixlib/Pair.java'
])
# Generate the JNI headers with `javac -h`. Every entry in `output` must be a
# file the command really produces, otherwise the target is never up to date.
# The first entry used to be 'org_kiwix_kiwixlib_JNIKiwix.h', but no
# JNIKiwix.java is part of java_sources; the matching input is JNIICU.java.
# NOTE(review): assumes JNIICU.java declares native methods, so that javac
# emits a header for it -- confirm.
kiwix_jni = custom_target('jni',
  input: java_sources,
  output: ['org_kiwix_kiwixlib_JNIICU.h',
           'org_kiwix_kiwixlib_Book.h',
           'org_kiwix_kiwixlib_JNIKiwixReader.h',
           'org_kiwix_kiwixlib_Library.h',
           'org_kiwix_kiwixlib_Manager.h',
           'org_kiwix_kiwixlib_Filter.h',
           'org_kiwix_kiwixlib_JNIKiwixServer.h',
           'org_kiwix_kiwixlib_JNIKiwixSearcher.h',
           'org_kiwix_kiwixlib_JNIKiwixSearcher_Result.h'],
  command: ['javac', '-d', '@OUTDIR@', '-h', '@OUTDIR@', '@INPUT@']
)
# C++ sources of the JNI bindings.
jni_sources = files([
'kiwixicu.cpp',
'book.cpp',
'kiwixreader.cpp',
'library.cpp',
'manager.cpp',
'filter.cpp',
'kiwixsearcher.cpp',
'kiwixserver.cpp',
])
# Add the bindings and the generated JNI headers to the kiwix_sources list.
kiwix_sources += jni_sources + [kiwix_jni]
# Build the jar only when 'java' is listed in `wrapper`.
if 'java' in wrapper
kiwix_jar = jar('kiwixlib', java_sources)
# The JUnit test jar below is currently commented out.
#junit_jar = files('org/kiwix/testing/junit-4.13.jar')
#test_jar = jar('testing', 'org/kiwix/testing/test.java',
# link_with: [kiwix_jar, junit_jar])
#test('javatest', test_jar)
endif
# Install the Java sources (without the kiwix/testing directory), the 'res'
# directory and the Android manifest under kiwix-lib.
install_subdir('org', install_dir: 'kiwix-lib/java', exclude_directories: ['kiwix/testing'])
install_subdir('res', install_dir: 'kiwix-lib')
install_data('AndroidManifest.xml', install_dir: 'kiwix-lib')

Some files were not shown because too many files have changed in this diff Show More