Compare commits

..

1 Commits

Author SHA1 Message Date
Matthieu Gautier
e2e42ac65c Patch httplib.h for gcc maybe-uninitialized warning.
Recent versions of gcc warn about the use of `buf` where it may be
uninitialized.
But here, buf is used as an output buffer to initialize it.

I'm not sure we can fix this differently than ignoring the warning.
2021-06-30 17:30:30 +02:00
338 changed files with 34785 additions and 28136 deletions

27
.github/move.yml vendored Normal file
View File

@@ -0,0 +1,27 @@
# Configuration for Move Issues - https://github.com/dessant/move-issues
# Delete the command comment when it contains no other content
deleteCommand: true
# Close the source issue after moving
closeSourceIssue: true
# Lock the source issue after moving
lockSourceIssue: false
# Mention issue and comment authors
mentionAuthors: true
# Preserve mentions in the issue content
keepContentMentions: true
# Move labels that also exist on the target repository
moveLabels: true
# Set custom aliases for targets
# aliases:
# r: repo
# or: owner/repo
# Repository to extend settings from
# _extends: repo

View File

@@ -1,200 +1,163 @@
name: CI
on:
push:
branches:
- main
pull_request:
on: [push]
jobs:
macOS:
strategy:
fail-fast: false
matrix:
target:
- macos-aarch64-dyn
- macos-x86_64-dyn
- ios-arm64-dyn
- ios-x86_64-dyn
include:
- target: macos-aarch64-dyn
arch_name: arm64-apple-macos
run_test: true
- target: macos-x86_64-dyn
arch_name: x86_64-apple-darwin
run_test: true
- target: ios-arm64-dyn
arch_name: aarch64-apple-ios
run_test: false
- target: ios-x86_64-dyn
arch_name: x86-apple-ios-simulator
run_test: false
runs-on: macos-15
env:
HOME: /Users/runner
Macos:
runs-on: macos-latest
steps:
- name: Retrieve source code
uses: actions/checkout@v4
- name: Checkout code
uses: actions/checkout@v1
- name: Setup python 3.5
uses: actions/setup-python@v1
with:
python-version: '3.5'
- name: Install packages
run: |
brew update
brew install ninja meson
- name: Install dependencies
uses: kiwix/kiwix-build/actions/dl_deps_archive@main
with:
target_platform: ${{ matrix.target }}
- name: Compile
env:
PKG_CONFIG_PATH: ${{env.HOME}}/BUILD_${{matrix.arch_name}}/INSTALL/lib/pkgconfig
CPPFLAGS: -I${{env.HOME}}/BUILD_${{matrix.arch_name}}/INSTALL/include
MESON_OPTION: --default-library=shared -Db_coverage=true
MESON_CROSSFILE: ${{env.HOME}}/BUILD_${{matrix.arch_name}}/meson_cross_file.txt
brew install gcovr pkg-config ninja
- name: Install python modules
run: pip3 install meson==0.49.2 pytest
- name: Install deps
shell: bash
run: |
if [ -e $MESON_CROSSFILE ]; then
MESON_OPTION="$MESON_OPTION --cross-file $MESON_CROSSFILE -Dstatic-linkage=true"
fi
meson . build ${MESON_OPTION}
ninja -C build
- name: Test libkiwix
if: matrix.run_test
ARCHIVE_NAME=deps2_osx_native_dyn_kiwix-lib.tar.xz
wget -O- http://tmp.kiwix.org/ci/${ARCHIVE_NAME} | tar -xJ -C $HOME
- name: Compile
shell: bash
run: |
export PKG_CONFIG_PATH=$HOME/BUILD_native_dyn/INSTALL/lib/pkgconfig
export CPPFLAGS="-I$HOME/BUILD_native_dyn/INSTALL/include"
meson . build --default-library=shared -Db_coverage=true
cd build
ninja
- name: Test
shell: bash
run: |
export LD_LIBRARY_PATH=$HOME/BUILD_native_dyn/INSTALL/lib:$HOME/BUILD_native_dyn/INSTALL/lib64
cd build
meson test --verbose
ninja coverage
env:
SKIP_BIG_MEMORY_TEST: 1
LD_LIBRARY_PATH: ${{env.HOME}}/BUILD_${{matrix.arch_name}}/INSTALL/lib:${{env.HOME}}/BUILD_${{matrix.arch_name}}/INSTALL/lib64
run: meson test -C build --verbose
Windows:
runs-on: windows-2022
steps:
- name: Checkout code
uses: actions/checkout@v4
- name: Setup python 3.10
uses: actions/setup-python@v5
with:
python-version: '3.10'
- name: Install packages
run:
choco install pkgconfiglite ninja
- name: Install python modules
run: pip3 install meson
- name: Setup MSVC compiler
uses: bus1/cabuild/action/msdevshell@v1
with:
architecture: x64
- name: Install dependencies
uses: kiwix/kiwix-build/actions/dl_deps_archive@main
with:
target_platform: win-x86_64-static
- name: Compile
shell: cmd
- name: Publish coverage
shell: bash
run: |
set PKG_CONFIG_PATH=%cd%\BUILD_win-amd64\INSTALL\lib\pkgconfig
set CPPFLAGS=-I%cd%\BUILD_win-amd64\INSTALL\include
meson.exe setup . build -Dwerror=false --default-library=static --buildtype=release
cd build
ninja.exe
- name: Test
shell: cmd
run: |
cd build
meson.exe test --verbose
curl https://codecov.io/bash -o codecov.sh
bash codecov.sh -n osx_native_dyn -Z
rm codecov.sh
env:
WAIT_TIME_FACTOR_TEST: 10
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
Linux:
strategy:
fail-fast: false
matrix:
target:
- linux-x86_64-static
- linux-x86_64-dyn
- android-arm
- android-arm64
image_variant: ['jammy']
name:
- native_static
- native_dyn
- native_dyn_bionic
- android_arm
- android_arm64
- win32_static
- win32_dyn
include:
- target: linux-x86_64-static
- name: native_static
target: native_static
image_variant: xenial
lib_postfix: '/x86_64-linux-gnu'
arch_name: linux-x86_64
run_test: true
coverage: true
- target: linux-x86_64-dyn
- name: native_dyn
target: native_dyn
image_variant: xenial
lib_postfix: '/x86_64-linux-gnu'
arch_name: linux-x86_64
run_test: true
coverage: true
- target: android-arm
lib_postfix: '/arm-linux-androideabi'
arch_name: arm-linux-androideabi
run_test: false
coverage: false
- target: android-arm64
lib_postfix: '/aarch64-linux-android'
arch_name: aarch64-linux-android
run_test: false
coverage: false
- name: native_dyn_bionic
target: native_dyn
image_variant: bionic
lib_postfix: '/x86_64-linux-gnu'
- name: android_arm
target: android_arm
image_variant: xenial
lib_postfix: '/x86_64-linux-gnu'
- name: android_arm64
target: android_arm64
image_variant: xenial
lib_postfix: '/x86_64-linux-gnu'
- name: win32_static
target: win32_static
image_variant: f31
lib_postfix: '64'
- name: win32_dyn
target: win32_dyn
image_variant: f31
lib_postfix: '64'
env:
HOME: /home/runner
runs-on: ubuntu-22.04
runs-on: ubuntu-latest
container:
image: "ghcr.io/kiwix/kiwix-build_ci_${{matrix.image_variant}}:2025-06-07"
image: "kiwix/kiwix-build_ci:${{matrix.image_variant}}-26"
steps:
- name: Extract branch name
shell: bash
run: echo "##[set-output name=branch;]$(echo ${GITHUB_REF#refs/heads/})"
id: extract_branch
- name: Checkout code
uses: actions/checkout@v4
- name: Install dependencies
uses: kiwix/kiwix-build/actions/dl_deps_archive@main
with:
target_platform: ${{ matrix.target }}
shell: python
run: |
from subprocess import check_call
from os import environ
command = [
'git', 'clone',
'https://github.com/${{github.repository}}',
'--depth=1',
'--branch', '${{steps.extract_branch.outputs.branch}}'
]
check_call(command, cwd=environ['HOME'])
- name: Install deps
shell: bash
run: |
ARCHIVE_NAME=deps2_${OS_NAME}_${{matrix.target}}_kiwix-lib.tar.xz
wget -O- http://tmp.kiwix.org/ci/${ARCHIVE_NAME} | tar -xJ -C /home/runner
- name: Compile
shell: bash
run: |
meson --version
if [[ "${{matrix.target}}" =~ .*-dyn ]]; then
if [[ "${{matrix.target}}" =~ .*_dyn ]]; then
MESON_OPTION="--default-library=shared"
else
MESON_OPTION="--default-library=static"
fi
if [ -e "${{env.HOME}}/BUILD_${{matrix.arch_name}}/meson_cross_file.txt" ]; then
MESON_OPTION="$MESON_OPTION --cross-file ${{env.HOME}}/BUILD_${{matrix.arch_name}}/meson_cross_file.txt"
else
if [[ "${{matrix.target}}" =~ native_.* ]]; then
MESON_OPTION="$MESON_OPTION -Db_coverage=true"
else
MESON_OPTION="$MESON_OPTION --cross-file $HOME/BUILD_${{matrix.target}}/meson_cross_file.txt"
fi
if [[ "${{matrix.target}}" =~ android-.* ]]; then
MESON_OPTION="$MESON_OPTION -Dstatic-linkage=true"
if [[ "${{matrix.target}}" =~ android_.* ]]; then
MESON_OPTION="$MESON_OPTION -Dandroid=true"
fi
cd $HOME/libkiwix
meson . build ${MESON_OPTION}
cd build
ninja
env:
PKG_CONFIG_PATH: "/home/runner/BUILD_${{matrix.arch_name}}/INSTALL/lib/pkgconfig:/home/runner/BUILD_${{matrix.arch_name}}/INSTALL/lib${{matrix.lib_postfix}}/pkgconfig"
CPPFLAGS: "-I/home/runner/BUILD_${{matrix.arch_name}}/INSTALL/include"
PKG_CONFIG_PATH: "/home/runner/BUILD_${{matrix.target}}/INSTALL/lib/pkgconfig:/home/runner/BUILD_${{matrix.target}}/INSTALL/lib${{matrix.lib_postfix}}/pkgconfig"
CPPFLAGS: "-I/home/runner/BUILD_${{matrix.target}}/INSTALL/include"
- name: Test
if: matrix.run_test
if: startsWith(matrix.target, 'native_')
shell: bash
run: |
cd build
cd $HOME/libkiwix/build
meson test --verbose
if [[ "${{matrix.coverage}}" = "true" ]]; then
ninja coverage
fi
ninja coverage
env:
LD_LIBRARY_PATH: "/home/runner/BUILD_${{matrix.arch_name}}/INSTALL/lib:/home/runner/BUILD_${{matrix.arch_name}}/INSTALL/lib${{matrix.lib_postfix}}"
LD_LIBRARY_PATH: "/home/runner/BUILD_${{matrix.target}}/INSTALL/lib:/home/runner/BUILD_${{matrix.target}}/INSTALL/lib${{matrix.lib_postfix}}"
SKIP_BIG_MEMORY_TEST: 1
- name: Publish coverage
if: matrix.coverage
uses: codecov/codecov-action@v3
with:
token: ${{ secrets.CODECOV_TOKEN }}
shell: bash
run: |
cd $HOME/libkiwix
curl https://codecov.io/bash -o codecov.sh
bash codecov.sh -n "${OS_NAME}_${{matrix.target}}" -Z
rm codecov.sh
if: startsWith(matrix.target, 'native_') && matrix.image_variant == 'xenial'
env:
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}

View File

@@ -1,29 +1,15 @@
name: Packages
on:
pull_request:
push:
branches:
- main
release:
types: [published]
on: [push, pull_request]
jobs:
build-deb:
runs-on: ubuntu-22.04
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
distro:
# - debian-unstable
# - debian-trixie
# - debian-bookworm
# - debian-bullseye
- ubuntu-noble
- ubuntu-jammy
distro: [ubuntu-hirsute, ubuntu-groovy, ubuntu-focal, ubuntu-bionic]
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v2
# Determine which PPA we should upload to
- name: PPA
@@ -31,81 +17,71 @@ jobs:
run: |
if [[ $REF == refs/tags* ]]
then
echo "ppa=kiwixteam/release" >> $GITHUB_OUTPUT
echo "::set-output name=ppa::kiwixteam/release"
else
echo "ppa=kiwixteam/dev" >> $GITHUB_OUTPUT
echo "::set-output name=ppa::kiwixteam/dev"
fi
env:
REF: ${{ github.ref }}
- uses: legoktm/gh-action-auto-dch@main
- uses: legoktm/gh-action-auto-dch@master
with:
fullname: Kiwix builder
email: release+launchpad@kiwix.org
distro: ${{ matrix.distro }}
# - uses: legoktm/gh-action-build-deb@debian-unstable
# if: matrix.distro == 'debian-unstable'
# name: Build package for debian-unstable
# id: build-debian-unstable
# with:
# args: --no-sign
#
# - uses: legoktm/gh-action-build-deb@b47978ba8498dc8b8153cc3b5f99a5fc1afa5de1 # pin@debian-trixie
# if: matrix.distro == 'debian-trixie'
# name: Build package for debian-trixie
# id: build-debian-trixie
# with:
# args: --no-sign
#
# - uses: legoktm/gh-action-build-deb@1f4e86a6bb34aaad388167eaf5eb85d553935336 # pin@debian-bookworm
# if: matrix.distro == 'debian-bookworm'
# name: Build package for debian-bookworm
# id: build-debian-bookworm
# with:
# args: --no-sign
#
# - uses: legoktm/gh-action-build-deb@084b4263209252ec80a75d2c78a586192c17f18d # pin@debian-bullseye
# if: matrix.distro == 'debian-bullseye'
# name: Build package for debian-bullseye
# id: build-debian-bullseye
# with:
# args: --no-sign
- uses: legoktm/gh-action-build-deb@9114a536498b65c40b932209b9833aa942bf108d # pin@ubuntu-noble
if: matrix.distro == 'ubuntu-noble'
name: Build package for ubuntu-noble
id: build-ubuntu-noble
- uses: legoktm/gh-action-build-deb@ubuntu-hirsute
if: matrix.distro == 'ubuntu-hirsute'
name: Build package for ubuntu-hirsute
id: build-ubuntu-hirsute
with:
args: --no-sign
ppa: ${{ steps.ppa.outputs.ppa }}
- uses: legoktm/gh-action-build-deb@ubuntu-jammy
if: matrix.distro == 'ubuntu-jammy'
name: Build package for ubuntu-jammy
id: build-ubuntu-jammy
- uses: legoktm/gh-action-build-deb@ubuntu-groovy
if: matrix.distro == 'ubuntu-groovy'
name: Build package for ubuntu-groovy
id: build-ubuntu-groovy
with:
args: --no-sign
ppa: ${{ steps.ppa.outputs.ppa }}
- uses: actions/upload-artifact@v4
- uses: legoktm/gh-action-build-deb@ubuntu-focal
if: matrix.distro == 'ubuntu-focal'
name: Build package for ubuntu-focal
id: build-ubuntu-focal
with:
args: --no-sign
ppa: ${{ steps.ppa.outputs.ppa }}
- uses: legoktm/gh-action-build-deb@ubuntu-bionic
if: matrix.distro == 'ubuntu-bionic'
name: Build package for ubuntu-bionic
id: build-ubuntu-bionic
with:
args: --no-sign
ppa: ${{ steps.ppa.outputs.ppa }}
- uses: actions/upload-artifact@v2
with:
name: Packages for ${{ matrix.distro }}
path: output
- uses: legoktm/gh-action-dput@main
- uses: legoktm/gh-action-dput@master
name: Upload dev package
# Only upload on pushes to main
if: github.event_name == 'push' && github.event.ref == 'refs/heads/main' && startswith(matrix.distro, 'ubuntu-')
# Only upload on pushes to master
if: github.event_name == 'push' && github.event.ref == 'refs/heads/master' && startswith(matrix.distro, 'ubuntu-')
with:
gpg_key: ${{ secrets.LAUNCHPAD_GPG }}
repository: ppa:kiwixteam/dev
packages: output/*_source.changes
- uses: legoktm/gh-action-dput@main
- uses: legoktm/gh-action-dput@master
name: Upload release package
if: github.event_name == 'release' && startswith(matrix.distro, 'ubuntu-')
# Only upload on pushes to master or tag
if: github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags') && startswith(matrix.distro, 'ubuntu-')
with:
gpg_key: ${{ secrets.LAUNCHPAD_GPG }}
repository: ppa:kiwixteam/release
packages: output/*_source.changes

2
.gitignore vendored
View File

@@ -5,5 +5,3 @@ subprojects/googletest-release*
build/
.vscode/
builddir/
.cache/
.clangd/

View File

@@ -1,21 +0,0 @@
# Read the Docs configuration file
# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
# Required
version: 2
# Set the version of Python and other tools you might need
build:
os: ubuntu-22.04
tools:
python: "3.11"
# Build documentation in the docs/ directory with Sphinx
sphinx:
configuration: docs/conf.py
# We recommend specifying your dependencies to enable reproducible builds:
# https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html
python:
install:
- requirements: docs/requirements.txt

283
ChangeLog
View File

@@ -1,288 +1,7 @@
libkiwix 14.1.1
===============
* Server:
- Fix regression for kiwix-serve --nosearchbar (@veloman-yunkan #1250)
- Avoid results content interpretation... crash in fulltext search (@vighnesh-sawant #1241)
- Fix for intermittent /content/blank.html errors (@veloman-yunkan #1249)
libkiwix 14.1.0
===============
* Server:
- Viewer detects & tracks intrapage navigation anchors too (@veloman-yunkan #1213)
- Add support for catalog only mode (@veloman-yunkan #1219)
- Add API which returns server access url (@vighnesh-sawant #1234)
- Fix chrome searchbar placeholder text overflow (@aditii2712 #1185)
- Fix magnet link query string (@rgaudin #1160)
- Improve chrome printing stylesheet (@kelson42 #1202)
- Default white background (@kelson42 #1205)
* Other:
- Switched to the new libzim illustrations API (@veloman-yunkan #1226)
- Stop building Windows with DEBUG symbols in CI (@kelson42 #1165)
- Update many things in the CI/CD (@kelson42 #1203 #1194 #1209 #1207 #1235)
- Requires now libzim 9.4.0 (@kelson42 #1231)
- Fix compilation for FreeBSD (@OICe2 #1173 #1174)
- Wait up to 1s to let aria2c start before complaining (@kelson42 #1169)
libkiwix 14.0.0
===============
* Server:
- Support of IPv6 (@veloman-yunkan @aryanA101a #1074 #1093)
- Better public IP configuration/detection (@sgourdas #1132)
- Fix API errors in catalog searches if a Xapian keyword is used (@veloman-yunkan #1137)
- Clearly define which Web browsers are supported (@kelson42 @rgaudin @jaifroid @benoit74 #1132)
- Improve welcome page download buttons (@veloman-yunkan #1094)
- Better handling of external (non-HTTP) links (@veloman-yunkan #1123)
- Fix book illustration size on welcome page to 48x48 pixels (@veloman-yunkan #1127)
- Remove "Multiple Languages" in language filter (@veloman-yunkan #1098)
- Stop transforming tags casing (@kelson42 @veloman-yunkan #1079 #1121)
- ZIM file size consistently advertised in MiB (@harsha-mangena #1132)
- Few new supported languages in the filter (@kelson42 #1080)
- Improve accesskeys (@kelson42 #1075)
- Add OpenSearch <link> to head of pages (@kelson42 #1070)
* Compilation/Packaging:
- Multiple fixes around deb packaging (@kelson42 #1108 #1114 #1135)
- Generating of libkiwix.pc via Meson (@veloman-yunkan #1133)
- Native Windows CI/CD (@mgautierfr @kelson42 #1113 #1125)
- Better check (maximum) libzim version (@kelson42 #1124)
- Multiple automated tests improvements (@veloman-yunkan #1068 #1067)
* Other:
- Deleted supported env. variable `$KIWIX_DATA_DIR` and `kiwix::getDataDirectory()` (@sgourdas #1107)
- New string slugification for filenames (@shaopenglin #1105)
- Multiple improvements around aria2c download mgmt. (@veloman-yunkan #1097)
libkiwix 13.1.0
===============
* Server:
- Properly translated error pages (@veloman-yunkan #1032)
- Properly translated search result page (@veloman-yunkan #1046)
- Default UI language is resolved in frontend (@veloman-yunkan #1044)
- Better support of older Web browsers by polyfilling replaceAll() (@veloman-yunkan #1054)
* New API to migrate bookmarks between books (@mgautierfr #1043)
* Fixed compilation on Haiku OS (@Begasus #1048)
libkiwix 13.0.0
===============
* Server:
- Improved look & feel of kiwix-serve UI (@veloman-yunkan #917 #1021)
- Increase tolerance to malformed (control characters) ZIM entry titles (@veloman-yunkan #1023)
- API allowing to filter many categories at once (@juuz0 #974)
- Cookie-less user language control (@veloman-yunkan #997)
- Hack to fix Mirrorbrain based broken magnet URLs (@rgaudin #1001)
* Fix handling of books with 'Name' metadata with dots (@mgautier #1016)
* New method beautifyFileSize() to provide nice-looking book sizes (@vuuz0 #971)
* Fix a few missing includes (@mgautierfr #978)
* New functions to read - kiwix-serve - languages and categories streams (@juuz0 #967)
* Add support of Fon language (@kelson42 #1013)
* C++17 code base compliancy (@mgautierfr #996)
* Use everywhere std::shared_ptr in place of raw pointer (@mgautierfr #991)
* Do not use [[nodiscard]] attribute on compiler not supporting it (@mgautierfr #1003)
* Add a non minified version of autoComplete.js (@mgautierfr #1008)
* Multiple CI/CD improvements (@kelson42 #982)
libkiwix 12.1.0
===============
* Server:
- Introduce a `/nojs` endpoint to browse catalog and zim files with a browser without js (@juuz0 #897)
- Translate the viewer (@veloman-yunkan #871 #846)
- Display `mul` on tile when zim is multi-languages (@juuz0 #934)
- Suggestion links point to the `/content` endpoint (@veloman-yunkan #862)
- Correctly compress web fonts in http answers (@kelson42 #856)
- Correctly encode link in suggestions (@veloman-yunkan #859 #860 #963)
- Correctly encode url redirection (@veloman-yunkan #866 #890)
- Properly handle user language, through cookies and http headers (@veloman-yunkan #849 #869)
- Fix url encoding (@veloman-yunkan #870)
- Fix viewer for SeaMonkey (@veloman-yunkan #887)
- Make the downloader threadsafe (@mgautierfr #886)
- Add RSS feed in the main page (pointing to the catalog) (@juuz0 #882 #920)
- Correctly set the mimetype for json and ico (@veloman-yunkan #892)
- `count=-1` corresponds to unlimited count (instead of 0) (@veloman-yunkan #894)
- Keep the navigation bar on top (@juuz0 #896)
- Make the viewer's iframe "safe" (@veloman-yunkan #906 #930)
- Correctly escape search link in XML Opds output (@veloman-yunkan #936)
- Store values needed for the viewer js in the url fragment instead of the query string (@juuz0 #907)
- Get rid of legacy OPDS API usage in the viewer (@veloman-yunkan #939)
- Fix charset encoding declaration in OPDS response MIME types (@veloman-yunkan #942)
- Fix PDF in the viewer (@veloman-yunkan #940)
- Fix external links handling in the viewer (@veloman-yunkan #959)
- Add tests of searching with accents (@mgautierfr #954)
* Fix handling of missing illustration in the book (@veloman-yunkan #961)
* Add support for multi languages zim files (@veloman-yunkan #904)
* Fix includes for openbsd (@bentley #949)
* Fix paths in git to allow git clone on Windows (@adamlamar #868)
* Switch to `main` as principal branch (instead of `master`) (@kelson42)
* Remove libkiwix android publisher from the repository (@kelson42 #884)
* Various fixes of meson and CI. (@mgautierfr @kelson42)
libkiwix 12.0.0
===============
* [API Break] Remove wrapper around libzim (@mgautierfr #789)
* Allow kiwix-serve to use custom resource files (@veloman-yunkan #779)
* Properly handle searchProtocolPrefix when rendering search result (@veloman-yunkan #823)
* Prevent search on multi language content (@veloman-yunkan #838)
* Use new `zim::Archive::getMediaCount` from libzim (@mgautierfr #836)
* Catalog:
- Include tags in free text catalog search (@veloman-yunkan #802)
- Illustration's url is based on book's uuid (@veloman-yunkan #804)
- Cleanup of the opds-dumper (@veloman-yunkan #829)
- Allow filtering of catalog content using multiple languages (@veloman-yunkan #841)
- Make opds-dumper respect the namemapper (@mgautierfr #837)
* Server:
- Correctly handle `\` in suggestion json generation (@veloman-yunkan #843)
- Better http caching (@veloman-yunkan #833)
- Make `/suggest` endpoint thread-safe (@veloman-yunkan #834)
- Better redirection of main page (@veloman-yunkan #827)
- Remove jquery (@mgautierfr @juuz0 #796)
- Better Viewer of zim content :
. Introduce `/content` endpoints (@veloman-yunkan #806)
. Switch to iframe based content viewer (@veloman-yunkan #716)
- Optimised design of the welcome page:
. Alignment (@juuz0 @kelson42 #786)
. Exit download modal on pressing escape key (@juuz0 #800)
. Add favicon for different devices (@juuz0 #805)
. Fix auto hiding of the toolbar (@veloman-yunkan #821)
. Allow user to filter books by tags in the front page (@juuz0 #711)
* CI :
- Trigger CI on pull_request (@kelson42 #791)
- Drop Ubuntu Impish packaging (@legoktm #825)
- Add Ubuntu Kinetic packaging (@legoktm #801)
* Testing:
- Test ICULanguageInfo (@veloman-yunkan #795)
- Introduce fake `test` language to test i18n (@veloman-yunkan #848)
* Fix documentation (@kelson42 #816)
* Update translation (#787 #839 #847)
libkiwix 11.0.0
===============
* [server] Add support for internationalization (@veloman-yunkan #679)
* [server] Use gzip compression instead of deflate (@mgautierfr #757)
* [server] Version the static resources. This allows better invalidation of the
browser cache when resources are changed (@veloman-yunkan #712)
* [server|front] Use integer to query the host for page length (@juuz0 #772)
* [server] Improve multizim search API:
- Improvement of the cache system
- Better API to select on which books to search in.
- SysAdmin is now able to limit the number of books we search in for a multizim search
* [server] Introduce a opensearch API for multizim fulltext search
* [wrapper] Remove java wrapper
* Testing:
- Testing of search result pages content (@veloman-yunkan #765)
- Better testing structure of xml search result (@veloman-yunkan #780)
libkiwix 10.1.1
===============
* Correctly detect the number of article for older zims (<=6) (@mgautier #743)
* [server] Fix fulltext search (@mgautierfr #724)
* [server][internal] New way to build Error message (@veloman-yunkan #732 #738 #744)
* Fix CI (@mgautierfr #736)
libkiwix 10.1.0
===============
This release is an important one as it fixes an XSS vulnerability introduced
in libkiwix 10.0.0
* [SECURITY] Fix an XSS attack vulnerability (introduced in 10.0.0) (@juuz0 #721)
* [server] Add an option to set a limit on the number of connections per IP (@kelson42 #700)
* [server] Do not display a lang tag in the UI if the book has no language (@juuz0 #706)
* [server] Add the book title associated to a search results (@thavelick #705, @mgautierfr #718)
* Add `dc:issued` to opds output stream (@veloman-yunkan #715)
* Add handling of several languages not provided by ICU (@juuz0 #701)
* [server] Add a caching system for search and suggestion (@maneeshpm #620)
* Fix cross-compilation (@kelson42 #703)
* Add unit-testing of suggestions and error pages (@veloman-yunkan #709 #710 #727)
* Better testing system of html response (@veloman-yunkan #725)
libkiwix 10.0.1
===============
* [server] The catalog search interpret `count=0` as no limit.
This was the case for a long time. This was changed unintentionally
(@veloman-yunkan #686)
* [server] Correctly generate a human friendly title in the server frontend.
(@juuz0 #687, @kelson42 #689)
* [server] Fix download button if there is no url to download from.
(@juuz0 #691)
* Add non-minified isotope.pkdg.js
Needed for debian packaging as we need the source and minified version is
not the source (@legoktm #693)
* [server] Add a tooltip with the full language for the lang tag.
* CI fixes (@kelson42 @legoktm)
libkiwix 10.0.0
===============
This release is a huge release.
The project has been renamed to libkiwix, it is more coherent with the library name.
* Server front page :
- Use js in the front page to display the available book,
using the OPDS stream as source. The front page is now populated only with
the visible books and user can search for books. (@MananJethwany #530, #541, #534)
(@kelson42 #628)
- Revamp css (@MananJethwany #559)
- Correctly Convert 3iso language code to 2iso (@juuz0 #672)
* Server suggestions search :
- Add pagination for suggestion search (@maneeshpm #591)
- Fix suggestion system (@MananJethwany #498)
- Provide the kind and path (when adapted) to the suggestion answer (@MananJethwany #464)
- The displayed suggestions now have highlighting on the searched terms (@maneeshpm #505)
- Properly handle html encoding of suggestions (@veloman-yunkan #458)
* Server improvements :
- Remove meta endpoints (@mgautier #669)
- Add raw endpoints to get the raw content of a zim (@mgautierfr #646)
- Add details on 404 error pages (@soumyankar #490)
- Fix headbar insertion when `<head>` tag has attributes (@kelson42 #440)
- Better headbar insertion (after charset definition) (@kelson42 #442)
* New OPDS Stream v2 :
- Add a list of categories (@veloman-yunkan)
- Support for partial entries (@veloman-yunkan #602)
- Support multiple icons size in the OPDS stream (@veloman-yunkan #577 #630)
- Add language endpoint to catalog (@veloman-yunkan #553)
- Add illustration API to get the illustration of a book (@mgautierfr #645)
- OPDS search can now filter books by category (@veloman-yunkan #459)
* Library improvements :
- Allow the library to be live reloaded when the library.xml changes (@veloman-yunkan #636)
- Properly handle removing of book from the library (@veloman-yunkan #485)
- Use xapian to search for books in the library (@veloman-yunkan #460, #488)
* Added methods/functions :
- Fix `fileExist` and introduce `fileReadable` (@juuz0 #668)
- Add `getVersions` and `printVersions` functions (@kelson42 #665)
- Add `getNetworkInterfaces()` and `getBestPublicIP()` functions (@juuz0 #622)
- Add `get_zimid()` method to the search result (@maneeshpm #510)
* Various improvements :
- Better secret value for aria2c rpc (@juuz0 #666)
- Avoid duplicated Archive/Reader in the Searcher (@veloman-yunkan #648)
- Add basic documentation (@mgautierfr #640)
- Do not use Reader internally (@maneeshpm #536 #576)
- Remove dependency headers from our public headers (@mgautierfr #574)
- Downloader now doesn't write metalink on the filesystem (@kelson42 #502)
- Support opening a zim file using a fd (@veloman-yunkan #429)
- Use C++11 std::thread instead of pthread (@mgautierfr #445)
- [READER] Do not crash if zim file has no `Counter` metadata (@mgautierfr #449)
- Ensure libzim dependency is compiled with xapian (@mgautierfr #434)
- Support video and audio mimetype in `getMediaCount` (@kelson42 #439)
- Better parsing of the counterMap (@kelson42 #437)
- Adapt libkiwix to libzim 7.0.0 (@mgautierfr #428)
- Remove deprecated methods (@mgautierfr)
- CI: Build package for Ubuntu Hirsute, Impish and Jammy (@legoktm #431 #568) and remove Groovy
- Fix compilation for FreeBSD (@swills g#432)
- Many fixes and improvement (@MananJethwany, @maneeshpm, @veloman-yunkan, @mgautierfr)
* ...
kiwix-lib 9.4.1
===============

108
README.md
View File

@@ -5,12 +5,10 @@ The Libkiwix provides the [Kiwix](https://kiwix.org) software suite
core. It contains the code shared by all Kiwix ports (Windows,
GNU/Linux, macOS, Android, iOS, ...).
[![Release](https://img.shields.io/github/v/tag/kiwix/libkiwix?label=release&sort=semver)](https://download.kiwix.org/release/libkiwix/)
[![Repositories](https://img.shields.io/repology/repositories/libkiwix?label=repositories)](https://github.com/kiwix/libkiwix/wiki/Repology)
[![Build Status](https://github.com/kiwix/libkiwix/actions/workflows/ci.yml/badge.svg?branch=main)](https://github.com/kiwix/libkiwix/actions?query=branch%3Amain)
[![Doc](https://readthedocs.org/projects/libkiwix/badge/?style=flat)](https://libkiwix.readthedocs.org/en/latest/?badge=latest)
[![Build Status](https://github.com/kiwix/libkiwix/workflows/CI/badge.svg?query=branch%3Amaster)](https://github.com/kiwix/libkiwix/actions?query=branch%3Amaster)
[![CodeFactor](https://www.codefactor.io/repository/github/kiwix/libkiwix/badge)](https://www.codefactor.io/repository/github/kiwix/libkiwix)
[![Codecov](https://codecov.io/gh/kiwix/libkiwix/branch/main/graph/badge.svg)](https://codecov.io/gh/kiwix/libkiwix)
[![Codecov](https://codecov.io/gh/kiwix/libkiwix/branch/master/graph/badge.svg)](https://codecov.io/gh/kiwix/libkiwix)
[![License: GPL v3](https://img.shields.io/badge/License-GPLv3-blue.svg)](https://www.gnu.org/licenses/gpl-3.0)
Disclaimer
@@ -24,9 +22,9 @@ with the Libkiwix compilation itself, we recommend to have a look to
Preamble
--------
Although the Libkiwix can be (cross-)compiled on/for many systems, the
Although the Libkiwix can be (cross-)compiled on/for many sytems, the
following documentation explains how to do it on POSIX ones. It is
primarily thought for GNU/Linux systems and has been tested on recent
primarly thought for GNU/Linux systems and has been tested on recent
releases of Ubuntu and Fedora.
Dependencies
@@ -54,7 +52,7 @@ The following dependency needs to be available at runtime:
These dependencies may or may not be packaged by your operating
system. They may also be packaged but only in an older version. The
compilation script will tell you if one of them is missing or too old.
In the worst case, you will have to download and compile bleeding edge
In the worse case, you will have to download and compile bleeding edge
version by hand.
If you want to install these dependencies locally, then use the
@@ -93,41 +91,6 @@ Meson. If you want statically linked libraries, you can add
Depending on your system, `ninja` may be called `ninja-build`.
The android wrapper uses deprecated methods of libkiwix so it cannot be compiled
with `werror=true` (the default). So you must pass `-Dwerror=false` to meson:
```bash
meson . build -Dwrapper=android -Dwerror=false
ninja -C build
```
Static files compilation
------------------------
Libkiwix has a few static files 'compiled' within the binary
code. This is mostly Javascript/HTML/pictures necessary for the HTTP
daemon.
These static files are available in the `static` directory and are
compiled by custom Python code available in this repository `scripts`
directory. This happens automatically at compilation time without any
additional command to run.
To avoid HTTP caching issues, the URLs (to the static content) are
appended with a `cacheid` parameter (this is called "cache
busting"). This `cacheid` value is derived from the
[sha1sum](https://en.wikipedia.org/wiki/Sha1sum) of each targeted
static file. As a consequence, each time you change a static file, the
corresponding `cacheid` value will change.
To properly test this feature, this `cacheid` needs to be added
manually to the automated tests and has to be committed. After
modifying the needed static file, [run the automated
tests](#Testing). They will fail, but the inspection of the testing
log will give you the new `cacheid` value(s). Finally update
`test/server.cpp` with the appropriate `cacheid` value(s) which have
changed.
Testing
-------
@@ -151,7 +114,7 @@ where you want to install the libraries. After the installation
succeeded, you may need to run `ldconfig` (as `root`).
Uninstallation
--------------
------------
If you want to uninstall the Kiwix library:
```bash
@@ -161,55 +124,6 @@ ninja -C build uninstall
Like for the installation, you might need to run the command as `root`
(or using `sudo`).
Custom Index Page
-----------------
To use a custom welcome page, pass the `customIndexPage` argument to `kiwix::internalServer()` or use `kiwix::server->setCustomIndexTemplate()`.
(Note: when using a custom HTML file, please specify all external links as absolute paths.)
To create an HTML template with custom JS, have a look at the various OPDS-based endpoints described [here](https://wiki.kiwix.org/wiki/OPDS) to load books.
To use JS provided by kiwix-serve you can use the following template to start with ->
```
<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width,initial-scale=1" />
<title><-- Custom Tittle --></title>
<script src="{{root}}/skin/isotope.pkgd.min.js" defer></script>
<script src="{{root}}/skin/iso6391To3.js"></script>
<script type="text/javascript" src="{{root}}/skin/index.js" defer></script>
</head>
<body>
</body>
</html>
```
- To get books listed using `index.js` add - `<div class="book__list"></div>` under body tag.
- To get number of books listed add - `<h3 class="kiwixHomeBody__results"></h3>` under body tag.
- To add language select box add - `<select id="languageFilter"></select>` under body tag.
- To add category select box add - `<select id="categoryFilter"></select>` under body tag.
- To add search box for books use following form -
```
<form id='kiwixSearchForm'>
<input type="text" name="q" placeholder="Search" id="searchFilter" class='kiwixSearch filter'>
<input type="submit" class="kiwixButton" value="Search"/>
</form>
```
If you compile Libmicrohttpd manually, you might need to compile it
without GNU TLS; otherwise a bug there will prevent further
compilation.
If the compilation still fails, you might need to get a more recent
version of a dependency than the one packaged by your Linux
distribution. Try then with a source tarball distributed by the
problematic upstream project or even directly from the source code
repository.
Troubleshooting
---------------
@@ -232,6 +146,16 @@ cp ninja ../bin
cd ..
```
If you compile Libmicrohttpd manually, you might need to compile it
without GNU TLS; otherwise a bug there will prevent further
compilation.
If the compilation still fails, you might need to get a more recent
version of a dependency than the one packaged by your Linux
distribution. Try then with a source tarball distributed by the
problematic upstream project or even directly from the source code
repository.
License
-------

13
android-kiwix-lib-publisher/.gitignore vendored Normal file
View File

@@ -0,0 +1,13 @@
*.iml
.gradle
/local.properties
/.idea/caches
/.idea/libraries
/.idea/modules.xml
/.idea/workspace.xml
/.idea/navEditor.xml
/.idea/assetWizardSettings.xml
.DS_Store
/build
/captures
.externalNativeBuild

View File

@@ -0,0 +1,25 @@
// Top-level build file where you can add configuration options common to all sub-projects/modules.
buildscript {
repositories {
google()
jcenter()
}
dependencies {
classpath 'com.android.tools.build:gradle:3.4.1'
// NOTE: Do not place your application dependencies here; they belong
// in the individual module build.gradle files
}
}
allprojects {
repositories {
google()
jcenter()
}
}
task clean(type: Delete) {
delete rootProject.buildDir
}

View File

@@ -0,0 +1,15 @@
# Project-wide Gradle settings.
# IDE (e.g. Android Studio) users:
# Gradle settings configured through the IDE *will override*
# any settings specified in this file.
# For more details on how to configure your build environment visit
# http://www.gradle.org/docs/current/userguide/build_environment.html
# Specifies the JVM arguments used for the daemon process.
# The setting is particularly useful for tweaking memory settings.
org.gradle.jvmargs=-Xmx1536m
# When configured, Gradle will run in incubating parallel mode.
# This option should only be used with decoupled projects. More details, visit
# http://www.gradle.org/docs/current/userguide/multi_project_builds.html#sec:decoupled_projects
# org.gradle.parallel=true
# Kotlin code style for this project: "official" or "obsolete":
kotlin.code.style=official

View File

Binary file not shown.

View File

@@ -0,0 +1,6 @@
#Wed Jun 19 15:28:39 BST 2019
distributionBase=GRADLE_USER_HOME
distributionPath=wrapper/dists
zipStoreBase=GRADLE_USER_HOME
zipStorePath=wrapper/dists
distributionUrl=https\://services.gradle.org/distributions/gradle-5.1.1-all.zip

172
android-kiwix-lib-publisher/gradlew vendored Executable file
View File

@@ -0,0 +1,172 @@
#!/usr/bin/env sh
##############################################################################
##
## Gradle start up script for UN*X
##
##############################################################################
# Attempt to set APP_HOME
# Resolve links: $0 may be a link
PRG="$0"
# Need this for relative symlinks.
while [ -h "$PRG" ] ; do
ls=`ls -ld "$PRG"`
link=`expr "$ls" : '.*-> \(.*\)$'`
if expr "$link" : '/.*' > /dev/null; then
PRG="$link"
else
PRG=`dirname "$PRG"`"/$link"
fi
done
SAVED="`pwd`"
cd "`dirname \"$PRG\"`/" >/dev/null
APP_HOME="`pwd -P`"
cd "$SAVED" >/dev/null
APP_NAME="Gradle"
APP_BASE_NAME=`basename "$0"`
# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
DEFAULT_JVM_OPTS=""
# Use the maximum available, or set MAX_FD != -1 to use that value.
MAX_FD="maximum"
# warn: print a non-fatal diagnostic message.
# The message is written to stderr (via the redirection attached to the
# function body) so that it does not mix with output captured from stdout.
warn () {
    echo "$*"
} >&2
# die: print a fatal error message, framed by blank lines, and terminate the
# script with exit status 1.
# The message is written to stderr (via the redirection attached to the
# function body) so that it does not mix with output captured from stdout.
die () {
    echo
    echo "$*"
    echo
    exit 1
} >&2
# OS specific support (must be 'true' or 'false').
cygwin=false
msys=false
darwin=false
nonstop=false
case "`uname`" in
CYGWIN* )
cygwin=true
;;
Darwin* )
darwin=true
;;
MINGW* )
msys=true
;;
NONSTOP* )
nonstop=true
;;
esac
CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
# Determine the Java command to use to start the JVM.
if [ -n "$JAVA_HOME" ] ; then
if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
# IBM's JDK on AIX uses strange locations for the executables
JAVACMD="$JAVA_HOME/jre/sh/java"
else
JAVACMD="$JAVA_HOME/bin/java"
fi
if [ ! -x "$JAVACMD" ] ; then
die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME
Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
fi
else
JAVACMD="java"
which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
fi
# Increase the maximum file descriptors if we can.
if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then
MAX_FD_LIMIT=`ulimit -H -n`
if [ $? -eq 0 ] ; then
if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then
MAX_FD="$MAX_FD_LIMIT"
fi
ulimit -n $MAX_FD
if [ $? -ne 0 ] ; then
warn "Could not set maximum file descriptor limit: $MAX_FD"
fi
else
warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT"
fi
fi
# For Darwin, add options to specify how the application appears in the dock
if $darwin; then
GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\""
fi
# For Cygwin, switch paths to Windows format before running java
if $cygwin ; then
APP_HOME=`cygpath --path --mixed "$APP_HOME"`
CLASSPATH=`cygpath --path --mixed "$CLASSPATH"`
JAVACMD=`cygpath --unix "$JAVACMD"`
# We build the pattern for arguments to be converted via cygpath
ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null`
SEP=""
for dir in $ROOTDIRSRAW ; do
ROOTDIRS="$ROOTDIRS$SEP$dir"
SEP="|"
done
OURCYGPATTERN="(^($ROOTDIRS))"
# Add a user-defined pattern to the cygpath arguments
if [ "$GRADLE_CYGPATTERN" != "" ] ; then
OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)"
fi
# Now convert the arguments - kludge to limit ourselves to /bin/sh
i=0
for arg in "$@" ; do
CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -`
CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option
if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition
eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"`
else
eval `echo args$i`="\"$arg\""
fi
i=$((i+1))
done
case $i in
(0) set -- ;;
(1) set -- "$args0" ;;
(2) set -- "$args0" "$args1" ;;
(3) set -- "$args0" "$args1" "$args2" ;;
(4) set -- "$args0" "$args1" "$args2" "$args3" ;;
(5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;;
(6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;;
(7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;;
(8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;;
(9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;;
esac
fi
# Escape application args
# save: for each argument, print it wrapped in single quotes, with every
# embedded single quote rewritten as '\'' and a trailing " \" appended after
# the closing quote; then print a final line holding a single space.
# The sed program anchors the opening quote to the first line (1s) and the
# closing quote to the last line ($s) of each argument's text.
# The result is captured into APP_ARGS and re-parsed by `eval set --` below.
save () {
for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done
echo " "
}
APP_ARGS=$(save "$@")
# Collect all arguments for the java command, following the shell quoting and substitution rules
eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS"
# by default we should be in the correct project dir, but when run from Finder on Mac, the cwd is wrong
if [ "$(uname)" = "Darwin" ] && [ "$HOME" = "$PWD" ]; then
cd "$(dirname "$0")"
fi
exec "$JAVACMD" "$@"

84
android-kiwix-lib-publisher/gradlew.bat vendored Executable file
View File

@@ -0,0 +1,84 @@
@if "%DEBUG%" == "" @echo off
@rem ##########################################################################
@rem
@rem Gradle startup script for Windows
@rem
@rem ##########################################################################
@rem Set local scope for the variables with windows NT shell
if "%OS%"=="Windows_NT" setlocal
set DIRNAME=%~dp0
if "%DIRNAME%" == "" set DIRNAME=.
set APP_BASE_NAME=%~n0
set APP_HOME=%DIRNAME%
@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
set DEFAULT_JVM_OPTS=
@rem Find java.exe
if defined JAVA_HOME goto findJavaFromJavaHome
set JAVA_EXE=java.exe
%JAVA_EXE% -version >NUL 2>&1
if "%ERRORLEVEL%" == "0" goto init
echo.
echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
echo.
echo Please set the JAVA_HOME variable in your environment to match the
echo location of your Java installation.
goto fail
:findJavaFromJavaHome
set JAVA_HOME=%JAVA_HOME:"=%
set JAVA_EXE=%JAVA_HOME%/bin/java.exe
if exist "%JAVA_EXE%" goto init
echo.
echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
echo.
echo Please set the JAVA_HOME variable in your environment to match the
echo location of your Java installation.
goto fail
:init
@rem Get command-line arguments, handling Windows variants
if not "%OS%" == "Windows_NT" goto win9xME_args
:win9xME_args
@rem Slurp the command line arguments.
set CMD_LINE_ARGS=
set _SKIP=2
:win9xME_args_slurp
if "x%~1" == "x" goto execute
set CMD_LINE_ARGS=%*
:execute
@rem Setup the command line
set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar
@rem Execute Gradle
"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS%
:end
@rem End local scope for the variables with windows NT shell
if "%ERRORLEVEL%"=="0" goto mainEnd
:fail
rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
rem the _cmd.exe /c_ return code!
if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1
exit /b 1
:mainEnd
if "%OS%"=="Windows_NT" endlocal
:omega

View File

@@ -0,0 +1 @@
/build

View File

@@ -0,0 +1,64 @@
apply plugin: 'com.android.library'
apply plugin: 'maven'
android {
compileSdkVersion 28
defaultConfig {
minSdkVersion 15
targetSdkVersion 28
versionCode 1
versionName "1.0"
}
buildTypes {
release {
minifyEnabled false
proguardFiles getDefaultProguardFile('proguard-android-optimize.txt'), 'proguard-rules.pro'
}
}
}
dependencies {
implementation 'com.getkeepsafe.relinker:relinker:1.3.1'
}
task writePom {
pom {
project {
groupId 'org.kiwix.kiwixlib'
artifactId 'kiwixlib'
version '10.0.0' + (System.env.KIWIXLIB_BUILDVERSION == null ? '' : '-'+System.env.KIWIXLIB_BUILDVERSION)
packaging 'aar'
name 'kiwixlib'
url 'https://github.com/kiwix/libkiwix'
licenses {
license {
name 'GPLv3'
url 'https://www.gnu.org/licenses/gpl-3.0.en.html'
}
}
developers {
developer {
id 'kiwix'
name 'kiwix'
email 'contact@kiwix.org'
}
}
scm {
connection 'https://github.com/kiwix/libkiwix.git'
developerConnection 'https://github.com/kiwix/libkiwix.git'
url 'https://github.com/kiwix/libkiwix'
}
}
}.withXml {
def dependenciesNode = asNode().appendNode('dependencies')
//Iterate over the implementation dependencies, adding a <dependency> node for each
configurations.implementation.allDependencies.each {
def dependencyNode = dependenciesNode.appendNode('dependency')
dependencyNode.appendNode('groupId', it.group)
dependencyNode.appendNode('artifactId', it.name)
dependencyNode.appendNode('version', it.version)
}
}.writeTo("$buildDir/pom.xml")
}

View File

@@ -0,0 +1,21 @@
# Add project specific ProGuard rules here.
# You can control the set of applied configuration files using the
# proguardFiles setting in build.gradle.
#
# For more details, see
# http://developer.android.com/guide/developing/tools/proguard.html
# If your project uses WebView with JS, uncomment the following
# and specify the fully qualified class name to the JavaScript interface
# class:
#-keepclassmembers class fqcn.of.javascript.interface.for.webview {
# public *;
#}
# Uncomment this to preserve the line number information for
# debugging stack traces.
#-keepattributes SourceFile,LineNumberTable
# If you keep the line number information, uncomment this to
# hide the original source file name.
#-renamesourcefileattribute SourceFile

View File

@@ -0,0 +1,10 @@
<manifest xmlns:android="http://schemas.android.com/apk/res/android"
package="org.kiwix.kiwixlib">
<application
android:allowBackup="true"
android:supportsRtl="true">
</application>
</manifest>

View File

@@ -0,0 +1 @@
include ':kiwixLibAndroid'

17
debian/control vendored
View File

@@ -3,12 +3,13 @@ Priority: optional
Maintainer: Kiwix team <kiwix@kiwix.org>
Build-Depends: debhelper-compat (= 13),
meson,
pkgconf,
libzim-dev (>= 9.0), libzim-dev (<< 10.0),
pkg-config,
libzim-dev (>= 6.1.8),
libcurl4-gnutls-dev,
libicu-dev,
libgtest-dev,
libkainjow-mustache-dev,
liblzma-dev,
libmicrohttpd-dev,
libpugixml-dev,
zlib1g-dev
@@ -21,13 +22,12 @@ Package: libkiwix-dev
Section: libdevel
Architecture: any
Multi-Arch: same
Depends: libkiwix14 (= ${binary:Version}), ${misc:Depends}, python3,
libzim-dev (>= 9.0), libzim-dev (<< 10.0),
Depends: libkiwix10 (= ${binary:Version}), ${misc:Depends}, python3,
libzim-dev (>= 6.0.0),
libicu-dev,
libpugixml-dev,
libcurl4-gnutls-dev,
libmicrohttpd-dev,
zlib1g-dev
libmicrohttpd-dev
Description: library of common code for Kiwix (development)
Kiwix is an offline Wikipedia reader. libkiwix provides the
software core for Kiwix, and contains the code shared by all
@@ -35,12 +35,11 @@ Description: library of common code for Kiwix (development)
.
This package contains development files.
Package: libkiwix14
Package: libkiwix10
Architecture: any
Multi-Arch: same
Depends: ${shlibs:Depends}, ${misc:Depends}, aria2
Conflicts: libkiwix0, libkiwix3, libkiwix9, libkiwix10, libkiwix11, libkiwix12, libkiwix13
Replaces: libkiwix0, libkiwix3, libkiwix9, libkiwix10, libkiwix11, libkiwix12, libkiwix13
Conflicts: libkiwix0, libkiwix3, libkiwix9
Description: library of common code for Kiwix
Kiwix is an offline Wikipedia reader. libkiwix provides the
software core for Kiwix, and contains the code shared by all

View File

@@ -1,2 +1 @@
usr/share/man/man1/kiwix-compile-resources.1*
usr/share/man/man1/kiwix-compile-i18n.1*

View File

2
docs/.gitignore vendored
View File

@@ -1,2 +0,0 @@
api
xml

View File

@@ -1,68 +0,0 @@
# Configuration file for the Sphinx documentation builder.
#
# This file only contains a selection of the most common options. For a full
# list see the documentation:
# https://www.sphinx-doc.org/en/master/usage/configuration.html
# -- Path setup --------------------------------------------------------------
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#
import os
# import sys
# sys.path.insert(0, os.path.abspath('.'))
# -- Project information -----------------------------------------------------
project = 'libkiwix'
copyright = '2022, libkiwix-team'
author = 'libkiwix-team'
# -- General configuration ---------------------------------------------------
# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
'breathe',
'exhale'
]
# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path.
exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
html_theme = 'sphinx_rtd_theme'
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']
breathe_projects = {
"libkiwix": "./xml"
}
breathe_default_project = 'libkiwix'
exhale_args = {
"containmentFolder": "./api",
"rootFileName": "ref_api.rst",
"rootFileTitle": "Reference API",
"doxygenStripFromPath":"..",
"treeViewIsBootstrap": True,
"createTreeView" : True,
"exhaleExecutesDoxygen": True,
"exhaleDoxygenStdin": "INPUT = ../include"
}
primary_domain = 'cpp'
highlight_language = 'cpp'

View File

@@ -1,14 +0,0 @@
.. libkiwix documentation master file, created by
sphinx-quickstart on Fri Jul 24 15:40:50 2020.
You can adapt this file completely to your liking, but it should at least
contain the root `toctree` directive.
Welcome to libkiwix's documentation!
====================================
.. toctree::
:maxdepth: 2
:caption: Contents:
usage
api/ref_api

View File

@@ -1,7 +0,0 @@
sphinx = find_program('sphinx-build', native:true)
sphinx_target = run_target('doc',
command: [sphinx, '-bhtml',
meson.current_source_dir(),
meson.current_build_dir()])

View File

@@ -1,3 +0,0 @@
breathe
exhale
sphinx_rtd_theme

View File

@@ -1,15 +0,0 @@
Libkiwix programming
====================
Introduction
------------
libkiwix is written in C++. To use the library, you need the include files of libkiwix and
have to link against libzim.
Errors are handled with exceptions. When something goes wrong, libkiwix throws an error,
which is always derived from std::exception.
All classes are defined in the namespace kiwix.
libkiwix is a set of tools to manage zim files and provide some common functionality.

36
format_code.sh Executable file
View File

@@ -0,0 +1,36 @@
#!/usr/bin/env bash
# Reformat the source files listed below, in place, with clang-format.
# `-style=file` makes clang-format use the project's style file.
#
# bash is located through `env` rather than a hard-coded /usr/bin/bash,
# because bash is not installed at that path on every system.

files=(
    "include/library.h"
    "include/common/stringTools.h"
    "include/common/pathTools.h"
    "include/common/otherTools.h"
    "include/common/regexTools.h"
    "include/common/networkTools.h"
    "include/manager.h"
    "include/reader.h"
    "include/kiwix.h"
    "include/xapianSearcher.h"
    "include/searcher.h"
    "src/library.cpp"
    "src/android/kiwix.cpp"
    "src/android/org/kiwix/kiwixlib/JNIKiwixBool.java"
    "src/android/org/kiwix/kiwixlib/JNIKiwix.java"
    "src/android/org/kiwix/kiwixlib/JNIKiwixString.java"
    "src/android/org/kiwix/kiwixlib/JNIKiwixInt.java"
    "src/searcher.cpp"
    "src/common/pathTools.cpp"
    "src/common/regexTools.cpp"
    "src/common/otherTools.cpp"
    "src/common/networkTools.cpp"
    "src/common/stringTools.cpp"
    "src/xapianSearcher.cpp"
    "src/manager.cpp"
    "src/reader.cpp"
)

# Every expansion is quoted so that a path containing whitespace or glob
# characters reaches clang-format as one literal argument.
for i in "${files[@]}"
do
    echo "$i"
    clang-format -i -style=file "$i"
done

View File

@@ -21,54 +21,28 @@
#define KIWIX_BOOK_H
#include <string>
#include <vector>
#include <memory>
#include <mutex>
#include "common.h"
namespace pugi {
class xml_node;
}
namespace zim {
class Archive;
}
namespace kiwix
{
class OPDSDumper;
class Reader;
/**
* A class to store information about a book (a zim file)
*/
class Book
{
public: // types
class Illustration
{
friend class Book;
public:
uint16_t width = 48;
uint16_t height = 48;
std::string mimeType;
std::string url;
const std::string& getData() const;
private:
mutable std::string data;
mutable std::mutex mutex;
};
typedef std::vector<std::shared_ptr<const Illustration>> Illustrations;
public: // functions
public:
Book();
~Book();
bool update(const Book& other);
void update(const zim::Archive& archive);
void update(const Reader& reader);
void updateFromXml(const pugi::xml_node& node, const std::string& baseDir);
void updateFromOpds(const pugi::xml_node& node, const std::string& urlHost);
std::string getHumanReadableIdFromPath() const;
@@ -79,9 +53,7 @@ class Book
bool isPathValid() const { return m_pathValid; }
const std::string& getTitle() const { return m_title; }
const std::string& getDescription() const { return m_description; }
DEPRECATED const std::string& getLanguage() const { return m_language; }
const std::string& getCommaSeparatedLanguages() const { return m_language; }
const std::vector<std::string> getLanguages() const;
const std::string& getLanguage() const { return m_language; }
const std::string& getCreator() const { return m_creator; }
const std::string& getPublisher() const { return m_publisher; }
const std::string& getDate() const { return m_date; }
@@ -96,13 +68,9 @@ class Book
const uint64_t& getArticleCount() const { return m_articleCount; }
const uint64_t& getMediaCount() const { return m_mediaCount; }
const uint64_t& getSize() const { return m_size; }
DEPRECATED const std::string& getFavicon() const;
DEPRECATED const std::string& getFaviconUrl() const;
DEPRECATED const std::string& getFaviconMimeType() const;
Illustrations getIllustrations() const;
std::shared_ptr<const Illustration> getIllustration(unsigned int size) const;
const std::string& getFavicon() const;
const std::string& getFaviconUrl() const { return m_faviconUrl; }
const std::string& getFaviconMimeType() const { return m_faviconMimeType; }
const std::string& getDownloadId() const { return m_downloadId; }
void setReadOnly(bool readOnly) { m_readOnly = readOnly; }
@@ -123,13 +91,14 @@ class Book
void setArticleCount(uint64_t articleCount) { m_articleCount = articleCount; }
void setMediaCount(uint64_t mediaCount) { m_mediaCount = mediaCount; }
void setSize(uint64_t size) { m_size = size; }
void setFavicon(const std::string& favicon) { m_favicon = favicon; }
void setFaviconMimeType(const std::string& faviconMimeType) { m_faviconMimeType = faviconMimeType; }
void setDownloadId(const std::string& downloadId) { m_downloadId = downloadId; }
private: // functions
private:
std::string getCategoryFromTags() const;
const Illustration& getDefaultIllustration() const;
protected: // data
protected:
std::string m_id;
std::string m_downloadId;
std::string m_path;
@@ -150,11 +119,9 @@ class Book
uint64_t m_mediaCount = 0;
bool m_readOnly = false;
uint64_t m_size = 0;
Illustrations m_illustrations;
// Used as the return value of getDefaultIllustration() when no default
// illustration is found in the book
static const Illustration missingDefaultIllustration;
mutable std::string m_favicon;
std::string m_faviconUrl;
std::string m_faviconMimeType;
};
}

View File

@@ -29,33 +29,19 @@ class xml_node;
namespace kiwix
{
class Book;
/**
* A class to store information about a bookmark (an article in a book)
*/
class Bookmark
{
public:
/**
* Create an empty bookmark.
*
* Bookmark must be populated with `set*` methods
*/
Bookmark();
/**
* Create a bookmark given a Book, a path and a title.
*/
Bookmark(const Book& book, const std::string& path, const std::string& title);
~Bookmark();
void updateFromXml(const pugi::xml_node& node);
const std::string& getBookId() const { return m_bookId; }
const std::string& getBookTitle() const { return m_bookTitle; }
const std::string& getBookName() const { return m_bookName; }
const std::string& getBookFlavour() const { return m_bookFlavour; }
const std::string& getUrl() const { return m_url; }
const std::string& getTitle() const { return m_title; }
const std::string& getLanguage() const { return m_language; }
@@ -63,8 +49,6 @@ class Bookmark
void setBookId(const std::string& bookId) { m_bookId = bookId; }
void setBookTitle(const std::string& bookTitle) { m_bookTitle = bookTitle; }
void setBookName(const std::string& bookName) { m_bookName = bookName; }
void setBookFlavour(const std::string& bookFlavour) { m_bookFlavour = bookFlavour; }
void setUrl(const std::string& url) { m_url = url; }
void setTitle(const std::string& title) { m_title = title; }
void setLanguage(const std::string& language) { m_language = language; }
@@ -73,8 +57,6 @@ class Bookmark
protected:
std::string m_bookId;
std::string m_bookTitle;
std::string m_bookName;
std::string m_bookFlavour;
std::string m_url;
std::string m_title;
std::string m_language;

View File

@@ -16,7 +16,6 @@
namespace kiwix {
enum class IpMode { IPV4, IPV6, ALL, AUTO }; // AUTO: Server decides the protocol
typedef zim::size_type size_type;
typedef zim::offset_type offset_type;

View File

@@ -25,7 +25,6 @@
#include <map>
#include <memory>
#include <stdexcept>
#include <mutex>
namespace kiwix
{
@@ -44,14 +43,6 @@ class AriaError : public std::runtime_error {
};
/**
* A representation of a current download.
*
* `Download` is not thread safe. User must care to not call method on a
* same download from different threads.
* However, it is safe to use different `Download`s from different threads.
*/
class Download {
public:
typedef enum { K_ACTIVE, K_WAITING, K_PAUSED, K_ERROR, K_COMPLETE, K_REMOVED, K_UNKNOWN } StatusResult;
@@ -62,89 +53,19 @@ class Download {
: mp_aria(p_aria),
m_status(K_UNKNOWN),
m_did(did) {};
/**
* Update the status of the download.
*
* This call make an aria rpc call and is blocking.
* Some download (started with a metalink) are in fact several downloads.
* - A first one to download the metalink.
* - A second one to download the real file.
*
* If `follow` is true, updateStatus tries to detect that and tracks
* the second download when the first one is finished.
* By passing false to `follow`, `Download` will only track the first download.
*
* `getFoo` methods are based on the last statusUpdate.
*
* @param follow: Do we have to follow following downloads.
*/
void updateStatus(bool follow);
/**
* Pause the download (and call updateStatus)
*/
void updateStatus(bool follow=false);
void pauseDownload();
/**
* Resume the download (and call updateStatus)
*/
void resumeDownload();
/**
* Cancel the download.
*
* A canceled download cannot be resumed and updateStatus does nothing.
* However, you can still get information based on the last known information.
*/
void cancelDownload();
/*
* Get the status of the download.
*/
StatusResult getStatus() const { return m_status; }
/*
* Get the id of the download.
*/
const std::string& getDid() const { return m_did; }
/*
* Get the id of the "second" download.
*
* Set only if the "first" download is a metalink and is complete.
*/
const std::string& getFollowedBy() const { return m_followedBy; }
/*
* Get the total length of the download.
*/
uint64_t getTotalLength() const { return m_totalLength; }
/*
* Get the completed length of the download.
*/
uint64_t getCompletedLength() const { return m_completedLength; }
/*
* Get the download speed of the download.
*/
uint64_t getDownloadSpeed() const { return m_downloadSpeed; }
/*
* Get the verified length of the download.
*/
uint64_t getVerifiedLength() const { return m_verifiedLength; }
/*
* Get the path (local file) of the download.
*/
const std::string& getPath() const { return m_path; }
/*
* Get the download uris of the download.
*/
const std::vector<std::string>& getUris() const { return m_uris; }
StatusResult getStatus() { return m_status; }
std::string getDid() { return m_did; }
std::string getFollowedBy() { return m_followedBy; }
uint64_t getTotalLength() { return m_totalLength; }
uint64_t getCompletedLength() { return m_completedLength; }
uint64_t getDownloadSpeed() { return m_downloadSpeed; }
uint64_t getVerifiedLength() { return m_verifiedLength; }
std::string getPath() { return m_path; }
std::vector<std::string>& getUris() { return m_uris; }
protected:
std::shared_ptr<Aria2> mp_aria;
@@ -162,69 +83,23 @@ class Download {
/**
* A tool to download things.
*
* A Downloader manages `Download` using aria2 in the background.
* `Downloader` is threadsafe.
* However, the returned `Download`s are NOT threadsafe.
*/
class Downloader
{
public: // types
typedef std::vector<std::pair<std::string, std::string>> Options;
public: // functions
/*
* Create a new Downloader object.
*
* @param sessionFileDir: The directory where aria2 will store its session file.
*/
explicit Downloader(std::string sessionFileDir);
public:
Downloader();
virtual ~Downloader();
void close();
/**
* Start a new download.
*
* This method is thread safe and returns a pointer to a newly created
* `Download` or an existing one with a matching URI. In the latter case
* the options parameter is ignored, which can lead to surprising results.
* For example, if the old and new download requests (sharing the same URI)
* have different values for the download directory or output file name
* options, after the download is reported to be complete the downloaded file
* will be present only at the location specified for the first request.
*
* User should call `update` on the returned `Download` to have an accurate status.
*
* @param uri: The uri of the thing to download.
* @param downloadDir: The download directory where the thing should be stored (takes precedence over any "dir" in `options`).
* @param options: A series of pair <option_name, option_value> to pass to aria.
* @return: The newly created Download.
*/
std::shared_ptr<Download> startDownload(const std::string& uri, const std::string& downloadDir, Options options = {});
Download* startDownload(const std::string& uri, const std::vector<std::pair<std::string, std::string>>& options = {});
Download* getDownload(const std::string& did);
/**
* Get a download corresponding to a download id (did)
* User should call `update` on the returned `Download` to have an accurate status.
*
* @param did: The download id to search for.
* @return: The Download corresponding to did.
* @throw: Throw std::out_of_range if did is not found.
*/
std::shared_ptr<Download> getDownload(const std::string& did);
size_t getNbDownload() { return m_knownDownloads.size(); }
std::vector<std::string> getDownloadIds();
/**
* Get the number of downloads currently managed.
*/
size_t getNbDownload() const;
/**
* Get the ids of the managed downloads.
*/
std::vector<std::string> getDownloadIds() const;
private: // data
mutable std::mutex m_lock;
std::map<std::string, std::shared_ptr<Download>> m_knownDownloads;
private:
std::map<std::string, std::unique_ptr<Download>> m_knownDownloads;
std::shared_ptr<Aria2> mp_aria;
};
}

183
include/entry.h Normal file
View File

@@ -0,0 +1,183 @@
/*
* Copyright 2018-2020 Matthieu Gautier <mgautier@kymeria.fr>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*/
#ifndef KIWIX_ENTRY_H
#define KIWIX_ENTRY_H
#include <stdio.h>
#include <zim/entry.h>
#include <zim/item.h>
#include <exception>
#include <string>
#include "common.h"
using namespace std;
namespace kiwix
{
/**
* Exception type signalling that a requested entry does not exist
* (e.g. no entry matches a path, or an entry has no redirect target).
*/
class NoEntry : public std::exception {};
/**
* An Entry represents an entry in a zim file.
*
* It wraps a zim::Entry (accessible through getZimEntry()).
*/
class Entry
{
public:
/**
* Construct an entry wrapping a zim entry.
*
* @param entry the zim::Entry object to wrap.
*/
Entry(zim::Entry entry);
virtual ~Entry() = default;
/**
* Get the path of the entry.
*
* The path is the "key" of an entry.
*
* @return the path of the entry.
*/
std::string getPath() const { return entry.getPath(); }
/**
* Get the title of the entry.
*
* @return the title of the entry.
*/
std::string getTitle() const { return entry.getTitle(); }
/**
* Get the content of the entry.
*
* The string is a copy of the content.
* If you don't want to make a copy, use getBlob().
*
* @return the content of the entry.
*/
std::string getContent() const { return entry.getItem().getData(); }
/**
* Get the blob of the entry.
*
* A blob references the content without copying it.
*
* @param offset The starting offset of the blob.
* @return the blob of the entry.
*/
zim::Blob getBlob(offset_type offset = 0) const { return entry.getItem().getData(offset); }
/**
* Get the blob of the entry.
*
* A blob references the content without copying it.
*
* @param offset The starting offset of the blob.
* @param size The size of the blob.
* @return the blob of the entry.
*/
zim::Blob getBlob(offset_type offset, size_type size) const { return entry.getItem().getData(offset, size); }
/**
* Get the info for direct access to the content of the entry.
*
* Some entries (i.e. binary ones) have their content stored plain
* in the zim file. Knowing the offset where the content is stored,
* a user can read the content directly from the zim file, bypassing
* libkiwix/libzim.
*
* @return A pair specifying where to read the content.
* The string is the real file to read (it may be different from the
* .zim file if the zim is split).
* The offset is the offset to read at in the file.
* Return <"",0> if it is not possible to read directly.
*/
zim::Item::DirectAccessInfo getDirectAccessInfo() const { return entry.getItem().getDirectAccessInformation(); }
/**
* Get the size of the entry.
*
* @return the size of the entry.
*/
size_type getSize() const;
/**
* Get the mime type of the entry.
*
* @return the mime type of the entry.
*/
std::string getMimetype() const;
/**
* Tell whether the entry is a redirect entry.
*
* @return True if the entry is a redirect.
*/
bool isRedirect() const;
/**
* Tell whether the entry is a link target entry.
*
* @return True if the entry is a link target.
*/
bool isLinkTarget() const;
/**
* Tell whether the entry is a deleted entry.
*
* @return True if the entry is a deleted entry.
*/
bool isDeleted() const;
/**
* Get the entry pointed to by this entry.
*
* @return the entry pointed to.
* @throw NoEntry if the entry is not a redirect entry.
*/
Entry getRedirectEntry() const;
/**
* Get the final entry pointed to by this entry.
*
* Follow the redirections until a non-redirecting entry is found.
* If the entry is not a redirect entry, return the entry itself.
*
* @return the final entry.
*/
Entry getFinalEntry() const;
/**
* Get the zim entry wrapped by this (kiwix) entry.
*
* @return a const reference to the wrapped zim entry.
*/
const zim::Entry& getZimEntry() const { return entry; }
private:
// The wrapped zim entry; all accessors above forward to it.
zim::Entry entry;
};
}
#endif // KIWIX_ENTRY_H

View File

@@ -1,92 +0,0 @@
/*
* Copyright 2024 Veloman Yunkan <veloman.yunkan@gmail.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*/
#ifndef KIWIX_I18N
#define KIWIX_I18N
#include <map>
#include <string>
namespace kiwix
{

// Returns the string registered under `key` for the language `lang`.
// Declaration only; the lookup itself is implemented elsewhere.
std::string getTranslatedString(const std::string& lang, const std::string& key);

namespace i18n
{

// Named parameters (name -> value) of a parameterized message.
using Parameters = std::map<std::string, std::string>;

// Same lookup as getTranslatedString(), taking `params` for the
// parameterized message. Declaration only; implemented elsewhere.
std::string expandParameterizedString(const std::string& lang,
                                      const std::string& key,
                                      const Parameters& params);

/**
 * Function object bound to one language.
 *
 * Calling it with a key forwards to getTranslatedString(); calling it with a
 * key and parameters forwards to expandParameterizedString(). In both cases
 * the language given at construction is used.
 */
class GetTranslatedString
{
  public:
    explicit GetTranslatedString(const std::string& lang)
      : m_lang(lang)
    {}

    std::string operator()(const std::string& key) const
    {
      return getTranslatedString(m_lang, key);
    }

    std::string operator()(const std::string& key, const Parameters& params) const
    {
      return expandParameterizedString(m_lang, key, params);
    }

  private:
    const std::string m_lang;
};

} // namespace i18n

/**
 * A message id together with the parameters of the message.
 *
 * Both are copied at construction and are immutable afterwards.
 */
class ParameterizedMessage
{
  public: // types
    using Parameters = i18n::Parameters;

  public: // functions
    ParameterizedMessage(const std::string& msgId, const Parameters& params)
      : msgId(msgId),
        params(params)
    {}

    // Text of the message for the language `lang` (implemented elsewhere).
    std::string getText(const std::string& lang) const;

    const std::string& getMsgId() const { return msgId; }
    const Parameters& getParams() const { return params; }

  private: // data
    const std::string msgId;
    const Parameters params;
};

// Builds a ParameterizedMessage for `msgId` with an empty parameter set.
inline ParameterizedMessage nonParameterizedMessage(const std::string& msgId)
{
  return ParameterizedMessage(msgId, ParameterizedMessage::Parameters());
}

std::string translateBookCategory(const std::string& lang, const std::string& category);

} // namespace kiwix
#endif // KIWIX_I18N

View File

@@ -22,4 +22,4 @@
#include "library.h"
#endif
#endif

View File

@@ -24,9 +24,6 @@
#include <vector>
#include <map>
#include <memory>
#include <mutex>
#include <utility>
#include <zim/archive.h>
#include <zim/search.h>
#include "book.h"
#include "bookmark.h"
@@ -34,10 +31,6 @@
#define KIWIX_LIBRARY_VERSION "20110515"
namespace Xapian {
class WritableDatabase;
};
namespace kiwix
{
@@ -55,22 +48,6 @@ enum supportedListMode {
NOVALID = 1 << 5
};
/**
* How bookmark migration is allowed to pick a target book.
*/
enum MigrationMode {
/** When migrating bookmarks, do not allow migrating to a book older than the
* one currently pointed to (or than the date stored in the bookmark if the
* book is invalid).
*
* If no newer book is found, no upgrade is made.
*/
UPGRADE_ONLY = 0,
/** Try hard to do a migration. This mostly does:
* - Try to find a newer book.
* - If the book is invalid: find the best book, potentially an older one.
* An older book will never be returned if the current book is a valid one.
*/
ALLOW_DOWNGRADE = 1,
};
class Filter {
public: // types
using Tags = std::vector<std::string>;
@@ -87,7 +64,6 @@ class Filter {
std::string _query;
bool _queryIsPartial;
std::string _name;
std::string _flavour;
public: // functions
Filter();
@@ -126,30 +102,13 @@ class Filter {
Filter& acceptTags(const Tags& tags);
Filter& rejectTags(const Tags& tags);
/**
* Set the filter to only accept books in the specified category.
*
* Multiple categories can be specified as a comma-separated list (in
* which case a book in any of those categories will match).
*/
Filter& category(std::string category);
/**
* Set the filter to only accept books in the specified language.
*
* Multiple languages can be specified as a comma-separated list (in
* which case a book in any of those languages will match).
*/
Filter& lang(std::string lang);
Filter& publisher(std::string publisher);
Filter& creator(std::string creator);
Filter& maxSize(size_t size);
Filter& query(std::string query, bool partial=true);
Filter& name(std::string name);
Filter& flavour(std::string flavour);
Filter& clearLang();
Filter& clearCategory();
bool hasQuery() const;
const std::string& getQuery() const { return _query; }
@@ -170,9 +129,6 @@ class Filter {
bool hasCreator() const;
const std::string& getCreator() const { return _creator; }
bool hasFlavour() const;
const std::string& getFlavour() const { return _flavour; }
const Tags& getAcceptTags() const { return _acceptTags; }
const Tags& getRejectTags() const { return _rejectTags; }
@@ -183,68 +139,31 @@ private: // functions
};
/**
 * A zim::Searcher bundled with a mutex.
 *
 * The class itself does not lock anything: callers obtain a lock with
 * getLock() and keep it alive while they use the searcher.
 */
class ZimSearcher : public zim::Searcher
{
  public:
    /**
     * Build a ZimSearcher by taking over an existing zim::Searcher.
     *
     * @param searcher The searcher to take over. It is moved from: a named
     *                 rvalue reference is an lvalue, so without std::move the
     *                 base class would be copy-constructed instead.
     */
    explicit ZimSearcher(zim::Searcher&& searcher)
      : zim::Searcher(std::move(searcher))
    {}

    /**
     * Lock the searcher's mutex.
     *
     * @return A std::unique_lock owning the mutex; the mutex is released when
     *         the returned lock is destroyed (or explicitly unlocked).
     */
    std::unique_lock<std::mutex> getLock() {
      return std::unique_lock<std::mutex>(m_mutex);
    }

    virtual ~ZimSearcher() = default;

  private:
    // Mutex handed out through getLock().
    std::mutex m_mutex;
};
template<typename, typename>
class ConcurrentCache;
template<typename, typename>
class MultiKeyCache;
using LibraryPtr = std::shared_ptr<Library>;
using ConstLibraryPtr = std::shared_ptr<const Library>;
// Some compiler we use don't have [[nodiscard]] attribute.
// We don't want to declare `create` with it in this case.
#define LIBKIWIX_NODISCARD
#if defined __has_cpp_attribute
#if __has_cpp_attribute (nodiscard)
#undef LIBKIWIX_NODISCARD
#define LIBKIWIX_NODISCARD [[nodiscard]]
#endif
#endif
/**
* A Library store several books.
*/
class Library: public std::enable_shared_from_this<Library>
class Library
{
std::map<std::string, kiwix::Book> m_books;
std::map<std::string, std::shared_ptr<Reader>> m_readers;
std::vector<kiwix::Bookmark> m_bookmarks;
class BookDB;
std::unique_ptr<BookDB> m_bookDB;
public:
typedef uint64_t Revision;
typedef std::vector<std::string> BookIdCollection;
typedef std::map<std::string, int> AttributeCounts;
typedef std::set<std::string> BookIdSet;
private:
Library();
public:
LIBKIWIX_NODISCARD static LibraryPtr create() {
return LibraryPtr(new Library());
}
Library();
~Library();
/**
* Library is not a copiable object. However it can be moved.
*/
Library(const Library& ) = delete;
Library(Library&& ) = delete;
Library(Library&& );
void operator=(const Library& ) = delete;
Library& operator=(Library&& ) = delete;
Library& operator=(Library&& );
/**
* Add a book to the library.
@@ -258,11 +177,6 @@ class Library: public std::enable_shared_from_this<Library>
*/
bool addBook(const Book& book);
/**
* A self-explanatory alias for addBook()
*/
bool addOrUpdateBook(const Book& book) { return addBook(book); }
/**
* Add a bookmark to the library.
*
@@ -271,7 +185,7 @@ class Library: public std::enable_shared_from_this<Library>
void addBookmark(const Bookmark& bookmark);
/**
* Remove a bookmark
* Remove a bookmarkk
*
* @param zimId The zimId of the bookmark.
* @param url The url of the bookmark.
@@ -279,78 +193,11 @@ class Library: public std::enable_shared_from_this<Library>
*/
bool removeBookmark(const std::string& zimId, const std::string& url);
/**
* Migrate all invalid bookmarks.
*
* All invalid bookmarks (ie pointing to unknown books, no check is made on bookmark pointing to
* invalid articles of valid book) will be migrated (if possible) to a better book.
* "Better book", will be determined using method `getBestTargetBookId`.
*
* @return A tuple<int, int>: <The number of bookmarks updated>, <Number of invalid bookmarks before migration was performed>.
*/
std::tuple<int, int> migrateBookmarks(MigrationMode migrationMode = ALLOW_DOWNGRADE);
/**
* Migrate all bookmarks associated to a specific book.
*
* All bookmarks associated to `sourceBookId` book will be migrated to a better book.
* "Better book", will be determined using method `getBestTargetBookId`.
*
* @param sourceBookId the source bookId of the bookmarks to migrate.
* @param migrationMode how we will find the best book.
* @return The number of bookmarks updated.
*/
int migrateBookmarks(const std::string& sourceBookId, MigrationMode migrationMode = UPGRADE_ONLY);
/**
* Migrate bookmarks
*
* Migrate all bookmarks pointing to `source` to `destination`.
*
* @param sourceBookId the source bookId of the bookmarks to migrate.
* @param targetBookId the destination bookId to migrate the bookmarks to.
* @return The number of bookmarks updated.
*/
int migrateBookmarks(const std::string& sourceBookId, const std::string& targetBookId);
/**
* Get the best available bookId for a bookmark.
*
* Given a bookmark, return the best available bookId.
* "best available bookId" is determined using heuristics based on book name, flavour and date.
*
* @param bookmark The bookmark to search the bookId for.
* @param migrationMode The migration mode to use.
* @return A bookId. Potentially empty string if no suitable book found.
*/
std::string getBestTargetBookId(const Bookmark& bookmark, MigrationMode migrationMode) const;
/**
* Get the best bookId for a combination of book's name, flavour and date.
*
* Given a bookName (mandatory), try to find the best book.
* If preferedFlavour is given, will try to find a book with the same flavour. If not found, return a book with a different flavour.
* If minDate is given, return a book newer than minDate. If not found, return a empty bookId.
*
* @param bookName The name of the book
* @param preferedFlavour The prefered flavour.
* @param minDate the minimal book date acceptable. Must be a string in the format "YYYY-MM-DD".
* @return A bookId corresponding to the query, or empty string if not found.
*/
std::string getBestTargetBookId(const std::string& bookName, const std::string& preferedFlavour="", const std::string& minDate="") const;
// XXX: This is a non-thread-safe operation
const Book& getBookById(const std::string& id) const;
// XXX: This is a non-thread-safe operation
Book& getBookById(const std::string& id);
const Book& getBookByPath(const std::string& path) const;
Book getBookByIdThreadSafe(const std::string& id) const;
std::shared_ptr<zim::Archive> getArchiveById(const std::string& id);
std::shared_ptr<ZimSearcher> getSearcherById(const std::string& id) {
return getSearcherByIds(BookIdSet{id});
}
std::shared_ptr<ZimSearcher> getSearcherByIds(const BookIdSet& ids);
Book& getBookByPath(const std::string& path);
std::shared_ptr<Reader> getReaderById(const std::string& id);
/**
* Remove a book from the library.
@@ -392,13 +239,6 @@ class Library: public std::enable_shared_from_this<Library>
*/
std::vector<std::string> getBooksLanguages() const;
/**
* Get all languages of the books in the library with counts.
*
* @return A list of languages with the count of books in each language.
*/
AttributeCounts getBooksLanguagesWithCounts() const;
/**
* Get all categories of the books in the library.
*
@@ -434,6 +274,17 @@ class Library: public std::enable_shared_from_this<Library>
*/
BookIdCollection getBooksIds() const;
/**
* Filter the library and generate a new one with the keep elements.
*
* This is equivalent to `listBookIds(ALL, UNSORTED, search)`.
*
* @param search List only books with search in the title or description.
* @return The list of bookIds corresponding to the query.
*/
DEPRECATED BookIdCollection filter(const std::string& search) const;
/**
* Filter the library and return the id of the keep elements.
*
@@ -453,59 +304,45 @@ class Library: public std::enable_shared_from_this<Library>
void sort(BookIdCollection& bookIds, supportedListSortBy sortBy, bool ascending) const;
/**
* Return the current revision of the library.
* List books in the library.
*
* The revision of the library is updated (incremented by one) by
* the addBook() and removeBookById() operations.
*
* @return Current revision of the library.
* @param mode The mode of listing :
* - LOCAL  : list only local books (with a path).
* - REMOTE : list only remote books (with an url).
* - VALID  : list only valid books (without a path or with a
* path pointing to a valid zim file).
* - NOLOCAL : list only books without valid path.
* - NOREMOTE : list only books without url.
* - NOVALID : list only books not valid.
* - ALL : Do not do any filter (LOCAL or REMOTE)
* - Flags can be combined.
* @param sortBy Attribute to sort by the book list.
* @param search List only books with search in the title, description.
* @param language List only books in this language.
* @param creator List only books of this creator.
* @param publisher List only books of this publisher.
* @param maxSize Do not list book bigger than maxSize.
* Set to 0 to cancel this filter.
* @return The list of bookIds corresponding to the query.
*/
Revision getRevision() const;
/**
* Remove books that have not been updated since the specified revision.
*
* @param rev the library revision to use
* @return Count of books that were removed by this operation.
*/
uint32_t removeBooksNotUpdatedSince(Revision rev);
DEPRECATED BookIdCollection listBooksIds(
int supportedListMode = ALL,
supportedListSortBy sortBy = UNSORTED,
const std::string& search = "",
const std::string& language = "",
const std::string& creator = "",
const std::string& publisher = "",
const std::vector<std::string>& tags = {},
size_t maxSize = 0) const;
friend class OPDSDumper;
friend class libXMLDumper;
private: // types
typedef const std::string& (Book::*BookStrPropMemFn)() const;
struct Entry : Book
{
Library::Revision lastUpdatedRevision = 0;
};
private: // functions
AttributeCounts getBookAttributeCounts(BookStrPropMemFn p) const;
std::vector<std::string> getBookPropValueSet(BookStrPropMemFn p) const;
BookIdCollection filterViaBookDB(const Filter& filter) const;
std::string getBestFromBookCollection(BookIdCollection books, const Bookmark& bookmark, MigrationMode migrationMode) const;
unsigned int getBookCount_not_protected(const bool localBooks, const bool remoteBooks) const;
void updateBookDB(const Book& book);
void dropCache(const std::string& bookId);
private: //data
mutable std::recursive_mutex m_mutex;
Library::Revision m_revision;
std::map<std::string, Entry> m_books;
using ArchiveCache = ConcurrentCache<std::string, std::shared_ptr<zim::Archive>>;
std::unique_ptr<ArchiveCache> mp_archiveCache;
using SearcherCache = MultiKeyCache<std::string, std::shared_ptr<ZimSearcher>>;
std::unique_ptr<SearcherCache> mp_searcherCache;
std::vector<kiwix::Bookmark> m_bookmarks;
std::unique_ptr<Xapian::WritableDatabase> m_bookDB;
};
// We don't need it anymore and we don't want to pollute any other potential usage
// of `LIBKIWIX_NODISCARD` token.
#undef LIBKIWIX_NODISCARD
}
#endif

View File

@@ -22,10 +22,10 @@
#include "book.h"
#include "library.h"
#include "reader.h"
#include <string>
#include <vector>
#include <memory>
namespace pugi {
class xml_document;
@@ -34,25 +34,26 @@ class xml_document;
namespace kiwix
{
class LibraryManipulator
{
public: // functions
explicit LibraryManipulator(LibraryPtr library);
virtual ~LibraryManipulator();
class LibraryManipulator {
public:
virtual ~LibraryManipulator() {}
virtual bool addBookToLibrary(Book book) = 0;
virtual void addBookmarkToLibrary(Bookmark bookmark) = 0;
};
LibraryPtr getLibrary() const { return library; }
bool addBookToLibrary(const Book& book);
void addBookmarkToLibrary(const Bookmark& bookmark);
uint32_t removeBooksNotUpdatedSince(Library::Revision rev);
protected: // overrides
virtual void bookWasAddedToLibrary(const Book& book);
virtual void bookmarkWasAddedToLibrary(const Bookmark& bookmark);
virtual void booksWereRemovedFromLibrary();
private: // data
LibraryPtr library;
class DefaultLibraryManipulator : public LibraryManipulator {
public:
DefaultLibraryManipulator(Library* library) :
library(library) {}
virtual ~DefaultLibraryManipulator() {}
bool addBookToLibrary(Book book) {
return library->addBook(book);
}
void addBookmarkToLibrary(Bookmark bookmark) {
library->addBookmark(bookmark);
}
private:
kiwix::Library* library;
};
/**
@@ -60,12 +61,10 @@ class LibraryManipulator
*/
class Manager
{
public: // types
typedef std::vector<std::string> Paths;
public: // functions
explicit Manager(LibraryManipulator manipulator);
explicit Manager(LibraryPtr library);
public:
Manager(LibraryManipulator* manipulator);
Manager(Library* library);
~Manager();
/**
* Read a `library.xml` and add book in the file to the library.
@@ -73,22 +72,10 @@ class Manager
* @param path The (utf8) path to the `library.xml`.
* @param readOnly Set if the library path could be overwritten later with
* updated content.
* @param trustLibrary use book metadata coming from XML.
* @return True if file has been properly parsed.
*/
bool readFile(const std::string& path, bool readOnly = true, bool trustLibrary = true);
/**
* Sync the contents of the library with one or more `library.xml` files.
*
* The metadata of the library files is trusted unconditionally.
* Any books not present in the input library.xml files are removed
* from the library.
*
* @param paths The (utf8) paths to the `library.xml` files.
*/
void reload(const Paths& paths);
/**
* Load a library content store in the string.
*
@@ -163,7 +150,8 @@ class Manager
uint64_t m_itemsPerPage = 0;
protected:
kiwix::LibraryManipulator manipulator;
kiwix::LibraryManipulator* manipulator;
bool mustDeleteManipulator;
bool readBookFromPath(const std::string& path, Book* book);
bool parseXmlDom(const pugi::xml_document& doc,

View File

@@ -4,15 +4,27 @@ headers = [
'common.h',
'library.h',
'manager.h',
'libxml_dumper.h',
'opds_dumper.h',
'downloader.h',
'reader.h',
'entry.h',
'searcher.h',
'search_renderer.h',
'server.h',
'spelling_correction.h',
'kiwixserve.h',
'name_mapper.h',
'tools.h',
'version.h',
'i18n.h'
'name_mapper.h'
]
install_headers(headers, subdir:'kiwix')
install_headers(
'tools/base64.h',
'tools/networkTools.h',
'tools/otherTools.h',
'tools/pathTools.h',
'tools/regexTools.h',
'tools/stringTools.h',
subdir:'kiwix/tools'
)

View File

@@ -22,8 +22,6 @@
#include <string>
#include <map>
#include <memory>
#include <mutex>
namespace kiwix
{
@@ -33,15 +31,15 @@ class Library;
class NameMapper {
public:
virtual ~NameMapper() = default;
virtual std::string getNameForId(const std::string& id) const = 0;
virtual std::string getIdForName(const std::string& name) const = 0;
virtual std::string getNameForId(const std::string& id) = 0;
virtual std::string getIdForName(const std::string& name) = 0;
};
class IdNameMapper : public NameMapper {
public:
virtual std::string getNameForId(const std::string& id) const { return id; };
virtual std::string getIdForName(const std::string& name) const { return name; };
virtual std::string getNameForId(const std::string& id) { return id; };
virtual std::string getIdForName(const std::string& name) { return name; };
};
class HumanReadableNameMapper : public NameMapper {
@@ -50,34 +48,13 @@ class HumanReadableNameMapper : public NameMapper {
std::map<std::string, std::string> m_nameToId;
public:
HumanReadableNameMapper(const kiwix::Library& library, bool withAlias);
HumanReadableNameMapper(kiwix::Library& library, bool withAlias);
virtual ~HumanReadableNameMapper() = default;
virtual std::string getNameForId(const std::string& id) const;
virtual std::string getIdForName(const std::string& name) const;
private:
void mapName(const kiwix::Library& lib, std::string name, std::string id);
virtual std::string getNameForId(const std::string& id);
virtual std::string getIdForName(const std::string& name);
};
/**
* A NameMapper that keeps a shared Library, a `withAlias` flag and a handle
* to another NameMapper.
*
* NOTE(review): judging by the member names, lookups are presumably delegated
* to the current `nameMapper`, and update() presumably rebuilds it from
* `library`. The implementation is not visible in this header - confirm.
*/
class UpdatableNameMapper : public NameMapper {
// Shared handle to the wrapped name mapper.
typedef std::shared_ptr<NameMapper> NameMapperHandle;
public:
/**
* @param library The library this mapper is associated with.
* @param withAlias Stored in `withAlias`; presumably forwarded to the
* underlying mapper - TODO confirm.
*/
UpdatableNameMapper(std::shared_ptr<Library> library, bool withAlias);
virtual std::string getNameForId(const std::string& id) const;
virtual std::string getIdForName(const std::string& name) const;
// Refresh the mapper (see the NOTE on the class; details are in the implementation).
void update();
private:
// Returns a handle to the name mapper currently in use.
NameMapperHandle currentNameMapper() const;
private:
// Declared mutable so it can be locked from the const member functions.
mutable std::mutex mutex;
std::shared_ptr<Library> library;
NameMapperHandle nameMapper;
const bool withAlias;
};
}

View File

@@ -26,9 +26,11 @@
#include <pugixml.hpp>
#include "tools/base64.h"
#include "tools/pathTools.h"
#include "tools/regexTools.h"
#include "library.h"
#include "name_mapper.h"
#include "library_dumper.h"
#include "reader.h"
using namespace std;
@@ -39,10 +41,11 @@ namespace kiwix
* A tool to dump a `Library` into a opds stream.
*
*/
class OPDSDumper : public LibraryDumper
class OPDSDumper
{
public:
OPDSDumper(const Library* library, const NameMapper* NameMapper);
OPDSDumper() = default;
OPDSDumper(Library* library);
~OPDSDumper();
/**
@@ -59,32 +62,48 @@ class OPDSDumper : public LibraryDumper
*
* @param bookIds the ids of the books to include in the feed
* @param query the query used to obtain the list of book ids
* @param partial whether the feed should include partial or complete entries
* @return The OPDS feed.
*/
std::string dumpOPDSFeedV2(const std::vector<std::string>& bookIds, const std::string& query, bool partial) const;
/**
* Dump the OPDS complete entry document.
*
* @param bookId the id of the book
* @return The OPDS complete entry document.
*/
std::string dumpOPDSCompleteEntry(const std::string& bookId) const;
std::string dumpOPDSFeedV2(const std::vector<std::string>& bookIds, const std::string& query) const;
/**
* Dump the categories OPDS feed.
*
* @param categories list of category names
* @return The OPDS feed.
*/
std::string categoriesOPDSFeed() const;
std::string categoriesOPDSFeed(const std::vector<std::string>& categories) const;
/**
* Dump the languages OPDS feed.
* Set the id of the library.
*
* @return The OPDS feed.
* @param id the id to use.
*/
std::string languagesOPDSFeed() const;
void setLibraryId(const std::string& id) { this->libraryId = id;}
/**
* Set the root location used when generating url.
*
* @param rootLocation the root location to use.
*/
void setRootLocation(const std::string& rootLocation) { this->rootLocation = rootLocation; }
/**
* Set some informations about the search results.
*
* @param totalResult the total number of results of the search.
* @param startIndex the start index of the result.
* @param count the number of result of the current set (or page).
*/
void setOpenSearchInfo(int totalResult, int startIndex, int count);
protected:
kiwix::Library* library;
std::string libraryId;
std::string rootLocation;
int m_totalResults;
int m_startIndex;
int m_count;
};
}

502
include/reader.h Normal file
View File

@@ -0,0 +1,502 @@
/*
* Copyright 2011 Emmanuel Engelhart <kelson@kiwix.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*/
#ifndef KIWIX_READER_H
#define KIWIX_READER_H
#include <stdio.h>
#include <zim/zim.h>
#include <zim/archive.h>
#include <exception>
#include <map>
#include <sstream>
#include <string>
#include "common.h"
#include "entry.h"
#include "tools/pathTools.h"
#include "tools/stringTools.h"
using namespace std;
namespace kiwix
{
/**
 * The SuggestionItem is a helper class that contains the info about a single
 * suggestion item.
 *
 * Instances can only be created by Reader: the constructor is private and
 * Reader is declared as a friend.
 */
class SuggestionItem
{
  // Functions
  private:
    /**
     * Create a suggestion item.
     *
     * @param title The title of the suggestion.
     * @param normalizedTitle The normalized form of the title.
     * @param path The path of the suggested entry.
     * @param snippet An optional snippet; empty means "no snippet".
     */
    explicit SuggestionItem(std::string title, std::string normalizedTitle,
                            std::string path, std::string snippet = "") :
      title(title),
      normalizedTitle(normalizedTitle),
      path(path),
      snippet(snippet) {}

  public:
    // The accessors are const-qualified so that they can be called through a
    // const reference (e.g. while iterating over a const list of suggestions).
    // They return plain values: a top-level const on a by-value return type
    // has no effect for bool and only prevents moving for std::string.
    std::string getTitle() const {return title;}
    std::string getNormalizedTitle() const {return normalizedTitle;}
    std::string getPath() const {return path;}
    std::string getSnippet() const {return snippet;}
    // True if a non-empty snippet is attached to the suggestion.
    bool hasSnippet() const {return !snippet.empty();}

  // Data
  private:
    std::string title;
    std::string normalizedTitle;
    std::string path;
    std::string snippet;

  friend class Reader;
};
/**
* The Reader class is the class who allow to get an entry content from a zim
* file.
*/
using SuggestionsList_t = std::vector<SuggestionItem>;
class Reader
{
public:
/**
* Create a Reader to read a zim file specified by zimFilePath.
*
* @param zimFilePath The path to the zim file to read.
* The zim file can be split (.zimaa, .zimab, ...).
* In this case, the file path must still point to the
* unsplit path, as if the file were not split
* (.zim extension).
*/
explicit Reader(const string zimFilePath);
#ifndef _WIN32
explicit Reader(int fd);
Reader(int fd, zim::offset_type offset, zim::size_type size);
#endif
~Reader() = default;
/**
* Get the number of "displayable" entries in the zim file.
*
* @return If the zim file has a /M/Counter metadata, return the number of
* entries with the 'text/html' MIMEtype specified in the metadata.
* Else return the number of entries in the 'A' namespace.
*/
unsigned int getArticleCount() const;
/**
* Get the number of media in the zim file.
*
* @return If the zim file has a /M/Counter metadata, return the number of
* entries with the 'image/jpeg', 'image/gif' and 'image/png' in
* the metadata.
* Else return the number of entries in the 'I' namespace.
*/
unsigned int getMediaCount() const;
/**
* Get the number of all entries in the zim file.
*
* @return Return the number of all the entries, whatever their MIMEtype or
* their namespace.
*/
unsigned int getGlobalCount() const;
/**
* Get the path of the zim file.
*
* @return the path of the zim file as given in the constructor.
*/
string getZimFilePath() const;
/**
* Get the Id of the zim file.
*
* @return The uuid stored in the zim file.
*/
string getId() const;
/**
* Get a random page.
*
* @return A random Entry. The entry is picked from all entries in
* the 'A' namespace.
* The main entry is excluded from the potential results.
*/
Entry getRandomPage() const;
/**
* Get the entry of the main page.
*
* @return Entry of the main page as specified in the zim file.
*/
Entry getMainPage() const;
/**
* Get the content of a metadata.
*
* @param[in] name The name of the metadata.
* @param[out] value The value will be set to the content of the metadata.
* @return True if it was possible to get the content of the metadata.
*/
bool getMetadata(const string& name, string& value) const;
/**
* Get the name of the zim file.
*
* @return The name of the zim file as specified in the zim metadata.
*/
string getName() const;
/**
* Get the title of the zim file.
*
* @return The title of zim file as specified in the zim metadata.
* If no title has been set, return a title computed from the
* file path.
*/
string getTitle() const;
/**
* Get the creator of the zim file.
*
* @return The creator of the zim file as specified in the zim metadata.
*/
string getCreator() const;
/**
* Get the publisher of the zim file.
*
* @return The publisher of the zim file as specified in the zim metadata.
*/
string getPublisher() const;
/**
* Get the date of the zim file.
*
* @return The date of the zim file as specified in the zim metadata.
*/
string getDate() const;
/**
* Get the description of the zim file.
*
* @return The description of the zim file as specified in the zim metadata.
* If no description has been set, return the subtitle.
*/
string getDescription() const;
/**
* Get the long description of the zim file.
*
* @return The long description of the zim file as specified in the zim metadata.
*/
string getLongDescription() const;
/**
* Get the language of the zim file.
*
* @return The language of the zim file as specified in the zim metadata.
*/
string getLanguage() const;
/**
* Get the license of the zim file.
*
* @return The license of the zim file as specified in the zim metadata.
*/
string getLicense() const;
/**
* Get the tags of the zim file.
*
* @param original If true, return the original tags as specified in the zim metadata.
* Else, try to convert it to the new 'normalized' format.
* @return The tags of the zim file.
*/
string getTags(bool original=false) const;
/**
* Get the value (as a string) of a specific tag.
*
* According to https://wiki.openzim.org/wiki/Tags
*
* @return The value of the specified tag.
* @throw std::out_of_range if the specified tag is not found.
*/
string getTagStr(const std::string& tagName) const;
/**
* Get the boolean value of a specific tag.
*
* According to https://wiki.openzim.org/wiki/Tags
*
* @param tagName The name of the tag to look up.
* @return The boolean value of the specified tag.
* @throw std::out_of_range if the specified tag is not found.
* @throw std::domain_error if the value of the tag cannot be converted to bool.
*/
bool getTagBool(const std::string& tagName) const;
/**
* Get the relations of the zim file.
*
* @return The relation of the zim file as specified in the zim metadata.
*/
string getRelation() const;
/**
* Get the flavour of the zim file.
*
* @return The flavour of the zim file as specified in the zim metadata.
*/
string getFlavour() const;
/**
* Get the source of the zim file.
*
* @return The source of the zim file as specified in the zim metadata.
*/
string getSource() const;
/**
* Get the scraper of the zim file.
*
* @return The scraper of the zim file as specified in the zim metadata.
*/
string getScraper() const;
/**
* Get the origId of the zim file.
*
* The origId is only used in the case of patch zim file and is the Id
* of the original zim file.
*
* @return The origId of the zim file as specified in the zim metadata.
*/
string getOrigId() const;
/**
* Get the favicon of the zim file.
*
* @param[out] content The content of the favicon.
* @param[out] mimeType The mimeType of the favicon.
* @return True if a favicon has been found.
*/
bool getFavicon(string& content, string& mimeType) const;
/**
* Get the entry associated with a path.
*
* @param path The path of the entry.
* @return The entry.
* @throw NoEntry If no entry corresponds to the path.
*/
Entry getEntryFromPath(const std::string& path) const;
/**
* Get the entry associated with a URL-encoded path.
*
* Equivalent to `getEntryFromPath(urlDecode(path));`
*
* @param path The URL-encoded path.
* @return The entry.
* @throw NoEntry If no entry corresponds to the path.
*/
Entry getEntryFromEncodedPath(const std::string& path) const;
/**
* Get the entry associated with a title.
*
* @param title The title.
* @return The entry.
* @throw NoEntry If no entry corresponds to the title.
*/
Entry getEntryFromTitle(const std::string& title) const;
/**
* Search for entries with title starting with prefix (case sensitive).
*
* Suggestions are stored in an internal vector and can be retrieved using
* the `getNextSuggestion` method.
* This method is not thread safe and is deprecated. Use instead:
* bool searchSuggestions(const string& prefix,
* unsigned int suggestionsCount,
* SuggestionsList_t& results);
*
* @param prefix The prefix to search.
* @param suggestionsCount How many suggestions to search for.
* @param reset If true, remove previous suggestions in the internal vector.
* If false, add suggestions to the internal vector
* (until the internal vector size reaches suggestionsCount or there
* are no more suggestions).
* @return True if some suggestions have been added to the internal vector.
*/
DEPRECATED bool searchSuggestions(const string& prefix,
unsigned int suggestionsCount,
const bool reset = true);
/**
* Search for entries with title starting with prefix (case sensitive).
*
* Suggestions are added to the `results` vector.
*
* @param prefix The prefix to search.
* @param suggestionsCount How many suggestions to search for.
* @param results The vector where to store the suggestions.
* @return True if some suggestions have been added to the vector.
*/
bool searchSuggestions(const string& prefix,
unsigned int suggestionsCount,
SuggestionsList_t& results);
/**
* Search for entries for the given prefix.
*
* If the zim file has an internal fulltext index, the suggestions will be
* searched using it.
* Else the suggestions will be searched using `searchSuggestions` while trying
* to be smart about case sensitivity (using `getTitleVariants`).
*
* In any case, suggestions are stored in an internal vector and can be
* retrieved using the `getNextSuggestion` method.
* The internal vector will be reset.
* This method is not thread safe and is deprecated. Use instead:
* bool searchSuggestionsSmart(const string& prefix,
* unsigned int suggestionsCount,
* SuggestionsList_t& results);
*
* @param prefix The prefix to search for.
* @param suggestionsCount How many suggestions to search for.
* @return NOTE(review): presumably true if some suggestions have been found;
* confirm against the implementation.
*/
DEPRECATED bool searchSuggestionsSmart(const string& prefix,
unsigned int suggestionsCount);
/**
* Search for entries for the given prefix.
*
* If the zim file has an internal fulltext index, the suggestions will be
* searched using it.
* Else the suggestions will be searched using `searchSuggestions` while trying
* to be smart about case sensitivity (using `getTitleVariants`).
*
* Suggestions are stored in the `results` vector.
* NOTE(review): this comment previously stated that suggestions are stored in
* an internal vector, can be retrieved using `getNextSuggestion`, and that the
* internal vector is reset. That wording conflicts with the `results`
* parameter documented below; confirm against the implementation.
*
* @param prefix The prefix to search for.
* @param suggestionsCount How many suggestions to search for.
* @param results The vector where to store the suggestions
* @return True if some suggestions have been added to the results.
*/
bool searchSuggestionsSmart(const string& prefix,
unsigned int suggestionsCount,
SuggestionsList_t& results);
/**
* Check if the path exists in the zim file.
*
* @param path the path to check.
* @return True if the path exists in the zim file.
*/
bool pathExists(const string& path) const;
/**
* Check if the zim file has an embedded fulltext index.
*
* @return True if the zim file has an embedded fulltext index
* and is not split (else the fulltext is not accessible).
*/
bool hasFulltextIndex() const;
/**
* Get potential case title variations for a title.
*
* @param title a title.
* @return the list of variants.
*/
std::vector<std::string> getTitleVariants(const std::string& title) const;
/**
* Get the next suggestion title.
*
* @param[out] title the title of the suggestion.
* @return True if title has been set.
*/
DEPRECATED bool getNextSuggestion(string& title);
/**
* Get the next suggestion title and url.
*
* @param[out] title the title of the suggestion.
* @param[out] url the url of the suggestion.
* @return True if title and url have been set.
*/
DEPRECATED bool getNextSuggestion(string& title, string& url);
/**
* Tell whether the zim file integrity can be checked (i.e. it has a checksum).
*
* @return True if the zim file has a checksum.
*/
bool canCheckIntegrity() const;
/**
* Check if the zim file is corrupted.
*
* @return True if the zim file is corrupted.
*/
bool isCorrupted() const;
/**
* Return the total size of the zim file.
*
* If the zim file is split, return the sum of all parts' sizes.
*
* @return Size of the zim file in KiB.
*/
unsigned int getFileSize() const;
/**
* Get the zim file handler.
*
* @return The libzim file handler.
*/
zim::Archive* getZimArchive() const;
protected:
std::unique_ptr<zim::Archive> zimArchive;
std::string zimFilePath;
SuggestionsList_t suggestions;
SuggestionsList_t::iterator suggestionsOffset;
private:
std::map<const std::string, unsigned int> parseCounterMetadata() const;
};
}
#endif

View File

@@ -21,12 +21,11 @@
#define KIWIX_SEARCH_RENDERER_H
#include <string>
#include <zim/search.h>
#include "library.h"
namespace kiwix
{
class Searcher;
class NameMapper;
/**
* The SearchRenderer class is used to render a search result to an HTML page.
@@ -35,25 +34,21 @@ class SearchRenderer
{
public:
/**
* Construct a SearchRenderer from a SearchResultSet.
* The default constructor.
*
* @param srs The `SearchResultSet` to render.
* @param start The start offset used for the srs.
* @param estimatedResultCount The estimatedResultCount of the whole search
* @param humanReadableName The global zim's humanReadableName.
* Used to generate pagination links.
*/
SearchRenderer(zim::SearchResultSet srs, unsigned int start, unsigned int estimatedResultCount);
SearchRenderer(Searcher* searcher, NameMapper* mapper);
~SearchRenderer();
/**
* Set the search pattern used to do the search
*/
void setSearchPattern(const std::string& pattern);
/**
* Set the querystring used to select books
* Set the search content id.
*/
void setSearchBookQuery(const std::string& bookQuery);
void setSearchContent(const std::string& name);
/**
* Set protocol prefix.
@@ -72,47 +67,22 @@ class SearchRenderer
this->pageLength = pageLength;
}
/**
* set user language
*/
void setUserLang(const std::string& lang){
this->userlang = lang;
}
/**
* Generate the html page with the results of the search.
*
* @param mapper The `NameMapper` to use to do the rendering.
* @param library The `Library` to use to look up book details for search results.
May be nullptr. In this case, bookName is not set in the rendered string.
* @return The html string
*/
std::string getHtml(const NameMapper& mapper, const Library* library);
/**
* Generate the xml page with the results of the search.
*
* @param mapper The `NameMapper` to use to do the rendering.
* @param library The `Library` to use to look up book details for search results.
May be nullptr. In this case, bookName is not set in the rendered string.
* @return The xml string
*/
std::string getXml(const NameMapper& mapper, const Library* library);
protected: // function
std::string renderTemplate(const std::string& tmpl_str, const NameMapper& mapper, const Library *library);
std::string getHtml();
protected:
std::string beautifyInteger(const unsigned int number);
zim::SearchResultSet m_srs;
std::string searchBookQuery;
Searcher* mp_searcher;
NameMapper* mp_nameMapper;
std::string searchContent;
std::string searchPattern;
std::string protocolPrefix;
std::string searchProtocolPrefix;
unsigned int pageLength;
unsigned int estimatedResultCount;
unsigned int resultStart;
std::string userlang = "en";
};

171
include/searcher.h Normal file
View File

@@ -0,0 +1,171 @@
/*
* Copyright 2011 Emmanuel Engelhart <kelson@kiwix.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*/
#ifndef KIWIX_SEARCHER_H
#define KIWIX_SEARCHER_H
#include <stdio.h>
#include <stdlib.h>
#include <unicode/putil.h>
#include <algorithm>
#include <cctype>
#include <locale>
#include <string>
#include <memory>
#include <vector>
#include "tools/pathTools.h"
#include "tools/stringTools.h"
using namespace std;
namespace kiwix
{
class Reader;
/**
 * Abstract interface describing a single search result.
 *
 * Every accessor is pure virtual: concrete result types supply the values.
 * The class is meant to be used through base pointers/references, hence the
 * virtual destructor.
 */
class Result
{
 public:
  virtual ~Result() = default;

  /// Url of the result.
  virtual std::string get_url() = 0;

  /// Title of the result.
  virtual std::string get_title() = 0;

  /// Score of the result (the scale is defined by the implementation).
  virtual int get_score() = 0;

  /// Snippet of the result.
  virtual std::string get_snippet() = 0;

  /// Content of the result.
  virtual std::string get_content() = 0;

  /// Word count of the result.
  virtual int get_wordCount() = 0;

  /// Size of the result (the unit is defined by the implementation).
  virtual int get_size() = 0;

  /// Identifier of the zim the result belongs to.
  virtual std::string get_zimId() = 0;
};
struct SearcherInternal;
/**
 * The Searcher class is responsible for running the different kinds of
 * searches using the fulltext index.
 */
class Searcher
{
 public:
  /**
   * The default constructor.
   */
  Searcher();

  ~Searcher();

  /**
   * Add a reader (containing embedded fulltext index) to the search.
   *
   * @param reader The Reader for the zim containing the fulltext index.
   * @return true if the reader has been added.
   *         false if the reader cannot be added (no embedded fulltext index present)
   */
  bool add_reader(Reader* reader);

  /**
   * Get one of the readers of the searcher.
   *
   * NOTE(review): presumably `index` is the position of the reader in the
   * order of the `add_reader` calls; the behaviour for an out-of-range index
   * is not visible from this header. Confirm against the implementation.
   *
   * @param index The index of the reader.
   * @return The reader.
   */
  Reader* get_reader(int index);

  /**
   * Start a search on the zim associated to the Searcher.
   *
   * Search results should be retrieved using the getNextResult method.
   *
   * @param search The search query.
   * @param resultStart the start offset of the search results (used for pagination).
   * @param resultEnd the end offset of the search results (used for pagination).
   * @param verbose print some info on stdout if true.
   */
  void search(const std::string& search,
              unsigned int resultStart,
              unsigned int resultEnd,
              const bool verbose = false);

  /**
   * Start a geographic search.
   * The search returns results for entries located in a disc of center
   * latitude/longitude and radius distance.
   *
   * Search results should be retrieved using the getNextResult method.
   *
   * @param latitude The latitude of the center point.
   * @param longitude The longitude of the center point.
   * @param distance The radius of the disc.
   * @param resultStart the start offset of the search results (used for pagination).
   * @param resultEnd the end offset of the search results (used for pagination).
   * @param verbose print some info on stdout if true.
   */
  void geo_search(float latitude, float longitude, float distance,
                  unsigned int resultStart,
                  unsigned int resultEnd,
                  const bool verbose = false);

  /**
   * Start a suggestion search.
   * The search made depends on the "version" of the embedded index.
   *  - If the index is recent enough and has a title namespace, the search is
   *    made in the titles only.
   *  - Else the search is made on the whole article content.
   * In any case, the search is made "partial" (as adding '*' at the end of the query)
   *
   * @param search The search query. It is taken by non-const reference
   *               (NOTE(review): it may be modified by the call; confirm).
   * @param verbose print some info on stdout if true.
   */
  void suggestions(std::string& search, const bool verbose = false);

  /**
   * Get the next result of a started search.
   * This is the method to use to loop over the search results.
   *
   * NOTE(review): ownership of the returned pointer and the value returned
   * once the results are exhausted are not visible from this header; confirm
   * against the implementation.
   */
  Result* getNextResult();

  /**
   * Restart the previous search.
   * Next call to getNextResult will return the first result.
   */
  void restart_search();

  /**
   * Get an estimation of the result count.
   */
  unsigned int getEstimatedResultCount();

  /**
   * Get the current value of the `resultStart` member.
   */
  unsigned int getResultStart() { return resultStart; }

  /**
   * Get the current value of the `resultEnd` member.
   */
  unsigned int getResultEnd() { return resultEnd; }

 protected:
  std::string beautifyInteger(const unsigned int number);
  void closeIndex();
  // `std::string` is spelled out (like everywhere else in this class) so the
  // declaration does not depend on a using-directive being in effect.
  void searchInIndex(std::string& search,
                     const unsigned int resultStart,
                     const unsigned int resultEnd,
                     const bool verbose = false);

  std::vector<Reader*> readers;
  std::unique_ptr<SearcherInternal> internal;
  std::string searchPattern;
  unsigned int estimatedResultCount;
  unsigned int resultStart;
  unsigned int resultEnd;

 private:
  void reset();
};
}
#endif

View File

@@ -22,7 +22,6 @@
#include <string>
#include <memory>
#include "tools.h"
namespace kiwix
{
@@ -37,7 +36,7 @@ namespace kiwix
*
* @param library The library to serve.
*/
Server(std::shared_ptr<Library> library, std::shared_ptr<NameMapper> nameMapper=nullptr);
Server(Library* library, NameMapper* nameMapper=nullptr);
virtual ~Server();
@@ -52,42 +51,26 @@ namespace kiwix
void stop();
void setRoot(const std::string& root);
void setAddress(const std::string& addr);
void setAddress(const std::string& addr) { m_addr = addr; }
void setPort(int port) { m_port = port; }
void setNbThreads(int threads) { m_nbThreads = threads; }
void setMultiZimSearchLimit(unsigned int limit) { m_multizimSearchLimit = limit; }
void setIpConnectionLimit(int limit) { m_ipConnectionLimit = limit; }
void setVerbose(bool verbose) { m_verbose = verbose; }
void setIndexTemplateString(const std::string& indexTemplateString) { m_indexTemplateString = indexTemplateString; }
void setTaskbar(bool withTaskbar, bool withLibraryButton)
{ m_withTaskbar = withTaskbar; m_withLibraryButton = withLibraryButton; }
void setBlockExternalLinks(bool blockExternalLinks)
{ m_blockExternalLinks = blockExternalLinks; }
void setCatalogOnlyMode(bool enable) { m_catalogOnlyMode = enable; }
void setContentServerUrl(std::string url) { m_contentServerUrl = url; }
void setIpMode(IpMode mode) { m_ipMode = mode; }
int getPort() const;
IpAddress getAddress() const;
IpMode getIpMode() const;
std::vector<std::string> getServerAccessUrls() const;
protected:
std::shared_ptr<Library> mp_library;
std::shared_ptr<NameMapper> mp_nameMapper;
Library* mp_library;
NameMapper* mp_nameMapper;
std::string m_root = "";
IpAddress m_addr;
std::string m_indexTemplateString = "";
std::string m_addr = "";
int m_port = 80;
int m_nbThreads = 1;
unsigned int m_multizimSearchLimit = 0;
bool m_verbose = false;
bool m_withTaskbar = true;
bool m_withLibraryButton = true;
bool m_blockExternalLinks = false;
IpMode m_ipMode = IpMode::AUTO;
int m_ipConnectionLimit = 0;
bool m_catalogOnlyMode = false;
std::string m_contentServerUrl;
std::unique_ptr<InternalServer> mp_server;
};
}

View File

@@ -1,58 +0,0 @@
/*
* Copyright (C) 2025 Veloman Yunkan
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*/
#ifndef KIWIX_SPELLING_CORRECTION_H
#define KIWIX_SPELLING_CORRECTION_H
#include <filesystem>
#include <memory>
#include <string>
#include <vector>
namespace zim
{
class Archive;
}
namespace Xapian
{
class Database;
}
namespace kiwix
{
/**
* Provides spelling corrections for words.
*
* An instance is constructed from a zim archive and a cache directory path,
* and holds a Xapian database handle. The class is not copyable.
*/
class SpellingsDB
{
public: // functions
/**
* @param archive The zim archive the database is associated with.
* @param cacheDirPath Path of a cache directory.
* NOTE(review): presumably the spelling database is created or
* looked up under this directory; confirm against the implementation.
*/
SpellingsDB(const zim::Archive& archive, std::filesystem::path cacheDirPath);
~SpellingsDB();
// Copy construction and copy assignment are disabled.
SpellingsDB(const SpellingsDB& ) = delete;
void operator=(const SpellingsDB& ) = delete;
/**
* Get spelling corrections for a word.
*
* @param word The word to correct.
* @param maxCount NOTE(review): presumably the maximum number of corrections
* to return; confirm against the implementation.
* @return The list of corrections.
*
* NOTE(review): `uint32_t` is used here but this header does not include
* <cstdint> itself.
*/
std::vector<std::string> getSpellingCorrections(const std::string& word, uint32_t maxCount) const;
private: // data
// Xapian::Database is only forward-declared in this header.
std::unique_ptr<Xapian::Database> impl_;
};
} // namespace kiwix
#endif // KIWIX_SPELLING_CORRECTION_H

View File

@@ -1,271 +0,0 @@
/*
* Copyright 2021 Matthieu Gautier <mgautier@kymeria.fr>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*/
#ifndef KIWIX_TOOLS_H
#define KIWIX_TOOLS_H
#include <string>
#include <vector>
#include <map>
#include <cstdint>
#include "common.h"
namespace kiwix
{
/**
 * Textual IP addresses, one member per protocol version.
 */
struct IpAddress
{
  /// IPv4 address
  std::string addr;

  /// IPv6 address
  std::string addr6;
};
/// A pair of strings describing one language.
/// NOTE(review): the alias name suggests the order (name, code); confirm.
using LangNameCodePair = std::pair<std::string, std::string>;

/// List of language pairs.
using FeedLanguages = std::vector<LangNameCodePair>;

/// List of category strings.
using FeedCategories = std::vector<std::string>;
/**
* Return the current directory.
*
* @return the current directory (utf8 encoded)
*/
std::string getCurrentDirectory();
/** Return the path of the executable
*
* Some applications may be packaged in a self-extracting archive (AppImage), in
* which case the path of the real executable differs from the path of the archive.
* If `realPathOnly` is true, return the path of the real executable instead of the
* archive launched by the user.
*
* @param realPathOnly If we must return the real path of the executable.
* @return the path of the executable (utf8 encoded)
*/
std::string getExecutablePath(bool realPathOnly = false);
/** Tell if the path is a relative path.
*
* This function is provided as a small helper. It is probably better to use native tools
* to manipulate paths.
*
* @param path A utf8 encoded path.
* @return true if the path is relative.
*/
bool isRelativePath(const std::string& path);
/** Append a path to another one.
*
* This function is provided as a small helper. It is probably better to use native tools
* to manipulate paths.
*
* @param basePath the base path.
* @param relativePath a path to add to the base path, must be a relative path.
* @return The concatenation of the paths, using the right separator.
*/
std::string appendToDirectory(const std::string& basePath, const std::string& relativePath);
/** Remove the last element of a path.
*
* This function is provided as a small helper. It is probably better to use native tools
* to manipulate paths.
*
* @param path a path.
* @return The parent directory (or empty string if none).
*/
std::string removeLastPathElement(const std::string& path);
/** Get the last element of a path.
*
* This function is provided as a small helper. It is probably better to use native tools
* to manipulate paths.
*
* @param path a path.
* @return The base name of the path or empty string if none (ending with a separator).
*/
std::string getLastPathElement(const std::string& path);
/** Compute the absolute path of a relative path based on another one
*
* Equivalent to appendToDirectory followed by a normalization of the path.
*
* This function is provided as a small helper. It is probably better to use native tools
* to manipulate paths.
*
* @param path the base path (if empty, current directory is taken).
* @param relativePath the relative path.
* @return an absolute path.
*/
std::string computeAbsolutePath(const std::string& path, const std::string& relativePath);
/** Compute the relative path of a path relative to another one
*
* This function is provided as a small helper. It is probably better to use native tools
* to manipulate paths.
*
* @param path the base path.
* @param absolutePath the absolute path to find the relative path for.
* @return a relative path (pointing to absolutePath, relative to path).
*/
std::string computeRelativePath(const std::string& path, const std::string& absolutePath);
/** Sleep the current thread.
*
* This function is provided as a small helper. It is probably better to use native tools.
*
* @param milliseconds The number of milliseconds to wait for.
*/
void sleep(unsigned int milliseconds);
/** Split a string
*
* This function is provided as a small helper. It is probably better to use native tools.
*
* Assuming text = "foo:;bar;baz,oups;"
*
* split(text, ":;", true, true) => ["foo", ":", ";", "bar", ";", "baz,oups", ";"]
* split(text, ":;", true, false) => ["foo", "bar", "baz,oups"] (default)
* split(text, ":;", false, true) => ["foo", ":", "", ";", "bar", ";", "baz,oups", ";", ""]
* split(text, ":;", false, false) => ["foo", "", "bar", "baz,oups", ""]
*
* @param str The string to split.
* @param delims A string of potential delimiters.
* Each character in the string is an individual delimiter.
* @param dropEmpty true if empty parts must be dropped from the result.
* @param keepDelim true if delimiters must be included in the result.
* @return a list of parts (potentially containing delimiters)
*/
std::vector<std::string> split(const std::string& str, const std::string& delims, bool dropEmpty=true, bool keepDelim = false);
/** Convert language code from iso2 code to iso3
*
* This function is provided as a small helper. It is probably better to use native tools
* to manipulate locales.
*
* @param a2code an iso2 code string.
* @return the corresponding iso3 code.
* @throw std::out_of_range if iso2 code is not known.
*/
std::string converta2toa3(const std::string& a2code);
/** Extracts content from given file.
*
* This function provides the content of a file given its path.
*
* @param path The absolute path provided in string format.
* @return Content of corresponding file in string format.
*/
std::string getFileContent(const std::string& path);
/** Checks if file exists.
*
* This function returns boolean stating if file exists.
*
* @param path The absolute path provided in string format.
* @return Boolean representing if file exists or not.
*/
bool fileExists(const std::string& path);
/** Checks if file is readable.
*
* This function returns a boolean stating if the file is readable.
*
* @param path The absolute path provided in string format.
* @return Boolean representing if file is readable or not.
*/
bool fileReadable(const std::string& path);
/** Provides mimetype from filename.
*
* This function provides mimetype from file-name.
*
* @param filename string containing filename.
* @return mimetype from filename in string format.
*/
std::string getMimeTypeForFile(const std::string& filename);
/** Provides all available network interfaces
*
* This function provides the available IPv4 and IPv6 network interfaces
* as a map from the interface name to its IPv4 and/or IPv6 address(es).
*/
std::map<std::string, IpAddress> getNetworkInterfacesIPv4Or6();
/** Provides all available IPv4 network interfaces
*
* This function provides the available IPv4 network interfaces
* as a map from the interface name to its IPv4 address.
*
* Provided for backward compatibility with libkiwix v13.1.0.
*/
std::map<std::string, std::string> getNetworkInterfaces();
/** Provides the best IP address
* This function provides the best IP addresses for both ipv4 and ipv6 protocols,
* in an IpAddress struct, based on the list given by getNetworkInterfacesIPv4Or6()
*/
IpAddress getBestPublicIps();
/** Provides the best IPv4 address
* Equivalent to getBestPublicIp(false). Provided for backward compatibility
* with libkiwix v13.1.0.
*
* NOTE(review): the declaration below takes no argument, so the
* "getBestPublicIp(false)" wording above cannot refer to this overload;
* confirm which function is meant.
*/
std::string getBestPublicIp();
/** Converts file size to human readable format.
*
* This function will convert a number to its equivalent size using units.
*
* @param number file size in bytes.
* @return a human-readable string representation of the size, e.g., "2.3 KB", "1.8 MB", "5.2 GB".
*/
std::string beautifyFileSize(uint64_t number);
/**
* Load languages stored in an OPDS stream.
*
* @param content the OPDS stream.
* @return vector containing pairs of language code and their corresponding full language name.
*/
FeedLanguages readLanguagesFromFeed(const std::string& content);
/**
* Load categories stored in an OPDS stream .
*
* @param content the OPDS stream.
* @return vector containing category strings.
*/
FeedCategories readCategoriesFromFeed(const std::string& content);
/**
* Retrieve the full language name associated with a given ISO 639-3 language code.
*
* @param lang ISO 639-3 language code.
* @return full language name.
*/
std::string getLanguageSelfName(const std::string& lang);
/**
* Slugifies the filename by converting any characters reserved by the operating
* system to '_'. Note filename is only the file name and not a path.
*
* @param filename Valid UTF-8 encoded file name string.
* @return slugified string.
*/
std::string getSlugifiedFileName(const std::string& filename);
}
#endif // KIWIX_TOOLS_H

View File

@@ -23,23 +23,18 @@
#include <string>
#include <vector>
#include <map>
#include <cstdlib>
#include <zim/zim.h>
#include <mustache.hpp>
#include "stringTools.h"
namespace pugi {
class xml_node;
}
namespace zim {
class SuggestionItem;
}
namespace kiwix
{
void sleep(unsigned int milliseconds);
std::string nodeToString(const pugi::xml_node& node);
std::string converta2toa3(const std::string& a2code);
/*
* Convert all format tag string to new format
@@ -49,44 +44,13 @@ namespace kiwix
const std::string& tagName);
bool convertStrToBool(const std::string& value);
using MimeCounterType = std::map<const std::string, zim::entry_index_type>;
MimeCounterType parseMimetypeCounter(const std::string& counterData);
std::string gen_date_str();
std::string gen_uuid(const std::string& s);
// if s is empty then returns kainjow::mustache::data(false)
// otherwise kainjow::mustache::data(value)
kainjow::mustache::data onlyAsNonEmptyMustacheValue(const std::string& s);
std::string render_template(const std::string& template_str, kainjow::mustache::data data);
/**
 * Read a typed value from an environment variable.
 *
 * @param name The name of the environment variable.
 * @param defaultValue The value to fall back to.
 * @return The variable's text converted with `extractFromString<T>`, or
 *         `defaultValue` when the variable is not set or the conversion throws.
 */
template<typename T>
T getEnvVar(const char* name, const T& defaultValue)
{
  const char* const rawValue = std::getenv(name);
  if (rawValue == nullptr) {
    // Variable not set: nothing to convert.
    return defaultValue;
  }

  try {
    return extractFromString<T>(rawValue);
  } catch (...) {
    // Any failure while converting falls back to the default.
    return defaultValue;
  }
}
/**
* Collects suggestions and exposes them as a JSON string.
*
* NOTE(review): the collected data is held in a kainjow::mustache::data
* member, so the JSON is presumably produced by rendering a mustache
* template; confirm against the implementation.
*/
class Suggestions
{
public:
Suggestions();
// Add a suggestion coming from a libzim suggestion item.
void add(const zim::SuggestionItem& suggestion);
// NOTE(review): presumably adds an entry proposing a full-text search for
// `query`, localized for the `uiLang` UI language; confirm.
void addFTSearchSuggestion(const std::string& uiLang,
const std::string& query);
// Get the suggestions as a JSON string.
std::string getJSON() const;
private:
// Data accumulated by the add*() methods.
kainjow::mustache::data m_data;
};
}
#endif

View File

@@ -26,12 +26,23 @@
std::string WideToUtf8(const std::wstring& wstr);
std::wstring Utf8ToWide(const std::string& str);
#endif
bool isRelativePath(const std::string& path);
std::string computeAbsolutePath(const std::string& path, const std::string& relativePath);
std::string computeRelativePath(const std::string& path, const std::string& absolutePath);
std::string removeLastPathElement(const std::string& path);
std::string appendToDirectory(const std::string& directoryPath, const std::string& filename);
unsigned int getFileSize(const std::string& path);
std::string getFileSizeAsString(const std::string& path);
std::string getFileContent(const std::string& path);
bool fileExists(const std::string& path);
bool makeDirectory(const std::string& path);
std::string makeTmpDirectory();
bool copyFile(const std::string& sourcePath, const std::string& destPath);
std::string getLastPathElement(const std::string& path);
std::string getExecutablePath(bool realPathOnly = false);
std::string getCurrentDirectory();
std::string getDataDirectory();
bool writeTextFile(const std::string& path, const std::string& content);
std::string getMimeTypeForFile(const std::string& filename);
#endif

View File

@@ -26,5 +26,11 @@ bool matchRegex(const std::string& content, const std::string& regex);
std::string replaceRegex(const std::string& content,
const std::string& replacement,
const std::string& regex);
std::string appendToFirstOccurence(const std::string& content,
const std::string& regex,
const std::string& replacement);
std::string prependToFirstOccurence(const std::string& content,
const std::string& regex,
const std::string& replacement);
#endif

View File

@@ -21,16 +21,15 @@
#define KIWIX_STRINGTOOLS_H
#include <unicode/unistr.h>
#include <unicode/locid.h>
#include <string>
#include <vector>
#include <sstream>
#include <stdexcept>
namespace kiwix
{
std::string beautifyInteger(uint64_t number);
std::string beautifyFileSize(uint64_t number);
void printStringInHexadecimal(const char* s);
void printStringInHexadecimal(icu::UnicodeString s);
void stringReplacement(std::string& str,
@@ -41,34 +40,16 @@ std::string encodeDiples(const std::string& str);
std::string removeAccents(const std::string& text);
void loadICUExternalTables();
/**
* Language information backed by an ICU locale.
*
* NOTE(review): presumably the locale is built from the `langCode` given to
* the constructor; confirm against the implementation.
*/
class ICULanguageInfo
{
public:
explicit ICULanguageInfo(const std::string& langCode);
// NOTE(review): presumably the ISO 639-3 code of the language; confirm.
std::string iso3Code() const;
// NOTE(review): presumably the name of the language in that language itself; confirm.
std::string selfName() const;
private:
// The ICU locale; const, so it is fixed at construction.
const icu::Locale locale;
};
std::string escapeForJSON(const std::string& s, bool escapeQuote = true);
/* urlEncode() is the equivalent of JS encodeURIComponent(), with the only
* difference that the slash (/) symbol is NOT encoded. */
std::string urlEncode(const std::string& value);
std::string urlEncode(const std::string& value, bool encodeReserved = false);
std::string urlDecode(const std::string& value, bool component = false);
std::vector<std::string> split(const std::string& str, const std::string& delims, bool trimEmpty = true, bool keepDelim = false);
std::string join(const std::vector<std::string>& list, const std::string& sep);
std::string ucAll(const std::string& word);
std::string lcAll(const std::string& word);
std::string ucFirst(const std::string& word);
std::string lcFirst(const std::string& word);
/* This function is broken, related Github issue
* https://github.com/kiwix/libkiwix/issues/1188 */
std::string toTitle(const std::string& word);
std::string normalize(const std::string& word);
@@ -85,19 +66,9 @@ T extractFromString(const std::string& str) {
std::istringstream iss(str);
T ret;
iss >> ret;
if(iss.fail() || !iss.eof()) {
throw std::invalid_argument("no conversion");
}
return ret;
}
template<>
std::string extractFromString(const std::string& str);
bool startsWith(const std::string& base, const std::string& start);
std::string stripSuffix(const std::string& str, const std::string& suffix);
std::vector<std::string> getTitleVariants(const std::string& title);
} //namespace kiwix
#endif

10
kiwix.pc.in Normal file
View File

@@ -0,0 +1,10 @@
prefix=@prefix@
libdir=${prefix}/lib64
includedir=${prefix}/include
Name: libkiwix
Description: A library that contains a lot of things used by other kiwix programs
Version: @version@
Requires: @requires@
Libs: -L${libdir} -lkiwix @extra_libs@
Cflags: -I${includedir}/ @extra_cflags@

View File

@@ -1,61 +1,35 @@
project('libkiwix', 'cpp',
version : '14.1.1',
version : '10.0.0', # Also change this in android-kiwix-lib-publisher/kiwixLibAndroid/build.gradle
license : 'GPLv3+',
default_options : ['c_std=c11', 'cpp_std=c++17', 'werror=true'])
default_options : ['c_std=c11', 'cpp_std=c++11', 'werror=true'])
compiler = meson.get_compiler('cpp')
static_deps = get_option('static-linkage') or get_option('default_library') == 'static'
extra_libs = []
# Atomics as compiled by GCC or clang can lead to external references to
# functions depending on the type size and the platform. LLVM provides them in
# 'libcompiler_rt', which clang normally automatically links in, while GNU
# provides them in 'libatomic', which GCC *does not* link in automatically (but
# this is probably going to change, see
# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81358). Regardless of the setup
# of the compiler driver itself (GCC or clang), we can thus assume that if some
# atomic references can't be resolved, then 'libatomic' is missing.
atomics_program = '''
#include <atomic>
#include <cstdint>
wrapper = get_option('wrapper')
using namespace std;
int main() {
volatile atomic_bool a_b(true);
volatile atomic_ullong a_ull(-1);
// Next two lines are to cover atomic<socket_t> from 'httplib.h'.
volatile atomic<uint32_t> a_u32(-1);
volatile atomic<uint64_t> a_u64(-1);
return atomic_load(&a_b) == false && atomic_load(&a_ull) == 0 &&
atomic_load(&a_u32) == 0 && atomic_load(&a_u64) == 0;
}
'''
if not compiler.links(atomics_program,
name: 'compiler driver readily supports atomics')
libatomic = compiler.find_library('atomic')
compiler.links(atomics_program, name: 'atomics work with libatomic',
dependencies: libatomic, required: true)
extra_libs += ['-latomic']
static_deps = wrapper.contains('android') or wrapper.contains('java') or get_option('default_library') == 'static'
if wrapper.contains('android')
extra_libs = ['-llog']
else
extra_libs = []
endif
# C++ std::thread is implemented using pthread on Linux by GCC, and on FreeBSD
# for both GCC and LLVM.
if (host_machine.system() == 'linux' and compiler.get_id() == 'gcc') or \
host_machine.system() == 'freebsd'
if wrapper.contains('java')
add_languages('java')
endif
# See https://github.com/kiwix/libkiwix/issues/371
if ['arm', 'mips', 'm68k', 'ppc', 'sh4'].contains(target_machine.cpu_family())
extra_libs += '-latomic'
endif
if (compiler.get_id() == 'gcc' and build_machine.system() == 'linux') or target_machine.system() == 'freebsd'
# C++ std::thread is implemented using pthread on linux by gcc
thread_dep = dependency('threads')
else
thread_dep = dependency('', required:false)
endif
libicu_dep = dependency('icu-i18n', static:static_deps)
if libicu_dep.version().version_compare('>= 76')
libicu_deps = [libicu_dep, dependency('icu-uc', static:static_deps)]
else
libicu_deps = [libicu_dep]
endif
pugixml_dep = dependency('pugixml', static:static_deps)
libcurl_dep = dependency('libcurl', static:static_deps)
microhttpd_dep = dependency('libmicrohttpd', static:static_deps)
@@ -70,53 +44,52 @@ else
error('Cannot found header mustache.hpp')
endif
libzim_dep = dependency('libzim', version:['>=9.4.0', '<10.0.0'], static:static_deps)
if not compiler.has_header_symbol('zim/zim.h', 'LIBZIM_WITH_XAPIAN', dependencies: libzim_dep)
error('Libzim seems to be compiled without Xapian. Xapian support is mandatory.')
libzim_dep = dependency('libzim', version : '>=7.0.0', static:static_deps)
if not compiler.has_header_symbol('zim/zim.h', 'LIBZIM_WITH_XAPIAN')
error('Libzim seems to be compiled without xapian. Xapian support is mandatory.')
endif
extra_cflags = ''
if host_machine.system() == 'windows' and static_deps
if target_machine.system() == 'windows' and static_deps
add_project_arguments('-DCURL_STATICLIB', language : 'cpp')
extra_cflags += '-DCURL_STATICLIB'
endif
if host_machine.system() == 'windows'
if target_machine.system() == 'windows'
add_project_arguments('-DNOMINMAX', language: 'cpp')
extra_libs += ['-liphlpapi']
endif
if build_machine.system() == 'windows'
extra_libs += ['-lshlwapi', '-lwinmm']
endif
# Dependencies as string
all_deps = [thread_dep, libzim_dep, pugixml_dep, libcurl_dep, microhttpd_dep, zlib_dep, xapian_dep]
# Dependencies as array
all_deps += libicu_deps
all_deps = [thread_dep, libicu_dep, libzim_dep, pugixml_dep, libcurl_dep, microhttpd_dep, zlib_dep, xapian_dep]
inc = include_directories('include', extra_include)
conf = configuration_data()
conf.set('LIBKIWIX_VERSION', '"@0@"'.format(meson.project_version()))
conf.set('VERSION', '"@0@"'.format(meson.project_version()))
if build_machine.system() == 'windows'
extra_link_args = ['-lshlwapi', '-lwinmm']
else
extra_link_args = []
endif
subdir('include')
subdir('scripts')
subdir('static')
subdir('src')
subdir('test')
if get_option('doc')
subdir('docs')
endif
pkg_mod = import('pkgconfig')
pkg_mod.generate(libraries : [libkiwix] + extra_libs,
version : meson.project_version(),
name : 'libkiwix',
filebase : 'libkiwix',
description : 'A library that contains useful primitives that Kiwix readers have in common',
extra_cflags: extra_cflags)
pkg_requires = ['libzim', 'icu-i18n', 'pugixml', 'libcurl', 'libmicrohttpd', 'xapian-core']
pkg_conf = configuration_data()
pkg_conf.set('prefix', get_option('prefix'))
pkg_conf.set('requires', ' '.join(pkg_requires))
pkg_conf.set('extra_libs', ' '.join(extra_libs))
pkg_conf.set('extra_cflags', extra_cflags)
pkg_conf.set('version', meson.project_version())
configure_file(output : 'kiwix.pc',
configuration : pkg_conf,
input : 'kiwix.pc.in',
install_dir: get_option('libdir')+'/pkgconfig'
)

View File

@@ -1,4 +1,2 @@
option('static-linkage', type : 'boolean', value : false,
description : 'Link statically with the dependencies.')
option('doc', type : 'boolean', value : false,
description : 'Build the documentations.')
option('wrapper', type:'array', choices:['java', 'android'], value:[],
description: 'The wrapper to generate.')

View File

@@ -1,14 +0,0 @@
#!/usr/bin/env bash
# Run clang-format in place on every *.h and *.cpp file found under the
# repository's 'src' and 'include' directories.

# Compute the repository root (the parent of this script's directory)
SCRIPT_DIR=$(dirname "$0")
REPO_DIR=$(readlink -f "$SCRIPT_DIR"/..)
DIRS=(src include)

# Abort instead of formatting files relative to the wrong directory
cd "$REPO_DIR" || exit 1

# Apply formatting to all *.cpp and *.h files. NUL-delimited names keep
# paths containing whitespace or glob characters in one piece.
find "${DIRS[@]}" \( -name '*.h' -o -name '*.cpp' \) -print0 |
while IFS= read -r -d '' FILE
do
  echo "$FILE"
  clang-format -i -style=file "$FILE"
done

View File

@@ -1,148 +0,0 @@
#!/usr/bin/env python3
'''
Copyright 2022 Veloman Yunkan <veloman.yunkan@gmail.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or any
later version.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA.
'''
import argparse
import os.path
import re
import json
def to_identifier(name):
    """Turn an arbitrary name into a string usable as a C++ identifier.

    Every character outside [0-9a-zA-Z] is replaced with '_'. An underscore
    is prepended when the result starts with a digit, or when it is empty
    (an empty name used to raise IndexError on ident[0]).
    """
    ident = re.sub(r'[^0-9a-zA-Z]', '_', name)
    if not ident or ident[0].isnumeric():
        return "_" + ident
    return ident
def lang_code(filename):
    """Derive the language identifier from an i18n resource file path.

    The directory part and the extension are dropped and the remaining
    base name is passed through to_identifier(). The mapping is printed
    as 'basename -> identifier'.
    """
    filename = os.path.basename(filename)
    lang = to_identifier(os.path.splitext(filename)[0])
    print(filename, '->', lang)
    return lang
from string import Template
def expand_cxx_template(t, **kwargs):
    """Substitute the $NAME placeholders of template text t with kwargs.

    Uses string.Template.substitute(), so a placeholder with no matching
    keyword argument raises KeyError.
    """
    return Template(t).substitute(**kwargs)
def cxx_string_literal(s):
    """Return s as the source text of a C++ u8"..." string literal.

    JSON escaping is reused for quotes, backslashes and control characters,
    where it matches the C++ rules. Non-ASCII characters are escaped here
    rather than by json.dumps(): JSON writes a character beyond U+FFFF as a
    surrogate pair of \\uXXXX escapes, which C++ rejects as
    universal-character-names, so such characters are written as
    \\UXXXXXXXX instead. Characters up to U+FFFF keep the \\uXXXX form.
    """
    quoted = json.dumps(s, ensure_ascii=False)
    pieces = []
    for ch in quoted:
        code = ord(ch)
        if code < 0x80:
            pieces.append(ch)
        elif code <= 0xFFFF:
            pieces.append('\\u%04x' % code)
        else:
            pieces.append('\\U%08x' % code)
    return 'u8' + ''.join(pieces)
string_table_cxx_template = '''
const I18nString $TABLE_NAME[] = {
$TABLE_ENTRIES
};
'''
lang_table_entry_cxx_template = '''
{
$LANG_STRING_LITERAL,
ARRAY_ELEMENT_COUNT($STRING_TABLE_NAME),
$STRING_TABLE_NAME
}'''
cxxfile_template = '''// This file is automatically generated. Do not modify it.
#include "server/i18n_utils.h"
namespace kiwix {
namespace i18n {
namespace
{
$STRING_DATA
} // unnamed namespace
#define ARRAY_ELEMENT_COUNT(a) (sizeof(a)/sizeof(a[0]))
extern const I18nStringTable stringTables[] = {
$LANG_TABLE
};
extern const size_t langCount = $LANG_COUNT;
} // namespace i18n
} // namespace kiwix
'''
class Resource:
    """One i18n JSON resource file and the C++ table text generated from it."""

    def __init__(self, filename):
        """Read the resource file (as UTF-8) and derive its language code."""
        filename = filename.strip()
        self.filename = filename
        self.lang_code = lang_code(filename)
        with open(filename, 'r', encoding='utf-8') as f:
            self.data = f.read()

    def get_string_table_name(self):
        """Name of the C++ array holding this language's strings."""
        return "string_table_for_" + self.lang_code

    def get_string_table(self):
        """C++ definition of this language's string table."""
        table_entries = ",\n ".join(self.get_string_table_entries())
        return expand_cxx_template(string_table_cxx_template,
                                   TABLE_NAME=self.get_string_table_name(),
                                   TABLE_ENTRIES=table_entries)

    def get_string_table_entries(self):
        """Yield one '{ key, value }' initializer per message, sorted by key.

        The "@metadata" entry of the JSON file is skipped.
        """
        d = json.loads(self.data)
        for k in sorted(d.keys()):
            if k != "@metadata":
                key_string = cxx_string_literal(k)
                value_string = cxx_string_literal(d[k])
                yield '{ ' + key_string + ', ' + value_string + ' }'

    def get_lang_table_entry(self):
        """C++ initializer tying the language code to its string table."""
        return expand_cxx_template(lang_table_entry_cxx_template,
                                   LANG_STRING_LITERAL=cxx_string_literal(self.lang_code),
                                   STRING_TABLE_NAME=self.get_string_table_name())
def gen_c_file(resources):
    """Return the full text of the generated C++ file for the resources.

    Each resource contributes its string table and one entry of the
    language table; LANG_COUNT is the number of resources.
    """
    string_data = []
    lang_table = []
    for r in resources:
        string_data.append(r.get_string_table())
        lang_table.append(r.get_lang_table_entry())

    return expand_cxx_template(cxxfile_template,
                               STRING_DATA="\n".join(string_data),
                               LANG_TABLE=",\n ".join(lang_table),
                               LANG_COUNT=len(resources)
                               )
# Command-line entry point: compile the listed i18n resource files into
# the C++ file named by --cxxfile.
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('--cxxfile',
                        required=True,
                        help='The Cpp file name to generate')
    parser.add_argument('i18n_resource_files', nargs='+',
                        help='The list of resources to compile.')
    args = parser.parse_args()

    resources = [Resource(filename) for filename in args.i18n_resource_files]

    with open(args.cxxfile, 'w') as f:
        f.write(gen_c_file(resources))

View File

@@ -1,18 +0,0 @@
.TH KIWIX-COMPILE-I18N "1" "January 2022" "Kiwix" "User Commands"
.SH NAME
kiwix-compile-i18n \- helper to compile Kiwix i18n (internationalization) data
.SH SYNOPSIS
\fBkiwix\-compile\-i18n\fR [\-h] \-\-cxxfile CXXFILE i18n_resource_files ...\fR
.SH DESCRIPTION
.TP
i18n_resource_files ...
The list of i18n resources to compile.
.TP
\fB\-h\fR, \fB\-\-help\fR
show a help message and exit
.TP
\fB\-\-cxxfile\fR CXXFILE
The Cpp file name to generate
.TP
.SH AUTHOR
Veloman Yunkan <veloman.yunkan@gmail.com>

View File

@@ -52,60 +52,26 @@ resource_getter_template = """
return RESOURCE::{identifier};
"""
resource_cacheid_getter_template = """
if (name == "{common_name}")
return "{cacheid}";
"""
resource_decl_template = """{namespaces_open}
extern const std::string {identifier};
{namespaces_close}"""
BINARY_RESOURCE_EXTENSIONS = {'.ico', '.png', '.ttf'}
TEXT_RESOURCE_EXTENSIONS = {
'.css',
'.html',
'.js',
'.json',
'.svg',
'.tmpl',
'.webmanifest',
'.xml',
}
if not BINARY_RESOURCE_EXTENSIONS.isdisjoint(TEXT_RESOURCE_EXTENSIONS):
raise RuntimeError(f"The following file type extensions are declared to be both binary and text: {BINARY_RESOURCE_EXTENSIONS.intersection(TEXT_RESOURCE_EXTENSIONS)}")
def is_binary_resource(filename):
_, extension = os.path.splitext(filename)
is_binary = extension in BINARY_RESOURCE_EXTENSIONS
is_text = extension in TEXT_RESOURCE_EXTENSIONS
if not is_binary and not is_text:
# all file type extensions of static resources must be listed
# in either BINARY_RESOURCE_EXTENSIONS or TEXT_RESOURCE_EXTENSIONS
raise RuntimeError(f"Unknown file type extension: {extension}")
return is_binary
class Resource:
def __init__(self, base_dirs, filename, cacheid=None):
filename = filename
def __init__(self, base_dirs, filename):
filename = filename.strip()
self.filename = filename
self.identifier = full_identifier(filename)
self.cacheid = cacheid
found = False
for base_dir in base_dirs:
try:
with open(os.path.join(base_dir, filename), 'rb') as f:
self.data = f.read()
if not is_binary_resource(filename):
self.data = self.data.replace(b"\r\n", b"\n")
found = True
break
except FileNotFoundError:
continue
if not found:
raise Exception("Resource not found: {}".format(filename))
raise Exception("Impossible to found {}".format(filename))
def dump_impl(self):
nb_row = len(self.data)//16 + (1 if len(self.data) % 16 else 0)
@@ -127,12 +93,6 @@ class Resource:
identifier="::".join(self.identifier)
)
def dump_cacheid_getter(self):
return resource_cacheid_getter_template.format(
common_name=self.filename,
cacheid=self.cacheid
)
def dump_decl(self):
return resource_decl_template.format(
namespaces_open=" ".join("namespace {} {{".format(id) for id in self.identifier[:-1]),
@@ -142,7 +102,7 @@ class Resource:
master_c_template = """//This file is automatically generated. Do not modify it.
master_c_template = """//This file is automaically generated. Do not modify it.
#include <stdlib.h>
#include <fstream>
@@ -163,12 +123,7 @@ static std::string init_resource(const char* name, const unsigned char* content,
const std::string& getResource_{basename}(const std::string& name) {{
{RESOURCES_GETTER}
throw ResourceNotFound("Resource not found: " + name);
}}
const char* getResourceCacheId_{basename}(const std::string& name) {{
{RESOURCE_CACHEID_GETTER}
return nullptr;
throw ResourceNotFound("Resource not found.");
}}
{RESOURCES}
@@ -179,7 +134,6 @@ def gen_c_file(resources, basename):
return master_c_template.format(
RESOURCES="\n\n".join(r.dump_impl() for r in resources),
RESOURCES_GETTER="\n\n".join(r.dump_getter() for r in resources),
RESOURCE_CACHEID_GETTER="\n\n".join(r.dump_cacheid_getter() for r in resources if r.cacheid is not None),
include_file=basename,
basename=to_identifier(basename)
)
@@ -205,10 +159,8 @@ class ResourceNotFound : public std::runtime_error {{
}};
const std::string& getResource_{basename}(const std::string& name);
const char* getResourceCacheId_{basename}(const std::string& name);
#define getResource(a) (getResource_{basename}(a))
#define getResourceCacheId(a) (getResourceCacheId_{basename}(a))
#endif // KIWIX_{BASENAME}
@@ -230,17 +182,15 @@ if __name__ == "__main__":
parser.add_argument('--source_dir',
help="Additional directory where to look for resources.",
action='append')
parser.add_argument('resource_files', nargs='+',
parser.add_argument('resource_file',
help='The list of resources to compile.')
args = parser.parse_args()
base_dir = os.path.dirname(os.path.realpath(args.resource_file))
source_dir = args.source_dir or []
resources = []
for resfile in args.resource_files:
base_dir = os.path.dirname(os.path.realpath(resfile))
with open(resfile, 'r') as f:
resources += [Resource([base_dir]+source_dir, *line.strip().split())
for line in f.readlines()]
with open(args.resource_file, 'r') as f:
resources = [Resource([base_dir]+source_dir, filename)
for filename in f.readlines()]
h_identifier = to_identifier(os.path.basename(args.hfile))
with open(args.hfile, 'w') as f:

View File

@@ -2,7 +2,7 @@
.SH NAME
kiwix-compile-resources \- helper to compile and generate some Kiwix resources
.SH SYNOPSIS
\fBkiwix\-compile\-resources\fR [\-h] [\-\-cxxfile CXXFILE] [\-\-hfile HFILE] resource_file ...\fR
\fBkiwix\-compile\-resources\fR [\-h] [\-\-cxxfile CXXFILE] [\-\-hfile HFILE] resource_file\fR
.SH DESCRIPTION
.TP
resource_file

View File

@@ -1,135 +0,0 @@
#!/usr/bin/env python3
'''
Copyright 2022 Veloman Yunkan <veloman.yunkan@gmail.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or any
later version.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA.
'''
import argparse
import hashlib
import os.path
import re
def read_resource_file(resource_file_path):
    """Return the lines of the resource list file, each stripped of
    surrounding whitespace (blank lines are kept as empty strings)."""
    with open(resource_file_path, 'r') as f:
        return [line.strip() for line in f]
def list_resources(resource_file_path):
    """Print every entry of the resource list file, one per line."""
    for resource_path in read_resource_file(resource_file_path):
        print(resource_path)
def compute_resource_revision(resource_path):
    """Return the first 8 hex digits of the SHA-1 of the resource's content.

    The file is read from OUT_DIR, i.e. the hash is taken over the
    resource as found in the output directory.
    """
    with open(os.path.join(OUT_DIR, resource_path), 'rb') as f:
        return hashlib.sha1(f.read()).hexdigest()[:8]
# Cache of already computed revisions: resource path -> revision string
resource_revisions = {}

def get_resource_revision(res):
    """Return the revision of resource res, computing it on first use.

    On a cache miss the resource is preprocessed first, so that the
    revision is computed from the file present in the output directory.
    """
    if res not in resource_revisions:
        preprocess_resource(res)
        resource_revisions[res] = compute_resource_revision(res)
    return resource_revisions[res]
RESOURCE_WITH_CACHEID_URL_PATTERN=r'(?P<pre>.*/(?P<resource>skin/[^"?]+)\?)KIWIXCACHEID(?P<post>[^"]*)'
def set_cacheid(resource_matchobj):
    """re.sub() callback replacing the KIWIXCACHEID placeholder of one match.

    Expects a match object with the named groups 'pre', 'resource' and
    'post'; returns the matched text with the placeholder replaced by
    'cacheid=<revision of the resource>'.
    """
    pre = resource_matchobj.group('pre')
    resource = resource_matchobj.group('resource')
    post = resource_matchobj.group('post')
    cacheid = 'cacheid=' + get_resource_revision(resource)
    return pre + cacheid + post
def preprocess_text(s):
    """Replace every KIWIXCACHEID placeholder in s with a real cache id.

    Text without the placeholder is returned unchanged. The assertion
    guards against placeholders that the URL pattern failed to match.
    """
    if 'KIWIXCACHEID' in s:
        s = re.sub(RESOURCE_WITH_CACHEID_URL_PATTERN, set_cacheid, s)
        assert 'KIWIXCACHEID' not in s
    return s
def get_preprocessed_resource(srcpath):
    """Get the transformed content of a resource

    If the resource at srcpath is modified by preprocessing then this function
    returns the transformed content of the resource. Otherwise it returns None.
    """
    try:
        with open(srcpath, 'r') as resource_file:
            content = resource_file.read()
            preprocessed_content = preprocess_text(content)
            return preprocessed_content if preprocessed_content != content else None
    except UnicodeDecodeError:
        # It was a binary resource
        return None
def symlink_resource(src, resource_path):
    """Make resource_path a symbolic link pointing to src.

    Nothing is done when resource_path already is such a link; anything
    else found at resource_path is removed first.
    """
    # lexists() rather than exists(): exists() follows symlinks and reports
    # False for a dangling link, which would then make os.symlink() fail
    # with FileExistsError.
    if os.path.lexists(resource_path):
        if os.path.islink(resource_path) and os.readlink(resource_path) == src:
            return
        os.remove(resource_path)
    os.symlink(src, resource_path)
def preprocess_resource(resource_path):
    """Produce the OUT_DIR counterpart of the resource found under BASE_DIR.

    A resource changed by preprocessing is written out as a new file; any
    other resource becomes a symbolic link to its source file.
    """
    print('Preprocessing', resource_path, '...')
    resource_dir = os.path.dirname(resource_path)
    if resource_dir != '':
        os.makedirs(os.path.join(OUT_DIR, resource_dir), exist_ok=True)
    srcpath = os.path.join(BASE_DIR, resource_path)
    outpath = os.path.join(OUT_DIR, resource_path)
    # lexists() so that a dangling symlink left by an earlier run is removed
    # too; exists() follows the link and would report it as absent.
    if os.path.lexists(outpath):
        os.remove(outpath)
    preprocessed_content = get_preprocessed_resource(srcpath)
    if preprocessed_content is None:
        symlink_resource(srcpath, outpath)
    else:
        with open(outpath, 'w') as target:
            print(preprocessed_content, end='', file=target)
def copy_resource_list_file(src_path, dst_path):
    """Copy the resource list file, appending known revisions.

    A 'skin/' resource with an entry in resource_revisions is written as
    '<resource> <revision>'; every other line is copied verbatim.
    """
    with open(src_path, 'r') as src:
        with open(dst_path, 'w') as dst:
            for line in src:
                res = line.strip()
                # Test the stripped name for the prefix as well, so that the
                # prefix check and the dictionary lookup look at the same text.
                if res.startswith("skin/") and res in resource_revisions:
                    dst.write(res + " " + resource_revisions[res] + "\n")
                else:
                    dst.write(line)
def preprocess_resources(resource_file_path):
    """Preprocess every resource of the list file into OUT_DIR.

    'skin/' resources go through get_resource_revision() so that their
    revision gets recorded; the others are only preprocessed. A copy of
    the list file, with revisions added, is finally written to OUT_DIR.
    """
    resource_filename = os.path.basename(resource_file_path)
    for resource in read_resource_file(resource_file_path):
        if resource.startswith('skin/'):
            get_resource_revision(resource)
        else:
            preprocess_resource(resource)
    copy_resource_list_file(resource_file_path, os.path.join(OUT_DIR, resource_filename))
# Command-line entry point: either list the resources (--list-all) or
# preprocess them into --outdir (--preprocess).
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    commands = parser.add_mutually_exclusive_group()
    commands.add_argument('--list-all', action='store_true')
    commands.add_argument('--preprocess', action='store_true')
    parser.add_argument('--outdir')
    parser.add_argument('resource_file')
    args = parser.parse_args()
    # Without an output directory the preprocessing would only fail later
    # with a TypeError inside os.path.join(); report the problem up front.
    if args.preprocess and args.outdir is None:
        parser.error('--preprocess requires --outdir')
    BASE_DIR = os.path.dirname(os.path.realpath(args.resource_file))
    OUT_DIR = args.outdir

    if args.list_all:
        list_resources(args.resource_file)
    elif args.preprocess:
        preprocess_resources(args.resource_file)

View File

@@ -1,13 +1,6 @@
res_manager = find_program('kiwix-resources')
res_compiler = find_program('kiwix-compile-resources')
install_data(res_compiler.path(), install_dir:get_option('bindir'))
install_man('kiwix-compile-resources.1')
i18n_compiler = find_program('kiwix-compile-i18n')
install_data(i18n_compiler.path(), install_dir:get_option('bindir'))
install_man('kiwix-compile-i18n.1')

View File

@@ -3,16 +3,13 @@
#include "aria2.h"
#include "xmlrpc.h"
#include <iostream>
#include <algorithm>
#include <fstream>
#include <sstream>
#include <thread>
#include <chrono>
#include "tools.h"
#include "tools/pathTools.h"
#include "tools/stringTools.h"
#include "tools/otherTools.h"
#include "downloader.h" // For AriaError
#include <tools/otherTools.h>
#include <tools/pathTools.h>
#include <tools/stringTools.h>
#include <downloader.h> // For AriaError
#ifdef _WIN32
# define ARIA2_CMD "aria2c.exe"
@@ -25,46 +22,25 @@
#define LOG_ARIA_ERROR() \
{ \
std::cerr << "ERROR: aria2 RPC request failed. (" << res << ")." << std::endl; \
std::cerr << (curlErrorBuffer[0] ? curlErrorBuffer : curl_easy_strerror(res)) << std::endl; \
std::cerr << (m_curlErrorBuffer[0] ? m_curlErrorBuffer.get() : curl_easy_strerror(res)) << std::endl; \
}
namespace kiwix {
namespace {
// Rewrites the session file in place so that every entry in it carries
// " pause=true". A line that is non-empty and starts with neither a space
// nor '#' is taken as the start of an entry; existing " pause=" lines are
// dropped so that the setting is not duplicated. Does nothing when the file
// cannot be opened for reading.
void pauseAnyActiveDownloads(const std::string& ariaSessionFilePath)
{
  std::ifstream sessionIn(ariaSessionFilePath);
  if ( sessionIn ) {
    std::ostringstream rewritten;
    for ( std::string entry; std::getline(sessionIn, entry); ) {
      const bool isPauseOption = startsWith(entry, " pause=");
      if ( !isPauseOption ) {
        rewritten << entry << "\n";
      }

      const bool startsNewEntry = !entry.empty() && entry[0] != ' ' && entry[0] != '#';
      if ( startsNewEntry ) {
        rewritten << " pause=true\n";
      }
    }

    // The whole content has been buffered, so the file can now be truncated
    // and written back.
    std::ofstream sessionOut(ariaSessionFilePath);
    sessionOut << rewritten.str();
  }
}
} // unnamed namespace
Aria2::Aria2(std::string sessionFileDir):
Aria2::Aria2():
mp_aria(nullptr),
m_port(42042),
m_secret(getNewRpcSecret())
m_secret("kiwixariarpc"),
m_curlErrorBuffer(new char[CURL_ERROR_SIZE]),
mp_curl(nullptr)
{
m_downloadDir = getDataDirectory();
makeDirectory(m_downloadDir);
std::vector<const char*> callCmd;
std::string rpc_port = "--rpc-listen-port=" + to_string(m_port);
std::string session_file = appendToDirectory(sessionFileDir, "kiwix.session");
pauseAnyActiveDownloads(session_file);
std::string download_dir = "--dir=" + getDataDirectory();
std::string session_file = appendToDirectory(getDataDirectory(), "kiwix.session");
std::string session = "--save-session=" + session_file;
std::string inputFile = "--input-file=" + session_file;
// std::string log_dir = "--log=\"" + logDir + "\"";
@@ -91,7 +67,8 @@ Aria2::Aria2(std::string sessionFileDir):
callCmd.push_back("--enable-rpc");
callCmd.push_back(rpc_secret.c_str());
callCmd.push_back(rpc_port.c_str());
if (fileReadable(session_file)) {
callCmd.push_back(download_dir.c_str());
if (fileExists(session_file)) {
callCmd.push_back(inputFile.c_str());
}
callCmd.push_back(session.c_str());
@@ -112,42 +89,36 @@ Aria2::Aria2(std::string sessionFileDir):
launchCmd.append(cmd).append(" ");
}
mp_aria = Subprocess::run(callCmd);
mp_curl = curl_easy_init();
CURL* p_curl = curl_easy_init();
char curlErrorBuffer[CURL_ERROR_SIZE];
curl_easy_setopt(mp_curl, CURLOPT_URL, "http://localhost/rpc");
curl_easy_setopt(mp_curl, CURLOPT_PORT, m_port);
curl_easy_setopt(mp_curl, CURLOPT_POST, 1L);
curl_easy_setopt(mp_curl, CURLOPT_ERRORBUFFER, m_curlErrorBuffer.get());
curl_easy_setopt(p_curl, CURLOPT_URL, "http://localhost/rpc");
curl_easy_setopt(p_curl, CURLOPT_PORT, m_port);
curl_easy_setopt(p_curl, CURLOPT_POST, 1L);
curl_easy_setopt(p_curl, CURLOPT_ERRORBUFFER, curlErrorBuffer);
curl_easy_setopt(p_curl, CURLOPT_TIMEOUT_MS, 100);
typedef std::chrono::duration<double> Seconds;
const double MAX_WAITING_TIME_SECONDS = 1;
const auto t0 = std::chrono::steady_clock::now();
bool maxWaitingTimeWasExceeded = false;
CURLcode res = CURLE_OK;
while ( !maxWaitingTimeWasExceeded ) {
int watchdog = 50;
while(--watchdog) {
sleep(10);
curlErrorBuffer[0] = 0;
res = curl_easy_perform(p_curl);
m_curlErrorBuffer[0] = 0;
auto res = curl_easy_perform(mp_curl);
if (res == CURLE_OK) {
break;
} else if (watchdog == 1) {
LOG_ARIA_ERROR();
}
const auto dt = std::chrono::steady_clock::now() - t0;
const double elapsedTime = std::chrono::duration_cast<Seconds>(dt).count();
maxWaitingTimeWasExceeded = elapsedTime > MAX_WAITING_TIME_SECONDS;
}
curl_easy_cleanup(p_curl);
if ( maxWaitingTimeWasExceeded ) {
LOG_ARIA_ERROR();
if (!watchdog) {
curl_easy_cleanup(mp_curl);
throw std::runtime_error("Cannot connect to aria2c rpc. Aria2c launch cmd : " + launchCmd);
}
}
// Releases the curl handle, holding m_lock while doing so.
Aria2::~Aria2()
{
  const std::lock_guard<std::mutex> guard(m_lock);
  curl_easy_cleanup(mp_curl);
}
void Aria2::close()
{
saveSession();
@@ -167,25 +138,20 @@ std::string Aria2::doRequest(const MethodCall& methodCall)
std::stringstream outStream;
CURLcode res;
long response_code;
char curlErrorBuffer[CURL_ERROR_SIZE];
CURL* p_curl = curl_easy_init();
curl_easy_setopt(p_curl, CURLOPT_URL, "http://localhost/rpc");
curl_easy_setopt(p_curl, CURLOPT_PORT, m_port);
curl_easy_setopt(p_curl, CURLOPT_POST, 1L);
curl_easy_setopt(p_curl, CURLOPT_ERRORBUFFER, curlErrorBuffer);
curl_easy_setopt(p_curl, CURLOPT_POSTFIELDSIZE, requestContent.size());
curl_easy_setopt(p_curl, CURLOPT_POSTFIELDS, requestContent.c_str());
curl_easy_setopt(p_curl, CURLOPT_WRITEFUNCTION, &write_callback_to_iss);
curl_easy_setopt(p_curl, CURLOPT_WRITEDATA, &outStream);
curlErrorBuffer[0] = 0;
res = curl_easy_perform(p_curl);
if (res != CURLE_OK) {
LOG_ARIA_ERROR();
curl_easy_cleanup(p_curl);
throw std::runtime_error("Cannot perform request");
{
std::unique_lock<std::mutex> lock(m_lock);
curl_easy_setopt(mp_curl, CURLOPT_POSTFIELDSIZE, requestContent.size());
curl_easy_setopt(mp_curl, CURLOPT_POSTFIELDS, requestContent.c_str());
curl_easy_setopt(mp_curl, CURLOPT_WRITEFUNCTION, &write_callback_to_iss);
curl_easy_setopt(mp_curl, CURLOPT_WRITEDATA, &outStream);
m_curlErrorBuffer[0] = 0;
res = curl_easy_perform(mp_curl);
if (res != CURLE_OK) {
LOG_ARIA_ERROR();
throw std::runtime_error("Cannot perform request");
}
curl_easy_getinfo(mp_curl, CURLINFO_RESPONSE_CODE, &response_code);
}
curl_easy_getinfo(p_curl, CURLINFO_RESPONSE_CODE, &response_code);
curl_easy_cleanup(p_curl);
auto responseContent = outStream.str();
if (response_code != 200) {
@@ -229,13 +195,6 @@ std::string Aria2::tellStatus(const std::string& gid, const std::vector<std::str
return doRequest(methodCall);
}
// Returns a fresh RPC secret: the first 9 characters of a newly generated
// UUID with its '-' separators removed.
std::string Aria2::getNewRpcSecret()
{
  std::string uuid = gen_uuid("");
  // Erase-remove idiom: std::remove() only moves the kept characters to the
  // front; the two-iterator erase() then drops the whole leftover tail. The
  // single-iterator overload erased just one character and is undefined
  // behaviour when std::remove() returns end() (no '-' in the string).
  uuid.erase(std::remove(uuid.begin(), uuid.end(), '-'), uuid.end());
  return uuid.substr(0, 9);
}
std::vector<std::string> Aria2::tellActive()
{
MethodCall methodCall("aria2.tellActive", m_secret);

View File

@@ -12,6 +12,7 @@
#include "xmlrpc.h"
#include <memory>
#include <mutex>
#include <curl/curl.h>
namespace kiwix {
@@ -22,16 +23,20 @@ class Aria2
std::unique_ptr<Subprocess> mp_aria;
int m_port;
std::string m_secret;
std::string m_downloadDir;
std::unique_ptr<char[]> m_curlErrorBuffer;
CURL* mp_curl;
std::mutex m_lock;
std::string doRequest(const MethodCall& methodCall);
public:
explicit Aria2(std::string sessionFileDir);
virtual ~Aria2() = default;
Aria2();
virtual ~Aria2();
void close();
std::string addUri(const std::vector<std::string>& uri, const std::vector<std::pair<std::string, std::string>>& options = {});
std::string tellStatus(const std::string& gid, const std::vector<std::string>& statusKey);
static std::string getNewRpcSecret();
std::vector<std::string> tellActive();
std::vector<std::string> tellWaiting();
void saveSession();

View File

@@ -18,18 +18,13 @@
*/
#include "book.h"
#include "reader.h"
#include "tools.h"
#include "tools/base64.h"
#include "tools/regexTools.h"
#include "tools/networkTools.h"
#include "tools/otherTools.h"
#include "tools/stringTools.h"
#include "tools/pathTools.h"
#include "tools/archiveTools.h"
#include <zim/archive.h>
#include <zim/item.h>
#include <pugixml.hpp>
namespace kiwix
@@ -40,17 +35,11 @@ Book::Book() :
m_readOnly(false)
{
}
/* Destructor: nothing to release beyond what the members clean up themselves */
Book::~Book() = default;
// Returns the book's illustrations. The return type is a value, so the
// caller receives a copy of m_illustrations.
Book::Illustrations Book::getIllustrations() const
{
return m_illustrations;
}
bool Book::update(const kiwix::Book& other)
{
if (m_readOnly)
@@ -59,39 +48,55 @@ bool Book::update(const kiwix::Book& other)
if (m_id != other.m_id)
return false;
*this = other;
m_readOnly = other.m_readOnly;
m_path = other.m_path;
m_pathValid = other.m_pathValid;
m_title = other.m_title;
m_description = other.m_description;
m_language = other.m_language;
m_creator = other.m_creator;
m_publisher = other.m_publisher;
m_date = other.m_date;
m_url = other.m_url;
m_name = other.m_name;
m_flavour = other.m_flavour;
m_tags = other.m_tags;
m_category = other.m_category;
m_origId = other.m_origId;
m_articleCount = other.m_articleCount;
m_mediaCount = other.m_mediaCount;
m_size = other.m_size;
m_favicon = other.m_favicon;
m_faviconMimeType = other.m_faviconMimeType;
m_faviconUrl = other.m_faviconUrl;
m_downloadId = other.m_downloadId;
return true;
}
void Book::update(const zim::Archive& archive) {
m_path = archive.getFilename();
void Book::update(const kiwix::Reader& reader)
{
m_path = reader.getZimFilePath();
m_pathValid = true;
m_id = std::string(archive.getUuid());
m_title = getArchiveTitle(archive);
m_description = getMetaDescription(archive);
m_language = getMetaLanguage(archive);
m_creator = getMetaCreator(archive);
m_publisher = getMetaPublisher(archive);
m_date = getMetaDate(archive);
m_name = getMetaName(archive);
m_flavour = getMetaFlavour(archive);
m_tags = getMetaTags(archive);
m_id = reader.getId();
m_title = reader.getTitle();
m_description = reader.getDescription();
m_language = reader.getLanguage();
m_creator = reader.getCreator();
m_publisher = reader.getPublisher();
m_date = reader.getDate();
m_name = reader.getName();
m_flavour = reader.getFlavour();
m_tags = reader.getTags();
m_category = getCategoryFromTags();
m_articleCount = archive.getArticleCount();
m_mediaCount = archive.getMediaCount();
m_size = static_cast<uint64_t>(getArchiveFileSize(archive)) << 10;
m_origId = reader.getOrigId();
m_articleCount = reader.getArticleCount();
m_mediaCount = reader.getMediaCount();
m_size = static_cast<uint64_t>(reader.getFileSize()) << 10;
m_pathValid = true;
m_illustrations.clear();
for ( const auto& illustrationInfo : archive.getIllustrationInfos() ) {
const auto illustration = std::make_shared<Illustration>();
const zim::Item illustrationItem = archive.getIllustrationItem(illustrationInfo);
illustration->width = illustrationInfo.width;
illustration->height = illustrationInfo.height;
illustration->mimeType = illustrationItem.getMimetype();
illustration->data = illustrationItem.getData();
// NOTE: illustration->url is left uninitialized
m_illustrations.push_back(illustration);
}
reader.getFavicon(m_favicon, m_faviconMimeType);
}
#define ATTR(name) node.attribute(name).value()
@@ -103,7 +108,7 @@ void Book::updateFromXml(const pugi::xml_node& node, const std::string& baseDir)
path = computeAbsolutePath(baseDir, path);
}
m_path = path;
m_pathValid = fileReadable(path);
m_pathValid = fileExists(path);
m_title = ATTR("title");
m_description = ATTR("description");
m_language = ATTR("language");
@@ -118,15 +123,9 @@ void Book::updateFromXml(const pugi::xml_node& node, const std::string& baseDir)
m_articleCount = strtoull(ATTR("articleCount"), 0, 0);
m_mediaCount = strtoull(ATTR("mediaCount"), 0, 0);
m_size = strtoull(ATTR("size"), 0, 0) << 10;
const std::string faviconMimeType = ATTR("faviconMimeType");
const std::string faviconBase64EncodedData = ATTR("favicon");
if ( !faviconMimeType.empty() && !faviconBase64EncodedData.empty() ) {
const auto favicon = std::make_shared<Illustration>();
favicon->data = base64_decode(faviconBase64EncodedData);
favicon->mimeType = faviconMimeType;
favicon->url = ATTR("faviconUrl");
m_illustrations.assign(1, favicon);
}
m_favicon = base64_decode(ATTR("favicon"));
m_faviconMimeType = ATTR("faviconMimeType");
m_faviconUrl = ATTR("faviconUrl");
try {
m_downloadId = ATTR("downloadId");
} catch(...) {}
@@ -157,9 +156,7 @@ void Book::updateFromOpds(const pugi::xml_node& node, const std::string& urlHost
m_language = VALUE("language");
m_creator = node.child("author").child("name").child_value();
m_publisher = node.child("publisher").child("name").child_value();
const std::string dcIssuedDate = VALUE("dc:issued");
m_date = dcIssuedDate.empty() ? VALUE("updated") : dcIssuedDate;
m_date = fromOpdsDate(m_date);
m_date = fromOpdsDate(VALUE("updated"));
m_name = VALUE("name");
m_flavour = VALUE("flavour");
m_tags = VALUE("tags");
@@ -176,11 +173,8 @@ void Book::updateFromOpds(const pugi::xml_node& node, const std::string& urlHost
m_size = strtoull(linkNode.attribute("length").value(), 0, 0);
}
if (rel == "http://opds-spec.org/image/thumbnail") {
const auto favicon = std::make_shared<Illustration>();
favicon->data.clear();
favicon->url = urlHost + linkNode.attribute("href").value();
favicon->mimeType = linkNode.attribute("type").value();
m_illustrations.assign(1, favicon);
m_faviconUrl = urlHost + linkNode.attribute("href").value();
m_faviconMimeType = linkNode.attribute("type").value();
}
}
@@ -191,7 +185,7 @@ std::string Book::getHumanReadableIdFromPath() const
{
std::string id = m_path;
if (!id.empty()) {
id = kiwix::removeAccents(id);
kiwix::removeAccents(id);
#ifdef _WIN32
id = replaceRegex(id, "", "^.*\\\\");
@@ -213,54 +207,15 @@ void Book::setPath(const std::string& path)
: path;
}
const Book::Illustration Book::missingDefaultIllustration;
std::shared_ptr<const Book::Illustration> Book::getIllustration(unsigned int size) const
{
for ( const auto& ilPtr : m_illustrations ) {
if (ilPtr->width == size && ilPtr->height == size) {
return ilPtr;
}
}
throw std::runtime_error("Cannot find illustration");
}
const Book::Illustration& Book::getDefaultIllustration() const
{
try {
return *getIllustration(48);
} catch (...) {
return missingDefaultIllustration;
}
}
const std::string& Book::Illustration::getData() const
{
if (data.empty() && !url.empty()) {
const std::lock_guard<std::mutex> l(mutex);
if ( data.empty() ) {
try {
data = download(url);
} catch(...) {
std::cerr << "Cannot download favicon from " << url;
}
}
}
return data;
}
const std::string& Book::getFavicon() const {
return getDefaultIllustration().getData();
}
const std::string& Book::getFaviconUrl() const
{
return getDefaultIllustration().url;
}
const std::string& Book::getFaviconMimeType() const
{
return getDefaultIllustration().mimeType;
if (m_favicon.empty() && !m_faviconUrl.empty()) {
try {
m_favicon = download(m_faviconUrl);
} catch(...) {
std::cerr << "Cannot download favicon from " << m_faviconUrl;
}
}
return m_favicon;
}
std::string Book::getTagStr(const std::string& tagName) const {
@@ -288,9 +243,4 @@ std::string Book::getCategoryFromTags() const
}
}
const std::vector<std::string> Book::getLanguages() const
{
return kiwix::split(m_language, ",");
}
}

View File

@@ -18,7 +18,6 @@
*/
#include "bookmark.h"
#include "book.h"
#include <pugixml.hpp>
@@ -29,17 +28,6 @@ Bookmark::Bookmark()
{
}
Bookmark::Bookmark(const Book& book, const std::string& path, const std::string& title):
m_bookId(book.getId()),
m_bookTitle(book.getTitle()),
m_bookName(book.getName()),
m_bookFlavour(book.getFlavour()),
m_url(path),
m_title(title),
m_language(book.getCommaSeparatedLanguages()),
m_date(book.getDate())
{}
/* Destructor */
Bookmark::~Bookmark()
{
@@ -50,8 +38,6 @@ void Bookmark::updateFromXml(const pugi::xml_node& node)
auto bookNode = node.child("book");
m_bookId = bookNode.child("id").child_value();
m_bookTitle = bookNode.child("title").child_value();
m_bookName = bookNode.child("name").child_value();
m_bookFlavour = bookNode.child("flavour").child_value();
m_language = bookNode.child("language").child_value();
m_date = bookNode.child("date").child_value();
m_title = node.child("title").child_value();

View File

@@ -1,3 +1,3 @@
#mesondefine LIBKIWIX_VERSION
#mesondefine VERSION

View File

@@ -18,7 +18,6 @@
*/
#include "downloader.h"
#include "tools.h"
#include "tools/pathTools.h"
#include "tools/stringTools.h"
@@ -125,50 +124,38 @@ void Download::cancelDownload()
}
/* Constructor */
Downloader::Downloader(std::string sessionFileDir) :
mp_aria(new Aria2(sessionFileDir))
Downloader::Downloader() :
mp_aria(new Aria2())
{
try {
for (auto gid : mp_aria->tellWaiting()) {
m_knownDownloads[gid] = std::unique_ptr<Download>(new Download(mp_aria, gid));
m_knownDownloads[gid]->updateStatus(false);
}
} catch (std::exception& e) {
std::cerr << "aria2 tellWaiting failed : " << e.what() << std::endl;
}
try {
for (auto gid : mp_aria->tellActive()) {
if( m_knownDownloads.find(gid) == m_knownDownloads.end()) {
m_knownDownloads[gid] = std::unique_ptr<Download>(new Download(mp_aria, gid));
m_knownDownloads[gid]->updateStatus(false);
}
m_knownDownloads[gid] = std::unique_ptr<Download>(new Download(mp_aria, gid));
m_knownDownloads[gid]->updateStatus();
}
} catch (std::exception& e) {
std::cerr << "aria2 tellActive failed : " << e.what() << std::endl;
}
try {
for (auto gid : mp_aria->tellWaiting()) {
m_knownDownloads[gid] = std::unique_ptr<Download>(new Download(mp_aria, gid));
m_knownDownloads[gid]->updateStatus();
}
} catch (std::exception& e) {
std::cerr << "aria2 tellWaiting failed : " << e.what() << std::endl;
}
}
/* Destructor */
Downloader::~Downloader()
{
close();
}
void Downloader::close()
{
if ( mp_aria ) {
try {
mp_aria->close();
} catch (const std::exception& err) {
std::cerr << "ERROR: Failed to save the downloader state: "
<< err.what() << std::endl;
}
mp_aria.reset();
}
mp_aria->close();
}
std::vector<std::string> Downloader::getDownloadIds() const {
std::unique_lock<std::mutex> lock(m_lock);
std::vector<std::string> Downloader::getDownloadIds() {
std::vector<std::string> ret;
for(auto& p:m_knownDownloads) {
ret.push_back(p.first);
@@ -176,82 +163,42 @@ std::vector<std::string> Downloader::getDownloadIds() const {
return ret;
}
namespace
Download* Downloader::startDownload(const std::string& uri, const std::vector<std::pair<std::string, std::string>>& options)
{
bool downloadCanBeReused(const Download& d,
const std::string& uri,
const Downloader::Options& /*options*/)
{
const auto& uris = d.getUris();
const bool sameURI = std::find(uris.begin(), uris.end(), uri) != uris.end();
if ( !sameURI )
return false;
switch ( d.getStatus() ) {
case Download::K_ERROR:
case Download::K_UNKNOWN:
case Download::K_REMOVED:
return false;
case Download::K_ACTIVE:
case Download::K_WAITING:
case Download::K_PAUSED:
return true; // XXX: what if options are different?
case Download::K_COMPLETE:
return fileExists(d.getPath()); // XXX: what if options are different?
}
return false;
}
} // unnamed namespace
std::shared_ptr<Download> Downloader::startDownload(const std::string& uri, const std::string& downloadDir, Options options)
{
std::unique_lock<std::mutex> lock(m_lock);
options.erase(std::remove_if(options.begin(), options.end(), [](const auto& option) {
return option.first == "dir";
}), options.end());
options.push_back({"dir", downloadDir});
for (auto& p: m_knownDownloads) {
auto& d = p.second;
if ( downloadCanBeReused(*d, uri, options) )
return d;
auto& uris = d->getUris();
if (std::find(uris.begin(), uris.end(), uri) != uris.end())
return d.get();
}
std::vector<std::string> uris = {uri};
auto gid = mp_aria->addUri(uris, options);
m_knownDownloads[gid] = std::make_shared<Download>(mp_aria, gid);
return m_knownDownloads[gid];
m_knownDownloads[gid] = std::unique_ptr<Download>(new Download(mp_aria, gid));
return m_knownDownloads[gid].get();
}
std::shared_ptr<Download> Downloader::getDownload(const std::string& did)
Download* Downloader::getDownload(const std::string& did)
{
std::unique_lock<std::mutex> lock(m_lock);
try {
return m_knownDownloads.at(did);
m_knownDownloads.at(did).get()->updateStatus(true);
return m_knownDownloads.at(did).get();
} catch(std::exception& e) {
for (auto gid : mp_aria->tellWaiting()) {
if (gid == did) {
m_knownDownloads[gid] = std::make_shared<Download>(mp_aria, gid);
return m_knownDownloads[gid];
}
}
for (auto gid : mp_aria->tellActive()) {
if (gid == did) {
m_knownDownloads[gid] = std::make_shared<Download>(mp_aria, gid);
return m_knownDownloads[gid];
m_knownDownloads[gid] = std::unique_ptr<Download>(new Download(mp_aria, gid));
m_knownDownloads.at(gid).get()->updateStatus(true);
return m_knownDownloads[gid].get();
}
}
for (auto gid : mp_aria->tellWaiting()) {
if (gid == did) {
m_knownDownloads[gid] = std::unique_ptr<Download>(new Download(mp_aria, gid));
m_knownDownloads.at(gid).get()->updateStatus(true);
return m_knownDownloads[gid].get();
}
}
throw e;
}
}
size_t Downloader::getNbDownload() const {
std::unique_lock<std::mutex> lock(m_lock);
return m_knownDownloads.size();
}
}

73
src/entry.cpp Normal file
View File

@@ -0,0 +1,73 @@
/*
* Copyright 2018-2020 Matthieu Gautier <mgautier@kymeria.fr>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*/
#include "reader.h"
#include <time.h>
namespace kiwix
{
// Builds a kiwix::Entry around a libzim entry: the given zim::Entry is
// stored in the `entry` member, which every other method forwards to.
Entry::Entry(zim::Entry entry)
: entry(entry)
{
}
// Size of the item behind this entry.
// A redirect is reported as 0; it is not followed here.
size_type Entry::getSize() const
{
  if (!entry.isRedirect()) {
    return entry.getItem().getSize();
  }
  return 0;
}
// Mimetype of the item behind this entry.
// The item is requested with getItem(true) before its mimetype is read.
std::string Entry::getMimetype() const
{
  const auto item = entry.getItem(true);
  return item.getMimetype();
}
// Tells whether this entry is a redirect; simply forwards to the wrapped
// entry's isRedirect().
bool Entry::isRedirect() const
{
return entry.isRedirect();
}
// Returns the entry this redirect points to (one hop only).
// Throws NoEntry when this entry is not a redirect.
Entry Entry::getRedirectEntry() const
{
  if (entry.isRedirect()) {
    return entry.getRedirectEntry();
  }
  throw NoEntry();
}
// Follows the redirect chain starting at this entry and returns the first
// entry that is not a redirect.
// At most 42 hops are followed so that a redirect cycle cannot loop forever;
// NoEntry is thrown if the chain still has not ended by then.
Entry Entry::getFinalEntry() const
{
  auto current = entry;
  for (int remainingHops = 42; current.isRedirect() && remainingHops--; ) {
    current = current.getRedirectEntry();
  }

  // Still a redirect: the hop budget ran out before the chain ended.
  if (current.isRedirect()) {
    throw NoEntry();
  }
  return current;
}
}

View File

@@ -1,144 +0,0 @@
#include "html_dumper.h"
#include "libkiwix-resources.h"
#include "tools/otherTools.h"
#include "tools.h"
#include "tools/regexTools.h"
#include "server/i18n_utils.h"
namespace kiwix
{
/* Constructor: forwards the library and the name mapper to the
 * LibraryDumper base class; HTMLDumper sets up nothing of its own here. */
HTMLDumper::HTMLDumper(const Library* library, const NameMapper* nameMapper)
: LibraryDumper(library, nameMapper)
{
}
/* Destructor: no explicit cleanup is performed. */
HTMLDumper::~HTMLDumper() = default;
namespace {
// Turns an identifier-like title (e.g. a category name) into a display
// string: underscores become spaces and the first character is upper-cased.
// An empty title is returned unchanged.
std::string humanFriendlyTitle(std::string title)
{
  // replaceRegex's argument order is (content, replacement, regex): the
  // pattern to look for is "_" and it is replaced by " ".
  std::string humanFriendlyString = replaceRegex(title, " ", "_");
  if (!humanFriendlyString.empty()) {
    // toupper() is only defined for values representable as unsigned char
    // (or EOF); go through unsigned char so that bytes >= 0x80 (e.g. UTF-8
    // lead bytes) are not passed as negative ints.
    const auto first = static_cast<unsigned char>(humanFriendlyString[0]);
    humanFriendlyString[0] = static_cast<char>(toupper(first));
  }
  return humanFriendlyString;
}
// Builds the mustache object describing the language(s) of a book.
//  - no language:        short name "",    full name "???"
//  - exactly one:        short name is the code, full name its self-name
//  - several languages:  short name "mul", full name is the comma-joined
//                        list of the self-names
kainjow::mustache::object getLangTag(const std::vector<std::string>& bookLanguages) {
  std::string shortName;
  std::string fullName = "???";

  if (bookLanguages.size() == 1) {
    shortName = bookLanguages[0];
    fullName = getLanguageSelfName(shortName);
  } else if (!bookLanguages.empty()) {
    shortName = "mul";
    std::vector<std::string> selfNames;
    for (const auto& code : bookLanguages) {
      selfNames.push_back(getLanguageSelfName(code));
    }
    fullName = kiwix::join(selfNames, ",");
  }

  kainjow::mustache::object langTag;
  langTag["langShortString"] = shortName;
  langTag["langFullString"] = fullName;
  return langTag;
}
// Splits a ';'-separated tag string and returns a mustache list with one
// {"tag": <name>} object per tag, leaving out tags whose name starts
// with '_'.
kainjow::mustache::list getTagList(std::string tags)
{
  kainjow::mustache::list visibleTags;
  for (const auto& tagName : kiwix::split(tags, ";", true, false)) {
    if (tagName[0] == '_') {
      continue;
    }
    visibleTags.push_back(kainjow::mustache::object{
      {"tag", tagName}
    });
  }
  return visibleTags;
}
} // unnamed namespace
// Renders the no-JS library page (template no_js_library_page_html) for the
// books of the library matching `filter`.
// The page data consists of the matching books, the language and category
// lists (the ones equal to the filter's values are flagged "selected"),
// the search query and the translated UI strings for m_userLang.
std::string HTMLDumper::dumpPlainHTML(kiwix::Filter filter) const
{
  const auto matchingBookIds = library->filter(filter);
  const auto queryText = filter.getQuery();
  auto languageOptions = getLanguageData();
  auto categoryOptions = getCategoryData();

  // Flag the active category and give every category a display name.
  for (auto& categoryOption : categoryOptions) {
    const auto name = categoryOption.get("name")->string_value();
    if (name == filter.getCategory()) {
      categoryOption["selected"] = true;
    }
    categoryOption["hf_name"] = humanFriendlyTitle(name);
  }

  // Flag the active language.
  for (auto& languageOption : languageOptions) {
    if (languageOption.get("lang_code")->string_value() == filter.getLang()) {
      languageOption["selected"] = true;
    }
  }

  kainjow::mustache::list bookEntries;
  for (const auto& id : matchingBookIds) {
    const auto book = library->getBookById(id);

    // Best effort: if the name lookup fails, the book is still listed,
    // with an empty id.
    std::string contentName = "";
    try {
      contentName = urlEncode(nameMapper->getNameForId(id));
    } catch (...) {}

    const auto title = book.getTitle();
    const auto description = book.getDescription();
    const auto iconUrl = rootLocation + "/catalog/v2/illustration/" + id + "/?size=48";
    const auto tagString = book.getTags();
    const auto hasDownload = (book.getUrl() != "");
    const auto langTag = getLangTag(book.getLanguages());
    std::string iconStyle = "style=background-image:url(" + iconUrl + ")";

    bookEntries.push_back(kainjow::mustache::object{
      {"id", contentName},
      {"title", title},
      {"description", description},
      {"langTag", langTag},
      {"faviconAttr", iconStyle},
      {"tagList", getTagList(tagString)},
      {"downloadAvailable", hasDownload}
    });
  }

  auto translate = i18n::GetTranslatedStringWithMsgId(m_userLang);
  const auto uiStrings = kainjow::mustache::object{
    translate("search"),
    translate("download"),
    translate("count-of-matching-books", {{"COUNT", to_string(matchingBookIds.size())}}),
    translate("book-filtering-all-categories"),
    translate("book-filtering-all-languages"),
    translate("powered-by-kiwix-html"),
    translate("welcome-to-kiwix-server"),
    translate("preview-book"),
    translate("welcome-page-overzealous-filter", {{"URL", "?lang="}})
  };

  return render_template(
    RESOURCE::templates::no_js_library_page_html,
    kainjow::mustache::object{
      {"root", rootLocation},
      {"contentAccessUrl", onlyAsNonEmptyMustacheValue(contentAccessUrl)},
      {"books", bookEntries},
      {"searchQuery", queryText},
      {"languages", languageOptions},
      {"categories", categoryOptions},
      {"noResults", matchingBookIds.empty()},
      {"translations", uiStrings}
    }
  );
}
} // namespace kiwix

View File

@@ -9,7 +9,6 @@
# include <unistd.h>
#endif
#include "tools.h"
#include "tools/pathTools.h"
#include "tools/stringTools.h"

View File

@@ -19,28 +19,23 @@
#include "library.h"
#include "book.h"
#include "reader.h"
#include "libxml_dumper.h"
#include "tools.h"
#include "tools/base64.h"
#include "tools/regexTools.h"
#include "tools/pathTools.h"
#include "tools/stringTools.h"
#include "tools/otherTools.h"
#include "tools/concurrent_cache.h"
#include <pugixml.hpp>
#include <algorithm>
#include <set>
#include <cmath>
#include <unicode/locid.h>
#include <xapian.h>
namespace kiwix
{
namespace
{
@@ -53,101 +48,51 @@ std::string normalizeText(const std::string& text)
return removeAccents(text);
}
bool booksReferToTheSameArchive(const Book& book1, const Book& book2)
{
return book1.isPathValid()
&& book2.isPathValid()
&& book1.getPath() == book2.getPath();
}
} // unnamed namespace
template<typename Key, typename Value>
class MultiKeyCache: public ConcurrentCache<std::set<Key>, Value>
class Library::BookDB : public Xapian::WritableDatabase
{
public:
explicit MultiKeyCache(size_t maxEntries)
: ConcurrentCache<std::set<Key>, Value>(maxEntries)
{}
bool drop(const Key& key)
{
std::unique_lock<std::mutex> l(this->lock_);
bool removed = false;
for(auto& cache_key: this->impl_.keys()) {
if(cache_key.find(key)!=cache_key.end()) {
removed |= this->impl_.drop(cache_key);
}
}
return removed;
}
public:
BookDB() : Xapian::WritableDatabase("", Xapian::DB_BACKEND_INMEMORY) {}
};
unsigned int
Library::getBookCount_not_protected(const bool localBooks, const bool remoteBooks) const
{
unsigned int result = 0;
for (auto& pair: m_books) {
auto& book = pair.second;
if ((!book.getPath().empty() && localBooks)
|| (!book.getUrl().empty() && remoteBooks)) {
result++;
}
}
return result;
}
/* Constructor */
Library::Library()
: mp_archiveCache(new ArchiveCache(std::max(getEnvVar<int>("KIWIX_ARCHIVE_CACHE_SIZE", 1), 1))),
mp_searcherCache(new SearcherCache(std::max(getEnvVar<int>("KIWIX_SEARCHER_CACHE_SIZE", 1), 1))),
m_bookDB(new Xapian::WritableDatabase("", Xapian::DB_BACKEND_INMEMORY))
: m_bookDB(new BookDB)
{
}
Library::Library(Library&& ) = default;
Library& Library::operator=(Library&& ) = default;
/* Destructor */
Library::~Library() = default;
Library::~Library()
{
}
bool Library::addBook(const Book& book)
{
std::lock_guard<std::recursive_mutex> lock(m_mutex);
++m_revision;
/* Try to find it */
updateBookDB(book);
try {
auto& oldbook = m_books.at(book.getId());
if ( ! booksReferToTheSameArchive(oldbook, book) ) {
dropCache(book.getId());
}
oldbook.update(book); // XXX: This may have no effect if oldbook is readonly
// XXX: Then m_bookDB will become out-of-sync with
// XXX: the real contents of the library.
oldbook.lastUpdatedRevision = m_revision;
oldbook.update(book);
return false;
} catch (std::out_of_range&) {
auto& newEntry = m_books[book.getId()];
static_cast<Book&>(newEntry) = book;
newEntry.lastUpdatedRevision = m_revision;
size_t new_cache_size = static_cast<size_t>(std::ceil(getBookCount_not_protected(true, true)*0.1));
if (getEnvVar<int>("KIWIX_ARCHIVE_CACHE_SIZE", -1) <= 0) {
mp_archiveCache->setMaxSize(new_cache_size);
}
if (getEnvVar<int>("KIWIX_SEARCHER_CACHE_SIZE", -1) <= 0) {
mp_searcherCache->setMaxSize(new_cache_size);
}
m_books[book.getId()] = book;
return true;
}
}
void Library::addBookmark(const Bookmark& bookmark)
{
std::lock_guard<std::recursive_mutex> lock(m_mutex);
m_bookmarks.push_back(bookmark);
}
bool Library::removeBookmark(const std::string& zimId, const std::string& url)
{
std::lock_guard<std::recursive_mutex> lock(m_mutex);
for(auto it=m_bookmarks.begin(); it!=m_bookmarks.end(); it++) {
if (it->getBookId() == zimId && it->getUrl() == url) {
m_bookmarks.erase(it);
@@ -157,227 +102,28 @@ bool Library::removeBookmark(const std::string& zimId, const std::string& url)
return false;
}
// Migrates every bookmark whose book id is not present in the library.
// Returns (number of bookmarks changed, number of bookmarks that referred
// to a missing book).
// The mutex is only held while collecting the ids of the missing books; the
// per-book migration is done afterwards.
std::tuple<int, int> Library::migrateBookmarks(MigrationMode migrationMode) {
  std::set<std::string> missingBookIds;
  int invalidCount = 0;
  {
    std::lock_guard<std::recursive_mutex> lock(m_mutex);
    for (const auto& bm : m_bookmarks) {
      if (m_books.find(bm.getBookId()) != m_books.end()) {
        continue;
      }
      ++invalidCount;
      missingBookIds.insert(bm.getBookId());
    }
  }

  int migratedCount = 0;
  for (const auto& missingId : missingBookIds) {
    migratedCount += migrateBookmarks(missingId, migrationMode);
  }
  return std::make_tuple(migratedCount, invalidCount);
}
// Picks the best book for a bookmark out of a collection of book ids.
// Every book of the collection is assumed to be "acceptable" for the
// bookmark (the caller selects them, e.g. by name).
//
// The collection is first ordered by date (presumably newest first - see
// the `false` passed to sort()), then stably reordered so that the books
// whose flavour equals the bookmark's flavour (say "flavourA") come first:
//   - flavourA, date 5
//   - flavourA, date 4
//   - flavourB, date 6
//   - flavourC, date 5
//   - flavourB, date 3
//
// Then, depending on migrationMode:
//   - ALLOW_DOWNGRADE: the first book is returned.
//   - otherwise (UPGRADE_ONLY): the first book whose date is >= the
//     bookmark's date is returned. With the list above:
//       bookmark date 4 => flavourA, date 5
//       bookmark date 5 => flavourA, date 5
//       bookmark date 6 => flavourB, date 6
//       bookmark date 7 => no book
//
// Returns "" when the collection is empty or no book qualifies.
std::string Library::getBestFromBookCollection(BookIdCollection books, const Bookmark& bookmark, MigrationMode migrationMode) const {
  if (books.empty()) {
    return "";
  }

  sort(books, DATE, false);

  const auto hasBookmarkFlavour = [&](const std::string& bookId) {
    return getBookById(bookId).getFlavour() == bookmark.getBookFlavour();
  };
  std::stable_sort(books.begin(), books.end(), [&](const std::string& lhs, const std::string& rhs) {
    const bool lhsMatches = hasBookmarkFlavour(lhs);
    const bool rhsMatches = hasBookmarkFlavour(rhs);
    // lhs goes first only if it has the wanted flavour and rhs has not.
    return lhsMatches && !rhsMatches;
  });

  if (migrationMode == ALLOW_DOWNGRADE) {
    return books[0];
  }

  const auto candidate = std::find_if(books.begin(), books.end(), [&](const std::string& bookId) {
    return getBookById(bookId).getDate() >= bookmark.getDate();
  });
  if (candidate != books.end()) {
    return *candidate;
  }
  return "";
}
// Returns a copy of `input` in which every double quote is replaced by a
// space (the length of the string is unchanged).
std::string remove_quote(std::string input) {
  for (char& ch : input) {
    if (ch == '"') {
      ch = ' ';
    }
  }
  return input;
}
std::string Library::getBestTargetBookId(const std::string& bookName, const std::string& preferedFlavour, const std::string& minDate) const {
// Let's reuse our algorithm based on bookmark.
MigrationMode migrationMode = UPGRADE_ONLY;
auto bookmark = Bookmark();
bookmark.setBookName(bookName);
bookmark.setBookFlavour(preferedFlavour);
if (minDate.empty()) {
migrationMode = ALLOW_DOWNGRADE;
} else {
bookmark.setDate(minDate);
}
return getBestTargetBookId(bookmark, migrationMode);
}
std::string Library::getBestTargetBookId(const Bookmark& bookmark, MigrationMode migrationMode) const {
std::lock_guard<std::recursive_mutex> lock(m_mutex);
// Search for a existing book with the same name
auto book_filter = Filter();
if (!bookmark.getBookName().empty()) {
book_filter.name(bookmark.getBookName());
} else {
// We don't have a name stored (older bookmarks)
// Fallback on title (All bookmarks should have one, but let's be safe against wrongly filled bookmark)
if (bookmark.getBookTitle().empty()) {
// No bookName nor bookTitle, no way to find target book.
return "";
}
book_filter.query("title:\"" + remove_quote(bookmark.getBookTitle()) + "\"");
}
auto targetBooks = filter(book_filter);
auto bestBook = getBestFromBookCollection(targetBooks, bookmark, migrationMode);
if (bestBook.empty()) {
try {
getBookById(bookmark.getBookId());
return bookmark.getBookId();
} catch (std::out_of_range&) {}
}
return bestBook;
}
int Library::migrateBookmarks(const std::string& sourceBookId, MigrationMode migrationMode) {
std::lock_guard<std::recursive_mutex> lock(m_mutex);
Bookmark firstBookmarkToChange;
for(auto& bookmark:m_bookmarks) {
if (bookmark.getBookId() == sourceBookId) {
firstBookmarkToChange = bookmark;
break;
}
}
if (firstBookmarkToChange.getBookId().empty()) {
return 0;
}
std::string betterBook = getBestTargetBookId(firstBookmarkToChange, migrationMode);
if (betterBook.empty()) {
return 0;
}
return migrateBookmarks(sourceBookId, betterBook);
}
int Library::migrateBookmarks(const std::string& sourceBookId, const std::string& targetBookId) {
if (sourceBookId == targetBookId) {
return 0;
}
int changed = 0;
for (auto& bookmark:m_bookmarks) {
if (bookmark.getBookId() == sourceBookId) {
bookmark.setBookId(targetBookId);
changed +=1;
}
}
return changed;
}
// Drops what is cached for book `id`: calls drop(id) on the archive cache
// and on the searcher cache.
void Library::dropCache(const std::string& id)
{
mp_archiveCache->drop(id);
mp_searcherCache->drop(id);
}
bool Library::removeBookById(const std::string& id)
{
std::lock_guard<std::recursive_mutex> lock(m_mutex);
m_bookDB->delete_document("Q" + id);
dropCache(id);
// We do not change the cache size here
// Most of the time, the book is remove in case of library refresh, it is
// often associated with addBook calls (which will properly set the cache size)
// Having a too big cache is not a problem here (or it would have been before)
// (And setMaxSize doesn't actually reduce the cache size, extra cached items
// will be removed in put or getOrPut).
const bool bookWasRemoved = m_books.erase(id) == 1;
if ( bookWasRemoved ) {
++m_revision;
}
return bookWasRemoved;
}
Library::Revision Library::getRevision() const
{
std::lock_guard<std::recursive_mutex> lock(m_mutex);
return m_revision;
}
uint32_t Library::removeBooksNotUpdatedSince(Revision libraryRevision)
{
BookIdCollection booksToRemove;
{
std::lock_guard<std::recursive_mutex> lock(m_mutex);
for ( const auto& entry : m_books) {
if ( entry.second.lastUpdatedRevision <= libraryRevision ) {
booksToRemove.push_back(entry.first);
}
}
}
uint32_t countOfRemovedBooks = 0;
for ( const auto& id : booksToRemove ) {
if ( removeBookById(id) )
++countOfRemovedBooks;
}
return countOfRemovedBooks;
m_readers.erase(id);
return m_books.erase(id) == 1;
}
const Book& Library::getBookById(const std::string& id) const
{
// XXX: Doesn't make sense to lock this operation since it cannot
// XXX: guarantee thread-safety because of its return type
return m_books.at(id);
}
Book Library::getBookByIdThreadSafe(const std::string& id) const
Book& Library::getBookById(const std::string& id)
{
std::lock_guard<std::recursive_mutex> lock(m_mutex);
return getBookById(id);
const Library& const_self = *this;
return const_cast<Book&>(const_self.getBookById(id));
}
const Book& Library::getBookByPath(const std::string& path) const
{
// XXX: Doesn't make sense to lock this operation since it cannot
// XXX: guarantee thread-safety because of its return type
for(auto& it: m_books) {
auto& book = it.second;
if (book.getPath() == path)
@@ -388,124 +134,75 @@ const Book& Library::getBookByPath(const std::string& path) const
throw std::out_of_range(ss.str());
}
std::shared_ptr<zim::Archive> Library::getArchiveById(const std::string& id)
Book& Library::getBookByPath(const std::string& path)
{
try {
return mp_archiveCache->getOrPut(id,
[&](){
auto book = getBookById(id);
if (!book.isPathValid()) {
throw std::invalid_argument("");
}
return std::make_shared<zim::Archive>(book.getPath());
});
} catch (std::invalid_argument&) {
return nullptr;
}
const Library& const_self = *this;
return const_cast<Book&>(const_self.getBookByPath(path));
}
std::shared_ptr<ZimSearcher> Library::getSearcherByIds(const BookIdSet& ids)
std::shared_ptr<Reader> Library::getReaderById(const std::string& id)
{
assert(!ids.empty());
try {
return mp_searcherCache->getOrPut(ids,
[&](){
std::vector<zim::Archive> archives;
for(auto& id:ids) {
auto archive = getArchiveById(id);
if(!archive) {
throw std::invalid_argument("");
}
archives.push_back(*archive);
}
return std::make_shared<ZimSearcher>(zim::Searcher(archives));
});
} catch (std::invalid_argument&) {
return m_readers.at(id);
} catch (std::out_of_range& e) {}
auto book = getBookById(id);
if (!book.isPathValid())
return nullptr;
}
auto sptr = make_shared<Reader>(book.getPath());
m_readers[id] = sptr;
return sptr;
}
unsigned int Library::getBookCount(const bool localBooks,
const bool remoteBooks) const
{
std::lock_guard<std::recursive_mutex> lock(m_mutex);
return getBookCount_not_protected(localBooks, remoteBooks);
unsigned int result = 0;
for (auto& pair: m_books) {
auto& book = pair.second;
if ((!book.getPath().empty() && localBooks)
|| (book.getPath().empty() && remoteBooks)) {
result++;
}
}
return result;
}
bool Library::writeToFile(const std::string& path) const
{
const auto allBookIds = getBooksIds();
auto baseDir = removeLastPathElement(path);
LibXMLDumper dumper(this);
dumper.setBaseDir(baseDir);
std::string xml;
{
std::lock_guard<std::recursive_mutex> lock(m_mutex);
xml = dumper.dumpLibXMLContent(allBookIds);
};
return writeTextFile(path, xml);
return writeTextFile(path, dumper.dumpLibXMLContent(getBooksIds()));
}
bool Library::writeBookmarksToFile(const std::string& path) const
{
LibXMLDumper dumper(this);
// NOTE: LibXMLDumper::dumpLibXMLBookmark uses Library in a thread-safe way
const std::string xml = dumper.dumpLibXMLBookmark();
return writeTextFile(path, xml);
}
Library::AttributeCounts Library::getBookAttributeCounts(BookStrPropMemFn p) const
{
std::lock_guard<std::recursive_mutex> lock(m_mutex);
AttributeCounts propValueCounts;
for (const auto& pair: m_books) {
const auto& book = pair.second;
if (book.getOrigId().empty()) {
propValueCounts[(book.*p)()] += 1;
}
}
return propValueCounts;
}
std::vector<std::string> Library::getBookPropValueSet(BookStrPropMemFn p) const
{
std::vector<std::string> result;
for ( const auto& kv : getBookAttributeCounts(p) ) {
result.push_back(kv.first);
}
return result;
return writeTextFile(path, dumper.dumpLibXMLBookmark());
}
std::vector<std::string> Library::getBooksLanguages() const
{
std::vector<std::string> langs;
for ( const auto& langAndCount : getBooksLanguagesWithCounts() ) {
langs.push_back(langAndCount.first);
}
return langs;
}
std::vector<std::string> booksLanguages;
std::map<std::string, bool> booksLanguagesMap;
Library::AttributeCounts Library::getBooksLanguagesWithCounts() const
{
std::lock_guard<std::recursive_mutex> lock(m_mutex);
AttributeCounts langsWithCounts;
for (const auto& pair: m_books) {
const auto& book = pair.second;
if (book.getOrigId().empty()) {
for ( const auto& lang : book.getLanguages() ) {
++langsWithCounts[lang];
for (auto& pair: m_books) {
auto& book = pair.second;
auto& language = book.getLanguage();
if (booksLanguagesMap.find(language) == booksLanguagesMap.end()) {
if (book.getOrigId().empty()) {
booksLanguagesMap[language] = true;
booksLanguages.push_back(language);
}
}
}
return langsWithCounts;
return booksLanguages;
}
std::vector<std::string> Library::getBooksCategories() const
{
std::lock_guard<std::recursive_mutex> lock(m_mutex);
std::set<std::string> categories;
for (const auto& pair: m_books) {
@@ -521,12 +218,40 @@ std::vector<std::string> Library::getBooksCategories() const
std::vector<std::string> Library::getBooksCreators() const
{
return getBookPropValueSet(&Book::getCreator);
std::vector<std::string> booksCreators;
std::map<std::string, bool> booksCreatorsMap;
for (auto& pair: m_books) {
auto& book = pair.second;
auto& creator = book.getCreator();
if (booksCreatorsMap.find(creator) == booksCreatorsMap.end()) {
if (book.getOrigId().empty()) {
booksCreatorsMap[creator] = true;
booksCreators.push_back(creator);
}
}
}
return booksCreators;
}
std::vector<std::string> Library::getBooksPublishers() const
{
return getBookPropValueSet(&Book::getPublisher);
std::vector<std::string> booksPublishers;
std::map<std::string, bool> booksPublishersMap;
for (auto& pair:m_books) {
auto& book = pair.second;
auto& publisher = book.getPublisher();
if (booksPublishersMap.find(publisher) == booksPublishersMap.end()) {
if (book.getOrigId().empty()) {
booksPublishersMap[publisher] = true;
booksPublishers.push_back(publisher);
}
}
}
return booksPublishers;
}
const std::vector<kiwix::Bookmark> Library::getBookmarks(bool onlyValidBookmarks) const
@@ -536,7 +261,6 @@ const std::vector<kiwix::Bookmark> Library::getBookmarks(bool onlyValidBookmarks
}
std::vector<kiwix::Bookmark> validBookmarks;
auto booksId = getBooksIds();
std::lock_guard<std::recursive_mutex> lock(m_mutex);
for(auto& bookmark:m_bookmarks) {
if (std::find(booksId.begin(), booksId.end(), bookmark.getBookId()) != booksId.end()) {
validBookmarks.push_back(bookmark);
@@ -547,7 +271,6 @@ const std::vector<kiwix::Bookmark> Library::getBookmarks(bool onlyValidBookmarks
Library::BookIdCollection Library::getBooksIds() const
{
std::lock_guard<std::recursive_mutex> lock(m_mutex);
BookIdCollection bookIds;
for (auto& pair: m_books) {
@@ -557,19 +280,26 @@ Library::BookIdCollection Library::getBooksIds() const
return bookIds;
}
Library::BookIdCollection Library::filter(const std::string& search) const
{
if (search.empty()) {
return getBooksIds();
}
return filter(Filter().query(search));
}
void Library::updateBookDB(const Book& book)
{
Xapian::Stem stemmer;
Xapian::TermGenerator indexer;
const auto langs = book.getLanguages();
if ( langs.size() == 1 ) {
try {
stemmer = Xapian::Stem(iso639_3ToXapian(langs[0]));
indexer.set_stemmer(stemmer);
indexer.set_stemming_strategy(Xapian::TermGenerator::STEM_SOME);
} catch (...) {}
}
const std::string lang = book.getLanguage();
try {
stemmer = Xapian::Stem(iso639_3ToXapian(lang));
indexer.set_stemmer(stemmer);
indexer.set_stemming_strategy(Xapian::TermGenerator::STEM_SOME);
} catch (...) {}
Xapian::Document doc;
indexer.set_document(doc);
@@ -584,22 +314,14 @@ void Library::updateBookDB(const Book& book)
// Index all fields for field-based search
indexer.index_text(title, 1, "S");
indexer.index_text(desc, 1, "XD");
for ( const auto& lang : langs ) {
indexer.index_text(lang, 1, "L");
}
indexer.index_text(lang, 1, "L");
indexer.index_text(normalizeText(book.getCreator()), 1, "A");
indexer.index_text(normalizeText(book.getPublisher()), 1, "XP");
doc.add_term("XN"+normalizeText(book.getName()));
indexer.index_text(normalizeText(book.getFlavour()), 1, "XF");
indexer.index_text(normalizeText(book.getName()), 1, "XN");
indexer.index_text(normalizeText(book.getCategory()), 1, "XC");
for ( const auto& tag : split(normalizeText(book.getTags()), ";") ) {
for ( const auto& tag : split(normalizeText(book.getTags()), ";") )
doc.add_boolean_term("XT" + tag);
if ( tag[0] != '_' ) {
indexer.increase_termpos();
indexer.index_text(tag);
}
}
const std::string idterm = "Q" + book.getId();
doc.add_boolean_term(idterm);
@@ -631,7 +353,6 @@ Xapian::Query buildXapianQueryFromFilterQuery(const Filter& filter)
queryParser.add_prefix("title", "S");
queryParser.add_prefix("description", "XD");
queryParser.add_prefix("name", "XN");
queryParser.add_prefix("flavour", "XF");
queryParser.add_prefix("category", "XC");
queryParser.add_prefix("lang", "L");
queryParser.add_prefix("publisher", "XP");
@@ -645,6 +366,8 @@ Xapian::Query buildXapianQueryFromFilterQuery(const Filter& filter)
//queryParser.set_stemmer(Xapian::Stem(iso639_3ToXapian(???)));
//queryParser.set_stemming_strategy(Xapian::QueryParser::STEM_SOME);
const auto flags = Xapian::QueryParser::FLAG_PHRASE
| Xapian::QueryParser::FLAG_BOOLEAN
| Xapian::QueryParser::FLAG_BOOLEAN_ANY_CASE
| Xapian::QueryParser::FLAG_LOVEHATE
| Xapian::QueryParser::FLAG_WILDCARD
| partialQueryFlag;
@@ -656,35 +379,14 @@ Xapian::Query nameQuery(const std::string& name)
return Xapian::Query("XN" + normalizeText(name));
}
Xapian::Query flavourQuery(const std::string& name)
Xapian::Query categoryQuery(const std::string& category)
{
return Xapian::Query("XF" + normalizeText(name));
return Xapian::Query("XC" + normalizeText(category));
}
Xapian::Query multipleParamQuery(const std::string& commaSeparatedList, const std::string& prefix)
Xapian::Query langQuery(const std::string& lang)
{
Xapian::Query q;
bool firstIteration = true;
for ( const auto& elem : kiwix::split(commaSeparatedList, ",") ) {
const Xapian::Query singleQuery(prefix + normalizeText(elem));
if ( firstIteration ) {
q = singleQuery;
firstIteration = false;
} else {
q = Xapian::Query(Xapian::Query::OP_OR, q, singleQuery);
}
}
return q;
}
Xapian::Query categoryQuery(const std::string& commaSeparatedCategoryList)
{
return multipleParamQuery(commaSeparatedCategoryList, "XC");
}
Xapian::Query langQuery(const std::string& commaSeparatedLanguageList)
{
return multipleParamQuery(commaSeparatedLanguageList, "L");
return Xapian::Query("L" + normalizeText(lang));
}
Xapian::Query publisherQuery(const std::string& publisher)
@@ -728,9 +430,6 @@ Xapian::Query buildXapianQuery(const Filter& filter)
if ( filter.hasName() ) {
q = Xapian::Query(Xapian::Query::OP_AND, q, nameQuery(filter.getName()));
}
if ( filter.hasFlavour() ) {
q = Xapian::Query(Xapian::Query::OP_AND, q, flavourQuery(filter.getFlavour()));
}
if ( filter.hasCategory() ) {
q = Xapian::Query(Xapian::Query::OP_AND, q, categoryQuery(filter.getCategory()));
}
@@ -761,7 +460,6 @@ Library::BookIdCollection Library::filterViaBookDB(const Filter& filter) const
BookIdCollection bookIds;
std::lock_guard<std::recursive_mutex> lock(m_mutex);
Xapian::Enquire enquire(*m_bookDB);
enquire.set_query(query);
const auto results = enquire.get_mset(0, m_books.size());
@@ -775,9 +473,7 @@ Library::BookIdCollection Library::filterViaBookDB(const Filter& filter) const
Library::BookIdCollection Library::filter(const Filter& filter) const
{
BookIdCollection result;
const auto preliminaryResult = filterViaBookDB(filter);
std::lock_guard<std::recursive_mutex> lock(m_mutex);
for(auto id : preliminaryResult) {
for(auto id : filterViaBookDB(filter)) {
if(filter.accept(m_books.at(id))) {
result.push_back(id);
}
@@ -846,11 +542,6 @@ std::string Comparator<PUBLISHER>::get_key(const std::string& id)
void Library::sort(BookIdCollection& bookIds, supportedListSortBy sort, bool ascending) const
{
// NOTE: Can reimplement this method in a way that doesn't require locking
// NOTE: for the entire duration of the sort. Will need to obtain (under a
// NOTE: lock) the required atributes from the books once, and then the
// NOTE: sorting will run on a copy of data without locking.
std::lock_guard<std::recursive_mutex> lock(m_mutex);
switch(sort) {
case TITLE:
std::sort(bookIds.begin(), bookIds.end(), Comparator<TITLE>(this, ascending));
@@ -873,6 +564,48 @@ void Library::sort(BookIdCollection& bookIds, supportedListSortBy sort, bool asc
}
// Translates the flag/argument based listing interface into a Filter,
// runs it and returns the matching book ids sorted by `sortBy` (ascending).
//
// `mode` is a bit mask; every set bit switches one boolean criterion on or
// off. String/collection arguments are only applied when they are non-empty,
// and `maxSize` only when it is non-zero.
Library::BookIdCollection Library::listBooksIds(
    int mode,
    supportedListSortBy sortBy,
    const std::string& search,
    const std::string& language,
    const std::string& creator,
    const std::string& publisher,
    const std::vector<std::string>& tags,
    size_t maxSize) const
{
  Filter criteria;

  // Mode bits. The order is kept as-is: when two contradictory bits are set
  // the later call is the one applied last.
  if (mode & LOCAL)    { criteria.local(true); }
  if (mode & NOLOCAL)  { criteria.local(false); }
  if (mode & VALID)    { criteria.valid(true); }
  if (mode & NOVALID)  { criteria.valid(false); }
  if (mode & REMOTE)   { criteria.remote(true); }
  if (mode & NOREMOTE) { criteria.remote(false); }

  // Optional value-based criteria.
  if (!tags.empty())      { criteria.acceptTags(tags); }
  if (maxSize != 0)       { criteria.maxSize(maxSize); }
  if (!language.empty())  { criteria.lang(language); }
  if (!publisher.empty()) { criteria.publisher(publisher); }
  if (!creator.empty())   { criteria.creator(creator); }
  if (!search.empty())    { criteria.query(search); }

  auto matchingIds = filter(criteria);
  sort(matchingIds, sortBy, true);
  return matchingIds;
}
Filter::Filter()
: activeFilters(0),
_maxSize(0)
@@ -896,7 +629,6 @@ enum filterTypes {
QUERY = FLAG(12),
NAME = FLAG(13),
CATEGORY = FLAG(14),
FLAVOUR = FLAG(15),
};
Filter& Filter::local(bool accept)
@@ -998,25 +730,6 @@ Filter& Filter::name(std::string name)
activeFilters |= NAME;
return *this;
}
// Stores the requested flavour and marks the FLAVOUR criterion as active.
// Returns *this so that calls can be chained.
Filter& Filter::flavour(std::string flavour)
{
  this->_flavour = flavour;
  this->activeFilters |= FLAVOUR;
  return *this;
}
// Deactivates the LANG criterion by clearing its bit in activeFilters.
// Returns *this so that calls can be chained.
Filter& Filter::clearLang()
{
  activeFilters = activeFilters & ~LANG;
  return *this;
}
// Deactivates the CATEGORY criterion by clearing its bit in activeFilters.
// Returns *this so that calls can be chained.
Filter& Filter::clearCategory()
{
  activeFilters = activeFilters & ~CATEGORY;
  return *this;
}
#define ACTIVE(X) (activeFilters & (X))
#define FILTER(TAG, TEST) if (ACTIVE(TAG) && !(TEST)) { return false; }
@@ -1050,12 +763,6 @@ bool Filter::hasCreator() const
return ACTIVE(_CREATOR);
}
// Tells whether the FLAVOUR criterion is currently active on this filter.
bool Filter::hasFlavour() const
{
  return (ACTIVE(FLAVOUR)) != 0;
}
bool Filter::accept(const Book& book) const
{
auto local = !book.getPath().empty();

View File

@@ -1,61 +0,0 @@
#include "library_dumper.h"
#include "tools/stringTools.h"
#include "tools/otherTools.h"
#include "tools.h"
namespace kiwix
{

/* Constructor */
// Keeps (non-owning) pointers to the library to dump and to the name mapper.
LibraryDumper::LibraryDumper(const Library* library, const NameMapper* nameMapper)
  : library(library),
    nameMapper(nameMapper)
{
}

/* Destructor */
LibraryDumper::~LibraryDumper()
{
}

// Records the paging information (total number of results, index of the
// first result and number of results in the current page).
void LibraryDumper::setOpenSearchInfo(int totalResults, int startIndex, int count)
{
  m_totalResults = totalResults;
  m_startIndex = startIndex;
  m_count = count;
}

// Builds one mustache object per category of the library.
// Each object carries the category name, its URL-encoded form, the current
// date and an id derived from the library id and the URL-encoded name.
kainjow::mustache::list LibraryDumper::getCategoryData() const
{
  const auto now = gen_date_str();
  kainjow::mustache::list categoryData;
  for ( const auto& category : library->getBooksCategories() ) {
    const auto urlencodedCategoryName = urlEncode(category);
    categoryData.push_back(kainjow::mustache::object{
      {"name", category},
      {"urlencoded_name", urlencodedCategoryName},
      {"updated", now},
      {"id", gen_uuid(libraryId + "/categories/" + urlencodedCategoryName)}
    });
  }
  return categoryData;
}

// Builds one mustache object per language of the library.
// Each object carries the language code, its self name, the number of books
// in that language (as text), the current date and an id derived from the
// library id and the language code.
kainjow::mustache::list LibraryDumper::getLanguageData() const
{
  const auto now = gen_date_str();
  kainjow::mustache::list languageData;
  for ( const auto& langAndBookCount : library->getBooksLanguagesWithCounts() ) {
    // Bind by reference: the pair outlives the loop body, no copy is needed.
    const std::string& languageCode = langAndBookCount.first;
    const int bookCount = langAndBookCount.second;
    const auto languageSelfName = getLanguageSelfName(languageCode);
    languageData.push_back(kainjow::mustache::object{
      {"lang_code", languageCode},
      {"lang_self_name", languageSelfName},
      {"book_count", to_string(bookCount)},
      {"updated", now},
      {"id", gen_uuid(libraryId + "/languages/" + languageCode)}
    });
  }
  return languageData;
}

} // namespace kiwix

View File

@@ -1,99 +0,0 @@
/*
* Copyright 2023 Nikhil Tanwar <2002nikhiltanwar@gmail.com>
* Copyright 2017 Matthieu Gautier <mgautier@kymeria.fr>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*/
#ifndef KIWIX_LIBRARY_DUMPER_H
#define KIWIX_LIBRARY_DUMPER_H
#include <string>
#include "library.h"
#include "name_mapper.h"
#include <mustache.hpp>
namespace kiwix
{
/**
* A base class to dump Library in various formats.
*
*/
class LibraryDumper
{
 public:
  /**
   * Create a dumper working on the given library.
   *
   * Both pointers are stored as-is; the dumper does not take ownership.
   *
   * @param library the library to dump.
   * @param nameMapper the name mapper associated with the library.
   */
  LibraryDumper(const Library* library, const NameMapper* nameMapper);
  ~LibraryDumper();

  /**
   * Set the id of the library.
   *
   * @param id the library id to use.
   */
  void setLibraryId(const std::string& id) { this->libraryId = id;}

  /**
   * Set the root location used when generating url.
   *
   * @param rootLocation the root location to use.
   */
  void setRootLocation(const std::string& rootLocation) { this->rootLocation = rootLocation; }

  /**
   * Set the URL for accessing book content
   *
   * @param url the URL of the /content endpoint of the content server
   */
  void setContentAccessUrl(const std::string& url) { this->contentAccessUrl = url; }

  /**
   * Set some information about the search results.
   *
   * @param totalResult the total number of results of the search.
   * @param startIndex the start index of the result.
   * @param count the number of result of the current set (or page).
   */
  void setOpenSearchInfo(int totalResult, int startIndex, int count);

  /**
   * Sets user default language
   *
   * @param userLang the user language to be set
   */
  void setUserLanguage(std::string userLang) { this->m_userLang = userLang; }

  /**
   * Get the data of categories
   */
  kainjow::mustache::list getCategoryData() const;

  /**
   * Get the data of languages
   */
  kainjow::mustache::list getLanguageData() const;

 protected:
  const kiwix::Library* const library;
  const kiwix::NameMapper* const nameMapper;
  std::string libraryId;
  std::string rootLocation;
  std::string contentAccessUrl;
  std::string m_userLang;

  // Paging information. Zero-initialised so that reading them before
  // setOpenSearchInfo() has been called is well defined.
  int m_totalResults = 0;
  int m_startIndex = 0;
  int m_count = 0;
};
}
#endif // KIWIX_LIBRARY_DUMPER_H

View File

@@ -20,10 +20,10 @@
#include "libxml_dumper.h"
#include "book.h"
#include "tools.h"
#include "tools/base64.h"
#include "tools/stringTools.h"
#include "tools/otherTools.h"
#include "tools/pathTools.h"
namespace kiwix
{
@@ -54,19 +54,16 @@ void LibXMLDumper::handleBook(Book book, pugi::xml_node root_node) {
if (book.getOrigId().empty()) {
ADD_ATTR_NOT_EMPTY(entry_node, "title", book.getTitle());
ADD_ATTR_NOT_EMPTY(entry_node, "description", book.getDescription());
ADD_ATTR_NOT_EMPTY(entry_node, "language", book.getCommaSeparatedLanguages());
ADD_ATTR_NOT_EMPTY(entry_node, "language", book.getLanguage());
ADD_ATTR_NOT_EMPTY(entry_node, "creator", book.getCreator());
ADD_ATTR_NOT_EMPTY(entry_node, "publisher", book.getPublisher());
ADD_ATTR_NOT_EMPTY(entry_node, "name", book.getName());
ADD_ATTR_NOT_EMPTY(entry_node, "flavour", book.getFlavour());
ADD_ATTR_NOT_EMPTY(entry_node, "tags", book.getTags());
try {
auto defaultIllustration = book.getIllustration(48);
ADD_ATTR_NOT_EMPTY(entry_node, "faviconMimeType", defaultIllustration->mimeType);
ADD_ATTR_NOT_EMPTY(entry_node, "faviconUrl", defaultIllustration->url);
if (!defaultIllustration->getData().empty())
ADD_ATTRIBUTE(entry_node, "favicon", base64_encode(defaultIllustration->getData()));
} catch(...) {}
ADD_ATTR_NOT_EMPTY(entry_node, "faviconMimeType", book.getFaviconMimeType());
ADD_ATTR_NOT_EMPTY(entry_node, "faviconUrl", book.getFaviconUrl());
if (!book.getFavicon().empty())
ADD_ATTRIBUTE(entry_node, "favicon", base64_encode(book.getFavicon()));
} else {
ADD_ATTRIBUTE(entry_node, "origId", book.getOrigId());
}
@@ -94,18 +91,14 @@ void LibXMLDumper::handleBookmark(Bookmark bookmark, pugi::xml_node root_node) {
auto book_node = entry_node.append_child("book");
try {
auto book = library->getBookByIdThreadSafe(bookmark.getBookId());
auto book = library->getBookById(bookmark.getBookId());
ADD_TEXT_ENTRY(book_node, "id", book.getId());
ADD_TEXT_ENTRY(book_node, "title", book.getTitle());
ADD_TEXT_ENTRY(book_node, "name", book.getName());
ADD_TEXT_ENTRY(book_node, "flavour", book.getFlavour());
ADD_TEXT_ENTRY(book_node, "language", book.getCommaSeparatedLanguages());
ADD_TEXT_ENTRY(book_node, "language", book.getLanguage());
ADD_TEXT_ENTRY(book_node, "date", book.getDate());
} catch (...) {
ADD_TEXT_ENTRY(book_node, "id", bookmark.getBookId());
ADD_TEXT_ENTRY(book_node, "title", bookmark.getBookTitle());
ADD_TEXT_ENTRY(book_node, "name", bookmark.getBookName());
ADD_TEXT_ENTRY(book_node, "flavour", bookmark.getBookFlavour());
ADD_TEXT_ENTRY(book_node, "language", bookmark.getLanguage());
ADD_TEXT_ENTRY(book_node, "date", bookmark.getDate());
}
@@ -139,7 +132,7 @@ std::string LibXMLDumper::dumpLibXMLBookmark()
pugi::xml_node bookmarksNode = doc.append_child("bookmarks");
if (library) {
for (auto& bookmark: library->getBookmarks(false)) {
for (auto& bookmark: library->getBookmarks()) {
handleBookmark(bookmark, bookmarksNode);
}
}

View File

@@ -19,78 +19,34 @@
#include "manager.h"
#include "tools.h"
#include "tools/pathTools.h"
#include <pugixml.hpp>
namespace kiwix
{
////////////////////////////////////////////////////////////////////////////////
// LibraryManipulator
////////////////////////////////////////////////////////////////////////////////
// Creates a manipulator operating on `library`; the handle is stored in the
// `library` member and used by the add/remove methods of this class.
LibraryManipulator::LibraryManipulator(LibraryPtr library)
: library(library)
{}
// Nothing to release explicitly: the destructor body is empty.
LibraryManipulator::~LibraryManipulator()
{}
// Adds `book` to the wrapped library.
// The bookWasAddedToLibrary() hook is only invoked when the library reports
// success; the library's verdict is returned to the caller.
bool LibraryManipulator::addBookToLibrary(const Book& book)
{
  if ( !library->addBook(book) ) {
    return false;
  }
  bookWasAddedToLibrary(book);
  return true;
}
// Adds `bookmark` to the wrapped library and then, unconditionally, invokes
// the bookmarkWasAddedToLibrary() hook.
void LibraryManipulator::addBookmarkToLibrary(const Bookmark& bookmark)
{
library->addBookmark(bookmark);
bookmarkWasAddedToLibrary(bookmark);
}
// Forwards the request to the wrapped library and returns the count it
// reports. The booksWereRemovedFromLibrary() hook is invoked only when that
// count is non-zero.
uint32_t LibraryManipulator::removeBooksNotUpdatedSince(Library::Revision rev)
{
  const auto removedCount = library->removeBooksNotUpdatedSince(rev);
  const bool somethingRemoved = (removedCount != 0);
  if ( somethingRemoved ) {
    booksWereRemovedFromLibrary();
  }
  return removedCount;
}
// Hook called by addBookToLibrary() after a book was successfully added.
// This implementation intentionally does nothing.
// NOTE(review): presumably meant to be overridden by subclasses - confirm in the header.
void LibraryManipulator::bookWasAddedToLibrary(const Book& book)
{
}
// Hook called by addBookmarkToLibrary() after a bookmark was added.
// This implementation intentionally does nothing.
// NOTE(review): presumably meant to be overridden by subclasses - confirm in the header.
void LibraryManipulator::bookmarkWasAddedToLibrary(const Bookmark& bookmark)
{
}
// Hook called by removeBooksNotUpdatedSince() when at least one book was
// removed. This implementation intentionally does nothing.
// NOTE(review): presumably meant to be overridden by subclasses - confirm in the header.
void LibraryManipulator::booksWereRemovedFromLibrary()
{
}
////////////////////////////////////////////////////////////////////////////////
// Manager
////////////////////////////////////////////////////////////////////////////////
/* Constructor */
Manager::Manager(LibraryManipulator manipulator):
Manager::Manager(LibraryManipulator* manipulator):
writableLibraryPath(""),
manipulator(manipulator)
manipulator(manipulator),
mustDeleteManipulator(false)
{
}
Manager::Manager(LibraryPtr library) :
Manager::Manager(Library* library) :
writableLibraryPath(""),
manipulator(LibraryManipulator(library))
manipulator(new DefaultLibraryManipulator(library)),
mustDeleteManipulator(true)
{
}
/* Destructor */
// Releases the manipulator, but only when this Manager created it itself
// (mustDeleteManipulator is set); an externally supplied one is left alone.
Manager::~Manager()
{
  if (!mustDeleteManipulator) {
    return;
  }
  delete manipulator;
}
bool Manager::parseXmlDom(const pugi::xml_document& doc,
bool readOnly,
const std::string& libraryPath,
@@ -111,7 +67,7 @@ bool Manager::parseXmlDom(const pugi::xml_document& doc,
if (!trustLibrary && !book.getPath().empty()) {
this->readBookFromPath(book.getPath(), &book);
}
manipulator.addBookToLibrary(book);
manipulator->addBookToLibrary(book);
}
return true;
@@ -156,7 +112,7 @@ bool Manager::parseOpdsDom(const pugi::xml_document& doc, const std::string& url
book.updateFromOpds(entryNode, urlHost);
/* Update the book properties with the new importer */
manipulator.addBookToLibrary(book);
manipulator->addBookToLibrary(book);
}
return true;
@@ -228,10 +184,10 @@ std::string Manager::addBookFromPathAndGetId(const std::string& pathToOpen,
}
if (!checkMetaData
|| (!book.getTitle().empty() && !book.getLanguages().empty()
|| (checkMetaData && !book.getTitle().empty() && !book.getLanguage().empty()
&& !book.getDate().empty())) {
book.setUrl(url);
manipulator.addBookToLibrary(book);
manipulator->addBookToLibrary(book);
return book.getId();
}
}
@@ -258,8 +214,8 @@ bool Manager::readBookFromPath(const std::string& path, kiwix::Book* book)
tmp_path = computeAbsolutePath(getCurrentDirectory(), path);
}
try {
zim::Archive archive(tmp_path);
book->update(archive);
kiwix::Reader reader(tmp_path);
book->update(reader);
book->setPathValid(true);
} catch (const std::exception& e) {
book->setPathValid(false);
@@ -286,27 +242,10 @@ bool Manager::readBookmarkFile(const std::string& path)
bookmark.updateFromXml(node);
manipulator.addBookmarkToLibrary(bookmark);
manipulator->addBookmarkToLibrary(bookmark);
}
return true;
}
// Re-reads every library file listed in `paths` and then drops the books
// that were not updated by this pass.
//
// Empty entries are ignored and relative paths are resolved against the
// current directory. Throws std::runtime_error as soon as one file cannot
// be loaded; in that case no book is removed.
void Manager::reload(const Paths& paths)
{
  // Revision of the library before anything is re-read.
  const auto revisionBeforeReload = manipulator.getLibrary()->getRevision();

  for (std::string libraryFile : paths) {
    if (libraryFile.empty()) {
      continue;
    }

    if ( kiwix::isRelativePath(libraryFile) ) {
      libraryFile = kiwix::computeAbsolutePath(kiwix::getCurrentDirectory(), libraryFile);
    }

    if (!readFile(libraryFile, false, true)) {
      throw std::runtime_error("Failed to load the XML library file '" + libraryFile + "'.");
    }
  }

  manipulator.removeBooksNotUpdatedSince(revisionBeforeReload);
}
}

View File

@@ -5,10 +5,11 @@ kiwix_sources = [
'manager.cpp',
'libxml_dumper.cpp',
'opds_dumper.cpp',
'html_dumper.cpp',
'library_dumper.cpp',
'downloader.cpp',
'reader.cpp',
'entry.cpp',
'server.cpp',
'searcher.cpp',
'search_renderer.cpp',
'subprocess.cpp',
'aria2.cpp',
@@ -17,10 +18,7 @@ kiwix_sources = [
'tools/regexTools.cpp',
'tools/stringTools.cpp',
'tools/networkTools.cpp',
'tools/opdsParsingTools.cpp',
'tools/languageTools.cpp',
'tools/otherTools.cpp',
'tools/archiveTools.cpp',
'kiwixserve.cpp',
'name_mapper.cpp',
'server/byte_range.cpp',
@@ -28,14 +26,9 @@ kiwix_sources = [
'server/request_context.cpp',
'server/response.cpp',
'server/internalServer.cpp',
'server/internalServer_catalog.cpp',
'server/i18n.cpp',
'opds_catalog.cpp',
'spelling_correction.cpp',
'version.cpp'
'server/internalServer_catalog_v2.cpp'
]
kiwix_sources += lib_resources
kiwix_sources += i18n_resources
if host_machine.system() == 'windows'
kiwix_sources += 'subprocess_windows.cpp'
@@ -43,14 +36,22 @@ else
kiwix_sources += 'subprocess_unix.cpp'
endif
install_dir = get_option('libdir')
if wrapper.contains('android')
install_dir = 'kiwix-lib/jniLibs/' + meson.get_cross_property('android_abi')
else
install_dir = get_option('libdir')
endif
if wrapper.contains('android') or wrapper.contains('java')
subdir('wrapper/java')
endif
config_h = configure_file(output : 'kiwix_config.h',
configuration : conf,
input : 'config.h.in')
install_headers(config_h, subdir:'kiwix')
libkiwix = library('kiwix',
kiwixlib = library('kiwix',
kiwix_sources,
include_directories : inc,
dependencies : all_deps,

View File

@@ -24,85 +24,39 @@
namespace kiwix {
HumanReadableNameMapper::HumanReadableNameMapper(const kiwix::Library& library, bool withAlias) {
for (auto& bookId: library.filter(kiwix::Filter())) {
HumanReadableNameMapper::HumanReadableNameMapper(kiwix::Library& library, bool withAlias) {
for (auto& bookId: library.filter(kiwix::Filter().local(true).valid(true))) {
auto& currentBook = library.getBookById(bookId);
auto bookName = currentBook.getHumanReadableIdFromPath();
m_idToName[bookId] = bookName;
mapName(library, bookName, bookId);
m_nameToId[bookName] = bookId;
if (!withAlias)
continue;
auto aliasName = replaceRegex(bookName, "", "_[[:digit:]]{4}-[[:digit:]]{2}$");
if (aliasName != bookName) {
mapName(library, aliasName, bookId);
if (aliasName == bookName) {
continue;
}
if (m_nameToId.find(aliasName) == m_nameToId.end()) {
m_nameToId[aliasName] = bookId;
} else {
auto alreadyPresentPath = library.getBookById(m_nameToId[aliasName]).getPath();
std::cerr << "Path collision: " << alreadyPresentPath
<< " and " << currentBook.getPath()
<< " can't share the same URL path '" << aliasName << "'."
<< " Therefore, only " << alreadyPresentPath
<< " will be served." << std::endl;
}
}
}
// Registers `name` as a URL name for `bookId`.
// The first book registered under a name keeps it: on a collision nothing
// is changed and a diagnostic naming both book paths is written to stderr.
void HumanReadableNameMapper::mapName(const Library& library, std::string name, std::string bookId) {
  const auto existingEntry = m_nameToId.find(name);
  if (existingEntry == m_nameToId.end()) {
    m_nameToId[name] = bookId;
    return;
  }

  // Name already taken: report which book wins and which one is shadowed.
  const auto& rejectedBook = library.getBookById(bookId);
  auto servedPath = library.getBookById(existingEntry->second).getPath();
  std::cerr << "Path collision: '" << servedPath
            << "' and '" << rejectedBook.getPath()
            << "' can't share the same URL path '" << name << "'."
            << " Therefore, only '" << servedPath
            << "' will be served." << std::endl;
}
std::string HumanReadableNameMapper::getNameForId(const std::string& id) const {
std::string HumanReadableNameMapper::getNameForId(const std::string& id) {
return m_idToName.at(id);
}
std::string HumanReadableNameMapper::getIdForName(const std::string& name) const {
std::string HumanReadableNameMapper::getIdForName(const std::string& name) {
return m_nameToId.at(name);
}
////////////////////////////////////////////////////////////////////////////////
// UpdatableNameMapper
////////////////////////////////////////////////////////////////////////////////
// Stores the library handle and the alias setting, then builds the initial
// name mapper by calling update().
UpdatableNameMapper::UpdatableNameMapper(LibraryPtr lib, bool withAlias)
: library(lib)
, withAlias(withAlias)
{
update();
}
// Rebuilds the name mapper from the current state of the library.
// The new mapper is constructed before the mutex is taken, so the lock is
// only held while the handle is replaced.
void UpdatableNameMapper::update()
{
  const auto freshMapper = new HumanReadableNameMapper(*library, withAlias);

  std::lock_guard<std::mutex> guard(mutex);
  nameMapper.reset(freshMapper);
}
// Hands out a copy of the handle to the name mapper that is current at the
// time of the call. The copy is taken under the mutex; holding it keeps the
// mapper object alive even if update() installs a new one before the caller
// has finished using it.
UpdatableNameMapper::NameMapperHandle
UpdatableNameMapper::currentNameMapper() const
{
  std::lock_guard<std::mutex> guard(mutex);
  return nameMapper;
}
// Looks up the name for `id` in the name mapper that is current at the time
// of the call.
std::string UpdatableNameMapper::getNameForId(const std::string& id) const
{
  // Keep our own handle for the duration of the lookup so the mapper
  // survives a concurrent call to UpdatableNameMapper::update().
  const auto mapperHandle = currentNameMapper();
  return mapperHandle->getNameForId(id);
}
// Looks up the book id for `name` in the name mapper that is current at the
// time of the call.
std::string UpdatableNameMapper::getIdForName(const std::string& name) const
{
  // Keep our own handle for the duration of the lookup so the mapper
  // survives a concurrent call to UpdatableNameMapper::update().
  const auto mapperHandle = currentNameMapper();
  return mapperHandle->getIdForName(name);
}
}

View File

@@ -1,74 +0,0 @@
/*
* Copyright 2021 Veloman Yunkan <veloman.yunkan@gmail.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*/
#include "opds_catalog.h"
#include "tools/stringTools.h"
#include <sstream>
namespace kiwix
{

namespace
{

// Path of the endpoint the generated search URLs point to.
const char opdsSearchEndpoint[] = "/catalog/v2/entries";

// Tag type: streaming AMP emits a '&' separator when one is needed.
enum Separator { AMP };

// Writes "&" to `oss` unless nothing has been written to it yet, so that
// parameters can be appended uniformly without a leading separator.
std::ostringstream& operator<<(std::ostringstream& oss, Separator sep)
{
  const bool somethingAlreadyWritten = oss.tellp() > 0;
  if ( somethingAlreadyWritten ) {
    oss << "&";
  }
  return oss;
}

// Serialises the query, category, lang, name and accepted-tags criteria of
// `f` (those that are set) as URL-encoded "key=value" pairs joined by '&'.
// Accepted tags are joined with ';' into a single "tag" parameter.
std::string buildSearchString(const Filter& f)
{
  std::ostringstream params;

  if ( f.hasQuery() ) {
    params << AMP << "q=" << urlEncode(f.getQuery());
  }

  if ( f.hasCategory() ) {
    params << AMP << "category=" << urlEncode(f.getCategory());
  }

  if ( f.hasLang() ) {
    params << AMP << "lang=" << urlEncode(f.getLang());
  }

  if ( f.hasName() ) {
    params << AMP << "name=" << urlEncode(f.getName());
  }

  if ( !f.getAcceptTags().empty() ) {
    params << AMP << "tag=" << urlEncode(join(f.getAcceptTags(), ";"));
  }

  return params.str();
}

} // unnamed namespace

// Returns the search endpoint path for filter `f`: the bare endpoint when
// the filter yields no parameters, otherwise the endpoint followed by '?'
// and the parameter string.
std::string getSearchUrl(const Filter& f)
{
  const std::string params = buildSearchString(f);

  std::string url(opdsSearchEndpoint);
  if ( !params.empty() ) {
    url += "?";
    url += params;
  }
  return url;
}

} // namespace kiwix

View File

@@ -20,18 +20,17 @@
#include "opds_dumper.h"
#include "book.h"
#include "libkiwix-resources.h"
#include <mustache.hpp>
#include "tools/stringTools.h"
#include "tools/otherTools.h"
#include "kiwixlib-resources.h"
#include <mustache.hpp>
namespace kiwix
{
/* Constructor */
OPDSDumper::OPDSDumper(const Library* library, const NameMapper* nameMapper)
: LibraryDumper(library, nameMapper)
OPDSDumper::OPDSDumper(Library* library)
: library(library)
{
}
/* Destructor */
@@ -39,46 +38,35 @@ OPDSDumper::~OPDSDumper()
{
}
// Records the paging information (total number of results, index of the
// first result and number of results in the current page).
void OPDSDumper::setOpenSearchInfo(int totalResults, int startIndex, int count)
{
  m_totalResults = totalResults;
  m_startIndex = startIndex;
  m_count = count;
}
namespace
{
const std::string XML_HEADER(R"(<?xml version="1.0" encoding="UTF-8"?>)");
typedef kainjow::mustache::data MustacheData;
typedef kainjow::mustache::list BooksData;
typedef kainjow::mustache::list IllustrationInfo;
typedef kainjow::mustache::list BookData;
IllustrationInfo getBookIllustrationInfo(const Book& book)
BookData getBookData(const Library* library, const std::vector<std::string>& bookIds)
{
kainjow::mustache::list illustrations;
for ( const auto& illustration : book.getIllustrations() ) {
// For now, we are handling only sizexsize@1 illustration.
// So we can simply pass one size to mustache.
illustrations.push_back(kainjow::mustache::object{
{"icon_size", to_string(illustration->width)},
{"icon_mimetype", illustration->mimeType}
});
}
return illustrations;
}
std::string fullEntryXML(const Book& book,
const std::string& rootLocation,
const std::string& contentAccessUrl,
const std::string& contentId)
{
const auto bookDate = book.getDate() + "T00:00:00Z";
const kainjow::mustache::object data{
{"root", rootLocation},
{"contentAccessUrl", onlyAsNonEmptyMustacheValue(contentAccessUrl)},
{"id", book.getId()},
BookData bookData;
for ( const auto& bookId : bookIds ) {
const Book& book = library->getBookById(bookId);
const MustacheData bookUrl = book.getUrl().empty()
? MustacheData(false)
: MustacheData(book.getUrl());
bookData.push_back(kainjow::mustache::object{
{"id", "urn:uuid:"+book.getId()},
{"name", book.getName()},
{"title", book.getTitle()},
{"description", book.getDescription()},
{"language", book.getCommaSeparatedLanguages()},
{"content_id", urlEncode(contentId)},
{"updated", bookDate}, // XXX: this should be the entry update datetime
{"book_date", bookDate},
{"language", book.getLanguage()},
{"content_id", book.getHumanReadableIdFromPath()},
{"updated", book.getDate() + "T00:00:00Z"},
{"category", book.getCategory()},
{"flavour", book.getFlavour()},
{"tags", book.getTags()},
@@ -86,106 +74,66 @@ std::string fullEntryXML(const Book& book,
{"media_count", to_string(book.getMediaCount())},
{"author_name", book.getCreator()},
{"publisher_name", book.getPublisher()},
{"url", onlyAsNonEmptyMustacheValue(book.getUrl())},
{"url", bookUrl},
{"size", to_string(book.getSize())},
{"icons", getBookIllustrationInfo(book)},
};
return render_template(RESOURCE::templates::catalog_v2_entry_xml, data);
}
std::string partialEntryXML(const Book& book, const std::string& rootLocation)
{
const auto bookDate = book.getDate() + "T00:00:00Z";
const kainjow::mustache::object data{
{"root", rootLocation},
{"endpoint_root", rootLocation + "/catalog/v2"},
{"id", book.getId()},
{"title", book.getTitle()},
{"updated", bookDate}, // XXX: this should be the entry update datetime
};
const auto xmlTemplate = RESOURCE::templates::catalog_v2_partial_entry_xml;
return render_template(xmlTemplate, data);
}
BooksData getBooksData(const Library* library,
const NameMapper* nameMapper,
const std::vector<std::string>& bookIds,
const std::string& rootLocation,
const std::string& contentAccessUrl,
bool partial)
{
BooksData booksData;
for ( const auto& bookId : bookIds ) {
try {
const Book book = library->getBookByIdThreadSafe(bookId);
const std::string contentId = nameMapper->getNameForId(bookId);
const auto entryXML = partial
? partialEntryXML(book, rootLocation)
: fullEntryXML(book, rootLocation, contentAccessUrl, contentId);
booksData.push_back(kainjow::mustache::object{ {"entry", entryXML} });
} catch ( const std::out_of_range& ) {
// the book was removed from the library since its id was obtained
// ignore it
}
});
}
return booksData;
return bookData;
}
} // unnamed namespace
string OPDSDumper::dumpOPDSFeed(const std::vector<std::string>& bookIds, const std::string& query) const
{
const auto booksData = getBooksData(library, nameMapper, bookIds, rootLocation, contentAccessUrl, false);
const auto bookData = getBookData(library, bookIds);
const kainjow::mustache::object template_data{
{"date", gen_date_str()},
{"root", rootLocation},
{"feed_id", gen_uuid(libraryId + "/catalog/search?"+query)},
{"filter", onlyAsNonEmptyMustacheValue(query)},
{"filter", query.empty() ? MustacheData(false) : MustacheData(query)},
{"totalResults", to_string(m_totalResults)},
{"startIndex", to_string(m_startIndex)},
{"itemsPerPage", to_string(m_count)},
{"books", booksData }
{"books", bookData }
};
return render_template(RESOURCE::templates::catalog_entries_xml, template_data);
}
string OPDSDumper::dumpOPDSFeedV2(const std::vector<std::string>& bookIds, const std::string& query, bool partial) const
string OPDSDumper::dumpOPDSFeedV2(const std::vector<std::string>& bookIds, const std::string& query) const
{
const auto endpointRoot = rootLocation + "/catalog/v2";
const auto booksData = getBooksData(library, nameMapper, bookIds, rootLocation, contentAccessUrl, partial);
const auto bookData = getBookData(library, bookIds);
const char* const endpoint = partial ? "/partial_entries" : "/entries";
const std::string url = endpoint + (query.empty() ? "" : "?" + query);
const kainjow::mustache::object template_data{
{"date", gen_date_str()},
{"endpoint_root", endpointRoot},
{"feed_id", gen_uuid(libraryId + endpoint + "?" + query)},
{"filter", onlyAsNonEmptyMustacheValue(query)},
{"self_url", url},
{"endpoint_root", rootLocation + "/catalog/v2"},
{"feed_id", gen_uuid(libraryId + "/entries?"+query)},
{"filter", query.empty() ? MustacheData(false) : MustacheData(query)},
{"query", query.empty() ? "" : "?" + urlEncode(query)},
{"totalResults", to_string(m_totalResults)},
{"startIndex", to_string(m_startIndex)},
{"itemsPerPage", to_string(m_count)},
{"books", booksData }
{"books", bookData }
};
return render_template(RESOURCE::templates::catalog_v2_entries_xml, template_data);
}
// Produces a standalone XML document for one book: the XML declaration, a
// newline, then the full entry of the book identified by `bookId`.
std::string OPDSDumper::dumpOPDSCompleteEntry(const std::string& bookId) const
{
  const auto book = library->getBookById(bookId);
  const std::string contentId = nameMapper->getNameForId(bookId);

  std::string document = XML_HEADER;
  document += "\n";
  document += fullEntryXML(book, rootLocation, contentAccessUrl, contentId);
  return document;
}
std::string OPDSDumper::categoriesOPDSFeed() const
std::string OPDSDumper::categoriesOPDSFeed(const std::vector<std::string>& categories) const
{
const auto now = gen_date_str();
kainjow::mustache::list categoryData = getCategoryData();
kainjow::mustache::list categoryData;
for ( const auto& category : categories ) {
const auto urlencodedCategoryName = urlEncode(category);
categoryData.push_back(kainjow::mustache::object{
{"name", category},
{"urlencoded_name", urlencodedCategoryName},
{"updated", now},
{"id", gen_uuid(libraryId + "/categories/" + urlencodedCategoryName)}
});
}
return render_template(
RESOURCE::templates::catalog_v2_categories_xml,
kainjow::mustache::object{
@@ -197,19 +145,4 @@ std::string OPDSDumper::categoriesOPDSFeed() const
);
}
// Renders the catalog_v2_languages_xml template into the languages feed.
// The template receives the generation date, the "/catalog/v2" endpoint root,
// a feed id derived from libraryId + "/languages", and the list returned by
// getLanguageData().
std::string OPDSDumper::languagesOPDSFeed() const
{
const auto now = gen_date_str();
kainjow::mustache::list languageData = getLanguageData();
return render_template(
RESOURCE::templates::catalog_v2_languages_xml,
kainjow::mustache::object{
{"date", now},
{"endpoint_root", rootLocation + "/catalog/v2"},
{"feed_id", gen_uuid(libraryId + "/languages")},
{"languages", languageData }
}
);
}
}

581
src/reader.cpp Normal file
View File

@@ -0,0 +1,581 @@
/*
* Copyright 2011 Emmanuel Engelhart <kelson@kiwix.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*/
#include "reader.h"
#include <time.h>
#include <zim/search.h>
#include <zim/item.h>
#include <zim/error.h>
#include "tools/otherTools.h"
// Returns the lowercase hexadecimal digit of the upper 4 bits of `v`.
inline char hi(char v)
{
  static const char kHexDigits[] = "0123456789abcdef";
  const int upperNibble = (v >> 4) & 0xf;
  return kHexDigits[upperNibble];
}
// Returns the lowercase hexadecimal digit of the lower 4 bits of `v`.
inline char lo(char v)
{
  static const char kHexDigits[] = "0123456789abcdef";
  const int lowerNibble = v & 0xf;
  return kHexDigits[lowerNibble];
}
// Formats the first 16 bytes of `in` as a canonical lowercase UUID string
// ("xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx", groups of 4-2-2-2-6 bytes).
//
// Inputs shorter than 16 bytes are padded with zero bytes so that no byte is
// ever read past the end of the string; bytes beyond the 16th are ignored.
std::string hexUUID(std::string in)
{
  static const char kHexDigits[] = "0123456789abcdef";
  const std::size_t kUuidBytes = 16;

  // Normalise the length once instead of trusting the caller.
  in.resize(kUuidBytes, '\0');

  std::string out;
  out.reserve(2 * kUuidBytes + 4);
  for (std::size_t n = 0; n < kUuidBytes; ++n) {
    // A dash precedes bytes 4, 6, 8 and 10.
    if (n == 4 || n == 6 || n == 8 || n == 10) {
      out += '-';
    }
    const unsigned char byte = static_cast<unsigned char>(in[n]);
    out += kHexDigits[byte >> 4];
    out += kHexDigits[byte & 0xf];
  }
  return out;
}
namespace kiwix
{
/* Construct a reader from the path of a ZIM file */
Reader::Reader(const string zimFilePath)
  : zimArchive(nullptr),
    zimFilePath(zimFilePath)
{
  /* A path ending in "zimaa" loses its last two characters, leaving "...zim"
   * (presumably the first chunk of a split ZIM file - confirm). */
  const string chunkSuffix = "zimaa";
  string archivePath = zimFilePath;
  const size_t suffixPos = archivePath.rfind(chunkSuffix);
  const bool endsWithChunkSuffix = suffixPos != string::npos
                                && archivePath.size() > chunkSuffix.size()
                                && suffixPos == archivePath.size() - chunkSuffix.size();
  if (endsWithChunkSuffix) {
    archivePath.erase(archivePath.size() - 2);
  }

  zimArchive.reset(new zim::Archive(archivePath));

  /* initialize random seed: */
  srand(time(nullptr));
}
#ifndef _WIN32
/* Construct a reader on top of an already opened file descriptor.
 * No file path is known in this case, so zimFilePath stays empty. */
Reader::Reader(int fd)
: zimArchive(new zim::Archive(fd)),
zimFilePath("")
{
/* initialize random seed: */
srand(time(nullptr));
}
/* Construct a reader from a file descriptor plus an offset and a size, all
 * forwarded to zim::Archive (presumably a ZIM embedded in a larger file -
 * confirm against the libzim documentation). zimFilePath stays empty. */
Reader::Reader(int fd, zim::offset_type offset, zim::size_type size)
: zimArchive(new zim::Archive(fd, offset, size)),
zimFilePath("")
{
/* initialize random seed: */
srand(time(nullptr));
}
#endif // #ifndef _WIN32
/* Return a non-owning pointer to the underlying archive; the Reader keeps
 * ownership through zimArchive. */
zim::Archive* Reader::getZimArchive() const
{
return zimArchive.get();
}
/* Parse the "Counter" metadata with parseMimetypeCounter().
 * Returns an empty counter when the archive has no "Counter" metadata. */
MimeCounterType Reader::parseCounterMetadata() const
{
try {
auto counterContent = zimArchive->getMetadata("Counter");
return parseMimetypeCounter(counterContent);
} catch (zim::EntryNotFound& e) {
return {};
}
}
/* Count the articles which can be indexed/displayed, i.e. the sum of the
 * counters whose mimetype starts with "text/html" */
unsigned int Reader::getArticleCount() const
{
  const std::map<const std::string, unsigned int> mimeCounters
    = this->parseCounterMetadata();

  unsigned int articleCount = 0;
  for (const auto& mimeAndCount : mimeCounters) {
    if (!startsWith(mimeAndCount.first, "text/html")) {
      continue;
    }
    articleCount += mimeAndCount.second;
  }
  return articleCount;
}
/* Count the media entries of the ZIM file, i.e. the sum of the counters
 * whose mimetype starts with "image/", "video/" or "audio/" */
unsigned int Reader::getMediaCount() const
{
  const std::map<const std::string, unsigned int> mimeCounters
    = this->parseCounterMetadata();

  unsigned int mediaCount = 0;
  for (const auto& mimeAndCount : mimeCounters) {
    const std::string& mimeType = mimeAndCount.first;
    const bool isMedia = startsWith(mimeType, "image/")
                      || startsWith(mimeType, "video/")
                      || startsWith(mimeType, "audio/");
    if (isMedia) {
      mediaCount += mimeAndCount.second;
    }
  }
  return mediaCount;
}
/* Get the total number of entries of the ZIM file, redirects included
 * (value reported by zim::Archive::getEntryCount()) */
unsigned int Reader::getGlobalCount() const
{
return zimArchive->getEntryCount();
}
/* Return the UID of the ZIM file: the archive UUID rendered through its
 * stream insertion operator */
string Reader::getId() const
{
std::ostringstream s;
s << zimArchive->getUuid();
return s.str();
}
/* Return a random entry of the archive.
 * Any exception raised by libzim, whatever its type, is reported as NoEntry. */
Entry Reader::getRandomPage() const
{
try {
return zimArchive->getRandomEntry();
} catch(...) {
throw NoEntry();
}
}
/* Return the main entry of the archive. Exceptions thrown by
 * zim::Archive::getMainEntry() are not translated here. */
Entry Reader::getMainPage() const
{
return zimArchive->getMainEntry();
}
/* Fetch the illustration item of the archive.
 * On success fills `content` and `mimeType` and returns true; returns false
 * when libzim reports zim::EntryNotFound. */
bool Reader::getFavicon(string& content, string& mimeType) const
{
  try {
    auto illustration = zimArchive->getIllustrationItem();
    content = illustration.getData();
    mimeType = illustration.getMimetype();
  } catch (zim::EntryNotFound& e) {
    return false;
  }
  return true;
}
/* Return the path given at construction (empty for the file-descriptor
 * constructors, which initialise zimFilePath to "") */
string Reader::getZimFilePath() const
{
return zimFilePath;
}
/* Read a metadata value.
 * Returns true and stores the value in `value` on success.
 * Returns false, leaving `value` untouched, when libzim throws
 * zim::EntryNotFound for `name`. */
bool Reader::getMetadata(const string& name, string& value) const
{
try {
value = zimArchive->getMetadata(name);
return true;
} catch(zim::EntryNotFound& e) {
return false;
}
}
#define METADATA(NAME) std::string v; getMetadata(NAME, v); return v;
/* Return the "Name" metadata, or an empty string when it is not available */
string Reader::getName() const
{
METADATA("Name")
}
/* Return the "Title" metadata.
 * When the title is missing or empty, fall back to the last element of the
 * ZIM file path with '_' replaced by ' ' and everything from the first
 * ".zim" onwards removed. */
string Reader::getTitle() const
{
  string value;
  /* Use the Reader::getMetadata() wrapper: it swallows zim::EntryNotFound,
   * so an archive without a "Title" reaches the fallback below instead of
   * propagating the exception to the caller. */
  this->getMetadata("Title", value);
  if (value.empty()) {
    value = getLastPathElement(zimFilePath);
    std::replace(value.begin(), value.end(), '_', ' ');
    /* substr(0, npos) keeps the whole name when ".zim" is not found */
    const size_t pos = value.find(".zim");
    value = value.substr(0, pos);
  }
  return value;
}
/* Return the "Creator" metadata, or an empty string when it is not available */
string Reader::getCreator() const
{
METADATA("Creator")
}
/* Return the "Publisher" metadata, or an empty string when it is not available */
string Reader::getPublisher() const
{
METADATA("Publisher")
}
/* Return the "Date" metadata, or an empty string when it is not available */
string Reader::getDate() const
{
METADATA("Date")
}
/* Return the "Description" metadata, falling back to "Subtitle" when the
 * description is missing or empty */
string Reader::getDescription() const
{
  string description;
  this->getMetadata("Description", description);
  if (!description.empty()) {
    return description;
  }

  /* Mediawiki Collection tends to use the "Subtitle" name */
  this->getMetadata("Subtitle", description);
  return description;
}
/* Return the "LongDescription" metadata, or an empty string when it is not available */
string Reader::getLongDescription() const
{
METADATA("LongDescription")
}
/* Return the "Language" metadata, or an empty string when it is not available */
string Reader::getLanguage() const
{
METADATA("Language")
}
/* Return the "License" metadata, or an empty string when it is not available */
string Reader::getLicense() const
{
METADATA("License")
}
/* Return the "Tags" metadata.
 * With `original` set the raw metadata string is returned; otherwise the
 * tags go through convertTags() and are joined back with ';'. */
string Reader::getTags(bool original) const
{
  string rawTags;
  getMetadata("Tags", rawTags);
  if (!original) {
    auto convertedTags = convertTags(rawTags);
    return join(convertedTags, ";");
  }
  return rawTags;
}
/* Look up `tagName` in the converted "Tags" metadata and return what
 * getTagValueFromTagList() yields for it. Errors raised by that helper are
 * not handled here. */
string Reader::getTagStr(const std::string& tagName) const
{
string tags_str;
getMetadata("Tags", tags_str);
return getTagValueFromTagList(convertTags(tags_str), tagName);
}
/* Same lookup as getTagStr(), with the value converted by convertStrToBool() */
bool Reader::getTagBool(const std::string& tagName) const
{
return convertStrToBool(getTagStr(tagName));
}
/* Return the "Relation" metadata, or an empty string when it is not available */
string Reader::getRelation() const
{
METADATA("Relation")
}
/* Return the "Flavour" metadata, or an empty string when it is not available */
string Reader::getFlavour() const
{
METADATA("Flavour")
}
/* Return the "Source" metadata, or an empty string when it is not available */
string Reader::getSource() const
{
METADATA("Source")
}
/* Return the "Scraper" metadata, or an empty string when it is not available */
string Reader::getScraper() const
{
METADATA("Scraper")
}
#undef METADATA
/* Return the id stored in the "startfileuid" metadata as a UUID string, or
 * an empty string when that metadata is missing or empty.
 *
 * The metadata is read as a list of decimal byte values, each terminated by
 * a newline. At most 16 values are used; missing ones count as zero and
 * text after the last newline is ignored. */
string Reader::getOrigId() const
{
  string value;
  this->getMetadata("startfileuid", value);
  if (value.empty()) {
    return "";
  }

  /* Keep the raw bytes in a string with an explicit length: a UUID may
   * contain zero bytes, which would truncate a NUL-terminated char array. */
  std::string rawUuid(16, '\0');
  std::string number;
  std::size_t byteIndex = 0;
  for (const char c : value) {
    if (c != '\n') {
      number += c;
      continue;
    }
    if (byteIndex >= rawUuid.size()) {
      /* More than 16 values: ignore the rest instead of writing out of bounds */
      break;
    }
    rawUuid[byteIndex] = static_cast<char>(atoi(number.c_str()));
    ++byteIndex;
    number.clear();
  }
  return hexUUID(rawUuid);
}
/* Return the entry stored at `path`.
 * An empty path or "/" designates the main page.
 * Throws NoEntry when libzim reports zim::EntryNotFound for the path. */
Entry Reader::getEntryFromPath(const std::string& path) const
{
  const bool wantsMainPage = path.empty() || path == "/";
  if (!wantsMainPage) {
    try {
      return zimArchive->getEntryByPath(path);
    } catch (zim::EntryNotFound& e) {
      throw NoEntry();
    }
  }
  return getMainPage();
}
/* Decode `path` with urlDecode(path, true), then delegate to
 * getEntryFromPath() */
Entry Reader::getEntryFromEncodedPath(const std::string& path) const
{
return getEntryFromPath(urlDecode(path, true));
}
/* Return the entry whose title is `title`.
 * Throws NoEntry when libzim reports zim::EntryNotFound. */
Entry Reader::getEntryFromTitle(const std::string& title) const
{
try {
return zimArchive->getEntryByTitle(title);
} catch(zim::EntryNotFound& e) {
throw NoEntry();
}
}
/* Tell whether the archive has an entry at `path`
 * (forwards to zim::Archive::hasEntryByPath) */
bool Reader::pathExists(const string& path) const
{
return zimArchive->hasEntryByPath(path);
}
/* Does the ZIM file have a fulltext index? */
bool Reader::hasFulltextIndex() const
{
return zimArchive->hasFulltextIndex();
}
/* Search titles by prefix, storing the results in the reader's own
 * suggestion list.
 * With `reset` the list is emptied first. Without it, the search is skipped
 * (returning false) when the list already holds more than `suggestionsCount`
 * items. The suggestion cursor is rewound to the beginning afterwards. */
bool Reader::searchSuggestions(const string& prefix,
                               unsigned int suggestionsCount,
                               const bool reset)
{
  if (!reset && this->suggestions.size() > suggestionsCount) {
    return false;
  }
  if (reset) {
    this->suggestions.clear();
    this->suggestionsOffset = this->suggestions.begin();
  }

  const bool found = searchSuggestions(prefix, suggestionsCount, this->suggestions);

  /* Set the cursor to the beginning */
  this->suggestionsOffset = this->suggestions.begin();
  return found;
}
/* Search titles by prefix and merge the matches into `results`.
 *
 * Entries returned by findByTitle(prefix) are visited until `results` holds
 * `suggestionsCount` items. Each one is inserted in front of the first
 * existing suggestion whose normalized title compares greater, unless a
 * suggestion with the same normalized title and the same final path is met
 * first.
 *
 * Returns false for an empty prefix or when no entry was visited. Returns
 * true as soon as one entry was visited, even when that entry turned out to
 * be a duplicate and nothing was inserted. */
bool Reader::searchSuggestions(const string& prefix,
unsigned int suggestionsCount,
SuggestionsList_t& results)
{
bool retVal = false;
/* Return if no prefix */
if (prefix.size() == 0) {
return false;
}
for (auto& entry: zimArchive->findByTitle(prefix)) {
if (results.size() >= suggestionsCount) {
break;
}
/* Extract the interesting part of article title & url */
std::string normalizedArticleTitle
= kiwix::normalize(entry.getTitle());
// Get the final path (getItem(true) is presumably "follow redirects" - confirm).
auto item = entry.getItem(true);
std::string articleFinalUrl = item.getPath();
/* Go through all already found suggestions and skip if this
article is already in the suggestions list (with another
title) */
bool insert = true;
std::vector<SuggestionItem>::iterator suggestionItr;
for (suggestionItr = results.begin();
suggestionItr != results.end();
suggestionItr++) {
int result = normalizedArticleTitle.compare((*suggestionItr).getNormalizedTitle());
if (result == 0 && articleFinalUrl.compare((*suggestionItr).getPath()) == 0) {
insert = false;
break;
} else if (result < 0) {
/* Insertion point found: the new title sorts before this suggestion */
break;
}
}
/* Insert if possible */
if (insert) {
SuggestionItem suggestion(entry.getTitle(), normalizedArticleTitle, articleFinalUrl);
results.insert(suggestionItr, suggestion);
}
/* Suggestions were found */
retVal = true;
}
return retVal;
}
std::vector<std::string> Reader::getTitleVariants(
const std::string& title) const
{
std::vector<std::string> variants;
variants.push_back(title);
variants.push_back(kiwix::ucFirst(title));
variants.push_back(kiwix::lcFirst(title));
variants.push_back(kiwix::toTitle(title));
return variants;
}
/* "Smart" suggestion search storing its results in the reader's own list.
 * The list is always emptied first and the cursor is rewound to the
 * beginning once the search is done. */
bool Reader::searchSuggestionsSmart(const string& prefix,
unsigned int suggestionsCount)
{
this->suggestions.clear();
this->suggestionsOffset = this->suggestions.begin();
auto ret = searchSuggestionsSmart(prefix, suggestionsCount, this->suggestions);
this->suggestionsOffset = this->suggestions.begin();
return ret;
}
/* Suggestion search appending to `results`.
 * A libzim search on `prefix` is tried first; when it estimates at least one
 * match, its first `suggestionsCount` results are appended and true is
 * returned. Otherwise a title-prefix search is run for every variant of the
 * prefix (see getTitleVariants), and the result is true when any of those
 * searches reported suggestions. */
bool Reader::searchSuggestionsSmart(const string& prefix,
                                    unsigned int suggestionsCount,
                                    SuggestionsList_t& results)
{
  std::vector<std::string> variants = this->getTitleVariants(prefix);

  /* Try to search in the title using fulltext search database */
  auto suggestionSearcher = zim::Searcher(*zimArchive);
  zim::Query suggestionQuery;
  suggestionQuery.setQuery(prefix, true);
  auto suggestionSearch = suggestionSearcher.search(suggestionQuery);

  if (!suggestionSearch.getEstimatedMatches()) {
    /* Nothing from the index: fall back on the prefix search */
    bool anyVariantMatched = false;
    for (const std::string& variant : variants) {
      if (this->searchSuggestions(variant, suggestionsCount, results)) {
        anyVariantMatched = true;
      }
    }
    return anyVariantMatched;
  }

  const auto suggestions = suggestionSearch.getResults(0, suggestionsCount);
  for (auto current = suggestions.begin();
       current != suggestions.end();
       current++) {
    SuggestionItem suggestion(current.getTitle(), kiwix::normalize(current.getTitle()),
                              current.getPath(), current.getSnippet());
    results.push_back(suggestion);
  }
  return true;
}
/* Get the next suggestion: stores its title in `title` and advances the
 * cursor. Returns false, leaving `title` untouched, once the list is
 * exhausted. */
bool Reader::getNextSuggestion(string& title)
{
  const bool exhausted = !(this->suggestionsOffset != this->suggestions.end());
  if (exhausted) {
    return false;
  }

  const auto& currentSuggestion = *(this->suggestionsOffset);
  title = currentSuggestion.getTitle();

  /* increment the cursor for the next call */
  this->suggestionsOffset++;
  return true;
}
/* Get the next suggestion: stores its title in `title` and its path in
 * `url`, then advances the cursor. Returns false, leaving both outputs
 * untouched, once the list is exhausted. */
bool Reader::getNextSuggestion(string& title, string& url)
{
  const bool exhausted = !(this->suggestionsOffset != this->suggestions.end());
  if (exhausted) {
    return false;
  }

  const auto& currentSuggestion = *(this->suggestionsOffset);
  title = currentSuggestion.getTitle();
  url = currentSuggestion.getPath();

  /* increment the cursor for the next call */
  this->suggestionsOffset++;
  return true;
}
/* Check if the file has a checksum */
bool Reader::canCheckIntegrity() const
{
return zimArchive->hasChecksum();
}
/* Return true if corrupted, false otherwise.
 * The archive counts as corrupted when its check() fails or throws; in the
 * latter case the exception message is written to cerr. */
bool Reader::isCorrupted() const
{
  try {
    const bool checkPassed = zimArchive->check();
    return !checkPassed;
  } catch (exception& e) {
    cerr << e.what() << endl;
    return true;
  }
}
/* Return the file size, works also for split files.
 * The value is zim::Archive::getFilesize() divided by 1024 (integer
 * division) and converted to unsigned int. */
unsigned int Reader::getFileSize() const
{
return zimArchive->getFilesize() / 1024;
}
}

View File

@@ -21,61 +21,26 @@
#include <cmath>
#include "search_renderer.h"
#include "searcher.h"
#include "reader.h"
#include "library.h"
#include "name_mapper.h"
#include "tools/archiveTools.h"
#include <zim/search.h>
#include <mustache.hpp>
#include "libkiwix-resources.h"
#include "tools/stringTools.h"
#include "kiwixlib-resources.h"
#include "server/i18n_utils.h"
namespace kiwix
{
namespace
{
// Builds the "search-results-page-title" message with the search pattern
// bound to its SEARCH_PATTERN parameter.
ParameterizedMessage searchResultsPageTitleMsg(const std::string& searchPattern)
{
return ParameterizedMessage("search-results-page-title",
{{"SEARCH_PATTERN", searchPattern}}
);
}
// Builds the header message of the search results page.
// When the "count" field of `r` is the string "0" the dedicated
// "empty-search-results-page-header" message is used; otherwise the header
// carries the pattern plus the "startLabel", "end" and "count" fields of `r`.
ParameterizedMessage searchResultsPageHeaderMsg(const std::string& searchPattern,
                                                const kainjow::mustache::data& r)
{
  const bool noResults = r.get("count")->string_value() == "0";
  if ( noResults ) {
    return ParameterizedMessage("empty-search-results-page-header",
                                {{"SEARCH_PATTERN", searchPattern}}
    );
  }

  return ParameterizedMessage("search-results-page-header",
                              {
                                {"SEARCH_PATTERN", searchPattern},
                                {"START", r.get("startLabel")->string_value()},
                                {"END", r.get("end") ->string_value()},
                                {"COUNT", r.get("count")->string_value()},
                              }
  );
}
} // unnamed namespace
/* Constructor */
SearchRenderer::SearchRenderer(zim::SearchResultSet srs,
unsigned int start, unsigned int estimatedResultCount)
: m_srs(srs),
SearchRenderer::SearchRenderer(Searcher* searcher, NameMapper* mapper)
: mp_searcher(searcher),
mp_nameMapper(mapper),
protocolPrefix("zim://"),
searchProtocolPrefix("search://"),
estimatedResultCount(estimatedResultCount),
resultStart(start)
searchProtocolPrefix("search://?")
{}
/* Destructor */
@@ -83,12 +48,12 @@ SearchRenderer::~SearchRenderer() = default;
void SearchRenderer::setSearchPattern(const std::string& pattern)
{
searchPattern = pattern;
this->searchPattern = pattern;
}
void SearchRenderer::setSearchBookQuery(const std::string& bookQuery)
void SearchRenderer::setSearchContent(const std::string& name)
{
searchBookQuery = bookQuery;
this->searchContent = name;
}
void SearchRenderer::setProtocolPrefix(const std::string& prefix)
@@ -101,175 +66,84 @@ void SearchRenderer::setSearchProtocolPrefix(const std::string& prefix)
this->searchProtocolPrefix = prefix;
}
// Returns the value of the first `key=value` parameter of `query`, or an
// empty string when the key is absent.
//
// The key only matches at a parameter boundary (start of the query, or right
// after '&' or '?'). A plain substring search would also match the tail of a
// longer key, e.g. key "lang" inside "books.filter.lang=eng".
// The value runs up to the next '&' or the end of the query and is returned
// as-is (no URL-decoding).
std::string extractValueFromQuery(const std::string& query, const std::string& key) {
  const std::string p = key + "=";
  size_t i = query.find(p);
  while (i != std::string::npos && i != 0
         && query[i - 1] != '&' && query[i - 1] != '?') {
    // Matched inside another parameter name: keep looking further right.
    i = query.find(p, i + 1);
  }
  if (i == std::string::npos) {
    return "";
  }
  const size_t valueStart = i + p.size();
  const size_t valueEnd = query.find('&', valueStart);
  // substr() clamps the length, so valueEnd == npos means "up to the end".
  return query.substr(valueStart, valueEnd - valueStart);
}
// Assembles the "query" template object:
//  - "pattern": the search pattern passed through encodeDiples()
//  - "unpaginatedQuery": <prefix>?pattern=<url-encoded pattern>&<bookQuery>
//  - "lang": value of books.filter.lang in bookQuery, only when non-empty
kainjow::mustache::data buildQueryData
(
  const std::string& searchProtocolPrefix,
  const std::string& pattern,
  const std::string& bookQuery
) {
  kainjow::mustache::data queryData;
  queryData.set("pattern", kiwix::encodeDiples(pattern));

  const std::string unpaginatedQuery
    = searchProtocolPrefix + "?pattern=" + urlEncode(pattern) + "&" + bookQuery;
  queryData.set("unpaginatedQuery", unpaginatedQuery);

  const auto lang = extractValueFromQuery(bookQuery, "books.filter.lang");
  if (!lang.empty()) {
    queryData.set("lang", lang);
  }
  return queryData;
}
kainjow::mustache::data buildPagination(
unsigned int pageLength,
unsigned int resultsCount,
unsigned int resultsStart
)
std::string SearchRenderer::getHtml()
{
assert(pageLength!=0);
kainjow::mustache::data pagination;
kainjow::mustache::data results{kainjow::mustache::data::type::list};
mp_searcher->restart_search();
Result* p_result = NULL;
while ((p_result = mp_searcher->getNextResult())) {
kainjow::mustache::data result;
result.set("title", p_result->get_title());
result.set("url", p_result->get_url());
result.set("snippet", p_result->get_snippet());
result.set("resultContentId", mp_nameMapper->getNameForId(p_result->get_zimId()));
if (p_result->get_wordCount() >= 0) {
result.set("wordCount", kiwix::beautifyInteger(p_result->get_wordCount()));
}
results.push_back(result);
delete p_result;
}
// pages
kainjow::mustache::data pages{kainjow::mustache::data::type::list};
if (resultsCount == 0) {
// Easy case
pagination.set("itemsPerPage", to_string(pageLength));
pagination.set("hasPages", false);
pagination.set("pages", pages);
return pagination;
}
// First we want to display pages starting at a multiple of `pageLength`
// so, let's calculate the start index of the current page.
auto currentPage = resultsStart/pageLength;
auto lastPage = ((resultsCount-1)/pageLength);
auto lastPageStart = lastPage*pageLength;
auto nbPages = lastPage + 1;
auto firstPageGenerated = currentPage > 4 ? currentPage-4 : 0;
auto lastPageGenerated = std::min(currentPage+4, lastPage);
if (nbPages != 1) {
if (firstPageGenerated!=0) {
kainjow::mustache::data page;
page.set("label", "");
page.set("start", to_string(0));
page.set("current", false);
pages.push_back(page);
auto resultStart = mp_searcher->getResultStart();
auto resultEnd = 0U;
auto estimatedResultCount = mp_searcher->getEstimatedResultCount();
auto currentPage = 0U;
auto pageStart = 0U;
auto pageEnd = 0U;
auto lastPageStart = 0U;
if (pageLength) {
currentPage = resultStart/pageLength;
pageStart = currentPage > 4 ? currentPage-4 : 0;
pageEnd = currentPage + 5;
if (pageEnd > estimatedResultCount / pageLength) {
pageEnd = (estimatedResultCount + pageLength - 1) / pageLength;
}
for (auto i=firstPageGenerated; i<=lastPageGenerated; i++) {
kainjow::mustache::data page;
page.set("label", to_string(i+1));
page.set("start", to_string(i*pageLength));
page.set("current", bool(i == currentPage));
pages.push_back(page);
}
if (lastPageGenerated!=lastPage) {
kainjow::mustache::data page;
page.set("label", "");
page.set("start", to_string(lastPageStart));
page.set("current", false);
pages.push_back(page);
if (estimatedResultCount > pageLength) {
lastPageStart = ((estimatedResultCount-1)/pageLength)*pageLength;
}
}
pagination.set("itemsPerPage", to_string(pageLength));
pagination.set("hasPages", firstPageGenerated < lastPageGenerated);
pagination.set("pages", pages);
return pagination;
}
resultEnd = resultStart+pageLength; //setting result end
std::string SearchRenderer::renderTemplate(const std::string& tmpl_str, const NameMapper& nameMapper, const Library* library)
{
const std::string absPathPrefix = protocolPrefix;
// Build the results list
kainjow::mustache::data items{kainjow::mustache::data::type::list};
for (auto it = m_srs.begin(); it != m_srs.end(); it++) {
kainjow::mustache::data result;
const std::string zim_id(it.getZimId());
const auto path = nameMapper.getNameForId(zim_id) + "/" + it.getPath();
result.set("title", it.getTitle());
result.set("absolutePath", absPathPrefix + urlEncode(path));
result.set("snippet", it.getSnippet());
if (library) {
const std::string bookTitle = library->getBookById(zim_id).getTitle();
const ParameterizedMessage bookInfoMsg("search-result-book-info",
{{"BOOK_TITLE", bookTitle}}
);
result.set("bookInfo", bookInfoMsg.getText(userlang)); // for HTML
result.set("bookTitle", bookTitle); // for XML
}
if (it.getWordCount() >= 0) {
const auto wordCountStr = kiwix::beautifyInteger(it.getWordCount());
const ParameterizedMessage wordCountMsg("word-count",
{{"COUNT", wordCountStr}}
);
result.set("wordCountInfo", wordCountMsg.getText(userlang)); // for HTML
result.set("wordCount", wordCountStr); // for XML
}
for (unsigned int i = pageStart; i < pageEnd; i++) {
kainjow::mustache::data page;
page.set("label", to_string(i + 1));
page.set("start", to_string(i * pageLength));
items.push_back(result);
if (i == currentPage) {
page.set("selected", true);
}
pages.push_back(page);
}
kainjow::mustache::data results;
results.set("items", items);
results.set("count", kiwix::beautifyInteger(estimatedResultCount));
results.set("start", kiwix::beautifyInteger(resultStart));
results.set("startLabel", kiwix::beautifyInteger(resultStart+1));
results.set("end", kiwix::beautifyInteger(std::min(resultStart+pageLength, estimatedResultCount)));
// pagination
auto pagination = buildPagination(
pageLength,
estimatedResultCount,
resultStart
);
std::string template_str = RESOURCE::templates::search_result_html;
kainjow::mustache::mustache tmpl(template_str);
kainjow::mustache::data query = buildQueryData(
searchProtocolPrefix,
searchPattern,
searchBookQuery
);
const auto pageHeaderMsg = searchResultsPageHeaderMsg(searchPattern, results);
const kainjow::mustache::object allData{
{"PAGE_TITLE", searchResultsPageTitleMsg(searchPattern).getText(userlang)},
{"PAGE_HEADER", pageHeaderMsg.getText(userlang)},
{"searchProtocolPrefix", searchProtocolPrefix},
{"results", results},
{"pagination", pagination},
{"query", query},
};
kainjow::mustache::mustache tmpl(tmpl_str);
kainjow::mustache::data allData;
allData.set("results", results);
allData.set("pages", pages);
allData.set("hasResults", estimatedResultCount != 0);
allData.set("hasPages", pageStart != pageEnd);
allData.set("count", kiwix::beautifyInteger(estimatedResultCount));
allData.set("searchPattern", kiwix::encodeDiples(this->searchPattern));
allData.set("searchPatternEncoded", urlEncode(this->searchPattern));
allData.set("resultStart", to_string(resultStart + 1));
allData.set("resultEnd", to_string(min(resultEnd, estimatedResultCount)));
allData.set("pageLength", to_string(pageLength));
allData.set("resultLastPageStart", to_string(lastPageStart));
allData.set("protocolPrefix", this->protocolPrefix);
allData.set("searchProtocolPrefix", this->searchProtocolPrefix);
allData.set("contentId", this->searchContent);
std::stringstream ss;
tmpl.render(allData, [&ss](const std::string& str) { ss << str; });
if (!tmpl.is_valid()) {
throw std::runtime_error("Error while rendering search results: " + tmpl.error_message());
}
return ss.str();
}
std::string SearchRenderer::getHtml(const NameMapper& mapper, const Library* library)
{
return renderTemplate(RESOURCE::templates::search_result_html, mapper, library);
}
std::string SearchRenderer::getXml(const NameMapper& mapper, const Library* library)
{
return renderTemplate(RESOURCE::templates::search_result_xml, mapper, library);
}
}
}

272
src/searcher.cpp Normal file
View File

@@ -0,0 +1,272 @@
/*
* Copyright 2011 Emmanuel Engelhart <kelson@kiwix.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*/
#include <cmath>
#include "searcher.h"
#include "reader.h"
#include <zim/search.h>
#include <mustache.hpp>
#include "kiwixlib-resources.h"
#define MAX_SEARCH_LEN 140
namespace kiwix
{
/* Result implementation backed by a libzim search result iterator.
 * Each accessor reads from the iterator copy stored at construction.
 * NOTE(review): an identifier starting with an underscore followed by an
 * uppercase letter is reserved for the implementation in C++; renaming would
 * touch every user of the class, so it is only flagged here. */
class _Result : public Result
{
public:
_Result(zim::SearchResultSet::iterator iterator);
virtual ~_Result(){};
virtual std::string get_url();
virtual std::string get_title();
virtual int get_score();
virtual std::string get_snippet();
virtual std::string get_content();
virtual int get_wordCount();
virtual int get_size();
virtual std::string get_zimId();
private:
/* Position of this result inside its result set */
zim::SearchResultSet::iterator iterator;
};
/* A result set together with the cursor used by Searcher::getNextResult().
 * NOTE(review): current_iterator is initialised from the constructor
 * argument `srs`, not from the copy held by this object - presumably fine
 * because copies of a result set share their state, but confirm against
 * libzim; Searcher::restart_search() re-seats it on this object's begin(). */
struct SearcherInternal : zim::SearchResultSet {
explicit SearcherInternal(const zim::SearchResultSet& srs)
: zim::SearchResultSet(srs)
, current_iterator(srs.begin())
{
}
/* Next result to be handed out */
zim::SearchResultSet::iterator current_iterator;
};
/* Constructor: starts with an empty pattern, zeroed counters and no reader,
 * then calls loadICUExternalTables() */
Searcher::Searcher()
: searchPattern(""),
estimatedResultCount(0),
resultStart(0),
resultEnd(0)
{
loadICUExternalTables();
}
/* Destructor: nothing is released explicitly; in particular the Reader
 * pointers stored in `readers` are not deleted here */
Searcher::~Searcher()
{
}
/* Register a reader to search in.
 * Readers without a fulltext index are refused (returns false); accepted
 * readers are appended to `readers` (returns true). */
bool Searcher::add_reader(Reader* reader)
{
  const bool hasIndex = reader->hasFulltextIndex();
  if (hasIndex) {
    this->readers.push_back(reader);
  }
  return hasIndex;
}
/* Return the reader registered at position `readerIndex`.
 * Uses the container's at(), so the index is bounds-checked by the
 * container rather than by this function. */
Reader* Searcher::get_reader(int readerIndex)
{
return readers.at(readerIndex);
}
/* Run a fulltext search for `search` over the registered readers that have
 * a fulltext index.
 *
 * The previous state is reset, then the pattern and the requested range are
 * recorded. Nothing is searched when resultStart == resultEnd. Otherwise the
 * pattern goes through removeAccents() before being queried, the result set
 * is stored in `internal` and the estimated match count is recorded. */
void Searcher::search(const std::string& search,
unsigned int resultStart,
unsigned int resultEnd,
const bool verbose)
{
this->reset();
if (verbose == true) {
cout << "Performing query `" << search << "'" << endl;
}
this->searchPattern = search;
this->resultStart = resultStart;
this->resultEnd = resultEnd;
/* Try to find results */
if (resultStart != resultEnd) {
/* Perform the search */
string unaccentedSearch = removeAccents(search);
std::vector<zim::Archive> archives;
for (auto current = this->readers.begin(); current != this->readers.end();
current++) {
if ( (*current)->hasFulltextIndex() ) {
archives.push_back(*(*current)->getZimArchive());
}
}
zim::Searcher searcher(archives);
zim::Query query;
query.setQuery(unaccentedSearch, false);
query.setVerbose(verbose);
/* From here on `search` names the libzim search, shadowing the parameter */
zim::Search search = searcher.search(query);
/* NOTE(review): resultEnd is passed as the second argument of getResults();
 * if libzim expects a maximum result count there rather than an end offset,
 * this should be resultEnd - resultStart - confirm against the libzim API. */
internal.reset(new SearcherInternal(search.getResults(resultStart, resultEnd)));
this->estimatedResultCount = search.getEstimatedMatches();
}
return;
}
/* Search the entries located within `distance` of the given coordinates,
 * over all registered readers.
 * The previous state is reset and a human readable description of the
 * request is stored as the search pattern. Nothing is searched when
 * resultStart == resultEnd. */
void Searcher::geo_search(float latitude, float longitude, float distance,
                          unsigned int resultStart,
                          unsigned int resultEnd,
                          const bool verbose)
{
  this->reset();

  if (verbose) {
    cout << "Performing geo query `" << distance << "&(" << latitude << ";" << longitude << ")'" << endl;
  }

  std::ostringstream description;
  description << "Articles located less than " << distance << " meters of " << latitude << ";" << longitude;
  this->searchPattern = description.str();
  this->resultStart = resultStart;
  this->resultEnd = resultEnd;

  /* Try to find results */
  if (resultStart != resultEnd) {
    std::vector<zim::Archive> archives;
    for (const auto& reader : this->readers) {
      archives.push_back(*reader->getZimArchive());
    }

    zim::Searcher searcher(archives);
    zim::Query query;
    query.setVerbose(verbose);
    query.setQuery("", false);
    query.setGeorange(latitude, longitude, distance);

    zim::Search search = searcher.search(query);
    internal.reset(new SearcherInternal(search.getResults(resultStart, resultEnd)));
    this->estimatedResultCount = search.getEstimatedMatches();
  }
}
/* Rewind the result cursor to the first result of the current result set.
 * Does nothing when no result set is held. */
void Searcher::restart_search()
{
if (internal.get()) {
internal->current_iterator = internal->begin();
}
}
/* Return the next result of the current result set and advance the cursor.
 * The returned object is allocated with `new`; the caller must delete it.
 * Returns a null pointer when there is no result set or no result left. */
Result* Searcher::getNextResult()
{
  const bool exhausted = !internal.get()
                      || !(internal->current_iterator != internal->end());
  if (exhausted) {
    return nullptr;
  }

  Result* nextResult = new _Result(internal->current_iterator);
  internal->current_iterator++;
  return nextResult;
}
/* Reset the results */
void Searcher::reset()
{
this->estimatedResultCount = 0;
this->searchPattern = "";
return;
}
void Searcher::suggestions(std::string& searchPattern, const bool verbose)
{
this->reset();
if (verbose == true) {
cout << "Performing suggestion query `" << searchPattern << "`" << endl;
}
this->searchPattern = searchPattern;
this->resultStart = 0;
this->resultEnd = 10;
string unaccentedSearch = removeAccents(searchPattern);
std::vector<zim::Archive> archives;
for (auto current = this->readers.begin(); current != this->readers.end();
current++) {
archives.push_back(*(*current)->getZimArchive());
}
zim::Searcher searcher(archives);
zim::Query query;
query.setVerbose(verbose);
query.setQuery(unaccentedSearch, true);
zim::Search search = searcher.search(query);
internal.reset(new SearcherInternal(search.getResults(resultStart, resultEnd)));
this->estimatedResultCount = search.getEstimatedMatches();
}
/* Return the result count estimation recorded by the last search
 * (0 after a reset) */
unsigned int Searcher::getEstimatedResultCount()
{
return this->estimatedResultCount;
}
/* Keep a copy of the iterator designating this result */
_Result::_Result(zim::SearchResultSet::iterator iterator)
: iterator(iterator)
{
}
/* The "url" of a result is the path reported by the iterator */
std::string _Result::get_url()
{
return iterator.getPath();
}
/* Title of the result, as reported by the iterator */
std::string _Result::get_title()
{
return iterator.getTitle();
}
/* Score of the result, as reported by the iterator */
int _Result::get_score()
{
return iterator.getScore();
}
/* Snippet of the result, as reported by the iterator */
std::string _Result::get_snippet()
{
return iterator.getSnippet();
}
/* Content of the result: the data of the item obtained from the pointed-to
 * entry via getItem(true) (presumably "follow redirects" - confirm) */
std::string _Result::get_content()
{
return iterator->getItem(true).getData();
}
/* Size of the result, as reported by the iterator */
int _Result::get_size()
{
return iterator.getSize();
}
/* Word count of the result, as reported by the iterator */
int _Result::get_wordCount()
{
return iterator.getWordCount();
}
/* Id of the ZIM file the result comes from, rendered through the stream
 * insertion operator of the value returned by getZimId() */
std::string _Result::get_zimId()
{
std::ostringstream s;
s << iterator.getZimId();
return s.str();
}
}

View File

@@ -24,28 +24,11 @@
#include <string>
#include <zim/item.h>
#include "server/internalServer.h"
namespace kiwix {
namespace
{
// Builds "http://<host>[:<port>]<root>"; the port is omitted when it is the
// default HTTP port (80).
std::string makeServerUrl(std::string host, int port, std::string root)
{
  const int defaultHttpPort = 80;
  std::string url = "http://" + host;
  if (port != defaultHttpPort) {
    url += ":" + std::to_string(port);
  }
  url += root;
  return url;
}
} // unnamed namespace
Server::Server(LibraryPtr library, std::shared_ptr<NameMapper> nameMapper) :
Server::Server(Library* library, NameMapper* nameMapper) :
mp_library(library),
mp_nameMapper(nameMapper),
mp_server(nullptr)
@@ -62,23 +45,11 @@ bool Server::start() {
m_port,
m_root,
m_nbThreads,
m_multizimSearchLimit,
m_verbose,
m_withTaskbar,
m_withLibraryButton,
m_blockExternalLinks,
m_ipMode,
m_indexTemplateString,
m_ipConnectionLimit,
m_catalogOnlyMode,
m_contentServerUrl));
if (mp_server->start()) {
// this syncs m_addr of InternalServer and Server as they may diverge
m_addr = mp_server->getAddress();
return true;
} else {
return false;
}
m_blockExternalLinks));
return mp_server->start();
}
void Server::stop() {
@@ -91,53 +62,12 @@ void Server::stop() {
void Server::setRoot(const std::string& root)
{
m_root = root;
while (!m_root.empty() && m_root.back() == '/')
m_root.pop_back();
while (!m_root.empty() && m_root.front() == '/')
m_root = m_root.substr(1);
m_root = m_root.empty() ? m_root : "/" + m_root;
}
void Server::setAddress(const std::string& addr)
{
m_addr.addr.clear();
m_addr.addr6.clear();
if (addr.empty()) return;
if (addr.find(':') != std::string::npos) { // IPv6
m_addr.addr6 = (addr[0] == '[') ? addr.substr(1, addr.length() - 2) : addr; // Remove brackets if any
} else {
m_addr.addr = addr;
if (m_root[0] != '/') {
m_root = "/" + m_root;
}
if (m_root.back() == '/') {
m_root.erase(m_root.size() - 1);
}
}
// Returns the port stored in m_port.
int Server::getPort() const
{
return m_port;
}
// Returns a copy of the address record stored in m_addr.
IpAddress Server::getAddress() const
{
return m_addr;
}
// Forwards to the internal server's getIpMode().
// NOTE(review): dereferences mp_server unconditionally — presumably only
// valid once the internal server has been created; confirm with callers.
IpMode Server::getIpMode() const
{
return mp_server->getIpMode();
}
// Lists the URLs under which the server can be reached: one for the IPv4
// address (m_addr.addr) and one for the IPv6 address (m_addr.addr6, wrapped
// in brackets), each only when the corresponding address is non-empty.
std::vector<std::string> Server::getServerAccessUrls() const
{
  std::vector<std::string> urls;
  const bool hasIpv4 = !m_addr.addr.empty();
  const bool hasIpv6 = !m_addr.addr6.empty();
  if (hasIpv4) {
    urls.push_back(makeServerUrl(m_addr.addr, m_port, m_root));
  }
  if (hasIpv6) {
    const std::string bracketedHost = "[" + m_addr.addr6 + "]";
    urls.push_back(makeServerUrl(bracketedHost, m_port, m_root));
  }
  return urls;
}
}

View File

@@ -37,11 +37,11 @@ namespace {
// into the ETag for ETag::Option opt.
// IMPORTANT: The characters in all_options must come in sorted order (so that
// IMPORTANT: isValidOptionsString() works correctly).
const char all_options[] = "Zz";
const char all_options[] = "cz";
static_assert(ETag::OPTION_COUNT == sizeof(all_options) - 1, "");
bool isValidETagBody(const std::string& s)
bool isValidServerId(const std::string& s)
{
return !s.empty() && s.find_first_of("\"/") == std::string::npos;
}
@@ -83,17 +83,17 @@ bool ETag::get_option(Option opt) const
std::string ETag::get_etag() const
{
if ( m_body.empty() )
if ( m_serverId.empty() )
return std::string();
return "\"" + m_body + "/" + m_options + "\"";
return "\"" + m_serverId + "/" + m_options + "\"";
}
ETag::ETag(const std::string& body, const std::string& options)
ETag::ETag(const std::string& serverId, const std::string& options)
{
if ( isValidETagBody(body) && isValidOptionsString(options) )
if ( isValidServerId(serverId) && isValidOptionsString(options) )
{
m_body = body;
m_serverId = serverId;
m_options = options;
}
}
@@ -115,7 +115,7 @@ ETag ETag::parse(std::string s)
return ETag(s.substr(0, i), s.substr(i+1));
}
ETag ETag::match(const std::string& etags, const std::string& body)
ETag ETag::match(const std::string& etags, const std::string& server_id)
{
std::istringstream ss(etags);
std::string etag_str;
@@ -125,7 +125,7 @@ ETag ETag::match(const std::string& etags, const std::string& body)
etag_str.pop_back();
const ETag etag = parse(etag_str);
if ( etag && etag.m_body == body )
if ( etag && etag.m_serverId == server_id )
return etag;
}

View File

@@ -28,11 +28,10 @@ namespace kiwix {
// The ETag string used by Kiwix server (more precisely, its value inside the
// double quotes) consists of two parts:
//
// 1. Body - A string uniquely identifying the object or state from which
// the resource has been obtained.
// 1. ServerId - The string obtained on server start up
//
// 2. Options - Zero or more characters encoding the type of the ETag and/or
// the values of some of the headers of the response
// 2. Options - Zero or more characters encoding the values of some of the
// headers of the response
//
// The two parts are separated with a slash (/) symbol (which is always present,
// even when the options part is empty). Neither portion of a Kiwix ETag
@@ -41,7 +40,7 @@ namespace kiwix {
//
// "abcdefghijklmn/"
// "1234567890/z"
// "6f1d19d0-633f-087b-fb55-7ac324ff9baf/Zz"
// "1234567890/cz"
//
// The options part of the Kiwix ETag allows to correctly set the required
// headers when responding to a conditional If-None-Match request with a 304
@@ -52,7 +51,7 @@ class ETag
{
public: // types
enum Option {
ZIM_CONTENT,
CACHEABLE_ENTITY,
COMPRESSED_CONTENT,
OPTION_COUNT
};
@@ -60,10 +59,10 @@ class ETag
public: // functions
ETag() {}
void set_body(const std::string& s) { m_body = s; }
void set_server_id(const std::string& id) { m_serverId = id; }
void set_option(Option opt);
explicit operator bool() const { return !m_body.empty(); }
explicit operator bool() const { return !m_serverId.empty(); }
bool get_option(Option opt) const;
std::string get_etag() const;
@@ -77,7 +76,7 @@ class ETag
static ETag parse(std::string s);
private: // data
std::string m_body;
std::string m_serverId;
std::string m_options;
};

View File

@@ -1,205 +0,0 @@
/*
* Copyright 2022 Veloman Yunkan <veloman.yunkan@gmail.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*/
#include "i18n_utils.h"
#include "tools/otherTools.h"
#include <algorithm>
#include <map>
namespace kiwix
{
const char* I18nStringTable::get(const std::string& key) const
{
const I18nString* const begin = entries;
const I18nString* const end = begin + entryCount;
const I18nString* found = std::lower_bound(begin, end, key,
[](const I18nString& a, const std::string& k) {
return a.key < k;
});
return (found == end || found->key != key) ? nullptr : found->value;
}
namespace i18n
{
// this data is generated by the i18n resource compiler
extern const I18nStringTable stringTables[];
extern const size_t langCount;
}
namespace
{
class I18nStringDB
{
public: // functions
I18nStringDB() {
for ( size_t i = 0; i < kiwix::i18n::langCount; ++i ) {
const auto& t = kiwix::i18n::stringTables[i];
lang2TableMap[t.lang] = &t;
}
enStrings = lang2TableMap.at("en");
};
std::string get(const std::string& lang, const std::string& key) const {
const char* s = getStringsFor(lang)->get(key);
if ( s == nullptr ) {
s = enStrings->get(key);
if ( s == nullptr ) {
throw std::runtime_error("Invalid message id");
}
}
return s;
}
size_t getStringCount(const std::string& lang) const {
try {
return lang2TableMap.at(lang)->entryCount;
} catch(const std::out_of_range&) {
return 0;
}
}
private: // functions
const I18nStringTable* getStringsFor(const std::string& lang) const {
try {
return lang2TableMap.at(lang);
} catch(const std::out_of_range&) {
return enStrings;
}
}
private: // data
std::map<std::string, const I18nStringTable*> lang2TableMap;
const I18nStringTable* enStrings;
};
// Gives access to the single, shared I18nStringDB instance. The instance is
// a function-local static, so it is built on the first call and the same
// object is returned by every later call.
const I18nStringDB& getStringDb()
{
static const I18nStringDB stringDb;
return stringDb;
}
} // unnamed namespace
// Returns the translation of message `key` for language `lang`, as resolved
// by the shared string database (I18nStringDB::get), which falls back to
// English and throws std::runtime_error when the key is unknown there too.
std::string getTranslatedString(const std::string& lang, const std::string& key)
{
return getStringDb().get(lang, key);
}
namespace i18n
{
std::string expandParameterizedString(const std::string& lang,
const std::string& key,
const Parameters& params)
{
kainjow::mustache::object mustacheParams;
for( const auto& kv : params ) {
mustacheParams[kv.first] = kv.second;
}
const std::string tmpl = getTranslatedString(lang, key);
return render_template(tmpl, mustacheParams);
}
} // namespace i18n
// Renders this message in language `lang` by expanding the translation of
// msgId with the stored params.
std::string ParameterizedMessage::getText(const std::string& lang) const
{
return i18n::expandParameterizedString(lang, msgId, params);
}
namespace
{
// Parses one comma-separated item of a language preference list, i.e.
// either "<lang>" or "<lang>;q=<weight>".
//
// Returns {lang, weight} (the weight is 1 when no ";" part is present).
// Returns {"", 0} when the item is blank or when the text after ';' is not
// exactly "q=<number>".
LangPreference parseSingleLanguagePreference(const std::string& s)
{
  const size_t nameBegin = s.find_first_not_of(" \t\n");
  if ( nameBegin == std::string::npos ) {
    return {"", 0};
  }

  const size_t semicolon = s.find(';', nameBegin);
  if ( semicolon == std::string::npos ) {
    return {s.substr(nameBegin), 1};
  }

  // We don't care about semicolon == nameBegin which will result in an empty
  // language name - it will be dismissed by parseUserLanguagePreferences()

  const char* const weightText = s.c_str() + semicolon + 1;
  float weight = 1.0;
  int consumed;
  const bool parsed = ( 1 == sscanf(weightText, "q=%f%n", &weight, &consumed) );

  // The weight must extend to the very end of the item; `consumed` is only
  // read when sscanf() succeeded (and therefore stored it).
  if ( !parsed || semicolon + 1 + consumed != s.size() ) {
    return {"", 0};
  }

  return {s.substr(nameBegin, semicolon - nameBegin), weight};
}
} // unnamed namespace
// Splits `s` on commas and parses every piece with
// parseSingleLanguagePreference(). Pieces that yield an empty language name
// or a non-positive preference are dropped; the remaining ones are returned
// in their original order.
UserLangPreferences parseUserLanguagePreferences(const std::string& s)
{
  UserLangPreferences accepted;
  std::istringstream input(s);

  for ( std::string item; std::getline(input, item, ','); ) {
    const auto pref = parseSingleLanguagePreference(item);
    const bool usable = !pref.lang.empty() && pref.preference > 0;
    if ( usable ) {
      accepted.push_back(pref);
    }
  }

  return accepted;
}
std::string selectMostSuitableLanguage(const UserLangPreferences& prefs)
{
if ( prefs.empty() ) {
return "en";
}
std::string bestLangSoFar("en");
float bestScoreSoFar = 0;
const auto& stringDb = getStringDb();
for ( const auto& entry : prefs ) {
const float score = entry.preference * stringDb.getStringCount(entry.lang);
if ( score > bestScoreSoFar ) {
bestScoreSoFar = score;
bestLangSoFar = entry.lang;
}
}
return bestLangSoFar;
}
// Returns the translation of message "book-category.<category>" for `lang`.
// This is a best-effort lookup: if it fails for any reason, the category
// name is returned unchanged.
std::string translateBookCategory(const std::string& lang, const std::string& category)
{
  try {
    const std::string msgId = "book-category." + category;
    return getTranslatedString(lang, msgId);
  } catch (...) {
    return category;
  }
}
} // namespace kiwix

View File

@@ -1,83 +0,0 @@
/*
* Copyright 2022 Veloman Yunkan <veloman.yunkan@gmail.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*/
#ifndef KIWIX_SERVER_I18N_UTILS
#define KIWIX_SERVER_I18N_UTILS
#include "i18n.h"
#include <mustache.hpp>
namespace kiwix
{
// One translated message: the message id (key) and its text (value).
struct I18nString {
const char* const key;
const char* const value;
};
// All translated messages of a single language.
struct I18nStringTable {
// Language code of this table.
const char* const lang;
// Number of elements in `entries`.
const size_t entryCount;
// The messages themselves. NOTE(review): get() is implemented with a binary
// search, so the entries are presumably sorted by key - confirm against the
// resource compiler.
const I18nString* const entries;
// Returns the text stored under `key`, or nullptr if there is none.
const char* get(const std::string& key) const;
};
namespace i18n
{
class GetTranslatedStringWithMsgId
{
typedef kainjow::mustache::basic_data<std::string> MustacheString;
typedef std::pair<std::string, MustacheString> MsgIdAndTranslation;
public:
explicit GetTranslatedStringWithMsgId(const std::string& lang) : m_lang(lang) {}
MsgIdAndTranslation operator()(const std::string& key) const
{
return {key, getTranslatedString(m_lang, key)};
}
MsgIdAndTranslation operator()(const std::string& key, const Parameters& params) const
{
return {key, expandParameterizedString(m_lang, key, params)};
}
private:
const std::string m_lang;
};
} // namespace i18n
// A language together with the weight the user assigned to it.
// parseUserLanguagePreferences() only keeps entries with a non-empty
// language and a preference greater than zero.
struct LangPreference
{
const std::string lang;
const float preference;
};
typedef std::vector<LangPreference> UserLangPreferences;
UserLangPreferences parseUserLanguagePreferences(const std::string& s);
std::string selectMostSuitableLanguage(const UserLangPreferences& prefs);
} // namespace kiwix
#endif // KIWIX_SERVER_I18N_UTILS

View File

File diff suppressed because it is too large Load Diff

View File

@@ -27,10 +27,6 @@ extern "C" {
#include "library.h"
#include "name_mapper.h"
#include "tools.h"
#include <zim/search.h>
#include <zim/suggestion.h>
#include <mustache.hpp>
@@ -40,77 +36,26 @@ extern "C" {
#include "server/request_context.h"
#include "server/response.h"
#include "tools/concurrent_cache.h"
namespace kiwix {
// A geographical search criterion: a point (latitude, longitude) and a
// distance around it. A negative distance marks the query as "not set".
struct GeoQuery {
  // Creates an unset query (distance == -1).
  GeoQuery()
    : latitude(0), longitude(0), distance(-1)
  {}

  GeoQuery(float latitude, float longitude, float distance)
    : latitude(latitude), longitude(longitude), distance(distance)
  {}

  float latitude;
  float longitude;
  float distance;

  // True when the query is set, i.e. the distance is not negative.
  explicit operator bool() const {
    return 0 <= distance;
  }

  // Lexicographic ordering on (latitude, longitude, distance).
  friend bool operator<(const GeoQuery& l, const GeoQuery& r)
  {
    if ( l.latitude < r.latitude ) return true;
    if ( r.latitude < l.latitude ) return false;
    if ( l.longitude < r.longitude ) return true;
    if ( r.longitude < l.longitude ) return false;
    return l.distance < r.distance;
  }
};
// Description of a full-text search request: the text pattern, an optional
// geographical restriction and the set of books to search in.
class SearchInfo {
public:
SearchInfo(const std::string& pattern, GeoQuery geoQuery, const Library::BookIdSet& bookIds, const std::string& bookFilterString);
zim::Query getZimQuery(bool verbose) const;
const Library::BookIdSet& getBookIds() const { return bookIds; }
// Lexicographic ordering on (bookIds, pattern, geoQuery).
// Note that bookFilterQuery does not take part in the comparison.
friend bool operator<(const SearchInfo& l, const SearchInfo& r)
{
return std::tie(l.bookIds, l.pattern, l.geoQuery)
< std::tie(r.bookIds, r.pattern, r.geoQuery); // keep the same order
}
public: //data
std::string pattern;
GeoQuery geoQuery;
Library::BookIdSet bookIds;
std::string bookFilterQuery;
};
typedef kainjow::mustache::data MustacheData;
class Entry;
class OPDSDumper;
class LibraryDumper;
class InternalServer {
public:
InternalServer(LibraryPtr library,
std::shared_ptr<NameMapper> nameMapper,
IpAddress addr,
InternalServer(Library* library,
NameMapper* nameMapper,
std::string addr,
int port,
std::string root,
int nbThreads,
unsigned int multizimSearchLimit,
bool verbose,
bool withTaskbar,
bool withLibraryButton,
bool blockExternalLinks,
IpMode ipMode,
std::string indexTemplateString,
int ipConnectionLimit,
bool catalogOnlyMode,
std::string zimViewerURL);
virtual ~InternalServer();
bool blockExternalLinks);
virtual ~InternalServer() = default;
MHD_Result handlerCallback(struct MHD_Connection* connection,
const char* url,
@@ -121,85 +66,55 @@ class InternalServer {
void** cont_cls);
bool start();
void stop();
IpAddress getAddress() const { return m_addr; }
int getPort() const { return m_port; }
IpMode getIpMode() const { return m_ipMode; }
private: // functions
std::unique_ptr<Response> handle_request(const RequestContext& request);
std::unique_ptr<Response> build_redirect(const std::string& bookName, const zim::Item& item) const;
std::unique_ptr<Response> build_redirect(const std::string& bookName, const kiwix::Entry& entry) const;
std::unique_ptr<Response> build_homepage(const RequestContext& request);
std::unique_ptr<Response> handle_viewer_settings(const RequestContext& request);
std::unique_ptr<Response> handle_skin(const RequestContext& request);
std::unique_ptr<Response> handle_catalog(const RequestContext& request);
std::unique_ptr<Response> handle_catalog_v2(const RequestContext& request);
std::unique_ptr<Response> handle_catalog_v2_root(const RequestContext& request);
std::unique_ptr<Response> handle_catalog_v2_entries(const RequestContext& request, bool partial);
std::unique_ptr<Response> handle_catalog_v2_complete_entry(const RequestContext& request, const std::string& entryId);
std::unique_ptr<Response> handle_catalog_v2_entries(const RequestContext& request);
std::unique_ptr<Response> handle_catalog_v2_categories(const RequestContext& request);
std::unique_ptr<Response> handle_no_js(const RequestContext& request);
std::unique_ptr<Response> handle_catalog_v2_languages(const RequestContext& request);
std::unique_ptr<Response> handle_catalog_v2_illustration(const RequestContext& request);
std::unique_ptr<Response> handle_meta(const RequestContext& request);
std::unique_ptr<Response> handle_search(const RequestContext& request);
std::unique_ptr<Response> handle_search_request(const RequestContext& request);
std::unique_ptr<Response> handle_suggest(const RequestContext& request);
std::unique_ptr<Response> handle_random(const RequestContext& request);
std::unique_ptr<Response> handle_catch(const RequestContext& request);
std::unique_ptr<Response> handle_captured_external(const RequestContext& request);
std::unique_ptr<Response> handle_content(const RequestContext& request);
std::unique_ptr<Response> handle_raw(const RequestContext& request);
std::unique_ptr<Response> handle_locally_customized_resource(const RequestContext& request);
std::vector<std::string> search_catalog(const RequestContext& request,
kiwix::OPDSDumper& opdsDumper);
MustacheData get_default_data() const;
std::pair<std::string, Library::BookIdSet> selectBooks(const RequestContext& r) const;
SearchInfo getSearchInfo(const RequestContext& r) const;
bool isLocallyCustomizedResource(const std::string& url) const;
std::string getLibraryId() const;
std::string getNoJSDownloadPageHTML(const std::string& bookId, const std::string& userLang) const;
OPDSDumper getOPDSDumper() const;
void setContentAccessUrl(LibraryDumper& libDumper) const;
private: // types
class LockableSuggestionSearcher;
typedef ConcurrentCache<SearchInfo, std::shared_ptr<zim::Search>> SearchCache;
typedef ConcurrentCache<std::string, std::shared_ptr<LockableSuggestionSearcher>> SuggestionSearcherCache;
std::shared_ptr<Reader> get_reader(const std::string& bookName) const;
bool etag_not_needed(const RequestContext& r) const;
ETag get_matching_if_none_match_etag(const RequestContext& request) const;
private: // data
IpAddress m_addr;
std::string m_addr;
int m_port;
std::string m_root; // URI-encoded
std::string m_rootPrefixOfDecodedURL; // URI-decoded
std::string m_root;
int m_nbThreads;
unsigned int m_multizimSearchLimit;
std::atomic_bool m_verbose;
bool m_withTaskbar;
bool m_withLibraryButton;
bool m_blockExternalLinks;
IpMode m_ipMode;
std::string m_indexTemplateString;
int m_ipConnectionLimit;
struct MHD_Daemon* mp_daemon;
LibraryPtr mp_library;
std::shared_ptr<NameMapper> mp_nameMapper;
SearchCache searchCache;
SuggestionSearcherCache suggestionSearcherCache;
Library* mp_library;
NameMapper* mp_nameMapper;
std::string m_server_id;
std::string m_library_id;
class CustomizedResources;
std::unique_ptr<CustomizedResources> m_customizedResources;
friend std::unique_ptr<Response> Response::build(const InternalServer& server);
friend std::unique_ptr<ContentResponse> ContentResponse::build(const InternalServer& server, const std::string& content, const std::string& mimetype, bool isHomePage);
friend std::unique_ptr<Response> ItemResponse::build(const InternalServer& server, const RequestContext& request, const zim::Item& item);
friend std::unique_ptr<Response> Response::build_500(const InternalServer& server, const std::string& msg);
const bool m_catalogOnlyMode;
const std::string m_contentServerUrl;
};
}

View File

@@ -1,227 +0,0 @@
/*
* Copyright 2021 Veloman Yunkan <veloman.yunkan@gmail.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*/
#include "internalServer.h"
#include "library.h"
#include "opds_dumper.h"
#include "request_context.h"
#include "response.h"
#include "tools/otherTools.h"
#include "libkiwix-resources.h"
#include <mustache.hpp>
#include <string>
#include <vector>
namespace kiwix {
namespace
{
// Kinds of OPDS documents served by the catalog endpoints.
// The enumerator values are used as indexes into opdsMimeType[], so the
// order here must stay in sync with that array.
enum OPDSResponseKind
{
OPDS_ENTRY,
OPDS_NAVIGATION_FEED,
OPDS_ACQUISITION_FEED
};
// MIME type of each OPDS document kind, indexed by OPDSResponseKind
// (entry, navigation feed, acquisition feed - in that order).
const std::string opdsMimeType[] = {
"application/atom+xml;type=entry;profile=opds-catalog;charset=utf-8",
"application/atom+xml;profile=opds-catalog;kind=navigation;charset=utf-8",
"application/atom+xml;profile=opds-catalog;kind=acquisition;charset=utf-8"
};
} // unnamed namespace
// Creates an OPDSDumper for this server's library and name mapper and
// configures it with the server's root location, the library id and the
// content access URL.
OPDSDumper InternalServer::getOPDSDumper() const
{
  kiwix::OPDSDumper dumper(mp_library.get(), mp_nameMapper.get());

  dumper.setRootLocation(m_root);
  dumper.setLibraryId(getLibraryId());
  setContentAccessUrl(dumper);

  return dumper;
}
// Serves the legacy OPDS catalog endpoints, selected by URL part 1:
//   - "v2"                    -> delegated to handle_catalog_v2()
//   - "searchdescription.xml" -> the OpenSearch description document
//   - "root.xml"              -> acquisition feed of all valid books
//   - "search"                -> acquisition feed of the books matching the
//                                request (see search_catalog())
// Any other value, a missing URL part or a missing Host header results in a
// "URL not found" response.
std::unique_ptr<Response> InternalServer::handle_catalog(const RequestContext& request)
{
  if (m_verbose.load()) {
    printf("** running handle_catalog");
  }

  std::string url;
  try {
    // The header value itself is not needed (it used to seed a feed UUID
    // that was never read), but the lookup is kept so that a request
    // without a Host header is still answered with 404.
    request.get_header("Host");
    url = request.get_url_part(1);
  } catch (const std::out_of_range&) {
    return UrlNotFoundResponse(request);
  }

  if (url == "v2") {
    return handle_catalog_v2(request);
  }

  if (url == "searchdescription.xml") {
    return ContentResponse::build(RESOURCE::opensearchdescription_xml, get_default_data(), "application/opensearchdescription+xml");
  }

  if (url != "root.xml" && url != "search") {
    return UrlNotFoundResponse(request);
  }

  kiwix::OPDSDumper opdsDumper = getOPDSDumper();
  std::vector<std::string> bookIdsToDump;
  if (url == "root.xml") {
    bookIdsToDump = mp_library->filter(kiwix::Filter().valid(true).local(true).remote(true));
  } else {
    // url == "search"
    bookIdsToDump = search_catalog(request, opdsDumper);
  }

  return ContentResponse::build(
             opdsDumper.dumpOPDSFeed(bookIdsToDump, request.get_query()),
             opdsMimeType[OPDS_ACQUISITION_FEED]);
}
// Dispatches a /catalog/v2/... request to the handler selected by URL
// part 2. A missing or unknown part yields a "URL not found" response, and
// so does an "entry" request that lacks the entry id (URL part 3).
std::unique_ptr<Response> InternalServer::handle_catalog_v2(const RequestContext& request)
{
  if (m_verbose.load()) {
    printf("** running handle_catalog_v2");
  }

  std::string url;
  try {
    url = request.get_url_part(2);
  } catch (const std::out_of_range&) {
    return UrlNotFoundResponse(request);
  }

  if (url == "root.xml") {
    return handle_catalog_v2_root(request);
  } else if (url == "searchdescription.xml") {
    const std::string endpoint_root = m_root + "/catalog/v2";
    return ContentResponse::build(
        RESOURCE::catalog_v2_searchdescription_xml,
        kainjow::mustache::object({{"endpoint_root", endpoint_root}}),
        "application/opensearchdescription+xml"
    );
  } else if (url == "entry") {
    // get_url_part() throws std::out_of_range when the part is absent;
    // without this guard ".../catalog/v2/entry" would let the exception
    // escape instead of producing a 404.
    std::string entryId;
    try {
      entryId = request.get_url_part(3);
    } catch (const std::out_of_range&) {
      return UrlNotFoundResponse(request);
    }
    return handle_catalog_v2_complete_entry(request, entryId);
  } else if (url == "entries") {
    return handle_catalog_v2_entries(request, /*partial=*/false);
  } else if (url == "partial_entries") {
    return handle_catalog_v2_entries(request, /*partial=*/true);
  } else if (url == "categories") {
    return handle_catalog_v2_categories(request);
  } else if (url == "languages") {
    return handle_catalog_v2_languages(request);
  } else if (url == "illustration") {
    return handle_catalog_v2_illustration(request);
  } else {
    return UrlNotFoundResponse(request);
  }
}
std::unique_ptr<Response> InternalServer::handle_catalog_v2_root(const RequestContext& request)
{
const std::string libraryId = getLibraryId();
return ContentResponse::build(
RESOURCE::templates::catalog_v2_root_xml,
kainjow::mustache::object{
{"date", gen_date_str()},
{"endpoint_root", m_root + "/catalog/v2"},
{"feed_id", gen_uuid(libraryId)},
{"all_entries_feed_id", gen_uuid(libraryId + "/entries")},
{"partial_entries_feed_id", gen_uuid(libraryId + "/partial_entries")},
{"category_list_feed_id", gen_uuid(libraryId + "/categories")},
{"language_list_feed_id", gen_uuid(libraryId + "/languages")}
},
opdsMimeType[OPDS_NAVIGATION_FEED]
);
}
std::unique_ptr<Response> InternalServer::handle_catalog_v2_entries(const RequestContext& request, bool partial)
{
kiwix::OPDSDumper opdsDumper = getOPDSDumper();
const auto bookIds = search_catalog(request, opdsDumper);
const auto opdsFeed = opdsDumper.dumpOPDSFeedV2(bookIds, request.get_query(), partial);
return ContentResponse::build(
opdsFeed,
opdsMimeType[OPDS_ACQUISITION_FEED]
);
}
std::unique_ptr<Response> InternalServer::handle_catalog_v2_complete_entry(const RequestContext& request, const std::string& entryId)
{
try {
mp_library->getBookById(entryId);
} catch (const std::out_of_range&) {
return UrlNotFoundResponse(request);
}
kiwix::OPDSDumper opdsDumper = getOPDSDumper();
const auto opdsFeed = opdsDumper.dumpOPDSCompleteEntry(entryId);
return ContentResponse::build(
opdsFeed,
opdsMimeType[OPDS_ENTRY]
);
}
std::unique_ptr<Response> InternalServer::handle_catalog_v2_categories(const RequestContext& request)
{
kiwix::OPDSDumper opdsDumper = getOPDSDumper();
return ContentResponse::build(
opdsDumper.categoriesOPDSFeed(),
opdsMimeType[OPDS_NAVIGATION_FEED]
);
}
std::unique_ptr<Response> InternalServer::handle_catalog_v2_languages(const RequestContext& request)
{
kiwix::OPDSDumper opdsDumper = getOPDSDumper();
return ContentResponse::build(
opdsDumper.languagesOPDSFeed(),
opdsMimeType[OPDS_NAVIGATION_FEED]
);
}
// Serves the illustration of the book named by URL part 3, in the size
// given by the "size" query argument. Any failure along the way (missing
// URL part or argument, unknown book, ...) is reported as "URL not found".
std::unique_ptr<Response> InternalServer::handle_catalog_v2_illustration(const RequestContext& request)
{
  try {
    const auto id = request.get_url_part(3);
    auto book = mp_library->getBookByIdThreadSafe(id);
    auto picture = book.getIllustration(request.get_argument<unsigned int>("size"));
    return ContentResponse::build(picture->getData(), picture->mimeType);
  } catch(...) {
    return UrlNotFoundResponse(request);
  }
}
} // namespace kiwix

View File

@@ -0,0 +1,109 @@
/*
* Copyright 2021 Veloman Yunkan <veloman.yunkan@gmail.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*/
#include "internalServer.h"
#include "library.h"
#include "opds_dumper.h"
#include "request_context.h"
#include "response.h"
#include "tools/otherTools.h"
#include "kiwixlib-resources.h"
#include <mustache.hpp>
#include <string>
#include <vector>
namespace kiwix {
// Dispatches a /catalog/v2/... request to the handler selected by URL
// part 2; a missing or unknown part is answered with a 404 response.
std::unique_ptr<Response> InternalServer::handle_catalog_v2(const RequestContext& request)
{
  if (m_verbose.load()) {
    printf("** running handle_catalog_v2");
  }

  std::string endpoint;
  try {
    endpoint = request.get_url_part(2);
  } catch (const std::out_of_range&) {
    return Response::build_404(*this, request, "", "");
  }

  if (endpoint == "root.xml") {
    return handle_catalog_v2_root(request);
  }

  if (endpoint == "searchdescription.xml") {
    const std::string endpointRoot = m_root + "/catalog/v2";
    return ContentResponse::build(*this,
        RESOURCE::catalog_v2_searchdescription_xml,
        kainjow::mustache::object({{"endpoint_root", endpointRoot}}),
        "application/opensearchdescription+xml"
    );
  }

  if (endpoint == "entries") {
    return handle_catalog_v2_entries(request);
  }

  if (endpoint == "categories") {
    return handle_catalog_v2_categories(request);
  }

  return Response::build_404(*this, request, "", "");
}
std::unique_ptr<Response> InternalServer::handle_catalog_v2_root(const RequestContext& request)
{
return ContentResponse::build(
*this,
RESOURCE::templates::catalog_v2_root_xml,
kainjow::mustache::object{
{"date", gen_date_str()},
{"endpoint_root", m_root + "/catalog/v2"},
{"feed_id", gen_uuid(m_library_id)},
{"all_entries_feed_id", gen_uuid(m_library_id + "/entries")},
{"category_list_feed_id", gen_uuid(m_library_id + "/categories")}
},
"application/atom+xml;profile=opds-catalog;kind=navigation"
);
}
std::unique_ptr<Response> InternalServer::handle_catalog_v2_entries(const RequestContext& request)
{
OPDSDumper opdsDumper(mp_library);
opdsDumper.setRootLocation(m_root);
opdsDumper.setLibraryId(m_library_id);
const auto bookIds = search_catalog(request, opdsDumper);
const auto opdsFeed = opdsDumper.dumpOPDSFeedV2(bookIds, request.get_query());
return ContentResponse::build(
*this,
opdsFeed,
"application/atom+xml;profile=opds-catalog;kind=acquisition"
);
}
std::unique_ptr<Response> InternalServer::handle_catalog_v2_categories(const RequestContext& request)
{
OPDSDumper opdsDumper(mp_library);
opdsDumper.setRootLocation(m_root);
opdsDumper.setLibraryId(m_library_id);
return ContentResponse::build(
*this,
opdsDumper.categoriesOPDSFeed(mp_library->getBooksCategories()),
"application/atom+xml;profile=opds-catalog;kind=navigation"
);
}
} // namespace kiwix

View File

@@ -25,10 +25,8 @@
#include <sstream>
#include <cstdio>
#include <atomic>
#include <cctype>
#include "tools/stringTools.h"
#include "i18n_utils.h"
namespace kiwix {
@@ -49,62 +47,67 @@ RequestMethod str2RequestMethod(const std::string& method) {
else return RequestMethod::OTHER;
}
// Converts the URL path of a request into a path relative to the server's
// root location:
//   - empty rootLocation              -> full_url is returned unchanged
//   - full_url == rootLocation        -> "/"
//   - full_url == rootLocation + "/…" -> the part starting at that '/'
//   - anything else                   -> "" (the URL is outside of the root)
std::string
fullURL2LocalURL(const std::string& full_url, const std::string& rootLocation)
{
  const std::string::size_type rootLen = rootLocation.size();

  if (rootLen == 0) {
    // nothing special to handle.
    return full_url;
  }

  if (full_url == rootLocation) {
    return "/";
  }

  // The root must be followed by a '/', so that "/rootx" is not mistaken
  // for a URL under "/root".
  const bool underRoot = full_url.size() > rootLen
                      && full_url.compare(0, rootLen, rootLocation) == 0
                      && full_url[rootLen] == '/';

  return underRoot ? full_url.substr(rootLen) : std::string();
}
} // unnamed namespace
RequestContext::RequestContext(const std::string& _rootLocation, // URI-encoded
const std::string& unrootedUrl, // URI-decoded
RequestContext::RequestContext(struct MHD_Connection* connection,
std::string rootLocation,
const std::string& _url,
const std::string& _method,
const std::string& version,
const NameValuePairs& headers,
const NameValuePairs& queryArgs) :
rootLocation(_rootLocation),
url(unrootedUrl),
const std::string& version) :
full_url(_url),
url(fullURL2LocalURL(_url, rootLocation)),
method(str2RequestMethod(_method)),
version(version),
requestIndex(s_requestIndex++),
acceptEncodingGzip(false),
acceptEncodingDeflate(false),
byteRange_()
{
for ( const auto& kv : headers ) {
add_header(kv.first, kv.second);
}
for ( const auto& kv : queryArgs ) {
add_argument(kv.first, kv.second);
}
MHD_get_connection_values(connection, MHD_HEADER_KIND, &RequestContext::fill_header, this);
MHD_get_connection_values(connection, MHD_GET_ARGUMENT_KIND, &RequestContext::fill_argument, this);
try {
acceptEncodingGzip =
(get_header(MHD_HTTP_HEADER_ACCEPT_ENCODING).find("gzip") != std::string::npos);
acceptEncodingDeflate =
(get_header(MHD_HTTP_HEADER_ACCEPT_ENCODING).find("deflate") != std::string::npos);
} catch (const std::out_of_range&) {}
try {
byteRange_ = ByteRange::parse(get_header(MHD_HTTP_HEADER_RANGE));
} catch (const std::out_of_range&) {}
userlang = determine_user_language();
}
RequestContext::~RequestContext()
{}
void RequestContext::add_header(const char *key, const char *value)
MHD_Result RequestContext::fill_header(void *__this, enum MHD_ValueKind kind,
const char *key, const char *value)
{
this->headers[lcAll(key)] = value;
RequestContext *_this = static_cast<RequestContext*>(__this);
_this->headers[lcAll(key)] = value;
return MHD_YES;
}
void RequestContext::add_argument(const char *key, const char* value)
MHD_Result RequestContext::fill_argument(void *__this, enum MHD_ValueKind kind,
const char *key, const char* value)
{
RequestContext *_this = this;
_this->arguments[key].push_back(value == nullptr ? "" : value);
if ( ! _this->queryString.empty() ) {
_this->queryString += "&";
}
_this->queryString += urlEncode(key);
if ( value ) {
_this->queryString += "=";
_this->queryString += urlEncode(value);
}
RequestContext *_this = static_cast<RequestContext*>(__this);
_this->arguments[key] = value == nullptr ? "" : value;
return MHD_YES;
}
void RequestContext::print_debug_info() const {
@@ -118,18 +121,13 @@ void RequestContext::print_debug_info() const {
printf(" - %s : '%s'\n", it->first.c_str(), it->second.c_str());
}
printf("arguments :\n");
for (auto& pair:arguments) {
printf(" - %s :", pair.first.c_str());
bool first = true;
for (auto& v: pair.second) {
printf("%s %s", first?"":",", v.c_str());
first = false;
}
printf("\n");
for (auto it=arguments.begin(); it!=arguments.end(); it++) {
printf(" - %s : '%s'\n", it->first.c_str(), it->second.c_str());
}
printf("Parsed : \n");
printf("full_url: %s\n", full_url.c_str());
printf("url : %s\n", url.c_str());
printf("acceptEncodingGzip : %d\n", acceptEncodingGzip);
printf("acceptEncodingDeflate : %d\n", acceptEncodingDeflate);
printf("has_range : %d\n", byteRange_.kind() != ByteRange::NONE);
printf("is_valid_url : %d\n", is_valid_url());
printf(".............\n");
@@ -165,15 +163,11 @@ std::string RequestContext::get_url_part(int number) const {
}
std::string RequestContext::get_full_url() const {
return rootLocation + urlEncode(url);
}
std::string RequestContext::get_root_path() const {
return rootLocation.empty() ? "/" : rootLocation;
return full_url;
}
bool RequestContext::is_valid_url() const {
return url.empty() || url[0] == '/';
return !url.empty();
}
ByteRange RequestContext::get_range() const {
@@ -182,37 +176,21 @@ ByteRange RequestContext::get_range() const {
template<>
std::string RequestContext::get_argument(const std::string& name) const {
return arguments.at(name)[0];
return arguments.at(name);
}
// Returns the value of request header `name`. The name is lower-cased
// before the lookup. Throws std::out_of_range (from at()) when the request
// has no such header.
std::string RequestContext::get_header(const std::string& name) const {
return headers.at(lcAll(name));
}
// Returns the language code stored in `userlang`.
std::string RequestContext::get_user_language() const
{
return userlang.lang;
}
// Works out the language to use for this request, together with the way it
// was selected. The sources are tried in this order:
//   1. the "userlang" query argument,
//   2. the Accept-Language header,
//   3. the default, "en".
// A source is skipped when looking it up throws std::out_of_range.
RequestContext::UserLanguage RequestContext::determine_user_language() const
{
try {
return {UserLanguage::SelectorKind::QUERY_PARAM, get_argument("userlang")};
} catch(const std::out_of_range&) {}
try {
const std::string acceptLanguage = get_header("Accept-Language");
const auto userLangPrefs = parseUserLanguagePreferences(acceptLanguage);
const auto lang = selectMostSuitableLanguage(userLangPrefs);
return {UserLanguage::SelectorKind::ACCEPT_LANGUAGE_HEADER, lang};
} catch(const std::out_of_range&) {}
return {UserLanguage::SelectorKind::DEFAULT, "en"};
}
std::string RequestContext::get_requested_format() const
{
return get_optional_param<std::string>("format", "html");
std::string RequestContext::get_query() const {
std::string q;
const char* sep = "";
for ( const auto& a : arguments ) {
q += sep + a.first + '=' + a.second;
sep = "&";
}
return q;
}
}

View File

@@ -25,11 +25,9 @@
#include <string>
#include <sstream>
#include <map>
#include <vector>
#include <stdexcept>
#include "byte_range.h"
#include "../tools/stringTools.h"
extern "C" {
#include "microhttpd_wrapper.h"
@@ -55,17 +53,12 @@ class IndexError: public std::runtime_error {};
class RequestContext {
public: // types
typedef std::vector<std::pair<const char*, const char*>> NameValuePairs;
public: // functions
RequestContext(const std::string& rootLocation, // URI-encoded
const std::string& unrootedUrl, // URI-decoded
RequestContext(struct MHD_Connection* connection,
std::string rootLocation,
const std::string& url,
const std::string& method,
const std::string& version,
const NameValuePairs& headers,
const NameValuePairs& queryArgs);
const std::string& version);
~RequestContext();
void print_debug_info() const;
@@ -75,11 +68,10 @@ class RequestContext {
std::string get_header(const std::string& name) const;
template<typename T=std::string>
T get_argument(const std::string& name) const {
return extractFromString<T>(get_argument(name));
}
std::vector<std::string> get_arguments(const std::string& name) const {
return arguments.at(name);
std::istringstream stream(arguments.at(name));
T v;
stream >> v;
return v;
}
template<class T>
@@ -96,68 +88,28 @@ class RequestContext {
std::string get_url() const;
std::string get_url_part(int part) const;
std::string get_full_url() const;
std::string get_root_path() const;
std::string get_query() const { return queryString; }
template<class F>
std::string get_query(F filter, bool mustEncode) const {
std::string q;
const char* sep = "";
auto encode = [=](const std::string& value) { return mustEncode?urlEncode(value):value; };
for ( const auto& a : arguments ) {
if (!filter(a.first)) {
continue;
}
for (const auto& v: a.second) {
q += sep + encode(a.first) + '=' + encode(v);
sep = "&";
}
}
return q;
}
std::string get_query() const;
ByteRange get_range() const;
bool can_compress() const { return acceptEncodingGzip; }
std::string get_user_language() const;
std::string get_requested_format() const;
private: // types
struct UserLanguage
{
enum SelectorKind
{
QUERY_PARAM,
ACCEPT_LANGUAGE_HEADER,
DEFAULT
};
SelectorKind selectedBy;
std::string lang;
};
bool can_compress() const { return acceptEncodingDeflate; }
private: // data
std::string rootLocation;
std::string full_url;
std::string url;
RequestMethod method;
std::string version;
unsigned long long requestIndex;
bool acceptEncodingGzip;
bool acceptEncodingDeflate;
ByteRange byteRange_;
std::map<std::string, std::string> headers;
std::map<std::string, std::vector<std::string>> arguments;
std::string queryString;
UserLanguage userlang;
std::map<std::string, std::string> arguments;
private: // functions
UserLanguage determine_user_language() const;
void add_header(const char* name, const char* value);
void add_argument(const char* name, const char* value);
static MHD_Result fill_header(void *, enum MHD_ValueKind, const char*, const char*);
static MHD_Result fill_argument(void *, enum MHD_ValueKind, const char*, const char*);
};
template<> std::string RequestContext::get_argument(const std::string& name) const;

View File

@@ -20,506 +20,85 @@
#include "response.h"
#include "request_context.h"
#include "internalServer.h"
#include "libkiwix-resources.h"
#include "kiwixlib-resources.h"
#include "tools/regexTools.h"
#include "tools/stringTools.h"
#include "tools/otherTools.h"
#include "tools/archiveTools.h"
#include "string.h"
#include <mustache.hpp>
#include <zlib.h>
#include <array>
#include <list>
#include <map>
#include <regex>
// This is somewhat of a magic value.
// If this value is too small, we will compress too much content (and lose
// CPU time doing so).
// If this value is too big, we will not compress enough content and send too
// much data.
// If we assume that the MTU is 1500 bytes, it is useless to compress
// smaller content, as it will be sent in one packet anyway.
// 1400 bytes seems to be a commonly accepted limit.
#define KIWIX_MIN_CONTENT_SIZE_TO_COMPRESS 1400
#define KIWIX_MIN_CONTENT_SIZE_TO_DEFLATE 100
namespace kiwix {
namespace
{
typedef kainjow::mustache::data MustacheData;
// some utilities
std::string get_mime_type(const zim::Item& item)
{
try {
return item.getMimetype();
} catch (std::exception& e) {
} catch (exception& e) {
return "application/octet-stream";
}
}
bool is_compressible_mime_type(const std::string& mimeType)
{
return mimeType.find("text/") != std::string::npos
|| mimeType.find("application/javascript") != std::string::npos
|| mimeType.find("application/atom") != std::string::npos
|| mimeType.find("application/opensearchdescription") != std::string::npos
|| mimeType.find("application/json") != std::string::npos
// Web fonts
|| mimeType.find("application/font-") != std::string::npos
|| mimeType.find("application/x-font-") != std::string::npos
|| mimeType.find("application/vnd.ms-fontobject") != std::string::npos
|| mimeType.find("font/") != std::string::npos;
return mimeType.find("text/") != string::npos
|| mimeType.find("application/javascript") != string::npos
|| mimeType.find("application/atom") != string::npos
|| mimeType.find("application/opensearchdescription") != string::npos
|| mimeType.find("application/json") != string::npos;
}
bool compress(std::string &content) {
z_stream strm;
strm.zalloc = Z_NULL;
strm.zfree = Z_NULL;
strm.opaque = Z_NULL;
auto ret = deflateInit2(&strm, Z_DEFAULT_COMPRESSION, Z_DEFLATED, 31, 8,
Z_DEFAULT_STRATEGY);
if (ret != Z_OK) { return false; }
strm.avail_in = static_cast<decltype(strm.avail_in)>(content.size());
strm.next_in =
const_cast<Bytef *>(reinterpret_cast<const Bytef *>(content.data()));
std::string compressed;
std::array<char, 16384> buff{};
do {
strm.avail_out = buff.size();
strm.next_out = reinterpret_cast<Bytef *>(buff.data());
ret = deflate(&strm, Z_FINISH);
assert(ret != Z_STREAM_ERROR);
compressed.append(buff.data(), buff.size() - strm.avail_out);
} while (strm.avail_out == 0);
assert(ret == Z_STREAM_END);
assert(strm.avail_in == 0);
content.swap(compressed);
deflateEnd(&strm);
return true;
}
// Maps a response kind to the Cache-Control header value to send with it.
// Any kind other than STATIC_RESOURCE or ZIM_CONTENT gets the
// "max-age=0, must-revalidate" policy.
const char* getCacheControlHeader(Response::Kind k)
{
  if ( k == Response::STATIC_RESOURCE ) {
    return "max-age=31536000, immutable";
  }
  if ( k == Response::ZIM_CONTENT ) {
    return "max-age=3600, must-revalidate";
  }
  return "max-age=0, must-revalidate";
}
} // unnamed namespace
Response::Response()
: m_returnCode(MHD_HTTP_OK)
Response::Response(bool verbose)
: m_verbose(verbose),
m_returnCode(MHD_HTTP_OK)
{
add_header(MHD_HTTP_HEADER_ACCESS_CONTROL_ALLOW_ORIGIN, "*");
}
void Response::set_kind(Kind k)
std::unique_ptr<Response> Response::build(const InternalServer& server)
{
m_kind = k;
if ( k == ZIM_CONTENT )
m_etag.set_option(ETag::ZIM_CONTENT);
return std::unique_ptr<Response>(new Response(server.m_verbose.load()));
}
std::unique_ptr<Response> Response::build()
std::unique_ptr<Response> Response::build_304(const InternalServer& server, const ETag& etag)
{
return std::make_unique<Response>();
}
std::unique_ptr<Response> Response::build_304(const ETag& etag)
{
auto response = Response::build();
auto response = Response::build(server);
response->set_code(MHD_HTTP_NOT_MODIFIED);
response->m_etag = etag;
if ( etag.get_option(ETag::ZIM_CONTENT) ) {
response->set_kind(Response::ZIM_CONTENT);
}
if ( etag.get_option(ETag::COMPRESSED_CONTENT) ) {
response->add_header(MHD_HTTP_HEADER_VARY, "Accept-Encoding");
}
return response;
}
namespace
std::unique_ptr<Response> Response::build_404(const InternalServer& server, const RequestContext& request, const std::string& bookName, const std::string& bookTitle, const std::string& details)
{
MustacheData results;
results.set("url", request.get_full_url());
results.set("details", details);
// This class was introduced in order to work around the missing support
// for std::variant (and std::optional) under some of the current build
// platforms.
template<class T>
class Optional
{
public: // functions
Optional() {}
Optional(const T& t) : ptr(new T(t)) {}
Optional(const Optional& o) : ptr(o.has_value() ? new T(*o) : nullptr) {}
Optional(Optional&& o) : ptr(std::move(o.ptr)) {}
auto response = ContentResponse::build(server, RESOURCE::templates::_404_html, results, "text/html");
response->set_code(MHD_HTTP_NOT_FOUND);
response->set_taskbar(bookName, bookTitle);
Optional& operator=(const Optional& o)
{
*this = Optional(o);
return *this;
}
Optional& operator=(Optional&& o)
{
ptr = std::move(o.ptr);
return *this;
}
bool has_value() const { return ptr.get() != nullptr; }
const T& operator*() const { return *ptr; }
T& operator*() { return *ptr; }
private: // data
std::unique_ptr<T> ptr;
};
} // unnamed namespace
// A small dynamically-typed value used to describe response template data.
// A Data holds one of: a string, a bool, a list of Data, or a string-keyed
// map of Data. It can be converted to mustache data (toMustache) and
// serialised as JSON (asJSON / dumpJSON).
class ContentResponseBlueprint::Data
{
public:
typedef std::list<Data> List;
typedef std::map<std::string, Data> Object;
private:
// std::variant<std::string, bool, List, Object> data;
// XXX: libkiwix is compiled on platforms where std::variant
// XXX: is not yet supported. Hence this hack. Only one
// XXX: of the below data members is expected to contain a value.
Optional<std::string> m_stringValue;
Optional<bool> m_boolValue;
Optional<List> m_listValue;
Optional<Object> m_objectValue;
public:
// A default-constructed Data leaves all four alternatives empty.
Data() {}
// Converting constructors: each one fills exactly one alternative.
// NOTE(review): a string literal argument would bind to Data(bool)
// (pointer-to-bool is a standard conversion) rather than to the std::string
// overload - verify that callers always pass a std::string.
Data(const std::string& s) : m_stringValue(s) {}
Data(bool b) : m_boolValue(b) {}
Data(const List& l) : m_listValue(l) {}
Data(const Object& o) : m_objectValue(o) {}
// Converts this value to mustache data; `lang` is forwarded to nested values
// and used when resolving message objects (defined out of line).
MustacheData toMustache(const std::string& lang) const;
// Accesses (creating if needed) the member `key` of the held object.
// m_objectValue is dereferenced without a check, so this is only valid when
// this Data holds an object.
Data& operator[](const std::string& key)
{
return (*m_objectValue)[key];
}
// Appends `d` to the held list. m_listValue is dereferenced without a check,
// so this is only valid when this Data holds a list.
void push_back(const Data& d) { (*m_listValue).push_back(d); }
// Returns Data(false) for an empty string, otherwise a string Data.
static Data onlyAsNonEmptyValue(const std::string& s)
{
return s.empty() ? Data(false) : Data(s);
}
// Encodes a parameterized message as an object with two members:
// "msgid" (the message id) and "params" (an object of its parameters).
static Data from(const ParameterizedMessage& pmsg)
{
Object obj;
for(const auto& kv : pmsg.getParams()) {
obj[kv.first] = kv.second;
}
return Object{
{ "msgid", pmsg.getMsgId() },
{ "params", Data(obj) }
};
}
// Same as from(), for a message id wrapped by nonParameterizedMessage().
static Data fromMsgId(const std::string& nonParameterizedMsgId)
{
return from(nonParameterizedMessage(nonParameterizedMsgId));
}
// Builds an object with members "p1" .. "p<n>", where member "p<i>" is the
// message whose id is msgIdPrefix + ".p<i>".
static Data staticMultiParagraphText(const std::string& msgIdPrefix, size_t n)
{
Object paragraphs;
for ( size_t i = 1; i <= n; ++i ) {
std::ostringstream oss;
oss << "p" << i;
const std::string pId = oss.str();
paragraphs[pId] = fromMsgId(msgIdPrefix + "." + pId);
}
return paragraphs;
}
// JSON serialisation (both defined out of line).
std::string asJSON() const;
void dumpJSON(std::ostream& os) const;
private:
// Type predicates: true when the corresponding alternative holds a value.
bool isString() const { return m_stringValue.has_value(); }
bool isList() const { return m_listValue.has_value(); }
bool isObject() const { return m_objectValue.has_value(); }
// Unchecked accessors: each dereferences its alternative directly, so the
// caller must have verified the held type first.
const std::string& stringValue() const { return *m_stringValue; }
bool boolValue() const { return *m_boolValue; }
const List& listValue() const { return *m_listValue; }
const Object& objectValue() const { return *m_objectValue; }
// Returns a pointer to member `key` of the held object, or nullptr when this
// Data is not an object or has no such member.
const Data* get(const std::string& key) const
{
if ( !isObject() )
return nullptr;
const auto& obj = objectValue();
const auto it = obj.find(key);
return it != obj.end() ? &it->second : nullptr;
}
};
MustacheData ContentResponseBlueprint::Data::toMustache(const std::string& lang) const
{
if ( this->isList() ) {
kainjow::mustache::list l;
for ( const auto& x : this->listValue() ) {
l.push_back(x.toMustache(lang));
}
return l;
} else if ( this->isObject() ) {
const Data* msgId = this->get("msgid");
const Data* msgParams = this->get("params");
if ( msgId && msgId->isString() && msgParams && msgParams->isObject() ) {
std::map<std::string, std::string> params;
for(const auto& kv : msgParams->objectValue()) {
params[kv.first] = kv.second.stringValue();
}
const ParameterizedMessage msg(msgId->stringValue(), ParameterizedMessage::Parameters(params));
return msg.getText(lang);
} else {
kainjow::mustache::object o;
for ( const auto& kv : this->objectValue() ) {
o[kv.first] = kv.second.toMustache(lang);
}
return o;
}
} else if ( this->isString() ) {
return this->stringValue();
} else {
return this->boolValue();
}
return std::move(response);
}
void ContentResponseBlueprint::Data::dumpJSON(std::ostream& os) const
std::unique_ptr<Response> Response::build_416(const InternalServer& server, size_t resourceLength)
{
if ( this->isString() ) {
os << '"' << escapeForJSON(this->stringValue()) << '"';
} else if ( this->isList() ) {
const char * sep = " ";
os << "[";
for ( const auto& x : this->listValue() ) {
os << sep;
x.dumpJSON(os);
sep = ", ";
}
os << " ]";
} else if ( this->isObject() ) {
const char * sep = " ";
os << "{";
for ( const auto& kv : this->objectValue() ) {
os << sep << '"' << kv.first << "\" : ";
kv.second.dumpJSON(os);
sep = ", ";
}
os << " }";
} else {
os << (this->boolValue() ? "true" : "false");
}
}
// Serialises this value as JSON text that is safe to embed inside an HTML
// <script></script> element.
std::string ContentResponseBlueprint::Data::asJSON() const
{
  std::ostringstream oss;
  this->dumpJSON(oss);

  // This JSON is going to be used in HTML inside a <script></script> tag.
  // If it contains "</script>" (or "</script >") as a substring, then the HTML
  // parser will be confused. Since for a valid JSON that may happen only inside
  // a JSON string, we can safely take advantage of the answers to
  // https://stackoverflow.com/questions/28259389/how-to-put-script-in-a-javascript-string
  // and work around the issue by inserting an otherwise harmless backslash.
  //
  // HTML matches end tag names ASCII case-insensitively, so "</SCRIPT" and
  // mixed-case spellings must be neutralised too. The two capture groups keep
  // the original letter case; lower-case input yields exactly "</scr\ipt".
  static const std::regex scriptEndTag(
      "</(scr)(ipt)", std::regex::ECMAScript | std::regex::icase);
  return std::regex_replace(oss.str(), scriptEndTag, "</$1\\$2");
}
// Records everything needed to later build a ContentResponse: the request,
// the HTTP status code, the MIME type, the template text and whether the
// template/data should also be exposed to the page as KIWIX_RESPONSE_* values.
//
// `request` is dereferenced immediately, so it must not be null.
// NOTE(review): m_request appears to be kept as a reference, in which case
// the request object must outlive this blueprint - confirm against the class
// declaration.
// m_data starts out as a default-constructed (empty) Data.
ContentResponseBlueprint::ContentResponseBlueprint(const RequestContext* request,
int httpStatusCode,
const std::string& mimeType,
const std::string& templateStr,
bool includeKiwixResponseData)
: m_request(*request)
, m_httpStatusCode(httpStatusCode)
, m_mimeType(mimeType)
, m_template(templateStr)
, m_includeKiwixResponseData(includeKiwixResponseData)
, m_data(new Data)
{}
ContentResponseBlueprint::~ContentResponseBlueprint() = default;
std::unique_ptr<ContentResponse> ContentResponseBlueprint::generateResponseObject() const
{
kainjow::mustache::data d = m_data->toMustache(m_request.get_user_language());
if ( m_includeKiwixResponseData ) {
d.set("KIWIX_RESPONSE_TEMPLATE", escapeForJSON(m_template, false));
d.set("KIWIX_RESPONSE_DATA", m_data->asJSON());
}
auto r = ContentResponse::build(m_template, d, m_mimeType);
r->set_code(m_httpStatusCode);
return r;
}
// Blueprint for the HTML 404 page rendered from the sexy404_html template.
// The response uses status MHD_HTTP_NOT_FOUND and MIME type
// "text/html; charset=utf-8", and includes the KIWIX_RESPONSE_* data.
// `root` and `urlPath` are passed to the template as "root" and "url_path";
// all other fields are message ids resolved at render time.
NewHTTP404Response::NewHTTP404Response(const RequestContext& request,
const std::string& root,
const std::string& urlPath)
: ContentResponseBlueprint(&request,
MHD_HTTP_NOT_FOUND,
"text/html; charset=utf-8",
RESOURCE::templates::sexy404_html,
/*includeKiwixResponseData=*/true)
{
*this->m_data = Data(Data::Object{
{"root", root },
{"url_path", urlPath},
{"PAGE_TITLE", Data::fromMsgId("new-404-page-title")},
{"PAGE_HEADING", Data::fromMsgId("new-404-page-heading")},
{"404_img_text", Data::fromMsgId("404-img-text")},
{"path_was_not_found_msg", Data::fromMsgId("path-was-not-found")},
// Five paragraphs: message ids "404-advice.p1" .. "404-advice.p5".
{"advice", Data::staticMultiParagraphText("404-advice", 5)},
});
}
// Blueprint for the page rendered from the captured_external_html template.
// The response uses status MHD_HTTP_OK and MIME type
// "text/html; charset=utf-8", and includes the KIWIX_RESPONSE_* data.
// `root` and `externalUrl` are passed to the template as "root" and "url";
// the remaining fields are message ids resolved at render time.
BlockExternalLinkResponse::BlockExternalLinkResponse(const RequestContext& request,
const std::string& root,
const std::string& externalUrl)
: ContentResponseBlueprint(&request,
MHD_HTTP_OK,
"text/html; charset=utf-8",
RESOURCE::templates::captured_external_html,
/*includeKiwixResponseData=*/true)
{
*this->m_data = Data(Data::Object{
{"root", root },
{"external_link_detected", Data::fromMsgId("external-link-detected") },
{"url", externalUrl },
{"caution_warning", Data::fromMsgId("caution-warning") },
{"external_link_intro", Data::fromMsgId("external-link-intro") },
// Three paragraphs: "external-link-advice.p1" .. "external-link-advice.p3".
{"advice", Data::staticMultiParagraphText("external-link-advice", 3)},
});
}
// Generic error-page blueprint.
// The output format follows the request: when get_requested_format() is
// "html" the error_html template and "text/html; charset=utf-8" are used,
// otherwise the error_xml template and "application/xml; charset=utf-8".
// `pageTitleMsgId` / `headingMsgId` are message ids for the title and heading.
// `cssUrl` is exposed as CSS_URL, or as `false` when it is empty.
// "details" starts as an empty list; entries are appended via operator+ / +=.
HTTPErrorResponse::HTTPErrorResponse(const RequestContext& request,
int httpStatusCode,
const std::string& pageTitleMsgId,
const std::string& headingMsgId,
const std::string& cssUrl,
bool includeKiwixResponseData)
: ContentResponseBlueprint(&request,
httpStatusCode,
request.get_requested_format() == "html" ? "text/html; charset=utf-8" : "application/xml; charset=utf-8",
request.get_requested_format() == "html" ? RESOURCE::templates::error_html : RESOURCE::templates::error_xml,
includeKiwixResponseData)
{
Data::List emptyList;
*this->m_data = Data(Data::Object{
{"CSS_URL", Data::onlyAsNonEmptyValue(cssUrl) },
{"PAGE_TITLE", Data::fromMsgId(pageTitleMsgId)},
{"PAGE_HEADING", Data::fromMsgId(headingMsgId)},
{"details", emptyList}
});
}
// Error blueprint preset for HTTP 404: status MHD_HTTP_NOT_FOUND, message ids
// "404-page-title" / "404-page-heading", no CSS URL, and with the
// KIWIX_RESPONSE_* data included. No detail paragraphs are added here.
HTTP404Response::HTTP404Response(const RequestContext& request)
: HTTPErrorResponse(request,
MHD_HTTP_NOT_FOUND,
"404-page-title",
"404-page-heading",
std::string(),
/*includeKiwixResponseData=*/true)
{
}
// A 404 blueprint that already carries one detail paragraph: the
// "url-not-found" message, parameterized with the URL-decoded full request URL.
UrlNotFoundResponse::UrlNotFoundResponse(const RequestContext& request)
  : HTTP404Response(request)
{
  const std::string decodedUrl = urlDecode(m_request.get_full_url(), false);
  const ParameterizedMessage notFoundMsg("url-not-found", {{"url", decodedUrl}});
  *this += notFoundMsg;
}
// Appends one detail paragraph to this error page and returns *this.
// Note that, unlike a conventional operator+, this modifies the object: the
// message is wrapped as {"p": <message>} and pushed onto the "details" list.
HTTPErrorResponse& HTTPErrorResponse::operator+(const ParameterizedMessage& details)
{
  Data& detailList = (*m_data)["details"];
  detailList.push_back(Data::Object{{"p", Data::from(details)}});
  return *this;
}
// Appends one detail paragraph to this error page and returns *this.
HTTPErrorResponse& HTTPErrorResponse::operator+=(const ParameterizedMessage& details)
{
  // operator+() already mutates this object, so simply delegate to it.
  return this->operator+(details);
}
// Error blueprint preset for HTTP 400 (MHD_HTTP_BAD_REQUEST).
// It carries one detail paragraph, the "invalid-request" message, whose "url"
// parameter is the URL-decoded full request URL followed, when the request
// has a query, by "?" and the query passed through encodeDiples().
HTTP400Response::HTTP400Response(const RequestContext& request)
  : HTTPErrorResponse(request,
                      MHD_HTTP_BAD_REQUEST,
                      "400-page-title",
                      "400-page-heading",
                      std::string(),
                      /*includeKiwixResponseData=*/true)
{
  std::string shownUrl = urlDecode(m_request.get_full_url(), false);
  const auto queryString = m_request.get_query();
  if ( !queryString.empty() ) {
    shownUrl += "?" + encodeDiples(queryString);
  }
  const ParameterizedMessage invalidRequestMsg("invalid-request", {{"url", shownUrl}});
  *this += invalidRequestMsg;
}
// Blueprint for the HTML 500 page rendered from the sexy500_html template.
// The response uses status MHD_HTTP_INTERNAL_SERVER_ERROR and MIME type
// "text/html; charset=utf-8", and includes the KIWIX_RESPONSE_* data.
// `root` and `urlPath` are passed to the template as "root" and "url_path".
// An "error" field is added only when `errorText` is non-empty.
HTTP500Response::HTTP500Response(const RequestContext& request,
                                 const std::string& root,
                                 const std::string& urlPath,
                                 const std::string& errorText)
  : ContentResponseBlueprint(&request,
                             MHD_HTTP_INTERNAL_SERVER_ERROR,
                             "text/html; charset=utf-8",
                             RESOURCE::templates::sexy500_html,
                             /*includeKiwixResponseData=*/true)
{
  Data::Object fields{
    {"root", root },
    {"url_path", urlPath},
    {"PAGE_TITLE", Data::fromMsgId("500-page-title")},
    {"PAGE_HEADING", Data::fromMsgId("500-page-heading")},
    {"PAGE_TEXT", Data::fromMsgId("500-page-text")},
    {"500_img_text", Data::fromMsgId("500-img-text")},
  };

  const bool hasErrorText = !errorText.empty();
  if ( hasErrorText ) {
    fields["error"] = errorText;
  }

  *this->m_data = Data(fields);
}
std::unique_ptr<Response> Response::build_416(size_t resourceLength)
{
auto response = Response::build();
auto response = Response::build(server);
// [FIXME] (compile with recent enough version of libmicrohttpd)
// response->set_code(MHD_HTTP_RANGE_NOT_SATISFIABLE);
response->set_code(416);
@@ -530,10 +109,21 @@ std::unique_ptr<Response> Response::build_416(size_t resourceLength)
return response;
}
std::unique_ptr<Response> Response::build_redirect(const std::string& redirectUrl)
std::unique_ptr<Response> Response::build_500(const InternalServer& server, const std::string& msg)
{
auto response = Response::build();
MustacheData data;
data.set("error", msg);
auto content = render_template(RESOURCE::templates::_500_html, data);
std::unique_ptr<Response> response (
new ContentResponse(server.m_root, true, false, false, false, content, "text/html"));
response->set_code(MHD_HTTP_INTERNAL_SERVER_ERROR);
return response;
}
std::unique_ptr<Response> Response::build_redirect(const InternalServer& server, const std::string& redirectUrl)
{
auto response = Response::build(server);
response->m_returnCode = MHD_HTTP_FOUND;
response->add_header(MHD_HTTP_HEADER_LOCATION, redirectUrl);
return response;
@@ -565,7 +155,7 @@ static ssize_t callback_reader_from_item(void* cls,
{
RunningResponse* response = static_cast<RunningResponse*>(cls);
size_t max_size_to_set = std::min<size_t>(
size_t max_size_to_set = min<size_t>(
max,
response->item.getSize() - pos - response->range_start);
@@ -595,12 +185,59 @@ void print_response_info(int retCode, MHD_Response* response)
}
void ContentResponse::introduce_taskbar()
{
kainjow::mustache::data data;
data.set("root", m_root);
data.set("content", m_bookName);
data.set("hascontent", (!m_bookName.empty() && !m_bookTitle.empty()));
data.set("title", m_bookTitle);
data.set("withlibrarybutton", m_withLibraryButton);
auto head_content = render_template(RESOURCE::templates::head_taskbar_html, data);
m_content = prependToFirstOccurence(
m_content,
"</head[ \\t]*>",
head_content);
auto taskbar_part = render_template(RESOURCE::templates::taskbar_part_html, data);
m_content = appendToFirstOccurence(
m_content,
"<body[^>]*>",
taskbar_part);
}
void ContentResponse::inject_externallinks_blocker()
{
kainjow::mustache::data data;
data.set("root", m_root);
auto script_tag = render_template(RESOURCE::templates::external_blocker_part_html, data);
m_content = prependToFirstOccurence(
m_content,
"</head[ \\t]*>",
script_tag);
}
// Attaches a <link type="root" href="..."> element pointing at m_root to
// m_content at the first "</head>" match. m_root is inserted as is.
void ContentResponse::inject_root_link(){
  const std::string rootLinkTag = "<link type=\"root\" href=\"" + m_root + "\">";
  m_content = prependToFirstOccurence(m_content, "</head[ \\t]*>", rootLinkTag);
}
bool
ContentResponse::can_compress(const RequestContext& request) const
{
return request.can_compress()
&& is_compressible_mime_type(m_mimeType)
&& (m_content.size() > KIWIX_MIN_CONTENT_SIZE_TO_COMPRESS);
&& (m_content.size() > KIWIX_MIN_CONTENT_SIZE_TO_DEFLATE);
}
// Tells whether the content may be decorated (root link, taskbar, external
// link blocker): the MIME type must start with "text/html" and must not
// contain the ";raw=true" marker.
bool
ContentResponse::contentDecorationAllowed() const
{
  if (!startsWith(m_mimeType, "text/html")) {
    return false;
  }
  const bool rawRequested = m_mimeType.find(";raw=true") != std::string::npos;
  return !rawRequested;
}
MHD_Response*
@@ -613,27 +250,56 @@ Response::create_mhd_response(const RequestContext& request)
MHD_Response*
ContentResponse::create_mhd_response(const RequestContext& request)
{
const bool isCompressed = can_compress(request) && compress(m_content);
if (contentDecorationAllowed()) {
inject_root_link();
if (m_withTaskbar) {
introduce_taskbar();
}
if (m_blockExternalLinks) {
inject_externallinks_blocker();
}
}
bool shouldCompress = can_compress(request);
if (shouldCompress) {
std::vector<Bytef> compr_buffer(compressBound(m_content.size()));
uLongf comprLen = compr_buffer.capacity();
int err = compress(&compr_buffer[0],
&comprLen,
(const Bytef*)(m_content.data()),
m_content.size());
if (err == Z_OK && comprLen > 2 && comprLen < (m_content.size() + 2)) {
/* /!\ Internet Explorer has a bug with deflate compression.
It can not handle the first two bytes (compression headers)
We need to chunk them off (move the content 2bytes)
It has no incidence on other browsers
See http://www.subbu.org/blog/2008/03/ie7-deflate-or-not and comments */
m_content = string((char*)&compr_buffer[2], comprLen - 2);
m_etag.set_option(ETag::COMPRESSED_CONTENT);
} else {
shouldCompress = false;
}
}
MHD_Response* response = MHD_create_response_from_buffer(
m_content.size(), const_cast<char*>(m_content.data()), MHD_RESPMEM_MUST_COPY);
if (isCompressed) {
m_etag.set_option(ETag::COMPRESSED_CONTENT);
if (shouldCompress) {
MHD_add_response_header(
response, MHD_HTTP_HEADER_VARY, "Accept-Encoding");
MHD_add_response_header(
response, MHD_HTTP_HEADER_CONTENT_ENCODING, "gzip");
response, MHD_HTTP_HEADER_CONTENT_ENCODING, "deflate");
}
return response;
}
MHD_Result Response::send(const RequestContext& request, bool verbose, MHD_Connection* connection)
MHD_Result Response::send(const RequestContext& request, MHD_Connection* connection)
{
MHD_Response* response = create_mhd_response(request);
MHD_add_response_header(response, MHD_HTTP_HEADER_CACHE_CONTROL,
getCacheControlHeader(m_kind));
m_etag.get_option(ETag::CACHEABLE_ENTITY) ? "max-age=2723040, public" : "no-cache, no-store, must-revalidate");
const std::string etag = m_etag.get_etag();
if ( ! etag.empty() )
MHD_add_response_header(response, MHD_HTTP_HEADER_ETAG, etag.c_str());
@@ -644,7 +310,7 @@ MHD_Result Response::send(const RequestContext& request, bool verbose, MHD_Conne
if (m_returnCode == MHD_HTTP_OK && m_byteRange.kind() == ByteRange::RESOLVED_PARTIAL_CONTENT)
m_returnCode = MHD_HTTP_PARTIAL_CONTENT;
if (verbose)
if (m_verbose)
print_response_info(m_returnCode, response);
auto ret = MHD_queue_response(connection, m_returnCode, response);
@@ -652,60 +318,78 @@ MHD_Result Response::send(const RequestContext& request, bool verbose, MHD_Conne
return ret;
}
ContentResponse::ContentResponse(const std::string& content, const std::string& mimetype) :
Response(),
// Stores the book name and title that introduce_taskbar() later passes to the
// taskbar templates.
void ContentResponse::set_taskbar(const std::string& bookName, const std::string& bookTitle)
{
  m_bookTitle = bookTitle;
  m_bookName = bookName;
}
ContentResponse::ContentResponse(const std::string& root, bool verbose, bool withTaskbar, bool withLibraryButton, bool blockExternalLinks, const std::string& content, const std::string& mimetype) :
Response(verbose),
m_root(root),
m_content(content),
m_mimeType(mimetype)
m_mimeType(mimetype),
m_withTaskbar(withTaskbar),
m_withLibraryButton(withLibraryButton),
m_blockExternalLinks(blockExternalLinks),
m_bookName(""),
m_bookTitle("")
{
add_header(MHD_HTTP_HEADER_CONTENT_TYPE, m_mimeType);
}
std::unique_ptr<ContentResponse> ContentResponse::build(
const std::string& content,
const std::string& mimetype)
std::unique_ptr<ContentResponse> ContentResponse::build(const InternalServer& server, const std::string& content, const std::string& mimetype, bool isHomePage)
{
return std::make_unique<ContentResponse>(content, mimetype);
return std::unique_ptr<ContentResponse>(new ContentResponse(
server.m_root,
server.m_verbose.load(),
server.m_withTaskbar && !isHomePage,
server.m_withLibraryButton,
server.m_blockExternalLinks,
content,
mimetype));
}
std::unique_ptr<ContentResponse> ContentResponse::build(
const std::string& template_str,
kainjow::mustache::data data,
const std::string& mimetype)
{
std::unique_ptr<ContentResponse> ContentResponse::build(const InternalServer& server, const std::string& template_str, kainjow::mustache::data data, const std::string& mimetype, bool isHomePage) {
auto content = render_template(template_str, data);
return ContentResponse::build(content, mimetype);
return ContentResponse::build(server, content, mimetype, isHomePage);
}
ItemResponse::ItemResponse(const zim::Item& item, const std::string& mimetype, const ByteRange& byterange) :
Response(),
ItemResponse::ItemResponse(bool verbose, const zim::Item& item, const std::string& mimetype, const ByteRange& byterange) :
Response(verbose),
m_item(item),
m_mimeType(mimetype)
{
m_byteRange = byterange;
set_kind(Response::ZIM_CONTENT);
set_cacheable();
add_header(MHD_HTTP_HEADER_CONTENT_TYPE, m_mimeType);
}
std::unique_ptr<Response> ItemResponse::build(const RequestContext& request, const zim::Item& item)
std::unique_ptr<Response> ItemResponse::build(const InternalServer& server, const RequestContext& request, const zim::Item& item)
{
const std::string mimetype = get_mime_type(item);
auto byteRange = request.get_range().resolve(item.getSize());
const bool noRange = byteRange.kind() == ByteRange::RESOLVED_FULL_CONTENT;
if (noRange && is_compressible_mime_type(mimetype)) {
// Return a contentResponse
auto response = ContentResponse::build(item.getData(), mimetype);
response->set_kind(Response::ZIM_CONTENT);
auto response = ContentResponse::build(server, item.getData(), mimetype);
response->set_cacheable();
response->m_byteRange = byteRange;
return std::move(response);
}
if (byteRange.kind() == ByteRange::RESOLVED_UNSATISFIABLE) {
auto response = Response::build_416(item.getSize());
response->set_kind(Response::ZIM_CONTENT);
auto response = Response::build_416(server, item.getSize());
response->set_cacheable();
return response;
}
return std::make_unique<ItemResponse>(item, mimetype, byteRange);
return std::unique_ptr<Response>(new ItemResponse(
server.m_verbose.load(),
item,
mimetype,
byteRange));
}
MHD_Response*

View File

@@ -26,47 +26,37 @@
#include <mustache.hpp>
#include "byte_range.h"
#include "entry.h"
#include "etag.h"
#include "i18n_utils.h"
#include <zim/item.h>
extern "C" {
#include "microhttpd_wrapper.h"
}
namespace zim {
class Archive;
} // namespace zim
namespace kiwix {
class InternalServer;
class RequestContext;
class EntryResponse;
class Response {
public:
enum Kind
{
STATIC_RESOURCE,
ZIM_CONTENT,
DYNAMIC_CONTENT
};
public:
Response();
Response(bool verbose);
virtual ~Response() = default;
static std::unique_ptr<Response> build();
static std::unique_ptr<Response> build_304(const ETag& etag);
static std::unique_ptr<Response> build_416(size_t resourceLength);
static std::unique_ptr<Response> build_redirect(const std::string& redirectUrl);
static std::unique_ptr<Response> build(const InternalServer& server);
static std::unique_ptr<Response> build_304(const InternalServer& server, const ETag& etag);
static std::unique_ptr<Response> build_404(const InternalServer& server, const RequestContext& request, const std::string& bookName, const std::string& bookTitle, const std::string& details="");
static std::unique_ptr<Response> build_416(const InternalServer& server, size_t resourceLength);
static std::unique_ptr<Response> build_500(const InternalServer& server, const std::string& msg);
static std::unique_ptr<Response> build_redirect(const InternalServer& server, const std::string& redirectUrl);
MHD_Result send(const RequestContext& request, bool verbose, MHD_Connection* connection);
MHD_Result send(const RequestContext& request, MHD_Connection* connection);
void set_code(int code) { m_returnCode = code; }
void set_kind(Kind k);
Kind get_kind() const { return m_kind; }
void set_etag_body(const std::string& id) { m_etag.set_body(id); }
void set_cacheable() { m_etag.set_option(ETag::CACHEABLE_ENTITY); }
void set_server_id(const std::string& id) { m_etag.set_server_id(id); }
void add_header(const std::string& name, const std::string& value) { m_customHeaders[name] = value; }
int getReturnCode() const { return m_returnCode; }
@@ -76,7 +66,7 @@ class Response {
MHD_Response* create_error_response(const RequestContext& request) const;
protected: // data
Kind m_kind = DYNAMIC_CONTENT;
bool m_verbose;
int m_returnCode;
ByteRange m_byteRange;
ETag m_etag;
@@ -88,110 +78,37 @@ class Response {
class ContentResponse : public Response {
public:
ContentResponse(
const std::string& content,
const std::string& mimetype);
ContentResponse(const std::string& root, bool verbose, bool withTaskbar, bool withLibraryButton, bool blockExternalLinks, const std::string& content, const std::string& mimetype);
static std::unique_ptr<ContentResponse> build(const InternalServer& server, const std::string& content, const std::string& mimetype, bool isHomePage = false);
static std::unique_ptr<ContentResponse> build(const InternalServer& server, const std::string& template_str, kainjow::mustache::data data, const std::string& mimetype, bool isHomePage = false);
static std::unique_ptr<ContentResponse> build(
const std::string& content,
const std::string& mimetype);
static std::unique_ptr<ContentResponse> build(
const std::string& template_str,
kainjow::mustache::data data,
const std::string& mimetype);
const std::string& getContent() const { return m_content; }
const std::string& getMimeType() const { return m_mimeType; }
void set_taskbar(const std::string& bookName, const std::string& bookTitle);
private:
MHD_Response* create_mhd_response(const RequestContext& request);
void introduce_taskbar();
void inject_externallinks_blocker();
void inject_root_link();
bool can_compress(const RequestContext& request) const;
bool contentDecorationAllowed() const;
private:
std::string m_root;
std::string m_content;
std::string m_mimeType;
bool m_withTaskbar;
bool m_withLibraryButton;
bool m_blockExternalLinks;
std::string m_bookName;
std::string m_bookTitle;
};
// Describes a ContentResponse that is yet to be rendered: the originating
// request, HTTP status code, MIME type, template text and the template data.
// The actual response object is produced by generateResponseObject().
class ContentResponseBlueprint
{
public: // functions
// `request` is held by reference (see m_request), so it must be non-null and
// outlive the blueprint.
ContentResponseBlueprint(const RequestContext* request,
int httpStatusCode,
const std::string& mimeType,
const std::string& templateStr,
bool includeKiwixResponseData = false);
// Declared here and defined elsewhere: Data is only forward-declared in this
// class, and m_data is a std::unique_ptr<Data>.
~ContentResponseBlueprint();
// Implicit conversion: lets a blueprint be used wherever a
// std::unique_ptr<Response> is expected, by rendering it on the spot.
operator std::unique_ptr<Response>() const
{
return generateResponseObject();
}
// Renders the blueprint into a ContentResponse.
std::unique_ptr<ContentResponse> generateResponseObject() const;
protected: // types
// Template data holder; only declared here.
class Data;
protected: //data
const RequestContext& m_request;
const int m_httpStatusCode;
const std::string m_mimeType;
const std::string m_template;
const bool m_includeKiwixResponseData;
std::unique_ptr<Data> m_data;
};
// Blueprint of a 404 response built from the request, the server `root` and
// the `urlPath` that could not be found.
struct NewHTTP404Response : ContentResponseBlueprint
{
NewHTTP404Response(const RequestContext& request,
const std::string& root,
const std::string& urlPath);
};
// Blueprint of a generic HTTP error page with a title, a heading and a list
// of detail messages.
struct HTTPErrorResponse : ContentResponseBlueprint
{
// `pageTitleMsgId` and `headingMsgId` are message ids, not display text.
// `cssUrl` may be left empty.
HTTPErrorResponse(const RequestContext& request,
int httpStatusCode,
const std::string& pageTitleMsgId,
const std::string& headingMsgId,
const std::string& cssUrl = "",
bool includeKiwixResponseData = false);
// Both operators take a detail message and return a reference to this
// (non-const) object, so calls can be chained.
HTTPErrorResponse& operator+(const ParameterizedMessage& errorDetails);
HTTPErrorResponse& operator+=(const ParameterizedMessage& errorDetails);
};
// HTTPErrorResponse specialization built from the request alone; presumably
// it fixes the status code to 404 (confirm against the constructor definition).
struct HTTP404Response : HTTPErrorResponse
{
// explicit: a RequestContext must not convert implicitly into a response.
explicit HTTP404Response(const RequestContext& request);
};
// HTTP404Response specialization built from the request alone; presumably it
// adds details about the URL that was not found (confirm against the
// constructor definition).
struct UrlNotFoundResponse : HTTP404Response
{
// explicit: a RequestContext must not convert implicitly into a response.
explicit UrlNotFoundResponse(const RequestContext& request);
};
// HTTPErrorResponse specialization built from the request alone; presumably
// it fixes the status code to 400 (confirm against the constructor definition).
struct HTTP400Response : HTTPErrorResponse
{
// explicit: a RequestContext must not convert implicitly into a response.
explicit HTTP400Response(const RequestContext& request);
};
// Response blueprint constructed from a request, the server root, a URL path
// and an optional error text. Judging by the name this presumably describes
// an HTTP 500 page - confirm against the constructor definition.
struct HTTP500Response : ContentResponseBlueprint
{
// `error` is optional and defaults to an empty string.
HTTP500Response(const RequestContext& request,
const std::string& root,
const std::string& urlPath,
const std::string& error = "");
};
class ItemResponse : public Response {
public:
ItemResponse(const zim::Item& item, const std::string& mimetype, const ByteRange& byterange);
static std::unique_ptr<Response> build(const RequestContext& request, const zim::Item& item);
ItemResponse(bool verbose, const zim::Item& item, const std::string& mimetype, const ByteRange& byterange);
static std::unique_ptr<Response> build(const InternalServer& server, const RequestContext& request, const zim::Item& item);
private:
MHD_Response* create_mhd_response(const RequestContext& request);
@@ -200,13 +117,6 @@ class ItemResponse : public Response {
std::string m_mimeType;
};
// Response blueprint constructed from a request, the server root and an
// external URL. Judging by the name this presumably describes the page shown
// when navigation to `externalUrl` is blocked - confirm against the
// constructor definition.
struct BlockExternalLinkResponse : ContentResponseBlueprint
{
BlockExternalLinkResponse(const RequestContext& request,
const std::string& root,
const std::string& externalUrl);
};
}
#endif //KIWIXLIB_SERVER_RESPONSE_H

View File

@@ -1,108 +0,0 @@
/*
* Copyright (C) 2025 Veloman Yunkan
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*/
#include "spelling_correction.h"
#include "zim/archive.h"
#include <filesystem>
#include <sstream>
#include <stdexcept>
#include <system_error>
#include <xapian.h>
namespace kiwix
{
namespace
{
// Returns the titles of all entries yielded by a.iterByPath(), in iteration
// order.
std::vector<std::string> getAllTitles(const zim::Archive& a)
{
  std::vector<std::string> titles;
  auto&& entries = a.iterByPath();
  for (auto it = entries.begin(), last = entries.end(); it != last; ++it) {
    const auto& entry = *it;
    titles.push_back(entry.getTitle());
  }
  return titles;
}
// Builds the spellings database of `archive` as a single-file Xapian database
// at `path`.
//
// The titles of the archive are first added as spellings to a temporary glass
// database located at `path + ".tmp"`; that database is then compacted into
// the single-file database at `path`. The temporary database is removed
// afterwards, whether the build succeeded or failed.
//
// Any exception raised while building the database is propagated to the
// caller.
void createXapianDB(std::string path, const zim::Archive& archive)
{
  const int flags = Xapian::DB_BACKEND_GLASS|Xapian::DB_CREATE;
  const auto tmpDbPath = path + ".tmp";

  // A previous build that was interrupted may have left its temporary
  // database behind. DB_CREATE refuses to create a database that already
  // exists, so such a leftover would make every later build fail.
  std::filesystem::remove_all(tmpDbPath);

  try {
    Xapian::WritableDatabase db(tmpDbPath, flags);
    for (const auto& t : getAllTitles(archive)) {
      db.add_spelling(t);
    }
    db.commit();
    db.compact(path, Xapian::DBCOMPACT_SINGLE_FILE);
    db.close();
  } catch (...) {
    // `db` has already been destroyed at this point. Clean up on a
    // best-effort basis (non-throwing overload) so that the original error
    // is the one reported to the caller.
    std::error_code ignored;
    std::filesystem::remove_all(tmpDbPath, ignored);
    throw;
  }

  std::filesystem::remove_all(tmpDbPath);
}
// Computes the location of the spellings database of archive `a` inside
// `cacheDirPath`. The file name is made of the archive UUID followed by
// ".spellingsdb.v" and the spellings DB version.
std::string spellingsDBPathForZIMArchive(std::filesystem::path cacheDirPath, const zim::Archive& a)
{
  // The version of spellings DB must be updated each time an important change
  // to the implementation is made that renders using the previous version
  // impossible or undesirable.
  const char SPELLINGS_DB_VERSION[] = "0.1";

  std::ostringstream dbFileName;
  dbFileName << a.getUuid();
  dbFileName << ".spellingsdb.v";
  dbFileName << SPELLINGS_DB_VERSION;

  const std::filesystem::path dbPath = cacheDirPath / dbFileName.str();
  return dbPath.string();
}
// Opens the spellings database of `archive` stored under `cacheDirPath`.
// If opening fails with Xapian::DatabaseOpeningError, the database is built
// with createXapianDB() and then opened again. Any other exception propagates
// to the caller.
std::unique_ptr<Xapian::Database> openOrCreateXapianDB(std::filesystem::path cacheDirPath, const zim::Archive& archive)
{
  const auto dbPath = spellingsDBPathForZIMArchive(cacheDirPath, archive);
  const auto openDb = [&dbPath]() {
    return std::make_unique<Xapian::Database>(dbPath);
  };

  try {
    return openDb();
  } catch (const Xapian::DatabaseOpeningError& ) {
    // The database could not be opened; fall through and build it.
  }

  createXapianDB(dbPath, archive);
  return openDb();
}
} // unnamed namespace
SpellingsDB::SpellingsDB(const zim::Archive& archive, std::filesystem::path cacheDirPath)
: impl_(openOrCreateXapianDB(cacheDirPath, archive))
{
}
// Defined out of line, in this translation unit; performs no work beyond
// destroying the members.
SpellingsDB::~SpellingsDB() = default;
// Returns spelling corrections for `word` looked up in the spellings database.
//
// Parameters:
//   word     - the (possibly misspelled) word to correct
//   maxCount - maximum number of corrections to return; only 0 and 1 are
//              supported
//
// Returns an empty vector if `maxCount` is 0 or if the database has no
// suggestion for `word`; otherwise a vector holding the single suggestion.
//
// Throws std::runtime_error if `maxCount` is greater than 1.
std::vector<std::string> SpellingsDB::getSpellingCorrections(const std::string& word, uint32_t maxCount) const
{
  if ( maxCount > 1 ) {
    throw std::runtime_error("More than one spelling correction was requested");
  }

  std::vector<std::string> result;
  if ( maxCount == 0 ) {
    // Nothing was requested: never return more entries than maxCount.
    return result;
  }

  // Maximum edit distance passed to Xapian when looking for a suggestion.
  const unsigned MAX_EDIT_DISTANCE = 3;
  const auto term = impl_->get_spelling_suggestion(word, MAX_EDIT_DISTANCE);
  if ( !term.empty() ) {
    result.push_back(term);
  }
  return result;
}
} // namespace kiwix

Some files were not shown because too many files have changed in this diff Show More