Compare commits

..

1 Commits

Author SHA1 Message Date
luddens
6b04eb3214 wip 2020-03-11 18:12:40 +01:00
156 changed files with 3057 additions and 33290 deletions

View File

@@ -1,91 +0,0 @@
name: Packages
on: [push, pull_request]
jobs:
build-deb:
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
distro:
- ubuntu-impish
- ubuntu-hirsute
- ubuntu-focal
- ubuntu-bionic
steps:
- uses: actions/checkout@v2
# Determine which PPA we should upload to
- name: PPA
id: ppa
run: |
if [[ $REF == refs/tags* ]]
then
echo "::set-output name=ppa::kiwixteam/release"
else
echo "::set-output name=ppa::kiwixteam/dev"
fi
env:
REF: ${{ github.ref }}
- uses: legoktm/gh-action-auto-dch@master
with:
fullname: Kiwix builder
email: release+launchpad@kiwix.org
distro: ${{ matrix.distro }}
- uses: legoktm/gh-action-build-deb@ubuntu-impish
if: matrix.distro == 'ubuntu-impish'
name: Build package for ubuntu-impish
id: build-ubuntu-impish
with:
args: --no-sign
ppa: ${{ steps.ppa.outputs.ppa }}
- uses: legoktm/gh-action-build-deb@ubuntu-hirsute
if: matrix.distro == 'ubuntu-hirsute'
name: Build package for ubuntu-hirsute
id: build-ubuntu-hirsute
with:
args: --no-sign
ppa: ${{ steps.ppa.outputs.ppa }}
- uses: legoktm/gh-action-build-deb@ubuntu-focal
if: matrix.distro == 'ubuntu-focal'
name: Build package for ubuntu-focal
id: build-ubuntu-focal
with:
args: --no-sign
ppa: ${{ steps.ppa.outputs.ppa }}
- uses: legoktm/gh-action-build-deb@ubuntu-bionic
if: matrix.distro == 'ubuntu-bionic'
name: Build package for ubuntu-bionic
id: build-ubuntu-bionic
with:
args: --no-sign
ppa: ${{ steps.ppa.outputs.ppa }}
- uses: actions/upload-artifact@v2
with:
name: Packages for ${{ matrix.distro }}
path: output
- uses: legoktm/gh-action-dput@master
name: Upload dev package
# Only upload on pushes to master
if: github.event_name == 'push' && github.event.ref == 'refs/heads/master' && startswith(matrix.distro, 'ubuntu-')
with:
gpg_key: ${{ secrets.LAUNCHPAD_GPG }}
repository: ppa:kiwixteam/dev
packages: output/*_source.changes
- uses: legoktm/gh-action-dput@master
name: Upload release package
# Only upload on pushes to master or tag
if: github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags') && startswith(matrix.distro, 'ubuntu-')
with:
gpg_key: ${{ secrets.LAUNCHPAD_GPG }}
repository: ppa:kiwixteam/release
packages: output/*_source.changes

View File

@@ -1,6 +1,6 @@
name: CI
on: [push]
on: [pull_request]
jobs:
Macos:
@@ -13,15 +13,15 @@ jobs:
with:
python-version: '3.5'
- name: Install packages
run: |
brew update
brew install gcovr pkg-config ninja
uses: mstksg/get-package@v1
with:
brew: gcovr pkg-config ninja
- name: Install python modules
run: pip3 install meson==0.49.2 pytest
- name: Install deps
shell: bash
run: |
ARCHIVE_NAME=deps2_osx_native_dyn_libkiwix.tar.xz
ARCHIVE_NAME=deps2_osx_native_dyn_kiwix-lib.tar.xz
wget -O- http://tmp.kiwix.org/ci/${ARCHIVE_NAME} | tar -xJ -C $HOME
- name: Compile
shell: bash
@@ -53,69 +53,63 @@ jobs:
strategy:
fail-fast: false
matrix:
name:
target:
- native_static
- native_dyn
- native_dyn_bionic
- android_arm
- android_arm64
- win32_static
- win32_dyn
include:
- name: native_static
target: native_static
- target: native_static
image_variant: xenial
lib_postfix: '/x86_64-linux-gnu'
- name: native_dyn
target: native_dyn
- target: native_dyn
image_variant: xenial
lib_postfix: '/x86_64-linux-gnu'
- name: native_dyn_bionic
target: native_dyn
image_variant: bionic
lib_postfix: '/x86_64-linux-gnu'
- name: android_arm
target: android_arm
- target: android_arm
image_variant: xenial
lib_postfix: '/x86_64-linux-gnu'
- name: android_arm64
target: android_arm64
- target: android_arm64
image_variant: xenial
lib_postfix: '/x86_64-linux-gnu'
- name: win32_static
target: win32_static
- target: win32_static
image_variant: f31
lib_postfix: '64'
- name: win32_dyn
target: win32_dyn
- target: win32_dyn
image_variant: f31
lib_postfix: '64'
env:
HOME: /home/runner
runs-on: ubuntu-latest
container:
image: "kiwix/kiwix-build_ci:${{matrix.image_variant}}-31"
image: "kiwix/kiwix-build_ci:${{matrix.image_variant}}-26"
steps:
- name: Extract branch name
shell: bash
run: echo "##[set-output name=branch;]$(echo ${GITHUB_REF#refs/heads/})"
id: extract_branch
- name: Checkout code
shell: python
run: |
import json
from subprocess import check_call
from os import environ
with open(environ['GITHUB_EVENT_PATH'], 'r') as f:
content = f.read()
event_data = json.loads(content)
try:
branch_ref = event_data['ref'].split('/')[-1]
except KeyError:
branch_ref = event_data['pull_request']['head']['ref']
print("Cloning branch", branch_ref)
command = [
'git', 'clone',
'https://github.com/${{github.repository}}',
'--depth=1',
'--branch', '${{steps.extract_branch.outputs.branch}}'
'--branch', branch_ref
]
check_call(command, cwd=environ['HOME'])
- name: Install deps
shell: bash
run: |
ARCHIVE_NAME=deps2_${OS_NAME}_${{matrix.target}}_libkiwix.tar.xz
ARCHIVE_NAME=deps2_${OS_NAME}_${{matrix.target}}_kiwix-lib.tar.xz
wget -O- http://tmp.kiwix.org/ci/${ARCHIVE_NAME} | tar -xJ -C /home/runner
- name: Compile
shell: bash
@@ -134,7 +128,7 @@ jobs:
if [[ "${{matrix.target}}" =~ android_.* ]]; then
MESON_OPTION="$MESON_OPTION -Dandroid=true"
fi
cd $HOME/libkiwix
cd $HOME/kiwix-lib
meson . build ${MESON_OPTION}
cd build
ninja
@@ -145,7 +139,7 @@ jobs:
if: startsWith(matrix.target, 'native_')
shell: bash
run: |
cd $HOME/libkiwix/build
cd $HOME/kiwix-lib/build
meson test --verbose
ninja coverage
env:
@@ -154,10 +148,10 @@ jobs:
- name: Publish coverage
shell: bash
run: |
cd $HOME/libkiwix
cd $HOME/kiwix-lib
curl https://codecov.io/bash -o codecov.sh
bash codecov.sh -n "${OS_NAME}_${{matrix.target}}" -Z
rm codecov.sh
if: startsWith(matrix.target, 'native_') && matrix.image_variant == 'xenial'
if: startsWith(matrix.target, 'native_')
env:
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}

3
.gitignore vendored
View File

@@ -2,6 +2,3 @@
*.swp
subprojects/googletest-release*
*.class
build/
.vscode/
builddir/

View File

@@ -1,96 +1,3 @@
libkiwix 10.0.0
===============
* ...
kiwix-lib 9.4.1
===============
* Fix `M/Counter` parsing.
* [SERVER] Adjust body padding-top for taskbar
* Fix potential crash when stoping a server not started.
* Various fix in build system and the CI.
kiwix-lib 9.4.0
===============
* [SERVER] Make the headers handling case insensitive.
* [SERVER] Make server answer 204 http status code for empty search
* [PACKAGING] Made CI build deb packages.
* [SERVER] Add a way to prevent taskbar and external link bloquer at article
level.
* Fix meson file to be compatible with meson 0.45
* [SERVER] Update search requests to use pageStart/pageLength instead of
pageStart/pageEnd arguments.
* [SERVER] Set a fixed favicon size in the main page.
* [SERVER] Refactor the response system code to better handling future new
libzim api.
* Fix segmentation fault around exchange with aria2 process making
kiwix-desktop crash at exit.
kiwix-lib 9.3.1
===============
* Fix handling of samba path on windows.
* Do not include `kiwix_config.h` in public header.
* Fix compilation with libmicrohttpd v0.97.1
* Increase default test timeout to 160seconds/test.
* Add automatic debian packaging.
* Use non-minified version of jquery-ui.js
* Pass `-latomic` compile option for sh4 architecture.
* Make mesion install `kiwix-compile-resources` man page.
kiwix-lib 9.3.0
===============
* Add a thread safe method to search suggestions.
Old methods are now deprecated.
kiwix-lib 9.2.3
===============
* Add test on byte-range
* Fix compilation on bionic and windows.
* Allow building using debian packaged kainjow-mustache
* Pass `-latomic` compile option for architectures that need it
kiwix-lib 9.2.2
===============
* Fix handling on empty content in byte range management (wrong assert)
kiwix-lib 9.2.1
===============
* Fix support of byte range request.
kiwix-lib 9.2
=============
* Add tests
* Refactoring server code.
* [SERVER] Add HEAD, Etag and If-None-Match support.
* [SERVER] Compress opds catalog answers.
kiwix-lib 9.1.2
===============
* Do not use the pathToSave if it is empty.
kiwix-lib 9.1.1
===============
* Fix the detection of the dataDirectory on windows.
kiwix-lib 9.1.0
===============
* [JAVA] Add a method to get the size of an article.
* Add a new method to the libray to get the book by path.
* Add a option to make the server blocks external link.
Links are intercepted by js an redirected to a "portail" page.
* [ODPS] Correctly handle book's articleCount and mediaCount.
kiwix-lib 9.0.1
===============

120
README.md
View File

@@ -1,14 +1,15 @@
Libkiwix
========
Kiwix library
=============
The Libkiwix provides the [Kiwix](https://kiwix.org) software suite
core. It contains the code shared by all Kiwix ports (Windows,
The Kiwix library provides the [Kiwix](https://kiwix.org) software
suite core. It contains the code shared by all Kiwix ports (Windows,
GNU/Linux, macOS, Android, iOS, ...).
[![Repositories](https://img.shields.io/repology/repositories/libkiwix?label=repositories)](https://github.com/kiwix/libkiwix/wiki/Repology)
[![Build Status](https://github.com/kiwix/libkiwix/workflows/CI/badge.svg?query=branch%3Amaster)](https://github.com/kiwix/libkiwix/actions?query=branch%3Amaster)
[![CodeFactor](https://www.codefactor.io/repository/github/kiwix/libkiwix/badge)](https://www.codefactor.io/repository/github/kiwix/libkiwix)
[![Codecov](https://codecov.io/gh/kiwix/libkiwix/branch/master/graph/badge.svg)](https://codecov.io/gh/kiwix/libkiwix)
[![Download](https://api.bintray.com/packages/kiwix/kiwix/kiwixlib/images/download.svg)](https://bintray.com/kiwix/kiwix/kiwixlib/_latestVersion)
[![AUR version](https://img.shields.io/aur/version/kiwix-lib)](https://aur.archlinux.org/packages/kiwix-lib/)
[![Build Status](https://travis-ci.com/kiwix/kiwix-lib.svg?branch=master)](https://travis-ci.com/kiwix/kiwix-lib)
[![CodeFactor](https://www.codefactor.io/repository/github/kiwix/kiwix-lib/badge)](https://www.codefactor.io/repository/github/kiwix/kiwix-lib)
[![Codecov](https://codecov.io/gh/kiwix/kiwix-lib/branch/master/graph/badge.svg)](https://codecov.io/gh/kiwix/kiwix-lib)
[![License: GPL v3](https://img.shields.io/badge/License-GPLv3-blue.svg)](https://www.gnu.org/licenses/gpl-3.0)
Disclaimer
@@ -16,38 +17,31 @@ Disclaimer
This document assumes you have a little knowledge about software
compilation. If you experience difficulties with the dependencies or
with the Libkiwix compilation itself, we recommend to have a look to
[kiwix-build](https://github.com/kiwix/kiwix-build).
with the Kiwix libary compilation itself, we recommend to have a look
to [kiwix-build](https://github.com/kiwix/kiwix-build).
Preamble
--------
Although the Libkiwix can be (cross-)compiled on/for many sytems, the
following documentation explains how to do it on POSIX ones. It is
primarly thought for GNU/Linux systems and has been tested on recent
releases of Ubuntu and Fedora.
Although the Kiwix library can be (cross-)compiled on/for many
sytems, the following documentation explains how to do it on POSIX
ones. It is primarly thought for GNU/Linux systems and has been tested
on recent releases of Ubuntu and Fedora.
Dependencies
------------
The Libkiwix relies on many third party software libraries. They are
prerequisites to the Libkiwix compilation. Following libraries need to
be available:
The Kiwix library relies on many third parts software libraries. They
are prerequisites to the Kiwix library compilation. Following
libraries need to be available:
* [ICU](https://site.icu-project.org/) (package `libicu-dev` on Ubuntu)
* [ZIM](https://openzim.org/) (package `libzim-dev` on Ubuntu)
* [Pugixml](https://pugixml.org/) (package `libpugixml-dev` on Ubuntu)
* [Aria2](https://aria2.github.io/) (package `aria2` on Ubuntu)
* [Mustache](https://github.com/kainjow/Mustache) (Just copy the
header `mustache.hpp` somewhere it can be found by the compiler and/or
set CPPFLAGS with correct `-I` option). Use Mustache version 4.1 or above.
* [Libcurl](https://curl.se/libcurl) (`libcurl4-gnutls-dev`, `libcurl4-nss-dev` or `libcurl4-openssl-dev` on Ubuntu)
* [Microhttpd](https://www.gnu.org/software/libmicrohttpd) (package `libmicrohttpd-dev` on Ubuntu)
* [Zlib](https://zlib.net/) (package `zlib1g-dev` on Ubuntu)
To test the code:
* [Google Test](https://github.com/google/googletest) (package `googletest` on Ubuntu)
The following dependency needs to be available at runtime:
* [Aria2](https://aria2.github.io/) (package `aria2` on Ubuntu)
set CPPFLAGS with correct `-I` option)
These dependencies may or may not be packaged by your operating
system. They may also be packaged but only in an older version. The
@@ -56,13 +50,13 @@ In the worse case, you will have to download and compile bleeding edge
version by hand.
If you want to install these dependencies locally, then use the
`libkiwix` directory as install prefix.
`kiwix-lib` directory as install prefix.
Environment
-------------
The Libkiwix builds using [Meson](https://mesonbuild.com/) version
0.45 or higher. Meson relies itself on Ninja, pkg-config and few other
The Kiwix library builds using [Meson](https://mesonbuild.com/) version
0.43 or higher. Meson relies itself on Ninja, pkg-config and few other
compilation tools.
Install first the few common compilation tools:
@@ -77,7 +71,7 @@ section.
Compilation
-----------
Once all dependencies are installed, you can compile the Libkiwix
Once all dependencies are installed, you can compile the Kiwix library
with:
```bash
meson . build
@@ -85,25 +79,16 @@ ninja -C build
```
By default, it will compile dynamic linked libraries. All binary files
will be created in the `build` directory created automatically by
will be created in the "build" directory created automatically by
Meson. If you want statically linked libraries, you can add
`--default-library=static` option to the Meson command.
Depending of you system, `ninja` may be called `ninja-build`.
Testing
-------
To run the automated tests:
```bash
cd build
meson test
```
Installation
------------
If you want to install the Libkiwix and the headers you just have
If you want to install the Kiwix library and the headers you just have
compiled on your system, here we go:
```bash
ninja -C build install
@@ -146,57 +131,6 @@ cp ninja ../bin
cd ..
```
Custom Index Page
-----------------
to use custom welcome page mention `customIndexPage` argument in `kiwix::internalServer()` or use `kiwix::server->setCustomIndexTemplate()`.
(note - while using custom html file please mention all external links as absolute path.)
to create a HTML template with custom JS you need to have a look at various OPDS based endpoints as mentioned [here](https://wiki.kiwix.org/wiki/OPDS) to load books.
To use JS provided by kiwix-serve you can use the following template to start with ->
```
<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width,initial-scale=1" />
<title><-- Custom Tittle --></title>
<script
type="text/javascript"
src="{{root}}/skin/jquery-ui/external/jquery/jquery.js"
></script>
<script
type="text/javascript"
src="{{root}}/skin/jquery-ui/jquery-ui.min.js"
></script>
<script src="{{root}}/skin/isotope.pkgd.min.js" defer></script>
<script src="{{root}}/skin/iso6391To3.js"></script>
<script type="text/javascript" src="{{root}}/skin/index.js" defer></script>
</head>
<body>
</body>
</html>
```
- To get books listed using `index.js` add - `<div class="book__list"></div>` under body tag.
- To get number of books listed add - `<h3 class="kiwixHomeBody__results"></h3>` under body tag.
- To add language select box add - `<select id="languageFilter"></select>` under body tag.
- To add language select box add - `<select id="categoryFilter"></select>` under body tag.
- To add search box for books use following form -
```
<form id='kiwixSearchForm'>
<input type="text" name="q" placeholder="Search" id="searchFilter" class='kiwixSearch filter'>
<input type="submit" class="searchButton" value="Search"/>
</form>
```
If you compile manually Libmicrohttpd, you might need to compile it
without GNU TLS, a bug here will empeach further compilation
otherwise.
If the compilation still fails, you might need to get a more recent
version of a dependency than the one packaged by your Linux
distribution. Try then with a source tarball distributed by the

View File

@@ -26,10 +26,10 @@ task writePom {
project {
groupId 'org.kiwix.kiwixlib'
artifactId 'kiwixlib'
version '10.0.0' + (System.env.KIWIXLIB_BUILDVERSION == null ? '' : '-'+System.env.KIWIXLIB_BUILDVERSION)
version '9.0.1' + (System.env.KIWIXLIB_BUILDVERSION == null ? '' : '-'+System.env.KIWIXLIB_BUILDVERSION)
packaging 'aar'
name 'kiwixlib'
url 'https://github.com/kiwix/libkiwix'
url 'https://github.com/kiwix/kiwix-lib'
licenses {
license {
name 'GPLv3'
@@ -44,9 +44,9 @@ task writePom {
}
}
scm {
connection 'https://github.com/kiwix/libkiwix.git'
developerConnection 'https://github.com/kiwix/libkiwix.git'
url 'https://github.com/kiwix/libkiwix'
connection 'https://github.com/kiwix/kiwix-lib.git'
developerConnection 'https://github.com/kiwix/kiwix-lib.git'
url 'https://github.com/kiwix/kiwix-lib'
}
}
}.withXml {

5
debian/changelog vendored
View File

@@ -1,5 +0,0 @@
libkiwix (0.0.0) unstable; urgency=medium
* Initial release
-- Kunal Mehta <legoktm@debian.org> Wed, 08 Jul 2020 18:12:32 -0700

46
debian/control vendored
View File

@@ -1,46 +0,0 @@
Source: libkiwix
Priority: optional
Maintainer: Kiwix team <kiwix@kiwix.org>
Build-Depends: debhelper-compat (= 13),
meson,
pkg-config,
libzim-dev (>= 6.1.8),
libcurl4-gnutls-dev,
libicu-dev,
libgtest-dev,
libkainjow-mustache-dev,
liblzma-dev,
libmicrohttpd-dev,
libpugixml-dev,
zlib1g-dev
Standards-Version: 4.5.0
Section: libs
Homepage: https://github.com/kiwix/libkiwix
Rules-Requires-Root: no
Package: libkiwix-dev
Section: libdevel
Architecture: any
Multi-Arch: same
Depends: libkiwix10 (= ${binary:Version}), ${misc:Depends}, python3,
libzim-dev (>= 6.0.0),
libicu-dev,
libpugixml-dev,
libcurl4-gnutls-dev,
libmicrohttpd-dev
Description: library of common code for Kiwix (development)
Kiwix is an offline Wikipedia reader. libkiwix provides the
software core for Kiwix, and contains the code shared by all
Kiwix ports (Windows, Linux, OSX, Android, etc.).
.
This package contains development files.
Package: libkiwix10
Architecture: any
Multi-Arch: same
Depends: ${shlibs:Depends}, ${misc:Depends}, aria2
Conflicts: libkiwix0, libkiwix3, libkiwix9
Description: library of common code for Kiwix
Kiwix is an offline Wikipedia reader. libkiwix provides the
software core for Kiwix, and contains the code shared by all
Kiwix ports (Windows, Linux, OSX, Android, etc.).

1
debian/copyright vendored
View File

@@ -1 +0,0 @@
See COPYING in the repository root.

View File

@@ -1,4 +0,0 @@
usr/include
usr/lib/*/libkiwix.so
usr/lib/*/pkgconfig
usr/bin

View File

@@ -1 +0,0 @@
usr/share/man/man1/kiwix-compile-resources.1*

View File

@@ -1 +0,0 @@
usr/lib/*/libkiwix.so.*

8
debian/rules vendored
View File

@@ -1,8 +0,0 @@
#!/usr/bin/make -f
export DEB_BUILD_MAINT_OPTIONS = hardening=+all
%:
dh $@ --buildsystem=meson
override_dh_auto_test:
dh_auto_test -- -t 3

View File

@@ -1 +0,0 @@
3.0 (native)

View File

@@ -7,7 +7,6 @@ files=(
"include/common/otherTools.h"
"include/common/regexTools.h"
"include/common/networkTools.h"
"include/common/archiveTools.h"
"include/manager.h"
"include/reader.h"
"include/kiwix.h"
@@ -23,7 +22,6 @@ files=(
"src/common/pathTools.cpp"
"src/common/regexTools.cpp"
"src/common/otherTools.cpp"
"src/common/archiveTools.cpp"
"src/common/networkTools.cpp"
"src/common/stringTools.cpp"
"src/xapianSearcher.cpp"

View File

@@ -26,10 +26,6 @@ namespace pugi {
class xml_node;
}
namespace zim {
class Archive;
}
namespace kiwix
{
@@ -47,7 +43,6 @@ class Book
bool update(const Book& other);
void update(const Reader& reader);
void update(const zim::Archive& archive);
void updateFromXml(const pugi::xml_node& node, const std::string& baseDir);
void updateFromOpds(const pugi::xml_node& node, const std::string& urlHost);
std::string getHumanReadableIdFromPath() const;
@@ -64,7 +59,6 @@ class Book
const std::string& getDate() const { return m_date; }
const std::string& getUrl() const { return m_url; }
const std::string& getName() const { return m_name; }
std::string getCategory() const;
const std::string& getTags() const { return m_tags; }
std::string getTagStr(const std::string& tagName) const;
bool getTagBool(const std::string& tagName) const;
@@ -100,17 +94,13 @@ class Book
void setFaviconMimeType(const std::string& faviconMimeType) { m_faviconMimeType = faviconMimeType; }
void setDownloadId(const std::string& downloadId) { m_downloadId = downloadId; }
private:
std::string getCategoryFromTags() const;
protected:
std::string m_id;
std::string m_downloadId;
std::string m_path;
bool m_pathValid = false;
bool m_pathValid;
std::string m_title;
std::string m_description;
std::string m_category;
std::string m_language;
std::string m_creator;
std::string m_publisher;
@@ -120,10 +110,10 @@ class Book
std::string m_flavour;
std::string m_tags;
std::string m_origId;
uint64_t m_articleCount = 0;
uint64_t m_mediaCount = 0;
bool m_readOnly = false;
uint64_t m_size = 0;
uint64_t m_articleCount;
uint64_t m_mediaCount;
bool m_readOnly;
uint64_t m_size;
mutable std::string m_favicon;
std::string m_faviconUrl;
std::string m_faviconMimeType;

View File

@@ -23,6 +23,7 @@
#include <string>
#include <vector>
#include <map>
#include <pthread.h>
#include <memory>
#include <stdexcept>
@@ -87,7 +88,7 @@ class Download {
class Downloader
{
public:
Downloader();
Downloader(std::string sessionFileDir = "");
virtual ~Downloader();
void close();

View File

@@ -1,5 +1,5 @@
/*
* Copyright 2018-2020 Matthieu Gautier <mgautier@kymeria.fr>
* Copyright 2018 Matthieu Gautier <mgautier@kymeria.fr>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -21,8 +21,7 @@
#define KIWIX_ENTRY_H
#include <stdio.h>
#include <zim/entry.h>
#include <zim/item.h>
#include <zim/article.h>
#include <exception>
#include <string>
@@ -42,12 +41,19 @@ class NoEntry : public std::exception {};
class Entry
{
public:
/**
* Default constructor.
*
* Construct an invalid entry.
*/
Entry() = default;
/**
* Construct an entry making reference to an zim article.
*
* @param article a zim::Article object
*/
Entry(zim::Entry entry);
Entry(zim::Article article);
virtual ~Entry() = default;
/**
@@ -57,14 +63,14 @@ class Entry
*
* @return the path of the entry.
*/
std::string getPath() const { return entry.getPath(); }
std::string getPath() const;
/**
* Get the title of the entry.
*
* @return the title of the entry.
*/
std::string getTitle() const { return entry.getTitle(); }
std::string getTitle() const;
/**
* Get the content of the entry.
@@ -74,7 +80,7 @@ class Entry
*
* @return the content of the entry.
*/
std::string getContent() const { return entry.getItem().getData(); }
std::string getContent() const;
/**
* Get the blob of the entry.
@@ -84,7 +90,7 @@ class Entry
* @param offset The starting offset of the blob.
* @return the blob of the entry.
*/
zim::Blob getBlob(offset_type offset = 0) const { return entry.getItem().getData(offset); }
zim::Blob getBlob(offset_type offset = 0) const;
/**
* Get the blob of the entry.
@@ -95,7 +101,7 @@ class Entry
* @param size The size of the blob.
* @return the blob of the entry.
*/
zim::Blob getBlob(offset_type offset, size_type size) const { return entry.getItem().getData(offset, size); }
zim::Blob getBlob(offset_type offset, size_type size) const;
/**
* Get the info for direct access to the content of the entry.
@@ -103,7 +109,7 @@ class Entry
* Some entry (ie binary ones) have their content plain stored
* in the zim file. Knowing the offset where the content is stored
* an user can directly read the content in the zim file bypassing the
* libkiwix/libzim.
* kiwix-lib/libzim.
*
* @return A pair specifying where to read the content.
* The string is the real file to read (may be different that .zim
@@ -111,7 +117,7 @@ class Entry
* The offset is the offset to read in the file.
* Return <"",0> if is not possible to read directly.
*/
zim::Item::DirectAccessInfo getDirectAccessInfo() const { return entry.getItem().getDirectAccessInformation(); }
std::pair<std::string, offset_type> getDirectAccessInfo() const;
/**
* Get the size of the entry.
@@ -168,14 +174,17 @@ class Entry
Entry getFinalEntry() const;
/**
* Get the zim entry wrapped by this (kiwix) entry
* Convert the entry to a boolean value.
*
* @return the zim entry
* @return True if the entry is valid.
*/
const zim::Entry& getZimEntry() const { return entry; }
explicit operator bool() const { return good(); }
private:
zim::Entry entry;
zim::Article article;
mutable zim::Article final_article;
bool good() const { return article.good(); }
};
}

View File

@@ -18,6 +18,7 @@ class KiwixServe
bool isRunning();
int getPort() { return m_port; }
int setPort(int port);
void setLibraryPath(std::string path) { m_libraryPath = path; }
private:
std::unique_ptr<Subprocess> mp_kiwixServe;

View File

@@ -24,7 +24,6 @@
#include <vector>
#include <map>
#include <memory>
#include <zim/archive.h>
#include "book.h"
#include "bookmark.h"
@@ -36,7 +35,6 @@ namespace kiwix
{
class OPDSDumper;
class Library;
enum supportedListSortBy { UNSORTED, TITLE, SIZE, DATE, CREATOR, PUBLISHER };
enum supportedListMode {
@@ -50,23 +48,18 @@ enum supportedListMode {
};
class Filter {
public: // types
using Tags = std::vector<std::string>;
private: // data
private:
uint64_t activeFilters;
Tags _acceptTags;
Tags _rejectTags;
std::string _category;
std::vector<std::string> _acceptTags;
std::vector<std::string> _rejectTags;
std::string _lang;
std::string _publisher;
std::string _creator;
size_t _maxSize;
std::string _query;
bool _queryIsPartial;
std::string _name;
public: // functions
public:
Filter();
~Filter() = default;
@@ -100,42 +93,16 @@ class Filter {
/**
* Set the filter to only accept book with corresponding tag.
*/
Filter& acceptTags(const Tags& tags);
Filter& rejectTags(const Tags& tags);
Filter& acceptTags(std::vector<std::string> tags);
Filter& rejectTags(std::vector<std::string> tags);
Filter& category(std::string category);
Filter& lang(std::string lang);
Filter& publisher(std::string publisher);
Filter& creator(std::string creator);
Filter& maxSize(size_t size);
Filter& query(std::string query, bool partial=true);
Filter& query(std::string query);
Filter& name(std::string name);
bool hasQuery() const;
const std::string& getQuery() const { return _query; }
bool queryIsPartial() const { return _queryIsPartial; }
bool hasName() const;
const std::string& getName() const { return _name; }
bool hasCategory() const;
const std::string& getCategory() const { return _category; }
bool hasLang() const;
const std::string& getLang() const { return _lang; }
bool hasPublisher() const;
const std::string& getPublisher() const { return _publisher; }
bool hasCreator() const;
const std::string& getCreator() const { return _creator; }
const Tags& getAcceptTags() const { return _acceptTags; }
const Tags& getRejectTags() const { return _rejectTags; }
private: // functions
friend class Library;
bool accept(const Book& book) const;
};
@@ -147,27 +114,12 @@ class Library
{
std::map<std::string, kiwix::Book> m_books;
std::map<std::string, std::shared_ptr<Reader>> m_readers;
std::map<std::string, std::shared_ptr<zim::Archive>> m_archives;
std::vector<kiwix::Bookmark> m_bookmarks;
class BookDB;
std::unique_ptr<BookDB> m_bookDB;
public:
typedef std::vector<std::string> BookIdCollection;
typedef std::map<std::string, int> AttributeCounts;
public:
Library();
~Library();
/**
* Library is not a copiable object. However it can be moved.
*/
Library(const Library& ) = delete;
Library(Library&& );
void operator=(const Library& ) = delete;
Library& operator=(Library&& );
/**
* Add a book to the library.
*
@@ -196,12 +148,9 @@ class Library
*/
bool removeBookmark(const std::string& zimId, const std::string& url);
const Book& getBookById(const std::string& id) const;
Book& getBookById(const std::string& id);
const Book& getBookByPath(const std::string& path) const;
Book& getBookByPath(const std::string& path);
std::shared_ptr<Reader> getReaderById(const std::string& id);
std::shared_ptr<zim::Archive> getArchiveById(const std::string& id);
/**
* Remove a book from the library.
@@ -217,7 +166,7 @@ class Library
* @param path the path of the file to write to.
* @return True if the library has been correctly saved.
*/
bool writeToFile(const std::string& path) const;
bool writeToFile(const std::string& path);
/**
* Write the library bookmarks to a file.
@@ -225,7 +174,7 @@ class Library
* @param path the path of the file to write to.
* @return True if the library has been correctly saved.
*/
bool writeBookmarksToFile(const std::string& path) const;
bool writeBookmarksToFile(const std::string& path);
/**
* Get the number of book in the library.
@@ -234,56 +183,42 @@ class Library
* @param remoteBooks If we must count remote books (books with an url)
* @return The number of books.
*/
unsigned int getBookCount(const bool localBooks, const bool remoteBooks) const;
unsigned int getBookCount(const bool localBooks, const bool remoteBooks);
/**
* Get all languagues of the books in the library.
* Get all langagues of the books in the library.
*
* @return A list of languages.
*/
std::vector<std::string> getBooksLanguages() const;
/**
* Get all languagues of the books in the library with counts.
*
* @return A list of languages with the count of books in each language.
*/
AttributeCounts getBooksLanguagesWithCounts() const;
/**
* Get all categories of the books in the library.
*
* @return A list of categories.
*/
std::vector<std::string> getBooksCategories() const;
std::vector<std::string> getBooksLanguages();
/**
* Get all book creators of the books in the library.
*
* @return A list of book creators.
*/
std::vector<std::string> getBooksCreators() const;
std::vector<std::string> getBooksCreators();
/**
* Get all book publishers of the books in the library.
*
* @return A list of book publishers.
*/
std::vector<std::string> getBooksPublishers() const;
std::vector<std::string> getBooksPublishers();
/**
* Get all bookmarks.
*
* @return A list of bookmarks
*/
const std::vector<kiwix::Bookmark> getBookmarks(bool onlyValidBookmarks = true) const;
const std::vector<kiwix::Bookmark> getBookmarks(bool onlyValidBookmarks = true);
/**
* Get all book ids of the books in the library.
*
* @return A list of book ids.
*/
BookIdCollection getBooksIds() const;
std::vector<std::string> getBooksIds();
/**
* Filter the library and generate a new one with the keep elements.
@@ -293,7 +228,7 @@ class Library
* @param search List only books with search in the title or description.
* @return The list of bookIds corresponding to the query.
*/
DEPRECATED BookIdCollection filter(const std::string& search) const;
DEPRECATED std::vector<std::string> filter(const std::string& search);
/**
@@ -302,7 +237,7 @@ class Library
* @param filter The filter to use.
* @return The list of bookIds corresponding to the filter.
*/
BookIdCollection filter(const Filter& filter) const;
std::vector<std::string> filter(const Filter& filter);
/**
@@ -312,7 +247,7 @@ class Library
* @param comparator how to sort the books
* @return The sorted list of books
*/
void sort(BookIdCollection& bookIds, supportedListSortBy sortBy, bool ascending) const;
void sort(std::vector<std::string>& bookIds, supportedListSortBy sortBy, bool ascending);
/**
* List books in the library.
@@ -336,7 +271,7 @@ class Library
* Set to 0 to cancel this filter.
* @return The list of bookIds corresponding to the query.
*/
DEPRECATED BookIdCollection listBooksIds(
DEPRECATED std::vector<std::string> listBooksIds(
int supportedListMode = ALL,
supportedListSortBy sortBy = UNSORTED,
const std::string& search = "",
@@ -344,21 +279,11 @@ class Library
const std::string& creator = "",
const std::string& publisher = "",
const std::vector<std::string>& tags = {},
size_t maxSize = 0) const;
size_t maxSize = 0);
friend class OPDSDumper;
friend class libXMLDumper;
private: // types
typedef const std::string& (Book::*BookStrPropMemFn)() const;
private: // functions
AttributeCounts getBookAttributeCounts(BookStrPropMemFn p) const;
std::vector<std::string> getBookPropValueSet(BookStrPropMemFn p) const;
BookIdCollection filterViaBookDB(const Filter& filter) const;
void updateBookDB(const Book& book);
};
}
#endif

View File

@@ -38,7 +38,7 @@ class LibXMLDumper
{
public:
LibXMLDumper() = default;
LibXMLDumper(const Library* library);
LibXMLDumper(Library* library);
~LibXMLDumper();
/**
@@ -69,10 +69,10 @@ class LibXMLDumper
*
* @param library The library to dump.
*/
void setLibrary(const Library* library) { this->library = library; }
void setLibrary(Library* library) { this->library = library; }
protected:
const kiwix::Library* library;
kiwix::Library* library;
std::string baseDir;
private:
void handleBook(Book book, pugi::xml_node root_node);

View File

@@ -13,8 +13,19 @@ headers = [
'search_renderer.h',
'server.h',
'kiwixserve.h',
'name_mapper.h',
'tools.h'
'name_mapper.h'
]
install_headers(headers, subdir:'kiwix')
install_headers(
'tools/base64.h',
'tools/networkTools.h',
'tools/otherTools.h',
'tools/pathTools.h',
'tools/regexTools.h',
'tools/stringTools.h',
'tools/lock.h',
subdir:'kiwix/tools'
)

View File

@@ -1,33 +0,0 @@
/*
* Copyright 2021 Veloman Yunkan <veloman.yunkan@gmail.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*/
#ifndef KIWIX_OPDS_CATALOG_H
#define KIWIX_OPDS_CATALOG_H
#include "library.h"
namespace kiwix
{
std::string getSearchUrl(const Filter& f);
} // namespace kiwix
#endif // KIWIX_OPDS_CATALOG_H

View File

@@ -26,6 +26,9 @@
#include <pugixml.hpp>
#include "tools/base64.h"
#include "tools/pathTools.h"
#include "tools/regexTools.h"
#include "library.h"
#include "reader.h"
@@ -48,50 +51,24 @@ class OPDSDumper
/**
* Dump the OPDS feed.
*
* @param bookIds the ids of the books to include in the feed
* @param query the query used to obtain the list of book ids
* @param id The id of the library.
* @return The OPDS feed.
*/
std::string dumpOPDSFeed(const std::vector<std::string>& bookIds, const std::string& query) const;
std::string dumpOPDSFeed(const std::vector<std::string>& bookIds);
/**
* Dump the OPDS feed.
*
* @param bookIds the ids of the books to include in the feed
* @param query the query used to obtain the list of book ids
* @param partial whether the feed should include partial or complete entries
* @return The OPDS feed.
*/
std::string dumpOPDSFeedV2(const std::vector<std::string>& bookIds, const std::string& query, bool partial) const;
/**
* Dump the OPDS complete entry document.
*
* @param bookId the id of the book
* @return The OPDS complete entry document.
*/
std::string dumpOPDSCompleteEntry(const std::string& bookId) const;
/**
* Dump the categories OPDS feed.
*
* @return The OPDS feed.
*/
std::string categoriesOPDSFeed() const;
/**
* Dump the languages OPDS feed.
*
* @return The OPDS feed.
*/
std::string languagesOPDSFeed() const;
/**
* Set the id of the library.
* Set the id of the opds stream.
*
* @param id the id to use.
*/
void setLibraryId(const std::string& id) { this->libraryId = id;}
void setId(const std::string& id) { this->id = id;}
/**
* Set the title oft the opds stream.
*
* @param title the title to use.
*/
void setTitle(const std::string& title) { this->title = title; }
/**
* Set the root location used when generating url.
@@ -100,6 +77,13 @@ class OPDSDumper
*/
void setRootLocation(const std::string& rootLocation) { this->rootLocation = rootLocation; }
/**
* Set the search url.
*
* @param searchUrl the search url to use.
*/
void setSearchDescriptionUrl(const std::string& searchDescriptionUrl) { this->searchDescriptionUrl = searchDescriptionUrl; }
/**
* Set some informations about the search results.
*
@@ -109,13 +93,27 @@ class OPDSDumper
*/
void setOpenSearchInfo(int totalResult, int startIndex, int count);
/**
* Set the library to dump.
*
* @param library The library to dump.
*/
void setLibrary(Library* library) { this->library = library; }
protected:
kiwix::Library* library;
std::string libraryId;
std::string id;
std::string title;
std::string date;
std::string rootLocation;
std::string searchDescriptionUrl;
int m_totalResults;
int m_startIndex;
int m_count;
bool m_isSearchResult = false;
private:
pugi::xml_node handleBook(Book book, pugi::xml_node root_node);
};
}

View File

@@ -21,61 +21,28 @@
#define KIWIX_READER_H
#include <stdio.h>
#include <zim/article.h>
#include <zim/file.h>
#include <zim/fileiterator.h>
#include <zim/zim.h>
#include <zim/archive.h>
#include <exception>
#include <map>
#include <sstream>
#include <string>
#include "common.h"
#include "entry.h"
#include "tools/pathTools.h"
#include "tools/stringTools.h"
using namespace std;
namespace kiwix
{
/**
* The SuggestionItem is a helper class that contains the info about a single
* suggestion item.
*/
class SuggestionItem
{
// Functions
// Temporarily making the constructor public until the code move is complete
public:
// Create a sugggestion item.
explicit SuggestionItem(const std::string& title, const std::string& normalizedTitle,
const std::string& path, const std::string& snippet = "") :
title(title),
normalizedTitle(normalizedTitle),
path(path),
snippet(snippet) {}
public:
const std::string& getTitle() const { return title;}
const std::string& getNormalizedTitle() const { return normalizedTitle;}
const std::string& getPath() const { return path;}
const std::string& getSnippet() const { return snippet;}
bool hasSnippet() const { return !snippet.empty();}
// Data
private:
std::string title;
std::string normalizedTitle;
std::string path;
std::string snippet;
friend class Reader;
};
/**
* The Reader class is the class who allow to get an entry content from a zim
* file.
*/
using SuggestionsList_t = std::vector<SuggestionItem>;
class Reader
{
public:
@@ -88,19 +55,8 @@ class Reader
* unsplitted path as if the file were not splitted
* (.zim extesion).
*/
explicit Reader(const string zimFilePath);
/**
* Create a Reader to read a zim file given by the Archive.
*
* @param archive The shared pointer to the Archive object.
*/
explicit Reader(const std::shared_ptr<zim::Archive> archive);
#ifndef _WIN32
explicit Reader(int fd);
Reader(int fd, zim::offset_type offset, zim::size_type size);
#endif
~Reader() = default;
Reader(const string zimFilePath);
~Reader();
/**
* Get the number of "displayable" entries in the zim file.
@@ -143,6 +99,17 @@ class Reader
*/
string getId() const;
/**
* Get the url of a random page.
*
* Deprecated : Use `getRandomPage` instead.
*
* @return Url of a random page. The page is picked from all entries in
* the 'A' namespace.
* The main page is excluded from the potential results.
*/
DEPRECATED string getRandomPageUrl() const;
/**
* Get a random page.
*
@@ -152,6 +119,31 @@ class Reader
*/
Entry getRandomPage() const;
/**
* Get the url of the first page.
*
* Deprecated : Use `getFirstPage` instead.
*
* @return Url of the first entry in the 'A' namespace.
*/
DEPRECATED string getFirstPageUrl() const;
/**
* Get the entry of the first page.
*
* @return The first entry in the 'A' namespace.
*/
Entry getFirstPage() const;
/**
* Get the url of the main page.
*
* Deprecated : Use `getMainPage` instead.
*
* @return Url of the main page as specified in the zim file.
*/
DEPRECATED string getMainPageUrl() const;
/**
* Get the entry of the main page.
*
@@ -292,6 +284,16 @@ class Reader
*/
string getScraper() const;
/**
* Get the origId of the zim file.
*
* The origId is only used in the case of patch zim file and is the Id
* of the original zim file.
*
* @return The origId of the zim file as specified in the zim metadata.
*/
string getOrigId() const;
/**
* Get the favicon of the zim file.
*
@@ -330,15 +332,93 @@ class Reader
*/
Entry getEntryFromTitle(const std::string& title) const;
/**
* Get the url of a page specified by a title.
*
* @param[in] title the title of the page.
* @param[out] url the url of the page.
* @return True if the page can be found.
*/
DEPRECATED bool getPageUrlFromTitle(const string& title, string& url) const;
/**
* Get the mimetype of a entry specified by a url.
*
* @param[in] url the url of the entry.
* @param[out] mimeType the mimeType of the entry.
* @return True if the mimeType has been found.
*/
DEPRECATED bool getMimeTypeByUrl(const string& url, string& mimeType) const;
/**
* Get the content of an entry specifed by a url.
*
* Alias to `getContentByEncodedUrl`
*/
DEPRECATED bool getContentByUrl(const string& url,
string& content,
string& title,
unsigned int& contentLength,
string& contentType) const;
/**
* Get the content of an entry specified by a url encoded url.
*
* Equivalent to getContentByDecodedUrl(urlDecode(url), ...).
*/
DEPRECATED bool getContentByEncodedUrl(const string& url,
string& content,
string& title,
unsigned int& contentLength,
string& contentType,
string& baseUrl) const;
/**
* Get the content of an entry specified by an url encoded url.
*
* Equivalent to getContentByEncodedUrl but without baseUrl.
*/
DEPRECATED bool getContentByEncodedUrl(const string& url,
string& content,
string& title,
unsigned int& contentLength,
string& contentType) const;
/**
* Get the content of an entry specified by a url.
*
* @param[in] url The url of the entry.
* @param[out] content The content of the entry.
* @param[out] title the title of the entry.
* @param[out] contentLength The size of the entry (size of content).
* @param[out] contentType The mimeType of the entry.
* @param[out] baseUrl Return the true url of the entry.
* If the specified entry is a redirection, contains
* the url of the targeted entry.
* @return True if the entry has been found.
*/
DEPRECATED bool getContentByDecodedUrl(const string& url,
string& content,
string& title,
unsigned int& contentLength,
string& contentType,
string& baseUrl) const;
/**
* Get the content of an entry specified by a url.
*
* Equivalent to getContentByDecodedUrl but withou the baseUrl.
*/
DEPRECATED bool getContentByDecodedUrl(const string& url,
string& content,
string& title,
unsigned int& contentLength,
string& contentType) const;
/**
* Search for entries with title starting with prefix (case sensitive).
*
* Suggestions are stored in an internal vector and can be retrieved using
* `getNextSuggestion` method.
* This method is not thread safe and is deprecated. Use :
* bool searchSuggestions(const string& prefix,
* unsigned int suggestionsCount,
* SuggestionsList_t& results);
*
* @param prefix The prefix to search.
* @param suggestionsCount How many suggestions to search for.
@@ -346,27 +426,12 @@ class Reader
* If false, add suggestions to the internal vector
* (until internal vector size is suggestionCount (or no more
* suggestion))
* @return True if some suggestions have been added to the internal vector.
* @return True if some suggestions where added to the internal vector.
*/
DEPRECATED bool searchSuggestions(const string& prefix,
bool searchSuggestions(const string& prefix,
unsigned int suggestionsCount,
const bool reset = true);
/**
* Search for entries with title starting with prefix (case sensitive).
*
* Suggestions are added to the `result` vector.
*
* @param prefix The prefix to search.
* @param suggestionsCount How many suggestions to search for.
* @param result The vector where to store the suggestions.
* @return True if some suggestions have been added to the vector.
*/
bool searchSuggestions(const string& prefix,
unsigned int suggestionsCount,
SuggestionsList_t& resuls);
/**
* Search for entries for the given prefix.
*
@@ -378,38 +443,22 @@ class Reader
* In any case, suggestions are stored in an internal vector and can be
* retrieved using `getNextSuggestion` method.
* The internal vector will be reset.
* This method is not thread safe and is deprecated. Use :
* bool searchSuggestionsSmart(const string& prefix,
* unsigned int suggestionsCount,
* SuggestionsList_t& results);
*
* @param prefix The prefix to search for.
* @param suggestionsCount How many suggestions to search for.
*/
DEPRECATED bool searchSuggestionsSmart(const string& prefix,
bool searchSuggestionsSmart(const string& prefix,
unsigned int suggestionsCount);
/**
* Search for entries for the given prefix.
* Check if the url exists in the zim file.
*
* If the zim file has a internal fulltext index, the suggestions will be
* searched using it.
* Else the suggestions will be search using `searchSuggestions` while trying
* to be smart about case sensitivity (using `getTitleVariants`).
* Deprecated : Use `pathExists` instead.
*
* In any case, suggestions are stored in an internal vector and can be
* retrieved using `getNextSuggestion` method.
* The internal vector will be reset.
*
* @param prefix The prefix to search for.
* @param suggestionsCount How many suggestions to search for.
* @param results The vector where to store the suggestions
* @return True if some suggestions have been added to the results.
* @param url the url to check.
* @return True if the url exits in the zim file.
*/
bool searchSuggestionsSmart(const string& prefix,
unsigned int suggestionsCount,
SuggestionsList_t& results);
DEPRECATED bool urlExists(const string& url) const;
/**
* Check if the path exists in the zim file.
@@ -441,7 +490,7 @@ class Reader
* @param[out] title the title of the suggestion.
* @return True if title has been set.
*/
DEPRECATED bool getNextSuggestion(string& title);
bool getNextSuggestion(string& title);
/**
* Get the next suggestion title and url.
@@ -450,7 +499,7 @@ class Reader
* @param[out] url the url of the suggestion.
* @return True if title and url have been set.
*/
DEPRECATED bool getNextSuggestion(string& title, string& url);
bool getNextSuggestion(string& title, string& url);
/**
* Get if we can check zim file integrity (has a checksum).
@@ -466,6 +515,16 @@ class Reader
*/
bool isCorrupted() const;
/**
* Parse a full url into a namespace and url.
*
* @param[in] url The full url ("/N/url").
* @param[out] ns The namespace (N).
* @param[out] title The url (url).
* @return True
*/
DEPRECATED bool parseUrl(const string& url, char* ns, string& title) const;
/**
* Return the total size of the zim file.
*
@@ -480,14 +539,28 @@ class Reader
*
* @return The libzim file handler.
*/
zim::Archive* getZimArchive() const;
zim::File* getZimFileHandler() const;
/**
* Get the zim article object associated to a url.
*
* @param[in] url The url of the article.
* @param[out] article The libzim article object.
* @return True if the url is good (article.good()).
*/
DEPRECATED bool getArticleObjectByDecodedUrl(const string& url,
zim::Article& article) const;
protected:
std::shared_ptr<zim::Archive> zimArchive;
zim::File* zimFileHandler;
zim::size_type firstArticleOffset;
zim::size_type lastArticleOffset;
zim::size_type nsACount;
zim::size_type nsICount;
std::string zimFilePath;
SuggestionsList_t suggestions;
SuggestionsList_t::iterator suggestionsOffset;
std::vector<std::vector<std::string>> suggestions;
std::vector<std::vector<std::string>>::iterator suggestionsOffset;
private:
std::map<const std::string, unsigned int> parseCounterMetadata() const;

View File

@@ -21,7 +21,6 @@
#define KIWIX_SEARCH_RENDERER_H
#include <string>
#include <zim/search.h>
namespace kiwix
{
@@ -41,8 +40,6 @@ class SearchRenderer
* Used to generate pagination links.
*/
SearchRenderer(Searcher* searcher, NameMapper* mapper);
SearchRenderer(zim::SearchResultSet srs, NameMapper* mapper,
unsigned int start, unsigned int estimatedResultCount);
~SearchRenderer();
@@ -63,13 +60,6 @@ class SearchRenderer
*/
void setSearchProtocolPrefix(const std::string& prefix);
/**
* set result count per page
*/
void setPageLength(unsigned int pageLength){
this->pageLength = pageLength;
}
/**
* Generate the html page with the resutls of the search.
*/
@@ -77,15 +67,16 @@ class SearchRenderer
protected:
std::string beautifyInteger(const unsigned int number);
zim::SearchResultSet m_srs;
Searcher* mp_searcher;
NameMapper* mp_nameMapper;
std::string searchContent;
std::string searchPattern;
std::string protocolPrefix;
std::string searchProtocolPrefix;
unsigned int pageLength;
unsigned int resultCountPerPage;
unsigned int estimatedResultCount;
unsigned int resultStart;
unsigned int resultEnd;
};

View File

@@ -27,10 +27,11 @@
#include <cctype>
#include <locale>
#include <string>
#include <memory>
#include <vector>
#include <zim/search.h>
#include <vector>
#include "tools/pathTools.h"
#include "tools/stringTools.h"
#include "kiwix_config.h"
using namespace std;
@@ -48,11 +49,10 @@ class Result
virtual std::string get_content() = 0;
virtual int get_wordCount() = 0;
virtual int get_size() = 0;
virtual std::string get_zimId() = 0;
virtual int get_readerIndex() = 0;
};
struct SearcherInternal;
struct SuggestionInternal;
/**
* The Searcher class is reponsible to do different kind of search using the
* fulltext index.
@@ -86,12 +86,12 @@ class Searcher
*
* @param search The search query.
* @param resultStart the start offset of the search results (used for pagination).
* @param maxResultCount Maximum results to get from start (used for pagination).
* @param resultEnd the end offset of the search results (used for pagination).
* @param verbose print some info on stdout if true.
*/
void search(const std::string& search,
unsigned int resultStart,
unsigned int maxResultCount,
unsigned int resultEnd,
const bool verbose = false);
/**
@@ -105,12 +105,12 @@ class Searcher
* @param longitude The longitude of the center point.
* @param distance The radius of the disc.
* @param resultStart the start offset of the search results (used for pagination).
* @param maxResultCount Maximum number of results to get from start (used for pagination).
* @param resultEnd the end offset of the search results (used for pagination).
* @param verbose print some info on stdout if true.
*/
void geo_search(float latitude, float longitude, float distance,
unsigned int resultStart,
unsigned int maxResultCount,
unsigned int resultEnd,
const bool verbose = false);
/**
@@ -143,29 +143,23 @@ class Searcher
*/
unsigned int getEstimatedResultCount();
/**
* Get a SearchResultSet object for current search
*/
zim::SearchResultSet getSearchResultSet();
unsigned int getResultStart() { return resultStart; }
unsigned int getMaxResultCount() { return maxResultCount; }
unsigned int getResultEnd() { return resultEnd; }
protected:
std::string beautifyInteger(const unsigned int number);
void closeIndex();
void searchInIndex(string& search,
const unsigned int resultStart,
const unsigned int maxResultCount,
const unsigned int resultEnd,
const bool verbose = false);
std::vector<Reader*> readers;
std::unique_ptr<SearcherInternal> internal;
std::unique_ptr<SuggestionInternal> suggestionInternal;
SearcherInternal* internal;
std::string searchPattern;
unsigned int estimatedResultCount;
unsigned int resultStart;
unsigned int maxResultCount;
unsigned int resultEnd;
private:
void reset();

View File

@@ -55,24 +55,19 @@ namespace kiwix
void setPort(int port) { m_port = port; }
void setNbThreads(int threads) { m_nbThreads = threads; }
void setVerbose(bool verbose) { m_verbose = verbose; }
void setIndexTemplateString(const std::string& indexTemplateString) { m_indexTemplateString = indexTemplateString; }
void setTaskbar(bool withTaskbar, bool withLibraryButton)
{ m_withTaskbar = withTaskbar; m_withLibraryButton = withLibraryButton; }
void setBlockExternalLinks(bool blockExternalLinks)
{ m_blockExternalLinks = blockExternalLinks; }
{ m_withTaskbar = withTaskbar; m_withLibraryButton = withLibraryButton; }
protected:
Library* mp_library;
NameMapper* mp_nameMapper;
std::string m_root = "";
std::string m_addr = "";
std::string m_indexTemplateString = "";
int m_port = 80;
int m_nbThreads = 1;
bool m_verbose = false;
bool m_withTaskbar = true;
bool m_withLibraryButton = true;
bool m_blockExternalLinks = false;
std::unique_ptr<InternalServer> mp_server;
};
}

View File

@@ -1,198 +0,0 @@
/*
* Copyright 2021 Matthieu Gautier <mgautier@kymeria.fr>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*/
#ifndef KIWIX_TOOLS_H
#define KIWIX_TOOLS_H
#include <string>
#include <vector>
namespace kiwix {
/**
* Return the current directory.
*
* @return the current directory (utf8 encoded)
*/
std::string getCurrentDirectory();
/**
* Return the data directory.
*
* The data directory is a directory where to put data (zim files, ...)
* It depends of the platform and it may be changed by user using environment variable.
*
* The resolution order is :
* - `KIWIX_DATA_DIR` env variable (if set).
* - On Windows :
* . `$APPDATA/kiwix` if $APPDATA is set
* . `$USERPROFILE/kiwix` if $USERPROFILE is set
* - Else :
* . `$XDG_DATA_HOME/kiwix`if $XDG_DATA_HOME is set
* . `$HOME/.local/share/kiwx` if $HOWE is set
* - current directory
*
* @return the path of the data directory (utf8 encoded)
*/
std::string getDataDirectory();
/** Return the path of the executable
*
* Some application may be packaged in auto extractible archive (Appimage) and the
* real executable is different of the path of the archive.
* If `realPathOnly` is true, return the path of the real executable instead of the
* archive launched by the user.
*
* @param realPathOnly If we must return the real path of the executable.
* @return the path of the executable (utf8 encoded)
*/
std::string getExecutablePath(bool realPathOnly = false);
/** Tell if the path is a relative path.
*
* This function is provided as a small helper. It is probably better to use native tools
* to manipulate paths.
*
* @param path A utf8 encoded path.
* @return true if the path is relative.
*/
bool isRelativePath(const std::string& path);
/** Append a path to another one.
*
* This function is provided as a small helper. It is probably better to use native tools
* to manipulate paths.
*
* @param basePath the base path.
* @param relativePath a path to add to the base path, must be a relative path.
* @return The concatenation of the paths, using the right separator.
*/
std::string appendToDirectory(const std::string& basePath, const std::string& relativePath);
/** Remove the last element of a path.
*
* This function is provided as a small helper. It is probably better to use native tools
* to manipulate paths.
*
* @param path a path.
* @return The parent directory (or empty string if none).
*/
std::string removeLastPathElement(const std::string& path);
/** Get the last element of a path.
*
* This function is provided as a small helper. It is probably better to use native tools
* to manipulate paths.
*
* @param path a path.
* @return The base name of the path or empty string if none (ending with a separator).
*/
std::string getLastPathElement(const std::string& path);
/** Compute the absolute path of a relative path based on another one
*
* Equivalent to appendToDirectory followed by a normalization of the path.
*
* This function is provided as a small helper. It is probably better to use native tools
* to manipulate paths.
*
* @param path the base path (if empty, current directory is taken).
* @param relativePath the relative path.
* @return a absolute path.
*/
std::string computeAbsolutePath(const std::string& path, const std::string& relativePath);
/** Compute the relative path of a path relative to another one
*
* This function is provided as a small helper. It is probably better to use native tools
* to manipulate paths.
*
* @param path the base path.
* @param absolutePath the absolute path to find the relative path for.
* @return a relative path (pointing to absolutePath, relative to path).
*/
std::string computeRelativePath(const std::string& path, const std::string& absolutePath);
/** Sleep the current thread.
*
* This function is provided as a small helper. It is probably better to use native tools.
*
* @param milliseconds The number of milliseconds to wait for.
*/
void sleep(unsigned int milliseconds);
/** Split a string
*
* This function is provided as a small helper. It is probably better to use native tools.
*
* Assuming text = "foo:;bar;baz,oups;"
*
* split(text, ":;", true, true) => ["foo", ":", ";", "bar", ";", "baz,oups", ";"]
* split(text, ":;", true, false) => ["foo", "bar", "baz,oups"] (default)
* split(text, ":;", false, true) => ["foo", ":", "", ";", "bar", ";", "baz,oups", ";", ""]
* split(text, ":;", false, false) => ["foo", "", "bar", "baz,oups", ""]
*
* @param str The string to split.
* @param delims A string of potential delimiters.
* Each charater in the string can be a individual delimiters.
* @param dropEmpty true if empty part must be dropped from the result.
* @param keepDelim true if delimiter must be included from the result.
* @return a list of part (potentially containing delimiters)
*/
std::vector<std::string> split(const std::string& str, const std::string& delims, bool dropEmpty=true, bool keepDelim = false);
/** Convert language code from iso2 code to iso3
*
* This function is provided as a small helper. It is probably better to use native tools
* to manipulate locales.
*
* @param a2code a iso2 code string.
* @return the corresponding iso3 code.
* @throw std::out_of_range if iso2 code is not known.
*/
std::string converta2toa3(const std::string& a2code);
/** Extracts content from given file.
*
* This function provides content of a file provided it's path.
*
* @param path The absolute path provided in string format.
* @return Content of corresponding file in string format.
*/
std::string getFileContent(const std::string& path);
/** checks if file exists.
*
* This function returns boolean stating if file exists or not.
*
* @param path The absolute path provided in string format.
* @return Boolean representing if file exists or not.
*/
bool fileExists(const std::string& path);
/** provides mimetype from filename.
*
* This function provides mimetype from file-name.
*
* @param filename string containing filename.
* @return mimetype from filename in string format.
*/
std::string getMimeTypeForFile(const std::string& filename);
}
#endif // KIWIX_TOOLS_H

46
include/tools/lock.h Normal file
View File

@@ -0,0 +1,46 @@
#ifndef KIWIXLIB_TOOL_LOCK_H
#define KIWIXLIB_TOOL_LOCK_H
#include <pthread.h>
namespace kiwix {
class Lock
{
public:
explicit Lock(pthread_mutex_t* mutex) :
mp_mutex(mutex)
{
pthread_mutex_lock(mp_mutex);
}
~Lock() {
if (mp_mutex != nullptr) {
pthread_mutex_unlock(mp_mutex);
}
}
Lock(Lock && other) :
mp_mutex(other.mp_mutex)
{
other.mp_mutex = nullptr;
}
Lock & operator=(Lock && other)
{
mp_mutex = other.mp_mutex;
other.mp_mutex = nullptr;
return *this;
}
private:
pthread_mutex_t* mp_mutex;
Lock(Lock const &) = delete;
Lock & operator=(Lock const &) = delete;
};
}
#endif //KIWIXLIB_TOOL_LOCK_H

View File

@@ -22,9 +22,6 @@
#include <string>
#include <vector>
#include <map>
#include <zim/zim.h>
#include <mustache.hpp>
namespace pugi {
class xml_node;
@@ -32,7 +29,9 @@ namespace pugi {
namespace kiwix
{
void sleep(unsigned int milliseconds);
std::string nodeToString(const pugi::xml_node& node);
std::string converta2toa3(const std::string& a2code);
/*
* Convert all format tag string to new format
@@ -42,13 +41,6 @@ namespace kiwix
const std::string& tagName);
bool convertStrToBool(const std::string& value);
using MimeCounterType = std::map<const std::string, zim::entry_index_type>;
MimeCounterType parseMimetypeCounter(const std::string& counterData);
std::string gen_date_str();
std::string gen_uuid(const std::string& s);
std::string render_template(const std::string& template_str, kainjow::mustache::data data);
}
#endif

View File

@@ -26,13 +26,23 @@
std::string WideToUtf8(const std::wstring& wstr);
std::wstring Utf8ToWide(const std::string& str);
#endif
bool isRelativePath(const std::string& path);
std::string computeAbsolutePath(const std::string& path, const std::string& relativePath);
std::string computeRelativePath(const std::string& path, const std::string& absolutePath);
std::string removeLastPathElement(const std::string& path);
std::string appendToDirectory(const std::string& directoryPath, const std::string& filename);
unsigned int getFileSize(const std::string& path);
std::string getFileSizeAsString(const std::string& path);
std::string getFileContent(const std::string& path);
bool fileExists(const std::string& path);
bool makeDirectory(const std::string& path);
std::string makeTmpDirectory();
bool copyFile(const std::string& sourcePath, const std::string& destPath);
std::string getLastPathElement(const std::string& path);
std::string getExecutablePath(bool realPathOnly = false);
std::string getCurrentDirectory();
std::string getDataDirectory();
bool writeTextFile(const std::string& path, const std::string& content);
std::string getMimeTypeForFile(const std::string& filename);
#endif

View File

@@ -29,8 +29,5 @@ std::string replaceRegex(const std::string& content,
std::string appendToFirstOccurence(const std::string& content,
const std::string& regex,
const std::string& replacement);
std::string prependToFirstOccurence(const std::string& content,
const std::string& regex,
const std::string& replacement);
#endif

View File

@@ -43,6 +43,7 @@ void loadICUExternalTables();
std::string urlEncode(const std::string& value, bool encodeReserved = false);
std::string urlDecode(const std::string& value, bool component = false);
std::vector<std::string> split(const std::string&, const std::string&, bool trimEmpty = true);
std::string join(const std::vector<std::string>& list, const std::string& sep);
std::string ucAll(const std::string& word);
@@ -69,7 +70,5 @@ T extractFromString(const std::string& str) {
}
bool startsWith(const std::string& base, const std::string& start);
std::vector<std::string> getTitleVariants(const std::string& title);
} //namespace kiwix
#endif

View File

@@ -1,68 +1,43 @@
project('libkiwix', 'cpp',
version : '10.0.0', # Also change this in android-kiwix-lib-publisher/kiwixLibAndroid/build.gradle
license : 'GPLv3+',
project('kiwix-lib', 'cpp',
version : '9.0.1', # Also change this in android-kiwix-lib-publisher/kiwixLibAndroid/build.gradle
license : 'GPL',
default_options : ['c_std=c11', 'cpp_std=c++11', 'werror=true'])
compiler = meson.get_compiler('cpp')
wrapper = get_option('wrapper')
static_deps = wrapper.contains('android') or wrapper.contains('java') or get_option('default_library') == 'static'
if wrapper.contains('android')
static_deps = 'android' in wrapper or 'java' in wrapper or get_option('default_library') == 'static'
if 'android' in wrapper
extra_libs = ['-llog']
else
extra_libs = []
endif
if wrapper.contains('java')
if 'java' in wrapper
add_languages('java')
endif
# See https://github.com/kiwix/libkiwix/issues/371
if ['arm', 'mips', 'm68k', 'ppc', 'sh4'].contains(target_machine.cpu_family())
extra_libs += '-latomic'
endif
if (compiler.get_id() == 'gcc' and build_machine.system() == 'linux') or target_machine.system() == 'freebsd'
# C++ std::thread is implemented using pthread on linux by gcc
thread_dep = dependency('threads')
else
thread_dep = dependency('', required:false)
endif
thread_dep = dependency('threads')
libicu_dep = dependency('icu-i18n', static:static_deps)
libzim_dep = dependency('libzim', version : '>=5.0.0', static:static_deps)
pugixml_dep = dependency('pugixml', static:static_deps)
libcurl_dep = dependency('libcurl', static:static_deps)
microhttpd_dep = dependency('libmicrohttpd', static:static_deps)
zlib_dep = dependency('zlib', static:static_deps)
xapian_dep = dependency('xapian-core', static:static_deps)
if compiler.has_header('mustache.hpp')
extra_include = []
elif compiler.has_header('mustache.hpp', args: '-I/usr/include/kainjow')
extra_include = ['/usr/include/kainjow']
else
if not compiler.has_header('mustache.hpp')
error('Cannot found header mustache.hpp')
endif
libzim_dep = dependency('libzim', version : '>=7.0.0', static:static_deps)
if not compiler.has_header_symbol('zim/zim.h', 'LIBZIM_WITH_XAPIAN')
error('Libzim seems to be compiled without xapian. Xapian support is mandatory.')
endif
extra_cflags = ''
if target_machine.system() == 'windows' and static_deps
add_project_arguments('-DCURL_STATICLIB', language : 'cpp')
extra_cflags += '-DCURL_STATICLIB'
endif
if target_machine.system() == 'windows'
add_project_arguments('-DNOMINMAX', language: 'cpp')
endif
all_deps = [thread_dep, libicu_dep, libzim_dep, pugixml_dep, libcurl_dep, microhttpd_dep]
all_deps = [thread_dep, libicu_dep, libzim_dep, pugixml_dep, libcurl_dep, microhttpd_dep, zlib_dep, xapian_dep]
inc = include_directories('include', extra_include)
inc = include_directories('include')
conf = configuration_data()
conf.set('VERSION', '"@0@"'.format(meson.project_version()))
@@ -79,7 +54,7 @@ subdir('static')
subdir('src')
subdir('test')
pkg_requires = ['libzim', 'icu-i18n', 'pugixml', 'libcurl', 'libmicrohttpd', 'xapian-core']
pkg_requires = ['libzim', 'icu-i18n', 'pugixml', 'libcurl']
pkg_conf = configuration_data()
pkg_conf.set('prefix', get_option('prefix'))

View File

@@ -1,20 +0,0 @@
.TH KIWIX-COMPILE-RESOURCES "1" "August 2017" "Kiwix" "User Commands"
.SH NAME
kiwix-compile-resources \- helper to compile and generate some Kiwix resources
.SH SYNOPSIS
\fBkiwix\-compile\-resources\fR [\-h] [\-\-cxxfile CXXFILE] [\-\-hfile HFILE] resource_file\fR
.SH DESCRIPTION
.TP
resource_file
The list of resources to compile.
.TP
\fB\-h\fR, \fB\-\-help\fR
show a help message and exit
.TP
\fB\-\-cxxfile\fR CXXFILE
The Cpp file name to generate
.TP
\fB\-\-hfile\fR HFILE
The h file name to generate
.SH AUTHOR
Matthieu Gautier <mgautier@kymeria.fr>

View File

@@ -2,5 +2,3 @@
res_compiler = find_program('kiwix-compile-resources')
install_data(res_compiler.path(), install_dir:get_option('bindir'))
install_man('kiwix-compile-resources.1')

View File

@@ -6,10 +6,10 @@
#include <sstream>
#include <thread>
#include <chrono>
#include "tools.h"
#include "tools/pathTools.h"
#include "tools/stringTools.h"
#include "downloader.h" // For AriaError
#include <tools/otherTools.h>
#include <tools/pathTools.h>
#include <tools/stringTools.h>
#include <downloader.h> // For AriaError
#ifdef _WIN32
# define ARIA2_CMD "aria2c.exe"
@@ -19,20 +19,15 @@
#endif
#define LOG_ARIA_ERROR() \
{ \
std::cerr << "ERROR: aria2 RPC request failed. (" << res << ")." << std::endl; \
std::cerr << (m_curlErrorBuffer[0] ? m_curlErrorBuffer.get() : curl_easy_strerror(res)) << std::endl; \
}
namespace kiwix {
Aria2::Aria2():
Aria2::Aria2(std::string sessionFileDir):
mp_aria(nullptr),
m_port(42042),
m_secret("kiwixariarpc"),
m_curlErrorBuffer(new char[CURL_ERROR_SIZE]),
mp_curl(nullptr)
mp_curl(nullptr),
m_lock(PTHREAD_MUTEX_INITIALIZER)
{
m_downloadDir = getDataDirectory();
makeDirectory(m_downloadDir);
@@ -40,7 +35,12 @@ Aria2::Aria2():
std::string rpc_port = "--rpc-listen-port=" + to_string(m_port);
std::string download_dir = "--dir=" + getDataDirectory();
std::string session_file = appendToDirectory(getDataDirectory(), "kiwix.session");
std::string session_file;
if (sessionFileDir.empty()) {
session_file = appendToDirectory(getDataDirectory(), "kiwix.session");
} else {
session_file = appendToDirectory(sessionFileDir, "kiwix.session");
}
std::string session = "--save-session=" + session_file;
std::string inputFile = "--input-file=" + session_file;
// std::string log_dir = "--log=\"" + logDir + "\"";
@@ -63,7 +63,6 @@ Aria2::Aria2():
// Try to use a potential installed aria2c.
callCmd.push_back(ARIA2_CMD);
}
callCmd.push_back("--follow-metalink=mem");
callCmd.push_back("--enable-rpc");
callCmd.push_back(rpc_secret.c_str());
callCmd.push_back(rpc_port.c_str());
@@ -90,21 +89,27 @@ Aria2::Aria2():
}
mp_aria = Subprocess::run(callCmd);
mp_curl = curl_easy_init();
char errbuf[CURL_ERROR_SIZE];
curl_easy_setopt(mp_curl, CURLOPT_URL, "http://localhost/rpc");
curl_easy_setopt(mp_curl, CURLOPT_PORT, m_port);
curl_easy_setopt(mp_curl, CURLOPT_POST, 1L);
curl_easy_setopt(mp_curl, CURLOPT_ERRORBUFFER, m_curlErrorBuffer.get());
curl_easy_setopt(mp_curl, CURLOPT_ERRORBUFFER, errbuf);
int watchdog = 50;
while(--watchdog) {
sleep(10);
m_curlErrorBuffer[0] = 0;
errbuf[0] = 0;
auto res = curl_easy_perform(mp_curl);
if (res == CURLE_OK) {
break;
} else if (watchdog == 1) {
LOG_ARIA_ERROR();
std::cerr <<" curl_easy_perform() failed." << std::endl;
fprintf(stderr, "\nlibcurl: (%d) ", res);
if (errbuf[0] != 0) {
std::cerr << errbuf << std::endl;
} else {
std::cerr << curl_easy_strerror(res) << std::endl;
}
}
}
if (!watchdog) {
@@ -115,7 +120,6 @@ Aria2::Aria2():
Aria2::~Aria2()
{
std::unique_lock<std::mutex> lock(m_lock);
curl_easy_cleanup(mp_curl);
}
@@ -127,44 +131,38 @@ void Aria2::close()
size_t write_callback_to_iss(char* ptr, size_t size, size_t nmemb, void* userdata)
{
auto outStream = static_cast<std::stringstream*>(userdata);
outStream->write(ptr, nmemb);
auto str = static_cast<std::stringstream*>(userdata);
str->write(ptr, nmemb);
return nmemb;
}
std::string Aria2::doRequest(const MethodCall& methodCall)
{
pthread_mutex_lock(&m_lock);
auto requestContent = methodCall.toString();
std::stringstream outStream;
std::stringstream stringstream;
CURLcode res;
long response_code;
{
std::unique_lock<std::mutex> lock(m_lock);
curl_easy_setopt(mp_curl, CURLOPT_POSTFIELDSIZE, requestContent.size());
curl_easy_setopt(mp_curl, CURLOPT_POSTFIELDS, requestContent.c_str());
curl_easy_setopt(mp_curl, CURLOPT_WRITEFUNCTION, &write_callback_to_iss);
curl_easy_setopt(mp_curl, CURLOPT_WRITEDATA, &outStream);
m_curlErrorBuffer[0] = 0;
res = curl_easy_perform(mp_curl);
if (res != CURLE_OK) {
LOG_ARIA_ERROR();
throw std::runtime_error("Cannot perform request");
}
curl_easy_setopt(mp_curl, CURLOPT_POSTFIELDSIZE, requestContent.size());
curl_easy_setopt(mp_curl, CURLOPT_POSTFIELDS, requestContent.c_str());
curl_easy_setopt(mp_curl, CURLOPT_WRITEFUNCTION, &write_callback_to_iss);
curl_easy_setopt(mp_curl, CURLOPT_WRITEDATA, &stringstream);
res = curl_easy_perform(mp_curl);
if (res == CURLE_OK) {
long response_code;
curl_easy_getinfo(mp_curl, CURLINFO_RESPONSE_CODE, &response_code);
pthread_mutex_unlock(&m_lock);
if (response_code != 200) {
throw std::runtime_error("Invalid return code from aria");
}
auto responseContent = stringstream.str();
MethodResponse response(responseContent);
if (response.isFault()) {
throw AriaError(response.getFault().getFaultString());
}
return responseContent;
}
auto responseContent = outStream.str();
if (response_code != 200) {
std::cerr << "ERROR: Invalid return code (" << response_code << ") from aria :" << std::endl;
std::cerr << responseContent << std::endl;
throw std::runtime_error("Invalid return code from aria");
}
MethodResponse response(responseContent);
if (response.isFault()) {
throw AriaError(response.getFault().getFaultString());
}
return responseContent;
pthread_mutex_unlock(&m_lock);
throw std::runtime_error("Cannot perform request");
}
std::string Aria2::addUri(const std::vector<std::string>& uris, const std::vector<std::pair<std::string, std::string>>& options)

View File

@@ -12,8 +12,8 @@
#include "xmlrpc.h"
#include <memory>
#include <mutex>
#include <curl/curl.h>
#include <pthread.h>
namespace kiwix {
@@ -24,14 +24,13 @@ class Aria2
int m_port;
std::string m_secret;
std::string m_downloadDir;
std::unique_ptr<char[]> m_curlErrorBuffer;
CURL* mp_curl;
std::mutex m_lock;
pthread_mutex_t m_lock;
std::string doRequest(const MethodCall& methodCall);
public:
Aria2();
Aria2(std::string sessionFileDir = "");
virtual ~Aria2();
void close();

View File

@@ -20,16 +20,10 @@
#include "book.h"
#include "reader.h"
#include "tools.h"
#include "tools/base64.h"
#include "tools/regexTools.h"
#include "tools/networkTools.h"
#include "tools/otherTools.h"
#include "tools/stringTools.h"
#include "tools/pathTools.h"
#include "tools/archiveTools.h"
#include <zim/archive.h>
#include <pugixml.hpp>
@@ -67,7 +61,6 @@ bool Book::update(const kiwix::Book& other)
m_name = other.m_name;
m_flavour = other.m_flavour;
m_tags = other.m_tags;
m_category = other.m_category;
m_origId = other.m_origId;
m_articleCount = other.m_articleCount;
m_mediaCount = other.m_mediaCount;
@@ -83,28 +76,25 @@ bool Book::update(const kiwix::Book& other)
void Book::update(const kiwix::Reader& reader)
{
update(*reader.getZimArchive());
}
void Book::update(const zim::Archive& archive) {
m_path = archive.getFilename();
m_path = reader.getZimFilePath();
m_pathValid = true;
m_id = reader.getId();
m_title = reader.getTitle();
m_description = reader.getDescription();
m_language = reader.getLanguage();
m_creator = reader.getCreator();
m_publisher = reader.getPublisher();
m_date = reader.getDate();
m_name = reader.getName();
m_flavour = reader.getFlavour();
m_tags = reader.getTags();
m_origId = reader.getOrigId();
m_articleCount = reader.getArticleCount();
m_mediaCount = reader.getMediaCount();
m_size = static_cast<uint64_t>(reader.getFileSize()) << 10;
m_pathValid = true;
m_id = getArchiveId(archive);
m_title = getArchiveTitle(archive);
m_description = getMetaDescription(archive);
m_language = getMetaLanguage(archive);
m_creator = getMetaCreator(archive);
m_publisher = getMetaPublisher(archive);
m_date = getMetaDate(archive);
m_name = getMetaName(archive);
m_flavour = getMetaFlavour(archive);
m_tags = getMetaTags(archive);
m_category = getCategoryFromTags();
m_articleCount = archive.getArticleCount();
m_mediaCount = getArchiveMediaCount(archive);
m_size = static_cast<uint64_t>(getArchiveFileSize(archive)) << 10;
getArchiveFavicon(archive, 48, m_favicon, m_faviconMimeType);
reader.getFavicon(m_favicon, m_faviconMimeType);
}
#define ATTR(name) node.attribute(name).value()
@@ -137,8 +127,6 @@ void Book::updateFromXml(const pugi::xml_node& node, const std::string& baseDir)
try {
m_downloadId = ATTR("downloadId");
} catch(...) {}
const auto catattr = node.attribute("category");
m_category = catattr.empty() ? getCategoryFromTags() : catattr.value();
}
#undef ATTR
@@ -168,10 +156,6 @@ void Book::updateFromOpds(const pugi::xml_node& node, const std::string& urlHost
m_name = VALUE("name");
m_flavour = VALUE("flavour");
m_tags = VALUE("tags");
const auto catnode = node.child("category");
m_category = catnode.empty() ? getCategoryFromTags() : catnode.child_value();
m_articleCount = strtoull(VALUE("articleCount"), 0, 0);
m_mediaCount = strtoull(VALUE("mediaCount"), 0, 0);
for(auto linkNode = node.child("link"); linkNode;
linkNode = linkNode.next_sibling("link")) {
std::string rel = linkNode.attribute("rel").value();
@@ -234,21 +218,4 @@ bool Book::getTagBool(const std::string& tagName) const {
return convertStrToBool(getTagStr(tagName));
}
std::string Book::getCategory() const
{
return m_category;
}
std::string Book::getCategoryFromTags() const
{
try
{
return getTagStr("category");
}
catch ( const std::out_of_range& )
{
return "";
}
}
}

View File

@@ -124,8 +124,8 @@ void Download::cancelDownload()
}
/* Constructor */
Downloader::Downloader() :
mp_aria(new Aria2())
Downloader::Downloader(std::string sessionFileDir) :
mp_aria(new Aria2(sessionFileDir))
{
try {
for (auto gid : mp_aria->tellActive()) {
@@ -133,7 +133,7 @@ Downloader::Downloader() :
m_knownDownloads[gid]->updateStatus();
}
} catch (std::exception& e) {
std::cerr << "aria2 tellActive failed : " << e.what() << std::endl;
std::cerr << "aria2 tellActive failed : " << e.what();
}
try {
for (auto gid : mp_aria->tellWaiting()) {
@@ -141,7 +141,7 @@ Downloader::Downloader() :
m_knownDownloads[gid]->updateStatus();
}
} catch (std::exception& e) {
std::cerr << "aria2 tellWaiting failed : " << e.what() << std::endl;
std::cerr << "aria2 tellWaiting failed : " << e.what();
}
}

View File

@@ -1,5 +1,5 @@
/*
* Copyright 2018-2020 Matthieu Gautier <mgautier@kymeria.fr>
* Copyright 2011 Emmanuel Engelhart <kelson@kiwix.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -20,54 +20,121 @@
#include "reader.h"
#include <time.h>
#include <zim/search.h>
namespace kiwix
{
Entry::Entry(zim::Entry entry)
: entry(entry)
Entry::Entry(zim::Article article)
: article(article)
{
}
#define RETURN_IF_INVALID(WHAT) if(!good()) { return (WHAT); }
std::string Entry::getPath() const
{
RETURN_IF_INVALID("");
return article.getLongUrl();
}
std::string Entry::getTitle() const
{
RETURN_IF_INVALID("");
return article.getTitle();
}
std::string Entry::getContent() const
{
RETURN_IF_INVALID("");
return article.getData();
}
zim::Blob Entry::getBlob(offset_type offset) const
{
RETURN_IF_INVALID(zim::Blob());
return article.getData(offset);
}
zim::Blob Entry::getBlob(offset_type offset, size_type size) const
{
RETURN_IF_INVALID(zim::Blob());
return article.getData(offset, size);
}
std::pair<std::string, offset_type> Entry::getDirectAccessInfo() const
{
RETURN_IF_INVALID(std::make_pair("", 0));
return article.getDirectAccessInformation();
}
size_type Entry::getSize() const
{
if (entry.isRedirect()) {
return 0;
} else {
return entry.getItem().getSize();
}
RETURN_IF_INVALID(0);
return article.getArticleSize();
}
std::string Entry::getMimetype() const
{
return entry.getItem(true).getMimetype();
RETURN_IF_INVALID("");
try {
return article.getMimeType();
} catch (exception& e) {
return "application/octet-stream";
}
}
bool Entry::isRedirect() const
{
return entry.isRedirect();
RETURN_IF_INVALID(false);
return article.isRedirect();
}
bool Entry::isLinkTarget() const
{
RETURN_IF_INVALID(false);
return article.isLinktarget();
}
bool Entry::isDeleted() const
{
RETURN_IF_INVALID(false);
return article.isDeleted();
}
Entry Entry::getRedirectEntry() const
{
if ( !entry.isRedirect() ) {
RETURN_IF_INVALID(Entry());
if ( !article.isRedirect() ) {
throw NoEntry();
}
return entry.getRedirectEntry();
auto targeted_article = article.getRedirectArticle();
if ( !targeted_article.good()) {
throw NoEntry();
}
return targeted_article;
}
Entry Entry::getFinalEntry() const
{
RETURN_IF_INVALID(Entry());
if (final_article.good()) {
return final_article;
}
int loopCounter = 42;
auto final_entry = entry;
while (final_entry.isRedirect() && loopCounter--) {
final_entry = final_entry.getRedirectEntry();
final_article = article;
while (final_article.isRedirect() && loopCounter--) {
final_article = final_article.getRedirectArticle();
if ( !final_article.good()) {
throw NoEntry();
}
}
// Prevent infinite loops.
if (final_entry.isRedirect()) {
if (final_article.isRedirect()) {
throw NoEntry();
}
return final_entry;
return final_article;
}
}

View File

@@ -9,7 +9,6 @@
# include <unistd.h>
#endif
#include "tools.h"
#include "tools/pathTools.h"
#include "tools/stringTools.h"

View File

@@ -22,7 +22,6 @@
#include "reader.h"
#include "libxml_dumper.h"
#include "tools.h"
#include "tools/base64.h"
#include "tools/regexTools.h"
#include "tools/pathTools.h"
@@ -31,42 +30,14 @@
#include <pugixml.hpp>
#include <algorithm>
#include <set>
#include <unicode/locid.h>
#include <xapian.h>
namespace kiwix
{
namespace
{
std::string iso639_3ToXapian(const std::string& lang) {
return icu::Locale(lang.c_str()).getLanguage();
};
std::string normalizeText(const std::string& text)
{
return removeAccents(text);
}
} // unnamed namespace
class Library::BookDB : public Xapian::WritableDatabase
{
public:
BookDB() : Xapian::WritableDatabase("", Xapian::DB_BACKEND_INMEMORY) {}
};
/* Constructor */
Library::Library()
: m_bookDB(new BookDB)
{
}
Library::Library(Library&& ) = default;
Library& Library::operator=(Library&& ) = default;
/* Destructor */
Library::~Library()
{
@@ -76,7 +47,6 @@ Library::~Library()
bool Library::addBook(const Book& book)
{
/* Try to find it */
updateBookDB(book);
try {
auto& oldbook = m_books.at(book.getId());
oldbook.update(book);
@@ -107,24 +77,15 @@ bool Library::removeBookmark(const std::string& zimId, const std::string& url)
bool Library::removeBookById(const std::string& id)
{
m_bookDB->delete_document("Q" + id);
m_readers.erase(id);
m_archives.erase(id);
return m_books.erase(id) == 1;
}
const Book& Library::getBookById(const std::string& id) const
{
return m_books.at(id);
}
Book& Library::getBookById(const std::string& id)
{
const Library& const_self = *this;
return const_cast<Book&>(const_self.getBookById(id));
return m_books.at(id);
}
const Book& Library::getBookByPath(const std::string& path) const
Book& Library::getBookByPath(const std::string& path)
{
for(auto& it: m_books) {
auto& book = it.second;
@@ -136,52 +97,22 @@ const Book& Library::getBookByPath(const std::string& path) const
throw std::out_of_range(ss.str());
}
Book& Library::getBookByPath(const std::string& path)
{
const Library& const_self = *this;
return const_cast<Book&>(const_self.getBookByPath(path));
}
std::shared_ptr<Reader> Library::getReaderById(const std::string& id)
{
try {
return m_readers.at(id);
} catch (std::out_of_range& e) {}
try {
auto reader = make_shared<Reader>(m_archives.at(id));
m_readers[id] = reader;
return reader;
} catch (std::out_of_range& e) {}
auto book = getBookById(id);
if (!book.isPathValid())
return nullptr;
auto archive = make_shared<zim::Archive>(book.getPath());
m_archives[id] = archive;
auto reader = make_shared<Reader>(archive);
m_readers[id] = reader;
return reader;
}
std::shared_ptr<zim::Archive> Library::getArchiveById(const std::string& id)
{
try {
return m_archives.at(id);
} catch (std::out_of_range& e) {}
auto book = getBookById(id);
if (!book.isPathValid())
return nullptr;
auto sptr = make_shared<zim::Archive>(book.getPath());
m_archives[id] = sptr;
auto sptr = make_shared<Reader>(book.getPath());
m_readers[id] = sptr;
return sptr;
}
unsigned int Library::getBookCount(const bool localBooks,
const bool remoteBooks) const
const bool remoteBooks)
{
unsigned int result = 0;
for (auto& pair: m_books) {
@@ -194,7 +125,7 @@ unsigned int Library::getBookCount(const bool localBooks,
return result;
}
bool Library::writeToFile(const std::string& path) const
bool Library::writeToFile(const std::string& path)
{
auto baseDir = removeLastPathElement(path);
LibXMLDumper dumper(this);
@@ -202,70 +133,70 @@ bool Library::writeToFile(const std::string& path) const
return writeTextFile(path, dumper.dumpLibXMLContent(getBooksIds()));
}
bool Library::writeBookmarksToFile(const std::string& path) const
bool Library::writeBookmarksToFile(const std::string& path)
{
LibXMLDumper dumper(this);
return writeTextFile(path, dumper.dumpLibXMLBookmark());
}
Library::AttributeCounts Library::getBookAttributeCounts(BookStrPropMemFn p) const
std::vector<std::string> Library::getBooksLanguages()
{
AttributeCounts propValueCounts;
std::vector<std::string> booksLanguages;
std::map<std::string, bool> booksLanguagesMap;
for (const auto& pair: m_books) {
const auto& book = pair.second;
if (book.getOrigId().empty()) {
propValueCounts[(book.*p)()] += 1;
}
}
return propValueCounts;
}
std::vector<std::string> Library::getBookPropValueSet(BookStrPropMemFn p) const
{
std::vector<std::string> result;
for ( const auto& kv : getBookAttributeCounts(p) ) {
result.push_back(kv.first);
}
return result;
}
std::vector<std::string> Library::getBooksLanguages() const
{
return getBookPropValueSet(&Book::getLanguage);
}
Library::AttributeCounts Library::getBooksLanguagesWithCounts() const
{
return getBookAttributeCounts(&Book::getLanguage);
}
std::vector<std::string> Library::getBooksCategories() const
{
std::set<std::string> categories;
for (const auto& pair: m_books) {
const auto& book = pair.second;
const auto& c = book.getCategory();
if ( !c.empty() ) {
categories.insert(c);
for (auto& pair: m_books) {
auto& book = pair.second;
auto& language = book.getLanguage();
if (booksLanguagesMap.find(language) == booksLanguagesMap.end()) {
if (book.getOrigId().empty()) {
booksLanguagesMap[language] = true;
booksLanguages.push_back(language);
}
}
}
return std::vector<std::string>(categories.begin(), categories.end());
return booksLanguages;
}
std::vector<std::string> Library::getBooksCreators() const
std::vector<std::string> Library::getBooksCreators()
{
return getBookPropValueSet(&Book::getCreator);
std::vector<std::string> booksCreators;
std::map<std::string, bool> booksCreatorsMap;
for (auto& pair: m_books) {
auto& book = pair.second;
auto& creator = book.getCreator();
if (booksCreatorsMap.find(creator) == booksCreatorsMap.end()) {
if (book.getOrigId().empty()) {
booksCreatorsMap[creator] = true;
booksCreators.push_back(creator);
}
}
}
return booksCreators;
}
std::vector<std::string> Library::getBooksPublishers() const
std::vector<std::string> Library::getBooksPublishers()
{
return getBookPropValueSet(&Book::getPublisher);
std::vector<std::string> booksPublishers;
std::map<std::string, bool> booksPublishersMap;
for (auto& pair:m_books) {
auto& book = pair.second;
auto& publisher = book.getPublisher();
if (booksPublishersMap.find(publisher) == booksPublishersMap.end()) {
if (book.getOrigId().empty()) {
booksPublishersMap[publisher] = true;
booksPublishers.push_back(publisher);
}
}
}
return booksPublishers;
}
const std::vector<kiwix::Bookmark> Library::getBookmarks(bool onlyValidBookmarks) const
const std::vector<kiwix::Bookmark> Library::getBookmarks(bool onlyValidBookmarks)
{
if (!onlyValidBookmarks) {
return m_bookmarks;
@@ -280,9 +211,9 @@ const std::vector<kiwix::Bookmark> Library::getBookmarks(bool onlyValidBookmarks
return validBookmarks;
}
Library::BookIdCollection Library::getBooksIds() const
std::vector<std::string> Library::getBooksIds()
{
BookIdCollection bookIds;
std::vector<std::string> bookIds;
for (auto& pair: m_books) {
bookIds.push_back(pair.first);
@@ -291,7 +222,7 @@ Library::BookIdCollection Library::getBooksIds() const
return bookIds;
}
Library::BookIdCollection Library::filter(const std::string& search) const
std::vector<std::string> Library::filter(const std::string& search)
{
if (search.empty()) {
return getBooksIds();
@@ -301,195 +232,16 @@ Library::BookIdCollection Library::filter(const std::string& search) const
}
void Library::updateBookDB(const Book& book)
std::vector<std::string> Library::filter(const Filter& filter)
{
Xapian::Stem stemmer;
Xapian::TermGenerator indexer;
const std::string lang = book.getLanguage();
try {
stemmer = Xapian::Stem(iso639_3ToXapian(lang));
indexer.set_stemmer(stemmer);
indexer.set_stemming_strategy(Xapian::TermGenerator::STEM_SOME);
} catch (...) {}
Xapian::Document doc;
indexer.set_document(doc);
const std::string title = normalizeText(book.getTitle());
const std::string desc = normalizeText(book.getDescription());
// Index title and description without prefixes for general search
indexer.index_text(title);
indexer.increase_termpos();
indexer.index_text(desc);
// Index all fields for field-based search
indexer.index_text(title, 1, "S");
indexer.index_text(desc, 1, "XD");
indexer.index_text(lang, 1, "L");
indexer.index_text(normalizeText(book.getCreator()), 1, "A");
indexer.index_text(normalizeText(book.getPublisher()), 1, "XP");
indexer.index_text(normalizeText(book.getName()), 1, "XN");
indexer.index_text(normalizeText(book.getCategory()), 1, "XC");
for ( const auto& tag : split(normalizeText(book.getTags()), ";") )
doc.add_boolean_term("XT" + tag);
const std::string idterm = "Q" + book.getId();
doc.add_boolean_term(idterm);
doc.set_data(book.getId());
m_bookDB->replace_document(idterm, doc);
}
namespace
{
bool willSelectEverything(const Xapian::Query& query)
{
return query.get_type() == Xapian::Query::LEAF_MATCH_ALL;
}
Xapian::Query buildXapianQueryFromFilterQuery(const Filter& filter)
{
if ( !filter.hasQuery() || filter.getQuery().empty() ) {
// This is a thread-safe way to construct an equivalent of
// a Xapian::Query::MatchAll query
return Xapian::Query(std::string());
}
Xapian::QueryParser queryParser;
queryParser.set_default_op(Xapian::Query::OP_AND);
queryParser.add_prefix("title", "S");
queryParser.add_prefix("description", "XD");
queryParser.add_prefix("name", "XN");
queryParser.add_prefix("category", "XC");
queryParser.add_prefix("lang", "L");
queryParser.add_prefix("publisher", "XP");
queryParser.add_prefix("creator", "A");
queryParser.add_prefix("tag", "XT");
const auto partialQueryFlag = filter.queryIsPartial()
? Xapian::QueryParser::FLAG_PARTIAL
: 0;
// Language assumed for the query is not known for sure so stemming
// is not applied
//queryParser.set_stemmer(Xapian::Stem(iso639_3ToXapian(???)));
//queryParser.set_stemming_strategy(Xapian::QueryParser::STEM_SOME);
const auto flags = Xapian::QueryParser::FLAG_PHRASE
| Xapian::QueryParser::FLAG_BOOLEAN
| Xapian::QueryParser::FLAG_BOOLEAN_ANY_CASE
| Xapian::QueryParser::FLAG_LOVEHATE
| Xapian::QueryParser::FLAG_WILDCARD
| partialQueryFlag;
return queryParser.parse_query(normalizeText(filter.getQuery()), flags);
}
Xapian::Query nameQuery(const std::string& name)
{
return Xapian::Query("XN" + normalizeText(name));
}
Xapian::Query categoryQuery(const std::string& category)
{
return Xapian::Query("XC" + normalizeText(category));
}
Xapian::Query langQuery(const std::string& lang)
{
return Xapian::Query("L" + normalizeText(lang));
}
Xapian::Query publisherQuery(const std::string& publisher)
{
Xapian::QueryParser queryParser;
queryParser.set_default_op(Xapian::Query::OP_OR);
queryParser.set_stemming_strategy(Xapian::QueryParser::STEM_NONE);
const auto flags = 0;
const auto q = queryParser.parse_query(normalizeText(publisher), flags, "XP");
return Xapian::Query(Xapian::Query::OP_PHRASE, q.get_terms_begin(), q.get_terms_end(), q.get_length());
}
Xapian::Query creatorQuery(const std::string& creator)
{
Xapian::QueryParser queryParser;
queryParser.set_default_op(Xapian::Query::OP_OR);
queryParser.set_stemming_strategy(Xapian::QueryParser::STEM_NONE);
const auto flags = 0;
const auto q = queryParser.parse_query(normalizeText(creator), flags, "A");
return Xapian::Query(Xapian::Query::OP_PHRASE, q.get_terms_begin(), q.get_terms_end(), q.get_length());
}
Xapian::Query tagsQuery(const Filter::Tags& acceptTags, const Filter::Tags& rejectTags)
{
Xapian::Query q = Xapian::Query(std::string());
if (!acceptTags.empty()) {
for ( const auto& tag : acceptTags )
q &= Xapian::Query("XT" + normalizeText(tag));
}
if (!rejectTags.empty()) {
for ( const auto& tag : rejectTags )
q = Xapian::Query(Xapian::Query::OP_AND_NOT, q, "XT" + normalizeText(tag));
}
return q;
}
Xapian::Query buildXapianQuery(const Filter& filter)
{
auto q = buildXapianQueryFromFilterQuery(filter);
if ( filter.hasName() ) {
q = Xapian::Query(Xapian::Query::OP_AND, q, nameQuery(filter.getName()));
}
if ( filter.hasCategory() ) {
q = Xapian::Query(Xapian::Query::OP_AND, q, categoryQuery(filter.getCategory()));
}
if ( filter.hasLang() ) {
q = Xapian::Query(Xapian::Query::OP_AND, q, langQuery(filter.getLang()));
}
if ( filter.hasPublisher() ) {
q = Xapian::Query(Xapian::Query::OP_AND, q, publisherQuery(filter.getPublisher()));
}
if ( filter.hasCreator() ) {
q = Xapian::Query(Xapian::Query::OP_AND, q, creatorQuery(filter.getCreator()));
}
if ( !filter.getAcceptTags().empty() || !filter.getRejectTags().empty() ) {
const auto tq = tagsQuery(filter.getAcceptTags(), filter.getRejectTags());
q = Xapian::Query(Xapian::Query::OP_AND, q, tq);;
}
return q;
}
} // unnamed namespace
Library::BookIdCollection Library::filterViaBookDB(const Filter& filter) const
{
const auto query = buildXapianQuery(filter);
if ( willSelectEverything(query) )
return getBooksIds();
BookIdCollection bookIds;
Xapian::Enquire enquire(*m_bookDB);
enquire.set_query(query);
const auto results = enquire.get_mset(0, m_books.size());
for ( auto it = results.begin(); it != results.end(); ++it ) {
bookIds.push_back(it.get_document().get_data());
}
return bookIds;
}
Library::BookIdCollection Library::filter(const Filter& filter) const
{
BookIdCollection result;
for(auto id : filterViaBookDB(filter)) {
if(filter.accept(m_books.at(id))) {
result.push_back(id);
std::vector<std::string> bookIds;
for(auto& pair:m_books) {
auto book = pair.second;
if(filter.accept(book)) {
bookIds.push_back(pair.first);
}
}
return result;
return bookIds;
}
template<supportedListSortBy SORT>
@@ -505,13 +257,13 @@ struct KEY_TYPE<SIZE> {
template<supportedListSortBy sort>
class Comparator {
private:
const Library* const lib;
const bool ascending;
Library* lib;
bool ascending;
inline typename KEY_TYPE<sort>::TYPE get_key(const std::string& id);
public:
Comparator(const Library* lib, bool ascending) : lib(lib), ascending(ascending) {}
Comparator(Library* lib, bool ascending) : lib(lib), ascending(ascending) {}
inline bool operator() (const std::string& id1, const std::string& id2) {
if (ascending) {
return get_key(id1) < get_key(id2);
@@ -551,7 +303,7 @@ std::string Comparator<PUBLISHER>::get_key(const std::string& id)
return lib->getBookById(id).getPublisher();
}
void Library::sort(BookIdCollection& bookIds, supportedListSortBy sort, bool ascending) const
void Library::sort(std::vector<std::string>& bookIds, supportedListSortBy sort, bool ascending)
{
switch(sort) {
case TITLE:
@@ -575,7 +327,7 @@ void Library::sort(BookIdCollection& bookIds, supportedListSortBy sort, bool asc
}
Library::BookIdCollection Library::listBooksIds(
std::vector<std::string> Library::listBooksIds(
int mode,
supportedListSortBy sortBy,
const std::string& search,
@@ -583,7 +335,7 @@ Library::BookIdCollection Library::listBooksIds(
const std::string& creator,
const std::string& publisher,
const std::vector<std::string>& tags,
size_t maxSize) const {
size_t maxSize) {
Filter _filter;
if (mode & LOCAL)
@@ -639,7 +391,6 @@ enum filterTypes {
MAXSIZE = FLAG(11),
QUERY = FLAG(12),
NAME = FLAG(13),
CATEGORY = FLAG(14),
};
Filter& Filter::local(bool accept)
@@ -678,27 +429,20 @@ Filter& Filter::valid(bool accept)
return *this;
}
Filter& Filter::acceptTags(const Tags& tags)
Filter& Filter::acceptTags(std::vector<std::string> tags)
{
_acceptTags = tags;
activeFilters |= ACCEPTTAGS;
return *this;
}
Filter& Filter::rejectTags(const Tags& tags)
Filter& Filter::rejectTags(std::vector<std::string> tags)
{
_rejectTags = tags;
activeFilters |= REJECTTAGS;
return *this;
}
Filter& Filter::category(std::string category)
{
_category = category;
activeFilters |= CATEGORY;
return *this;
}
Filter& Filter::lang(std::string lang)
{
_lang = lang;
@@ -727,10 +471,9 @@ Filter& Filter::maxSize(size_t maxSize)
return *this;
}
Filter& Filter::query(std::string query, bool partial)
Filter& Filter::query(std::string query)
{
_query = query;
_queryIsPartial = partial;
activeFilters |= QUERY;
return *this;
}
@@ -744,36 +487,6 @@ Filter& Filter::name(std::string name)
#define ACTIVE(X) (activeFilters & (X))
#define FILTER(TAG, TEST) if (ACTIVE(TAG) && !(TEST)) { return false; }
bool Filter::hasQuery() const
{
return ACTIVE(QUERY);
}
bool Filter::hasName() const
{
return ACTIVE(NAME);
}
bool Filter::hasCategory() const
{
return ACTIVE(CATEGORY);
}
bool Filter::hasLang() const
{
return ACTIVE(LANG);
}
bool Filter::hasPublisher() const
{
return ACTIVE(_PUBLISHER);
}
bool Filter::hasCreator() const
{
return ACTIVE(_CREATOR);
}
bool Filter::accept(const Book& book) const
{
auto local = !book.getPath().empty();
@@ -789,8 +502,40 @@ bool Filter::accept(const Book& book) const
FILTER(_NOREMOTE, !remote)
FILTER(MAXSIZE, book.getSize() <= _maxSize)
FILTER(LANG, book.getLanguage() == _lang)
FILTER(_PUBLISHER, book.getPublisher() == _publisher)
FILTER(_CREATOR, book.getCreator() == _creator)
FILTER(NAME, book.getName() == _name)
if (ACTIVE(ACCEPTTAGS)) {
if (!_acceptTags.empty()) {
auto vBookTags = split(book.getTags(), ";");
std::set<std::string> sBookTags(vBookTags.begin(), vBookTags.end());
for (auto& t: _acceptTags) {
if (sBookTags.find(t) == sBookTags.end()) {
return false;
}
}
}
}
if (ACTIVE(REJECTTAGS)) {
if (!_rejectTags.empty()) {
auto vBookTags = split(book.getTags(), ";");
std::set<std::string> sBookTags(vBookTags.begin(), vBookTags.end());
for (auto& t: _rejectTags) {
if (sBookTags.find(t) != sBookTags.end()) {
return false;
}
}
}
}
if ( ACTIVE(QUERY)
&& !(matchRegex(book.getTitle(), "\\Q" + _query + "\\E")
|| matchRegex(book.getDescription(), "\\Q" + _query + "\\E")))
return false;
return true;
}
}

View File

@@ -20,15 +20,15 @@
#include "libxml_dumper.h"
#include "book.h"
#include "tools.h"
#include "tools/base64.h"
#include "tools/stringTools.h"
#include "tools/otherTools.h"
#include "tools/pathTools.h"
namespace kiwix
{
/* Constructor */
LibXMLDumper::LibXMLDumper(const Library* library)
LibXMLDumper::LibXMLDumper(Library* library)
: library(library)
{
}

View File

@@ -19,7 +19,6 @@
#include "manager.h"
#include "tools.h"
#include "tools/pathTools.h"
#include <pugixml.hpp>
@@ -81,7 +80,7 @@ bool Manager::readXml(const std::string& xml,
{
pugi::xml_document doc;
pugi::xml_parse_result result
= doc.load_buffer((void*)xml.data(), xml.size());
= doc.load_buffer_inplace((void*)xml.data(), xml.size());
if (result) {
this->parseXmlDom(doc, readOnly, libraryPath, trustLibrary);
@@ -125,7 +124,7 @@ bool Manager::readOpds(const std::string& content, const std::string& urlHost)
{
pugi::xml_document doc;
pugi::xml_parse_result result
= doc.load_buffer((void*)content.data(), content.size());
= doc.load_buffer_inplace((void*)content.data(), content.size());
if (result) {
this->parseOpdsDom(doc, urlHost);
@@ -176,7 +175,7 @@ std::string Manager::addBookFromPathAndGetId(const std::string& pathToOpen,
kiwix::Book book;
if (this->readBookFromPath(pathToOpen, &book)) {
if (!pathToSave.empty() && pathToSave != pathToOpen) {
if (pathToSave != pathToOpen) {
book.setPath(isRelativePath(pathToSave)
? computeAbsolutePath(
removeLastPathElement(writableLibraryPath),
@@ -215,8 +214,8 @@ bool Manager::readBookFromPath(const std::string& path, kiwix::Book* book)
tmp_path = computeAbsolutePath(getCurrentDirectory(), path);
}
try {
zim::Archive archive(tmp_path);
book->update(archive);
kiwix::Reader reader(tmp_path);
book->update(reader);
book->setPathValid(true);
} catch (const std::exception& e) {
book->setPathValid(false);

View File

@@ -19,16 +19,10 @@ kiwix_sources = [
'tools/stringTools.cpp',
'tools/networkTools.cpp',
'tools/otherTools.cpp',
'tools/archiveTools.cpp',
'kiwixserve.cpp',
'name_mapper.cpp',
'server/byte_range.cpp',
'server/etag.cpp',
'server/request_context.cpp',
'server/response.cpp',
'server/internalServer.cpp',
'server/internalServer_catalog_v2.cpp',
'opds_catalog.cpp'
'server/response.cpp'
]
kiwix_sources += lib_resources
@@ -38,13 +32,13 @@ else
kiwix_sources += 'subprocess_unix.cpp'
endif
if wrapper.contains('android')
if 'android' in wrapper
install_dir = 'kiwix-lib/jniLibs/' + meson.get_cross_property('android_abi')
else
install_dir = get_option('libdir')
endif
if wrapper.contains('android') or wrapper.contains('java')
if 'android' in wrapper or 'java' in wrapper
subdir('wrapper/java')
endif
@@ -57,7 +51,6 @@ kiwixlib = library('kiwix',
kiwix_sources,
include_directories : inc,
dependencies : all_deps,
link_args: extra_libs,
version: meson.project_version(),
install: true,
install_dir: install_dir)

View File

@@ -1,24 +0,0 @@
/*
* Copyright 2020 Emmanuel Engelhart <kelson@kiwix.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*/
#include <microhttpd.h>
#if MHD_VERSION < 0x00097002
typedef int MHD_Result;
#endif

View File

@@ -1,74 +0,0 @@
/*
* Copyright 2021 Veloman Yunkan <veloman.yunkan@gmail.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*/
#include "opds_catalog.h"
#include "tools/stringTools.h"
#include <sstream>
namespace kiwix
{
namespace
{
const char opdsSearchEndpoint[] = "/catalog/v2/entries";
enum Separator { AMP };
std::ostringstream& operator<<(std::ostringstream& oss, Separator sep)
{
if ( oss.tellp() > 0 )
oss << "&";
return oss;
}
std::string buildSearchString(const Filter& f)
{
std::ostringstream oss;
if ( f.hasQuery() )
oss << AMP << "q=" << urlEncode(f.getQuery());
if ( f.hasCategory() )
oss << AMP << "category=" << urlEncode(f.getCategory());
if ( f.hasLang() )
oss << AMP << "lang=" << urlEncode(f.getLang());
if ( f.hasName() )
oss << AMP << "name=" << urlEncode(f.getName());
if ( !f.getAcceptTags().empty() )
oss << AMP << "tag=" << urlEncode(join(f.getAcceptTags(), ";"));
return oss.str();
}
} // unnamed namespace
std::string getSearchUrl(const Filter& f)
{
const std::string searchString = buildSearchString(f);
if ( searchString.empty() )
return opdsSearchEndpoint;
else
return opdsSearchEndpoint + ("?" + searchString);
}
} // namespace kiwix

View File

@@ -20,16 +20,11 @@
#include "opds_dumper.h"
#include "book.h"
#include "kiwixlib-resources.h"
#include <mustache.hpp>
#include <unicode/locid.h>
#include "tools/stringTools.h"
#include "tools/otherTools.h"
#include <iomanip>
namespace kiwix
{
/* Constructor */
OPDSDumper::OPDSDumper(Library* library)
: library(library)
@@ -40,189 +35,118 @@ OPDSDumper::~OPDSDumper()
{
}
std::string gen_date_str()
{
auto now = time(0);
auto tm = localtime(&now);
std::stringstream is;
is << std::setw(2) << std::setfill('0')
<< 1900+tm->tm_year << "-"
<< std::setw(2) << std::setfill('0') << tm->tm_mon << "-"
<< std::setw(2) << std::setfill('0') << tm->tm_mday << "T"
<< std::setw(2) << std::setfill('0') << tm->tm_hour << ":"
<< std::setw(2) << std::setfill('0') << tm->tm_min << ":"
<< std::setw(2) << std::setfill('0') << tm->tm_sec << "Z";
return is.str();
}
static std::string gen_date_from_yyyy_mm_dd(const std::string& date)
{
std::stringstream is;
is << date << "T00:00::00:Z";
return is.str();
}
void OPDSDumper::setOpenSearchInfo(int totalResults, int startIndex, int count)
{
m_totalResults = totalResults;
m_startIndex = startIndex,
m_count = count;
m_isSearchResult = true;
}
namespace
{
#define ADD_TEXT_ENTRY(node, child, value) (node).append_child((child)).append_child(pugi::node_pcdata).set_value((value).c_str())
typedef kainjow::mustache::data MustacheData;
typedef kainjow::mustache::list BooksData;
typedef kainjow::mustache::list IllustrationInfo;
pugi::xml_node OPDSDumper::handleBook(Book book, pugi::xml_node root_node) {
auto entry_node = root_node.append_child("entry");
ADD_TEXT_ENTRY(entry_node, "id", "urn:uuid:"+book.getId());
ADD_TEXT_ENTRY(entry_node, "title", book.getTitle());
ADD_TEXT_ENTRY(entry_node, "summary", book.getDescription());
ADD_TEXT_ENTRY(entry_node, "language", book.getLanguage());
ADD_TEXT_ENTRY(entry_node, "updated", gen_date_from_yyyy_mm_dd(book.getDate()));
ADD_TEXT_ENTRY(entry_node, "name", book.getName());
ADD_TEXT_ENTRY(entry_node, "flavour", book.getFlavour());
ADD_TEXT_ENTRY(entry_node, "tags", book.getTags());
ADD_TEXT_ENTRY(entry_node, "icon", rootLocation + "/meta?name=favicon&content=" + book.getHumanReadableIdFromPath());
IllustrationInfo getBookIllustrationInfo(const Book& book)
auto content_node = entry_node.append_child("link");
content_node.append_attribute("type") = "text/html";
content_node.append_attribute("href") = (rootLocation + "/" + book.getHumanReadableIdFromPath()).c_str();
auto author_node = entry_node.append_child("author");
ADD_TEXT_ENTRY(author_node, "name", book.getCreator());
auto publisher_node = entry_node.append_child("publisher");
ADD_TEXT_ENTRY(publisher_node, "name", book.getPublisher());
if (! book.getUrl().empty()) {
auto acquisition_link = entry_node.append_child("link");
acquisition_link.append_attribute("rel") = "http://opds-spec.org/acquisition/open-access";
acquisition_link.append_attribute("type") = "application/x-zim";
acquisition_link.append_attribute("href") = book.getUrl().c_str();
acquisition_link.append_attribute("length") = to_string(book.getSize()).c_str();
}
if (! book.getFaviconMimeType().empty() ) {
auto image_link = entry_node.append_child("link");
image_link.append_attribute("rel") = "http://opds-spec.org/image/thumbnail";
image_link.append_attribute("type") = book.getFaviconMimeType().c_str();
image_link.append_attribute("href") = (rootLocation + "/meta?name=favicon&content=" + book.getHumanReadableIdFromPath()).c_str();
}
return entry_node;
}
string OPDSDumper::dumpOPDSFeed(const std::vector<std::string>& bookIds)
{
kainjow::mustache::list illustrations;
if ( book.isPathValid() ) {
for ( auto illustration_size : zim::Archive(book.getPath()).getIllustrationSizes() ) {
illustrations.push_back(kainjow::mustache::object{
{"icon_width", to_string(illustration_size)},
{"icon_height", to_string(illustration_size)},
{"icon_scale", "1"},
});
}
date = gen_date_str();
pugi::xml_document doc;
auto root_node = doc.append_child("feed");
root_node.append_attribute("xmlns") = "http://www.w3.org/2005/Atom";
root_node.append_attribute("xmlns:opds") = "http://opds-spec.org/2010/catalog";
ADD_TEXT_ENTRY(root_node, "id", id);
ADD_TEXT_ENTRY(root_node, "title", title);
ADD_TEXT_ENTRY(root_node, "updated", date);
if (m_isSearchResult) {
ADD_TEXT_ENTRY(root_node, "totalResults", to_string(m_totalResults));
ADD_TEXT_ENTRY(root_node, "startIndex", to_string(m_startIndex));
ADD_TEXT_ENTRY(root_node, "itemsPerPage", to_string(m_count));
}
auto self_link_node = root_node.append_child("link");
self_link_node.append_attribute("rel") = "self";
self_link_node.append_attribute("href") = "";
self_link_node.append_attribute("type") = "application/atom+xml";
if (!searchDescriptionUrl.empty() ) {
auto search_link = root_node.append_child("link");
search_link.append_attribute("rel") = "search";
search_link.append_attribute("type") = "application/opensearchdescription+xml";
search_link.append_attribute("href") = searchDescriptionUrl.c_str();
}
if (library) {
for (auto& bookId: bookIds) {
handleBook(library->getBookById(bookId), root_node);
}
return illustrations;
}
kainjow::mustache::object getSingleBookData(const Book& book)
{
const MustacheData bookUrl = book.getUrl().empty()
? MustacheData(false)
: MustacheData(book.getUrl());
return kainjow::mustache::object{
{"id", book.getId()},
{"name", book.getName()},
{"title", book.getTitle()},
{"description", book.getDescription()},
{"language", book.getLanguage()},
{"content_id", book.getHumanReadableIdFromPath()},
{"updated", book.getDate() + "T00:00:00Z"},
{"category", book.getCategory()},
{"flavour", book.getFlavour()},
{"tags", book.getTags()},
{"article_count", to_string(book.getArticleCount())},
{"media_count", to_string(book.getMediaCount())},
{"author_name", book.getCreator()},
{"publisher_name", book.getPublisher()},
{"url", bookUrl},
{"size", to_string(book.getSize())},
{"icons", getBookIllustrationInfo(book)},
};
}
std::string getSingleBookEntryXML(const Book& book, bool withXMLHeader, const std::string& endpointRoot, bool partial)
{
auto data = getSingleBookData(book);
data["with_xml_header"] = MustacheData(withXMLHeader);
data["dump_partial_entries"] = MustacheData(partial);
data["endpoint_root"] = endpointRoot;
return render_template(RESOURCE::templates::catalog_v2_entry_xml, data);
}
BooksData getBooksData(const Library* library, const std::vector<std::string>& bookIds, const std::string& endpointRoot, bool partial)
{
BooksData booksData;
for ( const auto& bookId : bookIds ) {
const Book& book = library->getBookById(bookId);
booksData.push_back(kainjow::mustache::object{
{"entry", getSingleBookEntryXML(book, false, endpointRoot, partial)}
});
}
return booksData;
}
std::string getLanguageSelfName(const std::string& lang) {
const icu::Locale locale(lang.c_str());
icu::UnicodeString ustring;
locale.getDisplayLanguage(locale, ustring);
std::string result;
ustring.toUTF8String(result);
return result;
};
} // unnamed namespace
string OPDSDumper::dumpOPDSFeed(const std::vector<std::string>& bookIds, const std::string& query) const
{
const auto booksData = getBooksData(library, bookIds, "", false);
const kainjow::mustache::object template_data{
{"date", gen_date_str()},
{"root", rootLocation},
{"feed_id", gen_uuid(libraryId + "/catalog/search?"+query)},
{"filter", query.empty() ? MustacheData(false) : MustacheData(query)},
{"totalResults", to_string(m_totalResults)},
{"startIndex", to_string(m_startIndex)},
{"itemsPerPage", to_string(m_count)},
{"books", booksData }
};
return render_template(RESOURCE::templates::catalog_entries_xml, template_data);
}
string OPDSDumper::dumpOPDSFeedV2(const std::vector<std::string>& bookIds, const std::string& query, bool partial) const
{
const auto endpointRoot = rootLocation + "/catalog/v2";
const auto booksData = getBooksData(library, bookIds, endpointRoot, partial);
const char* const endpoint = partial ? "/partial_entries" : "/entries";
const kainjow::mustache::object template_data{
{"date", gen_date_str()},
{"endpoint_root", endpointRoot},
{"feed_id", gen_uuid(libraryId + endpoint + "?" + query)},
{"filter", query.empty() ? MustacheData(false) : MustacheData(query)},
{"query", query.empty() ? "" : "?" + urlEncode(query)},
{"totalResults", to_string(m_totalResults)},
{"startIndex", to_string(m_startIndex)},
{"itemsPerPage", to_string(m_count)},
{"books", booksData },
{"dump_partial_entries", MustacheData(partial)}
};
return render_template(RESOURCE::templates::catalog_v2_entries_xml, template_data);
}
std::string OPDSDumper::dumpOPDSCompleteEntry(const std::string& bookId) const
{
return getSingleBookEntryXML(library->getBookById(bookId), true, "", false);
}
std::string OPDSDumper::categoriesOPDSFeed() const
{
const auto now = gen_date_str();
kainjow::mustache::list categoryData;
for ( const auto& category : library->getBooksCategories() ) {
const auto urlencodedCategoryName = urlEncode(category);
categoryData.push_back(kainjow::mustache::object{
{"name", category},
{"urlencoded_name", urlencodedCategoryName},
{"updated", now},
{"id", gen_uuid(libraryId + "/categories/" + urlencodedCategoryName)}
});
}
return render_template(
RESOURCE::templates::catalog_v2_categories_xml,
kainjow::mustache::object{
{"date", now},
{"endpoint_root", rootLocation + "/catalog/v2"},
{"feed_id", gen_uuid(libraryId + "/categories")},
{"categories", categoryData }
}
);
}
std::string OPDSDumper::languagesOPDSFeed() const
{
const auto now = gen_date_str();
kainjow::mustache::list languageData;
for ( const auto& langAndBookCount : library->getBooksLanguagesWithCounts() ) {
const std::string languageCode = langAndBookCount.first;
const int bookCount = langAndBookCount.second;
const auto languageSelfName = getLanguageSelfName(languageCode);
languageData.push_back(kainjow::mustache::object{
{"lang_code", languageCode},
{"lang_self_name", languageSelfName},
{"book_count", to_string(bookCount)},
{"updated", now},
{"id", gen_uuid(libraryId + "/languages/" + languageCode)}
});
}
return render_template(
RESOURCE::templates::catalog_v2_languages_xml,
kainjow::mustache::object{
{"date", now},
{"endpoint_root", rootLocation + "/catalog/v2"},
{"feed_id", gen_uuid(libraryId + "/languages")},
{"languages", languageData }
}
);
return nodeToString(root_node);
}
}

View File

@@ -21,21 +21,51 @@
#include <time.h>
#include <zim/search.h>
#include <zim/suggestion.h>
#include <zim/item.h>
#include <zim/error.h>
#include "tools.h"
#include "tools/stringTools.h"
#include "tools/otherTools.h"
#include "tools/archiveTools.h"
inline char hi(char v)
{
char hex[] = "0123456789abcdef";
return hex[(v >> 4) & 0xf];
}
inline char lo(char v)
{
char hex[] = "0123456789abcdef";
return hex[v & 0xf];
}
std::string hexUUID(std::string in)
{
std::ostringstream out;
for (unsigned n = 0; n < 4; ++n) {
out << hi(in[n]) << lo(in[n]);
}
out << '-';
for (unsigned n = 4; n < 6; ++n) {
out << hi(in[n]) << lo(in[n]);
}
out << '-';
for (unsigned n = 6; n < 8; ++n) {
out << hi(in[n]) << lo(in[n]);
}
out << '-';
for (unsigned n = 8; n < 10; ++n) {
out << hi(in[n]) << lo(in[n]);
}
out << '-';
for (unsigned n = 10; n < 16; ++n) {
out << hi(in[n]) << lo(in[n]);
}
std::string op = out.str();
return op;
}
namespace kiwix
{
/* Constructor */
Reader::Reader(const string zimFilePath)
: zimArchive(nullptr),
zimFilePath(zimFilePath)
Reader::Reader(const string zimFilePath) : zimFileHandler(NULL)
{
string tmpZimFilePath = zimFilePath;
@@ -46,43 +76,56 @@ Reader::Reader(const string zimFilePath)
tmpZimFilePath.resize(tmpZimFilePath.size() - 2);
}
zimArchive.reset(new zim::Archive(tmpZimFilePath));
this->zimFileHandler = new zim::File(tmpZimFilePath);
if (this->zimFileHandler != NULL) {
this->firstArticleOffset
= this->zimFileHandler->getNamespaceBeginOffset('A');
this->lastArticleOffset = this->zimFileHandler->getNamespaceEndOffset('A');
this->nsACount = this->zimFileHandler->getNamespaceCount('A');
this->nsICount = this->zimFileHandler->getNamespaceCount('I');
this->zimFilePath = zimFilePath;
}
/* initialize random seed: */
srand(time(nullptr));
srand(time(NULL));
}
Reader::Reader(const std::shared_ptr<zim::Archive> archive)
: zimArchive(archive),
zimFilePath(archive->getFilename())
{}
#ifndef _WIN32
Reader::Reader(int fd)
: zimArchive(new zim::Archive(fd)),
zimFilePath("")
/* Destructor */
Reader::~Reader()
{
/* initialize random seed: */
srand(time(nullptr));
if (this->zimFileHandler != NULL) {
delete this->zimFileHandler;
}
}
Reader::Reader(int fd, zim::offset_type offset, zim::size_type size)
: zimArchive(new zim::Archive(fd, offset, size)),
zimFilePath("")
zim::File* Reader::getZimFileHandler() const
{
/* initialize random seed: */
srand(time(nullptr));
return this->zimFileHandler;
}
#endif // #ifndef _WIN32
zim::Archive* Reader::getZimArchive() const
std::map<const std::string, unsigned int> Reader::parseCounterMetadata() const
{
return zimArchive.get();
}
std::map<const std::string, unsigned int> counters;
string mimeType, item, counterString;
unsigned int counter;
MimeCounterType Reader::parseCounterMetadata() const
{
return kiwix::parseArchiveCounter(*zimArchive);
zim::Article article = this->zimFileHandler->getArticle('M', "Counter");
if (article.good()) {
stringstream ssContent(article.getData());
while (getline(ssContent, item, ';')) {
stringstream ssItem(item);
getline(ssItem, mimeType, '=');
getline(ssItem, counterString, '=');
if (!counterString.empty() && !mimeType.empty()) {
sscanf(counterString.c_str(), "%u", &counter);
counters.insert(pair<string, int>(mimeType, counter));
}
}
}
return counters;
}
/* Get the count of articles which can be indexed/displayed */
@@ -92,9 +135,12 @@ unsigned int Reader::getArticleCount() const
= this->parseCounterMetadata();
unsigned int counter = 0;
for(auto &pair:counterMap) {
if (startsWith(pair.first, "text/html")) {
counter += pair.second;
if (counterMap.empty()) {
counter = this->nsACount;
} else {
auto it = counterMap.find("text/html");
if (it != counterMap.end()) {
counter = it->second;
}
}
@@ -104,51 +150,143 @@ unsigned int Reader::getArticleCount() const
/* Get the count of medias content in the ZIM file */
unsigned int Reader::getMediaCount() const
{
return kiwix::getArchiveMediaCount(*zimArchive);
std::map<const std::string, unsigned int> counterMap
= this->parseCounterMetadata();
unsigned int counter = 0;
if (counterMap.empty()) {
counter = this->nsICount;
} else {
auto it = counterMap.find("image/jpeg");
if (it != counterMap.end()) {
counter += it->second;
}
it = counterMap.find("image/gif");
if (it != counterMap.end()) {
counter += it->second;
}
it = counterMap.find("image/png");
if (it != counterMap.end()) {
counter += it->second;
}
}
return counter;
}
/* Get the total of all items of a ZIM file, redirects included */
unsigned int Reader::getGlobalCount() const
{
return zimArchive->getEntryCount();
return this->zimFileHandler->getCountArticles();
}
/* Return the UID of the ZIM file */
string Reader::getId() const
{
return kiwix::getArchiveId(*zimArchive);
std::ostringstream s;
s << this->zimFileHandler->getFileheader().getUuid();
return s.str();
}
/* Return a page url from a title */
bool Reader::getPageUrlFromTitle(const string& title, string& url) const
{
try {
auto entry = getEntryFromTitle(title);
entry = entry.getFinalEntry();
url = entry.getPath();
return true;
} catch (NoEntry& e) {
return false;
}
}
/* Return an URL from a title */
string Reader::getRandomPageUrl() const
{
return getRandomPage().getPath();
}
Entry Reader::getRandomPage() const
{
try {
return zimArchive->getRandomEntry();
} catch(...) {
if (!this->zimFileHandler) {
throw NoEntry();
}
zim::Article article;
std::string mainPagePath = this->getMainPage().getPath();
int watchdog = 42;
do {
auto idx = this->firstArticleOffset
+ (zim::size_type)((double)rand() / ((double)RAND_MAX + 1)
* this->nsACount);
article = zimFileHandler->getArticle(idx);
if (!watchdog--) {
throw NoEntry();
}
} while (!article.good() && article.getLongUrl() == mainPagePath);
return article;
}
/* Return the welcome page URL */
string Reader::getMainPageUrl() const
{
return getMainPage().getPath();
}
Entry Reader::getMainPage() const
{
return zimArchive->getMainEntry();
if (!this->zimFileHandler) {
throw NoEntry();
}
zim::Article article;
if (this->zimFileHandler->getFileheader().hasMainPage())
{
article = zimFileHandler->getArticle(
this->zimFileHandler->getFileheader().getMainPage());
}
if (!article.good())
{
return getFirstPage();
}
return article;
}
bool Reader::getFavicon(string& content, string& mimeType) const
{
return kiwix::getArchiveFavicon(*zimArchive, 48, content, mimeType);
static const char* const paths[] = {"-/favicon", "-/favicon.png", "I/favicon.png", "I/favicon"};
for (auto &path: paths) {
try {
auto entry = getEntryFromPath(path);
entry = entry.getFinalEntry();
content = entry.getContent();
mimeType = entry.getMimetype();
return true;
} catch(NoEntry& e) {};
}
return false;
}
string Reader::getZimFilePath() const
{
return zimFilePath;
return this->zimFilePath;
}
/* Return a metatag value */
bool Reader::getMetadata(const string& name, string& value) const
{
try {
value = zimArchive->getMetadata(name);
auto entry = getEntryFromPath("M/"+name);
value = entry.getContent();
return true;
} catch(zim::EntryNotFound& e) {
} catch(NoEntry& e) {
return false;
}
}
@@ -157,32 +295,48 @@ bool Reader::getMetadata(const string& name, string& value) const
string Reader::getName() const
{
return kiwix::getMetaName(*zimArchive);
METADATA("Name")
}
string Reader::getTitle() const
{
return kiwix::getArchiveTitle(*zimArchive);
string value;
this->getMetadata("Title", value);
if (value.empty()) {
value = getLastPathElement(zimFileHandler->getFilename());
std::replace(value.begin(), value.end(), '_', ' ');
size_t pos = value.find(".zim");
value = value.substr(0, pos);
}
return value;
}
string Reader::getCreator() const
{
return kiwix::getMetaCreator(*zimArchive);
METADATA("Creator")
}
string Reader::getPublisher() const
{
return kiwix::getMetaPublisher(*zimArchive);
METADATA("Publisher")
}
string Reader::getDate() const
{
return kiwix::getMetaDate(*zimArchive);
METADATA("Date")
}
string Reader::getDescription() const
{
return kiwix::getMetaDescription(*zimArchive);
string value;
this->getMetadata("Description", value);
/* Mediawiki Collection tends to use the "Subtitle" name */
if (value.empty()) {
this->getMetadata("Subtitle", value);
}
return value;
}
string Reader::getLongDescription() const
@@ -192,7 +346,7 @@ string Reader::getLongDescription() const
string Reader::getLanguage() const
{
return kiwix::getMetaLanguage(*zimArchive);
METADATA("Language")
}
string Reader::getLicense() const
@@ -202,7 +356,13 @@ string Reader::getLicense() const
string Reader::getTags(bool original) const
{
return kiwix::getMetaTags(*zimArchive, original);
string tags_str;
getMetadata("Tags", tags_str);
if (original) {
return tags_str;
}
auto tags = convertTags(tags_str);
return join(tags, ";");
}
@@ -225,7 +385,7 @@ string Reader::getRelation() const
string Reader::getFlavour() const
{
return kiwix::getMetaFlavour(*zimArchive);
METADATA("Flavour")
}
string Reader::getSource() const
@@ -239,13 +399,108 @@ string Reader::getScraper() const
}
#undef METADATA
Entry Reader::getEntryFromPath(const std::string& path) const
string Reader::getOrigId() const
{
try {
return kiwix::getEntryFromPath(*zimArchive, path);
} catch (zim::EntryNotFound& e) {
string value;
this->getMetadata("startfileuid", value);
if (value.empty()) {
return "";
}
std::string id = value;
std::string origID;
std::string temp = "";
unsigned int k = 0;
char tempArray[16] = "";
for (unsigned int i = 0; i < id.size(); i++) {
if (id[i] == '\n') {
tempArray[k] = atoi(temp.c_str());
temp = "";
k++;
} else {
temp += id[i];
}
}
origID = hexUUID(tempArray);
return origID;
}
/* Return the first page URL */
string Reader::getFirstPageUrl() const
{
return getFirstPage().getPath();
}
Entry Reader::getFirstPage() const
{
if (!this->zimFileHandler) {
throw NoEntry();
}
auto firstPageOffset = zimFileHandler->getNamespaceBeginOffset('A');
auto article = zimFileHandler->getArticle(firstPageOffset);
if (! article.good()) {
throw NoEntry();
}
return article;
}
bool _parseUrl(const string& url, char* ns, string& title)
{
/* Offset to visit the url */
unsigned int urlLength = url.size();
unsigned int offset = 0;
/* Ignore the first '/' */
if (url[offset] == '/')
offset++;
if (url[offset] == '/' || offset >= urlLength)
return false;
/* Get namespace */
*ns = url[offset++];
if (url[offset] != '/' || offset >= urlLength)
return false;
offset++;
if ( offset >= urlLength)
return false;
/* Get content title */
title = url.substr(offset, urlLength - offset);
return true;
}
bool Reader::parseUrl(const string& url, char* ns, string& title) const
{
return _parseUrl(url, ns, title);
}
Entry Reader::getEntryFromPath(const std::string& path) const
{
char ns = 0;
std::string short_url;
if (!this->zimFileHandler) {
throw NoEntry();
}
_parseUrl(path, &ns, short_url);
if (short_url.empty() && ns == 0) {
return getMainPage();
}
auto article = zimFileHandler->getArticle(ns, short_url);
if (!article.good()) {
throw NoEntry();
}
return article;
}
Entry Reader::getEntryFromEncodedPath(const std::string& path) const
@@ -255,30 +510,212 @@ Entry Reader::getEntryFromEncodedPath(const std::string& path) const
Entry Reader::getEntryFromTitle(const std::string& title) const
{
try {
return zimArchive->getEntryByTitle(title);
} catch(zim::EntryNotFound& e) {
if (!this->zimFileHandler) {
throw NoEntry();
}
auto article = this->zimFileHandler->getArticleByTitle('A', title);
if (!article.good()) {
throw NoEntry();
}
return article;
}
/* Return article by url */
bool Reader::getArticleObjectByDecodedUrl(const string& url,
zim::Article& article) const
{
if (this->zimFileHandler == NULL) {
return false;
}
/* Parse the url */
char ns = 0;
string urlStr;
_parseUrl(url, &ns, urlStr);
/* Main page */
if (urlStr.empty() && ns == 0) {
_parseUrl(this->getMainPage().getPath(), &ns, urlStr);
}
/* Extract the content from the zim file */
article = zimFileHandler->getArticle(ns, urlStr);
return article.good();
}
/* Return the mimeType without the content */
bool Reader::getMimeTypeByUrl(const string& url, string& mimeType) const
{
try {
auto entry = getEntryFromPath(url);
mimeType = entry.getMimetype();
return true;
} catch (NoEntry& e) {
mimeType = "";
return false;
}
}
bool get_content_by_decoded_url(const Reader& reader,
const string& url,
string& content,
string& title,
unsigned int& contentLength,
string& contentType,
string& baseUrl)
{
content = "";
contentType = "";
contentLength = 0;
try {
auto entry = reader.getEntryFromPath(url);
entry = entry.getFinalEntry();
baseUrl = entry.getPath();
contentType = entry.getMimetype();
content = entry.getContent();
contentLength = entry.getSize();
title = entry.getTitle();
/* Try to set a stub HTML header/footer if necesssary */
if (contentType.find("text/html") != string::npos
&& content.find("<body") == std::string::npos
&& content.find("<BODY") == std::string::npos) {
content = "<html><head><title>" + title +
"</title><meta http-equiv=\"Content-Type\" content=\"text/html; "
"charset=utf-8\" /></head><body>" +
content + "</body></html>";
}
return true;
} catch (NoEntry& e) {
return false;
}
}
/* Get a content from a zim file */
bool Reader::getContentByUrl(const string& url,
string& content,
string& title,
unsigned int& contentLength,
string& contentType) const
{
std::string stubRedirectUrl;
return get_content_by_decoded_url(*this,
kiwix::urlDecode(url),
content,
title,
contentLength,
contentType,
stubRedirectUrl);
}
bool Reader::getContentByEncodedUrl(const string& url,
string& content,
string& title,
unsigned int& contentLength,
string& contentType,
string& baseUrl) const
{
return get_content_by_decoded_url(*this,
kiwix::urlDecode(url),
content,
title,
contentLength,
contentType,
baseUrl);
}
bool Reader::getContentByEncodedUrl(const string& url,
string& content,
string& title,
unsigned int& contentLength,
string& contentType) const
{
std::string stubRedirectUrl;
return get_content_by_decoded_url(*this,
kiwix::urlDecode(url),
content,
title,
contentLength,
contentType,
stubRedirectUrl);
}
bool Reader::getContentByDecodedUrl(const string& url,
string& content,
string& title,
unsigned int& contentLength,
string& contentType) const
{
std::string stubRedirectUrl;
return get_content_by_decoded_url(*this,
url,
content,
title,
contentLength,
contentType,
stubRedirectUrl);
}
bool Reader::getContentByDecodedUrl(const string& url,
string& content,
string& title,
unsigned int& contentLength,
string& contentType,
string& baseUrl) const
{
return get_content_by_decoded_url(*this,
url,
content,
title,
contentLength,
contentType,
baseUrl);
}
/* Check if an article exists */
bool Reader::urlExists(const string& url) const
{
return pathExists(url);
}
bool Reader::pathExists(const string& path) const
{
return zimArchive->hasEntryByPath(path);
if (!zimFileHandler)
{
return false;
}
char ns = 0;
string titleStr;
_parseUrl(path, &ns, titleStr);
zim::File::const_iterator findItr = zimFileHandler->find(ns, titleStr);
return findItr != zimFileHandler->end() && findItr->getUrl() == titleStr;
}
/* Does the ZIM file has a fulltext index */
bool Reader::hasFulltextIndex() const
{
return zimArchive->hasFulltextIndex();
if (!zimFileHandler || zimFileHandler->is_multiPart() )
{
return false;
}
return ( pathExists("Z//fulltextIndex/xapian")
|| pathExists("X/fulltext/xapian"));
}
/* Search titles by prefix */
bool Reader::searchSuggestions(const string& prefix,
unsigned int suggestionsCount,
const bool reset)
{
bool retVal = false;
zim::File::const_iterator articleItr;
/* Reset the suggestions otherwise check if the suggestions number is less
* than the suggestionsCount */
if (reset) {
@@ -290,48 +727,39 @@ bool Reader::searchSuggestions(const string& prefix,
}
}
auto ret = searchSuggestions(prefix, suggestionsCount, this->suggestions);
/* Set the cursor to the begining */
this->suggestionsOffset = this->suggestions.begin();
return ret;
}
bool Reader::searchSuggestions(const string& prefix,
unsigned int suggestionsCount,
SuggestionsList_t& results)
{
bool retVal = false;
/* Return if no prefix */
if (prefix.size() == 0) {
return false;
}
for (auto& entry: zimArchive->findByTitle(prefix)) {
if (results.size() >= suggestionsCount) {
break;
}
for (articleItr = zimFileHandler->findByTitle('A', prefix);
articleItr != zimFileHandler->end()
&& articleItr->getTitle().compare(0, prefix.size(), prefix) == 0
&& this->suggestions.size() < suggestionsCount;
++articleItr) {
/* Extract the interesting part of article title & url */
std::string normalizedArticleTitle
= kiwix::normalize(entry.getTitle());
// Get the final path.
auto item = entry.getItem(true);
std::string articleFinalUrl = item.getPath();
= kiwix::normalize(articleItr->getTitle());
std::string articleFinalUrl = "/A/" + articleItr->getUrl();
if (articleItr->isRedirect()) {
zim::Article article = *articleItr;
unsigned int loopCounter = 0;
while (article.isRedirect() && loopCounter++ < 42) {
article = article.getRedirectArticle();
}
articleFinalUrl = "/A/" + article.getUrl();
}
/* Go through all already found suggestions and skip if this
article is already in the suggestions list (with an other
title) */
bool insert = true;
std::vector<SuggestionItem>::iterator suggestionItr;
for (suggestionItr = results.begin();
suggestionItr != results.end();
std::vector<std::vector<std::string>>::iterator suggestionItr;
for (suggestionItr = this->suggestions.begin();
suggestionItr != this->suggestions.end();
suggestionItr++) {
int result = normalizedArticleTitle.compare((*suggestionItr).getNormalizedTitle());
if (result == 0 && articleFinalUrl.compare((*suggestionItr).getPath()) == 0) {
int result = normalizedArticleTitle.compare((*suggestionItr)[2]);
if (result == 0 && articleFinalUrl.compare((*suggestionItr)[1]) == 0) {
insert = false;
break;
} else if (result < 0) {
@@ -341,73 +769,72 @@ bool Reader::searchSuggestions(const string& prefix,
/* Insert if possible */
if (insert) {
SuggestionItem suggestion(entry.getTitle(), normalizedArticleTitle, articleFinalUrl);
results.insert(suggestionItr, suggestion);
std::vector<std::string> suggestion;
suggestion.push_back(articleItr->getTitle());
suggestion.push_back(articleFinalUrl);
suggestion.push_back(normalizedArticleTitle);
this->suggestions.insert(suggestionItr, suggestion);
}
/* Suggestions where found */
retVal = true;
}
/* Set the cursor to the begining */
this->suggestionsOffset = this->suggestions.begin();
return retVal;
}
std::vector<std::string> Reader::getTitleVariants(
const std::string& title) const
{
return kiwix::getTitleVariants(title);
}
bool Reader::searchSuggestionsSmart(const string& prefix,
unsigned int suggestionsCount)
{
this->suggestions.clear();
this->suggestionsOffset = this->suggestions.begin();
auto ret = searchSuggestionsSmart(prefix, suggestionsCount, this->suggestions);
this->suggestionsOffset = this->suggestions.begin();
return ret;
std::vector<std::string> variants;
variants.push_back(title);
variants.push_back(kiwix::ucFirst(title));
variants.push_back(kiwix::lcFirst(title));
variants.push_back(kiwix::toTitle(title));
return variants;
}
/* Try also a few variations of the prefix to have better results */
bool Reader::searchSuggestionsSmart(const string& prefix,
unsigned int suggestionsCount,
SuggestionsList_t& results)
unsigned int suggestionsCount)
{
std::vector<std::string> variants = this->getTitleVariants(prefix);
bool retVal = false;
auto suggestionSearcher = zim::SuggestionSearcher(*zimArchive);
this->suggestions.clear();
this->suggestionsOffset = this->suggestions.begin();
/* Try to search in the title using fulltext search database */
const auto suggestionSearch
= this->getZimFileHandler()->suggestions(prefix, 0, suggestionsCount);
if (zimArchive->hasTitleIndex()) {
auto suggestionSearch = suggestionSearcher.suggest(prefix);
const auto suggestions = suggestionSearch.getResults(0, suggestionsCount);
for (auto current : suggestions) {
SuggestionItem suggestion(current.getTitle(), kiwix::normalize(current.getTitle()),
current.getPath(), current.getSnippet());
results.push_back(suggestion);
if (suggestionSearch->get_matches_estimated()) {
for (auto current = suggestionSearch->begin();
current != suggestionSearch->end();
current++) {
if (!current->good()) {
continue;
}
std::vector<std::string> suggestion;
suggestion.push_back(current->getTitle());
suggestion.push_back("/A/" + current->getUrl());
suggestion.push_back(kiwix::normalize(current->getTitle()));
this->suggestions.push_back(suggestion);
}
this->suggestionsOffset = this->suggestions.begin();
retVal = true;
} else {
// Check some of the variants of the prefix
for (std::vector<std::string>::iterator variantsItr = variants.begin();
variantsItr != variants.end();
variantsItr++) {
auto suggestionSearch = suggestionSearcher.suggest(*variantsItr);
for (auto current : suggestionSearch.getResults(0, suggestionsCount)) {
if (results.size() >= suggestionsCount) {
break;
}
SuggestionItem suggestion(current.getTitle(), kiwix::normalize(current.getTitle()),
current.getPath(), current.getSnippet());
results.push_back(suggestion);
}
retVal = this->searchSuggestions(*variantsItr, suggestionsCount, false)
|| retVal;
}
}
return results.size() > 0;
return retVal;
}
/* Get next suggestion */
@@ -415,7 +842,7 @@ bool Reader::getNextSuggestion(string& title)
{
if (this->suggestionsOffset != this->suggestions.end()) {
/* title */
title = (*(this->suggestionsOffset)).getTitle();
title = (*(this->suggestionsOffset))[0];
/* increment the cursor for the next call */
this->suggestionsOffset++;
@@ -430,8 +857,8 @@ bool Reader::getNextSuggestion(string& title, string& url)
{
if (this->suggestionsOffset != this->suggestions.end()) {
/* title */
title = (*(this->suggestionsOffset)).getTitle();
url = (*(this->suggestionsOffset)).getPath();
title = (*(this->suggestionsOffset))[0];
url = (*(this->suggestionsOffset))[1];
/* increment the cursor for the next call */
this->suggestionsOffset++;
@@ -445,14 +872,14 @@ bool Reader::getNextSuggestion(string& title, string& url)
/* Check if the file has as checksum */
bool Reader::canCheckIntegrity() const
{
return zimArchive->hasChecksum();
return this->zimFileHandler->getChecksum() != "";
}
/* Return true if corrupted, false otherwise */
bool Reader::isCorrupted() const
{
try {
if (zimArchive->check() == true) {
if (this->zimFileHandler->verify() == true) {
return false;
}
} catch (exception& e) {
@@ -466,7 +893,13 @@ bool Reader::isCorrupted() const
/* Return the file size, works also for splitted files */
unsigned int Reader::getFileSize() const
{
return kiwix::getArchiveFileSize(*zimArchive);
}
zim::File* file = this->getZimFileHandler();
zim::size_type size = 0;
if (file != NULL) {
size = file->getFilesize();
}
return (size / 1024);
}
}

View File

@@ -30,29 +30,17 @@
#include <mustache.hpp>
#include "kiwixlib-resources.h"
#include "tools/stringTools.h"
namespace kiwix
{
/* Constructor */
SearchRenderer::SearchRenderer(Searcher* searcher, NameMapper* mapper)
: m_srs(searcher->getSearchResultSet()),
: mp_searcher(searcher),
mp_nameMapper(mapper),
protocolPrefix("zim://"),
searchProtocolPrefix("search://?"),
estimatedResultCount(searcher->getEstimatedResultCount()),
resultStart(searcher->getResultStart())
{}
SearchRenderer::SearchRenderer(zim::SearchResultSet srs, NameMapper* mapper,
unsigned int start, unsigned int estimatedResultCount)
: m_srs(srs),
mp_nameMapper(mapper),
protocolPrefix("zim://"),
searchProtocolPrefix("search://?"),
estimatedResultCount(estimatedResultCount),
resultStart(start)
searchProtocolPrefix("search://?")
{}
/* Destructor */
@@ -82,48 +70,53 @@ std::string SearchRenderer::getHtml()
{
kainjow::mustache::data results{kainjow::mustache::data::type::list};
for (auto it = m_srs.begin(); it != m_srs.end(); it++) {
mp_searcher->restart_search();
Result* p_result = NULL;
while ((p_result = mp_searcher->getNextResult())) {
kainjow::mustache::data result;
result.set("title", it.getTitle());
result.set("url", it.getPath());
result.set("snippet", it.getSnippet());
std::ostringstream s;
s << it.getZimId();
result.set("resultContentId", mp_nameMapper->getNameForId(s.str()));
result.set("title", p_result->get_title());
result.set("url", p_result->get_url());
result.set("snippet", p_result->get_snippet());
auto readerIndex = p_result->get_readerIndex();
auto reader = mp_searcher->get_reader(readerIndex);
result.set("resultContentId", mp_nameMapper->getNameForId(reader->getId()));
if (it.getWordCount() >= 0) {
result.set("wordCount", kiwix::beautifyInteger(it.getWordCount()));
if (p_result->get_wordCount() >= 0) {
result.set("wordCount", kiwix::beautifyInteger(p_result->get_wordCount()));
}
results.push_back(result);
delete p_result;
}
// pages
kainjow::mustache::data pages{kainjow::mustache::data::type::list};
auto resultEnd = 0U;
auto resultStart = mp_searcher->getResultStart();
auto resultEnd = mp_searcher->getResultEnd();
auto resultCountPerPage = resultEnd - resultStart;
auto estimatedResultCount = mp_searcher->getEstimatedResultCount();
auto currentPage = 0U;
auto pageStart = 0U;
auto pageEnd = 0U;
auto lastPageStart = 0U;
if (pageLength) {
currentPage = resultStart/pageLength;
if (resultCountPerPage) {
currentPage = resultStart/resultCountPerPage;
pageStart = currentPage > 4 ? currentPage-4 : 0;
pageEnd = currentPage + 5;
if (pageEnd > estimatedResultCount / pageLength) {
pageEnd = (estimatedResultCount + pageLength - 1) / pageLength;
if (pageEnd > estimatedResultCount / resultCountPerPage) {
pageEnd = estimatedResultCount / resultCountPerPage;
}
if (estimatedResultCount > pageLength) {
lastPageStart = ((estimatedResultCount-1)/pageLength)*pageLength;
if (estimatedResultCount > resultCountPerPage) {
lastPageStart = static_cast<int>(round(estimatedResultCount/resultCountPerPage)) * resultCountPerPage;
}
}
resultEnd = resultStart+pageLength; //setting result end
for (unsigned int i = pageStart; i < pageEnd; i++) {
kainjow::mustache::data page;
page.set("label", to_string(i + 1));
page.set("start", to_string(i * pageLength));
page.set("start", to_string(i * resultCountPerPage));
page.set("end", to_string((i + 1) * resultCountPerPage));
if (i == currentPage) {
page.set("selected", true);
@@ -144,8 +137,9 @@ std::string SearchRenderer::getHtml()
allData.set("searchPatternEncoded", urlEncode(this->searchPattern));
allData.set("resultStart", to_string(resultStart + 1));
allData.set("resultEnd", to_string(min(resultEnd, estimatedResultCount)));
allData.set("pageLength", to_string(pageLength));
allData.set("resultRange", to_string(resultCountPerPage));
allData.set("resultLastPageStart", to_string(lastPageStart));
allData.set("lastResult", to_string(estimatedResultCount));
allData.set("protocolPrefix", this->protocolPrefix);
allData.set("searchProtocolPrefix", this->searchProtocolPrefix);
allData.set("contentId", this->searchContent);

View File

@@ -18,16 +18,14 @@
*/
#include <cmath>
#include "searcher.h"
#include "reader.h"
#include <zim/search.h>
#include <zim/suggestion.h>
#include <mustache.hpp>
#include <cmath>
#include "tools/stringTools.h"
#include "kiwixlib-resources.h"
#define MAX_SEARCH_LEN 140
@@ -37,8 +35,7 @@ namespace kiwix
class _Result : public Result
{
public:
_Result(zim::SearchResultSet::iterator iterator);
_Result(SuggestionItem suggestionItem);
_Result(zim::Search::iterator& iterator);
virtual ~_Result(){};
virtual std::string get_url();
@@ -48,38 +45,32 @@ class _Result : public Result
virtual std::string get_content();
virtual int get_wordCount();
virtual int get_size();
virtual std::string get_zimId();
virtual int get_readerIndex();
private:
zim::SearchResultSet::iterator iterator;
SuggestionItem suggestionItem;
bool isSuggestion;
zim::Search::iterator iterator;
};
struct SearcherInternal : zim::SearchResultSet {
explicit SearcherInternal(const zim::SearchResultSet& srs)
: zim::SearchResultSet(srs)
, current_iterator(srs.begin())
struct SearcherInternal {
const zim::Search* _search;
zim::Search::iterator current_iterator;
SearcherInternal() : _search(NULL) {}
~SearcherInternal()
{
if (_search != NULL) {
delete _search;
}
}
zim::SearchResultSet::iterator current_iterator;
};
struct SuggestionInternal : zim::SuggestionResultSet {
explicit SuggestionInternal(const zim::SuggestionResultSet& srs)
: zim::SuggestionResultSet(srs),
currentIterator(srs.begin()) {}
zim::SuggestionResultSet::iterator currentIterator;
};
/* Constructor */
Searcher::Searcher()
: searchPattern(""),
: internal(new SearcherInternal()),
searchPattern(""),
estimatedResultCount(0),
resultStart(0),
maxResultCount(0)
resultEnd(0)
{
loadICUExternalTables();
}
@@ -87,6 +78,7 @@ Searcher::Searcher()
/* Destructor */
Searcher::~Searcher()
{
delete internal;
}
bool Searcher::add_reader(Reader* reader)
@@ -107,7 +99,7 @@ Reader* Searcher::get_reader(int readerIndex)
/* Search strings in the database */
void Searcher::search(const std::string& search,
unsigned int resultStart,
unsigned int maxResultCount,
unsigned int resultEnd,
const bool verbose)
{
this->reset();
@@ -118,25 +110,25 @@ void Searcher::search(const std::string& search,
this->searchPattern = search;
this->resultStart = resultStart;
this->maxResultCount = maxResultCount;
this->resultEnd = resultEnd;
/* Try to find results */
if (maxResultCount != 0) {
if (resultStart != resultEnd) {
/* Perform the search */
string unaccentedSearch = removeAccents(search);
std::vector<zim::Archive> archives;
std::vector<const zim::File*> zims;
for (auto current = this->readers.begin(); current != this->readers.end();
current++) {
if ( (*current)->hasFulltextIndex() ) {
archives.push_back(*(*current)->getZimArchive());
zims.push_back((*current)->getZimFileHandler());
}
}
zim::Searcher searcher(archives);
searcher.setVerbose(verbose);
zim::Query query;
query.setQuery(unaccentedSearch);
zim::Search search = searcher.search(query);
internal.reset(new SearcherInternal(search.getResults(resultStart, maxResultCount)));
this->estimatedResultCount = search.getEstimatedMatches();
zim::Search* search = new zim::Search(zims);
search->set_verbose(verbose);
search->set_query(unaccentedSearch);
search->set_range(resultStart, resultEnd);
internal->_search = search;
internal->current_iterator = internal->_search->begin();
this->estimatedResultCount = internal->_search->get_matches_estimated();
}
return;
@@ -145,7 +137,7 @@ void Searcher::search(const std::string& search,
void Searcher::geo_search(float latitude, float longitude, float distance,
unsigned int resultStart,
unsigned int maxResultCount,
unsigned int resultEnd,
const bool verbose)
{
this->reset();
@@ -159,53 +151,43 @@ void Searcher::geo_search(float latitude, float longitude, float distance,
oss << "Articles located less than " << distance << " meters of " << latitude << ";" << longitude;
this->searchPattern = oss.str();
this->resultStart = resultStart;
this->maxResultCount = maxResultCount;
this->resultEnd = resultEnd;
/* Try to find results */
if (maxResultCount == 0) {
if (resultStart == resultEnd) {
return;
}
std::vector<zim::Archive> archives;
std::vector<const zim::File*> zims;
for (auto current = this->readers.begin(); current != this->readers.end();
current++) {
archives.push_back(*(*current)->getZimArchive());
zims.push_back((*current)->getZimFileHandler());
}
zim::Searcher searcher(archives);
searcher.setVerbose(verbose);
zim::Query query;
query.setQuery("");
query.setGeorange(latitude, longitude, distance);
zim::Search search = searcher.search(query);
internal.reset(new SearcherInternal(search.getResults(resultStart, maxResultCount)));
this->estimatedResultCount = search.getEstimatedMatches();
zim::Search* search = new zim::Search(zims);
search->set_verbose(verbose);
search->set_query("");
search->set_georange(latitude, longitude, distance);
search->set_range(resultStart, resultEnd);
internal->_search = search;
internal->current_iterator = internal->_search->begin();
this->estimatedResultCount = internal->_search->get_matches_estimated();
}
void Searcher::restart_search()
{
if (internal.get()) {
internal->current_iterator = internal->begin();
if (internal->_search) {
internal->current_iterator = internal->_search->begin();
}
}
Result* Searcher::getNextResult()
{
if (internal.get() && internal->current_iterator != internal->end()) {
if (internal->_search &&
internal->current_iterator != internal->_search->end()) {
Result* result = new _Result(internal->current_iterator);
internal->current_iterator++;
return result;
} else if (suggestionInternal.get() &&
suggestionInternal->currentIterator != suggestionInternal->end()) {
SuggestionItem item(
suggestionInternal->currentIterator->getTitle(),
normalize(suggestionInternal->currentIterator->getTitle()),
suggestionInternal->currentIterator->getPath(),
suggestionInternal->currentIterator->getSnippet()
);
Result* result = new _Result(item);
suggestionInternal->currentIterator++;
return result;
}
return NULL;
}
@@ -228,16 +210,22 @@ void Searcher::suggestions(std::string& searchPattern, const bool verbose)
this->searchPattern = searchPattern;
this->resultStart = 0;
this->maxResultCount = 10;
this->resultEnd = 10;
string unaccentedSearch = removeAccents(searchPattern);
// Multizim suggestion is not supported as of now! taking only one archive
zim::Archive archive = *(*this->readers.begin())->getZimArchive();
zim::SuggestionSearcher searcher(archive);
searcher.setVerbose(verbose);
zim::SuggestionSearch search = searcher.suggest(searchPattern);
suggestionInternal.reset(new SuggestionInternal(search.getResults(resultStart, maxResultCount)));
this->estimatedResultCount = search.getEstimatedMatches();
std::vector<const zim::File*> zims;
for (auto current = this->readers.begin(); current != this->readers.end();
current++) {
zims.push_back((*current)->getZimFileHandler());
}
zim::Search* search = new zim::Search(zims);
search->set_verbose(verbose);
search->set_query(unaccentedSearch);
search->set_range(resultStart, resultEnd);
search->set_suggestion_mode(true);
internal->_search = search;
internal->current_iterator = internal->_search->begin();
this->estimatedResultCount = internal->_search->get_matches_estimated();
}
/* Return the result count estimation */
@@ -246,78 +234,45 @@ unsigned int Searcher::getEstimatedResultCount()
return this->estimatedResultCount;
}
zim::SearchResultSet Searcher::getSearchResultSet()
_Result::_Result(zim::Search::iterator& iterator)
: iterator(iterator)
{
return *(this->internal);
}
_Result::_Result(zim::SearchResultSet::iterator iterator)
: iterator(iterator),
suggestionItem("", "", ""),
isSuggestion(false)
{}
_Result::_Result(SuggestionItem item)
: iterator(),
suggestionItem(item.getTitle(), item.getNormalizedTitle(), item.getPath(), item.getSnippet()),
isSuggestion(true)
{}
std::string _Result::get_url()
{
if (isSuggestion) {
return suggestionItem.getPath();
}
return iterator.getPath();
return iterator.get_url();
}
std::string _Result::get_title()
{
if (isSuggestion) {
return suggestionItem.getTitle();
}
return iterator.getTitle();
return iterator.get_title();
}
int _Result::get_score()
{
if (isSuggestion) {
return 0;
}
return iterator.getScore();
return iterator.get_score();
}
std::string _Result::get_snippet()
{
if (isSuggestion) {
return suggestionItem.getSnippet();
}
return iterator.getSnippet();
return iterator.get_snippet();
}
std::string _Result::get_content()
{
if (isSuggestion) return "";
return iterator->getItem(true).getData();
if (iterator->good()) {
return iterator->getData();
}
return "";
}
int _Result::get_size()
{
if (isSuggestion) {
return 0;
}
return iterator.getSize();
return iterator.get_size();
}
int _Result::get_wordCount()
{
if (isSuggestion) {
return 0;
}
return iterator.getWordCount();
return iterator.get_wordCount();
}
std::string _Result::get_zimId()
int _Result::get_readerIndex()
{
if (isSuggestion) {
return "";
}
std::ostringstream s;
s << iterator.getZimId();
return s.str();
return iterator.get_fileIndex();
}

View File

@@ -19,15 +19,125 @@
#include "server.h"
#ifdef _WIN32
# if !defined(__MINGW32__) && (_MSC_VER < 1600)
# include "stdint4win.h"
# endif
# include <winsock2.h>
# include <ws2tcpip.h>
# ifdef __GNUC__
// inet_pton is not declared in mingw, even if the function exists.
extern "C" {
WINSOCK_API_LINKAGE INT WSAAPI inet_pton( INT Family, PCSTR pszAddrString, PVOID pAddrBuf);
}
# endif
typedef UINT64 uint64_t;
typedef UINT16 uint16_t;
#endif
extern "C" {
#include <microhttpd.h>
}
#include "tools/otherTools.h"
#include "tools/pathTools.h"
#include "tools/regexTools.h"
#include "tools/stringTools.h"
#include "library.h"
#include "name_mapper.h"
#include "entry.h"
#include "searcher.h"
#include "search_renderer.h"
#include "opds_dumper.h"
#include <zim/uuid.h>
#include <mustache.hpp>
#include <pthread.h>
#include <atomic>
#include <string>
#include <vector>
#include <chrono>
#include "kiwixlib-resources.h"
#include "server/internalServer.h"
#ifndef _WIN32
# include <arpa/inet.h>
#endif
#include "server/request_context.h"
#include "server/response.h"
#define MAX_SEARCH_LEN 140
#define KIWIX_MIN_CONTENT_SIZE_TO_DEFLATE 100
namespace kiwix {
static IdNameMapper defaultNameMapper;
static int staticHandlerCallback(void* cls,
struct MHD_Connection* connection,
const char* url,
const char* method,
const char* version,
const char* upload_data,
size_t* upload_data_size,
void** cont_cls);
class InternalServer {
public:
InternalServer(Library* library,
NameMapper* nameMapper,
std::string addr,
int port,
std::string root,
int nbThreads,
bool verbose,
bool withTaskbar,
bool withLibraryButton);
virtual ~InternalServer() = default;
int handlerCallback(struct MHD_Connection* connection,
const char* url,
const char* method,
const char* version,
const char* upload_data,
size_t* upload_data_size,
void** cont_cls);
bool start();
void stop();
private:
Response handle_request(const RequestContext& request);
Response build_500(const std::string& msg);
Response build_404(const RequestContext& request, const std::string& zimName);
Response build_homepage(const RequestContext& request);
Response handle_skin(const RequestContext& request);
Response handle_catalog(const RequestContext& request);
Response handle_meta(const RequestContext& request);
Response handle_search(const RequestContext& request);
Response handle_suggest(const RequestContext& request);
Response handle_random(const RequestContext& request);
Response handle_content(const RequestContext& request);
kainjow::mustache::data get_default_data();
Response get_default_response();
std::string m_addr;
int m_port;
std::string m_root;
int m_nbThreads;
std::atomic_bool m_verbose;
bool m_withTaskbar;
bool m_withLibraryButton;
struct MHD_Daemon* mp_daemon;
Library* mp_library;
NameMapper* mp_nameMapper;
};
Server::Server(Library* library, NameMapper* nameMapper) :
mp_library(library),
mp_nameMapper(nameMapper),
@@ -47,17 +157,13 @@ bool Server::start() {
m_nbThreads,
m_verbose,
m_withTaskbar,
m_withLibraryButton,
m_blockExternalLinks,
m_indexTemplateString));
m_withLibraryButton));
return mp_server->start();
}
void Server::stop() {
if (mp_server) {
mp_server->stop();
mp_server.reset(nullptr);
}
mp_server->stop();
mp_server.reset(nullptr);
}
void Server::setRoot(const std::string& root)
@@ -71,4 +177,722 @@ void Server::setRoot(const std::string& root)
}
}
InternalServer::InternalServer(Library* library,
NameMapper* nameMapper,
std::string addr,
int port,
std::string root,
int nbThreads,
bool verbose,
bool withTaskbar,
bool withLibraryButton) :
m_addr(addr),
m_port(port),
m_root(root),
m_nbThreads(nbThreads),
m_verbose(verbose),
m_withTaskbar(withTaskbar),
m_withLibraryButton(withLibraryButton),
mp_daemon(nullptr),
mp_library(library),
mp_nameMapper(nameMapper ? nameMapper : &defaultNameMapper)
{}
bool InternalServer::start() {
#ifdef _WIN32
int flags = MHD_USE_SELECT_INTERNALLY;
#else
int flags = MHD_USE_POLL_INTERNALLY;
#endif
if (m_verbose.load())
flags |= MHD_USE_DEBUG;
struct sockaddr_in sockAddr;
memset(&sockAddr, 0, sizeof(sockAddr));
sockAddr.sin_family = AF_INET;
sockAddr.sin_port = htons(m_port);
if (m_addr.empty()) {
if (0 != INADDR_ANY)
sockAddr.sin_addr.s_addr = htonl(INADDR_ANY);
} else {
if (inet_pton(AF_INET, m_addr.c_str(), &(sockAddr.sin_addr.s_addr)) == 0) {
std::cerr << "Ip address " << m_addr << " is not a valid ip address" << std::endl;
return false;
}
}
mp_daemon = MHD_start_daemon(flags,
m_port,
NULL,
NULL,
&staticHandlerCallback,
this,
MHD_OPTION_SOCK_ADDR, &sockAddr,
MHD_OPTION_THREAD_POOL_SIZE, m_nbThreads,
MHD_OPTION_END);
if (mp_daemon == nullptr) {
std::cerr << "Unable to instantiate the HTTP daemon. The port " << m_port
<< " is maybe already occupied or need more permissions to be open. "
"Please try as root or with a port number higher or equal to 1024."
<< std::endl;
return false;
}
return true;
}
void InternalServer::stop()
{
MHD_stop_daemon(mp_daemon);
}
static int staticHandlerCallback(void* cls,
struct MHD_Connection* connection,
const char* url,
const char* method,
const char* version,
const char* upload_data,
size_t* upload_data_size,
void** cont_cls)
{
InternalServer* _this = static_cast<InternalServer*>(cls);
return _this->handlerCallback(connection,
url,
method,
version,
upload_data,
upload_data_size,
cont_cls);
}
int InternalServer::handlerCallback(struct MHD_Connection* connection,
const char* url,
const char* method,
const char* version,
const char* upload_data,
size_t* upload_data_size,
void** cont_cls)
{
auto start_time = std::chrono::steady_clock::now();
if (m_verbose.load() ) {
printf("======================\n");
printf("Requesting : \n");
printf("full_url : %s\n", url);
}
RequestContext request(connection, m_root, url, method, version);
if (m_verbose.load() ) {
request.print_debug_info();
}
/* Unexpected method */
if (request.get_method() != RequestMethod::GET
&& request.get_method() != RequestMethod::POST) {
printf("Reject request because of unhandled request method.\n");
printf("----------------------\n");
return MHD_NO;
}
auto response = handle_request(request);
if (response.getReturnCode() == MHD_HTTP_INTERNAL_SERVER_ERROR) {
printf("========== INTERNAL ERROR !! ============\n");
if (!m_verbose.load()) {
printf("Requesting : \n");
printf("full_url : %s\n", url);
request.print_debug_info();
}
}
auto ret = response.send(request, connection);
auto end_time = std::chrono::steady_clock::now();
auto time_span = std::chrono::duration_cast<std::chrono::duration<double>>(end_time - start_time);
if (m_verbose.load()) {
printf("Request time : %fs\n", time_span.count());
printf("----------------------\n");
}
return ret;
}
Response InternalServer::handle_request(const RequestContext& request)
{
try {
if (! request.is_valid_url())
return build_404(request, "");
if (kiwix::startsWith(request.get_url(), "/skin/"))
return handle_skin(request);
if (startsWith(request.get_url(), "/catalog"))
return handle_catalog(request);
if (request.get_url() == "/meta")
return handle_meta(request);
if (request.get_url() == "/search")
return handle_search(request);
if (request.get_url() == "/suggest")
return handle_suggest(request);
if (request.get_url() == "/random")
return handle_random(request);
return handle_content(request);
} catch (std::exception& e) {
fprintf(stderr, "===== Unhandled error : %s\n", e.what());
return build_500(e.what());
} catch (...) {
fprintf(stderr, "===== Unhandled unknown error\n");
return build_500("Unknown error");
}
}
kainjow::mustache::data InternalServer::get_default_data()
{
kainjow::mustache::data data;
data.set("root", m_root);
return data;
}
Response InternalServer::get_default_response()
{
return Response(m_root, m_verbose.load(), m_withTaskbar, m_withLibraryButton);
}
Response InternalServer::build_404(const RequestContext& request,
const std::string& bookName)
{
kainjow::mustache::data results;
results.set("url", request.get_full_url());
auto response = get_default_response();
response.set_template(RESOURCE::templates::_404_html, results);
response.set_mimeType("text/html");
response.set_code(MHD_HTTP_NOT_FOUND);
response.set_compress(true);
response.set_taskbar(bookName, "");
return response;
}
Response InternalServer::build_500(const std::string& msg)
{
kainjow::mustache::data data;
data.set("error", msg);
Response response(m_root, true, false, false);
response.set_template(RESOURCE::templates::_500_html, data);
response.set_mimeType("text/html");
response.set_code(MHD_HTTP_INTERNAL_SERVER_ERROR);
return response;
}
Response InternalServer::build_homepage(const RequestContext& request)
{
auto data = get_default_data();
kainjow::mustache::data books{kainjow::mustache::data::type::list};
for (auto& bookId: mp_library->filter(kiwix::Filter().local(true).valid(true))) {
auto& currentBook = mp_library->getBookById(bookId);
kainjow::mustache::data book;
book.set("name", mp_nameMapper->getNameForId(bookId));
book.set("title", currentBook.getTitle());
book.set("description", currentBook.getDescription());
book.set("articleCount", beautifyInteger(currentBook.getArticleCount()));
book.set("mediaCount", beautifyInteger(currentBook.getMediaCount()));
books.push_back(book);
}
data.set("books", books);
auto response = get_default_response();
response.set_template(RESOURCE::templates::index_html, data);
response.set_mimeType("text/html; charset=utf-8");
response.set_compress(true);
response.set_taskbar("", "");
return response;
}
Response InternalServer::handle_meta(const RequestContext& request)
{
std::string bookName;
std::string bookId;
std::string meta_name;
std::shared_ptr<Reader> reader;
try {
bookName = request.get_argument("content");
bookId = mp_nameMapper->getIdForName(bookName);
meta_name = request.get_argument("name");
reader = mp_library->getReaderById(bookId);
} catch (const std::out_of_range& e) {
return build_404(request, bookName);
}
if (reader == nullptr) {
return build_404(request, bookName);
}
std::string content;
std::string mimeType = "text";
if (meta_name == "title") {
content = reader->getTitle();
} else if (meta_name == "description") {
content = reader->getDescription();
} else if (meta_name == "language") {
content = reader->getLanguage();
} else if (meta_name == "name") {
content = reader->getName();
} else if (meta_name == "tags") {
content = reader->getTags();
} else if (meta_name == "date") {
content = reader->getDate();
} else if (meta_name == "creator") {
content = reader->getCreator();
} else if (meta_name == "publisher") {
content = reader->getPublisher();
} else if (meta_name == "favicon") {
reader->getFavicon(content, mimeType);
} else {
return build_404(request, bookName);
}
auto response = get_default_response();
response.set_content(content);
response.set_mimeType(mimeType);
response.set_compress(false);
response.set_cache(true);
return response;
}
Response InternalServer::handle_suggest(const RequestContext& request)
{
if (m_verbose.load()) {
printf("** running handle_suggest\n");
}
std::string content;
std::string mimeType;
unsigned int maxSuggestionCount = 10;
unsigned int suggestionCount = 0;
std::string suggestion;
std::string bookName;
std::string bookId;
std::string term;
std::shared_ptr<Reader> reader;
try {
bookName = request.get_argument("content");
bookId = mp_nameMapper->getIdForName(bookName);
term = request.get_argument("term");
reader = mp_library->getReaderById(bookId);
} catch (const std::out_of_range&) {
return build_404(request, bookName);
}
if (m_verbose.load()) {
printf("Searching suggestions for: \"%s\"\n", term.c_str());
}
kainjow::mustache::data results{kainjow::mustache::data::type::list};
bool first = true;
if (reader != nullptr) {
/* Get the suggestions */
reader->searchSuggestionsSmart(term, maxSuggestionCount);
while (reader->getNextSuggestion(suggestion)) {
kainjow::mustache::data result;
result.set("label", suggestion);
result.set("value", suggestion);
result.set("first", first);
first = false;
results.push_back(result);
suggestionCount++;
}
}
/* Propose the fulltext search if possible */
if (reader->hasFulltextIndex()) {
kainjow::mustache::data result;
result.set("label", "containing '" + term + "'...");
result.set("value", term + " ");
result.set("first", first);
results.push_back(result);
}
auto data = get_default_data();
data.set("suggestions", results);
auto response = get_default_response();
response.set_template(RESOURCE::templates::suggestion_json, data);
response.set_mimeType("application/json; charset=utf-8");
response.set_compress(true);
return response;
}
Response InternalServer::handle_skin(const RequestContext& request)
{
if (m_verbose.load()) {
printf("** running handle_skin\n");
}
auto response = get_default_response();
auto resourceName = request.get_url().substr(1);
try {
response.set_content(getResource(resourceName));
} catch (const ResourceNotFound& e) {
return build_404(request, "");
}
response.set_mimeType(getMimeTypeForFile(resourceName));
response.set_compress(true);
response.set_cache(true);
return response;
}
Response InternalServer::handle_search(const RequestContext& request)
{
if (m_verbose.load()) {
printf("** running handle_search\n");
}
std::string bookName;
std::string bookId;
try {
bookName = request.get_argument("content");
bookId = mp_nameMapper->getIdForName(bookName);
} catch (const std::out_of_range&) {}
std::string patternString;
try {
patternString = request.get_argument("pattern");
} catch (const std::out_of_range&) {}
/* Retrive geo search */
bool has_geo_query = false;
float latitude = 0;
float longitude = 0;
float distance = 0;
try {
latitude = request.get_argument<float>("latitude");
longitude = request.get_argument<float>("longitude");
distance = request.get_argument<float>("distance");
has_geo_query = true;
} catch(const std::out_of_range&) {}
catch(const std::invalid_argument&) {}
std::shared_ptr<Reader> reader(nullptr);
try {
reader = mp_library->getReaderById(bookId);
} catch (const std::out_of_range&) {}
/* Try first to load directly the article */
if (reader != nullptr && !patternString.empty()) {
std::string patternCorrespondingUrl;
auto variants = reader->getTitleVariants(patternString);
auto variantsItr = variants.begin();
while (patternCorrespondingUrl.empty() && variantsItr != variants.end()) {
try {
auto entry = reader->getEntryFromTitle(*variantsItr);
entry = entry.getFinalEntry();
patternCorrespondingUrl = entry.getPath();
break;
} catch(kiwix::NoEntry& e) {
variantsItr++;
}
}
/* If article found then redirect directly to it */
if (!patternCorrespondingUrl.empty()) {
auto response = get_default_response();
response.set_redirection(m_root + "/" + bookName + "/" + patternCorrespondingUrl);
return response;
}
}
/* Make the search */
auto response = get_default_response();
response.set_mimeType("text/html; charset=utf-8");
response.set_taskbar(bookName, reader ? reader->getTitle() : "");
response.set_compress(true);
if ( (!reader && !bookName.empty())
|| (patternString.empty() && ! has_geo_query) ) {
auto data = get_default_data();
data.set("pattern", encodeDiples(patternString));
response.set_template(RESOURCE::templates::no_search_result_html, data);
response.set_code(MHD_HTTP_NOT_FOUND);
return response;
}
Searcher searcher;
if (reader) {
searcher.add_reader(reader.get());
} else {
for (auto& bookId: mp_library->filter(kiwix::Filter().local(true).valid(true))) {
auto currentReader = mp_library->getReaderById(bookId);
if (currentReader) {
searcher.add_reader(currentReader.get());
}
}
}
auto start = 0;
try {
start = request.get_argument<unsigned int>("start");
} catch (const std::exception&) {}
auto end = 25;
try {
end = request.get_argument<unsigned int>("end");
} catch (const std::exception&) {}
if (start>end) {
auto tmp = start;
start = end;
end = tmp;
}
if (end > start + MAX_SEARCH_LEN) {
end = start + MAX_SEARCH_LEN;
}
/* Get the results */
try {
if (patternString.empty()) {
searcher.geo_search(latitude, longitude, distance,
start, end, m_verbose.load());
} else {
searcher.search(patternString,
start, end, m_verbose.load());
}
SearchRenderer renderer(&searcher, mp_nameMapper);
renderer.setSearchPattern(patternString);
renderer.setSearchContent(bookName);
renderer.setProtocolPrefix(m_root + "/");
renderer.setSearchProtocolPrefix(m_root + "/search?");
response.set_content(renderer.getHtml());
} catch (const std::exception& e) {
std::cerr << e.what() << std::endl;
}
return response;
}
Response InternalServer::handle_random(const RequestContext& request)
{
if (m_verbose.load()) {
printf("** running handle_random\n");
}
std::string bookName;
std::string bookId;
std::shared_ptr<Reader> reader;
try {
bookName = request.get_argument("content");
bookId = mp_nameMapper->getIdForName(bookName);
reader = mp_library->getReaderById(bookId);
} catch (const std::out_of_range&) {
return build_404(request, bookName);
}
if (reader == nullptr) {
return build_404(request, bookName);
}
try {
auto entry = reader->getRandomPage();
entry = entry.getFinalEntry();
auto response = get_default_response();
response.set_redirection(m_root + "/" + bookName + "/" + kiwix::urlEncode(entry.getPath()));
return response;
} catch(kiwix::NoEntry& e) {
return build_404(request, bookName);
}
}
Response InternalServer::handle_catalog(const RequestContext& request)
{
if (m_verbose.load()) {
printf("** running handle_catalog");
}
std::string host;
std::string url;
try {
host = request.get_header("Host");
url = request.get_url_part(1);
} catch (const std::out_of_range&) {
return build_404(request, "");
}
if (url != "searchdescription.xml" && url != "root.xml" && url != "search") {
return build_404(request, "");
}
auto response = get_default_response();
response.set_compress(true);
if (url == "searchdescription.xml") {
response.set_template(RESOURCE::opensearchdescription_xml, get_default_data());
response.set_mimeType("application/opensearchdescription+xml");
return response;
}
zim::Uuid uuid;
kiwix::OPDSDumper opdsDumper;
opdsDumper.setRootLocation(m_root);
opdsDumper.setSearchDescriptionUrl("catalog/searchdescription.xml");
opdsDumper.setLibrary(mp_library);
response.set_mimeType("application/atom+xml;profile=opds-catalog;kind=acquisition; charset=utf-8");
std::vector<std::string> bookIdsToDump;
if (url == "root.xml") {
opdsDumper.setTitle("All zims");
uuid = zim::Uuid::generate(host);
bookIdsToDump = mp_library->filter(kiwix::Filter().valid(true).local(true).remote(true));
} else if (url == "search") {
auto filter = kiwix::Filter().valid(true).local(true).remote(true);
string query("<Empty query>");
size_t count(10);
size_t startIndex(0);
try {
query = request.get_argument("q");
filter.query(query);
} catch (const std::out_of_range&) {}
try {
filter.maxSize(extractFromString<unsigned long>(request.get_argument("maxsize")));
} catch (...) {}
try {
filter.name(request.get_argument("name"));
} catch (const std::out_of_range&) {}
try {
filter.lang(request.get_argument("lang"));
} catch (const std::out_of_range&) {}
try {
count = extractFromString<unsigned long>(request.get_argument("count"));
} catch (...) {}
try {
startIndex = extractFromString<unsigned long>(request.get_argument("start"));
} catch (...) {}
try {
filter.acceptTags(kiwix::split(request.get_argument("tag"), ";"));
} catch (...) {}
try {
filter.rejectTags(kiwix::split(request.get_argument("notag"), ";"));
} catch (...) {}
opdsDumper.setTitle("Search result for " + query);
uuid = zim::Uuid::generate();
bookIdsToDump = mp_library->filter(filter);
auto totalResults = bookIdsToDump.size();
bookIdsToDump.erase(bookIdsToDump.begin(), bookIdsToDump.begin()+startIndex);
if (count>0 && bookIdsToDump.size() > count) {
bookIdsToDump.resize(count);
}
opdsDumper.setOpenSearchInfo(totalResults, startIndex, bookIdsToDump.size());
}
opdsDumper.setId(kiwix::to_string(uuid));
response.set_content(opdsDumper.dumpOPDSFeed(bookIdsToDump));
return response;
}
Response InternalServer::handle_content(const RequestContext& request)
{
if (m_verbose.load()) {
printf("** running handle_content\n");
}
std::string baseUrl;
std::string content;
std::string mimeType;
kiwix::Entry entry;
std::string bookName;
try {
bookName = request.get_url_part(0);
} catch (const std::out_of_range& e) {
return build_homepage(request);
}
if (bookName.empty())
return build_homepage(request);
std::string bookId;
std::shared_ptr<Reader> reader;
try {
bookId = mp_nameMapper->getIdForName(bookName);
reader = mp_library->getReaderById(bookId);
} catch (const std::out_of_range& e) {
return build_404(request, bookName);
}
if (reader == nullptr) {
return build_404(request, bookName);
}
auto urlStr = request.get_url().substr(bookName.size()+1);
if (urlStr[0] == '/') {
urlStr = urlStr.substr(1);
}
try {
entry = reader->getEntryFromPath(urlStr);
if (entry.isRedirect() || urlStr.empty()) {
// If urlStr is empty, we want to mainPage.
// We must do a redirection to the real page.
entry = entry.getFinalEntry();
auto response = get_default_response();
response.set_redirection(m_root + "/" + bookName + "/" +
kiwix::urlEncode(entry.getPath()));
return response;
}
} catch(kiwix::NoEntry& e) {
if (m_verbose.load())
printf("Failed to find %s\n", urlStr.c_str());
return build_404(request, bookName);
}
try {
mimeType = entry.getMimetype();
} catch (exception& e) {
mimeType = "application/octet-stream";
}
if (m_verbose.load()) {
printf("Found %s\n", urlStr.c_str());
printf("mimeType: %s\n", mimeType.c_str());
}
if (mimeType.find("text/") != string::npos
|| mimeType.find("application/javascript") != string::npos
|| mimeType.find("application/json") != string::npos) {
zim::Blob raw_content = entry.getBlob();
content = string(raw_content.data(), raw_content.size());
auto response = get_default_response();
if (mimeType.find("text/html") != string::npos)
response.set_taskbar(bookName, reader->getTitle());
response.set_mimeType(mimeType);
response.set_content(content);
response.set_compress(true);
response.set_cache(true);
return response;
} else {
int range_len;
if (request.get_range().second == -1) {
range_len = entry.getSize() - request.get_range().first;
} else {
range_len = request.get_range().second - request.get_range().first;
}
auto response = get_default_response();
response.set_entry(entry);
response.set_mimeType(mimeType);
response.set_range_first(request.get_range().first);
response.set_range_len(range_len);
response.set_cache(true);
return response;
}
}
}

View File

@@ -1,127 +0,0 @@
/*
* Copyright 2020 Veloman Yunkan <veloman.yunkan@gmail.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*/
#include "byte_range.h"
#include "tools/stringTools.h"
#include <cassert>
#include <algorithm>
namespace kiwix {
namespace {
ByteRange parseByteRange(const std::string& rangeStr)
{
std::istringstream iss(rangeStr);
int64_t start, end = INT64_MAX;
if (iss >> start) {
if ( start < 0 ) {
if ( iss.eof() )
return ByteRange(-start);
} else {
char c;
if (iss >> c && c=='-') {
iss >> end; // if this fails, end is not modified, which is OK
if (iss.eof() && start <= end)
return ByteRange(ByteRange::PARSED, start, end);
}
}
}
return ByteRange(ByteRange::INVALID, 0, INT64_MAX);
}
} // unnamed namespace
ByteRange::ByteRange()
: kind_(NONE)
, first_(0)
, last_(INT64_MAX)
{}
ByteRange::ByteRange(Kind kind, int64_t first, int64_t last)
: kind_(kind)
, first_(first)
, last_(last)
{
assert(kind != NONE);
assert(first >= 0);
assert(last >= first || (first == 0 && last == -1));
}
ByteRange::ByteRange(int64_t suffix_length)
: kind_(PARSED)
, first_(-suffix_length)
, last_(INT64_MAX)
{
assert(suffix_length > 0);
}
int64_t ByteRange::first() const
{
assert(kind_ > PARSED);
return first_;
}
int64_t ByteRange::last() const
{
assert(kind_ > PARSED);
return last_;
}
int64_t ByteRange::length() const
{
assert(kind_ > PARSED);
return last_ + 1 - first_;
}
ByteRange ByteRange::parse(const std::string& rangeStr)
{
const std::string byteUnitSpec("bytes=");
if ( ! kiwix::startsWith(rangeStr, byteUnitSpec) )
return ByteRange(INVALID, 0, INT64_MAX);
return parseByteRange(rangeStr.substr(byteUnitSpec.size()));
}
ByteRange ByteRange::resolve(int64_t contentSize) const
{
if ( kind() == NONE )
return ByteRange(RESOLVED_FULL_CONTENT, 0, contentSize-1);
if ( kind() == INVALID )
return ByteRange(RESOLVED_UNSATISFIABLE, 0, contentSize-1);
const int64_t resolved_first = first_ < 0
? std::max(int64_t(0), contentSize + first_)
: first_;
const int64_t resolved_last = std::min(contentSize-1, last_);
if ( resolved_first > resolved_last )
return ByteRange(RESOLVED_UNSATISFIABLE, 0, contentSize-1);
return ByteRange(RESOLVED_PARTIAL_CONTENT, resolved_first, resolved_last);
}
} // namespace kiwix

View File

@@ -1,86 +0,0 @@
/*
* Copyright 2020 Veloman Yunkan <veloman.yunkan@gmail.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*/
#ifndef KIWIXLIB_SERVER_BYTE_RANGE_H
#define KIWIXLIB_SERVER_BYTE_RANGE_H
#include <cstdint>
#include <string>
namespace kiwix {
class ByteRange
{
public: // types
// ByteRange is parsed in a request, then it must be resolved (taking
// into account the actual size of the requested resource) before
// being applied in the response.
// The Kind enum represents possible states in such a lifecycle.
enum Kind {
// The request is not a range request (no Range header)
NONE,
// The value of the Range header is not a valid continuous
// range. Note that a valid (according to RFC7233) sequence of multiple
// byte ranges is considered invalid in the current implementation
// (i.e. only single-range partial requests are supported).
INVALID,
// This byte-range has been successfully parsed from the request
PARSED,
// This is a response to a regular (non-range) request
RESOLVED_FULL_CONTENT,
// The range request is invalid or unsatisfiable
RESOLVED_UNSATISFIABLE,
// This is a response to a (satisfiable) range request
RESOLVED_PARTIAL_CONTENT,
};
public: // functions
// Constructs a ByteRange object of NONE kind
ByteRange();
// Constructs a ByteRange object of the given kind (except NONE)
ByteRange(Kind kind, int64_t first, int64_t last);
// Constructs a ByteRange object of PARSED kind corresponding to a
// range request of the form "Range: bytes=-suffix_length"
explicit ByteRange(int64_t suffix_length);
Kind kind() const { return kind_; }
int64_t first() const;
int64_t last() const;
int64_t length() const;
static ByteRange parse(const std::string& rangeStr);
ByteRange resolve(int64_t contentSize) const;
private: // data
Kind kind_;
int64_t first_;
int64_t last_;
};
} // namespace kiwix
#endif //KIWIXLIB_SERVER_BYTE_RANGE_H

View File

@@ -1,135 +0,0 @@
/*
* Copyright 2020 Veloman Yunkan <veloman.yunkan@gmail.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*/
#include "etag.h"
#include "tools/stringTools.h"
#include <algorithm>
#include <sstream>
namespace kiwix {
namespace {
// Characters in the options part of the ETag could in principle be picked up
// from the latin alphabet in natural order (the character corresponding to
// ETag::Option opt would be 'a'+opt; that would somewhat simplify the code in
// this file). However it is better to have some mnemonics in the option names,
// hence below variable: all_options[opt] corresponds to the character going
// into the ETag for ETag::Option opt.
// IMPORTANT: The characters in all_options must come in sorted order (so that
// IMPORTANT: isValidOptionsString() works correctly).
const char all_options[] = "cz";
static_assert(ETag::OPTION_COUNT == sizeof(all_options) - 1, "");
bool isValidServerId(const std::string& s)
{
return !s.empty() && s.find_first_of("\"/") == std::string::npos;
}
bool isSubsequenceOf(const std::string& s, const std::string& sortedString)
{
std::string::size_type i = 0;
for ( const char c : s )
{
const std::string::size_type j = sortedString.find(c, i);
if ( j == std::string::npos )
return false;
i = j+1;
}
return true;
}
bool isValidOptionsString(const std::string& s)
{
return isSubsequenceOf(s, all_options);
}
} // namespace
void ETag::set_option(Option opt)
{
if ( ! get_option(opt) )
{
m_options.push_back(all_options[opt]);
std::sort(m_options.begin(), m_options.end());
}
}
bool ETag::get_option(Option opt) const
{
return m_options.find(all_options[opt]) != std::string::npos;
}
std::string ETag::get_etag() const
{
if ( m_serverId.empty() )
return std::string();
return "\"" + m_serverId + "/" + m_options + "\"";
}
ETag::ETag(const std::string& serverId, const std::string& options)
{
if ( isValidServerId(serverId) && isValidOptionsString(options) )
{
m_serverId = serverId;
m_options = options;
}
}
ETag ETag::parse(std::string s)
{
if ( kiwix::startsWith("W/", s) )
s = s.substr(2);
if ( s.front() != '"' || s.back() != '"' )
return ETag();
s = s.substr(1, s.size()-2);
const std::string::size_type i = s.find('/');
if ( i == std::string::npos )
return ETag();
return ETag(s.substr(0, i), s.substr(i+1));
}
ETag ETag::match(const std::string& etags, const std::string& server_id)
{
std::istringstream ss(etags);
std::string etag_str;
while ( ss >> etag_str )
{
if ( etag_str.back() == ',' )
etag_str.pop_back();
const ETag etag = parse(etag_str);
if ( etag && etag.m_serverId == server_id )
return etag;
}
return ETag();
}
} // namespace kiwix

View File

@@ -1,85 +0,0 @@
/*
* Copyright 2020 Veloman Yunkan <veloman.yunkan@gmail.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*/
#ifndef KIWIXLIB_SERVER_ETAG_H
#define KIWIXLIB_SERVER_ETAG_H
#include <string>
namespace kiwix {
// The ETag string used by Kiwix server (more precisely, its value inside the
// double quotes) consists of two parts:
//
// 1. ServerId - The string obtained on server start up
//
// 2. Options - Zero or more characters encoding the values of some of the
// headers of the response
//
// The two parts are separated with a slash (/) symbol (which is always present,
// even when the the options part is empty). Neither portion of a Kiwix ETag
// may contain the slash symbol.
// Examples of valid Kiwix server ETags (including the double quotes):
//
// "abcdefghijklmn/"
// "1234567890/z"
// "1234567890/cz"
//
// The options part of the Kiwix ETag allows to correctly set the required
// headers when responding to a conditional If-None-Match request with a 304
// (Not Modified) response without following the full code path that would
// discover the necessary options.
class ETag
{
public: // types
enum Option {
CACHEABLE_ENTITY,
COMPRESSED_CONTENT,
OPTION_COUNT
};
public: // functions
ETag() {}
void set_server_id(const std::string& id) { m_serverId = id; }
void set_option(Option opt);
explicit operator bool() const { return !m_serverId.empty(); }
bool get_option(Option opt) const;
std::string get_etag() const;
static ETag match(const std::string& etags, const std::string& server_id);
private: // functions
ETag(const std::string& serverId, const std::string& options);
static ETag parse(std::string s);
private: // data
std::string m_serverId;
std::string m_options;
};
} // namespace kiwix
#endif // KIWIXLIB_SERVER_ETAG_H

View File

@@ -1,894 +0,0 @@
/*
* Copyright 2019 Matthieu Gautier <mgautier@kymeria.fr>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*/
#include "internalServer.h"
#ifdef __FreeBSD__
#include <netinet/in.h>
#endif
#ifdef _WIN32
# if !defined(__MINGW32__) && (_MSC_VER < 1600)
# include "stdint4win.h"
# endif
# include <winsock2.h>
# include <ws2tcpip.h>
# ifdef __GNUC__
// inet_pton is not declared in mingw, even if the function exists.
extern "C" {
WINSOCK_API_LINKAGE INT WSAAPI inet_pton( INT Family, PCSTR pszAddrString, PVOID pAddrBuf);
}
# endif
typedef UINT64 uint64_t;
typedef UINT16 uint16_t;
#endif
extern "C" {
#include "microhttpd_wrapper.h"
}
#include "tools.h"
#include "tools/pathTools.h"
#include "tools/regexTools.h"
#include "tools/stringTools.h"
#include "tools/archiveTools.h"
#include "library.h"
#include "name_mapper.h"
#include "entry.h"
#include "searcher.h"
#include "search_renderer.h"
#include "opds_dumper.h"
#include <zim/uuid.h>
#include <zim/error.h>
#include <zim/search.h>
#include <zim/suggestion.h>
#include <zim/entry.h>
#include <zim/item.h>
#include <mustache.hpp>
#include <atomic>
#include <string>
#include <vector>
#include <chrono>
#include "kiwixlib-resources.h"
#ifndef _WIN32
# include <arpa/inet.h>
#endif
#include "request_context.h"
#include "response.h"
#define MAX_SEARCH_LEN 140
#define KIWIX_MIN_CONTENT_SIZE_TO_DEFLATE 100
namespace kiwix {
namespace
{
inline std::string normalizeRootUrl(std::string rootUrl)
{
while ( !rootUrl.empty() && rootUrl.back() == '/' )
rootUrl.pop_back();
while ( !rootUrl.empty() && rootUrl.front() == '/' )
rootUrl = rootUrl.substr(1);
return rootUrl.empty() ? rootUrl : "/" + rootUrl;
}
unsigned parseIllustration(const std::string& s)
{
int nw(0), nh(0), nEnd(0);
long int w(-1), h(-1);
if ( sscanf(s.c_str(), "Illustration_%n%ldx%n%ld@1%n)", &nw, &w, &nh, &h, &nEnd) == 2
&& nEnd == (int)s.size() && !isspace(s[nw]) && !isspace(s[nh]) && w == h && w >= 0) {
return w;
}
return 0;
}
} // unnamed namespace
static IdNameMapper defaultNameMapper;
static MHD_Result staticHandlerCallback(void* cls,
struct MHD_Connection* connection,
const char* url,
const char* method,
const char* version,
const char* upload_data,
size_t* upload_data_size,
void** cont_cls);
InternalServer::InternalServer(Library* library,
NameMapper* nameMapper,
std::string addr,
int port,
std::string root,
int nbThreads,
bool verbose,
bool withTaskbar,
bool withLibraryButton,
bool blockExternalLinks,
std::string indexTemplateString) :
m_addr(addr),
m_port(port),
m_root(normalizeRootUrl(root)),
m_nbThreads(nbThreads),
m_verbose(verbose),
m_withTaskbar(withTaskbar),
m_withLibraryButton(withLibraryButton),
m_blockExternalLinks(blockExternalLinks),
m_indexTemplateString(indexTemplateString.empty() ? RESOURCE::templates::index_html : indexTemplateString),
mp_daemon(nullptr),
mp_library(library),
mp_nameMapper(nameMapper ? nameMapper : &defaultNameMapper)
{}
bool InternalServer::start() {
#ifdef _WIN32
int flags = MHD_USE_SELECT_INTERNALLY;
#else
int flags = MHD_USE_POLL_INTERNALLY;
#endif
if (m_verbose.load())
flags |= MHD_USE_DEBUG;
struct sockaddr_in sockAddr;
memset(&sockAddr, 0, sizeof(sockAddr));
sockAddr.sin_family = AF_INET;
sockAddr.sin_port = htons(m_port);
if (m_addr.empty()) {
if (0 != INADDR_ANY)
sockAddr.sin_addr.s_addr = htonl(INADDR_ANY);
} else {
if (inet_pton(AF_INET, m_addr.c_str(), &(sockAddr.sin_addr.s_addr)) == 0) {
std::cerr << "Ip address " << m_addr << " is not a valid ip address" << std::endl;
return false;
}
}
mp_daemon = MHD_start_daemon(flags,
m_port,
NULL,
NULL,
&staticHandlerCallback,
this,
MHD_OPTION_SOCK_ADDR, &sockAddr,
MHD_OPTION_THREAD_POOL_SIZE, m_nbThreads,
MHD_OPTION_END);
if (mp_daemon == nullptr) {
std::cerr << "Unable to instantiate the HTTP daemon. The port " << m_port
<< " is maybe already occupied or need more permissions to be open. "
"Please try as root or with a port number higher or equal to 1024."
<< std::endl;
return false;
}
auto server_start_time = std::chrono::system_clock::now().time_since_epoch();
m_server_id = kiwix::to_string(server_start_time.count());
m_library_id = m_server_id;
return true;
}
void InternalServer::stop()
{
MHD_stop_daemon(mp_daemon);
}
static MHD_Result staticHandlerCallback(void* cls,
struct MHD_Connection* connection,
const char* url,
const char* method,
const char* version,
const char* upload_data,
size_t* upload_data_size,
void** cont_cls)
{
InternalServer* _this = static_cast<InternalServer*>(cls);
return _this->handlerCallback(connection,
url,
method,
version,
upload_data,
upload_data_size,
cont_cls);
}
MHD_Result InternalServer::handlerCallback(struct MHD_Connection* connection,
const char* url,
const char* method,
const char* version,
const char* upload_data,
size_t* upload_data_size,
void** cont_cls)
{
auto start_time = std::chrono::steady_clock::now();
if (m_verbose.load() ) {
printf("======================\n");
printf("Requesting : \n");
printf("full_url : %s\n", url);
}
RequestContext request(connection, m_root, url, method, version);
if (m_verbose.load() ) {
request.print_debug_info();
}
/* Unexpected method */
if (request.get_method() != RequestMethod::GET
&& request.get_method() != RequestMethod::POST
&& request.get_method() != RequestMethod::HEAD) {
printf("Reject request because of unhandled request method.\n");
printf("----------------------\n");
return MHD_NO;
}
auto response = handle_request(request);
if (response->getReturnCode() == MHD_HTTP_INTERNAL_SERVER_ERROR) {
printf("========== INTERNAL ERROR !! ============\n");
if (!m_verbose.load()) {
printf("Requesting : \n");
printf("full_url : %s\n", url);
request.print_debug_info();
}
}
if (response->getReturnCode() == MHD_HTTP_OK && !etag_not_needed(request))
response->set_server_id(m_server_id);
auto ret = response->send(request, connection);
auto end_time = std::chrono::steady_clock::now();
auto time_span = std::chrono::duration_cast<std::chrono::duration<double>>(end_time - start_time);
if (m_verbose.load()) {
printf("Request time : %fs\n", time_span.count());
printf("----------------------\n");
}
return ret;
}
std::unique_ptr<Response> InternalServer::handle_request(const RequestContext& request)
{
try {
if (! request.is_valid_url())
return Response::build_404(*this, request, "", "");
const ETag etag = get_matching_if_none_match_etag(request);
if ( etag )
return Response::build_304(*this, etag);
if (startsWith(request.get_url(), "/skin/"))
return handle_skin(request);
if (startsWith(request.get_url(), "/catalog/"))
return handle_catalog(request);
if (request.get_url() == "/meta")
return handle_meta(request);
if (request.get_url() == "/search")
return handle_search(request);
if (request.get_url() == "/suggest")
return handle_suggest(request);
if (request.get_url() == "/random")
return handle_random(request);
if (request.get_url() == "/catch/external")
return handle_captured_external(request);
return handle_content(request);
} catch (std::exception& e) {
fprintf(stderr, "===== Unhandled error : %s\n", e.what());
return Response::build_500(*this, e.what());
} catch (...) {
fprintf(stderr, "===== Unhandled unknown error\n");
return Response::build_500(*this, "Unknown error");
}
}
MustacheData InternalServer::get_default_data() const
{
MustacheData data;
data.set("root", m_root);
return data;
}
bool InternalServer::etag_not_needed(const RequestContext& request) const
{
const std::string url = request.get_url();
return kiwix::startsWith(url, "/catalog")
|| url == "/search"
|| url == "/suggest"
|| url == "/random"
|| url == "/catch/external";
}
ETag
InternalServer::get_matching_if_none_match_etag(const RequestContext& r) const
{
try {
const std::string etag_list = r.get_header(MHD_HTTP_HEADER_IF_NONE_MATCH);
return ETag::match(etag_list, m_server_id);
} catch (const std::out_of_range&) {
return ETag();
}
}
std::unique_ptr<Response> InternalServer::build_homepage(const RequestContext& request)
{
return ContentResponse::build(*this, m_indexTemplateString, get_default_data(), "text/html; charset=utf-8", true);
}
/**
* Archive and Zim handlers begin
**/
// TODO: retrieve searcher from caching mechanism
SuggestionsList_t getSuggestions(const zim::Archive* const archive,
const std::string& queryString, int start, int suggestionCount)
{
SuggestionsList_t suggestions;
auto searcher = zim::SuggestionSearcher(*archive);
if (archive->hasTitleIndex()) {
auto search = searcher.suggest(queryString);
auto srs = search.getResults(start, suggestionCount);
for (auto it : srs) {
SuggestionItem suggestion(it.getTitle(), kiwix::normalize(it.getTitle()),
it.getPath(), it.getSnippet());
suggestions.push_back(suggestion);
}
} else {
// TODO: This case should be handled by libzim
std::vector<std::string> variants = getTitleVariants(queryString);
int currCount = 0;
for (auto it = variants.begin(); it != variants.end() && currCount < suggestionCount; it++) {
auto search = searcher.suggest(queryString);
auto srs = search.getResults(0, suggestionCount);
for (auto it : srs) {
SuggestionItem suggestion(it.getTitle(), kiwix::normalize(it.getTitle()),
it.getPath());
suggestions.push_back(suggestion);
currCount++;
}
}
}
return suggestions;
}
/**
* Archive and Zim handlers end
**/
std::unique_ptr<Response> InternalServer::handle_meta(const RequestContext& request)
{
std::string bookName;
std::string bookId;
std::string meta_name;
std::shared_ptr<zim::Archive> archive;
try {
bookName = request.get_argument("content");
bookId = mp_nameMapper->getIdForName(bookName);
meta_name = request.get_argument("name");
archive = mp_library->getArchiveById(bookId);
} catch (const std::out_of_range& e) {
return Response::build_404(*this, request, bookName, "");
}
if (archive == nullptr) {
return Response::build_404(*this, request, bookName, "");
}
std::string content;
std::string mimeType = "text";
if (meta_name == "title") {
content = getArchiveTitle(*archive);
} else if (meta_name == "description") {
content = getMetaDescription(*archive);
} else if (meta_name == "language") {
content = getMetaLanguage(*archive);
} else if (meta_name == "name") {
content = getMetaName(*archive);
} else if (meta_name == "tags") {
content = getMetaTags(*archive);
} else if (meta_name == "date") {
content = getMetaDate(*archive);
} else if (meta_name == "creator") {
content = getMetaCreator(*archive);
} else if (meta_name == "publisher") {
content = getMetaPublisher(*archive);
} else if (meta_name == "favicon") {
getArchiveFavicon(*archive, 48, content, mimeType);
} else if (const unsigned illustrationSize = parseIllustration(meta_name)) {
getArchiveFavicon(*archive, illustrationSize, content, mimeType);
} else {
return Response::build_404(*this, request, bookName, "");
}
auto response = ContentResponse::build(*this, content, mimeType);
response->set_cacheable();
return std::move(response);
}
std::unique_ptr<Response> InternalServer::handle_suggest(const RequestContext& request)
{
if (m_verbose.load()) {
printf("** running handle_suggest\n");
}
std::string content;
std::string mimeType;
std::string bookName;
std::string bookId;
std::string queryString;
std::shared_ptr<zim::Archive> archive;
try {
bookName = request.get_argument("content");
bookId = mp_nameMapper->getIdForName(bookName);
queryString = request.get_argument("term");
archive = mp_library->getArchiveById(bookId);
} catch (const std::out_of_range&) {
return Response::build_404(*this, request, bookName, "");
}
auto start = 0;
try {
start = request.get_argument<unsigned int>("start");
} catch (const std::exception&) {}
unsigned int count = 10;
try {
count = request.get_argument<unsigned int>("count");
} catch (const std::exception&) {}
if (count == 0) {
count = 10;
}
if (archive == nullptr) {
return Response::build_404(*this, request, bookName, "");
}
if (m_verbose.load()) {
printf("Searching suggestions for: \"%s\"\n", queryString.c_str());
}
MustacheData results{MustacheData::type::list};
bool first = true;
/* Get the suggestions */
SuggestionsList_t suggestions = getSuggestions(archive.get(), queryString, start, count);
for(auto& suggestion:suggestions) {
MustacheData result;
result.set("label", suggestion.getTitle());
if (suggestion.hasSnippet()) {
result.set("label", suggestion.getSnippet());
}
result.set("value", suggestion.getTitle());
result.set("kind", "path");
result.set("path", suggestion.getPath());
result.set("first", first);
first = false;
results.push_back(result);
}
/* Propose the fulltext search if possible */
if (archive->hasFulltextIndex()) {
MustacheData result;
result.set("label", "containing '" + queryString + "'...");
result.set("value", queryString + " ");
result.set("kind", "pattern");
result.set("first", first);
results.push_back(result);
}
auto data = get_default_data();
data.set("suggestions", results);
auto response = ContentResponse::build(*this, RESOURCE::templates::suggestion_json, data, "application/json; charset=utf-8");
return std::move(response);
}
std::unique_ptr<Response> InternalServer::handle_skin(const RequestContext& request)
{
if (m_verbose.load()) {
printf("** running handle_skin\n");
}
auto resourceName = request.get_url().substr(1);
try {
auto response = ContentResponse::build(
*this,
getResource(resourceName),
getMimeTypeForFile(resourceName));
response->set_cacheable();
return std::move(response);
} catch (const ResourceNotFound& e) {
return Response::build_404(*this, request, "", "");
}
}
std::unique_ptr<Response> InternalServer::handle_search(const RequestContext& request)
{
if (m_verbose.load()) {
printf("** running handle_search\n");
}
std::string bookName;
std::string bookId;
try {
bookName = request.get_argument("content");
bookId = mp_nameMapper->getIdForName(bookName);
} catch (const std::out_of_range&) {}
std::string patternString;
try {
patternString = request.get_argument("pattern");
} catch (const std::out_of_range&) {}
/* Retrive geo search */
bool has_geo_query = false;
float latitude = 0;
float longitude = 0;
float distance = 0;
try {
latitude = request.get_argument<float>("latitude");
longitude = request.get_argument<float>("longitude");
distance = request.get_argument<float>("distance");
has_geo_query = true;
} catch(const std::out_of_range&) {}
catch(const std::invalid_argument&) {}
std::shared_ptr<zim::Archive> archive;
try {
archive = mp_library->getArchiveById(bookId);
} catch (const std::out_of_range&) {}
/* Make the search */
if ( (!archive && !bookName.empty())
|| (patternString.empty() && ! has_geo_query) ) {
auto data = get_default_data();
data.set("pattern", encodeDiples(patternString));
auto response = ContentResponse::build(*this, RESOURCE::templates::no_search_result_html, data, "text/html; charset=utf-8");
response->set_taskbar(bookName, archive ? getArchiveTitle(*archive) : "");
response->set_code(MHD_HTTP_NOT_FOUND);
return std::move(response);
}
std::shared_ptr<zim::Searcher> searcher;
if (archive) {
searcher = std::make_shared<zim::Searcher>(*archive);
} else {
for (auto& bookId: mp_library->filter(kiwix::Filter().local(true).valid(true))) {
auto currentArchive = mp_library->getArchiveById(bookId);
if (currentArchive) {
if (! searcher) {
searcher = std::make_shared<zim::Searcher>(*currentArchive);
} else {
searcher->addArchive(*currentArchive);
}
}
}
}
auto start = 0;
try {
start = request.get_argument<unsigned int>("start");
} catch (const std::exception&) {}
auto pageLength = 25;
try {
pageLength = request.get_argument<unsigned int>("pageLength");
} catch (const std::exception&) {}
if (pageLength > MAX_SEARCH_LEN) {
pageLength = MAX_SEARCH_LEN;
}
if (pageLength == 0) {
pageLength = 25;
}
/* Get the results */
try {
zim::Query query;
if (patternString.empty()) {
// Execute geo-search
if (m_verbose.load()) {
cout << "Performing geo query `" << distance << "&(" << latitude << ";" << longitude << ")'" << endl;
}
query.setQuery("");
query.setGeorange(latitude, longitude, distance);
} else {
// Execute Ft search
if (m_verbose.load()) {
cout << "Performing query `" << patternString << "'" << endl;
}
std::string queryString = removeAccents(patternString);
query.setQuery(queryString);
}
zim::Search search = searcher->search(query);
SearchRenderer renderer(search.getResults(start, pageLength), mp_nameMapper, start,
search.getEstimatedMatches());
renderer.setSearchPattern(patternString);
renderer.setSearchContent(bookName);
renderer.setProtocolPrefix(m_root + "/");
renderer.setSearchProtocolPrefix(m_root + "/search?");
renderer.setPageLength(pageLength);
auto response = ContentResponse::build(*this, renderer.getHtml(), "text/html; charset=utf-8");
response->set_taskbar(bookName, archive ? getArchiveTitle(*archive) : "");
return std::move(response);
} catch (const std::exception& e) {
std::cerr << e.what() << std::endl;
return Response::build_500(*this, e.what());
}
}
std::unique_ptr<Response> InternalServer::handle_random(const RequestContext& request)
{
if (m_verbose.load()) {
printf("** running handle_random\n");
}
std::string bookName;
std::string bookId;
std::shared_ptr<zim::Archive> archive;
try {
bookName = request.get_argument("content");
bookId = mp_nameMapper->getIdForName(bookName);
archive = mp_library->getArchiveById(bookId);
} catch (const std::out_of_range&) {
return Response::build_404(*this, request, bookName, "");
}
if (archive == nullptr) {
return Response::build_404(*this, request, bookName, "");
}
try {
auto entry = archive->getRandomEntry();
return build_redirect(bookName, getFinalItem(*archive, entry));
} catch(zim::EntryNotFound& e) {
return Response::build_404(*this, request, bookName, "");
}
}
std::unique_ptr<Response> InternalServer::handle_captured_external(const RequestContext& request)
{
std::string source = "";
try {
source = kiwix::urlDecode(request.get_argument("source"));
} catch (const std::out_of_range& e) {}
if (source.empty())
return Response::build_404(*this, request, "", "");
auto data = get_default_data();
data.set("source", source);
return ContentResponse::build(*this, RESOURCE::templates::captured_external_html, data, "text/html; charset=utf-8");
}
std::unique_ptr<Response> InternalServer::handle_catalog(const RequestContext& request)
{
if (m_verbose.load()) {
printf("** running handle_catalog");
}
std::string host;
std::string url;
try {
host = request.get_header("Host");
url = request.get_url_part(1);
} catch (const std::out_of_range&) {
return Response::build_404(*this, request, "", "");
}
if (url == "v2") {
return handle_catalog_v2(request);
}
if (url != "searchdescription.xml" && url != "root.xml" && url != "search") {
return Response::build_404(*this, request, "", "");
}
if (url == "searchdescription.xml") {
auto response = ContentResponse::build(*this, RESOURCE::opensearchdescription_xml, get_default_data(), "application/opensearchdescription+xml");
return std::move(response);
}
zim::Uuid uuid;
kiwix::OPDSDumper opdsDumper(mp_library);
opdsDumper.setRootLocation(m_root);
opdsDumper.setLibraryId(m_library_id);
std::vector<std::string> bookIdsToDump;
if (url == "root.xml") {
uuid = zim::Uuid::generate(host);
bookIdsToDump = mp_library->filter(kiwix::Filter().valid(true).local(true).remote(true));
} else if (url == "search") {
bookIdsToDump = search_catalog(request, opdsDumper);
uuid = zim::Uuid::generate();
}
auto response = ContentResponse::build(
*this,
opdsDumper.dumpOPDSFeed(bookIdsToDump, request.get_query()),
"application/atom+xml; profile=opds-catalog; kind=acquisition; charset=utf-8");
return std::move(response);
}
namespace
{
Filter get_search_filter(const RequestContext& request)
{
auto filter = kiwix::Filter().valid(true).local(true);
try {
filter.query(request.get_argument("q"));
} catch (const std::out_of_range&) {}
try {
filter.maxSize(request.get_argument<unsigned long>("maxsize"));
} catch (...) {}
try {
filter.name(request.get_argument("name"));
} catch (const std::out_of_range&) {}
try {
filter.category(request.get_argument("category"));
} catch (const std::out_of_range&) {}
try {
filter.lang(request.get_argument("lang"));
} catch (const std::out_of_range&) {}
try {
filter.acceptTags(kiwix::split(request.get_argument("tag"), ";"));
} catch (...) {}
try {
filter.rejectTags(kiwix::split(request.get_argument("notag"), ";"));
} catch (...) {}
return filter;
}
template<class T>
std::vector<T> subrange(const std::vector<T>& v, size_t s, size_t n)
{
const size_t e = std::min(v.size(), s+n);
return std::vector<T>(v.begin()+std::min(v.size(), s), v.begin()+e);
}
} // unnamed namespace
std::vector<std::string>
InternalServer::search_catalog(const RequestContext& request,
kiwix::OPDSDumper& opdsDumper)
{
const auto filter = get_search_filter(request);
const std::string q = filter.hasQuery()
? filter.getQuery()
: "<Empty query>";
std::vector<std::string> bookIdsToDump = mp_library->filter(filter);
const auto totalResults = bookIdsToDump.size();
const size_t count = request.get_optional_param("count", 10UL);
const size_t startIndex = request.get_optional_param("start", 0UL);
bookIdsToDump = subrange(bookIdsToDump, startIndex, count);
opdsDumper.setOpenSearchInfo(totalResults, startIndex, bookIdsToDump.size());
return bookIdsToDump;
}
namespace
{
std::string get_book_name(const RequestContext& request)
{
try {
return request.get_url_part(0);
} catch (const std::out_of_range& e) {
return std::string();
}
}
std::string searchSuggestionHTML(const std::string& searchURL, const std::string& pattern)
{
kainjow::mustache::mustache tmpl("Make a full text search for <a href=\"{{{searchURL}}}\">{{pattern}}</a>");
MustacheData data;
data.set("pattern", pattern);
data.set("searchURL", searchURL);
return (tmpl.render(data));
}
} // unnamed namespace
std::unique_ptr<Response>
InternalServer::build_redirect(const std::string& bookName, const zim::Item& item) const
{
auto redirectUrl = m_root + "/" + bookName + "/" + kiwix::urlEncode(item.getPath());
return Response::build_redirect(*this, redirectUrl);
}
std::unique_ptr<Response> InternalServer::handle_content(const RequestContext& request)
{
const std::string url = request.get_url();
const std::string pattern = url.substr((url.find_last_of('/'))+1);
if (m_verbose.load()) {
printf("** running handle_content\n");
}
const std::string bookName = get_book_name(request);
if (bookName.empty())
return build_homepage(request);
std::shared_ptr<zim::Archive> archive;
try {
const std::string bookId = mp_nameMapper->getIdForName(bookName);
archive = mp_library->getArchiveById(bookId);
} catch (const std::out_of_range& e) {}
if (archive == nullptr) {
std::string searchURL = m_root+"/search?pattern="+pattern; // Make a full search on the entire library.
const std::string details = searchSuggestionHTML(searchURL, kiwix::urlDecode(pattern));
return Response::build_404(*this, request, bookName, "", details);
}
auto urlStr = request.get_url().substr(bookName.size()+1);
if (urlStr[0] == '/') {
urlStr = urlStr.substr(1);
}
try {
auto entry = getEntryFromPath(*archive, urlStr);
if (entry.isRedirect() || urlStr.empty()) {
// If urlStr is empty, we want to mainPage.
// We must do a redirection to the real page.
return build_redirect(bookName, getFinalItem(*archive, entry));
}
auto response = ItemResponse::build(*this, request, entry.getItem());
try {
dynamic_cast<ContentResponse&>(*response).set_taskbar(bookName, getArchiveTitle(*archive));
} catch (std::bad_cast& e) {}
if (m_verbose.load()) {
printf("Found %s\n", entry.getPath().c_str());
printf("mimeType: %s\n", entry.getItem(true).getMimetype().c_str());
}
return response;
} catch(zim::EntryNotFound& e) {
if (m_verbose.load())
printf("Failed to find %s\n", urlStr.c_str());
std::string searchURL = m_root+"/search?content="+bookName+"&pattern="+pattern; // Make a search on this specific book only.
const std::string details = searchSuggestionHTML(searchURL, kiwix::urlDecode(pattern));
return Response::build_404(*this, request, bookName, getArchiveTitle(*archive), details);
}
}
}

View File

@@ -1,125 +0,0 @@
/*
* Copyright 2019 Matthieu Gautier <mgautier@kymeria.fr>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*/
#ifndef KIWIXLIB_SERVER_INTERNALSERVER_H
#define KIWIXLIB_SERVER_INTERNALSERVER_H
extern "C" {
#include "microhttpd_wrapper.h"
}
#include "library.h"
#include "name_mapper.h"
#include <mustache.hpp>
#include <atomic>
#include <string>
#include "server/request_context.h"
#include "server/response.h"
namespace kiwix {
typedef kainjow::mustache::data MustacheData;
class Entry;
class OPDSDumper;
class InternalServer {
public:
InternalServer(Library* library,
NameMapper* nameMapper,
std::string addr,
int port,
std::string root,
int nbThreads,
bool verbose,
bool withTaskbar,
bool withLibraryButton,
bool blockExternalLinks,
std::string indexTemplateString);
virtual ~InternalServer() = default;
MHD_Result handlerCallback(struct MHD_Connection* connection,
const char* url,
const char* method,
const char* version,
const char* upload_data,
size_t* upload_data_size,
void** cont_cls);
bool start();
void stop();
private: // functions
std::unique_ptr<Response> handle_request(const RequestContext& request);
std::unique_ptr<Response> build_redirect(const std::string& bookName, const zim::Item& item) const;
std::unique_ptr<Response> build_homepage(const RequestContext& request);
std::unique_ptr<Response> handle_skin(const RequestContext& request);
std::unique_ptr<Response> handle_catalog(const RequestContext& request);
std::unique_ptr<Response> handle_catalog_v2(const RequestContext& request);
std::unique_ptr<Response> handle_catalog_v2_root(const RequestContext& request);
std::unique_ptr<Response> handle_catalog_v2_entries(const RequestContext& request, bool partial);
std::unique_ptr<Response> handle_catalog_v2_complete_entry(const RequestContext& request, const std::string& entryId);
std::unique_ptr<Response> handle_catalog_v2_categories(const RequestContext& request);
std::unique_ptr<Response> handle_catalog_v2_languages(const RequestContext& request);
std::unique_ptr<Response> handle_meta(const RequestContext& request);
std::unique_ptr<Response> handle_search(const RequestContext& request);
std::unique_ptr<Response> handle_suggest(const RequestContext& request);
std::unique_ptr<Response> handle_random(const RequestContext& request);
std::unique_ptr<Response> handle_captured_external(const RequestContext& request);
std::unique_ptr<Response> handle_content(const RequestContext& request);
std::vector<std::string> search_catalog(const RequestContext& request,
kiwix::OPDSDumper& opdsDumper);
MustacheData get_default_data() const;
bool etag_not_needed(const RequestContext& r) const;
ETag get_matching_if_none_match_etag(const RequestContext& request) const;
private: // data
std::string m_addr;
int m_port;
std::string m_root;
int m_nbThreads;
std::atomic_bool m_verbose;
bool m_withTaskbar;
bool m_withLibraryButton;
bool m_blockExternalLinks;
std::string m_indexTemplateString;
struct MHD_Daemon* mp_daemon;
Library* mp_library;
NameMapper* mp_nameMapper;
std::string m_server_id;
std::string m_library_id;
friend std::unique_ptr<Response> Response::build(const InternalServer& server);
friend std::unique_ptr<ContentResponse> ContentResponse::build(const InternalServer& server, const std::string& content, const std::string& mimetype, bool isHomePage);
friend std::unique_ptr<Response> ItemResponse::build(const InternalServer& server, const RequestContext& request, const zim::Item& item);
friend std::unique_ptr<Response> Response::build_500(const InternalServer& server, const std::string& msg);
};
}
#endif //KIWIXLIB_SERVER_INTERNALSERVER_H

View File

@@ -1,149 +0,0 @@
/*
* Copyright 2021 Veloman Yunkan <veloman.yunkan@gmail.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*/
#include "internalServer.h"
#include "library.h"
#include "opds_dumper.h"
#include "request_context.h"
#include "response.h"
#include "tools/otherTools.h"
#include "kiwixlib-resources.h"
#include <mustache.hpp>
#include <string>
#include <vector>
namespace kiwix {
std::unique_ptr<Response> InternalServer::handle_catalog_v2(const RequestContext& request)
{
if (m_verbose.load()) {
printf("** running handle_catalog_v2");
}
std::string url;
try {
url = request.get_url_part(2);
} catch (const std::out_of_range&) {
return Response::build_404(*this, request, "", "");
}
if (url == "root.xml") {
return handle_catalog_v2_root(request);
} else if (url == "searchdescription.xml") {
const std::string endpoint_root = m_root + "/catalog/v2";
return ContentResponse::build(*this,
RESOURCE::catalog_v2_searchdescription_xml,
kainjow::mustache::object({{"endpoint_root", endpoint_root}}),
"application/opensearchdescription+xml"
);
} else if (url == "entry") {
const std::string entryId = request.get_url_part(3);
return handle_catalog_v2_complete_entry(request, entryId);
} else if (url == "entries") {
return handle_catalog_v2_entries(request, /*partial=*/false);
} else if (url == "partial_entries") {
return handle_catalog_v2_entries(request, /*partial=*/true);
} else if (url == "categories") {
return handle_catalog_v2_categories(request);
} else if (url == "languages") {
return handle_catalog_v2_languages(request);
} else {
return Response::build_404(*this, request, "", "");
}
}
std::unique_ptr<Response> InternalServer::handle_catalog_v2_root(const RequestContext& request)
{
return ContentResponse::build(
*this,
RESOURCE::templates::catalog_v2_root_xml,
kainjow::mustache::object{
{"date", gen_date_str()},
{"endpoint_root", m_root + "/catalog/v2"},
{"feed_id", gen_uuid(m_library_id)},
{"all_entries_feed_id", gen_uuid(m_library_id + "/entries")},
{"partial_entries_feed_id", gen_uuid(m_library_id + "/partial_entries")},
{"category_list_feed_id", gen_uuid(m_library_id + "/categories")},
{"language_list_feed_id", gen_uuid(m_library_id + "/languages")}
},
"application/atom+xml;profile=opds-catalog;kind=navigation"
);
}
std::unique_ptr<Response> InternalServer::handle_catalog_v2_entries(const RequestContext& request, bool partial)
{
OPDSDumper opdsDumper(mp_library);
opdsDumper.setRootLocation(m_root);
opdsDumper.setLibraryId(m_library_id);
const auto bookIds = search_catalog(request, opdsDumper);
const auto opdsFeed = opdsDumper.dumpOPDSFeedV2(bookIds, request.get_query(), partial);
return ContentResponse::build(
*this,
opdsFeed,
"application/atom+xml;profile=opds-catalog;kind=acquisition"
);
}
std::unique_ptr<Response> InternalServer::handle_catalog_v2_complete_entry(const RequestContext& request, const std::string& entryId)
{
try {
mp_library->getBookById(entryId);
} catch (const std::out_of_range&) {
return Response::build_404(*this, request, "", "");
}
OPDSDumper opdsDumper(mp_library);
opdsDumper.setRootLocation(m_root);
opdsDumper.setLibraryId(m_library_id);
const auto opdsFeed = opdsDumper.dumpOPDSCompleteEntry(entryId);
return ContentResponse::build(
*this,
opdsFeed,
"application/atom+xml;type=entry;profile=opds-catalog"
);
}
std::unique_ptr<Response> InternalServer::handle_catalog_v2_categories(const RequestContext& request)
{
OPDSDumper opdsDumper(mp_library);
opdsDumper.setRootLocation(m_root);
opdsDumper.setLibraryId(m_library_id);
return ContentResponse::build(
*this,
opdsDumper.categoriesOPDSFeed(),
"application/atom+xml;profile=opds-catalog;kind=navigation"
);
}
std::unique_ptr<Response> InternalServer::handle_catalog_v2_languages(const RequestContext& request)
{
OPDSDumper opdsDumper(mp_library);
opdsDumper.setRootLocation(m_root);
opdsDumper.setLibraryId(m_library_id);
return ContentResponse::build(
*this,
opdsDumper.languagesOPDSFeed(),
"application/atom+xml;profile=opds-catalog;kind=navigation"
);
}
} // namespace kiwix

View File

@@ -26,84 +26,106 @@
#include <cstdio>
#include <atomic>
#include "tools/stringTools.h"
namespace kiwix {
static std::atomic_ullong s_requestIndex(0);
namespace {
RequestMethod str2RequestMethod(const std::string& method) {
if (method == "GET") return RequestMethod::GET;
else if (method == "HEAD") return RequestMethod::HEAD;
else if (method == "POST") return RequestMethod::POST;
else if (method == "PUT") return RequestMethod::PUT;
else if (method == "DELETE") return RequestMethod::DELETE_;
else if (method == "CONNECT") return RequestMethod::CONNECT;
else if (method == "OPTIONS") return RequestMethod::OPTIONS;
else if (method == "TRACE") return RequestMethod::TRACE;
else if (method == "PATCH") return RequestMethod::PATCH;
else return RequestMethod::OTHER;
}
std::string
fullURL2LocalURL(const std::string& full_url, const std::string& rootLocation)
{
if (rootLocation.empty()) {
// nothing special to handle.
return full_url;
} else if (full_url == rootLocation) {
return "/";
} else if (full_url.size() > rootLocation.size() &&
full_url.substr(0, rootLocation.size()+1) == rootLocation + "/") {
return full_url.substr(rootLocation.size());
} else {
return "";
}
}
} // unnamed namespace
RequestContext::RequestContext(struct MHD_Connection* connection,
std::string rootLocation,
const std::string& _url,
const std::string& _method,
const std::string& method,
const std::string& version) :
full_url(_url),
url(fullURL2LocalURL(_url, rootLocation)),
method(str2RequestMethod(_method)),
url(_url),
valid_url(true),
version(version),
requestIndex(s_requestIndex++),
acceptEncodingDeflate(false),
byteRange_()
accept_range(false),
range_pair(0, -1)
{
if (method == "GET") {
this->method = RequestMethod::GET;
} else if (method == "HEAD") {
this->method = RequestMethod::HEAD;
} else if (method == "POST") {
this->method = RequestMethod::POST;
} else if (method == "PUT") {
this->method = RequestMethod::PUT;
} else if (method == "DELETE") {
this->method = RequestMethod::DELETE_;
} else if (method == "CONNECT") {
this->method = RequestMethod::CONNECT;
} else if (method == "OPTIONS") {
this->method = RequestMethod::OPTIONS;
} else if (method == "TRACE") {
this->method = RequestMethod::TRACE;
} else if (method == "PATCH") {
this->method = RequestMethod::PATCH;
} else {
this->method = RequestMethod::OTHER;
}
MHD_get_connection_values(connection, MHD_HEADER_KIND, &RequestContext::fill_header, this);
MHD_get_connection_values(connection, MHD_GET_ARGUMENT_KIND, &RequestContext::fill_argument, this);
valid_url = true;
if (rootLocation.empty()) {
// nothing special to handle.
url = full_url;
} else {
if (full_url == rootLocation) {
url = "/";
} else if (full_url.size() > rootLocation.size() &&
full_url.substr(0, rootLocation.size()+1) == rootLocation + "/") {
url = full_url.substr(rootLocation.size());
} else {
valid_url = false;
}
}
try {
acceptEncodingDeflate =
(get_header(MHD_HTTP_HEADER_ACCEPT_ENCODING).find("deflate") != std::string::npos);
} catch (const std::out_of_range&) {}
/*Check if range is requested. */
try {
byteRange_ = ByteRange::parse(get_header(MHD_HTTP_HEADER_RANGE));
auto range = get_header(MHD_HTTP_HEADER_RANGE);
int start = 0;
int end = -1;
std::istringstream iss(range);
char c;
iss >> start >> c;
if (iss.good() && c=='-') {
iss >> end;
if (iss.fail()) {
// Something went wrong will extracting.
end = -1;
}
if (iss.eof()) {
accept_range = true;
range_pair = std::pair<int, int>(start, end);
}
}
} catch (const std::out_of_range&) {}
}
RequestContext::~RequestContext()
{}
MHD_Result RequestContext::fill_header(void *__this, enum MHD_ValueKind kind,
const char *key, const char *value)
int RequestContext::fill_header(void *__this, enum MHD_ValueKind kind,
const char *key, const char *value)
{
RequestContext *_this = static_cast<RequestContext*>(__this);
_this->headers[lcAll(key)] = value;
_this->headers[key] = value;
return MHD_YES;
}
MHD_Result RequestContext::fill_argument(void *__this, enum MHD_ValueKind kind,
const char *key, const char* value)
int RequestContext::fill_argument(void *__this, enum MHD_ValueKind kind,
const char *key, const char* value)
{
RequestContext *_this = static_cast<RequestContext*>(__this);
_this->arguments[key] = value == nullptr ? "" : value;
@@ -125,11 +147,10 @@ void RequestContext::print_debug_info() const {
printf(" - %s : '%s'\n", it->first.c_str(), it->second.c_str());
}
printf("Parsed : \n");
printf("full_url: %s\n", full_url.c_str());
printf("url : %s\n", url.c_str());
printf("acceptEncodingDeflate : %d\n", acceptEncodingDeflate);
printf("has_range : %d\n", byteRange_.kind() != ByteRange::NONE);
printf("is_valid_url : %d\n", is_valid_url());
printf("has_range : %d\n", accept_range);
printf("is_valid_url : %d\n", valid_url);
printf(".............\n");
}
@@ -167,11 +188,15 @@ std::string RequestContext::get_full_url() const {
}
bool RequestContext::is_valid_url() const {
return !url.empty();
return valid_url;
}
ByteRange RequestContext::get_range() const {
return byteRange_;
bool RequestContext::has_range() const {
return accept_range;
}
std::pair<int, int> RequestContext::get_range() const {
return range_pair;
}
template<>
@@ -180,17 +205,7 @@ std::string RequestContext::get_argument(const std::string& name) const {
}
std::string RequestContext::get_header(const std::string& name) const {
return headers.at(lcAll(name));
}
std::string RequestContext::get_query() const {
std::string q;
const char* sep = "";
for ( const auto& a : arguments ) {
q += sep + a.first + '=' + a.second;
sep = "&";
}
return q;
return headers.at(name);
}
}

View File

@@ -27,10 +27,8 @@
#include <map>
#include <stdexcept>
#include "byte_range.h"
extern "C" {
#include "microhttpd_wrapper.h"
#include <microhttpd.h>
}
namespace kiwix {
@@ -53,7 +51,7 @@ class IndexError: public std::runtime_error {};
class RequestContext {
public: // functions
public:
RequestContext(struct MHD_Connection* connection,
std::string rootLocation,
const std::string& url,
@@ -74,42 +72,34 @@ class RequestContext {
return v;
}
template<class T>
T get_optional_param(const std::string& name, T default_value) const
{
try {
return get_argument<T>(name);
} catch (...) {}
return default_value;
}
RequestMethod get_method() const;
std::string get_url() const;
std::string get_url_part(int part) const;
std::string get_full_url() const;
std::string get_query() const;
ByteRange get_range() const;
bool has_range() const;
std::pair<int, int> get_range() const;
bool can_compress() const { return acceptEncodingDeflate; }
private: // data
private:
std::string full_url;
std::string url;
bool valid_url;
RequestMethod method;
std::string version;
unsigned long long requestIndex;
bool acceptEncodingDeflate;
ByteRange byteRange_;
bool accept_range;
std::pair<int, int> range_pair;
std::map<std::string, std::string> headers;
std::map<std::string, std::string> arguments;
private: // functions
static MHD_Result fill_header(void *, enum MHD_ValueKind, const char*, const char*);
static MHD_Result fill_argument(void *, enum MHD_ValueKind, const char*, const char*);
static int fill_header(void *, enum MHD_ValueKind, const char*, const char*);
static int fill_argument(void *, enum MHD_ValueKind, const char*, const char*);
};
template<> std::string RequestContext::get_argument(const std::string& name) const;

View File

@@ -1,30 +1,12 @@
/*
* Copyright 2019 Matthieu Gautier <mgautier@kymeria.fr>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*/
#include "response.h"
#include "request_context.h"
#include "internalServer.h"
#include "kiwixlib-resources.h"
#include "tools/regexTools.h"
#include "tools/stringTools.h"
#include "tools/otherTools.h"
#include "string.h"
#include <mustache.hpp>
@@ -35,102 +17,25 @@
namespace kiwix {
namespace
{
// some utilities
std::string get_mime_type(const zim::Item& item)
{
try {
return item.getMimetype();
} catch (exception& e) {
return "application/octet-stream";
}
}
bool is_compressible_mime_type(const std::string& mimeType)
{
return mimeType.find("text/") != string::npos
|| mimeType.find("application/javascript") != string::npos
|| mimeType.find("application/atom") != string::npos
|| mimeType.find("application/opensearchdescription") != string::npos
|| mimeType.find("application/json") != string::npos;
}
} // unnamed namespace
Response::Response(bool verbose)
Response::Response(const std::string& root, bool verbose, bool withTaskbar, bool withLibraryButton)
: m_verbose(verbose),
m_returnCode(MHD_HTTP_OK)
m_root(root),
m_content(""),
m_mimeType(""),
m_returnCode(MHD_HTTP_OK),
m_withTaskbar(withTaskbar),
m_withLibraryButton(withLibraryButton),
m_useCache(false),
m_addTaskbar(false),
m_bookName(""),
m_startRange(0),
m_lenRange(0)
{
add_header(MHD_HTTP_HEADER_ACCESS_CONTROL_ALLOW_ORIGIN, "*");
}
std::unique_ptr<Response> Response::build(const InternalServer& server)
{
return std::unique_ptr<Response>(new Response(server.m_verbose.load()));
}
std::unique_ptr<Response> Response::build_304(const InternalServer& server, const ETag& etag)
{
auto response = Response::build(server);
response->set_code(MHD_HTTP_NOT_MODIFIED);
response->m_etag = etag;
if ( etag.get_option(ETag::COMPRESSED_CONTENT) ) {
response->add_header(MHD_HTTP_HEADER_VARY, "Accept-Encoding");
}
return response;
}
std::unique_ptr<Response> Response::build_404(const InternalServer& server, const RequestContext& request, const std::string& bookName, const std::string& bookTitle, const std::string& details)
{
MustacheData results;
results.set("url", request.get_full_url());
results.set("details", details);
auto response = ContentResponse::build(server, RESOURCE::templates::_404_html, results, "text/html");
response->set_code(MHD_HTTP_NOT_FOUND);
response->set_taskbar(bookName, bookTitle);
return std::move(response);
}
std::unique_ptr<Response> Response::build_416(const InternalServer& server, size_t resourceLength)
{
auto response = Response::build(server);
// [FIXME] (compile with recent enough version of libmicrohttpd)
// response->set_code(MHD_HTTP_RANGE_NOT_SATISFIABLE);
response->set_code(416);
std::ostringstream oss;
oss << "bytes */" << resourceLength;
response->add_header(MHD_HTTP_HEADER_CONTENT_RANGE, oss.str());
return response;
}
std::unique_ptr<Response> Response::build_500(const InternalServer& server, const std::string& msg)
{
MustacheData data;
data.set("error", msg);
auto content = render_template(RESOURCE::templates::_500_html, data);
std::unique_ptr<Response> response (
new ContentResponse(server.m_root, true, false, false, false, content, "text/html"));
response->set_code(MHD_HTTP_INTERNAL_SERVER_ERROR);
return response;
}
std::unique_ptr<Response> Response::build_redirect(const InternalServer& server, const std::string& redirectUrl)
{
auto response = Response::build(server);
response->m_returnCode = MHD_HTTP_FOUND;
response->add_header(MHD_HTTP_HEADER_LOCATION, redirectUrl);
return response;
}
static MHD_Result print_key_value (void *cls, enum MHD_ValueKind kind,
const char *key, const char *value)
static int print_key_value (void *cls, enum MHD_ValueKind kind,
const char *key, const char *value)
{
printf (" - %s: '%s'\n", key, value);
return MHD_YES;
@@ -138,17 +43,17 @@ static MHD_Result print_key_value (void *cls, enum MHD_ValueKind kind,
struct RunningResponse {
zim::Item item;
kiwix::Entry entry;
int range_start;
RunningResponse(zim::Item item,
RunningResponse(kiwix::Entry entry,
int range_start) :
item(item),
entry(entry),
range_start(range_start)
{}
};
static ssize_t callback_reader_from_item(void* cls,
static ssize_t callback_reader_from_entry(void* cls,
uint64_t pos,
char* buf,
size_t max)
@@ -157,13 +62,13 @@ static ssize_t callback_reader_from_item(void* cls,
size_t max_size_to_set = min<size_t>(
max,
response->item.getSize() - pos - response->range_start);
response->entry.getSize() - pos - response->range_start);
if (max_size_to_set <= 0) {
return MHD_CONTENT_READER_END_WITH_ERROR;
}
zim::Blob blob = response->item.getData(response->range_start+pos, max_size_to_set);
zim::Blob blob = response->entry.getBlob(response->range_start+pos, max_size_to_set);
memcpy(buf, blob.data(), max_size_to_set);
return max_size_to_set;
}
@@ -185,18 +90,33 @@ void print_response_info(int retCode, MHD_Response* response)
}
void ContentResponse::introduce_taskbar()
std::string render_template(const std::string& template_str, kainjow::mustache::data data)
{
kainjow::mustache::mustache tmpl(template_str);
kainjow::mustache::data urlencode{kainjow::mustache::lambda2{
[](const std::string& str,const kainjow::mustache::renderer& r) { return urlEncode(r(str), true); }}};
data.set("urlencoded", urlencode);
std::stringstream ss;
tmpl.render(data, [&ss](const std::string& str) { ss << str; });
return ss.str();
}
void Response::introduce_taskbar()
{
if (! m_withTaskbar)
// Taskbar is globally disabled.
return;
kainjow::mustache::data data;
data.set("root", m_root);
data.set("content", m_bookName);
data.set("hascontent", (!m_bookName.empty() && !m_bookTitle.empty()));
data.set("hascontent", !m_bookName.empty());
data.set("title", m_bookTitle);
data.set("withlibrarybutton", m_withLibraryButton);
auto head_content = render_template(RESOURCE::templates::head_taskbar_html, data);
m_content = prependToFirstOccurence(
auto head_content = render_template(RESOURCE::templates::head_part_html, data);
m_content = appendToFirstOccurence(
m_content,
"</head[ \\t]*>",
"<head>",
head_content);
auto taskbar_part = render_template(RESOURCE::templates::taskbar_part_html, data);
@@ -207,107 +127,88 @@ void ContentResponse::introduce_taskbar()
}
void ContentResponse::inject_externallinks_blocker()
int Response::send(const RequestContext& request, MHD_Connection* connection)
{
kainjow::mustache::data data;
data.set("root", m_root);
auto script_tag = render_template(RESOURCE::templates::external_blocker_part_html, data);
m_content = prependToFirstOccurence(
m_content,
"</head[ \\t]*>",
script_tag);
}
MHD_Response* response = nullptr;
switch (m_mode) {
case ResponseMode::RAW_CONTENT : {
if (m_addTaskbar) {
introduce_taskbar();
}
void ContentResponse::inject_root_link(){
m_content = prependToFirstOccurence(
m_content,
"</head[ \\t]*>",
"<link type=\"root\" href=\"" + m_root + "\">");
}
bool shouldCompress = m_compress && request.can_compress();
shouldCompress &= m_mimeType.find("text/") != string::npos
|| m_mimeType.find("application/javascript") != string::npos
|| m_mimeType.find("application/json") != string::npos;
bool
ContentResponse::can_compress(const RequestContext& request) const
{
return request.can_compress()
&& is_compressible_mime_type(m_mimeType)
&& (m_content.size() > KIWIX_MIN_CONTENT_SIZE_TO_DEFLATE);
}
shouldCompress &= (m_content.size() > KIWIX_MIN_CONTENT_SIZE_TO_DEFLATE);
bool
ContentResponse::contentDecorationAllowed() const
{
return (startsWith(m_mimeType, "text/html")
&& m_mimeType.find(";raw=true") == std::string::npos);
}
if (shouldCompress) {
std::vector<Bytef> compr_buffer(compressBound(m_content.size()));
uLongf comprLen = compr_buffer.capacity();
int err = compress(&compr_buffer[0],
&comprLen,
(const Bytef*)(m_content.data()),
m_content.size());
if (err == Z_OK && comprLen > 2 && comprLen < (m_content.size() + 2)) {
/* /!\ Internet Explorer has a bug with deflate compression.
It can not handle the first two bytes (compression headers)
We need to chunk them off (move the content 2bytes)
It has no incidence on other browsers
See http://www.subbu.org/blog/2008/03/ie7-deflate-or-not and comments */
m_content = string((char*)&compr_buffer[2], comprLen - 2);
} else {
shouldCompress = false;
}
}
MHD_Response*
Response::create_mhd_response(const RequestContext& request)
{
MHD_Response* response = MHD_create_response_from_buffer(0, nullptr, MHD_RESPMEM_PERSISTENT);
return response;
}
response = MHD_create_response_from_buffer(
m_content.size(), const_cast<char*>(m_content.data()), MHD_RESPMEM_MUST_COPY);
MHD_Response*
ContentResponse::create_mhd_response(const RequestContext& request)
{
if (contentDecorationAllowed()) {
inject_root_link();
if (m_withTaskbar) {
introduce_taskbar();
if (shouldCompress) {
MHD_add_response_header(
response, MHD_HTTP_HEADER_VARY, "Accept-Encoding");
MHD_add_response_header(
response, MHD_HTTP_HEADER_CONTENT_ENCODING, "deflate");
}
MHD_add_response_header(response, MHD_HTTP_HEADER_CONTENT_TYPE, m_mimeType.c_str());
break;
}
if (m_blockExternalLinks) {
inject_externallinks_blocker();
case ResponseMode::REDIRECTION : {
response = MHD_create_response_from_buffer(0, nullptr, MHD_RESPMEM_MUST_COPY);
MHD_add_response_header(response, MHD_HTTP_HEADER_LOCATION, m_content.c_str());
break;
}
case ResponseMode::ENTRY : {
response = MHD_create_response_from_callback(m_entry.getSize(),
16384,
callback_reader_from_entry,
new RunningResponse(m_entry, m_startRange),
callback_free_response);
MHD_add_response_header(response,
MHD_HTTP_HEADER_CONTENT_TYPE, m_mimeType.c_str());
MHD_add_response_header(response, MHD_HTTP_HEADER_ACCEPT_RANGES, "bytes");
std::ostringstream oss;
oss << "bytes " << m_startRange << "-" << m_startRange + m_lenRange - 1
<< "/" << m_entry.getSize();
MHD_add_response_header(response,
MHD_HTTP_HEADER_CONTENT_RANGE, oss.str().c_str());
MHD_add_response_header(response,
MHD_HTTP_HEADER_CONTENT_LENGTH, kiwix::to_string(m_lenRange).c_str());
break;
}
}
bool shouldCompress = can_compress(request);
if (shouldCompress) {
std::vector<Bytef> compr_buffer(compressBound(m_content.size()));
uLongf comprLen = compr_buffer.capacity();
int err = compress(&compr_buffer[0],
&comprLen,
(const Bytef*)(m_content.data()),
m_content.size());
if (err == Z_OK && comprLen > 2 && comprLen < (m_content.size() + 2)) {
/* /!\ Internet Explorer has a bug with deflate compression.
It can not handle the first two bytes (compression headers)
We need to chunk them off (move the content 2bytes)
It has no incidence on other browsers
See http://www.subbu.org/blog/2008/03/ie7-deflate-or-not and comments */
m_content = string((char*)&compr_buffer[2], comprLen - 2);
m_etag.set_option(ETag::COMPRESSED_CONTENT);
} else {
shouldCompress = false;
}
}
MHD_Response* response = MHD_create_response_from_buffer(
m_content.size(), const_cast<char*>(m_content.data()), MHD_RESPMEM_MUST_COPY);
if (shouldCompress) {
MHD_add_response_header(
response, MHD_HTTP_HEADER_VARY, "Accept-Encoding");
MHD_add_response_header(
response, MHD_HTTP_HEADER_CONTENT_ENCODING, "deflate");
}
return response;
}
MHD_Result Response::send(const RequestContext& request, MHD_Connection* connection)
{
MHD_Response* response = create_mhd_response(request);
MHD_add_response_header(response, "Access-Control-Allow-Origin", "*");
MHD_add_response_header(response, MHD_HTTP_HEADER_CACHE_CONTROL,
m_etag.get_option(ETag::CACHEABLE_ENTITY) ? "max-age=2723040, public" : "no-cache, no-store, must-revalidate");
const std::string etag = m_etag.get_etag();
if ( ! etag.empty() )
MHD_add_response_header(response, MHD_HTTP_HEADER_ETAG, etag.c_str());
for(auto& p: m_customHeaders) {
MHD_add_response_header(response, p.first.c_str(), p.second.c_str());
}
m_useCache ? "max-age=2723040, public" : "no-cache, no-store, must-revalidate");
if (m_returnCode == MHD_HTTP_OK && m_byteRange.kind() == ByteRange::RESOLVED_PARTIAL_CONTENT)
if (m_returnCode == MHD_HTTP_OK && request.has_range())
m_returnCode = MHD_HTTP_PARTIAL_CONTENT;
if (m_verbose)
@@ -318,103 +219,32 @@ MHD_Result Response::send(const RequestContext& request, MHD_Connection* connect
return ret;
}
void ContentResponse::set_taskbar(const std::string& bookName, const std::string& bookTitle)
void Response::set_template(const std::string& template_str, kainjow::mustache::data data) {
set_content(render_template(template_str, data));
}
void Response::set_content(const std::string& content) {
m_content = content;
m_mode = ResponseMode::RAW_CONTENT;
}
void Response::set_redirection(const std::string& url) {
m_content = url;
m_mode = ResponseMode::REDIRECTION;
m_returnCode = MHD_HTTP_FOUND;
}
void Response::set_entry(const Entry& entry) {
m_entry = entry;
m_mode = ResponseMode::ENTRY;
}
void Response::set_taskbar(const std::string& bookName, const std::string& bookTitle)
{
m_addTaskbar = true;
m_bookName = bookName;
m_bookTitle = bookTitle;
}
ContentResponse::ContentResponse(const std::string& root, bool verbose, bool withTaskbar, bool withLibraryButton, bool blockExternalLinks, const std::string& content, const std::string& mimetype) :
Response(verbose),
m_root(root),
m_content(content),
m_mimeType(mimetype),
m_withTaskbar(withTaskbar),
m_withLibraryButton(withLibraryButton),
m_blockExternalLinks(blockExternalLinks),
m_bookName(""),
m_bookTitle("")
{
add_header(MHD_HTTP_HEADER_CONTENT_TYPE, m_mimeType);
}
std::unique_ptr<ContentResponse> ContentResponse::build(const InternalServer& server, const std::string& content, const std::string& mimetype, bool isHomePage)
{
return std::unique_ptr<ContentResponse>(new ContentResponse(
server.m_root,
server.m_verbose.load(),
server.m_withTaskbar && !isHomePage,
server.m_withLibraryButton,
server.m_blockExternalLinks,
content,
mimetype));
}
std::unique_ptr<ContentResponse> ContentResponse::build(const InternalServer& server, const std::string& template_str, kainjow::mustache::data data, const std::string& mimetype, bool isHomePage) {
auto content = render_template(template_str, data);
return ContentResponse::build(server, content, mimetype, isHomePage);
}
ItemResponse::ItemResponse(bool verbose, const zim::Item& item, const std::string& mimetype, const ByteRange& byterange) :
Response(verbose),
m_item(item),
m_mimeType(mimetype)
{
m_byteRange = byterange;
set_cacheable();
add_header(MHD_HTTP_HEADER_CONTENT_TYPE, m_mimeType);
}
std::unique_ptr<Response> ItemResponse::build(const InternalServer& server, const RequestContext& request, const zim::Item& item)
{
const std::string mimetype = get_mime_type(item);
auto byteRange = request.get_range().resolve(item.getSize());
const bool noRange = byteRange.kind() == ByteRange::RESOLVED_FULL_CONTENT;
if (noRange && is_compressible_mime_type(mimetype)) {
// Return a contentResponse
auto response = ContentResponse::build(server, item.getData(), mimetype);
response->set_cacheable();
response->m_byteRange = byteRange;
return std::move(response);
}
if (byteRange.kind() == ByteRange::RESOLVED_UNSATISFIABLE) {
auto response = Response::build_416(server, item.getSize());
response->set_cacheable();
return response;
}
return std::unique_ptr<Response>(new ItemResponse(
server.m_verbose.load(),
item,
mimetype,
byteRange));
}
MHD_Response*
ItemResponse::create_mhd_response(const RequestContext& request)
{
const auto content_length = m_byteRange.length();
MHD_Response* response = MHD_create_response_from_callback(content_length,
16384,
callback_reader_from_item,
new RunningResponse(m_item, m_byteRange.first()),
callback_free_response);
MHD_add_response_header(response, MHD_HTTP_HEADER_ACCEPT_RANGES, "bytes");
if ( m_byteRange.kind() == ByteRange::RESOLVED_PARTIAL_CONTENT ) {
std::ostringstream oss;
oss << "bytes " << m_byteRange.first() << "-" << m_byteRange.last()
<< "/" << m_item.getSize();
MHD_add_response_header(response,
MHD_HTTP_HEADER_CONTENT_RANGE, oss.str().c_str());
}
MHD_add_response_header(response,
MHD_HTTP_HEADER_CONTENT_LENGTH, kiwix::to_string(content_length).c_str());
return response;
}
}

View File

@@ -22,99 +22,66 @@
#define KIWIXLIB_SERVER_RESPONSE_H
#include <string>
#include <map>
#include <mustache.hpp>
#include "byte_range.h"
#include "entry.h"
#include "etag.h"
extern "C" {
#include "microhttpd_wrapper.h"
#include <microhttpd.h>
}
namespace kiwix {
class InternalServer;
class RequestContext;
enum class ResponseMode {
RAW_CONTENT,
REDIRECTION,
ENTRY
};
class EntryResponse;
class RequestContext;
class Response {
public:
Response(bool verbose);
virtual ~Response() = default;
Response(const std::string& root, bool verbose, bool withTaskbar, bool withLibraryButton);
~Response() = default;
static std::unique_ptr<Response> build(const InternalServer& server);
static std::unique_ptr<Response> build_304(const InternalServer& server, const ETag& etag);
static std::unique_ptr<Response> build_404(const InternalServer& server, const RequestContext& request, const std::string& bookName, const std::string& bookTitle, const std::string& details="");
static std::unique_ptr<Response> build_416(const InternalServer& server, size_t resourceLength);
static std::unique_ptr<Response> build_500(const InternalServer& server, const std::string& msg);
static std::unique_ptr<Response> build_redirect(const InternalServer& server, const std::string& redirectUrl);
int send(const RequestContext& request, MHD_Connection* connection);
MHD_Result send(const RequestContext& request, MHD_Connection* connection);
void set_template(const std::string& template_str, kainjow::mustache::data data);
void set_content(const std::string& content);
void set_redirection(const std::string& url);
void set_entry(const Entry& entry);
void set_mimeType(const std::string& mimeType) { m_mimeType = mimeType; }
void set_code(int code) { m_returnCode = code; }
void set_cacheable() { m_etag.set_option(ETag::CACHEABLE_ENTITY); }
void set_server_id(const std::string& id) { m_etag.set_server_id(id); }
void add_header(const std::string& name, const std::string& value) { m_customHeaders[name] = value; }
int getReturnCode() const { return m_returnCode; }
private: // functions
virtual MHD_Response* create_mhd_response(const RequestContext& request);
MHD_Response* create_error_response(const RequestContext& request) const;
protected: // data
bool m_verbose;
int m_returnCode;
ByteRange m_byteRange;
ETag m_etag;
std::map<std::string, std::string> m_customHeaders;
friend class ItemResponse;
};
class ContentResponse : public Response {
public:
ContentResponse(const std::string& root, bool verbose, bool withTaskbar, bool withLibraryButton, bool blockExternalLinks, const std::string& content, const std::string& mimetype);
static std::unique_ptr<ContentResponse> build(const InternalServer& server, const std::string& content, const std::string& mimetype, bool isHomePage = false);
static std::unique_ptr<ContentResponse> build(const InternalServer& server, const std::string& template_str, kainjow::mustache::data data, const std::string& mimetype, bool isHomePage = false);
void set_cache(bool cache) { m_useCache = cache; }
void set_compress(bool compress) { m_compress = compress; }
void set_taskbar(const std::string& bookName, const std::string& bookTitle);
void set_range_first(uint64_t start) { m_startRange = start; }
void set_range_len(uint64_t len) { m_lenRange = len; }
private:
MHD_Response* create_mhd_response(const RequestContext& request);
int getReturnCode() { return m_returnCode; }
void introduce_taskbar();
void inject_externallinks_blocker();
void inject_root_link();
bool can_compress(const RequestContext& request) const;
bool contentDecorationAllowed() const;
private:
bool m_verbose;
ResponseMode m_mode;
std::string m_root;
std::string m_content;
Entry m_entry;
std::string m_mimeType;
int m_returnCode;
bool m_withTaskbar;
bool m_withLibraryButton;
bool m_blockExternalLinks;
bool m_useCache;
bool m_compress;
bool m_addTaskbar;
std::string m_bookName;
std::string m_bookTitle;
};
class ItemResponse : public Response {
public:
ItemResponse(bool verbose, const zim::Item& item, const std::string& mimetype, const ByteRange& byterange);
static std::unique_ptr<Response> build(const InternalServer& server, const RequestContext& request, const zim::Item& item);
private:
MHD_Response* create_mhd_response(const RequestContext& request);
zim::Item m_item;
std::string m_mimeType;
uint64_t m_startRange;
uint64_t m_lenRange;
};
}

View File

@@ -12,31 +12,45 @@
UnixImpl::UnixImpl():
m_pid(0),
m_running(false),
m_shouldQuit(false)
m_mutex(PTHREAD_MUTEX_INITIALIZER),
m_waitingThread()
{
}
UnixImpl::~UnixImpl()
{
kill();
m_shouldQuit = true;
m_waitingThread.join();
// Android has no pthread_cancel :(
#ifdef __ANDROID__
pthread_kill(m_waitingThread, SIGUSR1);
#else
pthread_cancel(m_waitingThread);
#endif
}
#ifdef __ANDROID__
void thread_exit_handler(int sig) {
pthread_exit(0);
}
#endif
void* UnixImpl::waitForPID(void* _self)
{
UnixImpl* self = static_cast<UnixImpl*>(_self);
while (true) {
if (!waitpid(self->m_pid, NULL, WNOHANG)) {
break;
}
if (self->m_shouldQuit) {
return nullptr;
}
std::this_thread::sleep_for(std::chrono::milliseconds(100));
}
#ifdef __ANDROID__
struct sigaction actions;
memset(&actions, 0, sizeof(actions));
sigemptyset(&actions.sa_mask);
actions.sa_flags = 0;
actions.sa_handler = thread_exit_handler;
sigaction(SIGUSR1, &actions, NULL);
#endif
UnixImpl* self = static_cast<UnixImpl*>(_self);
waitpid(self->m_pid, NULL, 0);
pthread_mutex_lock(&self->m_mutex);
self->m_running = false;
pthread_mutex_unlock(&self->m_mutex);
return self;
}
@@ -60,7 +74,7 @@ void UnixImpl::run(commandLine_t& commandLine)
default:
m_pid = pid;
m_running = true;
m_waitingThread = std::thread(waitForPID, this);
pthread_create(&m_waitingThread, NULL, waitForPID, this);
break;
}
}
@@ -72,5 +86,8 @@ bool UnixImpl::kill()
bool UnixImpl::isRunning()
{
return m_running;
pthread_mutex_lock(&m_mutex);
bool ret = m_running;
pthread_mutex_unlock(&m_mutex);
return ret;
}

View File

@@ -3,16 +3,16 @@
#include "subprocess.h"
#include <atomic>
#include <thread>
#include <pthread.h>
class UnixImpl : public SubprocessImpl
{
private:
int m_pid;
std::atomic<bool> m_running;
std::atomic<bool> m_shouldQuit;
std::thread m_waitingThread;
bool m_running;
pthread_mutex_t m_mutex;
pthread_t m_waitingThread;
public:
UnixImpl();

View File

@@ -11,8 +11,7 @@
WinImpl::WinImpl():
m_pid(0),
m_running(false),
m_subprocessHandle(INVALID_HANDLE_VALUE),
m_waitingThreadHandle(INVALID_HANDLE_VALUE)
m_handle(INVALID_HANDLE_VALUE)
{
InitializeCriticalSection(&m_criticalSection);
}
@@ -20,15 +19,14 @@ WinImpl::WinImpl():
WinImpl::~WinImpl()
{
kill();
WaitForSingleObject(m_waitingThreadHandle, INFINITE);
CloseHandle(m_subprocessHandle);
CloseHandle(m_handle);
DeleteCriticalSection(&m_criticalSection);
}
DWORD WINAPI WinImpl::waitForPID(void* _self)
{
WinImpl* self = static_cast<WinImpl*>(_self);
WaitForSingleObject(self->m_subprocessHandle, INFINITE);
WaitForSingleObject(self->m_handle, INFINITE);
EnterCriticalSection(&self->m_criticalSection);
self->m_running = false;
@@ -81,16 +79,16 @@ void WinImpl::run(commandLine_t& commandLine)
&procInfo))
{
m_pid = procInfo.dwProcessId;
m_subprocessHandle = procInfo.hProcess;
m_handle = procInfo.hProcess;
CloseHandle(procInfo.hThread);
m_running = true;
m_waitingThreadHandle = CreateThread(NULL, 0, &waitForPID, this, 0, NULL);
CreateThread(NULL, 0, &waitForPID, this, 0, NULL );
}
}
bool WinImpl::kill()
{
return TerminateProcess(m_subprocessHandle, 0);
return TerminateProcess(m_handle, 0);
}
bool WinImpl::isRunning()

View File

@@ -11,8 +11,7 @@ class WinImpl : public SubprocessImpl
private:
int m_pid;
bool m_running;
HANDLE m_subprocessHandle;
HANDLE m_waitingThreadHandle;
HANDLE m_handle;
CRITICAL_SECTION m_criticalSection;
public:

View File

@@ -1,158 +0,0 @@
/*
* Copyright 2021 Maneesh P M <manu.pm55@gmail.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*/
#include "archiveTools.h"
#include "tools.h"
#include "pathTools.h"
#include "otherTools.h"
#include "stringTools.h"
#include <zim/error.h>
#include <zim/item.h>
namespace kiwix
{
std::string getMetadata(const zim::Archive& archive, const std::string& name) {
try {
return archive.getMetadata(name);
} catch (zim::EntryNotFound& e) {
return "";
}
}
std::string getArchiveTitle(const zim::Archive& archive) {
std::string value = getMetadata(archive, "Title");
if (value.empty()) {
value = getLastPathElement(archive.getFilename());
std::replace(value.begin(), value.end(), '_', ' ');
size_t pos = value.find(".zim");
value = value.substr(0, pos);
}
return value;
}
std::string getMetaDescription(const zim::Archive& archive) {
std::string value;
value = getMetadata(archive, "Description");
/* Mediawiki Collection tends to use the "Subtitle" name */
if (value.empty()) {
value = getMetadata(archive, "Subtitle");
}
return value;
}
std::string getMetaTags(const zim::Archive& archive, bool original) {
std::string tags_str = getMetadata(archive, "Tags");
if (original) {
return tags_str;
}
auto tags = convertTags(tags_str);
return join(tags, ";");
}
std::string getMetaLanguage(const zim::Archive& archive) {
return getMetadata(archive, "Language");
}
std::string getMetaName(const zim::Archive& archive) {
return getMetadata(archive, "Name");
}
std::string getMetaDate(const zim::Archive& archive) {
return getMetadata(archive, "Date");
}
std::string getMetaCreator(const zim::Archive& archive) {
return getMetadata(archive, "Creator");
}
std::string getMetaPublisher(const zim::Archive& archive) {
return getMetadata(archive, "Publisher");
}
std::string getMetaFlavour(const zim::Archive& archive) {
return getMetadata(archive, "Flavour");
}
std::string getArchiveId(const zim::Archive& archive) {
return (std::string) archive.getUuid();
}
bool getArchiveFavicon(const zim::Archive& archive, unsigned size,
std::string& content, std::string& mimeType){
try {
auto item = archive.getIllustrationItem(size);
content = item.getData();
mimeType = item.getMimetype();
return true;
} catch(zim::EntryNotFound& e) {};
return false;
}
// should this be in libzim
unsigned int getArchiveMediaCount(const zim::Archive& archive) {
std::map<const std::string, unsigned int> counterMap = parseArchiveCounter(archive);
unsigned int counter = 0;
for (auto &pair:counterMap) {
if (startsWith(pair.first, "image/") ||
startsWith(pair.first, "video/") ||
startsWith(pair.first, "audio/")) {
counter += pair.second;
}
}
return counter;
}
unsigned int getArchiveFileSize(const zim::Archive& archive) {
return archive.getFilesize() / 1024;
}
zim::Item getFinalItem(const zim::Archive& archive, const zim::Entry& entry)
{
return entry.getItem(true);
}
zim::Entry getEntryFromPath(const zim::Archive& archive, const std::string& path)
{
try {
return archive.getEntryByPath(path);
} catch (zim::EntryNotFound& e) {
if (path.empty() || path == "/") {
return archive.getMainEntry();
}
}
throw zim::EntryNotFound("Cannot find entry for non empty path");
}
MimeCounterType parseArchiveCounter(const zim::Archive& archive) {
try {
auto counterContent = archive.getMetadata("Counter");
return parseMimetypeCounter(counterContent);
} catch (zim::EntryNotFound& e) {
return {};
}
}
} // kiwix

View File

@@ -1,59 +0,0 @@
/*
* Copyright 2021 Maneesh P M <manu.pm55@gmail.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*/
#ifndef KIWIX_ARCHIVETOOLS_H
#define KIWIX_ARCHIVETOOLS_H
#include <zim/archive.h>
#include <tools/otherTools.h>
/**
* This file contains all the functions that would make handling data related to
* an archive easier.
**/
namespace kiwix
{
std::string getMetadata(const zim::Archive& archive, const std::string& name);
std::string getArchiveTitle(const zim::Archive& archive);
std::string getMetaDescription(const zim::Archive& archive);
std::string getMetaTags(const zim::Archive& archive, bool original = false);
std::string getMetaLanguage(const zim::Archive& archive);
std::string getMetaName(const zim::Archive& archive);
std::string getMetaDate(const zim::Archive& archive);
std::string getMetaCreator(const zim::Archive& archive);
std::string getMetaPublisher(const zim::Archive& archive);
std::string getMetaFlavour(const zim::Archive& archive);
std::string getArchiveId(const zim::Archive& archive);
bool getArchiveFavicon(const zim::Archive& archive, unsigned size,
std::string& content, std::string& mimeType);
unsigned int getArchiveMediaCount(const zim::Archive& archive);
unsigned int getArchiveFileSize(const zim::Archive& archive);
zim::Item getFinalItem(const zim::Archive& archive, const zim::Entry& entry);
zim::Entry getEntryFromPath(const zim::Archive& archive, const std::string& path);
MimeCounterType parseArchiveCounter(const zim::Archive& archive);
}
#endif

View File

@@ -17,14 +17,8 @@
* MA 02110-1301, USA.
*/
// Implement function declared in tools.h and tools/otherTools.h
#include "tools.h"
#include "tools/otherTools.h"
#include <algorithm>
#include <iomanip>
#ifdef _WIN32
#include <windows.h>
#else
@@ -37,8 +31,6 @@
#include <sstream>
#include <pugixml.hpp>
#include <zim/uuid.h>
static std::map<std::string, std::string> codeisomapping {
{ "aa", "aar" },
@@ -288,95 +280,3 @@ bool kiwix::convertStrToBool(const std::string& value)
throw std::domain_error(ss.str());
}
namespace
{
// The counter metadata format is a list of item separated by a `;` :
// item0;item1;item2
// Each item is a "tuple" mimetype=number.
// However, the mimetype may contains parameters:
// text/html;raw=true;foo=bar
// So the final format may be complex to parse:
// key0=value0;key1;foo=bar=value1;key2=value2
typedef kiwix::MimeCounterType::value_type MimetypeAndCounter;
std::string readFullMimetypeAndCounterString(std::istream& in)
{
std::string mtcStr, params;
getline(in, mtcStr, ';');
if ( mtcStr.find('=') == std::string::npos )
{
do
{
if ( !getline(in, params, ';' ) )
return std::string();
mtcStr += ";" + params;
}
while ( std::count(params.begin(), params.end(), '=') != 2 );
}
return mtcStr;
}
MimetypeAndCounter parseASingleMimetypeCounter(const std::string& s)
{
const std::string::size_type k = s.find_last_of("=");
if ( k != std::string::npos )
{
const std::string mimeType = s.substr(0, k);
std::istringstream counterSS(s.substr(k+1));
unsigned int counter;
if (counterSS >> counter && counterSS.eof())
return MimetypeAndCounter{mimeType, counter};
}
return MimetypeAndCounter{"", 0};
}
} // unnamed namespace
kiwix::MimeCounterType kiwix::parseMimetypeCounter(const std::string& counterData)
{
kiwix::MimeCounterType counters;
std::istringstream ss(counterData);
while (ss)
{
const std::string mtcStr = readFullMimetypeAndCounterString(ss);
const MimetypeAndCounter mtc = parseASingleMimetypeCounter(mtcStr);
if ( !mtc.first.empty() )
counters.insert(mtc);
}
return counters;
}
std::string kiwix::gen_date_str()
{
auto now = std::time(0);
auto tm = std::localtime(&now);
std::stringstream is;
is << std::setw(2) << std::setfill('0')
<< 1900+tm->tm_year << "-"
<< std::setw(2) << std::setfill('0') << tm->tm_mon+1 << "-"
<< std::setw(2) << std::setfill('0') << tm->tm_mday << "T"
<< std::setw(2) << std::setfill('0') << tm->tm_hour << ":"
<< std::setw(2) << std::setfill('0') << tm->tm_min << ":"
<< std::setw(2) << std::setfill('0') << tm->tm_sec << "Z";
return is.str();
}
std::string kiwix::gen_uuid(const std::string& s)
{
return kiwix::to_string(zim::Uuid::generate(s));
}
std::string kiwix::render_template(const std::string& template_str, kainjow::mustache::data data)
{
kainjow::mustache::mustache tmpl(template_str);
kainjow::mustache::data urlencode{kainjow::mustache::lambda2{
[](const std::string& str,const kainjow::mustache::renderer& r) { return urlEncode(r(str), true); }}};
data.set("urlencoded", urlencode);
std::stringstream ss;
tmpl.render(data, [&ss](const std::string& str) { ss << str; });
return ss.str();
}

View File

@@ -17,12 +17,8 @@
* MA 02110-1301, USA.
*/
// Implement method defined in <kiwix/tools.h> and "tools/pathTools.h"
#include "tools.h"
#include "tools/pathTools.h"
#include <stdexcept>
#ifdef __APPLE__
#include <limits.h>
#include <mach-o/dyld.h>
@@ -46,22 +42,19 @@
#include <algorithm>
#ifdef _WIN32
# define SEPARATOR "\\"
# include <io.h>
#define SEPARATOR "\\"
#else
# define SEPARATOR "/"
# include <unistd.h>
# include <sys/stat.h>
#define SEPARATOR "/"
#include <unistd.h>
#endif
#include <fcntl.h>
#include <stdlib.h>
#ifndef PATH_MAX
#define PATH_MAX 1024
#endif
#ifdef _WIN32
std::string WideToUtf8(const std::wstring& wstr)
{
@@ -80,22 +73,16 @@ std::wstring Utf8ToWide(const std::string& str)
}
#endif
bool kiwix::isRelativePath(const std::string& path)
bool isRelativePath(const std::string& path)
{
#ifdef _WIN32
if (path.size() < 3 ) {
return true;
}
if (path.substr(1, 2) == ":\\" || path.substr(0, 2) == "\\\\") {
return false;
}
return true;
return path.empty() || path.substr(1, 2) == ":\\" ? false : true;
#else
return path.empty() || path.substr(0, 1) == "/" ? false : true;
#endif
}
std::vector<std::string> normalizeParts(std::vector<std::string>& parts, bool absolute)
std::vector<std::string> normalizeParts(std::vector<std::string> parts, bool absolute)
{
std::vector<std::string> ret;
#ifdef _WIN32
@@ -103,35 +90,10 @@ std::vector<std::string> normalizeParts(std::vector<std::string>& parts, bool ab
//Starts from there.
auto it = find_if(parts.rbegin(), parts.rend(),
[](const std::string& p) ->bool
{ return ((p.length() == 2 && p[1] == ':')
|| (p.length() > 2 && p[0] == '\\' && p[1] == '\\')); });
{ return p.length() == 2 && p[1] == ':'; });
if (it != parts.rend()) {
parts.erase(parts.begin(), it.base()-1);
}
// Special case for samba mount point starting with two "\\" ("\\\\samba\\foo")
if (parts.size() > 2 && parts[0].empty() && parts[1].empty()) {
parts.erase(parts.begin(), parts.begin()+2);
parts[0] = "\\\\" + parts[0];
}
// Special case if we have a samba drive not at first.
// Path is "..\\\\sambdadrive\\..\\.." So we will have an empty part.
auto previous_empty = false;
for (it = parts.rbegin(); it!=parts.rend(); it++) {
if(it->empty()) {
if (previous_empty) {
it++;
break;
} else {
previous_empty = true;
}
} else {
previous_empty = false;
}
}
if (it != parts.rend()) {
parts.erase(parts.begin(), it.base()-1);
parts[0] = "\\\\" + parts[0];
}
#endif
size_t index = 0;
@@ -175,12 +137,10 @@ std::vector<std::string> normalizeParts(std::vector<std::string>& parts, bool ab
return ret;
}
std::string kiwix::computeRelativePath(const std::string& path, const std::string& absolutePath)
std::string computeRelativePath(const std::string& path, const std::string& absolutePath)
{
auto parts = kiwix::split(path, SEPARATOR, false);
auto pathParts = normalizeParts(parts, false);
parts = kiwix::split(absolutePath, SEPARATOR, false);
auto absolutePathParts = normalizeParts(parts, true);
auto pathParts = normalizeParts(kiwix::split(path, SEPARATOR, false), false);
auto absolutePathParts = kiwix::split(absolutePath, SEPARATOR, false);
unsigned int commonCount = 0;
while (commonCount < pathParts.size()
@@ -200,27 +160,24 @@ std::string kiwix::computeRelativePath(const std::string& path, const std::strin
return ret;
}
std::string kiwix::computeAbsolutePath(const std::string& path, const std::string& relativePath)
std::string computeAbsolutePath(const std::string& path, const std::string& relativePath)
{
std::string absolutePath = path;
if (path.empty()) {
absolutePath = kiwix::getCurrentDirectory();
absolutePath = getCurrentDirectory();
}
auto parts = kiwix::split(absolutePath, SEPARATOR, false);
auto absoluteParts = normalizeParts(parts, true);
parts = kiwix::split(relativePath, SEPARATOR, false);
auto relativeParts = normalizeParts(parts, false);
auto absoluteParts = normalizeParts(kiwix::split(absolutePath, SEPARATOR, false), true);
auto relativeParts = kiwix::split(relativePath, SEPARATOR, false);
absoluteParts.insert(absoluteParts.end(), relativeParts.begin(), relativeParts.end());
auto ret = kiwix::join(normalizeParts(absoluteParts, true), SEPARATOR);
return ret;
}
std::string kiwix::removeLastPathElement(const std::string& path)
std::string removeLastPathElement(const std::string& path)
{
auto parts_ = kiwix::split(path, SEPARATOR, false);
auto parts = normalizeParts(parts_, false);
auto parts = normalizeParts(kiwix::split(path, SEPARATOR, false), false);
if (!parts.empty()) {
parts.pop_back();
}
@@ -228,7 +185,7 @@ std::string kiwix::removeLastPathElement(const std::string& path)
return ret;
}
std::string kiwix::appendToDirectory(const std::string& directoryPath, const std::string& filename)
std::string appendToDirectory(const std::string& directoryPath, const std::string& filename)
{
std::string newPath = directoryPath;
if (!directoryPath.empty() && directoryPath.back() != SEPARATOR[0]) {
@@ -238,10 +195,9 @@ std::string kiwix::appendToDirectory(const std::string& directoryPath, const std
return newPath;
}
std::string kiwix::getLastPathElement(const std::string& path)
std::string getLastPathElement(const std::string& path)
{
auto parts_ = kiwix::split(path, SEPARATOR);
auto parts = normalizeParts(parts_, false);
auto parts = normalizeParts(kiwix::split(path, SEPARATOR), false);
if (parts.empty()) {
return "";
}
@@ -269,40 +225,21 @@ std::string getFileSizeAsString(const std::string& path)
return convert.str();
}
std::string kiwix::getFileContent(const std::string& path)
std::string getFileContent(const std::string& path)
{
#ifdef _WIN32
auto wpath = Utf8ToWide(path);
auto fd = _wopen(wpath.c_str(), _O_RDONLY | _O_BINARY);
#else
auto fd = open(path.c_str(), O_RDONLY);
#endif
std::ifstream f(path, std::ios::in|std::ios::ate);
std::string content;
if (fd != -1) {
#ifdef _WIN32
auto size = _lseeki64(fd, 0, SEEK_END);
#else
auto size = lseek(fd, 0, SEEK_END);
#endif
content.resize(size);
#ifdef _WIN32
_lseeki64(fd, 0, SEEK_SET);
#else
lseek(fd, 0, SEEK_SET);
#endif
auto p = (char*)content.data();
while (size) {
auto readsize = size > 2048 ? 2048 : size;
readsize = ::read(fd, p, readsize);
p += readsize;
size -= readsize;
}
close(fd);
if (f.is_open()) {
auto size = f.tellg();
content.reserve(size);
f.seekg(0, std::ios::beg);
content.assign((std::istreambuf_iterator<char>(f)),
std::istreambuf_iterator<char>());
}
return content;
}
bool kiwix::fileExists(const std::string& path)
bool fileExists(const std::string& path)
{
#ifdef _WIN32
return PathFileExistsW(Utf8ToWide(path).c_str());
@@ -341,7 +278,7 @@ std::string makeTmpDirectory()
_wmkdir(ctmp);
return WideToUtf8(ctmp);
#else
char _template_array[] = {"/tmp/libkiwix_XXXXXX"};
char _template_array[] = {"/tmp/kiwix-lib_XXXXXX"};
std::string dir = mkdtemp(_template_array);
return dir;
#endif
@@ -350,25 +287,25 @@ std::string makeTmpDirectory()
/* Try to create a link and if does not work then make a copy */
bool copyFile(const std::string& sourcePath, const std::string& destPath)
{
#ifdef _WIN32
return CopyFileW(Utf8ToWide(sourcePath).c_str(), Utf8ToWide(destPath).c_str(), 1);
#else
try {
#ifndef _WIN32
if (link(sourcePath.c_str(), destPath.c_str()) != 0) {
#endif
std::ifstream infile(sourcePath.c_str(), std::ios_base::binary);
std::ofstream outfile(destPath.c_str(), std::ios_base::binary);
outfile << infile.rdbuf();
#ifndef _WIN32
}
#endif
} catch (std::exception& e) {
std::cerr << e.what() << std::endl;
return false;
}
return true;
#endif
}
std::string kiwix::getExecutablePath(bool realPathOnly)
std::string getExecutablePath(bool realPathOnly)
{
if (!realPathOnly) {
char* cAppImage = ::getenv("APPIMAGE");
@@ -404,25 +341,14 @@ std::string kiwix::getExecutablePath(bool realPathOnly)
bool writeTextFile(const std::string& path, const std::string& content)
{
#ifdef _WIN32
auto wpath = Utf8ToWide(path);
auto fd = _wopen(wpath.c_str(), _O_WRONLY | _O_CREAT | _O_TRUNC, S_IWRITE);
#else
auto fd = open(path.c_str(), O_WRONLY | O_CREAT | O_TRUNC,
S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
#endif
if (fd == -1)
return false;
if (write(fd, content.c_str(), content.size()) != (long)content.size()) {
close(fd);
return false;
}
close(fd);
std::ofstream file;
file.open(path.c_str());
file << content;
file.close();
return true;
}
std::string kiwix::getCurrentDirectory()
std::string getCurrentDirectory()
{
#ifdef _WIN32
wchar_t* a_cwd = _wgetcwd(NULL, 0);
@@ -436,50 +362,32 @@ std::string kiwix::getCurrentDirectory()
return ret;
}
std::string kiwix::getDataDirectory()
std::string getDataDirectory()
{
// Try to get the dataDir from the `KIWIX_DATA_DIR` env var
#ifdef _WIN32
wchar_t* cDataDir = ::_wgetenv(L"KIWIX_DATA_DIR");
if (cDataDir != nullptr) {
return WideToUtf8(cDataDir);
}
char* cDataDir = ::getenv("APPDATA");
#else
char* cDataDir = ::getenv("KIWIX_DATA_DIR");
if (cDataDir != nullptr) {
return cDataDir;
}
#endif
// Compute the dataDir from the user directory.
std::string dataDir;
#ifdef _WIN32
cDataDir = ::_wgetenv(L"APPDATA");
if (cDataDir == nullptr)
cDataDir = ::_wgetenv(L"USERPROFILE");
if (cDataDir != nullptr)
dataDir = WideToUtf8(cDataDir);
#else
cDataDir = ::getenv("XDG_DATA_HOME");
if (cDataDir != nullptr) {
dataDir = cDataDir;
} else {
cDataDir = ::getenv("HOME");
if (cDataDir != nullptr) {
dataDir = cDataDir;
dataDir = appendToDirectory(dataDir, ".local");
dataDir = appendToDirectory(dataDir, "share");
}
}
#endif
std::string dataDir = cDataDir==nullptr ? "" : cDataDir;
if (!dataDir.empty()) {
dataDir = appendToDirectory(dataDir, "kiwix");
makeDirectory(dataDir);
return dataDir;
}
// Let's use the currentDirectory
return getCurrentDirectory();
#ifdef _WIN32
cDataDir = ::getenv("USERPROFILE");
dataDir = cDataDir==nullptr ? getCurrentDirectory() : cDataDir;
#else
cDataDir = ::getenv("XDG_DATA_HOME");
dataDir = cDataDir==nullptr ? "" : cDataDir;
if (dataDir.empty()) {
cDataDir = ::getenv("HOME");
dataDir = cDataDir==nullptr ? getCurrentDirectory() : cDataDir;
dataDir = appendToDirectory(dataDir, ".local");
dataDir = appendToDirectory(dataDir, "share");
}
#endif
auto ret = appendToDirectory(dataDir, "kiwix");
return ret;
}
static std::map<std::string, std::string> extMimeTypes = {
@@ -505,7 +413,7 @@ static std::map<std::string, std::string> extMimeTypes = {
};
/* Try to get the mimeType from the file extension */
std::string kiwix::getMimeTypeForFile(const std::string& filename)
std::string getMimeTypeForFile(const std::string& filename)
{
std::string mimeType = "text/plain";
auto pos = filename.find_last_of(".");
@@ -526,3 +434,4 @@ std::string kiwix::getMimeTypeForFile(const std::string& filename)
return mimeType;
}

View File

@@ -18,6 +18,7 @@
*/
#include <tools/regexTools.h>
#include <tools/lock.h>
#include <unicode/regex.h>
#include <unicode/ucnv.h>
@@ -25,10 +26,10 @@
#include <memory>
#include <map>
#include <stdexcept>
#include <mutex>
#include <pthread.h>
std::map<std::string, std::shared_ptr<icu::RegexPattern>> regexCache;
static std::mutex regexLock;
static pthread_mutex_t regexLock = PTHREAD_MUTEX_INITIALIZER;
std::unique_ptr<icu::RegexMatcher> buildMatcher(const std::string& regex, icu::UnicodeString& content)
{
@@ -38,7 +39,7 @@ std::unique_ptr<icu::RegexMatcher> buildMatcher(const std::string& regex, icu::U
pattern = regexCache.at(regex);
} catch (std::out_of_range&) {
// Redo the search with a lock to avoid race condition.
std::lock_guard<std::mutex> l(regexLock);
kiwix::Lock l(&regexLock);
try {
pattern = regexCache.at(regex);
} catch (std::out_of_range&) {
@@ -94,22 +95,3 @@ std::string appendToFirstOccurence(const std::string& content,
return content;
}
std::string prependToFirstOccurence(const std::string& content,
const std::string& regex,
const std::string& replacement)
{
ucnv_setDefaultName("UTF-8");
icu::UnicodeString ucontent(content.c_str());
icu::UnicodeString ureplacement(replacement.c_str());
auto matcher = buildMatcher(regex, ucontent);
if (matcher->find()) {
UErrorCode status = U_ZERO_ERROR;
ucontent.insert(matcher->start(status), ureplacement);
std::string tmp;
ucontent.toUTF8String(tmp);
return tmp;
}
return content;
}

View File

@@ -17,11 +17,9 @@
* MA 02110-1301, USA.
*/
// Implement function declared in tools.h and tools/stringTools.h
#include "tools.h"
#include "tools/stringTools.h"
#include <tools/stringTools.h>
#include "tools/pathTools.h"
#include <tools/pathTools.h>
#include <unicode/normlzr.h>
#include <unicode/rep.h>
#include <unicode/translit.h>
@@ -270,8 +268,7 @@ std::string kiwix::urlDecode(const std::string& value, bool component)
/* Split string in a token array */
std::vector<std::string> kiwix::split(const std::string& str,
const std::string& delims,
bool dropEmpty,
bool keepDelim)
bool trimEmpty)
{
std::string::size_type lastPos = 0;
std::string::size_type pos = 0;
@@ -279,17 +276,14 @@ std::vector<std::string> kiwix::split(const std::string& str,
while( (pos = str.find_first_of(delims, lastPos)) < str.length() )
{
auto token = str.substr(lastPos, pos - lastPos);
if (!dropEmpty || !token.empty()) {
if (!trimEmpty || !token.empty()) {
tokens.push_back(token);
}
if (keepDelim) {
tokens.push_back(str.substr(pos, 1));
}
lastPos = pos + 1;
}
auto token = str.substr(lastPos);
if (!dropEmpty || !token.empty()) {
if (!trimEmpty || !token.empty()) {
tokens.push_back(token);
}
return tokens;
@@ -397,11 +391,3 @@ bool kiwix::startsWith(const std::string& base, const std::string& start)
&& std::equal(start.begin(), start.end(), base.begin());
}
std::vector<std::string> kiwix::getTitleVariants(const std::string& title) {
std::vector<std::string> variants;
variants.push_back(title);
variants.push_back(kiwix::ucFirst(title));
variants.push_back(kiwix::lcFirst(title));
variants.push_back(kiwix::toTitle(title));
return variants;
}

View File

@@ -69,7 +69,6 @@ GETTER(jstring, getDate)
GETTER(jstring, getUrl)
GETTER(jstring, getName)
GETTER(jstring, getFlavour)
GETTER(jstring, getCategory)
GETTER(jstring, getTags)
GETTER(jlong, getArticleCount)
GETTER(jlong, getMediaCount)

View File

@@ -28,7 +28,11 @@
#include "utils.h"
std::mutex globalLock;
#if __ANDROID__
pthread_mutex_t globalLock = PTHREAD_RECURSIVE_MUTEX_INITIALIZER;
#else
pthread_mutex_t globalLock = PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP;
#endif
JNIEXPORT void JNICALL Java_org_kiwix_kiwixlib_JNIICU_setDataDirectory(
JNIEnv* env, jclass kclass, jstring dirStr)

View File

@@ -20,6 +20,7 @@
#include <jni.h>
#include <zim/file.h>
#include <exception>
#include "org_kiwix_kiwixlib_JNIKiwixReader.h"
@@ -45,72 +46,6 @@ JNIEXPORT jlong JNICALL Java_org_kiwix_kiwixlib_JNIKiwixReader_getNativeReader(
}
}
namespace
{
int jni2fd(const jobject& fdObj, JNIEnv* env)
{
jclass class_fdesc = env->FindClass("java/io/FileDescriptor");
jfieldID field_fd = env->GetFieldID(class_fdesc, "fd", "I");
if ( field_fd == NULL )
{
env->ExceptionClear();
// Under Android the (private) 'fd' field of java.io.FileDescriptor has been
// renamed to 'descriptor'. See, for example,
// https://android.googlesource.com/platform/libcore/+/refs/tags/android-8.1.0_r1/ojluni/src/main/java/java/io/FileDescriptor.java#55
field_fd = env->GetFieldID(class_fdesc, "descriptor", "I");
}
return env->GetIntField(fdObj, field_fd);
}
} // unnamed namespace
JNIEXPORT jlong JNICALL Java_org_kiwix_kiwixlib_JNIKiwixReader_getNativeReaderByFD(
JNIEnv* env, jobject obj, jobject fdObj)
{
#ifndef _WIN32
int fd = jni2fd(fdObj, env);
LOG("Attempting to create reader with fd: %d", fd);
Lock l;
try {
kiwix::Reader* reader = new kiwix::Reader(fd);
return reinterpret_cast<jlong>(new Handle<kiwix::Reader>(reader));
} catch (std::exception& e) {
LOG("Error opening ZIM file");
LOG(e.what());
return 0;
}
#else
jclass exception = env->FindClass("java/lang/UnsupportedOperationException");
env->ThrowNew(exception, "org.kiwix.kiwixlib.JNIKiwixReader.getNativeReaderByFD() is not supported under Windows");
return 0;
#endif
}
JNIEXPORT jlong JNICALL Java_org_kiwix_kiwixlib_JNIKiwixReader_getNativeReaderEmbedded(
JNIEnv* env, jobject obj, jobject fdObj, jlong offset, jlong size)
{
#ifndef _WIN32
int fd = jni2fd(fdObj, env);
LOG("Attempting to create reader with fd: %d", fd);
Lock l;
try {
kiwix::Reader* reader = new kiwix::Reader(fd, offset, size);
return reinterpret_cast<jlong>(new Handle<kiwix::Reader>(reader));
} catch (std::exception& e) {
LOG("Error opening ZIM file");
LOG(e.what());
return 0;
}
#else
jclass exception = env->FindClass("java/lang/UnsupportedOperationException");
env->ThrowNew(exception, "org.kiwix.kiwixlib.JNIKiwixReader.getNativeReaderEmbedded() is not supported under Windows");
return 0;
#endif
}
JNIEXPORT void JNICALL
Java_org_kiwix_kiwixlib_JNIKiwixReader_dispose(JNIEnv* env, jobject obj)
{
@@ -391,22 +326,22 @@ JNIEXPORT jobject JNICALL
Java_org_kiwix_kiwixlib_JNIKiwixReader_getDirectAccessInformation(
JNIEnv* env, jobject obj, jstring url)
{
jclass daiClass = env->FindClass("org/kiwix/kiwixlib/DirectAccessInfo");
jmethodID daiInitMethod = env->GetMethodID(daiClass, "<init>", "()V");
jobject dai = env->NewObject(daiClass, daiInitMethod);
setDaiObjValue("", 0, dai, env);
jclass classPair = env->FindClass("org/kiwix/kiwixlib/Pair");
jmethodID midPairinit = env->GetMethodID(classPair, "<init>", "()V");
jobject pair = env->NewObject(classPair, midPairinit);
setPairObjValue("", 0, pair, env);
std::string cUrl = jni2c(url, env);
try {
auto entry = READER->getEntryFromEncodedPath(cUrl);
entry = entry.getFinalEntry();
auto part_info = entry.getDirectAccessInfo();
setDaiObjValue(part_info.first, part_info.second, dai, env);
setPairObjValue(part_info.first, part_info.second, pair, env);
} catch (std::exception& e) {
LOG("Unable to get direct access info for url: %s", cUrl.c_str());
LOG(e.what());
}
return dai;
return pair;
}
JNIEXPORT jboolean JNICALL
@@ -420,12 +355,9 @@ Java_org_kiwix_kiwixlib_JNIKiwixReader_searchSuggestions(JNIEnv* env,
unsigned int cCount = jni2c(count, env);
try {
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
if (READER->searchSuggestionsSmart(cPrefix, cCount)) {
retVal = JNI_TRUE;
}
#pragma GCC diagnostic pop
} catch (std::exception& e) {
LOG("Unable to get search results for pattern: %s", cPrefix.c_str());
LOG(e.what());
@@ -445,14 +377,11 @@ Java_org_kiwix_kiwixlib_JNIKiwixReader_getNextSuggestion(JNIEnv* env,
std::string cUrl;
try {
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
if (READER->getNextSuggestion(cTitle, cUrl)) {
setStringObjValue(cTitle, titleObj, env);
setStringObjValue(cUrl, urlObj, env);
retVal = JNI_TRUE;
}
#pragma GCC diagnostic pop
} catch (std::exception& e) {
LOG("Unable to get next suggestion");
LOG(e.what());

View File

@@ -19,6 +19,7 @@
*/
#include <zim/file.h>
#include "org_kiwix_kiwixlib_JNIKiwixSearcher.h"
#include "org_kiwix_kiwixlib_JNIKiwixSearcher_Result.h"

View File

@@ -20,6 +20,7 @@
#include <jni.h>
#include <zim/file.h>
#include "org_kiwix_kiwixlib_JNIKiwixServer.h"
#include "tools/base64.h"
@@ -84,12 +85,6 @@ Java_org_kiwix_kiwixlib_JNIKiwixServer_setTaskbar(JNIEnv* env, jobject obj, jboo
SERVER->setTaskbar(withTaskbar, withLibraryButton);
}
JNIEXPORT void JNICALL
Java_org_kiwix_kiwixlib_JNIKiwixServer_setBlockExternalLinks(JNIEnv* env, jobject obj, jboolean blockExternalLinks)
{
SERVER->setBlockExternalLinks(blockExternalLinks);
}
JNIEXPORT jboolean JNICALL
Java_org_kiwix_kiwixlib_JNIKiwixServer_start(JNIEnv* env, jobject obj)
{

View File

@@ -19,6 +19,7 @@
#include <jni.h>
#include <zim/file.h>
#include "org_kiwix_kiwixlib_Manager.h"
#include "manager.h"

View File

@@ -12,7 +12,7 @@ java_sources = files([
'org/kiwix/kiwixlib/JNIKiwixString.java',
'org/kiwix/kiwixlib/JNIKiwixBool.java',
'org/kiwix/kiwixlib/JNIKiwixException.java',
'org/kiwix/kiwixlib/DirectAccessInfo.java'
'org/kiwix/kiwixlib/Pair.java'
])
kiwix_jni = custom_target('jni',

View File

@@ -24,7 +24,6 @@ public class Book
public native String getUrl();
public native String getName();
public native String getFlavour();
public native String getCategory();
public native String getTags();
/**
* Return the value associated to the tag tagName

View File

@@ -24,8 +24,7 @@ import org.kiwix.kiwixlib.JNIKiwixException;
import org.kiwix.kiwixlib.JNIKiwixString;
import org.kiwix.kiwixlib.JNIKiwixInt;
import org.kiwix.kiwixlib.JNIKiwixSearcher;
import org.kiwix.kiwixlib.DirectAccessInfo;
import java.io.FileDescriptor;
import org.kiwix.kiwixlib.Pair;
public class JNIKiwixReader
{
@@ -103,13 +102,13 @@ public class JNIKiwixReader
* the zim file (or zim part) and directly read the content from it (and so
* bypassing the libzim).
*
* Return a `DirectAccessInfo` (filename, offset) where the content is located.
* Return a `Pair` (filename, offset) where the content is located.
*
* If the content cannot be directly accessed (content is compressed or zim
* file is cut in the middle of the content), the filename is an empty string
* and offset is zero.
*/
public native DirectAccessInfo getDirectAccessInformation(String url);
public native Pair getDirectAccessInformation(String url);
public native boolean searchSuggestions(String prefix, int count);
@@ -152,31 +151,11 @@ public class JNIKiwixReader
throw new JNIKiwixException("Cannot open zimfile "+filename);
}
}
public JNIKiwixReader(FileDescriptor fd) throws JNIKiwixException
{
nativeHandle = getNativeReaderByFD(fd);
if (nativeHandle == 0) {
throw new JNIKiwixException("Cannot open zimfile by fd "+fd.toString());
}
}
public JNIKiwixReader(FileDescriptor fd, long offset, long size)
throws JNIKiwixException
{
nativeHandle = getNativeReaderEmbedded(fd, offset, size);
if (nativeHandle == 0) {
throw new JNIKiwixException(String.format("Cannot open embedded zimfile (fd=%s, offset=%d, size=%d)", fd, offset, size));
}
}
public JNIKiwixReader() {
}
public native void dispose();
private native long getNativeReader(String filename);
private native long getNativeReaderByFD(FileDescriptor fd);
private native long getNativeReaderEmbedded(FileDescriptor fd, long offset, long size);
private long nativeHandle;
}

View File

@@ -34,8 +34,6 @@ public class JNIKiwixServer
public native void setTaskbar(boolean withTaskBar, boolean witLibraryButton);
public native void setBlockExternalLinks(boolean blockExternalLinks);
public native boolean start();
public native void stop();

View File

@@ -19,7 +19,7 @@
package org.kiwix.kiwixlib;
public class DirectAccessInfo
public class Pair
{
public String filename;
public long offset;

View File

@@ -1,19 +0,0 @@
<feed xmlns="http://www.w3.org/2005/Atom" xmlns:opds="http://opds-spec.org/2010/catalog">
<id>00000000-0000-0000-0000-000000000000</id>
<entry>
<title>Test ZIM file</title>
<id>urn:uuid:86c91e51-55bf-8882-464e-072aca37a3e8</id>
<icon>/meta?name=favicon&amp;content=small</icon>
<updated>2020-11-27:00::00:Z</updated>
<language>en</language>
<summary>This is a ZIM file used in libzim unit-tests</summary>
<tags>unit;test</tags>
<link type="text/html" href="/small" />
<author>
<name>Kiwix</name>
</author>
<link rel="http://opds-spec.org/acquisition/open-access" type="application/x-zim" href="http://localhost/small.zim" length="78982" />
<link rel="http://opds-spec.org/image/thumbnail" type="image/png" href="/meta?name=favicon&amp;content=small" />
</entry>
</feed>

View File

@@ -1,37 +0,0 @@
#!/usr/bin/bash
# This script compiles and runs the unit test to test the java wrapper.
# This is not integrated in meson because ... this is not so easy.
die()
{
echo >&2 "!!! ERROR: $*"
exit 1
}
KIWIX_LIB_JAR=$1
if [ -z $KIWIX_LIB_JAR ]
then
die "You must give the path to the kiwixlib.jar as first argument"
fi
KIWIX_LIB_DIR=$2
if [ -z $KIWIX_LIB_DIR ]
then
die "You must give the path to directory containing libkiwix.so as second argument"
fi
KIWIX_LIB_JAR=$(readlink -f "$KIWIX_LIB_JAR")
KIWIX_LIB_DIR=$(readlink -f "$KIWIX_LIB_DIR")
TEST_SOURCE_DIR=$(dirname "$(readlink -f $0)")
cd "$TEST_SOURCE_DIR"
javac -g -d . -s . -cp "junit-4.13.jar:$KIWIX_LIB_JAR" test.java \
|| die "Compilation failed"
java -Djava.library.path="$KIWIX_LIB_DIR" \
-cp "junit-4.13.jar:hamcrest-core-1.3.jar:$KIWIX_LIB_JAR:." \
org.junit.runner.JUnitCore test \
|| die "Unit test failed"

View File

@@ -0,0 +1,26 @@
#!/usr/bin/bash
# This script compile the unit test to test the java wrapper.
# This is not integrated in meson because ... this is not so easy.
KIWIX_LIB_JAR=$1
if [ -z $KIWIX_LIB_JAR ]
then
echo "You must give the path to the kiwixlib.jar as first argument"
exit 1
fi
KIWIX_LIB_DIR=$2
if [ -z $KIWIX_LIB_DIR ]
then
echo "You must give the path to directory containing libkiwix.so as second argument"
exit 1
fi
TEST_SOURCE_DIR=$(dirname $(readlink -f $0))
javac -g -d . -s . -cp $TEST_SOURCE_DIR/junit-4.13.jar:$KIWIX_LIB_JAR $TEST_SOURCE_DIR/test.java
java -Djava.library.path=$KIWIX_LIB_DIR -cp $TEST_SOURCE_DIR/junit-4.13.jar:$TEST_SOURCE_DIR/hamcrest-core-1.3.jar:$KIWIX_LIB_JAR:. org.junit.runner.JUnitCore test

View File

@@ -1,28 +0,0 @@
#!/usr/bin/env bash
die()
{
echo >&2 "!!! ERROR: $*"
exit 1
}
cd "$(dirname "$0")"
rm -f small.zim
zimwriterfs --withoutFTIndex \
-w main.html \
-f favicon.png \
-l en \
-t "Test ZIM file" \
-d "N/A" \
-c "N/A" \
-p "N/A" \
small_zimfile_data \
small.zim \
&& echo 'small.zim was successfully created' \
|| die 'Failed to create small.zim'
printf "BEGINZIM" > small.zim.embedded \
&& cat small.zim >> small.zim.embedded \
&& printf "ENDZIM" >> small.zim.embedded \
&& echo 'small.zim.embedded was successfully created' \
|| die 'Failed to create small.zim.embedded'

View File

Binary file not shown.

View File

Binary file not shown.

View File

Binary file not shown.

Before

Width:  |  Height:  |  Size: 3.1 KiB

View File

@@ -1,11 +0,0 @@
<html>
<head>
<meta charset="UTF-8">
<title>Test ZIM file</title>
<meta name="viewport" content="width=device-width, initial-scale=1.0">
</head>
<body>
Test ZIM file
</body>
</html>

View File

@@ -10,143 +10,36 @@ static {
System.loadLibrary("kiwix");
}
private static byte[] getFileContent(String path)
private static String getCatalogContent()
throws IOException
{
File file = new File(path);
DataInputStream in = new DataInputStream(
new BufferedInputStream(
new FileInputStream(file)));
byte[] data = new byte[(int)file.length()];
in.read(data);
return data;
}
private static byte[] getFileContentPartial(String path, int offset, int size)
throws IOException
{
File file = new File(path);
DataInputStream in = new DataInputStream(
new BufferedInputStream(
new FileInputStream(file)));
byte[] data = new byte[size];
in.skipBytes(offset);
in.read(data, 0, size);
return data;
}
private static String getTextFileContent(String path)
throws IOException
{
return new String(getFileContent(path));
BufferedReader reader = new BufferedReader(new FileReader("catalog.xml"));
String line;
StringBuilder sb = new StringBuilder();
while ((line = reader.readLine()) != null)
{
sb.append(line + "\n");
}
reader.close();
return sb.toString();
}
@Test
public void testReader()
throws JNIKiwixException, IOException
{
JNIKiwixReader reader = new JNIKiwixReader("small.zim");
assertEquals("Test ZIM file", reader.getTitle());
assertEquals(45, reader.getFileSize()); // The file size is in KiB
assertEquals("A/main.html", reader.getMainPage());
String s = getTextFileContent("small_zimfile_data/main.html");
byte[] c = reader.getContent(new JNIKiwixString("A/main.html"),
new JNIKiwixString(),
new JNIKiwixString(),
new JNIKiwixInt());
assertEquals(s, new String(c));
byte[] faviconData = getFileContent("small_zimfile_data/favicon.png");
assertEquals(faviconData.length, reader.getArticleSize("I/favicon.png"));
c = reader.getContent(new JNIKiwixString("I/favicon.png"),
new JNIKiwixString(),
new JNIKiwixString(),
new JNIKiwixInt());
assertTrue(Arrays.equals(faviconData, c));
DirectAccessInfo dai = reader.getDirectAccessInformation("I/favicon.png");
assertNotEquals("", dai.filename);
c = getFileContentPartial(dai.filename, (int)dai.offset, faviconData.length);
assertTrue(Arrays.equals(faviconData, c));
}
@Test
public void testReaderByFd()
throws JNIKiwixException, IOException
{
FileInputStream fis = new FileInputStream("small.zim");
JNIKiwixReader reader = new JNIKiwixReader(fis.getFD());
assertEquals("Test ZIM file", reader.getTitle());
assertEquals(45, reader.getFileSize()); // The file size is in KiB
assertEquals("A/main.html", reader.getMainPage());
String s = getTextFileContent("small_zimfile_data/main.html");
byte[] c = reader.getContent(new JNIKiwixString("A/main.html"),
new JNIKiwixString(),
new JNIKiwixString(),
new JNIKiwixInt());
assertEquals(s, new String(c));
byte[] faviconData = getFileContent("small_zimfile_data/favicon.png");
assertEquals(faviconData.length, reader.getArticleSize("I/favicon.png"));
c = reader.getContent(new JNIKiwixString("I/favicon.png"),
new JNIKiwixString(),
new JNIKiwixString(),
new JNIKiwixInt());
assertTrue(Arrays.equals(faviconData, c));
DirectAccessInfo dai = reader.getDirectAccessInformation("I/favicon.png");
assertNotEquals("", dai.filename);
c = getFileContentPartial(dai.filename, (int)dai.offset, faviconData.length);
assertTrue(Arrays.equals(faviconData, c));
}
@Test
public void testReaderWithAnEmbeddedArchive()
throws JNIKiwixException, IOException
{
File plainArchive = new File("small.zim");
FileInputStream fis = new FileInputStream("small.zim.embedded");
JNIKiwixReader reader = new JNIKiwixReader(fis.getFD(), 8, plainArchive.length());
assertEquals("Test ZIM file", reader.getTitle());
assertEquals(45, reader.getFileSize()); // The file size is in KiB
assertEquals("A/main.html", reader.getMainPage());
String s = getTextFileContent("small_zimfile_data/main.html");
byte[] c = reader.getContent(new JNIKiwixString("A/main.html"),
new JNIKiwixString(),
new JNIKiwixString(),
new JNIKiwixInt());
assertEquals(s, new String(c));
byte[] faviconData = getFileContent("small_zimfile_data/favicon.png");
assertEquals(faviconData.length, reader.getArticleSize("I/favicon.png"));
c = reader.getContent(new JNIKiwixString("I/favicon.png"),
new JNIKiwixString(),
new JNIKiwixString(),
new JNIKiwixInt());
assertTrue(Arrays.equals(faviconData, c));
DirectAccessInfo dai = reader.getDirectAccessInformation("I/favicon.png");
assertNotEquals("", dai.filename);
c = getFileContentPartial(dai.filename, (int)dai.offset, faviconData.length);
assertTrue(Arrays.equals(faviconData, c));
}
@Test
public void testLibrary()
public void testSome()
throws IOException
{
Library lib = new Library();
Manager manager = new Manager(lib);
String content = getTextFileContent("catalog.xml");
manager.readOpds(content, "http://localhost");
assertEquals(lib.getBookCount(true, true), 1);
String content = getCatalogContent();
manager.readOpds(content, "https://library.kiwix.org");
assertEquals(lib.getBookCount(true, true), 10);
String[] bookIds = lib.getBooksIds();
assertEquals(bookIds.length, 1);
assertEquals(bookIds.length, 10);
Book book = lib.getBookById(bookIds[0]);
assertEquals(book.getTitle(), "Test ZIM file");
assertEquals(book.getTags(), "unit;test");
assertEquals(book.getFaviconUrl(), "http://localhost/meta?name=favicon&content=small");
assertEquals(book.getUrl(), "http://localhost/small.zim");
assertEquals(book.getTitle(), "Wikisource");
assertEquals(book.getTags(), "wikisource;_category:wikisource;_pictures:no;_videos:no;_details:yes;_ftindex:yes");
assertEquals(book.getFaviconUrl(), "https://library.kiwix.org/meta?name=favicon&content=wikisource_fr_all_nopic_2020-01");
assertEquals(book.getUrl(), "http://download.kiwix.org/zim/wikisource/wikisource_fr_all_nopic_2020-01.zim.meta4");
}
static

View File

@@ -24,7 +24,7 @@
#include <jni.h>
#include <mutex>
#include <pthread.h>
#include <string>
#include <vector>
#include <iostream>
@@ -36,7 +36,7 @@
#define LOG(...)
#endif
extern std::mutex globalLock;
extern pthread_mutex_t globalLock;
template<typename T>
void setPtr(JNIEnv* env, jobject thisObj, T* ptr)
@@ -88,10 +88,22 @@ inline jobjectArray createArray(JNIEnv* env, size_t length, const std::string& t
return env->NewObjectArray(length, c, NULL);
}
class Lock : public std::unique_lock<std::mutex>
class Lock
{
protected:
pthread_mutex_t* lock;
public:
Lock() : std::unique_lock<std::mutex>(globalLock) { }
Lock() : lock(&globalLock) { pthread_mutex_lock(lock); }
Lock(const Lock&) = delete;
Lock& operator=(const Lock&) = delete;
Lock(Lock&& other) : lock(&globalLock) { other.lock = nullptr; }
virtual ~Lock()
{
if (lock) {
pthread_mutex_unlock(lock);
}
}
};
template <class T>
@@ -246,7 +258,7 @@ inline void setBoolObjValue(const bool value, const jobject obj, JNIEnv* env)
env->SetIntField(obj, objFid, c2jni(value, env));
}
inline void setDaiObjValue(const std::string& filename, const long offset,
inline void setPairObjValue(const std::string& filename, const long offset,
const jobject obj, JNIEnv* env)
{
jclass objClass = env->GetObjectClass(obj);

View File

@@ -3,7 +3,6 @@
#ifndef KIWIX_XMLRPC_H_
#define KIWIX_XMLRPC_H_
#include <stdexcept>
#include <tools/otherTools.h>
#include <pugixml.hpp>

View File

@@ -1,10 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<OpenSearchDescription xmlns="http://a9.com/-/spec/opensearch/1.1/">
<ShortName>Zim catalog search</ShortName>
<Description>Search zim files in the catalog.</Description>
<Url type="application/atom+xml;profile=opds-catalog;kind=acquisition"
xmlns:atom="http://www.w3.org/2005/Atom"
xmlns:k="http://kiwix.org/opensearchextension/1.0"
indexOffset="0"
template="{{endpoint_root}}/entries?q={searchTerms?}&lang={language?}&name={k:name?}&tag={k:tag?}&maxsize={k:maxsize?}&count={count?}&start={startIndex?}"/>
</OpenSearchDescription>

Some files were not shown because too many files have changed in this diff Show More