Compare commits

...

109 Commits

Author SHA1 Message Date
Matthieu Gautier
4cdae3ca98 New version 9.2.1 2020-06-02 10:18:12 +02:00
Matthieu Gautier
7dcaeed33a Merge pull request #360 from kiwix/http_byte_range 2020-06-01 17:41:59 +02:00
Veloman Yunkan
f52b220d01 Dropped RequestContext::has_range() 2020-05-26 14:10:26 +04:00
Veloman Yunkan
50a850f3a9 Fixed a comment 2020-05-26 14:04:18 +04:00
Veloman Yunkan
886ae17274 Fixed a CodeFactor issue 2020-05-26 13:59:47 +04:00
Veloman Yunkan
a9b6d481cc ServerTest.RangeHasPrecedenceOverCompression 2020-05-26 13:58:20 +04:00
Veloman Yunkan
85d6daabac Rolled back minor unneeded changes 2020-05-26 13:10:50 +04:00
Veloman Yunkan
5f1918d005 Split a long line 2020-05-26 13:04:03 +04:00
Veloman Yunkan
16bd79fa1b Final clean-up of byte_range.{h,cpp} 2020-05-26 12:50:08 +04:00
Veloman Yunkan
c2ebdefe8d Handling of unsatisfiable ranges 2020-05-26 02:11:26 +04:00
Veloman Yunkan
37032892a4 Fixed compilation error under win32_*
ERROR is a macro under Windows
2020-05-26 01:58:17 +04:00
Veloman Yunkan
6b43438b74 Fixed compilation error under native_dyn
MHD_HTTP_RANGE_NOT_SATISFIABLE is not defined in the older version of
libmicrohttpd (that is used under CI/Linux native_dyn).
2020-05-26 01:54:36 +04:00
Veloman Yunkan
7301bf89bb Some refactoring of byte-range parsing 2020-05-26 01:50:29 +04:00
Veloman Yunkan
ff23b28e7c Removed unnecessary qualifier 2020-05-26 01:41:37 +04:00
Veloman Yunkan
931e95f391 Invalid byte ranges result in 416 responses 2020-05-26 01:40:07 +04:00
Veloman Yunkan
f7571b5b69 Content-Range header is set only for partial content 2020-05-25 17:42:18 +04:00
Veloman Yunkan
801ad18a89 ByteRange::resolve() 2020-05-25 17:27:35 +04:00
Veloman Yunkan
67a347c0c4 Moved byte-range parsing to byte_range.cpp 2020-05-25 17:21:10 +04:00
Veloman Yunkan
693905eb68 Default constructed ByteRange is a full range 2020-05-25 17:17:56 +04:00
Veloman Yunkan
f3e79c6b4c Introduced src/server/byte_range.cpp 2020-05-25 16:43:44 +04:00
Veloman Yunkan
52f207eaa6 Support for single-ended byte ranges 2020-05-25 16:37:01 +04:00
Veloman Yunkan
67294217a8 ByteRange::Kind 2020-05-25 16:23:44 +04:00
Veloman Yunkan
d111a40ce8 Response::m_byteRange 2020-05-23 20:35:22 +04:00
Veloman Yunkan
0c5bb3fcfe Moved ByteRange to a header file of its own 2020-05-23 20:08:53 +04:00
Veloman Yunkan
3fba8c20a0 Converted RequestContext::ByteRange to a class
Also renamed the `range_pair` data member of `RequestContext` to `byteRange_`
2020-05-23 19:59:47 +04:00
Veloman Yunkan
54db6049b7 Byte-range parsing not exposed in the header file 2020-05-23 18:58:19 +04:00
Veloman Yunkan
81c38d6b2b parse_byte_range() without side-effects 2020-05-23 18:53:16 +04:00
Veloman Yunkan
e6a86c02ae Got rid of RequestContext::accept_range 2020-05-23 17:15:42 +04:00
Veloman Yunkan
a0f7f32570 Re-ordered function definitions 2020-05-23 17:11:26 +04:00
Veloman Yunkan
c39fce8839 RequestContext::parse_byte_range() 2020-05-23 17:09:51 +04:00
Veloman Yunkan
bd2d0bc489 Unit-test for valid single range requests 2020-05-22 17:39:00 +04:00
Veloman Yunkan
de37489c53 Range header starts with a unit spec
After this commit valid ranges of the form "bytes=firstbyte-lastbyte" should
be handled correctly.
2020-05-22 17:17:31 +04:00
Veloman Yunkan
2a35a86de6 Fixed the size value used creating a response
In case of a partial response the size of the response is different
from the served entry size.
2020-05-22 16:49:35 +04:00
Veloman Yunkan
0a30a77c08 Handling of out of bound byte ranges 2020-05-22 16:46:38 +04:00
Veloman Yunkan
1a99bacfe3 Byte ranges are inclusive
The second component of a byte range, if present, designates the
index of the last byte to be included in the partial response.
2020-05-22 16:30:43 +04:00
Matthieu Gautier
2bf35f1651 New version 9.2 2020-05-18 15:23:01 +02:00
Matthieu Gautier
48cc277468 Merge pull request #356 from kiwix/fix_test_httplib 2020-05-18 14:44:18 +02:00
Matthieu Gautier
d8498fd655 Do not build server test on windows.
On the appveyor CI, the link of the `server` test fails with
`libmicrohttpd`.

Do not build the test on windows to not break the CI.
2020-05-18 13:13:17 +02:00
Matthieu Gautier
7ec8e33b83 Fix include of httplib.h on windows.
On windows, `httplib.h` must be included before `windows.h`
We do not include directly `windows.h` in the test but it is included
indirectly by other headers.

Let's include httplib first.
2020-05-18 11:16:25 +02:00
Kelson
8eb2d0c2a1 Explain how to run automated tests 2020-05-18 08:37:51 +02:00
Kelson
5995cc276d Merge pull request #355 from kiwix/more-often-http-compression
Add two OPDS related mime-types to compress for HTTP
2020-05-18 08:33:44 +02:00
Kelson
94c2ab4395 Add two OPDS related mime-types to compress for HTTP 2020-05-18 08:19:51 +02:00
Kelson
15179db945 Merge pull request #354 from kiwix/small-http-header-beautification
Small HTTP header beautification
2020-05-17 21:50:26 +02:00
Kelson
0f07cab920 Small HTTP header beautification 2020-05-17 20:19:19 +02:00
Kelson
4f8b397081 Merge pull request #352 from kiwix/small-compilation-error-fix
Fix small compilation error in tests
2020-05-17 18:37:53 +02:00
Kelson
2df74d9755 Fix small compilation error in tests 2020-05-17 16:47:21 +02:00
Matthieu Gautier
0b25492edc Merge pull request #348 from kiwix/http_head_and_etag 2020-05-15 13:55:50 +02:00
Veloman Yunkan
5f0a9d0b08 Added a comment clarifying a non-obvious case 2020-05-15 15:17:04 +04:00
Veloman Yunkan
54f5dbbd35 Handling of If-None-Match conditional requests 2020-05-14 17:01:22 +04:00
Veloman Yunkan
95a5cde359 ETags are set in the response as needed
Also added server-unit tests related to ETags in the response.
2020-05-14 17:01:22 +04:00
Veloman Yunkan
3d08ef43f2 HEAD request is not rejected
libmicrohttpd handles HEAD requests by dropping the body of the response
(if any). Hence letting a HEAD request through into the code that
processes GET requests is safe.

Also added server unit-tests related to the handling of HEAD requests.
2020-05-14 17:01:22 +04:00
Matthieu Gautier
381ff186cb Merge pull request #349 from kiwix/fix_python 2020-05-04 10:55:55 +02:00
Matthieu Gautier
e32fa28a6c Use python3 instead of python.
`python` binary is not installed on all platform.
But `python3` is (because meson is python3).
And the script we launch is python3 so use the correct version.
2020-05-04 10:52:11 +02:00
Matthieu Gautier
1842e8f51c Merge pull request #345 from kiwix/server_refactoring 2020-04-29 17:38:41 +02:00
Veloman Yunkan
bfa51c2d87 Refactoring: got rid of duplicate get_mime_type() 2020-04-29 18:33:25 +04:00
Veloman Yunkan
81e781133d Refactoring: utilized is_compressible_mime_type() 2020-04-29 18:33:01 +04:00
Veloman Yunkan
9ec7757efe Refactoring: smart Response::set_entry()
Response::set_entry() was upgraded from a simple setter to a method
performing certain business logic that was previously taken care of by
InternalServer::handle_content().
2020-04-29 18:22:15 +04:00
Veloman Yunkan
7bd7ec4937 Refactoring: preparing to move some code 2020-04-29 18:22:15 +04:00
Veloman Yunkan
14d8583c83 Refactoring in InternalServer::handle_content()
Deduplicated common code found in the two branches of the last
if(){}else{} statement in InternalServer::handle_content().
2020-04-29 18:22:15 +04:00
Veloman Yunkan
a004d96cd7 Refactoring: extracted get_range_len() 2020-04-29 18:22:15 +04:00
Veloman Yunkan
21c6de2f80 Refactoring: split Response::create_mhd_response()
The changes are easier to understand in ignore-white-space mode
(git diff -w, git show -w).
2020-04-29 18:22:15 +04:00
Veloman Yunkan
a8e78f27e1 Refactoring: extracted Response::create_mhd_response() 2020-04-29 18:22:15 +04:00
Veloman Yunkan
6c7ab6ff54 Refactoring: moved local variable declarations 2020-04-29 18:21:40 +04:00
Veloman Yunkan
659ee6ba71 Refactoring: extracted InternalServer::build_redirect() 2020-04-29 16:08:10 +04:00
Veloman Yunkan
83ee8dec15 Made InternalServer::get_default_response() const 2020-04-29 16:08:10 +04:00
Veloman Yunkan
87cbbed9e3 Refactoring: extracted is_compressible_mime_type() 2020-04-29 16:08:10 +04:00
Veloman Yunkan
a058520628 Refactoring: extracted get_mime_type() 2020-04-29 16:08:10 +04:00
Veloman Yunkan
1ef5ebfb52 Refactoring: extracted InternalServer::get_reader() 2020-04-29 16:08:10 +04:00
Veloman Yunkan
bbc06931ad Refactoring: extracted get_book_name() 2020-04-29 16:08:10 +04:00
Veloman Yunkan
2d3bf9b981 Refactoring: extracted InternalServer::homepage_data()
Also typedef'ed kainjow::mustache::data as MustacheData
2020-04-29 16:08:10 +04:00
Veloman Yunkan
fd80f2a89f Refactoring: extracted fullURL2LocalURL()
Also dropped RequestContex::valid_url
2020-04-29 16:08:10 +04:00
Veloman Yunkan
abb3dec700 Refactoring: extracted str2RequestMethod() 2020-04-29 16:08:10 +04:00
Kelson
80fcbceeb3 Merge pull request #344 from kiwix/server_unittests
Server unittests
2020-04-29 14:02:57 +02:00
Veloman Yunkan
9a893a854e Revert "Server can be started on a random free port"
This change failed to build under the following platforms
due to an older version of libmicrohttpd missing support for
the MHD_DAEMON_INFO_BIND_PORT query:
- Linux (native_dyn)
- Linux (win32_dyn)
2020-04-29 15:40:02 +04:00
Veloman Yunkan
b0f65a02f2 Server can be started on a random free port
If the server is started with a port value of 0, it binds to a random
free port. The bound port can be obtained with a `getPort()` method.
2020-04-29 15:40:02 +04:00
Veloman Yunkan
9bf6d0621f Introducing 1st real unit test of the server
Added a new unit test 'test/server.cpp'. The pre-existing unit test
test/kiwixserve.cpp tests kiwixlib indirectly by running the kiwix-serve
executable which may be built with another version of the library.

Included with this commit is the cpp-httplib C++ HTTP/HTTPS library
(https://github.com/yhirose/cpp-httplib, v0.5.11).
The new file test/httplib.h was copied from
https://raw.githubusercontent.com/yhirose/cpp-httplib/v0.5.11/httplib.h
2020-04-29 15:39:18 +04:00
Veloman Yunkan
fcadacb0ad Resources are compiled as needed
Correct dependencies are set up for resource compilation and
build_always_stale is set to false.
2020-04-28 19:46:14 +04:00
Matthieu Gautier
8f07689c57 Merge pull request #346 from kiwix/unit-test 2020-04-28 11:04:58 +02:00
luddens
0630298fb9 Manager::addBookFromPathAndGetId unit test 2020-04-28 10:56:12 +02:00
Matthieu Gautier
9f61301423 New version 9.1.2 2020-04-20 15:31:14 +02:00
Matthieu Gautier
9c101daad7 Merge pull request #341 from kiwix/fix-addBookFromPathAndGetId 2020-04-20 15:27:57 +02:00
luddens
0586ef6d41 fix open external zim
Check if the parameter `pathToSave` is empty before use it otherwise the
book path is empty too, which causes crash on opening external zim files
2020-04-20 15:22:36 +02:00
Matthieu Gautier
8fc42558d3 New version 9.1.1 2020-04-17 17:36:11 +02:00
Matthieu Gautier
5d98a6964e Merge pull request #343 from kiwix/fix_datadir_windows
Fix datadir windows
2020-04-15 10:18:51 +02:00
Matthieu Gautier
9d8bf8ddcb Create the dataDirectory before returning its path. 2020-04-15 08:24:55 +02:00
Matthieu Gautier
4c8aad0e68 Do not use std::fstream has it doesn't support wchar path.
This is surprising, but C++11 fstream doesn't have a constructor
that take wchar as path.
So, on windows, we cannot open a stream on a path containing non ascii
char. VC++ provide an extension for that, but it is not standard and
g++ mingwin doesn't provide it.

So move all our write/read tools function to the plain old c versions,
using _wopen to open wide path on windows.
2020-04-14 18:13:35 +02:00
Matthieu Gautier
eb6f0f710c Correctly detetect the dataDir on windows.
We must use the wide version of the getenv to correctly handle the case
we have accents in the user directory.

This also change the default dataDirectory on windows from $APPDATA to
$APPDATA/kiwix.
2020-04-14 12:12:34 +02:00
Matthieu Gautier
d9d2a702ef Merge pull request #342 from kiwix/fix_iterator
Adapt to new libzim api.
2020-04-13 16:18:57 +02:00
Matthieu Gautier
cbf5bd57a8 Adapt to new libzim api.
It is not possible to create a iterator without argument anymore.
2020-04-13 16:06:17 +02:00
Matthieu Gautier
2bf6b04726 Update minimum version of libzim. 2020-04-08 18:03:18 +02:00
Matthieu Gautier
12a0660342 New version 9.1.0 2020-04-08 15:38:25 +02:00
Matthieu Gautier
ab478ed4cf Merge pull request #337 from kiwix/fix_book_articlecount
Fix book articlecount
2020-04-07 12:46:56 +02:00
Matthieu Gautier
533541cf45 Write the articleCount and mediaCount metadata in the OPDS stream.
This is not standard OPDS. But clients need this information.
2020-04-07 12:22:44 +02:00
Matthieu Gautier
bab0fd3d4f Correctly initialize members' value of book.
Fix #333
2020-04-07 12:21:36 +02:00
Kelson
375792a686 Merge pull request #335 from kiwix/fix_taskbar
attach taskbar to <head> instead of <head>\n
2020-04-03 16:56:55 +02:00
renaud gaudin
7155c788e2 attach taskbar to <head> instead of <head>\n
Fixed a regression introduced in block-external-links feature.

For cleaner source, the taskbar (and the block-external JS file) were both
attached to `<head>\n`.
Unfortunately, this isn't safe enough as some ZIM files might have all kinds of HTML
syntax. Sotoki for instance have no CR after head, rendering the attachment impossible.

Note: realizing this method is somehow fragile as any HTML content with extra attribute
on the `<head>` tag or without a `<head>` tag would break the taskbar and the block external feature.
2020-04-03 16:53:43 +02:00
Kelson
3ac926b39b Merge pull request #331 from kiwix/readme-mustache
add required info to README
2020-04-03 16:41:37 +02:00
renaud gaudin
ea3444e2fe add required info to README
Mustache 4 was release in late october 2019 and is not compatible with kiwix-lib as is.
Replacement in taskbar is messed-up and breaks suggestions.
Until this is fixed/adapted, inform users to use version 3.
2020-04-03 16:38:16 +02:00
Kelson
083c06d170 Merge pull request #336 from kiwix/better-ci-event
Run Github actions on any push (not only in PR)
2020-04-03 16:37:01 +02:00
Kelson
286fbba9f6 Run Github actions on any push (not only in PR) 2020-04-03 16:32:49 +02:00
Kelson
778603e1bf Use Github Actions CI badge (instead of Travis CI) 2020-04-03 16:30:42 +02:00
Kelson
411d4b0779 Merge pull request #334 from kiwix/fix-external
fix external link blocking for nested elems in link
2020-04-01 20:05:24 +02:00
renaud gaudin
bce0e8c37c fix external link blocking for nested elems in link
if a link contains nested elements like `<a href="http://somewhere"><strong>goto</strong></a>`
then the link is trigger by the `<strong />` element which has no `href` attribute.
We were thus releasing the event in this case, resulting in legitimate external links
not blocked.

We are now looking for the closest `<a />` parent (might be self) to retrieve the `href`
attribute and capture if necessary.
2020-04-01 17:50:54 +00:00
Matthieu Gautier
507002d34e Merge pull request #330 from kiwix/block-external-links
Add external links blocking in serve
2020-03-30 16:57:08 +02:00
renaud gaudin
34a8144f51 cleaned-up external link blocker page
- removed useless JS/CSS
- set specific title
2020-03-30 14:42:51 +00:00
renaud gaudin
4709a42f4f disable external links blocking on 500 handler 2020-03-30 14:42:37 +00:00
renaud gaudin
d04d9bf7f3 Unblock external link in catch page in JS code
Instead of disabling the blocking for the handler, the JS code detects it is
displaying the handler and allows external links to go through
2020-03-27 12:26:22 +00:00
renaud gaudin
412f0d9c61 moved blockExternalLink outside of taskbar
- `setBlockExternalLinks()` on server
- zero-dependency JS code
- JS script added in `inject_externallinks_blocker()`
- changed URL to `/catch/external?source=<source>`
2020-03-27 11:25:39 +00:00
renaud gaudin
0ad8bf45fc Add external links blocking in serve
In many use cases, it is not wanted to have user accidentaly click on external links
and leave the served ZIM content.
This could be because the result is unpredictible (reader not implementing this properly)
or because the serve user knows there's no backup internet connexion or because there is
an induced cost behind external links that doesn't affect served content.

using a new flag (`blockExternalLinks`) on `Response`/`setTaskBar`, a piece of JS code
is injected into the taskbar code.
This code adds a JS handler on all link click events and verifies the destination.
If the destination appears to be an external link (1), the link target is changed to
a specific URL:

```
/external?source=<original_uri>
```

(1) external is a link that's not on the same origin and starts with either `http:` `https:` or `//`.

Server implements a new handler on `/external` that displays a new page (`captured_external.html`)
which returns a generic message explaining the situation and offering to click on the link
again should the user really want to.
This is done by specifically asking `set_taskbar` to not block external requests on that page.

This approach allows integrators using a reverse proxy to handle that endpoint differently (rebrand it)

1. `Server` now has an `m_blockExternalLinks` defaulting to `false`
1. `Server.setTaskbar` is extended to support an additional bool to set the variable.
1. `Response` now has an `m_blockExternalLinks`
1. `Response` constr expects an additional bool for `blockExternalLinks`.
1. `Response.set_taskbar` is extended to support an additional bool to set the variable.
1. JNI/Java Wrapper reflects the extensions.
1. New resource file `templates/block_external.js` (included in head_part). Should it be in skin?
1. New resource file `templates/captured_external.html` for `handle_captured_external()`
1. Added a comment on `head_part.html` to help with JS insertion at the right place
1. `introduce_taskbar()` conditionnaly inserts the JS inside the taskbar
2020-03-26 12:06:36 +00:00
35 changed files with 6750 additions and 316 deletions

View File

@@ -1,6 +1,6 @@
name: CI
on: [pull_request]
on: [push]
jobs:
Macos:
@@ -85,25 +85,20 @@ jobs:
container:
image: "kiwix/kiwix-build_ci:${{matrix.image_variant}}-26"
steps:
- name: Extract branch name
shell: bash
run: echo "##[set-output name=branch;]$(echo ${GITHUB_REF#refs/heads/})"
id: extract_branch
- name: Checkout code
shell: python
run: |
import json
from subprocess import check_call
from os import environ
with open(environ['GITHUB_EVENT_PATH'], 'r') as f:
content = f.read()
event_data = json.loads(content)
try:
branch_ref = event_data['ref'].split('/')[-1]
except KeyError:
branch_ref = event_data['pull_request']['head']['ref']
print("Cloning branch", branch_ref)
command = [
'git', 'clone',
'https://github.com/${{github.repository}}',
'--depth=1',
'--branch', branch_ref
'--branch', '${{steps.extract_branch.outputs.branch}}'
]
check_call(command, cwd=environ['HOME'])
- name: Install deps

View File

@@ -1,3 +1,35 @@
kiwix-lib 9.2.1
===============
* Fix support of byte range request.
kiwix-lib 9.2
=============
* Add tests
* Refactoring server code.
* [SERVER] Add HEAD, Etag and If-None-Match support.
* [SERVER] Compress opds catalog answers.
kiwix-lib 9.1.2
===============
* Do not use the pathToSave if it is empty.
kiwix-lib 9.1.1
===============
* Fix the detection of the dataDirectory on windows.
kiwix-lib 9.1.0
===============
* [JAVA] Add a method to get the size of an article.
* Add a new method to the libray to get the book by path.
* Add a option to make the server blocks external link.
Links are intercepted by js an redirected to a "portail" page.
* [ODPS] Correctly handle book's articleCount and mediaCount.
kiwix-lib 9.0.1
===============

View File

@@ -7,7 +7,7 @@ GNU/Linux, macOS, Android, iOS, ...).
[![Download](https://api.bintray.com/packages/kiwix/kiwix/kiwixlib/images/download.svg)](https://bintray.com/kiwix/kiwix/kiwixlib/_latestVersion)
[![AUR version](https://img.shields.io/aur/version/kiwix-lib)](https://aur.archlinux.org/packages/kiwix-lib/)
[![Build Status](https://travis-ci.com/kiwix/kiwix-lib.svg?branch=master)](https://travis-ci.com/kiwix/kiwix-lib)
[![Build Status](https://github.com/kiwix/kiwix-lib/workflows/CI/badge.svg?query=branch%3Amaster)](https://github.com/kiwix/kiwix-lib/actions?query=branch%3Amaster)
[![CodeFactor](https://www.codefactor.io/repository/github/kiwix/kiwix-lib/badge)](https://www.codefactor.io/repository/github/kiwix/kiwix-lib)
[![Codecov](https://codecov.io/gh/kiwix/kiwix-lib/branch/master/graph/badge.svg)](https://codecov.io/gh/kiwix/kiwix-lib)
[![License: GPL v3](https://img.shields.io/badge/License-GPLv3-blue.svg)](https://www.gnu.org/licenses/gpl-3.0)
@@ -41,7 +41,7 @@ libraries need to be available:
* [Aria2](https://aria2.github.io/) (package `aria2` on Ubuntu)
* [Mustache](https://github.com/kainjow/Mustache) (Just copy the
header `mustache.hpp` somewhere it can be found by the compiler and/or
set CPPFLAGS with correct `-I` option)
set CPPFLAGS with correct `-I` option). Use Mustache version 3 only.
These dependencies may or may not be packaged by your operating
system. They may also be packaged but only in an older version. The
@@ -85,6 +85,15 @@ Meson. If you want statically linked libraries, you can add
Depending of you system, `ninja` may be called `ninja-build`.
Testing
-------
To run the automated tests:
```bash
cd build
meson test
```
Installation
------------

View File

@@ -26,7 +26,7 @@ task writePom {
project {
groupId 'org.kiwix.kiwixlib'
artifactId 'kiwixlib'
version '9.0.1' + (System.env.KIWIXLIB_BUILDVERSION == null ? '' : '-'+System.env.KIWIXLIB_BUILDVERSION)
version '9.2.1' + (System.env.KIWIXLIB_BUILDVERSION == null ? '' : '-'+System.env.KIWIXLIB_BUILDVERSION)
packaging 'aar'
name 'kiwixlib'
url 'https://github.com/kiwix/kiwix-lib'

View File

@@ -98,7 +98,7 @@ class Book
std::string m_id;
std::string m_downloadId;
std::string m_path;
bool m_pathValid;
bool m_pathValid = false;
std::string m_title;
std::string m_description;
std::string m_language;
@@ -110,10 +110,10 @@ class Book
std::string m_flavour;
std::string m_tags;
std::string m_origId;
uint64_t m_articleCount;
uint64_t m_mediaCount;
bool m_readOnly;
uint64_t m_size;
uint64_t m_articleCount = 0;
uint64_t m_mediaCount = 0;
bool m_readOnly = false;
uint64_t m_size = 0;
mutable std::string m_favicon;
std::string m_faviconUrl;
std::string m_faviconMimeType;

View File

@@ -56,7 +56,9 @@ namespace kiwix
void setNbThreads(int threads) { m_nbThreads = threads; }
void setVerbose(bool verbose) { m_verbose = verbose; }
void setTaskbar(bool withTaskbar, bool withLibraryButton)
{ m_withTaskbar = withTaskbar; m_withLibraryButton = withLibraryButton; }
{ m_withTaskbar = withTaskbar; m_withLibraryButton = withLibraryButton; }
void setBlockExternalLinks(bool blockExternalLinks)
{ m_blockExternalLinks = blockExternalLinks; }
protected:
Library* mp_library;
@@ -68,6 +70,7 @@ namespace kiwix
bool m_verbose = false;
bool m_withTaskbar = true;
bool m_withLibraryButton = true;
bool m_blockExternalLinks = false;
std::unique_ptr<InternalServer> mp_server;
};
}

View File

@@ -1,5 +1,5 @@
project('kiwix-lib', 'cpp',
version : '9.0.1', # Also change this in android-kiwix-lib-publisher/kiwixLibAndroid/build.gradle
version : '9.2.1', # Also change this in android-kiwix-lib-publisher/kiwixLibAndroid/build.gradle
license : 'GPL',
default_options : ['c_std=c11', 'cpp_std=c++11', 'werror=true'])
@@ -20,7 +20,7 @@ endif
thread_dep = dependency('threads')
libicu_dep = dependency('icu-i18n', static:static_deps)
libzim_dep = dependency('libzim', version : '>=5.0.0', static:static_deps)
libzim_dep = dependency('libzim', version : '>=6.1.1', static:static_deps)
pugixml_dep = dependency('pugixml', static:static_deps)
libcurl_dep = dependency('libcurl', static:static_deps)
microhttpd_dep = dependency('libmicrohttpd', static:static_deps)

View File

@@ -156,6 +156,8 @@ void Book::updateFromOpds(const pugi::xml_node& node, const std::string& urlHost
m_name = VALUE("name");
m_flavour = VALUE("flavour");
m_tags = VALUE("tags");
m_articleCount = strtoull(VALUE("articleCount"), 0, 0);
m_mediaCount = strtoull(VALUE("mediaCount"), 0, 0);
for(auto linkNode = node.child("link"); linkNode;
linkNode = linkNode.next_sibling("link")) {
std::string rel = linkNode.attribute("rel").value();

View File

@@ -175,7 +175,7 @@ std::string Manager::addBookFromPathAndGetId(const std::string& pathToOpen,
kiwix::Book book;
if (this->readBookFromPath(pathToOpen, &book)) {
if (pathToSave != pathToOpen) {
if (!pathToSave.empty() && pathToSave != pathToOpen) {
book.setPath(isRelativePath(pathToSave)
? computeAbsolutePath(
removeLastPathElement(writableLibraryPath),

View File

@@ -21,6 +21,8 @@ kiwix_sources = [
'tools/otherTools.cpp',
'kiwixserve.cpp',
'name_mapper.cpp',
'server/byte_range.cpp',
'server/etag.cpp',
'server/request_context.cpp',
'server/response.cpp'
]

View File

@@ -78,6 +78,8 @@ pugi::xml_node OPDSDumper::handleBook(Book book, pugi::xml_node root_node) {
ADD_TEXT_ENTRY(entry_node, "name", book.getName());
ADD_TEXT_ENTRY(entry_node, "flavour", book.getFlavour());
ADD_TEXT_ENTRY(entry_node, "tags", book.getTags());
ADD_TEXT_ENTRY(entry_node, "articleCount", to_string(book.getArticleCount()));
ADD_TEXT_ENTRY(entry_node, "mediaCount", to_string(book.getMediaCount()));
ADD_TEXT_ENTRY(entry_node, "icon", rootLocation + "/meta?name=favicon&content=" + book.getHumanReadableIdFromPath());
auto content_node = entry_node.append_child("link");

View File

@@ -714,7 +714,6 @@ bool Reader::searchSuggestions(const string& prefix,
const bool reset)
{
bool retVal = false;
zim::File::const_iterator articleItr;
/* Reset the suggestions otherwise check if the suggestions number is less
* than the suggestionsCount */
@@ -732,7 +731,7 @@ bool Reader::searchSuggestions(const string& prefix,
return false;
}
for (articleItr = zimFileHandler->findByTitle('A', prefix);
for (auto articleItr = zimFileHandler->findByTitle('A', prefix);
articleItr != zimFileHandler->end()
&& articleItr->getTitle().compare(0, prefix.size(), prefix) == 0
&& this->suggestions.size() < suggestionsCount;

View File

@@ -75,6 +75,8 @@ namespace kiwix {
static IdNameMapper defaultNameMapper;
typedef kainjow::mustache::data MustacheData;
static int staticHandlerCallback(void* cls,
struct MHD_Connection* connection,
const char* url,
@@ -95,7 +97,8 @@ class InternalServer {
int nbThreads,
bool verbose,
bool withTaskbar,
bool withLibraryButton);
bool withLibraryButton,
bool blockExternalLinks);
virtual ~InternalServer() = default;
int handlerCallback(struct MHD_Connection* connection,
@@ -108,10 +111,12 @@ class InternalServer {
bool start();
void stop();
private:
private: // functions
Response handle_request(const RequestContext& request);
Response build_500(const std::string& msg);
Response build_404(const RequestContext& request, const std::string& zimName);
Response build_304(const RequestContext& request, const ETag& etag) const;
Response build_redirect(const std::string& bookName, const kiwix::Entry& entry) const;
Response build_homepage(const RequestContext& request);
Response handle_skin(const RequestContext& request);
Response handle_catalog(const RequestContext& request);
@@ -119,11 +124,18 @@ class InternalServer {
Response handle_search(const RequestContext& request);
Response handle_suggest(const RequestContext& request);
Response handle_random(const RequestContext& request);
Response handle_captured_external(const RequestContext& request);
Response handle_content(const RequestContext& request);
kainjow::mustache::data get_default_data();
Response get_default_response();
MustacheData get_default_data() const;
MustacheData homepage_data() const;
Response get_default_response() const;
std::shared_ptr<Reader> get_reader(const std::string& bookName) const;
bool etag_not_needed(const RequestContext& r) const;
ETag get_matching_if_none_match_etag(const RequestContext& request) const;
private: // data
std::string m_addr;
int m_port;
std::string m_root;
@@ -131,10 +143,13 @@ class InternalServer {
std::atomic_bool m_verbose;
bool m_withTaskbar;
bool m_withLibraryButton;
bool m_blockExternalLinks;
struct MHD_Daemon* mp_daemon;
Library* mp_library;
NameMapper* mp_nameMapper;
std::string m_server_id;
};
@@ -157,7 +172,8 @@ bool Server::start() {
m_nbThreads,
m_verbose,
m_withTaskbar,
m_withLibraryButton));
m_withLibraryButton,
m_blockExternalLinks));
return mp_server->start();
}
@@ -186,7 +202,8 @@ InternalServer::InternalServer(Library* library,
int nbThreads,
bool verbose,
bool withTaskbar,
bool withLibraryButton) :
bool withLibraryButton,
bool blockExternalLinks) :
m_addr(addr),
m_port(port),
m_root(root),
@@ -194,6 +211,7 @@ InternalServer::InternalServer(Library* library,
m_verbose(verbose),
m_withTaskbar(withTaskbar),
m_withLibraryButton(withLibraryButton),
m_blockExternalLinks(blockExternalLinks),
mp_daemon(nullptr),
mp_library(library),
mp_nameMapper(nameMapper ? nameMapper : &defaultNameMapper)
@@ -239,6 +257,8 @@ bool InternalServer::start() {
<< std::endl;
return false;
}
auto server_start_time = std::chrono::system_clock::now().time_since_epoch();
m_server_id = kiwix::to_string(server_start_time.count());
return true;
}
@@ -288,7 +308,8 @@ int InternalServer::handlerCallback(struct MHD_Connection* connection,
}
/* Unexpected method */
if (request.get_method() != RequestMethod::GET
&& request.get_method() != RequestMethod::POST) {
&& request.get_method() != RequestMethod::POST
&& request.get_method() != RequestMethod::HEAD) {
printf("Reject request because of unhandled request method.\n");
printf("----------------------\n");
return MHD_NO;
@@ -305,6 +326,9 @@ int InternalServer::handlerCallback(struct MHD_Connection* connection,
}
}
if (response.getReturnCode() == MHD_HTTP_OK && !etag_not_needed(request))
response.set_server_id(m_server_id);
auto ret = response.send(request, connection);
auto end_time = std::chrono::steady_clock::now();
auto time_span = std::chrono::duration_cast<std::chrono::duration<double>>(end_time - start_time);
@@ -315,6 +339,14 @@ int InternalServer::handlerCallback(struct MHD_Connection* connection,
return ret;
}
Response InternalServer::build_304(const RequestContext& request, const ETag& etag) const
{
auto response = get_default_response();
response.set_code(MHD_HTTP_NOT_MODIFIED);
response.set_etag(etag);
response.set_content("");
return response;
}
Response InternalServer::handle_request(const RequestContext& request)
{
@@ -322,6 +354,10 @@ Response InternalServer::handle_request(const RequestContext& request)
if (! request.is_valid_url())
return build_404(request, "");
const ETag etag = get_matching_if_none_match_etag(request);
if ( etag )
return build_304(request, etag);
if (kiwix::startsWith(request.get_url(), "/skin/"))
return handle_skin(request);
@@ -340,6 +376,9 @@ Response InternalServer::handle_request(const RequestContext& request)
if (request.get_url() == "/random")
return handle_random(request);
if (request.get_url() == "/catch/external")
return handle_captured_external(request);
return handle_content(request);
} catch (std::exception& e) {
fprintf(stderr, "===== Unhandled error : %s\n", e.what());
@@ -350,23 +389,23 @@ Response InternalServer::handle_request(const RequestContext& request)
}
}
kainjow::mustache::data InternalServer::get_default_data()
MustacheData InternalServer::get_default_data() const
{
kainjow::mustache::data data;
MustacheData data;
data.set("root", m_root);
return data;
}
Response InternalServer::get_default_response()
Response InternalServer::get_default_response() const
{
return Response(m_root, m_verbose.load(), m_withTaskbar, m_withLibraryButton);
return Response(m_root, m_verbose.load(), m_withTaskbar, m_withLibraryButton, m_blockExternalLinks);
}
Response InternalServer::build_404(const RequestContext& request,
const std::string& bookName)
{
kainjow::mustache::data results;
MustacheData results;
results.set("url", request.get_full_url());
auto response = get_default_response();
@@ -381,24 +420,24 @@ Response InternalServer::build_404(const RequestContext& request,
Response InternalServer::build_500(const std::string& msg)
{
kainjow::mustache::data data;
MustacheData data;
data.set("error", msg);
Response response(m_root, true, false, false);
Response response(m_root, true, false, false, false);
response.set_template(RESOURCE::templates::_500_html, data);
response.set_mimeType("text/html");
response.set_code(MHD_HTTP_INTERNAL_SERVER_ERROR);
return response;
}
Response InternalServer::build_homepage(const RequestContext& request)
MustacheData InternalServer::homepage_data() const
{
auto data = get_default_data();
kainjow::mustache::data books{kainjow::mustache::data::type::list};
MustacheData books{MustacheData::type::list};
for (auto& bookId: mp_library->filter(kiwix::Filter().local(true).valid(true))) {
auto& currentBook = mp_library->getBookById(bookId);
kainjow::mustache::data book;
MustacheData book;
book.set("name", mp_nameMapper->getNameForId(bookId));
book.set("title", currentBook.getTitle());
book.set("description", currentBook.getDescription());
@@ -408,9 +447,34 @@ Response InternalServer::build_homepage(const RequestContext& request)
}
data.set("books", books);
return data;
}
bool InternalServer::etag_not_needed(const RequestContext& request) const
{
const std::string url = request.get_url();
return kiwix::startsWith(url, "/catalog")
|| url == "/search"
|| url == "/suggest"
|| url == "/random"
|| url == "/catch/external";
}
ETag
InternalServer::get_matching_if_none_match_etag(const RequestContext& r) const
{
try {
const std::string etag_list = r.get_header(MHD_HTTP_HEADER_IF_NONE_MATCH);
return ETag::match(etag_list, m_server_id);
} catch (const std::out_of_range&) {
return ETag();
}
}
Response InternalServer::build_homepage(const RequestContext& request)
{
auto response = get_default_response();
response.set_template(RESOURCE::templates::index_html, data);
response.set_template(RESOURCE::templates::index_html, homepage_data());
response.set_mimeType("text/html; charset=utf-8");
response.set_compress(true);
response.set_taskbar("", "");
@@ -465,7 +529,7 @@ Response InternalServer::handle_meta(const RequestContext& request)
response.set_content(content);
response.set_mimeType(mimeType);
response.set_compress(false);
response.set_cache(true);
response.set_cacheable();
return response;
}
@@ -498,14 +562,14 @@ Response InternalServer::handle_suggest(const RequestContext& request)
printf("Searching suggestions for: \"%s\"\n", term.c_str());
}
kainjow::mustache::data results{kainjow::mustache::data::type::list};
MustacheData results{MustacheData::type::list};
bool first = true;
if (reader != nullptr) {
/* Get the suggestions */
reader->searchSuggestionsSmart(term, maxSuggestionCount);
while (reader->getNextSuggestion(suggestion)) {
kainjow::mustache::data result;
MustacheData result;
result.set("label", suggestion);
result.set("value", suggestion);
result.set("first", first);
@@ -517,7 +581,7 @@ Response InternalServer::handle_suggest(const RequestContext& request)
/* Propose the fulltext search if possible */
if (reader->hasFulltextIndex()) {
kainjow::mustache::data result;
MustacheData result;
result.set("label", "containing '" + term + "'...");
result.set("value", term + " ");
result.set("first", first);
@@ -549,7 +613,7 @@ Response InternalServer::handle_skin(const RequestContext& request)
}
response.set_mimeType(getMimeTypeForFile(resourceName));
response.set_compress(true);
response.set_cache(true);
response.set_cacheable();
return response;
}
@@ -702,15 +766,32 @@ Response InternalServer::handle_random(const RequestContext& request)
try {
auto entry = reader->getRandomPage();
entry = entry.getFinalEntry();
auto response = get_default_response();
response.set_redirection(m_root + "/" + bookName + "/" + kiwix::urlEncode(entry.getPath()));
return response;
return build_redirect(bookName, entry.getFinalEntry());
} catch(kiwix::NoEntry& e) {
return build_404(request, bookName);
}
}
Response InternalServer::handle_captured_external(const RequestContext& request)
{
std::string source = "";
try {
source = kiwix::urlDecode(request.get_argument("source"));
} catch (const std::out_of_range& e) {}
if (source.empty())
return build_404(request, "");
auto data = get_default_data();
data.set("source", source);
auto response = get_default_response();
response.set_template(RESOURCE::templates::captured_external_html, data);
response.set_mimeType("text/html; charset=utf-8");
response.set_compress(true);
response.set_taskbar("", "");
return response;
}
Response InternalServer::handle_catalog(const RequestContext& request)
{
if (m_verbose.load()) {
@@ -743,7 +824,7 @@ Response InternalServer::handle_catalog(const RequestContext& request)
opdsDumper.setRootLocation(m_root);
opdsDumper.setSearchDescriptionUrl("catalog/searchdescription.xml");
opdsDumper.setLibrary(mp_library);
response.set_mimeType("application/atom+xml;profile=opds-catalog;kind=acquisition; charset=utf-8");
response.set_mimeType("application/atom+xml; profile=opds-catalog; kind=acquisition; charset=utf-8");
std::vector<std::string> bookIdsToDump;
if (url == "root.xml") {
opdsDumper.setTitle("All zims");
@@ -795,36 +876,52 @@ Response InternalServer::handle_catalog(const RequestContext& request)
return response;
}
namespace
{
std::string get_book_name(const RequestContext& request)
{
try {
return request.get_url_part(0);
} catch (const std::out_of_range& e) {
return std::string();
}
}
} // unnamed namespace
std::shared_ptr<Reader>
InternalServer::get_reader(const std::string& bookName) const
{
std::shared_ptr<Reader> reader;
try {
const std::string bookId = mp_nameMapper->getIdForName(bookName);
reader = mp_library->getReaderById(bookId);
} catch (const std::out_of_range& e) {
}
return reader;
}
Response
InternalServer::build_redirect(const std::string& bookName, const kiwix::Entry& entry) const
{
auto response = get_default_response();
response.set_redirection(m_root + "/" + bookName + "/" +
kiwix::urlEncode(entry.getPath()));
return response;
}
Response InternalServer::handle_content(const RequestContext& request)
{
if (m_verbose.load()) {
printf("** running handle_content\n");
}
std::string baseUrl;
std::string content;
std::string mimeType;
kiwix::Entry entry;
std::string bookName;
try {
bookName = request.get_url_part(0);
} catch (const std::out_of_range& e) {
return build_homepage(request);
}
const std::string bookName = get_book_name(request);
if (bookName.empty())
return build_homepage(request);
std::string bookId;
std::shared_ptr<Reader> reader;
try {
bookId = mp_nameMapper->getIdForName(bookName);
reader = mp_library->getReaderById(bookId);
} catch (const std::out_of_range& e) {
return build_404(request, bookName);
}
const std::shared_ptr<Reader> reader = get_reader(bookName);
if (reader == nullptr) {
return build_404(request, bookName);
}
@@ -834,16 +931,14 @@ Response InternalServer::handle_content(const RequestContext& request)
urlStr = urlStr.substr(1);
}
kiwix::Entry entry;
try {
entry = reader->getEntryFromPath(urlStr);
if (entry.isRedirect() || urlStr.empty()) {
// If urlStr is empty, we want to mainPage.
// We must do a redirection to the real page.
entry = entry.getFinalEntry();
auto response = get_default_response();
response.set_redirection(m_root + "/" + bookName + "/" +
kiwix::urlEncode(entry.getPath()));
return response;
return build_redirect(bookName, entry.getFinalEntry());
}
} catch(kiwix::NoEntry& e) {
if (m_verbose.load())
@@ -852,47 +947,19 @@ Response InternalServer::handle_content(const RequestContext& request)
return build_404(request, bookName);
}
try {
mimeType = entry.getMimetype();
} catch (exception& e) {
mimeType = "application/octet-stream";
}
auto response = get_default_response();
response.set_entry(entry, request);
if (m_verbose.load()) {
printf("Found %s\n", urlStr.c_str());
printf("mimeType: %s\n", mimeType.c_str());
printf("Found %s\n", entry.getPath().c_str());
printf("mimeType: %s\n", response.get_mimeType().c_str());
}
if (mimeType.find("text/") != string::npos
|| mimeType.find("application/javascript") != string::npos
|| mimeType.find("application/json") != string::npos) {
zim::Blob raw_content = entry.getBlob();
content = string(raw_content.data(), raw_content.size());
auto response = get_default_response();
if (response.get_mimeType().find("text/html") != string::npos)
response.set_taskbar(bookName, reader->getTitle());
if (mimeType.find("text/html") != string::npos)
response.set_taskbar(bookName, reader->getTitle());
response.set_mimeType(mimeType);
response.set_content(content);
response.set_compress(true);
response.set_cache(true);
return response;
} else {
int range_len;
if (request.get_range().second == -1) {
range_len = entry.getSize() - request.get_range().first;
} else {
range_len = request.get_range().second - request.get_range().first;
}
auto response = get_default_response();
response.set_entry(entry);
response.set_mimeType(mimeType);
response.set_range_first(request.get_range().first);
response.set_range_len(range_len);
response.set_cache(true);
return response;
}
return response;
}
}

126
src/server/byte_range.cpp Normal file
View File

@@ -0,0 +1,126 @@
/*
* Copyright 2020 Veloman Yunkan <veloman.yunkan@gmail.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*/
#include "byte_range.h"
#include "tools/stringTools.h"
#include <cassert>
namespace kiwix {
namespace {
ByteRange parseByteRange(const std::string& rangeStr)
{
std::istringstream iss(rangeStr);
int64_t start, end = INT64_MAX;
if (iss >> start) {
if ( start < 0 ) {
if ( iss.eof() )
return ByteRange(-start);
} else {
char c;
if (iss >> c && c=='-') {
iss >> end; // if this fails, end is not modified, which is OK
if (iss.eof() && start <= end)
return ByteRange(ByteRange::PARSED, start, end);
}
}
}
return ByteRange(ByteRange::INVALID, 0, INT64_MAX);
}
} // unnamed namespace
ByteRange::ByteRange()
: kind_(NONE)
, first_(0)
, last_(INT64_MAX)
{}
ByteRange::ByteRange(Kind kind, int64_t first, int64_t last)
: kind_(kind)
, first_(first)
, last_(last)
{
assert(kind != NONE);
assert(first >= 0);
assert(last >= first);
}
ByteRange::ByteRange(int64_t suffix_length)
: kind_(PARSED)
, first_(-suffix_length)
, last_(INT64_MAX)
{
assert(suffix_length > 0);
}
int64_t ByteRange::first() const
{
assert(kind_ > PARSED);
return first_;
}
int64_t ByteRange::last() const
{
assert(kind_ > PARSED);
return last_;
}
int64_t ByteRange::length() const
{
assert(kind_ > PARSED);
return last_ + 1 - first_;
}
ByteRange ByteRange::parse(const std::string& rangeStr)
{
const std::string byteUnitSpec("bytes=");
if ( ! kiwix::startsWith(rangeStr, byteUnitSpec) )
return ByteRange(INVALID, 0, INT64_MAX);
return parseByteRange(rangeStr.substr(byteUnitSpec.size()));
}
ByteRange ByteRange::resolve(int64_t contentSize) const
{
if ( kind() == NONE )
return ByteRange(RESOLVED_FULL_CONTENT, 0, contentSize-1);
if ( kind() == INVALID )
return ByteRange(RESOLVED_UNSATISFIABLE, 0, contentSize-1);
const int64_t resolved_first = first_ < 0
? std::max(int64_t(0), contentSize + first_)
: first_;
const int64_t resolved_last = std::min(contentSize-1, last_);
if ( resolved_first > resolved_last )
return ByteRange(RESOLVED_UNSATISFIABLE, 0, contentSize-1);
return ByteRange(RESOLVED_PARTIAL_CONTENT, resolved_first, resolved_last);
}
} // namespace kiwix

86
src/server/byte_range.h Normal file
View File

@@ -0,0 +1,86 @@
/*
* Copyright 2020 Veloman Yunkan <veloman.yunkan@gmail.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*/
#ifndef KIWIXLIB_SERVER_BYTE_RANGE_H
#define KIWIXLIB_SERVER_BYTE_RANGE_H
#include <cstdint>
#include <string>
namespace kiwix {
class ByteRange
{
public: // types
// ByteRange is parsed in a request, then it must be resolved (taking
// into account the actual size of the requested resource) before
// being applied in the response.
// The Kind enum represents possible states in such a lifecycle.
enum Kind {
// The request is not a range request (no Range header)
NONE,
// The value of the Range header is not a valid continuous
// range. Note that a valid (according to RFC7233) sequence of multiple
// byte ranges is considered invalid in the current implementation
// (i.e. only single-range partial requests are supported).
INVALID,
// This byte-range has been successfully parsed from the request
PARSED,
// This is a response to a regular (non-range) request
RESOLVED_FULL_CONTENT,
// The range request is invalid or unsatisfiable
RESOLVED_UNSATISFIABLE,
// This is a response to a (satisfiable) range request
RESOLVED_PARTIAL_CONTENT,
};
public: // functions
// Constructs a ByteRange object of NONE kind
ByteRange();
// Constructs a ByteRange object of the given kind (except NONE)
ByteRange(Kind kind, int64_t first, int64_t last);
// Constructs a ByteRange object of PARSED kind corresponding to a
// range request of the form "Range: bytes=-suffix_length"
explicit ByteRange(int64_t suffix_length);
Kind kind() const { return kind_; }
int64_t first() const;
int64_t last() const;
int64_t length() const;
static ByteRange parse(const std::string& rangeStr);
ByteRange resolve(int64_t contentSize) const;
private: // data
Kind kind_;
int64_t first_;
int64_t last_;
};
} // namespace kiwix
#endif //KIWIXLIB_SERVER_BYTE_RANGE_H

135
src/server/etag.cpp Normal file
View File

@@ -0,0 +1,135 @@
/*
* Copyright 2020 Veloman Yunkan <veloman.yunkan@gmail.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*/
#include "etag.h"
#include "tools/stringTools.h"
#include <algorithm>
#include <sstream>
namespace kiwix {
namespace {
// Characters in the options part of the ETag could in principle be picked up
// from the latin alphabet in natural order (the character corresponding to
// ETag::Option opt would be 'a'+opt; that would somewhat simplify the code in
// this file). However it is better to have some mnemonics in the option names,
// hence below variable: all_options[opt] corresponds to the character going
// into the ETag for ETag::Option opt.
// IMPORTANT: The characters in all_options must come in sorted order (so that
// IMPORTANT: isValidOptionsString() works correctly).
const char all_options[] = "cz";
static_assert(ETag::OPTION_COUNT == sizeof(all_options) - 1, "");
bool isValidServerId(const std::string& s)
{
return !s.empty() && s.find_first_of("\"/") == std::string::npos;
}
bool isSubsequenceOf(const std::string& s, const std::string& sortedString)
{
std::string::size_type i = 0;
for ( const char c : s )
{
const std::string::size_type j = sortedString.find(c, i);
if ( j == std::string::npos )
return false;
i = j+1;
}
return true;
}
bool isValidOptionsString(const std::string& s)
{
return isSubsequenceOf(s, all_options);
}
} // namespace
void ETag::set_option(Option opt)
{
if ( ! get_option(opt) )
{
m_options.push_back(all_options[opt]);
std::sort(m_options.begin(), m_options.end());
}
}
bool ETag::get_option(Option opt) const
{
return m_options.find(all_options[opt]) != std::string::npos;
}
std::string ETag::get_etag() const
{
if ( m_serverId.empty() )
return std::string();
return "\"" + m_serverId + "/" + m_options + "\"";
}
ETag::ETag(const std::string& serverId, const std::string& options)
{
if ( isValidServerId(serverId) && isValidOptionsString(options) )
{
m_serverId = serverId;
m_options = options;
}
}
ETag ETag::parse(std::string s)
{
if ( kiwix::startsWith("W/", s) )
s = s.substr(2);
if ( s.front() != '"' || s.back() != '"' )
return ETag();
s = s.substr(1, s.size()-2);
const std::string::size_type i = s.find('/');
if ( i == std::string::npos )
return ETag();
return ETag(s.substr(0, i), s.substr(i+1));
}
ETag ETag::match(const std::string& etags, const std::string& server_id)
{
std::istringstream ss(etags);
std::string etag_str;
while ( ss >> etag_str )
{
if ( etag_str.back() == ',' )
etag_str.pop_back();
const ETag etag = parse(etag_str);
if ( etag && etag.m_serverId == server_id )
return etag;
}
return ETag();
}
} // namespace kiwix

85
src/server/etag.h Normal file
View File

@@ -0,0 +1,85 @@
/*
* Copyright 2020 Veloman Yunkan <veloman.yunkan@gmail.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*/
#ifndef KIWIXLIB_SERVER_ETAG_H
#define KIWIXLIB_SERVER_ETAG_H
#include <string>
namespace kiwix {
// The ETag string used by Kiwix server (more precisely, its value inside the
// double quotes) consists of two parts:
//
// 1. ServerId - The string obtained on server start up
//
// 2. Options - Zero or more characters encoding the values of some of the
// headers of the response
//
// The two parts are separated with a slash (/) symbol (which is always present,
// even when the the options part is empty). Neither portion of a Kiwix ETag
// may contain the slash symbol.
// Examples of valid Kiwix server ETags (including the double quotes):
//
// "abcdefghijklmn/"
// "1234567890/z"
// "1234567890/cz"
//
// The options part of the Kiwix ETag allows to correctly set the required
// headers when responding to a conditional If-None-Match request with a 304
// (Not Modified) response without following the full code path that would
// discover the necessary options.
class ETag
{
public: // types
enum Option {
CACHEABLE_ENTITY,
COMPRESSED_CONTENT,
OPTION_COUNT
};
public: // functions
ETag() {}
void set_server_id(const std::string& id) { m_serverId = id; }
void set_option(Option opt);
explicit operator bool() const { return !m_serverId.empty(); }
bool get_option(Option opt) const;
std::string get_etag() const;
static ETag match(const std::string& etags, const std::string& server_id);
private: // functions
ETag(const std::string& serverId, const std::string& options);
static ETag parse(std::string s);
private: // data
std::string m_serverId;
std::string m_options;
};
} // namespace kiwix
#endif // KIWIXLIB_SERVER_ETAG_H

View File

@@ -30,92 +30,68 @@ namespace kiwix {
static std::atomic_ullong s_requestIndex(0);
namespace {
RequestMethod str2RequestMethod(const std::string& method) {
if (method == "GET") return RequestMethod::GET;
else if (method == "HEAD") return RequestMethod::HEAD;
else if (method == "POST") return RequestMethod::POST;
else if (method == "PUT") return RequestMethod::PUT;
else if (method == "DELETE") return RequestMethod::DELETE_;
else if (method == "CONNECT") return RequestMethod::CONNECT;
else if (method == "OPTIONS") return RequestMethod::OPTIONS;
else if (method == "TRACE") return RequestMethod::TRACE;
else if (method == "PATCH") return RequestMethod::PATCH;
else return RequestMethod::OTHER;
}
std::string
fullURL2LocalURL(const std::string& full_url, const std::string& rootLocation)
{
if (rootLocation.empty()) {
// nothing special to handle.
return full_url;
} else if (full_url == rootLocation) {
return "/";
} else if (full_url.size() > rootLocation.size() &&
full_url.substr(0, rootLocation.size()+1) == rootLocation + "/") {
return full_url.substr(rootLocation.size());
} else {
return "";
}
}
} // unnamed namespace
RequestContext::RequestContext(struct MHD_Connection* connection,
std::string rootLocation,
const std::string& _url,
const std::string& method,
const std::string& _method,
const std::string& version) :
full_url(_url),
url(_url),
valid_url(true),
url(fullURL2LocalURL(_url, rootLocation)),
method(str2RequestMethod(_method)),
version(version),
requestIndex(s_requestIndex++),
acceptEncodingDeflate(false),
accept_range(false),
range_pair(0, -1)
byteRange_()
{
if (method == "GET") {
this->method = RequestMethod::GET;
} else if (method == "HEAD") {
this->method = RequestMethod::HEAD;
} else if (method == "POST") {
this->method = RequestMethod::POST;
} else if (method == "PUT") {
this->method = RequestMethod::PUT;
} else if (method == "DELETE") {
this->method = RequestMethod::DELETE_;
} else if (method == "CONNECT") {
this->method = RequestMethod::CONNECT;
} else if (method == "OPTIONS") {
this->method = RequestMethod::OPTIONS;
} else if (method == "TRACE") {
this->method = RequestMethod::TRACE;
} else if (method == "PATCH") {
this->method = RequestMethod::PATCH;
} else {
this->method = RequestMethod::OTHER;
}
MHD_get_connection_values(connection, MHD_HEADER_KIND, &RequestContext::fill_header, this);
MHD_get_connection_values(connection, MHD_GET_ARGUMENT_KIND, &RequestContext::fill_argument, this);
valid_url = true;
if (rootLocation.empty()) {
// nothing special to handle.
url = full_url;
} else {
if (full_url == rootLocation) {
url = "/";
} else if (full_url.size() > rootLocation.size() &&
full_url.substr(0, rootLocation.size()+1) == rootLocation + "/") {
url = full_url.substr(rootLocation.size());
} else {
valid_url = false;
}
}
try {
acceptEncodingDeflate =
(get_header(MHD_HTTP_HEADER_ACCEPT_ENCODING).find("deflate") != std::string::npos);
} catch (const std::out_of_range&) {}
/*Check if range is requested. */
try {
auto range = get_header(MHD_HTTP_HEADER_RANGE);
int start = 0;
int end = -1;
std::istringstream iss(range);
char c;
iss >> start >> c;
if (iss.good() && c=='-') {
iss >> end;
if (iss.fail()) {
// Something went wrong will extracting.
end = -1;
}
if (iss.eof()) {
accept_range = true;
range_pair = std::pair<int, int>(start, end);
}
}
byteRange_ = ByteRange::parse(get_header(MHD_HTTP_HEADER_RANGE));
} catch (const std::out_of_range&) {}
}
RequestContext::~RequestContext()
{}
int RequestContext::fill_header(void *__this, enum MHD_ValueKind kind,
const char *key, const char *value)
{
@@ -147,10 +123,11 @@ void RequestContext::print_debug_info() const {
printf(" - %s : '%s'\n", it->first.c_str(), it->second.c_str());
}
printf("Parsed : \n");
printf("full_url: %s\n", full_url.c_str());
printf("url : %s\n", url.c_str());
printf("acceptEncodingDeflate : %d\n", acceptEncodingDeflate);
printf("has_range : %d\n", accept_range);
printf("is_valid_url : %d\n", valid_url);
printf("has_range : %d\n", byteRange_.kind() != ByteRange::NONE);
printf("is_valid_url : %d\n", is_valid_url());
printf(".............\n");
}
@@ -188,15 +165,11 @@ std::string RequestContext::get_full_url() const {
}
bool RequestContext::is_valid_url() const {
return valid_url;
return !url.empty();
}
bool RequestContext::has_range() const {
return accept_range;
}
std::pair<int, int> RequestContext::get_range() const {
return range_pair;
ByteRange RequestContext::get_range() const {
return byteRange_;
}
template<>

View File

@@ -27,6 +27,8 @@
#include <map>
#include <stdexcept>
#include "byte_range.h"
extern "C" {
#include <microhttpd.h>
}
@@ -51,7 +53,7 @@ class IndexError: public std::runtime_error {};
class RequestContext {
public:
public: // functions
RequestContext(struct MHD_Connection* connection,
std::string rootLocation,
const std::string& url,
@@ -78,26 +80,24 @@ class RequestContext {
std::string get_url_part(int part) const;
std::string get_full_url() const;
bool has_range() const;
std::pair<int, int> get_range() const;
ByteRange get_range() const;
bool can_compress() const { return acceptEncodingDeflate; }
private:
private: // data
std::string full_url;
std::string url;
bool valid_url;
RequestMethod method;
std::string version;
unsigned long long requestIndex;
bool acceptEncodingDeflate;
bool accept_range;
std::pair<int, int> range_pair;
ByteRange byteRange_;
std::map<std::string, std::string> headers;
std::map<std::string, std::string> arguments;
private: // functions
static int fill_header(void *, enum MHD_ValueKind, const char*, const char*);
static int fill_argument(void *, enum MHD_ValueKind, const char*, const char*);
};

View File

@@ -17,7 +17,32 @@
namespace kiwix {
Response::Response(const std::string& root, bool verbose, bool withTaskbar, bool withLibraryButton)
namespace
{
// some utilities
std::string get_mime_type(const kiwix::Entry& entry)
{
try {
return entry.getMimetype();
} catch (exception& e) {
return "application/octet-stream";
}
}
bool is_compressible_mime_type(const std::string& mimeType)
{
return mimeType.find("text/") != string::npos
|| mimeType.find("application/javascript") != string::npos
|| mimeType.find("application/atom") != string::npos
|| mimeType.find("application/opensearchdescription") != string::npos
|| mimeType.find("application/json") != string::npos;
}
} // unnamed namespace
Response::Response(const std::string& root, bool verbose, bool withTaskbar, bool withLibraryButton, bool blockExternalLinks)
: m_verbose(verbose),
m_root(root),
m_content(""),
@@ -25,11 +50,9 @@ Response::Response(const std::string& root, bool verbose, bool withTaskbar, bool
m_returnCode(MHD_HTTP_OK),
m_withTaskbar(withTaskbar),
m_withLibraryButton(withLibraryButton),
m_useCache(false),
m_blockExternalLinks(blockExternalLinks),
m_addTaskbar(false),
m_bookName(""),
m_startRange(0),
m_lenRange(0)
m_bookName("")
{
}
@@ -127,88 +150,158 @@ void Response::introduce_taskbar()
}
int Response::send(const RequestContext& request, MHD_Connection* connection)
void Response::inject_externallinks_blocker()
{
MHD_Response* response = nullptr;
switch (m_mode) {
case ResponseMode::RAW_CONTENT : {
if (m_addTaskbar) {
introduce_taskbar();
}
kainjow::mustache::data data;
data.set("root", m_root);
auto script_tag = render_template(RESOURCE::templates::external_blocker_part_html, data);
m_content = appendToFirstOccurence(
m_content,
"<head>",
script_tag);
}
bool shouldCompress = m_compress && request.can_compress();
shouldCompress &= m_mimeType.find("text/") != string::npos
|| m_mimeType.find("application/javascript") != string::npos
|| m_mimeType.find("application/json") != string::npos;
bool
Response::can_compress(const RequestContext& request) const
{
return request.can_compress()
&& is_compressible_mime_type(m_mimeType)
&& (m_content.size() > KIWIX_MIN_CONTENT_SIZE_TO_DEFLATE);
}
shouldCompress &= (m_content.size() > KIWIX_MIN_CONTENT_SIZE_TO_DEFLATE);
MHD_Response*
Response::create_error_response(const RequestContext& request) const
{
MHD_Response* response = MHD_create_response_from_buffer(0, NULL, MHD_RESPMEM_PERSISTENT);
if ( m_returnCode == 416 ) {
std::ostringstream oss;
oss << "bytes */" << m_byteRange.length();
if (shouldCompress) {
std::vector<Bytef> compr_buffer(compressBound(m_content.size()));
uLongf comprLen = compr_buffer.capacity();
int err = compress(&compr_buffer[0],
&comprLen,
(const Bytef*)(m_content.data()),
m_content.size());
if (err == Z_OK && comprLen > 2 && comprLen < (m_content.size() + 2)) {
/* /!\ Internet Explorer has a bug with deflate compression.
It can not handle the first two bytes (compression headers)
We need to chunk them off (move the content 2bytes)
It has no incidence on other browsers
See http://www.subbu.org/blog/2008/03/ie7-deflate-or-not and comments */
m_content = string((char*)&compr_buffer[2], comprLen - 2);
} else {
shouldCompress = false;
}
}
MHD_add_response_header(response,
MHD_HTTP_HEADER_CONTENT_RANGE, oss.str().c_str());
}
return response;
}
response = MHD_create_response_from_buffer(
m_content.size(), const_cast<char*>(m_content.data()), MHD_RESPMEM_MUST_COPY);
MHD_Response*
Response::create_raw_content_mhd_response(const RequestContext& request)
{
if (m_addTaskbar) {
introduce_taskbar();
}
if ( m_blockExternalLinks ) {
inject_externallinks_blocker();
}
if (shouldCompress) {
MHD_add_response_header(
response, MHD_HTTP_HEADER_VARY, "Accept-Encoding");
MHD_add_response_header(
response, MHD_HTTP_HEADER_CONTENT_ENCODING, "deflate");
}
MHD_add_response_header(response, MHD_HTTP_HEADER_CONTENT_TYPE, m_mimeType.c_str());
break;
}
case ResponseMode::REDIRECTION : {
response = MHD_create_response_from_buffer(0, nullptr, MHD_RESPMEM_MUST_COPY);
MHD_add_response_header(response, MHD_HTTP_HEADER_LOCATION, m_content.c_str());
break;
}
case ResponseMode::ENTRY : {
response = MHD_create_response_from_callback(m_entry.getSize(),
16384,
callback_reader_from_entry,
new RunningResponse(m_entry, m_startRange),
callback_free_response);
MHD_add_response_header(response,
MHD_HTTP_HEADER_CONTENT_TYPE, m_mimeType.c_str());
MHD_add_response_header(response, MHD_HTTP_HEADER_ACCEPT_RANGES, "bytes");
std::ostringstream oss;
oss << "bytes " << m_startRange << "-" << m_startRange + m_lenRange - 1
<< "/" << m_entry.getSize();
MHD_add_response_header(response,
MHD_HTTP_HEADER_CONTENT_RANGE, oss.str().c_str());
MHD_add_response_header(response,
MHD_HTTP_HEADER_CONTENT_LENGTH, kiwix::to_string(m_lenRange).c_str());
break;
bool shouldCompress = m_compress && can_compress(request);
if (shouldCompress) {
std::vector<Bytef> compr_buffer(compressBound(m_content.size()));
uLongf comprLen = compr_buffer.capacity();
int err = compress(&compr_buffer[0],
&comprLen,
(const Bytef*)(m_content.data()),
m_content.size());
if (err == Z_OK && comprLen > 2 && comprLen < (m_content.size() + 2)) {
/* /!\ Internet Explorer has a bug with deflate compression.
It can not handle the first two bytes (compression headers)
We need to chunk them off (move the content 2bytes)
It has no incidence on other browsers
See http://www.subbu.org/blog/2008/03/ie7-deflate-or-not and comments */
m_content = string((char*)&compr_buffer[2], comprLen - 2);
m_etag.set_option(ETag::COMPRESSED_CONTENT);
} else {
shouldCompress = false;
}
}
MHD_add_response_header(response, "Access-Control-Allow-Origin", "*");
MHD_add_response_header(response, MHD_HTTP_HEADER_CACHE_CONTROL,
m_useCache ? "max-age=2723040, public" : "no-cache, no-store, must-revalidate");
MHD_Response* response = MHD_create_response_from_buffer(
m_content.size(), const_cast<char*>(m_content.data()), MHD_RESPMEM_MUST_COPY);
if (m_returnCode == MHD_HTTP_OK && request.has_range())
// At shis point m_etag.get_option(ETag::COMPRESSED_CONTENT) and
// shouldCompress can have different values. This can happen for a 304 (Not
// Modified) response generated while handling a conditional If-None-Match
// request. In that case the m_etag (together with its COMPRESSED_CONTENT
// option) is obtained from the ETag list of the If-None-Match header and the
// response has no body (which shouldn't be compressed).
if ( m_etag.get_option(ETag::COMPRESSED_CONTENT) ) {
MHD_add_response_header(
response, MHD_HTTP_HEADER_VARY, "Accept-Encoding");
}
if (shouldCompress) {
MHD_add_response_header(
response, MHD_HTTP_HEADER_CONTENT_ENCODING, "deflate");
}
MHD_add_response_header(response, MHD_HTTP_HEADER_CONTENT_TYPE, m_mimeType.c_str());
return response;
}
MHD_Response*
Response::create_redirection_mhd_response() const
{
MHD_Response* response = MHD_create_response_from_buffer(0, nullptr, MHD_RESPMEM_MUST_COPY);
MHD_add_response_header(response, MHD_HTTP_HEADER_LOCATION, m_content.c_str());
return response;
}
MHD_Response*
Response::create_entry_mhd_response() const
{
const auto content_length = m_byteRange.length();
MHD_Response* response = MHD_create_response_from_callback(content_length,
16384,
callback_reader_from_entry,
new RunningResponse(m_entry, m_byteRange.first()),
callback_free_response);
MHD_add_response_header(response,
MHD_HTTP_HEADER_CONTENT_TYPE, m_mimeType.c_str());
MHD_add_response_header(response, MHD_HTTP_HEADER_ACCEPT_RANGES, "bytes");
if ( m_byteRange.kind() == ByteRange::RESOLVED_PARTIAL_CONTENT ) {
std::ostringstream oss;
oss << "bytes " << m_byteRange.first() << "-" << m_byteRange.last()
<< "/" << m_entry.getSize();
MHD_add_response_header(response,
MHD_HTTP_HEADER_CONTENT_RANGE, oss.str().c_str());
}
MHD_add_response_header(response,
MHD_HTTP_HEADER_CONTENT_LENGTH, kiwix::to_string(content_length).c_str());
return response;
}
MHD_Response*
Response::create_mhd_response(const RequestContext& request)
{
switch (m_mode) {
case ResponseMode::ERROR_RESPONSE:
return create_error_response(request);
case ResponseMode::RAW_CONTENT :
return create_raw_content_mhd_response(request);
case ResponseMode::REDIRECTION :
return create_redirection_mhd_response();
case ResponseMode::ENTRY :
return create_entry_mhd_response();
}
return nullptr;
}
int Response::send(const RequestContext& request, MHD_Connection* connection)
{
MHD_Response* response = create_mhd_response(request);
if ( m_mode != ResponseMode::ERROR_RESPONSE ) {
MHD_add_response_header(response, "Access-Control-Allow-Origin", "*");
MHD_add_response_header(response, MHD_HTTP_HEADER_CACHE_CONTROL,
m_etag.get_option(ETag::CACHEABLE_ENTITY) ? "max-age=2723040, public" : "no-cache, no-store, must-revalidate");
const std::string etag = m_etag.get_etag();
if ( ! etag.empty() )
MHD_add_response_header(response, MHD_HTTP_HEADER_ETAG, etag.c_str());
}
if (m_returnCode == MHD_HTTP_OK && m_byteRange.kind() == ByteRange::RESOLVED_PARTIAL_CONTENT)
m_returnCode = MHD_HTTP_PARTIAL_CONTENT;
if (m_verbose)
@@ -234,9 +327,27 @@ void Response::set_redirection(const std::string& url) {
m_returnCode = MHD_HTTP_FOUND;
}
void Response::set_entry(const Entry& entry) {
void Response::set_entry(const Entry& entry, const RequestContext& request) {
m_entry = entry;
m_mode = ResponseMode::ENTRY;
const std::string mimeType = get_mime_type(entry);
set_mimeType(mimeType);
set_cacheable();
m_byteRange = request.get_range().resolve(entry.getSize());
const bool noRange = m_byteRange.kind() == ByteRange::RESOLVED_FULL_CONTENT;
if ( noRange && is_compressible_mime_type(mimeType) ) {
zim::Blob raw_content = entry.getBlob();
const std::string content = string(raw_content.data(), raw_content.size());
set_content(content);
set_compress(true);
} else if ( m_byteRange.kind() == ByteRange::RESOLVED_UNSATISFIABLE ) {
set_code(416);
set_content("");
m_mode = ResponseMode::ERROR_RESPONSE;
}
}
void Response::set_taskbar(const std::string& bookName, const std::string& bookTitle)

View File

@@ -24,7 +24,9 @@
#include <string>
#include <mustache.hpp>
#include "byte_range.h"
#include "entry.h"
#include "etag.h"
extern "C" {
#include <microhttpd.h>
@@ -33,6 +35,7 @@ extern "C" {
namespace kiwix {
enum class ResponseMode {
ERROR_RESPONSE,
RAW_CONTENT,
REDIRECTION,
ENTRY
@@ -42,7 +45,7 @@ class RequestContext;
class Response {
public:
Response(const std::string& root, bool verbose, bool withTaskbar, bool withLibraryButton);
Response(const std::string& root, bool verbose, bool withTaskbar, bool withLibraryButton, bool blockExternalLinks);
~Response() = default;
int send(const RequestContext& request, MHD_Connection* connection);
@@ -50,22 +53,33 @@ class Response {
void set_template(const std::string& template_str, kainjow::mustache::data data);
void set_content(const std::string& content);
void set_redirection(const std::string& url);
void set_entry(const Entry& entry);
void set_entry(const Entry& entry, const RequestContext& request);
void set_mimeType(const std::string& mimeType) { m_mimeType = mimeType; }
void set_code(int code) { m_returnCode = code; }
void set_cache(bool cache) { m_useCache = cache; }
void set_cacheable() { m_etag.set_option(ETag::CACHEABLE_ENTITY); }
void set_server_id(const std::string& id) { m_etag.set_server_id(id); }
void set_etag(const ETag& etag) { m_etag = etag; }
void set_compress(bool compress) { m_compress = compress; }
void set_taskbar(const std::string& bookName, const std::string& bookTitle);
void set_range_first(uint64_t start) { m_startRange = start; }
void set_range_len(uint64_t len) { m_lenRange = len; }
int getReturnCode() { return m_returnCode; }
int getReturnCode() const { return m_returnCode; }
std::string get_mimeType() const { return m_mimeType; }
void introduce_taskbar();
void inject_externallinks_blocker();
private:
bool can_compress(const RequestContext& request) const;
private: // functions
MHD_Response* create_mhd_response(const RequestContext& request);
MHD_Response* create_error_response(const RequestContext& request) const;
MHD_Response* create_raw_content_mhd_response(const RequestContext& request);
MHD_Response* create_redirection_mhd_response() const;
MHD_Response* create_entry_mhd_response() const;
private: // data
bool m_verbose;
ResponseMode m_mode;
std::string m_root;
@@ -75,13 +89,13 @@ class Response {
int m_returnCode;
bool m_withTaskbar;
bool m_withLibraryButton;
bool m_useCache;
bool m_blockExternalLinks;
bool m_compress;
bool m_addTaskbar;
std::string m_bookName;
std::string m_bookTitle;
uint64_t m_startRange;
uint64_t m_lenRange;
ByteRange m_byteRange;
ETag m_etag;
};
}

View File

@@ -18,6 +18,7 @@
*/
#include "tools/pathTools.h"
#include <stdexcept>
#ifdef __APPLE__
#include <limits.h>
@@ -42,12 +43,16 @@
#include <algorithm>
#ifdef _WIN32
#define SEPARATOR "\\"
# define SEPARATOR "\\"
# include <io.h>
#else
#define SEPARATOR "/"
#include <unistd.h>
# define SEPARATOR "/"
# include <unistd.h>
# include <sys/stat.h>
#endif
#include <fcntl.h>
#include <stdlib.h>
#ifndef PATH_MAX
@@ -227,14 +232,33 @@ std::string getFileSizeAsString(const std::string& path)
std::string getFileContent(const std::string& path)
{
std::ifstream f(path, std::ios::in|std::ios::ate);
#ifdef _WIN32
auto wpath = Utf8ToWide(path);
auto fd = _wopen(wpath.c_str(), _O_RDONLY | _O_BINARY);
#else
auto fd = open(path.c_str(), O_RDONLY);
#endif
std::string content;
if (f.is_open()) {
auto size = f.tellg();
content.reserve(size);
f.seekg(0, std::ios::beg);
content.assign((std::istreambuf_iterator<char>(f)),
std::istreambuf_iterator<char>());
if (fd != -1) {
#ifdef _WIN32
auto size = _lseeki64(fd, 0, SEEK_END);
#else
auto size = lseek(fd, 0, SEEK_END);
#endif
content.resize(size);
#ifdef _WIN32
_lseeki64(fd, 0, SEEK_SET);
#else
lseek(fd, 0, SEEK_SET);
#endif
auto p = (char*)content.data();
while (size) {
auto readsize = size > 2048 ? 2048 : size;
readsize = ::read(fd, p, readsize);
p += readsize;
size -= readsize;
}
close(fd);
}
return content;
}
@@ -287,22 +311,22 @@ std::string makeTmpDirectory()
/* Try to create a link and if does not work then make a copy */
bool copyFile(const std::string& sourcePath, const std::string& destPath)
{
#ifdef _WIN32
return CopyFileW(Utf8ToWide(sourcePath).c_str(), Utf8ToWide(destPath).c_str(), 1);
#else
try {
#ifndef _WIN32
if (link(sourcePath.c_str(), destPath.c_str()) != 0) {
#endif
std::ifstream infile(sourcePath.c_str(), std::ios_base::binary);
std::ofstream outfile(destPath.c_str(), std::ios_base::binary);
outfile << infile.rdbuf();
#ifndef _WIN32
}
#endif
} catch (std::exception& e) {
std::cerr << e.what() << std::endl;
return false;
}
return true;
#endif
}
std::string getExecutablePath(bool realPathOnly)
@@ -341,10 +365,21 @@ std::string getExecutablePath(bool realPathOnly)
bool writeTextFile(const std::string& path, const std::string& content)
{
std::ofstream file;
file.open(path.c_str());
file << content;
file.close();
#ifdef _WIN32
auto wpath = Utf8ToWide(path);
auto fd = _wopen(wpath.c_str(), _O_WRONLY | _O_CREAT | _O_TRUNC, S_IWRITE);
#else
auto fd = open(path.c_str(), O_WRONLY | O_CREAT | O_TRUNC,
S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
#endif
if (fd == -1)
return false;
if (write(fd, content.c_str(), content.size()) != (long)content.size()) {
close(fd);
return false;
}
close(fd);
return true;
}
@@ -364,30 +399,48 @@ std::string getCurrentDirectory()
std::string getDataDirectory()
{
// Try to get the dataDir from the `KIWIX_DATA_DIR` env var
#ifdef _WIN32
char* cDataDir = ::getenv("APPDATA");
wchar_t* cDataDir = ::_wgetenv(L"KIWIX_DATA_DIR");
if (cDataDir != nullptr) {
return WideToUtf8(cDataDir);
}
#else
char* cDataDir = ::getenv("KIWIX_DATA_DIR");
#endif
std::string dataDir = cDataDir==nullptr ? "" : cDataDir;
if (!dataDir.empty()) {
return dataDir;
if (cDataDir != nullptr) {
return cDataDir;
}
#endif
// Compute the dataDir from the user directory.
std::string dataDir;
#ifdef _WIN32
cDataDir = ::getenv("USERPROFILE");
dataDir = cDataDir==nullptr ? getCurrentDirectory() : cDataDir;
cDataDir = ::_wgetenv(L"APPDATA");
if (cDataDir == nullptr)
cDataDir = ::_wgetenv(L"USERPROFILE");
if (cDataDir != nullptr)
dataDir = WideToUtf8(cDataDir);
#else
cDataDir = ::getenv("XDG_DATA_HOME");
dataDir = cDataDir==nullptr ? "" : cDataDir;
if (dataDir.empty()) {
if (cDataDir != nullptr) {
dataDir = cDataDir;
} else {
cDataDir = ::getenv("HOME");
dataDir = cDataDir==nullptr ? getCurrentDirectory() : cDataDir;
dataDir = appendToDirectory(dataDir, ".local");
dataDir = appendToDirectory(dataDir, "share");
if (cDataDir != nullptr) {
dataDir = cDataDir;
dataDir = appendToDirectory(dataDir, ".local");
dataDir = appendToDirectory(dataDir, "share");
}
}
#endif
auto ret = appendToDirectory(dataDir, "kiwix");
return ret;
if (!dataDir.empty()) {
dataDir = appendToDirectory(dataDir, "kiwix");
makeDirectory(dataDir);
return dataDir;
}
// Let's use the currentDirectory
return getCurrentDirectory();
}
static std::map<std::string, std::string> extMimeTypes = {

View File

@@ -85,6 +85,12 @@ Java_org_kiwix_kiwixlib_JNIKiwixServer_setTaskbar(JNIEnv* env, jobject obj, jboo
SERVER->setTaskbar(withTaskbar, withLibraryButton);
}
JNIEXPORT void JNICALL
Java_org_kiwix_kiwixlib_JNIKiwixServer_setBlockExternalLinks(JNIEnv* env, jobject obj, jboolean blockExternalLinks)
{
SERVER->setBlockExternalLinks(blockExternalLinks);
}
JNIEXPORT jboolean JNICALL
Java_org_kiwix_kiwixlib_JNIKiwixServer_start(JNIEnv* env, jobject obj)
{

View File

@@ -34,6 +34,8 @@ public class JNIKiwixServer
public native void setTaskbar(boolean withTaskBar, boolean witLibraryButton);
public native void setBlockExternalLinks(boolean blockExternalLinks);
public native boolean start();
public native void stop();

View File

@@ -1,3 +1,8 @@
resource_files = run_command(find_program('python3'),
'-c',
'import sys; f=open(sys.argv[1]); print(f.read())',
files('resources_list.txt')
).stdout().strip().split('\n')
lib_resources = custom_target('resources',
input: 'resources_list.txt',
@@ -7,5 +12,5 @@ lib_resources = custom_target('resources',
'--hfile', '@OUTPUT1@',
'--source_dir', '@OUTDIR@',
'@INPUT@'],
build_always_stale: true
depend_files: resource_files
)

View File

@@ -20,6 +20,7 @@ skin/jquery-ui/jquery-ui.min.css
skin/caret.png
skin/taskbar.js
skin/taskbar.css
skin/block_external.js
templates/search_result.html
templates/no_search_result.html
templates/404.html
@@ -28,4 +29,6 @@ templates/index.html
templates/suggestion.json
templates/head_part.html
templates/taskbar_part.html
templates/external_blocker_part.html
templates/captured_external.html
opensearchdescription.xml

View File

@@ -0,0 +1,72 @@
var block_path = "/catch/external";
// called only on external links
function capture_event(e, target) { target.setAttribute("href", encodeURI(block_path + "?source=" + target.href)); }
// called on all link clicks. filters external and call capture_event
function on_click_event(e) {
var target = findParent("a", e.target);
if (target !== null && "href" in target) {
var href = target.href;
if (window.location.pathname.indexOf(block_path) == 0) // already in catch page
return;
if (href.indexOf(window.location.origin) == 0)
return;
if (href.substr(0, 2) == "//")
return capture_event(e, target);
if (href.substr(0, 5) == "http:")
return capture_event(e, target);
if (href.substr(0, 6) == "https:")
return capture_event(e, target);
return;
}
}
// script entrypoint (called on document ready)
function run() { live('a', 'click', on_click_event); }
// find first parent with tagname
function findParent(tagname, el) {
while (el) {
if ((el.nodeName || el.tagName).toLowerCase() === tagname.toLowerCase()) {
return el;
}
el = el.parentNode;
}
return null;
}
// matches polyfill
this.Element && function(ElementPrototype) {
ElementPrototype.matches = ElementPrototype.matches ||
ElementPrototype.matchesSelector ||
ElementPrototype.webkitMatchesSelector ||
ElementPrototype.msMatchesSelector ||
function(selector) {
var node = this, nodes = (node.parentNode || node.document).querySelectorAll(selector), i = -1;
while (nodes[++i] && nodes[i] != node);
return !!nodes[i];
}
}(Element.prototype);
// helper for enabling IE 8 event bindings
function addEvent(el, type, handler) {
if (el.attachEvent) el.attachEvent('on'+type, handler); else el.addEventListener(type, handler);
}
// live binding helper using matchesSelector
function live(selector, event, callback, context) {
addEvent(context || document, event, function(e) {
var found, el = e.target || e.srcElement;
while (el && el.matches && el !== context && !(found = el.matches(selector))) el = el.parentElement;
if (found) callback.call(el, e);
});
}
// in case the document is already rendered
if (document.readyState!='loading') run();
// modern browsers
else if (document.addEventListener) document.addEventListener('DOMContentLoaded', run);
// IE <= 8
else document.attachEvent('onreadystatechange', function(){
if (document.readyState=='complete') run();
});

View File

@@ -0,0 +1,14 @@
<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta charset="UTF-8" />
<title>External link blocked</title>
</head>
<body class="kiwix">
<h1>External link blocked</h1>
<p>This instance of Kiwix protects you from accidentaly going to external (out-of ZIM) links.</p>
<p>If you intend to go to such locations, please click the link below.</p>
<p><a href="{{ source }}">Go to {{ source }}</a></p>
<div id="kiwixfooter">Powered by <a href="https://kiwix.org">Kiwix</a></div>
</body>
</html>

View File

@@ -0,0 +1 @@
<script type="text/javascript" src="{{root}}/skin/block_external.js"></script>

BIN
test/data/example.zim Normal file
View File

Binary file not shown.

View File

Binary file not shown.

5101
test/httplib.h Normal file
View File

File diff suppressed because it is too large Load Diff

25
test/manager.cpp Normal file
View File

@@ -0,0 +1,25 @@
#include "gtest/gtest.h"
#include "../include/manager.h"
#include "../include/library.h"
#include "../include/book.h"
#include <iostream>
#include <fstream>
TEST(ManagerTest, addBookFromPathAndGetIdTest)
{
kiwix::Library lib;
kiwix::Manager manager = kiwix::Manager(&lib);
auto bookId = manager.addBookFromPathAndGetId("./test/example.zim");
EXPECT_NE(bookId, "");
kiwix::Book book = lib.getBookById(bookId);
EXPECT_EQ(book.getPath(), computeAbsolutePath("", "./test/example.zim"));
const std::string pathToSave = "./pathToSave";
const std::string url = "url";
bookId = manager.addBookFromPathAndGetId("./test/example.zim", pathToSave, url, true);
book = lib.getBookById(bookId);
auto savedPath = computeAbsolutePath(removeLastPathElement(manager.writableLibraryPath), pathToSave);
EXPECT_EQ(book.getPath(), savedPath);
EXPECT_EQ(book.getUrl(), url);
}

View File

@@ -1,4 +1,6 @@
configure_file(input : 'data/example.zim',
output : 'example.zim',
copy: true )
tests = [
'parseUrl',
@@ -8,9 +10,15 @@ tests = [
'stringTools',
'pathTools',
'kiwixserve',
'book'
'book',
'manager',
]
if build_machine.system() != 'windows'
tests += ['server']
endif
gtest_dep = dependency('gtest',
main:true,
@@ -18,8 +26,15 @@ gtest_dep = dependency('gtest',
required:false)
if gtest_dep.found() and not meson.is_cross_build()
configure_file(input : 'data/wikipedia_en_ray_charles_mini_2020-03.zim',
output : 'zimfile.zim',
copy: true )
foreach test_name : tests
# XXX: implicit_include_directories must be set to false, otherwise
# XXX: '#include <regex>' includes the regex unit test binary
test_exe = executable(test_name, [test_name+'.cpp'],
implicit_include_directories: false,
link_with : kiwixlib,
link_args: extra_link_args,
dependencies : all_deps + [gtest_dep],

496
test/server.cpp Normal file
View File

@@ -0,0 +1,496 @@
#include "./httplib.h"
#include "gtest/gtest.h"
#include "../include/manager.h"
#include "../include/server.h"
#include "../include/name_mapper.h"
using TestContextImpl = std::vector<std::pair<std::string, std::string> >;
struct TestContext : TestContextImpl {
TestContext(const std::initializer_list<value_type>& il)
: TestContextImpl(il)
{}
};
std::ostream& operator<<(std::ostream& out, const TestContext& ctx)
{
out << "Test context:\n";
for ( const auto& kv : ctx )
out << "\t" << kv.first << ": " << kv.second << "\n";
out << std::endl;
return out;
}
bool is_valid_etag(const std::string& etag)
{
return etag.size() >= 2 &&
etag.front() == '"' &&
etag.back() == '"';
}
template<class T1, class T2>
T1 concat(T1 a, const T2& b)
{
a.insert(a.end(), b.begin(), b.end());
return a;
}
typedef httplib::Headers Headers;
Headers invariantHeaders(Headers headers)
{
headers.erase("Date");
return headers;
}
class ZimFileServer
{
public: // types
typedef std::shared_ptr<httplib::Response> Response;
public: // functions
ZimFileServer(int serverPort, std::string zimpath);
~ZimFileServer();
Response GET(const char* path, const Headers& headers = Headers())
{
return client->Get(path, headers);
}
Response HEAD(const char* path, const Headers& headers = Headers())
{
return client->Head(path, headers);
}
private: // data
kiwix::Library library;
kiwix::Manager manager;
std::unique_ptr<kiwix::HumanReadableNameMapper> nameMapper;
std::unique_ptr<kiwix::Server> server;
std::unique_ptr<httplib::Client> client;
};
ZimFileServer::ZimFileServer(int serverPort, std::string zimpath)
: manager(&this->library)
{
if (!manager.addBookFromPath(zimpath, zimpath, "", false))
throw std::runtime_error("Unable to add the ZIM file '" + zimpath + "'");
const std::string address = "127.0.0.1";
nameMapper.reset(new kiwix::HumanReadableNameMapper(library, false));
server.reset(new kiwix::Server(&library, nameMapper.get()));
server->setAddress(address);
server->setPort(serverPort);
server->setNbThreads(2);
server->setVerbose(false);
if ( !server->start() )
throw std::runtime_error("ZimFileServer failed to start");
client.reset(new httplib::Client(address, serverPort));
}
ZimFileServer::~ZimFileServer()
{
server->stop();
}
class ServerTest : public ::testing::Test
{
protected:
std::unique_ptr<ZimFileServer> zfs1_;
const int PORT = 8001;
const std::string ZIMFILE = "./test/zimfile.zim";
protected:
void SetUp() override {
zfs1_.reset(new ZimFileServer(PORT, ZIMFILE));
}
void TearDown() override {
zfs1_.reset();
}
};
const bool WITH_ETAG = true;
const bool NO_ETAG = false;
struct Resource
{
bool etag_expected;
const char* url;
};
std::ostream& operator<<(std::ostream& out, const Resource& r)
{
out << "url: " << r.url;
return out;
}
typedef std::vector<Resource> ResourceCollection;
const ResourceCollection resources200Compressible{
{ WITH_ETAG, "/" },
{ WITH_ETAG, "/skin/jquery-ui/jquery-ui.structure.min.css" },
{ WITH_ETAG, "/skin/jquery-ui/jquery-ui.min.js" },
{ WITH_ETAG, "/skin/jquery-ui/external/jquery/jquery.js" },
{ WITH_ETAG, "/skin/jquery-ui/jquery-ui.theme.min.css" },
{ WITH_ETAG, "/skin/jquery-ui/jquery-ui.min.css" },
{ WITH_ETAG, "/skin/taskbar.js" },
{ WITH_ETAG, "/skin/taskbar.css" },
{ WITH_ETAG, "/skin/block_external.js" },
{ NO_ETAG, "/catalog/root.xml" },
{ NO_ETAG, "/catalog/searchdescription.xml" },
{ NO_ETAG, "/catalog/search" },
{ NO_ETAG, "/search?content=zimfile&pattern=abcd" },
{ NO_ETAG, "/suggest?content=zimfile&term=ray" },
{ NO_ETAG, "/catch/external?source=www.example.com" },
{ WITH_ETAG, "/zimfile/A/index" },
{ WITH_ETAG, "/zimfile/A/Ray_Charles" },
};
const ResourceCollection resources200Uncompressible{
{ WITH_ETAG, "/skin/jquery-ui/images/animated-overlay.gif" },
{ WITH_ETAG, "/skin/caret.png" },
{ WITH_ETAG, "/meta?content=zimfile&name=title" },
{ WITH_ETAG, "/meta?content=zimfile&name=description" },
{ WITH_ETAG, "/meta?content=zimfile&name=language" },
{ WITH_ETAG, "/meta?content=zimfile&name=name" },
{ WITH_ETAG, "/meta?content=zimfile&name=tags" },
{ WITH_ETAG, "/meta?content=zimfile&name=date" },
{ WITH_ETAG, "/meta?content=zimfile&name=creator" },
{ WITH_ETAG, "/meta?content=zimfile&name=publisher" },
{ WITH_ETAG, "/meta?content=zimfile&name=favicon" },
{ WITH_ETAG, "/zimfile/I/m/Ray_Charles_classic_piano_pose.jpg" },
};
ResourceCollection all200Resources()
{
return concat(resources200Compressible, resources200Uncompressible);
}
TEST_F(ServerTest, 200)
{
for ( const Resource& res : all200Resources() )
EXPECT_EQ(200, zfs1_->GET(res.url)->status) << "res.url: " << res.url;
}
TEST_F(ServerTest, CompressibleContentIsCompressedIfAcceptable)
{
for ( const Resource& res : resources200Compressible ) {
const auto x = zfs1_->GET(res.url, { {"Accept-Encoding", "deflate"} });
EXPECT_EQ(200, x->status) << res;
EXPECT_EQ("deflate", x->get_header_value("Content-Encoding")) << res;
EXPECT_EQ("Accept-Encoding", x->get_header_value("Vary")) << res;
}
}
TEST_F(ServerTest, UncompressibleContentIsNotCompressed)
{
for ( const Resource& res : resources200Uncompressible ) {
const auto x = zfs1_->GET(res.url, { {"Accept-Encoding", "deflate"} });
EXPECT_EQ(200, x->status) << res;
EXPECT_EQ("", x->get_header_value("Content-Encoding")) << res;
}
}
const char* urls404[] = {
"/non-existent-item",
"/skin/non-existent-skin-resource",
"/catalog",
"/catalog/non-existent-item",
"/meta",
"/meta?content=zimfile",
"/meta?content=zimfile&name=non-existent-item",
"/meta?content=non-existent-book&name=title",
"/random",
"/random?content=non-existent-book",
"/search",
"/suggest",
"/suggest?content=zimfile",
"/suggest?content=non-existent-book&term=abcd",
"/catch/external",
"/zimfile/A/non-existent-article",
};
TEST_F(ServerTest, 404)
{
for ( const char* url : urls404 )
EXPECT_EQ(404, zfs1_->GET(url)->status) << "url: " << url;
}
TEST_F(ServerTest, SuccessfulSearchForAnArticleTitleRedirectsToTheArticle)
{
auto g = zfs1_->GET("/search?content=zimfile&pattern=ray%20charles" );
ASSERT_EQ(302, g->status);
ASSERT_TRUE(g->has_header("Location"));
ASSERT_EQ("/zimfile/A/Ray_Charles", g->get_header_value("Location"));
}
TEST_F(ServerTest, RandomPageRedirectsToAnExistingArticle)
{
auto g = zfs1_->GET("/random?content=zimfile");
ASSERT_EQ(302, g->status);
ASSERT_TRUE(g->has_header("Location"));
ASSERT_TRUE(g->get_header_value("Location").find("/zimfile/A/") != std::string::npos);
}
TEST_F(ServerTest, BookMainPageIsRedirectedToArticleIndex)
{
auto g = zfs1_->GET("/zimfile");
ASSERT_EQ(302, g->status);
ASSERT_TRUE(g->has_header("Location"));
ASSERT_EQ("/zimfile/A/index", g->get_header_value("Location"));
}
TEST_F(ServerTest, HeadMethodIsSupported)
{
for ( const Resource& res : all200Resources() )
EXPECT_EQ(200, zfs1_->HEAD(res.url)->status) << res;
}
TEST_F(ServerTest, TheResponseToHeadRequestHasNoBody)
{
for ( const Resource& res : all200Resources() )
EXPECT_TRUE(zfs1_->HEAD(res.url)->body.empty()) << res;
}
TEST_F(ServerTest, HeadersAreTheSameInResponsesToHeadAndGetRequests)
{
for ( const Resource& res : all200Resources() ) {
httplib::Headers g = zfs1_->GET(res.url)->headers;
httplib::Headers h = zfs1_->HEAD(res.url)->headers;
EXPECT_EQ(invariantHeaders(g), invariantHeaders(h)) << res;
}
}
TEST_F(ServerTest, ETagHeaderIsSetAsNeeded)
{
for ( const Resource& res : all200Resources() ) {
const auto responseToGet = zfs1_->GET(res.url);
EXPECT_EQ(res.etag_expected, responseToGet->has_header("ETag")) << res;
if ( res.etag_expected ) {
EXPECT_TRUE(is_valid_etag(responseToGet->get_header_value("ETag")));
}
}
}
TEST_F(ServerTest, ETagIsTheSameInResponsesToDifferentRequestsOfTheSameURL)
{
for ( const Resource& res : all200Resources() ) {
const auto h1 = zfs1_->HEAD(res.url);
const auto h2 = zfs1_->HEAD(res.url);
EXPECT_EQ(h1->get_header_value("ETag"), h2->get_header_value("ETag"));
}
}
TEST_F(ServerTest, ETagIsTheSameAcrossHeadAndGet)
{
for ( const Resource& res : all200Resources() ) {
const auto g = zfs1_->GET(res.url);
const auto h = zfs1_->HEAD(res.url);
EXPECT_EQ(h->get_header_value("ETag"), g->get_header_value("ETag"));
}
}
TEST_F(ServerTest, DifferentServerInstancesProduceDifferentETags)
{
ZimFileServer zfs2(PORT + 1, ZIMFILE);
for ( const Resource& res : all200Resources() ) {
if ( !res.etag_expected ) continue;
const auto h1 = zfs1_->HEAD(res.url);
const auto h2 = zfs2.HEAD(res.url);
EXPECT_NE(h1->get_header_value("ETag"), h2->get_header_value("ETag"));
}
}
TEST_F(ServerTest, CompressionInfluencesETag)
{
for ( const Resource& res : resources200Compressible ) {
if ( ! res.etag_expected ) continue;
const auto g1 = zfs1_->GET(res.url);
const auto g2 = zfs1_->GET(res.url, { {"Accept-Encoding", ""} } );
const auto g3 = zfs1_->GET(res.url, { {"Accept-Encoding", "deflate"} } );
const auto etag = g1->get_header_value("ETag");
EXPECT_EQ(etag, g2->get_header_value("ETag"));
EXPECT_NE(etag, g3->get_header_value("ETag"));
}
}
TEST_F(ServerTest, ETagOfUncompressibleContentIsNotAffectedByAcceptEncoding)
{
for ( const Resource& res : resources200Uncompressible ) {
if ( ! res.etag_expected ) continue;
const auto g1 = zfs1_->GET(res.url);
const auto g2 = zfs1_->GET(res.url, { {"Accept-Encoding", ""} } );
const auto g3 = zfs1_->GET(res.url, { {"Accept-Encoding", "deflate"} } );
const auto etag = g1->get_header_value("ETag");
EXPECT_EQ(etag, g2->get_header_value("ETag")) << res;
EXPECT_EQ(etag, g3->get_header_value("ETag")) << res;
}
}
// Pick from the response those headers that are required to be present in the
// 304 (Not Modified) response if they would be set in the 200 (OK) response.
// NOTE: The "Date" header (which should belong to that list as required
// NOTE: by RFC 7232) is not included (since the result of this function
// NOTE: will be used to check the equality of headers from the 200 and 304
// NOTe: responses).
Headers special304Headers(const httplib::Response& r)
{
Headers result;
std::copy_if(
r.headers.begin(), r.headers.end(),
std::inserter(result, result.end()),
[](const Headers::value_type& x) {
return x.first == "Cache-Control"
|| x.first == "Content-Location"
|| x.first == "ETag"
|| x.first == "Expires"
|| x.first == "Vary";
});
return result;
}
// make a list of three etags with the given one in the middle
std::string make_etag_list(const std::string& etag)
{
return "\"x" + etag.substr(1) + ", "
+ etag + ", "
+ etag.substr(0, etag.size()-2) + "\"";
}
TEST_F(ServerTest, IfNoneMatchRequestsWithMatchingETagResultIn304Responses)
{
const char* const encodings[] = { "", "deflate" };
for ( const Resource& res : all200Resources() ) {
for ( const char* enc: encodings ) {
if ( ! res.etag_expected ) continue;
const TestContext ctx{ {"url", res.url}, {"encoding", enc} };
const auto g = zfs1_->GET(res.url, { {"Accept-Encoding", enc} });
const auto etag = g->get_header_value("ETag");
const std::string etags = make_etag_list(etag);
const Headers headers{{"If-None-Match", etags}, {"Accept-Encoding", enc}};
const auto g2 = zfs1_->GET(res.url, headers );
const auto h = zfs1_->HEAD(res.url, headers );
EXPECT_EQ(304, h->status) << ctx;
EXPECT_EQ(304, g2->status) << ctx;
EXPECT_EQ(special304Headers(*g), special304Headers(*g2)) << ctx;
EXPECT_EQ(special304Headers(*g2), special304Headers(*h)) << ctx;
EXPECT_TRUE(g2->body.empty()) << ctx;
}
}
}
TEST_F(ServerTest, IfNoneMatchRequestsWithMismatchingETagResultIn200Responses)
{
for ( const Resource& res : all200Resources() ) {
const auto g = zfs1_->GET(res.url);
const auto etag = g->get_header_value("ETag");
const auto etag2 = etag.substr(0, etag.size() - 1) + "x\"";
const auto h = zfs1_->HEAD(res.url, { {"If-None-Match", etag2} } );
const auto g2 = zfs1_->GET(res.url, { {"If-None-Match", etag2} } );
EXPECT_EQ(200, h->status);
EXPECT_EQ(200, g2->status);
}
}
TEST_F(ServerTest, ValidSingleRangeByteRangeRequestsAreHandledProperly)
{
const char url[] = "/zimfile/I/m/Ray_Charles_classic_piano_pose.jpg";
const auto full = zfs1_->GET(url);
EXPECT_FALSE(full->has_header("Content-Range"));
EXPECT_EQ("bytes", full->get_header_value("Accept-Ranges"));
{
const auto p = zfs1_->GET(url, { {"Range", "bytes=0-100000"} } );
EXPECT_EQ(206, p->status);
EXPECT_EQ(full->body, p->body);
EXPECT_EQ("bytes 0-20076/20077", p->get_header_value("Content-Range"));
EXPECT_EQ("bytes", p->get_header_value("Accept-Ranges"));
}
{
const auto p = zfs1_->GET(url, { {"Range", "bytes=0-10"} } );
EXPECT_EQ(206, p->status);
EXPECT_EQ("bytes 0-10/20077", p->get_header_value("Content-Range"));
EXPECT_EQ(11, p->body.size());
EXPECT_EQ(full->body.substr(0, 11), p->body);
EXPECT_EQ("bytes", p->get_header_value("Accept-Ranges"));
}
{
const auto p = zfs1_->GET(url, { {"Range", "bytes=123-456"} } );
EXPECT_EQ(206, p->status);
EXPECT_EQ("bytes 123-456/20077", p->get_header_value("Content-Range"));
EXPECT_EQ(334, p->body.size());
EXPECT_EQ(full->body.substr(123, 334), p->body);
EXPECT_EQ("bytes", p->get_header_value("Accept-Ranges"));
}
{
const auto p = zfs1_->GET(url, { {"Range", "bytes=20000-"} } );
EXPECT_EQ(206, p->status);
EXPECT_EQ(full->body.substr(20000), p->body);
EXPECT_EQ("bytes 20000-20076/20077", p->get_header_value("Content-Range"));
EXPECT_EQ("bytes", p->get_header_value("Accept-Ranges"));
}
{
const auto p = zfs1_->GET(url, { {"Range", "bytes=-100"} } );
EXPECT_EQ(206, p->status);
EXPECT_EQ(full->body.substr(19977), p->body);
EXPECT_EQ("bytes 19977-20076/20077", p->get_header_value("Content-Range"));
EXPECT_EQ("bytes", p->get_header_value("Accept-Ranges"));
}
}
TEST_F(ServerTest, InvalidAndMultiRangeByteRangeRequestsResultIn416Responses)
{
const char url[] = "/zimfile/I/m/Ray_Charles_classic_piano_pose.jpg";
const char* invalidRanges[] = {
"0-10", "bytes=", "bytes=123", "bytes=-10-20", "bytes=10-20xxx",
"bytes=10-0", // reversed range
"bytes=10-20, 30-40", // multi-range
"bytes=1000000-", "bytes=30000-30100" // unsatisfiable ranges
};
for( const char* range : invalidRanges )
{
const TestContext ctx{ {"Range", range} };
const auto p = zfs1_->GET(url, { {"Range", range } } );
EXPECT_EQ(416, p->status) << ctx;
EXPECT_TRUE(p->body.empty()) << ctx;
EXPECT_EQ("bytes */20077", p->get_header_value("Content-Range")) << ctx;
}
}
TEST_F(ServerTest, RangeHasPrecedenceOverCompression)
{
const char url[] = "/zimfile/I/m/Ray_Charles_classic_piano_pose.jpg";
const Headers onlyRange{ {"Range", "bytes=123-456"} };
Headers rangeAndCompression(onlyRange);
rangeAndCompression.insert({"Accept-Encoding", "deflate"});
const auto p1 = zfs1_->GET(url, onlyRange);
const auto p2 = zfs1_->GET(url, rangeAndCompression);
EXPECT_EQ(p1->status, p2->status);
EXPECT_EQ(invariantHeaders(p1->headers), invariantHeaders(p2->headers));
EXPECT_EQ(p1->body, p2->body);
}