Compare commits
237 Commits
imported_f...2.0.2
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
4378c52c27 | ||
|
|
790fa99143 | ||
|
|
db6717e199 | ||
|
|
bf2188af14 | ||
|
|
fd9b6569af | ||
|
|
3cf58b5f5b | ||
|
|
182be5d124 | ||
|
|
dbcc9140b9 | ||
|
|
d46aff00d1 | ||
|
|
d61580f599 | ||
|
|
3227b29c90 | ||
|
|
4cb55e1eef | ||
|
|
9ec3358119 | ||
|
|
cf21f1793c | ||
|
|
c0d5e091d3 | ||
|
|
620f1b5e13 | ||
|
|
1e8e897f4a | ||
|
|
76ca4b0cee | ||
|
|
709baae934 | ||
|
|
ea8cd9f1a9 | ||
|
|
452e7f8883 | ||
|
|
a66b178633 | ||
|
|
0c26b08dce | ||
|
|
2a03147662 | ||
|
|
1164cf7444 | ||
|
|
6ef2d5ff4b | ||
|
|
3a00c4d671 | ||
|
|
5025ee4963 | ||
|
|
9aaf82a36d | ||
|
|
2e38aa796f | ||
|
|
fa99cce68d | ||
|
|
fc6a0bcea2 | ||
|
|
622d2fc23d | ||
|
|
48933a3b3e | ||
|
|
c0b1c6013e | ||
|
|
433a47c3fe | ||
|
|
e9ab074b5d | ||
|
|
45a000edaa | ||
|
|
e216c44034 | ||
|
|
59661626e9 | ||
|
|
6b0d2788aa | ||
|
|
1b49c632b3 | ||
|
|
68665693c5 | ||
|
|
1dd828e79c | ||
|
|
135028c16a | ||
|
|
1f3fcd85a0 | ||
|
|
6e13d44459 | ||
|
|
47ce044e3e | ||
|
|
1f091da3f4 | ||
|
|
d4fefd1a57 | ||
|
|
9f86b59d1d | ||
|
|
2164faba44 | ||
|
|
b48428e443 | ||
|
|
ad92af928b | ||
|
|
ee51c470b4 | ||
|
|
5398d69231 | ||
|
|
c0bc2ed111 | ||
|
|
10893ae19f | ||
|
|
ec097ab267 | ||
|
|
32ad40a5b0 | ||
|
|
d686de7ec3 | ||
|
|
8d6f1196de | ||
|
|
a216ad5a6f | ||
|
|
3849f0ae8b | ||
|
|
f2413f6680 | ||
|
|
8ae388562e | ||
|
|
a55824acc7 | ||
|
|
58395d266c | ||
|
|
313f6731b0 | ||
|
|
e23949a9fa | ||
|
|
ee6831d665 | ||
|
|
14653c6958 | ||
|
|
f8a2e4c503 | ||
|
|
57a197d38d | ||
|
|
cc38d0e5e4 | ||
|
|
b6ba10af2a | ||
|
|
f93f50087b | ||
|
|
63339793d2 | ||
|
|
5ee5929714 | ||
|
|
683b5249a2 | ||
|
|
698578ee73 | ||
|
|
6adf95c329 | ||
|
|
9fc840b377 | ||
|
|
97bcf57d53 | ||
|
|
3c614ae47f | ||
|
|
f303c7502d | ||
|
|
0c8c19a6fb | ||
|
|
16bd34e6a6 | ||
|
|
5a953f191b | ||
|
|
c947cceac8 | ||
|
|
35859a3689 | ||
|
|
9b3da52f00 | ||
|
|
dee482b2dc | ||
|
|
281b136ea8 | ||
|
|
41c92cfc3c | ||
|
|
64dc5131c0 | ||
|
|
189c972d17 | ||
|
|
28b0588df4 | ||
|
|
2357af8f58 | ||
|
|
4e5d9f0360 | ||
|
|
2125cd65fa | ||
|
|
520c1edf31 | ||
|
|
d2f7503cfa | ||
|
|
7a59779b77 | ||
|
|
766b64dddc | ||
|
|
e2f16f6030 | ||
|
|
b9ac7084ac | ||
|
|
0bd2a15651 | ||
|
|
0e8c8f68c5 | ||
|
|
382655d83c | ||
|
|
f0bcb1960b | ||
|
|
d4f0344d9d | ||
|
|
48078c809b | ||
|
|
3134ab6b56 | ||
|
|
41e3707f1b | ||
|
|
d801ff36f6 | ||
|
|
5623fedfd0 | ||
|
|
25a05cc64a | ||
|
|
192a249d23 | ||
|
|
5c118a87a1 | ||
|
|
ba35f097d9 | ||
|
|
093e8c0498 | ||
|
|
8b90221866 | ||
|
|
5c2280e7c7 | ||
|
|
ebd3f622ff | ||
|
|
cf93c8719f | ||
|
|
a794849993 | ||
|
|
1ff1bf6168 | ||
|
|
b6e51055a3 | ||
|
|
d17e94fd9c | ||
|
|
44a282fa4c | ||
|
|
d3acae1fd2 | ||
|
|
cbb1018a02 | ||
|
|
1d1dfbf4da | ||
|
|
b163351b2e | ||
|
|
e531c353a6 | ||
|
|
c363933bf4 | ||
|
|
5d46f28926 | ||
|
|
9fa2cfc66b | ||
|
|
b6a58d1684 | ||
|
|
e3780a2d77 | ||
|
|
473b62c9b8 | ||
|
|
bc5f4f5de4 | ||
|
|
9cc329dbd2 | ||
|
|
3991e648ed | ||
|
|
8d39b0b343 | ||
|
|
4a51dd9e00 | ||
|
|
c56e1f0446 | ||
|
|
d0371cd133 | ||
|
|
57720ca57b | ||
|
|
c5b291e1ed | ||
|
|
baf254f1aa | ||
|
|
64cc69f6ae | ||
|
|
6da3604df6 | ||
|
|
89afabc4cd | ||
|
|
80f6d0bf46 | ||
|
|
f76e9d2dbf | ||
|
|
a205ff00c8 | ||
|
|
96f199a327 | ||
|
|
0be3aa9d38 | ||
|
|
4f57e765e5 | ||
|
|
2bcd43af98 | ||
|
|
eb2c750431 | ||
|
|
7132775d67 | ||
|
|
c44b2acb56 | ||
|
|
0343c23f82 | ||
|
|
7005b65901 | ||
|
|
d360b9143c | ||
|
|
9963c73150 | ||
|
|
41d6f9884c | ||
|
|
8823880348 | ||
|
|
ac169558c4 | ||
|
|
2e43b7e82d | ||
|
|
4485cc8d0f | ||
|
|
3be4d92c53 | ||
|
|
44a77f5846 | ||
|
|
9abdc6ce02 | ||
|
|
5ca419bee7 | ||
|
|
37f29da63e | ||
|
|
94670847ef | ||
|
|
93b53cc6d0 | ||
|
|
cf273a06b4 | ||
|
|
43e9763091 | ||
|
|
ef661a2e25 | ||
|
|
7baa1b9e62 | ||
|
|
e28dbe7c7e | ||
|
|
2906202056 | ||
|
|
ce6c782b66 | ||
|
|
9771506985 | ||
|
|
b8d950c1a0 | ||
|
|
998db0eb2b | ||
|
|
46fab22a73 | ||
|
|
72e41082ca | ||
|
|
c06a041100 | ||
|
|
cecb65e314 | ||
|
|
62d26c27ff | ||
|
|
074c1bcffa | ||
|
|
9be2abedf3 | ||
|
|
83d27255cf | ||
|
|
72a6b578e6 | ||
|
|
22d9117a56 | ||
|
|
92dc6b3065 | ||
|
|
43703a9d58 | ||
|
|
aafe9a4435 | ||
|
|
9616530648 | ||
|
|
84796abb4c | ||
|
|
7b76d432a7 | ||
|
|
8e8724b6be | ||
|
|
094e05ac8a | ||
|
|
50071d1053 | ||
|
|
7a03cb7712 | ||
|
|
a3d01b6303 | ||
|
|
5a9fd265d3 | ||
|
|
c15339a972 | ||
|
|
ffb1f40671 | ||
|
|
121693bcfa | ||
|
|
d2c3eeb337 | ||
|
|
25a55acdae | ||
|
|
ed800e4f00 | ||
|
|
317b13b56b | ||
|
|
88f6b3a0f9 | ||
|
|
12f148974f | ||
|
|
72f1be3dbd | ||
|
|
753a39beb0 | ||
|
|
d9798414c3 | ||
|
|
d5191a18f0 | ||
|
|
3bfcd89e22 | ||
|
|
4afe3e8ddc | ||
|
|
72971bba2a | ||
|
|
772bda7056 | ||
|
|
12b6a50dba | ||
|
|
baf4c9050f | ||
|
|
4895af59e8 | ||
|
|
97bdc17651 | ||
|
|
cba71b4e75 | ||
|
|
8b34414458 | ||
|
|
8ce1fb0ba8 |
12
.clang-format
Normal file
@@ -0,0 +1,12 @@
|
||||
BasedOnStyle: Google
|
||||
BinPackArguments: false
|
||||
BinPackParameters: false
|
||||
BreakBeforeBinaryOperators: All
|
||||
BreakBeforeBraces: Linux
|
||||
DerivePointerAlignment: false
|
||||
SpacesInContainerLiterals: false
|
||||
Standard: Cpp11
|
||||
|
||||
AllowShortFunctionsOnASingleLine: Inline
|
||||
AllowShortIfStatementsOnASingleLine: false
|
||||
AllowShortLoopsOnASingleLine: false
|
||||
36
.travis.yml
Normal file
@@ -0,0 +1,36 @@
|
||||
language: cpp
|
||||
dist: trusty
|
||||
sudo: false
|
||||
cache: ccache
|
||||
before_install:
|
||||
- PATH=$PATH:$HOME/bin
|
||||
install: travis/install_deps.sh
|
||||
script: travis/compile.sh
|
||||
env:
|
||||
matrix:
|
||||
- PLATFORM="native_static"
|
||||
- PLATFORM="native_dyn"
|
||||
- PLATFORM="win32_static"
|
||||
- PLATFORM="win32_dyn"
|
||||
- PLATFORM="android_arm"
|
||||
- PLATFORM="android_arm64"
|
||||
addons:
|
||||
apt:
|
||||
packages:
|
||||
- cmake
|
||||
- python3-pip
|
||||
- libbz2-dev
|
||||
- ccache
|
||||
- zlib1g-dev
|
||||
- uuid-dev
|
||||
- libctpp2-dev
|
||||
- ctpp2-utils
|
||||
- libmicrohttpd-dev
|
||||
- g++-mingw-w64-i686
|
||||
- gcc-mingw-w64-i686
|
||||
- gcc-mingw-w64-base
|
||||
- mingw-w64-tools
|
||||
matrix:
|
||||
include:
|
||||
- env: PLATFORM="native_dyn"
|
||||
os: osx
|
||||
17
AUTHORS
Normal file
@@ -0,0 +1,17 @@
|
||||
Automactic <christopherliqd@gmail.com>
|
||||
Ayoub DARDORY <ayoubuto@gmail.com>
|
||||
Cristian Patrasciuc <cristip@google.com>
|
||||
Dattaz <taz@dattaz.fr>
|
||||
Elad Keyshawn <elad.keyshawn@gmail.com>
|
||||
Emmanuel Engelhart <kelson@kiwix.org>
|
||||
Isaac <mhutti1@gmail.com>
|
||||
jleow00 <leow.yonghan.jerome@gmail.com>
|
||||
Julian Harty <julianharty@gmail.com>
|
||||
Kiran Mathew Koshy <kiranmathewkoshy@gmail.com>
|
||||
Kunal Mehta <legoktm@member.fsf.org>
|
||||
Matthieu Gautier <mgautier@kymeria.fr>
|
||||
Rashiq Ahmad <rashiq.z@gmail.com>
|
||||
Renaud Gaudin <reg@kiwix.org>
|
||||
Shivam <ssarodia@gmail.com>
|
||||
Steve Wills <steve@mouf.net>
|
||||
Synhershko <synhershko@users.sourceforge.net>
|
||||
676
COPYING
Normal file
@@ -0,0 +1,676 @@
|
||||
|
||||
GNU GENERAL PUBLIC LICENSE
|
||||
Version 3, 29 June 2007
|
||||
|
||||
Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
|
||||
Everyone is permitted to copy and distribute verbatim copies
|
||||
of this license document, but changing it is not allowed.
|
||||
|
||||
Preamble
|
||||
|
||||
The GNU General Public License is a free, copyleft license for
|
||||
software and other kinds of works.
|
||||
|
||||
The licenses for most software and other practical works are designed
|
||||
to take away your freedom to share and change the works. By contrast,
|
||||
the GNU General Public License is intended to guarantee your freedom to
|
||||
share and change all versions of a program--to make sure it remains free
|
||||
software for all its users. We, the Free Software Foundation, use the
|
||||
GNU General Public License for most of our software; it applies also to
|
||||
any other work released this way by its authors. You can apply it to
|
||||
your programs, too.
|
||||
|
||||
When we speak of free software, we are referring to freedom, not
|
||||
price. Our General Public Licenses are designed to make sure that you
|
||||
have the freedom to distribute copies of free software (and charge for
|
||||
them if you wish), that you receive source code or can get it if you
|
||||
want it, that you can change the software or use pieces of it in new
|
||||
free programs, and that you know you can do these things.
|
||||
|
||||
To protect your rights, we need to prevent others from denying you
|
||||
these rights or asking you to surrender the rights. Therefore, you have
|
||||
certain responsibilities if you distribute copies of the software, or if
|
||||
you modify it: responsibilities to respect the freedom of others.
|
||||
|
||||
For example, if you distribute copies of such a program, whether
|
||||
gratis or for a fee, you must pass on to the recipients the same
|
||||
freedoms that you received. You must make sure that they, too, receive
|
||||
or can get the source code. And you must show them these terms so they
|
||||
know their rights.
|
||||
|
||||
Developers that use the GNU GPL protect your rights with two steps:
|
||||
(1) assert copyright on the software, and (2) offer you this License
|
||||
giving you legal permission to copy, distribute and/or modify it.
|
||||
|
||||
For the developers' and authors' protection, the GPL clearly explains
|
||||
that there is no warranty for this free software. For both users' and
|
||||
authors' sake, the GPL requires that modified versions be marked as
|
||||
changed, so that their problems will not be attributed erroneously to
|
||||
authors of previous versions.
|
||||
|
||||
Some devices are designed to deny users access to install or run
|
||||
modified versions of the software inside them, although the manufacturer
|
||||
can do so. This is fundamentally incompatible with the aim of
|
||||
protecting users' freedom to change the software. The systematic
|
||||
pattern of such abuse occurs in the area of products for individuals to
|
||||
use, which is precisely where it is most unacceptable. Therefore, we
|
||||
have designed this version of the GPL to prohibit the practice for those
|
||||
products. If such problems arise substantially in other domains, we
|
||||
stand ready to extend this provision to those domains in future versions
|
||||
of the GPL, as needed to protect the freedom of users.
|
||||
|
||||
Finally, every program is threatened constantly by software patents.
|
||||
States should not allow patents to restrict development and use of
|
||||
software on general-purpose computers, but in those that do, we wish to
|
||||
avoid the special danger that patents applied to a free program could
|
||||
make it effectively proprietary. To prevent this, the GPL assures that
|
||||
patents cannot be used to render the program non-free.
|
||||
|
||||
The precise terms and conditions for copying, distribution and
|
||||
modification follow.
|
||||
|
||||
TERMS AND CONDITIONS
|
||||
|
||||
0. Definitions.
|
||||
|
||||
"This License" refers to version 3 of the GNU General Public License.
|
||||
|
||||
"Copyright" also means copyright-like laws that apply to other kinds of
|
||||
works, such as semiconductor masks.
|
||||
|
||||
"The Program" refers to any copyrightable work licensed under this
|
||||
License. Each licensee is addressed as "you". "Licensees" and
|
||||
"recipients" may be individuals or organizations.
|
||||
|
||||
To "modify" a work means to copy from or adapt all or part of the work
|
||||
in a fashion requiring copyright permission, other than the making of an
|
||||
exact copy. The resulting work is called a "modified version" of the
|
||||
earlier work or a work "based on" the earlier work.
|
||||
|
||||
A "covered work" means either the unmodified Program or a work based
|
||||
on the Program.
|
||||
|
||||
To "propagate" a work means to do anything with it that, without
|
||||
permission, would make you directly or secondarily liable for
|
||||
infringement under applicable copyright law, except executing it on a
|
||||
computer or modifying a private copy. Propagation includes copying,
|
||||
distribution (with or without modification), making available to the
|
||||
public, and in some countries other activities as well.
|
||||
|
||||
To "convey" a work means any kind of propagation that enables other
|
||||
parties to make or receive copies. Mere interaction with a user through
|
||||
a computer network, with no transfer of a copy, is not conveying.
|
||||
|
||||
An interactive user interface displays "Appropriate Legal Notices"
|
||||
to the extent that it includes a convenient and prominently visible
|
||||
feature that (1) displays an appropriate copyright notice, and (2)
|
||||
tells the user that there is no warranty for the work (except to the
|
||||
extent that warranties are provided), that licensees may convey the
|
||||
work under this License, and how to view a copy of this License. If
|
||||
the interface presents a list of user commands or options, such as a
|
||||
menu, a prominent item in the list meets this criterion.
|
||||
|
||||
1. Source Code.
|
||||
|
||||
The "source code" for a work means the preferred form of the work
|
||||
for making modifications to it. "Object code" means any non-source
|
||||
form of a work.
|
||||
|
||||
A "Standard Interface" means an interface that either is an official
|
||||
standard defined by a recognized standards body, or, in the case of
|
||||
interfaces specified for a particular programming language, one that
|
||||
is widely used among developers working in that language.
|
||||
|
||||
The "System Libraries" of an executable work include anything, other
|
||||
than the work as a whole, that (a) is included in the normal form of
|
||||
packaging a Major Component, but which is not part of that Major
|
||||
Component, and (b) serves only to enable use of the work with that
|
||||
Major Component, or to implement a Standard Interface for which an
|
||||
implementation is available to the public in source code form. A
|
||||
"Major Component", in this context, means a major essential component
|
||||
(kernel, window system, and so on) of the specific operating system
|
||||
(if any) on which the executable work runs, or a compiler used to
|
||||
produce the work, or an object code interpreter used to run it.
|
||||
|
||||
The "Corresponding Source" for a work in object code form means all
|
||||
the source code needed to generate, install, and (for an executable
|
||||
work) run the object code and to modify the work, including scripts to
|
||||
control those activities. However, it does not include the work's
|
||||
System Libraries, or general-purpose tools or generally available free
|
||||
programs which are used unmodified in performing those activities but
|
||||
which are not part of the work. For example, Corresponding Source
|
||||
includes interface definition files associated with source files for
|
||||
the work, and the source code for shared libraries and dynamically
|
||||
linked subprograms that the work is specifically designed to require,
|
||||
such as by intimate data communication or control flow between those
|
||||
subprograms and other parts of the work.
|
||||
|
||||
The Corresponding Source need not include anything that users
|
||||
can regenerate automatically from other parts of the Corresponding
|
||||
Source.
|
||||
|
||||
The Corresponding Source for a work in source code form is that
|
||||
same work.
|
||||
|
||||
2. Basic Permissions.
|
||||
|
||||
All rights granted under this License are granted for the term of
|
||||
copyright on the Program, and are irrevocable provided the stated
|
||||
conditions are met. This License explicitly affirms your unlimited
|
||||
permission to run the unmodified Program. The output from running a
|
||||
covered work is covered by this License only if the output, given its
|
||||
content, constitutes a covered work. This License acknowledges your
|
||||
rights of fair use or other equivalent, as provided by copyright law.
|
||||
|
||||
You may make, run and propagate covered works that you do not
|
||||
convey, without conditions so long as your license otherwise remains
|
||||
in force. You may convey covered works to others for the sole purpose
|
||||
of having them make modifications exclusively for you, or provide you
|
||||
with facilities for running those works, provided that you comply with
|
||||
the terms of this License in conveying all material for which you do
|
||||
not control copyright. Those thus making or running the covered works
|
||||
for you must do so exclusively on your behalf, under your direction
|
||||
and control, on terms that prohibit them from making any copies of
|
||||
your copyrighted material outside their relationship with you.
|
||||
|
||||
Conveying under any other circumstances is permitted solely under
|
||||
the conditions stated below. Sublicensing is not allowed; section 10
|
||||
makes it unnecessary.
|
||||
|
||||
3. Protecting Users' Legal Rights From Anti-Circumvention Law.
|
||||
|
||||
No covered work shall be deemed part of an effective technological
|
||||
measure under any applicable law fulfilling obligations under article
|
||||
11 of the WIPO copyright treaty adopted on 20 December 1996, or
|
||||
similar laws prohibiting or restricting circumvention of such
|
||||
measures.
|
||||
|
||||
When you convey a covered work, you waive any legal power to forbid
|
||||
circumvention of technological measures to the extent such circumvention
|
||||
is effected by exercising rights under this License with respect to
|
||||
the covered work, and you disclaim any intention to limit operation or
|
||||
modification of the work as a means of enforcing, against the work's
|
||||
users, your or third parties' legal rights to forbid circumvention of
|
||||
technological measures.
|
||||
|
||||
4. Conveying Verbatim Copies.
|
||||
|
||||
You may convey verbatim copies of the Program's source code as you
|
||||
receive it, in any medium, provided that you conspicuously and
|
||||
appropriately publish on each copy an appropriate copyright notice;
|
||||
keep intact all notices stating that this License and any
|
||||
non-permissive terms added in accord with section 7 apply to the code;
|
||||
keep intact all notices of the absence of any warranty; and give all
|
||||
recipients a copy of this License along with the Program.
|
||||
|
||||
You may charge any price or no price for each copy that you convey,
|
||||
and you may offer support or warranty protection for a fee.
|
||||
|
||||
5. Conveying Modified Source Versions.
|
||||
|
||||
You may convey a work based on the Program, or the modifications to
|
||||
produce it from the Program, in the form of source code under the
|
||||
terms of section 4, provided that you also meet all of these conditions:
|
||||
|
||||
a) The work must carry prominent notices stating that you modified
|
||||
it, and giving a relevant date.
|
||||
|
||||
b) The work must carry prominent notices stating that it is
|
||||
released under this License and any conditions added under section
|
||||
7. This requirement modifies the requirement in section 4 to
|
||||
"keep intact all notices".
|
||||
|
||||
c) You must license the entire work, as a whole, under this
|
||||
License to anyone who comes into possession of a copy. This
|
||||
License will therefore apply, along with any applicable section 7
|
||||
additional terms, to the whole of the work, and all its parts,
|
||||
regardless of how they are packaged. This License gives no
|
||||
permission to license the work in any other way, but it does not
|
||||
invalidate such permission if you have separately received it.
|
||||
|
||||
d) If the work has interactive user interfaces, each must display
|
||||
Appropriate Legal Notices; however, if the Program has interactive
|
||||
interfaces that do not display Appropriate Legal Notices, your
|
||||
work need not make them do so.
|
||||
|
||||
A compilation of a covered work with other separate and independent
|
||||
works, which are not by their nature extensions of the covered work,
|
||||
and which are not combined with it such as to form a larger program,
|
||||
in or on a volume of a storage or distribution medium, is called an
|
||||
"aggregate" if the compilation and its resulting copyright are not
|
||||
used to limit the access or legal rights of the compilation's users
|
||||
beyond what the individual works permit. Inclusion of a covered work
|
||||
in an aggregate does not cause this License to apply to the other
|
||||
parts of the aggregate.
|
||||
|
||||
6. Conveying Non-Source Forms.
|
||||
|
||||
You may convey a covered work in object code form under the terms
|
||||
of sections 4 and 5, provided that you also convey the
|
||||
machine-readable Corresponding Source under the terms of this License,
|
||||
in one of these ways:
|
||||
|
||||
a) Convey the object code in, or embodied in, a physical product
|
||||
(including a physical distribution medium), accompanied by the
|
||||
Corresponding Source fixed on a durable physical medium
|
||||
customarily used for software interchange.
|
||||
|
||||
b) Convey the object code in, or embodied in, a physical product
|
||||
(including a physical distribution medium), accompanied by a
|
||||
written offer, valid for at least three years and valid for as
|
||||
long as you offer spare parts or customer support for that product
|
||||
model, to give anyone who possesses the object code either (1) a
|
||||
copy of the Corresponding Source for all the software in the
|
||||
product that is covered by this License, on a durable physical
|
||||
medium customarily used for software interchange, for a price no
|
||||
more than your reasonable cost of physically performing this
|
||||
conveying of source, or (2) access to copy the
|
||||
Corresponding Source from a network server at no charge.
|
||||
|
||||
c) Convey individual copies of the object code with a copy of the
|
||||
written offer to provide the Corresponding Source. This
|
||||
alternative is allowed only occasionally and noncommercially, and
|
||||
only if you received the object code with such an offer, in accord
|
||||
with subsection 6b.
|
||||
|
||||
d) Convey the object code by offering access from a designated
|
||||
place (gratis or for a charge), and offer equivalent access to the
|
||||
Corresponding Source in the same way through the same place at no
|
||||
further charge. You need not require recipients to copy the
|
||||
Corresponding Source along with the object code. If the place to
|
||||
copy the object code is a network server, the Corresponding Source
|
||||
may be on a different server (operated by you or a third party)
|
||||
that supports equivalent copying facilities, provided you maintain
|
||||
clear directions next to the object code saying where to find the
|
||||
Corresponding Source. Regardless of what server hosts the
|
||||
Corresponding Source, you remain obligated to ensure that it is
|
||||
available for as long as needed to satisfy these requirements.
|
||||
|
||||
e) Convey the object code using peer-to-peer transmission, provided
|
||||
you inform other peers where the object code and Corresponding
|
||||
Source of the work are being offered to the general public at no
|
||||
charge under subsection 6d.
|
||||
|
||||
A separable portion of the object code, whose source code is excluded
|
||||
from the Corresponding Source as a System Library, need not be
|
||||
included in conveying the object code work.
|
||||
|
||||
A "User Product" is either (1) a "consumer product", which means any
|
||||
tangible personal property which is normally used for personal, family,
|
||||
or household purposes, or (2) anything designed or sold for incorporation
|
||||
into a dwelling. In determining whether a product is a consumer product,
|
||||
doubtful cases shall be resolved in favor of coverage. For a particular
|
||||
product received by a particular user, "normally used" refers to a
|
||||
typical or common use of that class of product, regardless of the status
|
||||
of the particular user or of the way in which the particular user
|
||||
actually uses, or expects or is expected to use, the product. A product
|
||||
is a consumer product regardless of whether the product has substantial
|
||||
commercial, industrial or non-consumer uses, unless such uses represent
|
||||
the only significant mode of use of the product.
|
||||
|
||||
"Installation Information" for a User Product means any methods,
|
||||
procedures, authorization keys, or other information required to install
|
||||
and execute modified versions of a covered work in that User Product from
|
||||
a modified version of its Corresponding Source. The information must
|
||||
suffice to ensure that the continued functioning of the modified object
|
||||
code is in no case prevented or interfered with solely because
|
||||
modification has been made.
|
||||
|
||||
If you convey an object code work under this section in, or with, or
|
||||
specifically for use in, a User Product, and the conveying occurs as
|
||||
part of a transaction in which the right of possession and use of the
|
||||
User Product is transferred to the recipient in perpetuity or for a
|
||||
fixed term (regardless of how the transaction is characterized), the
|
||||
Corresponding Source conveyed under this section must be accompanied
|
||||
by the Installation Information. But this requirement does not apply
|
||||
if neither you nor any third party retains the ability to install
|
||||
modified object code on the User Product (for example, the work has
|
||||
been installed in ROM).
|
||||
|
||||
The requirement to provide Installation Information does not include a
|
||||
requirement to continue to provide support service, warranty, or updates
|
||||
for a work that has been modified or installed by the recipient, or for
|
||||
the User Product in which it has been modified or installed. Access to a
|
||||
network may be denied when the modification itself materially and
|
||||
adversely affects the operation of the network or violates the rules and
|
||||
protocols for communication across the network.
|
||||
|
||||
Corresponding Source conveyed, and Installation Information provided,
|
||||
in accord with this section must be in a format that is publicly
|
||||
documented (and with an implementation available to the public in
|
||||
source code form), and must require no special password or key for
|
||||
unpacking, reading or copying.
|
||||
|
||||
7. Additional Terms.
|
||||
|
||||
"Additional permissions" are terms that supplement the terms of this
|
||||
License by making exceptions from one or more of its conditions.
|
||||
Additional permissions that are applicable to the entire Program shall
|
||||
be treated as though they were included in this License, to the extent
|
||||
that they are valid under applicable law. If additional permissions
|
||||
apply only to part of the Program, that part may be used separately
|
||||
under those permissions, but the entire Program remains governed by
|
||||
this License without regard to the additional permissions.
|
||||
|
||||
When you convey a copy of a covered work, you may at your option
|
||||
remove any additional permissions from that copy, or from any part of
|
||||
it. (Additional permissions may be written to require their own
|
||||
removal in certain cases when you modify the work.) You may place
|
||||
additional permissions on material, added by you to a covered work,
|
||||
for which you have or can give appropriate copyright permission.
|
||||
|
||||
Notwithstanding any other provision of this License, for material you
|
||||
add to a covered work, you may (if authorized by the copyright holders of
|
||||
that material) supplement the terms of this License with terms:
|
||||
|
||||
a) Disclaiming warranty or limiting liability differently from the
|
||||
terms of sections 15 and 16 of this License; or
|
||||
|
||||
b) Requiring preservation of specified reasonable legal notices or
|
||||
author attributions in that material or in the Appropriate Legal
|
||||
Notices displayed by works containing it; or
|
||||
|
||||
c) Prohibiting misrepresentation of the origin of that material, or
|
||||
requiring that modified versions of such material be marked in
|
||||
reasonable ways as different from the original version; or
|
||||
|
||||
d) Limiting the use for publicity purposes of names of licensors or
|
||||
authors of the material; or
|
||||
|
||||
e) Declining to grant rights under trademark law for use of some
|
||||
trade names, trademarks, or service marks; or
|
||||
|
||||
f) Requiring indemnification of licensors and authors of that
|
||||
material by anyone who conveys the material (or modified versions of
|
||||
it) with contractual assumptions of liability to the recipient, for
|
||||
any liability that these contractual assumptions directly impose on
|
||||
those licensors and authors.
|
||||
|
||||
All other non-permissive additional terms are considered "further
|
||||
restrictions" within the meaning of section 10. If the Program as you
|
||||
received it, or any part of it, contains a notice stating that it is
|
||||
governed by this License along with a term that is a further
|
||||
restriction, you may remove that term. If a license document contains
|
||||
a further restriction but permits relicensing or conveying under this
|
||||
License, you may add to a covered work material governed by the terms
|
||||
of that license document, provided that the further restriction does
|
||||
not survive such relicensing or conveying.
|
||||
|
||||
If you add terms to a covered work in accord with this section, you
|
||||
must place, in the relevant source files, a statement of the
|
||||
additional terms that apply to those files, or a notice indicating
|
||||
where to find the applicable terms.
|
||||
|
||||
Additional terms, permissive or non-permissive, may be stated in the
|
||||
form of a separately written license, or stated as exceptions;
|
||||
the above requirements apply either way.
|
||||
|
||||
8. Termination.
|
||||
|
||||
You may not propagate or modify a covered work except as expressly
|
||||
provided under this License. Any attempt otherwise to propagate or
|
||||
modify it is void, and will automatically terminate your rights under
|
||||
this License (including any patent licenses granted under the third
|
||||
paragraph of section 11).
|
||||
|
||||
However, if you cease all violation of this License, then your
|
||||
license from a particular copyright holder is reinstated (a)
|
||||
provisionally, unless and until the copyright holder explicitly and
|
||||
finally terminates your license, and (b) permanently, if the copyright
|
||||
holder fails to notify you of the violation by some reasonable means
|
||||
prior to 60 days after the cessation.
|
||||
|
||||
Moreover, your license from a particular copyright holder is
|
||||
reinstated permanently if the copyright holder notifies you of the
|
||||
violation by some reasonable means, this is the first time you have
|
||||
received notice of violation of this License (for any work) from that
|
||||
copyright holder, and you cure the violation prior to 30 days after
|
||||
your receipt of the notice.
|
||||
|
||||
Termination of your rights under this section does not terminate the
|
||||
licenses of parties who have received copies or rights from you under
|
||||
this License. If your rights have been terminated and not permanently
|
||||
reinstated, you do not qualify to receive new licenses for the same
|
||||
material under section 10.
|
||||
|
||||
9. Acceptance Not Required for Having Copies.
|
||||
|
||||
You are not required to accept this License in order to receive or
|
||||
run a copy of the Program. Ancillary propagation of a covered work
|
||||
occurring solely as a consequence of using peer-to-peer transmission
|
||||
to receive a copy likewise does not require acceptance. However,
|
||||
nothing other than this License grants you permission to propagate or
|
||||
modify any covered work. These actions infringe copyright if you do
|
||||
not accept this License. Therefore, by modifying or propagating a
|
||||
covered work, you indicate your acceptance of this License to do so.
|
||||
|
||||
10. Automatic Licensing of Downstream Recipients.
|
||||
|
||||
Each time you convey a covered work, the recipient automatically
|
||||
receives a license from the original licensors, to run, modify and
|
||||
propagate that work, subject to this License. You are not responsible
|
||||
for enforcing compliance by third parties with this License.
|
||||
|
||||
An "entity transaction" is a transaction transferring control of an
|
||||
organization, or substantially all assets of one, or subdividing an
|
||||
organization, or merging organizations. If propagation of a covered
|
||||
work results from an entity transaction, each party to that
|
||||
transaction who receives a copy of the work also receives whatever
|
||||
licenses to the work the party's predecessor in interest had or could
|
||||
give under the previous paragraph, plus a right to possession of the
|
||||
Corresponding Source of the work from the predecessor in interest, if
|
||||
the predecessor has it or can get it with reasonable efforts.
|
||||
|
||||
You may not impose any further restrictions on the exercise of the
|
||||
rights granted or affirmed under this License. For example, you may
|
||||
not impose a license fee, royalty, or other charge for exercise of
|
||||
rights granted under this License, and you may not initiate litigation
|
||||
(including a cross-claim or counterclaim in a lawsuit) alleging that
|
||||
any patent claim is infringed by making, using, selling, offering for
|
||||
sale, or importing the Program or any portion of it.
|
||||
|
||||
11. Patents.
|
||||
|
||||
A "contributor" is a copyright holder who authorizes use under this
|
||||
License of the Program or a work on which the Program is based. The
|
||||
work thus licensed is called the contributor's "contributor version".
|
||||
|
||||
A contributor's "essential patent claims" are all patent claims
|
||||
owned or controlled by the contributor, whether already acquired or
|
||||
hereafter acquired, that would be infringed by some manner, permitted
|
||||
by this License, of making, using, or selling its contributor version,
|
||||
but do not include claims that would be infringed only as a
|
||||
consequence of further modification of the contributor version. For
|
||||
purposes of this definition, "control" includes the right to grant
|
||||
patent sublicenses in a manner consistent with the requirements of
|
||||
this License.
|
||||
|
||||
Each contributor grants you a non-exclusive, worldwide, royalty-free
|
||||
patent license under the contributor's essential patent claims, to
|
||||
make, use, sell, offer for sale, import and otherwise run, modify and
|
||||
propagate the contents of its contributor version.
|
||||
|
||||
In the following three paragraphs, a "patent license" is any express
|
||||
agreement or commitment, however denominated, not to enforce a patent
|
||||
(such as an express permission to practice a patent or covenant not to
|
||||
sue for patent infringement). To "grant" such a patent license to a
|
||||
party means to make such an agreement or commitment not to enforce a
|
||||
patent against the party.
|
||||
|
||||
If you convey a covered work, knowingly relying on a patent license,
|
||||
and the Corresponding Source of the work is not available for anyone
|
||||
to copy, free of charge and under the terms of this License, through a
|
||||
publicly available network server or other readily accessible means,
|
||||
then you must either (1) cause the Corresponding Source to be so
|
||||
available, or (2) arrange to deprive yourself of the benefit of the
|
||||
patent license for this particular work, or (3) arrange, in a manner
|
||||
consistent with the requirements of this License, to extend the patent
|
||||
license to downstream recipients. "Knowingly relying" means you have
|
||||
actual knowledge that, but for the patent license, your conveying the
|
||||
covered work in a country, or your recipient's use of the covered work
|
||||
in a country, would infringe one or more identifiable patents in that
|
||||
country that you have reason to believe are valid.
|
||||
|
||||
If, pursuant to or in connection with a single transaction or
|
||||
arrangement, you convey, or propagate by procuring conveyance of, a
|
||||
covered work, and grant a patent license to some of the parties
|
||||
receiving the covered work authorizing them to use, propagate, modify
|
||||
or convey a specific copy of the covered work, then the patent license
|
||||
you grant is automatically extended to all recipients of the covered
|
||||
work and works based on it.
|
||||
|
||||
A patent license is "discriminatory" if it does not include within
|
||||
the scope of its coverage, prohibits the exercise of, or is
|
||||
conditioned on the non-exercise of one or more of the rights that are
|
||||
specifically granted under this License. You may not convey a covered
|
||||
work if you are a party to an arrangement with a third party that is
|
||||
in the business of distributing software, under which you make payment
|
||||
to the third party based on the extent of your activity of conveying
|
||||
the work, and under which the third party grants, to any of the
|
||||
parties who would receive the covered work from you, a discriminatory
|
||||
patent license (a) in connection with copies of the covered work
|
||||
conveyed by you (or copies made from those copies), or (b) primarily
|
||||
for and in connection with specific products or compilations that
|
||||
contain the covered work, unless you entered into that arrangement,
|
||||
or that patent license was granted, prior to 28 March 2007.
|
||||
|
||||
Nothing in this License shall be construed as excluding or limiting
|
||||
any implied license or other defenses to infringement that may
|
||||
otherwise be available to you under applicable patent law.
|
||||
|
||||
12. No Surrender of Others' Freedom.
|
||||
|
||||
If conditions are imposed on you (whether by court order, agreement or
|
||||
otherwise) that contradict the conditions of this License, they do not
|
||||
excuse you from the conditions of this License. If you cannot convey a
|
||||
covered work so as to satisfy simultaneously your obligations under this
|
||||
License and any other pertinent obligations, then as a consequence you may
|
||||
not convey it at all. For example, if you agree to terms that obligate you
|
||||
to collect a royalty for further conveying from those to whom you convey
|
||||
the Program, the only way you could satisfy both those terms and this
|
||||
License would be to refrain entirely from conveying the Program.
|
||||
|
||||
13. Use with the GNU Affero General Public License.
|
||||
|
||||
Notwithstanding any other provision of this License, you have
|
||||
permission to link or combine any covered work with a work licensed
|
||||
under version 3 of the GNU Affero General Public License into a single
|
||||
combined work, and to convey the resulting work. The terms of this
|
||||
License will continue to apply to the part which is the covered work,
|
||||
but the special requirements of the GNU Affero General Public License,
|
||||
section 13, concerning interaction through a network will apply to the
|
||||
combination as such.
|
||||
|
||||
14. Revised Versions of this License.
|
||||
|
||||
The Free Software Foundation may publish revised and/or new versions of
|
||||
the GNU General Public License from time to time. Such new versions will
|
||||
be similar in spirit to the present version, but may differ in detail to
|
||||
address new problems or concerns.
|
||||
|
||||
Each version is given a distinguishing version number. If the
|
||||
Program specifies that a certain numbered version of the GNU General
|
||||
Public License "or any later version" applies to it, you have the
|
||||
option of following the terms and conditions either of that numbered
|
||||
version or of any later version published by the Free Software
|
||||
Foundation. If the Program does not specify a version number of the
|
||||
GNU General Public License, you may choose any version ever published
|
||||
by the Free Software Foundation.
|
||||
|
||||
If the Program specifies that a proxy can decide which future
|
||||
versions of the GNU General Public License can be used, that proxy's
|
||||
public statement of acceptance of a version permanently authorizes you
|
||||
to choose that version for the Program.
|
||||
|
||||
Later license versions may give you additional or different
|
||||
permissions. However, no additional obligations are imposed on any
|
||||
author or copyright holder as a result of your choosing to follow a
|
||||
later version.
|
||||
|
||||
15. Disclaimer of Warranty.
|
||||
|
||||
THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
|
||||
APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
|
||||
HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
|
||||
OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
|
||||
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
|
||||
IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
|
||||
ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
|
||||
|
||||
16. Limitation of Liability.
|
||||
|
||||
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
|
||||
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
|
||||
THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
|
||||
GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
|
||||
USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
|
||||
DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
|
||||
PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
|
||||
EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
|
||||
SUCH DAMAGES.
|
||||
|
||||
17. Interpretation of Sections 15 and 16.
|
||||
|
||||
If the disclaimer of warranty and limitation of liability provided
|
||||
above cannot be given local legal effect according to their terms,
|
||||
reviewing courts shall apply local law that most closely approximates
|
||||
an absolute waiver of all civil liability in connection with the
|
||||
Program, unless a warranty or assumption of liability accompanies a
|
||||
copy of the Program in return for a fee.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
How to Apply These Terms to Your New Programs
|
||||
|
||||
If you develop a new program, and you want it to be of the greatest
|
||||
possible use to the public, the best way to achieve this is to make it
|
||||
free software which everyone can redistribute and change under these terms.
|
||||
|
||||
To do so, attach the following notices to the program. It is safest
|
||||
to attach them to the start of each source file to most effectively
|
||||
state the exclusion of warranty; and each file should have at least
|
||||
the "copyright" line and a pointer to where the full notice is found.
|
||||
|
||||
<one line to give the program's name and a brief idea of what it does.>
|
||||
Copyright (C) <year> <name of author>
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
Also add information on how to contact you by electronic and paper mail.
|
||||
|
||||
If the program does terminal interaction, make it output a short
|
||||
notice like this when it starts in an interactive mode:
|
||||
|
||||
<program> Copyright (C) <year> <name of author>
|
||||
This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
|
||||
This is free software, and you are welcome to redistribute it
|
||||
under certain conditions; type `show c' for details.
|
||||
|
||||
The hypothetical commands `show w' and `show c' should show the appropriate
|
||||
parts of the General Public License. Of course, your program's commands
|
||||
might be different; for a GUI interface, you would use an "about box".
|
||||
|
||||
You should also get your employer (if you work as a programmer) or school,
|
||||
if any, to sign a "copyright disclaimer" for the program, if necessary.
|
||||
For more information on this, and how to apply and follow the GNU GPL, see
|
||||
<http://www.gnu.org/licenses/>.
|
||||
|
||||
The GNU General Public License does not permit incorporating your program
|
||||
into proprietary programs. If your program is a subroutine library, you
|
||||
may consider it more useful to permit linking proprietary applications with
|
||||
the library. If this is what you want to do, use the GNU Lesser General
|
||||
Public License instead of this License. But first, please read
|
||||
<http://www.gnu.org/philosophy/why-not-lgpl.html>.
|
||||
|
||||
91
ChangeLog
Normal file
@@ -0,0 +1,91 @@
|
||||
kiwix-lib 2.0.2
|
||||
===============
|
||||
|
||||
* [Android] Forward C++ error messages to the Java world.
|
||||
* Follow redirection of favicon.
|
||||
* Make aria2 dependency optional.
|
||||
* Include unistd.h only on Unix platforms.
|
||||
|
||||
kiwix-lib 2.0.1
|
||||
===============
|
||||
|
||||
* Fix parsing of url.
|
||||
* Remove unused static resources.
|
||||
* Correctly decode reserved characters in URLs.
|
||||
* Explicitly use icu namespace to allow use of packaged icu lib.
|
||||
|
||||
kiwix-lib 2.0.0
|
||||
===============
|
||||
|
||||
* Introduce a new API to retrieve content from a reader.
|
||||
* Introduce the `Entry` class.
|
||||
* Reader's methods return an `Entry`.
|
||||
* Content and other information can be retrieved from the `Entry`.
|
||||
* Older Reader's methods are deprecated.
|
||||
* Add an `OPDSDumper` class to dump a whole `Library` as an OPDS feed.
|
||||
* Add a tool function to get the content of a file.
|
||||
* Add a tool function to create a temporary directory.
|
||||
* Add a `Downloader` class to download a file.
|
||||
* Allow the manager to populate a `Library` from an OPDS feed.
|
||||
* Try to locate libctpp2 in default system libdir and then fallback in 'lib'
|
||||
directory.
|
||||
* Build kiwix-lib setting RPATH.
|
||||
* Build kiwix-lib without warning (werror=true)
|
||||
* Build kiwix-lib on macos.
|
||||
|
||||
kiwix-lib 1.1.1
|
||||
===============
|
||||
|
||||
* Correct the name of kiwix-lib (from `kiwixlib`) in meson.build to generate
|
||||
dist archive with the correct name.
|
||||
* Libzim version needs to be at least 3.2.0
|
||||
|
||||
kiwix-lib 1.1.0
|
||||
===============
|
||||
|
||||
* Allow for more than 70 search results per page in html results rendering
|
||||
(kiwix/kiwix-tools#92)
|
||||
* Add a small api to do geo queries.
|
||||
* Add multi-search support in the JNI (#67)
|
||||
* Add an API to get only one part of an article.
|
||||
* Add an API to get direct location of an article content in the zim file.
|
||||
* Improve urlencoding
|
||||
* Fix pagination in html results rendering.
|
||||
* Compile using gcc-5 on Travis.
|
||||
* Allow JNI to access search snippets.
|
||||
* JNI throws an exception instead of returning an invalid object if something
|
||||
goes wrong.
|
||||
* Add doctext documentation. (#116)
|
||||
* Various bug fixes.
|
||||
|
||||
kiwix-lib 1.0.0
|
||||
===============
|
||||
|
||||
* Correctly regenerate template resource using ctpp2c at compilation time.
|
||||
* Suggestions use the xapian database when available
|
||||
* Support multi-zim search in kiwix-lib (a search can now search on several
|
||||
embedded databases in zims at the same time)
|
||||
* Fix some wording
|
||||
* Fix license issues
|
||||
* Add out argument to jni getContent* method to get the title of article in
|
||||
at the same time we get the content
|
||||
* Rename `compile_resources.py` script to `kiwix-compile-resources`
|
||||
* Use static lib when building for android or in "static mode"
|
||||
* Make the ResourceNotFound exception public
|
||||
|
||||
kiwix-lib 0.2.0
|
||||
===============
|
||||
|
||||
* Generate the snippet from the article content if the snippet is not
|
||||
directly in the database.
|
||||
This provides better snippets as they now depend on the query.
|
||||
* Use the stopwords and the language stored in the fulltext index database to
|
||||
parse the user query.
|
||||
* Remove the indexer functionality.
|
||||
* Move to C++11 standard.
|
||||
* Use the fulltext search of the zimlib.
|
||||
We still have the fulltext search code in kiwix-lib to be able to search in
|
||||
fulltext index by side of a zim file. (To be removed in the future)
|
||||
* A few API changes
|
||||
* Change a lot of `Reader` methods to const methods.
|
||||
* Fix some crashes.
|
||||
145
README.md
Normal file
@@ -0,0 +1,145 @@
|
||||
Kiwix library
|
||||
=============
|
||||
|
||||
The Kiwix library provides the Kiwix software core. It contains the
|
||||
code shared by all Kiwix ports (Windows, Linux, OSX, Android, ...).
|
||||
|
||||
Disclaimer
|
||||
----------
|
||||
|
||||
This document assumes you have a little knowledge about software
|
||||
compilation. If you experience difficulties with the dependencies or
|
||||
with the Kiwix library compilation itself, we recommend to have a look
|
||||
at [kiwix-build](https://github.com/kiwix/kiwix-build).
|
||||
|
||||
Preamble
|
||||
--------
|
||||
|
||||
Although the Kiwix library can be (cross-)compiled on/for many
|
||||
systems, the following documentation explains how to do it on POSIX
|
||||
ones. It is primarily thought for GNU/Linux systems and has been tested
|
||||
on recent releases of Ubuntu and Fedora.
|
||||
|
||||
Dependencies
|
||||
------------
|
||||
|
||||
The Kiwix library relies on many third-party software libraries. They
|
||||
are prerequisites to the Kiwix library compilation. The following
|
||||
libraries need to be available:
|
||||
|
||||
* ICU ................................... http://site.icu-project.org/
|
||||
(package libicu-dev on Ubuntu)
|
||||
* ZIM ........................................ http://www.openzim.org/
|
||||
(package libzim-dev on Ubuntu)
|
||||
* Pugixml ........................................ http://pugixml.org/
|
||||
(package libpugixml-dev on Ubuntu)
|
||||
* ctpp2 ........................................ http://ctpp.havoc.ru/
|
||||
(package libctpp2-dev on Ubuntu)
|
||||
* Xapian ......................................... https://xapian.org/
|
||||
(package libxapian-dev on Ubuntu)
|
||||
* libaria2 .................................. https://aria2.github.io/
|
||||
(no package on Ubuntu)
|
||||
|
||||
These dependencies may or may not be packaged by your operating
|
||||
system. They may also be packaged but only in an older version. The
|
||||
compilation script will tell you if one of them is missing or too old.
|
||||
In the worst case, you will have to download and compile bleeding edge
|
||||
version by hand.
|
||||
|
||||
If you want to install these dependencies locally, then use the
|
||||
kiwix-lib directory as install prefix.
|
||||
|
||||
If you compile ctpp2 from source and want to compile the Kiwix library
|
||||
statically then you will probably need to rename ctpp2 static library
|
||||
from ctpp2-st.a to ctpp2.a.
|
||||
|
||||
Environment
|
||||
-------------
|
||||
|
||||
The Kiwix library builds using [Meson](http://mesonbuild.com/) version
|
||||
0.39 or higher. Meson itself relies on Ninja, pkg-config and a few other
|
||||
compilation tools.
|
||||
|
||||
Install first the few common compilation tools:
|
||||
* Meson
|
||||
* Ninja
|
||||
* Pkg-config
|
||||
|
||||
These tools should be packaged if you use a cutting edge operating
|
||||
system. If not, have a look at the "Troubleshooting" section.
|
||||
|
||||
Compilation
|
||||
-----------
|
||||
|
||||
Once all dependencies are installed, you can compile the Kiwix library
|
||||
with:
|
||||
```
|
||||
meson . build
|
||||
ninja -C build
|
||||
```
|
||||
|
||||
By default, it will compile dynamic linked libraries. All binary files
|
||||
will be created in the "build" directory created automatically by
|
||||
Meson. If you want statically linked libraries, you can add
|
||||
`--default-library=static` option to the Meson command.
|
||||
|
||||
Depending on your system, `ninja` may be called `ninja-build`.
|
||||
|
||||
Installation
|
||||
------------
|
||||
|
||||
If you want to install the Kiwix library and the headers you just have
|
||||
compiled on your system, here we go:
|
||||
|
||||
```
|
||||
ninja -C build install
|
||||
```
|
||||
|
||||
You might need to run the command as root (or using 'sudo'), depending
|
||||
on where you want to install the libraries. After the installation
|
||||
succeeded, you may need to run ldconfig (as root).
|
||||
|
||||
Uninstallation
|
||||
------------
|
||||
|
||||
If you want to uninstall the Kiwix library:
|
||||
|
||||
```
|
||||
ninja -C build uninstall
|
||||
```
|
||||
|
||||
Like for the installation, you might need to run the command as root
|
||||
(or using 'sudo').
|
||||
|
||||
Troubleshooting
|
||||
---------------
|
||||
|
||||
If you need to install Meson "manually":
|
||||
```
|
||||
virtualenv -p python3 ./ # Create virtualenv
|
||||
source bin/activate # Activate the virtualenv
|
||||
pip3 install meson # Install Meson
|
||||
hash -r # Refresh bash paths
|
||||
```
|
||||
|
||||
If you need to install Ninja "manually":
|
||||
```
|
||||
git clone https://github.com/ninja-build/ninja.git
|
||||
cd ninja
|
||||
git checkout release
|
||||
./configure.py --bootstrap
|
||||
mkdir ../bin
|
||||
cp ninja ../bin
|
||||
cd ..
|
||||
```
|
||||
|
||||
If the compilation still fails, you might need to get a more recent
|
||||
version of a dependency than the one packaged by your Linux
|
||||
distribution. Try then with a source tarball distributed by the
|
||||
problematic upstream project or even directly from the source code
|
||||
repository.
|
||||
|
||||
License
|
||||
-------
|
||||
|
||||
GPLv3 or later, see COPYING for more details.
|
||||
36
format_code.sh
Executable file
@@ -0,0 +1,36 @@
|
||||
#!/usr/bin/env bash
#
# Reformat the kiwix-lib sources listed below in place with clang-format.
#
# `-style=file` tells clang-format to take its rules from the project's
# .clang-format file. The paths are relative, so the script is expected to be
# launched from the repository root.
#
# The interpreter is resolved through `env` because bash is not installed at
# /usr/bin/bash on every system (it is commonly /bin/bash).

files=(
  "include/library.h"
  "include/common/stringTools.h"
  "include/common/pathTools.h"
  "include/common/otherTools.h"
  "include/common/regexTools.h"
  "include/common/networkTools.h"
  "include/manager.h"
  "include/reader.h"
  "include/kiwix.h"
  "include/xapianSearcher.h"
  "include/searcher.h"
  "src/library.cpp"
  "src/android/kiwix.cpp"
  "src/android/org/kiwix/kiwixlib/JNIKiwixBool.java"
  "src/android/org/kiwix/kiwixlib/JNIKiwix.java"
  "src/android/org/kiwix/kiwixlib/JNIKiwixString.java"
  "src/android/org/kiwix/kiwixlib/JNIKiwixInt.java"
  "src/searcher.cpp"
  "src/common/pathTools.cpp"
  "src/common/regexTools.cpp"
  "src/common/otherTools.cpp"
  "src/common/networkTools.cpp"
  "src/common/stringTools.cpp"
  "src/xapianSearcher.cpp"
  "src/manager.cpp"
  "src/reader.cpp"
)

for file in "${files[@]}"
do
  # Quote the expansion so a path is never word-split or glob-expanded.
  echo "$file"
  clang-format -i -style=file "$file"
done
|
||||
24
include/common.h
Normal file
@@ -0,0 +1,24 @@
|
||||
|
||||
#ifndef _KIWIX_COMMON_H_
|
||||
#define _KIWIX_COMMON_H_
|
||||
|
||||
#include <zim/zim.h>
|
||||
|
||||
// DEPRECATED: marker placed on declarations that are scheduled for removal.
// It expands to the compiler-specific "deprecated" attribute when __GNUC__ or
// _MSC_VER is defined. On any other compiler it expands to nothing and a
// build-time message asks for a port.
#ifdef __GNUC__
#define DEPRECATED __attribute__((deprecated))
#elif defined(_MSC_VER)
#define DEPRECATED __declspec(deprecated)
#else
// This directive was misspelled "#praga", which is not a valid preprocessing
// directive and therefore broke the build on every compiler reaching this
// fallback branch instead of merely emitting the message.
#pragma message("WARNING: You need to implement DEPRECATED for this compiler")
#define DEPRECATED
#endif
|
||||
|
||||
|
||||
namespace kiwix {
|
||||
|
||||
typedef zim::size_type size_type;
|
||||
typedef zim::offset_type offset_type;
|
||||
|
||||
}
|
||||
|
||||
#endif //_KIWIX_COMMON_H_
|
||||
@@ -24,25 +24,26 @@
|
||||
#include <winsock2.h>
|
||||
#include <ws2tcpip.h>
|
||||
#else
|
||||
#include <net/if.h>
|
||||
#include <netdb.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <unistd.h>
|
||||
#include <sys/ioctl.h>
|
||||
#include <sys/socket.h>
|
||||
#include <sys/types.h>
|
||||
#include <net/if.h>
|
||||
#include <netdb.h>
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
namespace kiwix {
|
||||
std::map<std::string, std::string> getNetworkInterfaces();
|
||||
std::string getBestPublicIp();
|
||||
namespace kiwix
|
||||
{
|
||||
std::map<std::string, std::string> getNetworkInterfaces();
|
||||
std::string getBestPublicIp();
|
||||
}
|
||||
|
||||
#endif
|
||||
@@ -21,13 +21,14 @@
|
||||
#define KIWIX_OTHERTOOLS_H
|
||||
|
||||
#ifdef _WIN32
|
||||
#include <Windows.h>
|
||||
#include <windows.h>
|
||||
#else
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
|
||||
namespace kiwix {
|
||||
void sleep(unsigned int milliseconds);
|
||||
namespace kiwix
|
||||
{
|
||||
void sleep(unsigned int milliseconds);
|
||||
}
|
||||
|
||||
#endif
|
||||
@@ -20,41 +20,44 @@
|
||||
#ifndef KIWIX_PATHTOOLS_H
|
||||
#define KIWIX_PATHTOOLS_H
|
||||
|
||||
#include <fcntl.h>
|
||||
#include <limits.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/types.h>
|
||||
#include <fstream>
|
||||
#include <ios>
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <sstream>
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
#include <sys/types.h>
|
||||
#include <fcntl.h>
|
||||
#include <sys/stat.h>
|
||||
#include <ios>
|
||||
#include <limits.h>
|
||||
|
||||
#ifdef _WIN32
|
||||
#include <direct.h>
|
||||
#endif
|
||||
|
||||
#include <stringTools.h>
|
||||
#include "stringTools.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
bool isRelativePath(const string &path);
|
||||
bool isRelativePath(const string& path);
|
||||
string computeAbsolutePath(const string path, const string relativePath);
|
||||
string computeRelativePath(const string path, const string absolutePath);
|
||||
string removeLastPathElement(const string path, const bool removePreSeparator = false,
|
||||
const bool removePostSeparator = false);
|
||||
string appendToDirectory(const string &directoryPath, const string &filename);
|
||||
string removeLastPathElement(const string path,
|
||||
const bool removePreSeparator = false,
|
||||
const bool removePostSeparator = false);
|
||||
string appendToDirectory(const string& directoryPath, const string& filename);
|
||||
|
||||
unsigned int getFileSize(const string &path);
|
||||
string getFileSizeAsString(const string &path);
|
||||
bool fileExists(const string &path);
|
||||
bool makeDirectory(const string &path);
|
||||
bool copyFile(const string &sourcePath, const string &destPath);
|
||||
string getLastPathElement(const string &path);
|
||||
unsigned int getFileSize(const string& path);
|
||||
string getFileSizeAsString(const string& path);
|
||||
string getFileContent(const string& path);
|
||||
bool fileExists(const string& path);
|
||||
bool makeDirectory(const string& path);
|
||||
string makeTmpDirectory();
|
||||
bool copyFile(const string& sourcePath, const string& destPath);
|
||||
string getLastPathElement(const string& path);
|
||||
string getExecutablePath();
|
||||
string getCurrentDirectory();
|
||||
bool writeTextFile(const string &path, const string &content);
|
||||
bool writeTextFile(const string& path, const string& content);
|
||||
#endif
|
||||
@@ -22,11 +22,15 @@
|
||||
|
||||
#include <unicode/regex.h>
|
||||
#include <unicode/ucnv.h>
|
||||
#include <string>
|
||||
#include <map>
|
||||
#include <string>
|
||||
|
||||
// Declaration only. The last parameter had been garbled to "®ex" by an HTML
// entity mis-decoding of "&regex"; the reference declarator is restored here.
// NOTE(review): presumably reports whether `content` matches `regex` - confirm
// against the implementation.
bool matchRegex(const std::string &content, const std::string &regex);
|
||||
// Declaration only. The last parameter had been garbled to "®ex" by an HTML
// entity mis-decoding of "&regex"; the reference declarator is restored here.
// NOTE(review): presumably returns `content` with matches of `regex` replaced
// by `replacement` - confirm against the implementation.
std::string replaceRegex(const std::string &content, const std::string &replacement, const std::string &regex);
|
||||
std::string appendToFirstOccurence(const std::string &content, const std::string regex, const std::string &replacement);
|
||||
bool matchRegex(const std::string& content, const std::string& regex);
|
||||
std::string replaceRegex(const std::string& content,
|
||||
const std::string& replacement,
|
||||
const std::string& regex);
|
||||
std::string appendToFirstOccurence(const std::string& content,
|
||||
const std::string regex,
|
||||
const std::string& replacement);
|
||||
|
||||
#endif
|
||||
69
include/common/stringTools.h
Normal file
@@ -0,0 +1,69 @@
|
||||
/*
|
||||
* Copyright 2011-2012 Emmanuel Engelhart <kelson@kiwix.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 3 of the License, or
|
||||
* any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
* MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#ifndef KIWIX_STRINGTOOLS_H
|
||||
#define KIWIX_STRINGTOOLS_H
|
||||
|
||||
#include <unicode/unistr.h>
|
||||
|
||||
#include <fstream>
|
||||
#include <iomanip>
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "pathTools.h"
|
||||
|
||||
namespace kiwix
|
||||
{
|
||||
#ifndef __ANDROID__
|
||||
|
||||
std::string beautifyInteger(const unsigned int number);
|
||||
std::string beautifyFileSize(const unsigned int number);
|
||||
void printStringInHexadecimal(const char* s);
|
||||
void printStringInHexadecimal(icu::UnicodeString s);
|
||||
void stringReplacement(std::string& str,
|
||||
const std::string& oldStr,
|
||||
const std::string& newStr);
|
||||
std::string encodeDiples(const std::string& str);
|
||||
|
||||
#endif
|
||||
|
||||
std::string removeAccents(const std::string& text);
|
||||
void loadICUExternalTables();
|
||||
|
||||
std::string urlEncode(const std::string& value, bool encodeReserved = false);
|
||||
std::string urlDecode(const std::string& value, bool component = false);
|
||||
|
||||
std::vector<std::string> split(const std::string&, const std::string&);
|
||||
std::vector<std::string> split(const char*, const char*);
|
||||
std::vector<std::string> split(const std::string&, const char*);
|
||||
std::vector<std::string> split(const char*, const std::string&);
|
||||
|
||||
std::string ucAll(const std::string& word);
|
||||
std::string lcAll(const std::string& word);
|
||||
std::string ucFirst(const std::string& word);
|
||||
std::string lcFirst(const std::string& word);
|
||||
std::string toTitle(const std::string& word);
|
||||
|
||||
std::string normalize(const std::string& word);
|
||||
}
|
||||
|
||||
#endif
|
||||
@@ -20,12 +20,12 @@
|
||||
#ifndef _CTPP2_VM_STRING_LOADER_HPP__
|
||||
#define _CTPP2_VM_STRING_LOADER_HPP__ 1
|
||||
|
||||
#include "ctpp2/CTPP2VMLoader.hpp"
|
||||
#include "ctpp2/CTPP2Util.hpp"
|
||||
#include "ctpp2/CTPP2Exception.hpp"
|
||||
#include "ctpp2/CTPP2VMExecutable.hpp"
|
||||
#include "ctpp2/CTPP2VMInstruction.hpp"
|
||||
#include "ctpp2/CTPP2VMMemoryCore.hpp"
|
||||
#include <ctpp2/CTPP2VMLoader.hpp>
|
||||
#include <ctpp2/CTPP2Util.hpp>
|
||||
#include <ctpp2/CTPP2Exception.hpp>
|
||||
#include <ctpp2/CTPP2VMExecutable.hpp>
|
||||
#include <ctpp2/CTPP2VMInstruction.hpp>
|
||||
#include <ctpp2/CTPP2VMMemoryCore.hpp>
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
@@ -34,7 +34,6 @@
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include <resourceTools.h>
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
|
||||
73
include/downloader.h
Normal file
@@ -0,0 +1,73 @@
|
||||
/*
|
||||
* Copyright 2018 Matthieu Gautier <mgautier@kymeria.fr>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 3 of the License, or
|
||||
* any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
* MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#ifndef KIWIX_DOWNLOADER_H
|
||||
#define KIWIX_DOWNLOADER_H
|
||||
|
||||
#include <string>
|
||||
#ifdef ENABLE_LIBARIA2
|
||||
# include <aria2/aria2.h>
|
||||
#endif
|
||||
#include <pthread.h>
|
||||
|
||||
namespace kiwix
|
||||
{
|
||||
|
||||
|
||||
/**
 * Value returned by Downloader::download().
 */
struct DownloadedFile {
|
||||
// A freshly constructed object is flagged as unsuccessful; `path` is
// default-constructed, i.e. empty.
DownloadedFile()
|
||||
: success(false) {}
|
||||
// presumably set to true once the download succeeded -- confirm in the
// implementation.
bool success;
|
||||
// presumably the location of the downloaded file -- confirm in the
// implementation.
std::string path;
|
||||
};
|
||||
|
||||
/**
|
||||
* A tool to download things.
|
||||
*
|
||||
*/
|
||||
class Downloader
|
||||
{
|
||||
public:
|
||||
Downloader();
|
||||
~Downloader();
|
||||
|
||||
/**
|
||||
* Download the content located at a url.
|
||||
*
|
||||
* @param url the url to download
|
||||
* @return a DownloadedFile describing the downloaded content.
|
||||
*/
|
||||
DownloadedFile download(const std::string& url);
|
||||
|
||||
private:
|
||||
// Static member: a single mutex shared by every Downloader instance.
// NOTE(review): what it protects is not visible in this header.
static pthread_mutex_t globalLock;
|
||||
|
||||
// NOTE(review): presumably the directory downloads are written to --
// confirm in the implementation.
std::string tmpDir;
|
||||
// The members below only exist when the library is built with
// ENABLE_LIBARIA2 defined.
#ifdef ENABLE_LIBARIA2
|
||||
DownloadedFile* fileHandle;
|
||||
aria2::Session* session;
|
||||
// Static so that it can be handed to aria2 as a plain function pointer;
// NOTE(review): userData presumably carries the Downloader -- confirm.
static int downloadEventCallback(aria2::Session* session,
|
||||
aria2::DownloadEvent event,
|
||||
aria2::A2Gid gid,
|
||||
void* userData);
|
||||
#endif
|
||||
};
|
||||
}
|
||||
|
||||
#endif
|
||||
191
include/entry.h
Normal file
@@ -0,0 +1,191 @@
|
||||
/*
|
||||
* Copyright 2018 Matthieu Gautier <mgautier@kymeria.fr>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 3 of the License, or
|
||||
* any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
* MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#ifndef KIWIX_ENTRY_H
|
||||
#define KIWIX_ENTRY_H
|
||||
|
||||
#include <stdio.h>
|
||||
#include <zim/article.h>
|
||||
#include <exception>
|
||||
#include <string>
|
||||
#include "common.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace kiwix
|
||||
{
|
||||
|
||||
|
||||
/**
 * Exception type documented as thrown by Entry::getRedirectEntry() when the
 * entry is not a redirect.
 */
class NoEntry : public std::exception {};
|
||||
|
||||
/**
|
||||
* An entry represents an entry in a zim file.
|
||||
*/
|
||||
class Entry
|
||||
{
|
||||
public:
|
||||
/**
|
||||
* Default constructor.
|
||||
*
|
||||
* Construct an invalid entry.
|
||||
*/
|
||||
Entry() = default;
|
||||
|
||||
/**
|
||||
* Construct an entry making reference to a zim article.
|
||||
*
|
||||
* @param article a zim::Article object
|
||||
*/
|
||||
Entry(zim::Article article);
|
||||
virtual ~Entry() = default;
|
||||
|
||||
/**
|
||||
* Get the path of the entry.
|
||||
*
|
||||
* The path is the "key" of an entry.
|
||||
*
|
||||
* @return the path of the entry.
|
||||
*/
|
||||
std::string getPath() const;
|
||||
|
||||
/**
|
||||
* Get the title of the entry.
|
||||
*
|
||||
* @return the title of the entry.
|
||||
*/
|
||||
std::string getTitle() const;
|
||||
|
||||
/**
|
||||
* Get the content of the entry.
|
||||
*
|
||||
* The string is a copy of the content.
|
||||
* If you don't want to make a copy, use getBlob.
|
||||
*
|
||||
* @return the content of the entry.
|
||||
*/
|
||||
std::string getContent() const;
|
||||
|
||||
/**
|
||||
* Get the blob of the entry.
|
||||
*
|
||||
* A blob references the content without copying it.
|
||||
*
|
||||
* @param offset The starting offset of the blob.
|
||||
* @return the blob of the entry.
|
||||
*/
|
||||
zim::Blob getBlob(offset_type offset = 0) const;
|
||||
|
||||
/**
|
||||
* Get the blob of the entry.
|
||||
*
|
||||
* A blob references the content without copying it.
|
||||
*
|
||||
* @param offset The starting offset of the blob.
|
||||
* @param size The size of the blob.
|
||||
* @return the blob of the entry.
|
||||
*/
|
||||
zim::Blob getBlob(offset_type offset, size_type size) const;
|
||||
|
||||
/**
|
||||
* Get the info for direct access to the content of the entry.
|
||||
*
|
||||
* Some entries (i.e. binary ones) have their content stored plain
|
||||
* in the zim file. Knowing the offset where the content is stored
|
||||
* a user can directly read the content in the zim file bypassing the
|
||||
* kiwix-lib/libzim.
|
||||
*
|
||||
* @return A pair specifying where to read the content.
|
||||
* The string is the real file to read (may be different than the .zim
|
||||
* file if zim is cut).
|
||||
* The offset is the offset to read in the file.
|
||||
* Return <"",0> if it is not possible to read directly.
|
||||
*/
|
||||
std::pair<std::string, offset_type> getDirectAccessInfo() const;
|
||||
|
||||
/**
|
||||
* Get the size of the entry.
|
||||
*
|
||||
* @return the size of the entry.
|
||||
*/
|
||||
size_type getSize() const;
|
||||
|
||||
/**
|
||||
* Get the mime_type of the entry.
|
||||
*
|
||||
* @return the mime_type of the entry.
|
||||
*/
|
||||
std::string getMimetype() const;
|
||||
|
||||
|
||||
/**
|
||||
* Get if the entry is a redirect entry.
|
||||
*
|
||||
* @return True if the entry is a redirect.
|
||||
*/
|
||||
bool isRedirect() const;
|
||||
|
||||
/**
|
||||
* Get if the entry is a link target entry.
|
||||
*
|
||||
* @return True if the entry is a link target.
|
||||
*/
|
||||
bool isLinkTarget() const;
|
||||
|
||||
/**
|
||||
* Get if the entry is a deleted entry.
|
||||
*
|
||||
* @return True if the entry is a deleted entry.
|
||||
*/
|
||||
bool isDeleted() const;
|
||||
|
||||
/**
|
||||
* Get the entry pointed by this entry.
|
||||
*
|
||||
* @return the entry pointed.
|
||||
* @throw NoEntry if the entry is not a redirected entry.
|
||||
*/
|
||||
Entry getRedirectEntry() const;
|
||||
|
||||
/**
|
||||
* Get the final entry pointed by this entry.
|
||||
*
|
||||
* Follow the redirection until a "not redirecting" entry is found.
|
||||
* If the entry is not a redirected entry, return the entry itself.
|
||||
*
|
||||
* @return the final entry.
|
||||
*/
|
||||
Entry getFinalEntry() const;
|
||||
|
||||
/**
|
||||
* Convert the entry to a boolean value.
|
||||
*
|
||||
* @return True if the entry is valid.
|
||||
*/
|
||||
explicit operator bool() const { return good(); }
|
||||
|
||||
private:
|
||||
zim::Article article;
|
||||
mutable zim::Article final_article;
|
||||
|
||||
bool good() const { return article.good(); }
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif // KIWIX_ENTRY_H
|
||||
@@ -17,28 +17,9 @@
|
||||
* MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#ifndef KIWIX_COMPONENTTOOLS_H
|
||||
#define KIWIX_COMPONENTTOOLS_H
|
||||
#ifndef KIWIX_H
|
||||
#define KIWIX_H
|
||||
|
||||
#ifdef _WIN32
|
||||
#include <mozilla/Char16.h>
|
||||
#endif
|
||||
#include "library.h"
|
||||
|
||||
#include<string>
|
||||
|
||||
#ifdef __APPLE__
|
||||
#include <stdint.h>
|
||||
#endif
|
||||
|
||||
#ifdef _WIN32
|
||||
#include <stdlib.h>
|
||||
#endif
|
||||
|
||||
#include "nsStringAPI.h"
|
||||
#include "nsEmbedString.h"
|
||||
|
||||
const char *nsStringToCString(const nsAString &str);
|
||||
std::string nsStringToString(const nsEmbedString &str);
|
||||
const char *nsStringToUTF8(const nsAString &str);
|
||||
|
||||
#endif
|
||||
#endif
|
||||
130
include/library.h
Normal file
@@ -0,0 +1,130 @@
|
||||
/*
|
||||
* Copyright 2011 Emmanuel Engelhart <kelson@kiwix.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 3 of the License, or
|
||||
* any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
* MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#ifndef KIWIX_LIBRARY_H
|
||||
#define KIWIX_LIBRARY_H
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <stack>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "common/regexTools.h"
|
||||
#include "common/stringTools.h"
|
||||
|
||||
#define KIWIX_LIBRARY_VERSION "20110515"
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace kiwix
|
||||
{
|
||||
/** Type of the fulltext index associated with a Book (see Book::indexType). */
enum supportedIndexType { UNKNOWN, XAPIAN };
|
||||
|
||||
|
||||
/**
|
||||
* A class to store information about a book (a zim file)
|
||||
*/
|
||||
class Book
|
||||
{
|
||||
public:
|
||||
Book();
|
||||
~Book();
|
||||
|
||||
static bool sortByLastOpen(const Book& a, const Book& b);
|
||||
static bool sortByTitle(const Book& a, const Book& b);
|
||||
static bool sortBySize(const Book& a, const Book& b);
|
||||
static bool sortByDate(const Book& a, const Book& b);
|
||||
static bool sortByCreator(const Book& a, const Book& b);
|
||||
static bool sortByPublisher(const Book& a, const Book& b);
|
||||
static bool sortByLanguage(const Book& a, const Book& b);
|
||||
string getHumanReadableIdFromPath();
|
||||
|
||||
string id;
|
||||
string path;
|
||||
string pathAbsolute;
|
||||
string last;
|
||||
string indexPath;
|
||||
string indexPathAbsolute;
|
||||
supportedIndexType indexType;
|
||||
string title;
|
||||
string description;
|
||||
string language;
|
||||
string creator;
|
||||
string publisher;
|
||||
string date;
|
||||
string url;
|
||||
string name;
|
||||
string tags;
|
||||
string origId;
|
||||
string articleCount;
|
||||
string mediaCount;
|
||||
bool readOnly;
|
||||
string size;
|
||||
string favicon;
|
||||
string faviconMimeType;
|
||||
};
|
||||
|
||||
/**
|
||||
* A Library stores several books.
|
||||
*/
|
||||
class Library
|
||||
{
|
||||
public:
|
||||
Library();
|
||||
~Library();
|
||||
|
||||
string version;
|
||||
/**
|
||||
* Add a book to the library.
|
||||
*
|
||||
* If a book already exists in the library with the same id, update
|
||||
* the existing book instead of adding a new one.
|
||||
*
|
||||
* @param book The book to add.
|
||||
* @return True if the book has been added.
|
||||
* False if a book has been updated.
|
||||
*/
|
||||
bool addBook(const Book& book);
|
||||
|
||||
/**
|
||||
* Remove a book from the library.
|
||||
*
|
||||
* @param bookIndex the index of the book to remove.
|
||||
* @return True
|
||||
*/
|
||||
bool removeBookByIndex(const unsigned int bookIndex);
|
||||
vector<kiwix::Book> books;
|
||||
|
||||
/*
|
||||
* 'current' is the variable storing the current content/book id
|
||||
* in the library. This is used to be able to load by default a
|
||||
* content. As Kiwix may work with many library XML files, you may
|
||||
* have "current" defined many times with different values. The
|
||||
* last XML file read has the priority. Although we do not have a
|
||||
* library object for each file, we want to be able to fallback to
|
||||
* an 'old' current book if the one which should be loaded
|
||||
* failed. That is the reason why we need a stack here
|
||||
*/
|
||||
stack<string> current;
|
||||
};
|
||||
}
|
||||
|
||||
#endif
|
||||
325
include/manager.h
Normal file
@@ -0,0 +1,325 @@
|
||||
/*
|
||||
* Copyright 2011 Emmanuel Engelhart <kelson@kiwix.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 3 of the License, or
|
||||
* any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
* MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#ifndef KIWIX_MANAGER_H
|
||||
#define KIWIX_MANAGER_H
|
||||
|
||||
#include <time.h>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
|
||||
#include <pugixml.hpp>
|
||||
|
||||
#include "common/base64.h"
|
||||
#include "common/pathTools.h"
|
||||
#include "common/regexTools.h"
|
||||
#include "library.h"
|
||||
#include "reader.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace kiwix
|
||||
{
|
||||
/** Listing modes accepted by Manager::listBooks(). */
enum supportedListMode { LASTOPEN, REMOTE, LOCAL };
|
||||
/** Attributes by which Manager::listBooks() can sort the book list. */
enum supportedListSortBy { TITLE, SIZE, DATE, CREATOR, PUBLISHER };
|
||||
|
||||
/**
|
||||
* A tool to manage a `Library`.
|
||||
*
|
||||
* A `Manager` handles an internal `Library`.
|
||||
* This `Library` can be retrieved with `cloneLibrary` method.
|
||||
*/
|
||||
class Manager
|
||||
{
|
||||
public:
|
||||
Manager();
|
||||
~Manager();
|
||||
|
||||
/**
|
||||
* Read a `library.xml` and add book in the file to the library.
|
||||
*
|
||||
* @param path The path to the `library.xml`.
|
||||
* @param readOnly Set if the library path could be overwritten later with
|
||||
* updated content.
|
||||
* @return True if file has been properly parsed.
|
||||
*/
|
||||
bool readFile(const string path, const bool readOnly = true);
|
||||
|
||||
/**
|
||||
* Read a `library.xml` and add book in the file to the library.
|
||||
*
|
||||
* @param nativePath The path of the `library.xml`
|
||||
* @param UTF8Path The utf8 version (?) of the path. Also the path where the
|
||||
* library will be written if readOnly is False.
|
||||
* @param readOnly Set if the library path could be overwritten later with
|
||||
* updated content.
|
||||
* @return True if file has been properly parsed.
|
||||
*/
|
||||
bool readFile(const string nativePath,
|
||||
const string UTF8Path,
|
||||
const bool readOnly = true);
|
||||
|
||||
/**
|
||||
* Load a library content stored in the string.
|
||||
*
|
||||
* @param xml The content corresponding of the library xml
|
||||
* @param readOnly Set if the library path could be overwritten later with
|
||||
* updated content.
|
||||
* @param libraryPath The library path (used to resolve relative path)
|
||||
* @return True if the content has been properly parsed.
|
||||
*/
|
||||
bool readXml(const string& xml,
|
||||
const bool readOnly = true,
|
||||
const string libraryPath = "");
|
||||
|
||||
/**
|
||||
* Load a library content stored in an OPDS stream.
|
||||
*
|
||||
* @param content The content of the OPDS stream.
|
||||
* @param urlHost NOTE(review): undocumented parameter; presumably the host
|
||||
* the OPDS stream was fetched from -- confirm against the
|
||||
* implementation.
|
||||
* @return True if the content has been properly parsed.
|
||||
*/
|
||||
bool readOpds(const string& content, const std::string& urlHost);
|
||||
|
||||
/**
|
||||
* Write the library to a file.
|
||||
*
|
||||
* @param path the path of the file to write.
|
||||
* @return True.
|
||||
*/
|
||||
bool writeFile(const string path);
|
||||
|
||||
|
||||
/**
|
||||
* Remove a book from the library.
|
||||
*
|
||||
* @param bookIndex the index of the book to remove
|
||||
* @return True
|
||||
*/
|
||||
bool removeBookByIndex(const unsigned int bookIndex);
|
||||
|
||||
/**
|
||||
* Remove a book from the library.
|
||||
*
|
||||
* @param id the id of the book to remove.
|
||||
* @return True if the book were in the library.
|
||||
*/
|
||||
bool removeBookById(const string id);
|
||||
|
||||
/**
|
||||
* Set the current book.
|
||||
*
|
||||
* @param id The id to add to the stack of current books.
|
||||
* If id is empty, remove the current book from the stack.
|
||||
* @return True
|
||||
*/
|
||||
bool setCurrentBookId(const string id);
|
||||
|
||||
/**
|
||||
* Get the current book id.
|
||||
*
|
||||
* @return The id of the current book (or empty string if no current book).
|
||||
*/
|
||||
string getCurrentBookId() const;
|
||||
|
||||
/**
|
||||
* Set the path of the external fulltext index associated to a book.
|
||||
*
|
||||
* @param id The id of the book to set.
|
||||
* @param path The path of the external fulltext index.
|
||||
* @param type The type of the fulltext index.
|
||||
* @return True if the book is in the library.
|
||||
*/
|
||||
bool setBookIndex(const string id,
|
||||
const string path,
|
||||
const supportedIndexType type = XAPIAN);
|
||||
|
||||
/**
|
||||
* Set the path of the zim file associated to a book.
|
||||
*
|
||||
* @param id The id of the book to set.
|
||||
* @param path The path of the zim file.
|
||||
* @return True if the book is in the library.
|
||||
*/
|
||||
bool setBookPath(const string id, const string path);
|
||||
|
||||
/**
|
||||
* Add a book to the library.
|
||||
*
|
||||
* @param pathToOpen The path to the zim file to add.
|
||||
* @param pathToSave The path to store in the library in place of pathToOpen.
|
||||
* @param url The url of the book to store in the library.
|
||||
* @param checkMetaData Tell if we check metadata before adding book to the
|
||||
* library.
|
||||
* @return The id of the book if the book has been added to the library.
|
||||
* Else, an empty string.
|
||||
*/
|
||||
string addBookFromPathAndGetId(const string pathToOpen,
|
||||
const string pathToSave = "",
|
||||
const string url = "",
|
||||
const bool checkMetaData = false);
|
||||
|
||||
/**
|
||||
* Add a book to the library.
|
||||
*
|
||||
* @param pathToOpen The path to the zim file to add.
|
||||
* @param pathToSave The path to store in the library in place of pathToOpen.
|
||||
* @param url The url of the book to store in the library.
|
||||
* @param checkMetaData Tell if we check metadata before adding book to the
|
||||
* library.
|
||||
* @return True if the book has been added to the library.
|
||||
*/
|
||||
|
||||
bool addBookFromPath(const string pathToOpen,
|
||||
const string pathToSave = "",
|
||||
const string url = "",
|
||||
const bool checkMetaData = false);
|
||||
|
||||
/**
|
||||
* Clone and return the internal library.
|
||||
*
|
||||
* @return A clone of the library.
|
||||
*/
|
||||
Library cloneLibrary();
|
||||
|
||||
/**
|
||||
* Get the book corresponding to an id.
|
||||
*
|
||||
* @param[in] id The id of the book
|
||||
* @param[out] book The book corresponding to the id.
|
||||
* @return True if the book has been found.
|
||||
*/
|
||||
bool getBookById(const string id, Book& book);
|
||||
|
||||
/**
|
||||
* Get the current book.
|
||||
*
|
||||
* @param[out] book The current book.
|
||||
* @return True if there is a current book.
|
||||
*/
|
||||
bool getCurrentBook(Book& book);
|
||||
|
||||
/**
|
||||
* Get the number of books in the library.
|
||||
*
|
||||
* @param localBooks If we must count local books (books with a path).
|
||||
* @param remoteBooks If we must count remote books (books with an url)
|
||||
* @return The number of books.
|
||||
*/
|
||||
unsigned int getBookCount(const bool localBooks, const bool remoteBooks);
|
||||
|
||||
/**
|
||||
* Update the "last open date" of a book
|
||||
*
|
||||
* @param id the id of the book.
|
||||
* @return True if the book is in the library.
|
||||
*/
|
||||
bool updateBookLastOpenDateById(const string id);
|
||||
|
||||
/**
|
||||
* Remove (set to empty) paths of all books in the library.
|
||||
*/
|
||||
void removeBookPaths();
|
||||
|
||||
/**
|
||||
* List books in the library.
|
||||
*
|
||||
* The books list will be available in public vector member `bookIdList`.
|
||||
*
|
||||
* @param mode The mode of listing :
|
||||
* - LASTOPEN sort by last opened book.
|
||||
* - LOCAL list only local file.
|
||||
* - REMOTE list only remote file.
|
||||
* @param sortBy Attribute to sort by the book list.
|
||||
* @param maxSize Do not list book bigger than maxSize MiB.
|
||||
* Set to 0 to cancel this filter.
|
||||
* @param language List only books in this language.
|
||||
* @param creator List only books of this creator.
|
||||
* @param publisher List only books of this publisher.
|
||||
* @param search List only books with search in the title, description or
|
||||
* language.
|
||||
* @return True
|
||||
*/
|
||||
bool listBooks(const supportedListMode mode,
|
||||
const supportedListSortBy sortBy,
|
||||
const unsigned int maxSize,
|
||||
const string language,
|
||||
const string creator,
|
||||
const string publisher,
|
||||
const string search);
|
||||
|
||||
/**
|
||||
* Filter the library and generate a new one with the kept elements.
|
||||
*
|
||||
* @param search List only books with search in the title or description.
|
||||
* @return A `Library`.
|
||||
*/
|
||||
Library filter(const string& search);
|
||||
|
||||
|
||||
/**
|
||||
* Get all languages of the books in the library.
|
||||
*
|
||||
* @return A list of languages.
|
||||
*/
|
||||
vector<string> getBooksLanguages();
|
||||
|
||||
/**
|
||||
* Get all book creators of the books in the library.
|
||||
*
|
||||
* @return A list of book creators.
|
||||
*/
|
||||
vector<string> getBooksCreators();
|
||||
|
||||
/**
|
||||
* Get all book publishers of the books in the library.
|
||||
*
|
||||
* @return A list of book publishers.
|
||||
*/
|
||||
vector<string> getBooksPublishers();
|
||||
|
||||
/**
|
||||
* Get all book ids of the books in the library.
|
||||
*
|
||||
* @return A list of book ids.
|
||||
*/
|
||||
vector<string> getBooksIds();
|
||||
|
||||
string writableLibraryPath;
|
||||
|
||||
vector<std::string> bookIdList;
|
||||
|
||||
protected:
|
||||
kiwix::Library library;
|
||||
|
||||
bool readBookFromPath(const string path, Book* book = NULL);
|
||||
bool parseXmlDom(const pugi::xml_document& doc,
|
||||
const bool readOnly,
|
||||
const string libraryPath);
|
||||
bool parseOpdsDom(const pugi::xml_document& doc,
|
||||
const std::string& urlHost);
|
||||
|
||||
private:
|
||||
void checkAndCleanBookPaths(Book& book, const string& libraryPath);
|
||||
};
|
||||
}
|
||||
|
||||
#endif
|
||||
34
include/meson.build
Normal file
@@ -0,0 +1,34 @@
|
||||
headers = [
|
||||
'common.h',
|
||||
'library.h',
|
||||
'manager.h',
|
||||
'opds_dumper.h',
|
||||
'downloader.h',
|
||||
'reader.h',
|
||||
'entry.h',
|
||||
'searcher.h'
|
||||
]
|
||||
|
||||
if xapian_dep.found()
|
||||
headers += ['xapianSearcher.h']
|
||||
endif
|
||||
|
||||
install_headers(headers, subdir:'kiwix')
|
||||
|
||||
install_headers(
|
||||
'common/base64.h',
|
||||
'common/networkTools.h',
|
||||
'common/otherTools.h',
|
||||
'common/pathTools.h',
|
||||
'common/regexTools.h',
|
||||
'common/stringTools.h',
|
||||
subdir:'kiwix/common'
|
||||
)
|
||||
|
||||
if has_ctpp2_dep
|
||||
install_headers(
|
||||
'ctpp2/CTPP2VMStringLoader.hpp',
|
||||
subdir:'kiwix/ctpp2'
|
||||
)
|
||||
endif
|
||||
|
||||
107
include/opds_dumper.h
Normal file
@@ -0,0 +1,107 @@
|
||||
/*
|
||||
* Copyright 2017 Matthieu Gautier <mgautier@kymeria.fr>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 3 of the License, or
|
||||
* any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
* MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#ifndef KIWIX_OPDS_DUMPER_H
|
||||
#define KIWIX_OPDS_DUMPER_H
|
||||
|
||||
#include <time.h>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
|
||||
#include <pugixml.hpp>
|
||||
|
||||
#include "common/base64.h"
|
||||
#include "common/pathTools.h"
|
||||
#include "common/regexTools.h"
|
||||
#include "library.h"
|
||||
#include "reader.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace kiwix
|
||||
{
|
||||
|
||||
/**
|
||||
* A tool to dump a `Library` into an OPDS stream.
|
||||
*
|
||||
*/
|
||||
class OPDSDumper
|
||||
{
|
||||
public:
|
||||
OPDSDumper() = default;
|
||||
OPDSDumper(Library library);
|
||||
~OPDSDumper();
|
||||
|
||||
/**
|
||||
* Dump the OPDS feed.
|
||||
*
|
||||
* @param id The id of the library.
|
||||
* @return The OPDS feed.
|
||||
*/
|
||||
std::string dumpOPDSFeed();
|
||||
|
||||
/**
|
||||
* Set the id of the opds stream.
|
||||
*
|
||||
* @param id the id to use.
|
||||
*/
|
||||
void setId(const std::string& id) { this->id = id;}
|
||||
|
||||
/**
|
||||
* Set the title of the opds stream.
|
||||
*
|
||||
* @param title the title to use.
|
||||
*/
|
||||
void setTitle(const std::string& title) { this->title = title; }
|
||||
|
||||
/**
|
||||
* Set the root location used when generating url.
|
||||
*
|
||||
* @param rootLocation the root location to use.
|
||||
*/
|
||||
void setRootLocation(const std::string& rootLocation) { this->rootLocation = rootLocation; }
|
||||
|
||||
/**
|
||||
* Set the search url.
|
||||
*
|
||||
* @param searchDescriptionUrl the search url to use.
|
||||
*/
|
||||
void setSearchDescriptionUrl(const std::string& searchDescriptionUrl) { this->searchDescriptionUrl = searchDescriptionUrl; }
|
||||
|
||||
/**
|
||||
* Set the library to dump.
|
||||
*
|
||||
* @param library The library to dump.
|
||||
*/
|
||||
void setLibrary(Library library) { this->library = library; }
|
||||
|
||||
protected:
|
||||
kiwix::Library library;
|
||||
std::string id;
|
||||
std::string title;
|
||||
std::string date;
|
||||
std::string rootLocation;
|
||||
std::string searchDescriptionUrl;
|
||||
|
||||
private:
|
||||
pugi::xml_node handleBook(Book book, pugi::xml_node root_node);
|
||||
};
|
||||
}
|
||||
|
||||
#endif // KIWIX_OPDS_DUMPER_H
|
||||
505
include/reader.h
Normal file
@@ -0,0 +1,505 @@
|
||||
/*
|
||||
* Copyright 2011 Emmanuel Engelhart <kelson@kiwix.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 3 of the License, or
|
||||
* any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
* MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#ifndef KIWIX_READER_H
|
||||
#define KIWIX_READER_H
|
||||
|
||||
#include <stdio.h>
|
||||
#include <zim/article.h>
|
||||
#include <zim/file.h>
|
||||
#include <zim/fileiterator.h>
|
||||
#include <zim/zim.h>
|
||||
#include <exception>
|
||||
#include <map>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include "common.h"
|
||||
#include "entry.h"
|
||||
#include "common/pathTools.h"
|
||||
#include "common/stringTools.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace kiwix
|
||||
{
|
||||
|
||||
/**
|
||||
* The Reader class is the class which allows getting an entry's content from a zim
|
||||
* file.
|
||||
*/
|
||||
class Reader
|
||||
{
|
||||
public:
|
||||
/**
|
||||
* Create a Reader to read a zim file specified by zimFilePath.
|
||||
*
|
||||
* @param zimFilePath The path to the zim file to read.
|
||||
* The zim file can be split (.zimaa, .zimab, ...).
|
||||
* In this case, the file path must still point to the
|
||||
* unsplit path as if the file were not split
|
||||
* (.zim extension).
|
||||
*/
|
||||
Reader(const string zimFilePath);
|
||||
~Reader();
|
||||
|
||||
/**
|
||||
* Get the number of "displayable" entries in the zim file.
|
||||
*
|
||||
* @return If the zim file has a /M/Counter metadata, return the number of
|
||||
* entries with the 'text/html' MIMEtype specified in the metadata.
|
||||
* Else return the number of entries in the 'A' namespace.
|
||||
*/
|
||||
unsigned int getArticleCount() const;
|
||||
|
||||
/**
|
||||
* Get the number of media in the zim file.
|
||||
*
|
||||
* @return If the zim file has a /M/Counter metadata, return the number of
|
||||
* entries with the 'image/jpeg', 'image/gif' and 'image/png' in
|
||||
* the metadata.
|
||||
* Else return the number of entries in the 'I' namespace.
|
||||
*/
|
||||
unsigned int getMediaCount() const;
|
||||
|
||||
/**
|
||||
* Get the number of all entries in the zim file.
|
||||
*
|
||||
* @return Return the number of all the entries, whatever their MIMEtype or
|
||||
* their namespace.
|
||||
*/
|
||||
unsigned int getGlobalCount() const;
|
||||
|
||||
/**
|
||||
* Get the path of the zim file.
|
||||
*
|
||||
* @return the path of the zim file as given in the constructor.
|
||||
*/
|
||||
string getZimFilePath() const;
|
||||
|
||||
/**
|
||||
* Get the Id of the zim file.
|
||||
*
|
||||
* @return The uuid stored in the zim file.
|
||||
*/
|
||||
string getId() const;
|
||||
|
||||
/**
|
||||
* Get the url of a random page.
|
||||
*
|
||||
* Deprecated : Use `getRandomPage` instead.
|
||||
*
|
||||
* @return Url of a random page. The page is picked from all entries in
|
||||
* the 'A' namespace.
|
||||
* The main page is excluded from the potential results.
|
||||
*/
|
||||
DEPRECATED string getRandomPageUrl() const;
|
||||
|
||||
/**
|
||||
* Get a random page.
|
||||
*
|
||||
* @return A random Entry. The entry is picked from all entries in
|
||||
* the 'A' namespace.
|
||||
* The main entry is excluded from the potential results.
|
||||
*/
|
||||
Entry getRandomPage() const;
|
||||
|
||||
/**
|
||||
* Get the url of the first page.
|
||||
*
|
||||
* Deprecated : Use `getFirstPage` instead.
|
||||
*
|
||||
* @return Url of the first entry in the 'A' namespace.
|
||||
*/
|
||||
DEPRECATED string getFirstPageUrl() const;
|
||||
|
||||
/**
|
||||
* Get the entry of the first page.
|
||||
*
|
||||
* @return The first entry in the 'A' namespace.
|
||||
*/
|
||||
Entry getFirstPage() const;
|
||||
|
||||
/**
|
||||
* Get the url of the main page.
|
||||
*
|
||||
* Deprecated : Use `getMainPage` instead.
|
||||
*
|
||||
* @return Url of the main page as specified in the zim file.
|
||||
*/
|
||||
DEPRECATED string getMainPageUrl() const;
|
||||
|
||||
/**
|
||||
* Get the entry of the main page.
|
||||
*
|
||||
* @return Entry of the main page as specified in the zim file.
|
||||
*/
|
||||
Entry getMainPage() const;
|
||||
|
||||
/**
|
||||
* Get the content of a metadata.
|
||||
*
|
||||
* @param[in] name The name of the metadata.
|
||||
* @param[out] value The value will be set to the content of the metadata.
|
||||
* @return True if it was possible to get the content of the metadata.
|
||||
*/
|
||||
bool getMetatag(const string& name, string& value) const;
|
||||
|
||||
/**
|
||||
* Get the title of the zim file.
|
||||
*
|
||||
* @return The title of zim file as specified in the zim metadata.
|
||||
* If no title has been set, return a title computed from the
|
||||
* file path.
|
||||
*/
|
||||
string getTitle() const;
|
||||
|
||||
/**
|
||||
* Get the description of the zim file.
|
||||
*
|
||||
* @return The description of the zim file as specified in the zim metadata.
|
||||
* If no description has been set, return the subtitle.
|
||||
*/
|
||||
string getDescription() const;
|
||||
|
||||
/**
|
||||
* Get the language of the zim file.
|
||||
*
|
||||
* @return The language of the zim file as specified in the zim metadata.
|
||||
*/
|
||||
string getLanguage() const;
|
||||
|
||||
/**
|
||||
* Get the name of the zim file.
|
||||
*
|
||||
* @return The name of the zim file as specified in the zim metadata.
|
||||
*/
|
||||
string getName() const;
|
||||
|
||||
/**
|
||||
* Get the tags of the zim file.
|
||||
*
|
||||
* @return The tags of the zim file as specified in the zim metadata.
|
||||
*/
|
||||
string getTags() const;
|
||||
|
||||
/**
|
||||
* Get the date of the zim file.
|
||||
*
|
||||
* @return The date of the zim file as specified in the zim metadata.
|
||||
*/
|
||||
string getDate() const;
|
||||
|
||||
/**
|
||||
* Get the creator of the zim file.
|
||||
*
|
||||
* @return The creator of the zim file as specified in the zim metadata.
|
||||
*/
|
||||
string getCreator() const;
|
||||
|
||||
/**
|
||||
* Get the publisher of the zim file.
|
||||
*
|
||||
* @return The publisher of the zim file as specified in the zim metadata.
|
||||
*/
|
||||
string getPublisher() const;
|
||||
|
||||
/**
|
||||
* Get the origId of the zim file.
|
||||
*
|
||||
* The origId is only used in the case of patch zim file and is the Id
|
||||
* of the original zim file.
|
||||
*
|
||||
* @return The origId of the zim file as specified in the zim metadata.
|
||||
*/
|
||||
string getOrigId() const;
|
||||
|
||||
/**
|
||||
* Get the favicon of the zim file.
|
||||
*
|
||||
* @param[out] content The content of the favicon.
|
||||
* @param[out] mimeType The mimeType of the favicon.
|
||||
* @return True if a favicon has been found.
|
||||
*/
|
||||
bool getFavicon(string& content, string& mimeType) const;
|
||||
|
||||
/**
|
||||
* Get an entry associated to a path.
|
||||
*
|
||||
* @param path The path of the entry.
|
||||
* @return The entry.
|
||||
* @throw NoEntry If no entry corresponds to the path.
|
||||
*/
|
||||
Entry getEntryFromPath(const std::string& path) const;
|
||||
|
||||
/**
|
||||
* Get an entry associated to an url encoded path.
|
||||
*
|
||||
* Equivalent to `getEntryFromPath(urlDecode(path));`
|
||||
*
|
||||
* @param path The url encoded path.
|
||||
* @return The entry.
|
||||
* @throw NoEntry If no entry corresponds to the path.
|
||||
*/
|
||||
Entry getEntryFromEncodedPath(const std::string& path) const;
|
||||
|
||||
/**
|
||||
* Get an entry associated to a title.
|
||||
*
|
||||
* @param title The title.
|
||||
* @return The entry
|
||||
* @throw NoEntry If no entry corresponds to the title.
|
||||
*/
|
||||
Entry getEntryFromTitle(const std::string& title) const;
|
||||
|
||||
/**
|
||||
* Get the url of a page specified by a title.
|
||||
*
|
||||
* @param[in] title the title of the page.
|
||||
* @param[out] url the url of the page.
|
||||
* @return True if the page can be found.
|
||||
*/
|
||||
DEPRECATED bool getPageUrlFromTitle(const string& title, string& url) const;
|
||||
|
||||
/**
|
||||
* Get the mimetype of a entry specified by a url.
|
||||
*
|
||||
* @param[in] url the url of the entry.
|
||||
* @param[out] mimeType the mimeType of the entry.
|
||||
* @return True if the mimeType has been found.
|
||||
*/
|
||||
DEPRECATED bool getMimeTypeByUrl(const string& url, string& mimeType) const;
|
||||
|
||||
/**
|
||||
* Get the content of an entry specified by a url.
|
||||
*
|
||||
* Alias to `getContentByEncodedUrl`
|
||||
*/
|
||||
DEPRECATED bool getContentByUrl(const string& url,
|
||||
string& content,
|
||||
string& title,
|
||||
unsigned int& contentLength,
|
||||
string& contentType) const;
|
||||
|
||||
/**
|
||||
* Get the content of an entry specified by a url encoded url.
|
||||
*
|
||||
* Equivalent to getContentByDecodedUrl(urlDecode(url), ...).
|
||||
*/
|
||||
DEPRECATED bool getContentByEncodedUrl(const string& url,
|
||||
string& content,
|
||||
string& title,
|
||||
unsigned int& contentLength,
|
||||
string& contentType,
|
||||
string& baseUrl) const;
|
||||
|
||||
/**
|
||||
* Get the content of an entry specified by an url encoded url.
|
||||
*
|
||||
* Equivalent to getContentByEncodedUrl but without baseUrl.
|
||||
*/
|
||||
DEPRECATED bool getContentByEncodedUrl(const string& url,
|
||||
string& content,
|
||||
string& title,
|
||||
unsigned int& contentLength,
|
||||
string& contentType) const;
|
||||
|
||||
/**
|
||||
* Get the content of an entry specified by a url.
|
||||
*
|
||||
* @param[in] url The url of the entry.
|
||||
* @param[out] content The content of the entry.
|
||||
* @param[out] title the title of the entry.
|
||||
* @param[out] contentLength The size of the entry (size of content).
|
||||
* @param[out] contentType The mimeType of the entry.
|
||||
* @param[out] baseUrl Return the true url of the entry.
|
||||
* If the specified entry is a redirection, contains
|
||||
* the url of the targeted entry.
|
||||
* @return True if the entry has been found.
|
||||
*/
|
||||
DEPRECATED bool getContentByDecodedUrl(const string& url,
|
||||
string& content,
|
||||
string& title,
|
||||
unsigned int& contentLength,
|
||||
string& contentType,
|
||||
string& baseUrl) const;
|
||||
/**
|
||||
* Get the content of an entry specified by a url.
|
||||
*
|
||||
* Equivalent to getContentByDecodedUrl but without the baseUrl.
|
||||
*/
|
||||
DEPRECATED bool getContentByDecodedUrl(const string& url,
|
||||
string& content,
|
||||
string& title,
|
||||
unsigned int& contentLength,
|
||||
string& contentType) const;
|
||||
|
||||
/**
|
||||
* Search for entries with title starting with prefix (case sensitive).
|
||||
*
|
||||
* Suggestions are stored in an internal vector and can be retrieved using
|
||||
* `getNextSuggestion` method.
|
||||
*
|
||||
* @param prefix The prefix to search.
|
||||
* @param suggestionsCount How many suggestions to search for.
|
||||
* @param reset If true, remove previous suggestions in the internal vector.
|
||||
* If false, add suggestions to the internal vector
|
||||
* (until internal vector size is suggestionCount (or no more
|
||||
* suggestion))
|
||||
* @return True if some suggestions were added to the internal vector.
|
||||
*/
|
||||
bool searchSuggestions(const string& prefix,
|
||||
unsigned int suggestionsCount,
|
||||
const bool reset = true);
|
||||
|
||||
/**
|
||||
* Search for entries for the given prefix.
|
||||
*
|
||||
* If the zim file has a internal fulltext index, the suggestions will be
|
||||
* searched using it.
|
||||
* Else the suggestions will be searched using `searchSuggestions` while trying
|
||||
* to be smart about case sensitivity (using `getTitleVariants`).
|
||||
*
|
||||
* In any case, suggestions are stored in an internal vector and can be
|
||||
* retrieved using `getNextSuggestion` method.
|
||||
* The internal vector will be reset.
|
||||
*
|
||||
* @param prefix The prefix to search for.
|
||||
* @param suggestionsCount How many suggestions to search for.
|
||||
*/
|
||||
bool searchSuggestionsSmart(const string& prefix,
|
||||
unsigned int suggestionsCount);
|
||||
|
||||
/**
|
||||
* Check if the url exists in the zim file.
|
||||
*
|
||||
* Deprecated : Use `pathExists` instead.
|
||||
*
|
||||
* @param url the url to check.
|
||||
* @return True if the url exists in the zim file.
|
||||
*/
|
||||
DEPRECATED bool urlExists(const string& url) const;
|
||||
|
||||
/**
|
||||
* Check if the path exists in the zim file.
|
||||
*
|
||||
* @param path the path to check.
|
||||
* @return True if the path exists in the zim file.
|
||||
*/
|
||||
bool pathExists(const string& path) const;
|
||||
|
||||
/**
|
||||
* Check if the zim file has a embedded fulltext index.
|
||||
*
|
||||
* @return True if the zim file has a embedded fulltext index
|
||||
* and is not split (else the fulltext is not accessible).
|
||||
*/
|
||||
bool hasFulltextIndex() const;
|
||||
|
||||
/**
|
||||
* Get potential case title variations for a title.
|
||||
*
|
||||
* @param title a title.
|
||||
* @return the list of variations.
|
||||
*/
|
||||
std::vector<std::string> getTitleVariants(const std::string& title) const;
|
||||
|
||||
/**
|
||||
* Get the next suggestion title.
|
||||
*
|
||||
* @param[out] title the title of the suggestion.
|
||||
* @return True if title has been set.
|
||||
*/
|
||||
bool getNextSuggestion(string& title);
|
||||
|
||||
/**
|
||||
* Get the next suggestion title and url.
|
||||
*
|
||||
* @param[out] title the title of the suggestion.
|
||||
* @param[out] url the url of the suggestion.
|
||||
* @return True if title and url have been set.
|
||||
*/
|
||||
bool getNextSuggestion(string& title, string& url);
|
||||
|
||||
/**
|
||||
* Get if we can check zim file integrity (has a checksum).
|
||||
*
|
||||
* @return True if the zim file has a checksum.
|
||||
*/
|
||||
bool canCheckIntegrity() const;
|
||||
|
||||
/**
|
||||
* Check if the zim file is corrupted.
|
||||
*
|
||||
* @return True if zim file is corrupted.
|
||||
*/
|
||||
bool isCorrupted() const;
|
||||
|
||||
/**
|
||||
* Parse a full url into a namespace and url.
|
||||
*
|
||||
* @param[in] url The full url ("/N/url").
|
||||
* @param[out] ns The namespace (N).
|
||||
* @param[out] title The url (url).
|
||||
* @return True
|
||||
*/
|
||||
DEPRECATED bool parseUrl(const string& url, char* ns, string& title) const;
|
||||
|
||||
/**
|
||||
* Return the total size of the zim file.
|
||||
*
|
||||
* If zim file is split, return the sum of all parts' size.
|
||||
*
|
||||
* @return Size of the zim file in KiB.
|
||||
*/
|
||||
unsigned int getFileSize() const;
|
||||
|
||||
/**
|
||||
* Get the zim file handler.
|
||||
*
|
||||
* @return The libzim file handler.
|
||||
*/
|
||||
zim::File* getZimFileHandler() const;
|
||||
|
||||
/**
|
||||
* Get the zim article object associated to a url.
|
||||
*
|
||||
* @param[in] url The url of the article.
|
||||
* @param[out] article The libzim article object.
|
||||
* @return True if the url is good (article.good()).
|
||||
*/
|
||||
DEPRECATED bool getArticleObjectByDecodedUrl(const string& url,
|
||||
zim::Article& article) const;
|
||||
|
||||
protected:
|
||||
zim::File* zimFileHandler;
|
||||
zim::size_type firstArticleOffset;
|
||||
zim::size_type lastArticleOffset;
|
||||
zim::size_type nsACount;
|
||||
zim::size_type nsICount;
|
||||
std::string zimFilePath;
|
||||
|
||||
std::vector<std::vector<std::string>> suggestions;
|
||||
std::vector<std::vector<std::string>>::iterator suggestionsOffset;
|
||||
|
||||
private:
|
||||
std::map<const std::string, unsigned int> parseCounterMetadata() const;
|
||||
};
|
||||
}
|
||||
|
||||
#endif
|
||||
230
include/searcher.h
Normal file
@@ -0,0 +1,230 @@
|
||||
/*
|
||||
* Copyright 2011 Emmanuel Engelhart <kelson@kiwix.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 3 of the License, or
|
||||
* any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
* MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#ifndef KIWIX_SEARCHER_H
|
||||
#define KIWIX_SEARCHER_H
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <unicode/putil.h>
|
||||
#include <algorithm>
|
||||
#include <cctype>
|
||||
#include <locale>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <vector>
|
||||
#include "common/pathTools.h"
|
||||
#include "common/stringTools.h"
|
||||
#include "kiwix_config.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace kiwix
|
||||
{
|
||||
class Reader;
|
||||
class Result
|
||||
{
|
||||
public:
|
||||
virtual ~Result(){};
|
||||
virtual std::string get_url() = 0;
|
||||
virtual std::string get_title() = 0;
|
||||
virtual int get_score() = 0;
|
||||
virtual std::string get_snippet() = 0;
|
||||
virtual std::string get_content() = 0;
|
||||
virtual int get_wordCount() = 0;
|
||||
virtual int get_size() = 0;
|
||||
virtual int get_readerIndex() = 0;
|
||||
};
|
||||
|
||||
struct SearcherInternal;
|
||||
/**
|
||||
* The Searcher class is responsible for doing different kinds of search using the
|
||||
* fulltext index.
|
||||
*
|
||||
* Historically, there are two kind of fulltext index :
|
||||
* - The legacy one, is the external fulltext index. A directory stored outside
|
||||
* of the zim file.
|
||||
* - The new one, a embedded fulltext index in the zim file.
|
||||
*
|
||||
* The legacy external fulltext index has to be considered an obsolete format with
|
||||
* fewer functionalities:
|
||||
* - No multi zim search ;
|
||||
* - No geo_search ;
|
||||
* - No suggestions search ;
|
||||
*
|
||||
* To reflect this, there are two Search creation "API":
|
||||
* - One for the external fulltext index, using the constructor taking a
|
||||
* xapianDirectoryPath) ;
|
||||
* - One for the embedded fulltext index, using a "empty" constructor and the
|
||||
* `add_reader` method.
|
||||
*
|
||||
* On top of that, the Searcher may (if compiled with ctpp2) be used to
|
||||
* generate a html page for the search result. This use a template that need a
|
||||
* humanReaderName. This feature is only used by kiwix-serve and this should be
|
||||
* move outside of Searcher (and with a better API). If you don't use the html
|
||||
* rendering (getHtml method), you better should simply ignore the different
|
||||
* humanReadableName attributes (or give an empty string).
|
||||
*/
|
||||
class Searcher
|
||||
{
|
||||
public:
|
||||
/**
|
||||
* The default constructor.
|
||||
*
|
||||
* @param humanReadableName The global zim's humanReadableName.
|
||||
* Used to generate pagination links.
|
||||
*/
|
||||
Searcher(const string& humanReadableName = "");
|
||||
|
||||
/**
|
||||
* The constructor for legacy external fulltext index.
|
||||
*
|
||||
* @param xapianDirectoryPath The path to the external index directory.
|
||||
* @param reader The reader associated to the external index.
|
||||
* It will be used to retrieve the article content or generate
|
||||
* the snippet.
|
||||
* @param humanReadableName The humanReadableName for the zim.
|
||||
*/
|
||||
Searcher(const string& xapianDirectoryPath,
|
||||
Reader* reader,
|
||||
const string& humanReadableName);
|
||||
~Searcher();
|
||||
|
||||
/**
|
||||
* Add a reader (containing embedded fulltext index) to the search.
|
||||
*
|
||||
* @param reader The Reader for the zim containing the fulltext index.
|
||||
* @param humanReaderName The human readable name of the reader.
|
||||
* @return true if the reader has been added.
|
||||
* false if the reader cannot be added (no embedded fulltext index present)
|
||||
*/
|
||||
bool add_reader(Reader* reader, const std::string& humanReaderName);
|
||||
|
||||
/**
|
||||
* Start a search on the zim associated to the Searcher.
|
||||
*
|
||||
* Search results should be retrieved using the getNextResult method.
|
||||
*
|
||||
* @param search The search query.
|
||||
* @param resultStart the start offset of the search results (used for pagination).
|
||||
* @param resultEnd the end offset of the search results (used for pagination).
|
||||
* @param verbose print some info on stdout if true.
|
||||
*/
|
||||
void search(std::string& search,
|
||||
unsigned int resultStart,
|
||||
unsigned int resultEnd,
|
||||
const bool verbose = false);
|
||||
|
||||
/**
|
||||
* Start a geographic search.
|
||||
* The search return result for entry in a disc of center latitude/longitude
|
||||
* and radius distance.
|
||||
*
|
||||
* Search results should be retrieved using the getNextResult method.
|
||||
*
|
||||
* @param latitude The latitude of the center point.
|
||||
* @param longitude The longitude of the center point.
|
||||
* @param distance The radius of the disc.
|
||||
* @param resultStart the start offset of the search results (used for pagination).
|
||||
* @param resultEnd the end offset of the search results (used for pagination).
|
||||
* @param verbose print some info on stdout if true.
|
||||
*/
|
||||
void geo_search(float latitude, float longitude, float distance,
|
||||
unsigned int resultStart,
|
||||
unsigned int resultEnd,
|
||||
const bool verbose = false);
|
||||
|
||||
/**
|
||||
* Start a suggestion search.
|
||||
* The search made depend of the "version" of the embedded index.
|
||||
* - If the index is new enough and has a title namespace, the search is
|
||||
* made in the titles only.
|
||||
* - Else the search is made on the whole article content.
|
||||
* In any case, the search is made "partial" (as adding '*' at the end of the query)
|
||||
*
|
||||
* @param search The search query.
|
||||
* @param verbose print some info on stdout if true.
|
||||
*/
|
||||
void suggestions(std::string& search, const bool verbose = false);
|
||||
|
||||
/**
|
||||
* Get the next result of a started search.
|
||||
* This is the method to use to loop over the search results.
|
||||
*/
|
||||
Result* getNextResult();
|
||||
|
||||
/**
|
||||
* Restart the previous search.
|
||||
* Next call to getNextResult will return the first result.
|
||||
*/
|
||||
void restart_search();
|
||||
|
||||
/**
|
||||
* Get an estimate of the result count.
|
||||
*/
|
||||
unsigned int getEstimatedResultCount();
|
||||
|
||||
/**
|
||||
* Set protocol prefix.
|
||||
* Only used by getHtml.
|
||||
*/
|
||||
bool setProtocolPrefix(const std::string prefix);
|
||||
|
||||
/**
|
||||
* Set search protocol prefix.
|
||||
* Only used by getHtml.
|
||||
*/
|
||||
bool setSearchProtocolPrefix(const std::string prefix);
|
||||
|
||||
#ifdef ENABLE_CTPP2
|
||||
/**
|
||||
* Generate the html page with the results of the search.
|
||||
*/
|
||||
string getHtml();
|
||||
#endif
|
||||
|
||||
protected:
|
||||
std::string beautifyInteger(const unsigned int number);
|
||||
void closeIndex();
|
||||
void searchInIndex(string& search,
|
||||
const unsigned int resultStart,
|
||||
const unsigned int resultEnd,
|
||||
const bool verbose = false);
|
||||
|
||||
std::vector<Reader*> readers;
|
||||
std::vector<std::string> humanReaderNames;
|
||||
SearcherInternal* internal;
|
||||
std::string searchPattern;
|
||||
std::string protocolPrefix;
|
||||
std::string searchProtocolPrefix;
|
||||
unsigned int resultCountPerPage;
|
||||
unsigned int estimatedResultCount;
|
||||
unsigned int resultStart;
|
||||
unsigned int resultEnd;
|
||||
std::string contentHumanReadableId;
|
||||
|
||||
private:
|
||||
void reset();
|
||||
|
||||
};
|
||||
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
98
include/xapianSearcher.h
Normal file
@@ -0,0 +1,98 @@
|
||||
/*
|
||||
* Copyright 2011 Emmanuel Engelhart <kelson@kiwix.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 3 of the License, or
|
||||
* any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
* MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#ifndef KIWIX_XAPIAN_SEARCHER_H
|
||||
#define KIWIX_XAPIAN_SEARCHER_H
|
||||
|
||||
#include <xapian.h>
|
||||
#include "reader.h"
|
||||
#include "searcher.h"
|
||||
|
||||
#include <map>
|
||||
#include <string>
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace kiwix
|
||||
{
|
||||
class XapianSearcher;
|
||||
|
||||
class XapianResult : public Result
|
||||
{
|
||||
public:
|
||||
XapianResult(XapianSearcher* searcher, Xapian::MSetIterator& iterator);
|
||||
virtual ~XapianResult(){};
|
||||
|
||||
virtual std::string get_url();
|
||||
virtual std::string get_title();
|
||||
virtual int get_score();
|
||||
virtual std::string get_snippet();
|
||||
virtual std::string get_content();
|
||||
virtual int get_wordCount();
|
||||
virtual int get_size();
|
||||
virtual int get_readerIndex() { return 0; };
|
||||
|
||||
private:
|
||||
XapianSearcher* searcher;
|
||||
Xapian::MSetIterator iterator;
|
||||
Xapian::Document document;
|
||||
};
|
||||
|
||||
class NoXapianIndexInZim : public exception
|
||||
{
|
||||
virtual const char* what() const throw()
|
||||
{
|
||||
return "There is no fulltext index in the zim file";
|
||||
}
|
||||
};
|
||||
|
||||
class XapianSearcher
|
||||
{
|
||||
friend class XapianResult;
|
||||
|
||||
public:
|
||||
XapianSearcher(const string& xapianDirectoryPath, Reader* reader);
|
||||
virtual ~XapianSearcher(){};
|
||||
void searchInIndex(string& search,
|
||||
const unsigned int resultStart,
|
||||
const unsigned int resultEnd,
|
||||
const bool verbose = false);
|
||||
virtual Result* getNextResult();
|
||||
void restart_search();
|
||||
|
||||
Xapian::MSet results;
|
||||
|
||||
protected:
|
||||
void closeIndex();
|
||||
void openIndex(const string& xapianDirectoryPath);
|
||||
void setup_queryParser();
|
||||
|
||||
Reader* reader;
|
||||
Xapian::Database readableDatabase;
|
||||
std::string language;
|
||||
std::string stopwords;
|
||||
Xapian::QueryParser queryParser;
|
||||
Xapian::Stem stemmer;
|
||||
Xapian::SimpleStopper stopper;
|
||||
Xapian::MSetIterator current_result;
|
||||
std::map<std::string, int> valuesmap;
|
||||
};
|
||||
}
|
||||
|
||||
#endif
|
||||
10
kiwix.pc.in
Normal file
@@ -0,0 +1,10 @@
|
||||
prefix=@prefix@
|
||||
libdir=${prefix}/lib64
|
||||
includedir=${prefix}/include
|
||||
|
||||
Name: libkiwix
|
||||
Description: A library that contains a lot of things used by other kiwix programs
|
||||
Version: @version@
|
||||
Requires: @requires@
|
||||
Libs: -L${libdir} -lkiwix @extra_libs@
|
||||
Cflags: -I${includedir}/ @extra_cflags@
|
||||
132
meson.build
Normal file
@@ -0,0 +1,132 @@
|
||||
project('kiwix-lib', 'cpp',
|
||||
version : '2.0.2',
|
||||
license : 'GPL',
|
||||
default_options : ['c_std=c11', 'cpp_std=c++11', 'werror=true'])
|
||||
|
||||
compiler = meson.get_compiler('cpp')
|
||||
find_library_in_compiler = meson.version().version_compare('>=0.31.0')
|
||||
|
||||
static_deps = get_option('android') or get_option('default_library') == 'static'
|
||||
if get_option('android')
|
||||
extra_libs = ['-llog']
|
||||
else
|
||||
extra_libs = []
|
||||
endif
|
||||
|
||||
thread_dep = dependency('threads')
|
||||
libicu_dep = dependency('icu-i18n', static:static_deps)
|
||||
libzim_dep = dependency('libzim', version : '>=4.0.0', static:static_deps)
|
||||
pugixml_dep = dependency('pugixml', static:static_deps)
|
||||
libaria2_dep = dependency('libaria2', static:static_deps, required:false)
|
||||
|
||||
ctpp2_include_path = ''
|
||||
has_ctpp2_dep = false
|
||||
ctpp2_prefix_install = get_option('ctpp2-install-prefix')
|
||||
ctpp2_link_args = []
|
||||
if ctpp2_prefix_install == ''
|
||||
if compiler.has_header('ctpp2/CTPP2Logger.hpp')
|
||||
if find_library_in_compiler
|
||||
ctpp2_lib = compiler.find_library('ctpp2')
|
||||
else
|
||||
ctpp2_lib = find_library('ctpp2')
|
||||
endif
|
||||
ctpp2_link_args = ['-lctpp2']
|
||||
if meson.is_cross_build() and host_machine.system() == 'windows'
|
||||
if find_library_in_compiler
|
||||
iconv_lib = compiler.find_library('iconv', required:false)
|
||||
else
|
||||
iconv_lib = find_library('iconv', required:false)
|
||||
endif
|
||||
if iconv_lib.found()
|
||||
ctpp2_link_args += ['-liconv']
|
||||
endif
|
||||
endif
|
||||
has_ctpp2_dep = true
|
||||
ctpp2_dep = declare_dependency(link_args:ctpp2_link_args)
|
||||
else
|
||||
message('ctpp2/CTPP2Logger.hpp not found. Compiling without CTPP2 support')
|
||||
endif
|
||||
else
|
||||
if not find_library_in_compiler
|
||||
error('For custom ctpp2_prefix_install you need a meson version >=0.31.0')
|
||||
endif
|
||||
ctpp2_include_path = ctpp2_prefix_install + '/include'
|
||||
ctpp2_include_args = ['-I'+ctpp2_include_path]
|
||||
if compiler.has_header('ctpp2/CTPP2Logger.hpp', args:ctpp2_include_args)
|
||||
ctpp2_include_dir = include_directories(ctpp2_include_path, is_system:true)
|
||||
ctpp2_lib_path = join_paths(ctpp2_prefix_install, get_option('libdir'))
|
||||
message(ctpp2_lib_path)
|
||||
ctpp2_lib = compiler.find_library('ctpp2', dirs:ctpp2_lib_path, required:false)
|
||||
if not ctpp2_lib.found()
|
||||
ctpp2_lib_path = join_paths(ctpp2_prefix_install, 'lib')
|
||||
message(ctpp2_lib_path)
|
||||
ctpp2_lib = compiler.find_library('ctpp2', dirs:ctpp2_lib_path)
|
||||
endif
|
||||
ctpp2_link_args = ['-L'+ctpp2_lib_path, '-lctpp2']
|
||||
if meson.is_cross_build() and host_machine.system() == 'windows'
|
||||
iconv_lib = compiler.find_library('iconv', required:false)
|
||||
if iconv_lib.found()
|
||||
ctpp2_link_args += ['-liconv']
|
||||
endif
|
||||
endif
|
||||
has_ctpp2_dep = true
|
||||
ctpp2_dep = declare_dependency(include_directories:ctpp2_include_dir, link_args:ctpp2_link_args)
|
||||
else
|
||||
message('ctpp2/CTPP2Logger.hpp not found. Compiling without CTPP2 support')
|
||||
endif
|
||||
endif
|
||||
|
||||
xapian_dep = dependency('xapian-core', required:false, static:static_deps)
|
||||
|
||||
all_deps = [thread_dep, libicu_dep, libzim_dep, xapian_dep, pugixml_dep, libaria2_dep]
|
||||
if has_ctpp2_dep
|
||||
all_deps += [ctpp2_dep]
|
||||
endif
|
||||
|
||||
inc = include_directories('include')
|
||||
|
||||
conf = configuration_data()
|
||||
conf.set('VERSION', '"@0@"'.format(meson.project_version()))
|
||||
conf.set('ENABLE_CTPP2', has_ctpp2_dep)
|
||||
conf.set('ENABLE_LIBARIA2', libaria2_dep.found())
|
||||
|
||||
if build_machine.system() == 'windows'
|
||||
extra_link_args = ['-lshlwapi', '-lwinmm']
|
||||
else
|
||||
extra_link_args = []
|
||||
endif
|
||||
|
||||
subdir('include')
|
||||
subdir('scripts')
|
||||
subdir('static')
|
||||
subdir('src')
|
||||
subdir('test')
|
||||
|
||||
pkg_requires = ['libzim', 'icu-i18n', 'pugixml']
|
||||
if libaria2_dep.found()
|
||||
pkg_requires += ['libaria2']
|
||||
endif
|
||||
if xapian_dep.found()
|
||||
pkg_requires += ['xapian-core']
|
||||
endif
|
||||
|
||||
extra_cflags = ''
|
||||
if has_ctpp2_dep
|
||||
extra_libs += ctpp2_link_args
|
||||
if ctpp2_include_path != ''
|
||||
extra_cflags = '-I'+ctpp2_include_path
|
||||
endif
|
||||
endif
|
||||
|
||||
pkg_conf = configuration_data()
|
||||
pkg_conf.set('prefix', get_option('prefix'))
|
||||
pkg_conf.set('requires', ' '.join(pkg_requires))
|
||||
pkg_conf.set('extra_libs', ' '.join(extra_libs))
|
||||
pkg_conf.set('extra_cflags', extra_cflags)
|
||||
pkg_conf.set('version', meson.project_version())
|
||||
configure_file(output : 'kiwix.pc',
|
||||
configuration : pkg_conf,
|
||||
input : 'kiwix.pc.in',
|
||||
install_dir: get_option('libdir')+'/pkgconfig'
|
||||
)
|
||||
|
||||
4
meson_options.txt
Normal file
@@ -0,0 +1,4 @@
|
||||
option('ctpp2-install-prefix', type : 'string', value : '',
|
||||
description : 'Prefix where ctpp libs has been installed')
|
||||
option('android', type : 'boolean', value : false,
|
||||
description : 'Do we make a kiwix-lib for android')
|
||||
8
scripts/ctpp2c.sh
Executable file
@@ -0,0 +1,8 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
|
||||
ctpp2c=$1
|
||||
SOURCE=$(pwd)/$2
|
||||
DEST=$3
|
||||
|
||||
$ctpp2c $SOURCE $DEST
|
||||
201
scripts/kiwix-compile-resources
Executable file
@@ -0,0 +1,201 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
'''
|
||||
Copyright 2016 Matthieu Gautier <mgautier@kymeria.fr>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3 of the License, or any
|
||||
later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful, but
|
||||
WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
||||
02110-1301, USA.
|
||||
'''
|
||||
|
||||
import argparse
|
||||
import os.path
|
||||
import re
|
||||
|
||||
def full_identifier(filename):
|
||||
parts = os.path.normpath(filename).split(os.sep)
|
||||
parts = [to_identifier(part) for part in parts]
|
||||
print(filename, parts)
|
||||
return parts
|
||||
|
||||
def to_identifier(name):
    """Turn an arbitrary name into a string usable as a C++ identifier.

    Every character outside [0-9a-zA-Z] is replaced by an underscore.  A
    leading underscore is added when the result starts with a digit, or when
    the result is empty (an empty name used to raise IndexError).
    """
    ident = re.sub(r'[^0-9a-zA-Z]', '_', name)
    if not ident or ident[0].isnumeric():
        return "_"+ident
    return ident
|
||||
|
||||
resource_impl_template = """
|
||||
static const unsigned char {data_identifier}[] = {{
|
||||
{resource_content}
|
||||
}};
|
||||
|
||||
namespace RESOURCE {{
|
||||
{namespaces_open}
|
||||
const std::string {identifier} = init_resource("{env_identifier}", {data_identifier}, {resource_len});
|
||||
{namespaces_close}
|
||||
}}
|
||||
"""
|
||||
|
||||
resource_getter_template = """
|
||||
if (name == "{common_name}")
|
||||
return RESOURCE::{identifier};
|
||||
"""
|
||||
|
||||
resource_decl_template = """{namespaces_open}
|
||||
extern const std::string {identifier};
|
||||
{namespaces_close}"""
|
||||
|
||||
class Resource:
    """A single file to embed in the generated C++ sources.

    Attributes set by the constructor:
      filename   -- the stripped name as listed in the resource file
      identifier -- list of C++ identifiers built by full_identifier()
      data       -- raw bytes of the first matching file found in base_dirs
    """

    def __init__(self, base_dirs, filename):
        filename = filename.strip()
        self.filename = filename
        self.identifier = full_identifier(filename)
        # Try each directory in order and keep the first file that exists.
        for directory in base_dirs:
            candidate = os.path.join(directory, filename)
            try:
                with open(candidate, 'rb') as stream:
                    self.data = stream.read()
            except FileNotFoundError:
                continue
            break
        else:
            raise Exception("Impossible to found {}".format(filename))

    def _namespaces_open(self):
        # One "namespace X {" per identifier component except the last.
        return " ".join("namespace {} {{".format(part)
                        for part in self.identifier[:-1])

    def _namespaces_close(self):
        # One closing brace per namespace opened by _namespaces_open().
        return " ".join(["}"]*(len(self.identifier)-1))

    def dump_impl(self):
        """Return the C++ definition: byte array plus std::string constant."""
        # Rows of at most 16 bytes, each byte rendered as 0x..
        rows = (self.data[start:start+16]
                for start in range(0, len(self.data), 16))
        content = ",\n ".join(
            ", ".join(format(byte, "#04x") for byte in row) for row in rows)

        return resource_impl_template.format(
            data_identifier="_".join([""]+self.identifier),
            resource_content=content,
            resource_len=len(self.data),
            namespaces_open=self._namespaces_open(),
            namespaces_close=self._namespaces_close(),
            identifier=self.identifier[-1],
            env_identifier="RES_"+"_".join(self.identifier)+"_PATH"
        )

    def dump_getter(self):
        """Return the C++ name test used inside the generated getter."""
        qualified = "::".join(self.identifier)
        return resource_getter_template.format(
            common_name=self.filename,
            identifier=qualified
        )

    def dump_decl(self):
        """Return the C++ extern declaration for the generated header."""
        return resource_decl_template.format(
            namespaces_open=self._namespaces_open(),
            namespaces_close=self._namespaces_close(),
            identifier=self.identifier[-1]
        )
|
||||
|
||||
|
||||
|
||||
# Skeleton of the generated C++ source file.  It provides:
#   * init_resource(): returns the content of the file named by the
#     environment variable `name` when that variable is set and the file can
#     be opened, and the embedded bytes otherwise;
#   * getResource_<basename>(): looks a resource up by its file name and
#     throws ResourceNotFound when no name matches.
# The override file is opened in binary mode so that binary resources are not
# altered by text-mode translation.  <string> and <iterator> are included
# directly because the generated code uses std::string and
# std::istreambuf_iterator.  Doubled braces are str.format escapes.
master_c_template = """//This file is automatically generated. Do not modify it.

#include <stdlib.h>
#include <fstream>
#include <iterator>
#include <string>
#include "{include_file}"

static std::string init_resource(const char* name, const unsigned char* content, int len)
{{
    char * resPath = getenv(name);
    if (NULL == resPath)
        return std::string(reinterpret_cast<const char*>(content), len);

    std::ifstream ifs(resPath, std::ios::in | std::ios::binary);
    if (!ifs.good())
        return std::string(reinterpret_cast<const char*>(content), len);
    return std::string( (std::istreambuf_iterator<char>(ifs)),
                        (std::istreambuf_iterator<char>() ));
}}

const std::string& getResource_{basename}(const std::string& name) {{
{RESOURCES_GETTER}
    throw ResourceNotFound("Resource not found.");
}}

{RESOURCES}

"""
|
||||
|
||||
def gen_c_file(resources, basename):
    """Render the generated C++ source file.

    resources -- the Resource objects to embed
    basename  -- file name of the generated header; it is used verbatim in
                 the #include line and, converted with to_identifier(), as the
                 suffix of the getResource_ function.
    """
    implementations = "\n\n".join(res.dump_impl() for res in resources)
    getters = "\n\n".join(res.dump_getter() for res in resources)
    return master_c_template.format(
        RESOURCES=implementations,
        RESOURCES_GETTER=getters,
        include_file=basename,
        basename=to_identifier(basename)
    )
|
||||
|
||||
|
||||
|
||||
# Skeleton of the generated C++ header: include guard, extern declarations of
# every resource inside namespace RESOURCE, the ResourceNotFound exception,
# the getResource_<basename>() prototype and the getResource(a) convenience
# macro.  Doubled braces are str.format escapes.  The stray ';' after the
# namespace and after the constructor body were removed.
master_h_template = """//This file is automatically generated. Do not modify it.
#ifndef KIWIX_{BASENAME}
#define KIWIX_{BASENAME}

#include <string>
#include <stdexcept>

namespace RESOURCE {{
    {RESOURCES}
}}

class ResourceNotFound : public std::runtime_error {{
  public:
    ResourceNotFound(const std::string& what_arg):
      std::runtime_error(what_arg)
    {{ }}
}};

const std::string& getResource_{basename}(const std::string& name);

#define getResource(a) (getResource_{basename}(a))

#endif // KIWIX_{BASENAME}

"""
|
||||
|
||||
def gen_h_file(resources, basename):
    """Render the generated C++ header.

    resources -- the Resource objects to declare
    basename  -- identifier used as the getResource_ suffix and, upper-cased,
                 in the include guard.
    """
    declarations = "\n ".join(res.dump_decl() for res in resources)
    guard = basename.upper()
    return master_h_template.format(
        RESOURCES=declarations,
        BASENAME=guard,
        basename=basename,
    )
|
||||
|
||||
if __name__ == "__main__":
    # Command line: kiwix-compile-resources --cxxfile OUT.cpp --hfile OUT.h
    #               [--source_dir DIR]... RESOURCE_LIST
    parser = argparse.ArgumentParser()
    # Both output files are written unconditionally below, so argparse is told
    # they are mandatory instead of failing later on a None path.
    parser.add_argument('--cxxfile',
                        required=True,
                        help='The Cpp file name to generate')
    parser.add_argument('--hfile',
                        required=True,
                        help='The h file name to generate')
    parser.add_argument('--source_dir',
                        help="Additional directory where to look for resources.",
                        action='append')
    parser.add_argument('resource_file',
                        help='The list of resources to compile.')
    args = parser.parse_args()

    # Resources are looked up next to the list file first, then in every
    # --source_dir in the order given.
    base_dir = os.path.dirname(os.path.realpath(args.resource_file))
    source_dir = args.source_dir or []
    with open(args.resource_file, 'r') as f:
        # Blank lines (e.g. a trailing newline) are skipped; an empty name
        # would otherwise resolve to the directory itself and fail to open.
        resources = [Resource([base_dir]+source_dir, filename)
                     for filename in f
                     if filename.strip()]

    h_identifier = to_identifier(os.path.basename(args.hfile))
    with open(args.hfile, 'w') as f:
        f.write(gen_h_file(resources, h_identifier))

    with open(args.cxxfile, 'w') as f:
        f.write(gen_c_file(resources, os.path.basename(args.hfile)))
|
||||
|
||||
5
scripts/meson.build
Normal file
@@ -0,0 +1,5 @@
|
||||
|
||||
# Locate the two helper scripts.
res_compiler = find_program('kiwix-compile-resources')
intermediate_ctpp2c = find_program('ctpp2c.sh')

# The resource compiler is also installed into bindir.
install_data(res_compiler.path(), install_dir:get_option('bindir'))
|
||||
13
src/android/AndroidManifest.xml
Normal file
@@ -0,0 +1,13 @@
|
||||
<!-- Android manifest installed with kiwix-lib: declares the package name and
     an application element with no components. -->
<manifest xmlns:android="http://schemas.android.com/apk/res/android"

    package="kiwix.org.kiwixlib"
    >

    <application android:allowBackup="true"
        android:label="@string/app_name"
        android:supportsRtl="true"
        >

    </application>

</manifest>
|
||||
16
src/android/gen_kiwix.sh
Executable file
@@ -0,0 +1,16 @@
|
||||
#!/usr/bin/env bash

# Compile the Java sources given as arguments into <cwd>/src/android, then
# run javah there to produce the JNI headers for JNIKiwix, JNIKiwixReader and
# JNIKiwixSearcher.  Stops at the first failing command.
set -e

BUILD_PATH=$(pwd)

echo "javac -d $BUILD_PATH/src/android $*"
# BUILD_PATH is quoted everywhere so a build directory containing spaces
# works.
javac -d "$BUILD_PATH/src/android/" "$@"

cd "$BUILD_PATH/src/android"
echo "javah -jni org.kiwix.kiwixlib"
javah -jni org.kiwix.kiwixlib.JNIKiwix
javah -jni org.kiwix.kiwixlib.JNIKiwixReader
javah -jni org.kiwix.kiwixlib.JNIKiwixSearcher
cd "$BUILD_PATH"
|
||||
44
src/android/kiwix.cpp
Normal file
@@ -0,0 +1,44 @@
|
||||
/*
|
||||
* Copyright (C) 2013 Emmanuel Engelhart <kelson@kiwix.org>
|
||||
* Copyright (C) 2017 Matthieu Gautier <mgautier@kymeria.fr>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 3 of the License, or
|
||||
* any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
* MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#include <jni.h>
|
||||
#include "org_kiwix_kiwixlib_JNIKiwix.h"
|
||||
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
|
||||
#include "unicode/putil.h"
|
||||
|
||||
#include "utils.h"
|
||||
|
||||
/* Process-wide mutex, statically initialised as recursive.
 * NOTE(review): presumably the mutex taken by the Lock helper from utils.h --
 * confirm there. */
pthread_mutex_t globalLock = PTHREAD_RECURSIVE_MUTEX_INITIALIZER;
|
||||
|
||||
/*
 * JNI binding for JNIKiwix.setDataDirectory(): hands the given directory to
 * ICU's u_setDataDirectory().  Any exception is reported on stderr and
 * swallowed.
 */
JNIEXPORT void JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_setDataDirectory(
    JNIEnv* env, jobject obj, jstring dirStr)
{
  std::string icuDataDir = jni2c(dirStr, env);

  Lock guard;
  try {
    u_setDataDirectory(icuDataDir.c_str());
  } catch (...) {
    std::cerr << "Unable to set data directory " << icuDataDir << std::endl;
  }
}
|
||||
425
src/android/kiwixreader.cpp
Normal file
@@ -0,0 +1,425 @@
|
||||
/*
|
||||
* Copyright (C) 2013 Emmanuel Engelhart <kelson@kiwix.org>
|
||||
* Copyright (C) 2017 Matthieu Gautier <mgautier@kymeria.fr>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 3 of the License, or
|
||||
* any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
* MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
|
||||
#include <jni.h>
|
||||
#include <zim/file.h>
|
||||
#include <android/log.h>
|
||||
#include "org_kiwix_kiwixlib_JNIKiwixReader.h"
|
||||
|
||||
#include "common/base64.h"
|
||||
#include "reader.h"
|
||||
#include "utils.h"
|
||||
|
||||
/* Kiwix Reader JNI functions */
|
||||
/*
 * Open the ZIM file named by `filename` and wrap the new kiwix::Reader in a
 * heap-allocated Handle.
 *
 * Returns the Handle pointer cast to jlong, or 0 when a std::exception is
 * raised (the error is logged).
 */
JNIEXPORT jlong JNICALL Java_org_kiwix_kiwixlib_JNIKiwixReader_getNativeReader(
    JNIEnv* env, jobject obj, jstring filename)
{
  std::string cPath = jni2c(filename, env);

  __android_log_print(ANDROID_LOG_INFO, "kiwix", "Attempting to create reader with: %s", cPath.c_str());
  Lock l;
  try {
    kiwix::Reader* reader = new kiwix::Reader(cPath);
    return reinterpret_cast<jlong>(new Handle<kiwix::Reader>(reader));
  } catch (const std::exception& e) {
    __android_log_print(ANDROID_LOG_WARN, "kiwix", "Error opening ZIM file");
    // Log the message as data: used as the format string, any '%' in it
    // would be interpreted as a conversion specifier.
    __android_log_print(ANDROID_LOG_WARN, "kiwix", "%s", e.what());
    return 0;
  }
}
|
||||
|
||||
/* JNI binding for JNIKiwixReader.dispose(): forwards to
 * Handle<kiwix::Reader>::dispose() for this Java object. */
JNIEXPORT void JNICALL
Java_org_kiwix_kiwixlib_JNIKiwixReader_dispose(JNIEnv* env, jobject obj)
{
  Handle<kiwix::Reader>::dispose(env, obj);
}
|
||||
|
||||
/* Shorthand for the kiwix::Reader handle of the current Java object; expands
 * using the `env` and `obj` names of the enclosing JNI function. */
#define READER (Handle<kiwix::Reader>::getHandle(env, obj))
|
||||
|
||||
/* Kiwix library functions */
|
||||
/*
 * Return the path of the ZIM main page as a Java string, or NULL when a
 * std::exception is raised (the error is logged).
 */
JNIEXPORT jstring JNICALL
Java_org_kiwix_kiwixlib_JNIKiwixReader_getMainPage(JNIEnv* env, jobject obj)
{
  jstring url = NULL;

  try {
    std::string cUrl = READER->getMainPage().getPath();
    url = c2jni(cUrl, env);
  } catch (const std::exception& e) {
    __android_log_print(ANDROID_LOG_ERROR, "kiwix", "Unable to get ZIM main page");
    // Log the message as data: used as the format string, any '%' in it
    // would be interpreted as a conversion specifier.
    __android_log_print(ANDROID_LOG_ERROR, "kiwix", "%s", e.what());
    url = NULL;
  }
  return url;
}
|
||||
|
||||
/*
 * Return the value of Reader::getId() as a Java string, or NULL when a
 * std::exception is raised (the error is logged).
 */
JNIEXPORT jstring JNICALL
Java_org_kiwix_kiwixlib_JNIKiwixReader_getId(JNIEnv* env, jobject obj)
{
  jstring id = NULL;

  try {
    std::string cId = READER->getId();
    id = c2jni(cId, env);
  } catch (const std::exception& e) {
    __android_log_print(ANDROID_LOG_ERROR, "kiwix", "Unable to get ZIM id");
    // Log the message as data: used as the format string, any '%' in it
    // would be interpreted as a conversion specifier.
    __android_log_print(ANDROID_LOG_ERROR, "kiwix", "%s", e.what());
    id = NULL;
  }

  return id;
}
|
||||
|
||||
/*
 * Return the value of Reader::getFileSize() converted to jint.
 *
 * Returns 0 when a std::exception is raised (the error is logged); the
 * result was previously left uninitialised on that path.
 */
JNIEXPORT jint JNICALL
Java_org_kiwix_kiwixlib_JNIKiwixReader_getFileSize(JNIEnv* env, jobject obj)
{
  jint size = 0;

  try {
    int cSize = READER->getFileSize();
    size = c2jni(cSize);
  } catch (const std::exception& e) {
    __android_log_print(ANDROID_LOG_ERROR, "kiwix", "Unable to get ZIM file size");
    // Log the message as data: used as the format string, any '%' in it
    // would be interpreted as a conversion specifier.
    __android_log_print(ANDROID_LOG_ERROR, "kiwix", "%s", e.what());
  }

  return size;
}
|
||||
|
||||
/*
 * Return the value of Reader::getCreator() as a Java string, or NULL when a
 * std::exception is raised (the error is logged).
 */
JNIEXPORT jstring JNICALL
Java_org_kiwix_kiwixlib_JNIKiwixReader_getCreator(JNIEnv* env, jobject obj)
{
  jstring creator = NULL;

  try {
    std::string cCreator = READER->getCreator();
    creator = c2jni(cCreator, env);
  } catch (const std::exception& e) {
    __android_log_print(ANDROID_LOG_ERROR, "kiwix", "Unable to get ZIM creator");
    // Log the message as data: used as the format string, any '%' in it
    // would be interpreted as a conversion specifier.
    __android_log_print(ANDROID_LOG_ERROR, "kiwix", "%s", e.what());
    creator = NULL;
  }

  return creator;
}
|
||||
|
||||
/*
 * Return the value of Reader::getPublisher() as a Java string, or NULL when
 * a std::exception is raised (the error is logged).
 */
JNIEXPORT jstring JNICALL
Java_org_kiwix_kiwixlib_JNIKiwixReader_getPublisher(JNIEnv* env, jobject obj)
{
  jstring publisher = NULL;

  try {
    std::string cPublisher = READER->getPublisher();
    publisher = c2jni(cPublisher, env);
  } catch (const std::exception& e) {
    __android_log_print(ANDROID_LOG_ERROR, "kiwix", "Unable to get ZIM publish");
    // Log the message as data: used as the format string, any '%' in it
    // would be interpreted as a conversion specifier.
    __android_log_print(ANDROID_LOG_ERROR, "kiwix", "%s", e.what());
    publisher = NULL;
  }
  return publisher;
}
|
||||
|
||||
/*
 * Return the value of Reader::getName() as a Java string, or NULL when a
 * std::exception is raised (the error is logged).
 */
JNIEXPORT jstring JNICALL
Java_org_kiwix_kiwixlib_JNIKiwixReader_getName(JNIEnv* env, jobject obj)
{
  jstring name = NULL;

  try {
    std::string cName = READER->getName();
    name = c2jni(cName, env);
  } catch (const std::exception& e) {
    __android_log_print(ANDROID_LOG_ERROR, "kiwix", "Unable to get ZIM name");
    // Log the message as data: used as the format string, any '%' in it
    // would be interpreted as a conversion specifier.
    __android_log_print(ANDROID_LOG_ERROR, "kiwix", "%s", e.what());
    name = NULL;
  }
  return name;
}
|
||||
|
||||
/*
 * Return the favicon content, base64-encoded, as a Java string.  The MIME
 * type reported by Reader::getFavicon() is not returned.
 *
 * Returns NULL when a std::exception is raised (the error is logged).
 */
JNIEXPORT jstring JNICALL
Java_org_kiwix_kiwixlib_JNIKiwixReader_getFavicon(JNIEnv* env, jobject obj)
{
  jstring favicon = NULL;

  try {
    std::string cContent;
    std::string cMime;
    READER->getFavicon(cContent, cMime);
    favicon = c2jni(
        base64_encode(reinterpret_cast<const unsigned char*>(cContent.c_str()),
                      cContent.length()),
        env);
  } catch (const std::exception& e) {
    __android_log_print(ANDROID_LOG_ERROR, "kiwix", "Unable to get ZIM favicon");
    // Log the message as data: used as the format string, any '%' in it
    // would be interpreted as a conversion specifier.
    __android_log_print(ANDROID_LOG_ERROR, "kiwix", "%s", e.what());
    favicon = NULL;
  }
  return favicon;
}
|
||||
|
||||
/*
 * Return the value of Reader::getDate() as a Java string, or NULL when a
 * std::exception is raised (the error is logged).
 */
JNIEXPORT jstring JNICALL
Java_org_kiwix_kiwixlib_JNIKiwixReader_getDate(JNIEnv* env, jobject obj)
{
  jstring date = NULL;

  try {
    std::string cDate = READER->getDate();
    date = c2jni(cDate, env);
  } catch (const std::exception& e) {
    __android_log_print(ANDROID_LOG_ERROR, "kiwix", "Unable to get ZIM date");
    // Log the message as data: used as the format string, any '%' in it
    // would be interpreted as a conversion specifier.
    __android_log_print(ANDROID_LOG_ERROR, "kiwix", "%s", e.what());
    date = NULL;
  }
  return date;
}
|
||||
|
||||
/*
 * Return the value of Reader::getLanguage() as a Java string, or NULL when
 * a std::exception is raised (the error is logged).
 */
JNIEXPORT jstring JNICALL
Java_org_kiwix_kiwixlib_JNIKiwixReader_getLanguage(JNIEnv* env, jobject obj)
{
  jstring language = NULL;

  try {
    std::string cLanguage = READER->getLanguage();
    language = c2jni(cLanguage, env);
  } catch (const std::exception& e) {
    __android_log_print(ANDROID_LOG_ERROR, "kiwix", "Unable to get ZIM language");
    // Log the message as data: used as the format string, any '%' in it
    // would be interpreted as a conversion specifier.
    __android_log_print(ANDROID_LOG_ERROR, "kiwix", "%s", e.what());
    language = NULL;
  }

  return language;
}
|
||||
|
||||
/*
 * Return the MIME type of the entry found at the encoded path `url`, or NULL
 * when a std::exception is raised (the error is logged).
 */
JNIEXPORT jstring JNICALL Java_org_kiwix_kiwixlib_JNIKiwixReader_getMimeType(
    JNIEnv* env, jobject obj, jstring url)
{
  jstring mimeType = NULL;

  std::string cUrl = jni2c(url, env);
  try {
    auto entry = READER->getEntryFromEncodedPath(cUrl);
    auto cMimeType = entry.getMimetype();
    mimeType = c2jni(cMimeType, env);
  } catch (const std::exception& e) {
    __android_log_print(ANDROID_LOG_ERROR, "kiwix", "Unable to get mime-type for url: %s", cUrl.c_str());
    // Log the message as data: used as the format string, any '%' in it
    // (common in encoded URLs) would be interpreted as a conversion
    // specifier.
    __android_log_print(ANDROID_LOG_ERROR, "kiwix", "%s", e.what());
    mimeType = NULL;
  }
  return mimeType;
}
|
||||
|
||||
/*
 * Return the full content of the entry found at the encoded path `url`,
 * after following it to its final entry.
 *
 * titleObj, mimeTypeObj and sizeObj are first reset to "", "" and 0, then
 * filled with the entry's title, MIME type and size.  When a std::exception
 * is raised the error is logged and whatever was produced up to that point
 * is returned (an empty array if the entry lookup itself failed).
 */
JNIEXPORT jbyteArray JNICALL Java_org_kiwix_kiwixlib_JNIKiwixReader_getContent(
    JNIEnv* env, jobject obj, jstring url, jobject titleObj, jobject mimeTypeObj, jobject sizeObj)
{
  /* Default values */
  setStringObjValue("", titleObj, env);
  setStringObjValue("", mimeTypeObj, env);
  setIntObjValue(0, sizeObj, env);
  jbyteArray data = env->NewByteArray(0);

  /* Retrieve the content */
  std::string cUrl = jni2c(url, env);
  unsigned int cSize = 0;

  try {
    auto entry = READER->getEntryFromEncodedPath(cUrl);
    entry = entry.getFinalEntry();
    cSize = entry.getSize();
    setIntObjValue(cSize, sizeObj, env);

    data = env->NewByteArray(cSize);
    env->SetByteArrayRegion(
        data, 0, cSize, reinterpret_cast<const jbyte*>(entry.getBlob().data()));

    setStringObjValue(entry.getMimetype(), mimeTypeObj, env);
    setStringObjValue(entry.getTitle(), titleObj, env);
  } catch (const std::exception& e) {
    __android_log_print(ANDROID_LOG_ERROR, "kiwix", "Unable to get content for url: %s", cUrl.c_str());
    // Log the message as data: used as the format string, any '%' in it
    // (common in encoded URLs) would be interpreted as a conversion
    // specifier.
    __android_log_print(ANDROID_LOG_ERROR, "kiwix", "%s", e.what());
  }

  return data;
}
|
||||
|
||||
/*
 * Return `len` bytes, starting at `offset`, of the final entry found at the
 * encoded path `url`.
 *
 *  - len == 0: nothing is read; sizeObj receives the total entry size and
 *    an empty array is returned.
 *  - [offset, offset + len) lies inside the entry: the bytes are returned
 *    and sizeObj receives len.  A range ending exactly at the end of the
 *    entry is accepted.
 *  - otherwise, or when a std::exception is raised (logged): an empty array
 *    is returned and sizeObj stays 0.
 */
JNIEXPORT jbyteArray JNICALL Java_org_kiwix_kiwixlib_JNIKiwixReader_getContentPart(
    JNIEnv* env, jobject obj, jstring url, jint offset, jint len, jobject sizeObj)
{
  /* Default values */
  jbyteArray data = env->NewByteArray(0);
  setIntObjValue(0, sizeObj, env);

  /* Retrieve the content */
  std::string cUrl = jni2c(url, env);
  unsigned int cOffset = jni2c(offset);
  unsigned int cLen = jni2c(len);
  try {
    auto entry = READER->getEntryFromEncodedPath(cUrl);
    entry = entry.getFinalEntry();
    const auto entrySize = entry.getSize();

    if (cLen == 0) {
      setIntObjValue(entrySize, sizeObj, env);
    } else if (cOffset <= entrySize && cLen <= entrySize - cOffset) {
      // Written without cOffset + cLen so the test cannot wrap around, and
      // inclusive so the last chunk of the entry can be read.
      auto blob = entry.getBlob(cOffset, cLen);
      data = env->NewByteArray(cLen);
      env->SetByteArrayRegion(
          data, 0, cLen, reinterpret_cast<const jbyte*>(blob.data()));
      setIntObjValue(cLen, sizeObj, env);
    }
  } catch (const std::exception& e) {
    __android_log_print(ANDROID_LOG_ERROR, "kiwix", "Unable to get partial content for url: %s (%u : %u)", cUrl.c_str(), cOffset, cLen);
    // Log the message as data: used as the format string, any '%' in it
    // (common in encoded URLs) would be interpreted as a conversion
    // specifier.
    __android_log_print(ANDROID_LOG_ERROR, "kiwix", "%s", e.what());
  }
  return data;
}
|
||||
|
||||
/*
 * Return an org.kiwix.kiwixlib.Pair holding the direct-access information of
 * the final entry found at the encoded path `url`
 * (Entry::getDirectAccessInfo()).
 *
 * The pair is initialised to ("", 0) and keeps that value when a
 * std::exception is raised (the error is logged).
 */
JNIEXPORT jobject JNICALL
Java_org_kiwix_kiwixlib_JNIKiwixReader_getDirectAccessInformation(
    JNIEnv* env, jobject obj, jstring url)
{
  jclass classPair = env->FindClass("org/kiwix/kiwixlib/Pair");
  jmethodID midPairinit = env->GetMethodID(classPair, "<init>", "()V");
  jobject pair = env->NewObject(classPair, midPairinit);
  setPairObjValue("", 0, pair, env);

  std::string cUrl = jni2c(url, env);
  try {
    auto entry = READER->getEntryFromEncodedPath(cUrl);
    entry = entry.getFinalEntry();
    auto part_info = entry.getDirectAccessInfo();
    setPairObjValue(part_info.first, part_info.second, pair, env);
  } catch (const std::exception& e) {
    __android_log_print(ANDROID_LOG_ERROR, "kiwix", "Unable to get direct access info for url: %s", cUrl.c_str());
    // Log the message as data: used as the format string, any '%' in it
    // (common in encoded URLs) would be interpreted as a conversion
    // specifier.
    __android_log_print(ANDROID_LOG_ERROR, "kiwix", "%s", e.what());
  }
  return pair;
}
|
||||
|
||||
/*
 * Start a suggestion search for `prefix`, limited to `count` results, through
 * Reader::searchSuggestionsSmart().
 *
 * Returns JNI_TRUE when the reader reports success; JNI_FALSE otherwise or
 * when a std::exception is raised (the error is logged).
 */
JNIEXPORT jboolean JNICALL
Java_org_kiwix_kiwixlib_JNIKiwixReader_searchSuggestions(JNIEnv* env,
                                                         jobject obj,
                                                         jstring prefix,
                                                         jint count)
{
  jboolean retVal = JNI_FALSE;
  std::string cPrefix = jni2c(prefix, env);
  unsigned int cCount = jni2c(count);

  try {
    if (READER->searchSuggestionsSmart(cPrefix, cCount)) {
      retVal = JNI_TRUE;
    }
  } catch (const std::exception& e) {
    __android_log_print(ANDROID_LOG_WARN, "kiwix", "Unable to get search results for pattern: %s", cPrefix.c_str());
    // Log the message as data: used as the format string, any '%' in it
    // would be interpreted as a conversion specifier.
    __android_log_print(ANDROID_LOG_WARN, "kiwix", "%s", e.what());
  }

  return retVal;
}
|
||||
|
||||
/*
 * Fetch the next suggestion title through Reader::getNextSuggestion().
 *
 * When the reader returns one, it is stored in titleObj and JNI_TRUE is
 * returned.  Returns JNI_FALSE otherwise or when a std::exception is raised
 * (the error is logged).
 */
JNIEXPORT jboolean JNICALL
Java_org_kiwix_kiwixlib_JNIKiwixReader_getNextSuggestion(JNIEnv* env,
                                                         jobject obj,
                                                         jobject titleObj)
{
  jboolean retVal = JNI_FALSE;
  std::string cTitle;

  try {
    if (READER->getNextSuggestion(cTitle)) {
      setStringObjValue(cTitle, titleObj, env);
      retVal = JNI_TRUE;
    }
  } catch (const std::exception& e) {
    __android_log_print(ANDROID_LOG_WARN, "kiwix", "Unable to get next suggestion");
    // Log the message as data: used as the format string, any '%' in it
    // would be interpreted as a conversion specifier.
    __android_log_print(ANDROID_LOG_WARN, "kiwix", "%s", e.what());
  }

  return retVal;
}
|
||||
|
||||
/*
 * Look an entry up by `title`, follow it to its final entry and store that
 * entry's path in urlObj.
 *
 * Returns JNI_TRUE on success, JNI_FALSE when a std::exception is raised
 * (the error is logged and urlObj is left untouched).
 */
JNIEXPORT jboolean JNICALL
Java_org_kiwix_kiwixlib_JNIKiwixReader_getPageUrlFromTitle(JNIEnv* env,
                                                           jobject obj,
                                                           jstring title,
                                                           jobject urlObj)
{
  std::string cTitle = jni2c(title, env);

  try {
    auto entry = READER->getEntryFromTitle(cTitle);
    entry = entry.getFinalEntry();
    setStringObjValue(entry.getPath(), urlObj, env);
    return JNI_TRUE;
  } catch (const std::exception& e) {
    __android_log_print(ANDROID_LOG_WARN, "kiwix", "Unable to get url for title %s: ", cTitle.c_str());
    // Log the message as data: used as the format string, any '%' in it
    // would be interpreted as a conversion specifier.
    __android_log_print(ANDROID_LOG_WARN, "kiwix", "%s", e.what());
  }

  return JNI_FALSE;
}
|
||||
|
||||
/*
 * Return the value of Reader::getTitle() as a Java string, or NULL when a
 * std::exception is raised (the error is logged).
 */
JNIEXPORT jstring JNICALL Java_org_kiwix_kiwixlib_JNIKiwixReader_getTitle(
    JNIEnv* env, jobject obj)
{
  jstring title = NULL;

  try {
    std::string cTitle = READER->getTitle();
    title = c2jni(cTitle, env);
  } catch (const std::exception& e) {
    __android_log_print(ANDROID_LOG_ERROR, "kiwix", "Unable to get zim title");
    // Log the message as data: used as the format string, any '%' in it
    // would be interpreted as a conversion specifier.
    __android_log_print(ANDROID_LOG_ERROR, "kiwix", "%s", e.what());
    title = NULL;
  }
  return title;
}
|
||||
|
||||
/*
 * Return the value of Reader::getDescription() as a Java string, or NULL
 * when a std::exception is raised (the error is logged).
 */
JNIEXPORT jstring JNICALL
Java_org_kiwix_kiwixlib_JNIKiwixReader_getDescription(JNIEnv* env, jobject obj)
{
  jstring description = NULL;

  try {
    std::string cDescription = READER->getDescription();
    description = c2jni(cDescription, env);
  } catch (const std::exception& e) {
    __android_log_print(ANDROID_LOG_ERROR, "kiwix", "Unable to get zim description");
    // Log the message as data: used as the format string, any '%' in it
    // would be interpreted as a conversion specifier.
    __android_log_print(ANDROID_LOG_ERROR, "kiwix", "%s", e.what());
    description = NULL;
  }
  return description;
}
|
||||
|
||||
/*
 * Store the path of a random page (Reader::getRandomPage()) in urlObj.
 *
 * Returns JNI_TRUE on success, JNI_FALSE when a std::exception is raised
 * (the error is logged and urlObj is left untouched).
 */
JNIEXPORT jboolean JNICALL Java_org_kiwix_kiwixlib_JNIKiwixReader_getRandomPage(
    JNIEnv* env, jobject obj, jobject urlObj)
{
  jboolean retVal = JNI_FALSE;

  try {
    // The unused outer cUrl that this declaration used to shadow was removed.
    std::string cUrl = READER->getRandomPage().getPath();
    setStringObjValue(cUrl, urlObj, env);
    retVal = JNI_TRUE;
  } catch (const std::exception& e) {
    __android_log_print(ANDROID_LOG_ERROR, "kiwix", "Unable to get random page");
    // Log the message as data: used as the format string, any '%' in it
    // would be interpreted as a conversion specifier.
    __android_log_print(ANDROID_LOG_ERROR, "kiwix", "%s", e.what());
  }
  return retVal;
}
|
||||
124
src/android/kiwixsearcher.cpp
Normal file
@@ -0,0 +1,124 @@
|
||||
/*
|
||||
* Copyright (C) 2013 Emmanuel Engelhart <kelson@kiwix.org>
|
||||
* Copyright (C) 2017 Matthieu Gautier <mgautier@kymeria.fr>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 3 of the License, or
|
||||
* any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
* MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
|
||||
#include <zim/file.h>
|
||||
#include "org_kiwix_kiwixlib_JNIKiwixSearcher.h"
|
||||
#include "org_kiwix_kiwixlib_JNIKiwixSearcher_Result.h"
|
||||
|
||||
#include "reader.h"
|
||||
#include "searcher.h"
|
||||
#include "utils.h"
|
||||
|
||||
/* Shorthands for the native handle of the current Java object, seen as a
 * kiwix::Searcher or a kiwix::Result; both expand using the `env` and `obj`
 * names of the enclosing JNI function. */
#define SEARCHER (Handle<kiwix::Searcher>::getHandle(env, obj))
#define RESULT (Handle<kiwix::Result>::getHandle(env, obj))
|
||||
|
||||
|
||||
/* JNI binding for JNIKiwixSearcher.dispose(): forwards to
 * Handle<kiwix::Searcher>::dispose() for this Java object. */
JNIEXPORT void JNICALL
Java_org_kiwix_kiwixlib_JNIKiwixSearcher_dispose(JNIEnv* env, jobject obj)
{
  Handle<kiwix::Searcher>::dispose(env, obj);
}
|
||||
|
||||
/* Kiwix Searcher JNI functions */
|
||||
/*
 * Create a new kiwix::Searcher, wrap it in a heap-allocated Handle and
 * return the Handle pointer cast to jlong.
 */
JNIEXPORT jlong JNICALL
Java_org_kiwix_kiwixlib_JNIKiwixSearcher_getNativeHandle(JNIEnv* env,
                                                         jobject obj)
{
  auto* handle = new Handle<kiwix::Searcher>(new kiwix::Searcher());
  return reinterpret_cast<jlong>(handle);
}
|
||||
|
||||
/* Kiwix library functions */
|
||||
/*
 * Register the kiwix::Reader behind the Java object `reader` with this
 * searcher, passing an empty string as the second argument of add_reader().
 */
JNIEXPORT void JNICALL Java_org_kiwix_kiwixlib_JNIKiwixSearcher_addReader(
    JNIEnv* env, jobject obj, jobject reader)
{
  auto nativeSearcher = SEARCHER;
  auto nativeReader = Handle<kiwix::Reader>::getHandle(env, reader);

  nativeSearcher->add_reader(*nativeReader, "");
}
|
||||
|
||||
/*
 * Run `query` on this searcher, passing 0 and `count` as the second and third
 * arguments of Searcher::search().
 */
JNIEXPORT void JNICALL Java_org_kiwix_kiwixlib_JNIKiwixSearcher_search(
    JNIEnv* env, jobject obj, jstring query, jint count)
{
  std::string nativeQuery = jni2c(query, env);
  unsigned int resultEnd = jni2c(count);

  SEARCHER->search(nativeQuery, 0, resultEnd);
}
|
||||
|
||||
/*
 * Fetch the next native search result and wrap it in a new Java
 * JNIKiwixSearcher.Result whose native handle owns it.
 *
 * Returns nullptr when the searcher has no further result.
 */
JNIEXPORT jobject JNICALL
Java_org_kiwix_kiwixlib_JNIKiwixSearcher_getNextResult(JNIEnv* env,
                                                       jobject obj)
{
  kiwix::Result* nativeResult = SEARCHER->getNextResult();
  if (nativeResult == nullptr) {
    return nullptr;
  }

  jclass resultClass
      = env->FindClass("org/kiwix/kiwixlib/JNIKiwixSearcher$Result");
  jmethodID constructor = env->GetMethodID(
      resultClass, "<init>", "(Lorg/kiwix/kiwixlib/JNIKiwixSearcher;JLorg/kiwix/kiwixlib/JNIKiwixSearcher;)V");
  // `obj` is passed twice, once for each JNIKiwixSearcher parameter of the
  // constructor signature above.
  return env->NewObject(resultClass, constructor, obj, reinterpret_cast<jlong>(new Handle<kiwix::Result>(nativeResult)), obj);
}
|
||||
|
||||
/* JNI binding for JNIKiwixSearcher.Result.dispose(): forwards to
 * Handle<kiwix::Result>::dispose() for this Java object. */
JNIEXPORT void JNICALL Java_org_kiwix_kiwixlib_JNIKiwixSearcher_00024Result_dispose(
    JNIEnv* env, jobject obj)
{
  Handle<kiwix::Result>::dispose(env, obj);
}
|
||||
|
||||
/*
 * Return the URL of this search result as a Java string, or nullptr when any
 * exception is raised.
 */
JNIEXPORT jstring JNICALL
Java_org_kiwix_kiwixlib_JNIKiwixSearcher_00024Result_getUrl(JNIEnv* env,
                                                            jobject obj)
{
  jstring url = nullptr;
  try {
    url = c2jni(RESULT->get_url(), env);
  } catch (...) {
    url = nullptr;
  }
  return url;
}
|
||||
|
||||
/*
 * Return the title of this search result as a Java string, or nullptr when
 * any exception is raised.
 */
JNIEXPORT jstring JNICALL
Java_org_kiwix_kiwixlib_JNIKiwixSearcher_00024Result_getTitle(JNIEnv* env,
                                                              jobject obj)
{
  jstring title = nullptr;
  try {
    title = c2jni(RESULT->get_title(), env);
  } catch (...) {
    title = nullptr;
  }
  return title;
}
|
||||
|
||||
/*
 * Return the snippet of this search result as a Java string.
 *
 * Returns nullptr when any exception is raised, like the sibling getUrl and
 * getTitle bindings; a C++ exception must not propagate into the JVM.
 */
JNIEXPORT jstring JNICALL
Java_org_kiwix_kiwixlib_JNIKiwixSearcher_00024Result_getSnippet(JNIEnv* env,
                                                                jobject obj)
{
  try {
    return c2jni(RESULT->get_snippet(), env);
  } catch (...) {
    return nullptr;
  }
}
|
||||
|
||||
/*
 * Return the content of this search result as a Java string.
 *
 * Returns nullptr when any exception is raised, like the sibling getUrl and
 * getTitle bindings; a C++ exception must not propagate into the JVM.
 */
JNIEXPORT jstring JNICALL
Java_org_kiwix_kiwixlib_JNIKiwixSearcher_00024Result_getContent(JNIEnv* env,
                                                                jobject obj)
{
  try {
    return c2jni(RESULT->get_content(), env);
  } catch (...) {
    return nullptr;
  }
}
|
||||
28
src/android/meson.build
Normal file
@@ -0,0 +1,28 @@
|
||||
|
||||
# Script that compiles the Java sources and runs javah on them.
jni_generator = find_program('gen_kiwix.sh')

# Custom target producing the JNI headers included by the C++ sources listed
# below.  All Java sources are passed to the generator as inputs.
kiwix_jni = custom_target('jni',
    input: ['org/kiwix/kiwixlib/JNIKiwix.java',
            'org/kiwix/kiwixlib/JNIKiwixReader.java',
            'org/kiwix/kiwixlib/JNIKiwixSearcher.java',
            'org/kiwix/kiwixlib/JNIKiwixInt.java',
            'org/kiwix/kiwixlib/JNIKiwixString.java',
            'org/kiwix/kiwixlib/JNIKiwixBool.java',
            'org/kiwix/kiwixlib/JNIKiwixException.java',
            'org/kiwix/kiwixlib/Pair.java'],
    output: ['org_kiwix_kiwixlib_JNIKiwix.h',
             'org_kiwix_kiwixlib_JNIKiwixReader.h',
             'org_kiwix_kiwixlib_JNIKiwixSearcher.h',
             'org_kiwix_kiwixlib_JNIKiwixSearcher_Result.h'],
    command:[jni_generator, '@INPUT@']
)

# The JNI glue sources and the generated headers are added to the library
# sources.
kiwix_sources += [
    'android/kiwix.cpp',
    'android/kiwixreader.cpp',
    'android/kiwixsearcher.cpp',
    kiwix_jni]

# Java sources, Android resources and the manifest are installed under
# kiwix-lib.
install_subdir('org', install_dir: 'kiwix-lib/java')
install_subdir('res', install_dir: 'kiwix-lib')
install_data('AndroidManifest.xml', install_dir: 'kiwix-lib')
|
||||
31
src/android/org/kiwix/kiwixlib/JNIKiwix.java
Normal file
@@ -0,0 +1,31 @@
|
||||
/*
|
||||
* Copyright (C) 2013 Emmanuel Engelhart <kelson@kiwix.org>
|
||||
* Copyright (C) 2017 Matthieu Gautier <mgautier@kymeria.fr>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 3 of the License, or
|
||||
* any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
* MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
package org.kiwix.kiwixlib;
|
||||
|
||||
import org.kiwix.kiwixlib.JNIKiwixReader;
|
||||
import org.kiwix.kiwixlib.JNIKiwixString;
|
||||
|
||||
/**
 * Entry point of the native library: loading this class loads the "kiwix"
 * shared library.
 */
public class JNIKiwix
{
  // Load the native library once, when the class is initialised.
  static { System.loadLibrary("kiwix"); }

  /**
   * Native method taking the ICU data directory.
   *
   * @param icuDataDir path of the directory holding the ICU data
   */
  public native void setDataDirectory(String icuDataDir);
}
|
||||
25
src/android/org/kiwix/kiwixlib/JNIKiwixBool.java
Normal file
@@ -0,0 +1,25 @@
|
||||
/*
|
||||
* Copyright (C) 2013 Emmanuel Engelhart <kelson@kiwix.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 3 of the License, or
|
||||
* any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
* MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
package org.kiwix.kiwixlib;
|
||||
|
||||
/**
 * Mutable holder for a single boolean value.
 */
public class JNIKiwixBool
{
  /** The wrapped value. */
  public boolean value;
}
|
||||
27
src/android/org/kiwix/kiwixlib/JNIKiwixException.java
Normal file
@@ -0,0 +1,27 @@
|
||||
/*
|
||||
* Copyright (C) 2017 Matthieu Gautier <mgautier@kymeria.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 3 of the License, or
|
||||
* any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
* MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
package org.kiwix.kiwixlib;
|
||||
|
||||
/**
 * Checked exception of the kiwixlib Java binding.
 *
 * JNIKiwixReader's constructor throws it when the native reader cannot be
 * created for the requested file.
 */
public class JNIKiwixException extends Exception {

  /**
   * @param message human readable description, forwarded to {@link Exception}.
   */
  public JNIKiwixException(String message) {
    super(message);
  }
}
|
||||
25
src/android/org/kiwix/kiwixlib/JNIKiwixInt.java
Normal file
@@ -0,0 +1,25 @@
|
||||
/*
|
||||
* Copyright (C) 2013 Emmanuel Engelhart <kelson@kiwix.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 3 of the License, or
|
||||
* any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
* MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
package org.kiwix.kiwixlib;
|
||||
|
||||
/**
 * Mutable holder for a single int.
 *
 * The only member is the public field {@code value}. The JNI helper
 * setIntObjValue (utils.h) looks a field up by the name "value" with the int
 * signature "I", so neither the name nor the type should change.
 */
public class JNIKiwixInt {
  /** Wrapped value; defaults to 0 like any Java int field. */
  public int value;
}
|
||||
127
src/android/org/kiwix/kiwixlib/JNIKiwixReader.java
Normal file
@@ -0,0 +1,127 @@
|
||||
/*
|
||||
* Copyright (C) 2013 Emmanuel Engelhart <kelson@kiwix.org>
|
||||
* Copyright (C) 2017 Matthieu Gautier <mgautier@kymeria.fr>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 3 of the License, or
|
||||
* any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
* MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
package org.kiwix.kiwixlib;
|
||||
|
||||
import org.kiwix.kiwixlib.JNIKiwixException;
|
||||
import org.kiwix.kiwixlib.JNIKiwixString;
|
||||
import org.kiwix.kiwixlib.JNIKiwixInt;
|
||||
import org.kiwix.kiwixlib.JNIKiwixSearcher;
|
||||
import org.kiwix.kiwixlib.Pair;
|
||||
|
||||
public class JNIKiwixReader
|
||||
{
|
||||
public native String getMainPage();
|
||||
|
||||
public native String getTitle();
|
||||
|
||||
public native String getId();
|
||||
|
||||
public native String getLanguage();
|
||||
|
||||
public native String getMimeType(String url);
|
||||
|
||||
public native byte[] getContent(String url,
|
||||
JNIKiwixString title,
|
||||
JNIKiwixString mimeType,
|
||||
JNIKiwixInt size);
|
||||
|
||||
/**
|
||||
* getContentPart.
|
||||
*
|
||||
* Get only a part of the content of the article.
|
||||
* Return a byte array of `len` size starting from offset `offset`.
|
||||
* Set `size` to the number of bytes read
|
||||
* (`len` if everything is ok, 0 in case of error).
|
||||
* If `len` == 0, no bytes are read but `size` is set to the total size of the
|
||||
* article.
|
||||
*/
|
||||
public native byte[] getContentPart(String url,
|
||||
int offest,
|
||||
int len,
|
||||
JNIKiwixInt size);
|
||||
|
||||
/**
|
||||
* getDirectAccessInformation.
|
||||
*
|
||||
* Return information giving where the content is located in the zim file.
|
||||
*
|
||||
* Some contents (binary content) are stored uncompressed in the zim file.
|
||||
* Knowing this information, it could be interesting to directly open
|
||||
* the zim file (or zim part) and directly read the content from it (and so
|
||||
* bypassing the libzim).
|
||||
*
|
||||
* Return a `Pair` (filename, offset) where the content is located.
|
||||
*
|
||||
* If the content cannot be directly accessed (content is compressed or zim
|
||||
* file is cut in the middle of the content), the filename is an empty string
|
||||
* and offset is zero.
|
||||
*/
|
||||
public native Pair getDirectAccessInformation(String url);
|
||||
|
||||
public native boolean searchSuggestions(String prefix, int count);
|
||||
|
||||
public native boolean getNextSuggestion(JNIKiwixString title);
|
||||
|
||||
public native boolean getPageUrlFromTitle(String title, JNIKiwixString url);
|
||||
|
||||
public native String getDescription();
|
||||
|
||||
public native String getDate();
|
||||
|
||||
public native String getFavicon();
|
||||
|
||||
public native String getCreator();
|
||||
|
||||
public native String getPublisher();
|
||||
|
||||
public native String getName();
|
||||
|
||||
public native int getFileSize();
|
||||
|
||||
public native int getArticleCount();
|
||||
|
||||
public native int getMediaCount();
|
||||
|
||||
public native boolean getRandomPage(JNIKiwixString url);
|
||||
|
||||
public JNIKiwixSearcher search(String query, int count)
|
||||
{
|
||||
JNIKiwixSearcher searcher = new JNIKiwixSearcher();
|
||||
searcher.addKiwixReader(this);
|
||||
searcher.search(query, count);
|
||||
return searcher;
|
||||
}
|
||||
|
||||
public JNIKiwixReader(String filename) throws JNIKiwixException
|
||||
{
|
||||
nativeHandle = getNativeReader(filename);
|
||||
if (nativeHandle == 0) {
|
||||
throw new JNIKiwixException("Cannot open zimfile "+filename);
|
||||
}
|
||||
}
|
||||
public JNIKiwixReader() {
|
||||
|
||||
}
|
||||
public native void dispose();
|
||||
|
||||
private native long getNativeReader(String filename);
|
||||
private long nativeHandle;
|
||||
}
|
||||
67
src/android/org/kiwix/kiwixlib/JNIKiwixSearcher.java
Normal file
@@ -0,0 +1,67 @@
|
||||
/*
|
||||
* Copyright (C) 2013 Emmanuel Engelhart <kelson@kiwix.org>
|
||||
* Copyright (C) 2017 Matthieu Gautier <mgautier@kymeria.fr>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 3 of the License, or
|
||||
* any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
* MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
|
||||
package org.kiwix.kiwixlib;
|
||||
|
||||
import org.kiwix.kiwixlib.JNIKiwixReader;
|
||||
import java.util.Vector;
|
||||
|
||||
public class JNIKiwixSearcher
|
||||
{
|
||||
public class Result
|
||||
{
|
||||
private long nativeHandle;
|
||||
private JNIKiwixSearcher searcher;
|
||||
public Result(long handle, JNIKiwixSearcher _searcher)
|
||||
{
|
||||
nativeHandle = handle;
|
||||
searcher = _searcher;
|
||||
}
|
||||
public native String getUrl();
|
||||
public native String getTitle();
|
||||
public native String getContent();
|
||||
public native String getSnippet();
|
||||
public native void dispose();
|
||||
}
|
||||
|
||||
public JNIKiwixSearcher()
|
||||
{
|
||||
nativeHandle = getNativeHandle();
|
||||
usedReaders = new Vector();
|
||||
}
|
||||
public native void dispose();
|
||||
|
||||
private native long getNativeHandle();
|
||||
private long nativeHandle;
|
||||
private Vector usedReaders;
|
||||
|
||||
public native void addReader(JNIKiwixReader reader);
|
||||
public void addKiwixReader(JNIKiwixReader reader)
|
||||
{
|
||||
addReader(reader);
|
||||
usedReaders.addElement(reader);
|
||||
};
|
||||
|
||||
public native void search(String query, int count);
|
||||
|
||||
public native Result getNextResult();
|
||||
public native boolean hasMoreResult();
|
||||
}
|
||||
25
src/android/org/kiwix/kiwixlib/JNIKiwixString.java
Normal file
@@ -0,0 +1,25 @@
|
||||
/*
|
||||
* Copyright (C) 2013 Emmanuel Engelhart <kelson@kiwix.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 3 of the License, or
|
||||
* any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
* MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
package org.kiwix.kiwixlib;
|
||||
|
||||
/**
 * Mutable holder for a single String.
 *
 * The only member is the public field {@code value}. The JNI helper
 * setStringObjValue (utils.h) looks a field up by the name "value" with the
 * signature "Ljava/lang/String;", so neither the name nor the type should
 * change.
 */
public class JNIKiwixString {
  /** Wrapped value; {@code null} until something assigns it. */
  public String value;
}
|
||||
26
src/android/org/kiwix/kiwixlib/Pair.java
Normal file
@@ -0,0 +1,26 @@
|
||||
/*
|
||||
* Copyright (C) 2017 Matthieu Gautier <mgautier@kymeria.fr>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 3 of the License, or
|
||||
* any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
* MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
package org.kiwix.kiwixlib;
|
||||
|
||||
/**
 * (filename, offset) value pair.
 *
 * The JNI helper setPairObjValue (utils.h) fills the two fields by name
 * ("filename" as String, "offset" as int), so names and types should not
 * change.
 */
public class Pair {
  /** File name part of the pair; {@code null} until assigned. */
  public String filename;

  /** Offset part of the pair; 0 until assigned. */
  public int offset;
}
|
||||
3
src/android/res/values/strings.xml
Normal file
@@ -0,0 +1,3 @@
|
||||
<resources>
|
||||
<string name="app_name">Kiwix Lib</string>
|
||||
</resources>
|
||||
150
src/android/utils.h
Normal file
@@ -0,0 +1,150 @@
|
||||
/*
|
||||
* Copyright (C) 2013 Emmanuel Engelhart <kelson@kiwix.org>
|
||||
* Copyright (C) 2017 Matthieu Gautier <mgautier@kymeria.fr>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 3 of the License, or
|
||||
* any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
* MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef _ANDROID_JNI_UTILS_H
|
||||
#define _ANDROID_JNI_UTILS_H
|
||||
|
||||
#include <jni.h>
|
||||
|
||||
#include <pthread.h>
|
||||
#include <string>
|
||||
|
||||
extern pthread_mutex_t globalLock;
|
||||
|
||||
/**
 * Look up the `nativeHandle` field of the class of `obj`.
 *
 * @param env JNI environment of the calling thread.
 * @param obj Java object whose class declares a `long nativeHandle` field.
 * @return the field id as returned by GetFieldID.
 */
inline jfieldID getHandleField(JNIEnv* env, jobject obj)
{
  jclass objClass = env->GetObjectClass(obj);
  // "J" is the JNI type signature of a Java long.
  jfieldID handleField = env->GetFieldID(objClass, "nativeHandle", "J");
  return handleField;
}
|
||||
|
||||
class Lock
|
||||
{
|
||||
protected:
|
||||
pthread_mutex_t* lock;
|
||||
|
||||
public:
|
||||
Lock() : lock(&globalLock) { pthread_mutex_lock(lock); }
|
||||
Lock(const Lock&) = delete;
|
||||
Lock& operator=(const Lock&) = delete;
|
||||
Lock(Lock&& other) : lock(&globalLock) { other.lock = nullptr; }
|
||||
virtual ~Lock()
|
||||
{
|
||||
if (lock) {
|
||||
pthread_mutex_unlock(lock);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template <class T>
|
||||
class LockedHandle;
|
||||
|
||||
template <class T>
|
||||
class Handle
|
||||
{
|
||||
protected:
|
||||
T* h;
|
||||
|
||||
public:
|
||||
Handle(T* h) : h(h){};
|
||||
|
||||
// No destructor. This must and will be handled by dispose method.
|
||||
|
||||
static LockedHandle<T> getHandle(JNIEnv* env, jobject obj)
|
||||
{
|
||||
jlong handle = env->GetLongField(obj, getHandleField(env, obj));
|
||||
return LockedHandle<T>(reinterpret_cast<Handle<T>*>(handle));
|
||||
}
|
||||
|
||||
static void dispose(JNIEnv* env, jobject obj)
|
||||
{
|
||||
auto lHandle = getHandle(env, obj);
|
||||
auto handle = lHandle.h;
|
||||
delete handle->h;
|
||||
delete handle;
|
||||
}
|
||||
friend class LockedHandle<T>;
|
||||
};
|
||||
|
||||
template <class T>
|
||||
struct LockedHandle : public Lock {
|
||||
Handle<T>* h;
|
||||
LockedHandle(Handle<T>* h) : h(h) {}
|
||||
T* operator->() { return h->h; }
|
||||
T* operator*() { return h->h; }
|
||||
operator bool() const { return (h->h != nullptr); }
|
||||
};
|
||||
|
||||
/* c2jni type conversion functions */
|
||||
inline jboolean c2jni(const bool& val) { return val ? JNI_TRUE : JNI_FALSE; }
|
||||
inline jstring c2jni(const std::string& val, JNIEnv* env)
|
||||
{
|
||||
return env->NewStringUTF(val.c_str());
|
||||
}
|
||||
|
||||
inline jint c2jni(const int val) { return (jint)val; }
|
||||
inline jint c2jni(const unsigned val) { return (unsigned)val; }
|
||||
/* jni2c type conversion functions */
|
||||
inline bool jni2c(const jboolean& val) { return val == JNI_TRUE; }
|
||||
inline std::string jni2c(const jstring& val, JNIEnv* env)
|
||||
{
|
||||
const char* chars = env->GetStringUTFChars(val, 0);
|
||||
std::string ret(chars);
|
||||
env->ReleaseStringUTFChars(val, chars);
|
||||
return ret;
|
||||
}
|
||||
|
||||
inline int jni2c(const jint val) { return (int)val; }
|
||||
/* Method to deal with variable passed by reference */
|
||||
inline void setStringObjValue(const std::string& value,
|
||||
const jobject obj,
|
||||
JNIEnv* env)
|
||||
{
|
||||
jclass objClass = env->GetObjectClass(obj);
|
||||
jfieldID objFid = env->GetFieldID(objClass, "value", "Ljava/lang/String;");
|
||||
env->SetObjectField(obj, objFid, c2jni(value, env));
|
||||
}
|
||||
|
||||
inline void setIntObjValue(const int value, const jobject obj, JNIEnv* env)
|
||||
{
|
||||
jclass objClass = env->GetObjectClass(obj);
|
||||
jfieldID objFid = env->GetFieldID(objClass, "value", "I");
|
||||
env->SetIntField(obj, objFid, value);
|
||||
}
|
||||
|
||||
inline void setBoolObjValue(const bool value, const jobject obj, JNIEnv* env)
|
||||
{
|
||||
jclass objClass = env->GetObjectClass(obj);
|
||||
jfieldID objFid = env->GetFieldID(objClass, "value", "Z");
|
||||
env->SetIntField(obj, objFid, c2jni(value));
|
||||
}
|
||||
|
||||
inline void setPairObjValue(const std::string& filename, const int offset,
|
||||
const jobject obj, JNIEnv* env)
|
||||
{
|
||||
jclass objClass = env->GetObjectClass(obj);
|
||||
jfieldID filenameFid = env->GetFieldID(objClass, "filename", "Ljava/lang/String;");
|
||||
env->SetObjectField(obj, filenameFid, c2jni(filename, env));
|
||||
jfieldID offsetFid = env->GetFieldID(objClass, "offset", "I");
|
||||
env->SetIntField(obj, offsetFid, offset);
|
||||
}
|
||||
|
||||
#endif // _ANDROID_JNI_UTILS_H
|
||||
@@ -24,7 +24,7 @@
|
||||
René Nyffenegger rene.nyffenegger@adp-gmbh.ch
|
||||
*/
|
||||
|
||||
#include "base64.h"
|
||||
#include <common/base64.h>
|
||||
#include <iostream>
|
||||
|
||||
static const std::string base64_chars =
|
||||
|
||||
@@ -1,84 +0,0 @@
|
||||
/*
|
||||
* Copyright 2011 Emmanuel Engelhart <kelson@kiwix.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 3 of the License, or
|
||||
* any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
* MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#include "componentTools.h"
|
||||
|
||||
const char *nsStringToCString(const nsAString &str) {
|
||||
const char *cStr;
|
||||
nsCString tmpStr;
|
||||
|
||||
#ifdef _WIN32
|
||||
LossyCopyUTF16toASCII(str, tmpStr);
|
||||
#else
|
||||
CopyUTF16toUTF8(str, tmpStr);
|
||||
#endif
|
||||
|
||||
NS_CStringGetData(tmpStr, &cStr);
|
||||
|
||||
#ifdef _WIN32
|
||||
return _strdup(cStr);
|
||||
#else
|
||||
return strdup(cStr);
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
 * Convert `str` to a std::string.
 *
 * Windows: every UTF-16 unit is widened to wchar_t and the result is
 * converted with wcstombs(). Buffers are sized from the input, so strings of
 * any length are handled; an empty string is returned when wcstombs() reports
 * a conversion error.
 * Other platforms: CopyUTF16toUTF8.
 */
std::string nsStringToString(const nsEmbedString &str) {
#ifdef _WIN32
  PRUnichar *start = (PRUnichar *)str.get();
  PRUnichar *end = start + str.Length();

  /* Widen into a buffer that grows with the input (no fixed 4096 limit) */
  std::wstring wide;
  wide.reserve(str.Length());
  for(; start < end; ++start)
    {
      wide.push_back((wchar_t) *start);
    }

  /* First pass: ask for the number of bytes needed */
  const size_t needed = wcstombs(NULL, wide.c_str(), 0);
  if (needed == (size_t)-1 || needed == 0) {
    return std::string();
  }

  /* Second pass: convert exactly `needed` bytes */
  std::string ptr;
  ptr.resize(needed);
  const size_t size = wcstombs(&ptr[0], wide.c_str(), needed);
  if (size == (size_t)-1) {
    return std::string();
  }
  ptr.resize(size);

  return ptr;
#else
  const char *cStr;
  nsCString tmpStr;

  CopyUTF16toUTF8(str, tmpStr);
  NS_CStringGetData(tmpStr, &cStr);
  return std::string(cStr);
#endif

}
|
||||
|
||||
const char *nsStringToUTF8(const nsAString &str) {
|
||||
const char *cStr;
|
||||
nsCString tmpStr;
|
||||
CopyUTF16toUTF8(str, tmpStr);
|
||||
NS_CStringGetData(tmpStr, &cStr);
|
||||
|
||||
#ifdef _WIN32
|
||||
return _strdup(cStr);
|
||||
#else
|
||||
return strdup(cStr);
|
||||
#endif
|
||||
}
|
||||
@@ -1,526 +0,0 @@
|
||||
/*
|
||||
* Copyright 2011-2014 Emmanuel Engelhart <kelson@kiwix.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 3 of the License, or
|
||||
* any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
* MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#include "indexer.h"
|
||||
|
||||
namespace kiwix {
|
||||
|
||||
/* Count word */
|
||||
unsigned int Indexer::countWords(const string &text) {
|
||||
unsigned int numWords = 1;
|
||||
unsigned int length = text.size();
|
||||
|
||||
for(unsigned int i=0; i<length;) {
|
||||
while(i<length && text[i] != ' ') {
|
||||
i++;
|
||||
}
|
||||
numWords++;
|
||||
i++;
|
||||
}
|
||||
|
||||
return numWords;
|
||||
}
|
||||
|
||||
/* Constructor */
|
||||
Indexer::Indexer() :
|
||||
keywordsBoostFactor(3),
|
||||
verboseFlag(false) {
|
||||
|
||||
/* Initialize mutex */
|
||||
pthread_mutex_init(&threadIdsMutex, NULL);
|
||||
pthread_mutex_init(&toParseQueueMutex, NULL);
|
||||
pthread_mutex_init(&toIndexQueueMutex, NULL);
|
||||
pthread_mutex_init(&articleExtractorRunningMutex, NULL);
|
||||
pthread_mutex_init(&articleParserRunningMutex, NULL);
|
||||
pthread_mutex_init(&articleIndexerRunningMutex, NULL);
|
||||
pthread_mutex_init(&articleCountMutex, NULL);
|
||||
pthread_mutex_init(&zimPathMutex, NULL);
|
||||
pthread_mutex_init(&zimIdMutex, NULL);
|
||||
pthread_mutex_init(&indexPathMutex, NULL);
|
||||
pthread_mutex_init(&progressionMutex, NULL);
|
||||
pthread_mutex_init(&verboseMutex, NULL);
|
||||
}
|
||||
|
||||
/* Destructor */
|
||||
Indexer::~Indexer() {
|
||||
}
|
||||
|
||||
/* Read the stopwords */
|
||||
void Indexer::readStopWords(const string languageCode) {
|
||||
std::string stopWord;
|
||||
std::istringstream file(getResourceAsString("stopwords/" + languageCode));
|
||||
|
||||
this->stopWords.clear();
|
||||
|
||||
while (getline(file, stopWord, '\n')) {
|
||||
this->stopWords.push_back(stopWord);
|
||||
}
|
||||
|
||||
if (this->verboseFlag) {
|
||||
std::cout << "Read stop words, lang code:" << languageCode << ", count:" << this->stopWords.size() << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
#pragma mark - Extractor
|
||||
|
||||
/* Article extractor methods */
|
||||
void *Indexer::extractArticles(void *ptr) {
|
||||
pthread_setcanceltype(PTHREAD_CANCEL_DEFERRED, NULL);
|
||||
kiwix::Indexer *self = (kiwix::Indexer *)ptr;
|
||||
|
||||
/* Get the number of article to index and the ZIM id */
|
||||
kiwix::Reader reader(self->getZimPath());
|
||||
unsigned int articleCount = reader.getArticleCount();
|
||||
self->setArticleCount(articleCount);
|
||||
string zimId = reader.getId();
|
||||
self->setZimId(zimId);
|
||||
|
||||
/* Progression */
|
||||
unsigned int readArticleCount = 0;
|
||||
unsigned int currentProgression = 0;
|
||||
self->setProgression(currentProgression);
|
||||
unsigned int newProgress;
|
||||
|
||||
/* StopWords */
|
||||
self->readStopWords(reader.getLanguage());
|
||||
|
||||
/* Goes trough all articles */
|
||||
zim::File *zimHandler = reader.getZimFileHandler();
|
||||
unsigned int currentOffset = zimHandler->getNamespaceBeginOffset('A');
|
||||
unsigned int lastOffset = zimHandler->getNamespaceEndOffset('A');
|
||||
zim::Article currentArticle;
|
||||
|
||||
while (currentOffset < lastOffset) {
|
||||
currentArticle = zimHandler->getArticle(currentOffset);
|
||||
|
||||
if (!currentArticle.isRedirect()) {
|
||||
/* Add articles to the queue */
|
||||
indexerToken token;
|
||||
token.title = currentArticle.getTitle();
|
||||
token.url = currentArticle.getLongUrl();
|
||||
token.content = string(currentArticle.getData().data(), currentArticle.getData().size());
|
||||
self->pushToParseQueue(token);
|
||||
readArticleCount += 1;
|
||||
|
||||
/* Update progress */
|
||||
if (self->progressCallback) {
|
||||
self->progressCallback(readArticleCount, articleCount);
|
||||
}
|
||||
newProgress = (unsigned int)((float)readArticleCount / (float)articleCount * 100);
|
||||
if (newProgress != currentProgression) {
|
||||
self->setProgression(newProgress);
|
||||
}
|
||||
}
|
||||
|
||||
currentOffset += 1;
|
||||
|
||||
/* Test if the thread should be cancelled */
|
||||
pthread_testcancel();
|
||||
}
|
||||
|
||||
self->articleExtractorRunning(false);
|
||||
pthread_exit(NULL);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
void Indexer::articleExtractorRunning(bool value) {
|
||||
pthread_mutex_lock(&articleExtractorRunningMutex);
|
||||
this->articleExtractorRunningFlag = value;
|
||||
pthread_mutex_unlock(&articleExtractorRunningMutex);
|
||||
}
|
||||
|
||||
bool Indexer::isArticleExtractorRunning() {
|
||||
pthread_mutex_lock(&articleExtractorRunningMutex);
|
||||
bool retVal = this->articleExtractorRunningFlag;
|
||||
pthread_mutex_unlock(&articleExtractorRunningMutex);
|
||||
return retVal;
|
||||
}
|
||||
|
||||
#pragma mark - Parser
|
||||
|
||||
/* Article parser methods */
|
||||
void *Indexer::parseArticles(void *ptr) {
|
||||
pthread_setcanceltype(PTHREAD_CANCEL_DEFERRED, NULL);
|
||||
kiwix::Indexer *self = (kiwix::Indexer *)ptr;
|
||||
size_t found;
|
||||
indexerToken token;
|
||||
|
||||
while (self->popFromToParseQueue(token)) {
|
||||
MyHtmlParser htmlParser;
|
||||
|
||||
/* The parser generate a lot of exceptions which should be avoided */
|
||||
try {
|
||||
htmlParser.parse_html(token.content, "UTF-8", true);
|
||||
} catch (...) {
|
||||
}
|
||||
|
||||
/* If content does not have the noindex meta tag */
|
||||
/* Seems that the parser generates an exception in such case */
|
||||
found = htmlParser.dump.find("NOINDEX");
|
||||
|
||||
if (found == string::npos) {
|
||||
/* Get the accented title */
|
||||
token.accentedTitle = (htmlParser.title.empty() ? token.title : htmlParser.title);
|
||||
|
||||
/* count words */
|
||||
stringstream countWordStringStream;
|
||||
countWordStringStream << self->countWords(htmlParser.dump);
|
||||
token.wordCount = countWordStringStream.str();
|
||||
|
||||
/* snippet */
|
||||
std::string snippet = std::string(htmlParser.dump, 0, 300);
|
||||
std::string::size_type last = snippet.find_last_of('.');
|
||||
if (last == snippet.npos)
|
||||
last = snippet.find_last_of(' ');
|
||||
if (last != snippet.npos)
|
||||
snippet = snippet.substr(0, last);
|
||||
token.snippet = snippet;
|
||||
|
||||
/* size */
|
||||
stringstream sizeStringStream;
|
||||
sizeStringStream << token.content.size() / 1024;
|
||||
token.size = sizeStringStream.str();
|
||||
|
||||
/* Remove accent */
|
||||
token.title = kiwix::removeAccents(token.accentedTitle);
|
||||
token.keywords = kiwix::removeAccents(htmlParser.keywords);
|
||||
token.content = kiwix::removeAccents(htmlParser.dump);
|
||||
self->pushToIndexQueue(token);
|
||||
}
|
||||
|
||||
/* Test if the thread should be cancelled */
|
||||
pthread_testcancel();
|
||||
}
|
||||
|
||||
self->articleParserRunning(false);
|
||||
pthread_exit(NULL);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
void Indexer::articleParserRunning(bool value) {
|
||||
pthread_mutex_lock(&articleParserRunningMutex);
|
||||
this->articleParserRunningFlag = value;
|
||||
pthread_mutex_unlock(&articleParserRunningMutex);
|
||||
}
|
||||
|
||||
bool Indexer::isArticleParserRunning() {
|
||||
pthread_mutex_lock(&articleParserRunningMutex);
|
||||
bool retVal = this->articleParserRunningFlag;
|
||||
pthread_mutex_unlock(&articleParserRunningMutex);
|
||||
return retVal;
|
||||
}
|
||||
|
||||
#pragma mark - Indexer
|
||||
|
||||
/* Article indexer methods */
|
||||
void *Indexer::indexArticles(void *ptr) {
|
||||
pthread_setcanceltype(PTHREAD_CANCEL_DEFERRED, NULL);
|
||||
kiwix::Indexer *self = (kiwix::Indexer *)ptr;
|
||||
unsigned int indexedArticleCount = 0;
|
||||
indexerToken token;
|
||||
|
||||
self->indexingPrelude(self->getIndexPath());
|
||||
|
||||
while (self->popFromToIndexQueue(token)) {
|
||||
self->index(token.url,
|
||||
token.accentedTitle,
|
||||
token.title,
|
||||
token.keywords,
|
||||
token.content,
|
||||
token.snippet,
|
||||
token.size,
|
||||
token.wordCount
|
||||
);
|
||||
|
||||
indexedArticleCount += 1;
|
||||
|
||||
/* Make a hard-disk flush every 10.000 articles */
|
||||
if (indexedArticleCount % 5000 == 0) {
|
||||
self->flush();
|
||||
}
|
||||
|
||||
/* Test if the thread should be cancelled */
|
||||
pthread_testcancel();
|
||||
}
|
||||
self->indexingPostlude(self->getIndexPath());
|
||||
|
||||
/* Write content id file */
|
||||
string path = appendToDirectory(self->getIndexPath(), "content.id");
|
||||
writeTextFile(path, self->getZimId());
|
||||
|
||||
self->setProgression(100);
|
||||
kiwix::sleep(100);
|
||||
|
||||
self->articleIndexerRunning(false);
|
||||
pthread_exit(NULL);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
void Indexer::articleIndexerRunning(bool value) {
|
||||
pthread_mutex_lock(&articleIndexerRunningMutex);
|
||||
this->articleIndexerRunningFlag = value;
|
||||
pthread_mutex_unlock(&articleIndexerRunningMutex);
|
||||
}
|
||||
|
||||
bool Indexer::isArticleIndexerRunning() {
|
||||
pthread_mutex_lock(&articleIndexerRunningMutex);
|
||||
bool retVal = this->articleIndexerRunningFlag;
|
||||
pthread_mutex_unlock(&articleIndexerRunningMutex);
|
||||
return retVal;
|
||||
}
|
||||
|
||||
#pragma mark - Parse Queue
|
||||
|
||||
/* ToParseQueue methods */
|
||||
bool Indexer::isToParseQueueEmpty() {
|
||||
pthread_mutex_lock(&toParseQueueMutex);
|
||||
bool retVal = this->toParseQueue.empty();
|
||||
pthread_mutex_unlock(&toParseQueueMutex);
|
||||
return retVal;
|
||||
}
|
||||
|
||||
void Indexer::pushToParseQueue(indexerToken &token) {
|
||||
pthread_mutex_lock(&toParseQueueMutex);
|
||||
this->toParseQueue.push(token);
|
||||
pthread_mutex_unlock(&toParseQueueMutex);
|
||||
kiwix::sleep(int(this->toParseQueue.size() / 200) / 10 * 1000);
|
||||
}
|
||||
|
||||
bool Indexer::popFromToParseQueue(indexerToken &token) {
|
||||
while (this->isToParseQueueEmpty() && this->isArticleExtractorRunning()) {
|
||||
kiwix::sleep(500);
|
||||
if (this->getVerboseFlag()) {
|
||||
std::cout << "Waiting... ToParseQueue is empty for now..." << std::endl;
|
||||
}
|
||||
|
||||
pthread_testcancel();
|
||||
}
|
||||
|
||||
if (!this->isToParseQueueEmpty()) {
|
||||
pthread_mutex_lock(&toParseQueueMutex);
|
||||
token = this->toParseQueue.front();
|
||||
this->toParseQueue.pop();
|
||||
pthread_mutex_unlock(&toParseQueueMutex);
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
#pragma mark - Index Queue
|
||||
|
||||
/* ToIndexQueue methods */
|
||||
bool Indexer::isToIndexQueueEmpty() {
|
||||
pthread_mutex_lock(&toIndexQueueMutex);
|
||||
bool retVal = this->toIndexQueue.empty();
|
||||
pthread_mutex_unlock(&toIndexQueueMutex);
|
||||
return retVal;
|
||||
}
|
||||
|
||||
void Indexer::pushToIndexQueue(indexerToken &token) {
|
||||
pthread_mutex_lock(&toIndexQueueMutex);
|
||||
this->toIndexQueue.push(token);
|
||||
pthread_mutex_unlock(&toIndexQueueMutex);
|
||||
kiwix::sleep(int(this->toIndexQueue.size() / 200) / 10 * 1000);
|
||||
}
|
||||
|
||||
bool Indexer::popFromToIndexQueue(indexerToken &token) {
|
||||
while (this->isToIndexQueueEmpty() && this->isArticleParserRunning()) {
|
||||
kiwix::sleep(500);
|
||||
if (this->getVerboseFlag()) {
|
||||
std::cout << "Waiting... ToIndexQueue is empty for now..." << std::endl;
|
||||
}
|
||||
|
||||
pthread_testcancel();
|
||||
}
|
||||
|
||||
if (!this->isToIndexQueueEmpty()) {
|
||||
pthread_mutex_lock(&toIndexQueueMutex);
|
||||
token = this->toIndexQueue.front();
|
||||
this->toIndexQueue.pop();
|
||||
pthread_mutex_unlock(&toIndexQueueMutex);
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
#pragma mark - Properties Getter & Setter
|
||||
|
||||
/* ZIM & Index methods */
|
||||
void Indexer::setZimPath(const string path) {
|
||||
pthread_mutex_lock(&zimPathMutex);
|
||||
this->zimPath = path;
|
||||
pthread_mutex_unlock(&zimPathMutex);
|
||||
}
|
||||
|
||||
string Indexer::getZimPath() {
|
||||
pthread_mutex_lock(&zimPathMutex);
|
||||
string retVal = this->zimPath;
|
||||
pthread_mutex_unlock(&zimPathMutex);
|
||||
return retVal;
|
||||
}
|
||||
|
||||
void Indexer::setIndexPath(const string path) {
|
||||
pthread_mutex_lock(&indexPathMutex);
|
||||
this->indexPath = path;
|
||||
pthread_mutex_unlock(&indexPathMutex);
|
||||
}
|
||||
|
||||
string Indexer::getIndexPath() {
|
||||
pthread_mutex_lock(&indexPathMutex);
|
||||
string retVal = this->indexPath;
|
||||
pthread_mutex_unlock(&indexPathMutex);
|
||||
return retVal;
|
||||
}
|
||||
|
||||
void Indexer::setArticleCount(const unsigned int articleCount) {
|
||||
pthread_mutex_lock(&articleCountMutex);
|
||||
this->articleCount = articleCount;
|
||||
pthread_mutex_unlock(&articleCountMutex);
|
||||
}
|
||||
|
||||
unsigned int Indexer::getArticleCount() {
|
||||
pthread_mutex_lock(&articleCountMutex);
|
||||
unsigned int retVal = this->articleCount;
|
||||
pthread_mutex_unlock(&articleCountMutex);
|
||||
return retVal;
|
||||
}
|
||||
|
||||
/* Store the progression value; the write is guarded by progressionMutex. */
void Indexer::setProgression(const unsigned int progression) {
  pthread_mutex_lock(&progressionMutex);
  /* 'this->' is required: the parameter shadows the member of the same name */
  this->progression = progression;
  pthread_mutex_unlock(&progressionMutex);
}
|
||||
|
||||
unsigned int Indexer::getProgression() {
|
||||
pthread_mutex_lock(&progressionMutex);
|
||||
unsigned int retVal = this->progression;
|
||||
pthread_mutex_unlock(&progressionMutex);
|
||||
return retVal;
|
||||
}
|
||||
|
||||
void Indexer::setZimId(const string id) {
|
||||
pthread_mutex_lock(&zimIdMutex);
|
||||
this->zimId = id;
|
||||
pthread_mutex_unlock(&zimIdMutex);
|
||||
}
|
||||
|
||||
/* Return a copy of the ZIM identifier, taken while holding zimIdMutex. */
string Indexer::getZimId() {
  pthread_mutex_lock(&zimIdMutex);
  string currentZimId(zimId);
  pthread_mutex_unlock(&zimIdMutex);
  return currentZimId;
}
|
||||
|
||||
#pragma mark - Status Management
|
||||
|
||||
/* Manage */
|
||||
bool Indexer::start(const string zimPath, const string indexPath, ProgressCallback callback) {
|
||||
if (this->getVerboseFlag()) {
|
||||
std::cout << "Indexing of '" << zimPath << "' starting..." <<std::endl;
|
||||
}
|
||||
|
||||
if (callback) {
|
||||
this->progressCallback = callback;
|
||||
}
|
||||
|
||||
this->setArticleCount(0);
|
||||
this->setProgression(0);
|
||||
this->setZimPath(zimPath);
|
||||
this->setIndexPath(indexPath);
|
||||
|
||||
pthread_mutex_lock(&threadIdsMutex);
|
||||
this->articleExtractorRunning(true);
|
||||
pthread_create(&(this->articleExtractor), NULL, Indexer::extractArticles, (void*)this);
|
||||
pthread_detach(this->articleExtractor);
|
||||
|
||||
while(this->isArticleExtractorRunning() && this->getArticleCount() == 0) {
|
||||
kiwix::sleep(100);
|
||||
}
|
||||
|
||||
this->articleParserRunning(true);
|
||||
pthread_create(&(this->articleParser), NULL, Indexer::parseArticles, (void*)this);
|
||||
pthread_detach(this->articleParser);
|
||||
|
||||
this->articleIndexerRunning(true);
|
||||
pthread_create(&(this->articleIndexer), NULL, Indexer::indexArticles, (void*)this);
|
||||
pthread_detach(this->articleIndexer);
|
||||
pthread_mutex_unlock(&threadIdsMutex);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Indexer::isRunning() {
|
||||
if (this->getVerboseFlag()) {
|
||||
std::cout << "isArticleExtractor running: " << (this->isArticleExtractorRunning() ? "yes" : "no") << std::endl;
|
||||
std::cout << "isArticleParser running: " << (this->isArticleParserRunning() ? "yes" : "no") << std::endl;
|
||||
std::cout << "isArticleIndexer running: " << (this->isArticleIndexerRunning() ? "yes" : "no") << std::endl;
|
||||
}
|
||||
|
||||
return this->isArticleExtractorRunning() || this->isArticleIndexerRunning() || this->isArticleParserRunning();
|
||||
}
|
||||
|
||||
bool Indexer::stop() {
|
||||
if (this->isRunning()) {
|
||||
bool isArticleExtractorRunning = this->isArticleExtractorRunning();
|
||||
bool isArticleIndexerRunning = this->isArticleIndexerRunning();
|
||||
bool isArticleParserRunning = this->isArticleParserRunning();
|
||||
|
||||
pthread_mutex_lock(&threadIdsMutex);
|
||||
|
||||
if (isArticleIndexerRunning) {
|
||||
pthread_cancel(this->articleIndexer);
|
||||
this->articleIndexerRunning(false);
|
||||
}
|
||||
if (isArticleParserRunning) {
|
||||
pthread_cancel(this->articleParser);
|
||||
this->articleParserRunning(false);
|
||||
}
|
||||
if (isArticleExtractorRunning) {
|
||||
pthread_cancel(this->articleExtractor);
|
||||
this->articleExtractorRunning(false);
|
||||
}
|
||||
|
||||
pthread_mutex_unlock(&threadIdsMutex);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
#pragma mark - verbose
|
||||
|
||||
/* Manage the verboseFlag */
|
||||
void Indexer::setVerboseFlag(const bool value) {
|
||||
pthread_mutex_lock(&verboseMutex);
|
||||
this->verboseFlag = value;
|
||||
pthread_mutex_unlock(&verboseMutex);
|
||||
}
|
||||
|
||||
bool Indexer::getVerboseFlag() {
|
||||
bool value;
|
||||
pthread_mutex_lock(&verboseMutex);
|
||||
value = this->verboseFlag;
|
||||
pthread_mutex_unlock(&verboseMutex);
|
||||
return value;
|
||||
}
|
||||
|
||||
}
|
||||
@@ -1,175 +0,0 @@
|
||||
/*
|
||||
* Copyright 2014 Emmanuel Engelhart <kelson@kiwix.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 3 of the License, or
|
||||
* any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
* MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#ifndef KIWIX_INDEXER_H
|
||||
#define KIWIX_INDEXER_H
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <stack>
|
||||
#include <queue>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
|
||||
#include <pthread.h>
|
||||
#include <stringTools.h>
|
||||
#include <otherTools.h>
|
||||
#include <resourceTools.h>
|
||||
#include <zim/file.h>
|
||||
#include <zim/article.h>
|
||||
#include <zim/fileiterator.h>
|
||||
#include "reader.h"
|
||||
#include "xapian/myhtmlparse.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace kiwix {
|
||||
|
||||
/* Element type of the Indexer "to parse" and "to index" queues.
 * Every field, including the size and the word count, is carried as text. */
struct indexerToken {
  /* Article identification */
  string url;
  string accentedTitle;
  string title;

  /* Text material */
  string keywords;
  string content;
  string snippet;

  /* Numeric values kept as strings */
  string size;
  string wordCount;
};
|
||||
|
||||
class Indexer {
|
||||
|
||||
typedef void (* ProgressCallback)(const unsigned int processedArticleCount, const unsigned int totalArticleCount);
|
||||
|
||||
public:
|
||||
Indexer();
|
||||
virtual ~Indexer();
|
||||
|
||||
bool start(const string zimPath, const string indexPath, ProgressCallback callback = NULL);
|
||||
bool stop();
|
||||
bool isRunning();
|
||||
unsigned int getProgression();
|
||||
void setVerboseFlag(const bool value);
|
||||
|
||||
protected:
|
||||
virtual void indexingPrelude(const string indexPath) = 0;
|
||||
virtual void index(const string &url,
|
||||
const string &title,
|
||||
const string &unaccentedTitle,
|
||||
const string &keywords,
|
||||
const string &content,
|
||||
const string &snippet,
|
||||
const string &size,
|
||||
const string &wordCount) = 0;
|
||||
virtual void flush() = 0;
|
||||
virtual void indexingPostlude(const string indexPath) = 0;
|
||||
|
||||
/* Stop words */
|
||||
std::vector<std::string> stopWords;
|
||||
void readStopWords(const string languageCode);
|
||||
|
||||
/* Others */
|
||||
unsigned int countWords(const string &text);
|
||||
|
||||
/* Boost factor */
|
||||
unsigned int keywordsBoostFactor;
|
||||
inline unsigned int getTitleBoostFactor(const unsigned int contentLength) {
|
||||
return contentLength / 500 + 1;
|
||||
}
|
||||
|
||||
/* Verbose */
|
||||
pthread_mutex_t verboseMutex;
|
||||
bool getVerboseFlag();
|
||||
bool verboseFlag;
|
||||
|
||||
private:
|
||||
ProgressCallback progressCallback;
|
||||
pthread_mutex_t threadIdsMutex;
|
||||
|
||||
/* Article extraction */
|
||||
pthread_t articleExtractor;
|
||||
pthread_mutex_t articleExtractorRunningMutex;
|
||||
static void *extractArticles(void *ptr);
|
||||
bool articleExtractorRunningFlag;
|
||||
bool isArticleExtractorRunning();
|
||||
void articleExtractorRunning(bool value);
|
||||
|
||||
/* Article parsing */
|
||||
pthread_t articleParser;
|
||||
pthread_mutex_t articleParserRunningMutex;
|
||||
static void *parseArticles(void *ptr);
|
||||
bool articleParserRunningFlag;
|
||||
bool isArticleParserRunning();
|
||||
void articleParserRunning(bool value);
|
||||
|
||||
/* Index writting */
|
||||
pthread_t articleIndexer;
|
||||
pthread_mutex_t articleIndexerRunningMutex;
|
||||
static void *indexArticles(void *ptr);
|
||||
bool articleIndexerRunningFlag;
|
||||
bool isArticleIndexerRunning();
|
||||
void articleIndexerRunning(bool value);
|
||||
|
||||
/* To parse queue */
|
||||
std::queue<indexerToken> toParseQueue;
|
||||
pthread_mutex_t toParseQueueMutex;
|
||||
void pushToParseQueue(indexerToken &token);
|
||||
bool popFromToParseQueue(indexerToken &token);
|
||||
bool isToParseQueueEmpty();
|
||||
|
||||
/* To index queue */
|
||||
std::queue<indexerToken> toIndexQueue;
|
||||
pthread_mutex_t toIndexQueueMutex;
|
||||
void pushToIndexQueue(indexerToken &token);
|
||||
bool popFromToIndexQueue(indexerToken &token);
|
||||
bool isToIndexQueueEmpty();
|
||||
|
||||
/* Article Count & Progression */
|
||||
unsigned int articleCount;
|
||||
pthread_mutex_t articleCountMutex;
|
||||
void setArticleCount(const unsigned int articleCount);
|
||||
unsigned int getArticleCount();
|
||||
|
||||
/* Progression */
|
||||
unsigned int progression;
|
||||
pthread_mutex_t progressionMutex;
|
||||
void setProgression(const unsigned int progression);
|
||||
/* getProgression() is public */
|
||||
|
||||
/* ZIM path */
|
||||
pthread_mutex_t zimPathMutex;
|
||||
string zimPath;
|
||||
void setZimPath(const string path);
|
||||
string getZimPath();
|
||||
|
||||
/* Index path */
|
||||
pthread_mutex_t indexPathMutex;
|
||||
string indexPath;
|
||||
void setIndexPath(const string path);
|
||||
string getIndexPath();
|
||||
|
||||
/* ZIM id */
|
||||
pthread_mutex_t zimIdMutex;
|
||||
string zimId;
|
||||
void setZimId(const string id);
|
||||
string getZimId();
|
||||
};
|
||||
}
|
||||
|
||||
#endif
|
||||
@@ -1,143 +0,0 @@
|
||||
/*
|
||||
* Copyright 2011 Emmanuel Engelhart <kelson@kiwix.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 3 of the License, or
|
||||
* any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
* MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#include "library.h"
|
||||
|
||||
namespace kiwix {
|
||||
|
||||
/* Constructor */
|
||||
/* Build an empty, writable book. indexType is explicitly set to UNKNOWN so
 * that copying or serialising a fresh Book never reads an indeterminate
 * enum value. The string members default to empty. */
Book::Book():
  indexType(UNKNOWN),
  readOnly(false) {
}
|
||||
|
||||
/* Destructor */
|
||||
/* Nothing to release explicitly. */
Book::~Book()
{
}
|
||||
|
||||
/* Sort functions */
|
||||
/* Comparator: the book with the greater 'last' value comes first.
 * 'last' is parsed as a base-10 integer with strtol(), which clamps
 * out-of-range input instead of having undefined behaviour like atoi().
 * Text that is not a number yields 0. */
bool Book::sortByLastOpen(const kiwix::Book &a, const kiwix::Book &b) {
  const long lastOpenA = strtol(a.last.c_str(), NULL, 10);
  const long lastOpenB = strtol(b.last.c_str(), NULL, 10);
  return lastOpenA > lastOpenB;
}
|
||||
|
||||
/* Comparator: ascending strcmp() order of the titles. */
bool Book::sortByTitle(const kiwix::Book &a, const kiwix::Book &b) {
  const int order = strcmp(a.title.c_str(), b.title.c_str());
  return order < 0;
}
|
||||
|
||||
/* Comparator: descending strcmp() order of the date strings, i.e. the
 * greater date string comes first. */
bool Book::sortByDate(const kiwix::Book &a, const kiwix::Book &b) {
  const int order = strcmp(a.date.c_str(), b.date.c_str());
  return order > 0;
}
|
||||
|
||||
/* Comparator: the book with the smaller 'size' value comes first.
 * 'size' is parsed as a base-10 integer with strtol(), which clamps
 * out-of-range input instead of having undefined behaviour like atoi().
 * Text that is not a number yields 0. */
bool Book::sortBySize(const kiwix::Book &a, const kiwix::Book &b) {
  const long sizeA = strtol(a.size.c_str(), NULL, 10);
  const long sizeB = strtol(b.size.c_str(), NULL, 10);
  return sizeA < sizeB;
}
|
||||
|
||||
/* Comparator: ascending strcmp() order of the publishers. */
bool Book::sortByPublisher(const kiwix::Book &a, const kiwix::Book &b) {
  const int order = strcmp(a.publisher.c_str(), b.publisher.c_str());
  return order < 0;
}
|
||||
|
||||
/* Comparator: ascending strcmp() order of the creators. */
bool Book::sortByCreator(const kiwix::Book &a, const kiwix::Book &b) {
  const int order = strcmp(a.creator.c_str(), b.creator.c_str());
  return order < 0;
}
|
||||
|
||||
/* Comparator: ascending strcmp() order of the language strings. */
bool Book::sortByLanguage(const kiwix::Book &a, const kiwix::Book &b) {
  const int order = strcmp(a.language.c_str(), b.language.c_str());
  return order < 0;
}
|
||||
|
||||
std::string Book::getHumanReadableIdFromPath() {
|
||||
std::string id = pathAbsolute;
|
||||
if (!id.empty()) {
|
||||
kiwix::removeAccents(id);
|
||||
|
||||
#ifdef _WIN32
|
||||
id = replaceRegex(id, "", "^.*\\\\");
|
||||
#else
|
||||
id = replaceRegex(id, "", "^.*/");
|
||||
#endif
|
||||
|
||||
id = replaceRegex(id, "", "\\.zim[a-z]*$");
|
||||
id = replaceRegex(id, "_", " ");
|
||||
id = replaceRegex(id, "plus", "\\+");
|
||||
}
|
||||
return id;
|
||||
}
|
||||
|
||||
/* Constructor */
|
||||
/* A new library carries the library format version this code was built with. */
Library::Library()
  : version(KIWIX_LIBRARY_VERSION)
{
}
|
||||
|
||||
/* Destructor */
|
||||
/* Nothing to release explicitly. */
Library::~Library()
{
}
|
||||
|
||||
bool Library::addBook(const Book &book) {
|
||||
|
||||
/* Try to find it */
|
||||
std::vector<kiwix::Book>::iterator itr;
|
||||
for ( itr = this->books.begin(); itr != this->books.end(); ++itr ) {
|
||||
if (itr->id == book.id) {
|
||||
if (!itr->readOnly) {
|
||||
itr->readOnly = book.readOnly;
|
||||
|
||||
if (itr->path.empty())
|
||||
itr->path = book.path;
|
||||
|
||||
if (itr->pathAbsolute.empty())
|
||||
itr->pathAbsolute = book.pathAbsolute;
|
||||
|
||||
if (itr->url.empty())
|
||||
itr->url = book.url;
|
||||
|
||||
if (itr->tags.empty())
|
||||
itr->tags = book.tags;
|
||||
|
||||
if (itr->name.empty())
|
||||
itr->name = book.name;
|
||||
|
||||
if (itr->indexPath.empty()) {
|
||||
itr->indexPath = book.indexPath;
|
||||
itr->indexType = book.indexType;
|
||||
}
|
||||
|
||||
if (itr->indexPathAbsolute.empty()) {
|
||||
itr->indexPathAbsolute = book.indexPathAbsolute;
|
||||
itr->indexType = book.indexType;
|
||||
}
|
||||
|
||||
if (itr->faviconMimeType.empty()) {
|
||||
itr->favicon = book.favicon;
|
||||
itr->faviconMimeType = book.faviconMimeType;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/* otherwise */
|
||||
this->books.push_back(book);
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Remove the book stored at position 'bookIndex'.
 * Returns false and leaves the library untouched when the index is out of
 * range (erasing past the end would be undefined behaviour); returns true
 * once the book has been removed. */
bool Library::removeBookByIndex(const unsigned int bookIndex) {
  if (bookIndex >= books.size()) {
    return false;
  }

  books.erase(books.begin() + bookIndex);
  return true;
}
|
||||
|
||||
}
|
||||
@@ -1,107 +0,0 @@
|
||||
/*
|
||||
* Copyright 2011 Emmanuel Engelhart <kelson@kiwix.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 3 of the License, or
|
||||
* any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
* MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#ifndef KIWIX_LIBRARY_H
|
||||
#define KIWIX_LIBRARY_H
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string>
|
||||
#include <string.h>
|
||||
#include <vector>
|
||||
#include <stack>
|
||||
|
||||
#include <stringTools.h>
|
||||
#include <regexTools.h>
|
||||
|
||||
#define KIWIX_LIBRARY_VERSION "20110515"
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace kiwix {
|
||||
|
||||
/* Kind of full-text index attached to a book. */
enum supportedIndexType {
  UNKNOWN,
  XAPIAN
};
|
||||
|
||||
class Book {
|
||||
|
||||
public:
|
||||
Book();
|
||||
~Book();
|
||||
|
||||
static bool sortByLastOpen(const Book &a, const Book &b);
|
||||
static bool sortByTitle(const Book &a, const Book &b);
|
||||
static bool sortBySize(const Book &a, const Book &b);
|
||||
static bool sortByDate(const Book &a, const Book &b);
|
||||
static bool sortByCreator(const Book &a, const Book &b);
|
||||
static bool sortByPublisher(const Book &a, const Book &b);
|
||||
static bool sortByLanguage(const Book &a, const Book &b);
|
||||
string getHumanReadableIdFromPath();
|
||||
|
||||
string id;
|
||||
string path;
|
||||
string pathAbsolute;
|
||||
string last;
|
||||
string indexPath;
|
||||
string indexPathAbsolute;
|
||||
supportedIndexType indexType;
|
||||
string title;
|
||||
string description;
|
||||
string language;
|
||||
string creator;
|
||||
string publisher;
|
||||
string date;
|
||||
string url;
|
||||
string name;
|
||||
string tags;
|
||||
string origId;
|
||||
string articleCount;
|
||||
string mediaCount;
|
||||
bool readOnly;
|
||||
string size;
|
||||
string favicon;
|
||||
string faviconMimeType;
|
||||
};
|
||||
|
||||
class Library {
|
||||
|
||||
public:
|
||||
Library();
|
||||
~Library();
|
||||
|
||||
string version;
|
||||
bool addBook(const Book &book);
|
||||
bool removeBookByIndex(const unsigned int bookIndex);
|
||||
vector <kiwix::Book> books;
|
||||
|
||||
/*
|
||||
* 'current' is the variable storing the current content/book id
|
||||
* in the library. This is used to be able to load per default a
|
||||
* content. As Kiwix may work with many library XML files, you may
|
||||
* have "current" defined many time with different values. The
|
||||
* last XML file read has the priority, Although we do not have an
|
||||
* library object for each file, we want to be able to fallback to
|
||||
* an 'old' current book if the one which should be load
|
||||
* failed. That is the reason why we need a stack here
|
||||
*/
|
||||
stack<string> current;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
@@ -1,562 +0,0 @@
|
||||
/*
|
||||
* Copyright 2011 Emmanuel Engelhart <kelson@kiwix.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 3 of the License, or
|
||||
* any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
* MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#include "manager.h"
|
||||
|
||||
namespace kiwix {
|
||||
|
||||
/* Constructor */
|
||||
/* The manager starts without any writable library path. */
Manager::Manager()
  : writableLibraryPath("")
{
}
|
||||
|
||||
/* Destructor */
|
||||
/* Nothing to release explicitly. */
Manager::~Manager()
{
}
|
||||
|
||||
/* Import every <book> child of the <library> root node of 'doc'.
 *
 * doc:         parsed library XML document
 * readOnly:    value copied into the readOnly flag of each imported book
 * libraryPath: handed to checkAndCleanBookPaths() for each book
 *
 * A non-empty "current" attribute on <library> is passed to
 * setCurrentBookId(). Always returns true. */
bool Manager::parseXmlDom(const pugi::xml_document &doc, const bool readOnly, const string libraryPath) {
  pugi::xml_node libraryNode = doc.child("library");

  const char *currentBookId = libraryNode.attribute("current").value();
  if (strlen(currentBookId))
    this->setCurrentBookId(currentBookId);

  const string libraryVersion = libraryNode.attribute("version").value();

  /* Does not depend on the book, so evaluated once before the loop */
  const bool checkBookFile =
    libraryVersion.empty() || atoi(libraryVersion.c_str()) <= atoi(KIWIX_LIBRARY_VERSION);

  for (pugi::xml_node bookNode = libraryNode.child("book"); bookNode; bookNode = bookNode.next_sibling("book")) {
    kiwix::Book book;

    book.readOnly = readOnly;
    book.id = bookNode.attribute("id").value();
    book.path = bookNode.attribute("path").value();

    /* The literal "undefined" is treated like a missing value */
    const std::string lastOpen = bookNode.attribute("last").value();
    book.last = (lastOpen != "undefined") ? lastOpen : std::string();

    book.indexPath = bookNode.attribute("indexPath").value();
    book.indexType = XAPIAN;
    book.title = bookNode.attribute("title").value();
    book.name = bookNode.attribute("name").value();
    book.tags = bookNode.attribute("tags").value();
    book.description = bookNode.attribute("description").value();
    book.language = bookNode.attribute("language").value();
    book.date = bookNode.attribute("date").value();
    book.creator = bookNode.attribute("creator").value();
    book.publisher = bookNode.attribute("publisher").value();
    book.url = bookNode.attribute("url").value();
    book.origId = bookNode.attribute("origId").value();
    book.articleCount = bookNode.attribute("articleCount").value();
    book.mediaCount = bookNode.attribute("mediaCount").value();
    book.size = bookNode.attribute("size").value();
    book.favicon = bookNode.attribute("favicon").value();
    book.faviconMimeType = bookNode.attribute("faviconMimeType").value();

    /* Check absolute and relative paths */
    this->checkAndCleanBookPaths(book, libraryPath);

    /* NOTE(review): readBookFromPath() is called without a Book pointer;
     * assuming the declaration defaults it to NULL, this only verifies that
     * the file can be opened - confirm. */
    bool usable = true;
    if (checkBookFile && !book.path.empty()) {
      usable = this->readBookFromPath(book.pathAbsolute);
    }

    if (usable) {
      library.addBook(book);
    }
  }

  return true;
}
|
||||
|
||||
bool Manager::readXml(const string xml, const bool readOnly, const string libraryPath) {
|
||||
pugi::xml_document doc;
|
||||
pugi::xml_parse_result result = doc.load_buffer_inplace((void*)xml.data(), xml.size());
|
||||
|
||||
if (result) {
|
||||
this->parseXmlDom(doc, readOnly, libraryPath);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Manager::readFile(const string path, const bool readOnly) {
|
||||
return this->readFile(path, path, readOnly);
|
||||
}
|
||||
|
||||
bool Manager::readFile(const string nativePath, const string UTF8Path, const bool readOnly) {
|
||||
bool retVal = true;
|
||||
pugi::xml_document doc;
|
||||
pugi::xml_parse_result result = doc.load_file(nativePath.c_str());
|
||||
|
||||
if (result) {
|
||||
this->parseXmlDom(doc, readOnly, UTF8Path);
|
||||
} else {
|
||||
retVal = false;
|
||||
}
|
||||
|
||||
/* This has to be set (although if the file does not exists) to be
|
||||
* able to know where to save the library if new content are
|
||||
* available */
|
||||
if (!readOnly) {
|
||||
this->writableLibraryPath = UTF8Path;
|
||||
}
|
||||
|
||||
return retVal;
|
||||
}
|
||||
|
||||
bool Manager::writeFile(const string path) {
|
||||
pugi::xml_document doc;
|
||||
|
||||
/* Add the library node */
|
||||
pugi::xml_node libraryNode = doc.append_child("library");
|
||||
|
||||
if (!getCurrentBookId().empty()) {
|
||||
libraryNode.append_attribute("current") = getCurrentBookId().c_str();
|
||||
}
|
||||
|
||||
if (!library.version.empty())
|
||||
libraryNode.append_attribute("version") = library.version.c_str();
|
||||
|
||||
/* Add each book */
|
||||
std::vector<kiwix::Book>::iterator itr;
|
||||
for ( itr = library.books.begin(); itr != library.books.end(); ++itr ) {
|
||||
|
||||
if (!itr->readOnly) {
|
||||
this->checkAndCleanBookPaths(*itr, path);
|
||||
|
||||
pugi::xml_node bookNode = libraryNode.append_child("book");
|
||||
bookNode.append_attribute("id") = itr->id.c_str();
|
||||
|
||||
if (!itr->path.empty())
|
||||
bookNode.append_attribute("path") = itr->path.c_str();
|
||||
|
||||
if (!itr->last.empty() && itr->last != "undefined") {
|
||||
bookNode.append_attribute("last") = itr->last.c_str();
|
||||
}
|
||||
|
||||
if (!itr->indexPath.empty())
|
||||
bookNode.append_attribute("indexPath") = itr->indexPath.c_str();
|
||||
|
||||
if (!itr->indexPath.empty() || !itr->indexPathAbsolute.empty()) {
|
||||
if (itr->indexType == XAPIAN)
|
||||
bookNode.append_attribute("indexType") = "xapian";
|
||||
}
|
||||
|
||||
if (itr->origId.empty()) {
|
||||
if (!itr->title.empty())
|
||||
bookNode.append_attribute("title") = itr->title.c_str();
|
||||
|
||||
if (!itr->name.empty())
|
||||
bookNode.append_attribute("name") = itr->name.c_str();
|
||||
|
||||
if (!itr->tags.empty())
|
||||
bookNode.append_attribute("tags") = itr->tags.c_str();
|
||||
|
||||
if (!itr->description.empty())
|
||||
bookNode.append_attribute("description") = itr->description.c_str();
|
||||
|
||||
if (!itr->language.empty())
|
||||
bookNode.append_attribute("language") = itr->language.c_str();
|
||||
|
||||
if (!itr->creator.empty())
|
||||
bookNode.append_attribute("creator") = itr->creator.c_str();
|
||||
|
||||
if (!itr->publisher.empty())
|
||||
bookNode.append_attribute("publisher") = itr->publisher.c_str();
|
||||
|
||||
if (!itr->favicon.empty())
|
||||
bookNode.append_attribute("favicon") = itr->favicon.c_str();
|
||||
|
||||
if (!itr->faviconMimeType.empty())
|
||||
bookNode.append_attribute("faviconMimeType") = itr->faviconMimeType.c_str();
|
||||
}
|
||||
|
||||
if (!itr->date.empty())
|
||||
bookNode.append_attribute("date") = itr->date.c_str();
|
||||
|
||||
if (!itr->url.empty())
|
||||
bookNode.append_attribute("url") = itr->url.c_str();
|
||||
|
||||
if (!itr->origId.empty())
|
||||
bookNode.append_attribute("origId") = itr->origId.c_str();
|
||||
|
||||
if (!itr->articleCount.empty())
|
||||
bookNode.append_attribute("articleCount") = itr->articleCount.c_str();
|
||||
|
||||
if (!itr->mediaCount.empty())
|
||||
bookNode.append_attribute("mediaCount") = itr->mediaCount.c_str();
|
||||
|
||||
if (!itr->size.empty())
|
||||
bookNode.append_attribute("size") = itr->size.c_str();
|
||||
}
|
||||
}
|
||||
|
||||
/* saving file */
|
||||
doc.save_file(path.c_str());
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Manager::setCurrentBookId(const string id) {
|
||||
if (library.current.empty() || library.current.top() != id) {
|
||||
if (id.empty() && !library.current.empty())
|
||||
library.current.pop();
|
||||
else
|
||||
library.current.push(id);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Return the id on top of the "current" stack, or "" when the stack is empty. */
string Manager::getCurrentBookId() {
  if (library.current.empty()) {
    return "";
  }
  return library.current.top();
}
|
||||
|
||||
/* Add a book to the library. Return empty string if failed, book id otherwise */
|
||||
/* Read the book at 'pathToOpen' and add it to the library.
 *
 * pathToOpen:    path used to open the file
 * pathToSave:    path recorded in the book when it differs from pathToOpen;
 *                a relative value is resolved against the directory of
 *                writableLibraryPath to get the absolute path
 * url:           stored in the book
 * checkMetaData: when true, the book is rejected unless title, language
 *                and date are all non-empty
 *
 * Returns the book id, or an empty string when the file could not be read
 * or the metadata check failed. */
string Manager::addBookFromPathAndGetId(const string pathToOpen, const string pathToSave,
                                        const string url, const bool checkMetaData) {
  kiwix::Book book;

  if (!this->readBookFromPath(pathToOpen, &book)) {
    return "";
  }

  if (pathToSave != pathToOpen) {
    book.path = pathToSave;
    book.pathAbsolute = isRelativePath(pathToSave) ?
      computeAbsolutePath(removeLastPathElement(writableLibraryPath, true, false), pathToSave) : pathToSave;
  }

  const bool hasMetaData = !book.title.empty() && !book.language.empty() && !book.date.empty();
  if (checkMetaData && !hasMetaData) {
    return "";
  }

  book.url = url;
  library.addBook(book);
  return book.id;
}
|
||||
|
||||
/* Wrapper over Manager::addBookFromPath which return a bool instead of a string */
|
||||
bool Manager::addBookFromPath(const string pathToOpen, const string pathToSave, const string url, const bool checkMetaData) {
|
||||
return !(this->addBookFromPathAndGetId(pathToOpen, pathToSave, url, checkMetaData).empty());
|
||||
}
|
||||
|
||||
bool Manager::readBookFromPath(const string path, kiwix::Book *book) {
|
||||
try {
|
||||
kiwix::Reader *reader = new kiwix::Reader(path);
|
||||
|
||||
if (book != NULL) {
|
||||
book->path = path;
|
||||
book->pathAbsolute = path;
|
||||
book->id = reader->getId();
|
||||
book->description = reader->getDescription();
|
||||
book->language = reader->getLanguage();
|
||||
book->date = reader->getDate();
|
||||
book->creator = reader->getCreator();
|
||||
book->publisher = reader->getPublisher();
|
||||
book->title = reader->getTitle();
|
||||
book->name = reader->getName();
|
||||
book->tags = reader->getTags();
|
||||
book->origId = reader->getOrigId();
|
||||
std::ostringstream articleCountStream;
|
||||
articleCountStream << reader->getArticleCount();
|
||||
book->articleCount = articleCountStream.str();
|
||||
|
||||
std::ostringstream mediaCountStream;
|
||||
mediaCountStream << reader->getMediaCount();
|
||||
book->mediaCount = mediaCountStream.str();
|
||||
|
||||
ostringstream convert; convert << reader->getFileSize();
|
||||
book->size = convert.str();
|
||||
|
||||
string favicon;
|
||||
string faviconMimeType;
|
||||
if (reader->getFavicon(favicon, faviconMimeType)) {
|
||||
book->favicon = base64_encode(reinterpret_cast<const unsigned char*>(favicon.c_str()), favicon.length());
|
||||
book->faviconMimeType = faviconMimeType;
|
||||
}
|
||||
}
|
||||
|
||||
delete reader;
|
||||
} catch (const std::exception& e) {
|
||||
std::cerr << e.what() << std::endl;
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Manager::removeBookByIndex(const unsigned int bookIndex) {
|
||||
return this->library.removeBookByIndex(bookIndex);
|
||||
}
|
||||
|
||||
bool Manager::removeBookById(const string id) {
|
||||
unsigned int bookIndex = 0;
|
||||
std::vector<kiwix::Book>::iterator itr;
|
||||
for ( itr = library.books.begin(); itr != library.books.end(); ++itr ) {
|
||||
if ( itr->id == id) {
|
||||
return this->library.removeBookByIndex(bookIndex);
|
||||
}
|
||||
bookIndex++;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/* List the distinct languages of the books whose origId is empty.
 * Side effect: library.books is sorted in place with Book::sortByLanguage. */
vector<string> Manager::getBooksLanguages() {
  std::sort(library.books.begin(), library.books.end(), kiwix::Book::sortByLanguage);

  std::vector<string> languages;
  std::map<string, bool> seenLanguages;

  for (std::vector<kiwix::Book>::iterator book = library.books.begin();
       book != library.books.end(); ++book) {
    const bool unseen = seenLanguages.find(book->language) == seenLanguages.end();
    if (unseen && book->origId.empty()) {
      seenLanguages[book->language] = true;
      languages.push_back(book->language);
    }
  }

  return languages;
}
|
||||
|
||||
/* List the distinct creators of the books whose origId is empty.
 * Side effect: library.books is sorted in place with Book::sortByCreator. */
vector<string> Manager::getBooksCreators() {
  std::sort(library.books.begin(), library.books.end(), kiwix::Book::sortByCreator);

  std::vector<string> creators;
  std::map<string, bool> seenCreators;

  for (std::vector<kiwix::Book>::iterator book = library.books.begin();
       book != library.books.end(); ++book) {
    const bool unseen = seenCreators.find(book->creator) == seenCreators.end();
    if (unseen && book->origId.empty()) {
      seenCreators[book->creator] = true;
      creators.push_back(book->creator);
    }
  }

  return creators;
}
|
||||
|
||||
|
||||
/* Return the ids of all books, in the current order of library.books. */
vector<string> Manager::getBooksIds() {
  std::vector<string> ids;

  for (std::vector<kiwix::Book>::iterator book = library.books.begin();
       book != library.books.end(); ++book) {
    ids.push_back(book->id);
  }

  return ids;
}
|
||||
|
||||
/* List the distinct publishers of the books whose origId is empty.
 * Side effect: library.books is sorted in place with Book::sortByPublisher. */
vector<string> Manager::getBooksPublishers() {
  std::sort(library.books.begin(), library.books.end(), kiwix::Book::sortByPublisher);

  std::vector<string> publishers;
  std::map<string, bool> seenPublishers;

  for (std::vector<kiwix::Book>::iterator book = library.books.begin();
       book != library.books.end(); ++book) {
    const bool unseen = seenPublishers.find(book->publisher) == seenPublishers.end();
    if (unseen && book->origId.empty()) {
      seenPublishers[book->publisher] = true;
      publishers.push_back(book->publisher);
    }
  }

  return publishers;
}
|
||||
|
||||
/* Return a copy of the managed library. */
kiwix::Library Manager::cloneLibrary() {
  return library;
}
|
||||
|
||||
/* Copy the current book into 'book'.
 * Returns false when no current book id is set or when no book in the
 * library carries that id (in which case 'book' is left untouched);
 * returns true when 'book' was filled. */
bool Manager::getCurrentBook(Book &book) {
  const string currentBookId = getCurrentBookId();

  if (currentBookId.empty()) {
    return false;
  }

  /* Propagate the lookup result instead of reporting success blindly */
  return getBookById(currentBookId, book);
}
|
||||
|
||||
bool Manager::getBookById(const string id, Book &book) {
|
||||
std::vector<kiwix::Book>::iterator itr;
|
||||
for ( itr = library.books.begin(); itr != library.books.end(); ++itr ) {
|
||||
if ( itr->id == id) {
|
||||
book = *itr;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool Manager::updateBookLastOpenDateById(const string id) {
|
||||
std::vector<kiwix::Book>::iterator itr;
|
||||
for ( itr = library.books.begin(); itr != library.books.end(); ++itr ) {
|
||||
if ( itr->id == id) {
|
||||
char unixdate[12];
|
||||
sprintf (unixdate, "%d", (int)time(NULL));
|
||||
itr->last = unixdate;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
bool Manager::setBookIndex(const string id, const string path, const supportedIndexType type) {
|
||||
std::vector<kiwix::Book>::iterator itr;
|
||||
for ( itr = library.books.begin(); itr != library.books.end(); ++itr ) {
|
||||
if ( itr->id == id) {
|
||||
itr->indexPath = path;
|
||||
itr->indexPathAbsolute = isRelativePath(path) ?
|
||||
computeAbsolutePath(removeLastPathElement(writableLibraryPath, true, false), path) : path;
|
||||
itr->indexType = type;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
bool Manager::setBookIndex(const string id, const string path) {
|
||||
return this->setBookIndex(id, path, XAPIAN);
|
||||
}
|
||||
|
||||
bool Manager::setBookPath(const string id, const string path) {
|
||||
std::vector<kiwix::Book>::iterator itr;
|
||||
for ( itr = library.books.begin(); itr != library.books.end(); ++itr ) {
|
||||
if ( itr->id == id) {
|
||||
itr->path = path;
|
||||
itr->pathAbsolute = isRelativePath(path) ?
|
||||
computeAbsolutePath(removeLastPathElement(writableLibraryPath, true, false), path) : path;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
void Manager::removeBookPaths() {
|
||||
std::vector<kiwix::Book>::iterator itr;
|
||||
for ( itr = library.books.begin(); itr != library.books.end(); ++itr ) {
|
||||
itr->path = "";
|
||||
itr->pathAbsolute = "";
|
||||
}
|
||||
}
|
||||
|
||||
unsigned int Manager::getBookCount(const bool localBooks, const bool remoteBooks) {
|
||||
unsigned int result = 0;
|
||||
std::vector<kiwix::Book>::iterator itr;
|
||||
for ( itr = library.books.begin(); itr != library.books.end(); ++itr ) {
|
||||
if ((!itr->path.empty() && localBooks) || (itr->path.empty() && remoteBooks))
|
||||
result++;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
bool Manager::listBooks(const supportedListMode mode, const supportedListSortBy sortBy,
|
||||
const unsigned int maxSize, const string language, const string creator,
|
||||
const string publisher, const string search) {
|
||||
this->bookIdList.clear();
|
||||
std::vector<kiwix::Book>::iterator itr;
|
||||
|
||||
/* Sort */
|
||||
if (sortBy == TITLE) {
|
||||
std::sort(library.books.begin(), library.books.end(), kiwix::Book::sortByTitle);
|
||||
} else if (sortBy == SIZE) {
|
||||
std::sort(library.books.begin(), library.books.end(), kiwix::Book::sortBySize);
|
||||
} else if (sortBy == DATE) {
|
||||
std::sort(library.books.begin(), library.books.end(), kiwix::Book::sortByDate);
|
||||
} else if (sortBy == CREATOR) {
|
||||
std::sort(library.books.begin(), library.books.end(), kiwix::Book::sortByCreator);
|
||||
} else if (sortBy == PUBLISHER) {
|
||||
std::sort(library.books.begin(), library.books.end(), kiwix::Book::sortByPublisher);
|
||||
}
|
||||
|
||||
/* Special sort for LASTOPEN */
|
||||
if (mode == LASTOPEN) {
|
||||
std::sort(library.books.begin(), library.books.end(), kiwix::Book::sortByLastOpen);
|
||||
for ( itr = library.books.begin(); itr != library.books.end(); ++itr ) {
|
||||
if (!itr->last.empty())
|
||||
this->bookIdList.push_back(itr->id);
|
||||
}
|
||||
} else {
|
||||
/* Generate the list of book id */
|
||||
for ( itr = library.books.begin(); itr != library.books.end(); ++itr ) {
|
||||
bool ok = true;
|
||||
|
||||
if (mode == LOCAL && itr->path.empty())
|
||||
ok = false;
|
||||
|
||||
if (ok == true && mode == REMOTE && (!itr->path.empty() || itr->url.empty()))
|
||||
ok = false;
|
||||
|
||||
if (ok == true && maxSize != 0 && (unsigned int)atoi(itr->size.c_str()) > maxSize * 1024 * 1024)
|
||||
ok = false;
|
||||
|
||||
if (ok == true && !language.empty() && !matchRegex(itr->language, language))
|
||||
ok = false;
|
||||
|
||||
if (ok == true && !creator.empty() && itr->creator != creator)
|
||||
ok = false;
|
||||
|
||||
if (ok == true && !publisher.empty() && itr->publisher != publisher)
|
||||
ok = false;
|
||||
|
||||
if ((ok == true && !search.empty()) && !(matchRegex(itr->title, "\\Q" + search + "\\E") ||
|
||||
matchRegex(itr->description, "\\Q" + search + "\\E") ||
|
||||
matchRegex(itr->language, "\\Q" + search + "\\E")
|
||||
))
|
||||
ok = false;
|
||||
|
||||
if (ok == true) {
|
||||
this->bookIdList.push_back(itr->id);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Make the relative and absolute paths of a book consistent.
   For the content path and the index path alike: a relative path is
   resolved into its absolute counterpart, an absolute path is kept as the
   absolute value and replaced by its relative form. Both computations use
   the directory obtained by removing the last element of 'libraryPath'.
   Empty paths are left alone. */
void Manager::checkAndCleanBookPaths(Book &book, const string &libraryPath) {
  if (!book.path.empty()) {
    if (!isRelativePath(book.path)) {
      book.pathAbsolute = book.path;
      book.path = computeRelativePath(removeLastPathElement(libraryPath, true, false), book.pathAbsolute);
    } else {
      book.pathAbsolute = computeAbsolutePath(removeLastPathElement(libraryPath, true, false), book.path);
    }
  }

  if (!book.indexPath.empty()) {
    if (!isRelativePath(book.indexPath)) {
      book.indexPathAbsolute = book.indexPath;
      book.indexPath =
        computeRelativePath(removeLastPathElement(libraryPath, true, false), book.indexPathAbsolute);
    } else {
      book.indexPathAbsolute =
        computeAbsolutePath(removeLastPathElement(libraryPath, true, false), book.indexPath);
    }
  }
}
|
||||
|
||||
}
|
||||
@@ -1,92 +0,0 @@
|
||||
/*
|
||||
* Copyright 2011 Emmanuel Engelhart <kelson@kiwix.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 3 of the License, or
|
||||
* any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
* MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#ifndef KIWIX_MANAGER_H
|
||||
#define KIWIX_MANAGER_H
|
||||
|
||||
#include <string>
|
||||
#include <sstream>
|
||||
#include <time.h>
|
||||
|
||||
#include <pugixml.hpp>
|
||||
|
||||
#include "../base64.h"
|
||||
#include "../regexTools.h"
|
||||
#include "../pathTools.h"
|
||||
#include <kiwix/library.h>
|
||||
#include <kiwix/reader.h>
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace kiwix {
|
||||
|
||||
/* Which books Manager::listBooks() selects */
enum supportedListMode {
  LASTOPEN,
  REMOTE,
  LOCAL
};

/* Sort key applied by Manager::listBooks() */
enum supportedListSortBy {
  TITLE,
  SIZE,
  DATE,
  CREATOR,
  PUBLISHER
};
|
||||
|
||||
class Manager {
|
||||
|
||||
public:
|
||||
Manager();
|
||||
~Manager();
|
||||
|
||||
bool readFile(const string path, const bool readOnly = true);
|
||||
bool readFile(const string nativePath, const string UTF8Path, const bool readOnly = true);
|
||||
bool readXml(const string xml, const bool readOnly = true, const string libraryPath = "");
|
||||
bool writeFile(const string path);
|
||||
bool removeBookByIndex(const unsigned int bookIndex);
|
||||
bool removeBookById(const string id);
|
||||
bool setCurrentBookId(const string id);
|
||||
string getCurrentBookId();
|
||||
bool setBookIndex(const string id, const string path, const supportedIndexType type);
|
||||
bool setBookIndex(const string id, const string path);
|
||||
bool setBookPath(const string id, const string path);
|
||||
string addBookFromPathAndGetId(const string pathToOpen, const string pathToSave = "", const string url = "",
|
||||
const bool checkMetaData = false);
|
||||
bool addBookFromPath(const string pathToOpen, const string pathToSave = "", const string url = "",
|
||||
const bool checkMetaData = false);
|
||||
Library cloneLibrary();
|
||||
bool getBookById(const string id, Book &book);
|
||||
bool getCurrentBook(Book &book);
|
||||
unsigned int getBookCount(const bool localBooks, const bool remoteBooks);
|
||||
bool updateBookLastOpenDateById(const string id);
|
||||
void removeBookPaths();
|
||||
bool listBooks(const supportedListMode mode, const supportedListSortBy sortBy, const unsigned int maxSize,
|
||||
const string language, const string creator, const string publisher, const string search);
|
||||
vector<string> getBooksLanguages();
|
||||
vector<string> getBooksCreators();
|
||||
vector<string> getBooksPublishers();
|
||||
vector<string> getBooksIds();
|
||||
|
||||
string writableLibraryPath;
|
||||
|
||||
vector<std::string> bookIdList;
|
||||
|
||||
protected:
|
||||
kiwix::Library library;
|
||||
|
||||
bool readBookFromPath(const string path, Book *book = NULL);
|
||||
bool parseXmlDom(const pugi::xml_document &doc, const bool readOnly, const string libraryPath);
|
||||
|
||||
private:
|
||||
void checkAndCleanBookPaths(Book &book, const string &libraryPath);
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
@@ -1,690 +0,0 @@
|
||||
/*
|
||||
* Copyright 2011 Emmanuel Engelhart <kelson@kiwix.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 3 of the License, or
|
||||
* any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
* MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#include "reader.h"
|
||||
|
||||
/* Lower-case hexadecimal digit of the high nibble of a byte. */
inline char hi(char v) {
  static const char digits[] = "0123456789abcdef";
  return digits[(v >> 4) & 0xf];
}

/* Lower-case hexadecimal digit of the low nibble of a byte. */
inline char lo(char v) {
  static const char digits[] = "0123456789abcdef";
  return digits[v & 0xf];
}

/* Format the first 16 bytes of 'in' as a UUID string laid out 8-4-4-4-12
   (for instance 00112233-4455-6677-8899-aabbccddeeff).
   Inputs shorter than 16 bytes are padded with zero bytes; previously they
   were indexed past their end, which is undefined behaviour. */
std::string hexUUID (std::string in) {
  const std::string::size_type uuidSize = 16;
  if (in.size() < uuidSize)
    in.resize(uuidSize, '\0');

  std::string out;
  out.reserve(36);
  for (unsigned n = 0; n < uuidSize; ++n) {
    /* Group separators sit before bytes 4, 6, 8 and 10 */
    if (n == 4 || n == 6 || n == 8 || n == 10)
      out += '-';
    out += hi(in[n]);
    out += lo(in[n]);
  }

  return out;
}
|
||||
|
||||
namespace kiwix {
|
||||
|
||||
/* Constructor */
|
||||
/* Open the ZIM file at 'zimFilePath' and cache the namespace offsets and
   counts used by the other methods. A path ending in "zimaa" is opened
   under the same name with the trailing "aa" removed. Also seeds rand(). */
Reader::Reader(const string zimFilePath)
  : zimFileHandler(NULL) {
  string pathToOpen = zimFilePath;

  /* Remove potential trailing zimaa */
  const string::size_type length = pathToOpen.size();
  if (length > 5 && pathToOpen.compare(length - 5, 5, "zimaa") == 0) {
    pathToOpen.erase(length - 2);
  }

  this->zimFileHandler = new zim::File(pathToOpen);

  if (this->zimFileHandler != NULL) {
    zim::File *handler = this->zimFileHandler;
    this->firstArticleOffset = handler->getNamespaceBeginOffset('A');
    this->lastArticleOffset = handler->getNamespaceEndOffset('A');
    this->currentArticleOffset = this->firstArticleOffset;
    this->nsACount = handler->getNamespaceCount('A');
    this->nsICount = handler->getNamespaceCount('I');
    /* The path is stored as given by the caller, not as opened */
    this->zimFilePath = zimFilePath;
  }

  /* initialize random seed: */
  srand ( time(NULL) );
}
|
||||
|
||||
/* Destructor */
|
||||
/* Release the ZIM file handler (deleting a null pointer is a no-op). */
Reader::~Reader() {
  delete this->zimFileHandler;
}
|
||||
|
||||
/* Return the underlying zim::File pointer; the Reader deletes it in its destructor. */
zim::File* Reader::getZimFileHandler() {
  return zimFileHandler;
}
|
||||
|
||||
/* Reset the cursor for GetNextArticle() */
|
||||
void Reader::reset() {
|
||||
this->currentArticleOffset = this->firstArticleOffset;
|
||||
}
|
||||
|
||||
std::map<std::string, unsigned int> Reader::parseCounterMetadata() {
|
||||
std::map<std::string, unsigned int> counters;
|
||||
string content, mimeType, item, counterString;
|
||||
unsigned int contentLength, counter;
|
||||
string counterUrl = "/M/Counter";
|
||||
|
||||
this->getContentByUrl(counterUrl, content, contentLength, mimeType);
|
||||
stringstream ssContent(content);
|
||||
|
||||
while(getline(ssContent, item, ';')) {
|
||||
stringstream ssItem(item);
|
||||
getline(ssItem, mimeType, '=');
|
||||
getline(ssItem, counterString, '=');
|
||||
if (!counterString.empty() && !mimeType.empty()) {
|
||||
sscanf(counterString.c_str(), "%u", &counter);
|
||||
counters.insert(pair<string, int>(mimeType, counter));
|
||||
}
|
||||
}
|
||||
|
||||
return counters;
|
||||
}
|
||||
|
||||
/* Get the count of articles which can be indexed/displayed */
|
||||
unsigned int Reader::getArticleCount() {
|
||||
std::map<std::string, unsigned int> counterMap = this->parseCounterMetadata();
|
||||
unsigned int counter = 0;
|
||||
|
||||
if (counterMap.empty()) {
|
||||
counter = this->nsACount;
|
||||
} else {
|
||||
std::map<std::string, unsigned int>::const_iterator it = counterMap.find("text/html");
|
||||
if (it != counterMap.end())
|
||||
counter = it->second;
|
||||
}
|
||||
|
||||
return counter;
|
||||
}
|
||||
|
||||
/* Get the count of medias content in the ZIM file */
|
||||
unsigned int Reader::getMediaCount() {
|
||||
std::map<std::string, unsigned int> counterMap = this->parseCounterMetadata();
|
||||
unsigned int counter = 0;
|
||||
|
||||
if (counterMap.empty())
|
||||
counter = this->nsICount;
|
||||
else {
|
||||
std::map<std::string, unsigned int>::const_iterator it;
|
||||
|
||||
it = counterMap.find("image/jpeg");
|
||||
if (it != counterMap.end())
|
||||
counter += it->second;
|
||||
|
||||
it = counterMap.find("image/gif");
|
||||
if (it != counterMap.end())
|
||||
counter += it->second;
|
||||
|
||||
it = counterMap.find("image/png");
|
||||
if (it != counterMap.end())
|
||||
counter += it->second;
|
||||
}
|
||||
|
||||
return counter;
|
||||
}
|
||||
|
||||
/* Get the total of all items of a ZIM file, redirects included */
|
||||
unsigned int Reader::getGlobalCount() {
|
||||
return this->zimFileHandler->getCountArticles();
|
||||
}
|
||||
|
||||
/* Return the UID of the ZIM file */
|
||||
/* Return the UUID stored in the file header, rendered through its stream operator. */
string Reader::getId() {
  std::ostringstream rendered;
  rendered << zimFileHandler->getFileheader().getUuid();
  return rendered.str();
}
|
||||
|
||||
/* Return a page url from a title */
|
||||
bool Reader::getPageUrlFromTitle(const string &title, string &url) {
|
||||
/* Extract the content from the zim file */
|
||||
std::pair<bool, zim::File::const_iterator> resultPair = zimFileHandler->findxByTitle('A', title);
|
||||
|
||||
/* Test if the article was found */
|
||||
if (resultPair.first == true) {
|
||||
|
||||
/* Get the article */
|
||||
zim::Article article = *resultPair.second;
|
||||
|
||||
/* If redirect */
|
||||
unsigned int loopCounter = 0;
|
||||
while (article.isRedirect() && loopCounter++<42) {
|
||||
article = article.getRedirectArticle();
|
||||
}
|
||||
|
||||
url = article.getLongUrl();
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Return the URL of a randomly chosen article */
|
||||
/* Return the long URL of an article drawn at random from namespace A.
   The main page is rejected and another article is drawn, unless the
   namespace holds at most one article: retrying cannot yield a different
   result then, and the original code looped forever in that case. */
string Reader::getRandomPageUrl() {
  const std::string mainPageUrl = this->getMainPageUrl();
  zim::Article article;

  do {
    /* rand() / (RAND_MAX + 1) lies in [0, 1), so the index stays inside namespace A */
    const zim::size_type idx = this->firstArticleOffset +
      (zim::size_type)((double)rand() / ((double)RAND_MAX + 1) * this->nsACount);
    article = zimFileHandler->getArticle(idx);
  } while (this->nsACount > 1 && article.getLongUrl() == mainPageUrl);

  return article.getLongUrl();
}
|
||||
|
||||
/* Return the welcome page URL */
|
||||
/* Return the long URL of the main page declared in the file header.
   Falls back to the first page when the header declares no main page or
   when that page has an empty URL. */
string Reader::getMainPageUrl() {
  string url;

  if (this->zimFileHandler->getFileheader().hasMainPage()) {
    zim::Article mainPage = zimFileHandler->getArticle(this->zimFileHandler->getFileheader().getMainPage());
    url = mainPage.getLongUrl();
  }

  if (url.empty()) {
    url = getFirstPageUrl();
  }

  return url;
}
|
||||
|
||||
/* Load the favicon into 'content' and its mime-type into 'mimeType'.
   The candidate URLs are tried in order until one yields non-empty content.
   Returns false when none of them does. */
bool Reader::getFavicon(string &content, string &mimeType) {
  static const char *const candidates[] = {
    "/-/favicon.png",
    "/I/favicon.png",
    "/I/favicon",
    "/-/favicon"
  };
  unsigned int contentLength = 0;

  for (unsigned int i = 0; i < sizeof(candidates) / sizeof(candidates[0]); ++i) {
    this->getContentByUrl(candidates[i], content, contentLength, mimeType);
    if (!content.empty()) {
      return true;
    }
  }

  return false;
}
|
||||
|
||||
/* Return the path that was passed to the constructor. */
string Reader::getZimFilePath() {
  return zimFilePath;
}
|
||||
|
||||
/* Return a metatag value */
|
||||
bool Reader::getMetatag(const string &name, string &value) {
|
||||
unsigned int contentLength = 0;
|
||||
string contentType = "";
|
||||
|
||||
return this->getContentByUrl( "/M/" + name, value,
|
||||
contentLength, contentType);
|
||||
}
|
||||
|
||||
/* Return the "Title" metadata. When it is empty, a title is derived from
   the file name: underscores become spaces and everything from the first
   ".zim" on is dropped. */
string Reader::getTitle() {
  string title;
  this->getMetatag("Title", title);

  if (!title.empty()) {
    return title;
  }

  title = getLastPathElement(zimFileHandler->getFilename());
  std::replace(title.begin(), title.end(), '_', ' ');
  const size_t extensionPos = title.find(".zim");
  return title.substr(0, extensionPos);
}
|
||||
|
||||
/* Return the "Name" metadata (empty when absent). */
string Reader::getName() {
  string name;
  getMetatag("Name", name);
  return name;
}
|
||||
|
||||
/* Return the "Tags" metadata (empty when absent). */
string Reader::getTags() {
  string tags;
  getMetatag("Tags", tags);
  return tags;
}
|
||||
|
||||
/* Return the "Description" metadata, or the "Subtitle" metadata when the
   description is empty. */
string Reader::getDescription() {
  string description;
  getMetatag("Description", description);

  /* Mediawiki Collection tends to use the "Subtitle" name */
  if (description.empty())
    getMetatag("Subtitle", description);

  return description;
}
|
||||
|
||||
/* Return the "Language" metadata (empty when absent). */
string Reader::getLanguage() {
  string language;
  getMetatag("Language", language);
  return language;
}
|
||||
|
||||
/* Return the "Date" metadata (empty when absent). */
string Reader::getDate() {
  string date;
  getMetatag("Date", date);
  return date;
}
|
||||
|
||||
/* Return the "Creator" metadata (empty when absent). */
string Reader::getCreator() {
  string creator;
  getMetatag("Creator", creator);
  return creator;
}
|
||||
|
||||
/* Return the "Publisher" metadata (empty when absent). */
string Reader::getPublisher() {
  string publisher;
  getMetatag("Publisher", publisher);
  return publisher;
}
|
||||
|
||||
/* Return the UUID found in the "startfileuid" metadata, formatted by
   hexUUID(), or an empty string when that metadata is absent.
   The metadata is read as decimal byte values, each terminated by '\n';
   text after the last '\n' is ignored (unchanged from the original).
   At most 16 bytes are stored: further fields used to overflow the buffer.
   The buffer is handed to hexUUID() with an explicit length, because going
   through the C-string constructor cut the UUID at its first zero byte and
   over-read the buffer when it contained none. */
string Reader::getOrigId() {
  string value;
  this->getMetatag("startfileuid", value);
  if (value.empty())
    return "";

  char rawUuid[16] = { 0 };
  unsigned int filled = 0;
  std::string field;

  for (unsigned int i = 0; i < value.size() && filled < sizeof(rawUuid); i++) {
    if (value[i] == '\n') {
      rawUuid[filled++] = (char)atoi(field.c_str());
      field = "";
    } else {
      field += value[i];
    }
  }

  return hexUUID(std::string(rawUuid, sizeof(rawUuid)));
}
|
||||
|
||||
/* Return the first page URL */
|
||||
/* Return the long URL of the article at the begin offset of namespace A. */
string Reader::getFirstPageUrl() {
  const zim::size_type firstOffset = zimFileHandler->getNamespaceBeginOffset('A');
  zim::Article firstArticle = zimFileHandler->getArticle(firstOffset);
  return firstArticle.getLongUrl();
}
|
||||
|
||||
bool Reader::parseUrl(const string &url, char *ns, string &title) {
|
||||
/* Offset to visit the url */
|
||||
unsigned int urlLength = url.size();
|
||||
unsigned int offset = 0;
|
||||
|
||||
/* Ignore the '/' */
|
||||
while ((offset < urlLength) && (url[offset] == '/')) offset++;
|
||||
|
||||
/* Get namespace */
|
||||
while ((offset < urlLength) && (url[offset] != '/')) {
|
||||
*ns= url[offset];
|
||||
offset++;
|
||||
}
|
||||
|
||||
/* Ignore the '/' */
|
||||
while ((offset < urlLength) && (url[offset] == '/')) offset++;
|
||||
|
||||
/* Get content title */
|
||||
unsigned int titleOffset = offset;
|
||||
while (offset < urlLength) {
|
||||
offset++;
|
||||
}
|
||||
|
||||
/* unescape title */
|
||||
title = url.substr(titleOffset, offset - titleOffset);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Return article by url */
|
||||
bool Reader::getArticleObjectByDecodedUrl(const string &url, zim::Article &article) {
|
||||
bool retVal = false;
|
||||
|
||||
if (this->zimFileHandler != NULL) {
|
||||
|
||||
/* Parse the url */
|
||||
char ns = 0;
|
||||
string titleStr;
|
||||
this->parseUrl(url, &ns, titleStr);
|
||||
|
||||
/* Main page */
|
||||
if (titleStr.empty() && ns == 0) {
|
||||
this->parseUrl(this->getMainPageUrl(), &ns, titleStr);
|
||||
}
|
||||
|
||||
/* Extract the content from the zim file */
|
||||
std::pair<bool, zim::File::const_iterator> resultPair = zimFileHandler->findx(ns, titleStr);
|
||||
|
||||
/* Test if the article was found */
|
||||
if (resultPair.first == true) {
|
||||
article = zimFileHandler->getArticle(resultPair.second.getIndex());
|
||||
retVal = true;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
return retVal;
|
||||
}
|
||||
|
||||
/* Return the mimeType without the content */
|
||||
bool Reader::getMimeTypeByUrl(const string &url, string &mimeType) {
|
||||
bool retVal = false;
|
||||
|
||||
if (this->zimFileHandler != NULL) {
|
||||
|
||||
zim::Article article;
|
||||
if (this->getArticleObjectByDecodedUrl(url, article)) {
|
||||
try {
|
||||
mimeType = string(article.getMimeType().data(), article.getMimeType().size());
|
||||
} catch (exception &e) {
|
||||
cerr << "Unable to get the mimetype for "<< url << ":" << e.what() << endl;
|
||||
mimeType = "application/octet-stream";
|
||||
}
|
||||
retVal = true;
|
||||
} else {
|
||||
mimeType = "";
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
return retVal;
|
||||
}
|
||||
|
||||
/* Get a content from a zim file */
|
||||
bool Reader::getContentByUrl(const string &url, string &content, unsigned int &contentLength, string &contentType) {
|
||||
return this->getContentByEncodedUrl(url, content, contentLength, contentType);
|
||||
}
|
||||
|
||||
bool Reader::getContentByEncodedUrl(const string &url, string &content, unsigned int &contentLength, string &contentType, string &baseUrl) {
|
||||
return this->getContentByDecodedUrl(kiwix::urlDecode(url), content, contentLength, contentType, baseUrl);
|
||||
}
|
||||
|
||||
bool Reader::getContentByEncodedUrl(const string &url, string &content, unsigned int &contentLength, string &contentType) {
|
||||
std::string stubRedirectUrl;
|
||||
return this->getContentByEncodedUrl(kiwix::urlDecode(url), content, contentLength, contentType, stubRedirectUrl);
|
||||
}
|
||||
|
||||
bool Reader::getContentByDecodedUrl(const string &url, string &content, unsigned int &contentLength, string &contentType) {
|
||||
std::string stubRedirectUrl;
|
||||
return this->getContentByDecodedUrl(kiwix::urlDecode(url), content, contentLength, contentType, stubRedirectUrl);
|
||||
}
|
||||
|
||||
bool Reader::getContentByDecodedUrl(const string &url, string &content, unsigned int &contentLength, string &contentType, string &baseUrl) {
|
||||
bool retVal = false;
|
||||
content="";
|
||||
contentType="";
|
||||
contentLength = 0;
|
||||
if (this->zimFileHandler != NULL) {
|
||||
|
||||
zim::Article article;
|
||||
if (this->getArticleObjectByDecodedUrl(url, article)) {
|
||||
|
||||
/* If redirect */
|
||||
unsigned int loopCounter = 0;
|
||||
while (article.isRedirect() && loopCounter++<42) {
|
||||
article = article.getRedirectArticle();
|
||||
}
|
||||
|
||||
if (loopCounter < 42) {
|
||||
/* Compute base url (might be different from the url if redirects */
|
||||
baseUrl = "/" + std::string(1, article.getNamespace()) + "/" + article.getUrl();
|
||||
|
||||
/* Get the content mime-type */
|
||||
try {
|
||||
contentType = string(article.getMimeType().data(), article.getMimeType().size());
|
||||
} catch (exception &e) {
|
||||
cerr << "Unable to get the mimetype for "<< baseUrl<< ":" << e.what() << endl;
|
||||
contentType = "application/octet-stream";
|
||||
}
|
||||
|
||||
/* Get the data */
|
||||
content = string(article.getData().data(), article.getArticleSize());
|
||||
}
|
||||
|
||||
/* Try to set a stub HTML header/footer if necesssary */
|
||||
if (contentType.find("text/html") != string::npos &&
|
||||
content.find("<body") == std::string::npos &&
|
||||
content.find("<BODY") == std::string::npos) {
|
||||
content = "<html><head><title>" + article.getTitle() + "</title><meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\" /></head><body>" + content + "</body></html>";
|
||||
}
|
||||
|
||||
/* Get the data length */
|
||||
contentLength = article.getArticleSize();
|
||||
|
||||
/* Set return value */
|
||||
retVal = true;
|
||||
}
|
||||
}
|
||||
|
||||
return retVal;
|
||||
}
|
||||
|
||||
/* Check if an article exists */
|
||||
bool Reader::urlExists(const string &url) {
|
||||
char ns = 0;
|
||||
string titleStr;
|
||||
this->parseUrl(url, &ns, titleStr);
|
||||
titleStr = "/" + titleStr;
|
||||
zim::File::const_iterator findItr = zimFileHandler->find(ns, titleStr);
|
||||
return findItr != zimFileHandler->end() && findItr->getUrl() == titleStr;
|
||||
}
|
||||
|
||||
/* Does the ZIM file have a fulltext index? */
|
||||
bool Reader::hasFulltextIndex() {
|
||||
return this->urlExists("/Z/fulltextIndex/xapian");
|
||||
}
|
||||
|
||||
/* Search titles by prefix */
|
||||
bool Reader::searchSuggestions(const string &prefix, unsigned int suggestionsCount, const bool reset) {
|
||||
bool retVal = false;
|
||||
zim::File::const_iterator articleItr;
|
||||
|
||||
/* Reset the suggestions otherwise check if the suggestions number is less than the suggestionsCount */
|
||||
if (reset) {
|
||||
this->suggestions.clear();
|
||||
this->suggestionsOffset = this->suggestions.begin();
|
||||
} else {
|
||||
if (this->suggestions.size() > suggestionsCount) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/* Return if no prefix */
|
||||
if (prefix.size() == 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
for (articleItr = zimFileHandler->findByTitle('A', prefix);
|
||||
articleItr != zimFileHandler->end() &&
|
||||
articleItr->getTitle().compare(0, prefix.size(), prefix) == 0 &&
|
||||
this->suggestions.size() < suggestionsCount ;
|
||||
++articleItr) {
|
||||
|
||||
/* Extract the interesting part of article title & url */
|
||||
std::string normalizedArticleTitle = kiwix::normalize(articleItr->getTitle());
|
||||
std::string articleFinalUrl = "/A/"+articleItr->getUrl();
|
||||
if (articleItr->isRedirect()) {
|
||||
zim::Article article = *articleItr;
|
||||
unsigned int loopCounter = 0;
|
||||
while (article.isRedirect() && loopCounter++<42) {
|
||||
article = article.getRedirectArticle();
|
||||
}
|
||||
articleFinalUrl = "/A/"+article.getUrl();
|
||||
}
|
||||
|
||||
/* Go through all already found suggestions and skip if this
|
||||
article is already in the suggestions list (with an other
|
||||
title) */
|
||||
bool insert = true;
|
||||
std::vector< std::vector<std::string> >::iterator suggestionItr;
|
||||
for (suggestionItr = this->suggestions.begin(); suggestionItr != this->suggestions.end(); suggestionItr++) {
|
||||
int result = normalizedArticleTitle.compare((*suggestionItr)[2]);
|
||||
if (result == 0 && articleFinalUrl.compare((*suggestionItr)[1]) == 0) {
|
||||
insert = false;
|
||||
break;
|
||||
} else if (result < 0) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* Insert if possible */
|
||||
if (insert) {
|
||||
std::vector<std::string> suggestion;
|
||||
suggestion.push_back(articleItr->getTitle());
|
||||
suggestion.push_back(articleFinalUrl);
|
||||
suggestion.push_back(normalizedArticleTitle);
|
||||
this->suggestions.insert(suggestionItr, suggestion);
|
||||
}
|
||||
|
||||
/* Suggestions where found */
|
||||
retVal = true;
|
||||
}
|
||||
|
||||
/* Set the cursor to the begining */
|
||||
this->suggestionsOffset = this->suggestions.begin();
|
||||
|
||||
return retVal;
|
||||
}
|
||||
|
||||
/* Return four variants of 'title', in this order: the title itself, then
   the results of kiwix::ucFirst, kiwix::lcFirst and kiwix::toTitle. */
std::vector<std::string> Reader::getTitleVariants(const std::string &title) {
  std::vector<std::string> variants;
  variants.reserve(4);

  variants.push_back(title);
  variants.push_back(kiwix::ucFirst(title));
  variants.push_back(kiwix::lcFirst(title));
  variants.push_back(kiwix::toTitle(title));

  return variants;
}
|
||||
|
||||
/* Try also a few variations of the prefix to have better results */
|
||||
/* Search suggestions for 'prefix' and for its title variants (see
   getTitleVariants()), merging all results into one suggestion list.
   Returns true when at least one variant produced a match.
   The result flag now starts at false; it used to be read uninitialised. */
bool Reader::searchSuggestionsSmart(const string &prefix, unsigned int suggestionsCount) {
  std::vector<std::string> variants = this->getTitleVariants(prefix);
  bool retVal = false;

  this->suggestions.clear();
  this->suggestionsOffset = this->suggestions.begin();
  for (std::vector<std::string>::iterator variantsItr = variants.begin();
       variantsItr != variants.end();
       ++variantsItr) {
    /* The search comes first so that it runs for every variant */
    retVal = this->searchSuggestions(*variantsItr, suggestionsCount, false) || retVal;
  }

  return retVal;
}
|
||||
|
||||
/* Get next suggestion */
|
||||
/* Store the title of the suggestion under the cursor in 'title' and move
   the cursor forward. Returns false (leaving 'title' untouched) when the
   cursor is at the end of the list. */
bool Reader::getNextSuggestion(string &title) {
  if (this->suggestionsOffset == this->suggestions.end()) {
    return false;
  }

  const std::vector<std::string> &entry = *(this->suggestionsOffset);
  title = entry[0];

  /* increment the cursor for the next call */
  ++(this->suggestionsOffset);

  return true;
}
|
||||
|
||||
/* Store the title and url of the suggestion under the cursor in 'title'
   and 'url', then move the cursor forward. Returns false (leaving both
   outputs untouched) when the cursor is at the end of the list. */
bool Reader::getNextSuggestion(string &title, string &url) {
  if (this->suggestionsOffset == this->suggestions.end()) {
    return false;
  }

  const std::vector<std::string> &entry = *(this->suggestionsOffset);
  title = entry[0];
  url = entry[1];

  /* increment the cursor for the next call */
  ++(this->suggestionsOffset);

  return true;
}
|
||||
|
||||
/* Check if the file has a checksum */
|
||||
bool Reader::canCheckIntegrity() {
|
||||
return this->zimFileHandler->getChecksum() != "";
|
||||
}
|
||||
|
||||
/* Return true if corrupted, false otherwise */
|
||||
bool Reader::isCorrupted() {
|
||||
try {
|
||||
if (this->zimFileHandler->verify() == true)
|
||||
return false;
|
||||
} catch (exception &e) {
|
||||
cerr << e.what() << endl;
|
||||
return true;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Return the file size, also works for split files */
|
||||
unsigned int Reader::getFileSize() {
|
||||
zim::File *file = this->getZimFileHandler();
|
||||
zim::offset_type size = 0;
|
||||
|
||||
if (file != NULL) {
|
||||
size = file->getFilesize();
|
||||
}
|
||||
|
||||
return (size / 1024);
|
||||
}
|
||||
}
|
||||
@@ -1,105 +0,0 @@
|
||||
/*
|
||||
* Copyright 2011 Emmanuel Engelhart <kelson@kiwix.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 3 of the License, or
|
||||
* any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
* MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#ifndef KIWIX_READER_H
|
||||
#define KIWIX_READER_H
|
||||
|
||||
#include <zim/zim.h>
|
||||
#include <zim/file.h>
|
||||
#include <zim/article.h>
|
||||
#include <zim/fileiterator.h>
|
||||
#include <stdio.h>
|
||||
#include <string>
|
||||
#include <exception>
|
||||
#include <sstream>
|
||||
#include <map>
|
||||
#include "time.h"
|
||||
#include <pathTools.h>
|
||||
#include <stringTools.h>
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace kiwix {
|
||||
|
||||
class Reader {
|
||||
|
||||
public:
|
||||
Reader(const string zimFilePath);
|
||||
~Reader();
|
||||
|
||||
void reset();
|
||||
unsigned int getArticleCount();
|
||||
unsigned int getMediaCount();
|
||||
unsigned int getGlobalCount();
|
||||
string getZimFilePath();
|
||||
string getId();
|
||||
string getRandomPageUrl();
|
||||
string getFirstPageUrl();
|
||||
string getMainPageUrl();
|
||||
bool getMetatag(const string &url, string &content);
|
||||
string getTitle();
|
||||
string getDescription();
|
||||
string getLanguage();
|
||||
string getName();
|
||||
string getTags();
|
||||
string getDate();
|
||||
string getCreator();
|
||||
string getPublisher();
|
||||
string getOrigId();
|
||||
bool getFavicon(string &content, string &mimeType);
|
||||
bool getPageUrlFromTitle(const string &title, string &url);
|
||||
bool getMimeTypeByUrl(const string &url, string &mimeType);
|
||||
bool getContentByUrl(const string &url, string &content, unsigned int &contentLength, string &contentType);
|
||||
bool getContentByEncodedUrl(const string &url, string &content, unsigned int &contentLength, string &contentType, string &baseUrl);
|
||||
bool getContentByEncodedUrl(const string &url, string &content, unsigned int &contentLength, string &contentType);
|
||||
bool getContentByDecodedUrl(const string &url, string &content, unsigned int &contentLength, string &contentType, string &baseUrl);
|
||||
bool getContentByDecodedUrl(const string &url, string &content, unsigned int &contentLength, string &contentType);
|
||||
bool searchSuggestions(const string &prefix, unsigned int suggestionsCount, const bool reset = true);
|
||||
bool searchSuggestionsSmart(const string &prefix, unsigned int suggestionsCount);
|
||||
bool urlExists(const string &url);
|
||||
bool hasFulltextIndex();
|
||||
std::vector<std::string> getTitleVariants(const std::string &title);
|
||||
bool getNextSuggestion(string &title);
|
||||
bool getNextSuggestion(string &title, string &url);
|
||||
bool canCheckIntegrity();
|
||||
bool isCorrupted();
|
||||
bool parseUrl(const string &url, char *ns, string &title);
|
||||
unsigned int getFileSize();
|
||||
zim::File* getZimFileHandler();
|
||||
bool getArticleObjectByDecodedUrl(const string &url, zim::Article &article);
|
||||
|
||||
protected:
|
||||
zim::File* zimFileHandler;
|
||||
zim::size_type firstArticleOffset;
|
||||
zim::size_type lastArticleOffset;
|
||||
zim::size_type currentArticleOffset;
|
||||
zim::size_type nsACount;
|
||||
zim::size_type nsICount;
|
||||
std::string zimFilePath;
|
||||
|
||||
std::vector< std::vector<std::string> > suggestions;
|
||||
std::vector< std::vector<std::string> >::iterator suggestionsOffset;
|
||||
|
||||
private:
|
||||
std::map<std::string, unsigned int> parseCounterMetadata();
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
@@ -1,212 +0,0 @@
|
||||
/*
|
||||
* Copyright 2011 Emmanuel Engelhart <kelson@kiwix.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 3 of the License, or
|
||||
* any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
* MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#include "searcher.h"
|
||||
|
||||
|
||||
namespace kiwix {
|
||||
|
||||
/* Constructor */
|
||||
Searcher::Searcher() :
|
||||
searchPattern(""),
|
||||
protocolPrefix("zim://"),
|
||||
searchProtocolPrefix("search://?"),
|
||||
resultCountPerPage(0),
|
||||
estimatedResultCount(0),
|
||||
resultStart(0),
|
||||
resultEnd(0)
|
||||
{
|
||||
template_ct2 = getResourceAsString("results.ct2");
|
||||
loadICUExternalTables();
|
||||
}
|
||||
|
||||
/* Destructor */
|
||||
Searcher::~Searcher() {}
|
||||
|
||||
/* Search strings in the database */
|
||||
void Searcher::search(std::string &search, unsigned int resultStart,
|
||||
unsigned int resultEnd, const bool verbose) {
|
||||
this->reset();
|
||||
|
||||
if (verbose == true) {
|
||||
cout << "Performing query `" << search << "'" << endl;
|
||||
}
|
||||
|
||||
/* If resultEnd & resultStart inverted */
|
||||
if (resultStart > resultEnd) {
|
||||
resultEnd += resultStart;
|
||||
resultStart = resultEnd - resultStart;
|
||||
resultEnd -= resultStart;
|
||||
}
|
||||
|
||||
/* Try to find results */
|
||||
if (resultStart != resultEnd) {
|
||||
|
||||
/* Avoid big researches */
|
||||
this->resultCountPerPage = resultEnd - resultStart;
|
||||
if (this->resultCountPerPage > 70) {
|
||||
resultEnd = resultStart + 70;
|
||||
this->resultCountPerPage = 70;
|
||||
}
|
||||
|
||||
/* Perform the search */
|
||||
this->searchPattern = search;
|
||||
this->resultStart = resultStart;
|
||||
this->resultEnd = resultEnd;
|
||||
string unaccentedSearch = removeAccents(search);
|
||||
searchInIndex(unaccentedSearch, resultStart, resultEnd, verbose);
|
||||
this->resultOffset = this->results.begin();
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
/* Reset the results */
|
||||
void Searcher::reset() {
|
||||
this->results.clear();
|
||||
this->resultOffset = this->results.begin();
|
||||
this->estimatedResultCount = 0;
|
||||
this->searchPattern = "";
|
||||
return;
|
||||
}
|
||||
|
||||
/* Return the result count estimation */
|
||||
unsigned int Searcher::getEstimatedResultCount() {
|
||||
return this->estimatedResultCount;
|
||||
}
|
||||
|
||||
/* Get next result */
|
||||
bool Searcher::getNextResult(string &url, string &title, unsigned int &score) {
|
||||
bool retVal = false;
|
||||
|
||||
if (this->resultOffset != this->results.end()) {
|
||||
|
||||
/* url */
|
||||
url = this->resultOffset->url;
|
||||
|
||||
/* title */
|
||||
title = this->resultOffset->title;
|
||||
|
||||
/* score */
|
||||
score = this->resultOffset->score;
|
||||
|
||||
/* increment the cursor for the next call */
|
||||
this->resultOffset++;
|
||||
|
||||
retVal = true;
|
||||
}
|
||||
|
||||
return retVal;
|
||||
}
|
||||
|
||||
bool Searcher::setProtocolPrefix(const std::string prefix) {
|
||||
this->protocolPrefix = prefix;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Searcher::setSearchProtocolPrefix(const std::string prefix) {
|
||||
this->searchProtocolPrefix = prefix;
|
||||
return true;
|
||||
}
|
||||
|
||||
void Searcher::setContentHumanReadableId(const string &contentHumanReadableId) {
|
||||
this->contentHumanReadableId = contentHumanReadableId;
|
||||
}
|
||||
|
||||
#ifndef __ANDROID__
|
||||
|
||||
string Searcher::getHtml() {
|
||||
|
||||
SimpleVM oSimpleVM;
|
||||
|
||||
// Fill data
|
||||
CDT oData;
|
||||
CDT resultsCDT(CDT::ARRAY_VAL);
|
||||
|
||||
this->resultOffset = this->results.begin();
|
||||
while (this->resultOffset != this->results.end()) {
|
||||
CDT result;
|
||||
result["title"] = this->resultOffset->title;
|
||||
result["url"] = this->resultOffset->url;
|
||||
result["snippet"] = this->resultOffset->snippet;
|
||||
|
||||
if (this->resultOffset->size >= 0)
|
||||
result["size"] = kiwix::beautifyInteger(this->resultOffset->size);
|
||||
|
||||
if (this->resultOffset->wordCount >= 0)
|
||||
result["wordCount"] = kiwix::beautifyInteger(this->resultOffset->wordCount);
|
||||
|
||||
resultsCDT.PushBack(result);
|
||||
this->resultOffset++;
|
||||
}
|
||||
this->resultOffset = this->results.begin();
|
||||
oData["results"] = resultsCDT;
|
||||
|
||||
// pages
|
||||
CDT pagesCDT(CDT::ARRAY_VAL);
|
||||
|
||||
unsigned int pageStart = this->resultStart / this->resultCountPerPage >= 5 ? this->resultStart / this->resultCountPerPage - 4 : 0;
|
||||
unsigned int pageCount = this->estimatedResultCount / this->resultCountPerPage + 1 - pageStart;
|
||||
|
||||
if (pageCount > 10)
|
||||
pageCount = 10;
|
||||
else if (pageCount == 1)
|
||||
pageCount = 0;
|
||||
|
||||
for (unsigned int i=pageStart; i<pageStart+pageCount; i++) {
|
||||
CDT page;
|
||||
page["label"] = i + 1;
|
||||
page["start"] = i * this->resultCountPerPage;
|
||||
page["end"] = (i+1) * this->resultCountPerPage;
|
||||
|
||||
if (i * this->resultCountPerPage == this->resultStart)
|
||||
page["selected"] = true;
|
||||
|
||||
pagesCDT.PushBack(page);
|
||||
}
|
||||
oData["pages"] = pagesCDT;
|
||||
|
||||
oData["count"] = kiwix::beautifyInteger(this->estimatedResultCount);
|
||||
oData["searchPattern"] = kiwix::encodeDiples(this->searchPattern);
|
||||
oData["searchPatternEncoded"] = urlEncode(this->searchPattern);
|
||||
oData["resultStart"] = this->resultStart + 1;
|
||||
oData["resultEnd"] = (this->resultEnd > this->estimatedResultCount ? this->estimatedResultCount : this->resultEnd);
|
||||
oData["resultRange"] = this->resultCountPerPage;
|
||||
oData["resultLastPageStart"] = this->estimatedResultCount > this->resultCountPerPage ? this->estimatedResultCount - this->resultCountPerPage : 0;
|
||||
oData["protocolPrefix"] = this->protocolPrefix;
|
||||
oData["searchProtocolPrefix"] = this->searchProtocolPrefix;
|
||||
oData["contentId"] = this->contentHumanReadableId;
|
||||
|
||||
VMStringLoader oLoader(template_ct2.c_str(), template_ct2.size());
|
||||
|
||||
FileLogger oLogger(stderr);
|
||||
|
||||
// DEBUG only (write output to stdout)
|
||||
// oSimpleVM.Run(oData, oLoader, stdout, oLogger);
|
||||
|
||||
std::string sResult;
|
||||
oSimpleVM.Run(oData, oLoader, sResult, oLogger);
|
||||
|
||||
return sResult;
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
}
|
||||
@@ -1,99 +0,0 @@
|
||||
/*
|
||||
* Copyright 2011 Emmanuel Engelhart <kelson@kiwix.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 3 of the License, or
|
||||
* any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
* MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#ifndef KIWIX_SEARCHER_H
|
||||
#define KIWIX_SEARCHER_H
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string>
|
||||
#include <algorithm>
|
||||
#include <vector>
|
||||
#include <locale>
|
||||
#include <cctype>
|
||||
#include <vector>
|
||||
#include <resourceTools.h>
|
||||
#include <pathTools.h>
|
||||
#include <stringTools.h>
|
||||
#include "unicode/putil.h"
|
||||
|
||||
#ifndef __ANDROID__
|
||||
#include <ctpp2/CDT.hpp>
|
||||
#include <ctpp2/CTPP2FileLogger.hpp>
|
||||
#include <ctpp2/CTPP2SimpleVM.hpp>
|
||||
#include "kiwix/ctpp2/CTPP2VMStringLoader.hpp"
|
||||
|
||||
using namespace CTPP;
|
||||
#endif
|
||||
|
||||
using namespace std;
|
||||
|
||||
/*
 * One search hit as stored by a Searcher.
 * size and wordCount use -1 for "not available" (see how
 * XapianSearcher::searchInIndex() fills them and how Searcher::getHtml()
 * skips negative values).
 */
struct Result
{
  string url;      /* url of the hit */
  string title;    /* title of the hit */
  int score;       /* score of the hit */
  string snippet;  /* snippet text of the hit */
  int wordCount;   /* word count, or -1 */
  int size;        /* size, or -1 */
};
|
||||
|
||||
namespace kiwix {
|
||||
|
||||
class Searcher {
|
||||
|
||||
public:
|
||||
Searcher();
|
||||
~Searcher();
|
||||
|
||||
void search(std::string &search, unsigned int resultStart,
|
||||
unsigned int resultEnd, const bool verbose=false);
|
||||
bool getNextResult(string &url, string &title, unsigned int &score);
|
||||
unsigned int getEstimatedResultCount();
|
||||
bool setProtocolPrefix(const std::string prefix);
|
||||
bool setSearchProtocolPrefix(const std::string prefix);
|
||||
void reset();
|
||||
void setContentHumanReadableId(const string &contentHumanReadableId);
|
||||
|
||||
#ifndef __ANDROID__
|
||||
string getHtml();
|
||||
#endif
|
||||
|
||||
protected:
|
||||
std::string beautifyInteger(const unsigned int number);
|
||||
virtual void closeIndex() = 0;
|
||||
virtual void searchInIndex(string &search, const unsigned int resultStart,
|
||||
const unsigned int resultEnd, const bool verbose=false) = 0;
|
||||
|
||||
std::vector<Result> results;
|
||||
std::vector<Result>::iterator resultOffset;
|
||||
std::string searchPattern;
|
||||
std::string protocolPrefix;
|
||||
std::string searchProtocolPrefix;
|
||||
std::string template_ct2;
|
||||
unsigned int resultCountPerPage;
|
||||
unsigned int estimatedResultCount;
|
||||
unsigned int resultStart;
|
||||
unsigned int resultEnd;
|
||||
std::string contentHumanReadableId;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
@@ -1,111 +0,0 @@
|
||||
/*
|
||||
* Copyright 2011 Emmanuel Engelhart <kelson@kiwix.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 3 of the License, or
|
||||
* any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
* MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#include "xapianIndexer.h"
|
||||
|
||||
namespace kiwix {
|
||||
|
||||
/* Constructor */
|
||||
XapianIndexer::XapianIndexer() {
|
||||
/*
|
||||
stemmer(Xapian::Stem("french")) {
|
||||
this->indexer.set_stemmer(this->stemmer);
|
||||
*/
|
||||
}
|
||||
|
||||
void XapianIndexer::indexingPrelude(const string indexPath) {
|
||||
this->writableDatabase = Xapian::WritableDatabase(indexPath+".tmp", Xapian::DB_CREATE_OR_OVERWRITE | Xapian::DB_BACKEND_GLASS);
|
||||
this->writableDatabase.begin_transaction(true);
|
||||
|
||||
/* Insert the stopwords */
|
||||
if (!this->stopWords.empty()) {
|
||||
std::vector<std::string>::iterator it = this->stopWords.begin();
|
||||
for( ; it != this->stopWords.end(); ++it) {
|
||||
this->stopper.add(*it);
|
||||
}
|
||||
|
||||
this->indexer.set_stopper(&(this->stopper));
|
||||
}
|
||||
}
|
||||
|
||||
void XapianIndexer::index(const string &url,
|
||||
const string &title,
|
||||
const string &unaccentedTitle,
|
||||
const string &keywords,
|
||||
const string &content,
|
||||
const string &snippet,
|
||||
const string &size,
|
||||
const string &wordCount) {
|
||||
|
||||
/* Put the data in the document */
|
||||
Xapian::Document currentDocument;
|
||||
currentDocument.clear_values();
|
||||
currentDocument.add_value(0, title);
|
||||
currentDocument.add_value(1, snippet);
|
||||
currentDocument.add_value(2, size);
|
||||
currentDocument.add_value(3, wordCount);
|
||||
currentDocument.set_data(url);
|
||||
indexer.set_document(currentDocument);
|
||||
|
||||
/* Index the title */
|
||||
if (!unaccentedTitle.empty()) {
|
||||
this->indexer.index_text_without_positions(unaccentedTitle, this->getTitleBoostFactor(content.size()));
|
||||
}
|
||||
|
||||
/* Index the keywords */
|
||||
if (!keywords.empty()) {
|
||||
this->indexer.index_text_without_positions(keywords, keywordsBoostFactor);
|
||||
}
|
||||
|
||||
/* Index the content */
|
||||
if (!content.empty()) {
|
||||
this->indexer.index_text_without_positions(content);
|
||||
}
|
||||
|
||||
/* add to the database */
|
||||
this->writableDatabase.add_document(currentDocument);
|
||||
}
|
||||
|
||||
void XapianIndexer::flush() {
|
||||
this->writableDatabase.commit_transaction();
|
||||
this->writableDatabase.begin_transaction(true);
|
||||
}
|
||||
|
||||
void XapianIndexer::indexingPostlude(const string indexPath) {
|
||||
this->flush();
|
||||
this->writableDatabase.commit_transaction();
|
||||
#ifdef _WIN32
|
||||
this->writableDatabase.close();
|
||||
#endif
|
||||
|
||||
/* Compacting the index */
|
||||
Xapian::Compactor compactor;
|
||||
try {
|
||||
Xapian::Database src;
|
||||
src.add_database(Xapian::Database(indexPath+".tmp"));
|
||||
src.compact(indexPath, Xapian::Compactor::FULL | Xapian::DBCOMPACT_SINGLE_FILE, 0, compactor);
|
||||
} catch (const Xapian::Error &error) {
|
||||
cerr << indexPath << ": " << error.get_description() << endl;
|
||||
exit(1);
|
||||
} catch (const char * msg) {
|
||||
cerr << indexPath << ": " << msg << endl;
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,56 +0,0 @@
|
||||
/*
|
||||
* Copyright 2011 Emmanuel Engelhart <kelson@kiwix.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 3 of the License, or
|
||||
* any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
* MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#ifndef KIWIX_XAPIAN_INDEXER_H
|
||||
#define KIWIX_XAPIAN_INDEXER_H
|
||||
|
||||
#include <xapian.h>
|
||||
#include "indexer.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace kiwix {
|
||||
|
||||
class XapianIndexer : public Indexer {
|
||||
|
||||
public:
|
||||
XapianIndexer();
|
||||
|
||||
protected:
|
||||
void indexingPrelude(const string indexPath);
|
||||
void index(const string &url,
|
||||
const string &title,
|
||||
const string &unaccentedTitle,
|
||||
const string &keywords,
|
||||
const string &content,
|
||||
const string &snippet,
|
||||
const string &size,
|
||||
const string &wordCount);
|
||||
void flush();
|
||||
void indexingPostlude(const string indexPath);
|
||||
|
||||
Xapian::WritableDatabase writableDatabase;
|
||||
Xapian::Stem stemmer;
|
||||
Xapian::SimpleStopper stopper;
|
||||
Xapian::TermGenerator indexer;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
@@ -1,99 +0,0 @@
|
||||
/*
|
||||
* Copyright 2011 Emmanuel Engelhart <kelson@kiwix.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 3 of the License, or
|
||||
* any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
* MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#include "xapianSearcher.h"
|
||||
#include <zim/zim.h>
|
||||
#include <zim/file.h>
|
||||
#include <zim/article.h>
|
||||
#include <zim/error.h>
|
||||
#include <sys/types.h>
|
||||
#include <unistd.h>
|
||||
|
||||
namespace kiwix {
|
||||
|
||||
/* Constructor */
|
||||
XapianSearcher::XapianSearcher(const string &xapianDirectoryPath)
|
||||
: Searcher(),
|
||||
stemmer(Xapian::Stem("english")) {
|
||||
this->openIndex(xapianDirectoryPath);
|
||||
}
|
||||
|
||||
/* Open Xapian readable database */
|
||||
void XapianSearcher::openIndex(const string &directoryPath) {
|
||||
try
|
||||
{
|
||||
zim::File zimFile = zim::File(directoryPath);
|
||||
zim::Article xapianArticle = zimFile.getArticle('Z', "/fulltextIndex/xapian");
|
||||
if (!xapianArticle.good())
|
||||
throw NoXapianIndexInZim();
|
||||
zim::offset_type dbOffset = xapianArticle.getOffset();
|
||||
int databasefd = open(directoryPath.c_str(), O_RDONLY);
|
||||
lseek(databasefd, dbOffset, SEEK_SET);
|
||||
this->readableDatabase = Xapian::Database(databasefd);
|
||||
} catch (...) {
|
||||
this->readableDatabase = Xapian::Database(directoryPath);
|
||||
}
|
||||
}
|
||||
|
||||
/* Close Xapian writable database */
|
||||
void XapianSearcher::closeIndex() {
|
||||
return;
|
||||
}
|
||||
|
||||
/* Search strings in the database */
|
||||
void XapianSearcher::searchInIndex(string &search, const unsigned int resultStart,
|
||||
const unsigned int resultEnd, const bool verbose) {
|
||||
/* Create the query */
|
||||
Xapian::QueryParser queryParser;
|
||||
Xapian::Query query = queryParser.parse_query(search);
|
||||
|
||||
/* Create the enquire object */
|
||||
Xapian::Enquire enquire(this->readableDatabase);
|
||||
enquire.set_query(query);
|
||||
|
||||
/* Get the results */
|
||||
Xapian::MSet matches = enquire.get_mset(resultStart, resultEnd - resultStart);
|
||||
|
||||
Xapian::MSetIterator i;
|
||||
for (i = matches.begin(); i != matches.end(); ++i) {
|
||||
Xapian::Document doc = i.get_document();
|
||||
|
||||
Result result;
|
||||
result.url = doc.get_data();
|
||||
result.title = doc.get_value(0);
|
||||
result.snippet = doc.get_value(1);
|
||||
result.size = (doc.get_value(2).empty() == true ? -1 : atoi(doc.get_value(2).c_str()));
|
||||
result.wordCount = (doc.get_value(3).empty() == true ? -1 : atoi(doc.get_value(3).c_str()));
|
||||
result.score = i.get_percent();
|
||||
|
||||
this->results.push_back(result);
|
||||
|
||||
if (verbose) {
|
||||
std::cout << "Document ID " << *i << " \t";
|
||||
std::cout << i.get_percent() << "% ";
|
||||
std::cout << "\t[" << doc.get_data() << "] - " << doc.get_value(0) << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
/* Update the global resultCount value*/
|
||||
this->estimatedResultCount = matches.get_matches_estimated();
|
||||
|
||||
return;
|
||||
}
|
||||
}
|
||||
@@ -1,53 +0,0 @@
|
||||
/*
|
||||
* Copyright 2011 Emmanuel Engelhart <kelson@kiwix.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 3 of the License, or
|
||||
* any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
* MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#ifndef KIWIX_XAPIAN_SEARCHER_H
|
||||
#define KIWIX_XAPIAN_SEARCHER_H
|
||||
|
||||
#include <xapian.h>
|
||||
#include "searcher.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace kiwix {
|
||||
|
||||
class NoXapianIndexInZim: public exception {
|
||||
virtual const char* what() const throw() {
|
||||
return "There is no fulltext index in the zim file";
|
||||
}
|
||||
};
|
||||
|
||||
class XapianSearcher : public Searcher {
|
||||
|
||||
public:
|
||||
XapianSearcher(const string &xapianDirectoryPath);
|
||||
void searchInIndex(string &search, const unsigned int resultStart, const unsigned int resultEnd,
|
||||
const bool verbose=false);
|
||||
|
||||
protected:
|
||||
void closeIndex();
|
||||
void openIndex(const string &xapianDirectoryPath);
|
||||
|
||||
Xapian::Database readableDatabase;
|
||||
Xapian::Stem stemmer;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
@@ -17,46 +17,58 @@
|
||||
* MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#include "networkTools.h"
|
||||
#include <common/networkTools.h>
|
||||
|
||||
std::map<std::string, std::string> kiwix::getNetworkInterfaces() {
|
||||
|
||||
|
||||
std::map<std::string, std::string> kiwix::getNetworkInterfaces()
|
||||
{
|
||||
std::map<std::string, std::string> interfaces;
|
||||
|
||||
#ifdef _WIN32
|
||||
SOCKET sd = WSASocket(AF_INET, SOCK_DGRAM, 0, 0, 0, 0);
|
||||
if (sd == SOCKET_ERROR) {
|
||||
std::cerr << "Failed to get a socket. Error " << WSAGetLastError() <<
|
||||
std::endl;
|
||||
if (sd == (SOCKET)SOCKET_ERROR) {
|
||||
std::cerr << "Failed to get a socket. Error " << WSAGetLastError()
|
||||
<< std::endl;
|
||||
return interfaces;
|
||||
}
|
||||
|
||||
INTERFACE_INFO InterfaceList[20];
|
||||
unsigned long nBytesReturned;
|
||||
if (WSAIoctl(sd, SIO_GET_INTERFACE_LIST, 0, 0, &InterfaceList,
|
||||
sizeof(InterfaceList), &nBytesReturned, 0, 0) == SOCKET_ERROR) {
|
||||
std::cerr << "Failed calling WSAIoctl: error " << WSAGetLastError() <<
|
||||
std::endl;
|
||||
if (WSAIoctl(sd,
|
||||
SIO_GET_INTERFACE_LIST,
|
||||
0,
|
||||
0,
|
||||
&InterfaceList,
|
||||
sizeof(InterfaceList),
|
||||
&nBytesReturned,
|
||||
0,
|
||||
0)
|
||||
== SOCKET_ERROR) {
|
||||
std::cerr << "Failed calling WSAIoctl: error " << WSAGetLastError()
|
||||
<< std::endl;
|
||||
return interfaces;
|
||||
}
|
||||
|
||||
int nNumInterfaces = nBytesReturned / sizeof(INTERFACE_INFO);
|
||||
for (int i = 0; i < nNumInterfaces; ++i) {
|
||||
sockaddr_in *pAddress;
|
||||
pAddress = (sockaddr_in *) & (InterfaceList[i].iiAddress);
|
||||
sockaddr_in* pAddress;
|
||||
pAddress = (sockaddr_in*)&(InterfaceList[i].iiAddress);
|
||||
|
||||
/* Add to the map */
|
||||
std::string interfaceName = std::string(inet_ntoa(pAddress->sin_addr));
|
||||
std::string interfaceIp = std::string(inet_ntoa(pAddress->sin_addr));
|
||||
interfaces.insert(std::pair<std::string, std::string>(interfaceName, interfaceIp));
|
||||
interfaces.insert(
|
||||
std::pair<std::string, std::string>(interfaceName, interfaceIp));
|
||||
}
|
||||
#else
|
||||
/* Get Network interfaces information */
|
||||
char buf[16384];
|
||||
struct ifconf ifconf;
|
||||
int fd = socket(PF_INET, SOCK_DGRAM, 0); /* Only IPV4 */
|
||||
ifconf.ifc_len=sizeof buf;
|
||||
ifconf.ifc_buf=buf;
|
||||
if(ioctl(fd, SIOCGIFCONF, &ifconf)!=0) {
|
||||
ifconf.ifc_len = sizeof buf;
|
||||
ifconf.ifc_buf = buf;
|
||||
if (ioctl(fd, SIOCGIFCONF, &ifconf) != 0) {
|
||||
perror("ioctl(SIOCGIFCONF)");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
@@ -64,73 +76,86 @@ std::map<std::string, std::string> kiwix::getNetworkInterfaces() {
|
||||
/* Go through each interface */
|
||||
int i;
|
||||
size_t len;
|
||||
struct ifreq *ifreq;
|
||||
struct ifreq* ifreq;
|
||||
ifreq = ifconf.ifc_req;
|
||||
for (i = 0; i < ifconf.ifc_len; ) {
|
||||
for (i = 0; i < ifconf.ifc_len;) {
|
||||
if (ifreq->ifr_addr.sa_family == AF_INET) {
|
||||
/* Get the network interface ip */
|
||||
char host[128] = { 0 };
|
||||
const int error = getnameinfo(&(ifreq->ifr_addr), sizeof ifreq->ifr_addr,
|
||||
host, sizeof host,
|
||||
0, 0, NI_NUMERICHOST);
|
||||
char host[128] = {0};
|
||||
const int error = getnameinfo(&(ifreq->ifr_addr),
|
||||
sizeof ifreq->ifr_addr,
|
||||
host,
|
||||
sizeof host,
|
||||
0,
|
||||
0,
|
||||
NI_NUMERICHOST);
|
||||
if (!error) {
|
||||
std::string interfaceName = std::string(ifreq->ifr_name);
|
||||
std::string interfaceIp = std::string(host);
|
||||
/* Add to the map */
|
||||
interfaces.insert(std::pair<std::string, std::string>(interfaceName, interfaceIp));
|
||||
interfaces.insert(
|
||||
std::pair<std::string, std::string>(interfaceName, interfaceIp));
|
||||
} else {
|
||||
perror("getnameinfo()");
|
||||
}
|
||||
}
|
||||
|
||||
/* some systems have ifr_addr.sa_len and adjust the length that
|
||||
* way, but not mine. weird */
|
||||
#ifndef linux
|
||||
len=IFNAMSIZ + ifreq->ifr_addr.sa_len;
|
||||
/* some systems have ifr_addr.sa_len and adjust the length that
|
||||
* way, but not mine. weird */
|
||||
#ifndef __linux__
|
||||
len = IFNAMSIZ + ifreq->ifr_addr.sa_len;
|
||||
#else
|
||||
len=sizeof *ifreq;
|
||||
len = sizeof *ifreq;
|
||||
#endif
|
||||
ifreq=(struct ifreq*)((char*)ifreq+len);
|
||||
i+=len;
|
||||
ifreq = (struct ifreq*)((char*)ifreq + len);
|
||||
i += len;
|
||||
}
|
||||
#endif
|
||||
return interfaces;
|
||||
}
|
||||
|
||||
std::string kiwix::getBestPublicIp() {
|
||||
std::string kiwix::getBestPublicIp()
|
||||
{
|
||||
std::map<std::string, std::string> interfaces = kiwix::getNetworkInterfaces();
|
||||
|
||||
#ifndef _WIN32
|
||||
const char* const prioritizedNames[] =
|
||||
{ "eth0", "eth1", "wlan0", "wlan1", "en0", "en1" };
|
||||
const char* const prioritizedNames[]
|
||||
= {"eth0", "eth1", "wlan0", "wlan1", "en0", "en1"};
|
||||
const int count = (sizeof prioritizedNames) / (sizeof prioritizedNames[0]);
|
||||
for (int i = 0; i < count; ++i) {
|
||||
std::map<std::string, std::string>::const_iterator it =
|
||||
interfaces.find(prioritizedNames[i]);
|
||||
if (it != interfaces.end())
|
||||
std::map<std::string, std::string>::const_iterator it
|
||||
= interfaces.find(prioritizedNames[i]);
|
||||
if (it != interfaces.end()) {
|
||||
return it->second;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
for (std::map<std::string, std::string>::iterator iter = interfaces.begin();
|
||||
iter != interfaces.end(); ++iter) {
|
||||
iter != interfaces.end();
|
||||
++iter) {
|
||||
std::string interfaceIp = iter->second;
|
||||
if (interfaceIp.length() >= 7 && interfaceIp.substr(0, 7) == "192.168")
|
||||
if (interfaceIp.length() >= 7 && interfaceIp.substr(0, 7) == "192.168") {
|
||||
return interfaceIp;
|
||||
}
|
||||
}
|
||||
|
||||
for (std::map<std::string, std::string>::iterator iter = interfaces.begin();
|
||||
iter != interfaces.end(); ++iter) {
|
||||
iter != interfaces.end();
|
||||
++iter) {
|
||||
std::string interfaceIp = iter->second;
|
||||
if (interfaceIp.length() >= 7 && interfaceIp.substr(0, 7) == "172.16.")
|
||||
if (interfaceIp.length() >= 7 && interfaceIp.substr(0, 7) == "172.16.") {
|
||||
return interfaceIp;
|
||||
}
|
||||
}
|
||||
|
||||
for (std::map<std::string, std::string>::iterator iter = interfaces.begin();
|
||||
iter != interfaces.end(); ++iter) {
|
||||
iter != interfaces.end();
|
||||
++iter) {
|
||||
std::string interfaceIp = iter->second;
|
||||
if (interfaceIp.length() >= 3 && interfaceIp.substr(0, 3) == "10.")
|
||||
if (interfaceIp.length() >= 3 && interfaceIp.substr(0, 3) == "10.") {
|
||||
return interfaceIp;
|
||||
}
|
||||
}
|
||||
|
||||
return "127.0.0.1";
|
||||
|
||||
@@ -17,12 +17,13 @@
|
||||
* MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#include "otherTools.h"
|
||||
#include <common/otherTools.h>
|
||||
|
||||
void kiwix::sleep(unsigned int milliseconds) {
|
||||
void kiwix::sleep(unsigned int milliseconds)
|
||||
{
|
||||
#ifdef _WIN32
|
||||
Sleep(milliseconds);
|
||||
Sleep(milliseconds);
|
||||
#else
|
||||
usleep(1000 * milliseconds);
|
||||
usleep(1000 * milliseconds);
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -17,16 +17,16 @@
|
||||
* MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#include "pathTools.h"
|
||||
#include <common/pathTools.h>
|
||||
|
||||
#ifdef __APPLE__
|
||||
#include <mach-o/dyld.h>
|
||||
#include <limits.h>
|
||||
#include <mach-o/dyld.h>
|
||||
#elif _WIN32
|
||||
#include <windows.h>
|
||||
#include "Shlwapi.h"
|
||||
#include <direct.h>
|
||||
#define getcwd _getcwd // stupid MSFT "deprecation" warning
|
||||
#include <windows.h>
|
||||
#include "shlwapi.h"
|
||||
#define getcwd _getcwd // stupid MSFT "deprecation" warning
|
||||
#endif
|
||||
|
||||
#ifdef _WIN32
|
||||
@@ -41,11 +41,14 @@
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
#ifndef PATH_MAX
|
||||
#define PATH_MAX 1024
|
||||
#endif
|
||||
|
||||
bool isRelativePath(const string &path) {
|
||||
bool isRelativePath(const string& path)
|
||||
{
|
||||
#ifdef _WIN32
|
||||
return path.empty() || path.substr(1, 2) == ":\\" ? false : true;
|
||||
#else
|
||||
@@ -53,19 +56,21 @@ bool isRelativePath(const string &path) {
|
||||
#endif
|
||||
}
|
||||
|
||||
string computeRelativePath(const string path, const string absolutePath) {
|
||||
string computeRelativePath(const string path, const string absolutePath)
|
||||
{
|
||||
std::vector<std::string> pathParts = kiwix::split(path, SEPARATOR);
|
||||
std::vector<std::string> absolutePathParts = kiwix::split(absolutePath, SEPARATOR);
|
||||
std::vector<std::string> absolutePathParts
|
||||
= kiwix::split(absolutePath, SEPARATOR);
|
||||
|
||||
unsigned int commonCount = 0;
|
||||
while (commonCount < pathParts.size() &&
|
||||
commonCount < absolutePathParts.size() &&
|
||||
pathParts[commonCount] == absolutePathParts[commonCount]) {
|
||||
while (commonCount < pathParts.size()
|
||||
&& commonCount < absolutePathParts.size()
|
||||
&& pathParts[commonCount] == absolutePathParts[commonCount]) {
|
||||
if (!pathParts[commonCount].empty()) {
|
||||
commonCount++;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
string relativePath;
|
||||
#ifdef _WIN32
|
||||
/* On Windows you have a token more because the root is represented
|
||||
@@ -75,10 +80,10 @@ string computeRelativePath(const string path, const string absolutePath) {
|
||||
}
|
||||
#endif
|
||||
|
||||
for (unsigned int i = commonCount ; i < pathParts.size() ; i++) {
|
||||
for (unsigned int i = commonCount; i < pathParts.size(); i++) {
|
||||
relativePath += "../";
|
||||
}
|
||||
for (unsigned int i = commonCount ; i < absolutePathParts.size() ; i++) {
|
||||
for (unsigned int i = commonCount; i < absolutePathParts.size(); i++) {
|
||||
relativePath += absolutePathParts[i];
|
||||
relativePath += i + 1 < absolutePathParts.size() ? "/" : "";
|
||||
}
|
||||
@@ -87,11 +92,12 @@ string computeRelativePath(const string path, const string absolutePath) {
|
||||
}
|
||||
|
||||
/* Warning: the relative path must be with slashes */
|
||||
string computeAbsolutePath(const string path, const string relativePath) {
|
||||
string computeAbsolutePath(const string path, const string relativePath)
|
||||
{
|
||||
string absolutePath;
|
||||
|
||||
if (path.empty()) {
|
||||
char *path=NULL;
|
||||
char* path = NULL;
|
||||
size_t size = 0;
|
||||
|
||||
#ifdef _WIN32
|
||||
@@ -102,15 +108,17 @@ string computeAbsolutePath(const string path, const string relativePath) {
|
||||
|
||||
absolutePath = string(path) + SEPARATOR;
|
||||
} else {
|
||||
absolutePath = path.substr(path.length() - 1, 1) == SEPARATOR ? path : path + SEPARATOR;
|
||||
absolutePath = path.substr(path.length() - 1, 1) == SEPARATOR
|
||||
? path
|
||||
: path + SEPARATOR;
|
||||
}
|
||||
|
||||
#if _WIN32
|
||||
char *cRelativePath = _strdup(relativePath.c_str());
|
||||
char* cRelativePath = _strdup(relativePath.c_str());
|
||||
#else
|
||||
char *cRelativePath = strdup(relativePath.c_str());
|
||||
char* cRelativePath = strdup(relativePath.c_str());
|
||||
#endif
|
||||
char *token = strtok(cRelativePath, "/");
|
||||
char* token = strtok(cRelativePath, "/");
|
||||
|
||||
while (token != NULL) {
|
||||
if (string(token) == "..") {
|
||||
@@ -119,8 +127,9 @@ string computeAbsolutePath(const string path, const string relativePath) {
|
||||
} else if (strcmp(token, ".") && strcmp(token, "")) {
|
||||
absolutePath += string(token);
|
||||
token = strtok(NULL, "/");
|
||||
if (token != NULL)
|
||||
absolutePath += SEPARATOR;
|
||||
if (token != NULL) {
|
||||
absolutePath += SEPARATOR;
|
||||
}
|
||||
} else {
|
||||
token = strtok(NULL, "/");
|
||||
}
|
||||
@@ -129,31 +138,38 @@ string computeAbsolutePath(const string path, const string relativePath) {
|
||||
return absolutePath;
|
||||
}
|
||||
|
||||
/*
 * Return `path` without its last element.
 *
 * removePreSeparator:  when the path ends with a separator, drop that
 *                      trailing separator first (on non-Windows builds only
 *                      if it is not also the first separator of the path).
 * removePostSeparator: when true the result stops before the last remaining
 *                      separator, otherwise that separator is kept.
 */
string removeLastPathElement(const string path,
                             const bool removePreSeparator,
                             const bool removePostSeparator)
{
  string trimmed = path;
  size_t lastSeparator = trimmed.find_last_of(SEPARATOR);

#ifndef _WIN32
  /* Never strip the separator when it is the only/first one. */
  const bool trailingIsStrippable
      = lastSeparator != trimmed.find_first_of(SEPARATOR);
#else
  const bool trailingIsStrippable = true;
#endif

  const bool endsWithSeparator = lastSeparator == trimmed.length() - 1;
  if (removePreSeparator && trailingIsStrippable && endsWithSeparator) {
    trimmed = trimmed.substr(0, lastSeparator);
    lastSeparator = trimmed.find_last_of(SEPARATOR);
  }

  if (removePostSeparator) {
    return trimmed.substr(0, lastSeparator);
  }
  return trimmed.substr(0, lastSeparator + 1);
}
|
||||
|
||||
/* Join a directory path and a file name with the platform SEPARATOR. */
string appendToDirectory(const string& directoryPath, const string& filename)
{
  return directoryPath + SEPARATOR + filename;
}
|
||||
|
||||
/*
 * Return everything after the last SEPARATOR in `path`; when the path holds
 * no separator the whole path is returned (npos + 1 wraps to 0).
 */
string getLastPathElement(const string& path)
{
  const size_t lastSeparator = path.find_last_of(SEPARATOR);
  return path.substr(lastSeparator + 1);
}
|
||||
|
||||
unsigned int getFileSize(const string &path) {
|
||||
unsigned int getFileSize(const string& path)
|
||||
{
|
||||
#ifdef _WIN32
|
||||
struct _stat filestatus;
|
||||
_stat(path.c_str(), &filestatus);
|
||||
@@ -165,12 +181,29 @@ unsigned int getFileSize(const string &path) {
|
||||
return filestatus.st_size / 1024;
|
||||
}
|
||||
|
||||
/* Return the value reported by getFileSize(path) formatted as decimal text. */
string getFileSizeAsString(const string& path)
{
  const unsigned int fileSize = getFileSize(path);
  ostringstream formatted;
  formatted << fileSize;
  return formatted.str();
}
|
||||
|
||||
bool fileExists(const string &path) {
|
||||
/*
 * Read the whole file at `path` and return its content.
 * Returns an empty string when the file cannot be opened.
 */
std::string getFileContent(const std::string& path)
{
  std::string content;

  /* Open positioned at the end so that tellg() reports the file size. */
  std::ifstream f(path, std::ios::in | std::ios::ate);
  if (!f.is_open()) {
    return content;
  }

  /* tellg() yields -1 on failure; handing that to reserve() would request
     a near-maximal capacity and throw std::length_error, so only a positive
     size is used as a pre-allocation hint. */
  const std::streamoff size = f.tellg();
  if (size > 0) {
    content.reserve(static_cast<std::string::size_type>(size));
  }

  f.seekg(0, std::ios::beg);
  content.assign((std::istreambuf_iterator<char>(f)),
                 std::istreambuf_iterator<char>());
  return content;
}
|
||||
|
||||
bool fileExists(const string& path)
|
||||
{
|
||||
#ifdef _WIN32
|
||||
return PathFileExists(path.c_str());
|
||||
#else
|
||||
@@ -185,7 +218,8 @@ bool fileExists(const string &path) {
|
||||
#endif
|
||||
}
|
||||
|
||||
bool makeDirectory(const string &path) {
|
||||
bool makeDirectory(const string& path)
|
||||
{
|
||||
#ifdef _WIN32
|
||||
int status = _mkdir(path.c_str());
|
||||
#else
|
||||
@@ -194,19 +228,44 @@ bool makeDirectory(const string &path) {
|
||||
return status == 0;
|
||||
}
|
||||
|
||||
/*
 * Create a fresh, uniquely named temporary directory and return its path.
 *
 * Windows: <temp path> + a textual UUID, created with _mkdir().
 * Others:  "/tmp/kiwix-lib_XXXXXX" with the X's filled in by mkdtemp().
 *          Returns an empty string when mkdtemp() fails.
 */
std::string makeTmpDirectory()
{
#ifdef _WIN32
  char cbase[MAX_PATH + 1];
  int base_len = GetTempPath(MAX_PATH + 1, cbase);
  UUID uuid;
  UuidCreate(&uuid);
  char* dir_name;
  UuidToString(&uuid, reinterpret_cast<unsigned char**>(&dir_name));
  std::string dir(cbase, base_len);
  dir += dir_name;
  _mkdir(dir.c_str());
  /* The string returned by UuidToString() is released with RpcStringFree(). */
  RpcStringFree(reinterpret_cast<unsigned char**>(&dir_name));
#else
  /* mkdtemp() rewrites the template in place and needs a NUL-terminated,
     writable buffer; std::string storage provides both, which avoids the
     unterminated manual copy. */
  std::string dir = "/tmp/kiwix-lib_XXXXXX";
  if (mkdtemp(&dir[0]) == NULL) {
    /* Building a std::string from a NULL result is undefined behaviour. */
    return "";
  }
#endif
  return dir;
}
|
||||
|
||||
/* Try to create a link and if does not work then make a copy */
|
||||
bool copyFile(const string &sourcePath, const string &destPath) {
|
||||
bool copyFile(const string& sourcePath, const string& destPath)
|
||||
{
|
||||
try {
|
||||
#ifndef _WIN32
|
||||
if (link(sourcePath.c_str(), destPath.c_str()) != 0) {
|
||||
#endif
|
||||
std::ifstream infile(sourcePath.c_str(), std::ios_base::binary);
|
||||
std::ofstream outfile(destPath.c_str(), std::ios_base::binary);
|
||||
outfile << infile.rdbuf();
|
||||
std::ifstream infile(sourcePath.c_str(), std::ios_base::binary);
|
||||
std::ofstream outfile(destPath.c_str(), std::ios_base::binary);
|
||||
outfile << infile.rdbuf();
|
||||
#ifndef _WIN32
|
||||
}
|
||||
#endif
|
||||
} catch (exception &e) {
|
||||
} catch (exception& e) {
|
||||
cerr << e.what() << endl;
|
||||
return false;
|
||||
}
|
||||
@@ -214,18 +273,19 @@ bool copyFile(const string &sourcePath, const string &destPath) {
|
||||
return true;
|
||||
}
|
||||
|
||||
string getExecutablePath() {
|
||||
string getExecutablePath()
|
||||
{
|
||||
char binRootPath[PATH_MAX];
|
||||
|
||||
|
||||
#ifdef _WIN32
|
||||
GetModuleFileName( NULL, binRootPath, PATH_MAX);
|
||||
GetModuleFileName(NULL, binRootPath, PATH_MAX);
|
||||
return std::string(binRootPath);
|
||||
#elif __APPLE__
|
||||
uint32_t max = (uint32_t)PATH_MAX;
|
||||
_NSGetExecutablePath(binRootPath, &max);
|
||||
return std::string(binRootPath);
|
||||
#else
|
||||
ssize_t size = readlink("/proc/self/exe", binRootPath, PATH_MAX);
|
||||
ssize_t size = readlink("/proc/self/exe", binRootPath, PATH_MAX);
|
||||
if (size != -1) {
|
||||
return std::string(binRootPath, size);
|
||||
}
|
||||
@@ -234,7 +294,8 @@ string getExecutablePath() {
|
||||
return "";
|
||||
}
|
||||
|
||||
bool writeTextFile(const string &path, const string &content) {
|
||||
bool writeTextFile(const string& path, const string& content)
|
||||
{
|
||||
std::ofstream file;
|
||||
file.open(path.c_str());
|
||||
file << content;
|
||||
@@ -242,8 +303,9 @@ bool writeTextFile(const string &path, const string &content) {
|
||||
return true;
|
||||
}
|
||||
|
||||
string getCurrentDirectory() {
|
||||
char* a_cwd = getcwd(NULL,0);
|
||||
string getCurrentDirectory()
|
||||
{
|
||||
char* a_cwd = getcwd(NULL, 0);
|
||||
string s_cwd(a_cwd);
|
||||
free(a_cwd);
|
||||
return s_cwd;
|
||||
|
||||
@@ -17,14 +17,15 @@
|
||||
* MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#include "regexTools.h"
|
||||
#include <common/regexTools.h>
|
||||
|
||||
std::map<std::string, RegexMatcher*> regexCache;
|
||||
std::map<std::string, icu::RegexMatcher*> regexCache;
|
||||
|
||||
icu::RegexMatcher* buildRegex(const std::string& regex)
|
||||
{
|
||||
icu::RegexMatcher* matcher;
|
||||
auto itr = regexCache.find(regex);
|
||||
|
||||
RegexMatcher *buildRegex(const std::string ®ex) {
|
||||
RegexMatcher *matcher;
|
||||
std::map<std::string, RegexMatcher*>::iterator itr = regexCache.find(regex);
|
||||
|
||||
/* Regex is in cache */
|
||||
if (itr != regexCache.end()) {
|
||||
matcher = itr->second;
|
||||
@@ -33,8 +34,8 @@ RegexMatcher *buildRegex(const std::string ®ex) {
|
||||
/* Regex needs to be parsed (and cached) */
|
||||
else {
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
UnicodeString uregex = UnicodeString(regex.c_str());
|
||||
matcher = new RegexMatcher(uregex, UREGEX_CASE_INSENSITIVE, status);
|
||||
icu::UnicodeString uregex(regex.c_str());
|
||||
matcher = new icu::RegexMatcher(uregex, UREGEX_CASE_INSENSITIVE, status);
|
||||
regexCache[regex] = matcher;
|
||||
}
|
||||
|
||||
@@ -42,40 +43,47 @@ RegexMatcher *buildRegex(const std::string ®ex) {
|
||||
}
|
||||
|
||||
/* todo */
|
||||
void freeRegexCache() {
|
||||
void freeRegexCache()
|
||||
{
|
||||
}
|
||||
|
||||
bool matchRegex(const std::string &content, const std::string ®ex) {
|
||||
bool matchRegex(const std::string& content, const std::string& regex)
|
||||
{
|
||||
ucnv_setDefaultName("UTF-8");
|
||||
UnicodeString ucontent = UnicodeString(content.c_str());
|
||||
RegexMatcher *matcher = buildRegex(regex);
|
||||
icu::UnicodeString ucontent(content.c_str());
|
||||
auto matcher = buildRegex(regex);
|
||||
matcher->reset(ucontent);
|
||||
return matcher->find();
|
||||
}
|
||||
|
||||
std::string replaceRegex(const std::string &content, const std::string &replacement, const std::string ®ex) {
|
||||
std::string replaceRegex(const std::string& content,
|
||||
const std::string& replacement,
|
||||
const std::string& regex)
|
||||
{
|
||||
ucnv_setDefaultName("UTF-8");
|
||||
UnicodeString ucontent = UnicodeString(content.c_str());
|
||||
UnicodeString ureplacement = UnicodeString(replacement.c_str());
|
||||
RegexMatcher *matcher = buildRegex(regex);
|
||||
icu::UnicodeString ucontent(content.c_str());
|
||||
icu::UnicodeString ureplacement(replacement.c_str());
|
||||
auto matcher = buildRegex(regex);
|
||||
matcher->reset(ucontent);
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
UnicodeString uresult = matcher->replaceAll(ureplacement, status);
|
||||
auto uresult = matcher->replaceAll(ureplacement, status);
|
||||
std::string tmp;
|
||||
uresult.toUTF8String(tmp);
|
||||
return tmp;
|
||||
}
|
||||
|
||||
std::string appendToFirstOccurence(const std::string &content, const std::string regex, const std::string &replacement) {
|
||||
std::string appendToFirstOccurence(const std::string& content,
|
||||
const std::string regex,
|
||||
const std::string& replacement)
|
||||
{
|
||||
ucnv_setDefaultName("UTF-8");
|
||||
UnicodeString ucontent = UnicodeString(content.c_str());
|
||||
UnicodeString ureplacement = UnicodeString(replacement.c_str());
|
||||
RegexMatcher *matcher = buildRegex(regex);
|
||||
icu::UnicodeString ucontent(content.c_str());
|
||||
icu::UnicodeString ureplacement(replacement.c_str());
|
||||
auto matcher = buildRegex(regex);
|
||||
matcher->reset(ucontent);
|
||||
|
||||
if (matcher->find()) {
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
ucontent.insert(matcher->end(status), ureplacement);
|
||||
ucontent.insert(matcher->end(status), ureplacement);
|
||||
std::string tmp;
|
||||
ucontent.toUTF8String(tmp);
|
||||
return tmp;
|
||||
@@ -83,4 +91,3 @@ std::string appendToFirstOccurence(const std::string &content, const std::strin
|
||||
|
||||
return content;
|
||||
}
|
||||
|
||||
|
||||
@@ -1,10 +0,0 @@
|
||||
#include <resourceTools.h>
|
||||
#include <iostream>
|
||||
|
||||
std::string getResourceAsString(const std::string &name) {
|
||||
std::map<std::string, std::pair<const unsigned char*, unsigned int> >::iterator it = resourceMap.find(name);
|
||||
if (it != resourceMap.end()) {
|
||||
return std::string((const char*)resourceMap[name].first, resourceMap[name].second);
|
||||
}
|
||||
return "";
|
||||
}
|
||||
@@ -17,28 +17,39 @@
|
||||
* MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#include "stringTools.h"
|
||||
#include <common/stringTools.h>
|
||||
|
||||
#include <unicode/normlzr.h>
|
||||
#include <unicode/rep.h>
|
||||
#include <unicode/translit.h>
|
||||
#include <unicode/ucnv.h>
|
||||
#include <unicode/uniset.h>
|
||||
#include <unicode/ustring.h>
|
||||
|
||||
/* tell ICU where to find its dat file (tables) */
|
||||
/*
 * On Apple builds, point ICU at the "icudt58l.dat" path computed next to
 * the running executable. Does nothing on other platforms.
 */
void kiwix::loadICUExternalTables()
{
#ifdef __APPLE__
  const std::string executableDirectory
      = removeLastPathElement(getExecutablePath());
  const std::string datPath
      = computeAbsolutePath(executableDirectory, "icudt58l.dat");
  try {
    u_setDataDirectory(datPath.c_str());
  } catch (exception& e) {
    std::cerr << e.what() << std::endl;
  }
#endif
}
|
||||
|
||||
std::string kiwix::removeAccents(const std::string &text) {
|
||||
std::string kiwix::removeAccents(const std::string& text)
|
||||
{
|
||||
loadICUExternalTables();
|
||||
ucnv_setDefaultName("UTF-8");
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
Transliterator *removeAccentsTrans = Transliterator::createInstance("Lower; NFD; [:M:] remove; NFC", UTRANS_FORWARD, status);
|
||||
UnicodeString ustring = UnicodeString(text.c_str());
|
||||
auto removeAccentsTrans = icu::Transliterator::createInstance(
|
||||
"Lower; NFD; [:M:] remove; NFC", UTRANS_FORWARD, status);
|
||||
icu::UnicodeString ustring(text.c_str());
|
||||
removeAccentsTrans->transliterate(ustring);
|
||||
delete removeAccentsTrans;
|
||||
std::string unaccentedText;
|
||||
@@ -49,7 +60,8 @@ std::string kiwix::removeAccents(const std::string &text) {
|
||||
#ifndef __ANDROID__
|
||||
|
||||
/* Prepare integer for display */
|
||||
std::string kiwix::beautifyInteger(const unsigned int number) {
|
||||
std::string kiwix::beautifyInteger(const unsigned int number)
|
||||
{
|
||||
std::stringstream numberStream;
|
||||
numberStream << number;
|
||||
std::string numberString = numberStream.str();
|
||||
@@ -63,210 +75,299 @@ std::string kiwix::beautifyInteger(const unsigned int number) {
|
||||
return numberString;
|
||||
}
|
||||
|
||||
/*
 * Format a size for display. Values above 1024 * 1024 are shown as
 * "<number / (1024 * 1024)> GB"; everything else as "<number / 1024> MB",
 * never less than "1 MB".
 * NOTE(review): the thresholds suggest `number` is expressed in KiB -
 * confirm against callers.
 */
std::string kiwix::beautifyFileSize(const unsigned int number)
{
  const unsigned int unitsPerGb = 1024 * 1024;
  if (number > unitsPerGb) {
    return kiwix::beautifyInteger(number / unitsPerGb) + " GB";
  }

  unsigned int megabytes = number / 1024;
  if (megabytes == 0) {
    /* Do not display "0 MB" for small files. */
    megabytes = 1;
  }
  return kiwix::beautifyInteger(megabytes) + " MB";
}
|
||||
|
||||
void kiwix::printStringInHexadecimal(UnicodeString s) {
|
||||
void kiwix::printStringInHexadecimal(icu::UnicodeString s)
|
||||
{
|
||||
std::cout << std::showbase << std::hex;
|
||||
for (int i=0; i<s.length(); i++) {
|
||||
for (int i = 0; i < s.length(); i++) {
|
||||
char c = (char)((s.getTerminatedBuffer())[i]);
|
||||
if (c & 0x80)
|
||||
if (c & 0x80) {
|
||||
std::cout << (c & 0xffff) << " ";
|
||||
else
|
||||
} else {
|
||||
std::cout << c << " ";
|
||||
}
|
||||
}
|
||||
std::cout << std::endl;
|
||||
}
|
||||
|
||||
/*
 * Debug helper: dump the NUL-terminated string `s` to std::cout, one
 * space-terminated item per byte. Bytes with the high bit set are printed
 * as numbers, the others as characters.
 * std::cout is left in showbase/hex mode afterwards.
 */
void kiwix::printStringInHexadecimal(const char* s)
{
  std::cout << std::showbase << std::hex;

  for (; *s != '\0'; ++s) {
    const char c = *s;
    if ((c & 0x80) != 0) {
      std::cout << (c & 0xffff);
    } else {
      std::cout << c;
    }
    std::cout << ' ';
  }

  std::cout << std::endl;
}
|
||||
|
||||
/*
 * Replace, in place, every occurrence of `oldStr` in `str` by `newStr`.
 * The search resumes after the inserted text, so occurrences of `oldStr`
 * inside `newStr` are not replaced again.
 * An empty `oldStr` leaves `str` untouched.
 */
void kiwix::stringReplacement(std::string& str,
                              const std::string& oldStr,
                              const std::string& newStr)
{
  /* find("") succeeds at every position, which made the loop below spin
     forever (and grow `str` without bound when newStr is not empty). */
  if (oldStr.empty()) {
    return;
  }

  size_t pos = 0;
  while ((pos = str.find(oldStr, pos)) != std::string::npos) {
    str.replace(pos, oldStr.length(), newStr);
    pos += newStr.length();
  }
}
|
||||
|
||||
/* Encode string to avoid XSS attacks */
|
||||
/*
 * Return a copy of `str` where '<' and '>' are replaced by the HTML
 * entities "&lt;" and "&gt;", so the text cannot open or close a tag.
 */
std::string kiwix::encodeDiples(const std::string& str)
{
  std::string result = str;
  kiwix::stringReplacement(result, "<", "&lt;");
  kiwix::stringReplacement(result, ">", "&gt;");
  return result;
}
|
||||
|
||||
// Urlencode
|
||||
//based on javascript encodeURIComponent()
|
||||
|
||||
/* Return the two lowercase hexadecimal digits of the byte `dec`. */
std::string char2hex(char dec) {
  static const char digits[] = "0123456789abcdef";

  std::string hex;
  hex += digits[(dec & 0xF0) >> 4];  /* high nibble */
  hex += digits[dec & 0x0F];         /* low nibble */
  return hex;
}
|
||||
|
||||
std::string kiwix::urlEncode(const std::string &c) {
|
||||
std::string escaped="";
|
||||
int max = c.length();
|
||||
for(int i=0; i<max; i++)
|
||||
{
|
||||
if ( (48 <= c[i] && c[i] <= 57) ||//0-9
|
||||
(65 <= c[i] && c[i] <= 90) ||//abc...xyz
|
||||
(97 <= c[i] && c[i] <= 122) || //ABC...XYZ
|
||||
(c[i]=='~' || c[i]=='!' || c[i]=='*' || c[i]=='(' || c[i]==')' || c[i]=='\'')
|
||||
)
|
||||
{
|
||||
escaped.append( &c[i], 1);
|
||||
}
|
||||
else
|
||||
{
|
||||
escaped.append("%");
|
||||
escaped.append( char2hex(c[i]) );//converts char 255 to string "ff"
|
||||
}
|
||||
}
|
||||
return escaped;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
static char charFromHex(std::string a) {
|
||||
std::istringstream Blat(a);
|
||||
int Z;
|
||||
Blat >> std::hex >> Z;
|
||||
return char (Z);
|
||||
/* urlEncode() based on javascript encodeURI() &
|
||||
encodeURIComponent(). Mostly code from rstudio/httpuv (GPLv3) */
|
||||
|
||||
/* Tell whether `c` is one of the characters ; , / ? : @ & = + $ */
bool isReservedUrlChar(char c)
{
  static const char reserved[]
      = {';', ',', '/', '?', ':', '@', '&', '=', '+', '$'};

  for (const char candidate : reserved) {
    if (c == candidate) {
      return true;
    }
  }
  return false;
}
|
||||
|
||||
std::string kiwix::urlDecode(const std::string &originalUrl) {
|
||||
std::string url = originalUrl;
|
||||
std::string::size_type pos = 0;
|
||||
while ((pos = url.find('%', pos)) != std::string::npos &&
|
||||
pos + 2 < url.length()) {
|
||||
url.replace(pos, 3, 1, charFromHex(url.substr(pos + 1, 2)));
|
||||
++pos;
|
||||
bool needsEscape(char c, bool encodeReserved)
|
||||
{
|
||||
if (c >= 'a' && c <= 'z')
|
||||
return false;
|
||||
if (c >= 'A' && c <= 'Z')
|
||||
return false;
|
||||
if (c >= '0' && c <= '9')
|
||||
return false;
|
||||
if (isReservedUrlChar(c))
|
||||
return encodeReserved;
|
||||
switch (c) {
|
||||
case '-':
|
||||
case '_':
|
||||
case '.':
|
||||
case '!':
|
||||
case '~':
|
||||
case '*':
|
||||
case '\'':
|
||||
case '(':
|
||||
case ')':
|
||||
return false;
|
||||
}
|
||||
return url;
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
 * Return the value (0-15) of the hexadecimal digit `c`, accepting both
 * letter cases, or -1 when `c` is not a hexadecimal digit.
 */
int hexToInt(char c)
{
  if ('0' <= c && c <= '9') {
    return c - '0';
  }
  if ('a' <= c && c <= 'f') {
    return 10 + (c - 'a');
  }
  if ('A' <= c && c <= 'F') {
    return 10 + (c - 'A');
  }
  return -1;
}
|
||||
|
||||
std::string kiwix::urlEncode(const std::string& value, bool encodeReserved)
|
||||
{
|
||||
std::ostringstream os;
|
||||
os << std::hex << std::uppercase;
|
||||
for (std::string::const_iterator it = value.begin();
|
||||
it != value.end();
|
||||
it++) {
|
||||
|
||||
if (!needsEscape(*it, encodeReserved)) {
|
||||
os << *it;
|
||||
} else {
|
||||
os << '%' << std::setw(2) << static_cast<unsigned int>(static_cast<unsigned char>(*it));
|
||||
}
|
||||
}
|
||||
return os.str();
|
||||
}
|
||||
|
||||
std::string kiwix::urlDecode(const std::string& value, bool component)
|
||||
{
|
||||
std::ostringstream os;
|
||||
for (std::string::const_iterator it = value.begin();
|
||||
it != value.end();
|
||||
it++) {
|
||||
|
||||
// If there aren't enough characters left for this to be a
|
||||
// valid escape code, just use the character and move on
|
||||
if (it > value.end() - 3) {
|
||||
os << *it;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (*it == '%') {
|
||||
char hi = *(++it);
|
||||
char lo = *(++it);
|
||||
int iHi = hexToInt(hi);
|
||||
int iLo = hexToInt(lo);
|
||||
if (iHi < 0 || iLo < 0) {
|
||||
// Invalid escape sequence
|
||||
os << '%' << hi << lo;
|
||||
continue;
|
||||
}
|
||||
char c = (char)(iHi << 4 | iLo);
|
||||
if (!component && isReservedUrlChar(c)) {
|
||||
os << '%' << hi << lo;
|
||||
} else {
|
||||
os << c;
|
||||
}
|
||||
} else {
|
||||
os << *it;
|
||||
}
|
||||
}
|
||||
|
||||
return os.str();
|
||||
}
|
||||
|
||||
/* Split string in a token array */
|
||||
/*
 * Split `str` into tokens. Any character of `delims` acts as a separator;
 * runs of separators are skipped, so no empty token is produced.
 */
std::vector<std::string> kiwix::split(const std::string& str,
                                      const std::string& delims = " *-")
{
  const std::string::size_type none = std::string::npos;
  std::vector<std::string> tokens;

  /* [tokenStart, tokenEnd) delimits the token about to be extracted. */
  std::string::size_type tokenStart = str.find_first_not_of(delims, 0);
  std::string::size_type tokenEnd = str.find_first_of(delims, tokenStart);

  while (!(tokenEnd == none && tokenStart == none)) {
    tokens.push_back(str.substr(tokenStart, tokenEnd - tokenStart));
    tokenStart = str.find_first_not_of(delims, tokenEnd);
    tokenEnd = str.find_first_of(delims, tokenStart);
  }

  return tokens;
}
|
||||
|
||||
std::vector<std::string> kiwix::split(const char* lhs, const char* rhs){
|
||||
const std::string m1 (lhs), m2 (rhs);
|
||||
std::vector<std::string> kiwix::split(const char* lhs, const char* rhs)
|
||||
{
|
||||
const std::string m1(lhs), m2(rhs);
|
||||
return split(m1, m2);
|
||||
}
|
||||
|
||||
std::vector<std::string> kiwix::split(const char* lhs, const std::string& rhs){
|
||||
std::vector<std::string> kiwix::split(const char* lhs, const std::string& rhs)
|
||||
{
|
||||
return split(lhs, rhs.c_str());
|
||||
}
|
||||
|
||||
std::vector<std::string> kiwix::split(const std::string& lhs, const char* rhs){
|
||||
std::vector<std::string> kiwix::split(const std::string& lhs, const char* rhs)
|
||||
{
|
||||
return split(lhs.c_str(), rhs);
|
||||
}
|
||||
|
||||
std::string kiwix::ucFirst (const std::string &word) {
|
||||
if (word.empty())
|
||||
std::string kiwix::ucFirst(const std::string& word)
|
||||
{
|
||||
if (word.empty()) {
|
||||
return "";
|
||||
}
|
||||
|
||||
std::string result;
|
||||
|
||||
UnicodeString unicodeWord(word.c_str());
|
||||
UnicodeString unicodeFirstLetter = UnicodeString(unicodeWord, 0, 1).toUpper();
|
||||
icu::UnicodeString unicodeWord(word.c_str());
|
||||
auto unicodeFirstLetter = icu::UnicodeString(unicodeWord, 0, 1).toUpper();
|
||||
unicodeWord.replace(0, 1, unicodeFirstLetter);
|
||||
unicodeWord.toUTF8String(result);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
std::string kiwix::ucAll (const std::string &word) {
|
||||
if (word.empty())
|
||||
std::string kiwix::ucAll(const std::string& word)
|
||||
{
|
||||
if (word.empty()) {
|
||||
return "";
|
||||
}
|
||||
|
||||
std::string result;
|
||||
|
||||
UnicodeString unicodeWord(word.c_str());
|
||||
icu::UnicodeString unicodeWord(word.c_str());
|
||||
unicodeWord.toUpper().toUTF8String(result);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
std::string kiwix::lcFirst (const std::string &word) {
|
||||
if (word.empty())
|
||||
std::string kiwix::lcFirst(const std::string& word)
|
||||
{
|
||||
if (word.empty()) {
|
||||
return "";
|
||||
}
|
||||
|
||||
std::string result;
|
||||
|
||||
UnicodeString unicodeWord(word.c_str());
|
||||
UnicodeString unicodeFirstLetter = UnicodeString(unicodeWord, 0, 1).toLower();
|
||||
icu::UnicodeString unicodeWord(word.c_str());
|
||||
auto unicodeFirstLetter = icu::UnicodeString(unicodeWord, 0, 1).toLower();
|
||||
unicodeWord.replace(0, 1, unicodeFirstLetter);
|
||||
unicodeWord.toUTF8String(result);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
std::string kiwix::lcAll (const std::string &word) {
|
||||
if (word.empty())
|
||||
std::string kiwix::lcAll(const std::string& word)
|
||||
{
|
||||
if (word.empty()) {
|
||||
return "";
|
||||
}
|
||||
|
||||
std::string result;
|
||||
|
||||
UnicodeString unicodeWord(word.c_str());
|
||||
icu::UnicodeString unicodeWord(word.c_str());
|
||||
unicodeWord.toLower().toUTF8String(result);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
std::string kiwix::toTitle (const std::string &word) {
|
||||
if (word.empty())
|
||||
std::string kiwix::toTitle(const std::string& word)
|
||||
{
|
||||
if (word.empty()) {
|
||||
return "";
|
||||
}
|
||||
|
||||
std::string result;
|
||||
|
||||
UnicodeString unicodeWord(word.c_str());
|
||||
icu::UnicodeString unicodeWord(word.c_str());
|
||||
unicodeWord = unicodeWord.toTitle(0);
|
||||
unicodeWord.toUTF8String(result);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/* Normalise `word` for comparison; currently this is plain lower-casing. */
std::string kiwix::normalize(const std::string& word)
{
  const std::string lowered = kiwix::lcAll(word);
  return lowered;
}
|
||||
|
||||
@@ -1,71 +0,0 @@
|
||||
/*
|
||||
* Copyright 2011-2012 Emmanuel Engelhart <kelson@kiwix.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 3 of the License, or
|
||||
* any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
* MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#ifndef KIWIX_STRINGTOOLS_H
|
||||
#define KIWIX_STRINGTOOLS_H
|
||||
|
||||
#include <unicode/translit.h>
|
||||
#include <unicode/normlzr.h>
|
||||
#include <unicode/unistr.h>
|
||||
#include <unicode/rep.h>
|
||||
#include <unicode/uniset.h>
|
||||
#include <unicode/ustring.h>
|
||||
#include <unicode/ucnv.h>
|
||||
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <fstream>
|
||||
#include <sstream>
|
||||
|
||||
#include <pathTools.h>
|
||||
|
||||
namespace kiwix {
|
||||
|
||||
#ifndef __ANDROID__
|
||||
|
||||
std::string beautifyInteger(const unsigned int number);
|
||||
std::string beautifyFileSize(const unsigned int number);
|
||||
std::string urlEncode(const std::string &c);
|
||||
void printStringInHexadecimal(const char *s);
|
||||
void printStringInHexadecimal(UnicodeString s);
|
||||
void stringReplacement(std::string& str, const std::string& oldStr, const std::string& newStr);
|
||||
std::string encodeDiples(const std::string& str);
|
||||
|
||||
#endif
|
||||
|
||||
std::string removeAccents(const std::string &text);
|
||||
void loadICUExternalTables();
|
||||
std::string urlDecode(const std::string &c);
|
||||
|
||||
std::vector<std::string> split(const std::string&, const std::string&);
|
||||
std::vector<std::string> split(const char*, const char*);
|
||||
std::vector<std::string> split(const std::string&, const char*);
|
||||
std::vector<std::string> split(const char*, const std::string&);
|
||||
|
||||
std::string ucAll(const std::string &word);
|
||||
std::string lcAll(const std::string &word);
|
||||
std::string ucFirst(const std::string &word);
|
||||
std::string lcFirst(const std::string &word);
|
||||
std::string toTitle(const std::string &word);
|
||||
|
||||
std::string normalize(const std::string &word);
|
||||
}
|
||||
|
||||
#endif
|
||||
2786
src/common/tree.h
6
src/config.h.in
Normal file
@@ -0,0 +1,6 @@
|
||||
|
||||
#mesondefine VERSION
|
||||
|
||||
#mesondefine ENABLE_CTPP2
|
||||
|
||||
#mesondefine ENABLE_LIBARIA2
|
||||
@@ -17,7 +17,7 @@
|
||||
* MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#include "CTPP2VMStringLoader.hpp"
|
||||
#include <ctpp2/CTPP2VMStringLoader.hpp>
|
||||
|
||||
namespace CTPP // C++ Template Engine
|
||||
{
|
||||
121
src/downloader.cpp
Normal file
@@ -0,0 +1,121 @@
|
||||
/*
|
||||
* Copyright 2018 Matthieu Gautier <mgautier@kymeria.fr>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 3 of the License, or
|
||||
* any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
* MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#include "downloader.h"
|
||||
#include "common/pathTools.h"
|
||||
|
||||
#ifndef _WIN32
|
||||
# include <unistd.h>
|
||||
#endif
|
||||
#include <iostream>
|
||||
|
||||
namespace kiwix
|
||||
{
|
||||
|
||||
pthread_mutex_t Downloader::globalLock = PTHREAD_MUTEX_INITIALIZER;
|
||||
|
||||
|
||||
/* Constructor */
|
||||
Downloader::Downloader()
|
||||
{
|
||||
#ifdef ENABLE_LIBARIA2
|
||||
aria2::SessionConfig config;
|
||||
config.downloadEventCallback = Downloader::downloadEventCallback;
|
||||
config.userData = this;
|
||||
tmpDir = makeTmpDirectory();
|
||||
aria2::KeyVals options;
|
||||
options.push_back(std::pair<std::string, std::string>("dir", tmpDir));
|
||||
session = aria2::sessionNew(options, config);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
/* Destructor */
|
||||
Downloader::~Downloader()
|
||||
{
|
||||
#ifdef ENABLE_LIBARIA2
|
||||
aria2::sessionFinal(session);
|
||||
#endif
|
||||
rmdir(tmpDir.c_str());
|
||||
}
|
||||
|
||||
#ifdef ENABLE_LIBARIA2
|
||||
int Downloader::downloadEventCallback(aria2::Session* session,
|
||||
aria2::DownloadEvent event,
|
||||
aria2::A2Gid gid,
|
||||
void* userData)
|
||||
{
|
||||
Downloader* downloader = static_cast<Downloader*>(userData);
|
||||
|
||||
auto fileHandle = downloader->fileHandle;
|
||||
auto dh = aria2::getDownloadHandle(session, gid);
|
||||
|
||||
if (!dh) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
switch (event) {
|
||||
case aria2::EVENT_ON_DOWNLOAD_COMPLETE:
|
||||
{
|
||||
if (dh->getNumFiles() > 0) {
|
||||
auto f = dh->getFile(1);
|
||||
fileHandle->path = f.path;
|
||||
fileHandle->success = true;
|
||||
}
|
||||
}
|
||||
break;
|
||||
case aria2::EVENT_ON_DOWNLOAD_ERROR:
|
||||
{
|
||||
fileHandle->success = false;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
aria2::deleteDownloadHandle(dh);
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
DownloadedFile Downloader::download(const std::string& url) {
|
||||
pthread_mutex_lock(&globalLock);
|
||||
DownloadedFile fileHandle;
|
||||
#ifdef ENABLE_LIBARIA2
|
||||
try {
|
||||
std::vector<std::string> uris = {url};
|
||||
aria2::KeyVals options;
|
||||
aria2::A2Gid gid;
|
||||
int ret;
|
||||
DownloadedFile fileHandle;
|
||||
|
||||
ret = aria2::addUri(session, &gid, uris, options);
|
||||
if (ret < 0) {
|
||||
std::cerr << "Failed to download" << std::endl;
|
||||
} else {
|
||||
this->fileHandle = &fileHandle;
|
||||
aria2::run(session, aria2::RUN_DEFAULT);
|
||||
}
|
||||
} catch (...) {};
|
||||
this->fileHandle = nullptr;
|
||||
pthread_mutex_unlock(&globalLock);
|
||||
#endif
|
||||
return fileHandle;
|
||||
}
|
||||
|
||||
}
|
||||
138
src/entry.cpp
Normal file
@@ -0,0 +1,138 @@
|
||||
/*
|
||||
* Copyright 2011 Emmanuel Engelhart <kelson@kiwix.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 3 of the License, or
|
||||
* any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
* MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#include "reader.h"
|
||||
#include <time.h>
|
||||
|
||||
#include <zim/search.h>
|
||||
|
||||
namespace kiwix
|
||||
{
|
||||
|
||||
Entry::Entry(zim::Article article)
|
||||
: article(article)
|
||||
{
|
||||
}
|
||||
|
||||
#define RETURN_IF_INVALID(WHAT) if(!good()) { return (WHAT); }
|
||||
|
||||
std::string Entry::getPath() const
|
||||
{
|
||||
RETURN_IF_INVALID("");
|
||||
return article.getLongUrl();
|
||||
}
|
||||
|
||||
std::string Entry::getTitle() const
|
||||
{
|
||||
RETURN_IF_INVALID("");
|
||||
return article.getTitle();
|
||||
}
|
||||
|
||||
std::string Entry::getContent() const
|
||||
{
|
||||
RETURN_IF_INVALID("");
|
||||
return article.getData();
|
||||
}
|
||||
|
||||
zim::Blob Entry::getBlob(offset_type offset) const
|
||||
{
|
||||
RETURN_IF_INVALID(zim::Blob());
|
||||
return article.getData(offset);
|
||||
}
|
||||
|
||||
zim::Blob Entry::getBlob(offset_type offset, size_type size) const
|
||||
{
|
||||
RETURN_IF_INVALID(zim::Blob());
|
||||
return article.getData(offset, size);
|
||||
}
|
||||
|
||||
std::pair<std::string, offset_type> Entry::getDirectAccessInfo() const
|
||||
{
|
||||
RETURN_IF_INVALID(std::make_pair("", 0));
|
||||
return article.getDirectAccessInformation();
|
||||
}
|
||||
|
||||
size_type Entry::getSize() const
|
||||
{
|
||||
RETURN_IF_INVALID(0);
|
||||
return article.getArticleSize();
|
||||
}
|
||||
|
||||
std::string Entry::getMimetype() const
|
||||
{
|
||||
RETURN_IF_INVALID("");
|
||||
try {
|
||||
return article.getMimeType();
|
||||
} catch (exception& e) {
|
||||
return "application/octet-stream";
|
||||
}
|
||||
}
|
||||
|
||||
bool Entry::isRedirect() const
|
||||
{
|
||||
RETURN_IF_INVALID(false);
|
||||
return article.isRedirect();
|
||||
}
|
||||
|
||||
bool Entry::isLinkTarget() const
|
||||
{
|
||||
RETURN_IF_INVALID(false);
|
||||
return article.isLinktarget();
|
||||
}
|
||||
|
||||
bool Entry::isDeleted() const
|
||||
{
|
||||
RETURN_IF_INVALID(false);
|
||||
return article.isDeleted();
|
||||
}
|
||||
|
||||
Entry Entry::getRedirectEntry() const
|
||||
{
|
||||
RETURN_IF_INVALID(Entry());
|
||||
if ( !article.isRedirect() ) {
|
||||
throw NoEntry();
|
||||
}
|
||||
|
||||
auto targeted_article = article.getRedirectArticle();
|
||||
if ( !targeted_article.good()) {
|
||||
throw NoEntry();
|
||||
}
|
||||
return targeted_article;
|
||||
}
|
||||
|
||||
Entry Entry::getFinalEntry() const
|
||||
{
|
||||
RETURN_IF_INVALID(Entry());
|
||||
if (final_article.good()) {
|
||||
return final_article;
|
||||
}
|
||||
|
||||
int loopCounter = 42;
|
||||
final_article = article;
|
||||
while (final_article.isRedirect() && loopCounter--) {
|
||||
final_article = final_article.getRedirectArticle();
|
||||
if ( !final_article.good()) {
|
||||
throw NoEntry();
|
||||
}
|
||||
}
|
||||
|
||||
return final_article;
|
||||
}
|
||||
|
||||
}
|
||||
154
src/library.cpp
Normal file
@@ -0,0 +1,154 @@
|
||||
/*
|
||||
* Copyright 2011 Emmanuel Engelhart <kelson@kiwix.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 3 of the License, or
|
||||
* any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
* MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#include "library.h"
|
||||
|
||||
namespace kiwix
|
||||
{
|
||||
/* Constructor */
|
||||
Book::Book() : readOnly(false)
|
||||
{
|
||||
}
|
||||
/* Destructor */
|
||||
Book::~Book()
|
||||
{
|
||||
}
|
||||
/* Sort functions */
|
||||
bool Book::sortByLastOpen(const kiwix::Book& a, const kiwix::Book& b)
|
||||
{
|
||||
return atoi(a.last.c_str()) > atoi(b.last.c_str());
|
||||
}
|
||||
|
||||
bool Book::sortByTitle(const kiwix::Book& a, const kiwix::Book& b)
|
||||
{
|
||||
return strcmp(a.title.c_str(), b.title.c_str()) < 0;
|
||||
}
|
||||
|
||||
bool Book::sortByDate(const kiwix::Book& a, const kiwix::Book& b)
|
||||
{
|
||||
return strcmp(a.date.c_str(), b.date.c_str()) > 0;
|
||||
}
|
||||
|
||||
bool Book::sortBySize(const kiwix::Book& a, const kiwix::Book& b)
|
||||
{
|
||||
return atoi(a.size.c_str()) < atoi(b.size.c_str());
|
||||
}
|
||||
|
||||
bool Book::sortByPublisher(const kiwix::Book& a, const kiwix::Book& b)
|
||||
{
|
||||
return strcmp(a.publisher.c_str(), b.publisher.c_str()) < 0;
|
||||
}
|
||||
|
||||
bool Book::sortByCreator(const kiwix::Book& a, const kiwix::Book& b)
|
||||
{
|
||||
return strcmp(a.creator.c_str(), b.creator.c_str()) < 0;
|
||||
}
|
||||
|
||||
bool Book::sortByLanguage(const kiwix::Book& a, const kiwix::Book& b)
|
||||
{
|
||||
return strcmp(a.language.c_str(), b.language.c_str()) < 0;
|
||||
}
|
||||
|
||||
std::string Book::getHumanReadableIdFromPath()
|
||||
{
|
||||
std::string id = pathAbsolute;
|
||||
if (!id.empty()) {
|
||||
kiwix::removeAccents(id);
|
||||
|
||||
#ifdef _WIN32
|
||||
id = replaceRegex(id, "", "^.*\\\\");
|
||||
#else
|
||||
id = replaceRegex(id, "", "^.*/");
|
||||
#endif
|
||||
|
||||
id = replaceRegex(id, "", "\\.zim[a-z]*$");
|
||||
id = replaceRegex(id, "_", " ");
|
||||
id = replaceRegex(id, "plus", "\\+");
|
||||
}
|
||||
return id;
|
||||
}
|
||||
|
||||
/* Constructor */
|
||||
Library::Library() : version(KIWIX_LIBRARY_VERSION)
|
||||
{
|
||||
}
|
||||
/* Destructor */
|
||||
Library::~Library()
|
||||
{
|
||||
}
|
||||
bool Library::addBook(const Book& book)
|
||||
{
|
||||
/* Try to find it */
|
||||
std::vector<kiwix::Book>::iterator itr;
|
||||
for (itr = this->books.begin(); itr != this->books.end(); ++itr) {
|
||||
if (itr->id == book.id) {
|
||||
if (!itr->readOnly) {
|
||||
itr->readOnly = book.readOnly;
|
||||
|
||||
if (itr->path.empty()) {
|
||||
itr->path = book.path;
|
||||
}
|
||||
|
||||
if (itr->pathAbsolute.empty()) {
|
||||
itr->pathAbsolute = book.pathAbsolute;
|
||||
}
|
||||
|
||||
if (itr->url.empty()) {
|
||||
itr->url = book.url;
|
||||
}
|
||||
|
||||
if (itr->tags.empty()) {
|
||||
itr->tags = book.tags;
|
||||
}
|
||||
|
||||
if (itr->name.empty()) {
|
||||
itr->name = book.name;
|
||||
}
|
||||
|
||||
if (itr->indexPath.empty()) {
|
||||
itr->indexPath = book.indexPath;
|
||||
itr->indexType = book.indexType;
|
||||
}
|
||||
|
||||
if (itr->indexPathAbsolute.empty()) {
|
||||
itr->indexPathAbsolute = book.indexPathAbsolute;
|
||||
itr->indexType = book.indexType;
|
||||
}
|
||||
|
||||
if (itr->faviconMimeType.empty()) {
|
||||
itr->favicon = book.favicon;
|
||||
itr->faviconMimeType = book.faviconMimeType;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/* otherwise */
|
||||
this->books.push_back(book);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Library::removeBookByIndex(const unsigned int bookIndex)
|
||||
{
|
||||
books.erase(books.begin() + bookIndex);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
734
src/manager.cpp
Normal file
@@ -0,0 +1,734 @@
|
||||
/*
|
||||
* Copyright 2011 Emmanuel Engelhart <kelson@kiwix.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 3 of the License, or
|
||||
* any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
* MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#include "manager.h"
|
||||
#include "downloader.h"
|
||||
|
||||
namespace kiwix
|
||||
{
|
||||
/* Constructor */
|
||||
Manager::Manager() : writableLibraryPath("")
|
||||
{
|
||||
}
|
||||
/* Destructor */
|
||||
Manager::~Manager()
|
||||
{
|
||||
}
|
||||
/* Import every <book> child of the <library> root of `doc`.
 *
 * - A non-empty "current" attribute on the root becomes the current book id.
 * - Each book is filled from its XML attributes, flagged with `readOnly`,
 *   and has its paths checked against `libraryPath`.
 * - When the library carries no version, or a version not newer than
 *   KIWIX_LIBRARY_VERSION, a book with a path is only added if
 *   readBookFromPath() succeeds on its absolute path.
 *
 * Always returns true. */
bool Manager::parseXmlDom(const pugi::xml_document& doc,
                          const bool readOnly,
                          const string libraryPath)
{
  const pugi::xml_node root = doc.child("library");

  const pugi::char_t* currentId = root.attribute("current").value();
  if (currentId[0] != '\0') {
    this->setCurrentBookId(currentId);
  }

  const string libraryVersion = root.attribute("version").value();
  /* Same for every book, so evaluated once up front. */
  const bool mustCheckFile
      = libraryVersion.empty()
        || atoi(libraryVersion.c_str()) <= atoi(KIWIX_LIBRARY_VERSION);

  for (pugi::xml_node node = root.child("book"); node;
       node = node.next_sibling("book")) {
    const auto attr = [&node](const char* key) {
      return node.attribute(key).value();
    };

    kiwix::Book book;
    book.readOnly = readOnly;
    book.id = attr("id");
    book.path = attr("path");

    /* The literal "undefined" stands for "never opened". */
    const std::string lastOpened = attr("last");
    if (lastOpened != "undefined") {
      book.last = lastOpened;
    } else {
      book.last = "";
    }

    book.indexPath = attr("indexPath");
    book.indexType = XAPIAN;
    book.title = attr("title");
    book.name = attr("name");
    book.tags = attr("tags");
    book.description = attr("description");
    book.language = attr("language");
    book.date = attr("date");
    book.creator = attr("creator");
    book.publisher = attr("publisher");
    book.url = attr("url");
    book.origId = attr("origId");
    book.articleCount = attr("articleCount");
    book.mediaCount = attr("mediaCount");
    book.size = attr("size");
    book.favicon = attr("favicon");
    book.faviconMimeType = attr("faviconMimeType");

    /* Check absolute and relative paths */
    this->checkAndCleanBookPaths(book, libraryPath);

    bool readable = true;
    if (mustCheckFile && !book.path.empty()) {
      readable = this->readBookFromPath(book.pathAbsolute);
    }

    if (readable) {
      library.addBook(book);
    }
  }

  return true;
}
|
||||
|
||||
bool Manager::readXml(const string& xml,
|
||||
const bool readOnly,
|
||||
const string libraryPath)
|
||||
{
|
||||
pugi::xml_document doc;
|
||||
pugi::xml_parse_result result
|
||||
= doc.load_buffer_inplace((void*)xml.data(), xml.size());
|
||||
|
||||
if (result) {
|
||||
this->parseXmlDom(doc, readOnly, libraryPath);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* Import every <entry> of the OPDS <feed> in `doc` as a writable book.
 *
 * id, title, summary, language, updated and author/name are copied into the
 * book. For each <link>:
 *  - rel "http://opds-spec.org/image/thumbnail": the file at
 *    urlHost + href is downloaded and stored base64-encoded as favicon;
 *  - rel "http://opds-spec.org/acquisition/open-access": href becomes the
 *    book url.
 *
 * The Downloader is now constructed directly instead of being
 * copy-initialised from a temporary, so this no longer depends on copy
 * elision for an object whose destructor finalizes a session and removes a
 * directory.
 *
 * Always returns true. */
bool Manager::parseOpdsDom(const pugi::xml_document& doc, const std::string& urlHost)
{
  pugi::xml_node libraryNode = doc.child("feed");

  for (pugi::xml_node entryNode = libraryNode.child("entry"); entryNode;
       entryNode = entryNode.next_sibling("entry")) {
    kiwix::Book book;

    book.readOnly = false;
    book.id = entryNode.child("id").child_value();
    book.title = entryNode.child("title").child_value();
    book.description = entryNode.child("summary").child_value();
    book.language = entryNode.child("language").child_value();
    book.date = entryNode.child("updated").child_value();
    book.creator = entryNode.child("author").child("name").child_value();

    for (pugi::xml_node linkNode = entryNode.child("link"); linkNode;
         linkNode = linkNode.next_sibling("link")) {
      const std::string rel = linkNode.attribute("rel").value();

      if (rel == "http://opds-spec.org/image/thumbnail") {
        const std::string faviconUrl
            = urlHost + linkNode.attribute("href").value();
        Downloader downloader;
        auto fileHandle = downloader.download(faviconUrl);
        if (fileHandle.success) {
          auto content = getFileContent(fileHandle.path);
          book.favicon = base64_encode(
              reinterpret_cast<const unsigned char*>(content.data()),
              content.size());
          book.faviconMimeType = linkNode.attribute("type").value();
        } else {
          std::cerr << "Cannot get favicon content from " << faviconUrl << std::endl;
        }
      } else if (rel == "http://opds-spec.org/acquisition/open-access") {
        book.url = linkNode.attribute("href").value();
      }
    }

    /* Update the book properties with the new importer */
    library.addBook(book);
  }

  return true;
}
|
||||
|
||||
|
||||
|
||||
bool Manager::readOpds(const string& content, const std::string& urlHost)
|
||||
{
|
||||
pugi::xml_document doc;
|
||||
pugi::xml_parse_result result
|
||||
= doc.load_buffer_inplace((void*)content.data(), content.size());
|
||||
|
||||
if (result) {
|
||||
this->parseOpdsDom(doc, urlHost);
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
bool Manager::readFile(const string path, const bool readOnly)
|
||||
{
|
||||
return this->readFile(path, path, readOnly);
|
||||
}
|
||||
|
||||
bool Manager::readFile(const string nativePath,
|
||||
const string UTF8Path,
|
||||
const bool readOnly)
|
||||
{
|
||||
bool retVal = true;
|
||||
pugi::xml_document doc;
|
||||
pugi::xml_parse_result result = doc.load_file(nativePath.c_str());
|
||||
|
||||
if (result) {
|
||||
this->parseXmlDom(doc, readOnly, UTF8Path);
|
||||
} else {
|
||||
retVal = false;
|
||||
}
|
||||
|
||||
/* This has to be set (although if the file does not exists) to be
|
||||
* able to know where to save the library if new content are
|
||||
* available */
|
||||
if (!readOnly) {
|
||||
this->writableLibraryPath = UTF8Path;
|
||||
}
|
||||
|
||||
return retVal;
|
||||
}
|
||||
|
||||
bool Manager::writeFile(const string path)
|
||||
{
|
||||
pugi::xml_document doc;
|
||||
|
||||
/* Add the library node */
|
||||
pugi::xml_node libraryNode = doc.append_child("library");
|
||||
|
||||
if (!getCurrentBookId().empty()) {
|
||||
libraryNode.append_attribute("current") = getCurrentBookId().c_str();
|
||||
}
|
||||
|
||||
if (!library.version.empty())
|
||||
libraryNode.append_attribute("version") = library.version.c_str();
|
||||
|
||||
/* Add each book */
|
||||
std::vector<kiwix::Book>::iterator itr;
|
||||
for (itr = library.books.begin(); itr != library.books.end(); ++itr) {
|
||||
if (!itr->readOnly) {
|
||||
this->checkAndCleanBookPaths(*itr, path);
|
||||
|
||||
pugi::xml_node bookNode = libraryNode.append_child("book");
|
||||
bookNode.append_attribute("id") = itr->id.c_str();
|
||||
|
||||
if (!itr->path.empty()) {
|
||||
bookNode.append_attribute("path") = itr->path.c_str();
|
||||
}
|
||||
|
||||
if (!itr->last.empty() && itr->last != "undefined") {
|
||||
bookNode.append_attribute("last") = itr->last.c_str();
|
||||
}
|
||||
|
||||
if (!itr->indexPath.empty())
|
||||
bookNode.append_attribute("indexPath") = itr->indexPath.c_str();
|
||||
|
||||
if (!itr->indexPath.empty() || !itr->indexPathAbsolute.empty()) {
|
||||
if (itr->indexType == XAPIAN) {
|
||||
bookNode.append_attribute("indexType") = "xapian";
|
||||
}
|
||||
}
|
||||
|
||||
if (itr->origId.empty()) {
|
||||
if (!itr->title.empty())
|
||||
bookNode.append_attribute("title") = itr->title.c_str();
|
||||
|
||||
if (!itr->name.empty())
|
||||
bookNode.append_attribute("name") = itr->name.c_str();
|
||||
|
||||
if (!itr->tags.empty())
|
||||
bookNode.append_attribute("tags") = itr->tags.c_str();
|
||||
|
||||
if (!itr->description.empty())
|
||||
bookNode.append_attribute("description") = itr->description.c_str();
|
||||
|
||||
if (!itr->language.empty())
|
||||
bookNode.append_attribute("language") = itr->language.c_str();
|
||||
|
||||
if (!itr->creator.empty())
|
||||
bookNode.append_attribute("creator") = itr->creator.c_str();
|
||||
|
||||
if (!itr->publisher.empty())
|
||||
bookNode.append_attribute("publisher") = itr->publisher.c_str();
|
||||
|
||||
if (!itr->favicon.empty())
|
||||
bookNode.append_attribute("favicon") = itr->favicon.c_str();
|
||||
|
||||
if (!itr->faviconMimeType.empty())
|
||||
bookNode.append_attribute("faviconMimeType")
|
||||
= itr->faviconMimeType.c_str();
|
||||
}
|
||||
|
||||
if (!itr->date.empty()) {
|
||||
bookNode.append_attribute("date") = itr->date.c_str();
|
||||
}
|
||||
|
||||
if (!itr->url.empty()) {
|
||||
bookNode.append_attribute("url") = itr->url.c_str();
|
||||
}
|
||||
|
||||
if (!itr->origId.empty())
|
||||
bookNode.append_attribute("origId") = itr->origId.c_str();
|
||||
|
||||
if (!itr->articleCount.empty())
|
||||
bookNode.append_attribute("articleCount") = itr->articleCount.c_str();
|
||||
|
||||
if (!itr->mediaCount.empty())
|
||||
bookNode.append_attribute("mediaCount") = itr->mediaCount.c_str();
|
||||
|
||||
if (!itr->size.empty()) {
|
||||
bookNode.append_attribute("size") = itr->size.c_str();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* saving file */
|
||||
doc.save_file(path.c_str());
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
bool Manager::setCurrentBookId(const string id)
|
||||
{
|
||||
if (library.current.empty() || library.current.top() != id) {
|
||||
if (id.empty() && !library.current.empty()) {
|
||||
library.current.pop();
|
||||
} else {
|
||||
library.current.push(id);
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Id on top of the current-book stack, or "" when the stack is empty. */
string Manager::getCurrentBookId() const
{
  if (library.current.empty()) {
    return "";
  }
  return library.current.top();
}
|
||||
|
||||
/* Add a book to the library. Return empty string if failed, book id otherwise
|
||||
*/
|
||||
string Manager::addBookFromPathAndGetId(const string pathToOpen,
|
||||
const string pathToSave,
|
||||
const string url,
|
||||
const bool checkMetaData)
|
||||
{
|
||||
kiwix::Book book;
|
||||
|
||||
if (this->readBookFromPath(pathToOpen, &book)) {
|
||||
if (pathToSave != pathToOpen) {
|
||||
book.path = pathToSave;
|
||||
book.pathAbsolute
|
||||
= isRelativePath(pathToSave)
|
||||
? computeAbsolutePath(
|
||||
removeLastPathElement(writableLibraryPath, true, false),
|
||||
pathToSave)
|
||||
: pathToSave;
|
||||
}
|
||||
|
||||
if (!checkMetaData
|
||||
|| (checkMetaData && !book.title.empty() && !book.language.empty()
|
||||
&& !book.date.empty())) {
|
||||
book.url = url;
|
||||
library.addBook(book);
|
||||
return book.id;
|
||||
}
|
||||
}
|
||||
|
||||
return "";
|
||||
}
|
||||
|
||||
/* Wrapper over Manager::addBookFromPath which return a bool instead of a string
|
||||
*/
|
||||
bool Manager::addBookFromPath(const string pathToOpen,
|
||||
const string pathToSave,
|
||||
const string url,
|
||||
const bool checkMetaData)
|
||||
{
|
||||
return !(
|
||||
this->addBookFromPathAndGetId(pathToOpen, pathToSave, url, checkMetaData)
|
||||
.empty());
|
||||
}
|
||||
|
||||
bool Manager::readBookFromPath(const string path, kiwix::Book* book)
|
||||
{
|
||||
try {
|
||||
kiwix::Reader* reader = new kiwix::Reader(path);
|
||||
|
||||
if (book != NULL) {
|
||||
book->path = path;
|
||||
book->pathAbsolute = path;
|
||||
book->id = reader->getId();
|
||||
book->description = reader->getDescription();
|
||||
book->language = reader->getLanguage();
|
||||
book->date = reader->getDate();
|
||||
book->creator = reader->getCreator();
|
||||
book->publisher = reader->getPublisher();
|
||||
book->title = reader->getTitle();
|
||||
book->name = reader->getName();
|
||||
book->tags = reader->getTags();
|
||||
book->origId = reader->getOrigId();
|
||||
std::ostringstream articleCountStream;
|
||||
articleCountStream << reader->getArticleCount();
|
||||
book->articleCount = articleCountStream.str();
|
||||
|
||||
std::ostringstream mediaCountStream;
|
||||
mediaCountStream << reader->getMediaCount();
|
||||
book->mediaCount = mediaCountStream.str();
|
||||
|
||||
ostringstream convert;
|
||||
convert << reader->getFileSize();
|
||||
book->size = convert.str();
|
||||
|
||||
string favicon;
|
||||
string faviconMimeType;
|
||||
if (reader->getFavicon(favicon, faviconMimeType)) {
|
||||
book->favicon = base64_encode(
|
||||
reinterpret_cast<const unsigned char*>(favicon.c_str()),
|
||||
favicon.length());
|
||||
book->faviconMimeType = faviconMimeType;
|
||||
}
|
||||
}
|
||||
|
||||
delete reader;
|
||||
} catch (const std::exception& e) {
|
||||
std::cerr << e.what() << std::endl;
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Manager::removeBookByIndex(const unsigned int bookIndex)
|
||||
{
|
||||
return this->library.removeBookByIndex(bookIndex);
|
||||
}
|
||||
|
||||
bool Manager::removeBookById(const string id)
|
||||
{
|
||||
unsigned int bookIndex = 0;
|
||||
std::vector<kiwix::Book>::iterator itr;
|
||||
for (itr = library.books.begin(); itr != library.books.end(); ++itr) {
|
||||
if (itr->id == id) {
|
||||
return this->library.removeBookByIndex(bookIndex);
|
||||
}
|
||||
bookIndex++;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Distinct languages of the books whose origId is empty.
 * Side effect: the library's book list is sorted by language first, so the
 * result follows that order. */
vector<string> Manager::getBooksLanguages()
{
  std::sort(
      library.books.begin(), library.books.end(), kiwix::Book::sortByLanguage);

  std::vector<string> languages;
  std::map<string, bool> seen;
  for (const kiwix::Book& book : library.books) {
    if (!book.origId.empty() || seen.count(book.language) != 0) {
      continue;
    }
    seen[book.language] = true;
    languages.push_back(book.language);
  }

  return languages;
}
|
||||
|
||||
/* Distinct creators of the books whose origId is empty.
 * Side effect: the library's book list is sorted by creator first, so the
 * result follows that order. */
vector<string> Manager::getBooksCreators()
{
  std::sort(
      library.books.begin(), library.books.end(), kiwix::Book::sortByCreator);

  std::vector<string> creators;
  std::map<string, bool> seen;
  for (const kiwix::Book& book : library.books) {
    if (!book.origId.empty() || seen.count(book.creator) != 0) {
      continue;
    }
    seen[book.creator] = true;
    creators.push_back(book.creator);
  }

  return creators;
}
|
||||
|
||||
/* Ids of all books, in the library's current order. */
vector<string> Manager::getBooksIds()
{
  std::vector<string> ids;
  for (const kiwix::Book& book : library.books) {
    ids.push_back(book.id);
  }
  return ids;
}
|
||||
|
||||
/* Distinct publishers of the books whose origId is empty.
 * Side effect: the library's book list is sorted by publisher first, so the
 * result follows that order. */
vector<string> Manager::getBooksPublishers()
{
  std::sort(
      library.books.begin(), library.books.end(), kiwix::Book::sortByPublisher);

  std::vector<string> publishers;
  std::map<string, bool> seen;
  for (const kiwix::Book& book : library.books) {
    if (!book.origId.empty() || seen.count(book.publisher) != 0) {
      continue;
    }
    seen[book.publisher] = true;
    publishers.push_back(book.publisher);
  }

  return publishers;
}
|
||||
|
||||
/* Return a copy of the managed library (returned by value). */
kiwix::Library Manager::cloneLibrary()
{
  return library;
}
|
||||
/* Copy the current book into `book`.
 * Returns false when there is no current book id or when no book with that
 * id exists in the library (`book` is left untouched in both cases). The
 * previous version reported true as soon as an id was set, even if the
 * lookup had failed. */
bool Manager::getCurrentBook(Book& book)
{
  const string currentBookId = getCurrentBookId();
  if (currentBookId.empty()) {
    return false;
  }
  return getBookById(currentBookId, book);
}
|
||||
|
||||
bool Manager::getBookById(const string id, Book& book)
|
||||
{
|
||||
std::vector<kiwix::Book>::iterator itr;
|
||||
for (itr = library.books.begin(); itr != library.books.end(); ++itr) {
|
||||
if (itr->id == id) {
|
||||
book = *itr;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool Manager::updateBookLastOpenDateById(const string id)
|
||||
{
|
||||
std::vector<kiwix::Book>::iterator itr;
|
||||
for (itr = library.books.begin(); itr != library.books.end(); ++itr) {
|
||||
if (itr->id == id) {
|
||||
char unixdate[12];
|
||||
sprintf(unixdate, "%d", (int)time(NULL));
|
||||
itr->last = unixdate;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
bool Manager::setBookIndex(const string id,
|
||||
const string path,
|
||||
const supportedIndexType type)
|
||||
{
|
||||
std::vector<kiwix::Book>::iterator itr;
|
||||
for (itr = library.books.begin(); itr != library.books.end(); ++itr) {
|
||||
if (itr->id == id) {
|
||||
itr->indexPath = path;
|
||||
itr->indexPathAbsolute
|
||||
= isRelativePath(path)
|
||||
? computeAbsolutePath(
|
||||
removeLastPathElement(writableLibraryPath, true, false),
|
||||
path)
|
||||
: path;
|
||||
itr->indexType = type;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
bool Manager::setBookPath(const string id, const string path)
|
||||
{
|
||||
std::vector<kiwix::Book>::iterator itr;
|
||||
for (itr = library.books.begin(); itr != library.books.end(); ++itr) {
|
||||
if (itr->id == id) {
|
||||
itr->path = path;
|
||||
itr->pathAbsolute
|
||||
= isRelativePath(path)
|
||||
? computeAbsolutePath(
|
||||
removeLastPathElement(writableLibraryPath, true, false),
|
||||
path)
|
||||
: path;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
void Manager::removeBookPaths()
|
||||
{
|
||||
std::vector<kiwix::Book>::iterator itr;
|
||||
for (itr = library.books.begin(); itr != library.books.end(); ++itr) {
|
||||
itr->path = "";
|
||||
itr->pathAbsolute = "";
|
||||
}
|
||||
}
|
||||
|
||||
unsigned int Manager::getBookCount(const bool localBooks,
|
||||
const bool remoteBooks)
|
||||
{
|
||||
unsigned int result = 0;
|
||||
std::vector<kiwix::Book>::iterator itr;
|
||||
for (itr = library.books.begin(); itr != library.books.end(); ++itr) {
|
||||
if ((!itr->path.empty() && localBooks)
|
||||
|| (itr->path.empty() && remoteBooks)) {
|
||||
result++;
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
bool Manager::listBooks(const supportedListMode mode,
|
||||
const supportedListSortBy sortBy,
|
||||
const unsigned int maxSize,
|
||||
const string language,
|
||||
const string creator,
|
||||
const string publisher,
|
||||
const string search)
|
||||
{
|
||||
this->bookIdList.clear();
|
||||
std::vector<kiwix::Book>::iterator itr;
|
||||
|
||||
/* Sort */
|
||||
if (sortBy == TITLE) {
|
||||
std::sort(
|
||||
library.books.begin(), library.books.end(), kiwix::Book::sortByTitle);
|
||||
} else if (sortBy == SIZE) {
|
||||
std::sort(
|
||||
library.books.begin(), library.books.end(), kiwix::Book::sortBySize);
|
||||
} else if (sortBy == DATE) {
|
||||
std::sort(
|
||||
library.books.begin(), library.books.end(), kiwix::Book::sortByDate);
|
||||
} else if (sortBy == CREATOR) {
|
||||
std::sort(
|
||||
library.books.begin(), library.books.end(), kiwix::Book::sortByCreator);
|
||||
} else if (sortBy == PUBLISHER) {
|
||||
std::sort(library.books.begin(),
|
||||
library.books.end(),
|
||||
kiwix::Book::sortByPublisher);
|
||||
}
|
||||
|
||||
/* Special sort for LASTOPEN */
|
||||
if (mode == LASTOPEN) {
|
||||
std::sort(library.books.begin(),
|
||||
library.books.end(),
|
||||
kiwix::Book::sortByLastOpen);
|
||||
for (itr = library.books.begin(); itr != library.books.end(); ++itr) {
|
||||
if (!itr->last.empty()) {
|
||||
this->bookIdList.push_back(itr->id);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
/* Generate the list of book id */
|
||||
for (itr = library.books.begin(); itr != library.books.end(); ++itr) {
|
||||
bool ok = true;
|
||||
|
||||
if (mode == LOCAL && itr->path.empty()) {
|
||||
ok = false;
|
||||
}
|
||||
|
||||
if (ok == true && mode == REMOTE
|
||||
&& (!itr->path.empty() || itr->url.empty())) {
|
||||
ok = false;
|
||||
}
|
||||
|
||||
if (ok == true && maxSize != 0
|
||||
&& (unsigned int)atoi(itr->size.c_str()) > maxSize * 1024 * 1024) {
|
||||
ok = false;
|
||||
}
|
||||
|
||||
if (ok == true && !language.empty()
|
||||
&& !matchRegex(itr->language, language)) {
|
||||
ok = false;
|
||||
}
|
||||
|
||||
if (ok == true && !creator.empty() && itr->creator != creator) {
|
||||
ok = false;
|
||||
}
|
||||
|
||||
if (ok == true && !publisher.empty() && itr->publisher != publisher) {
|
||||
ok = false;
|
||||
}
|
||||
|
||||
if ((ok == true && !search.empty())
|
||||
&& !(matchRegex(itr->title, "\\Q" + search + "\\E")
|
||||
|| matchRegex(itr->description, "\\Q" + search + "\\E")
|
||||
|| matchRegex(itr->language, "\\Q" + search + "\\E"))) {
|
||||
ok = false;
|
||||
}
|
||||
|
||||
if (ok == true) {
|
||||
this->bookIdList.push_back(itr->id);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
/**
 * Build a new library containing the books whose title or description
 * matches the pattern "\Q" + search + "\E".
 *
 * @param search Text to look for; an empty string yields an empty library.
 * @return The library holding the matching books.
 */
Library Manager::filter(const std::string& search) {
  Library matches;
  if (search.empty()) {
    return matches;
  }

  const std::string literalSearch = "\\Q" + search + "\\E";
  for (auto candidate : this->library.books) {
    const bool hit = matchRegex(candidate.title, literalSearch)
                     || matchRegex(candidate.description, literalSearch);
    if (hit) {
      matches.addBook(candidate);
    }
  }

  return matches;
}
|
||||
|
||||
/**
 * Make the relative and absolute path fields of a book consistent.
 *
 * For both the book path and the index path (each handled only when
 * non-empty): a relative value is resolved into the matching `*Absolute`
 * field; an absolute value is copied to the `*Absolute` field and the plain
 * field is replaced by its relative form. Both conversions use
 * `libraryPath` with its last element removed as the base.
 *
 * @param book        Book whose path fields are updated in place.
 * @param libraryPath Path of the library file the book belongs to.
 */
void Manager::checkAndCleanBookPaths(Book& book, const string& libraryPath)
{
  if (!book.path.empty()) {
    if (!isRelativePath(book.path)) {
      book.pathAbsolute = book.path;
      book.path = computeRelativePath(
          removeLastPathElement(libraryPath, true, false), book.pathAbsolute);
    } else {
      book.pathAbsolute = computeAbsolutePath(
          removeLastPathElement(libraryPath, true, false), book.path);
    }
  }

  if (!book.indexPath.empty()) {
    if (!isRelativePath(book.indexPath)) {
      book.indexPathAbsolute = book.indexPath;
      book.indexPath = computeRelativePath(
          removeLastPathElement(libraryPath, true, false),
          book.indexPathAbsolute);
    } else {
      book.indexPathAbsolute = computeAbsolutePath(
          removeLastPathElement(libraryPath, true, false), book.indexPath);
    }
  }
}
|
||||
}
|
||||
48
src/meson.build
Normal file
@@ -0,0 +1,48 @@
|
||||
kiwix_sources = [
|
||||
'library.cpp',
|
||||
'manager.cpp',
|
||||
'opds_dumper.cpp',
|
||||
'downloader.cpp',
|
||||
'reader.cpp',
|
||||
'entry.cpp',
|
||||
'searcher.cpp',
|
||||
'common/base64.cpp',
|
||||
'common/pathTools.cpp',
|
||||
'common/regexTools.cpp',
|
||||
'common/stringTools.cpp',
|
||||
'common/networkTools.cpp',
|
||||
'common/otherTools.cpp',
|
||||
'xapian/htmlparse.cc',
|
||||
'xapian/myhtmlparse.cc'
|
||||
]
|
||||
kiwix_sources += lib_resources
|
||||
|
||||
if xapian_dep.found()
|
||||
kiwix_sources += ['xapianSearcher.cpp']
|
||||
endif
|
||||
|
||||
if get_option('android')
|
||||
subdir('android')
|
||||
install_dir = 'kiwix-lib/jniLibs/' + meson.get_cross_property('android_abi')
|
||||
else
|
||||
install_dir = get_option('libdir')
|
||||
endif
|
||||
|
||||
|
||||
if has_ctpp2_dep
|
||||
kiwix_sources += ['ctpp2/CTPP2VMStringLoader.cpp']
|
||||
endif
|
||||
|
||||
config_h = configure_file(output : 'kiwix_config.h',
|
||||
configuration : conf,
|
||||
input : 'config.h.in')
|
||||
install_headers(config_h, subdir:'kiwix')
|
||||
|
||||
kiwixlib = library('kiwix',
|
||||
kiwix_sources,
|
||||
include_directories : inc,
|
||||
dependencies : all_deps,
|
||||
version: meson.project_version(),
|
||||
install: true,
|
||||
install_dir: install_dir,
|
||||
install_rpath: '$ORIGIN')
|
||||
135
src/opds_dumper.cpp
Normal file
@@ -0,0 +1,135 @@
|
||||
/*
|
||||
* Copyright 2017 Matthieu Gautier <mgautier@kymeria.fr>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 3 of the License, or
|
||||
* any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
* MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#include "opds_dumper.h"
|
||||
|
||||
namespace kiwix
|
||||
{
|
||||
/* Constructor */
|
||||
OPDSDumper::OPDSDumper(Library library)
|
||||
: library(library)
|
||||
{
|
||||
}
|
||||
/* Destructor */
|
||||
OPDSDumper::~OPDSDumper()
|
||||
{
|
||||
}
|
||||
|
||||
struct xml_string_writer: pugi::xml_writer
|
||||
{
|
||||
std::string result;
|
||||
|
||||
virtual void write(const void* data, size_t size)
|
||||
{
|
||||
result.append(static_cast<const char*>(data), size);
|
||||
}
|
||||
};
|
||||
|
||||
std::string node_to_string(pugi::xml_node node)
|
||||
{
|
||||
xml_string_writer writer;
|
||||
node.print(writer, " ");
|
||||
|
||||
return writer.result;
|
||||
}
|
||||
|
||||
/**
 * Return the current time formatted as "YYYY-MM-DDTHH:MM:SSZ".
 *
 * The trailing "Z" designates UTC, so the broken-down time is taken from
 * gmtime() rather than localtime(). strftime() performs the formatting,
 * which also takes care of the 0-based tm_mon field (printing it raw
 * yields a month that is off by one).
 *
 * @return The formatted timestamp, or an empty string when the current time
 *         cannot be converted.
 */
std::string gen_date_str()
{
  const time_t now = time(0);
  const struct tm* utc = gmtime(&now);
  if (utc == 0) {
    return std::string();
  }

  char stamp[32];
  const size_t written
      = strftime(stamp, sizeof(stamp), "%Y-%m-%dT%H:%M:%SZ", utc);
  return std::string(stamp, written);
}
|
||||
|
||||
#define ADD_TEXT_ENTRY(node, child, value) (node).append_child((child)).append_child(pugi::node_pcdata).set_value((value).c_str())
|
||||
|
||||
/**
 * Append an <entry> element describing `book` to `root_node`.
 *
 * The entry holds, in this order: title, id ("urn:uuid:" + book id), icon,
 * updated (the dumper's `date` member), summary, an HTML <link> to the
 * book content, an <author>, then an acquisition <link> when the book has a
 * url and a thumbnail <link> when the book has a favicon mime type.
 *
 * @param book      Book to describe.
 * @param root_node Node that receives the new <entry> child.
 * @return The newly created <entry> node.
 */
pugi::xml_node OPDSDumper::handleBook(Book book, pugi::xml_node root_node) {
  const std::string humanId = book.getHumanReadableIdFromPath();
  const std::string bookUrn = "urn:uuid:" + book.id;
  const std::string faviconUrl
      = rootLocation + "/meta?name=favicon&content=" + humanId;
  const std::string contentUrl = rootLocation + "/" + humanId;

  pugi::xml_node entry = root_node.append_child("entry");
  ADD_TEXT_ENTRY(entry, "title", book.title);
  ADD_TEXT_ENTRY(entry, "id", bookUrn);
  ADD_TEXT_ENTRY(entry, "icon", faviconUrl);
  ADD_TEXT_ENTRY(entry, "updated", date);
  ADD_TEXT_ENTRY(entry, "summary", book.description);

  pugi::xml_node contentLink = entry.append_child("link");
  contentLink.append_attribute("type") = "text/html";
  contentLink.append_attribute("href") = contentUrl.c_str();

  pugi::xml_node author = entry.append_child("author");
  ADD_TEXT_ENTRY(author, "name", book.creator);

  if (!book.url.empty()) {
    pugi::xml_node acquisition = entry.append_child("link");
    acquisition.append_attribute("rel")
        = "http://opds-spec.org/acquisition/open-access";
    acquisition.append_attribute("type") = "application/x-zim";
    acquisition.append_attribute("href") = book.url.c_str();
  }

  if (!book.faviconMimeType.empty()) {
    pugi::xml_node thumbnail = entry.append_child("link");
    thumbnail.append_attribute("rel") = "http://opds-spec.org/image/thumbnail";
    thumbnail.append_attribute("type") = book.faviconMimeType.c_str();
    thumbnail.append_attribute("href") = faviconUrl.c_str();
  }

  return entry;
}
|
||||
|
||||
/**
 * Serialize the whole library as an Atom/OPDS <feed> document.
 *
 * Refreshes the `date` member, then emits the feed id, title and update
 * date, a "self" link with an empty href, a "search" link when
 * `searchDescriptionUrl` is set, and one <entry> per book of the library.
 *
 * @return The XML text of the <feed> element.
 */
string OPDSDumper::dumpOPDSFeed()
{
  date = gen_date_str();

  pugi::xml_document document;
  pugi::xml_node feed = document.append_child("feed");
  feed.append_attribute("xmlns") = "http://www.w3.org/2005/Atom";
  feed.append_attribute("xmlns:opds") = "http://opds-spec.org/2010/catalog";

  ADD_TEXT_ENTRY(feed, "id", id);
  ADD_TEXT_ENTRY(feed, "title", title);
  ADD_TEXT_ENTRY(feed, "updated", date);

  pugi::xml_node selfLink = feed.append_child("link");
  selfLink.append_attribute("rel") = "self";
  selfLink.append_attribute("href") = "";
  selfLink.append_attribute("type") = "application/atom+xml";

  if (!searchDescriptionUrl.empty()) {
    pugi::xml_node searchLink = feed.append_child("link");
    searchLink.append_attribute("rel") = "search";
    searchLink.append_attribute("type")
        = "application/opensearchdescription+xml";
    searchLink.append_attribute("href") = searchDescriptionUrl.c_str();
  }

  for (const auto& item : library.books) {
    handleBook(item, feed);
  }

  return node_to_string(feed);
}
|
||||
|
||||
}
|
||||
862
src/reader.cpp
Normal file
@@ -0,0 +1,862 @@
|
||||
/*
|
||||
* Copyright 2011 Emmanuel Engelhart <kelson@kiwix.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 3 of the License, or
|
||||
* any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
* MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#include "reader.h"
|
||||
#include <time.h>
|
||||
|
||||
#include <zim/search.h>
|
||||
|
||||
/* Return the lowercase hex digit of the high nibble of `v`. */
inline char hi(char v)
{
  static const char digits[] = "0123456789abcdef";
  return digits[(v >> 4) & 0xf];
}

/* Return the lowercase hex digit of the low nibble of `v`. */
inline char lo(char v)
{
  static const char digits[] = "0123456789abcdef";
  return digits[v & 0xf];
}

/**
 * Format 16 raw bytes as a textual UUID (8-4-4-4-12 lowercase hex digits).
 *
 * @param in Raw bytes. Only the first 16 are used; when fewer than 16 are
 *           supplied the missing ones are treated as zero instead of being
 *           read past the end of the string.
 * @return The 36-character UUID string.
 */
std::string hexUUID(std::string in)
{
  std::string out;
  out.reserve(36);
  for (unsigned n = 0; n < 16; ++n) {
    /* A dash precedes the groups starting at bytes 4, 6, 8 and 10. */
    if (n == 4 || n == 6 || n == 8 || n == 10) {
      out += '-';
    }
    const char byte = n < in.size() ? in[n] : '\0';
    out += hi(byte);
    out += lo(byte);
  }
  return out;
}
|
||||
|
||||
namespace kiwix
|
||||
{
|
||||
/* Constructor */
|
||||
Reader::Reader(const string zimFilePath) : zimFileHandler(NULL)
|
||||
{
|
||||
string tmpZimFilePath = zimFilePath;
|
||||
|
||||
/* Remove potential trailing zimaa */
|
||||
size_t found = tmpZimFilePath.rfind("zimaa");
|
||||
if (found != string::npos && tmpZimFilePath.size() > 5
|
||||
&& found == tmpZimFilePath.size() - 5) {
|
||||
tmpZimFilePath.resize(tmpZimFilePath.size() - 2);
|
||||
}
|
||||
|
||||
this->zimFileHandler = new zim::File(tmpZimFilePath);
|
||||
|
||||
if (this->zimFileHandler != NULL) {
|
||||
this->firstArticleOffset
|
||||
= this->zimFileHandler->getNamespaceBeginOffset('A');
|
||||
this->lastArticleOffset = this->zimFileHandler->getNamespaceEndOffset('A');
|
||||
this->nsACount = this->zimFileHandler->getNamespaceCount('A');
|
||||
this->nsICount = this->zimFileHandler->getNamespaceCount('I');
|
||||
this->zimFilePath = zimFilePath;
|
||||
}
|
||||
|
||||
/* initialize random seed: */
|
||||
srand(time(NULL));
|
||||
}
|
||||
|
||||
/* Destructor */
|
||||
Reader::~Reader()
|
||||
{
|
||||
if (this->zimFileHandler != NULL) {
|
||||
delete this->zimFileHandler;
|
||||
}
|
||||
}
|
||||
|
||||
/* Give access to the underlying zim::File; the Reader deletes it in its
   destructor. */
zim::File* Reader::getZimFileHandler() const
{
  return zimFileHandler;
}
|
||||
std::map<const std::string, unsigned int> Reader::parseCounterMetadata() const
|
||||
{
|
||||
std::map<const std::string, unsigned int> counters;
|
||||
string mimeType, item, counterString;
|
||||
unsigned int counter;
|
||||
|
||||
zim::Article article = this->zimFileHandler->getArticle('M', "Counter");
|
||||
|
||||
if (article.good()) {
|
||||
stringstream ssContent(article.getData());
|
||||
|
||||
while (getline(ssContent, item, ';')) {
|
||||
stringstream ssItem(item);
|
||||
getline(ssItem, mimeType, '=');
|
||||
getline(ssItem, counterString, '=');
|
||||
if (!counterString.empty() && !mimeType.empty()) {
|
||||
sscanf(counterString.c_str(), "%u", &counter);
|
||||
counters.insert(pair<string, int>(mimeType, counter));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return counters;
|
||||
}
|
||||
|
||||
/* Get the count of articles which can be indexed/displayed */
|
||||
unsigned int Reader::getArticleCount() const
|
||||
{
|
||||
std::map<const std::string, unsigned int> counterMap
|
||||
= this->parseCounterMetadata();
|
||||
unsigned int counter = 0;
|
||||
|
||||
if (counterMap.empty()) {
|
||||
counter = this->nsACount;
|
||||
} else {
|
||||
auto it = counterMap.find("text/html");
|
||||
if (it != counterMap.end()) {
|
||||
counter = it->second;
|
||||
}
|
||||
}
|
||||
|
||||
return counter;
|
||||
}
|
||||
|
||||
/* Get the count of medias content in the ZIM file */
|
||||
unsigned int Reader::getMediaCount() const
|
||||
{
|
||||
std::map<const std::string, unsigned int> counterMap
|
||||
= this->parseCounterMetadata();
|
||||
unsigned int counter = 0;
|
||||
|
||||
if (counterMap.empty()) {
|
||||
counter = this->nsICount;
|
||||
} else {
|
||||
auto it = counterMap.find("image/jpeg");
|
||||
if (it != counterMap.end()) {
|
||||
counter += it->second;
|
||||
}
|
||||
|
||||
it = counterMap.find("image/gif");
|
||||
if (it != counterMap.end()) {
|
||||
counter += it->second;
|
||||
}
|
||||
|
||||
it = counterMap.find("image/png");
|
||||
if (it != counterMap.end()) {
|
||||
counter += it->second;
|
||||
}
|
||||
}
|
||||
return counter;
|
||||
}
|
||||
|
||||
/* Get the total of all items of a ZIM file, redirects included */
|
||||
unsigned int Reader::getGlobalCount() const
|
||||
{
|
||||
return this->zimFileHandler->getCountArticles();
|
||||
}
|
||||
|
||||
/* Return the UID of the ZIM file */
|
||||
string Reader::getId() const
|
||||
{
|
||||
std::ostringstream s;
|
||||
s << this->zimFileHandler->getFileheader().getUuid();
|
||||
return s.str();
|
||||
}
|
||||
|
||||
/* Return a page url from a title */
|
||||
bool Reader::getPageUrlFromTitle(const string& title, string& url) const
|
||||
{
|
||||
try {
|
||||
auto entry = getEntryFromTitle(title);
|
||||
entry = entry.getFinalEntry();
|
||||
url = entry.getPath();
|
||||
return true;
|
||||
} catch (NoEntry& e) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/* Return an URL from a title */
|
||||
string Reader::getRandomPageUrl() const
|
||||
{
|
||||
return getRandomPage().getPath();
|
||||
}
|
||||
|
||||
/**
 * Pick a random article of namespace 'A'.
 *
 * Up to 42 random draws are made; the first draw that yields a valid
 * article other than the main page is returned. The retry test uses
 * "invalid OR main page" (an AND there would never retry on the main page
 * and would query the URL of invalid articles only).
 *
 * When every draw fails, the last drawn article is still returned if it is
 * valid (it can only be the main page, e.g. in a single-article file).
 *
 * @return The selected entry.
 * @throws NoEntry when no ZIM file is open or no valid article was drawn.
 */
Entry Reader::getRandomPage() const
{
  if (!this->zimFileHandler) {
    throw NoEntry();
  }

  const std::string mainPagePath = this->getMainPage().getPath();
  zim::Article article;

  for (int attempt = 0; attempt < 42; ++attempt) {
    auto idx = this->firstArticleOffset
               + (zim::size_type)((double)rand() / ((double)RAND_MAX + 1)
                                  * this->nsACount);
    article = zimFileHandler->getArticle(idx);
    /* good() is tested first so the URL of an invalid article is never read */
    if (article.good() && article.getLongUrl() != mainPagePath) {
      return article;
    }
  }

  if (article.good()) {
    return article;
  }
  throw NoEntry();
}
|
||||
|
||||
/* Return the welcome page URL */
|
||||
string Reader::getMainPageUrl() const
|
||||
{
|
||||
return getMainPage().getPath();
|
||||
}
|
||||
|
||||
/**
 * Return the main (welcome) page of the ZIM file.
 *
 * The article designated by the file header is used when the header
 * declares a main page and that article is valid; otherwise the result of
 * getFirstPage() is returned.
 *
 * @throws NoEntry when no ZIM file is open (getFirstPage() may throw too).
 */
Entry Reader::getMainPage() const
{
  if (!this->zimFileHandler) {
    throw NoEntry();
  }

  zim::Article mainArticle;
  if (this->zimFileHandler->getFileheader().hasMainPage()) {
    mainArticle = zimFileHandler->getArticle(
        this->zimFileHandler->getFileheader().getMainPage());
  }

  if (mainArticle.good()) {
    return mainArticle;
  }
  return getFirstPage();
}
|
||||
|
||||
/**
 * Fetch the favicon of the ZIM file.
 *
 * The candidate paths "-/favicon.png", "I/favicon.png", "I/favicon" and
 * "-/favicon" are tried in that order; the first one that resolves (through
 * getFinalEntry()) provides the result.
 *
 * @param content  Receives the favicon data.
 * @param mimeType Receives the favicon mime type.
 * @return true when a favicon was found, false otherwise.
 */
bool Reader::getFavicon(string& content, string& mimeType) const
{
  static const char* const candidates[]
      = {"-/favicon.png", "I/favicon.png", "I/favicon", "-/favicon"};
  const size_t candidateCount = sizeof(candidates) / sizeof(candidates[0]);

  for (size_t i = 0; i < candidateCount; ++i) {
    try {
      auto icon = getEntryFromPath(candidates[i]);
      icon = icon.getFinalEntry();
      content = icon.getContent();
      mimeType = icon.getMimetype();
      return true;
    } catch (NoEntry& e) {
      /* not stored under this path: try the next candidate */
    }
  }

  return false;
}
|
||||
|
||||
/* Return the path the Reader was constructed with. */
string Reader::getZimFilePath() const
{
  return zimFilePath;
}
|
||||
/* Return a metatag value */
|
||||
bool Reader::getMetatag(const string& name, string& value) const
|
||||
{
|
||||
try {
|
||||
auto entry = getEntryFromPath("M/"+name);
|
||||
value = entry.getContent();
|
||||
return true;
|
||||
} catch(NoEntry& e) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Return the title of the ZIM file.
 *
 * The "Title" metadata is used when it is non-empty. Otherwise the title is
 * derived from the last path element of the file name: underscores become
 * spaces and everything from the first ".zim" onwards is cut off.
 */
string Reader::getTitle() const
{
  string title;
  this->getMetatag("Title", title);
  if (!title.empty()) {
    return title;
  }

  title = getLastPathElement(zimFileHandler->getFilename());
  std::replace(title.begin(), title.end(), '_', ' ');
  const size_t extensionPos = title.find(".zim");
  return title.substr(0, extensionPos);
}
|
||||
|
||||
/* Return the "Name" metadata; empty when the lookup fails. */
string Reader::getName() const
{
  string name;
  getMetatag("Name", name);
  return name;
}
|
||||
|
||||
/* Return the "Tags" metadata; empty when the lookup fails. */
string Reader::getTags() const
{
  string tags;
  getMetatag("Tags", tags);
  return tags;
}
|
||||
|
||||
/* Return the "Description" metadata, falling back to the "Subtitle"
   metadata when the description is empty. */
string Reader::getDescription() const
{
  string description;
  getMetatag("Description", description);
  if (!description.empty()) {
    return description;
  }

  /* Mediawiki Collection tends to use the "Subtitle" name */
  getMetatag("Subtitle", description);
  return description;
}
|
||||
|
||||
/* Return the "Language" metadata; empty when the lookup fails. */
string Reader::getLanguage() const
{
  string language;
  getMetatag("Language", language);
  return language;
}
|
||||
|
||||
/* Return the "Date" metadata; empty when the lookup fails. */
string Reader::getDate() const
{
  string date;
  getMetatag("Date", date);
  return date;
}
|
||||
|
||||
/* Return the "Creator" metadata; empty when the lookup fails. */
string Reader::getCreator() const
{
  string creator;
  getMetatag("Creator", creator);
  return creator;
}
|
||||
|
||||
/* Return the "Publisher" metadata; empty when the lookup fails. */
string Reader::getPublisher() const
{
  string publisher;
  getMetatag("Publisher", publisher);
  return publisher;
}
|
||||
|
||||
/**
 * Return the UUID stored in the "startfileuid" metadata, as text.
 *
 * The metadata is read as a list of decimal byte values, each terminated by
 * a newline; a trailing value without a newline is ignored. The first 16
 * values become the bytes of the UUID, which is formatted with hexUUID().
 * Values beyond the sixteenth are ignored instead of being written past
 * the end of the byte buffer.
 *
 * @return The formatted UUID, or an empty string when the metadata is empty
 *         or missing.
 */
string Reader::getOrigId() const
{
  string value;
  this->getMetatag("startfileuid", value);
  if (value.empty()) {
    return "";
  }

  char uuidBytes[16] = {0};
  const unsigned int capacity = sizeof(uuidBytes);
  unsigned int filled = 0;
  std::string number;

  for (unsigned int i = 0; i < value.size() && filled < capacity; i++) {
    if (value[i] == '\n') {
      uuidBytes[filled++] = atoi(number.c_str());
      number = "";
    } else {
      number += value[i];
    }
  }

  /* Pass the length explicitly: building the string from a bare char*
     would stop at the first zero byte of the UUID. */
  return hexUUID(std::string(uuidBytes, capacity));
}
|
||||
|
||||
/* Return the first page URL */
|
||||
string Reader::getFirstPageUrl() const
|
||||
{
|
||||
return getFirstPage().getPath();
|
||||
}
|
||||
|
||||
/**
 * Return the article located at the beginning offset of namespace 'A'.
 *
 * @throws NoEntry when no ZIM file is open or that article is not valid.
 */
Entry Reader::getFirstPage() const
{
  if (!this->zimFileHandler) {
    throw NoEntry();
  }

  auto first = zimFileHandler->getArticle(
      zimFileHandler->getNamespaceBeginOffset('A'));
  if (first.good()) {
    return first;
  }
  throw NoEntry();
}
|
||||
|
||||
/**
 * Split a url of the form "[/]N/title" into namespace and title.
 *
 * One optional leading '/' is skipped, then a single namespace character
 * followed by '/' and a non-empty title are expected.
 *
 * @param url   Url to split.
 * @param ns    Receives the namespace character. It is written as soon as
 *              it has been read, so it may be set even when false is
 *              returned (e.g. for "/A" or "/A/").
 * @param title Receives the rest of the url after "N/"; written only when
 *              true is returned.
 * @return true when the url had the expected shape, false otherwise.
 */
bool _parseUrl(const string& url, char* ns, string& title)
{
  const unsigned int length = url.size();

  /* Ignore the first '/' */
  unsigned int pos = (url[0] == '/') ? 1 : 0;

  /* A namespace character must follow, and it cannot itself be '/' */
  if (pos >= length || url[pos] == '/') {
    return false;
  }

  /* Get namespace */
  *ns = url[pos];
  ++pos;

  /* The namespace must be followed by the separator... */
  if (pos >= length || url[pos] != '/') {
    return false;
  }
  ++pos;

  /* ...and by at least one character of title */
  if (pos >= length) {
    return false;
  }

  /* Get content title */
  title = url.substr(pos);
  return true;
}
|
||||
|
||||
bool Reader::parseUrl(const string& url, char* ns, string& title) const
|
||||
{
|
||||
return _parseUrl(url, ns, title);
|
||||
}
|
||||
|
||||
/**
 * Return the entry stored at `path` ("[/]N/title").
 *
 * When parsing `path` yields neither a namespace nor a title (e.g. an empty
 * path or "/"), the main page is returned.
 *
 * @throws NoEntry when no ZIM file is open or no valid article is found for
 *         the parsed namespace and title.
 */
Entry Reader::getEntryFromPath(const std::string& path) const
{
  if (!this->zimFileHandler) {
    throw NoEntry();
  }

  char nameSpace = 0;
  std::string shortUrl;
  _parseUrl(path, &nameSpace, shortUrl);

  const bool nothingParsed = (nameSpace == 0) && shortUrl.empty();
  if (nothingParsed) {
    return getMainPage();
  }

  auto found = zimFileHandler->getArticle(nameSpace, shortUrl);
  if (found.good()) {
    return found;
  }
  throw NoEntry();
}
|
||||
|
||||
/* Same as getEntryFromPath(), but `path` is first passed through
   urlDecode(path, true). */
Entry Reader::getEntryFromEncodedPath(const std::string& path) const
{
  const std::string decodedPath = urlDecode(path, true);
  return getEntryFromPath(decodedPath);
}
|
||||
|
||||
/**
 * Return the article of namespace 'A' found by title.
 *
 * @throws NoEntry when no ZIM file is open or the title lookup does not
 *         yield a valid article.
 */
Entry Reader::getEntryFromTitle(const std::string& title) const
{
  if (!this->zimFileHandler) {
    throw NoEntry();
  }

  auto found = this->zimFileHandler->getArticleByTitle('A', title);
  if (found.good()) {
    return found;
  }
  throw NoEntry();
}
|
||||
|
||||
/* Return article by url */
|
||||
bool Reader::getArticleObjectByDecodedUrl(const string& url,
|
||||
zim::Article& article) const
|
||||
{
|
||||
if (this->zimFileHandler == NULL) {
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Parse the url */
|
||||
char ns = 0;
|
||||
string urlStr;
|
||||
_parseUrl(url, &ns, urlStr);
|
||||
|
||||
/* Main page */
|
||||
if (urlStr.empty() && ns == 0) {
|
||||
_parseUrl(this->getMainPage().getPath(), &ns, urlStr);
|
||||
}
|
||||
|
||||
/* Extract the content from the zim file */
|
||||
article = zimFileHandler->getArticle(ns, urlStr);
|
||||
return article.good();
|
||||
}
|
||||
|
||||
/* Return the mimeType without the content */
|
||||
bool Reader::getMimeTypeByUrl(const string& url, string& mimeType) const
|
||||
{
|
||||
try {
|
||||
auto entry = getEntryFromPath(url);
|
||||
mimeType = entry.getMimetype();
|
||||
return true;
|
||||
} catch (NoEntry& e) {
|
||||
mimeType = "";
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
bool get_content_by_decoded_url(const Reader& reader,
|
||||
const string& url,
|
||||
string& content,
|
||||
string& title,
|
||||
unsigned int& contentLength,
|
||||
string& contentType,
|
||||
string& baseUrl)
|
||||
{
|
||||
content = "";
|
||||
contentType = "";
|
||||
contentLength = 0;
|
||||
|
||||
try {
|
||||
auto entry = reader.getEntryFromPath(url);
|
||||
entry = entry.getFinalEntry();
|
||||
baseUrl = entry.getPath();
|
||||
contentType = entry.getMimetype();
|
||||
content = entry.getContent();
|
||||
contentLength = entry.getSize();
|
||||
title = entry.getTitle();
|
||||
|
||||
/* Try to set a stub HTML header/footer if necesssary */
|
||||
if (contentType.find("text/html") != string::npos
|
||||
&& content.find("<body") == std::string::npos
|
||||
&& content.find("<BODY") == std::string::npos) {
|
||||
content = "<html><head><title>" + title +
|
||||
"</title><meta http-equiv=\"Content-Type\" content=\"text/html; "
|
||||
"charset=utf-8\" /></head><body>" +
|
||||
content + "</body></html>";
|
||||
}
|
||||
return true;
|
||||
} catch (NoEntry& e) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* Get a content from a zim file */
|
||||
bool Reader::getContentByUrl(const string& url,
|
||||
string& content,
|
||||
string& title,
|
||||
unsigned int& contentLength,
|
||||
string& contentType) const
|
||||
{
|
||||
std::string stubRedirectUrl;
|
||||
return get_content_by_decoded_url(*this,
|
||||
kiwix::urlDecode(url),
|
||||
content,
|
||||
title,
|
||||
contentLength,
|
||||
contentType,
|
||||
stubRedirectUrl);
|
||||
}
|
||||
|
||||
bool Reader::getContentByEncodedUrl(const string& url,
|
||||
string& content,
|
||||
string& title,
|
||||
unsigned int& contentLength,
|
||||
string& contentType,
|
||||
string& baseUrl) const
|
||||
{
|
||||
return get_content_by_decoded_url(*this,
|
||||
kiwix::urlDecode(url),
|
||||
content,
|
||||
title,
|
||||
contentLength,
|
||||
contentType,
|
||||
baseUrl);
|
||||
}
|
||||
|
||||
bool Reader::getContentByEncodedUrl(const string& url,
|
||||
string& content,
|
||||
string& title,
|
||||
unsigned int& contentLength,
|
||||
string& contentType) const
|
||||
{
|
||||
std::string stubRedirectUrl;
|
||||
return get_content_by_decoded_url(*this,
|
||||
kiwix::urlDecode(url),
|
||||
content,
|
||||
title,
|
||||
contentLength,
|
||||
contentType,
|
||||
stubRedirectUrl);
|
||||
}
|
||||
|
||||
bool Reader::getContentByDecodedUrl(const string& url,
|
||||
string& content,
|
||||
string& title,
|
||||
unsigned int& contentLength,
|
||||
string& contentType) const
|
||||
{
|
||||
std::string stubRedirectUrl;
|
||||
return get_content_by_decoded_url(*this,
|
||||
url,
|
||||
content,
|
||||
title,
|
||||
contentLength,
|
||||
contentType,
|
||||
stubRedirectUrl);
|
||||
}
|
||||
|
||||
bool Reader::getContentByDecodedUrl(const string& url,
|
||||
string& content,
|
||||
string& title,
|
||||
unsigned int& contentLength,
|
||||
string& contentType,
|
||||
string& baseUrl) const
|
||||
{
|
||||
return get_content_by_decoded_url(*this,
|
||||
url,
|
||||
content,
|
||||
title,
|
||||
contentLength,
|
||||
contentType,
|
||||
baseUrl);
|
||||
}
|
||||
|
||||
/* Check if an article exists */
|
||||
bool Reader::urlExists(const string& url) const
|
||||
{
|
||||
return pathExists(url);
|
||||
}
|
||||
|
||||
bool Reader::pathExists(const string& path) const
|
||||
{
|
||||
if (!zimFileHandler)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
char ns = 0;
|
||||
string titleStr;
|
||||
_parseUrl(path, &ns, titleStr);
|
||||
zim::File::const_iterator findItr = zimFileHandler->find(ns, titleStr);
|
||||
return findItr != zimFileHandler->end() && findItr->getUrl() == titleStr;
|
||||
}
|
||||
|
||||
/* Does the ZIM file has a fulltext index */
|
||||
bool Reader::hasFulltextIndex() const
|
||||
{
|
||||
if (!zimFileHandler || zimFileHandler->is_multiPart() )
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
return ( pathExists("Z//fulltextIndex/xapian")
|
||||
|| pathExists("X/fulltext/xapian"));
|
||||
}
|
||||
|
||||
/* Search titles by prefix */
|
||||
bool Reader::searchSuggestions(const string& prefix,
|
||||
unsigned int suggestionsCount,
|
||||
const bool reset)
|
||||
{
|
||||
bool retVal = false;
|
||||
zim::File::const_iterator articleItr;
|
||||
|
||||
/* Reset the suggestions otherwise check if the suggestions number is less
|
||||
* than the suggestionsCount */
|
||||
if (reset) {
|
||||
this->suggestions.clear();
|
||||
this->suggestionsOffset = this->suggestions.begin();
|
||||
} else {
|
||||
if (this->suggestions.size() > suggestionsCount) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/* Return if no prefix */
|
||||
if (prefix.size() == 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
for (articleItr = zimFileHandler->findByTitle('A', prefix);
|
||||
articleItr != zimFileHandler->end()
|
||||
&& articleItr->getTitle().compare(0, prefix.size(), prefix) == 0
|
||||
&& this->suggestions.size() < suggestionsCount;
|
||||
++articleItr) {
|
||||
/* Extract the interesting part of article title & url */
|
||||
std::string normalizedArticleTitle
|
||||
= kiwix::normalize(articleItr->getTitle());
|
||||
std::string articleFinalUrl = "/A/" + articleItr->getUrl();
|
||||
if (articleItr->isRedirect()) {
|
||||
zim::Article article = *articleItr;
|
||||
unsigned int loopCounter = 0;
|
||||
while (article.isRedirect() && loopCounter++ < 42) {
|
||||
article = article.getRedirectArticle();
|
||||
}
|
||||
articleFinalUrl = "/A/" + article.getUrl();
|
||||
}
|
||||
|
||||
/* Go through all already found suggestions and skip if this
|
||||
article is already in the suggestions list (with an other
|
||||
title) */
|
||||
bool insert = true;
|
||||
std::vector<std::vector<std::string>>::iterator suggestionItr;
|
||||
for (suggestionItr = this->suggestions.begin();
|
||||
suggestionItr != this->suggestions.end();
|
||||
suggestionItr++) {
|
||||
int result = normalizedArticleTitle.compare((*suggestionItr)[2]);
|
||||
if (result == 0 && articleFinalUrl.compare((*suggestionItr)[1]) == 0) {
|
||||
insert = false;
|
||||
break;
|
||||
} else if (result < 0) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* Insert if possible */
|
||||
if (insert) {
|
||||
std::vector<std::string> suggestion;
|
||||
suggestion.push_back(articleItr->getTitle());
|
||||
suggestion.push_back(articleFinalUrl);
|
||||
suggestion.push_back(normalizedArticleTitle);
|
||||
this->suggestions.insert(suggestionItr, suggestion);
|
||||
}
|
||||
|
||||
/* Suggestions where found */
|
||||
retVal = true;
|
||||
}
|
||||
|
||||
/* Set the cursor to the begining */
|
||||
this->suggestionsOffset = this->suggestions.begin();
|
||||
|
||||
return retVal;
|
||||
}
|
||||
|
||||
std::vector<std::string> Reader::getTitleVariants(
|
||||
const std::string& title) const
|
||||
{
|
||||
std::vector<std::string> variants;
|
||||
variants.push_back(title);
|
||||
variants.push_back(kiwix::ucFirst(title));
|
||||
variants.push_back(kiwix::lcFirst(title));
|
||||
variants.push_back(kiwix::toTitle(title));
|
||||
return variants;
|
||||
}
|
||||
|
||||
/* Try also a few variations of the prefix to have better results */
|
||||
bool Reader::searchSuggestionsSmart(const string& prefix,
|
||||
unsigned int suggestionsCount)
|
||||
{
|
||||
std::vector<std::string> variants = this->getTitleVariants(prefix);
|
||||
bool retVal;
|
||||
|
||||
this->suggestions.clear();
|
||||
this->suggestionsOffset = this->suggestions.begin();
|
||||
/* Try to search in the title using fulltext search database */
|
||||
const zim::Search* suggestionSearch
|
||||
= this->getZimFileHandler()->suggestions(prefix, 0, suggestionsCount);
|
||||
|
||||
if (suggestionSearch->get_matches_estimated()) {
|
||||
for (auto current = suggestionSearch->begin();
|
||||
current != suggestionSearch->end();
|
||||
current++) {
|
||||
std::vector<std::string> suggestion;
|
||||
suggestion.push_back(current->getTitle());
|
||||
suggestion.push_back("/A/" + current->getUrl());
|
||||
suggestion.push_back(kiwix::normalize(current->getTitle()));
|
||||
this->suggestions.push_back(suggestion);
|
||||
}
|
||||
this->suggestionsOffset = this->suggestions.begin();
|
||||
retVal = true;
|
||||
} else {
|
||||
for (std::vector<std::string>::iterator variantsItr = variants.begin();
|
||||
variantsItr != variants.end();
|
||||
variantsItr++) {
|
||||
retVal = this->searchSuggestions(*variantsItr, suggestionsCount, false)
|
||||
|| retVal;
|
||||
}
|
||||
}
|
||||
|
||||
return retVal;
|
||||
}
|
||||
|
||||
/* Get next suggestion */
|
||||
bool Reader::getNextSuggestion(string& title)
|
||||
{
|
||||
if (this->suggestionsOffset != this->suggestions.end()) {
|
||||
/* title */
|
||||
title = (*(this->suggestionsOffset))[0];
|
||||
|
||||
/* increment the cursor for the next call */
|
||||
this->suggestionsOffset++;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
 * Hand out the title and URL of the suggestion under the cursor and advance
 * the cursor.
 *
 * @param title Receives element 0 of the current suggestion entry.
 * @param url   Receives element 1 of the current suggestion entry.
 * @return false, leaving both outputs untouched, when the cursor is already
 *         at the end of the suggestion list; true otherwise.
 */
bool Reader::getNextSuggestion(string& title, string& url)
{
  if (this->suggestionsOffset == this->suggestions.end()) {
    return false;
  }

  const std::vector<std::string>& entry = *(this->suggestionsOffset);
  title = entry[0];
  url = entry[1];

  /* Move on so that the next call returns the following entry. */
  this->suggestionsOffset++;
  return true;
}
|
||||
|
||||
/* Check if the file has as checksum */
|
||||
bool Reader::canCheckIntegrity() const
|
||||
{
|
||||
return this->zimFileHandler->getChecksum() != "";
|
||||
}
|
||||
|
||||
/* Return true if corrupted, false otherwise */
|
||||
bool Reader::isCorrupted() const
|
||||
{
|
||||
try {
|
||||
if (this->zimFileHandler->verify() == true) {
|
||||
return false;
|
||||
}
|
||||
} catch (exception& e) {
|
||||
cerr << e.what() << endl;
|
||||
return true;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Return the file size, works also for splitted files */
|
||||
unsigned int Reader::getFileSize() const
|
||||
{
|
||||
zim::File* file = this->getZimFileHandler();
|
||||
zim::offset_type size = 0;
|
||||
|
||||
if (file != NULL) {
|
||||
size = file->getFilesize();
|
||||
}
|
||||
|
||||
return (size / 1024);
|
||||
}
|
||||
}
|
||||
465
src/searcher.cpp
Normal file
@@ -0,0 +1,465 @@
|
||||
/*
|
||||
* Copyright 2011 Emmanuel Engelhart <kelson@kiwix.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 3 of the License, or
|
||||
* any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
* MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
|
||||
#include <cmath>
|
||||
|
||||
#include "searcher.h"
|
||||
#include "reader.h"
|
||||
#include "xapianSearcher.h"
|
||||
|
||||
#include <zim/search.h>
|
||||
|
||||
#ifdef ENABLE_CTPP2
|
||||
#include <ctpp2/CDT.hpp>
|
||||
#include <ctpp2/CTPP2FileLogger.hpp>
|
||||
#include <ctpp2/CTPP2SimpleVM.hpp>
|
||||
#include "ctpp2/CTPP2VMStringLoader.hpp"
|
||||
#include "kiwixlib-resources.h"
|
||||
|
||||
using namespace CTPP;
|
||||
#endif
|
||||
|
||||
#define MAX_SEARCH_LEN 140
|
||||
|
||||
namespace kiwix
|
||||
{
|
||||
class _Result : public Result
|
||||
{
|
||||
public:
|
||||
_Result(zim::Search::iterator& iterator);
|
||||
virtual ~_Result(){};
|
||||
|
||||
virtual std::string get_url();
|
||||
virtual std::string get_title();
|
||||
virtual int get_score();
|
||||
virtual std::string get_snippet();
|
||||
virtual std::string get_content();
|
||||
virtual int get_wordCount();
|
||||
virtual int get_size();
|
||||
virtual int get_readerIndex();
|
||||
|
||||
private:
|
||||
zim::Search::iterator iterator;
|
||||
};
|
||||
|
||||
struct SearcherInternal {
|
||||
const zim::Search* _search;
|
||||
XapianSearcher* _xapianSearcher;
|
||||
zim::Search::iterator current_iterator;
|
||||
|
||||
SearcherInternal() : _search(NULL), _xapianSearcher(NULL) {}
|
||||
~SearcherInternal()
|
||||
{
|
||||
if (_search != NULL) {
|
||||
delete _search;
|
||||
}
|
||||
if (_xapianSearcher != NULL) {
|
||||
delete _xapianSearcher;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
/* Constructor */
|
||||
Searcher::Searcher(const string& xapianDirectoryPath,
|
||||
Reader* reader,
|
||||
const string& humanReadableName)
|
||||
: internal(new SearcherInternal()),
|
||||
searchPattern(""),
|
||||
protocolPrefix("zim://"),
|
||||
searchProtocolPrefix("search://?"),
|
||||
resultCountPerPage(0),
|
||||
estimatedResultCount(0),
|
||||
resultStart(0),
|
||||
resultEnd(0),
|
||||
contentHumanReadableId(humanReadableName)
|
||||
{
|
||||
loadICUExternalTables();
|
||||
if (!reader || !reader->hasFulltextIndex()) {
|
||||
internal->_xapianSearcher = new XapianSearcher(xapianDirectoryPath, reader);
|
||||
}
|
||||
this->humanReaderNames.push_back(humanReadableName);
|
||||
}
|
||||
|
||||
/**
 * Build a searcher with no reader attached; readers are registered
 * afterwards through add_reader().
 *
 * @param humanReadableName Stored as the content id.
 */
Searcher::Searcher(const std::string& humanReadableName)
    : internal(new SearcherInternal()),
      searchPattern(""),
      protocolPrefix("zim://"),
      searchProtocolPrefix("search://?"),
      resultCountPerPage(0),
      estimatedResultCount(0),
      resultStart(0),
      resultEnd(0),
      contentHumanReadableId(humanReadableName)
{
  loadICUExternalTables();
}
|
||||
|
||||
/* Destructor */
|
||||
Searcher::~Searcher()
|
||||
{
|
||||
delete internal;
|
||||
}
|
||||
|
||||
/**
 * Register a reader, together with its display name, as a search source.
 *
 * @param reader            Reader to add; it is dereferenced, so it must
 *                          not be NULL.
 * @param humanReadableName Name recorded alongside the reader.
 * @return false, without registering anything, when the reader has no
 *         fulltext index; true otherwise.
 */
bool Searcher::add_reader(Reader* reader, const std::string& humanReadableName)
{
  if (reader->hasFulltextIndex()) {
    this->readers.push_back(reader);
    this->humanReaderNames.push_back(humanReadableName);
    return true;
  }

  return false;
}
|
||||
|
||||
/* Search strings in the database */
|
||||
void Searcher::search(std::string& search,
|
||||
unsigned int resultStart,
|
||||
unsigned int resultEnd,
|
||||
const bool verbose)
|
||||
{
|
||||
this->reset();
|
||||
|
||||
if (verbose == true) {
|
||||
cout << "Performing query `" << search << "'" << endl;
|
||||
}
|
||||
|
||||
/* If resultEnd & resultStart inverted */
|
||||
if (resultStart > resultEnd) {
|
||||
resultEnd += resultStart;
|
||||
resultStart = resultEnd - resultStart;
|
||||
resultEnd -= resultStart;
|
||||
}
|
||||
|
||||
/* Try to find results */
|
||||
if (resultStart != resultEnd) {
|
||||
/* Avoid big researches */
|
||||
this->resultCountPerPage = resultEnd - resultStart;
|
||||
if (this->resultCountPerPage > MAX_SEARCH_LEN) {
|
||||
resultEnd = resultStart + MAX_SEARCH_LEN;
|
||||
this->resultCountPerPage = MAX_SEARCH_LEN;
|
||||
}
|
||||
|
||||
/* Perform the search */
|
||||
this->searchPattern = search;
|
||||
this->resultStart = resultStart;
|
||||
this->resultEnd = resultEnd;
|
||||
string unaccentedSearch = removeAccents(search);
|
||||
if (internal->_xapianSearcher) {
|
||||
internal->_xapianSearcher->searchInIndex(
|
||||
unaccentedSearch, resultStart, resultEnd, verbose);
|
||||
this->estimatedResultCount
|
||||
= internal->_xapianSearcher->results.get_matches_estimated();
|
||||
} else {
|
||||
std::vector<const zim::File*> zims;
|
||||
for (auto current = this->readers.begin(); current != this->readers.end();
|
||||
current++) {
|
||||
if ( (*current)->hasFulltextIndex() ) {
|
||||
zims.push_back((*current)->getZimFileHandler());
|
||||
}
|
||||
}
|
||||
zim::Search* search = new zim::Search(zims);
|
||||
search->set_query(unaccentedSearch);
|
||||
search->set_range(resultStart, resultEnd);
|
||||
internal->_search = search;
|
||||
internal->current_iterator = internal->_search->begin();
|
||||
this->estimatedResultCount = internal->_search->get_matches_estimated();
|
||||
}
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
void Searcher::geo_search(float latitude, float longitude, float distance,
|
||||
unsigned int resultStart,
|
||||
unsigned int resultEnd,
|
||||
const bool verbose)
|
||||
{
|
||||
this->reset();
|
||||
|
||||
if (verbose == true) {
|
||||
cout << "Performing geo query `" << distance << "&(" << latitude << ";" << longitude << ")'" << endl;
|
||||
}
|
||||
|
||||
/* If resultEnd & resultStart inverted */
|
||||
if (resultStart > resultEnd) {
|
||||
resultEnd += resultStart;
|
||||
resultStart = resultEnd - resultStart;
|
||||
resultEnd -= resultStart;
|
||||
}
|
||||
|
||||
/* Try to find results */
|
||||
if (resultStart == resultEnd) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (internal->_xapianSearcher) {
|
||||
return;
|
||||
}
|
||||
|
||||
/* Avoid big researches */
|
||||
this->resultCountPerPage = resultEnd - resultStart;
|
||||
if (this->resultCountPerPage > MAX_SEARCH_LEN) {
|
||||
resultEnd = resultStart + MAX_SEARCH_LEN;
|
||||
this->resultCountPerPage = MAX_SEARCH_LEN;
|
||||
}
|
||||
|
||||
/* Perform the search */
|
||||
std::ostringstream oss;
|
||||
oss << "Articles located less than " << distance << " meters of " << latitude << ";" << longitude;
|
||||
this->searchPattern = oss.str();
|
||||
this->resultStart = resultStart;
|
||||
this->resultEnd = resultEnd;
|
||||
|
||||
std::vector<const zim::File*> zims;
|
||||
for (auto current = this->readers.begin(); current != this->readers.end();
|
||||
current++) {
|
||||
zims.push_back((*current)->getZimFileHandler());
|
||||
}
|
||||
zim::Search* search = new zim::Search(zims);
|
||||
search->set_query("");
|
||||
search->set_georange(latitude, longitude, distance);
|
||||
search->set_range(resultStart, resultEnd);
|
||||
internal->_search = search;
|
||||
internal->current_iterator = internal->_search->begin();
|
||||
this->estimatedResultCount = internal->_search->get_matches_estimated();
|
||||
}
|
||||
|
||||
|
||||
void Searcher::restart_search()
|
||||
{
|
||||
if (internal->_xapianSearcher) {
|
||||
internal->_xapianSearcher->restart_search();
|
||||
} else if (internal->_search) {
|
||||
internal->current_iterator = internal->_search->begin();
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Return the next result of the current search and advance the cursor.
 *
 * @return A result obtained from the Xapian searcher when there is one;
 *         otherwise a `_Result` allocated with `new` (the getHtml() caller
 *         in this file deletes it), or NULL when there is no search or no
 *         result left.
 */
Result* Searcher::getNextResult()
{
  if (internal->_xapianSearcher) {
    return internal->_xapianSearcher->getNextResult();
  }

  const bool hasMore = internal->_search
                       && internal->current_iterator != internal->_search->end();
  if (!hasMore) {
    return NULL;
  }

  Result* next = new _Result(internal->current_iterator);
  internal->current_iterator++;
  return next;
}
|
||||
|
||||
/* Reset the results */
|
||||
void Searcher::reset()
|
||||
{
|
||||
this->estimatedResultCount = 0;
|
||||
this->searchPattern = "";
|
||||
return;
|
||||
}
|
||||
|
||||
void Searcher::suggestions(std::string& search, const bool verbose)
|
||||
{
|
||||
this->reset();
|
||||
|
||||
if (verbose == true) {
|
||||
cout << "Performing suggestion query `" << search << "`" << endl;
|
||||
}
|
||||
|
||||
this->searchPattern = search;
|
||||
this->resultStart = 0;
|
||||
this->resultEnd = 10;
|
||||
string unaccentedSearch = removeAccents(search);
|
||||
|
||||
if (internal->_xapianSearcher) {
|
||||
/* [TODO] Suggestion on a external database ?
|
||||
* We do not support that. */
|
||||
this->estimatedResultCount = 0;
|
||||
} else {
|
||||
std::vector<const zim::File*> zims;
|
||||
for (auto current = this->readers.begin(); current != this->readers.end();
|
||||
current++) {
|
||||
zims.push_back((*current)->getZimFileHandler());
|
||||
}
|
||||
zim::Search* search = new zim::Search(zims);
|
||||
search->set_query(unaccentedSearch);
|
||||
search->set_range(resultStart, resultEnd);
|
||||
search->set_suggestion_mode(true);
|
||||
internal->_search = search;
|
||||
internal->current_iterator = internal->_search->begin();
|
||||
this->estimatedResultCount = internal->_search->get_matches_estimated();
|
||||
}
|
||||
}
|
||||
|
||||
/* Return the result count estimation */
|
||||
unsigned int Searcher::getEstimatedResultCount()
|
||||
{
|
||||
return this->estimatedResultCount;
|
||||
}
|
||||
|
||||
bool Searcher::setProtocolPrefix(const std::string prefix)
|
||||
{
|
||||
this->protocolPrefix = prefix;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Searcher::setSearchProtocolPrefix(const std::string prefix)
|
||||
{
|
||||
this->searchProtocolPrefix = prefix;
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
 * Keep a copy of `iterator`; every accessor of this result reads from that
 * copy.
 */
_Result::_Result(zim::Search::iterator& iterator) : iterator(iterator) {}
|
||||
|
||||
std::string _Result::get_url()
|
||||
{
|
||||
return iterator.get_url();
|
||||
}
|
||||
std::string _Result::get_title()
|
||||
{
|
||||
return iterator.get_title();
|
||||
}
|
||||
int _Result::get_score()
|
||||
{
|
||||
return iterator.get_score();
|
||||
}
|
||||
std::string _Result::get_snippet()
|
||||
{
|
||||
return iterator.get_snippet();
|
||||
}
|
||||
std::string _Result::get_content()
|
||||
{
|
||||
if (iterator->good()) {
|
||||
return iterator->getData();
|
||||
}
|
||||
return "";
|
||||
}
|
||||
int _Result::get_size()
|
||||
{
|
||||
return iterator.get_size();
|
||||
}
|
||||
int _Result::get_wordCount()
|
||||
{
|
||||
return iterator.get_wordCount();
|
||||
}
|
||||
int _Result::get_readerIndex()
|
||||
{
|
||||
return iterator.get_fileIndex();
|
||||
}
|
||||
#ifdef ENABLE_CTPP2
|
||||
|
||||
/**
 * Render the results of the current search with the embedded
 * RESOURCE::results_ct2 template.
 *
 * The template receives the result list ("results"), the pagination
 * entries ("pages"), and the summary values of the query (count, search
 * pattern, result range, protocol prefixes and content id). The result
 * cursor is rewound both before and after the results are read.
 *
 * No pagination is produced when the page size is 0, which is the case
 * until a query with a non-empty range has run; the page computations
 * divide by that size.
 *
 * @return The output produced by running the template.
 */
string Searcher::getHtml()
{
  SimpleVM oSimpleVM(
      1024,      // iIMaxFunctions (default value)
      4096,      // iIMaxArgStackSize (default value)
      4096,      // iIMaxCodeStackSize (default value)
      10240 * 2  // iIMaxSteps (default*2)
  );

  // Fill data
  CDT oData;
  CDT resultsCDT(CDT::ARRAY_VAL);

  this->restart_search();
  Result* p_result = NULL;
  while ((p_result = this->getNextResult())) {
    CDT result;
    result["title"] = p_result->get_title();
    result["url"] = p_result->get_url();
    result["snippet"] = p_result->get_snippet();
    result["contentId"] = humanReaderNames[p_result->get_readerIndex()];

    // Negative values mean "unknown": the field is then left out.
    if (p_result->get_size() >= 0) {
      result["size"] = kiwix::beautifyInteger(p_result->get_size());
    }

    if (p_result->get_wordCount() >= 0) {
      result["wordCount"] = kiwix::beautifyInteger(p_result->get_wordCount());
    }

    resultsCDT.PushBack(result);
    delete p_result;
  }
  this->restart_search();
  oData["results"] = resultsCDT;

  // pages
  CDT pagesCDT(CDT::ARRAY_VAL);

  // Guard every division below: dividing by a page size of 0 is undefined.
  const unsigned int perPage = this->resultCountPerPage;
  unsigned int pageStart = 0;
  unsigned int pageCount = 0;

  if (perPage != 0) {
    // Start the pager 4 pages before the current one once past page 5.
    pageStart = this->resultStart / perPage >= 5
                    ? this->resultStart / perPage - 4
                    : 0;
    pageCount = this->estimatedResultCount / perPage + 1 - pageStart;

    if (pageCount > 10) {
      pageCount = 10;
    } else if (pageCount == 1) {
      // A single page needs no pager.
      pageCount = 0;
    }
  }

  for (unsigned int i = pageStart; i < pageStart + pageCount; i++) {
    CDT page;
    page["label"] = i + 1;
    page["start"] = i * this->resultCountPerPage;
    page["end"] = (i + 1) * this->resultCountPerPage;

    if (i * this->resultCountPerPage == this->resultStart) {
      page["selected"] = true;
    }

    pagesCDT.PushBack(page);
  }
  oData["pages"] = pagesCDT;

  oData["count"] = kiwix::beautifyInteger(this->estimatedResultCount);
  oData["searchPattern"] = kiwix::encodeDiples(this->searchPattern);
  oData["searchPatternEncoded"] = urlEncode(this->searchPattern);
  oData["resultStart"] = this->resultStart + 1;
  oData["resultEnd"] = (this->resultEnd > this->estimatedResultCount
                            ? this->estimatedResultCount
                            : this->resultEnd);
  oData["resultRange"] = this->resultCountPerPage;
  oData["resultLastPageStart"]
      = (perPage != 0 && this->estimatedResultCount > perPage)
            ? std::round(this->estimatedResultCount / perPage) * perPage
            : 0;
  oData["protocolPrefix"] = this->protocolPrefix;
  oData["searchProtocolPrefix"] = this->searchProtocolPrefix;
  oData["contentId"] = this->contentHumanReadableId;

  std::string template_ct2 = RESOURCE::results_ct2;
  VMStringLoader oLoader(template_ct2.c_str(), template_ct2.size());

  FileLogger oLogger(stderr);

  // DEBUG only (write output to stdout)
  // oSimpleVM.Run(oData, oLoader, stdout, oLogger);

  std::string sResult;
  oSimpleVM.Run(oData, oLoader, sResult, oLogger);

  return sResult;
}
|
||||
|
||||
#endif
|
||||
}
|
||||
@@ -40,6 +40,7 @@ class MyHtmlParser : public HtmlParser {
|
||||
void process_text(const string &text);
|
||||
void opening_tag(const string &tag);
|
||||
void closing_tag(const string &tag);
|
||||
using HtmlParser::parse_html;
|
||||
void parse_html(const string &text, const string &charset_,
|
||||
bool charset_from_meta_);
|
||||
MyHtmlParser() :
|
||||
231
src/xapianSearcher.cpp
Normal file
@@ -0,0 +1,231 @@
|
||||
/*
|
||||
* Copyright 2011 Emmanuel Engelhart <kelson@kiwix.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 3 of the License, or
|
||||
* any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
* MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#include "xapianSearcher.h"
|
||||
#include <sys/types.h>
|
||||
#include <unicode/locid.h>
|
||||
#ifndef _WIN32
|
||||
# include <unistd.h>
|
||||
#endif
|
||||
#include <zim/article.h>
|
||||
#include <zim/error.h>
|
||||
#include <zim/file.h>
|
||||
#include <zim/zim.h>
|
||||
#include "xapian/myhtmlparse.h"
|
||||
|
||||
#include <vector>
|
||||
|
||||
namespace kiwix
|
||||
{
|
||||
std::map<std::string, int> read_valuesmap(const std::string& s)
|
||||
{
|
||||
std::map<std::string, int> result;
|
||||
std::vector<std::string> elems = split(s, ";");
|
||||
for (std::vector<std::string>::iterator elem = elems.begin();
|
||||
elem != elems.end();
|
||||
elem++) {
|
||||
std::vector<std::string> tmp_elems = split(*elem, ":");
|
||||
result.insert(
|
||||
std::pair<std::string, int>(tmp_elems[0], atoi(tmp_elems[1].c_str())));
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/* Constructor */
|
||||
XapianSearcher::XapianSearcher(const string& xapianDirectoryPath,
|
||||
Reader* reader)
|
||||
: reader(reader)
|
||||
{
|
||||
this->openIndex(xapianDirectoryPath);
|
||||
}
|
||||
|
||||
/* Open Xapian readable database */
|
||||
void XapianSearcher::openIndex(const string& directoryPath)
|
||||
{
|
||||
this->readableDatabase = Xapian::Database(directoryPath);
|
||||
this->valuesmap
|
||||
= read_valuesmap(this->readableDatabase.get_metadata("valuesmap"));
|
||||
this->language = this->readableDatabase.get_metadata("language");
|
||||
this->stopwords = this->readableDatabase.get_metadata("stopwords");
|
||||
setup_queryParser();
|
||||
}
|
||||
|
||||
/* Close Xapian writable database */
|
||||
void XapianSearcher::closeIndex()
|
||||
{
|
||||
return;
|
||||
}
|
||||
void XapianSearcher::setup_queryParser()
|
||||
{
|
||||
queryParser.set_database(readableDatabase);
|
||||
if (!language.empty()) {
|
||||
/* Build ICU Local object to retrieve ISO-639 language code (from
|
||||
ISO-639-3) */
|
||||
icu::Locale languageLocale(language.c_str());
|
||||
|
||||
/* Configuring language base steemming */
|
||||
try {
|
||||
stemmer = Xapian::Stem(languageLocale.getLanguage());
|
||||
queryParser.set_stemmer(stemmer);
|
||||
queryParser.set_stemming_strategy(Xapian::QueryParser::STEM_ALL);
|
||||
} catch (...) {
|
||||
std::cout << "No steemming for language '" << languageLocale.getLanguage()
|
||||
<< "'" << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
if (!stopwords.empty()) {
|
||||
std::string stopWord;
|
||||
std::istringstream file(this->stopwords);
|
||||
while (std::getline(file, stopWord, '\n')) {
|
||||
this->stopper.add(stopWord);
|
||||
}
|
||||
queryParser.set_stopper(&(this->stopper));
|
||||
}
|
||||
}
|
||||
|
||||
/* Search strings in the database */
|
||||
void XapianSearcher::searchInIndex(string& search,
|
||||
const unsigned int resultStart,
|
||||
const unsigned int resultEnd,
|
||||
const bool verbose)
|
||||
{
|
||||
/* Create the query */
|
||||
Xapian::Query query = queryParser.parse_query(search);
|
||||
|
||||
/* Create the enquire object */
|
||||
Xapian::Enquire enquire(this->readableDatabase);
|
||||
enquire.set_query(query);
|
||||
|
||||
/* Get the results */
|
||||
this->results = enquire.get_mset(resultStart, resultEnd - resultStart);
|
||||
this->current_result = this->results.begin();
|
||||
}
|
||||
|
||||
/* Get next result */
|
||||
Result* XapianSearcher::getNextResult()
|
||||
{
|
||||
if (this->current_result != this->results.end()) {
|
||||
XapianResult* result = new XapianResult(this, this->current_result);
|
||||
this->current_result++;
|
||||
return result;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
void XapianSearcher::restart_search()
|
||||
{
|
||||
this->current_result = this->results.begin();
|
||||
}
|
||||
|
||||
/**
 * Capture one match: the owning searcher, a copy of the match iterator and
 * the document that iterator refers to.
 */
XapianResult::XapianResult(XapianSearcher* searcher,
                           Xapian::MSetIterator& iterator)
    : searcher(searcher),
      iterator(iterator),
      document(iterator.get_document())
{
}
|
||||
|
||||
std::string XapianResult::get_url()
|
||||
{
|
||||
return document.get_data();
|
||||
}
|
||||
std::string XapianResult::get_title()
|
||||
{
|
||||
if (searcher->valuesmap.empty()) {
|
||||
/* This is the old legacy version. Guess and try */
|
||||
return document.get_value(0);
|
||||
} else if (searcher->valuesmap.find("title") != searcher->valuesmap.end()) {
|
||||
return document.get_value(searcher->valuesmap["title"]);
|
||||
}
|
||||
return "";
|
||||
}
|
||||
|
||||
int XapianResult::get_score()
|
||||
{
|
||||
return iterator.get_percent();
|
||||
}
|
||||
std::string XapianResult::get_snippet()
|
||||
{
|
||||
if (searcher->valuesmap.empty()) {
|
||||
/* This is the old legacy version. Guess and try */
|
||||
std::string stored_snippet = document.get_value(1);
|
||||
if (!stored_snippet.empty()) {
|
||||
return stored_snippet;
|
||||
}
|
||||
/* Let's continue here, and see if we can genenate one */
|
||||
} else if (searcher->valuesmap.find("snippet") != searcher->valuesmap.end()) {
|
||||
return document.get_value(searcher->valuesmap["snippet"]);
|
||||
}
|
||||
/* No reader, no snippet */
|
||||
if (!searcher->reader) {
|
||||
return "";
|
||||
}
|
||||
/* Get the content of the article to generate a snippet.
|
||||
We parse it and use the html dump to avoid remove html tags in the
|
||||
content and be able to nicely cut the text at random place. */
|
||||
MyHtmlParser htmlParser;
|
||||
std::string content = get_content();
|
||||
if (content.empty()) {
|
||||
return content;
|
||||
}
|
||||
try {
|
||||
htmlParser.parse_html(content, "UTF-8", true);
|
||||
} catch (...) {
|
||||
}
|
||||
return searcher->results.snippet(htmlParser.dump, 500);
|
||||
}
|
||||
|
||||
std::string XapianResult::get_content()
|
||||
{
|
||||
if (!searcher->reader) {
|
||||
return "";
|
||||
}
|
||||
auto entry = searcher->reader->getEntryFromEncodedPath(get_url());
|
||||
return entry.getContent();
|
||||
}
|
||||
|
||||
int XapianResult::get_size()
|
||||
{
|
||||
if (searcher->valuesmap.empty()) {
|
||||
/* This is the old legacy version. Guess and try */
|
||||
return document.get_value(2).empty() == true
|
||||
? -1
|
||||
: atoi(document.get_value(2).c_str());
|
||||
} else if (searcher->valuesmap.find("size") != searcher->valuesmap.end()) {
|
||||
return atoi(document.get_value(searcher->valuesmap["size"]).c_str());
|
||||
}
|
||||
/* The size is never used. Do we really want to get the content and
|
||||
calculate the size ? */
|
||||
return -1;
|
||||
}
|
||||
|
||||
int XapianResult::get_wordCount()
|
||||
{
|
||||
if (searcher->valuesmap.empty()) {
|
||||
/* This is the old legacy version. Guess and try */
|
||||
return document.get_value(3).empty() == true
|
||||
? -1
|
||||
: atoi(document.get_value(3).c_str());
|
||||
} else if (searcher->valuesmap.find("wordcount")
|
||||
!= searcher->valuesmap.end()) {
|
||||
return atoi(document.get_value(searcher->valuesmap["wordcount"]).c_str());
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
} // Kiwix namespace
|
||||
@@ -1,43 +0,0 @@
|
||||
#!/bin/sh
# Generate src/common/resourceTools.h: every file found below the current
# directory is embedded with reswrap, followed by a std::map from file name
# to (data pointer, size). Run it from the directory holding the resources.
# NOTE(review): file names containing whitespace are still split by the
# `for` loop below.

SCRIPT_DIR=$(dirname "$0")
RESOURCE_FILE="$SCRIPT_DIR/../src/common/resourceTools.h"

# C++ source of the map, accumulated with literal \n and \t escapes that are
# expanded when the text is written out.
MAP="static std::map<std::string, std::pair<const unsigned char*, unsigned int> > createResourceMap() { \n"
MAP=$MAP"\tstd::map<std::string, std::pair<const unsigned char*, unsigned int> > m; \n"

# Delete old version of the file
rm -f "$RESOURCE_FILE"

# Create header of resourceTools.h
cat << EOF > "$RESOURCE_FILE"
#ifndef KIWIX_RESOURCETOOLS_H
#define KIWIX_RESOURCETOOLS_H

#include <string>
#include <string.h>
#include <map>

std::string getResourceAsString(const std::string &name);

EOF

# Goes through all files in /static/
for FILE in `find . -type f | sed 's/\.\///' | grep -v .svn | grep -v Makefile | grep -v .sh | grep -v "~" | grep -v "#"`
do
    # Turn the path into a C identifier: '/', '.' and '-' become '_'.
    FILE_ID=`echo "$FILE" | sed "s/\//_/g" | sed "s/\./_/g" | sed "s/\-/_/g"`
    echo "Inserting $FILE... [$FILE_ID]"
    reswrap -s -x -oa "$RESOURCE_FILE" -r "$FILE_ID" "$FILE"
    MAP=$MAP"\tm[\""$FILE"\"] = std::pair <const unsigned char*, unsigned int>("$FILE_ID", sizeof "$FILE_ID"); \n";
done;
MAP=$MAP"\treturn m; \n";
MAP=$MAP"} \n\n"
MAP=$MAP"static std::map<std::string, std::pair<const unsigned char*, unsigned int> > resourceMap = createResourceMap(); \n\n"

# Create the map table
# map<int, int> m = map_list_of (1,2) (3,4) (5,6) (7,8);
# printf %b expands the \n and \t escapes in every shell, whereas echo only
# does so in some; quoting also protects the text from globbing.
printf '%b\n' "$MAP" >> "$RESOURCE_FILE"

# Create the footer
cat << EOF >> "$RESOURCE_FILE"
#endif

EOF
|
||||
9789
static/jquery-ui/external/jquery/jquery.js
vendored
|
Before Width: | Height: | Size: 1.7 KiB |
|
Before Width: | Height: | Size: 212 B |
|
Before Width: | Height: | Size: 208 B |
|
Before Width: | Height: | Size: 335 B |
|
Before Width: | Height: | Size: 207 B |
|
Before Width: | Height: | Size: 262 B |
|
Before Width: | Height: | Size: 262 B |
|
Before Width: | Height: | Size: 332 B |
|
Before Width: | Height: | Size: 280 B |
|
Before Width: | Height: | Size: 6.8 KiB |