mirror of
https://github.com/kiwix/libkiwix.git
synced 2025-12-24 06:57:59 -05:00
Compare commits
2 Commits
clickable_
...
robust_uri
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
cd9785fe85 | ||
|
|
b7a019469c |
@@ -245,7 +245,7 @@ std::pair<std::string, Library::BookIdSet> InternalServer::selectBooks(const Req
|
||||
auto bookName = request.get_argument("content");
|
||||
try {
|
||||
const auto bookIds = Library::BookIdSet{mp_nameMapper->getIdForName(bookName)};
|
||||
const auto queryString = request.get_query([&](const std::string& key){return key == "content";}, true);
|
||||
const auto queryString = request.get_query([&](const std::string& key){return key == "content";});
|
||||
return {queryString, bookIds};
|
||||
} catch (const std::out_of_range&) {
|
||||
throw Error(noSuchBookErrorMsg(bookName));
|
||||
@@ -270,7 +270,7 @@ std::pair<std::string, Library::BookIdSet> InternalServer::selectBooks(const Req
|
||||
}
|
||||
}
|
||||
const auto bookIds = Library::BookIdSet(id_vec.begin(), id_vec.end());
|
||||
const auto queryString = request.get_query([&](const std::string& key){return key == "books.id";}, true);
|
||||
const auto queryString = request.get_query([&](const std::string& key){return key == "books.id";});
|
||||
return {queryString, bookIds};
|
||||
} catch(const std::out_of_range&) {}
|
||||
|
||||
@@ -288,7 +288,7 @@ std::pair<std::string, Library::BookIdSet> InternalServer::selectBooks(const Req
|
||||
throw Error(noSuchBookErrorMsg(bookName));
|
||||
}
|
||||
}
|
||||
const auto queryString = request.get_query([&](const std::string& key){return key == "books.name";}, true);
|
||||
const auto queryString = request.get_query([&](const std::string& key){return key == "books.name";});
|
||||
return {queryString, bookIds};
|
||||
} catch(const std::out_of_range&) {}
|
||||
|
||||
@@ -299,7 +299,7 @@ std::pair<std::string, Library::BookIdSet> InternalServer::selectBooks(const Req
|
||||
throw Error(nonParameterizedMessage("no-book-found"));
|
||||
}
|
||||
const auto bookIds = Library::BookIdSet(id_vec.begin(), id_vec.end());
|
||||
const auto queryString = request.get_query([&](const std::string& key){return startsWith(key, "books.filter.");}, true);
|
||||
const auto queryString = request.get_query([&](const std::string& key){return startsWith(key, "books.filter.");});
|
||||
return {queryString, bookIds};
|
||||
}
|
||||
|
||||
|
||||
@@ -96,16 +96,15 @@ class RequestContext {
|
||||
std::string get_query() const { return queryString; }
|
||||
|
||||
template<class F>
|
||||
std::string get_query(F filter, bool mustEncode) const {
|
||||
std::string get_query(F filter) const {
|
||||
std::string q;
|
||||
const char* sep = "";
|
||||
auto encode = [=](const std::string& value) { return mustEncode?urlEncode(value):value; };
|
||||
for ( const auto& a : arguments ) {
|
||||
if (!filter(a.first)) {
|
||||
continue;
|
||||
}
|
||||
for (const auto& v: a.second) {
|
||||
q += sep + encode(a.first) + '=' + encode(v);
|
||||
q += sep + urlEncode(a.first) + '=' + urlEncode(v);
|
||||
sep = "&";
|
||||
}
|
||||
}
|
||||
|
||||
@@ -208,6 +208,43 @@ bool isHarmlessUriChar(char c)
|
||||
return false;
|
||||
}
|
||||
|
||||
bool mustBeUriEncodedFor(kiwix::URIComponentKind target, char c)
|
||||
{
|
||||
if (isHarmlessUriChar(c))
|
||||
return false;
|
||||
|
||||
switch (c) {
|
||||
case '/': // There is no reason to encode the path separator in the general
|
||||
// case. It must be encoded only in a path component when its
|
||||
// semantics of a path separator has to be suppressed.
|
||||
return false;
|
||||
|
||||
case '@': // In a relative URL of the form abc@def/xyz (with no / in abc)
|
||||
// a non-encoded @ will make "abc" and "def" to be interpreted as
|
||||
// username and host components, respectively
|
||||
return target == kiwix::URIComponentKind::PATH;
|
||||
|
||||
case ':': // In a relative URL of the form abc:def/xyz (with no / in abc)
|
||||
// a non-encoded : will make "abc" and "def" to be interpreted as
|
||||
// host and port components, respectively
|
||||
return target == kiwix::URIComponentKind::PATH;
|
||||
|
||||
case '?': // A non-encoded '?' acts as a separator between the path
|
||||
// and query components
|
||||
return target == kiwix::URIComponentKind::PATH;
|
||||
|
||||
case '&': return target == kiwix::URIComponentKind::QUERY;
|
||||
case '=': return target == kiwix::URIComponentKind::QUERY;
|
||||
case '+': return target == kiwix::URIComponentKind::QUERY;
|
||||
|
||||
case '#': // A non-encoded '#' in either path or query-component
|
||||
// would mark the beginning of the fragment component
|
||||
return true;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
int hexToInt(char c) {
|
||||
switch (c) {
|
||||
case '0': return 0;
|
||||
@@ -247,6 +284,26 @@ std::string kiwix::urlEncode(const std::string& value)
|
||||
return os.str();
|
||||
}
|
||||
|
||||
namespace kiwix
|
||||
{
|
||||
|
||||
std::string uriEncode(URIComponentKind target, const std::string& value)
|
||||
{
|
||||
std::ostringstream os;
|
||||
os << std::hex << std::uppercase;
|
||||
for (const char c : value) {
|
||||
if ( mustBeUriEncodedFor(target, c) ) {
|
||||
const unsigned int charVal = static_cast<unsigned char>(c);
|
||||
os << '%' << std::setw(2) << std::setfill('0') << charVal;
|
||||
} else {
|
||||
os << c;
|
||||
}
|
||||
}
|
||||
return os.str();
|
||||
}
|
||||
|
||||
} // namespace kiwix
|
||||
|
||||
std::string kiwix::urlDecode(const std::string& value, bool component)
|
||||
{
|
||||
std::ostringstream os;
|
||||
|
||||
@@ -60,6 +60,17 @@ private:
|
||||
std::string urlEncode(const std::string& value);
|
||||
std::string urlDecode(const std::string& value, bool component = false);
|
||||
|
||||
// Only URI components that are of interest to libkiwix
|
||||
// are included in the below enumeration type
|
||||
enum class URIComponentKind
|
||||
{
|
||||
PATH,
|
||||
QUERY
|
||||
};
|
||||
|
||||
// Encode 'value' for usage in a URI componenet specified by 'target'
|
||||
std::string uriEncode(URIComponentKind target, const std::string& value);
|
||||
|
||||
std::string join(const std::vector<std::string>& list, const std::string& sep);
|
||||
|
||||
std::string ucAll(const std::string& word);
|
||||
|
||||
@@ -163,4 +163,35 @@ TEST(stringTools, urlDecode)
|
||||
EXPECT_EQ(urlDecode(encodedUriDelimSymbols, false), encodedUriDelimSymbols);
|
||||
}
|
||||
|
||||
};
|
||||
TEST(stringTools, uriEncode)
|
||||
{
|
||||
const char letters[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
|
||||
EXPECT_EQ(uriEncode(URIComponentKind::PATH, letters), letters);
|
||||
EXPECT_EQ(uriEncode(URIComponentKind::QUERY, letters), letters);
|
||||
|
||||
const char digits[] = "0123456789";
|
||||
EXPECT_EQ(uriEncode(URIComponentKind::PATH, digits), digits);
|
||||
EXPECT_EQ(uriEncode(URIComponentKind::QUERY, digits), digits);
|
||||
|
||||
const char nonEncodableSymbols[] = ".-_~()*!/";
|
||||
EXPECT_EQ(uriEncode(URIComponentKind::PATH, nonEncodableSymbols), nonEncodableSymbols);
|
||||
EXPECT_EQ(uriEncode(URIComponentKind::QUERY, nonEncodableSymbols), nonEncodableSymbols);
|
||||
|
||||
const char uriDelimSymbols[] = ":@?=+&#$;,";
|
||||
EXPECT_EQ(uriEncode(URIComponentKind::PATH, uriDelimSymbols), "%3A%40%3F=+&%23%24%3B%2C");
|
||||
EXPECT_EQ(uriEncode(URIComponentKind::QUERY, uriDelimSymbols), ":@?%3D%2B%26%23%24%3B%2C");
|
||||
|
||||
const char otherSymbols[] = R"(`%^[]{}\|"<>)";
|
||||
EXPECT_EQ(uriEncode(URIComponentKind::PATH, otherSymbols), "%60%25%5E%5B%5D%7B%7D%5C%7C%22%3C%3E");
|
||||
EXPECT_EQ(uriEncode(URIComponentKind::PATH, otherSymbols), uriEncode(URIComponentKind::QUERY, otherSymbols));
|
||||
|
||||
const char whitespace[] = " \n\t\r";
|
||||
EXPECT_EQ(uriEncode(URIComponentKind::PATH, whitespace), "%20%0A%09%0D");
|
||||
EXPECT_EQ(uriEncode(URIComponentKind::PATH, whitespace), uriEncode(URIComponentKind::QUERY, whitespace));
|
||||
|
||||
const char someNonASCIIChars[] = "Σ♂♀ツ";
|
||||
EXPECT_EQ(uriEncode(URIComponentKind::PATH, someNonASCIIChars), "%CE%A3%E2%99%82%E2%99%80%E3%83%84");
|
||||
EXPECT_EQ(uriEncode(URIComponentKind::PATH, someNonASCIIChars), uriEncode(URIComponentKind::QUERY, someNonASCIIChars));
|
||||
}
|
||||
|
||||
} // unnamed namespace
|
||||
|
||||
Reference in New Issue
Block a user