Compare commits

...

2 Commits

Author SHA1 Message Date
Veloman Yunkan
cd9785fe85 Enter uriEncode() 2023-01-25 23:45:18 +04:00
Veloman Yunkan
b7a019469c Unconditional URI-encoding in RequestContext::get_query<F>(F) 2023-01-25 23:41:52 +04:00
5 changed files with 106 additions and 8 deletions

View File

@@ -245,7 +245,7 @@ std::pair<std::string, Library::BookIdSet> InternalServer::selectBooks(const Req
auto bookName = request.get_argument("content");
try {
const auto bookIds = Library::BookIdSet{mp_nameMapper->getIdForName(bookName)};
const auto queryString = request.get_query([&](const std::string& key){return key == "content";}, true);
const auto queryString = request.get_query([&](const std::string& key){return key == "content";});
return {queryString, bookIds};
} catch (const std::out_of_range&) {
throw Error(noSuchBookErrorMsg(bookName));
@@ -270,7 +270,7 @@ std::pair<std::string, Library::BookIdSet> InternalServer::selectBooks(const Req
}
}
const auto bookIds = Library::BookIdSet(id_vec.begin(), id_vec.end());
const auto queryString = request.get_query([&](const std::string& key){return key == "books.id";}, true);
const auto queryString = request.get_query([&](const std::string& key){return key == "books.id";});
return {queryString, bookIds};
} catch(const std::out_of_range&) {}
@@ -288,7 +288,7 @@ std::pair<std::string, Library::BookIdSet> InternalServer::selectBooks(const Req
throw Error(noSuchBookErrorMsg(bookName));
}
}
const auto queryString = request.get_query([&](const std::string& key){return key == "books.name";}, true);
const auto queryString = request.get_query([&](const std::string& key){return key == "books.name";});
return {queryString, bookIds};
} catch(const std::out_of_range&) {}
@@ -299,7 +299,7 @@ std::pair<std::string, Library::BookIdSet> InternalServer::selectBooks(const Req
throw Error(nonParameterizedMessage("no-book-found"));
}
const auto bookIds = Library::BookIdSet(id_vec.begin(), id_vec.end());
const auto queryString = request.get_query([&](const std::string& key){return startsWith(key, "books.filter.");}, true);
const auto queryString = request.get_query([&](const std::string& key){return startsWith(key, "books.filter.");});
return {queryString, bookIds};
}

View File

@@ -96,16 +96,15 @@ class RequestContext {
std::string get_query() const { return queryString; }
template<class F>
std::string get_query(F filter, bool mustEncode) const {
std::string get_query(F filter) const {
std::string q;
const char* sep = "";
auto encode = [=](const std::string& value) { return mustEncode?urlEncode(value):value; };
for ( const auto& a : arguments ) {
if (!filter(a.first)) {
continue;
}
for (const auto& v: a.second) {
q += sep + encode(a.first) + '=' + encode(v);
q += sep + urlEncode(a.first) + '=' + urlEncode(v);
sep = "&";
}
}

View File

@@ -208,6 +208,43 @@ bool isHarmlessUriChar(char c)
return false;
}
bool mustBeUriEncodedFor(kiwix::URIComponentKind target, char c)
{
if (isHarmlessUriChar(c))
return false;
switch (c) {
case '/': // There is no reason to encode the path separator in the general
// case. It must be encoded only in a path component when its
// semantics of a path separator has to be suppressed.
return false;
case '@': // In a relative URL of the form abc@def/xyz (with no / in abc)
// a non-encoded @ will make "abc" and "def" to be interpreted as
// username and host components, respectively
return target == kiwix::URIComponentKind::PATH;
case ':': // In a relative URL of the form abc:def/xyz (with no / in abc)
// a non-encoded : will make "abc" and "def" to be interpreted as
// host and port components, respectively
return target == kiwix::URIComponentKind::PATH;
case '?': // A non-encoded '?' acts as a separator between the path
// and query components
return target == kiwix::URIComponentKind::PATH;
case '&': return target == kiwix::URIComponentKind::QUERY;
case '=': return target == kiwix::URIComponentKind::QUERY;
case '+': return target == kiwix::URIComponentKind::QUERY;
case '#': // A non-encoded '#' in either path or query-component
// would mark the beginning of the fragment component
return true;
}
return true;
}
int hexToInt(char c) {
switch (c) {
case '0': return 0;
@@ -247,6 +284,26 @@ std::string kiwix::urlEncode(const std::string& value)
return os.str();
}
namespace kiwix
{
std::string uriEncode(URIComponentKind target, const std::string& value)
{
std::ostringstream os;
os << std::hex << std::uppercase;
for (const char c : value) {
if ( mustBeUriEncodedFor(target, c) ) {
const unsigned int charVal = static_cast<unsigned char>(c);
os << '%' << std::setw(2) << std::setfill('0') << charVal;
} else {
os << c;
}
}
return os.str();
}
} // namespace kiwix
std::string kiwix::urlDecode(const std::string& value, bool component)
{
std::ostringstream os;

View File

@@ -60,6 +60,17 @@ private:
std::string urlEncode(const std::string& value);
std::string urlDecode(const std::string& value, bool component = false);
// Only URI components that are of interest to libkiwix
// are included in the below enumeration type
enum class URIComponentKind
{
PATH,
QUERY
};
// Encode 'value' for usage in a URI componenet specified by 'target'
std::string uriEncode(URIComponentKind target, const std::string& value);
std::string join(const std::vector<std::string>& list, const std::string& sep);
std::string ucAll(const std::string& word);

View File

@@ -163,4 +163,35 @@ TEST(stringTools, urlDecode)
EXPECT_EQ(urlDecode(encodedUriDelimSymbols, false), encodedUriDelimSymbols);
}
};
TEST(stringTools, uriEncode)
{
const char letters[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
EXPECT_EQ(uriEncode(URIComponentKind::PATH, letters), letters);
EXPECT_EQ(uriEncode(URIComponentKind::QUERY, letters), letters);
const char digits[] = "0123456789";
EXPECT_EQ(uriEncode(URIComponentKind::PATH, digits), digits);
EXPECT_EQ(uriEncode(URIComponentKind::QUERY, digits), digits);
const char nonEncodableSymbols[] = ".-_~()*!/";
EXPECT_EQ(uriEncode(URIComponentKind::PATH, nonEncodableSymbols), nonEncodableSymbols);
EXPECT_EQ(uriEncode(URIComponentKind::QUERY, nonEncodableSymbols), nonEncodableSymbols);
const char uriDelimSymbols[] = ":@?=+&#$;,";
EXPECT_EQ(uriEncode(URIComponentKind::PATH, uriDelimSymbols), "%3A%40%3F=+&%23%24%3B%2C");
EXPECT_EQ(uriEncode(URIComponentKind::QUERY, uriDelimSymbols), ":@?%3D%2B%26%23%24%3B%2C");
const char otherSymbols[] = R"(`%^[]{}\|"<>)";
EXPECT_EQ(uriEncode(URIComponentKind::PATH, otherSymbols), "%60%25%5E%5B%5D%7B%7D%5C%7C%22%3C%3E");
EXPECT_EQ(uriEncode(URIComponentKind::PATH, otherSymbols), uriEncode(URIComponentKind::QUERY, otherSymbols));
const char whitespace[] = " \n\t\r";
EXPECT_EQ(uriEncode(URIComponentKind::PATH, whitespace), "%20%0A%09%0D");
EXPECT_EQ(uriEncode(URIComponentKind::PATH, whitespace), uriEncode(URIComponentKind::QUERY, whitespace));
const char someNonASCIIChars[] = "Σ♂♀ツ";
EXPECT_EQ(uriEncode(URIComponentKind::PATH, someNonASCIIChars), "%CE%A3%E2%99%82%E2%99%80%E3%83%84");
EXPECT_EQ(uriEncode(URIComponentKind::PATH, someNonASCIIChars), uriEncode(URIComponentKind::QUERY, someNonASCIIChars));
}
} // unnamed namespace