From d8a2d7924020aa9b5df8f69373d284db4e2508b0 Mon Sep 17 00:00:00 2001 From: Andrey Prygunkov Date: Mon, 28 Dec 2015 18:34:47 +0100 Subject: [PATCH] #136: handling of Unicode paths with MSXML - when parsing nzb-files (option NzbDir); - when parsing rss feeds (option TempDir). --- daemon/feed/FeedFile.cpp | 41 ++++++++++++++++++++++++++-------------- daemon/feed/FeedFile.h | 2 +- daemon/queue/NzbFile.cpp | 13 ++++++------- 3 files changed, 34 insertions(+), 22 deletions(-) diff --git a/daemon/feed/FeedFile.cpp b/daemon/feed/FeedFile.cpp index 8f9555f4..2cdf5799 100644 --- a/daemon/feed/FeedFile.cpp +++ b/daemon/feed/FeedFile.cpp @@ -28,6 +28,7 @@ #include "Log.h" #include "DownloadInfo.h" #include "Options.h" +#include "FileSystem.h" #include "Util.h" FeedFile::FeedFile(const char* fileName) @@ -117,14 +118,21 @@ FeedFile* FeedFile::Create(const char* fileName) doc->put_validateOnParse(VARIANT_FALSE); doc->put_async(VARIANT_FALSE); - // filename needs to be properly encoded - char* url = (char*)malloc(strlen(fileName)*3 + 1); - EncodeUrl(fileName, url); - debug("url=\"%s\"", url); - _variant_t v(url); - free(url); + _variant_t vFilename(*WString(fileName)); + + // 1. first trying to load via filename without URL-encoding (certain characters don't work when encoded) + VARIANT_BOOL success = doc->load(vFilename); + if (success == VARIANT_FALSE) + { + // 2. 
now trying filename encoded as URL + char url[2048]; + EncodeUrl(fileName, url, 2048); + debug("url=\"%s\"", url); + _variant_t vUrl(url); + + success = doc->load(vUrl); + } - VARIANT_BOOL success = doc->load(v); if (success == VARIANT_FALSE) { _bstr_t r(doc->GetparseError()->reason); @@ -143,23 +151,28 @@ FeedFile* FeedFile::Create(const char* fileName) return file; } -void FeedFile::EncodeUrl(const char* filename, char* url) +void FeedFile::EncodeUrl(const char* filename, char* url, int bufLen) { - while (char ch = *filename++) + WString widefilename(filename); + + char* end = url + bufLen; + for (wchar_t* p = widefilename; *p && url < end - 3; p++) { + wchar_t ch = *p; if (('0' <= ch && ch <= '9') || ('a' <= ch && ch <= 'z') || - ('A' <= ch && ch <= 'Z') ) + ('A' <= ch && ch <= 'Z') || + ch == '-' || ch == '.' || ch == '_' || ch == '~') { - *url++ = ch; + *url++ = (char)ch; } else { *url++ = '%'; - int a = ch >> 4; - *url++ = a > 9 ? a - 10 + 'a' : a + '0'; + uint32 a = (uint32)ch >> 4; + *url++ = a > 9 ? a - 10 + 'A' : a + '0'; a = ch & 0xF; - *url++ = a > 9 ? a - 10 + 'a' : a + '0'; + *url++ = a > 9 ? a - 10 + 'A' : a + '0'; } } *url = NULL; diff --git a/daemon/feed/FeedFile.h b/daemon/feed/FeedFile.h index 5749742a..0518eac3 100644 --- a/daemon/feed/FeedFile.h +++ b/daemon/feed/FeedFile.h @@ -40,7 +40,7 @@ private: void ParseSubject(FeedItemInfo* feedItemInfo); #ifdef WIN32 bool ParseFeed(IUnknown* nzb); - static void EncodeUrl(const char* filename, char* url); + static void EncodeUrl(const char* filename, char* url, int bufLen); #else FeedItemInfo* m_feedItemInfo; StringBuilder m_tagContent; diff --git a/daemon/queue/NzbFile.cpp b/daemon/queue/NzbFile.cpp index 4292b509..0a666570 100644 --- a/daemon/queue/NzbFile.cpp +++ b/daemon/queue/NzbFile.cpp @@ -495,7 +495,7 @@ bool NzbFile::Parse() doc->put_validateOnParse(VARIANT_FALSE); doc->put_async(VARIANT_FALSE); - _variant_t vFilename(*m_fileName); + _variant_t vFilename(*WString(*m_fileName)); // 1. 
first trying to load via filename without URL-encoding (certain charaters doesn't work when encoded) VARIANT_BOOL success = doc->load(vFilename); @@ -538,24 +538,23 @@ bool NzbFile::Parse() void NzbFile::EncodeUrl(const char* filename, char* url, int bufLen) { - BString<1024> utfFilename = filename; - WebUtil::AnsiToUtf8(utfFilename, utfFilename.Capacity()); + WString widefilename(filename); char* end = url + bufLen; - for (char* p = utfFilename; *p && url < end - 3; p++) + for (wchar_t* p = widefilename; *p && url < end - 3; p++) { - char ch = *p; + wchar_t ch = *p; if (('0' <= ch && ch <= '9') || ('a' <= ch && ch <= 'z') || ('A' <= ch && ch <= 'Z') || ch == '-' || ch == '.' || ch == '_' || ch == '~') { - *url++ = ch; + *url++ = (char)ch; } else { *url++ = '%'; - int a = (uchar)ch >> 4; + uint32 a = (uint32)ch >> 4; *url++ = a > 9 ? a - 10 + 'A' : a + '0'; a = ch & 0xF; *url++ = a > 9 ? a - 10 + 'A' : a + '0';