From ea381cde90d8a2b9705eb43168ddd28c3bd9ee57 Mon Sep 17 00:00:00 2001 From: Andrey Prygunkov Date: Mon, 18 Nov 2013 20:37:20 +0000 Subject: [PATCH] fixed encoding issue for non-ASCII characters in DNZB-Headers --- DownloadInfo.cpp | 6 +- FeedFile.cpp | 4 +- Log.cpp | 2 +- Scanner.cpp | 4 +- ScriptController.cpp | 15 +++- UrlCoordinator.cpp | 4 +- Util.cpp | 171 ++++++++++++++++++++++++------------------- Util.h | 17 ++++- 8 files changed, 134 insertions(+), 89 deletions(-) diff --git a/DownloadInfo.cpp b/DownloadInfo.cpp index d556149a..1e381dd8 100644 --- a/DownloadInfo.cpp +++ b/DownloadInfo.cpp @@ -428,7 +428,7 @@ void NZBInfo::SetFilename(const char * szFilename) MakeNiceNZBName(m_szFilename, szNZBNicename, sizeof(szNZBNicename), true); szNZBNicename[1024-1] = '\0'; #ifdef WIN32 - Util::AnsiToUtf8(szNZBNicename, 1024); + WebUtil::AnsiToUtf8(szNZBNicename, 1024); #endif SetName(szNZBNicename); } @@ -506,7 +506,7 @@ void NZBInfo::BuildDestDirName() } #ifdef WIN32 - Util::Utf8ToAnsi(szDestDir, 1024); + WebUtil::Utf8ToAnsi(szDestDir, 1024); #endif SetDestDir(szDestDir); @@ -548,7 +548,7 @@ void NZBInfo::BuildFinalDirName(char* szFinalDirBuf, int iBufSize) strncpy(szFinalDirBuf, szBuffer, iBufSize); #ifdef WIN32 - Util::Utf8ToAnsi(szFinalDirBuf, iBufSize); + WebUtil::Utf8ToAnsi(szFinalDirBuf, iBufSize); #endif } diff --git a/FeedFile.cpp b/FeedFile.cpp index aa906c9c..3e174e76 100644 --- a/FeedFile.cpp +++ b/FeedFile.cpp @@ -237,7 +237,7 @@ bool FeedFile::ParseFeed(IUnknown* nzb) if (tag) { _bstr_t time(tag->Gettext()); - time_t unixtime = Util::ParseRfc822DateTime(time); + time_t unixtime = WebUtil::ParseRfc822DateTime(time); if (unixtime > 0) { pFeedItemInfo->SetTime(unixtime); @@ -518,7 +518,7 @@ void FeedFile::Parse_EndElement(const char *name) } else if (!strcmp("pubDate", name) && m_pFeedItemInfo) { - time_t unixtime = Util::ParseRfc822DateTime(m_szTagContent); + time_t unixtime = WebUtil::ParseRfc822DateTime(m_szTagContent); if (unixtime > 0) { 
m_pFeedItemInfo->SetTime(unixtime); diff --git a/Log.cpp b/Log.cpp index e1921c51..890c83f1 100644 --- a/Log.cpp +++ b/Log.cpp @@ -371,7 +371,7 @@ void Log::InitOptions() { m_szLogFilename = strdup(g_pOptions->GetLogFile()); #ifdef WIN32 - Util::Utf8ToAnsi(m_szLogFilename, strlen(m_szLogFilename) + 1); + WebUtil::Utf8ToAnsi(m_szLogFilename, strlen(m_szLogFilename) + 1); #endif } diff --git a/Scanner.cpp b/Scanner.cpp index ae0edabb..14d7aae1 100644 --- a/Scanner.cpp +++ b/Scanner.cpp @@ -493,7 +493,7 @@ bool Scanner::AddFileToQueue(const char* szFilename, const char* szNZBName, cons pNZBFile->GetNZBInfo()->SetName(NULL); #ifdef WIN32 char* szAnsiFilename = strdup(szNZBName); - Util::Utf8ToAnsi(szAnsiFilename, strlen(szAnsiFilename) + 1); + WebUtil::Utf8ToAnsi(szAnsiFilename, strlen(szAnsiFilename) + 1); pNZBFile->GetNZBInfo()->SetFilename(szAnsiFilename); free(szAnsiFilename); #else @@ -586,7 +586,7 @@ Scanner::EAddStatus Scanner::AddExternalFile(const char* szNZBName, const char* Util::MakeValidFilename(szValidNZBName, '_', false); #ifdef WIN32 - Util::Utf8ToAnsi(szValidNZBName, 1024); + WebUtil::Utf8ToAnsi(szValidNZBName, 1024); #endif const char* szExtension = strrchr(szNZBName, '.'); diff --git a/ScriptController.cpp b/ScriptController.cpp index 0bf0fbf5..68766d9d 100644 --- a/ScriptController.cpp +++ b/ScriptController.cpp @@ -277,15 +277,26 @@ void ScriptController::PrepareEnvParameters(NZBInfo* pNZBInfo, const char* szStr for (NZBParameterList::iterator it = pNZBInfo->GetParameters()->begin(); it != pNZBInfo->GetParameters()->end(); it++) { NZBParameter* pParameter = *it; + const char* szValue = pParameter->GetValue(); +#ifdef WIN32 + char* szAnsiValue = strdup(szValue); + WebUtil::Utf8ToAnsi(szAnsiValue, strlen(szAnsiValue) + 1); + szValue = szAnsiValue; +#endif + if (szStripPrefix && !strncmp(pParameter->GetName(), szStripPrefix, iPrefixLen) && (int)strlen(pParameter->GetName()) > iPrefixLen) { - SetEnvVarSpecial("NZBPR", pParameter->GetName() + 
iPrefixLen, pParameter->GetValue()); + SetEnvVarSpecial("NZBPR", pParameter->GetName() + iPrefixLen, szValue); } else if (!szStripPrefix) { - SetEnvVarSpecial("NZBPR", pParameter->GetName(), pParameter->GetValue()); + SetEnvVarSpecial("NZBPR", pParameter->GetName(), szValue); } + +#ifdef WIN32 + free(szAnsiValue); +#endif } } diff --git a/UrlCoordinator.cpp b/UrlCoordinator.cpp index ec12bc98..1b24d4da 100644 --- a/UrlCoordinator.cpp +++ b/UrlCoordinator.cpp @@ -105,7 +105,9 @@ void UrlDownloader::ProcessHeader(const char* szLine) snprintf(szParamName, 100, "*DNZB:%s", szModLine + 7); szParamName[100-1] = '\0'; - m_ppParameters.SetParameter(szParamName, szValue); + char* szVal = WebUtil::Latin1ToUtf8(szValue); + m_ppParameters.SetParameter(szParamName, szVal); + free(szVal); } free(szModLine); } diff --git a/Util.cpp b/Util.cpp index d8c74f7f..fceb9949 100644 --- a/Util.cpp +++ b/Util.cpp @@ -1156,82 +1156,8 @@ bool Util::RegReadStr(HKEY hKey, const char* szKeyName, const char* szValueName, } return false; } - -bool Util::Utf8ToAnsi(char* szBuffer, int iBufLen) -{ - WCHAR* wstr = (WCHAR*)malloc(iBufLen * 2); - int errcode = MultiByteToWideChar(CP_UTF8, 0, szBuffer, -1, wstr, iBufLen); - if (errcode > 0) - { - errcode = WideCharToMultiByte(CP_ACP, 0, wstr, -1, szBuffer, iBufLen, "_", NULL); - } - free(wstr); - return errcode > 0; -} - -bool Util::AnsiToUtf8(char* szBuffer, int iBufLen) -{ - WCHAR* wstr = (WCHAR*)malloc(iBufLen * 2); - int errcode = MultiByteToWideChar(CP_ACP, 0, szBuffer, -1, wstr, iBufLen); - if (errcode > 0) - { - errcode = WideCharToMultiByte(CP_UTF8, 0, wstr, -1, szBuffer, iBufLen, NULL, NULL); - } - free(wstr); - return errcode > 0; -} #endif -/* - The date/time can be formatted according to RFC822 in different ways. Examples: - Wed, 26 Jun 2013 01:02:54 -0600 - Wed, 26 Jun 2013 01:02:54 GMT - 26 Jun 2013 01:02:54 -0600 - 26 Jun 2013 01:02 -0600 - 26 Jun 2013 01:02 A - This function however supports only the first format! 
-*/ -time_t Util::ParseRfc822DateTime(const char* szDateTimeStr) -{ - char month[4]; - int day, year, hours, minutes, seconds, zonehours, zoneminutes; - int r = sscanf(szDateTimeStr, "%*s %d %3s %d %d:%d:%d %3d %2d", &day, &month[0], &year, &hours, &minutes, &seconds, &zonehours, &zoneminutes); - if (r != 8) - { - return 0; - } - - int mon = 0; - if (!strcasecmp(month, "Jan")) mon = 0; - else if (!strcasecmp(month, "Feb")) mon = 1; - else if (!strcasecmp(month, "Mar")) mon = 2; - else if (!strcasecmp(month, "Apr")) mon = 3; - else if (!strcasecmp(month, "May")) mon = 4; - else if (!strcasecmp(month, "Jun")) mon = 5; - else if (!strcasecmp(month, "Jul")) mon = 6; - else if (!strcasecmp(month, "Aug")) mon = 7; - else if (!strcasecmp(month, "Sep")) mon = 8; - else if (!strcasecmp(month, "Oct")) mon = 9; - else if (!strcasecmp(month, "Nov")) mon = 10; - else if (!strcasecmp(month, "Dec")) mon = 11; - - struct tm rawtime; - memset(&rawtime, 0, sizeof(rawtime)); - - rawtime.tm_year = year - 1900; - rawtime.tm_mon = mon; - rawtime.tm_mday = day; - rawtime.tm_hour = hours; - rawtime.tm_min = minutes; - rawtime.tm_sec = seconds; - - time_t enctime = mktime(&rawtime); - - enctime = enctime - (zonehours * 60 + (zonehours > 0 ? 
zoneminutes : -zoneminutes)) * 60; - - return enctime; -} - unsigned int WebUtil::DecodeBase64(char* szInputBuffer, int iInputBufferLength, char* szOutputBuffer) { @@ -1767,6 +1693,103 @@ BreakLoop: *output = '\0'; } +#ifdef WIN32 +bool WebUtil::Utf8ToAnsi(char* szBuffer, int iBufLen) +{ + WCHAR* wstr = (WCHAR*)malloc(iBufLen * 2); + int errcode = MultiByteToWideChar(CP_UTF8, 0, szBuffer, -1, wstr, iBufLen); + if (errcode > 0) + { + errcode = WideCharToMultiByte(CP_ACP, 0, wstr, -1, szBuffer, iBufLen, "_", NULL); + } + free(wstr); + return errcode > 0; +} + +bool WebUtil::AnsiToUtf8(char* szBuffer, int iBufLen) +{ + WCHAR* wstr = (WCHAR*)malloc(iBufLen * 2); + int errcode = MultiByteToWideChar(CP_ACP, 0, szBuffer, -1, wstr, iBufLen); + if (errcode > 0) + { + errcode = WideCharToMultiByte(CP_UTF8, 0, wstr, -1, szBuffer, iBufLen, NULL, NULL); + } + free(wstr); + return errcode > 0; +} +#endif + +char* WebUtil::Latin1ToUtf8(const char* szStr) +{ + char *res = (char*)malloc(strlen(szStr) * 2 + 1); + const unsigned char *in = (const unsigned char*)szStr; + unsigned char *out = (unsigned char*)res; + while (*in) + { + if (*in < 128) + { + *out++ = *in++; + } + else + { + *out++ = 0xc2 + (*in > 0xbf); + *out++ = (*in++ & 0x3f) + 0x80; + } + } + *out = '\0'; + return res; +} + +/* + The date/time can be formatted according to RFC822 in different ways. Examples: + Wed, 26 Jun 2013 01:02:54 -0600 + Wed, 26 Jun 2013 01:02:54 GMT + 26 Jun 2013 01:02:54 -0600 + 26 Jun 2013 01:02 -0600 + 26 Jun 2013 01:02 A + This function however supports only the first format! 
+*/ +time_t WebUtil::ParseRfc822DateTime(const char* szDateTimeStr) +{ + char month[4]; + int day, year, hours, minutes, seconds, zonehours, zoneminutes; + int r = sscanf(szDateTimeStr, "%*s %d %3s %d %d:%d:%d %3d %2d", &day, &month[0], &year, &hours, &minutes, &seconds, &zonehours, &zoneminutes); + if (r != 8) + { + return 0; + } + + int mon = 0; + if (!strcasecmp(month, "Jan")) mon = 0; + else if (!strcasecmp(month, "Feb")) mon = 1; + else if (!strcasecmp(month, "Mar")) mon = 2; + else if (!strcasecmp(month, "Apr")) mon = 3; + else if (!strcasecmp(month, "May")) mon = 4; + else if (!strcasecmp(month, "Jun")) mon = 5; + else if (!strcasecmp(month, "Jul")) mon = 6; + else if (!strcasecmp(month, "Aug")) mon = 7; + else if (!strcasecmp(month, "Sep")) mon = 8; + else if (!strcasecmp(month, "Oct")) mon = 9; + else if (!strcasecmp(month, "Nov")) mon = 10; + else if (!strcasecmp(month, "Dec")) mon = 11; + + struct tm rawtime; + memset(&rawtime, 0, sizeof(rawtime)); + + rawtime.tm_year = year - 1900; + rawtime.tm_mon = mon; + rawtime.tm_mday = day; + rawtime.tm_hour = hours; + rawtime.tm_min = minutes; + rawtime.tm_sec = seconds; + + time_t enctime = mktime(&rawtime); + + enctime = enctime - (zonehours * 60 + (zonehours > 0 ? zoneminutes : -zoneminutes)) * 60; + + return enctime; +} + URL::URL(const char* szAddress) { diff --git a/Util.h b/Util.h index 463bfcf8..c3c9868b 100644 --- a/Util.h +++ b/Util.h @@ -141,12 +141,8 @@ public: #ifdef WIN32 static bool RegReadStr(HKEY hKey, const char* szKeyName, const char* szValueName, char* szBuffer, int* iBufLen); - static bool Utf8ToAnsi(char* szBuffer, int iBufLen); - static bool AnsiToUtf8(char* szBuffer, int iBufLen); #endif - static time_t ParseRfc822DateTime(const char* szDateTimeStr); - /* * Returns program version and revision number as string formatted like "0.7.0-r295". * If revision number is not available only version is returned ("0.7.0"). 
@@ -220,6 +216,19 @@ public: * The string is decoded on the place overwriting the content of raw-data. */ static void HttpUnquote(char* raw); + +#ifdef WIN32 + static bool Utf8ToAnsi(char* szBuffer, int iBufLen); + static bool AnsiToUtf8(char* szBuffer, int iBufLen); +#endif + + /* + * Converts ISO-8859-1 (aka Latin-1) into UTF-8. + * Returns a new string allocated with malloc; the caller must free it. + */ + static char* Latin1ToUtf8(const char* szStr); + + static time_t ParseRfc822DateTime(const char* szDateTimeStr); }; class URL