/*
 *  This file is part of nzbget. See <http://nzbget.net>.
 *
 *  Copyright (C) 2004 Sven Henkel
 *  Copyright (C) 2007-2016 Andrey Prygunkov
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */


#include "nzbget.h"
#include "NzbFile.h"
#include "Log.h"
#include "DownloadInfo.h"
#include "Options.h"
#include "DiskState.h"
#include "Util.h"
#include "FileSystem.h"

/**
 * Creates the parser object for the nzb-file "fileName" and a fresh NzbInfo
 * which is initialized with the file name, the category and a destination
 * directory name built from them.
 */
NzbFile::NzbFile(const char* fileName, const char* category) :
	m_fileName(fileName)
{
	debug("Creating NZBFile");

	m_nzbInfo = std::make_unique<NzbInfo>();
	m_nzbInfo->SetFilename(fileName);
	m_nzbInfo->SetCategory(category);
	m_nzbInfo->BuildDestDirName();
}

/**
 * Writes the name of the nzb-file to the info log.
 */
void NzbFile::LogDebugInfo()
{
	info(" NZBFile %s", *m_fileName);
}

/**
 * Stores the article in the article list of the file at the position
 * "part number - 1". The list is enlarged if necessary; the slots of parts
 * not seen (yet) stay empty. Articles with a part number below 1 are ignored.
 */
void NzbFile::AddArticle(FileInfo* fileInfo, std::unique_ptr<ArticleInfo> articleInfo)
{
	int index = articleInfo->GetPartNumber() - 1;
	if (index < 0)
	{
		// a part number below 1 can not be mapped to a list position
		return;
	}

	// make Article-List big enough
	if (index >= (int)fileInfo->GetArticles()->size())
	{
		fileInfo->GetArticles()->resize(index + 1);
	}

	(*fileInfo->GetArticles())[index] = std::move(articleInfo);
}

/**
 * Finalizes a parsed file: removes the empty slots from its article list,
 * computes the size statistics and appends the file to the file list of
 * the nzb. A file without any article is discarded.
 */
void NzbFile::AddFileInfo(std::unique_ptr<FileInfo> fileInfo)
{
	// calculate file size and delete empty articles

	int64 size = 0;
	int64 missedSize = 0;
	int64 oneSize = 0;
	int uncountedArticles = 0;
	int missedArticles = 0;
	int totalArticles = (int)fileInfo->GetArticles()->size();

	for (ArticleList::iterator it = fileInfo->GetArticles()->begin(); it != fileInfo->GetArticles()->end(); )
	{
		ArticleInfo* article = (*it).get();
		if (!article)
		{
			// erase() returns the iterator to the element following the removed one
			it = fileInfo->GetArticles()->erase(it);
			missedArticles++;
			if (oneSize > 0)
			{
				// size of a missing article is estimated by the size of the first known article
				missedSize += oneSize;
			}
			else
			{
				// no article size known yet; these are accounted for after the loop
				uncountedArticles++;
			}
		}
		else
		{
			size += article->GetSize();
			if (oneSize == 0)
			{
				oneSize = article->GetSize();
			}
			it++;
		}
	}

	if (fileInfo->GetArticles()->empty())
	{
		return;
	}

	missedSize += uncountedArticles * oneSize;
	size += missedSize;
	fileInfo->SetNzbInfo(m_nzbInfo.get());
	fileInfo->SetSize(size);
	fileInfo->SetRemainingSize(size - missedSize);
	fileInfo->SetMissedSize(missedSize);
	fileInfo->SetTotalArticles(totalArticles);
	fileInfo->SetMissedArticles(missedArticles);
	m_nzbInfo->GetFileList()->Add(std::move(fileInfo));
}

/**
 * Derives the filename of a file from its subject.
 * If TryQuotes is set and the subject contains a quoted text with a dot in it,
 * that text is used. Otherwise the subject is split into tokens and the last
 * token containing a dot (not as its last character) is used.
 */
void NzbFile::ParseSubject(FileInfo* fileInfo, bool TryQuotes)
{
	// Example subject: some garbage "title" yEnc (10/99)

	if (!fileInfo->GetSubject())
	{
		// Malformed file element without subject. We generate subject using internal element id.
		fileInfo->SetSubject(CString::FormatStr("%d", fileInfo->GetId()));
	}

	// strip the "yEnc (10/99)"-suffix
	BString<1024> subject = fileInfo->GetSubject();
	char* subjectStart = subject;
	size_t subjectLen = strlen(subjectStart);
	if (subjectLen > 0)
	{
		// "end" walks backwards and must never be moved before the first character
		char* end = subjectStart + subjectLen - 1;
		if (*end == ')' && end > subjectStart)
		{
			end--;
			while (end > subjectStart && strchr("0123456789", *end)) end--;
			if (*end == '/' && end > subjectStart)
			{
				end--;
				while (end > subjectStart && strchr("0123456789", *end)) end--;
				if (end - subjectStart > 6 && !strncmp(end - 6, " yEnc (", 7))
				{
					end[-6] = '\0';
				}
			}
		}
	}

	if (TryQuotes)
	{
		// try to use the filename in quotation marks
		char* p = subject;
		char* start = strchr(p, '\"');
		if (start)
		{
			start++;
			// if the quotation mark was the last character there is nothing to search in
			char* end = *start ? strchr(start + 1, '\"') : nullptr;
			if (end)
			{
				int len = (int)(end - start);
				char* point = strchr(start + 1, '.');
				if (point && point < end)
				{
					BString<1024> filename;
					filename.Set(start, len);
					fileInfo->SetFilename(filename);
					return;
				}
			}
		}
	}

	// tokenize subject, considering spaces as separators and quotation
	// marks as non separatable token delimiters.
	// then take the last token containing dot (".") as a filename

	typedef std::vector<CString> TokenList;
	TokenList tokens;

	// tokenizing
	char* p = subject;
	char* start = p;
	bool quot = false;
	while (true)
	{
		char ch = *p;
		bool sep = (ch == '\"') || (!quot && ch == ' ') || (ch == '\0');
		if (sep)
		{
			// end of token
			int len = (int)(p - start);
			if (len > 0)
			{
				tokens.emplace_back(start, len);
			}
			if (ch == '\0')
			{
				// end of subject; must stop before looking at the characters behind the terminator
				break;
			}
			start = p;
			if (ch != '\"' || quot)
			{
				start++;
			}
			quot = *start == '\"';
			if (quot)
			{
				start++;
				char* q = strchr(start, '\"');
				if (q)
				{
					// continue at the closing quotation mark (p is incremented below)
					p = q - 1;
				}
				else
				{
					quot = false;
				}
			}
		}
		p++;
	}

	if (!tokens.empty())
	{
		// finding the best candidate for being a filename
		char* besttoken = tokens.back();
		for (TokenList::reverse_iterator it = tokens.rbegin(); it != tokens.rend(); it++)
		{
			char* s = *it;
			char* p = strchr(s, '.');
			if (p && (p[1] != '\0'))
			{
				besttoken = s;
				break;
			}
		}
		fileInfo->SetFilename(besttoken);
	}
	else
	{
		// subject is empty or contains only separators?
		debug("Could not extract Filename from Subject: %s. Using Subject as Filename", fileInfo->GetSubject());
		fileInfo->SetFilename(fileInfo->GetSubject());
	}
}

/**
 * Checks whether the parsed filenames look wrong: returns true if a filename
 * is shared by files having different subjects often enough (see below).
 */
bool NzbFile::HasDuplicateFilenames()
{
	for (FileList::iterator it = m_nzbInfo->GetFileList()->begin(); it != m_nzbInfo->GetFileList()->end(); it++)
	{
		FileInfo* fileInfo1 = (*it).get();
		int dupe = 1;
		for (FileList::iterator it2 = it + 1; it2 != m_nzbInfo->GetFileList()->end(); it2++)
		{
			FileInfo* fileInfo2 = (*it2).get();
			if (!strcmp(fileInfo1->GetFilename(), fileInfo2->GetFilename()) &&
				strcmp(fileInfo1->GetSubject(), fileInfo2->GetSubject()))
			{
				dupe++;
			}
		}

		// If more than two files have the same parsed filename but different subjects,
		// this means, that the parsing was not correct.
		// in this case we take subjects as filenames to prevent
		// false "duplicate files"-alarm.
		// It's Ok for just two files to have the same filename, this is
		// an often case by posting-errors to repost bad files
		if (dupe > 2 || (dupe == 2 && m_nzbInfo->GetFileList()->size() == 2))
		{
			return true;
		}
	}

	return false;
}

/**
 * Generate filenames from subjects and check if the parsing of subject was correct
 */
void NzbFile::BuildFilenames()
{
	// first attempt: prefer the text in quotation marks
	for (FileInfo* fileInfo : m_nzbInfo->GetFileList())
	{
		ParseSubject(fileInfo, true);
	}

	// second attempt: tokenizing only
	if (HasDuplicateFilenames())
	{
		for (FileInfo* fileInfo : m_nzbInfo->GetFileList())
		{
			ParseSubject(fileInfo, false);
		}
	}

	// last resort: use the subjects as filenames
	if (HasDuplicateFilenames())
	{
		m_nzbInfo->SetManyDupeFiles(true);
		for (FileInfo* fileInfo : m_nzbInfo->GetFileList())
		{
			fileInfo->SetFilename(fileInfo->GetSubject());
		}
	}
}

/**
 * Computes two hashes over the message ids of all articles (files are
 * processed sorted by filename): the full content hash and the filtered
 * content hash, which skips non-par files matching the option "ParIgnoreExt".
 */
void NzbFile::CalcHashes()
{
	RawFileList sortedFiles;

	for (FileInfo* fileInfo : m_nzbInfo->GetFileList())
	{
		sortedFiles.push_back(fileInfo);
	}

	std::sort(sortedFiles.begin(), sortedFiles.end(),
		[](FileInfo* first, FileInfo* second)
		{
			return strcmp(first->GetFilename(), second->GetFilename()) > 0;
		});

	uint32 fullContentHash = 0;
	uint32 filteredContentHash = 0;
	int useForFilteredCount = 0;

	for (FileInfo* fileInfo : sortedFiles)
	{
		// check file extension
		bool skip = !fileInfo->GetParFile() &&
			Util::MatchFileExt(fileInfo->GetFilename(), g_Options->GetParIgnoreExt(), ",;");

		for (ArticleInfo* article: fileInfo->GetArticles())
		{
			int len = (int)strlen(article->GetMessageId());
			fullContentHash = Util::HashBJ96(article->GetMessageId(), len, fullContentHash);
			if (!skip)
			{
				filteredContentHash = Util::HashBJ96(article->GetMessageId(), len, filteredContentHash);
				useForFilteredCount++;
			}
		}
	}

	// if filtered hash is based on less than a half of files - do not use filtered hash at all
	// NOTE(review): the counter above is incremented per article whereas the limit
	// is derived from the number of files - confirm that this is intended.
	if (useForFilteredCount < (int)sortedFiles.size() / 2)
	{
		filteredContentHash = 0;
	}

	m_nzbInfo->SetFullContentHash(fullContentHash);
	m_nzbInfo->SetFilteredContentHash(filteredContentHash);
}

/**
 * Post-processing of the parsed file list: builds the filenames, detects
 * par2-files, accumulates the statistics of the nzb, computes the content
 * hashes and (in server mode) saves the files via g_DiskState and clears
 * their article lists. If a password was found it is reread from the file.
 */
void NzbFile::ProcessFiles()
{
	BuildFilenames();

	for (FileInfo* fileInfo : m_nzbInfo->GetFileList())
	{
		fileInfo->MakeValidFilename();

		BString<1024> loFileName = fileInfo->GetFilename();
		// convert string to lowercase;
		// tolower() must not be called with a negative value, therefore the cast
		for (char* p = loFileName; *p; p++) *p = (char)tolower(static_cast<unsigned char>(*p));
		bool parFile = strstr(loFileName, ".par2");

		m_nzbInfo->SetFileCount(m_nzbInfo->GetFileCount() + 1);
		m_nzbInfo->SetTotalArticles(m_nzbInfo->GetTotalArticles() + fileInfo->GetTotalArticles());
		m_nzbInfo->SetFailedArticles(m_nzbInfo->GetFailedArticles() + fileInfo->GetMissedArticles());
		m_nzbInfo->SetCurrentFailedArticles(m_nzbInfo->GetCurrentFailedArticles() + fileInfo->GetMissedArticles());
		m_nzbInfo->SetSize(m_nzbInfo->GetSize() + fileInfo->GetSize());
		m_nzbInfo->SetRemainingSize(m_nzbInfo->GetRemainingSize() + fileInfo->GetRemainingSize());
		m_nzbInfo->SetFailedSize(m_nzbInfo->GetFailedSize() + fileInfo->GetMissedSize());
		m_nzbInfo->SetCurrentFailedSize(m_nzbInfo->GetFailedSize());

		fileInfo->SetParFile(parFile);
		if (parFile)
		{
			m_nzbInfo->SetParSize(m_nzbInfo->GetParSize() + fileInfo->GetSize());
			m_nzbInfo->SetParFailedSize(m_nzbInfo->GetParFailedSize() + fileInfo->GetMissedSize());
			m_nzbInfo->SetParCurrentFailedSize(m_nzbInfo->GetParFailedSize());
			m_nzbInfo->SetRemainingParCount(m_nzbInfo->GetRemainingParCount() + 1);
		}
	}

	m_nzbInfo->UpdateMinMaxTime();

	CalcHashes();

	if (g_Options->GetServerMode())
	{
		for (FileInfo* fileInfo : m_nzbInfo->GetFileList())
		{
			g_DiskState->SaveFile(fileInfo);
			fileInfo->GetArticles()->clear();
		}
	}

	if (m_password)
	{
		ReadPassword();
	}
}

/**
 * Password read using XML-parser may have special characters (such as TAB) stripped.
 * This function rereads password directly from file to keep all characters intact.
 */
void NzbFile::ReadPassword()
{
	DiskFile file;
	if (!file.Open(m_fileName, DiskFile::omRead))
	{
		return;
	}

	// obtain file size.
	file.Seek(0, DiskFile::soEnd);
	int64 fileSize = file.Position();
	file.Seek(0, DiskFile::soSet);

	// reading first 4KB of the file
	CharBuffer buf(4096);
	// the limit is applied before narrowing to int, so that big files can not produce a negative size
	int size = fileSize < 4096 ? (int)fileSize : 4096;
	if (size <= 0)
	{
		// nothing to read; also prevents writing the terminator before the buffer
		file.Close();
		return;
	}

	// copy the file into the buffer.
	file.Read(buf, size);
	file.Close();

	buf[size-1] = '\0';

	char* metaPassword = strstr(buf, "<meta type=\"password\">");
	if (metaPassword)
	{
		metaPassword += 22; // length of '<meta type="password">'
		char* end = strstr(metaPassword, "</meta>");
		if (end)
		{
			*end = '\0';
			WebUtil::XmlDecode(metaPassword);
			m_password = metaPassword;
		}
	}
}

#ifdef WIN32
/**
 * Parses the nzb-file using MSXML and processes the parsed files.
 * Returns false (adding an error message to the nzb where a reason is known)
 * if the document could not be loaded, could not be parsed or has no files.
 */
bool NzbFile::Parse()
{
	CoInitialize(nullptr);

	HRESULT hr;

	MSXML::IXMLDOMDocumentPtr doc;
	hr = doc.CreateInstance(MSXML::CLSID_DOMDocument);
	if (FAILED(hr))
	{
		return false;
	}

	// Load the XML document file...
	doc->put_resolveExternals(VARIANT_FALSE);
	doc->put_validateOnParse(VARIANT_FALSE);
	doc->put_async(VARIANT_FALSE);

	_variant_t vFilename(*WString(*m_fileName));

	// 1. first trying to load via filename without URL-encoding (certain characters don't work when encoded)
	VARIANT_BOOL success = doc->load(vFilename);
	if (success == VARIANT_FALSE)
	{
		// 2. now trying filename encoded as URL
		char url[2048];
		EncodeUrl(m_fileName, url, 2048);
		debug("url=\"%s\"", url);
		_variant_t vUrl(url);

		success = doc->load(vUrl);
	}

	if (success == VARIANT_FALSE)
	{
		_bstr_t r(doc->GetparseError()->reason);
		const char* errMsg = r;
		m_nzbInfo->AddMessage(Message::mkError, BString<1024>("Error parsing nzb-file %s: %s",
			FileSystem::BaseFileName(m_fileName), errMsg));
		return false;
	}

	if (!ParseNzb(doc))
	{
		return false;
	}

	if (m_nzbInfo->GetFileList()->empty())
	{
		m_nzbInfo->AddMessage(Message::mkError, BString<1024>(
			"Error parsing nzb-file %s: file has no content", FileSystem::BaseFileName(m_fileName)));
		return false;
	}

	ProcessFiles();

	return true;
}

/**
 * Writes the percent-encoded form of "filename" into the buffer "url" of the
 * size "bufLen". Letters, digits and the characters "-._~" are copied as is.
 * The output is truncated if the buffer is too small and is always terminated.
 */
void NzbFile::EncodeUrl(const char* filename, char* url, int bufLen)
{
	WString widefilename(filename);

	char* end = url + bufLen;
	// "end - 3" keeps room for one escape sequence ("%XX") plus the terminator
	for (wchar_t* p = widefilename; *p && url < end - 3; p++)
	{
		wchar_t ch = *p;
		if (('0' <= ch && ch <= '9') ||
			('a' <= ch && ch <= 'z') ||
			('A' <= ch && ch <= 'Z') ||
			ch == '-' || ch == '.' || ch == '_' || ch == '~')
		{
			*url++ = (char)ch;
		}
		else
		{
			// NOTE(review): for characters above 0xFF the high part is greater than 15
			// and the result is not a hex digit - confirm how such names should be encoded.
			*url++ = '%';
			uint32 a = (uint32)ch >> 4;
			*url++ = a > 9 ? a - 10 + 'A' : a + '0';
			a = ch & 0xF;
			*url++ = a > 9 ? a - 10 + 'A' : a + '0';
		}
	}
	*url = '\0';
}

/**
 * Walks through the loaded XML-document: reads the password meta-element and
 * creates a FileInfo with groups and articles for every file-element.
 * Returns false if a file-element has no subject or a segment-element has no
 * "number" or "bytes" attribute.
 */
bool NzbFile::ParseNzb(IUnknown* nzb)
{
	MSXML::IXMLDOMDocumentPtr doc = nzb;
	MSXML::IXMLDOMNodePtr root = doc->documentElement;

	MSXML::IXMLDOMNodePtr node = root->selectSingleNode("/nzb/head/meta[@type='password']");
	if (node)
	{
		_bstr_t password(node->Gettext());
		m_password = password;
	}

	MSXML::IXMLDOMNodeListPtr fileList = root->selectNodes("/nzb/file");
	for (int i = 0; i < fileList->Getlength(); i++)
	{
		node = fileList->Getitem(i);
		MSXML::IXMLDOMNodePtr attribute = node->Getattributes()->getNamedItem("subject");
		if (!attribute) return false;
		_bstr_t subject(attribute->Gettext());

		std::unique_ptr<FileInfo> fileInfo = std::make_unique<FileInfo>();
		fileInfo->SetSubject(subject);

		attribute = node->Getattributes()->getNamedItem("date");
		if (attribute)
		{
			_bstr_t date(attribute->Gettext());
			fileInfo->SetTime(atoi(date));
		}

		MSXML::IXMLDOMNodeListPtr groupList = node->selectNodes("groups/group");
		for (int g = 0; g < groupList->Getlength(); g++)
		{
			MSXML::IXMLDOMNodePtr node = groupList->Getitem(g);
			_bstr_t group = node->Gettext();
			fileInfo->GetGroups()->push_back((const char*)group);
		}

		MSXML::IXMLDOMNodeListPtr segmentList = node->selectNodes("segments/segment");
		for (int g = 0; g < segmentList->Getlength(); g++)
		{
			MSXML::IXMLDOMNodePtr node = segmentList->Getitem(g);
			_bstr_t bid = node->Gettext();
			BString<1024> id("<%s>", (const char*)bid);

			MSXML::IXMLDOMNodePtr attribute = node->Getattributes()->getNamedItem("number");
			if (!attribute) return false;
			_bstr_t number(attribute->Gettext());

			attribute = node->Getattributes()->getNamedItem("bytes");
			if (!attribute) return false;
			_bstr_t bytes(attribute->Gettext());

			int partNumber = atoi(number);
			int lsize = atoi(bytes);

			if (partNumber > 0)
			{
				std::unique_ptr<ArticleInfo> article = std::make_unique<ArticleInfo>();
				article->SetPartNumber(partNumber);
				article->SetMessageId(id);
				article->SetSize(lsize);
				AddArticle(fileInfo.get(), std::move(article));
			}
		}

		AddFileInfo(std::move(fileInfo));
	}
	return true;
}

#else

/**
 * Parses the nzb-file using the SAX-interface of libxml2 and processes the
 * parsed files. Returns false (adding an error message to the nzb) if the
 * file could not be parsed or has no files; also returns false if the
 * program was compiled without libxml2.
 */
bool NzbFile::Parse()
{
#ifdef DISABLE_LIBXML2
	error("Could not parse rss feed, program was compiled without libxml2 support");
	return false;
#else
	xmlSAXHandler SAX_handler = {0};
	SAX_handler.startElement = reinterpret_cast<startElementSAXFunc>(SAX_StartElement);
	SAX_handler.endElement = reinterpret_cast<endElementSAXFunc>(SAX_EndElement);
	SAX_handler.characters = reinterpret_cast<charactersSAXFunc>(SAX_characters);
	SAX_handler.error = reinterpret_cast<errorSAXFunc>(SAX_error);
	SAX_handler.getEntity = reinterpret_cast<getEntitySAXFunc>(SAX_getEntity);

	m_ignoreNextError = false;

	int ret = xmlSAXUserParseFile(&SAX_handler, this, m_fileName);

	if (ret != 0)
	{
		m_nzbInfo->AddMessage(Message::mkError, BString<1024>(
			"Error parsing nzb-file %s", FileSystem::BaseFileName(m_fileName)));
		return false;
	}

	if (m_nzbInfo->GetFileList()->empty())
	{
		m_nzbInfo->AddMessage(Message::mkError, BString<1024>(
			"Error parsing nzb-file %s: file has no content", FileSystem::BaseFileName(m_fileName)));
		return false;
	}

	ProcessFiles();

	return true;
#endif
}

/**
 * Handles an opening tag: "file" starts a new FileInfo, "segment" adds a new
 * article to the current file, "meta" detects the password element.
 * "atts" is a null-terminated list of name/value pairs or null.
 */
void NzbFile::Parse_StartElement(const char *name, const char **atts)
{
	BString<1024> tagAttrMessage("Malformed nzb-file, tag <%s> must have attributes", name);

	m_tagContent.Clear();

	if (!strcmp("file", name))
	{
		m_fileInfo = std::make_unique<FileInfo>();
		m_fileInfo->SetFilename(m_fileName);

		if (!atts)
		{
			m_nzbInfo->AddMessage(Message::mkWarning, tagAttrMessage);
			return;
		}

		for (int i = 0; atts[i]; i += 2)
		{
			const char* attrname = atts[i];
			const char* attrvalue = atts[i + 1];
			if (!strcmp("subject", attrname))
			{
				m_fileInfo->SetSubject(attrvalue);
			}
			if (!strcmp("date", attrname))
			{
				m_fileInfo->SetTime(atoi(attrvalue));
			}
		}
	}
	else if (!strcmp("segment", name))
	{
		if (!m_fileInfo)
		{
			m_nzbInfo->AddMessage(Message::mkWarning, "Malformed nzb-file, tag <segment> without tag <file>");
			return;
		}

		if (!atts)
		{
			m_nzbInfo->AddMessage(Message::mkWarning, tagAttrMessage);
			return;
		}

		int64 lsize = -1;
		int partNumber = -1;

		for (int i = 0; atts[i]; i += 2)
		{
			const char* attrname = atts[i];
			const char* attrvalue = atts[i + 1];
			if (!strcmp("bytes", attrname))
			{
				lsize = atol(attrvalue);
			}
			if (!strcmp("number", attrname))
			{
				partNumber = (int)atol(attrvalue);
			}
		}

		if (partNumber > 0)
		{
			// new segment, add it!
			std::unique_ptr<ArticleInfo> article = std::make_unique<ArticleInfo>();
			article->SetPartNumber(partNumber);
			article->SetSize(lsize);
			// remembered in order to set the message id when the tag is closed
			m_article = article.get();
			AddArticle(m_fileInfo.get(), std::move(article));
		}
	}
	else if (!strcmp("meta", name))
	{
		if (!atts)
		{
			m_nzbInfo->AddMessage(Message::mkWarning, tagAttrMessage);
			return;
		}
		m_hasPassword = atts[0] && atts[1] && !strcmp("type", atts[0]) && !strcmp("password", atts[1]);
	}
}

/**
 * Handles a closing tag: "file" finalizes the current file, "group" and
 * "segment" store the collected tag content as group name respectively
 * message id, "meta" stores the content as password (if it was a password
 * element).
 */
void NzbFile::Parse_EndElement(const char *name)
{
	if (!strcmp("file", name))
	{
		if (!m_fileInfo)
		{
			// error: bad nzb-file (closing tag without opening tag)
			return;
		}

		// Close the file element, add the new file to file-list
		AddFileInfo(std::move(m_fileInfo));
		m_article = nullptr;
	}
	else if (!strcmp("group", name))
	{
		if (!m_fileInfo)
		{
			// error: bad nzb-file
			return;
		}

		m_fileInfo->GetGroups()->push_back(*m_tagContent);
		m_tagContent.Clear();
	}
	else if (!strcmp("segment", name))
	{
		if (!m_fileInfo || !m_article)
		{
			// error: bad nzb-file
			return;
		}

		// Get the #text part
		BString<1024> id("<%s>", *m_tagContent);
		m_article->SetMessageId(id);
		m_article = nullptr;
	}
	else if (!strcmp("meta", name) && m_hasPassword)
	{
		m_password = m_tagContent;
	}
}

/**
 * Appends a piece of text to the content of the current tag.
 */
void NzbFile::Parse_Content(const char *buf, int len)
{
	m_tagContent.Append(buf, len);
}

/**
 * SAX-callback for opening tags; forwards to Parse_StartElement of "file".
 */
void NzbFile::SAX_StartElement(NzbFile* file, const char *name, const char **atts)
{
	file->Parse_StartElement(name, atts);
}

/**
 * SAX-callback for closing tags; forwards to Parse_EndElement of "file".
 */
void NzbFile::SAX_EndElement(NzbFile* file, const char *name)
{
	file->Parse_EndElement(name);
}

/**
 * SAX-callback for character data: strips blanks, tabs, CR and LF from both
 * ends of the chunk and passes the remaining text (if any) to Parse_Content.
 */
void NzbFile::SAX_characters(NzbFile* file, const char * xmlstr, int len)
{
	char* str = (char*)xmlstr;

	// trim starting blanks
	int off = 0;
	for (int i = 0; i < len; i++)
	{
		char ch = str[i];
		if (ch == ' ' || ch == 10 || ch == 13 || ch == 9)
		{
			off++;
		}
		else
		{
			break;
		}
	}

	int newlen = len - off;

	// trim ending blanks
	for (int i = len - 1; i >= off; i--)
	{
		char ch = str[i];
		if (ch == ' ' || ch == 10 || ch == 13 || ch == 9)
		{
			newlen--;
		}
		else
		{
			break;
		}
	}

	if (newlen > 0)
	{
		// interpret tag content
		file->Parse_Content(str + off, newlen);
	}
}

/**
 * SAX-callback for entities: returns the predefined entity with the given
 * name. If there is none, a warning is added to the nzb and the next parser
 * error is suppressed.
 */
void* NzbFile::SAX_getEntity(NzbFile* file, const char * name)
{
#ifdef DISABLE_LIBXML2
	void* e = nullptr;
#else
	xmlEntityPtr e = xmlGetPredefinedEntity((xmlChar* )name);
#endif
	if (!e)
	{
		file->m_nzbInfo->AddMessage(Message::mkWarning, "entity not found");
		file->m_ignoreNextError = true;
	}

	return e;
}

/**
 * SAX-callback for parser errors: formats the message (printf-style), removes
 * trailing line breaks and blanks and adds it as error message to the nzb.
 * One error is skipped if m_ignoreNextError was set.
 */
void NzbFile::SAX_error(NzbFile* file, const char *msg, ...)
{
	if (file->m_ignoreNextError)
	{
		file->m_ignoreNextError = false;
		return;
	}

	va_list argp;
	va_start(argp, msg);
	char errMsg[1024];
	vsnprintf(errMsg, sizeof(errMsg), msg, argp);
	errMsg[1024-1] = '\0';
	va_end(argp);

	// remove trailing CRLF
	// (index based, so that no pointer before the buffer is formed for an empty message)
	for (size_t len = strlen(errMsg);
		len > 0 && (errMsg[len - 1] == '\n' || errMsg[len - 1] == '\r' || errMsg[len - 1] == ' ');
		len--)
	{
		errMsg[len - 1] = '\0';
	}

	file->m_nzbInfo->AddMessage(Message::mkError, BString<1024>("Error parsing nzb-file: %s", errMsg));
}
#endif