mirror of
https://github.com/nzbget/nzbget.git
synced 2026-01-05 20:48:09 -05:00
574 lines
14 KiB
C++
574 lines
14 KiB
C++
/*
|
|
* This file is part of nzbget
|
|
*
|
|
* Copyright (C) 2004 Sven Henkel <sidddy@users.sourceforge.net>
|
|
* Copyright (C) 2007 Andrei Prygounkov <hugbug@users.sourceforge.net>
|
|
*
|
|
* This program is free software; you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License as published by
|
|
* the Free Software Foundation; either version 2 of the License, or
|
|
* (at your option) any later version.
|
|
*
|
|
* This program is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License
|
|
* along with this program; if not, write to the Free Software
|
|
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
|
*
|
|
* $Revision$
|
|
* $Date$
|
|
*
|
|
*/
|
|
|
|
|
|
#ifdef HAVE_CONFIG_H
|
|
#include <config.h>
|
|
#endif
|
|
|
|
#ifdef WIN32
|
|
#include "win32.h"
|
|
#endif
|
|
|
|
#include <string.h>
|
|
#include <list>
|
|
#ifdef WIN32
|
|
#include <comutil.h>
|
|
#import "MSXML.dll" named_guids
|
|
using namespace MSXML;
|
|
#else
|
|
#include <libxml/parser.h>
|
|
#include <libxml/xmlreader.h>
|
|
#endif
|
|
|
|
#include "nzbget.h"
|
|
#include "NZBFile.h"
|
|
#include "Log.h"
|
|
#include "DownloadInfo.h"
|
|
#include "Options.h"
|
|
#include "DiskState.h"
|
|
#include "Util.h"
|
|
|
|
extern Options* g_pOptions;
|
|
extern DiskState* g_pDiskState;
|
|
|
|
/**
 * Creates an empty NZBFile for the given nzb-file name.
 *
 * Keeps a private copy of szFileName and creates the NZBInfo object
 * describing this nzb-file; one reference on that object is held
 * until the NZBFile is destroyed.
 *
 * @param szFileName  name of the nzb-file (copied)
 * @param szCategory  category passed on to the NZBInfo
 */
NZBFile::NZBFile(const char* szFileName, const char* szCategory)
{
    debug("Creating NZBFile");

    m_szFileName = strdup(szFileName);

    // build and configure the info object, then keep it in the member
    NZBInfo* pInfo = new NZBInfo();
    pInfo->AddReference();
    pInfo->SetFilename(szFileName);
    pInfo->SetCategory(szCategory);
    pInfo->BuildDestDirName();
    m_pNZBInfo = pInfo;

    m_FileInfos.clear();
}
|
|
|
|
/**
 * Releases everything the NZBFile still owns: the copied file name,
 * every FileInfo still present in m_FileInfos and the reference
 * held on the NZBInfo object.
 */
NZBFile::~NZBFile()
{
    debug("Destroying NZBFile");

    // free() accepts NULL, so no explicit check is needed
    free(m_szFileName);

    // delete the file infos which were not detached
    FileInfos::iterator itFile = m_FileInfos.begin();
    while (itFile != m_FileInfos.end())
    {
        delete *itFile;
        ++itFile;
    }
    m_FileInfos.clear();

    if (m_pNZBInfo != NULL)
    {
        m_pNZBInfo->Release();
    }
}
|
|
|
|
void NZBFile::LogDebugInfo()
|
|
{
|
|
debug(" NZBFile %s", m_szFileName);
|
|
}
|
|
|
|
void NZBFile::DetachFileInfos()
|
|
{
|
|
m_FileInfos.clear();
|
|
}
|
|
|
|
/**
 * Parses an nzb-document held in memory.
 *
 * @param szFileName  name recorded for the nzb-file
 * @param szCategory  category assigned to the nzb-file
 * @param szBuffer    buffer with the nzb (XML) content
 * @param iSize       size of the buffer as passed on to Create()
 * @return the parsed NZBFile or NULL if Create() failed
 */
NZBFile* NZBFile::CreateFromBuffer(const char* szFileName, const char* szCategory, const char* szBuffer, int iSize)
{
    NZBFile* pParsed = Create(szFileName, szCategory, szBuffer, iSize, true);
    return pParsed;
}
|
|
|
|
/**
 * Parses an nzb-file from disk.
 *
 * @param szFileName  path of the nzb-file to load
 * @param szCategory  category assigned to the nzb-file
 * @return the parsed NZBFile or NULL if Create() failed
 */
NZBFile* NZBFile::CreateFromFile(const char* szFileName, const char* szCategory)
{
    NZBFile* pParsed = Create(szFileName, szCategory, NULL, 0, false);
    return pParsed;
}
|
|
|
|
/**
 * Stores an article in the article list of a file at the position
 * given by its (1-based) part number. The list is padded with NULL
 * entries as needed; AddFileInfo() removes the padding later.
 *
 * The function takes ownership of pArticleInfo: an article with a
 * part number below 1 cannot be placed in the list and is deleted,
 * and an article which replaces an earlier one with the same part
 * number causes the earlier one to be deleted.
 *
 * @param pFileInfo     file the article belongs to
 * @param pArticleInfo  article to add (ownership is transferred)
 */
void NZBFile::AddArticle(FileInfo* pFileInfo, ArticleInfo* pArticleInfo)
{
    const int iPartNumber = pArticleInfo->GetPartNumber();
    if (iPartNumber < 1)
    {
        // index (iPartNumber - 1) would lie before the start of the list
        error("Ignoring article with invalid part number %i", iPartNumber);
        delete pArticleInfo;
        return;
    }

    FileInfo::Articles* pArticles = pFileInfo->GetArticles();

    // make Article-List big enough
    while ((int)pArticles->size() < iPartNumber)
    {
        pArticles->push_back(NULL);
    }

    ArticleInfo*& pSlot = (*pArticles)[iPartNumber - 1];
    if (pSlot != pArticleInfo)
    {
        // a repeated part number must not leak the article stored before
        delete pSlot;
    }
    pSlot = pArticleInfo;
}
|
|
|
|
/**
 * Finishes a file read from the nzb-document and appends it to the
 * list of files.
 *
 * NULL entries in the article list (gaps left by AddArticle() for
 * missing part numbers) are removed first. A file without any
 * article is deleted instead of being added. Otherwise the filename
 * is derived from the subject, the totals of the NZBInfo are updated
 * and - if the queue is saved and the program runs in server mode -
 * the file is written to the disk state and its articles are cleared
 * from memory.
 *
 * @param pFileInfo  file to add (ownership is transferred)
 */
void NZBFile::AddFileInfo(FileInfo* pFileInfo)
{
    // deleting empty articles
    FileInfo::Articles* pList = pFileInfo->GetArticles();
    FileInfo::Articles::iterator pos = pList->begin();
    while (pos != pList->end())
    {
        if (*pos != NULL)
        {
            ++pos;
        }
        else
        {
            // erase() hands back the iterator following the removed entry
            pos = pList->erase(pos);
        }
    }

    if (pList->empty())
    {
        // nothing to download for this file
        delete pFileInfo;
        return;
    }

    ParseSubject(pFileInfo);
    m_FileInfos.push_back(pFileInfo);
    pFileInfo->SetNZBInfo(m_pNZBInfo);

    // keep the totals of the whole nzb-file up to date
    m_pNZBInfo->SetSize(m_pNZBInfo->GetSize() + pFileInfo->GetSize());
    m_pNZBInfo->SetFileCount(m_pNZBInfo->GetFileCount() + 1);

    if (g_pOptions->GetSaveQueue() && g_pOptions->GetServerMode())
    {
        g_pDiskState->SaveFile(pFileInfo);
        pFileInfo->ClearArticles();
    }
}
|
|
|
|
/**
 * Derives the filename of a file from its subject.
 *
 * The subject is split into tokens. Spaces separate tokens, except
 * inside a pair of quotation marks; quotation marks always delimit
 * tokens and are not part of them. The last token which contains
 * a dot followed by at least one more character is taken as the
 * filename; if there is no such token the last token is used. If the
 * subject yields no tokens at all, the whole subject is used.
 * Finally the filename is passed through MakeValidFilename().
 *
 * @param pFileInfo  file whose subject is read and whose filename is set
 */
void NZBFile::ParseSubject(FileInfo* pFileInfo)
{
    typedef std::list<char*> TokenList;
    TokenList tokens;

    // tokenizing
    const char* szSubject = pFileInfo->GetSubject();
    const char* p = szSubject;
    const char* start = p;
    bool quot = false;
    while (true)
    {
        char ch = *p;
        bool sep = (ch == '\"') || (!quot && ch == ' ') || (ch == '\0');
        if (sep)
        {
            // end of token
            int len = (int)(p - start);
            if (len > 0)
            {
                char* token = (char*)malloc(len + 1);
                strncpy(token, start, len);
                token[len] = '\0';
                tokens.push_back(token);
            }

            if (ch == '\0')
            {
                // End of subject. Stop here: preparing the next token
                // would look at the byte behind the terminating NUL.
                break;
            }

            // the next token begins behind the separator; an opening
            // quotation mark is stepped over further below
            start = p;
            if (ch != '\"' || quot)
            {
                start++;
            }
            quot = *start == '\"';
            if (quot)
            {
                start++;
                const char* q = strchr(start, '\"');
                if (q)
                {
                    // jump to the closing quotation mark (p is incremented below)
                    p = q - 1;
                }
                else
                {
                    // unbalanced quotation mark: treat the rest as unquoted
                    quot = false;
                }
            }
        }
        p++;
    }

    if (!tokens.empty())
    {
        // finding the best candidate for being a filename
        char* besttoken = tokens.back();
        for (TokenList::reverse_iterator it = tokens.rbegin(); it != tokens.rend(); it++)
        {
            char* candidate = *it;
            char* dot = strchr(candidate, '.');
            if (dot && (dot[1] != '\0'))
            {
                besttoken = candidate;
                break;
            }
        }
        pFileInfo->SetFilename(besttoken);

        // free mem
        for (TokenList::iterator it = tokens.begin(); it != tokens.end(); it++)
        {
            free(*it);
        }
    }
    else
    {
        // subject is empty or contains only separators?
        debug("Could not extract Filename from Subject: %s. Using Subject as Filename", pFileInfo->GetSubject());
        pFileInfo->SetFilename(pFileInfo->GetSubject());
    }

    pFileInfo->MakeValidFilename();
}
|
|
|
|
/**
|
|
* Check if the parsing of subject was correct
|
|
*/
|
|
void NZBFile::CheckFilenames()
|
|
{
|
|
for (FileInfos::iterator it = m_FileInfos.begin(); it != m_FileInfos.end(); it++)
|
|
{
|
|
FileInfo* pFileInfo1 = *it;
|
|
int iDupe = 0;
|
|
for (FileInfos::iterator it2 = it + 1; it2 != m_FileInfos.end(); it2++)
|
|
{
|
|
FileInfo* pFileInfo2 = *it2;
|
|
if (!strcmp(pFileInfo1->GetFilename(), pFileInfo2->GetFilename()) &&
|
|
strcmp(pFileInfo1->GetSubject(), pFileInfo2->GetSubject()))
|
|
{
|
|
iDupe++;
|
|
}
|
|
}
|
|
|
|
// If more than two files have the same parsed filename but different subjects,
|
|
// this means, that the parsing was not correct.
|
|
// in this case we take subjects as filenames to prevent
|
|
// false "duplicate files"-alarm.
|
|
// It's Ok for just two files to have the same filename, this is
|
|
// an often case by posting-errors to repost bad files
|
|
if (iDupe > 2 || (iDupe == 2 && m_FileInfos.size() == 2))
|
|
{
|
|
for (FileInfos::iterator it2 = it; it2 != m_FileInfos.end(); it2++)
|
|
{
|
|
FileInfo* pFileInfo2 = *it2;
|
|
pFileInfo2->SetFilename(pFileInfo2->GetSubject());
|
|
pFileInfo2->MakeValidFilename();
|
|
|
|
if (g_pOptions->GetSaveQueue() && g_pOptions->GetServerMode())
|
|
{
|
|
g_pDiskState->LoadArticles(pFileInfo2);
|
|
g_pDiskState->SaveFile(pFileInfo2);
|
|
pFileInfo2->ClearArticles();
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
#ifdef WIN32
|
|
/**
 * Loads an nzb-document with MSXML and builds an NZBFile from it.
 *
 * @param szFileName   name of the nzb-file; also the source of the
 *                     document if bFromBuffer is false
 * @param szCategory   category assigned to the nzb-file
 * @param szBuffer     XML content, used if bFromBuffer is true
 * @param iSize        size of szBuffer (not used by this variant)
 * @param bFromBuffer  true to parse szBuffer, false to load szFileName
 * @return the new NZBFile, or NULL if the document could not be
 *         loaded or parsed
 */
NZBFile* NZBFile::Create(const char* szFileName, const char* szCategory, const char* szBuffer, int iSize, bool bFromBuffer)
{
    CoInitialize(NULL);

    MSXML::IXMLDOMDocumentPtr doc;
    HRESULT hr = doc.CreateInstance(MSXML::CLSID_DOMDocument);
    if (FAILED(hr))
    {
        return NULL;
    }

    // Load the XML document file...
    doc->put_resolveExternals(VARIANT_FALSE);
    doc->put_validateOnParse(VARIANT_FALSE);
    doc->put_async(VARIANT_FALSE);

    VARIANT_BOOL success;
    if (!bFromBuffer)
    {
        // filename needs to be properly encoded;
        // every character may expand to three ("%xx")
        char* szURL = (char*)malloc(strlen(szFileName)*3 + 1);
        EncodeURL(szFileName, szURL);
        debug("url=\"%s\"", szURL);
        _variant_t v(szURL);
        free(szURL);
        success = doc->load(v);
    }
    else
    {
        success = doc->loadXML(szBuffer);
    }

    if (success == VARIANT_FALSE)
    {
        _bstr_t r(doc->GetparseError()->reason);
        const char* szErrMsg = r;
        error("Error parsing nzb-file: %s", szErrMsg);
        return NULL;
    }

    NZBFile* pFile = new NZBFile(szFileName, szCategory);
    if (!pFile->ParseNZB(doc))
    {
        delete pFile;
        return NULL;
    }

    pFile->CheckFilenames();
    return pFile;
}
|
|
|
|
/**
 * Percent-encodes a filename so that it can be used as URL.
 *
 * ASCII letters and digits are copied unchanged; every other byte is
 * written as '%' followed by two lowercase hexadecimal digits.
 *
 * @param szFilename  NUL-terminated filename to encode
 * @param szURL       output buffer; must have room for at least
 *                    strlen(szFilename)*3 + 1 characters
 */
void NZBFile::EncodeURL(const char* szFilename, char* szURL)
{
    static const char szHexDigits[] = "0123456789abcdef";

    while (char ch = *szFilename++)
    {
        if (('0' <= ch && ch <= '9') ||
            ('a' <= ch && ch <= 'z') ||
            ('A' <= ch && ch <= 'Z') )
        {
            *szURL++ = ch;
        }
        else
        {
            // work on the unsigned value: with a signed char the bytes
            // 0x80..0xFF are negative and would produce wrong digits
            const unsigned char uch = (unsigned char)ch;
            *szURL++ = '%';
            *szURL++ = szHexDigits[uch >> 4];
            *szURL++ = szHexDigits[uch & 0xF];
        }
    }
    *szURL = '\0';
}
|
|
|
|
/**
 * Reads all files with their groups and segments from a loaded
 * MSXML document and adds them to this NZBFile.
 *
 * @param nzb  the loaded document (IXMLDOMDocument)
 * @return false if a file-element has no "subject" attribute or a
 *         segment-element has no "number" or "bytes" attribute;
 *         true otherwise. Files added before the failure remain in
 *         the list.
 */
bool NZBFile::ParseNZB(IUnknown* nzb)
{
    MSXML::IXMLDOMDocumentPtr doc = nzb;
    MSXML::IXMLDOMNodePtr root = doc->documentElement;

    MSXML::IXMLDOMNodeListPtr fileList = root->selectNodes("/nzb/file");
    for (int i = 0; i < fileList->Getlength(); i++)
    {
        MSXML::IXMLDOMNodePtr node = fileList->Getitem(i);
        MSXML::IXMLDOMNodePtr attribute = node->Getattributes()->getNamedItem("subject");
        if (!attribute) return false;
        _bstr_t subject(attribute->Gettext());
        FileInfo* pFileInfo = new FileInfo();
        pFileInfo->SetSubject(subject);

        MSXML::IXMLDOMNodeListPtr groupList = node->selectNodes("groups/group");
        for (int g = 0; g < groupList->Getlength(); g++)
        {
            MSXML::IXMLDOMNodePtr groupNode = groupList->Getitem(g);
            _bstr_t group = groupNode->Gettext();
            // the conversion yields NULL for an empty string object
            const char* szGroup = (const char*)group;
            if (szGroup)
            {
                pFileInfo->GetGroups()->push_back(strdup(szGroup));
            }
        }

        MSXML::IXMLDOMNodeListPtr segmentList = node->selectNodes("segments/segment");
        for (int s = 0; s < segmentList->Getlength(); s++)
        {
            MSXML::IXMLDOMNodePtr segmentNode = segmentList->Getitem(s);
            _bstr_t id = segmentNode->Gettext();
            char szId[2048];
            snprintf(szId, 2048, "<%s>", (const char*)id);
            // make sure the id is terminated even if it was truncated
            szId[2048-1] = '\0';

            attribute = segmentNode->Getattributes()->getNamedItem("number");
            if (!attribute)
            {
                // the file was not handed over to AddFileInfo() yet
                delete pFileInfo;
                return false;
            }
            _bstr_t number(attribute->Gettext());

            attribute = segmentNode->Getattributes()->getNamedItem("bytes");
            if (!attribute)
            {
                delete pFileInfo;
                return false;
            }
            _bstr_t bytes(attribute->Gettext());

            int partNumber = atoi(number);
            int lsize = atoi(bytes);

            // part numbers are 1-based; as in the libxml2 variant of this
            // function a segment without a valid number is not added
            if (partNumber > 0)
            {
                ArticleInfo* pArticle = new ArticleInfo();
                pArticle->SetPartNumber(partNumber);
                pArticle->SetMessageID(szId);
                pArticle->SetSize(lsize);
                AddArticle(pFileInfo, pArticle);
            }

            if (lsize > 0)
            {
                pFileInfo->SetSize(pFileInfo->GetSize() + lsize);
            }
        }

        AddFileInfo(pFileInfo);
    }
    return true;
}
|
|
|
|
#else
|
|
|
|
/**
 * Opens an nzb-document with the libxml2 text reader and builds an
 * NZBFile from it.
 *
 * @param szFileName   name of the nzb-file; also the source of the
 *                     document if bFromBuffer is false
 * @param szCategory   category assigned to the nzb-file
 * @param szBuffer     XML content, used if bFromBuffer is true
 * @param iSize        size of szBuffer; iSize-1 bytes are given to the
 *                     reader (NOTE(review): presumably iSize counts a
 *                     terminating NUL - confirm against the callers)
 * @param bFromBuffer  true to parse szBuffer, false to load szFileName
 * @return the new NZBFile, or NULL if the reader could not be created
 *         or the document could not be parsed
 */
NZBFile* NZBFile::Create(const char* szFileName, const char* szCategory, const char* szBuffer, int iSize, bool bFromBuffer)
{
    xmlTextReaderPtr doc = bFromBuffer
        ? xmlReaderForMemory(szBuffer, iSize-1, "", NULL, 0)
        : xmlReaderForFile(szFileName, NULL, 0);
    if (!doc)
    {
        return NULL;
    }

    NZBFile* pFile = new NZBFile(szFileName, szCategory);
    const bool bParsed = pFile->ParseNZB(doc);
    if (bParsed)
    {
        pFile->CheckFilenames();
    }
    else
    {
        delete pFile;
        pFile = NULL;
    }

    // the reader is released on both paths
    xmlFreeTextReader(doc);

    return pFile;
}
|
|
|
|
bool NZBFile::ParseNZB(void* nzb)
|
|
{
|
|
FileInfo* pFileInfo = NULL;
|
|
xmlTextReaderPtr node = (xmlTextReaderPtr)nzb;
|
|
// walk through whole doc and search for segments-tags
|
|
int ret = xmlTextReaderRead(node);
|
|
while (ret == 1)
|
|
{
|
|
if (node)
|
|
{
|
|
xmlChar *name, *value;
|
|
|
|
name = xmlTextReaderName(node);
|
|
if (name == NULL)
|
|
{
|
|
name = xmlStrdup(BAD_CAST "--");
|
|
}
|
|
value = xmlTextReaderValue(node);
|
|
|
|
if (xmlTextReaderNodeType(node) == 1)
|
|
{
|
|
if (!strcmp("file", (char*)name))
|
|
{
|
|
pFileInfo = new FileInfo();
|
|
pFileInfo->SetFilename(m_szFileName);
|
|
|
|
while (xmlTextReaderMoveToNextAttribute(node))
|
|
{
|
|
xmlFree(name);
|
|
name = xmlTextReaderName(node);
|
|
if (!strcmp("subject",(char*)name))
|
|
{
|
|
xmlFree(value);
|
|
value = xmlTextReaderValue(node);
|
|
pFileInfo->SetSubject((char*)value);
|
|
}
|
|
}
|
|
}
|
|
else if (!strcmp("segment",(char*)name))
|
|
{
|
|
long long lsize = -1;
|
|
int partNumber = -1;
|
|
|
|
while (xmlTextReaderMoveToNextAttribute(node))
|
|
{
|
|
xmlFree(name);
|
|
name = xmlTextReaderName(node);
|
|
xmlFree(value);
|
|
value = xmlTextReaderValue(node);
|
|
if (!strcmp("bytes",(char*)name))
|
|
{
|
|
lsize = atol((char*)value);
|
|
}
|
|
if (!strcmp("number",(char*)name))
|
|
{
|
|
partNumber = atol((char*)value);
|
|
}
|
|
}
|
|
if (lsize > 0)
|
|
{
|
|
pFileInfo->SetSize(pFileInfo->GetSize() + lsize);
|
|
}
|
|
|
|
/* Get the #text part */
|
|
ret = xmlTextReaderRead(node);
|
|
|
|
if (partNumber > 0)
|
|
{
|
|
// new segment, add it!
|
|
xmlFree(value);
|
|
value = xmlTextReaderValue(node);
|
|
char tmp[2048];
|
|
snprintf(tmp, 2048, "<%s>", (char*)value);
|
|
ArticleInfo* pArticle = new ArticleInfo();
|
|
pArticle->SetPartNumber(partNumber);
|
|
pArticle->SetMessageID(tmp);
|
|
pArticle->SetSize(lsize);
|
|
AddArticle(pFileInfo, pArticle);
|
|
}
|
|
}
|
|
else if (!strcmp("group",(char*)name))
|
|
{
|
|
ret = xmlTextReaderRead(node);
|
|
xmlFree(value);
|
|
value = xmlTextReaderValue(node);
|
|
pFileInfo->GetGroups()->push_back(strdup((char*)value));
|
|
}
|
|
}
|
|
|
|
if (xmlTextReaderNodeType(node) == 15)
|
|
{
|
|
/* Close the file element, add the new file to file-list */
|
|
if (!strcmp("file",(char*)name))
|
|
{
|
|
AddFileInfo(pFileInfo);
|
|
}
|
|
}
|
|
|
|
xmlFree(name);
|
|
xmlFree(value);
|
|
}
|
|
ret = xmlTextReaderRead(node);
|
|
}
|
|
if (ret != 0)
|
|
{
|
|
error("Failed to parse nzb-file\n");
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
#endif
|