#362: computing 16k-hashes for downloaded files

This commit is contained in:
Andrey Prygunkov
2017-04-17 15:32:32 +02:00
parent 3ac91a4bb6
commit f7be22893d
9 changed files with 184 additions and 7 deletions

View File

@@ -108,6 +108,8 @@ nzbget_SOURCES = \
daemon/postprocess/Repair.h \
daemon/postprocess/Unpack.cpp \
daemon/postprocess/Unpack.h \
daemon/queue/DirectRenamer.cpp \
daemon/queue/DirectRenamer.h \
daemon/queue/DiskState.cpp \
daemon/queue/DiskState.h \
daemon/queue/DownloadInfo.cpp \

31
Makefile.in vendored
View File

@@ -208,7 +208,8 @@ am__nzbget_SOURCES_DIST = daemon/connect/Connection.cpp \
daemon/postprocess/RarReader.h daemon/postprocess/Rename.cpp \
daemon/postprocess/Rename.h daemon/postprocess/Repair.cpp \
daemon/postprocess/Repair.h daemon/postprocess/Unpack.cpp \
daemon/postprocess/Unpack.h daemon/queue/DiskState.cpp \
daemon/postprocess/Unpack.h daemon/queue/DirectRenamer.cpp \
daemon/queue/DirectRenamer.h daemon/queue/DiskState.cpp \
daemon/queue/DiskState.h daemon/queue/DownloadInfo.cpp \
daemon/queue/DownloadInfo.h daemon/queue/DupeCoordinator.cpp \
daemon/queue/DupeCoordinator.h \
@@ -313,11 +314,11 @@ am_nzbget_OBJECTS = Connection.$(OBJEXT) TlsSocket.$(OBJEXT) \
ParChecker.$(OBJEXT) ParParser.$(OBJEXT) ParRenamer.$(OBJEXT) \
PrePostProcessor.$(OBJEXT) RarRenamer.$(OBJEXT) \
RarReader.$(OBJEXT) Rename.$(OBJEXT) Repair.$(OBJEXT) \
Unpack.$(OBJEXT) DiskState.$(OBJEXT) DownloadInfo.$(OBJEXT) \
DupeCoordinator.$(OBJEXT) HistoryCoordinator.$(OBJEXT) \
NzbFile.$(OBJEXT) QueueCoordinator.$(OBJEXT) \
QueueEditor.$(OBJEXT) Scanner.$(OBJEXT) \
UrlCoordinator.$(OBJEXT) BinRpc.$(OBJEXT) \
Unpack.$(OBJEXT) DirectRenamer.$(OBJEXT) DiskState.$(OBJEXT) \
DownloadInfo.$(OBJEXT) DupeCoordinator.$(OBJEXT) \
HistoryCoordinator.$(OBJEXT) NzbFile.$(OBJEXT) \
QueueCoordinator.$(OBJEXT) QueueEditor.$(OBJEXT) \
Scanner.$(OBJEXT) UrlCoordinator.$(OBJEXT) BinRpc.$(OBJEXT) \
RemoteClient.$(OBJEXT) RemoteServer.$(OBJEXT) \
WebServer.$(OBJEXT) XmlRpc.$(OBJEXT) Log.$(OBJEXT) \
NString.$(OBJEXT) Observer.$(OBJEXT) Script.$(OBJEXT) \
@@ -539,7 +540,8 @@ nzbget_SOURCES = daemon/connect/Connection.cpp \
daemon/postprocess/RarReader.h daemon/postprocess/Rename.cpp \
daemon/postprocess/Rename.h daemon/postprocess/Repair.cpp \
daemon/postprocess/Repair.h daemon/postprocess/Unpack.cpp \
daemon/postprocess/Unpack.h daemon/queue/DiskState.cpp \
daemon/postprocess/Unpack.h daemon/queue/DirectRenamer.cpp \
daemon/queue/DirectRenamer.h daemon/queue/DiskState.cpp \
daemon/queue/DiskState.h daemon/queue/DownloadInfo.cpp \
daemon/queue/DownloadInfo.h daemon/queue/DupeCoordinator.cpp \
daemon/queue/DupeCoordinator.h \
@@ -868,6 +870,7 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/CommandScript.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/Connection.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/Decoder.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/DirectRenamer.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/DiskService.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/DiskState.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/DownloadInfo.Po@am__quote@
@@ -1595,6 +1598,20 @@ Unpack.obj: daemon/postprocess/Unpack.cpp
@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -c -o Unpack.obj `if test -f 'daemon/postprocess/Unpack.cpp'; then $(CYGPATH_W) 'daemon/postprocess/Unpack.cpp'; else $(CYGPATH_W) '$(srcdir)/daemon/postprocess/Unpack.cpp'; fi`
DirectRenamer.o: daemon/queue/DirectRenamer.cpp
@am__fastdepCXX_TRUE@ if $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -MT DirectRenamer.o -MD -MP -MF "$(DEPDIR)/DirectRenamer.Tpo" -c -o DirectRenamer.o `test -f 'daemon/queue/DirectRenamer.cpp' || echo '$(srcdir)/'`daemon/queue/DirectRenamer.cpp; \
@am__fastdepCXX_TRUE@ then mv -f "$(DEPDIR)/DirectRenamer.Tpo" "$(DEPDIR)/DirectRenamer.Po"; else rm -f "$(DEPDIR)/DirectRenamer.Tpo"; exit 1; fi
@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='daemon/queue/DirectRenamer.cpp' object='DirectRenamer.o' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -c -o DirectRenamer.o `test -f 'daemon/queue/DirectRenamer.cpp' || echo '$(srcdir)/'`daemon/queue/DirectRenamer.cpp
DirectRenamer.obj: daemon/queue/DirectRenamer.cpp
@am__fastdepCXX_TRUE@ if $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -MT DirectRenamer.obj -MD -MP -MF "$(DEPDIR)/DirectRenamer.Tpo" -c -o DirectRenamer.obj `if test -f 'daemon/queue/DirectRenamer.cpp'; then $(CYGPATH_W) 'daemon/queue/DirectRenamer.cpp'; else $(CYGPATH_W) '$(srcdir)/daemon/queue/DirectRenamer.cpp'; fi`; \
@am__fastdepCXX_TRUE@ then mv -f "$(DEPDIR)/DirectRenamer.Tpo" "$(DEPDIR)/DirectRenamer.Po"; else rm -f "$(DEPDIR)/DirectRenamer.Tpo"; exit 1; fi
@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='daemon/queue/DirectRenamer.cpp' object='DirectRenamer.obj' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -c -o DirectRenamer.obj `if test -f 'daemon/queue/DirectRenamer.cpp'; then $(CYGPATH_W) 'daemon/queue/DirectRenamer.cpp'; else $(CYGPATH_W) '$(srcdir)/daemon/queue/DirectRenamer.cpp'; fi`
DiskState.o: daemon/queue/DiskState.cpp
@am__fastdepCXX_TRUE@ if $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -MT DiskState.o -MD -MP -MF "$(DEPDIR)/DiskState.Tpo" -c -o DiskState.o `test -f 'daemon/queue/DiskState.cpp' || echo '$(srcdir)/'`daemon/queue/DiskState.cpp; \
@am__fastdepCXX_TRUE@ then mv -f "$(DEPDIR)/DiskState.Tpo" "$(DEPDIR)/DiskState.Po"; else rm -f "$(DEPDIR)/DiskState.Tpo"; exit 1; fi

View File

@@ -296,6 +296,11 @@ ArticleDownloader::EStatus ArticleDownloader::Download()
m_writingStarted = false;
m_articleInfo->SetCrc(0);
if (m_contentAnalyzer)
{
m_contentAnalyzer->Reset();
}
if (m_connection->GetNewsServer()->GetJoinGroup())
{
// change group
@@ -541,6 +546,11 @@ bool ArticleDownloader::Write(char* line, int len)
bool ok = len == 0 || m_articleWriter.Write(line, len);
if (m_contentAnalyzer)
{
m_contentAnalyzer->Append(line, len);
}
return ok;
}

View File

@@ -29,6 +29,15 @@
#include "NntpConnection.h"
#include "Decoder.h"
#include "ArticleWriter.h"
#include "Util.h"
// Interface for objects that inspect the content of an article while it is
// being downloaded.
//
// Implementations receive the article data in pieces through Append() and are
// told to discard everything collected so far through Reset().
class ArticleContentAnalyzer
{
public:
	// Virtual so that implementations can be destroyed through a base pointer.
	virtual ~ArticleContentAnalyzer() = default;

	// Discards all state accumulated by previous Append() calls.
	virtual void Reset() = 0;

	// Feeds the next piece of content to the analyzer.
	//   buffer - start of the data
	//   len    - number of bytes available at buffer
	virtual void Append(const void* buffer, int len) = 0;
};
class ArticleDownloader : public Thread, public Subject
{
@@ -77,6 +86,8 @@ public:
void SetConnection(NntpConnection* connection) { m_connection = connection; }
void CompleteFileParts() { m_articleWriter.CompleteFileParts(); }
int GetDownloadedSize() { return m_downloadedSize; }
void SetContentAnalyzer(std::unique_ptr<ArticleContentAnalyzer> contentAnalyzer) { m_contentAnalyzer = std::move(contentAnalyzer); }
ArticleContentAnalyzer* GetContentAnalyzer() { return m_contentAnalyzer.get(); }
void LogDebugInfo();
@@ -97,6 +108,7 @@ private:
ServerStatList m_serverStats;
bool m_writingStarted;
int m_downloadedSize = 0;
std::unique_ptr<ArticleContentAnalyzer> m_contentAnalyzer;
EStatus Download();
EStatus DecodeCheck();

View File

@@ -0,0 +1,79 @@
/*
* This file is part of nzbget. See <http://nzbget.net>.
*
* Copyright (C) 2017 Andrey Prygunkov <hugbug@users.sourceforge.net>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "nzbget.h"
#include "DirectRenamer.h"
#include "Log.h"
#include "Options.h"
#ifndef DISABLE_PARCHECK
#include "par2cmdline.h"
#include "md5.h"
#endif
// Releases the MD5 context (if one was created) by reusing the Reset() logic.
RenameContentAnalyzer::~RenameContentAnalyzer()
{
	// Inside a destructor a virtual call resolves to this class's own version;
	// the qualified call states that explicitly.
	RenameContentAnalyzer::Reset();
}
void RenameContentAnalyzer::Reset()
{
#ifndef DISABLE_PARCHECK
delete (Par2::MD5Context*)m_md5Context;
#endif
m_md5Context = nullptr;
m_dataSize = 0;
}
void RenameContentAnalyzer::Append(const void* buffer, int len)
{
#ifndef DISABLE_PARCHECK
if (!m_md5Context)
{
m_md5Context = new Par2::MD5Context();
}
int rem16kSize = std::min(len, 16 * 1024 - m_dataSize);
if (rem16kSize > 0)
{
((Par2::MD5Context*)m_md5Context)->Update(buffer, rem16kSize);
}
m_dataSize += len;
#endif
}
// Finalizes the MD5 computed over the first 16 KiB of the article and stores
// it as the 16k-hash of the file.
//
// Must be called with locked DownloadQueue.
//
//   fileInfo    - file that receives the hash via SetHash16k()
//   articleInfo - the downloaded article; its size decides whether the hash
//                 can be trusted as a hash of the file's first 16 KiB
//
// The hash is stored only when the article is at least 16 KiB large or when it
// is the only article of the file. If no data was appended since the last
// Reset() there is no MD5 context and the file's hash is left untouched.
void RenameContentAnalyzer::Finish(FileInfo* fileInfo, ArticleInfo* articleInfo)
{
#ifndef DISABLE_PARCHECK
	// Append() creates the context lazily and Reset() destroys it, so it may
	// legitimately be null here; finalizing through a null pointer would crash.
	if (m_md5Context)
	{
		Par2::MD5Hash hash;
		static_cast<Par2::MD5Context*>(m_md5Context)->Final(hash);

		// we don't support analyzing of files split into articles smaller than 16KB
		if (articleInfo->GetSize() >= 16 * 1024 || fileInfo->GetArticles()->size() == 1)
		{
			fileInfo->SetHash16k(hash.print().c_str());
		}
	}
#endif

	// The hash may still be unset at this point (small articles, missing
	// context, par-check disabled). NOTE(review): GetHash16k() presumably
	// returns a null pointer in that case - confirm against CString; a null
	// pointer must not be passed to a %s conversion.
	const char* hash16k = fileInfo->GetHash16k();
	debug("file: %s; article-hash16k: %s", fileInfo->GetFilename(), hash16k ? hash16k : "");
}

View File

@@ -0,0 +1,40 @@
/*
* This file is part of nzbget. See <http://nzbget.net>.
*
* Copyright (C) 2017 Andrey Prygunkov <hugbug@users.sourceforge.net>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef DIRECTRENAMER_H
#define DIRECTRENAMER_H
#include "ArticleDownloader.h"
class RenameContentAnalyzer : public ArticleContentAnalyzer
{
public:
virtual ~RenameContentAnalyzer();
virtual void Reset();
virtual void Append(const void* buffer, int len);
void Finish(FileInfo* fileInfo, ArticleInfo* articleInfo);
private:
// declared as void* to prevent inclusion if par2-modules into this header file
void* m_md5Context = nullptr;
int m_dataSize = 0;
};
#endif

View File

@@ -192,6 +192,9 @@ public:
void SetPartialState(EPartialState partialState) { m_partialState = partialState; }
uint32 GetCrc() { return m_crc; }
void SetCrc(uint32 crc) { m_crc = crc; }
const char* GetHash16k() { return m_hash16k; }
void SetHash16k(const char* hash16k) { m_hash16k = hash16k; }
ServerStatList* GetServerStats() { return &m_serverStats; }
private:
@@ -228,6 +231,7 @@ private:
bool m_forceDirectWrite = false;
EPartialState m_partialState = psNone;
uint32 m_crc = 0;
CString m_hash16k;
static int m_idGen;
static int m_idMax;

View File

@@ -30,6 +30,7 @@
#include "FileSystem.h"
#include "Decoder.h"
#include "StatMeter.h"
#include "DirectRenamer.h"
bool QueueCoordinator::CoordinatorDownloadQueue::EditEntry(
int ID, EEditAction action, const char* args)
@@ -589,6 +590,11 @@ void QueueCoordinator::StartArticleDownload(FileInfo* fileInfo, ArticleInfo* art
articleDownloader->SetArticleInfo(articleInfo);
articleDownloader->SetConnection(connection);
if (articleInfo->GetPartNumber() == 1 && g_Options->GetDirectRename())
{
articleDownloader->SetContentAnalyzer(std::make_unique<RenameContentAnalyzer>());
}
BString<1024> infoName("%s%c%s [%i/%i]", fileInfo->GetNzbInfo()->GetName(), (int)PATH_SEPARATOR, fileInfo->GetFilename(), articleInfo->GetPartNumber(), (int)fileInfo->GetArticles()->size());
articleDownloader->SetInfoName(infoName);
@@ -692,6 +698,11 @@ void QueueCoordinator::ArticleCompleted(ArticleDownloader* articleDownloader)
}
}
if (articleDownloader->GetContentAnalyzer() && articleDownloader->GetStatus() == ArticleDownloader::adFinished)
{
((RenameContentAnalyzer*)articleDownloader->GetContentAnalyzer())->Finish(fileInfo, articleInfo);
}
nzbInfo->SetDownloadedSize(nzbInfo->GetDownloadedSize() + articleDownloader->GetDownloadedSize());
CheckHealth(downloadQueue, fileInfo);

View File

@@ -141,6 +141,7 @@
<ClCompile Include="daemon\postprocess\RarRenamer.cpp" />
<ClCompile Include="daemon\postprocess\Rename.cpp" />
<ClCompile Include="daemon\postprocess\Unpack.cpp" />
<ClCompile Include="daemon\queue\DirectRenamer.cpp" />
<ClCompile Include="daemon\queue\DiskState.cpp" />
<ClCompile Include="daemon\queue\DownloadInfo.cpp" />
<ClCompile Include="daemon\queue\DupeCoordinator.cpp" />
@@ -239,6 +240,7 @@
<ClInclude Include="daemon\postprocess\RarRenamer.h" />
<ClInclude Include="daemon\postprocess\Rename.h" />
<ClInclude Include="daemon\postprocess\Unpack.h" />
<ClInclude Include="daemon\queue\DirectRenamer.h" />
<ClInclude Include="daemon\queue\DiskState.h" />
<ClInclude Include="daemon\queue\DownloadInfo.h" />
<ClInclude Include="daemon\queue\DupeCoordinator.h" />