mirror of
https://github.com/RsyncProject/rsync.git
synced 2026-01-25 07:18:12 -05:00
Improve performance of file_checksum()
Previously, files were hashed in blocks of CSUM_CHUNK (64) bytes, which caused significant overhead. The CSUM_CHUNK define cannot be changed because md5.c depends on it, but there is no obvious reason to use it in file_checksum(). Using CHUNK_SIZE (32 kB) instead more than doubles throughput in some test cases.
This commit is contained in:
committed by
Wayne Davison
parent
a863c62cd1
commit
d474f2986e
20
checksum.c
20
checksum.c
@@ -294,7 +294,7 @@ void file_checksum(const char *fname, const STRUCT_STAT *st_p, char *sum)
|
||||
if (fd == -1)
|
||||
return;
|
||||
|
||||
buf = map_file(fd, len, MAX_MAP_SIZE, CSUM_CHUNK);
|
||||
buf = map_file(fd, len, MAX_MAP_SIZE, CHUNK_SIZE);
|
||||
|
||||
switch (checksum_type) {
|
||||
case CSUM_MD5: {
|
||||
@@ -302,8 +302,8 @@ void file_checksum(const char *fname, const STRUCT_STAT *st_p, char *sum)
|
||||
|
||||
MD5_Init(&m5);
|
||||
|
||||
for (i = 0; i + CSUM_CHUNK <= len; i += CSUM_CHUNK)
|
||||
MD5_Update(&m5, (uchar *)map_ptr(buf, i, CSUM_CHUNK), CSUM_CHUNK);
|
||||
for (i = 0; i + CHUNK_SIZE <= len; i += CHUNK_SIZE)
|
||||
MD5_Update(&m5, (uchar *)map_ptr(buf, i, CHUNK_SIZE), CHUNK_SIZE);
|
||||
|
||||
remainder = (int32)(len - i);
|
||||
if (remainder > 0)
|
||||
@@ -319,8 +319,8 @@ void file_checksum(const char *fname, const STRUCT_STAT *st_p, char *sum)
|
||||
|
||||
MD4_Init(&m4);
|
||||
|
||||
for (i = 0; i + CSUM_CHUNK <= len; i += CSUM_CHUNK)
|
||||
MD4_Update(&m4, (uchar *)map_ptr(buf, i, CSUM_CHUNK), CSUM_CHUNK);
|
||||
for (i = 0; i + CHUNK_SIZE <= len; i += CHUNK_SIZE)
|
||||
MD4_Update(&m4, (uchar *)map_ptr(buf, i, CHUNK_SIZE), CHUNK_SIZE);
|
||||
|
||||
remainder = (int32)(len - i);
|
||||
if (remainder > 0)
|
||||
@@ -337,8 +337,8 @@ void file_checksum(const char *fname, const STRUCT_STAT *st_p, char *sum)
|
||||
|
||||
mdfour_begin(&m);
|
||||
|
||||
for (i = 0; i + CSUM_CHUNK <= len; i += CSUM_CHUNK)
|
||||
mdfour_update(&m, (uchar *)map_ptr(buf, i, CSUM_CHUNK), CSUM_CHUNK);
|
||||
for (i = 0; i + CHUNK_SIZE <= len; i += CHUNK_SIZE)
|
||||
mdfour_update(&m, (uchar *)map_ptr(buf, i, CHUNK_SIZE), CHUNK_SIZE);
|
||||
|
||||
/* Prior to version 27 an incorrect MD4 checksum was computed
|
||||
* by failing to call mdfour_tail() for block sizes that
|
||||
@@ -362,9 +362,9 @@ void file_checksum(const char *fname, const STRUCT_STAT *st_p, char *sum)
|
||||
exit_cleanup(RERR_STREAMIO);
|
||||
}
|
||||
|
||||
for (i = 0; i + CSUM_CHUNK <= len; i += CSUM_CHUNK) {
|
||||
for (i = 0; i + CHUNK_SIZE <= len; i += CHUNK_SIZE) {
|
||||
XXH_errorcode const updateResult =
|
||||
XXH64_update(state, (uchar *)map_ptr(buf, i, CSUM_CHUNK), CSUM_CHUNK);
|
||||
XXH64_update(state, (uchar *)map_ptr(buf, i, CHUNK_SIZE), CHUNK_SIZE);
|
||||
if (updateResult == XXH_ERROR) {
|
||||
rprintf(FERROR, "error computing XXH64 hash");
|
||||
exit_cleanup(RERR_STREAMIO);
|
||||
@@ -373,7 +373,7 @@ void file_checksum(const char *fname, const STRUCT_STAT *st_p, char *sum)
|
||||
|
||||
remainder = (int32)(len - i);
|
||||
if (remainder > 0)
|
||||
XXH64_update(state, (uchar *)map_ptr(buf, i, CSUM_CHUNK), remainder);
|
||||
XXH64_update(state, (uchar *)map_ptr(buf, i, CHUNK_SIZE), remainder);
|
||||
SIVAL64(sum, 0, XXH64_digest(state));
|
||||
|
||||
XXH64_freeState(state);
|
||||
|
||||
Reference in New Issue
Block a user