From a71eb349995ecf84ab28536fc7f8f4f87f91c115 Mon Sep 17 00:00:00 2001 From: "Micah Snyder (micasnyd)" Date: Thu, 17 Jun 2021 11:30:23 -0700 Subject: [PATCH] Fix invalid zip & macho scan recursion If zip content is detected within a file by way of the embedded file type recognition scan (in `scanraw()`), a raw scan of that "ZIPSFX" will detect all subsequent zip entries as new ZIPSFXs. Though they aren't actually scanned later, they show up in the metadata JSON. This commit prevents embedded file type detection for ZIPSFX like we already have for ZIP. Semi-related, the mach-o unibin parser presently allows scanning of FAT partitions anywhere in the fmap, to include the very beginning of the fmap. This would be an infinite loop, scanning the same file over and over again, were it not for the scan recursion limit. With the recursion limit, it's ok, but still bad behavior. This commit prevents scanning FAT files from the mach-o unibin parser where the offset is less than the end of the headers. Also fixed an unsigned integer comparison in the OLE2 parser's max-files limit check, where the addition might overflow. --- libclamav/macho.c | 9 +++++++++ libclamav/ole2_extract.c | 2 +- libclamav/scanners.c | 1 + 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/libclamav/macho.c b/libclamav/macho.c index cb9d1e823..b566d665a 100644 --- a/libclamav/macho.c +++ b/libclamav/macho.c @@ -562,6 +562,15 @@ int cli_scanmacho_unibin(cli_ctx *ctx) cli_dbgmsg("UNIBIN: Binary %u of %u\n", i + 1, fat_header.nfats); cli_dbgmsg("UNIBIN: File offset: %u\n", fat_arch.offset); cli_dbgmsg("UNIBIN: File size: %u\n", fat_arch.size); + + /* The offset must be greater than the location of the header or we risk + re-scanning the same data over and over again. The scan recursion max + will save us, but it will still cause other problems and waste CPU. 
*/ + if (fat_arch.offset < at) { + cli_dbgmsg("Invalid fat offset: %d\n", fat_arch.offset); + RETURN_BROKEN; + } + ret = cli_magic_scan_nested_fmap_type(map, fat_arch.offset, fat_arch.size, ctx, CL_TYPE_ANY, NULL); if (ret == CL_VIRUS) break; diff --git a/libclamav/ole2_extract.c b/libclamav/ole2_extract.c index a6e11d127..04c893538 100644 --- a/libclamav/ole2_extract.c +++ b/libclamav/ole2_extract.c @@ -673,7 +673,7 @@ ole2_walk_property_tree(ole2_header_t *hdr, const char *dir, int32_t prop_index, break; case 2: /* File */ ole2_listmsg("file node\n"); - if (ctx && ctx->engine->maxfiles && ctx->scannedfiles + *file_count > ctx->engine->maxfiles) { + if (ctx && ctx->engine->maxfiles && ((*file_count > ctx->engine->maxfiles) || (ctx->scannedfiles > ctx->engine->maxfiles - *file_count))) { cli_dbgmsg("OLE2: files limit reached (max: %u)\n", ctx->engine->maxfiles); ole2_list_delete(&node_list); return CL_EMAXFILES; diff --git a/libclamav/scanners.c b/libclamav/scanners.c index 809a997af..6b3d0e65b 100644 --- a/libclamav/scanners.c +++ b/libclamav/scanners.c @@ -3122,6 +3122,7 @@ static cl_error_t scanraw(cli_ctx *ctx, cli_file_t type, uint8_t typercg, cli_fi (type != CL_TYPE_GPT) && /* Omit GPT files because it's an image format that we can extract and scan manually. */ (type != CL_TYPE_CPIO_OLD) && /* Omit CPIO_OLD files because it's an image format that we can extract and scan manually. */ (type != CL_TYPE_ZIP) && /* Omit ZIP files because it'll detect each zip file entry as SFXZIP, which is a waste. We'll extract it and then scan. */ + (type != CL_TYPE_ZIPSFX) && /* Omit ZIPSFX files because we should've already detected each entry with embedded file type recognition already! */ (type != CL_TYPE_OLD_TAR) && /* Omit OLD TAR files because it's a raw archive format that we can extract and scan manually. */ (type != CL_TYPE_POSIX_TAR)) { /* Omit POSIX TAR files because it's a raw archive format that we can extract and scan manually. */ /*