From f27fab00d4c00a8f31a7ee453db3a9faa532bde6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?T=C3=B6r=C3=B6k=20Edvin?= Date: Sat, 6 Dec 2008 14:49:00 +0000 Subject: [PATCH] Improve Trojan.Swizzor.Gen detection: do per file statistics in addition to per string. It is amazing how a much simpler rule can do the same job better. git-svn: trunk@4539 --- ChangeLog | 7 +++ libclamav/pe.c | 22 +++++---- libclamav/special.c | 111 ++++++++++++++++++++++++++------------------ libclamav/special.h | 1 + 4 files changed, 87 insertions(+), 54 deletions(-) diff --git a/ChangeLog b/ChangeLog index ed6e2c42d..7c8e3023a 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,10 @@ +Sat Dec 6 16:54:43 EET 2008 (edwin) +------------------------------------ + * libclamav/pe.c, libclamav/special.c, libclamav/special.h: Improve + Trojan.Swizzor.Gen detection: do per file statistics in addition to + per string. It is amazing how a much simpler rule can do the same + job better. + Thu Dec 4 17:43:01 CET 2008 (acab) ----------------------------------- * clamav-milter: r4519:r4536 merge new clamav milter diff --git a/libclamav/pe.c b/libclamav/pe.c index b244d2552..404f3c809 100644 --- a/libclamav/pe.c +++ b/libclamav/pe.c @@ -1215,15 +1215,21 @@ int cli_scanpe(int desc, cli_ctx *ctx) /* Trojan.Swizzor.Gen */ if (SCAN_ALGO && (DCONF & PE_CONF_SWIZZOR) && nsections > 1 && fsize > 64*1024 && fsize < 4*1024*1024) { - int ret = CL_CLEAN; if(dirs[2].Size) { - struct swizz_stats stats; - unsigned int m = 10000; - memset(&stats, 0, sizeof(stats)); - cli_parseres_special(EC32(dirs[2].VirtualAddress), EC32(dirs[2].VirtualAddress), desc, exe_sections, nsections, fsize, hdr_size, 0, 0, &m, &stats); - if (cli_detect_swizz(&stats) == CL_VIRUS) { - *ctx->virname = "Trojan.Swizzor.Gen"; - ret = CL_VIRUS; + struct swizz_stats *stats = cli_calloc(1, sizeof(*stats)); + unsigned int m = 1000; + int ret = CL_CLEAN; + + if (!stats) + ret = CL_EMEM; + else { + cli_parseres_special(EC32(dirs[2].VirtualAddress), EC32(dirs[2].VirtualAddress), desc, exe_sections, nsections, fsize, hdr_size, 0, 0, &m, stats); + if ((ret = cli_detect_swizz(stats)) == CL_VIRUS) { + *ctx->virname = "Trojan.Swizzor.Gen"; + } + free(stats); + } + if (ret != CL_CLEAN) { free(exe_sections); return ret; } diff --git a/libclamav/special.c b/libclamav/special.c index 623b74947..ba6fafb56 100644 --- a/libclamav/special.c +++ b/libclamav/special.c @@ -360,46 +360,12 @@ static inline int swizz_j48(const uint16_t n[]) { cli_dbgmsg("swizz_j48: %u, %u, %u\n",n[0],n[1],n[2]); /* rules based on J48 tree */ - if (n[0] <= 924) - return CL_CLEAN; - if (n[0] <= 940) { - return (n[2] > 1 && n[2] <= 8) ? CL_VIRUS : CL_CLEAN; - } - if (n[2] <= 14) { - if (n[2] <= 0) { - if (n[0] <= 999) - return CL_CLEAN; - if (n[0] <= 1012) { - if (n[1] <= 23) { - if (n[0] <= 1003) - return CL_CLEAN; - return (n[1] <= 19 && n[0] > 1007 && n[1] > 15) || (n[1] > 19) ? CL_VIRUS : CL_CLEAN; - } - return CL_VIRUS; - } - return n[1] == 0 ? CL_CLEAN : CL_VIRUS; - } - if (n[2] <= 8) - return CL_VIRUS; - if (n[0] <= 954) - return CL_CLEAN; - if (n[2] <= 10) - return CL_VIRUS; - if (n[2] <= 12) { - if (n[0] <= 1011) { - if (n[1] <=32) - return CL_VIRUS; - return (n[2] <= 11 || n[1] > 51) ? CL_VIRUS : CL_CLEAN; - } - return CL_CLEAN; - } - if (n[1] <= 52) { - return (n[1] <= 43 && n[1] > 6 && - (n[2] <= 13 || n[1] <= 30 || n[1] > 40)) - ? CL_CLEAN : CL_VIRUS; - } - } - return CL_CLEAN; + if (n[0] <= 945 || !n[1]) + return 0; + if (n[0] <= 1006) + return (n[2] > 0 && n[2] <= 6); + else + return n[1] <= 10; } void cli_detect_swizz_str(const unsigned char *str, uint32_t len, struct swizz_stats *stats, int blob) @@ -440,8 +406,10 @@ void cli_detect_swizz_str(const unsigned char *str, uint32_t len, struct swizz_s for(i=0;igngrams[idx]++; + } } else if (stri[i] == ' ') words++; } @@ -461,21 +429,72 @@ void cli_detect_swizz_str(const unsigned char *str, uint32_t len, struct swizz_s uint32_t v = ngram_cnts[i]; ngram_cnts[i] = (v<<10)/all; } - ret = swizz_j48(ngram_cnts); + ret = swizz_j48(ngram_cnts) ? CL_VIRUS : CL_CLEAN; cli_dbgmsg("cli_detect_swizz_str: %s, %u words\n", ret == CL_VIRUS ? "suspicious" : "ok", words); if (ret == CL_VIRUS) stats->suspicious += j; stats->total += j; } +static inline swizz_j48_global(const uint32_t gn[]) +{ + if (gn[0] <= 24185) { + return gn[0] > 22980 && gn[8] > 0 && gn[8] <= 97; + } + if (!gn[8]) { + if (gn[4] <= 311) { + if (!gn[4]) { + return gn[1] > 0 && + ((gn[0] <= 26579 && gn[3] > 0) || + (gn[0] > 28672 && gn[0] <= 30506)); + } + if (gn[5] <= 616) { + if (gn[6] <= 104) { + return gn[9] <= 167; + } + return gn[6] <= 286; + } + } + return 0; + } + return 1; +} + int cli_detect_swizz(struct swizz_stats *stats) { + uint32_t gn[10]; + uint32_t all = 0; + unsigned i; + int global_swizz = CL_CLEAN; + cli_dbgmsg("cli_detect_swizz: %lu/%lu, version:%d, manifest: %d \n", (unsigned long)stats->suspicious, (unsigned long)stats->total, stats->has_version, stats->has_manifest); - /* not all have version/manifest */ - if (stats->total > 128 && stats->suspicious > 2*stats->total/10) { - return CL_VIRUS; + memset(gn, 0, sizeof(gn)); + for(i=0;i<17576;i++) { + uint8_t v = stats->gngrams[i]; + if (v > 10) v = 10; + if (v) { + gn[v-1]++; + all++; + } } - return CL_CLEAN; + if (all) { + /* normalize */ + cli_dbgmsg("cli_detect_swizz: gn: "); + for(i=0;itotal <= 337) + return CL_CLEAN; + if (stats->suspicious<<10 > 20*stats->total) + return CL_VIRUS; + return global_swizz; } diff --git a/libclamav/special.h b/libclamav/special.h index 5903241d2..a0d38c4c6 100644 --- a/libclamav/special.h +++ b/libclamav/special.h @@ -23,6 +23,7 @@ #include "others.h" struct swizz_stats { + uint16_t gngrams[17576]; uint32_t total; uint32_t suspicious; int has_version;