From 7df8437a87157db75b60d5e253d8d04f409b4da0 Mon Sep 17 00:00:00 2001 From: Mickey Sola Date: Tue, 22 Feb 2022 13:31:41 -0500 Subject: [PATCH] Single commit to add clam mods to regex code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Changes include: * Change include of system regex headers to internal * Add cli prefix to regex functions * Change cli_regcomp to cli_regcomp_real to work with the others_common.c regex interface * Optimize re_guts struct: - Reordering fields allows the struct to fit within 16 bytes vs 20 bytes. This helps to fix a bug on legacy 64-bit systems where there was a behaviour difference between 32 and 64 systems. - see bb 474 for further details * Fix out of memory condition - see bb 849 for further details - reported by Gianluigi Tiesi * Remove duplicate nomem check * Avoid passing out-of-range values to isalnum - reported by Nigel * Avoid name collisions on AIX * Fix compiler warnings * Fix error path leak in regex/engine.c * Fix regex when sizeof(void*) != sizeof(long) for 64bit Windows - see bb 2232 for further Details - reported by Martin Olsen * Add error case safety checks and cleanups * Add patch for 'possible' heap overflow - see bb11264 for further details - patch submitted by the Debian team * Use clam internal allocation functions * Replace bounds check asserts with if checks (asserts are compiled out of production builds) Contributors to the above include: * Nigel Horne * aCaB * Török Edvin * David Raynor * Shawn Webb * Steven Morgan * Micah Snyder * Mickey Sola --- libclamav/regex/engine.c | 9 +++-- libclamav/regex/regcomp.c | 75 +++++++++++++++++++++++++++----------- libclamav/regex/regerror.c | 8 ++-- libclamav/regex/regex2.h | 12 +++--- libclamav/regex/regexec.c | 16 ++++---- libclamav/regex/regfree.c | 5 +-- libclamav/regex/utils.h | 8 +++- 7 files changed, 84 insertions(+), 49 deletions(-) diff --git a/libclamav/regex/engine.c b/libclamav/regex/engine.c index 
62da73f99..ed359c3d6 100644 --- a/libclamav/regex/engine.c +++ b/libclamav/regex/engine.c @@ -209,9 +209,10 @@ matcher(struct re_guts *g, const char *string, size_t nmatch, /* oh my, he wants the subexpressions... */ if (m->pmatch == NULL) - m->pmatch = reallocarray(NULL, m->g->nsub + 1, - sizeof(regmatch_t)); + m->pmatch = (regmatch_t *)cli_malloc((m->g->nsub + 1) * + sizeof(regmatch_t)); if (m->pmatch == NULL) { + free(m->lastpos); STATETEARDOWN(m); return(REG_ESPACE); } @@ -222,8 +223,8 @@ matcher(struct re_guts *g, const char *string, size_t nmatch, dp = dissect(m, m->coldp, endp, gf, gl); } else { if (g->nplus > 0 && m->lastpos == NULL) - m->lastpos = reallocarray(NULL, - g->nplus+1, sizeof(char *)); + m->lastpos = (char **)cli_malloc((g->nplus+1) * + sizeof(char *)); if (g->nplus > 0 && m->lastpos == NULL) { free(m->pmatch); STATETEARDOWN(m); diff --git a/libclamav/regex/regcomp.c b/libclamav/regex/regcomp.c index cf6acbb7d..c0ba09f22 100644 --- a/libclamav/regex/regcomp.c +++ b/libclamav/regex/regcomp.c @@ -40,7 +40,8 @@ #include #include #include -#include +#include "others.h" +#include "regex.h" #include "utils.h" #include "regex2.h" @@ -138,13 +139,14 @@ static int never = 0; /* for use in asserts; shuts lint up */ - regcomp - interface for parser and compilation */ int /* 0 success, otherwise REG_something */ -regcomp(regex_t *preg, const char *pattern, int cflags) +cli_regcomp_real(regex_t *preg, const char *pattern, int cflags) { struct parse pa; struct re_guts *g; struct parse *p = &pa; int i; size_t len; + size_t maxlen; #ifdef REDEBUG # define GOODFLAGS(f) (f) #else @@ -163,11 +165,32 @@ regcomp(regex_t *preg, const char *pattern, int cflags) len = strlen((char *)pattern); /* do the mallocs early so failure handling is easy */ - g = malloc(sizeof(struct re_guts)); + g = (struct re_guts *)cli_malloc(sizeof(struct re_guts) + + (NC-1)*sizeof(unsigned char)); if (g == NULL) return(REG_ESPACE); + /* Patch for bb11264 submitted by the Debian team: */ + /* 
+ * Limit the pattern space to avoid a 32-bit overflow on buffer + * extension. Also avoid any signed overflow in case of conversion + * so make the real limit based on a 31-bit overflow. + * + * Likely not applicable on 64-bit systems but handle the case + * generically (who are we to stop people from using ~715MB+ + * patterns?). + */ + maxlen = ((size_t)-1 >> 1) / sizeof(sop) * 2 / 3; + if (len >= maxlen) { + free((char *)g); + return(REG_ESPACE); + } p->ssize = len/(size_t)2*(size_t)3 + (size_t)1; /* ugh */ - p->strip = reallocarray(NULL, p->ssize, sizeof(sop)); + if (p->ssize < len) { + free((char *)g); + return(REG_ESPACE); + } + + p->strip = (sop *)cli_calloc(p->ssize, sizeof(sop)); p->slen = 0; if (p->strip == NULL) { free(g); @@ -219,13 +242,13 @@ regcomp(regex_t *preg, const char *pattern, int cflags) preg->re_magic = MAGIC1; #ifndef REDEBUG /* not debugging, so can't rely on the assert() in regexec() */ - if (g->iflags&BAD) + if (g->iflags&REGEX_BAD) SETERROR(REG_ASSERT); #endif /* win or lose, we're done */ if (p->error != 0) /* lose */ - regfree(preg); + cli_regfree(preg); return(p->error); } @@ -236,8 +259,8 @@ static void p_ere(struct parse *p, int stop) /* character this ERE should end at */ { char c; - sopno prevback; - sopno prevfwd; + sopno prevback = 0; + sopno prevfwd = 0; sopno conc; int first = 1; /* is this the first alternative? 
*/ @@ -387,7 +410,7 @@ p_ere_exp(struct parse *p) count2 = p_count(p); REQUIRE(count <= count2, REG_BADBR); } else /* single number with comma */ - count2 = INFINITY; + count2 = REGEX_INFINITY; } else /* just a single number */ count2 = count; repeat(p, pos, count, count2); @@ -566,7 +589,7 @@ p_simp_re(struct parse *p, count2 = p_count(p); REQUIRE(count <= count2, REG_BADBR); } else /* single number with comma */ - count2 = INFINITY; + count2 = REGEX_INFINITY; } else /* just a single number */ count2 = count; repeat(p, pos, count, count2); @@ -928,13 +951,13 @@ static void repeat(struct parse *p, sopno start, /* operand from here to end of strip */ int from, /* repeated from this number */ - int to) /* to this number of times (maybe INFINITY) */ + int to) /* to this number of times (maybe REGEX_INFINITY) */ { sopno finish = HERE(); # define N 2 # define INF 3 # define REP(f, t) ((f)*8 + (t)) -# define MAP(n) (((n) <= 1) ? (n) : ((n) == INFINITY) ? INF : N) +# define MAP(n) (((n) <= 1) ? (n) : ((n) == REGEX_INFINITY) ? 
INF : N) sopno copy; if (p->error != 0) /* head off possible runaway recursion */ @@ -1022,13 +1045,14 @@ allocset(struct parse *p) p->ncsalloc += CHAR_BIT; nc = p->ncsalloc; assert(nc % CHAR_BIT == 0); + nbytes = nc / CHAR_BIT *css; - ptr = reallocarray(p->g->sets, nc, sizeof(cset)); + ptr = (cset *)cli_realloc((char*)p->g->sets, nc * sizeof(cset)); if (ptr == NULL) goto nomem; p->g->sets = ptr; - ptr = reallocarray(p->g->setbits, nc / CHAR_BIT, css); + ptr = (uch *)cli_realloc((char*)p->g->setbits, nbytes); if (ptr == NULL) goto nomem; nbytes = (nc / CHAR_BIT) * css; @@ -1208,7 +1232,10 @@ doinsert(struct parse *p, sop op, size_t opnd, sopno pos) sn = HERE(); EMIT(op, opnd); /* do checks, ensure space */ - assert(HERE() == sn+1); + if (HERE() != sn+1) { + SETERROR(REG_ASSERT); + return; + } s = p->strip[sn]; /* adjust paren pointers */ @@ -1252,7 +1279,7 @@ enlarge(struct parse *p, sopno size) if (p->ssize >= size) return 1; - sp = reallocarray(p->strip, size, sizeof(sop)); + sp = (sop *)cli_realloc(p->strip, size * sizeof(sop)); if (sp == NULL) { SETERROR(REG_ESPACE); return 0; @@ -1269,7 +1296,7 @@ static void stripsnug(struct parse *p, struct re_guts *g) { g->nstates = p->slen; - g->strip = reallocarray(p->strip, p->slen, sizeof(sop)); + g->strip = (sop *)cli_realloc((char *)p->strip, p->slen * sizeof(sop)); if (g->strip == NULL) { SETERROR(REG_ESPACE); g->strip = p->strip; @@ -1289,8 +1316,8 @@ static void findmust(struct parse *p, struct re_guts *g) { sop *scan; - sop *start; /* start initialized in the default case, after that */ - sop *newstart; /* newstart was initialized in the OCHAR case */ + sop *start = NULL; /* start initialized in the default case, after that */ + sop *newstart = NULL; /* newstart was initialized in the OCHAR case */ sopno newlen; sop s; char *cp; @@ -1324,7 +1351,7 @@ findmust(struct parse *p, struct re_guts *g) /* assert() interferes w debug printouts */ if (OP(s) != O_QUEST && OP(s) != O_CH && OP(s) != OOR2) { - g->iflags |= BAD; 
+ g->iflags |= REGEX_BAD; return; } } while (OP(s) != O_QUEST && OP(s) != O_CH); @@ -1341,9 +1368,13 @@ findmust(struct parse *p, struct re_guts *g) if (g->mlen == 0) /* there isn't one */ return; + if (start == NULL) { /* something went wrong */ + g->mlen = 0; + return; + } /* turn it into a character string */ - g->must = malloc((size_t)g->mlen + 1); + g->must = cli_malloc((size_t)g->mlen + 1); if (g->must == NULL) { /* argh; just forget it */ g->mlen = 0; return; @@ -1389,6 +1420,6 @@ pluscount(struct parse *p, struct re_guts *g) } } while (OP(s) != OEND); if (plusnest != 0) - g->iflags |= BAD; + g->iflags |= REGEX_BAD; return(maxnest); } diff --git a/libclamav/regex/regerror.c b/libclamav/regex/regerror.c index 692b66681..49fc2b549 100644 --- a/libclamav/regex/regerror.c +++ b/libclamav/regex/regerror.c @@ -40,7 +40,7 @@ #include #include #include -#include +#include "regex.h" #include "utils.h" @@ -75,7 +75,7 @@ static const struct rerr { = extern size_t regerror(int, const regex_t *, char *, size_t); */ size_t -regerror(int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size) +cli_regerror(int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size) { const struct rerr *r; size_t len; @@ -93,7 +93,7 @@ regerror(int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size) if (errcode&REG_ITOA) { if (r->code != 0) { assert(strlen(r->name) < sizeof(convbuf)); - (void) strlcpy(convbuf, r->name, sizeof convbuf); + (void) cli_strlcpy(convbuf, r->name, sizeof convbuf); } else (void)snprintf(convbuf, sizeof convbuf, "REG_0x%x", target); @@ -103,7 +103,7 @@ regerror(int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size) } if (errbuf_size != 0) - len = strlcpy(errbuf, s, errbuf_size); + len = cli_strlcpy(errbuf, s, errbuf_size); else len = strlen(s); diff --git a/libclamav/regex/regex2.h b/libclamav/regex/regex2.h index 6fb9dcca2..e406a3d64 100644 --- a/libclamav/regex/regex2.h +++ b/libclamav/regex/regex2.h @@ -132,13 +132,13 @@ 
CHIN(const cset *cs, char c) * main compiled-expression structure */ struct re_guts { - int magic; # define MAGIC2 ((('R'^0200)<<8)|'E') sop *strip; /* malloced area for strip */ - int csetsize; /* number of bits in a cset vector */ - int ncsets; /* number of csets in use */ cset *sets; /* -> cset [ncsets] */ uch *setbits; /* -> uch[csetsize][ncsets/CHAR_BIT] */ + int magic; + int csetsize; /* number of bits in a cset vector */ + int ncsets; /* number of csets in use */ int cflags; /* copy of regcomp() cflags argument */ sopno nstates; /* = number of sops */ sopno firststate; /* the initial OEND (normally 0) */ @@ -146,16 +146,16 @@ struct re_guts { int iflags; /* internal flags */ # define USEBOL 01 /* used ^ */ # define USEEOL 02 /* used $ */ -# define BAD 04 /* something wrong */ +# define REGEX_BAD 04 /* something wrong */ int nbol; /* number of ^ used */ int neol; /* number of $ used */ char *must; /* match must contain this string */ int mlen; /* length of must */ - size_t nsub; /* copy of re_nsub */ int backrefs; /* does it use back references? */ + size_t nsub; /* copy of re_nsub */ sopno nplus; /* how deep does it nest +s? 
*/ }; /* misc utilities */ #define OUT (CHAR_MAX+1) /* a non-character value */ -#define ISWORD(c) (isalnum(c) || (c) == '_') +#define ISWORD(c) (isalnum((c)&0xff) || (c) == '_') diff --git a/libclamav/regex/regexec.c b/libclamav/regex/regexec.c index c59aef582..0c210a3c1 100644 --- a/libclamav/regex/regexec.c +++ b/libclamav/regex/regexec.c @@ -47,14 +47,15 @@ #include #include #include -#include +#include "others.h" +#include "regex.h" #include "utils.h" #include "regex2.h" /* macros for manipulating states, small version */ #define states long -#define states1 states /* for later use in regexec() decision */ +#define states1 long /* for later use in cli_regexec() decision */ #define CLEAR(v) ((v) = 0) #define SET0(v, n) ((v) &= ~((unsigned long)1 << (n))) #define SET1(v, n) ((v) |= (unsigned long)1 << (n)) @@ -109,8 +110,7 @@ #define ASSIGN(d, s) memcpy(d, s, m->g->nstates) #define EQ(a, b) (memcmp(a, b, m->g->nstates) == 0) #define STATEVARS long vn; char *space -#define STATESETUP(m, nv) { (m)->space = reallocarray(NULL, \ - (m)->g->nstates, (nv)); \ +#define STATESETUP(m, nv) { (m)->space = cli_malloc((nv)*(m)->g->nstates); \ if ((m)->space == NULL) return(REG_ESPACE); \ (m)->vn = 0; } #define STATETEARDOWN(m) { free((m)->space); } @@ -137,7 +137,7 @@ * have been prototyped. 
*/ int /* 0 success, REG_NOMATCH failure */ -regexec(const regex_t *preg, const char *string, size_t nmatch, +cli_regexec(const regex_t *preg, const char *string, size_t nmatch, regmatch_t pmatch[], int eflags) { struct re_guts *g = preg->re_g; @@ -150,12 +150,12 @@ regexec(const regex_t *preg, const char *string, size_t nmatch, if (preg->re_magic != MAGIC1 || g->magic != MAGIC2) return(REG_BADPAT); - assert(!(g->iflags&BAD)); - if (g->iflags&BAD) /* backstop for no-debug case */ + assert(!(g->iflags&REGEX_BAD)); + if (g->iflags&REGEX_BAD) /* backstop for no-debug case */ return(REG_BADPAT); eflags = GOODFLAGS(eflags); - if (g->nstates <= CHAR_BIT*sizeof(states1) && !(eflags&REG_LARGE)) + if ((unsigned long)g->nstates <= CHAR_BIT*sizeof(states1) && !(eflags&REG_LARGE)) return(smatcher(g, string, nmatch, pmatch, eflags)); else return(lmatcher(g, string, nmatch, pmatch, eflags)); diff --git a/libclamav/regex/regfree.c b/libclamav/regex/regfree.c index 1145739a4..4edb4c293 100644 --- a/libclamav/regex/regfree.c +++ b/libclamav/regex/regfree.c @@ -37,7 +37,7 @@ #include #include #include -#include +#include "regex.h" #include #include "utils.h" @@ -47,7 +47,7 @@ - regfree - free everything */ void -regfree(regex_t *preg) +cli_regfree(regex_t *preg) { struct re_guts *g; @@ -66,4 +66,3 @@ regfree(regex_t *preg) free(g->must); free(g); } -DEF_WEAK(regfree); diff --git a/libclamav/regex/utils.h b/libclamav/regex/utils.h index 2dea7103a..3cf9dd2b1 100644 --- a/libclamav/regex/utils.h +++ b/libclamav/regex/utils.h @@ -36,8 +36,12 @@ */ /* utility definitions */ -#define DUPMAX _POSIX2_RE_DUP_MAX /* xxx is this right? */ -#define INFINITY (DUPMAX + 1) +#ifdef _POSIX2_RE_DUP_MAX +#define DUPMAX _POSIX2_RE_DUP_MAX +#else +#define DUPMAX 255 +#endif +#define REGEX_INFINITY (DUPMAX + 1) #define NC (CHAR_MAX - CHAR_MIN + 1) typedef unsigned char uch;