mirror of
https://github.com/Cisco-Talos/clamav.git
synced 2026-05-18 21:40:17 -04:00
Single commit to add clam mods to regex code
Changes include:
* Change include of system regex headers to internal
* Add cli prefix to regex functions
* Change cli_regcomp to cli_regcomp_real to work with the others_common.c regex interface
* Optimize re_guts struct:
  - Reordering fields allows the struct to fit within 16 bytes vs 20 bytes. This helps to fix a bug on legacy 64-bit systems where there was a behaviour difference between 32-bit and 64-bit systems.
  - see bb 474 for further details
* Fix out of memory condition
  - see bb 849 for further details
  - reported by Gianluigi Tiesi <sherpya*netfarm.it>
* Remove duplicate nomem check
* Avoid passing out-of-range values to isalnum
  - reported by Nigel
* Avoid name collisions on AIX
* Fix compiler warnings
* Fix error path leak in regex/engine.c
* Fix regex when sizeof(void*) != sizeof(long) for 64bit Windows
  - see bb 2232 for further details
  - reported by Martin Olsen
* Add error case safety checks and cleanups
* Add patch for 'possible' heap overflow
  - see bb11264 for further details
  - patch submitted by the Debian team
* Use clam internal allocation functions
* Replace bounds check asserts with if checks (asserts are compiled out of production builds)

Contributors to the above include:
* Nigel Horne
* aCaB
* Török Edvin
* David Raynor
* Shawn Webb
* Steven Morgan
* Micah Snyder
* Mickey Sola
This commit is contained in:
committed by
Micah Snyder
parent
87cdd70037
commit
7df8437a87
@@ -209,9 +209,10 @@ matcher(struct re_guts *g, const char *string, size_t nmatch,
|
||||
|
||||
/* oh my, he wants the subexpressions... */
|
||||
if (m->pmatch == NULL)
|
||||
m->pmatch = reallocarray(NULL, m->g->nsub + 1,
|
||||
sizeof(regmatch_t));
|
||||
m->pmatch = (regmatch_t *)cli_malloc((m->g->nsub + 1) *
|
||||
sizeof(regmatch_t));
|
||||
if (m->pmatch == NULL) {
|
||||
free(m->lastpos);
|
||||
STATETEARDOWN(m);
|
||||
return(REG_ESPACE);
|
||||
}
|
||||
@@ -222,8 +223,8 @@ matcher(struct re_guts *g, const char *string, size_t nmatch,
|
||||
dp = dissect(m, m->coldp, endp, gf, gl);
|
||||
} else {
|
||||
if (g->nplus > 0 && m->lastpos == NULL)
|
||||
m->lastpos = reallocarray(NULL,
|
||||
g->nplus+1, sizeof(char *));
|
||||
m->lastpos = (char **)cli_malloc((g->nplus+1) *
|
||||
sizeof(char *));
|
||||
if (g->nplus > 0 && m->lastpos == NULL) {
|
||||
free(m->pmatch);
|
||||
STATETEARDOWN(m);
|
||||
|
||||
@@ -40,7 +40,8 @@
|
||||
#include <ctype.h>
|
||||
#include <limits.h>
|
||||
#include <stdlib.h>
|
||||
#include <regex.h>
|
||||
#include "others.h"
|
||||
#include "regex.h"
|
||||
|
||||
#include "utils.h"
|
||||
#include "regex2.h"
|
||||
@@ -138,13 +139,14 @@ static int never = 0; /* for use in asserts; shuts lint up */
|
||||
- regcomp - interface for parser and compilation
|
||||
*/
|
||||
int /* 0 success, otherwise REG_something */
|
||||
regcomp(regex_t *preg, const char *pattern, int cflags)
|
||||
cli_regcomp_real(regex_t *preg, const char *pattern, int cflags)
|
||||
{
|
||||
struct parse pa;
|
||||
struct re_guts *g;
|
||||
struct parse *p = &pa;
|
||||
int i;
|
||||
size_t len;
|
||||
size_t maxlen;
|
||||
#ifdef REDEBUG
|
||||
# define GOODFLAGS(f) (f)
|
||||
#else
|
||||
@@ -163,11 +165,32 @@ regcomp(regex_t *preg, const char *pattern, int cflags)
|
||||
len = strlen((char *)pattern);
|
||||
|
||||
/* do the mallocs early so failure handling is easy */
|
||||
g = malloc(sizeof(struct re_guts));
|
||||
g = (struct re_guts *)cli_malloc(sizeof(struct re_guts) +
|
||||
(NC-1)*sizeof(unsigned char));
|
||||
if (g == NULL)
|
||||
return(REG_ESPACE);
|
||||
/* Patch for bb11264 submitted by the Debian team: */
|
||||
/*
|
||||
* Limit the pattern space to avoid a 32-bit overflow on buffer
|
||||
* extension. Also avoid any signed overflow in case of conversion
|
||||
* so make the real limit based on a 31-bit overflow.
|
||||
*
|
||||
* Likely not applicable on 64-bit systems but handle the case
|
||||
* generically (who are we to stop people from using ~715MB+
|
||||
* patterns?).
|
||||
*/
|
||||
maxlen = ((size_t)-1 >> 1) / sizeof(sop) * 2 / 3;
|
||||
if (len >= maxlen) {
|
||||
free((char *)g);
|
||||
return(REG_ESPACE);
|
||||
}
|
||||
p->ssize = len/(size_t)2*(size_t)3 + (size_t)1; /* ugh */
|
||||
p->strip = reallocarray(NULL, p->ssize, sizeof(sop));
|
||||
if (p->ssize < len) {
|
||||
free((char *)g);
|
||||
return(REG_ESPACE);
|
||||
}
|
||||
|
||||
p->strip = (sop *)cli_calloc(p->ssize, sizeof(sop));
|
||||
p->slen = 0;
|
||||
if (p->strip == NULL) {
|
||||
free(g);
|
||||
@@ -219,13 +242,13 @@ regcomp(regex_t *preg, const char *pattern, int cflags)
|
||||
preg->re_magic = MAGIC1;
|
||||
#ifndef REDEBUG
|
||||
/* not debugging, so can't rely on the assert() in regexec() */
|
||||
if (g->iflags&BAD)
|
||||
if (g->iflags&REGEX_BAD)
|
||||
SETERROR(REG_ASSERT);
|
||||
#endif
|
||||
|
||||
/* win or lose, we're done */
|
||||
if (p->error != 0) /* lose */
|
||||
regfree(preg);
|
||||
cli_regfree(preg);
|
||||
return(p->error);
|
||||
}
|
||||
|
||||
@@ -236,8 +259,8 @@ static void
|
||||
p_ere(struct parse *p, int stop) /* character this ERE should end at */
|
||||
{
|
||||
char c;
|
||||
sopno prevback;
|
||||
sopno prevfwd;
|
||||
sopno prevback = 0;
|
||||
sopno prevfwd = 0;
|
||||
sopno conc;
|
||||
int first = 1; /* is this the first alternative? */
|
||||
|
||||
@@ -387,7 +410,7 @@ p_ere_exp(struct parse *p)
|
||||
count2 = p_count(p);
|
||||
REQUIRE(count <= count2, REG_BADBR);
|
||||
} else /* single number with comma */
|
||||
count2 = INFINITY;
|
||||
count2 = REGEX_INFINITY;
|
||||
} else /* just a single number */
|
||||
count2 = count;
|
||||
repeat(p, pos, count, count2);
|
||||
@@ -566,7 +589,7 @@ p_simp_re(struct parse *p,
|
||||
count2 = p_count(p);
|
||||
REQUIRE(count <= count2, REG_BADBR);
|
||||
} else /* single number with comma */
|
||||
count2 = INFINITY;
|
||||
count2 = REGEX_INFINITY;
|
||||
} else /* just a single number */
|
||||
count2 = count;
|
||||
repeat(p, pos, count, count2);
|
||||
@@ -928,13 +951,13 @@ static void
|
||||
repeat(struct parse *p,
|
||||
sopno start, /* operand from here to end of strip */
|
||||
int from, /* repeated from this number */
|
||||
int to) /* to this number of times (maybe INFINITY) */
|
||||
int to) /* to this number of times (maybe REGEX_INFINITY) */
|
||||
{
|
||||
sopno finish = HERE();
|
||||
# define N 2
|
||||
# define INF 3
|
||||
# define REP(f, t) ((f)*8 + (t))
|
||||
# define MAP(n) (((n) <= 1) ? (n) : ((n) == INFINITY) ? INF : N)
|
||||
# define MAP(n) (((n) <= 1) ? (n) : ((n) == REGEX_INFINITY) ? INF : N)
|
||||
sopno copy;
|
||||
|
||||
if (p->error != 0) /* head off possible runaway recursion */
|
||||
@@ -1022,13 +1045,14 @@ allocset(struct parse *p)
|
||||
p->ncsalloc += CHAR_BIT;
|
||||
nc = p->ncsalloc;
|
||||
assert(nc % CHAR_BIT == 0);
|
||||
nbytes = nc / CHAR_BIT *css;
|
||||
|
||||
ptr = reallocarray(p->g->sets, nc, sizeof(cset));
|
||||
ptr = (cset *)cli_realloc((char*)p->g->sets, nc * sizeof(cset));
|
||||
if (ptr == NULL)
|
||||
goto nomem;
|
||||
p->g->sets = ptr;
|
||||
|
||||
ptr = reallocarray(p->g->setbits, nc / CHAR_BIT, css);
|
||||
ptr = (uch *)cli_realloc((char*)p->g->setbits, nbytes);
|
||||
if (ptr == NULL)
|
||||
goto nomem;
|
||||
nbytes = (nc / CHAR_BIT) * css;
|
||||
@@ -1208,7 +1232,10 @@ doinsert(struct parse *p, sop op, size_t opnd, sopno pos)
|
||||
|
||||
sn = HERE();
|
||||
EMIT(op, opnd); /* do checks, ensure space */
|
||||
assert(HERE() == sn+1);
|
||||
if (HERE() != sn+1) {
|
||||
SETERROR(REG_ASSERT);
|
||||
return;
|
||||
}
|
||||
s = p->strip[sn];
|
||||
|
||||
/* adjust paren pointers */
|
||||
@@ -1252,7 +1279,7 @@ enlarge(struct parse *p, sopno size)
|
||||
if (p->ssize >= size)
|
||||
return 1;
|
||||
|
||||
sp = reallocarray(p->strip, size, sizeof(sop));
|
||||
sp = (sop *)cli_realloc(p->strip, size * sizeof(sop));
|
||||
if (sp == NULL) {
|
||||
SETERROR(REG_ESPACE);
|
||||
return 0;
|
||||
@@ -1269,7 +1296,7 @@ static void
|
||||
stripsnug(struct parse *p, struct re_guts *g)
|
||||
{
|
||||
g->nstates = p->slen;
|
||||
g->strip = reallocarray(p->strip, p->slen, sizeof(sop));
|
||||
g->strip = (sop *)cli_realloc((char *)p->strip, p->slen * sizeof(sop));
|
||||
if (g->strip == NULL) {
|
||||
SETERROR(REG_ESPACE);
|
||||
g->strip = p->strip;
|
||||
@@ -1289,8 +1316,8 @@ static void
|
||||
findmust(struct parse *p, struct re_guts *g)
|
||||
{
|
||||
sop *scan;
|
||||
sop *start; /* start initialized in the default case, after that */
|
||||
sop *newstart; /* newstart was initialized in the OCHAR case */
|
||||
sop *start = NULL; /* start initialized in the default case, after that */
|
||||
sop *newstart = NULL; /* newstart was initialized in the OCHAR case */
|
||||
sopno newlen;
|
||||
sop s;
|
||||
char *cp;
|
||||
@@ -1324,7 +1351,7 @@ findmust(struct parse *p, struct re_guts *g)
|
||||
/* assert() interferes w debug printouts */
|
||||
if (OP(s) != O_QUEST && OP(s) != O_CH &&
|
||||
OP(s) != OOR2) {
|
||||
g->iflags |= BAD;
|
||||
g->iflags |= REGEX_BAD;
|
||||
return;
|
||||
}
|
||||
} while (OP(s) != O_QUEST && OP(s) != O_CH);
|
||||
@@ -1341,9 +1368,13 @@ findmust(struct parse *p, struct re_guts *g)
|
||||
|
||||
if (g->mlen == 0) /* there isn't one */
|
||||
return;
|
||||
if (start == NULL) { /* something went wrong */
|
||||
g->mlen = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
/* turn it into a character string */
|
||||
g->must = malloc((size_t)g->mlen + 1);
|
||||
g->must = cli_malloc((size_t)g->mlen + 1);
|
||||
if (g->must == NULL) { /* argh; just forget it */
|
||||
g->mlen = 0;
|
||||
return;
|
||||
@@ -1389,6 +1420,6 @@ pluscount(struct parse *p, struct re_guts *g)
|
||||
}
|
||||
} while (OP(s) != OEND);
|
||||
if (plusnest != 0)
|
||||
g->iflags |= BAD;
|
||||
g->iflags |= REGEX_BAD;
|
||||
return(maxnest);
|
||||
}
|
||||
|
||||
@@ -40,7 +40,7 @@
|
||||
#include <ctype.h>
|
||||
#include <limits.h>
|
||||
#include <stdlib.h>
|
||||
#include <regex.h>
|
||||
#include "regex.h"
|
||||
|
||||
#include "utils.h"
|
||||
|
||||
@@ -75,7 +75,7 @@ static const struct rerr {
|
||||
= extern size_t regerror(int, const regex_t *, char *, size_t);
|
||||
*/
|
||||
size_t
|
||||
regerror(int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size)
|
||||
cli_regerror(int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size)
|
||||
{
|
||||
const struct rerr *r;
|
||||
size_t len;
|
||||
@@ -93,7 +93,7 @@ regerror(int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size)
|
||||
if (errcode&REG_ITOA) {
|
||||
if (r->code != 0) {
|
||||
assert(strlen(r->name) < sizeof(convbuf));
|
||||
(void) strlcpy(convbuf, r->name, sizeof convbuf);
|
||||
(void) cli_strlcpy(convbuf, r->name, sizeof convbuf);
|
||||
} else
|
||||
(void)snprintf(convbuf, sizeof convbuf,
|
||||
"REG_0x%x", target);
|
||||
@@ -103,7 +103,7 @@ regerror(int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size)
|
||||
}
|
||||
|
||||
if (errbuf_size != 0)
|
||||
len = strlcpy(errbuf, s, errbuf_size);
|
||||
len = cli_strlcpy(errbuf, s, errbuf_size);
|
||||
else
|
||||
len = strlen(s);
|
||||
|
||||
|
||||
@@ -132,13 +132,13 @@ CHIN(const cset *cs, char c)
|
||||
* main compiled-expression structure
|
||||
*/
|
||||
struct re_guts {
|
||||
int magic;
|
||||
# define MAGIC2 ((('R'^0200)<<8)|'E')
|
||||
sop *strip; /* malloced area for strip */
|
||||
int csetsize; /* number of bits in a cset vector */
|
||||
int ncsets; /* number of csets in use */
|
||||
cset *sets; /* -> cset [ncsets] */
|
||||
uch *setbits; /* -> uch[csetsize][ncsets/CHAR_BIT] */
|
||||
int magic;
|
||||
int csetsize; /* number of bits in a cset vector */
|
||||
int ncsets; /* number of csets in use */
|
||||
int cflags; /* copy of regcomp() cflags argument */
|
||||
sopno nstates; /* = number of sops */
|
||||
sopno firststate; /* the initial OEND (normally 0) */
|
||||
@@ -146,16 +146,16 @@ struct re_guts {
|
||||
int iflags; /* internal flags */
|
||||
# define USEBOL 01 /* used ^ */
|
||||
# define USEEOL 02 /* used $ */
|
||||
# define BAD 04 /* something wrong */
|
||||
# define REGEX_BAD 04 /* something wrong */
|
||||
int nbol; /* number of ^ used */
|
||||
int neol; /* number of $ used */
|
||||
char *must; /* match must contain this string */
|
||||
int mlen; /* length of must */
|
||||
size_t nsub; /* copy of re_nsub */
|
||||
int backrefs; /* does it use back references? */
|
||||
size_t nsub; /* copy of re_nsub */
|
||||
sopno nplus; /* how deep does it nest +s? */
|
||||
};
|
||||
|
||||
/* misc utilities */
|
||||
#define OUT (CHAR_MAX+1) /* a non-character value */
|
||||
#define ISWORD(c) (isalnum(c) || (c) == '_')
|
||||
#define ISWORD(c) (isalnum((c)&0xff) || (c) == '_')
|
||||
|
||||
@@ -47,14 +47,15 @@
|
||||
#include <string.h>
|
||||
#include <limits.h>
|
||||
#include <ctype.h>
|
||||
#include <regex.h>
|
||||
#include "others.h"
|
||||
#include "regex.h"
|
||||
|
||||
#include "utils.h"
|
||||
#include "regex2.h"
|
||||
|
||||
/* macros for manipulating states, small version */
|
||||
#define states long
|
||||
#define states1 states /* for later use in regexec() decision */
|
||||
#define states1 long /* for later use in cli_regexec() decision */
|
||||
#define CLEAR(v) ((v) = 0)
|
||||
#define SET0(v, n) ((v) &= ~((unsigned long)1 << (n)))
|
||||
#define SET1(v, n) ((v) |= (unsigned long)1 << (n))
|
||||
@@ -109,8 +110,7 @@
|
||||
#define ASSIGN(d, s) memcpy(d, s, m->g->nstates)
|
||||
#define EQ(a, b) (memcmp(a, b, m->g->nstates) == 0)
|
||||
#define STATEVARS long vn; char *space
|
||||
#define STATESETUP(m, nv) { (m)->space = reallocarray(NULL, \
|
||||
(m)->g->nstates, (nv)); \
|
||||
#define STATESETUP(m, nv) { (m)->space = cli_malloc((nv)*(m)->g->nstates); \
|
||||
if ((m)->space == NULL) return(REG_ESPACE); \
|
||||
(m)->vn = 0; }
|
||||
#define STATETEARDOWN(m) { free((m)->space); }
|
||||
@@ -137,7 +137,7 @@
|
||||
* have been prototyped.
|
||||
*/
|
||||
int /* 0 success, REG_NOMATCH failure */
|
||||
regexec(const regex_t *preg, const char *string, size_t nmatch,
|
||||
cli_regexec(const regex_t *preg, const char *string, size_t nmatch,
|
||||
regmatch_t pmatch[], int eflags)
|
||||
{
|
||||
struct re_guts *g = preg->re_g;
|
||||
@@ -150,12 +150,12 @@ regexec(const regex_t *preg, const char *string, size_t nmatch,
|
||||
|
||||
if (preg->re_magic != MAGIC1 || g->magic != MAGIC2)
|
||||
return(REG_BADPAT);
|
||||
assert(!(g->iflags&BAD));
|
||||
if (g->iflags&BAD) /* backstop for no-debug case */
|
||||
assert(!(g->iflags&REGEX_BAD));
|
||||
if (g->iflags&REGEX_BAD) /* backstop for no-debug case */
|
||||
return(REG_BADPAT);
|
||||
eflags = GOODFLAGS(eflags);
|
||||
|
||||
if (g->nstates <= CHAR_BIT*sizeof(states1) && !(eflags&REG_LARGE))
|
||||
if ((unsigned long)g->nstates <= CHAR_BIT*sizeof(states1) && !(eflags&REG_LARGE))
|
||||
return(smatcher(g, string, nmatch, pmatch, eflags));
|
||||
else
|
||||
return(lmatcher(g, string, nmatch, pmatch, eflags));
|
||||
|
||||
@@ -37,7 +37,7 @@
|
||||
#include <sys/types.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <regex.h>
|
||||
#include "regex.h"
|
||||
#include <limits.h>
|
||||
|
||||
#include "utils.h"
|
||||
@@ -47,7 +47,7 @@
|
||||
- regfree - free everything
|
||||
*/
|
||||
void
|
||||
regfree(regex_t *preg)
|
||||
cli_regfree(regex_t *preg)
|
||||
{
|
||||
struct re_guts *g;
|
||||
|
||||
@@ -66,4 +66,3 @@ regfree(regex_t *preg)
|
||||
free(g->must);
|
||||
free(g);
|
||||
}
|
||||
DEF_WEAK(regfree);
|
||||
|
||||
@@ -36,8 +36,12 @@
|
||||
*/
|
||||
|
||||
/* utility definitions */
|
||||
#define DUPMAX _POSIX2_RE_DUP_MAX /* xxx is this right? */
|
||||
#define INFINITY (DUPMAX + 1)
|
||||
#ifdef _POSIX2_RE_DUP_MAX
|
||||
#define DUPMAX _POSIX2_RE_DUP_MAX
|
||||
#else
|
||||
#define DUPMAX 255
|
||||
#endif
|
||||
#define REGEX_INFINITY (DUPMAX + 1)
|
||||
#define NC (CHAR_MAX - CHAR_MIN + 1)
|
||||
typedef unsigned char uch;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user