Single commit to add clam mods to regex code

Changes include:
	* Change include of system regex headers to internal
	* Add cli prefix to regex functions
	* Change cli_regcomp to cli_regcomp_real to work with the
	  others_common.c regex interface
	* Optimize re_guts struct:
	  - Reordering fields allows the struct to fit within 16 bytes vs 20
	    bytes. This helps to fix a bug on legacy 64-bit systems where
	    behaviour differed between 32-bit and 64-bit systems.
	  - see bb 474 for further details
	* Fix out of memory condition
	  - see bb 849 for further details
	  - reported by Gianluigi Tiesi <sherpya*netfarm.it>
	* Remove duplicate nomem check
	* Avoid passing out-of-range values to isalnum
	  - reported by Nigel
	* Avoid name collisions on AIX
	* Fix compiler warnings
	* Fix error path leak in regex/engine.c
	* Fix regex when sizeof(void*) != sizeof(long) for 64bit Windows
	  - see bb 2232 for further details
	  - reported by Martin Olsen
	* Add error case safety checks and cleanups
	* Add patch for 'possible' heap overflow
	  - see bb 11264 for further details
	  - patch submitted by the Debian team
	* Use clam internal allocation functions
	* Replace bounds check asserts with if checks (asserts are compiled
	  out of production builds)

Contributors to the above include:
	* Nigel Horne
	* aCaB
	* Török Edvin
	* David Raynor
	* Shawn Webb
	* Steven Morgan
	* Micah Snyder
	* Mickey Sola
This commit is contained in:
Mickey Sola
2022-02-22 13:31:41 -05:00
committed by Micah Snyder
parent 87cdd70037
commit 7df8437a87
7 changed files with 84 additions and 49 deletions

View File

@@ -209,9 +209,10 @@ matcher(struct re_guts *g, const char *string, size_t nmatch,
/* oh my, he wants the subexpressions... */
if (m->pmatch == NULL)
m->pmatch = reallocarray(NULL, m->g->nsub + 1,
sizeof(regmatch_t));
m->pmatch = (regmatch_t *)cli_malloc((m->g->nsub + 1) *
sizeof(regmatch_t));
if (m->pmatch == NULL) {
free(m->lastpos);
STATETEARDOWN(m);
return(REG_ESPACE);
}
@@ -222,8 +223,8 @@ matcher(struct re_guts *g, const char *string, size_t nmatch,
dp = dissect(m, m->coldp, endp, gf, gl);
} else {
if (g->nplus > 0 && m->lastpos == NULL)
m->lastpos = reallocarray(NULL,
g->nplus+1, sizeof(char *));
m->lastpos = (char **)cli_malloc((g->nplus+1) *
sizeof(char *));
if (g->nplus > 0 && m->lastpos == NULL) {
free(m->pmatch);
STATETEARDOWN(m);

View File

@@ -40,7 +40,8 @@
#include <ctype.h>
#include <limits.h>
#include <stdlib.h>
#include <regex.h>
#include "others.h"
#include "regex.h"
#include "utils.h"
#include "regex2.h"
@@ -138,13 +139,14 @@ static int never = 0; /* for use in asserts; shuts lint up */
- regcomp - interface for parser and compilation
*/
int /* 0 success, otherwise REG_something */
regcomp(regex_t *preg, const char *pattern, int cflags)
cli_regcomp_real(regex_t *preg, const char *pattern, int cflags)
{
struct parse pa;
struct re_guts *g;
struct parse *p = &pa;
int i;
size_t len;
size_t maxlen;
#ifdef REDEBUG
# define GOODFLAGS(f) (f)
#else
@@ -163,11 +165,32 @@ regcomp(regex_t *preg, const char *pattern, int cflags)
len = strlen((char *)pattern);
/* do the mallocs early so failure handling is easy */
g = malloc(sizeof(struct re_guts));
g = (struct re_guts *)cli_malloc(sizeof(struct re_guts) +
(NC-1)*sizeof(unsigned char));
if (g == NULL)
return(REG_ESPACE);
/* Patch for bb11264 submitted by the Debian team: */
/*
* Limit the pattern space to avoid a 32-bit overflow on buffer
* extension. Also avoid any signed overflow in case of conversion
* so make the real limit based on a 31-bit overflow.
*
* Likely not applicable on 64-bit systems but handle the case
* generically (who are we to stop people from using ~715MB+
* patterns?).
*/
maxlen = ((size_t)-1 >> 1) / sizeof(sop) * 2 / 3;
if (len >= maxlen) {
free((char *)g);
return(REG_ESPACE);
}
p->ssize = len/(size_t)2*(size_t)3 + (size_t)1; /* ugh */
p->strip = reallocarray(NULL, p->ssize, sizeof(sop));
if (p->ssize < len) {
free((char *)g);
return(REG_ESPACE);
}
p->strip = (sop *)cli_calloc(p->ssize, sizeof(sop));
p->slen = 0;
if (p->strip == NULL) {
free(g);
@@ -219,13 +242,13 @@ regcomp(regex_t *preg, const char *pattern, int cflags)
preg->re_magic = MAGIC1;
#ifndef REDEBUG
/* not debugging, so can't rely on the assert() in regexec() */
if (g->iflags&BAD)
if (g->iflags&REGEX_BAD)
SETERROR(REG_ASSERT);
#endif
/* win or lose, we're done */
if (p->error != 0) /* lose */
regfree(preg);
cli_regfree(preg);
return(p->error);
}
@@ -236,8 +259,8 @@ static void
p_ere(struct parse *p, int stop) /* character this ERE should end at */
{
char c;
sopno prevback;
sopno prevfwd;
sopno prevback = 0;
sopno prevfwd = 0;
sopno conc;
int first = 1; /* is this the first alternative? */
@@ -387,7 +410,7 @@ p_ere_exp(struct parse *p)
count2 = p_count(p);
REQUIRE(count <= count2, REG_BADBR);
} else /* single number with comma */
count2 = INFINITY;
count2 = REGEX_INFINITY;
} else /* just a single number */
count2 = count;
repeat(p, pos, count, count2);
@@ -566,7 +589,7 @@ p_simp_re(struct parse *p,
count2 = p_count(p);
REQUIRE(count <= count2, REG_BADBR);
} else /* single number with comma */
count2 = INFINITY;
count2 = REGEX_INFINITY;
} else /* just a single number */
count2 = count;
repeat(p, pos, count, count2);
@@ -928,13 +951,13 @@ static void
repeat(struct parse *p,
sopno start, /* operand from here to end of strip */
int from, /* repeated from this number */
int to) /* to this number of times (maybe INFINITY) */
int to) /* to this number of times (maybe REGEX_INFINITY) */
{
sopno finish = HERE();
# define N 2
# define INF 3
# define REP(f, t) ((f)*8 + (t))
# define MAP(n) (((n) <= 1) ? (n) : ((n) == INFINITY) ? INF : N)
# define MAP(n) (((n) <= 1) ? (n) : ((n) == REGEX_INFINITY) ? INF : N)
sopno copy;
if (p->error != 0) /* head off possible runaway recursion */
@@ -1022,13 +1045,14 @@ allocset(struct parse *p)
p->ncsalloc += CHAR_BIT;
nc = p->ncsalloc;
assert(nc % CHAR_BIT == 0);
nbytes = nc / CHAR_BIT *css;
ptr = reallocarray(p->g->sets, nc, sizeof(cset));
ptr = (cset *)cli_realloc((char*)p->g->sets, nc * sizeof(cset));
if (ptr == NULL)
goto nomem;
p->g->sets = ptr;
ptr = reallocarray(p->g->setbits, nc / CHAR_BIT, css);
ptr = (uch *)cli_realloc((char*)p->g->setbits, nbytes);
if (ptr == NULL)
goto nomem;
nbytes = (nc / CHAR_BIT) * css;
@@ -1208,7 +1232,10 @@ doinsert(struct parse *p, sop op, size_t opnd, sopno pos)
sn = HERE();
EMIT(op, opnd); /* do checks, ensure space */
assert(HERE() == sn+1);
if (HERE() != sn+1) {
SETERROR(REG_ASSERT);
return;
}
s = p->strip[sn];
/* adjust paren pointers */
@@ -1252,7 +1279,7 @@ enlarge(struct parse *p, sopno size)
if (p->ssize >= size)
return 1;
sp = reallocarray(p->strip, size, sizeof(sop));
sp = (sop *)cli_realloc(p->strip, size * sizeof(sop));
if (sp == NULL) {
SETERROR(REG_ESPACE);
return 0;
@@ -1269,7 +1296,7 @@ static void
stripsnug(struct parse *p, struct re_guts *g)
{
g->nstates = p->slen;
g->strip = reallocarray(p->strip, p->slen, sizeof(sop));
g->strip = (sop *)cli_realloc((char *)p->strip, p->slen * sizeof(sop));
if (g->strip == NULL) {
SETERROR(REG_ESPACE);
g->strip = p->strip;
@@ -1289,8 +1316,8 @@ static void
findmust(struct parse *p, struct re_guts *g)
{
sop *scan;
sop *start; /* start initialized in the default case, after that */
sop *newstart; /* newstart was initialized in the OCHAR case */
sop *start = NULL; /* start initialized in the default case, after that */
sop *newstart = NULL; /* newstart was initialized in the OCHAR case */
sopno newlen;
sop s;
char *cp;
@@ -1324,7 +1351,7 @@ findmust(struct parse *p, struct re_guts *g)
/* assert() interferes w debug printouts */
if (OP(s) != O_QUEST && OP(s) != O_CH &&
OP(s) != OOR2) {
g->iflags |= BAD;
g->iflags |= REGEX_BAD;
return;
}
} while (OP(s) != O_QUEST && OP(s) != O_CH);
@@ -1341,9 +1368,13 @@ findmust(struct parse *p, struct re_guts *g)
if (g->mlen == 0) /* there isn't one */
return;
if (start == NULL) { /* something went wrong */
g->mlen = 0;
return;
}
/* turn it into a character string */
g->must = malloc((size_t)g->mlen + 1);
g->must = cli_malloc((size_t)g->mlen + 1);
if (g->must == NULL) { /* argh; just forget it */
g->mlen = 0;
return;
@@ -1389,6 +1420,6 @@ pluscount(struct parse *p, struct re_guts *g)
}
} while (OP(s) != OEND);
if (plusnest != 0)
g->iflags |= BAD;
g->iflags |= REGEX_BAD;
return(maxnest);
}

View File

@@ -40,7 +40,7 @@
#include <ctype.h>
#include <limits.h>
#include <stdlib.h>
#include <regex.h>
#include "regex.h"
#include "utils.h"
@@ -75,7 +75,7 @@ static const struct rerr {
= extern size_t regerror(int, const regex_t *, char *, size_t);
*/
size_t
regerror(int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size)
cli_regerror(int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size)
{
const struct rerr *r;
size_t len;
@@ -93,7 +93,7 @@ regerror(int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size)
if (errcode&REG_ITOA) {
if (r->code != 0) {
assert(strlen(r->name) < sizeof(convbuf));
(void) strlcpy(convbuf, r->name, sizeof convbuf);
(void) cli_strlcpy(convbuf, r->name, sizeof convbuf);
} else
(void)snprintf(convbuf, sizeof convbuf,
"REG_0x%x", target);
@@ -103,7 +103,7 @@ regerror(int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size)
}
if (errbuf_size != 0)
len = strlcpy(errbuf, s, errbuf_size);
len = cli_strlcpy(errbuf, s, errbuf_size);
else
len = strlen(s);

View File

@@ -132,13 +132,13 @@ CHIN(const cset *cs, char c)
* main compiled-expression structure
*/
struct re_guts {
int magic;
# define MAGIC2 ((('R'^0200)<<8)|'E')
sop *strip; /* malloced area for strip */
int csetsize; /* number of bits in a cset vector */
int ncsets; /* number of csets in use */
cset *sets; /* -> cset [ncsets] */
uch *setbits; /* -> uch[csetsize][ncsets/CHAR_BIT] */
int magic;
int csetsize; /* number of bits in a cset vector */
int ncsets; /* number of csets in use */
int cflags; /* copy of regcomp() cflags argument */
sopno nstates; /* = number of sops */
sopno firststate; /* the initial OEND (normally 0) */
@@ -146,16 +146,16 @@ struct re_guts {
int iflags; /* internal flags */
# define USEBOL 01 /* used ^ */
# define USEEOL 02 /* used $ */
# define BAD 04 /* something wrong */
# define REGEX_BAD 04 /* something wrong */
int nbol; /* number of ^ used */
int neol; /* number of $ used */
char *must; /* match must contain this string */
int mlen; /* length of must */
size_t nsub; /* copy of re_nsub */
int backrefs; /* does it use back references? */
size_t nsub; /* copy of re_nsub */
sopno nplus; /* how deep does it nest +s? */
};
/* misc utilities */
#define OUT (CHAR_MAX+1) /* a non-character value */
#define ISWORD(c) (isalnum(c) || (c) == '_')
#define ISWORD(c) (isalnum((c)&0xff) || (c) == '_')

View File

@@ -47,14 +47,15 @@
#include <string.h>
#include <limits.h>
#include <ctype.h>
#include <regex.h>
#include "others.h"
#include "regex.h"
#include "utils.h"
#include "regex2.h"
/* macros for manipulating states, small version */
#define states long
#define states1 states /* for later use in regexec() decision */
#define states1 long /* for later use in cli_regexec() decision */
#define CLEAR(v) ((v) = 0)
#define SET0(v, n) ((v) &= ~((unsigned long)1 << (n)))
#define SET1(v, n) ((v) |= (unsigned long)1 << (n))
@@ -109,8 +110,7 @@
#define ASSIGN(d, s) memcpy(d, s, m->g->nstates)
#define EQ(a, b) (memcmp(a, b, m->g->nstates) == 0)
#define STATEVARS long vn; char *space
#define STATESETUP(m, nv) { (m)->space = reallocarray(NULL, \
(m)->g->nstates, (nv)); \
#define STATESETUP(m, nv) { (m)->space = cli_malloc((nv)*(m)->g->nstates); \
if ((m)->space == NULL) return(REG_ESPACE); \
(m)->vn = 0; }
#define STATETEARDOWN(m) { free((m)->space); }
@@ -137,7 +137,7 @@
* have been prototyped.
*/
int /* 0 success, REG_NOMATCH failure */
regexec(const regex_t *preg, const char *string, size_t nmatch,
cli_regexec(const regex_t *preg, const char *string, size_t nmatch,
regmatch_t pmatch[], int eflags)
{
struct re_guts *g = preg->re_g;
@@ -150,12 +150,12 @@ regexec(const regex_t *preg, const char *string, size_t nmatch,
if (preg->re_magic != MAGIC1 || g->magic != MAGIC2)
return(REG_BADPAT);
assert(!(g->iflags&BAD));
if (g->iflags&BAD) /* backstop for no-debug case */
assert(!(g->iflags&REGEX_BAD));
if (g->iflags&REGEX_BAD) /* backstop for no-debug case */
return(REG_BADPAT);
eflags = GOODFLAGS(eflags);
if (g->nstates <= CHAR_BIT*sizeof(states1) && !(eflags&REG_LARGE))
if ((unsigned long)g->nstates <= CHAR_BIT*sizeof(states1) && !(eflags&REG_LARGE))
return(smatcher(g, string, nmatch, pmatch, eflags));
else
return(lmatcher(g, string, nmatch, pmatch, eflags));

View File

@@ -37,7 +37,7 @@
#include <sys/types.h>
#include <stdio.h>
#include <stdlib.h>
#include <regex.h>
#include "regex.h"
#include <limits.h>
#include "utils.h"
@@ -47,7 +47,7 @@
- regfree - free everything
*/
void
regfree(regex_t *preg)
cli_regfree(regex_t *preg)
{
struct re_guts *g;
@@ -66,4 +66,3 @@ regfree(regex_t *preg)
free(g->must);
free(g);
}
DEF_WEAK(regfree);

View File

@@ -36,8 +36,12 @@
*/
/* utility definitions */
#define DUPMAX _POSIX2_RE_DUP_MAX /* xxx is this right? */
#define INFINITY (DUPMAX + 1)
#ifdef _POSIX2_RE_DUP_MAX
#define DUPMAX _POSIX2_RE_DUP_MAX
#else
#define DUPMAX 255
#endif
#define REGEX_INFINITY (DUPMAX + 1)
#define NC (CHAR_MAX - CHAR_MIN + 1)
typedef unsigned char uch;