diff --git a/ChangeLog b/ChangeLog
index a73a38572..561e16f7e 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,12 @@
+Wed Jul 23 16:32:32 EEST 2008 (edwin)
+------------------------------------
+  * libclamav: performance improvements for URL matching (bb #725, bb #650):
+	* use a suffix AC-trie and a shift-or FSM to filter
+	* rewrite the URL regex in C
+	* use a perfect hash to look up TLD and ccTLD, instead of a regex
+	* TODO: suffixes having a common prefix: loop over all of them
+		cli_ac_free: multiple virname pointing to same location
+
 Mon Jul 21 12:16:44 CEST 2008 (tk)
 ----------------------------------
   * sigtool/vba.c: fix crash on error in vba code (bb#1106)
diff --git a/contrib/entitynorm/Makefile b/contrib/entitynorm/Makefile
index 7ff2f0821..f4e619f00 100644
--- a/contrib/entitynorm/Makefile
+++ b/contrib/entitynorm/Makefile
@@ -1,7 +1,7 @@
 PERL=perl
 CC=cc
 
-all: entitylist.h encoding_aliases.h gentbl encname_chars.h
+all: entitylist.h encoding_aliases.h gentbl encname_chars.h generate_hash
 
 entities_parsed: entities entities/* entity_decl_parse.pl
 	$(PERL) entity_decl_parse.pl $</* | sort -u >$@
@@ -9,6 +9,9 @@ entities_parsed: entities entities/* entity_decl_parse.pl
 generate_entitylist: generate_entitylist.c ../../libclamav/hashtab.h ../../libclamav/hashtab.c ../../libclamav/others.c
 	$(CC) -I. -DHAVE_CONFIG_H -DCLI_MEMFUNSONLY -DPROFILE_HASHTABLE $< ../../libclamav/hashtab.c ../../libclamav/others.c -o $@
 
+generate_hash: generate_hash.c ../../libclamav/hashtab.h ../../libclamav/hashtab.c ../../libclamav/others.c
+	$(CC) -I. -DHAVE_CONFIG_H -DCLI_MEMFUNSONLY -DPROFILE_HASHTABLE $< ../../libclamav/hashtab.c ../../libclamav/others.c -o $@
+
 generate_encoding_aliases: generate_encoding_aliases.c ../../libclamav/hashtab.c ../../libclamav/others.c ../../libclamav/htmlnorm.h ../../libclamav/entconv.h ../../libclamav/cltypes.h ../../libclamav/hashtab.h ../../libclamav/hashtab.h
 	$(CC) -I. -DHAVE_CONFIG_H -DCLI_MEMFUNSONLY -DPROFILE_HASHTABLE $< ../../libclamav/hashtab.c ../../libclamav/others.c -o $@
 
diff --git a/contrib/phishing/update_iana_data.sh b/contrib/phishing/update_iana_data.sh
index fa412e90a..7ceec5253 100755
--- a/contrib/phishing/update_iana_data.sh
+++ b/contrib/phishing/update_iana_data.sh
@@ -26,30 +26,11 @@ OUTFILE=iana_tld.h
 echo "Downloading updated tld list from iana.org"
 wget $IANA_TLD -O $TMP || exit 2
 echo "Download complete, parsing data"
-# 174 is the code for |
-TLDLIST=$(egrep -v ^# $TMP | tr \\n \\174 | sed 's/[^a-zA-Z]$//')
-echo "Parse complete, removing tmpfile"
-rm $TMP
-echo "Generating tld list in $OUTFILE"
-cat >$OUTFILE <<EOF
-#ifndef IANA_TLD_H
-#define IANA_TLD_H
-EOF
-echo -n "#define iana_tld \"(" >>$OUTFILE
-echo -n $TLDLIST >>$OUTFILE
-echo ")\"" >>$OUTFILE
+grep -Ev '^#' "$TMP" | tr '[A-Z]' '[a-z]' | gperf -C -l -L ANSI-C -E -H tld_hash -N in_tld_set | grep -v '^#line' | sed -e 's/^const struct/static const struct/' -e 's/register //g' >"$OUTFILE" # perfect-hash lookup table for the IANA TLD list
 
 echo "Downloading updated country-code list from iana.org"
 wget $IANA_CCTLD -O $TMP || exit 2
 echo "Download complete, parsing data"
-CCTLDLIST=$(cat $TMP | egrep -oi "<a href=[^>]+>\\.([a-zA-Z]+).+</a>" | egrep -o ">.[a-zA-Z]+" | colrm 1 2 | tr \\n \\174 | sed 's/[^a-zA-Z]$//')
-echo "Parse complete, removing tmpfile"
-rm $TMP
-echo "Generating cctld list in $OUTFILE"
-echo -n "#define iana_cctld \"(" >>$OUTFILE
-echo -n $CCTLDLIST >>$OUTFILE
-echo ")\"" >>$OUTFILE
-
-
-echo "#endif" >>$OUTFILE
-echo "Finished succesfully"
+grep country-code "$TMP" | grep -Eoi "<a href=[^>]+>\\.([a-zA-Z]+).+</a>" | grep -Eo ">.[a-zA-Z]+" | colrm 1 2 | tr '[A-Z]' '[a-z]' | gperf -C -l -L ANSI-C -E -H cctld_hash -N in_cctld_set | grep -v '^#line' | sed -e 's/^const struct/static const struct/' -e 's/register //g' >iana_cctld.h # perfect-hash lookup table for country-code TLDs
+rm -f "$TMP" # both headers are generated; the downloaded page is no longer needed
+echo "Done"
diff --git a/contrib/phishing/update_iana_tld.sh b/contrib/phishing/update_iana_tld.sh
index 816f9f02c..2bf06aca8 100755
--- a/contrib/phishing/update_iana_tld.sh
+++ b/contrib/phishing/update_iana_tld.sh
@@ -26,17 +26,4 @@ echo "Downloading updated tld list from iana.org"
 wget $IANA_TLD -O $TMP || exit 2
 echo "Download complete, parsing data"
 # 174 is the code for |
-TLDLIST=$(egrep -v ^# $TMP|tr \\n \\174 )
-echo "Parse complete, removing tmpfile"
-rm $TMP
-echo "Generating $OUTFILE"
-cat >$OUTFILE <<EOF
-#ifndef IANA_TLD_H
-#define IANA_TLD_H
-EOF
-echo -n "#define iana_tld \"(" >>$OUTFILE
-echo -n $TLDLIST >>$OUTFILE
-echo ")\"" >>$OUTFILE
-echo "#endif" >>$OUTFILE
-echo "Finished succesfully"
-
+grep -Ev '^#' "$TMP" | tr '[A-Z]' '[a-z]' | gperf -C -l -L ANSI-C -E -H tld_hash -N in_tld_set | grep -v '^#line' | sed -e 's/^const struct/static const struct/' -e 's/register //g' >"$OUTFILE"; rm -f "$TMP" # write the header to $OUTFILE (stdout also carries the progress messages), then drop the download
diff --git a/docs/clamdoc.tex b/docs/clamdoc.tex
index a654606d7..fcae20bc5 100644
--- a/docs/clamdoc.tex
+++ b/docs/clamdoc.tex
@@ -361,7 +361,7 @@ All 4 tests passed
 	 \item The exact output from \verb+make check+	 
 	 \item Output of \verb+uname -mrsp+ 
 	 \item your \verb+config.log+	 
-	 \item The following files from the \verb+unit-tests/+ directory:
+	 \item The following files from the \verb+unit_tests/+ directory:
 		\begin{itemize}
 			\item \verb+test.log+
 	 		\item \verb+clamscan.log+
diff --git a/libclamav/hashtab.c b/libclamav/hashtab.c
index c47183f89..ef7626383 100644
--- a/libclamav/hashtab.c
+++ b/libclamav/hashtab.c
@@ -367,10 +367,18 @@ void hashtab_clear(struct hashtable *s)
 		if(s->htable[i].key && s->htable[i].key != DELETED_KEY)
 			free((void *)s->htable[i].key);
 	}
-	memset(s->htable, 0, s->capacity);
+	if(s->htable)
+		memset(s->htable, 0, s->capacity);
 	s->used = 0;
 }
 
+void hashtab_free(struct hashtable *s) /* Release the slot array owned by *s; the struct itself is not freed here. */
+{
+	hashtab_clear(s); /* must run first: it walks s->htable to free the keys the slots still hold */
+	free(s->htable);
+	s->htable = NULL; /* leave no dangling pointer in the struct */
+	s->capacity = 0; /* no slots remain once the array is gone */
+}
 
 int hashtab_store(const struct hashtable *s,FILE* out)
 {
diff --git a/libclamav/hashtab.h b/libclamav/hashtab.h
index 2d3faa37f..6410a67b3 100644
--- a/libclamav/hashtab.h
+++ b/libclamav/hashtab.h
@@ -82,7 +82,7 @@ int hashtab_init(struct hashtable *s,size_t capacity);
 const struct element* hashtab_insert(struct hashtable *s, const char* key, const size_t len, const element_data data);
 void hashtab_delete(struct hashtable *s,const char* key,const size_t len);
 void hashtab_clear(struct hashtable *s);
-
+void hashtab_free(struct hashtable *s);
 int hashtab_load(FILE* in, struct hashtable *s);
 int hashtab_store(const struct hashtable *s,FILE* out);
 
diff --git a/libclamav/iana_cctld.h b/libclamav/iana_cctld.h
new file mode 100644
index 000000000..6bceb9422
--- /dev/null
+++ b/libclamav/iana_cctld.h
@@ -0,0 +1,505 @@
+/* ANSI-C code produced by gperf version 3.0.3 */
+/* Command-line: gperf -C -l -L ANSI-C -E -C -H cctld_hash -N in_cctld_set  */
+/* Computed positions: -k'1-2' */
+
+#if !((' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) \
+      && ('%' == 37) && ('&' == 38) && ('\'' == 39) && ('(' == 40) \
+      && (')' == 41) && ('*' == 42) && ('+' == 43) && (',' == 44) \
+      && ('-' == 45) && ('.' == 46) && ('/' == 47) && ('0' == 48) \
+      && ('1' == 49) && ('2' == 50) && ('3' == 51) && ('4' == 52) \
+      && ('5' == 53) && ('6' == 54) && ('7' == 55) && ('8' == 56) \
+      && ('9' == 57) && (':' == 58) && (';' == 59) && ('<' == 60) \
+      && ('=' == 61) && ('>' == 62) && ('?' == 63) && ('A' == 65) \
+      && ('B' == 66) && ('C' == 67) && ('D' == 68) && ('E' == 69) \
+      && ('F' == 70) && ('G' == 71) && ('H' == 72) && ('I' == 73) \
+      && ('J' == 74) && ('K' == 75) && ('L' == 76) && ('M' == 77) \
+      && ('N' == 78) && ('O' == 79) && ('P' == 80) && ('Q' == 81) \
+      && ('R' == 82) && ('S' == 83) && ('T' == 84) && ('U' == 85) \
+      && ('V' == 86) && ('W' == 87) && ('X' == 88) && ('Y' == 89) \
+      && ('Z' == 90) && ('[' == 91) && ('\\' == 92) && (']' == 93) \
+      && ('^' == 94) && ('_' == 95) && ('a' == 97) && ('b' == 98) \
+      && ('c' == 99) && ('d' == 100) && ('e' == 101) && ('f' == 102) \
+      && ('g' == 103) && ('h' == 104) && ('i' == 105) && ('j' == 106) \
+      && ('k' == 107) && ('l' == 108) && ('m' == 109) && ('n' == 110) \
+      && ('o' == 111) && ('p' == 112) && ('q' == 113) && ('r' == 114) \
+      && ('s' == 115) && ('t' == 116) && ('u' == 117) && ('v' == 118) \
+      && ('w' == 119) && ('x' == 120) && ('y' == 121) && ('z' == 122) \
+      && ('{' == 123) && ('|' == 124) && ('}' == 125) && ('~' == 126))
+/* The character set is not based on ISO-646.  */
+#error "gperf generated tables don't work with this execution character set. Please report a bug to <bug-gnu-gperf@gnu.org>."
+#endif
+
+/* maximum key range = 472, duplicates = 0 */
+
+#ifdef __GNUC__
+__inline
+#else
+#ifdef __cplusplus
+inline
+#endif
+#endif
+static unsigned int
+cctld_hash (const char *str, unsigned int len) /* gperf hash: len plus one table value for each of the first two bytes; reads str[0] and str[1] unconditionally */
+{
+  static const unsigned short asso_values[] = /* 281 entries; a 476 entry pushes the sum past MAX_HASH_VALUE (475), so in_cctld_set rejects such input */
+    {
+      476, 476, 476, 476, 476, 476, 476, 476, 476, 476,
+      476, 476, 476, 476, 476, 476, 476, 476, 476, 476,
+      476, 476, 476, 476, 476, 476, 476, 476, 476, 476,
+      476, 476, 476, 476, 476, 476, 476, 476, 476, 476,
+      476, 476, 476, 476, 476, 476, 476, 476, 476, 476,
+      476, 476, 476, 476, 476, 476, 476, 476, 476, 476,
+      476, 476, 476, 476, 476, 476, 476, 476, 476, 476,
+      476, 476, 476, 476, 476, 476, 476, 476, 476, 476,
+      476, 476, 476, 476, 476, 476, 476, 476, 476, 476,
+      476, 476, 476, 476, 476, 476, 476, 119,  97,  33,
+      103,   4,  59, 115, 210, 149, 169, 143, 175,  55,
+      145,  89, 178,  37,  85,  18,  34, 239,   2,  73,
+      112,   3,  25,  10,  15, 117, 209, 229, 150, 223,
+      200,  78, 225,  54,   5, 215, 215, 190,  25,  23,
+        0,  20, 233, 234,  14, 476,  33, 204, 476, 476,
+      476, 476, 476, 476, 476, 476, 476, 476, 476, 476,
+      476, 476, 476, 476, 476, 476, 476, 476, 476, 476,
+      476, 476, 476, 476, 476, 476, 476, 476, 476, 476,
+      476, 476, 476, 476, 476, 476, 476, 476, 476, 476,
+      476, 476, 476, 476, 476, 476, 476, 476, 476, 476,
+      476, 476, 476, 476, 476, 476, 476, 476, 476, 476,
+      476, 476, 476, 476, 476, 476, 476, 476, 476, 476,
+      476, 476, 476, 476, 476, 476, 476, 476, 476, 476,
+      476, 476, 476, 476, 476, 476, 476, 476, 476, 476,
+      476, 476, 476, 476, 476, 476, 476, 476, 476, 476,
+      476, 476, 476, 476, 476, 476, 476, 476, 476, 476,
+      476, 476, 476, 476, 476, 476, 476, 476, 476, 476,
+      476, 476, 476, 476, 476, 476, 476, 476, 476, 476,
+      476
+    };
+  return len + asso_values[(unsigned char)str[1]] + asso_values[(unsigned char)str[0]+25]; /* largest possible index is 255+25 = 280, the last table slot */
+}
+
+#ifdef __GNUC__
+__inline
+#ifdef __GNUC_STDC_INLINE__
+__attribute__ ((__gnu_inline__))
+#endif
+#endif
+const char * /* NOTE(review): not declared static although this is a header; the generator's sed only rewrites "const struct" - confirm the header is included from a single translation unit */
+in_cctld_set (const char *str, unsigned int len) /* Returns the stored keyword equal to the len bytes at str, or 0 when they are not in the set. */
+{
+  enum
+    {
+      TOTAL_KEYWORDS = 252,
+      MIN_WORD_LENGTH = 2, /* MIN == MAX: every keyword is exactly two bytes long */
+      MAX_WORD_LENGTH = 2,
+      MIN_HASH_VALUE = 4,
+      MAX_HASH_VALUE = 475
+    };
+
+  static const unsigned char lengthtable[] = /* lengthtable[h] is the length of wordlist[h]; 0 marks an unused hash slot */
+    {
+       0,  0,  0,  0,  2,  2,  2,  0,  0,  2,  2,  2,  0,  0,
+       2,  2,  2,  0,  0,  2,  2,  0,  0,  0,  2,  2,  0,  2,
+       0,  2,  2,  2,  2,  0,  2,  2,  2,  2,  0,  2,  2,  2,
+       2,  2,  2,  2,  2,  2,  0,  0,  2,  0,  2,  0,  0,  2,
+       2,  2,  2,  2,  2,  2,  2,  0,  2,  0,  2,  2,  0,  2,
+       0,  2,  2,  0,  2,  2,  2,  2,  0,  0,  2,  2,  2,  0,
+       2,  2,  2,  2,  0,  2,  2,  2,  2,  0,  0,  2,  2,  2,
+       2,  2,  2,  2,  2,  0,  0,  2,  2,  2,  0,  2,  2,  2,
+       2,  0,  2,  2,  2,  2,  0,  2,  2,  2,  2,  2,  0,  2,
+       2,  2,  0,  2,  2,  2,  2,  0,  0,  2,  2,  2,  0,  2,
+       0,  2,  2,  0,  2,  2,  2,  2,  0,  0,  2,  2,  2,  2,
+       0,  2,  2,  2,  0,  0,  2,  2,  2,  0,  0,  2,  2,  2,
+       0,  2,  2,  2,  2,  0,  2,  2,  2,  2,  0,  0,  0,  2,
+       2,  0,  0,  2,  2,  2,  0,  2,  0,  2,  2,  0,  0,  2,
+       2,  2,  0,  2,  2,  0,  2,  0,  0,  2,  2,  2,  2,  0,
+       2,  2,  2,  0,  0,  2,  0,  2,  0,  0,  2,  2,  2,  0,
+       0,  2,  2,  2,  0,  2,  2,  2,  2,  0,  0,  0,  2,  2,
+       2,  2,  2,  2,  2,  0,  2,  2,  2,  2,  0,  2,  2,  2,
+       2,  2,  0,  2,  2,  2,  2,  2,  2,  2,  2,  0,  2,  2,
+       2,  2,  0,  2,  0,  2,  2,  0,  2,  0,  2,  2,  0,  2,
+       2,  0,  2,  0,  0,  0,  2,  2,  2,  0,  2,  2,  0,  0,
+       0,  2,  2,  2,  0,  0,  2,  2,  2,  0,  0,  2,  2,  2,
+       0,  0,  2,  2,  2,  0,  0,  0,  2,  0,  0,  0,  2,  0,
+       0,  0,  0,  2,  2,  2,  0,  0,  2,  0,  2,  0,  0,  2,
+       2,  2,  0,  0,  0,  0,  2,  0,  0,  0,  0,  2,  0,  0,
+       2,  2,  0,  0,  2,  2,  0,  0,  0,  0,  0,  0,  2,  0,
+       0,  0,  2,  2,  2,  0,  2,  0,  2,  0,  2,  0,  2,  2,
+       2,  0,  2,  2,  0,  0,  0,  2,  0,  0,  0,  0,  0,  2,
+       2,  0,  0,  2,  0,  0,  0,  0,  2,  0,  2,  0,  0,  2,
+       0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+       0,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+       0,  0,  0,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+       0,  0,  2,  0,  0,  0,  0,  0,  2,  0,  0,  0,  0,  0,
+       0,  0,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  2
+    };
+  static const char * const wordlist[] = /* each keyword sits at the index equal to its hash; "" fills unused slots */
+    {
+      "", "", "", "",
+      "sv",
+      "sy",
+      "se",
+      "", "",
+      "mv",
+      "my",
+      "me",
+      "", "",
+      "bv",
+      "by",
+      "be",
+      "", "",
+      "cv",
+      "cy",
+      "", "", "",
+      "tv",
+      "ms",
+      "",
+      "sz",
+      "",
+      "re",
+      "bs",
+      "ae",
+      "mz",
+      "",
+      "ws",
+      "sc",
+      "st",
+      "bz",
+      "",
+      "ye",
+      "mc",
+      "mt",
+      "cz",
+      "rs",
+      "mq",
+      "as",
+      "bt",
+      "tz",
+      "", "",
+      "cc",
+      "",
+      "az",
+      "", "",
+      "tc",
+      "tt",
+      "sm",
+      "lv",
+      "ly",
+      "ac",
+      "at",
+      "mm",
+      "",
+      "aq",
+      "",
+      "mf",
+      "bm",
+      "",
+      "yt",
+      "",
+      "bf",
+      "cm",
+      "",
+      "ls",
+      "wf",
+      "cf",
+      "tm",
+      "", "",
+      "mw",
+      "tf",
+      "am",
+      "",
+      "je",
+      "bw",
+      "af",
+      "sr",
+      "",
+      "lc",
+      "lt",
+      "so",
+      "mr",
+      "", "",
+      "tw",
+      "mo",
+      "br",
+      "rw",
+      "sb",
+      "aw",
+      "bo",
+      "cr",
+      "", "",
+      "sd",
+      "co",
+      "tr",
+      "",
+      "bb",
+      "md",
+      "to",
+      "ar",
+      "",
+      "ro",
+      "bd",
+      "ao",
+      "sg",
+      "",
+      "mx",
+      "cd",
+      "sa",
+      "mg",
+      "de",
+      "",
+      "td",
+      "ma",
+      "bg",
+      "",
+      "cx",
+      "ad",
+      "ba",
+      "cg",
+      "", "",
+      "jm",
+      "ca",
+      "tg",
+      "",
+      "ax",
+      "",
+      "lr",
+      "ag",
+      "",
+      "dz",
+      "sk",
+      "qa",
+      "sn",
+      "", "",
+      "mk",
+      "si",
+      "mn",
+      "lb",
+      "",
+      "gy",
+      "ge",
+      "bn",
+      "", "",
+      "ck",
+      "bi",
+      "cn",
+      "", "",
+      "tk",
+      "ci",
+      "tn",
+      "",
+      "jo",
+      "gs",
+      "sj",
+      "an",
+      "",
+      "dm",
+      "la",
+      "ai",
+      "sl",
+      "", "", "",
+      "bj",
+      "ml",
+      "", "",
+      "mp",
+      "gt",
+      "bl",
+      "",
+      "gq",
+      "",
+      "tj",
+      "cl",
+      "", "",
+      "py",
+      "pe",
+      "tl",
+      "",
+      "lk",
+      "tp",
+      "",
+      "al",
+      "", "",
+      "li",
+      "ie",
+      "gm",
+      "do",
+      "",
+      "ps",
+      "gf",
+      "sh",
+      "", "",
+      "ee",
+      "",
+      "mh",
+      "", "",
+      "is",
+      "ne",
+      "bh",
+      "", "",
+      "gw",
+      "pt",
+      "ch",
+      "",
+      "es",
+      "ky",
+      "ke",
+      "th",
+      "", "", "",
+      "it",
+      "gr",
+      "uy",
+      "iq",
+      "ve",
+      "su",
+      "nz",
+      "",
+      "ec",
+      "et",
+      "mu",
+      "pm",
+      "",
+      "gb",
+      "nc",
+      "pf",
+      "kz",
+      "us",
+      "",
+      "gd",
+      "cu",
+      "im",
+      "jp",
+      "ht",
+      "uz",
+      "zm",
+      "dk",
+      "",
+      "ru",
+      "pw",
+      "au",
+      "gg",
+      "",
+      "vc",
+      "",
+      "ga",
+      "om",
+      "",
+      "yu",
+      "",
+      "nf",
+      "pr",
+      "",
+      "zw",
+      "hm",
+      "",
+      "km",
+      "", "", "",
+      "fm",
+      "ir",
+      "dj",
+      "",
+      "um",
+      "io",
+      "", "", "",
+      "lu",
+      "er",
+      "gn",
+      "", "",
+      "kw",
+      "gi",
+      "nr",
+      "", "",
+      "id",
+      "no",
+      "pg",
+      "", "",
+      "hr",
+      "pa",
+      "kr",
+      "", "", "",
+      "fr",
+      "", "", "",
+      "fo",
+      "", "", "", "",
+      "za",
+      "eg",
+      "gl",
+      "", "",
+      "gp",
+      "",
+      "ng",
+      "", "",
+      "pk",
+      "na",
+      "pn",
+      "", "", "", "",
+      "kg",
+      "", "", "", "",
+      "in",
+      "", "",
+      "ug",
+      "vg",
+      "", "",
+      "ua",
+      "va",
+      "", "", "", "", "", "",
+      "gh",
+      "", "", "",
+      "ni",
+      "pl",
+      "hk",
+      "",
+      "hn",
+      "",
+      "kn",
+      "",
+      "fk",
+      "",
+      "ki",
+      "il",
+      "uk",
+      "",
+      "fi",
+      "vn",
+      "", "", "",
+      "vi",
+      "", "", "", "", "",
+      "gu",
+      "nl",
+      "", "",
+      "np",
+      "", "", "", "",
+      "fj",
+      "",
+      "ph",
+      "", "",
+      "kp",
+      "", "", "", "", "", "", "", "", "",
+      "", "", "", "", "", "",
+      "eh",
+      "", "", "", "", "", "", "", "", "",
+      "", "", "", "", "", "",
+      "kh",
+      "", "", "", "", "", "", "", "", "",
+      "", "", "",
+      "eu",
+      "", "", "", "", "",
+      "nu",
+      "", "", "", "", "", "", "",
+      "hu",
+      "", "", "", "", "", "", "", "", "",
+      "",
+      "vu"
+    };
+
+  if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH) /* only len == 2 passes, the length cctld_hash needs for str[0] and str[1] */
+    {
+      int key = cctld_hash (str, len);
+
+      if (key <= MAX_HASH_VALUE && key >= 0) /* keeps key inside both 476-entry tables */
+        if (len == lengthtable[key]) /* a 0 entry (unused slot) never equals len, so empty slots are skipped */
+          {
+            const char *s = wordlist[key];
+
+            if (*str == *s && !memcmp (str + 1, s + 1, len - 1)) /* NOTE(review): memcmp needs <string.h>, which this header does not include - the including file must provide it */
+              return s;
+          }
+    }
+  return 0;
+}
diff --git a/libclamav/iana_tld.h b/libclamav/iana_tld.h
index e3fd17b08..f2568f675 100644
--- a/libclamav/iana_tld.h
+++ b/libclamav/iana_tld.h
@@ -1,28 +1,746 @@
-/*
- *  Phishing module: iana tld list.
- *
- *  Copyright (C) 2007-2008 Sourcefire, Inc.
- *
- *  Authors: Török Edvin
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License version 2 as
- *  published by the Free Software Foundation.
- *
- *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License
- *  along with this program; if not, write to the Free Software
- *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
- *  MA 02110-1301, USA.
- */
+/* ANSI-C code produced by gperf version 3.0.3 */
+/* Command-line: gperf -C -l -L ANSI-C -E -C -H tld_hash -N in_tld_set  */
+/* Computed positions: -k'1-2,6' */
 
-#ifndef IANA_TLD_H
-#define IANA_TLD_H
-#define iana_tld "(A[CDEFGILMNOQRSTUWXZ]|B[ABDEFGHIJMNORSTVWYZ]|C[ACDFGHIKLMNORUVXYZ]|D[EJKMOZ]|E[CEGRSTU]|F[IJKMOR]|G[ABDEFGHILMNPQRSTUWY]|H[KMNRTU]|I[DELMNOQRST]|J[EMOP]|K[EGHIMNPRWYZ]|L[ABCIKRSTUVY]|M[ACDEGHKLMNOPQRSTUVWXYZ]|N[ACEFGILOPRUZ]|OM|P[AEFGHKLMNRSTWY]|QA|R[EOSUW]|S[ABCDEGHIJKLMNORTUVYZ]|T[CDFGHJKLMNOPRTVWZ]|U[AGKMSYZ]|V[ACEGINU]|W[FS]|Y[ETU]|Z[AMW]|BIZ|CAT|COM|EDU|GOV|INT|MIL|NET|ORG|PRO|TEL|AERO|ARPA|ASIA|COOP|INFO|JOBS|MOBI|NAME|MUSEUM|TRAVEL|XN--ZCKZAH|XN--0ZWM56D|XN--DEBA0AD|XN--G6W251D|XN--JXALPDLP|XN--KGBECHTV|XN--9T4B11YI5A|XN--80AKHBYKNJ4F|XN--11B5BS3A9AJ6G|XN--HGBK6AJ7F53BBA)"
-#define iana_cctld "(A[CDEFGILMNOQRSTUWXZ]|B[ABDEFGHIJLMNORSTVWYZ]|C[ACDFGHIKLMNORUVXYZ]|D[EJKMOZ]|E[CEGHRSTU]|F[IJKMOR]|G[ABDEFGHILMNPQRSTUWY]|H[KMNRTU]|I[DELMNOQRST]|J[EMOP]|K[EGHIMNPRWYZ]|L[ABCIKRSTUVY]|M[ACDEFGHKLMNOPQRSTUVWXYZ]|N[ACEFGILOPRUZ]|OM|P[AEFGHKLMNRSTWY]|QA|R[EOSUW]|S[ABCDEGHIJKLMNORTUVYZ]|T[CDFGHJKLMNOPRTVWZ]|U[AGKMSYZ]|V[ACEGINU]|W[FS]|Y[ETU]|Z[AMW]|BIZ|CAT|COM|EDU|GOV|IN[TT]|MIL|NET|ORG|PRO|TEL|AERO|ARP[AA]|ASIA|COOP|INFO|JOBS|MOBI|NAME|MUSEUM)"
+#if !((' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) \
+      && ('%' == 37) && ('&' == 38) && ('\'' == 39) && ('(' == 40) \
+      && (')' == 41) && ('*' == 42) && ('+' == 43) && (',' == 44) \
+      && ('-' == 45) && ('.' == 46) && ('/' == 47) && ('0' == 48) \
+      && ('1' == 49) && ('2' == 50) && ('3' == 51) && ('4' == 52) \
+      && ('5' == 53) && ('6' == 54) && ('7' == 55) && ('8' == 56) \
+      && ('9' == 57) && (':' == 58) && (';' == 59) && ('<' == 60) \
+      && ('=' == 61) && ('>' == 62) && ('?' == 63) && ('A' == 65) \
+      && ('B' == 66) && ('C' == 67) && ('D' == 68) && ('E' == 69) \
+      && ('F' == 70) && ('G' == 71) && ('H' == 72) && ('I' == 73) \
+      && ('J' == 74) && ('K' == 75) && ('L' == 76) && ('M' == 77) \
+      && ('N' == 78) && ('O' == 79) && ('P' == 80) && ('Q' == 81) \
+      && ('R' == 82) && ('S' == 83) && ('T' == 84) && ('U' == 85) \
+      && ('V' == 86) && ('W' == 87) && ('X' == 88) && ('Y' == 89) \
+      && ('Z' == 90) && ('[' == 91) && ('\\' == 92) && (']' == 93) \
+      && ('^' == 94) && ('_' == 95) && ('a' == 97) && ('b' == 98) \
+      && ('c' == 99) && ('d' == 100) && ('e' == 101) && ('f' == 102) \
+      && ('g' == 103) && ('h' == 104) && ('i' == 105) && ('j' == 106) \
+      && ('k' == 107) && ('l' == 108) && ('m' == 109) && ('n' == 110) \
+      && ('o' == 111) && ('p' == 112) && ('q' == 113) && ('r' == 114) \
+      && ('s' == 115) && ('t' == 116) && ('u' == 117) && ('v' == 118) \
+      && ('w' == 119) && ('x' == 120) && ('y' == 121) && ('z' == 122) \
+      && ('{' == 123) && ('|' == 124) && ('}' == 125) && ('~' == 126))
+/* The character set is not based on ISO-646.  */
+#error "gperf generated tables don't work with this execution character set. Please report a bug to <bug-gnu-gperf@gnu.org>."
 #endif
 
+/* maximum key range = 983, duplicates = 0 */
+
+#ifdef __GNUC__
+__inline
+#else
+#ifdef __cplusplus
+inline
+#endif
+#endif
+static unsigned int
+tld_hash (const char *str, unsigned int len) /* gperf hash: len plus table values for bytes 1, 2 and 6 of str (positions -k'1-2,6'), as far as len selects them */
+{
+  static const unsigned short asso_values[] = /* 281 entries; 988 is larger than MAX_HASH_VALUE (987) declared in in_tld_set */
+    {
+      988, 988, 988, 988, 988, 988, 988, 988, 988, 988,
+      988, 988, 988, 988, 988, 988, 988, 988, 988, 988,
+      988, 988, 988, 988, 988, 988, 988, 988, 988, 988,
+      988, 988, 988, 988, 988, 988, 988, 988, 988, 988,
+      988, 988, 988, 988, 988, 988, 988, 988,   0,  15,
+      988, 988, 988, 988,   0, 988, 988, 988, 988, 988,
+      988, 988, 988, 988, 988, 988, 988, 988, 988, 988,
+      988, 988, 988, 988, 988, 988, 988, 988, 988, 988,
+      988, 988, 988, 988, 988, 988, 988, 988, 988, 988,
+      988, 988, 988, 988, 988, 988, 988, 170, 328,  88,
+        3,  50, 293, 205, 123, 430, 500, 238, 115, 320,
+      375,  30, 413, 348,  70,  43, 475,  18,   6, 283,
+       95,  58,  10, 220,   5, 485, 480,   8, 190, 390,
+      225, 113, 420,  95,   0,  15,  50, 295,  20, 128,
+      130,  80, 405, 470, 340,   0, 305, 415, 988, 988,
+      988, 988, 988, 988, 988, 988, 988, 988, 988, 988,
+      988, 988, 988, 988, 988, 988, 988, 988, 988, 988,
+      988, 988, 988, 988, 988, 988, 988, 988, 988, 988,
+      988, 988, 988, 988, 988, 988, 988, 988, 988, 988,
+      988, 988, 988, 988, 988, 988, 988, 988, 988, 988,
+      988, 988, 988, 988, 988, 988, 988, 988, 988, 988,
+      988, 988, 988, 988, 988, 988, 988, 988, 988, 988,
+      988, 988, 988, 988, 988, 988, 988, 988, 988, 988,
+      988, 988, 988, 988, 988, 988, 988, 988, 988, 988,
+      988, 988, 988, 988, 988, 988, 988, 988, 988, 988,
+      988, 988, 988, 988, 988, 988, 988, 988, 988, 988,
+      988, 988, 988, 988, 988, 988, 988, 988, 988, 988,
+      988, 988, 988, 988, 988, 988, 988, 988, 988, 988,
+      988
+    };
+  int hval = len;
+
+  switch (hval) /* hval still equals len here */
+    {
+      default: /* len >= 6, and also len == 0, which would read str[5]; NOTE(review): presumably in_tld_set rejects len < 2 before calling - its body is outside this view */
+        hval += asso_values[(unsigned char)str[5]];
+      /*FALLTHROUGH*/
+      case 5:
+      case 4:
+      case 3:
+      case 2:
+        hval += asso_values[(unsigned char)str[1]];
+      /*FALLTHROUGH*/
+      case 1:
+        hval += asso_values[(unsigned char)str[0]+25]; /* largest possible index is 255+25 = 280, the last table slot */
+        break;
+    }
+  return hval;
+}
+
+#ifdef __GNUC__
+__inline
+#ifdef __GNUC_STDC_INLINE__
+__attribute__ ((__gnu_inline__))
+#endif
+#endif
+const char *
+in_tld_set (const char *str, unsigned int len)
+{
+  enum
+    {
+      TOTAL_KEYWORDS = 280,
+      MIN_WORD_LENGTH = 2,
+      MAX_WORD_LENGTH = 18,
+      MIN_HASH_VALUE = 5,
+      MAX_HASH_VALUE = 987
+    };
+
+  static const unsigned char lengthtable[] =
+    {
+       0,  0,  0,  0,  0,  2,  0,  0,  2,  0,  2,  0,  2,  2,
+       0,  2,  0,  2,  0,  0,  2,  0,  2,  0,  0,  2,  0,  2,
+       0,  0,  2,  0,  2,  0,  4,  2,  0,  2,  3,  4,  2,  0,
+       2,  0,  0,  2,  0,  2,  0,  0,  0,  0,  2,  0,  0,  2,
+       0,  4,  0,  0,  2,  0,  2,  0,  4,  2,  0,  2,  3,  0,
+       0,  0,  2,  0,  0,  0,  0,  2,  0,  0,  2,  0,  2,  0,
+       4,  2,  0,  2,  2,  0,  2,  0,  2,  0,  0,  2,  0,  2,
+       0,  0,  2,  0,  2,  2,  0,  2,  0,  2,  0,  0,  0,  0,
+       2,  0,  0,  2,  0,  2,  0,  0,  0,  0,  2,  3,  0,  2,
+       0,  2,  0,  0,  2,  0,  2,  3,  0,  2,  0,  0,  2,  0,
+       2,  0,  2,  0,  0,  2,  0,  4,  2,  0,  2,  0,  2,  0,
+       0,  2,  0,  0,  0,  0,  2,  0,  2,  0,  0,  2,  0,  2,
+       0,  0,  2,  0,  2,  2,  0,  0,  0,  2,  3,  0,  2,  0,
+       2,  0,  0,  2,  0,  2,  0,  4,  2,  0,  2,  0,  0,  2,
+       0,  2,  0,  0,  0,  0,  2,  0,  0,  2,  0,  2,  0,  0,
+       2,  0,  2,  0,  0,  0,  0,  2,  0,  0,  2,  0,  2,  3,
+       0,  2,  0,  0,  2,  0,  2,  0,  2,  0,  0,  2,  0,  0,
+       0,  0,  2,  0,  2,  0,  0,  2,  0,  2,  2,  0,  2,  0,
+       2,  0,  0,  2,  0,  2,  0,  0,  0,  0,  2,  0,  0,  2,
+       0,  2,  0,  0,  2,  6,  2,  0,  0,  0,  0,  2,  0,  0,
+       2,  0,  0,  0,  0,  2,  0,  2,  0,  0,  0,  0,  2,  0,
+       0,  2,  0,  2,  0,  0,  2,  0,  2,  0,  0,  2,  0,  2,
+       0,  0,  2,  0,  0,  0,  0,  2,  0,  0,  0,  0,  2,  0,
+       2,  0,  0,  2,  0,  2,  0,  0,  2,  0,  2,  0,  0,  2,
+       0,  2,  0,  0,  2,  0,  2,  0,  6,  2,  0,  2,  0,  0,
+       2,  0,  0,  0,  0,  2,  0,  2,  0,  0,  2,  0,  2,  0,
+       0,  2,  0,  2,  3,  0,  2,  0,  2,  0,  0,  2,  0,  2,
+       0,  0,  0,  0,  2,  0,  0,  2, 11,  2,  0,  0,  0, 16,
+       2,  0,  0,  0, 11,  2,  0,  0,  0,  0,  2,  0,  0,  0,
+       0, 17,  0,  0,  2,  0,  2,  2,  0,  2,  0,  2,  0,  0,
+       2,  0,  0,  0,  0,  2,  0,  2,  0,  0,  2,  0,  2,  3,
+       0,  2, 11,  2,  0,  0,  2,  0,  2,  0,  0,  0,  0,  2,
+       0,  0,  2,  0,  2,  0,  0,  0,  0,  2,  0,  0,  2,  0,
+       2,  0,  0,  2,  0,  2,  0,  0,  0,  0,  2, 10,  0,  2,
+       0,  2,  0,  0,  2,  0, 12,  0,  0,  2,  3,  2,  0,  0,
+       2,  0,  2,  0,  0,  2,  0,  2,  0,  0,  2,  0,  2,  0,
+       0,  2,  0,  2, 18,  0,  2,  0,  2,  0,  0,  2,  0,  2,
+       0,  0,  2,  0,  2,  0,  0,  2,  0,  2,  2,  0,  0,  0,
+       2,  0,  0,  2,  0,  2,  0,  0,  2,  0,  2,  0,  0,  2,
+       0,  2,  0,  0,  2,  0,  2,  0,  0,  0,  0,  2,  0,  0,
+       2,  0,  2,  0,  0,  0,  0,  2,  0,  0,  2,  0,  2,  0,
+       0,  2,  0,  2,  0,  0,  2,  0,  2,  0,  0,  0,  0,  2,
+       0,  0,  2,  0, 12,  0,  0,  0,  0,  2, 18,  0,  0,  0,
+       2,  3,  4,  2,  0,  2,  0,  0,  0,  0,  2,  0,  0,  0,
+       0,  2,  0,  0,  0,  0,  2,  0,  0,  0,  0,  2,  0,  0,
+       2,  0,  2,  0,  0,  2,  0,  0,  0,  0,  0,  0,  2,  0,
+       0,  2,  0,  0,  0,  0,  0,  0,  2,  3,  0,  0,  0,  0,
+       0,  0,  0,  0,  0,  0,  0,  0,  0,  2,  0,  0,  0,  0,
+       2,  0,  0,  0,  0,  2,  0,  0,  0,  0,  0,  0,  0,  0,
+       0,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  2,  0,  0,
+       2,  0,  2,  0,  0,  2,  0,  0,  0,  0,  0,  0,  2,  0,
+       0,  0,  0,  0,  0,  0,  0,  0,  2,  0,  0,  2,  0,  0,
+       0,  0,  0,  0,  0,  0,  0,  0,  0,  2,  0,  0,  0,  0,
+       2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+       0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  2,  0,  0,
+       0,  0,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  2,  0,
+       0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  2,
+       0,  0,  0,  0,  0,  0,  0,  0,  0,  2,  0,  0,  0,  0,
+       0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+       0,  0,  0,  0,  0,  0,  0,  0,  0,  2,  0,  0,  0,  0,
+       0,  0,  0,  0,  0,  0,  0,  2,  0,  0,  0,  0,  2,  0,
+       0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 14,  0,  0,  2,
+       0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+       0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+       0,  0,  0,  0,  0,  0,  2,  0,  0,  0,  0,  0,  0,  0,
+       0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+       0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+       0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+       0,  0,  0,  0,  0,  2,  0,  0,  0,  0,  0,  0,  0,  0,
+       0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+       0,  0,  0,  0,  0,  0,  0,  2
+    };
+  static const char * const wordlist[] =
+    {
+      "", "", "", "", "",
+      "md",
+      "", "",
+      "mv",
+      "",
+      "cd",
+      "",
+      "mz",
+      "cv",
+      "",
+      "ad",
+      "",
+      "cz",
+      "", "",
+      "mu",
+      "",
+      "az",
+      "", "",
+      "cu",
+      "",
+      "nz",
+      "", "",
+      "au",
+      "",
+      "mo",
+      "",
+      "mobi",
+      "nu",
+      "",
+      "co",
+      "com",
+      "coop",
+      "fo",
+      "",
+      "ao",
+      "", "",
+      "ms",
+      "",
+      "no",
+      "", "", "", "",
+      "me",
+      "", "",
+      "as",
+      "",
+      "asia",
+      "", "",
+      "my",
+      "",
+      "ae",
+      "",
+      "aero",
+      "cy",
+      "",
+      "ne",
+      "net",
+      "", "", "",
+      "mr",
+      "", "", "", "",
+      "cr",
+      "", "",
+      "fr",
+      "",
+      "ar",
+      "",
+      "arpa",
+      "td",
+      "",
+      "nr",
+      "tv",
+      "",
+      "mc",
+      "",
+      "tz",
+      "", "",
+      "cc",
+      "",
+      "mx",
+      "", "",
+      "ac",
+      "",
+      "cx",
+      "lv",
+      "",
+      "nc",
+      "",
+      "ax",
+      "", "", "", "",
+      "to",
+      "", "",
+      "lu",
+      "",
+      "ml",
+      "", "", "", "",
+      "cl",
+      "org",
+      "",
+      "mh",
+      "",
+      "al",
+      "", "",
+      "ch",
+      "",
+      "nl",
+      "tel",
+      "",
+      "sd",
+      "", "",
+      "sv",
+      "",
+      "ls",
+      "",
+      "sz",
+      "", "",
+      "jo",
+      "",
+      "jobs",
+      "ru",
+      "",
+      "su",
+      "",
+      "tr",
+      "", "",
+      "ly",
+      "", "", "", "",
+      "ro",
+      "",
+      "so",
+      "", "",
+      "je",
+      "",
+      "lr",
+      "", "",
+      "tc",
+      "",
+      "ma",
+      "rs",
+      "", "", "",
+      "ca",
+      "cat",
+      "",
+      "re",
+      "",
+      "se",
+      "", "",
+      "lc",
+      "",
+      "na",
+      "",
+      "name",
+      "sy",
+      "",
+      "qa",
+      "", "",
+      "gd",
+      "",
+      "tl",
+      "", "", "", "",
+      "sr",
+      "", "",
+      "th",
+      "",
+      "mg",
+      "", "",
+      "gu",
+      "",
+      "cg",
+      "", "", "", "",
+      "ag",
+      "", "",
+      "sc",
+      "",
+      "ng",
+      "gov",
+      "",
+      "bd",
+      "", "",
+      "bv",
+      "",
+      "id",
+      "",
+      "bz",
+      "", "",
+      "gs",
+      "", "", "", "",
+      "mk",
+      "",
+      "ge",
+      "", "",
+      "ck",
+      "",
+      "sl",
+      "fk",
+      "",
+      "gy",
+      "",
+      "bo",
+      "", "",
+      "sh",
+      "",
+      "io",
+      "", "", "", "",
+      "gr",
+      "", "",
+      "bs",
+      "",
+      "la",
+      "", "",
+      "is",
+      "travel",
+      "be",
+      "", "", "", "",
+      "ie",
+      "", "",
+      "by",
+      "", "", "", "",
+      "mw",
+      "",
+      "tg",
+      "", "", "", "",
+      "br",
+      "", "",
+      "aw",
+      "",
+      "ir",
+      "", "",
+      "cf",
+      "",
+      "sa",
+      "", "",
+      "af",
+      "",
+      "gl",
+      "", "",
+      "nf",
+      "", "", "", "",
+      "gh",
+      "", "", "", "",
+      "tk",
+      "",
+      "mm",
+      "", "",
+      "yu",
+      "",
+      "cm",
+      "", "",
+      "fm",
+      "",
+      "am",
+      "", "",
+      "lk",
+      "",
+      "sg",
+      "", "",
+      "ps",
+      "",
+      "il",
+      "",
+      "museum",
+      "bh",
+      "",
+      "pe",
+      "", "",
+      "mq",
+      "", "", "", "",
+      "py",
+      "",
+      "ye",
+      "", "",
+      "aq",
+      "",
+      "ga",
+      "", "",
+      "tw",
+      "",
+      "pr",
+      "pro",
+      "",
+      "sk",
+      "",
+      "om",
+      "", "",
+      "tf",
+      "",
+      "mn",
+      "", "", "", "",
+      "cn",
+      "", "",
+      "ws",
+      "xn--g6w251d",
+      "an",
+      "", "", "",
+      "xn--80akhbyknj4f",
+      "ba",
+      "", "", "",
+      "xn--0zwm56d",
+      "gg",
+      "", "", "", "",
+      "tm",
+      "", "", "", "",
+      "xn--11b5bs3a9aj6g",
+      "", "",
+      "hu",
+      "",
+      "pl",
+      "rw",
+      "",
+      "mp",
+      "",
+      "uz",
+      "", "",
+      "ph",
+      "", "", "", "",
+      "lb",
+      "",
+      "bg",
+      "", "",
+      "np",
+      "",
+      "kz",
+      "mil",
+      "",
+      "jm",
+      "xn--deba0ad",
+      "ci",
+      "", "",
+      "fi",
+      "",
+      "ai",
+      "", "", "", "",
+      "ni",
+      "", "",
+      "us",
+      "",
+      "sm",
+      "", "", "", "",
+      "tn",
+      "", "",
+      "sb",
+      "",
+      "hr",
+      "", "",
+      "uy",
+      "",
+      "pa",
+      "", "", "", "",
+      "ke",
+      "xn--zckzah",
+      "",
+      "gw",
+      "",
+      "mt",
+      "", "",
+      "ky",
+      "",
+      "xn--jxalpdlp",
+      "", "",
+      "gf",
+      "edu",
+      "at",
+      "", "",
+      "vu",
+      "",
+      "kr",
+      "", "",
+      "tp",
+      "",
+      "dz",
+      "", "",
+      "eu",
+      "",
+      "pg",
+      "", "",
+      "bw",
+      "",
+      "sn",
+      "xn--hlcj6aya9esc7a",
+      "",
+      "fj",
+      "",
+      "gm",
+      "", "",
+      "bf",
+      "",
+      "do",
+      "", "",
+      "gb",
+      "",
+      "ve",
+      "", "",
+      "es",
+      "",
+      "li",
+      "jp",
+      "", "", "",
+      "ee",
+      "", "",
+      "pk",
+      "",
+      "de",
+      "", "",
+      "gq",
+      "",
+      "bm",
+      "", "",
+      "kh",
+      "",
+      "im",
+      "", "",
+      "bb",
+      "",
+      "er",
+      "", "", "", "",
+      "tt",
+      "", "",
+      "vc",
+      "",
+      "si",
+      "", "", "", "",
+      "gn",
+      "", "",
+      "ec",
+      "",
+      "lt",
+      "", "",
+      "iq",
+      "",
+      "ua",
+      "", "",
+      "pw",
+      "",
+      "tj",
+      "", "", "", "",
+      "za",
+      "", "",
+      "pf",
+      "",
+      "xn--kgbechtv",
+      "", "", "", "",
+      "bn",
+      "xn--hgbk6aj7f53bba",
+      "", "", "",
+      "in",
+      "int",
+      "info",
+      "gp",
+      "",
+      "st",
+      "", "", "", "",
+      "ug",
+      "", "", "", "",
+      "pm",
+      "", "", "", "",
+      "gi",
+      "", "", "", "",
+      "kg",
+      "", "",
+      "hk",
+      "",
+      "sj",
+      "", "",
+      "wf",
+      "", "", "", "", "", "",
+      "va",
+      "", "",
+      "uk",
+      "", "", "", "", "", "",
+      "bi",
+      "biz",
+      "", "", "", "", "", "", "", "", "",
+      "", "", "", "",
+      "gt",
+      "", "", "", "",
+      "pn",
+      "", "", "", "",
+      "vg",
+      "", "", "", "", "", "", "", "", "",
+      "eg",
+      "", "", "", "", "", "", "", "", "",
+      "bt",
+      "", "",
+      "zw",
+      "",
+      "it",
+      "", "",
+      "kw",
+      "", "", "", "", "", "",
+      "hm",
+      "", "", "", "", "", "", "", "", "",
+      "bj",
+      "", "",
+      "dk",
+      "", "", "", "", "", "", "", "", "",
+      "", "",
+      "zm",
+      "", "", "", "",
+      "km",
+      "", "", "", "", "", "", "", "", "",
+      "", "", "", "", "", "", "", "", "",
+      "", "", "", "", "", "",
+      "hn",
+      "", "", "", "",
+      "pt",
+      "", "", "", "", "", "", "", "", "",
+      "yt",
+      "", "", "", "", "", "", "", "", "",
+      "", "", "", "", "",
+      "kn",
+      "", "", "", "", "", "", "", "", "",
+      "dm",
+      "", "", "", "", "", "", "", "", "",
+      "", "", "", "", "", "", "", "", "",
+      "", "", "", "", "", "", "", "", "",
+      "kp",
+      "", "", "", "", "", "", "", "", "",
+      "", "",
+      "vn",
+      "", "", "", "",
+      "ki",
+      "", "", "", "", "", "", "", "", "",
+      "", "",
+      "xn--9t4b11yi5a",
+      "", "",
+      "ht",
+      "", "", "", "", "", "", "", "", "",
+      "", "", "", "", "", "", "", "", "",
+      "", "", "", "", "", "", "", "", "",
+      "", "", "", "", "", "", "",
+      "vi",
+      "", "", "", "", "", "", "", "", "",
+      "", "", "", "", "", "", "", "", "",
+      "", "", "", "", "", "", "", "", "",
+      "", "", "", "", "", "", "", "", "",
+      "", "", "", "", "", "", "", "", "",
+      "", "", "", "", "", "", "", "", "",
+      "et",
+      "", "", "", "", "", "", "", "", "",
+      "", "", "", "", "", "", "", "", "",
+      "", "", "", "", "", "", "", "", "",
+      "", "",
+      "dj"
+    };
+
+  if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
+    {
+      int key = tld_hash (str, len);
+
+      if (key <= MAX_HASH_VALUE && key >= 0)
+        if (len == lengthtable[key])
+          {
+            const char *s = wordlist[key];
+
+            if (*str == *s && !memcmp (str + 1, s + 1, len - 1))
+              return s;
+          }
+    }
+  return 0;
+}
diff --git a/libclamav/phish_domaincheck_db.c b/libclamav/phish_domaincheck_db.c
index c5efe4922..5da9cc40f 100644
--- a/libclamav/phish_domaincheck_db.c
+++ b/libclamav/phish_domaincheck_db.c
@@ -49,16 +49,6 @@ int domainlist_match(const struct cl_engine* engine,char* real_url,const char* d
 {
 	const char* info;
 	int rc = engine->domainlist_matcher ? regex_list_match(engine->domainlist_matcher,real_url,display_url,hostOnly ? pre_fixup : NULL,hostOnly,&info,0) : 0;
-	if(rc && info && info[0] && info[0] != ':') {/*match successful, and has custom flags*/
-		if(strlen(info)==3 && isxdigit(info[0]) && isxdigit(info[1]) && isxdigit(info[2])) {
-			unsigned short notwantedflags=0;
-			sscanf(info,"%hx",&notwantedflags);
-		        *flags &= ~notwantedflags;/* filter unwanted phishcheck flags */	
-		}
-		else {
-			cli_warnmsg("Phishcheck:Unknown flag format in domain-list, 3 hex digits expected");
-		}
-	}
 	return rc;
 }
 
@@ -79,13 +69,6 @@ int is_domainlist_ok(const struct cl_engine* engine)
 	return (engine && engine->domainlist_matcher) ? is_regex_ok(engine->domainlist_matcher) : 1;
 }
 
-void domainlist_cleanup(const struct cl_engine* engine)
-{
-	if(engine && engine->domainlist_matcher) {
-		regex_list_cleanup(engine->domainlist_matcher);
-	}
-}
-
 void domainlist_done(struct cl_engine* engine)
 {
 	if(engine && engine->domainlist_matcher) {
diff --git a/libclamav/phish_whitelist.c b/libclamav/phish_whitelist.c
index a9bbbed03..55fd2fd52 100644
--- a/libclamav/phish_whitelist.c
+++ b/libclamav/phish_whitelist.c
@@ -69,13 +69,6 @@ int is_whitelist_ok(const struct cl_engine* engine)
 	return (engine && engine->whitelist_matcher) ? is_regex_ok(engine->whitelist_matcher) : 1;
 }
 
-void whitelist_cleanup(const struct cl_engine* engine)
-{
-	if(engine && engine->whitelist_matcher) {
-		regex_list_cleanup(engine->whitelist_matcher);
-	}
-}
-
 void whitelist_done(struct cl_engine* engine)
 {
 	if(engine && engine->whitelist_matcher) {
diff --git a/libclamav/phishcheck.c b/libclamav/phishcheck.c
index 4589eb5d4..ba8cb57e1 100644
--- a/libclamav/phishcheck.c
+++ b/libclamav/phishcheck.c
@@ -39,6 +39,7 @@
 #include <ctype.h>
 
 #include "clamav.h"
+#include "cltypes.h"
 #include "others.h"
 #include "mbox.h"
 #include "message.h"
@@ -47,6 +48,7 @@
 #include "phish_domaincheck_db.h"
 #include "phish_whitelist.h"
 #include "iana_tld.h"
+#include "iana_cctld.h"
 
 
 #define DOMAIN_REAL 1
@@ -140,8 +142,6 @@ static char empty_string[]="";
 #define CLOAKED_URL "^"ANY_CLOAK"(\\."ANY_CLOAK"){0,3}$"
 
 static const char cloaked_host_regex[] = CLOAKED_URL;
-static const char tld_regex[] = "^"iana_tld"$";
-static const char cctld_regex[] = "^"iana_cctld"$";
 static const char dotnet[] = ".net";
 static const char adonet[] = "ado.net";
 static const char aspnet[] = "asp.net";
@@ -151,7 +151,10 @@ static const char gt[]="&gt";
 static const char src_text[] = "src";
 static const char href_text[] = "href";
 static const char mailto[] = "mailto:";
+static const char mailto_proto[] = "mailto://";
 static const char https[]="https://";
+static const char http[]="http://";
+static const char ftp[] = "ftp://";
 
 static const size_t href_text_len = sizeof(href_text);
 static const size_t src_text_len = sizeof(src_text);
@@ -161,7 +164,10 @@ static const size_t aspnet_len = sizeof(aspnet)-1;
 static const size_t lt_len = sizeof(lt)-1;
 static const size_t gt_len = sizeof(gt)-1;
 static const size_t mailto_len = sizeof(mailto)-1;
+static const size_t mailto_proto_len = sizeof(mailto_proto)-1;
 static const size_t https_len  = sizeof(https)-1;
+static const size_t http_len  = sizeof(http)-1;
+static const size_t ftp_len  = sizeof(ftp)-1;
 
 /* for urls, including mailto: urls, and (broken) http:www... style urls*/
 /* refer to: http://www.w3.org/Addressing/URL/5_URI_BNF.html
@@ -169,41 +175,13 @@ static const size_t https_len  = sizeof(https)-1;
  * So the 'safe' char class has been split up
  * */
 /* character classes */
-#define URI_alpha	"a-zA-Z"
 #define URI_digit	"0-9"
-#define URI_safe_nodot  "-$_@&"
-#define URI_safe	"-$_@.&"
-#define URI_extra	"!*\"'(),"
-
-#define URI_hex		 "[0-9a-fA-f]"
-#define URI_escape      "%"URI_hex"{2}"
-#define URI_xalpha "([" URI_safe URI_alpha URI_digit  URI_extra "]|"URI_escape")" /* URI_safe has to be first, because it contains - */
-#define URI_xalpha_nodot "([" URI_safe_nodot URI_alpha URI_digit URI_extra "]|"URI_escape")"
-
-#define URI_xalphas_nodot URI_xalpha_nodot"*"
-
-#define URI_ialpha  "["URI_alpha"]"URI_xalphas_nodot""
-#define URI_xpalpha URI_xalpha"|\\+"
-#define URI_xpalpha_nodot URI_xalpha_nodot"|\\+"
-#define URI_xpalphas_nodot "("URI_xpalpha_nodot")+"
-
-#define URI_scheme URI_ialpha
-#define URI_tld iana_tld
-#define URI_path1 URI_xpalphas_nodot"\\.("URI_xpalphas_nodot"\\.)*"
-
 #define URI_IP_digits "["URI_digit"]{1,3}"
 #define URI_path_start "[/?:]?"
 #define URI_numeric_path URI_IP_digits"(\\."URI_IP_digits"){3}"URI_path_start
-#define URI_numeric_URI "("URI_scheme":(//)?)?"URI_numeric_path
+#define URI_numeric_URI "((http|https|ftp):(//)?)?"URI_numeric_path /* whole "scheme:" / "scheme://" prefix is optional; '|' binds weaker than concatenation, so the schemes must be grouped */
 #define URI_numeric_fragmentaddress URI_numeric_URI
 
-#define URI_URI1 "("URI_scheme":(//)?)?"URI_path1
-#define URI_URI2 URI_tld
-
-#define URI_fragmentaddress1 URI_URI1
-#define URI_fragmentaddress2 URI_URI2""URI_path_start
-
-#define URI_CHECK_PROTOCOLS "(http|https|ftp|mailto)://.+"
 
 /*Warning: take care when modifying this regex, it has been tweaked, and tuned, just don't break it please.
  * there is fragmentaddress1, and 2  to work around the ISO limitation of 509 bytes max length for string constants*/
@@ -235,7 +213,6 @@ static int string_assign_concatenated(struct string* dest, const char* prefix, c
 static void string_assign_null(struct string* dest);
 static char *rfind(char *start, char c, size_t len);
 static char hex2int(const unsigned char* src);
-static int isTLD(const struct phishcheck* pchk,const char* str,int len);
 static enum phish_status phishingCheck(const struct cl_engine* engine,struct url_check* urls);
 static const char* phishing_ret_toString(enum phish_status rc);
 
@@ -416,7 +393,7 @@ static int get_host(const struct phishcheck* s,const char* URL,int isReal,int* p
 			}
 
 			tld = strrchr(realhost,'.');
-			rc = tld ? isTLD(s,tld,tld-realhost-1) : 0;
+			rc = tld ? !!in_tld_set(tld,tld-realhost-1) : 0;
 			if(rc < 0)
 				return rc;
 			if(rc)
@@ -438,28 +415,6 @@ static int get_host(const struct phishcheck* s,const char* URL,int isReal,int* p
 	return 0;
 }
 
-static int isCountryCode(const struct phishcheck* s,const char* str)
-{
-	return str ? !cli_regexec(&s->preg_cctld,str,0,NULL,0) : 0;
-}
-
-static int isTLD(const struct phishcheck* pchk,const char* str,int len)
-{
-	if (!str)
-		return 0;
-	else {
-		char*	s  = cli_malloc(len+1);
-		int rc;
-
-		if(!s)
-			return CL_EMEM;
-		strncpy(s,str,len);
-		s[len]='\0';
-		rc = !cli_regexec(&pchk->preg_tld,s,0,NULL,0);
-		free(s);
-		return rc ? 1 : 0;
-	}
-}
 
 /*
  * memrchr isn't standard, so I use this
@@ -486,7 +441,7 @@ static void get_domain(const struct phishcheck* pchk,struct string* dest,struct
 		string_assign(dest,host);
 		return;
 	}
-	if(isCountryCode(pchk,tld+1)) {
+	if(in_cctld_set(tld+1, strlen(tld+1))) {
 		const char* countrycode = tld+1;
 		tld = rfind(host->data,'.',tld-host->data-1);
 		if(!tld) {
@@ -495,7 +450,7 @@ static void get_domain(const struct phishcheck* pchk,struct string* dest,struct
 			string_assign(dest,host);
 			return;
 		}
-		if(!isTLD(pchk,tld+1,countrycode-tld-2)) {
+		if(!in_tld_set(tld+1, countrycode-tld-2)) {
 			string_assign_ref(dest,host,tld+1);
 			return;/*it was a name like: subdomain.domain.uk, return domain.uk*/
 		}
@@ -737,11 +692,7 @@ cleanupURL(struct string *URL,struct string *pre_URL, int isReal)
 			/* @end points to last character we want to be part of the URL */
 			end = host_begin + host_len - 1;
 		}
-		/* terminate URL with a slash, except when we're at end of string */
-		if(host_begin[host_len]) {
-			host_begin[host_len] = '/';
-			end++;
-		}
+		host_begin[host_len] = '\0';
 		/* convert hostname to lowercase, but only hostname! */
 		str_make_lowercase(host_begin, host_len);
 		/* some broken MUAs put > in the href, and then
@@ -797,6 +748,40 @@ int phishingScan(message* m,const char* dir,cli_ctx* ctx,tag_arguments_t* hrefs)
 
 	if(!ctx->found_possibly_unwanted)
 		*ctx->virname=NULL;
+#if 0
+	FILE *f = fopen("/home/edwin/quarantine/urls","r");
+	if(!f)
+		abort();
+	while(!feof(f)) {
+		struct url_check urls;
+		char line1[4096];
+		char line2[4096];
+		char line3[4096];
+
+		fgets(line1, sizeof(line1), f);
+		fgets(line2, sizeof(line2), f);
+		fgets(line3, sizeof(line3), f);
+		if(strcmp(line3, "\n") != 0) {
+			strcpy(line1, line2);
+			strcpy(line2, line3);
+			fgets(line3, sizeof(line3), f);
+			while(strcmp(line3, "\n") != 0) {
+				fgets(line3, sizeof(line3),f);
+			}
+		}
+		urls.flags = CL_PHISH_ALL_CHECKS;
+		urls.link_type = 0;
+		string_init_c(&urls.realLink, line1);
+		string_init_c(&urls.displayLink, line2);
+		string_init_c(&urls.pre_fixup.pre_displayLink, NULL);
+		urls.realLink.refcount=-1;
+		urls.displayLink.refcount=-1;
+		int rc = phishingCheck(ctx->engine, &urls);
+		//printf("%d\n",rc);
+	}
+	fclose(f);
+	return 0;
+#endif
 	for(i=0;i<hrefs->count;i++)
 		if(hrefs->contents[i]) {
 			struct url_check urls;
@@ -928,44 +913,7 @@ int phishing_init(struct cl_engine* engine)
 		return CL_EFORMAT;
 	}
 
-	if(build_regex(&pchk->preg_cctld,cctld_regex,1)) {
-		free(pchk);
-		engine->phishcheck = NULL;
-		return CL_EFORMAT;
-	}
-	if(build_regex(&pchk->preg_tld,tld_regex,1)) {
-		free_regex(&pchk->preg_cctld);
-		free(pchk);
-		engine->phishcheck = NULL;
-		return CL_EFORMAT;
-	}
-	url_regex = str_compose("^ *(("URI_CHECK_PROTOCOLS")|(",URI_fragmentaddress1,URI_fragmentaddress2")) *$");
-	if(!url_regex || build_regex(&pchk->preg,url_regex,1)) {
-		free_regex(&pchk->preg_cctld);
-		free_regex(&pchk->preg_tld);
-		free(url_regex);
-		free(pchk);
-		engine->phishcheck = NULL;
-		return CL_EFORMAT;
-	}
-	free(url_regex);
-	realurl_regex = str_compose("^ *(("URI_CHECK_PROTOCOLS")|(",URI_path1,URI_fragmentaddress2")) *$");
-	if(!realurl_regex || build_regex(&pchk->preg_realurl, realurl_regex,1)) {
-		free_regex(&pchk->preg_cctld);
-		free_regex(&pchk->preg_tld);
-		free_regex(&pchk->preg);
-		free(url_regex);
-		free(realurl_regex);
-		free(pchk);
-		engine->phishcheck = NULL;
-		return CL_EFORMAT;
-	}
-	free(realurl_regex);
 	if(build_regex(&pchk->preg_numeric,numeric_url_regex,1)) {
-		free_regex(&pchk->preg_cctld);
-		free_regex(&pchk->preg_tld);
-		free_regex(&pchk->preg);
-		free_regex(&pchk->preg_realurl);
 		free(pchk);
 		engine->phishcheck = NULL;
 		return CL_EFORMAT;
@@ -980,12 +928,8 @@ void phishing_done(struct cl_engine* engine)
 	struct phishcheck* pchk = engine->phishcheck;
 	cli_dbgmsg("Cleaning up phishcheck\n");
 	if(pchk && !pchk->is_disabled) {
-		free_regex(&pchk->preg);
 		free_regex(&pchk->preg_hexurl);
-		free_regex(&pchk->preg_cctld);
-		free_regex(&pchk->preg_tld);
 		free_regex(&pchk->preg_numeric);
-		free_regex(&pchk->preg_realurl);
 		pchk->is_disabled = 1;
 	}
 	whitelist_done(engine);
@@ -998,22 +942,165 @@ void phishing_done(struct cl_engine* engine)
 	cli_dbgmsg("Phishcheck cleaned up\n");
 }
 
+
+/* Byte-indexed class table: 1 for ASCII letters (A-Z, a-z), 0 for every other byte */
+static const uint8_t URI_alpha[256] = {
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
+        0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+};
+
+/* Byte-indexed class table, 1 for: !"$%&'()*,-0123456789@A-Z_a-z ('.' and '+' excluded), 0 otherwise */
+static const uint8_t URI_xalpha_nodot[256] = {
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
+        0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+};
+
+/* Byte-indexed class table, 1 for: !"$%&'()*+,-0123456789@A-Z_a-z ('+' included, '.' excluded), 0 otherwise */
+static const uint8_t URI_xpalpha_nodot[256] = {
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
+        0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+};
+
+/* Returns 1 iff every byte of [start,end) is set in URI_xalpha_nodot; an empty range is accepted */
+static inline int validate_uri_xalphas_nodot(const char *start, const char *end)
+{
+	const unsigned char *p = (const unsigned char *)start; /* index the table with unsigned bytes */
+	for(; p < (const unsigned char *)end; p++)
+		if(!URI_xalpha_nodot[*p])
+			return 0;
+	return 1;
+}
+
+/* Returns 1 iff [start,end) is non-empty and every byte is set in URI_xpalpha_nodot */
+static inline int validate_uri_xpalphas_nodot(const char *start, const char *end)
+{
+	const unsigned char *p = (const unsigned char *)start; /* index the table with unsigned bytes */
+	for(; p < (const unsigned char *)end; p++)
+		if(!URI_xpalpha_nodot[*p])
+			return 0;
+	/* must have at least one char */
+	return p > (const unsigned char *)start;
+}
+
+
+/* Returns 1 iff [start,end) is one URI_alpha letter followed by zero or more URI_xalpha_nodot bytes */
+static inline int validate_uri_ialpha(const char *start, const char *end)
+{
+	if(start >= end || !URI_alpha[(unsigned char)*start])
+		return 0;
+	return validate_uri_xalphas_nodot(start + 1, end);
+}
+
 /*
  * Only those URLs are identified as URLs for which phishing detection can be performed.
  */
-static int isURL(const struct phishcheck* pchk,const char* URL)
+static int isURL(const struct phishcheck* pchk,const char* URL, int accept_anyproto)
 {
-	return URL ? !cli_regexec(&pchk->preg,URL,0,NULL,0) : 0;
+	const char *start = NULL, *p, *q;
+	if(!URL)
+		return 0;
+
+	switch (URL[0]) { /* look for one of the known scheme prefixes: https, http, ftp, mailto_proto */
+		case 'h':
+			if (strncmp(URL, https, https_len) == 0)
+				start = URL + https_len;
+			else if (strncmp(URL, http, http_len) == 0)
+				start = URL + http_len;
+			break;
+		case 'f':
+		       if (strncmp(URL, ftp, ftp_len) == 0)
+			       start = URL + ftp_len;
+		       break;
+		case 'm':
+		       if (strncmp(URL, mailto_proto, mailto_proto_len) == 0)
+			       start = URL + mailto_proto_len;
+		       break;
+	}
+	if(start) {
+		if(start[0] == '\0')
+			return 0;/* empty URL */
+		/* has a known scheme prefix followed by at least one character: accept without further validation */
+		return 1;
+	}
+	start = accept_anyproto ?  strchr(URL, ':') : NULL; /* other schemes are only considered when accept_anyproto is non-zero */
+	if(start) {
+		/* validate URI scheme: the text before the first ':' */
+		if(validate_uri_ialpha(URL, start)) {
+			if(start[1] == '/' && start[2] == '/')
+				start += 3; /* skip :// */
+			else
+				start++; /* skip just the ':' */
+		}
+		else
+			start = URL; /* scheme invalid: validate the whole string instead */
+	} else
+		start = URL;
+	p = start;
+	do { /* each '.'-terminated component must pass validate_uri_xpalphas_nodot (non-empty, allowed bytes only) */
+		q = strchr(p, '.');
+		if(q) {
+			if(!validate_uri_xpalphas_nodot(p, q))
+				return 0;
+			p = q+1;
+		}
+	} while(q);
+	if (p == start) /* must have at least one dot in the URL */
+		return 0;
+	return !!in_tld_set(p, strlen(p)); /* everything after the last '.' must be found by in_tld_set */
 }
 
 /*
  * Check if this is a real URL, which basically means to check if it has a known URL scheme (http,https,ftp).
  * This prevents false positives with outbind:// and blocked:: links.
  */
+#if 0
 static int isRealURL(const struct phishcheck* pchk,const char* URL)
 {
 	return URL ? !cli_regexec(&pchk->preg_realurl,URL,0,NULL,0) : 0;
 }
+#endif
 
 static int isNumericURL(const struct phishcheck* pchk,const char* URL)
 {
@@ -1139,7 +1226,7 @@ static enum phish_status phishingCheck(const struct cl_engine* engine,struct url
 	cli_dbgmsg("Phishcheck:URL after cleanup: %s->%s\n", urls->realLink.data,
 		urls->displayLink.data);
 
-	if((!isURL(pchk, urls->displayLink.data) || !isRealURL(pchk, urls->realLink.data) ) &&
+	if((!isURL(pchk, urls->displayLink.data, 1) || !isURL(pchk, urls->realLink.data, 0) ) &&
 			( (phishy&PHISHY_NUMERIC_IP && !isNumericURL(pchk, urls->displayLink.data)) ||
 			  !(phishy&PHISHY_NUMERIC_IP))) {
 		cli_dbgmsg("Displayed 'url' is not url:%s\n",urls->displayLink.data);
diff --git a/libclamav/phishcheck.h b/libclamav/phishcheck.h
index cb4bff581..822d3ff64 100644
--- a/libclamav/phishcheck.h
+++ b/libclamav/phishcheck.h
@@ -44,10 +44,6 @@ struct string {
 };
 
 struct phishcheck {
-	regex_t preg;
-	regex_t preg_realurl;
-	regex_t preg_tld;
-	regex_t preg_cctld;
 	regex_t preg_numeric;
 	regex_t preg_hexurl;
 	int      is_disabled;
diff --git a/libclamav/readdb.c b/libclamav/readdb.c
index d95d80a17..ef77469d0 100644
--- a/libclamav/readdb.c
+++ b/libclamav/readdb.c
@@ -1839,6 +1839,12 @@ int cl_build(struct cl_engine *engine)
 	}
     }
 
+    if((ret = cli_build_regex_list(engine->whitelist_matcher))) {
+	    return ret;
+    }
+    if((ret = cli_build_regex_list(engine->domainlist_matcher))) {
+	    return ret;
+    }
     cli_md5db_build(engine->md5_mdb);
     cli_freeign(engine);
     cli_dconf_print(engine->dconf);
diff --git a/libclamav/regex_list.c b/libclamav/regex_list.c
index d33bc7b10..f4400f83e 100644
--- a/libclamav/regex_list.c
+++ b/libclamav/regex_list.c
@@ -42,6 +42,8 @@
 
 #include <limits.h>
 #include <sys/types.h>
+#include <assert.h>
+
 
 #include "regex/regex.h"
 
@@ -53,152 +55,471 @@
 #include "matcher.h"
 #include "str.h"
 #include "readdb.h"
+#include "jsparse/textbuf.h"
 
-/*Tree*/
-enum token_op_t {OP_CHAR,OP_STDCLASS,OP_CUSTOMCLASS,OP_DOT,OP_LEAF,OP_ROOT,OP_PARCLOSE};
-typedef unsigned char* char_bitmap_p;
-/*
- *
- * OP_CHAR: 1 character, c = character
- * complex stuff:
- * OP_STDCLASS: standard character class, c = char class, class: 1<<(index into std_class of class name)
- * OP_CUSTOMCLASS: custom character class, first pointer in ptr array is a pointer to the bitmap table for this class
- * OP_DOT: single . matching any character except \n
- * OP_LEAF: this is a leaf node, reinterpret structure
- */
-struct tree_node {
-	struct tree_node* next;/* next regex/complex sibling, or parent, if no more siblings , can't be NULL except for root node*/
+/* ------- parse a regular expression, and extract a static suffix ------*/
+enum node_type {
+	root=0, /* top of the tree: ascending to it emits the collected suffix */
+	concat, /* left child followed by right child */
+	alternate, /* | : either the left or the right child */
+	optional,/* ?, * : the operand is kept in the left child */
+	leaf, /* a single literal character */
+	leaf_class /* character class, stored as a 32-byte (256-bit) bitmap */
+	/* (x)+ is transformed into (x)*(x) */
+};
+
+struct node {
+	enum node_type type;
+	struct node *parent;
 	union {
-		struct tree_node** children;/* alternatives nr. of children, followed by (a null pointer terminated) regex leaf node pointers) */
-		char_bitmap_p* bitmap;
-		struct leaf_info*  leaf;
+		struct {
+			struct node* left;
+			struct node* right;
+		} children;
+		uint8_t*    leaf_class_bitmap;
+		uint8_t     leaf_char;
 	} u;
-	enum token_op_t op;
-	unsigned char c;
-	char alternatives;/* number of (non-regex) children of node, i.e. sizeof(children)*/
-	char listend;/* no more siblings, next pointer is pointer to parent*/
 };
 
-struct leaf_info {
-	char* info;/* what does it mean that we reached the leaf...*/
-	regex_t* preg;/* this is NULL if leaf node, and non-regex*/
-};
-
-/* Character classes */
-static const char* std_class[] = {
-	"[:alnum:]",
-	"[:digit:]",
-	"[:punct:]",
-	"[:alpha:]",
-	"[:graph:]",
-	"[:space:]",
-	"[:blank:]",
-	"[:lower:]", 
-	"[:upper:]",
-	"[:cntrl:]",
-	"[:print:]",
-	"[:xdigit:]"
-	/* don't change the order of these strings, unless you change them in generate_tables.c too, and regenerate the tables*/
-};
-
-
-#define STD_CLASS_CNT sizeof(std_class)/sizeof(std_class[0])
-
-/* generated by contrib/phishing/generate_tables.c */
-static const unsigned char char_class_bitmap[STD_CLASS_CNT][32] = {
-        {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x03, 
-         0xfe, 0xff, 0xff, 0x07, 0xfe, 0xff, 0xff, 0x07, 
-         0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
-         0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
-
-        {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x03, 
-         0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
-         0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
-         0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
-
-        {0x00, 0x00, 0x00, 0x00, 0xfe, 0xff, 0x00, 0xfc, 
-         0x01, 0x00, 0x00, 0xf8, 0x01, 0x00, 0x00, 0x78, 
-         0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
-         0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
-
-        {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
-         0xfe, 0xff, 0xff, 0x07, 0xfe, 0xff, 0xff, 0x07, 
-         0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
-         0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
-
-        {0x00, 0x00, 0x00, 0x00, 0xfe, 0xff, 0xff, 0xff, 
-         0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f, 
-         0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
-         0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
-
-        {0x00, 0x3e, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 
-         0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
-         0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
-         0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
-
-        {0x00, 0x02, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 
-         0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
-         0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
-         0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
-
-        {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
-         0x00, 0x00, 0x00, 0x00, 0xfe, 0xff, 0xff, 0x07, 
-         0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
-         0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
-
-        {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
-         0xfe, 0xff, 0xff, 0x07, 0x00, 0x00, 0x00, 0x00, 
-         0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
-         0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
-
-        {0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 
-         0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 
-         0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
-         0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
-
-        {0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 
-         0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f, 
-         0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
-         0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
-
-        {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x03, 
-         0x7e, 0x00, 0x00, 0x00, 0x7e, 0x00, 0x00, 0x00, 
-         0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
-         0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}
-};
-
-static const unsigned short int char_class[256] = {
-        0x200, 0x200, 0x200, 0x200, 0x200, 0x200, 0x200, 0x200, 0x200, 0x260, 0x220, 0x220, 0x220, 0x220, 0x200, 0x200, 
-        0x200, 0x200, 0x200, 0x200, 0x200, 0x200, 0x200, 0x200, 0x200, 0x200, 0x200, 0x200, 0x200, 0x200, 0x200, 0x200, 
-        0x460, 0x414, 0x414, 0x414, 0x414, 0x414, 0x414, 0x414, 0x414, 0x414, 0x414, 0x414, 0x414, 0x414, 0x414, 0x414, 
-        0xc13, 0xc13, 0xc13, 0xc13, 0xc13, 0xc13, 0xc13, 0xc13, 0xc13, 0xc13, 0x414, 0x414, 0x414, 0x414, 0x414, 0x414, 
-        0x414, 0xd19, 0xd19, 0xd19, 0xd19, 0xd19, 0xd19, 0x519, 0x519, 0x519, 0x519, 0x519, 0x519, 0x519, 0x519, 0x519, 
-        0x519, 0x519, 0x519, 0x519, 0x519, 0x519, 0x519, 0x519, 0x519, 0x519, 0x519, 0x414, 0x414, 0x414, 0x414, 0x414, 
-        0x414, 0xc99, 0xc99, 0xc99, 0xc99, 0xc99, 0xc99, 0x499, 0x499, 0x499, 0x499, 0x499, 0x499, 0x499, 0x499, 0x499, 
-        0x499, 0x499, 0x499, 0x499, 0x499, 0x499, 0x499, 0x499, 0x499, 0x499, 0x499, 0x414, 0x414, 0x414, 0x414, 0x200, 
-        0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 
-        0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 
-        0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 
-        0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 
-        0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 
-        0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 
-        0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 
-        0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000
-};
-
-static const size_t std_class_cnt =  sizeof(std_class)/sizeof(std_class[0]);
-
 /* Prototypes */
-static int add_pattern(struct regex_matcher* matcher,const unsigned char* pat,const char* info,int hostOnly);
-static int match_node(struct tree_node* node,const unsigned char* c,size_t len,const char** info);
-static void destroy_tree(struct regex_matcher* matcher);
-static struct tree_node* tree_root_alloc(void);
-static int build_regex_list(struct regex_matcher* matcher);
-static void stack_destroy(struct node_stack* stack);
+static size_t reverse_string(char *pattern);
+static int add_pattern(struct regex_matcher *matcher, char *pattern);
+static int add_pattern_suffix(struct regex_matcher *matcher, char *suffix, size_t suffix_len, struct regex_list *regex);
+static int add_static_pattern(struct regex_matcher *matcher, char* pattern);
+static int build_suffixtree_descend(struct regex_matcher *matcher, struct regex_list *regex, struct node *n, struct text_buffer *buf);
+/* ---------- */
+
+static uint8_t dot_bitmap[32];
+/* Allocate an interior node of @type over @left/@right; a concat with one NULL side just returns the other side. NULL if cli_malloc fails. */
+static struct node* make_node(enum node_type type, struct node *left, struct node *right)
+{
+	struct node *n;
+	if(type == concat) {
+		if(left == NULL)
+			return right;
+		if(right == NULL)
+			return left;
+	}
+	n = cli_malloc(sizeof(*n));
+	if(!n)
+		return NULL;
+	n->type = type;
+	n->parent = NULL;
+	n->u.children.left = left;
+	n->u.children.right = right;
+	if(left)
+		left->parent = n; /* children keep a back-pointer so the tree can be walked upwards */
+	if(right)
+		right->parent = n;
+	return n;
+}
+/* Deep-copy the subtree rooted at @p (the copy's parent is NULL). Returns NULL when @p is NULL or when the top node or its bitmap cannot be allocated; a failed child copy is stored as a NULL child. */
+static struct node *dup_node(struct node *p)
+{
+	struct node *node_left, *node_right, *d;
+	if(!p)
+		return NULL;
+	d = cli_malloc(sizeof(*d));
+	if(!d)
+		return NULL;
+	d->type = p->type;
+	d->parent = NULL;
+	switch(p->type) {
+		case leaf:
+			d->u.leaf_char = p->u.leaf_char;
+			break;
+		case leaf_class:
+			d->u.leaf_class_bitmap = cli_malloc(32);
+			if(!d->u.leaf_class_bitmap) {
+				free(d); /* release the half-built copy instead of leaking it */
+				return NULL;
+			}
+			memcpy(d->u.leaf_class_bitmap, p->u.leaf_class_bitmap, 32);
+			break;
+		default: /* interior node: copy both subtrees and re-parent them under the copy */
+			node_left = dup_node(p->u.children.left);
+			node_right = dup_node(p->u.children.right);
+			d->u.children.left = node_left;
+			d->u.children.right = node_right;
+			if(node_left)
+				node_left->parent = d;
+			if(node_right)
+				node_right->parent = d;
+			break;
+	}
+	return d;
+}
+/* Wrap @bitmap in a leaf_class node (destroy_tree() frees it with the node unless it is dot_bitmap). NULL if @bitmap is NULL or cli_malloc fails. */
+static struct node *make_charclass(uint8_t *bitmap)
+{
+	struct node *v = bitmap ? cli_malloc(sizeof(*v)) : NULL; /* NULL bitmap: parse_char_class() failed, don't build a node that gets dereferenced later */
+	if(!v)
+		return NULL;
+	v->type = leaf_class;
+	v->parent = NULL;
+	v->u.leaf_class_bitmap = bitmap;
+	return v;
+}
+/* Allocate a leaf node holding the literal character @c (parent is NULL). Returns NULL if cli_malloc fails. */
+static struct node *make_leaf(char c)
+{
+	struct node *v = cli_malloc(sizeof(*v));
+	if(!v)
+		return NULL;
+	v->type = leaf;
+	v->parent = NULL;
+	v->u.leaf_char = c;
+	return v;
+}
+/* Free the node @n (NULL is a no-op): interior nodes free both subtrees first, class nodes free their bitmap. */
+static void destroy_tree(struct node *n)
+{
+	if(!n)
+		return;
+	switch(n->type) {
+		case concat:
+		case alternate:
+		case optional:
+			destroy_tree(n->u.children.left);
+			destroy_tree(n->u.children.right);
+			break;
+		case leaf_class:
+			if(n->u.leaf_class_bitmap != dot_bitmap) /* dot_bitmap is a static array shared by all '.' nodes */
+			  free(n->u.leaf_class_bitmap);
+			break;
+		case root:
+		case leaf:
+			break; /* only the node itself is released below */
+	}
+	free(n);
+}
+/* Parse the bracket expression whose opening '[' is at pat[*pos] into a 256-bit set (bit c set = byte c matches).
+ * On return *pos indexes the closing ']', or the terminating NUL when the class is unterminated.
+ * Returns a heap bitmap owned by the caller, dot_bitmap (meaning "anything may match") for [:class:], [.coll.],
+ * [=equiv=] and unterminated input, or NULL when cli_malloc fails. */
+static uint8_t* parse_char_class(const char *pat, size_t *pos)
+{
+	/* read through unsigned char so bytes >= 0x80 can never yield a negative bitmap index */
+	const unsigned char *s = (const unsigned char*)pat;
+	unsigned int c, range_start = 0;
+	int hasprev = 0, negate = 0, first = 1;
+	uint8_t* bitmap = cli_malloc(32);
+	if(!bitmap)
+		return NULL;
+	memset(bitmap,0x00,32);
+	if (s[*pos]=='[')
+		++*pos; /* parse_regex() passes the position of the opening bracket itself */
+	if (s[*pos]=='^') {
+		negate = 1; /*match chars not in brackets: collect the listed set, invert it at the end*/
+		++*pos;
+	}
+	/* literal ] can be first character, so the first iteration does not test for it, for example: []] */
+	for(; s[*pos] && (first || s[*pos]!=']'); first = 0) {
+		/* a '-' between two members is a range; a leading or trailing '-' is a literal */
+		int is_range = (s[*pos]=='-' && hasprev && s[*pos+1] && s[*pos+1]!=']');
+		if (is_range)
+			++*pos; /* now on the range end */
+		if (s[*pos]=='[' && (s[*pos+1]==':' || s[*pos+1]=='.' || s[*pos+1]=='=')) {
+			/* char class / collating sequence not handled: we are parsing the regex for a filter,
+			 * be conservative and tell the filter that anything could match here */
+			free(bitmap);
+			while(s[*pos] && s[*pos]!=']') {
+				if (s[*pos]=='[' && (s[*pos+1]==':' || s[*pos+1]=='.' || s[*pos+1]=='=')) {
+					*pos += 2; /* step over the nested "[:name:]" including its own ']' */
+					while(s[*pos] && s[*pos]!=']')
+						++*pos;
+					if (s[*pos])
+						++*pos;
+				} else
+					++*pos;
+			}
+			return dot_bitmap;
+		}
+		c = is_range ? range_start : s[*pos];
+		for(; c <= s[*pos]; c++)
+			bitmap[c>>3] |= 1<<(c&0x7);
+		range_start = s[*pos];
+		hasprev = !is_range;
+		++*pos;
+	}
+	if(!s[*pos]) {
+		/* ran into the end of the pattern before the closing ']' */
+		free(bitmap);
+		return dot_bitmap;
+	}
+	if(negate)
+		for(c=0;c<32;c++)
+			bitmap[c] ^= 0xFF;
+	return bitmap;
+}
+/* Parse the regex at p[*last] into a node tree. Stops at '$', at the end of the string or at an unmatched ')' and leaves *last there; returns NULL for an empty expression or group, or when a node cannot be allocated. */
+static struct node* parse_regex(const char *p, size_t *last)
+{
+	struct node *v = NULL, *right, *tmp;
+	while(p[*last] != '$' && p[*last] != '\0') {
+		switch(p[*last]) {
+			case '|':
+				++*last;
+				right = parse_regex(p, last);
+				v = make_node(alternate, v, right);
+				if(!v)
+					return NULL;
+				break;
+			case '*':
+			case '?':
+				v = make_node(optional, v, NULL);
+				if(!v)
+					return NULL;
+				++*last;
+				break;
+			case '+':
+				tmp = make_node(optional, v, NULL); /* (x)* */
+				if(!tmp)
+					return NULL;
+				right = dup_node(v); /* (x) */
+				if(!right)
+					return NULL;
+				v = make_node(concat, tmp, right); /* (x)*(x) => (x)+ */
+				if(!v)
+					return NULL;
+				++*last;
+				break;
+			case '(':
+				++*last;
+				right = parse_regex(p, last);
+				if(!right)
+					return NULL;
+				if(p[*last] == ')')
+					++*last; /* skip the matching ')'; an unbalanced '(' ends on the terminator, which must not be stepped over */
+				v = make_node(concat, v, right);
+				break;
+			case ')':
+				return v;
+			case '.':
+				right = make_charclass(dot_bitmap);
+				if(!right)
+					return NULL;
+				v = make_node(concat, v, right);
+				if(!v)
+					return NULL;
+				++*last;
+				break;
+			case '[':
+				right = make_charclass( parse_char_class(p, last) );
+				if(!right)
+					return NULL;
+				v = make_node(concat, v, right);
+				if(!v)
+					return NULL;
+				if(p[*last] == ']')
+					++*last; /* parse_char_class() stops on the closing ']'; the next character is parsed normally, not swallowed as a literal */
+				break;
+			case '\\':
+				/* next char is escaped, advance pointer and let fall-through handle it */
+				++*last;
+				if(p[*last] == '\0')
+					return v; /* dangling backslash: nothing left to escape */
+				/* fall through */
+			default:
+				right = make_leaf(p[*last]);
+				v = make_node(concat, v, right);
+				if(!v)
+					return NULL;
+				++*last;
+				break;
+		}
+	}
+	return v;
+}
+
+#define BITMAP_HASSET(b, i) (b[i>>3] & (1<<(i&7)))
+/* Walk upwards from @n (entered from child @prev), appending characters to @buf; the collected text is registered with add_pattern_suffix() on reaching the root, an "anything" class or an optional node. Returns 0, or CL_EMEM when a callee fails. */
+static int build_suffixtree_ascend(struct regex_matcher *matcher, struct regex_list *regex, struct node *n, struct text_buffer *buf, struct node *prev)
+{
+	size_t i;
+	while(n) {
+		struct node *q = n;
+		switch(n->type) {
+			case root:
+				textbuffer_putc(buf, '\0');
+				if(add_pattern_suffix(matcher, buf->data, buf->pos, regex) < 0)
+					return CL_EMEM;
+				return 0;
+			case leaf:
+				textbuffer_putc(buf, n->u.leaf_char);
+				n = n->parent;
+				break;
+			case leaf_class:
+				if(memcmp(n->u.leaf_class_bitmap, dot_bitmap, sizeof(dot_bitmap)) == 0) {
+					/* class matches anything: the static suffix ends here */
+					textbuffer_putc(buf, '\0');
+					if(add_pattern_suffix(matcher, buf->data, buf->pos, regex) < 0)
+						return CL_EMEM;
+					return 0;
+				}
+				for(i=0;i<256;i++) { /* the bitmap has 256 bits: byte 0xFF must be visited too */
+					if(BITMAP_HASSET(n->u.leaf_class_bitmap, i)) {
+						size_t pos;
+						pos = buf->pos;
+						textbuffer_putc(buf, i);
+						if(build_suffixtree_ascend(matcher, regex, n->parent, buf, n) < 0)
+							return CL_EMEM;
+						buf->pos = pos; /* rewind so the next class member starts from the same prefix */
+					}
+				}
+				return 0;
+			case concat:
+				if(prev != n->u.children.left) {
+					if(build_suffixtree_descend(matcher, regex, n->u.children.left, buf) < 0)
+						return CL_EMEM;
+					/* we're done here, descend will call
+					 * ascend if needed */
+					return 0;
+				} else {
+					n = n->parent;
+				}
+				break;
+			case alternate:
+				n = n->parent;
+				break;
+			case optional:
+				textbuffer_putc(buf, '\0');
+				if(add_pattern_suffix(matcher, buf->data, buf->pos, regex) < 0)
+					return CL_EMEM;
+				return 0;
+		}
+		prev = q;
+	}
+	return 0;
+}
+/* Follow the right-most branch of @n down to the node that ends the expression and collect suffixes from there into @buf. Returns 0, or CL_EMEM when a callee fails. */
+static int build_suffixtree_descend(struct regex_matcher *matcher, struct regex_list *regex, struct node *n, struct text_buffer *buf)
+{
+	size_t pos;
+	while(n && n->type == concat) {
+		n = n->u.children.right;
+	}
+	if(!n)
+		return 0;
+	/* find out end of the regular expression,
+	 * if it ends with a static pattern */
+	switch(n->type) {
+		case alternate:
+			/* save pos as restart point */
+			pos = buf->pos;
+			if(build_suffixtree_descend(matcher, regex, n->u.children.left, buf) < 0)
+				return CL_EMEM;
+			buf->pos = pos;
+			if(build_suffixtree_descend(matcher, regex, n->u.children.right, buf) < 0)
+				return CL_EMEM;
+			buf->pos = pos;
+			break;
+		case optional:
+			/* an optional tail: register whatever has been collected so far */
+			textbuffer_putc(buf, '\0');
+			if(add_pattern_suffix(matcher, buf->data, buf->pos, regex) < 0)
+				return CL_EMEM;
+			return 0;
+		case leaf:
+		case leaf_class:
+			if(build_suffixtree_ascend(matcher, regex, n, buf, NULL) < 0)
+			        return CL_EMEM;
+			return 0;
+		default:
+			break;
+	}
+	return 0;
+}
+
+
+/* ----- shift-or filtering -------------- */
+
+#define BITMAP_CONTAINS(bmap, val) ((bmap)[(val) >> 5] & (1 << ((val) & 0x1f)))
+#define BITMAP_INSERT(bmap, val) ((bmap)[(val) >> 5] |= (1 << ((val) & 0x1f)))
+/* Reset the filter @m: every bit of the B, end and end_fast tables is set; SO_preprocess_add() later clears bits in them. */
+static void SO_init(struct filter *m)
+{
+	memset(m->B, ~0, sizeof(m->B));
+	memset(m->end, ~0, sizeof(m->end));
+	memset(m->end_fast, ~0, sizeof(m->end_fast));
+}
+
+/* because we use uint32_t */
+#define MAXSOPATLEN 32
+
+/* merge another pattern into the filter: add('abc'); add('bcd'); will match [ab][bc][cd].
+ * Only the first MAXSOPATLEN bytes of @pattern are used, shorter patterns are cut to an even length (under 2 bytes: ignored); always returns 0. */
+static int SO_preprocess_add(struct filter *m, const unsigned char *pattern, size_t len)
+{
+	uint16_t q;
+	uint8_t j;
+
+	/* cut length, and make it a multiple of 2 */
+	if(len > MAXSOPATLEN) {
+		len = MAXSOPATLEN;
+	} else {
+		/* we use 2-grams, must be multiple of 2 */
+		len = len & ~1;
+	}
+	if(!len)
+		return 0;
+
+	/* Shift-Or like preprocessing */
+	for(j=0;j < len-1;j++) {
+		/* use overlapping 2-grams. We need them overlapping because matching can start at any position */
+		q = cli_readint16( &pattern[j] );
+		m->B[q] &= ~(1 << j);
+	}
+	/* we use variable length patterns, use last character to mark pattern end, can lead to false positives.*/
+	/* mark that at state j, the q-gram q can end the pattern */
+	if(j) {
+		j--;
+		m->end[q] &= ~(1 << j);
+		/* SO_search() looks end_fast up by the second byte of the 2-gram and expects a cleared bit at state j */
+		m->end_fast[pattern[j+1]] &= ~(1<<j);
+	}
+	return 0;
+}
+
+/* this is like a FSM, with multiple active states at the same time.
+ * each bit in "state" means an active state, when a char is encountered we determine what states can remain active.
+ * The FSM transition rules are expressed as bit-masks.
+ * Returns -1 if no added pattern can end anywhere in @data (or @len is 0), else a non-negative offset into @data. */
+static long SO_search(const struct filter *m, const unsigned char *data, unsigned long len)
+{
+	size_t j;
+	uint32_t state = ~0;
+	const uint32_t *B = m->B;
+	const uint32_t *End = m->end;
+	const uint32_t *EndFast = m->end_fast;
+
+	if(!len) return -1;
+	/* Shift-Or like search algorithm */
+	for(j=0;j < len-1; j++) {
+		const uint16_t q0 = cli_readint16( &data[j] );
+		uint32_t match_end;
+		state = (state << 1) | B[q0];
+		/* state marks with a 0 bit all active states
+		 * End[q0] marks with a 0 bit all states where the q-gram 'q' can end a pattern
+		 * if we got two 0's at matching positions, it means we encountered a pattern's end */
+		match_end = state | EndFast[data[j+1]];
+		if((match_end != 0xffffffff) && (state | End[q0]) !=  0xffffffff) {
+			/* note: we rely on short-circuit eval here, we only evaluate and fetch End[q0], if
+			 * end_fast has matched. This reduces cache pressure on End[], and allows us to keep the working
+			 * set inside L2 */
+
+			/* if state is reachable, and this character can finish a pattern, assume match */
+			/* to reduce false positives check if qgram can finish the pattern */
+			/* return position of probable match */
+			/* the exact pattern length is not computed here: step back by the longest possible
+			 * pattern (MAXSOPATLEN), clamped to the start of the buffer */
+			return j >= MAXSOPATLEN  ? j - MAXSOPATLEN : 0;
+		}
+	}
+	/* no match */
+	return -1;
+}
+
+/* ----------------------------------------------------------- */
 
-#ifndef NDEBUG
-void dump_tree(struct tree_node* root);
-#endif
 
 #define MATCH_SUCCESS 0 
 #define MATCH_FAILED  -1
@@ -233,6 +554,43 @@ static inline size_t get_char_at_pos_with_skip(const struct pre_fixup_info* info
 	return (pos>0 && !str[realpos]) ? '\0' : str[realpos>0?realpos-1:0];
 }
 
+static int validate_subdomain(const struct regex_list *regex, const struct pre_fixup_info *pre_fixup, const char *buffer, size_t buffer_len, char *real_url, size_t real_len, char *orig_real_url)
+{
+	char c;
+	const char *matched;
+	size_t match_len;
+
+	if(!regex || !regex->pattern)
+		return 0;
+	match_len = strlen(regex->pattern);
+	if(((c=get_char_at_pos_with_skip(pre_fixup,buffer,buffer_len+1))==' ' || c=='\0' || c=='/' || c=='?') &&
+			(match_len == buffer_len || /* full match */
+			 (match_len < buffer_len &&
+			  ((c=get_char_at_pos_with_skip(pre_fixup,buffer,buffer_len-match_len))=='.' || (c==' ')) )
+			 /* subdomain matched*/)) {
+		cli_dbgmsg("Got a match: %s with %s\n", buffer, regex->pattern);
+		cli_dbgmsg("Before inserting .: %s\n", orig_real_url);
+		if(real_len >= match_len + 1) {
+			const size_t pos = real_len - match_len - 1;
+			if(real_url[pos] != '.') {
+				/* we need to shift left, and insert a '.'
+				 * we have an extra '.' at the beginning inserted by get_host to have room,
+				 * orig_real_url has to be used here, 
+				 * because we want to overwrite that extra '.' */
+				size_t orig_real_len = strlen(orig_real_url);
+				cli_dbgmsg("No dot here:%s\n",real_url+pos);
+				real_url = orig_real_url;
+				memmove(real_url, real_url+1, orig_real_len-match_len-1);
+				real_url[orig_real_len-match_len-1]='.';
+				cli_dbgmsg("After inserting .: %s\n", real_url);
+			}
+		}
+		return 1;
+	}
+	cli_dbgmsg("Ignoring false match: %s with %s, mismatched character: %c\n", buffer, regex->pattern, c);
+	return 0;
+}
+
 /*
  * @matcher - matcher structure to use
  * @real_url - href target
@@ -246,24 +604,28 @@ static inline size_t get_char_at_pos_with_skip(const struct pre_fixup_info* info
  * Do not send NULL pointers to this function!!
  *
  */
-int regex_list_match(struct regex_matcher* matcher,char* real_url,const char* display_url,const struct pre_fixup_info* pre_fixup,int hostOnly,const char** info,int is_whitelist)
+int regex_list_match(struct regex_matcher* matcher,char* real_url,const char* display_url,const struct pre_fixup_info* pre_fixup,int hostOnly,const char **info, int is_whitelist)
 {
 	char* orig_real_url = real_url;
-	massert(matcher);
-	massert(real_url);
-	massert(display_url);
-	massert(info);
+	const char *vinfo;
+	struct regex_list *regex;
+
+	assert(matcher);
+	assert(real_url);
+	assert(display_url);
+	*info = NULL;
 	if(!matcher->list_inited)
 		return 0;
-	massert(matcher->list_built);
+	assert(matcher->list_built);
 	/* skip initial '.' inserted by get_host */
 	if(real_url[0] == '.') real_url++;
 	if(display_url[0] == '.') display_url++;
 	{
 		size_t real_len    = strlen(real_url);
 		size_t display_len = strlen(display_url);
-		size_t buffer_len  = (hostOnly && !is_whitelist) ? real_len : real_len + display_len + 1 + (is_whitelist ? 1 : 0);
-		char*  buffer = cli_malloc(buffer_len+1);
+		size_t buffer_len  = (hostOnly && !is_whitelist) ? real_len + 1 : real_len + display_len + 1 + 1;
+		char *buffer = cli_malloc(buffer_len+1);
+		char *bufrev;
 		size_t i;
 		int rc = 0;
 		struct cli_ac_data mdata;
@@ -272,61 +634,48 @@ int regex_list_match(struct regex_matcher* matcher,char* real_url,const char* di
 			return CL_EMEM;
 
 		strncpy(buffer,real_url,real_len);
-		buffer[real_len]= (!is_whitelist && hostOnly) ? '\0' : ':';
+		buffer[real_len]= (!is_whitelist && hostOnly) ? '/' : ':';
 		if(!hostOnly || is_whitelist) {
 			strncpy(buffer+real_len+1,display_url,display_len);
-			if(is_whitelist)
-				buffer[buffer_len - 1] = '/';
-			buffer[buffer_len]=0;
 		}
+		buffer[buffer_len - 1] = '/';
+		buffer[buffer_len]=0;
 		cli_dbgmsg("Looking up in regex_list: %s\n", buffer);
 
-		if(hostOnly) {
-			if((rc = cli_ac_initdata(&mdata, 0, AC_DEFAULT_TRACKLEN)))
-				return rc;
-			rc = 0;
+		if((rc = cli_ac_initdata(&mdata, 0, AC_DEFAULT_TRACKLEN)))
+			return rc;
 
-			for(i = 0; i < matcher->root_hosts_cnt; i++) {
-				/* doesn't need to match terminating \0*/
-				rc = cli_ac_scanbuff((unsigned char*)buffer,buffer_len,info, &matcher->root_hosts[i] ,&mdata,0,0,-1,NULL,AC_SCAN_VIR,NULL);
-				cli_ac_freedata(&mdata);
-				if(rc) {
-					char c;
-					const char* matched = strchr(*info,':');
-					const size_t match_len = matched ? strlen(matched+1) : 0;
-					if(((c=get_char_at_pos_with_skip(pre_fixup,buffer,buffer_len+1))==' ' || c=='\0' || c=='/' || c=='?') &&
-						(match_len == buffer_len || /* full match */
-					        (match_len < buffer_len &&
-						((c=get_char_at_pos_with_skip(pre_fixup,buffer,buffer_len-match_len))=='.' || (c==' ')) )
-						/* subdomain matched*/)) {
+		bufrev = cli_strdup(buffer);
+		if(!bufrev)
+			return CL_EMEM;
+		reverse_string(bufrev);
+		rc = SO_search(&matcher->filter, (const unsigned char*)bufrev, buffer_len) != -1;
+		if(!rc) {
+			/* filter says this suffix doesn't match.
+			 * The filter has false positives, but no false
+			 * negatives */
+			return 0;
+		}
 
-						cli_dbgmsg("Got a match: %s with %s\n", buffer, *info);
-						cli_dbgmsg("Before inserting .: %s\n", orig_real_url);
-						if(real_len >= match_len + 1) {
-							const size_t pos = real_len - match_len - 1;
-							if(real_url[pos] != '.') {
-								/* we need to shift left, and insert a '.'
-								 * we have an extra '.' at the beginning inserted by get_host to have room,
-								 * orig_real_url has to be used here, 
-								 * because we want to overwrite that extra '.' */
-								size_t orig_real_len = strlen(orig_real_url);
-								cli_dbgmsg("No dot here:%s\n",real_url+pos);
-								real_url = orig_real_url;
-								memmove(real_url, real_url+1, orig_real_len-match_len-1);
-								real_url[orig_real_len-match_len-1]='.';
-								cli_dbgmsg("After inserting .: %s\n", real_url);
-							}
-						}
-						break;
-					}
-					cli_dbgmsg("Ignoring false match: %s with %s, mismatched character: %c\n", buffer, *info, c);
-					rc=0;
+		rc = cli_ac_scanbuff((unsigned char*)bufrev,buffer_len, &vinfo, &matcher->suffixes,&mdata,0,0,-1,NULL,AC_SCAN_VIR,NULL);
+		cli_ac_freedata(&mdata);
+
+		if(rc) {
+			/* TODO loop over multiple virusnames here */
+			regex = (struct regex_list*)vinfo;
+			do {
+				/* loop over multiple regexes corresponding to
+				 * this suffix */
+				if (!regex->preg.re_magic) {
+					/* we matched a static pattern */
+					rc = validate_subdomain(regex, pre_fixup, buffer, buffer_len, real_url, real_len, orig_real_url);
+				} else {
+					rc = !cli_regexec(&regex->preg, buffer, 0, NULL, 0);
 				}
-			}
-		} else
-			rc = 0;
-		if(!rc)
-			rc = match_node(hostOnly ? matcher->root_regex_hostonly : matcher->root_regex,(unsigned char*)buffer,buffer_len,info) == MATCH_SUCCESS ? CL_VIRUS : CL_SUCCESS;
+				if(rc) *info = regex->pattern;
+				regex = regex->nxt;
+			 } while(!rc && regex);
+		}
 		free(buffer);
 		if(!rc)
 			cli_dbgmsg("Lookup result: not in regex list\n");
@@ -336,56 +685,6 @@ int regex_list_match(struct regex_matcher* matcher,char* real_url,const char* di
 	}
 }
 
-/* node stack */
-#define NODE_STACK_INITIAL 1024
-#define NODE_STACK_GROW    4096
-/* Initialize @stack */
-static int stack_init(struct node_stack* stack)
-{
-	massert(stack);
-
-	stack->cnt = 0;
-	stack->capacity = NODE_STACK_INITIAL;
-	stack->data = cli_malloc(stack->capacity * sizeof(*stack->data));
-	if(!stack->data)
-		return CL_EMEM;
-	else
-		return CL_SUCCESS;
-}
-
-/* Reset @stack pointer, but don't realloc */
-static void stack_reset(struct node_stack* stack)
-{
-	massert(stack);
-
-	stack->cnt = 0;
-}
-
-/* Push @node on @stack, growing it if necessarry */
-static int stack_push(struct node_stack* stack,struct tree_node* node)
-{
-	massert(stack);
-	massert(stack->data);
-
-	if(stack->cnt == stack->capacity) {
-		stack->capacity += NODE_STACK_GROW;
-		stack->data = cli_realloc2(stack->data,stack->capacity*sizeof(*stack->data));
-		if(!stack->data)
-			return CL_EMEM;
-	}
-	stack->data[stack->cnt++] = node;
-	return CL_SUCCESS;
-}
-
-/* Pops node from @stack, doesn't realloc */
-static struct tree_node* stack_pop(struct node_stack* stack)
-{
-	massert(stack);
-	massert(stack->data);
-	massert(stack->cnt);/*don't pop from empty stack */
-
-	return stack->cnt ? stack->data[--stack->cnt] : NULL;
-}
 
 /* Initialization & loading */
 /* Initializes @matcher, allocating necesarry substructures */
@@ -393,90 +692,21 @@ int init_regex_list(struct regex_matcher* matcher)
 {
 	int rc;
 
-	massert(matcher);
-	matcher->list_inited = 0;
- 	matcher->root_hosts_cnt = 0;
- 	matcher->root_hosts = NULL;
- 	matcher->root_hosts_cnt = 0;
-
-	matcher->root_regex = tree_root_alloc();
-	if(!matcher->root_regex) {
-		return CL_EMEM;
-	}
-
-	matcher->root_regex_hostonly = tree_root_alloc();
-	if(!matcher->root_regex_hostonly) {
-		free(matcher->root_regex);
-		return CL_EMEM;
-	}
-
-	if(( rc = stack_init(&matcher->node_stack) )) {
-		free(matcher->root_regex_hostonly);
-		free(matcher->root_regex);
-		return rc;
-	}
-	if(( rc = stack_init(&matcher->node_stack_alt) )) {
-		free(matcher->root_regex_hostonly);
-		free(matcher->root_regex);
-		stack_destroy(&matcher->node_stack);
-		return rc;
-	}
+	assert(matcher);
+	memset(matcher, 0, sizeof(*matcher));
 
 	matcher->list_inited=1;
-	matcher->list_built=1;/* its empty, but pretend its built, so that load_ will realloc root_hosts */
+	matcher->list_built=0;
 	matcher->list_loaded=0;
 
+	hashtab_init(&matcher->suffix_hash, 10);
+	if((rc = cli_ac_init(&matcher->suffixes, 2, 32))) {
+		return rc;
+	}
+	SO_init(&matcher->filter);
 	return CL_SUCCESS;
 }
 
-/* inserts @pattern into @root, using ac-matcher 
- * although the name might be confusing, @pattern is not a regex!*/
-static int add_regex_list_element(struct cli_matcher* root,const char* pattern,char* info)
-{
-       int ret;
-       struct cli_ac_patt *new = cli_calloc(1,sizeof(*new));
-       size_t len,i;
-
-       if(!new)
-	       return CL_EMEM;
-       massert(root);
-       massert(pattern);
-
-       len = strlen(pattern);
-       /* need not to match \0 too */
-       new->rtype = 0;
-       new->type = 0;
-       new->sigid = 0;
-       new->parts = 0;
-       new->partno = 0;
-       new->mindist = 0;
-       new->maxdist = 0;
-       new->offset = 0;
-       new->target = 0;
-       new->length = len;
-       new->ch[0] = new->ch[1] |= CLI_MATCH_IGNORE;
-       if(new->length > root->maxpatlen)
-               root->maxpatlen = new->length;
-
-       new->pattern = cli_malloc(sizeof(new->pattern[0])*len);
-       if(!new->pattern) {
-	       free(new);
-	       return CL_EMEM;
-       }
-       for(i=0;i<len;i++)
-	       new->pattern[i]=pattern[i];/*new->pattern is short int* */
-
-	
-       new->virname = cli_strdup(info);
-       if((ret = cli_ac_addpatt(root,new))) {
-	       free(new->virname);
-               free(new->pattern);
-               free(new);
-               return ret;
-       }
-       return CL_SUCCESS;
-}
-
 static int functionality_level_check(char* line)
 {
 	char* ptmin;
@@ -527,14 +757,10 @@ int load_regex_matcher(struct regex_matcher* matcher,FILE* fd,unsigned int optio
 	int rc,line=0;
 	char buffer[FILEBUFF];
 
-	massert(matcher);
+	assert(matcher);
 
 	if(matcher->list_inited==-1)
 		return CL_EMALFDB; /* already failed to load */
-/*	if(matcher->list_loaded) {
-		cli_warnmsg("Regex list has already been loaded, ignoring further requests for load\n");
-		return CL_SUCCESS;
-	}*/
 	if(!fd && !dbio) {
 		cli_errmsg("Unable to load regex list (null file)\n");
 		return CL_EIO;
@@ -548,7 +774,6 @@ int load_regex_matcher(struct regex_matcher* matcher,FILE* fd,unsigned int optio
 			fatal_error(matcher);
 			return rc;
 		}
-		/*atexit(regex_list_done); TODO: destroy this in manager.c */
 	}
 	/*
 	 * Regexlist db format (common to .wdb(whitelist) and .pdb(domainlist) files:
@@ -573,11 +798,13 @@ int load_regex_matcher(struct regex_matcher* matcher,FILE* fd,unsigned int optio
 	while(cli_dbgets(buffer, FILEBUFF, fd, dbio)) {
 		char* pattern;
 		char* flags;
+		size_t pattern_len;
+
 		cli_chomp(buffer);
 		if(!*buffer)
 			continue;/* skip empty lines */
 
-		if(functionality_level_check(buffer)) 
+		if(functionality_level_check(buffer))
 			continue;
 
 		line++;
@@ -591,83 +818,39 @@ int load_regex_matcher(struct regex_matcher* matcher,FILE* fd,unsigned int optio
 		flags = buffer+1;
 		pattern++;
 
-		if(is_whitelist) {
-			const size_t pattern_len = strlen(pattern);
-			if(pattern_len < FILEBUFF) {
-				pattern[pattern_len] = '/';
-				pattern[pattern_len+1] = '\0';
-			}
-			else {
-				cli_errmsg("Overlong regex line %d\n",line);
-				fatal_error(matcher);
-				return CL_EMALFDB;
-			}
+		pattern_len = strlen(pattern);
+		if(pattern_len < FILEBUFF) {
+			pattern[pattern_len] = '/';
+			pattern[pattern_len+1] = '\0';
+		}
+		else {
+			cli_errmsg("Overlong regex line %d\n",line);
+			fatal_error(matcher);
+			return CL_EMALFDB;
 		}
 
-		if((buffer[0] == 'R' && !is_whitelist) || ((buffer[0] == 'X' || buffer[0] == 'Y') && is_whitelist)) {/*regex*/
-			if(( rc = add_pattern(matcher,(const unsigned char*)pattern,flags, buffer[0] == 'Y') ))
+		if((buffer[0] == 'R' && !is_whitelist) || ((buffer[0] == 'X' || buffer[0] == 'Y') && is_whitelist)) {
+			/* regex for hostname*/
+			if (( rc = add_pattern(matcher, pattern) ))
 				return rc==CL_EMEM ? CL_EMEM : CL_EMALFDB;
 		}
-		else if( ( buffer[0] == 'H' && !is_whitelist) || (buffer[0] == 'M' && is_whitelist)) {/*matches displayed host*/
-			struct cli_matcher* root;
- 			if(matcher->list_built) {
- 				struct cli_matcher* old_hosts = matcher->root_hosts;
- 				matcher->root_hosts_cnt++;
- 
- 				matcher->root_hosts = cli_realloc(matcher->root_hosts, matcher->root_hosts_cnt * sizeof(*matcher->root_hosts));
- 				if(!matcher->root_hosts) {
- 					matcher->root_hosts = old_hosts;/* according to manpage this must still be valid*/
- 					return CL_EMEM;
-				} 
-
-				root = &matcher->root_hosts[matcher->root_hosts_cnt-1];
- 				memset(root, 0, sizeof(struct cli_matcher));
-
-				cli_dbgmsg("regex_list: Initialising AC pattern matcher\n");
-				if((rc = cli_ac_init(root, cli_ac_mindepth, cli_ac_maxdepth))) {
-					/* no need to free previously allocated memory here */
-					cli_errmsg("regex_list: Can't initialise AC pattern matcher\n");
-					return rc;
-				}
- 				matcher->list_built = 0;
- 			}
-			else {
-				root = &matcher->root_hosts[matcher->root_hosts_cnt-1];
-			}
- 			if(( rc = add_regex_list_element(root,pattern,flags) ))
+		else if( ( buffer[0] == 'H' && !is_whitelist) || (buffer[0] == 'M' && is_whitelist)) {
+			/*matches displayed host*/
+			if (( rc = add_static_pattern(matcher, pattern) ))
 				return rc==CL_EMEM ? CL_EMEM : CL_EMALFDB;
 		}
 		else {
 			return CL_EMALFDB;
-			/* this is useless, we have host, and regex matches
-			if(( rc = add_regex_list_element(matcher->root_urls,pattern,flags) ))
-				return rc==CL_EMEM ? CL_EMEM : CL_EMALFDB;*/
 		}
 	}
 	matcher->list_loaded = 1;
-	if(( rc = build_regex_list(matcher) ))
-		return rc;
 
-#ifndef NDEBUG
-/*			dump_tree(matcher->root_regex);*/
-#endif
-	if(!matcher->list_built) {
-		cli_errmsg("Regex list not loaded: build failed!\n");
-		fatal_error(matcher);
-		return CL_EMALFDB;
-	}
-	regex_list_cleanup(matcher);
 	return CL_SUCCESS;
 }
 
 
-static struct tree_node ** tree_node_get_children(const struct tree_node* node)
-{
-	return node->op==OP_CUSTOMCLASS ? (node->u.children[1] ? node->u.children+1 : NULL) :node->u.children;
-}
-
 /* Build the matcher list */
-static int build_regex_list(struct regex_matcher* matcher)
+int cli_build_regex_list(struct regex_matcher* matcher)
 {
 	int rc;
 	if(!matcher->list_inited || !matcher->list_loaded) {
@@ -675,9 +858,9 @@ static int build_regex_list(struct regex_matcher* matcher)
 		return -1;/*TODO: better error code */
 	}
 	cli_dbgmsg("Building regex list\n");
-	if(matcher->root_hosts)
-		if(( rc = cli_ac_buildtrie(&matcher->root_hosts[matcher->root_hosts_cnt-1]) ))
- 			return rc;
+	hashtab_free(&matcher->suffix_hash);
+	if(( rc = cli_ac_buildtrie(&matcher->suffixes) ))
+		return rc;
 	matcher->list_built=1;
 
 	return CL_SUCCESS;
@@ -686,864 +869,193 @@ static int build_regex_list(struct regex_matcher* matcher)
 /* Done with this matcher, free resources */
 void regex_list_done(struct regex_matcher* matcher)
 {
-	massert(matcher);
+	assert(matcher); /* tear-down entry point: releases what a loaded matcher holds and clears its state flags */
 
-	regex_list_cleanup(matcher);
 	if(matcher->list_loaded) {
-		if(matcher->root_hosts) {
-			size_t i;
-			for(i=0;i<matcher->root_hosts_cnt;i++) 
-				cli_ac_free(&matcher->root_hosts[i]);
-			free(matcher->root_hosts);
-			matcher->root_hosts=NULL;
+		size_t i;
+		/* TODO: call cli_ac_free, but be sure it won't free virname (add_newsuffix stores a struct regex_list* there) */
+		/* cli_ac_free(&matcher->suffixes); */
+		if(matcher->suffix_regexes) {
+			for(i=0;i<matcher->suffix_cnt;i++) {
+				struct regex_list *r = matcher->suffix_regexes[i]; /* head of the regex chain registered for suffix i */
+				while(r) {
+					cli_regfree(&r->preg); /* NOTE(review): only the compiled regex is released; the node itself and r->pattern are not freed here - confirm ownership */
+					r = r->nxt;
+				}
+			}
+			free(matcher->suffix_regexes); /* frees the array of list heads only */
+			matcher->suffix_regexes = NULL;
 		}
-
-		matcher->root_hosts_cnt=0;
+		hashtab_free(&matcher->suffix_hash);
 		matcher->list_built=0;
-		destroy_tree(matcher);
 		matcher->list_loaded=0;
 	}
 	if(matcher->list_inited) {
 		matcher->list_inited=0;
 	}
-	stack_destroy(&matcher->node_stack);
-	stack_destroy(&matcher->node_stack_alt);
-}
-
-/* Tree matcher algorithm */
-struct token_t
-{
-	union {
-		const unsigned char* start;
-		char_bitmap_p  bitmap;
-		unsigned char  c;
-	} u;
-	size_t len;
-	char   type;
-};
-
-enum {TOKEN_CHAR,TOKEN_DOT,TOKEN_PAR_OPEN,TOKEN_PAR_CLOSE,TOKEN_BRACKET,TOKEN_ALT,TOKEN_REGEX,TOKEN_DONE};
-
-static const unsigned char* getNextToken(const unsigned char* pat,struct token_t* token)
-{
-	massert(pat);
-	massert(token);
-
-	switch(*pat) {
-		case '\\':
-			token->type=TOKEN_CHAR;
-			token->u.c = *(++pat);
-			if(islower(token->u.c)) {
-				/* handle \n, \t, etc. */
-				char fmt[3] = {'\\', '\0', '\0'};
-				char c;
-
-				fmt[1] = token->u.c;
-				if(snprintf(&c,1,fmt)!=1) {
-					token->type=TOKEN_REGEX;
-					token->u.start = pat;
-				}
-				else
-					token->u.c=c;
-			}
-			token->len = 1;
-			break;
-		case '|':
-			token->type=TOKEN_ALT;
-			break;
-		case '*':
-		case '+':
-		case '?':
-		case '{':
-		case '}':
-			token->type=TOKEN_REGEX;
-			break;
-		case '[':
-			{
-			/*TODO: implement*/
-			/*see if it is something simple like a list of characters, a range, or negated ...*/
-			const unsigned char* old=pat++;/* save this in case we change our mind and decide this is too complicated for us to handle*/
-			unsigned char range_start=0;
-			int hasprev = 0;
-			char_bitmap_p bitmap = cli_malloc(32);
-			if(!bitmap)
-				return NULL;
-			if (*pat=='^') {
-				memset(bitmap,0xFF,32);/*match chars not in brackets*/
-				pat++;
-			}
-			else
-				memset(bitmap,0x00,32);
-			do {
-				/* literal ] can be first character, so test for it at the end of the loop, for example: []] */
-				if (*pat=='-' && hasprev) {
-					/* it is a range*/
-					unsigned char range_end;
-					unsigned int c;
-					massert(range_start);
-					pat++;
-					if (pat[0]=='[')
-						if (pat[1]=='.') {
-							if(pat[2]=='-' && pat[3]=='.' && pat[4]==']')
-								range_end = '-';
-							else {
-								/* this is getting complicated, bail out */
-								cli_warnmsg("confused about collating sequences in regex,bailing out");
-								pat=old;
-								token->type=TOKEN_REGEX;
-								break;
-							}
-						}
-						else 
-							range_end = *pat;
-					else
-						range_end = *pat;
-					for(c=range_start+1;c<=range_end;c++)
-						bitmap[c>>3] ^= 1<<(c&0x7);
-					hasprev = 0;
-				}
-				else if (pat[0]=='[' && pat[1]==':') {
-							const unsigned char* end;
-							int len,found=-1;
-							size_t i;
-
-							pat+=2;
-							end=(unsigned char*)strstr((const char*)pat,":]");
-							if(!end) {
-								cli_warnmsg("confused about std char class syntax regex,bailing out");
-								pat=old;
-								token->type=TOKEN_REGEX;
-								break;
-							}
-
-							len = end-pat;
-							for(i=0;i<std_class_cnt;i++)
-								if(!strncmp((const char*)pat,std_class[i],len)) {
-									found=i;
-									break;
-								}
-							if(found!=-1) {
-								for(i=0;i<256;i++)
-									if(char_class[i]&(1<<found))
-										bitmap[i>>3] ^= 1<<(i&0x7);
-							}
-							else {
-								/*unknown class*/
-								cli_warnmsg("confused about regex bracket expression, bailing out");
-								pat=old;
-								token->type=TOKEN_REGEX;
-								break;
-							}
-						}
-				else {
-					bitmap[*pat>>3] ^= 1<<(*pat&0x7);
-					pat++;
-					range_start = *pat;
-					hasprev = 1;
-				}
-			} while(*pat!=']');
-			/*TODO: see if this bitmap already exists, then reuse*/			
-			token->type = TOKEN_BRACKET;
-			token->u.bitmap = bitmap;
-			break;
-			}
-		case ']':
-			massert(0 && "Encountered ] without matching [");
-			/* bad state */
-			break;
-		case '.':
-			token->type=TOKEN_DOT;
-			break;
-		case '(':
-			token->type=TOKEN_PAR_OPEN;
-			break;
-		case ')':
-			token->type=TOKEN_PAR_CLOSE;
-			break;
-		default:
-			token->type=TOKEN_CHAR;
-			token->u.c = *pat;
-			token->len=1;
-			break;
-	}
-	return ++pat;
-}
-
-#define INITIAL_ALT_STACK 10
-#define ALT_STACK_GROW 20
-
-static const unsigned char* find_regex_start(const unsigned char* pat)
-{
-	struct token_t token;
-	/*TODO: find where the regex part begins, for ex:
-	 * abcd+, regex begins at 'd'
-	 * */
-	const unsigned char* last=NULL;
-	const unsigned char* tmp=NULL;
-	const unsigned char** altpositions = cli_malloc(INITIAL_ALT_STACK*sizeof(*altpositions));
-	size_t altpositions_capacity = INITIAL_ALT_STACK;
-	size_t altpositions_cnt = 0;
-	char lasttype = -1;
-	if(!altpositions)
-		return NULL;
-	massert(pat);
-
-	/* Try to parse pattern till special regex chars are encountered, that the tree-matcher doesn't handle, like: +,*,{}.
-	 * The tricky part is that once we encounter these, the previous 'atom' has to be passed on to the regex matcher, so we have to
-	 * back up to the last known good position
-	 * Example, if we have: abc(defg)+, then only abc can be handled by tree parser, so we have to return the position of (.
-	 * Another example: abc(defg|xyz|oz+|pdo), the last known good position is |, after xyz
-	 * TODO: what about open parantheses? maybe once we found a special char, we have top back out before the first (?
-	 * */
-	do {	
-		tmp = pat;
-		pat = getNextToken(pat,&token);
-		if(token.type!=TOKEN_REGEX) {
-			last = tmp;
-			lasttype = token.type;
-			if(token.type==TOKEN_BRACKET && token.u.bitmap)
-				free(token.u.bitmap);
-			if(token.type==TOKEN_ALT || token.type==TOKEN_PAR_OPEN) {
-				/* save this position on stack, succesfully parsed till here*/
-				if(altpositions_cnt && altpositions[altpositions_cnt-1][0]=='|')
-					/* encountered another alternate (|) operator, override previous | position stored */
-					altpositions[altpositions_cnt-1]=last;
-				else {
-					altpositions[altpositions_cnt++] = last;
-					if(altpositions_cnt == altpositions_capacity) {
-						altpositions_capacity += ALT_STACK_GROW;
-						altpositions = cli_realloc2(altpositions,altpositions_capacity*sizeof(*altpositions));
-						if(!altpositions)
-							return NULL;
-					}
-				}
-			} else if (lasttype==TOKEN_PAR_CLOSE) {
-				/* remove last stored position from stack, succesfully this last group */
-				altpositions_cnt--;
-				massert(altpositions_cnt>0);
-			}
-		}
-		else {
-			if(altpositions_cnt)
-				last = altpositions[0 /*altpositions_cnt-1*/];/*TODO: which index here?, see above TODO... */
-			/*last stored 'safe' position where no special (+,*,{}) regex chars were encountered*/
-		}
-	} while(*pat && token.type!=TOKEN_REGEX);
-	free(altpositions);
-	return *pat ? last : last+1;
-}
-
-static struct tree_node* tree_node_alloc(struct tree_node* next,char listend)
-{
-	struct tree_node* node = cli_malloc(sizeof(*node));
-	if(node) {
-		node->alternatives=0;
-		node->next=next;
-		node->listend=listend;
-		node->u.children=NULL;
-	}
-	return node;
-}
-
-static struct tree_node* tree_root_alloc(void)
-{
-	struct tree_node* root=tree_node_alloc(NULL,1);
-	if(root) {
-		root->op=OP_ROOT;
-		root->c=0;
-		root->next=NULL;
-		root->listend=1;
-	}
-	return root;
-}
-
-static struct tree_node* tree_node_char_binsearch(const struct tree_node* node,const char csearch,int* left)
-{
-	int right;
-	struct tree_node **children;
-	massert(node);
-	massert(left);
-
-	children = tree_node_get_children(node);
-	right = node->alternatives-1;
-	*left = 0;
-	if(!node->alternatives)
-		return NULL;
-	massert(children);
-	while(*left<=right) {
-		int mid  = *left+(right-*left)/2;
-		if(children[mid]->c == csearch)
-			return children[mid]; 
-		else if(children[mid]->c < csearch)
-			*left=mid+1;
-		else
-			right=mid-1;
-	}
-	return NULL;
-}
-
-static struct tree_node* tree_get_next(struct tree_node* node)
-{
-	struct tree_node** children;
-	massert(node);
-	children = tree_node_get_children(node);
-
-	if(!node->alternatives && children && children[0])
-		return children[0];
-	else if(node->alternatives<=1)
-		return node;
-	else
-		return children[0]->next;
-}
-
-static size_t tree_node_get_array_size(const struct tree_node* node)
-{
-	massert(node);
-	/* if op is CUSTOMCLASS, then first pointer is pointer to bitmap, so array size is +1 */
-	return (node->alternatives + (node->op==OP_CUSTOMCLASS ? 1 : 0)) * sizeof(node->u.children[0]);
-}
-
-static struct tree_node* tree_node_char_insert(struct tree_node* node,const char c,int left)
-{
-	struct tree_node* new, *alt = tree_get_next(node);
-	struct tree_node **children;
-	node->alternatives++;
-	node->u.children = cli_realloc2(node->u.children,tree_node_get_array_size(node));
-	if(!node->u.children)
-		return NULL;
-
-	children = node->op==OP_CUSTOMCLASS ? node->u.children+1 : node->u.children;
-
-	new = tree_node_alloc(alt , node == alt );
-	if(new) {
-		new->op=OP_CHAR;
-		new->c=c;
-	}
-
-	if(node->alternatives-left-1>0)
-			memmove(&children[left+1],&children[left],(node->alternatives-left-1)*sizeof(node->u.children[0]));
-	children[left] = new;	
-
-	return new;
-}
-
-static void tree_node_insert_nonbin(struct tree_node* node, struct tree_node* new)
-{
-	struct tree_node **children;
-	massert(node);
-	massert(new);
-
-	children = tree_node_get_children(node);
-	if(node->alternatives) {
-		massert(children);
-	       	if(children[0]->next == node) {
-			int i;
-			new->listend = 1;
-			for(i=0;i<node->alternatives;i++) {
-				children[i]->next = new;
-				children[i]->listend = 0;
-			}
-		}
-		else {
-			struct tree_node* p;
-			for(p = children[0]->next ; p->next != node ; p = p->next)
-				massert(!p->listend);
-			new->listend = 1;
-			p->listend = 0;
-			p->next = new;
-		}
-	}
-	else {
-		int idx = node->op==OP_CUSTOMCLASS ? 1 : 0;
-		if(node->u.children)
-			if(node->u.children[idx]) {
-				node = node->u.children[idx];
-				while(node->next && !node->listend)
-					node = node->next;
-				node->listend = 0;
-				new->next = node->next;
-				node->next = new;
-				new->listend=1;
-				return;
-			}
-		node->u.children = cli_realloc2(node->u.children,sizeof(node->u.children[0])*(2));
-		if(node->u.children) {
-			node->u.children[idx] = new;
-		}
-	}
-}
-
-static unsigned char char_getclass(const unsigned char* bitmap)
-{
-	size_t i;
-	massert(bitmap);
-
-	for(i=0;i<std_class_cnt;i++)
-		if(!memcmp(bitmap,char_class_bitmap[i],256>>3))
-			return i;
-	return std_class_cnt;
-}
-
-static void stack_destroy(struct node_stack* stack)
-{
-	massert(stack);
-	if(stack->data)
-		free(stack->data);
-	stack->data = NULL;
-	stack->capacity = 0;
-}
-
-/* call this after whitelist load is complete, and the tree is no longer going to be modified */
-void regex_list_cleanup(struct regex_matcher* matcher)
-{
-	massert(matcher);
-
-	stack_destroy(&matcher->node_stack);
-	stack_destroy(&matcher->node_stack_alt);
-	stack_init(&matcher->node_stack);
-	stack_init(&matcher->node_stack_alt);
 }
 
 int is_regex_ok(struct regex_matcher* matcher)
 {
-	massert(matcher);
+	assert(matcher); /* the return below is non-zero unless list_inited == -1, the value load_regex_matcher treats as "already failed to load" */
 	return (!matcher->list_inited || matcher->list_inited!=-1);/* either we don't have a regexlist, or we initialized it successfully */
 }
 
-/* returns 0 on success, regexec error code otherwise */						
-static int add_pattern(struct regex_matcher* matcher,const unsigned char* pat,const char* info, int hostonly)
+static int add_newsuffix(struct regex_matcher *matcher, struct regex_list *info, char *suffix, size_t len) /* adds suffix[0..len) to the matcher->suffixes AC matcher and to matcher->filter; returns CL_SUCCESS, CL_EMEM, or the error from cli_ac_addpatt */
 {
-	int bol=1;
-	const unsigned char* pat_end = find_regex_start(pat);
-	struct token_t token;
-	struct tree_node* node;
-	
-	massert(matcher);
+	struct cli_matcher *root = &matcher->suffixes;
+	struct cli_ac_patt *new = cli_calloc(1,sizeof(*new)); /* zero-filled, so the explicit "= 0" assignments below restate the initial state */
+	size_t i;
+	int ret;
 
-	node = hostonly ? matcher->root_regex_hostonly : matcher->root_regex;
+	if(!new)
+		return CL_EMEM;
+	assert(root && suffix);
 
-	stack_reset(&matcher->node_stack);
-	stack_reset(&matcher->node_stack_alt);
-	stack_push(&matcher->node_stack,node);
+	new->rtype = 0;
+	new->type = 0;
+	new->sigid = 0;
+	new->parts = 0;
+	new->partno = 0;
+	new->mindist = 0;
+	new->maxdist = 0;
+	new->offset = 0;
+	new->target = 0;
+	new->length = len;
 
-	for(;node->op!=OP_LEAF;){
-		if(pat<pat_end)
-			pat  = getNextToken(pat,&token);
-		else if(*pat) {
-			token.type = TOKEN_REGEX;
-			token.u.start=pat;
-		}
-		else
-			token.type = TOKEN_DONE;
+	new->ch[0] = new->ch[1] |= CLI_MATCH_IGNORE;
+	if(new->length > root->maxpatlen) /* keep the matcher's longest-pattern bookkeeping up to date */
+		root->maxpatlen = new->length;
 
-		switch(token.type) {
-			case TOKEN_CHAR: 
-				{
-					/* search for char in tree */
-					int left;
-					struct tree_node* newnode = tree_node_char_binsearch(node,token.u.c,&left);
-					if(newnode)
-						node = newnode;
-					else {
-						/* not found, insert it */
-						node = tree_node_char_insert(node,token.u.c,left);
-					}
-					break;
-				}
+	new->pattern = cli_malloc(sizeof(new->pattern[0])*len);
+	if(!new->pattern) {
+		free(new);
+		return CL_EMEM;
+	}
+	for(i=0;i<len;i++) /* element-wise copy: the destination elements are wider than char, so memcpy would not do */
+		new->pattern[i] = suffix[i];/*new->pattern is short int* */
 
-			case TOKEN_PAR_OPEN:
-				stack_push(&matcher->node_stack_alt,NULL);/* marker */
-				stack_push(&matcher->node_stack,node);
-				break;
+	new->virname = (char*)info; /* not a name: the regex_list pointer rides in virname and must not be freed as a string */
+	if((ret = cli_ac_addpatt(root,new))) {
+		free(new->pattern); /* info is not freed on this path */
+		free(new);
+		return ret;
+	}
+	SO_preprocess_add(&matcher->filter, suffix, len); /* the same suffix is also fed to the filter */
+	return CL_SUCCESS;
+}
 
-			case TOKEN_PAR_CLOSE: {
-						      /*TODO: test this!!!*/
-						      struct tree_node* node_alt = node;
-						      node = tree_node_alloc(NULL,1);
-						      node->op=OP_PARCLOSE;
-						      node->c=0;
-						      node->listend=1;
-						      tree_node_insert_nonbin(node_alt,node);
-						      while (( node_alt = stack_pop(&matcher->node_stack_alt) )) {
-							      tree_node_insert_nonbin(node_alt,node);
-						      }
-				      		      stack_pop(&matcher->node_stack);					      
-		      				      break;
-					      }
+#define MODULE "regex_list: "
+/* ------ load a regex, determine suffix, determine suffix2regexlist map ---- */
 
-			case TOKEN_ALT:
-				stack_push(&matcher->node_stack_alt,node);
-				node = stack_pop(&matcher->node_stack);
-				stack_push(&matcher->node_stack,node);
-				break;
+/* Registers regex under suffix (length suffix_len). Returns 0 on success, clamav error code otherwise; once linked in, regex stays owned by matcher. */
+static int add_pattern_suffix(struct regex_matcher *matcher, char *suffix, size_t suffix_len, struct regex_list *regex)
 {
+	const struct element *el;
 
-			case TOKEN_BRACKET:
-				{
-					struct tree_node* new = tree_node_alloc(tree_get_next(node),1);
-					unsigned char charclass = char_getclass(token.u.bitmap);
-					if(charclass == std_class_cnt) {/*not a std char class*/
-						new->op = OP_CUSTOMCLASS;
-						new->u.children = cli_malloc(sizeof(new->u.children[0])*2);
-						if(!new->u.children)
-							return CL_EMEM;
-						new->u.bitmap[0] = token.u.bitmap;
-						new->u.bitmap[1] = NULL;
-						tree_node_insert_nonbin(node,new);
-						node = new;
-					}
-					else {
-						new->op = OP_STDCLASS;
-						new->c = charclass;
-						tree_node_insert_nonbin(node,new);
-						node=new;
-					}
-					break;
-				}
-
-			case TOKEN_DOT:
-				{
-					struct tree_node* new = tree_node_alloc(tree_get_next(node),1);
-					new->op = OP_DOT;
-					tree_node_insert_nonbin(node,new);
-					node=new;
-					break;
-				}
-
-			case TOKEN_REGEX:
-			case TOKEN_DONE: {
-						 struct leaf_info* leaf=cli_malloc(sizeof(*leaf));
-						 if(!leaf)
-							 return CL_EMEM;
-						 leaf->info = cli_strdup(info);
-						 if(token.type==TOKEN_REGEX) {
-							 int rc;
-							 struct tree_node* new;
-							 regex_t* preg;
-							 preg=cli_malloc(sizeof(*preg));
-							 if(!preg)
-								 return CL_EMEM;
-							 rc = cli_regcomp(preg,(const char*)token.u.start,REG_EXTENDED|(bol?0:REG_NOTBOL));
-							 leaf->preg=preg;
-							 if(rc)
-								 return rc;
-							 new=cli_malloc(sizeof(*new));
-							 if(!new)
-								 return CL_EMEM;
-							 new->op=OP_LEAF;
-							 new->next=node;
-							 new->alternatives=0;
-							 new->u.leaf=leaf;
-							 new->listend=1;
-							 tree_node_insert_nonbin(node,new);
-						 }
-						 else {
-							 leaf->preg=NULL;
-							 node->alternatives=0;
-							 node->u.leaf=leaf;
-							 node->op=OP_LEAF;
-						 }
-						 return 0;
-					 }
-		}
-
-		bol=0;
+	assert(matcher);
+	el = hashtab_find(&matcher->suffix_hash, suffix, suffix_len);
+	/* TODO: what if suffixes are prefixes of each other and only one will match? */
+	if(el) { /* existing suffix: push regex on the front of that suffix's list */
+		assert(el->data < matcher->suffix_cnt);
+		regex->nxt = matcher->suffix_regexes[el->data];
+		matcher->suffix_regexes[el->data] = regex;
+		cli_dbgmsg(MODULE "added new regex to existing suffix %s: %s\n", suffix, regex->pattern);
+	} else { /* new suffix: grow into a temporary so a failed realloc leaves array, count and hash untouched */
+		int rc;
+		size_t n = matcher->suffix_cnt;
+		struct regex_list **grown = cli_realloc(matcher->suffix_regexes, (n+1)*sizeof(*grown));
+		if(!grown)
+			return CL_EMEM; /* the old array is still valid and still referenced by matcher */
+		grown[n] = regex;
+		matcher->suffix_regexes = grown;
+		el = hashtab_insert(&matcher->suffix_hash, suffix, suffix_len, matcher->suffix_cnt++); /* stores n; NOTE(review): result is not checked - confirm hashtab_insert's failure value */
+		if(( rc = add_newsuffix(matcher, regex, suffix, suffix_len) ))
+			return rc; /* previously ignored: the suffix would have been missing from the AC trie without any error */
+		cli_dbgmsg(MODULE "added new suffix %s, for regex: %s\n", suffix, regex->pattern);
 	}
 	return 0;
 }
 
-/* c has to be unsigned char here!! */
-static int match_node(struct tree_node* node,const unsigned char* c,size_t len,const char** info)
-{
-	struct tree_node** children;
-	int rc;
-
-	massert(node);
-	massert(c);
-	massert(info);
-
-	if(!node->u.children)
-		return MATCH_FAILED;/* tree empty */
-	*info = NULL;
-	len++;
-	c--;
-	for(;;) {
-		massert(node);
-		children = node->u.children;
-		switch(node->op) {
-			case OP_ROOT:
-				rc=1;
-				break;
-			case OP_PARCLOSE:
-				/*this isn't a real character, so don't move*/
-				c--;
-				len++;
-				rc=1;
-				break;
-			case OP_CHAR:
-				massert(*c==node->c && "We know this has to match");
-				rc = 1;/* *c==node->c;- we know it has matched */
-				break;
-			case OP_DOT:	
-				rc = *c!='\n';
-				break;
-			case OP_STDCLASS:
-				rc = char_class[*c]&(node->c);
-				break;
-			case OP_CUSTOMCLASS:
-			{
-				char_bitmap_p bitmap;
-				massert(children);
-				bitmap = (char_bitmap_p)node->u.bitmap[0];
-				children++;
-				rc = bitmap[*c>>3]&(1<<(*c&0x7));
-				break;
-			}
-			case OP_LEAF:
-			{
-				const struct leaf_info* leaf = node->u.leaf;
-				/*isleaf = 1;*/
-				if(leaf->preg) {
-					rc = !cli_regexec(leaf->preg,(const char*)c,0,NULL,0);
-				}
-				else  {
-					massert(*c==node->c && "We know this has to match[2]");
-					rc = 1;
-				}
-				if(rc) {
-					*info = leaf->info;
-					return MATCH_SUCCESS;
-				}
-				break;
-			}
-			default:
-				/* impossible */
-				cli_errmsg("Encountered invalid operator in tree:%d\n",node->op);
-				exit(1);
-		}
-		len--;
-		if(!len) rc=0;
-		c++;
-		if(rc) {
-			const char csearch = *c;
-			int left = 0,right = node->alternatives-1;
-			int mid;
-			/*matched so far, go deeper*/
-			/*do a binary search between children */
-			massert(children);
-			while(left<=right) {
-				mid  = left+(right-left)/2;
-				if (children[mid]->c == csearch)
-					break;
-				else if(children[mid]->c < csearch)
-					left=mid+1;
-				else
-					right=mid-1;
-			}
-			if(left<=right) {
-				node = children[mid];
-				massert(node);
-			}
-			else {
-				if(node->alternatives) {
-					if(!children[0]->listend) {
-						node = children[0];
-						c++;
-						len--;
-					}
-					while(node && node->listend) {
-						node = node->next;/* climb up */
-						c--;
-						len++;
-					}
-					if(!node || !node->next) 
-						return MATCH_FAILED;/* reached root node */
-					node=node->next;
-					c--;
-					len++;
-				}
-				else if(node->u.children) {
-					struct tree_node* rewrite_next = NULL;
-					if(node->op==OP_PARCLOSE) 
-						rewrite_next = node;
-					node = children[0];
-					massert(node);
-					massert(node->op!=OP_CHAR);
-					if(rewrite_next)
-						node->next = rewrite_next;/* this node is pointed to by several parent nodes, 
-									     we need to know 
-									     from which one we came, so we can find out way back
-									     should we fail to match somewhere deeper*/
-				}
-			}
-		}
-		else {
-			/* this node didn't match, try sibling, or parent (if no more siblings) */
-			while(node && node->listend) {
-				node = node->next;/* sibling of parent */
-				c--;
-				len++;
-			}
-			if(!node || !node->next) /* reached root node, it has no next */
-				return MATCH_FAILED;
-			else {
-				c--;
-				len++;
-				node=node->next;
-			}
-		}
-	}
-	return MATCH_FAILED;
-}
-
-/* push node on stack, only if it isn't there already */
-static void stack_push_once(struct node_stack* stack,struct tree_node* node)
+static size_t reverse_string(char *pattern) /* reverses the NUL-terminated string in place and returns its length */
 {
+	const size_t len = strlen(pattern);
 	size_t i;
-	massert(stack);
-	massert(node);
-
-	for(i=0;i < stack->cnt;i++)
-		if(stack->data[i]==node)
-			return;
-	stack_push(stack,node);
+	for(i=0; i < len/2; i++) { /* swap mirrored characters; the middle one of an odd length stays put */
+		const char head = pattern[i];
+		pattern[i] = pattern[len-1-i];
+		pattern[len-1-i] = head;
+	}
+	return len;
 }
 
-static void destroy_tree_internal(struct regex_matcher* matcher,struct tree_node* node)
+static int add_static_pattern(struct regex_matcher *matcher, char* pattern) /* registers a literal (non-regex) pattern; note that pattern is reversed in place */
 {
-	struct tree_node **children;
-	massert(matcher);
-	massert(node);
-
-	children = tree_node_get_children(node);
-	if(node->op==OP_LEAF) {
-		struct leaf_info* leaf = node->u.leaf;
-		if(node->next && !node->listend)
-			destroy_tree_internal(matcher,node->next);
-		stack_push_once(&matcher->node_stack,(struct tree_node*)node->u.leaf);/* cast to make compiler happy, and to not make another stack implementation for storing void* */
-		stack_push_once(&matcher->node_stack,node);
-		if(leaf->preg) {
-			cli_regfree(leaf->preg);
-			free(leaf->preg);
-			leaf->preg=NULL;
-		}
-		if(leaf->info) {
-			free(leaf->info);
-			leaf->info=NULL;
-		}
-	/*	return;*/
-	}
-	if(node->alternatives) {
-		int i;
-		struct tree_node* p;
-		massert(children);
-		p = children[0]->op==OP_LEAF ? NULL : children[0]->next;
-		for(i=0;i<node->alternatives;i++)
-			destroy_tree_internal(matcher,children[i]);
-		if(p && p!=node)
-			destroy_tree_internal(matcher,p);/*?? is this ok, or without _internal?*/
-	}
-	else {
-		if(children) {
-			if(children[0])
-				destroy_tree_internal(matcher,children[0]);		
-		}
-	}
-	if(node->op!=OP_LEAF && node->next && !node->listend)
-		destroy_tree_internal(matcher,node->next);
-	if(node->u.children)
-		stack_push_once(&matcher->node_stack,(struct tree_node*)node->u.children);/* cast to make compiler happy, it isn't really a tree_node* */
-	if(node->op==OP_CUSTOMCLASS && node->u.children[0]) {
-		free(node->u.children[0]);
-		node->u.children[0]=NULL;
-	}
-	stack_push_once(&matcher->node_stack,node);
+	size_t len;
+	struct regex_list *regex = cli_malloc(sizeof(*regex));
+	if(!regex)
+		return CL_EMEM;
+	len = reverse_string(pattern); /* the reversed text is what gets registered as the suffix below */
+	regex->nxt = NULL;
+	regex->pattern = cli_strdup(pattern); /* copy of the already reversed text; NOTE(review): result is not checked for NULL */
+	regex->preg.re_magic = 0; /* nothing is compiled for a static pattern; presumably this makes the later cli_regfree a no-op - TODO confirm */
+	return add_pattern_suffix(matcher, pattern, len, regex); /* NOTE(review): regex is not freed here if this fails - confirm who owns it on error */
 }
 
-static void destroy_tree(struct regex_matcher* matcher)
+/* Compiles pattern as a host regex and registers it via build_suffixtree_descend(); the "rest of URL" trailer is cut off pattern in place first.
+ * Returns 0 on success, CL_EMEM, a cli_regcomp error code, or whatever build_suffixtree_descend() returns. */
+static int add_pattern(struct regex_matcher *matcher, char *pattern)
 {
-	/* we might have the same node linked by different nodes, so a recursive walk&free doesn't work in all situations,
-	 * i.e. it might double-free, so instead of freeing, just push the nodes on a stack, and later free the nodes in that stack,
-	 * (and push to stack only if it doesn't contain it already*/
-	massert(matcher);
+	struct text_buffer buf;
+	struct node *n;
+	size_t last=0;
+	int rc;
+	struct regex_list *regex = cli_malloc(sizeof(*regex));
+	struct node root_node;
+	size_t len;
+	/* we only match the host, so remove useless stuff */
+	const char remove_end[] = "([/?].*)?/";
+	const char remove_end2[] = "([/?].*)/";
 
-	stack_reset(&matcher->node_stack);
-	destroy_tree_internal(matcher,matcher->root_regex);
-	destroy_tree_internal(matcher,matcher->root_regex_hostonly);
-	while (matcher->node_stack.cnt) {
-		struct tree_node* node = stack_pop(&matcher->node_stack);
-		if(node)
-			free(node);
-	}
-}
-#ifndef NDEBUG
-static void dump_node(struct tree_node* node)
-{
-	int i;
-	struct tree_node* p,**children;
-	massert(node);
-	if(node->op==OP_LEAF) {
-		if(node->u.leaf->preg)
-			printf("n%p [label=\"regex\\nleaf\"]",(void*)node);
-		else
-			printf("n%p [label=\"%c\\nleaf\"];\n",(void*)node,node->c);
-		if(node->next && !node->listend) {
-			printf("n%p -> n%p;\n",(void*)node,(void*)node->next);
-			dump_node(node->next);
-		}
-		return;
-	}
-	printf("n%p [label=\"%c\\n%d\\nlistend:%d\"];\n",(void*)node,(node->op==OP_ROOT||node->op==OP_PARCLOSE) ?'@' :node->c,node->op,node->listend);
-	if(node->next)
-		printf("n%p -> n%p;\n",(void*)node,(void*)node->next);
-	printf("n%p -> {",(void*)node);/*using address of node as id*/
-	children = tree_node_get_children(node);
-	if(node->alternatives)
-		massert(children);
-	for(i=0;i<node->alternatives;i++)
-		printf("n%p ",(void*)children[i]);
-	if(node->alternatives && children[0]->op!=OP_LEAF)
-		for(p=children[0]->next;p!=node;p=p->next)
-		{
-			massert(p);
-			printf("n%p ",(void*)p);
-			if(p->op==OP_LEAF || p->listend)
-				break;
-		}
-	if(!node->alternatives && children && children[0])
-		printf("n%p ",(void*)children[0]);
-	printf("};\n");
-	printf("{rank=same;");
-	for(i=0;i<node->alternatives;i++)
-		printf("n%p ",(void*)node->u.children[i]);
-	if(node->alternatives && children[0]->op!=OP_LEAF)
-		for(p=children[0]->next;p!=node;p=p->next) 
-		{
-			printf("n%p ",(void*)p);	
-			if(p->op==OP_LEAF || p->listend)
-				break;
-		}
-	if(!node->alternatives && children && children[0])
-		printf("n%p ",(void*)children[0]);
-	printf("};\n");
-	for(i=0;i<node->alternatives;i++)
-		dump_node(children[i]);
-	if(node->alternatives && children[0]->op!=OP_LEAF)
-		for(p=children[0]->next;p!=node;p=p->next)
-		{
-			dump_node(p);
-			if(p->op==OP_LEAF || p->listend)
-				break;
-		}
-	if(!node->alternatives && children && children[0])
-		dump_node(children[0]);
-}
 
-void dump_tree(struct tree_node* root)
-{
-	/*use dot/dotty from graphviz to view it*/
-	massert(root);
-	printf("digraph tree {\n");
-	dump_node(root);
-	printf("}\n");
+	if(!regex)
+		return CL_EMEM;
+
+	len = strlen(pattern);
+	if(len > sizeof(remove_end)) {
+		if(strncmp(&pattern[len - sizeof(remove_end)+1], remove_end, sizeof(remove_end)-1) == 0) {
+			len -= sizeof(remove_end) - 1;
+		}
+		/* len may just have shrunk: re-check it so the unsigned index below cannot wrap around and read out of bounds */
+		if(len >= sizeof(remove_end2)-1 && strncmp(&pattern[len - sizeof(remove_end2)+1], remove_end2, sizeof(remove_end2)-1) == 0) {
+			len -= sizeof(remove_end2) - 1;
+		}
+	}
+	pattern[len] = '\0';
+
+	rc = cli_regcomp(&regex->preg, pattern, REG_EXTENDED);
+	if(rc) {
+		size_t buflen = cli_regerror(rc, &regex->preg, NULL, 0);
+		char *errbuf = cli_malloc(buflen);
+		if(errbuf) {
+			cli_regerror(rc, &regex->preg, errbuf, buflen);
+			cli_errmsg(MODULE "Error compiling regular expression %s: %s\n", pattern, errbuf);
+			free(errbuf);
+		} else {
+			cli_errmsg(MODULE "Error compiling regular expression: %s\n", pattern);
+		}
+		free(regex); /* was leaked behind an early return; preg is not passed to cli_regfree because the compile failed */
+		return rc;
+	}
+	regex->pattern = cli_strdup(pattern); /* NOTE(review): result is not checked for NULL */
+	regex->nxt = NULL;
+
+	n = parse_regex(pattern, &last); /* NOTE(review): n is dereferenced below without a NULL check - confirm parse_regex cannot return NULL */
+	memset(&buf, 0, sizeof(buf));
+	memset(&root_node, 0, sizeof(root_node)); /* was sizeof(buf): wrong object size */
+	n->parent = &root_node;
+
+	rc = build_suffixtree_descend(matcher, regex, n, &buf);
+	destroy_tree(n);
+	return rc;
 }
-#endif
diff --git a/libclamav/regex_list.h b/libclamav/regex_list.h
index 90a2ef132..4103bf446 100644
--- a/libclamav/regex_list.h
+++ b/libclamav/regex_list.h
@@ -24,39 +24,37 @@
 #ifndef _REGEX_LIST_H
 #define _REGEX_LIST_H
 
-#ifdef NDEBUG
-#define massert(x) (void)(0)
-#else
-/*debug version, massert enabled*/
-
-#define __massert_fail(expr,file,line) (void)cli_errmsg("Assertion failed at %s:%d\n %s\n",file,line,expr)
-
-#define massert(expr) ((void) ((expr) ? (void)0 : (__massert_fail (#expr,__FILE__,__LINE__))))
-#endif
-
 #include "phishcheck.h"
 #include "readdb.h"
 #include "matcher.h"
 #include <zlib.h> /* for gzFile */
-struct node_stack {
-	struct tree_node** data;
-	size_t capacity;
-	size_t cnt;
+
+struct regex_list { /* node of a singly linked list; regex_matcher.suffix_regexes holds one list head per suffix */
+	const char *pattern; /* pattern text; regex_list.c fills it from cli_strdup and prints it in debug messages */
+	regex_t preg; /* compiled regex, handed to cli_regfree on tear-down; add_static_pattern only sets re_magic to 0 */
+	struct regex_list *nxt; /* next regex registered for the same suffix, NULL at the end */
+};
+
+struct filter { /* state passed to SO_preprocess_add(); presumably the shift-or pre-filter tables - field meanings are not visible here, TODO confirm */
+	uint32_t B[65536]; /* 65536 entries: presumably indexed by a 16-bit value - TODO confirm */
+	uint32_t end_fast[256];
+	uint32_t end[65536]; /* together with B this is 2*65536*4 bytes, about 512 KiB, embedded by value in every regex_matcher */
+	unsigned long m;
 };
 
 struct regex_matcher {
-	struct cli_matcher* root_hosts;
-	struct tree_node* root_regex;
-	struct tree_node* root_regex_hostonly; 
-	struct node_stack node_stack;
-	struct node_stack node_stack_alt;
-	size_t root_hosts_cnt;
-	int list_inited;
-	int list_loaded;
-	int list_built;
+	struct hashtable suffix_hash; /* suffix text -> index into suffix_regexes */
+	size_t suffix_cnt; /* number of entries in suffix_regexes */
+	struct regex_list **suffix_regexes; /* one list head per distinct suffix */
+	struct cli_matcher suffixes; /* AC matcher holding the suffixes themselves */
+	struct filter filter; /* pre-filter state, fed the same suffixes */
+	signed int list_inited:2; /* holds -1 (load failed), 0 or 1; explicitly signed because a plain int bit-field may be unsigned, which would break the == -1 tests */
+	signed int list_loaded:2; /* 0 or 1 */
+	signed int list_built:2; /* 0 or 1 */
 };
 
-int regex_list_match(struct regex_matcher* matcher, char* real_url,const char* display_url,const struct pre_fixup_info* pre_fixup, int hostOnly,const char** info,int is_whitelist);
+int cli_build_regex_list(struct regex_matcher* matcher);
+int regex_list_match(struct regex_matcher* matcher, char* real_url,const char* display_url,const struct pre_fixup_info* pre_fixup, int hostOnly,const char **info, int is_whitelist);
 int init_regex_list(struct regex_matcher* matcher);
 int load_regex_matcher(struct regex_matcher* matcher,FILE* fd,unsigned int options,int is_whitelist,struct cli_dbio *dbio);
 void regex_list_cleanup(struct regex_matcher* matcher);