performance improvements for URL matching (bb #725, bb #650):

* use a suffix AC-trie and a shift-or FSM to filter
* rewrite the URL regex in C
* use a perfect hash to look up TLD and ccTLD, instead of a regex
* TODO: suffixes having a common prefix: loop over all of them
  cli_ac_free: multiple virname pointing to same location

git-svn: trunk@3978
2026-02-02 11:01:38 -05:00 · 2008-07-23 13:51:57 +00:00
parent f8a82180da
commit 2e11bcdfd9
16 changed files with 2222 additions and 1436 deletions
--- a/9
+++ b/9
@@ -1,3 +1,12 @@
+Wed Jul 23 16:32:32 EEST 2008 (edwin)
+------------------------------------
+  * libclamav: performance improvements for URL matching (bb #725, bb #650):
+	* use a suffix AC-trie and a shift-or FSM to filter
+	* rewrite the URL regex in C
+	* use a perfect hash to lookup TLD and ccTLD, instead of a regex
+	* TODO: suffixes having a common prefix: loop over all of them
+		cli_ac_free: multiple virname pointing to same location
+
 Mon Jul 21 12:16:44 CEST 2008 (tk)
 ----------------------------------
  * sigtool/vba.c: fix crash on error in vba code (bb#1106)
--- a/contrib/entitynorm/Makefile
+++ b/contrib/entitynorm/Makefile
@@ -1,7 +1,7 @@
 PERL=perl
 CC=cc

-all: entitylist.h encoding_aliases.h gentbl encname_chars.h
+all: entitylist.h encoding_aliases.h gentbl encname_chars.h generate_hash

 entities_parsed: entities entities/* entity_decl_parse.pl
 	$(PERL) entity_decl_parse.pl $</* | sort -u >$@
@@ -9,6 +9,9 @@ entities_parsed: entities entities/* entity_decl_parse.pl
 generate_entitylist: generate_entitylist.c ../../libclamav/hashtab.h ../../libclamav/hashtab.c ../../libclamav/others.c
 	$(CC) -I. -DHAVE_CONFIG_H -DCLI_MEMFUNSONLY -DPROFILE_HASHTABLE $< ../../libclamav/hashtab.c ../../libclamav/others.c -o $@

+# Build the generate_hash helper from generate_hash.c, compiled together with
+# libclamav's hashtab.c and others.c (same flags as the generate_entitylist rule).
+generate_hash: generate_hash.c ../../libclamav/hashtab.h ../../libclamav/hashtab.c ../../libclamav/others.c
+	$(CC) -I. -DHAVE_CONFIG_H -DCLI_MEMFUNSONLY -DPROFILE_HASHTABLE $< ../../libclamav/hashtab.c ../../libclamav/others.c -o $@
+
 generate_encoding_aliases: generate_encoding_aliases.c ../../libclamav/hashtab.c ../../libclamav/others.c ../../libclamav/htmlnorm.h ../../libclamav/entconv.h ../../libclamav/cltypes.h ../../libclamav/hashtab.h ../../libclamav/hashtab.h
 	$(CC) -I. -DHAVE_CONFIG_H -DCLI_MEMFUNSONLY -DPROFILE_HASHTABLE $< ../../libclamav/hashtab.c ../../libclamav/others.c -o $@

--- a/contrib/phishing/update_iana_data.sh
+++ b/contrib/phishing/update_iana_data.sh
@@ -26,30 +26,11 @@ OUTFILE=iana_tld.h
 echo "Downloading updated tld list from iana.org"
 wget $IANA_TLD -O $TMP || exit 2
 echo "Download complete, parsing data"
-# 174 is the code for |
-TLDLIST=$(egrep -v ^# $TMP | tr \\n \\174 | sed 's/[^a-zA-Z]$//')
-echo "Parse complete, removing tmpfile"
-rm $TMP
-echo "Generating tld list in $OUTFILE"
-cat >$OUTFILE <<EOF
-#ifndef IANA_TLD_H
-#define IANA_TLD_H
-EOF
-echo -n "#define iana_tld \"(" >>$OUTFILE
-echo -n $TLDLIST >>$OUTFILE
-echo ")\"" >>$OUTFILE
+# Drop the comment lines of the downloaded list, lowercase the names and let
+# gperf emit a perfect-hash lookup (tld_hash / in_tld_set) into iana_tld.h.
+# '#line' directives and the 'register' keyword are stripped from the output.
+# The pattern, the file name and the tr sets are quoted so the shell can
+# neither glob-expand [A-Z] / [a-z] nor word-split $TMP.
+grep -Ev '^#' "$TMP" | tr '[A-Z]' '[a-z]' | gperf -C -l -L ANSI-C -E -C -H tld_hash -N in_tld_set | grep -v '^#line' | sed -e 's/^const struct/static const struct/' -e 's/register //g' >iana_tld.h

 echo "Downloading updated country-code list from iana.org"
 wget $IANA_CCTLD -O $TMP || exit 2
 echo "Download complete, parsing data"
-CCTLDLIST=$(cat $TMP | egrep -oi "<a href=[^>]+>\\.([a-zA-Z]+).+</a>" | egrep -o ">.[a-zA-Z]+" | colrm 1 2 | tr \\n \\174 | sed 's/[^a-zA-Z]$//')
-echo "Parse complete, removing tmpfile"
-rm $TMP
-echo "Generating cctld list in $OUTFILE"
-echo -n "#define iana_cctld \"(" >>$OUTFILE
-echo -n $CCTLDLIST >>$OUTFILE
-echo ")\"" >>$OUTFILE
-
-
-echo "#endif" >>$OUTFILE
-echo "Finished succesfully"
+# Keep the "country-code" rows of the downloaded page, pull the text that
+# follows ">." in each anchor, lowercase it and let gperf emit a perfect-hash
+# lookup (cctld_hash / in_cctld_set) into iana_cctld.h.
+# The anchor regex must stay on a single line: a newline inside a grep
+# pattern separates alternatives instead of matching whitespace.
+# The tr sets are quoted so the shell cannot glob-expand them.
+grep country-code "$TMP" | egrep -oi "<a href=[^>]+>\\.([a-zA-Z]+).+</a>" | egrep -o ">.[a-zA-Z]+" | colrm 1 2 | tr '[A-Z]' '[a-z]' | gperf -C -l -L ANSI-C -E -C -H cctld_hash -N in_cctld_set | grep -v '^#line' | sed -e 's/^const struct/static const struct/' -e 's/register //g' >iana_cctld.h
+echo "Done"
--- a/contrib/phishing/update_iana_tld.sh
+++ b/contrib/phishing/update_iana_tld.sh
@@ -26,17 +26,4 @@ echo "Downloading updated tld list from iana.org"
 wget $IANA_TLD -O $TMP || exit 2
 echo "Download complete, parsing data"
 # 174 is the code for |
-TLDLIST=$(egrep -v ^# $TMP|tr \\n \\174 )
-echo "Parse complete, removing tmpfile"
-rm $TMP
-echo "Generating $OUTFILE"
-cat >$OUTFILE <<EOF
-#ifndef IANA_TLD_H
-#define IANA_TLD_H
-EOF
-echo -n "#define iana_tld \"(" >>$OUTFILE
-echo -n $TLDLIST >>$OUTFILE
-echo ")\"" >>$OUTFILE
-echo "#endif" >>$OUTFILE
-echo "Finished succesfully"
-
+# Drop the comment lines of the downloaded list, lowercase the names and print
+# the gperf-generated perfect-hash lookup (tld_hash / in_tld_set) on stdout.
+# The pattern, the file name and the tr sets are quoted so the shell can
+# neither glob-expand [A-Z] / [a-z] nor word-split $TMP.
+grep -Ev '^#' "$TMP" | tr '[A-Z]' '[a-z]' | gperf -C -H tld_hash -N in_tld_set -l | grep -v '^#line' | sed -e 's/^const struct/static const struct/' -e 's/register //g'
--- a/docs/clamdoc.tex
+++ b/docs/clamdoc.tex
@@ -361,7 +361,7 @@ All 4 tests passed
 	 \item The exact output from \verb+make check+	 
 	 \item Output of \verb+uname -mrsp+ 
 	 \item your \verb+config.log+	 
-	 \item The following files from the \verb+unit-tests/+ directory:
+	 \item The following files from the \verb+unit_tests/+ directory:
 		\begin{itemize}
 			\item \verb+test.log+
 	 		\item \verb+clamscan.log+
--- a/libclamav/hashtab.c
+++ b/libclamav/hashtab.c
@@ -367,10 +367,18 @@ void hashtab_clear(struct hashtable *s)
 		if(s->htable[i].key && s->htable[i].key != DELETED_KEY)
 			free((void *)s->htable[i].key);
 	}
-	memset(s->htable, 0, s->capacity);
+	if(s->htable)
+		memset(s->htable, 0, s->capacity);
 	s->used = 0;
 }

+/*
+ * Release everything the hashtable owns: hashtab_clear() frees the stored
+ * keys, then the bucket array itself is freed and the table is left empty
+ * (htable == NULL, capacity == 0).
+ */
+void hashtab_free(struct hashtable *s)
+{
+	hashtab_clear(s);
+
+	free(s->htable);
+	s->capacity = 0;
+	s->htable = NULL;
+}

 int hashtab_store(const struct hashtable *s,FILE* out)
 {
--- a/libclamav/hashtab.h
+++ b/libclamav/hashtab.h
@@ -82,7 +82,7 @@ int hashtab_init(struct hashtable *s,size_t capacity);
 const struct element* hashtab_insert(struct hashtable *s, const char* key, const size_t len, const element_data data);
 void hashtab_delete(struct hashtable *s,const char* key,const size_t len);
 void hashtab_clear(struct hashtable *s);
-
+void hashtab_free(struct hashtable *s);
 int hashtab_load(FILE* in, struct hashtable *s);
 int hashtab_store(const struct hashtable *s,FILE* out);

--- a/libclamav/iana_cctld.h
+++ b/libclamav/iana_cctld.h
@@ -0,0 +1,505 @@
+/* ANSI-C code produced by gperf version 3.0.3 */
+/* Command-line: gperf -C -l -L ANSI-C -E -C -H cctld_hash -N in_cctld_set  */
+/* Computed positions: -k'1-2' */
+
+#if !((' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) \
+      && ('%' == 37) && ('&' == 38) && ('\'' == 39) && ('(' == 40) \
+      && (')' == 41) && ('*' == 42) && ('+' == 43) && (',' == 44) \
+      && ('-' == 45) && ('.' == 46) && ('/' == 47) && ('0' == 48) \
+      && ('1' == 49) && ('2' == 50) && ('3' == 51) && ('4' == 52) \
+      && ('5' == 53) && ('6' == 54) && ('7' == 55) && ('8' == 56) \
+      && ('9' == 57) && (':' == 58) && (';' == 59) && ('<' == 60) \
+      && ('=' == 61) && ('>' == 62) && ('?' == 63) && ('A' == 65) \
+      && ('B' == 66) && ('C' == 67) && ('D' == 68) && ('E' == 69) \
+      && ('F' == 70) && ('G' == 71) && ('H' == 72) && ('I' == 73) \
+      && ('J' == 74) && ('K' == 75) && ('L' == 76) && ('M' == 77) \
+      && ('N' == 78) && ('O' == 79) && ('P' == 80) && ('Q' == 81) \
+      && ('R' == 82) && ('S' == 83) && ('T' == 84) && ('U' == 85) \
+      && ('V' == 86) && ('W' == 87) && ('X' == 88) && ('Y' == 89) \
+      && ('Z' == 90) && ('[' == 91) && ('\\' == 92) && (']' == 93) \
+      && ('^' == 94) && ('_' == 95) && ('a' == 97) && ('b' == 98) \
+      && ('c' == 99) && ('d' == 100) && ('e' == 101) && ('f' == 102) \
+      && ('g' == 103) && ('h' == 104) && ('i' == 105) && ('j' == 106) \
+      && ('k' == 107) && ('l' == 108) && ('m' == 109) && ('n' == 110) \
+      && ('o' == 111) && ('p' == 112) && ('q' == 113) && ('r' == 114) \
+      && ('s' == 115) && ('t' == 116) && ('u' == 117) && ('v' == 118) \
+      && ('w' == 119) && ('x' == 120) && ('y' == 121) && ('z' == 122) \
+      && ('{' == 123) && ('|' == 124) && ('}' == 125) && ('~' == 126))
+/* The character set is not based on ISO-646.  */
+#error "gperf generated tables don't work with this execution character set. Please report a bug to <bug-gnu-gperf@gnu.org>."
+#endif
+
+/* maximum key range = 472, duplicates = 0 */
+
+/*
+ * gperf-generated hash function for the country-code TLD keyword set.
+ * The result is len plus the association value of the second character plus
+ * the association value of the first character offset by 25.
+ * Both str[0] and str[1] are read unconditionally, so str must hold at least
+ * two characters.
+ */
+#ifdef __GNUC__
+__inline
+#else
+#ifdef __cplusplus
+inline
+#endif
+#endif
+static unsigned int
+cctld_hash (const char *str, unsigned int len)
+{
+  static const unsigned short asso_values[] =
+    {
+      476, 476, 476, 476, 476, 476, 476, 476, 476, 476,
+      476, 476, 476, 476, 476, 476, 476, 476, 476, 476,
+      476, 476, 476, 476, 476, 476, 476, 476, 476, 476,
+      476, 476, 476, 476, 476, 476, 476, 476, 476, 476,
+      476, 476, 476, 476, 476, 476, 476, 476, 476, 476,
+      476, 476, 476, 476, 476, 476, 476, 476, 476, 476,
+      476, 476, 476, 476, 476, 476, 476, 476, 476, 476,
+      476, 476, 476, 476, 476, 476, 476, 476, 476, 476,
+      476, 476, 476, 476, 476, 476, 476, 476, 476, 476,
+      476, 476, 476, 476, 476, 476, 476, 119,  97,  33,
+      103,   4,  59, 115, 210, 149, 169, 143, 175,  55,
+      145,  89, 178,  37,  85,  18,  34, 239,   2,  73,
+      112,   3,  25,  10,  15, 117, 209, 229, 150, 223,
+      200,  78, 225,  54,   5, 215, 215, 190,  25,  23,
+        0,  20, 233, 234,  14, 476,  33, 204, 476, 476,
+      476, 476, 476, 476, 476, 476, 476, 476, 476, 476,
+      476, 476, 476, 476, 476, 476, 476, 476, 476, 476,
+      476, 476, 476, 476, 476, 476, 476, 476, 476, 476,
+      476, 476, 476, 476, 476, 476, 476, 476, 476, 476,
+      476, 476, 476, 476, 476, 476, 476, 476, 476, 476,
+      476, 476, 476, 476, 476, 476, 476, 476, 476, 476,
+      476, 476, 476, 476, 476, 476, 476, 476, 476, 476,
+      476, 476, 476, 476, 476, 476, 476, 476, 476, 476,
+      476, 476, 476, 476, 476, 476, 476, 476, 476, 476,
+      476, 476, 476, 476, 476, 476, 476, 476, 476, 476,
+      476, 476, 476, 476, 476, 476, 476, 476, 476, 476,
+      476, 476, 476, 476, 476, 476, 476, 476, 476, 476,
+      476, 476, 476, 476, 476, 476, 476, 476, 476, 476,
+      476
+    };
+  const unsigned char first = (unsigned char)str[0];
+  const unsigned char second = (unsigned char)str[1];
+
+  /* The first character uses a shifted window of the same table. */
+  return len + asso_values[second] + asso_values[first + 25];
+}
+
+/*
+ * gperf-generated membership test for country-code TLDs.
+ * str/len: candidate name and its length; the comparison is byte-exact and
+ * every keyword in the table is lowercase.
+ * Returns a pointer to the matching entry of the internal word list, or 0
+ * when the candidate is not in the set.
+ */
+#ifdef __GNUC__
+__inline
+#ifdef __GNUC_STDC_INLINE__
+__attribute__ ((__gnu_inline__))
+#endif
+#endif
+const char *
+in_cctld_set (const char *str, unsigned int len)
+{
+  enum
+    {
+      TOTAL_KEYWORDS = 252,
+      MIN_WORD_LENGTH = 2,
+      MAX_WORD_LENGTH = 2,
+      MIN_HASH_VALUE = 4,
+      MAX_HASH_VALUE = 475
+    };
+
+  static const unsigned char lengthtable[] =
+    {
+       0,  0,  0,  0,  2,  2,  2,  0,  0,  2,  2,  2,  0,  0,
+       2,  2,  2,  0,  0,  2,  2,  0,  0,  0,  2,  2,  0,  2,
+       0,  2,  2,  2,  2,  0,  2,  2,  2,  2,  0,  2,  2,  2,
+       2,  2,  2,  2,  2,  2,  0,  0,  2,  0,  2,  0,  0,  2,
+       2,  2,  2,  2,  2,  2,  2,  0,  2,  0,  2,  2,  0,  2,
+       0,  2,  2,  0,  2,  2,  2,  2,  0,  0,  2,  2,  2,  0,
+       2,  2,  2,  2,  0,  2,  2,  2,  2,  0,  0,  2,  2,  2,
+       2,  2,  2,  2,  2,  0,  0,  2,  2,  2,  0,  2,  2,  2,
+       2,  0,  2,  2,  2,  2,  0,  2,  2,  2,  2,  2,  0,  2,
+       2,  2,  0,  2,  2,  2,  2,  0,  0,  2,  2,  2,  0,  2,
+       0,  2,  2,  0,  2,  2,  2,  2,  0,  0,  2,  2,  2,  2,
+       0,  2,  2,  2,  0,  0,  2,  2,  2,  0,  0,  2,  2,  2,
+       0,  2,  2,  2,  2,  0,  2,  2,  2,  2,  0,  0,  0,  2,
+       2,  0,  0,  2,  2,  2,  0,  2,  0,  2,  2,  0,  0,  2,
+       2,  2,  0,  2,  2,  0,  2,  0,  0,  2,  2,  2,  2,  0,
+       2,  2,  2,  0,  0,  2,  0,  2,  0,  0,  2,  2,  2,  0,
+       0,  2,  2,  2,  0,  2,  2,  2,  2,  0,  0,  0,  2,  2,
+       2,  2,  2,  2,  2,  0,  2,  2,  2,  2,  0,  2,  2,  2,
+       2,  2,  0,  2,  2,  2,  2,  2,  2,  2,  2,  0,  2,  2,
+       2,  2,  0,  2,  0,  2,  2,  0,  2,  0,  2,  2,  0,  2,
+       2,  0,  2,  0,  0,  0,  2,  2,  2,  0,  2,  2,  0,  0,
+       0,  2,  2,  2,  0,  0,  2,  2,  2,  0,  0,  2,  2,  2,
+       0,  0,  2,  2,  2,  0,  0,  0,  2,  0,  0,  0,  2,  0,
+       0,  0,  0,  2,  2,  2,  0,  0,  2,  0,  2,  0,  0,  2,
+       2,  2,  0,  0,  0,  0,  2,  0,  0,  0,  0,  2,  0,  0,
+       2,  2,  0,  0,  2,  2,  0,  0,  0,  0,  0,  0,  2,  0,
+       0,  0,  2,  2,  2,  0,  2,  0,  2,  0,  2,  0,  2,  2,
+       2,  0,  2,  2,  0,  0,  0,  2,  0,  0,  0,  0,  0,  2,
+       2,  0,  0,  2,  0,  0,  0,  0,  2,  0,  2,  0,  0,  2,
+       0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+       0,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+       0,  0,  0,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+       0,  0,  2,  0,  0,  0,  0,  0,  2,  0,  0,  0,  0,  0,
+       0,  0,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  2
+    };
+  static const char * const wordlist[] =
+    {
+      "", "", "", "",
+      "sv",
+      "sy",
+      "se",
+      "", "",
+      "mv",
+      "my",
+      "me",
+      "", "",
+      "bv",
+      "by",
+      "be",
+      "", "",
+      "cv",
+      "cy",
+      "", "", "",
+      "tv",
+      "ms",
+      "",
+      "sz",
+      "",
+      "re",
+      "bs",
+      "ae",
+      "mz",
+      "",
+      "ws",
+      "sc",
+      "st",
+      "bz",
+      "",
+      "ye",
+      "mc",
+      "mt",
+      "cz",
+      "rs",
+      "mq",
+      "as",
+      "bt",
+      "tz",
+      "", "",
+      "cc",
+      "",
+      "az",
+      "", "",
+      "tc",
+      "tt",
+      "sm",
+      "lv",
+      "ly",
+      "ac",
+      "at",
+      "mm",
+      "",
+      "aq",
+      "",
+      "mf",
+      "bm",
+      "",
+      "yt",
+      "",
+      "bf",
+      "cm",
+      "",
+      "ls",
+      "wf",
+      "cf",
+      "tm",
+      "", "",
+      "mw",
+      "tf",
+      "am",
+      "",
+      "je",
+      "bw",
+      "af",
+      "sr",
+      "",
+      "lc",
+      "lt",
+      "so",
+      "mr",
+      "", "",
+      "tw",
+      "mo",
+      "br",
+      "rw",
+      "sb",
+      "aw",
+      "bo",
+      "cr",
+      "", "",
+      "sd",
+      "co",
+      "tr",
+      "",
+      "bb",
+      "md",
+      "to",
+      "ar",
+      "",
+      "ro",
+      "bd",
+      "ao",
+      "sg",
+      "",
+      "mx",
+      "cd",
+      "sa",
+      "mg",
+      "de",
+      "",
+      "td",
+      "ma",
+      "bg",
+      "",
+      "cx",
+      "ad",
+      "ba",
+      "cg",
+      "", "",
+      "jm",
+      "ca",
+      "tg",
+      "",
+      "ax",
+      "",
+      "lr",
+      "ag",
+      "",
+      "dz",
+      "sk",
+      "qa",
+      "sn",
+      "", "",
+      "mk",
+      "si",
+      "mn",
+      "lb",
+      "",
+      "gy",
+      "ge",
+      "bn",
+      "", "",
+      "ck",
+      "bi",
+      "cn",
+      "", "",
+      "tk",
+      "ci",
+      "tn",
+      "",
+      "jo",
+      "gs",
+      "sj",
+      "an",
+      "",
+      "dm",
+      "la",
+      "ai",
+      "sl",
+      "", "", "",
+      "bj",
+      "ml",
+      "", "",
+      "mp",
+      "gt",
+      "bl",
+      "",
+      "gq",
+      "",
+      "tj",
+      "cl",
+      "", "",
+      "py",
+      "pe",
+      "tl",
+      "",
+      "lk",
+      "tp",
+      "",
+      "al",
+      "", "",
+      "li",
+      "ie",
+      "gm",
+      "do",
+      "",
+      "ps",
+      "gf",
+      "sh",
+      "", "",
+      "ee",
+      "",
+      "mh",
+      "", "",
+      "is",
+      "ne",
+      "bh",
+      "", "",
+      "gw",
+      "pt",
+      "ch",
+      "",
+      "es",
+      "ky",
+      "ke",
+      "th",
+      "", "", "",
+      "it",
+      "gr",
+      "uy",
+      "iq",
+      "ve",
+      "su",
+      "nz",
+      "",
+      "ec",
+      "et",
+      "mu",
+      "pm",
+      "",
+      "gb",
+      "nc",
+      "pf",
+      "kz",
+      "us",
+      "",
+      "gd",
+      "cu",
+      "im",
+      "jp",
+      "ht",
+      "uz",
+      "zm",
+      "dk",
+      "",
+      "ru",
+      "pw",
+      "au",
+      "gg",
+      "",
+      "vc",
+      "",
+      "ga",
+      "om",
+      "",
+      "yu",
+      "",
+      "nf",
+      "pr",
+      "",
+      "zw",
+      "hm",
+      "",
+      "km",
+      "", "", "",
+      "fm",
+      "ir",
+      "dj",
+      "",
+      "um",
+      "io",
+      "", "", "",
+      "lu",
+      "er",
+      "gn",
+      "", "",
+      "kw",
+      "gi",
+      "nr",
+      "", "",
+      "id",
+      "no",
+      "pg",
+      "", "",
+      "hr",
+      "pa",
+      "kr",
+      "", "", "",
+      "fr",
+      "", "", "",
+      "fo",
+      "", "", "", "",
+      "za",
+      "eg",
+      "gl",
+      "", "",
+      "gp",
+      "",
+      "ng",
+      "", "",
+      "pk",
+      "na",
+      "pn",
+      "", "", "", "",
+      "kg",
+      "", "", "", "",
+      "in",
+      "", "",
+      "ug",
+      "vg",
+      "", "",
+      "ua",
+      "va",
+      "", "", "", "", "", "",
+      "gh",
+      "", "", "",
+      "ni",
+      "pl",
+      "hk",
+      "",
+      "hn",
+      "",
+      "kn",
+      "",
+      "fk",
+      "",
+      "ki",
+      "il",
+      "uk",
+      "",
+      "fi",
+      "vn",
+      "", "", "",
+      "vi",
+      "", "", "", "", "",
+      "gu",
+      "nl",
+      "", "",
+      "np",
+      "", "", "", "",
+      "fj",
+      "",
+      "ph",
+      "", "",
+      "kp",
+      "", "", "", "", "", "", "", "", "",
+      "", "", "", "", "", "",
+      "eh",
+      "", "", "", "", "", "", "", "", "",
+      "", "", "", "", "", "",
+      "kh",
+      "", "", "", "", "", "", "", "", "",
+      "", "", "",
+      "eu",
+      "", "", "", "", "",
+      "nu",
+      "", "", "", "", "", "", "",
+      "hu",
+      "", "", "", "", "", "", "", "", "",
+      "",
+      "vu"
+    };
+  const char *candidate;
+  int key;
+
+  /* Reject anything whose length cannot match a keyword before hashing. */
+  if (len < MIN_WORD_LENGTH || len > MAX_WORD_LENGTH)
+    return 0;
+
+  key = cctld_hash (str, len);
+  if (key < 0 || key > MAX_HASH_VALUE)
+    return 0;
+
+  /* Unused slots have length 0, so they never equal an accepted len. */
+  if (len != lengthtable[key])
+    return 0;
+
+  /* The hash only selects the slot; confirm with a byte-exact comparison. */
+  candidate = wordlist[key];
+  if (*str != *candidate || memcmp (str + 1, candidate + 1, len - 1) != 0)
+    return 0;
+
+  return candidate;
+}
--- a/libclamav/iana_tld.h
+++ b/libclamav/iana_tld.h
@@ -1,28 +1,746 @@
-/*
- *  Phishing module: iana tld list.
- *
- *  Copyright (C) 2007-2008 Sourcefire, Inc.
- *
- *  Authors: Török Edvin
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License version 2 as
- *  published by the Free Software Foundation.
- *
- *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License
- *  along with this program; if not, write to the Free Software
- *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
- *  MA 02110-1301, USA.
- */
+/* ANSI-C code produced by gperf version 3.0.3 */
+/* Command-line: gperf -C -l -L ANSI-C -E -C -H tld_hash -N in_tld_set  */
+/* Computed positions: -k'1-2,6' */

-#ifndef IANA_TLD_H
-#define IANA_TLD_H
-#define iana_tld "(A[CDEFGILMNOQRSTUWXZ]|B[ABDEFGHIJMNORSTVWYZ]|C[ACDFGHIKLMNORUVXYZ]|D[EJKMOZ]|E[CEGRSTU]|F[IJKMOR]|G[ABDEFGHILMNPQRSTUWY]|H[KMNRTU]|I[DELMNOQRST]|J[EMOP]|K[EGHIMNPRWYZ]|L[ABCIKRSTUVY]|M[ACDEGHKLMNOPQRSTUVWXYZ]|N[ACEFGILOPRUZ]|OM|P[AEFGHKLMNRSTWY]|QA|R[EOSUW]|S[ABCDEGHIJKLMNORTUVYZ]|T[CDFGHJKLMNOPRTVWZ]|U[AGKMSYZ]|V[ACEGINU]|W[FS]|Y[ETU]|Z[AMW]|BIZ|CAT|COM|EDU|GOV|INT|MIL|NET|ORG|PRO|TEL|AERO|ARPA|ASIA|COOP|INFO|JOBS|MOBI|NAME|MUSEUM|TRAVEL|XN--ZCKZAH|XN--0ZWM56D|XN--DEBA0AD|XN--G6W251D|XN--JXALPDLP|XN--KGBECHTV|XN--9T4B11YI5A|XN--80AKHBYKNJ4F|XN--11B5BS3A9AJ6G|XN--HGBK6AJ7F53BBA)"
-#define iana_cctld "(A[CDEFGILMNOQRSTUWXZ]|B[ABDEFGHIJLMNORSTVWYZ]|C[ACDFGHIKLMNORUVXYZ]|D[EJKMOZ]|E[CEGHRSTU]|F[IJKMOR]|G[ABDEFGHILMNPQRSTUWY]|H[KMNRTU]|I[DELMNOQRST]|J[EMOP]|K[EGHIMNPRWYZ]|L[ABCIKRSTUVY]|M[ACDEFGHKLMNOPQRSTUVWXYZ]|N[ACEFGILOPRUZ]|OM|P[AEFGHKLMNRSTWY]|QA|R[EOSUW]|S[ABCDEGHIJKLMNORTUVYZ]|T[CDFGHJKLMNOPRTVWZ]|U[AGKMSYZ]|V[ACEGINU]|W[FS]|Y[ETU]|Z[AMW]|BIZ|CAT|COM|EDU|GOV|IN[TT]|MIL|NET|ORG|PRO|TEL|AERO|ARP[AA]|ASIA|COOP|INFO|JOBS|MOBI|NAME|MUSEUM)"
+#if !((' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) \
+      && ('%' == 37) && ('&' == 38) && ('\'' == 39) && ('(' == 40) \
+      && (')' == 41) && ('*' == 42) && ('+' == 43) && (',' == 44) \
+      && ('-' == 45) && ('.' == 46) && ('/' == 47) && ('0' == 48) \
+      && ('1' == 49) && ('2' == 50) && ('3' == 51) && ('4' == 52) \
+      && ('5' == 53) && ('6' == 54) && ('7' == 55) && ('8' == 56) \
+      && ('9' == 57) && (':' == 58) && (';' == 59) && ('<' == 60) \
+      && ('=' == 61) && ('>' == 62) && ('?' == 63) && ('A' == 65) \
+      && ('B' == 66) && ('C' == 67) && ('D' == 68) && ('E' == 69) \
+      && ('F' == 70) && ('G' == 71) && ('H' == 72) && ('I' == 73) \
+      && ('J' == 74) && ('K' == 75) && ('L' == 76) && ('M' == 77) \
+      && ('N' == 78) && ('O' == 79) && ('P' == 80) && ('Q' == 81) \
+      && ('R' == 82) && ('S' == 83) && ('T' == 84) && ('U' == 85) \
+      && ('V' == 86) && ('W' == 87) && ('X' == 88) && ('Y' == 89) \
+      && ('Z' == 90) && ('[' == 91) && ('\\' == 92) && (']' == 93) \
+      && ('^' == 94) && ('_' == 95) && ('a' == 97) && ('b' == 98) \
+      && ('c' == 99) && ('d' == 100) && ('e' == 101) && ('f' == 102) \
+      && ('g' == 103) && ('h' == 104) && ('i' == 105) && ('j' == 106) \
+      && ('k' == 107) && ('l' == 108) && ('m' == 109) && ('n' == 110) \
+      && ('o' == 111) && ('p' == 112) && ('q' == 113) && ('r' == 114) \
+      && ('s' == 115) && ('t' == 116) && ('u' == 117) && ('v' == 118) \
+      && ('w' == 119) && ('x' == 120) && ('y' == 121) && ('z' == 122) \
+      && ('{' == 123) && ('|' == 124) && ('}' == 125) && ('~' == 126))
+/* The character set is not based on ISO-646.  */
+#error "gperf generated tables don't work with this execution character set. Please report a bug to <bug-gnu-gperf@gnu.org>."
 #endif

+/* maximum key range = 983, duplicates = 0 */
+
+/*
+ * gperf-generated hash function for the TLD keyword set.
+ * The result starts at len and adds association values for:
+ *   - the sixth character, unless len is in the range 1..5,
+ *   - the second character, unless len is 1,
+ *   - the first character offset by 25, always.
+ * str must therefore hold as many characters as those positions require.
+ */
+#ifdef __GNUC__
+__inline
+#else
+#ifdef __cplusplus
+inline
+#endif
+#endif
+static unsigned int
+tld_hash (const char *str, unsigned int len)
+{
+  static const unsigned short asso_values[] =
+    {
+      988, 988, 988, 988, 988, 988, 988, 988, 988, 988,
+      988, 988, 988, 988, 988, 988, 988, 988, 988, 988,
+      988, 988, 988, 988, 988, 988, 988, 988, 988, 988,
+      988, 988, 988, 988, 988, 988, 988, 988, 988, 988,
+      988, 988, 988, 988, 988, 988, 988, 988,   0,  15,
+      988, 988, 988, 988,   0, 988, 988, 988, 988, 988,
+      988, 988, 988, 988, 988, 988, 988, 988, 988, 988,
+      988, 988, 988, 988, 988, 988, 988, 988, 988, 988,
+      988, 988, 988, 988, 988, 988, 988, 988, 988, 988,
+      988, 988, 988, 988, 988, 988, 988, 170, 328,  88,
+        3,  50, 293, 205, 123, 430, 500, 238, 115, 320,
+      375,  30, 413, 348,  70,  43, 475,  18,   6, 283,
+       95,  58,  10, 220,   5, 485, 480,   8, 190, 390,
+      225, 113, 420,  95,   0,  15,  50, 295,  20, 128,
+      130,  80, 405, 470, 340,   0, 305, 415, 988, 988,
+      988, 988, 988, 988, 988, 988, 988, 988, 988, 988,
+      988, 988, 988, 988, 988, 988, 988, 988, 988, 988,
+      988, 988, 988, 988, 988, 988, 988, 988, 988, 988,
+      988, 988, 988, 988, 988, 988, 988, 988, 988, 988,
+      988, 988, 988, 988, 988, 988, 988, 988, 988, 988,
+      988, 988, 988, 988, 988, 988, 988, 988, 988, 988,
+      988, 988, 988, 988, 988, 988, 988, 988, 988, 988,
+      988, 988, 988, 988, 988, 988, 988, 988, 988, 988,
+      988, 988, 988, 988, 988, 988, 988, 988, 988, 988,
+      988, 988, 988, 988, 988, 988, 988, 988, 988, 988,
+      988, 988, 988, 988, 988, 988, 988, 988, 988, 988,
+      988, 988, 988, 988, 988, 988, 988, 988, 988, 988,
+      988, 988, 988, 988, 988, 988, 988, 988, 988, 988,
+      988
+    };
+  const int keylen = len;
+  int hval = keylen;
+
+  /* Lengths outside 1..5 also mix in the sixth character. */
+  if (keylen < 1 || keylen > 5)
+    hval += asso_values[(unsigned char)str[5]];
+
+  /* Every length except 1 mixes in the second character. */
+  if (keylen != 1)
+    hval += asso_values[(unsigned char)str[1]];
+
+  /* The first character always contributes, via a shifted table window. */
+  hval += asso_values[(unsigned char)str[0]+25];
+
+  return hval;
+}
+
+#ifdef __GNUC__
+__inline
+#ifdef __GNUC_STDC_INLINE__
+__attribute__ ((__gnu_inline__))
+#endif
+#endif
+const char *
+in_tld_set (const char *str, unsigned int len)
+{
+  enum
+    {
+      TOTAL_KEYWORDS = 280,
+      MIN_WORD_LENGTH = 2,
+      MAX_WORD_LENGTH = 18,
+      MIN_HASH_VALUE = 5,
+      MAX_HASH_VALUE = 987
+    };
+
+  static const unsigned char lengthtable[] =
+    {
+       0,  0,  0,  0,  0,  2,  0,  0,  2,  0,  2,  0,  2,  2,
+       0,  2,  0,  2,  0,  0,  2,  0,  2,  0,  0,  2,  0,  2,
+       0,  0,  2,  0,  2,  0,  4,  2,  0,  2,  3,  4,  2,  0,
+       2,  0,  0,  2,  0,  2,  0,  0,  0,  0,  2,  0,  0,  2,
+       0,  4,  0,  0,  2,  0,  2,  0,  4,  2,  0,  2,  3,  0,
+       0,  0,  2,  0,  0,  0,  0,  2,  0,  0,  2,  0,  2,  0,
+       4,  2,  0,  2,  2,  0,  2,  0,  2,  0,  0,  2,  0,  2,
+       0,  0,  2,  0,  2,  2,  0,  2,  0,  2,  0,  0,  0,  0,
+       2,  0,  0,  2,  0,  2,  0,  0,  0,  0,  2,  3,  0,  2,
+       0,  2,  0,  0,  2,  0,  2,  3,  0,  2,  0,  0,  2,  0,
+       2,  0,  2,  0,  0,  2,  0,  4,  2,  0,  2,  0,  2,  0,
+       0,  2,  0,  0,  0,  0,  2,  0,  2,  0,  0,  2,  0,  2,
+       0,  0,  2,  0,  2,  2,  0,  0,  0,  2,  3,  0,  2,  0,
+       2,  0,  0,  2,  0,  2,  0,  4,  2,  0,  2,  0,  0,  2,
+       0,  2,  0,  0,  0,  0,  2,  0,  0,  2,  0,  2,  0,  0,
+       2,  0,  2,  0,  0,  0,  0,  2,  0,  0,  2,  0,  2,  3,
+       0,  2,  0,  0,  2,  0,  2,  0,  2,  0,  0,  2,  0,  0,
+       0,  0,  2,  0,  2,  0,  0,  2,  0,  2,  2,  0,  2,  0,
+       2,  0,  0,  2,  0,  2,  0,  0,  0,  0,  2,  0,  0,  2,
+       0,  2,  0,  0,  2,  6,  2,  0,  0,  0,  0,  2,  0,  0,
+       2,  0,  0,  0,  0,  2,  0,  2,  0,  0,  0,  0,  2,  0,
+       0,  2,  0,  2,  0,  0,  2,  0,  2,  0,  0,  2,  0,  2,
+       0,  0,  2,  0,  0,  0,  0,  2,  0,  0,  0,  0,  2,  0,
+       2,  0,  0,  2,  0,  2,  0,  0,  2,  0,  2,  0,  0,  2,
+       0,  2,  0,  0,  2,  0,  2,  0,  6,  2,  0,  2,  0,  0,
+       2,  0,  0,  0,  0,  2,  0,  2,  0,  0,  2,  0,  2,  0,
+       0,  2,  0,  2,  3,  0,  2,  0,  2,  0,  0,  2,  0,  2,
+       0,  0,  0,  0,  2,  0,  0,  2, 11,  2,  0,  0,  0, 16,
+       2,  0,  0,  0, 11,  2,  0,  0,  0,  0,  2,  0,  0,  0,
+       0, 17,  0,  0,  2,  0,  2,  2,  0,  2,  0,  2,  0,  0,
+       2,  0,  0,  0,  0,  2,  0,  2,  0,  0,  2,  0,  2,  3,
+       0,  2, 11,  2,  0,  0,  2,  0,  2,  0,  0,  0,  0,  2,
+       0,  0,  2,  0,  2,  0,  0,  0,  0,  2,  0,  0,  2,  0,
+       2,  0,  0,  2,  0,  2,  0,  0,  0,  0,  2, 10,  0,  2,
+       0,  2,  0,  0,  2,  0, 12,  0,  0,  2,  3,  2,  0,  0,
+       2,  0,  2,  0,  0,  2,  0,  2,  0,  0,  2,  0,  2,  0,
+       0,  2,  0,  2, 18,  0,  2,  0,  2,  0,  0,  2,  0,  2,
+       0,  0,  2,  0,  2,  0,  0,  2,  0,  2,  2,  0,  0,  0,
+       2,  0,  0,  2,  0,  2,  0,  0,  2,  0,  2,  0,  0,  2,
+       0,  2,  0,  0,  2,  0,  2,  0,  0,  0,  0,  2,  0,  0,
+       2,  0,  2,  0,  0,  0,  0,  2,  0,  0,  2,  0,  2,  0,
+       0,  2,  0,  2,  0,  0,  2,  0,  2,  0,  0,  0,  0,  2,
+       0,  0,  2,  0, 12,  0,  0,  0,  0,  2, 18,  0,  0,  0,
+       2,  3,  4,  2,  0,  2,  0,  0,  0,  0,  2,  0,  0,  0,
+       0,  2,  0,  0,  0,  0,  2,  0,  0,  0,  0,  2,  0,  0,
+       2,  0,  2,  0,  0,  2,  0,  0,  0,  0,  0,  0,  2,  0,
+       0,  2,  0,  0,  0,  0,  0,  0,  2,  3,  0,  0,  0,  0,
+       0,  0,  0,  0,  0,  0,  0,  0,  0,  2,  0,  0,  0,  0,
+       2,  0,  0,  0,  0,  2,  0,  0,  0,  0,  0,  0,  0,  0,
+       0,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  2,  0,  0,
+       2,  0,  2,  0,  0,  2,  0,  0,  0,  0,  0,  0,  2,  0,
+       0,  0,  0,  0,  0,  0,  0,  0,  2,  0,  0,  2,  0,  0,
+       0,  0,  0,  0,  0,  0,  0,  0,  0,  2,  0,  0,  0,  0,
+       2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+       0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  2,  0,  0,
+       0,  0,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  2,  0,
+       0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  2,
+       0,  0,  0,  0,  0,  0,  0,  0,  0,  2,  0,  0,  0,  0,
+       0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+       0,  0,  0,  0,  0,  0,  0,  0,  0,  2,  0,  0,  0,  0,
+       0,  0,  0,  0,  0,  0,  0,  2,  0,  0,  0,  0,  2,  0,
+       0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 14,  0,  0,  2,
+       0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+       0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+       0,  0,  0,  0,  0,  0,  2,  0,  0,  0,  0,  0,  0,  0,
+       0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+       0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+       0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+       0,  0,  0,  0,  0,  2,  0,  0,  0,  0,  0,  0,  0,  0,
+       0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+       0,  0,  0,  0,  0,  0,  0,  2
+    };
+  static const char * const wordlist[] =
+    {
+      "", "", "", "", "",
+      "md",
+      "", "",
+      "mv",
+      "",
+      "cd",
+      "",
+      "mz",
+      "cv",
+      "",
+      "ad",
+      "",
+      "cz",
+      "", "",
+      "mu",
+      "",
+      "az",
+      "", "",
+      "cu",
+      "",
+      "nz",
+      "", "",
+      "au",
+      "",
+      "mo",
+      "",
+      "mobi",
+      "nu",
+      "",
+      "co",
+      "com",
+      "coop",
+      "fo",
+      "",
+      "ao",
+      "", "",
+      "ms",
+      "",
+      "no",
+      "", "", "", "",
+      "me",
+      "", "",
+      "as",
+      "",
+      "asia",
+      "", "",
+      "my",
+      "",
+      "ae",
+      "",
+      "aero",
+      "cy",
+      "",
+      "ne",
+      "net",
+      "", "", "",
+      "mr",
+      "", "", "", "",
+      "cr",
+      "", "",
+      "fr",
+      "",
+      "ar",
+      "",
+      "arpa",
+      "td",
+      "",
+      "nr",
+      "tv",
+      "",
+      "mc",
+      "",
+      "tz",
+      "", "",
+      "cc",
+      "",
+      "mx",
+      "", "",
+      "ac",
+      "",
+      "cx",
+      "lv",
+      "",
+      "nc",
+      "",
+      "ax",
+      "", "", "", "",
+      "to",
+      "", "",
+      "lu",
+      "",
+      "ml",
+      "", "", "", "",
+      "cl",
+      "org",
+      "",
+      "mh",
+      "",
+      "al",
+      "", "",
+      "ch",
+      "",
+      "nl",
+      "tel",
+      "",
+      "sd",
+      "", "",
+      "sv",
+      "",
+      "ls",
+      "",
+      "sz",
+      "", "",
+      "jo",
+      "",
+      "jobs",
+      "ru",
+      "",
+      "su",
+      "",
+      "tr",
+      "", "",
+      "ly",
+      "", "", "", "",
+      "ro",
+      "",
+      "so",
+      "", "",
+      "je",
+      "",
+      "lr",
+      "", "",
+      "tc",
+      "",
+      "ma",
+      "rs",
+      "", "", "",
+      "ca",
+      "cat",
+      "",
+      "re",
+      "",
+      "se",
+      "", "",
+      "lc",
+      "",
+      "na",
+      "",
+      "name",
+      "sy",
+      "",
+      "qa",
+      "", "",
+      "gd",
+      "",
+      "tl",
+      "", "", "", "",
+      "sr",
+      "", "",
+      "th",
+      "",
+      "mg",
+      "", "",
+      "gu",
+      "",
+      "cg",
+      "", "", "", "",
+      "ag",
+      "", "",
+      "sc",
+      "",
+      "ng",
+      "gov",
+      "",
+      "bd",
+      "", "",
+      "bv",
+      "",
+      "id",
+      "",
+      "bz",
+      "", "",
+      "gs",
+      "", "", "", "",
+      "mk",
+      "",
+      "ge",
+      "", "",
+      "ck",
+      "",
+      "sl",
+      "fk",
+      "",
+      "gy",
+      "",
+      "bo",
+      "", "",
+      "sh",
+      "",
+      "io",
+      "", "", "", "",
+      "gr",
+      "", "",
+      "bs",
+      "",
+      "la",
+      "", "",
+      "is",
+      "travel",
+      "be",
+      "", "", "", "",
+      "ie",
+      "", "",
+      "by",
+      "", "", "", "",
+      "mw",
+      "",
+      "tg",
+      "", "", "", "",
+      "br",
+      "", "",
+      "aw",
+      "",
+      "ir",
+      "", "",
+      "cf",
+      "",
+      "sa",
+      "", "",
+      "af",
+      "",
+      "gl",
+      "", "",
+      "nf",
+      "", "", "", "",
+      "gh",
+      "", "", "", "",
+      "tk",
+      "",
+      "mm",
+      "", "",
+      "yu",
+      "",
+      "cm",
+      "", "",
+      "fm",
+      "",
+      "am",
+      "", "",
+      "lk",
+      "",
+      "sg",
+      "", "",
+      "ps",
+      "",
+      "il",
+      "",
+      "museum",
+      "bh",
+      "",
+      "pe",
+      "", "",
+      "mq",
+      "", "", "", "",
+      "py",
+      "",
+      "ye",
+      "", "",
+      "aq",
+      "",
+      "ga",
+      "", "",
+      "tw",
+      "",
+      "pr",
+      "pro",
+      "",
+      "sk",
+      "",
+      "om",
+      "", "",
+      "tf",
+      "",
+      "mn",
+      "", "", "", "",
+      "cn",
+      "", "",
+      "ws",
+      "xn--g6w251d",
+      "an",
+      "", "", "",
+      "xn--80akhbyknj4f",
+      "ba",
+      "", "", "",
+      "xn--0zwm56d",
+      "gg",
+      "", "", "", "",
+      "tm",
+      "", "", "", "",
+      "xn--11b5bs3a9aj6g",
+      "", "",
+      "hu",
+      "",
+      "pl",
+      "rw",
+      "",
+      "mp",
+      "",
+      "uz",
+      "", "",
+      "ph",
+      "", "", "", "",
+      "lb",
+      "",
+      "bg",
+      "", "",
+      "np",
+      "",
+      "kz",
+      "mil",
+      "",
+      "jm",
+      "xn--deba0ad",
+      "ci",
+      "", "",
+      "fi",
+      "",
+      "ai",
+      "", "", "", "",
+      "ni",
+      "", "",
+      "us",
+      "",
+      "sm",
+      "", "", "", "",
+      "tn",
+      "", "",
+      "sb",
+      "",
+      "hr",
+      "", "",
+      "uy",
+      "",
+      "pa",
+      "", "", "", "",
+      "ke",
+      "xn--zckzah",
+      "",
+      "gw",
+      "",
+      "mt",
+      "", "",
+      "ky",
+      "",
+      "xn--jxalpdlp",
+      "", "",
+      "gf",
+      "edu",
+      "at",
+      "", "",
+      "vu",
+      "",
+      "kr",
+      "", "",
+      "tp",
+      "",
+      "dz",
+      "", "",
+      "eu",
+      "",
+      "pg",
+      "", "",
+      "bw",
+      "",
+      "sn",
+      "xn--hlcj6aya9esc7a",
+      "",
+      "fj",
+      "",
+      "gm",
+      "", "",
+      "bf",
+      "",
+      "do",
+      "", "",
+      "gb",
+      "",
+      "ve",
+      "", "",
+      "es",
+      "",
+      "li",
+      "jp",
+      "", "", "",
+      "ee",
+      "", "",
+      "pk",
+      "",
+      "de",
+      "", "",
+      "gq",
+      "",
+      "bm",
+      "", "",
+      "kh",
+      "",
+      "im",
+      "", "",
+      "bb",
+      "",
+      "er",
+      "", "", "", "",
+      "tt",
+      "", "",
+      "vc",
+      "",
+      "si",
+      "", "", "", "",
+      "gn",
+      "", "",
+      "ec",
+      "",
+      "lt",
+      "", "",
+      "iq",
+      "",
+      "ua",
+      "", "",
+      "pw",
+      "",
+      "tj",
+      "", "", "", "",
+      "za",
+      "", "",
+      "pf",
+      "",
+      "xn--kgbechtv",
+      "", "", "", "",
+      "bn",
+      "xn--hgbk6aj7f53bba",
+      "", "", "",
+      "in",
+      "int",
+      "info",
+      "gp",
+      "",
+      "st",
+      "", "", "", "",
+      "ug",
+      "", "", "", "",
+      "pm",
+      "", "", "", "",
+      "gi",
+      "", "", "", "",
+      "kg",
+      "", "",
+      "hk",
+      "",
+      "sj",
+      "", "",
+      "wf",
+      "", "", "", "", "", "",
+      "va",
+      "", "",
+      "uk",
+      "", "", "", "", "", "",
+      "bi",
+      "biz",
+      "", "", "", "", "", "", "", "", "",
+      "", "", "", "",
+      "gt",
+      "", "", "", "",
+      "pn",
+      "", "", "", "",
+      "vg",
+      "", "", "", "", "", "", "", "", "",
+      "eg",
+      "", "", "", "", "", "", "", "", "",
+      "bt",
+      "", "",
+      "zw",
+      "",
+      "it",
+      "", "",
+      "kw",
+      "", "", "", "", "", "",
+      "hm",
+      "", "", "", "", "", "", "", "", "",
+      "bj",
+      "", "",
+      "dk",
+      "", "", "", "", "", "", "", "", "",
+      "", "",
+      "zm",
+      "", "", "", "",
+      "km",
+      "", "", "", "", "", "", "", "", "",
+      "", "", "", "", "", "", "", "", "",
+      "", "", "", "", "", "",
+      "hn",
+      "", "", "", "",
+      "pt",
+      "", "", "", "", "", "", "", "", "",
+      "yt",
+      "", "", "", "", "", "", "", "", "",
+      "", "", "", "", "",
+      "kn",
+      "", "", "", "", "", "", "", "", "",
+      "dm",
+      "", "", "", "", "", "", "", "", "",
+      "", "", "", "", "", "", "", "", "",
+      "", "", "", "", "", "", "", "", "",
+      "kp",
+      "", "", "", "", "", "", "", "", "",
+      "", "",
+      "vn",
+      "", "", "", "",
+      "ki",
+      "", "", "", "", "", "", "", "", "",
+      "", "",
+      "xn--9t4b11yi5a",
+      "", "",
+      "ht",
+      "", "", "", "", "", "", "", "", "",
+      "", "", "", "", "", "", "", "", "",
+      "", "", "", "", "", "", "", "", "",
+      "", "", "", "", "", "", "",
+      "vi",
+      "", "", "", "", "", "", "", "", "",
+      "", "", "", "", "", "", "", "", "",
+      "", "", "", "", "", "", "", "", "",
+      "", "", "", "", "", "", "", "", "",
+      "", "", "", "", "", "", "", "", "",
+      "", "", "", "", "", "", "", "", "",
+      "et",
+      "", "", "", "", "", "", "", "", "",
+      "", "", "", "", "", "", "", "", "",
+      "", "", "", "", "", "", "", "", "",
+      "", "",
+      "dj"
+    };
+
+  if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
+    {
+      int key = tld_hash (str, len);
+
+      if (key <= MAX_HASH_VALUE && key >= 0)
+        if (len == lengthtable[key])
+          {
+            const char *s = wordlist[key];
+
+            if (*str == *s && !memcmp (str + 1, s + 1, len - 1))
+              return s;
+          }
+    }
+  return 0;
+}
--- a/libclamav/phish_domaincheck_db.c
+++ b/libclamav/phish_domaincheck_db.c
@@ -49,16 +49,6 @@ int domainlist_match(const struct cl_engine* engine,char* real_url,const char* d
 {
 	const char* info;
 	int rc = engine->domainlist_matcher ? regex_list_match(engine->domainlist_matcher,real_url,display_url,hostOnly ? pre_fixup : NULL,hostOnly,&info,0) : 0;
-	if(rc && info && info[0] && info[0] != ':') {/*match successful, and has custom flags*/
-		if(strlen(info)==3 && isxdigit(info[0]) && isxdigit(info[1]) && isxdigit(info[2])) {
-			unsigned short notwantedflags=0;
-			sscanf(info,"%hx",&notwantedflags);
-		        *flags &= ~notwantedflags;/* filter unwanted phishcheck flags */	
-		}
-		else {
-			cli_warnmsg("Phishcheck:Unknown flag format in domain-list, 3 hex digits expected");
-		}
-	}
 	return rc;
 }

@@ -79,13 +69,6 @@ int is_domainlist_ok(const struct cl_engine* engine)
 	return (engine && engine->domainlist_matcher) ? is_regex_ok(engine->domainlist_matcher) : 1;
 }

-void domainlist_cleanup(const struct cl_engine* engine)
-{
-	if(engine && engine->domainlist_matcher) {
-		regex_list_cleanup(engine->domainlist_matcher);
-	}
-}
-
 void domainlist_done(struct cl_engine* engine)
 {
 	if(engine && engine->domainlist_matcher) {
--- a/libclamav/phish_whitelist.c
+++ b/libclamav/phish_whitelist.c
@@ -69,13 +69,6 @@ int is_whitelist_ok(const struct cl_engine* engine)
 	return (engine && engine->whitelist_matcher) ? is_regex_ok(engine->whitelist_matcher) : 1;
 }

-void whitelist_cleanup(const struct cl_engine* engine)
-{
-	if(engine && engine->whitelist_matcher) {
-		regex_list_cleanup(engine->whitelist_matcher);
-	}
-}
-
 void whitelist_done(struct cl_engine* engine)
 {
 	if(engine && engine->whitelist_matcher) {
--- a/libclamav/phishcheck.c
+++ b/libclamav/phishcheck.c
@@ -39,6 +39,7 @@
 #include <ctype.h>

 #include "clamav.h"
+#include "cltypes.h"
 #include "others.h"
 #include "mbox.h"
 #include "message.h"
@@ -47,6 +48,7 @@
 #include "phish_domaincheck_db.h"
 #include "phish_whitelist.h"
 #include "iana_tld.h"
+#include "iana_cctld.h"


 #define DOMAIN_REAL 1
@@ -140,8 +142,6 @@ static char empty_string[]="";
 #define CLOAKED_URL "^"ANY_CLOAK"(\\."ANY_CLOAK"){0,3}$"

 static const char cloaked_host_regex[] = CLOAKED_URL;
-static const char tld_regex[] = "^"iana_tld"$";
-static const char cctld_regex[] = "^"iana_cctld"$";
 static const char dotnet[] = ".net";
 static const char adonet[] = "ado.net";
 static const char aspnet[] = "asp.net";
@@ -151,7 +151,10 @@ static const char gt[]="&gt";
 static const char src_text[] = "src";
 static const char href_text[] = "href";
 static const char mailto[] = "mailto:";
+static const char mailto_proto[] = "mailto://";
 static const char https[]="https://";
+static const char http[]="http://";
+static const char ftp[] = "ftp://";

 static const size_t href_text_len = sizeof(href_text);
 static const size_t src_text_len = sizeof(src_text);
@@ -161,7 +164,10 @@ static const size_t aspnet_len = sizeof(aspnet)-1;
 static const size_t lt_len = sizeof(lt)-1;
 static const size_t gt_len = sizeof(gt)-1;
 static const size_t mailto_len = sizeof(mailto)-1;
+static const size_t mailto_proto_len = sizeof(mailto_proto)-1;
 static const size_t https_len  = sizeof(https)-1;
+static const size_t http_len  = sizeof(http)-1;
+static const size_t ftp_len  = sizeof(ftp)-1;

 /* for urls, including mailto: urls, and (broken) http:www... style urls*/
 /* refer to: http://www.w3.org/Addressing/URL/5_URI_BNF.html
@@ -169,41 +175,13 @@ static const size_t https_len  = sizeof(https)-1;
 * So the 'safe' char class has been split up
 * */
 /* character classes */
-#define URI_alpha	"a-zA-Z"
 #define URI_digit	"0-9"
-#define URI_safe_nodot  "-$_@&"
-#define URI_safe	"-$_@.&"
-#define URI_extra	"!*\"'(),"
-
-#define URI_hex		 "[0-9a-fA-f]"
-#define URI_escape      "%"URI_hex"{2}"
-#define URI_xalpha "([" URI_safe URI_alpha URI_digit  URI_extra "]|"URI_escape")" /* URI_safe has to be first, because it contains - */
-#define URI_xalpha_nodot "([" URI_safe_nodot URI_alpha URI_digit URI_extra "]|"URI_escape")"
-
-#define URI_xalphas_nodot URI_xalpha_nodot"*"
-
-#define URI_ialpha  "["URI_alpha"]"URI_xalphas_nodot""
-#define URI_xpalpha URI_xalpha"|\\+"
-#define URI_xpalpha_nodot URI_xalpha_nodot"|\\+"
-#define URI_xpalphas_nodot "("URI_xpalpha_nodot")+"
-
-#define URI_scheme URI_ialpha
-#define URI_tld iana_tld
-#define URI_path1 URI_xpalphas_nodot"\\.("URI_xpalphas_nodot"\\.)*"
-
 #define URI_IP_digits "["URI_digit"]{1,3}"
 #define URI_path_start "[/?:]?"
 #define URI_numeric_path URI_IP_digits"(\\."URI_IP_digits"){3}"URI_path_start
-#define URI_numeric_URI "("URI_scheme":(//)?)?"URI_numeric_path
+/* Optional scheme prefix ("http", "https" or "ftp", then ":" and an optional
+ * "//") followed by URI_numeric_path. The scheme alternation is grouped so
+ * that ":(//)?" applies to every scheme; ungrouped it would bind to "ftp"
+ * only, because '|' has lower precedence than concatenation. */
+#define URI_numeric_URI "((http|https|ftp):(//)?)?"URI_numeric_path
 #define URI_numeric_fragmentaddress URI_numeric_URI

-#define URI_URI1 "("URI_scheme":(//)?)?"URI_path1
-#define URI_URI2 URI_tld
-
-#define URI_fragmentaddress1 URI_URI1
-#define URI_fragmentaddress2 URI_URI2""URI_path_start
-
-#define URI_CHECK_PROTOCOLS "(http|https|ftp|mailto)://.+"

 /*Warning: take care when modifying this regex, it has been tweaked, and tuned, just don't break it please.
 * there is fragmentaddress1, and 2  to work around the ISO limitation of 509 bytes max length for string constants*/
@@ -235,7 +213,6 @@ static int string_assign_concatenated(struct string* dest, const char* prefix, c
 static void string_assign_null(struct string* dest);
 static char *rfind(char *start, char c, size_t len);
 static char hex2int(const unsigned char* src);
-static int isTLD(const struct phishcheck* pchk,const char* str,int len);
 static enum phish_status phishingCheck(const struct cl_engine* engine,struct url_check* urls);
 static const char* phishing_ret_toString(enum phish_status rc);

@@ -416,7 +393,7 @@ static int get_host(const struct phishcheck* s,const char* URL,int isReal,int* p
 			}

 			tld = strrchr(realhost,'.');
-			rc = tld ? isTLD(s,tld,tld-realhost-1) : 0;
+			rc = tld ? !!in_tld_set(tld,tld-realhost-1) : 0;
 			if(rc < 0)
 				return rc;
 			if(rc)
@@ -438,28 +415,6 @@ static int get_host(const struct phishcheck* s,const char* URL,int isReal,int* p
 	return 0;
 }

-static int isCountryCode(const struct phishcheck* s,const char* str)
-{
-	return str ? !cli_regexec(&s->preg_cctld,str,0,NULL,0) : 0;
-}
-
-static int isTLD(const struct phishcheck* pchk,const char* str,int len)
-{
-	if (!str)
-		return 0;
-	else {
-		char*	s  = cli_malloc(len+1);
-		int rc;
-
-		if(!s)
-			return CL_EMEM;
-		strncpy(s,str,len);
-		s[len]='\0';
-		rc = !cli_regexec(&pchk->preg_tld,s,0,NULL,0);
-		free(s);
-		return rc ? 1 : 0;
-	}
-}

 /*
 * memrchr isn't standard, so I use this
@@ -486,7 +441,7 @@ static void get_domain(const struct phishcheck* pchk,struct string* dest,struct
 		string_assign(dest,host);
 		return;
 	}
-	if(isCountryCode(pchk,tld+1)) {
+	if(in_cctld_set(tld+1, strlen(tld+1))) {
 		const char* countrycode = tld+1;
 		tld = rfind(host->data,'.',tld-host->data-1);
 		if(!tld) {
@@ -495,7 +450,7 @@ static void get_domain(const struct phishcheck* pchk,struct string* dest,struct
 			string_assign(dest,host);
 			return;
 		}
-		if(!isTLD(pchk,tld+1,countrycode-tld-2)) {
+		if(!in_tld_set(tld+1, countrycode-tld-2)) {
 			string_assign_ref(dest,host,tld+1);
 			return;/*it was a name like: subdomain.domain.uk, return domain.uk*/
 		}
@@ -737,11 +692,7 @@ cleanupURL(struct string *URL,struct string *pre_URL, int isReal)
 			/* @end points to last character we want to be part of the URL */
 			end = host_begin + host_len - 1;
 		}
-		/* terminate URL with a slash, except when we're at end of string */
-		if(host_begin[host_len]) {
-			host_begin[host_len] = '/';
-			end++;
-		}
+		host_begin[host_len] = '\0';
 		/* convert hostname to lowercase, but only hostname! */
 		str_make_lowercase(host_begin, host_len);
 		/* some broken MUAs put > in the href, and then
@@ -797,6 +748,40 @@ int phishingScan(message* m,const char* dir,cli_ctx* ctx,tag_arguments_t* hrefs)

 	if(!ctx->found_possibly_unwanted)
 		*ctx->virname=NULL;
+#if 0
+	FILE *f = fopen("/home/edwin/quarantine/urls","r");
+	if(!f)
+		abort();
+	while(!feof(f)) {
+		struct url_check urls;
+		char line1[4096];
+		char line2[4096];
+		char line3[4096];
+
+		fgets(line1, sizeof(line1), f);
+		fgets(line2, sizeof(line2), f);
+		fgets(line3, sizeof(line3), f);
+		if(strcmp(line3, "\n") != 0) {
+			strcpy(line1, line2);
+			strcpy(line2, line3);
+			fgets(line3, sizeof(line3), f);
+			while(strcmp(line3, "\n") != 0) {
+				fgets(line3, sizeof(line3),f);
+			}
+		}
+		urls.flags = CL_PHISH_ALL_CHECKS;
+		urls.link_type = 0;
+		string_init_c(&urls.realLink, line1);
+		string_init_c(&urls.displayLink, line2);
+		string_init_c(&urls.pre_fixup.pre_displayLink, NULL);
+		urls.realLink.refcount=-1;
+		urls.displayLink.refcount=-1;
+		int rc = phishingCheck(ctx->engine, &urls);
+		//printf("%d\n",rc);
+	}
+	fclose(f);
+	return 0;
+#endif
 	for(i=0;i<hrefs->count;i++)
 		if(hrefs->contents[i]) {
 			struct url_check urls;
@@ -928,44 +913,7 @@ int phishing_init(struct cl_engine* engine)
 		return CL_EFORMAT;
 	}

-	if(build_regex(&pchk->preg_cctld,cctld_regex,1)) {
-		free(pchk);
-		engine->phishcheck = NULL;
-		return CL_EFORMAT;
-	}
-	if(build_regex(&pchk->preg_tld,tld_regex,1)) {
-		free_regex(&pchk->preg_cctld);
-		free(pchk);
-		engine->phishcheck = NULL;
-		return CL_EFORMAT;
-	}
-	url_regex = str_compose("^ *(("URI_CHECK_PROTOCOLS")|(",URI_fragmentaddress1,URI_fragmentaddress2")) *$");
-	if(!url_regex || build_regex(&pchk->preg,url_regex,1)) {
-		free_regex(&pchk->preg_cctld);
-		free_regex(&pchk->preg_tld);
-		free(url_regex);
-		free(pchk);
-		engine->phishcheck = NULL;
-		return CL_EFORMAT;
-	}
-	free(url_regex);
-	realurl_regex = str_compose("^ *(("URI_CHECK_PROTOCOLS")|(",URI_path1,URI_fragmentaddress2")) *$");
-	if(!realurl_regex || build_regex(&pchk->preg_realurl, realurl_regex,1)) {
-		free_regex(&pchk->preg_cctld);
-		free_regex(&pchk->preg_tld);
-		free_regex(&pchk->preg);
-		free(url_regex);
-		free(realurl_regex);
-		free(pchk);
-		engine->phishcheck = NULL;
-		return CL_EFORMAT;
-	}
-	free(realurl_regex);
 	if(build_regex(&pchk->preg_numeric,numeric_url_regex,1)) {
-		free_regex(&pchk->preg_cctld);
-		free_regex(&pchk->preg_tld);
-		free_regex(&pchk->preg);
-		free_regex(&pchk->preg_realurl);
 		free(pchk);
 		engine->phishcheck = NULL;
 		return CL_EFORMAT;
@@ -980,12 +928,8 @@ void phishing_done(struct cl_engine* engine)
 	struct phishcheck* pchk = engine->phishcheck;
 	cli_dbgmsg("Cleaning up phishcheck\n");
 	if(pchk && !pchk->is_disabled) {
-		free_regex(&pchk->preg);
 		free_regex(&pchk->preg_hexurl);
-		free_regex(&pchk->preg_cctld);
-		free_regex(&pchk->preg_tld);
 		free_regex(&pchk->preg_numeric);
-		free_regex(&pchk->preg_realurl);
 		pchk->is_disabled = 1;
 	}
 	whitelist_done(engine);
@@ -998,22 +942,165 @@ void phishing_done(struct cl_engine* engine)
 	cli_dbgmsg("Phishcheck cleaned up\n");
 }

+
+/* Membership table indexed by byte value: 1 for the ASCII letters
+ * ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz, 0 for every other byte. */
+static const uint8_t URI_alpha[256] = {
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
+        0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+};
+
+/* Membership table indexed by byte value; 1 marks exactly these characters:
+ * !"$%&'()*,-0123456789@ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz
+ * Neither '.' nor '+' is a member; '%' is accepted as a plain character. */
+static const uint8_t URI_xalpha_nodot[256] = {
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
+        0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+};
+
+/* Membership table indexed by byte value; 1 marks exactly these characters:
+ * !"$%&'()*+,-0123456789@ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz
+ * '+' is a member here; '.' is not. */
+static const uint8_t URI_xpalpha_nodot[256] = {
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
+        0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+};
+
+/*
+ * Returns 1 when every byte in [start, end) is marked in URI_xalpha_nodot,
+ * 0 otherwise. An empty range is accepted.
+ */
+static inline int validate_uri_xalphas_nodot(const char *start, const char *end)
+{
+	/* The table is indexed by unsigned byte value; cast explicitly instead
+	 * of relying on an implicit char* -> unsigned char* conversion. */
+	const unsigned char *p = (const unsigned char *)start;
+	const unsigned char *stop = (const unsigned char *)end;
+
+	for(; p < stop; p++) {
+		if(!URI_xalpha_nodot[*p])
+			return 0;
+	}
+	return 1;
+}
+
+/*
+ * Returns 1 when [start, end) is non-empty and every byte in it is marked in
+ * URI_xpalpha_nodot, 0 otherwise.
+ */
+static inline int validate_uri_xpalphas_nodot(const char *start, const char *end)
+{
+	/* The table is indexed by unsigned byte value; cast explicitly instead
+	 * of relying on an implicit char* -> unsigned char* conversion. */
+	const unsigned char *first = (const unsigned char *)start;
+	const unsigned char *stop = (const unsigned char *)end;
+	const unsigned char *p;
+
+	for(p = first; p < stop; p++) {
+		if(!URI_xpalpha_nodot[*p])
+			return 0;
+	}
+	/* must have at least one char */
+	return p > first;
+}
+
+
+/*
+ * Returns 1 when [start, end) is non-empty, its first byte is marked in
+ * URI_alpha and the remaining bytes pass validate_uri_xalphas_nodot();
+ * 0 otherwise.
+ */
+static inline int validate_uri_ialpha(const char *start, const char *end)
+{
+	/* convert to unsigned char before indexing so bytes >= 0x80 cannot
+	 * produce a negative index */
+	if(start >= end || !URI_alpha[(unsigned char)*start])
+		return 0;
+	return validate_uri_xalphas_nodot(start + 1, end);
+}
+
 /*
 * Only those URLs are identified as URLs for which phishing detection can be performed.
 */
-static int isURL(const struct phishcheck* pchk,const char* URL)
+static int isURL(const struct phishcheck* pchk,const char* URL, int accept_anyproto) /* pchk is not read in this body; accept_anyproto != 0 also allows a generic "scheme:" prefix */
 {
-	return URL ? !cli_regexec(&pchk->preg,URL,0,NULL,0) : 0;
+	const char *start = NULL, *p, *q; /* start: where the part after the scheme begins, once known */
+	if(!URL)
+		return 0;
+
+	switch (URL[0]) { /* fast path for the fixed prefixes https://, http://, ftp:// and mailto:// */
+		case 'h':
+			if (strncmp(URL, https, https_len) == 0)
+				start = URL + https_len;
+			else if (strncmp(URL, http, http_len) == 0)
+				start = URL + http_len;
+			break;
+		case 'f':
+		       if (strncmp(URL, ftp, ftp_len) == 0)
+			       start = URL + ftp_len;
+		       break;
+		case 'm':
+		       if (strncmp(URL, mailto_proto, mailto_proto_len) == 0)
+			       start = URL + mailto_proto_len;
+		       break;
+	}
+	if(start) {
+		if(start[0] == '\0')
+			return 0;/* empty URL */
+		/* has a valid protocol, it is a URL */
+		return 1;
+	}
+	start = accept_anyproto ?  strchr(URL, ':') : NULL; /* no fixed prefix: look for the first ':' only when generic schemes are allowed */
+	if(start) {
+		/* validate URI scheme */
+		if(validate_uri_ialpha(URL, start)) {
+			if(start[1] == '/' && start[2] == '/')
+				start += 3; /* skip :// */
+			else
+				start++; /* skip the ':' only */
+		}
+		else
+			start = URL; /* scheme invalid */
+	} else
+		start = URL;
+	p = start; /* p walks the '.'-separated pieces of the remaining text */
+	do {
+		q = strchr(p, '.');
+		if(q) {
+			if(!validate_uri_xpalphas_nodot(p, q)) /* every piece before a dot must be non-empty and use allowed characters */
+				return 0;
+			p = q+1;
+		}
+	} while(q);
+	if (p == start) /* must have at least one dot in the URL */
+		return 0;
+	return !!in_tld_set(p, strlen(p)); /* the text after the last dot must be found by in_tld_set() */
 }

 /*
 * Check if this is a real URL, which basically means to check if it has a known URL scheme (http,https,ftp).
 * This prevents false positives with outbind:// and blocked:: links.
 */
+#if 0 /* disabled: phishingCheck now calls isURL(pchk, realLink, 0) instead, and preg_realurl was removed from struct phishcheck */
 static int isRealURL(const struct phishcheck* pchk,const char* URL)
 {
 	return URL ? !cli_regexec(&pchk->preg_realurl,URL,0,NULL,0) : 0;
 }
+#endif

 static int isNumericURL(const struct phishcheck* pchk,const char* URL)
 {
@@ -1139,7 +1226,7 @@ static enum phish_status phishingCheck(const struct cl_engine* engine,struct url
 	cli_dbgmsg("Phishcheck:URL after cleanup: %s->%s\n", urls->realLink.data,
 		urls->displayLink.data);

-	if((!isURL(pchk, urls->displayLink.data) || !isRealURL(pchk, urls->realLink.data) ) &&
+	if((!isURL(pchk, urls->displayLink.data, 1) || !isURL(pchk, urls->realLink.data, 0) ) &&
 			( (phishy&PHISHY_NUMERIC_IP && !isNumericURL(pchk, urls->displayLink.data)) ||
 			  !(phishy&PHISHY_NUMERIC_IP))) {
 		cli_dbgmsg("Displayed 'url' is not url:%s\n",urls->displayLink.data);
--- a/libclamav/phishcheck.h
+++ b/libclamav/phishcheck.h
@@ -44,10 +44,6 @@ struct string {
 };

 struct phishcheck {
-	regex_t preg;
-	regex_t preg_realurl;
-	regex_t preg_tld;
-	regex_t preg_cctld;
 	regex_t preg_numeric;
 	regex_t preg_hexurl;
 	int      is_disabled;
--- a/libclamav/readdb.c
+++ b/libclamav/readdb.c
@@ -1839,6 +1839,12 @@ int cl_build(struct cl_engine *engine)
 	}
    }

+    if((ret = cli_build_regex_list(engine->whitelist_matcher))) {
+	    return ret;
+    }
+    if((ret = cli_build_regex_list(engine->domainlist_matcher))) {
+	    return ret;
+    }
    cli_md5db_build(engine->md5_mdb);
    cli_freeign(engine);
    cli_dconf_print(engine->dconf);
--- a/libclamav/regex_list.c
+++ b/libclamav/regex_list.c
--- a/libclamav/regex_list.h
+++ b/libclamav/regex_list.h
@@ -24,39 +24,37 @@
 #ifndef _REGEX_LIST_H
 #define _REGEX_LIST_H

-#ifdef NDEBUG
-#define massert(x) (void)(0)
-#else
-/*debug version, massert enabled*/
-
-#define __massert_fail(expr,file,line) (void)cli_errmsg("Assertion failed at %s:%d\n %s\n",file,line,expr)
-
-#define massert(expr) ((void) ((expr) ? (void)0 : (__massert_fail (#expr,__FILE__,__LINE__))))
-#endif
-
 #include "phishcheck.h"
 #include "readdb.h"
 #include "matcher.h"
 #include <zlib.h> /* for gzFile */
-struct node_stack {
-	struct tree_node** data;
-	size_t capacity;
-	size_t cnt;
+
+struct regex_list { /* one regex entry: its pattern text, a regex_t, and a link to another entry */
+	const char *pattern; /* NOTE(review): presumably the text preg was built from - confirm in regex_list.c */
+	regex_t preg;
+	struct regex_list *nxt; /* link to another regex_list entry */
+};
+
+struct filter { /* tables for the URL pre-filter; NOTE(review): how they are filled and read is in regex_list.c, not shown here */
+	uint32_t B[65536]; /* 65536 entries - presumably indexed by a 16-bit value, TODO confirm */
+	uint32_t end_fast[256]; /* 256 entries - presumably indexed by a single byte, TODO confirm */
+	uint32_t end[65536];
+	unsigned long m;
 };

 struct regex_matcher {
-	struct cli_matcher* root_hosts;
-	struct tree_node* root_regex;
-	struct tree_node* root_regex_hostonly; 
-	struct node_stack node_stack;
-	struct node_stack node_stack_alt;
-	size_t root_hosts_cnt;
-	int list_inited;
-	int list_loaded;
-	int list_built;
+	struct hashtable suffix_hash;
+	size_t suffix_cnt;
+	struct regex_list **suffix_regexes; /* NOTE(review): presumably one regex_list chain per suffix, suffix_cnt entries - confirm in regex_list.c */
+	struct cli_matcher suffixes; /* embedded by value, whereas the removed root_hosts member was a pointer */
+	struct filter filter;
+	int list_inited:2; /* 2-bit bit-field; signedness of a plain int bit-field is implementation-defined */
+	int list_loaded:2;
+	int list_built:2;
 };

-int regex_list_match(struct regex_matcher* matcher, char* real_url,const char* display_url,const struct pre_fixup_info* pre_fixup, int hostOnly,const char** info,int is_whitelist);
+int cli_build_regex_list(struct regex_matcher* matcher);
+int regex_list_match(struct regex_matcher* matcher, char* real_url,const char* display_url,const struct pre_fixup_info* pre_fixup, int hostOnly,const char **info, int is_whitelist);
 int init_regex_list(struct regex_matcher* matcher);
 int load_regex_matcher(struct regex_matcher* matcher,FILE* fd,unsigned int options,int is_whitelist,struct cli_dbio *dbio);
 void regex_list_cleanup(struct regex_matcher* matcher);