mirror of
https://github.com/Cisco-Talos/clamav.git
synced 2026-02-02 11:01:38 -05:00
* use a suffix AC-trie and a shift-or FSM to filter * rewrite the URL regex in C * use a perfect hash to lookup TLD and ccTLD, instead of a regex * TODO: suffixes having a common prefix: loop over all of them cli_ac_free: multiple virname pointing to same location git-svn: trunk@3978
This commit is contained in:
@@ -1,3 +1,12 @@
|
||||
Wed Jul 23 16:32:32 EEST 2008 (edwin)
|
||||
------------------------------------
|
||||
* libclamav: performance improvements for URL matching (bb #725, bb #650):
|
||||
* use a suffix AC-trie and a shift-or FSM to filter
|
||||
* rewrite the URL regex in C
|
||||
* use a perfect hash to lookup TLD and ccTLD, instead of a regex
|
||||
* TODO: suffixes having a common prefix: loop over all of them
|
||||
cli_ac_free: multiple virname pointing to same location
|
||||
|
||||
Mon Jul 21 12:16:44 CEST 2008 (tk)
|
||||
----------------------------------
|
||||
* sigtool/vba.c: fix crash on error in vba code (bb#1106)
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
PERL=perl
|
||||
CC=cc
|
||||
|
||||
all: entitylist.h encoding_aliases.h gentbl encname_chars.h
|
||||
all: entitylist.h encoding_aliases.h gentbl encname_chars.h generate_hash
|
||||
|
||||
entities_parsed: entities entities/* entity_decl_parse.pl
|
||||
$(PERL) entity_decl_parse.pl $</* | sort -u >$@
|
||||
@@ -9,6 +9,9 @@ entities_parsed: entities entities/* entity_decl_parse.pl
|
||||
generate_entitylist: generate_entitylist.c ../../libclamav/hashtab.h ../../libclamav/hashtab.c ../../libclamav/others.c
|
||||
$(CC) -I. -DHAVE_CONFIG_H -DCLI_MEMFUNSONLY -DPROFILE_HASHTABLE $< ../../libclamav/hashtab.c ../../libclamav/others.c -o $@
|
||||
|
||||
generate_hash: generate_hash.c ../../libclamav/hashtab.h ../../libclamav/hashtab.c ../../libclamav/others.c
|
||||
$(CC) -I. -DHAVE_CONFIG_H -DCLI_MEMFUNSONLY -DPROFILE_HASHTABLE $< ../../libclamav/hashtab.c ../../libclamav/others.c -o $@
|
||||
|
||||
generate_encoding_aliases: generate_encoding_aliases.c ../../libclamav/hashtab.c ../../libclamav/others.c ../../libclamav/htmlnorm.h ../../libclamav/entconv.h ../../libclamav/cltypes.h ../../libclamav/hashtab.h ../../libclamav/hashtab.h
|
||||
$(CC) -I. -DHAVE_CONFIG_H -DCLI_MEMFUNSONLY -DPROFILE_HASHTABLE $< ../../libclamav/hashtab.c ../../libclamav/others.c -o $@
|
||||
|
||||
|
||||
@@ -26,30 +26,11 @@ OUTFILE=iana_tld.h
|
||||
echo "Downloading updated tld list from iana.org"
|
||||
wget $IANA_TLD -O $TMP || exit 2
|
||||
echo "Download complete, parsing data"
|
||||
# 174 is the code for |
|
||||
TLDLIST=$(egrep -v ^# $TMP | tr \\n \\174 | sed 's/[^a-zA-Z]$//')
|
||||
echo "Parse complete, removing tmpfile"
|
||||
rm $TMP
|
||||
echo "Generating tld list in $OUTFILE"
|
||||
cat >$OUTFILE <<EOF
|
||||
#ifndef IANA_TLD_H
|
||||
#define IANA_TLD_H
|
||||
EOF
|
||||
echo -n "#define iana_tld \"(" >>$OUTFILE
|
||||
echo -n $TLDLIST >>$OUTFILE
|
||||
echo ")\"" >>$OUTFILE
|
||||
grep -Ev ^# $TMP | tr [A-Z] [a-z] | gperf -C -l -L ANSI-C -E -C -H tld_hash -N in_tld_set|grep -v '^#line' | sed -e 's/^const struct/static const struct/' -e 's/register //g' >iana_tld.h
|
||||
|
||||
echo "Downloading updated country-code list from iana.org"
|
||||
wget $IANA_CCTLD -O $TMP || exit 2
|
||||
echo "Download complete, parsing data"
|
||||
CCTLDLIST=$(cat $TMP | egrep -oi "<a href=[^>]+>\\.([a-zA-Z]+).+</a>" | egrep -o ">.[a-zA-Z]+" | colrm 1 2 | tr \\n \\174 | sed 's/[^a-zA-Z]$//')
|
||||
echo "Parse complete, removing tmpfile"
|
||||
rm $TMP
|
||||
echo "Generating cctld list in $OUTFILE"
|
||||
echo -n "#define iana_cctld \"(" >>$OUTFILE
|
||||
echo -n $CCTLDLIST >>$OUTFILE
|
||||
echo ")\"" >>$OUTFILE
|
||||
|
||||
|
||||
echo "#endif" >>$OUTFILE
|
||||
echo "Finished succesfully"
|
||||
cat $TMP | grep country-code|egrep -oi "<a
|
||||
href=[^>]+>\\.([a-zA-Z]+).+</a>"|egrep -o ">.[a-zA-Z]+" | colrm 1 2 | tr [A-Z] [a-z]| gperf -C -l -L ANSI-C -E -C -H cctld_hash -N in_cctld_set |grep -v '^#line'|sed -e 's/^const struct/static const struct/' -e 's/register //g' >iana_cctld.h
|
||||
echo "Done"
|
||||
|
||||
@@ -26,17 +26,4 @@ echo "Downloading updated tld list from iana.org"
|
||||
wget $IANA_TLD -O $TMP || exit 2
|
||||
echo "Download complete, parsing data"
|
||||
# 174 is the code for |
|
||||
TLDLIST=$(egrep -v ^# $TMP|tr \\n \\174 )
|
||||
echo "Parse complete, removing tmpfile"
|
||||
rm $TMP
|
||||
echo "Generating $OUTFILE"
|
||||
cat >$OUTFILE <<EOF
|
||||
#ifndef IANA_TLD_H
|
||||
#define IANA_TLD_H
|
||||
EOF
|
||||
echo -n "#define iana_tld \"(" >>$OUTFILE
|
||||
echo -n $TLDLIST >>$OUTFILE
|
||||
echo ")\"" >>$OUTFILE
|
||||
echo "#endif" >>$OUTFILE
|
||||
echo "Finished succesfully"
|
||||
|
||||
grep -Ev ^# $TMP | tr [A-Z] [a-z] | gperf -C -H tld_hash -N in_tld_set -l|grep -v '^#line' | sed -e 's/^const struct/static const struct/' -e 's/register //g'
|
||||
|
||||
@@ -361,7 +361,7 @@ All 4 tests passed
|
||||
\item The exact output from \verb+make check+
|
||||
\item Output of \verb+uname -mrsp+
|
||||
\item your \verb+config.log+
|
||||
\item The following files from the \verb+unit-tests/+ directory:
|
||||
\item The following files from the \verb+unit_tests/+ directory:
|
||||
\begin{itemize}
|
||||
\item \verb+test.log+
|
||||
\item \verb+clamscan.log+
|
||||
|
||||
@@ -367,10 +367,18 @@ void hashtab_clear(struct hashtable *s)
|
||||
if(s->htable[i].key && s->htable[i].key != DELETED_KEY)
|
||||
free((void *)s->htable[i].key);
|
||||
}
|
||||
memset(s->htable, 0, s->capacity);
|
||||
if(s->htable)
|
||||
memset(s->htable, 0, s->capacity);
|
||||
s->used = 0;
|
||||
}
|
||||
|
||||
void hashtab_free(struct hashtable *s)
|
||||
{
|
||||
hashtab_clear(s);
|
||||
free(s->htable);
|
||||
s->htable = NULL;
|
||||
s->capacity = 0;
|
||||
}
|
||||
|
||||
int hashtab_store(const struct hashtable *s,FILE* out)
|
||||
{
|
||||
|
||||
@@ -82,7 +82,7 @@ int hashtab_init(struct hashtable *s,size_t capacity);
|
||||
const struct element* hashtab_insert(struct hashtable *s, const char* key, const size_t len, const element_data data);
|
||||
void hashtab_delete(struct hashtable *s,const char* key,const size_t len);
|
||||
void hashtab_clear(struct hashtable *s);
|
||||
|
||||
void hashtab_free(struct hashtable *s);
|
||||
int hashtab_load(FILE* in, struct hashtable *s);
|
||||
int hashtab_store(const struct hashtable *s,FILE* out);
|
||||
|
||||
|
||||
505
libclamav/iana_cctld.h
Normal file
505
libclamav/iana_cctld.h
Normal file
@@ -0,0 +1,505 @@
|
||||
/* ANSI-C code produced by gperf version 3.0.3 */
|
||||
/* Command-line: gperf -C -l -L ANSI-C -E -C -H cctld_hash -N in_cctld_set */
|
||||
/* Computed positions: -k'1-2' */
|
||||
|
||||
#if !((' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) \
|
||||
&& ('%' == 37) && ('&' == 38) && ('\'' == 39) && ('(' == 40) \
|
||||
&& (')' == 41) && ('*' == 42) && ('+' == 43) && (',' == 44) \
|
||||
&& ('-' == 45) && ('.' == 46) && ('/' == 47) && ('0' == 48) \
|
||||
&& ('1' == 49) && ('2' == 50) && ('3' == 51) && ('4' == 52) \
|
||||
&& ('5' == 53) && ('6' == 54) && ('7' == 55) && ('8' == 56) \
|
||||
&& ('9' == 57) && (':' == 58) && (';' == 59) && ('<' == 60) \
|
||||
&& ('=' == 61) && ('>' == 62) && ('?' == 63) && ('A' == 65) \
|
||||
&& ('B' == 66) && ('C' == 67) && ('D' == 68) && ('E' == 69) \
|
||||
&& ('F' == 70) && ('G' == 71) && ('H' == 72) && ('I' == 73) \
|
||||
&& ('J' == 74) && ('K' == 75) && ('L' == 76) && ('M' == 77) \
|
||||
&& ('N' == 78) && ('O' == 79) && ('P' == 80) && ('Q' == 81) \
|
||||
&& ('R' == 82) && ('S' == 83) && ('T' == 84) && ('U' == 85) \
|
||||
&& ('V' == 86) && ('W' == 87) && ('X' == 88) && ('Y' == 89) \
|
||||
&& ('Z' == 90) && ('[' == 91) && ('\\' == 92) && (']' == 93) \
|
||||
&& ('^' == 94) && ('_' == 95) && ('a' == 97) && ('b' == 98) \
|
||||
&& ('c' == 99) && ('d' == 100) && ('e' == 101) && ('f' == 102) \
|
||||
&& ('g' == 103) && ('h' == 104) && ('i' == 105) && ('j' == 106) \
|
||||
&& ('k' == 107) && ('l' == 108) && ('m' == 109) && ('n' == 110) \
|
||||
&& ('o' == 111) && ('p' == 112) && ('q' == 113) && ('r' == 114) \
|
||||
&& ('s' == 115) && ('t' == 116) && ('u' == 117) && ('v' == 118) \
|
||||
&& ('w' == 119) && ('x' == 120) && ('y' == 121) && ('z' == 122) \
|
||||
&& ('{' == 123) && ('|' == 124) && ('}' == 125) && ('~' == 126))
|
||||
/* The character set is not based on ISO-646. */
|
||||
#error "gperf generated tables don't work with this execution character set. Please report a bug to <bug-gnu-gperf@gnu.org>."
|
||||
#endif
|
||||
|
||||
/* maximum key range = 472, duplicates = 0 */
|
||||
|
||||
#ifdef __GNUC__
__inline
#else
#ifdef __cplusplus
inline
#endif
#endif
/*
 * gperf-generated hash for the country-code TLD table.
 *
 * The result is len plus one table entry selected by the second byte of
 * str and one selected by the first byte shifted by 25.  Both str[0] and
 * str[1] are read unconditionally, so the caller must pass a buffer that
 * holds at least two bytes.
 */
static unsigned int
cctld_hash (const char *str, unsigned int len)
{
  /* 281 entries: large enough for the biggest index, 255 + 25. */
  static const unsigned short assoc[] =
    {
      476, 476, 476, 476, 476, 476, 476, 476, 476, 476,
      476, 476, 476, 476, 476, 476, 476, 476, 476, 476,
      476, 476, 476, 476, 476, 476, 476, 476, 476, 476,
      476, 476, 476, 476, 476, 476, 476, 476, 476, 476,
      476, 476, 476, 476, 476, 476, 476, 476, 476, 476,
      476, 476, 476, 476, 476, 476, 476, 476, 476, 476,
      476, 476, 476, 476, 476, 476, 476, 476, 476, 476,
      476, 476, 476, 476, 476, 476, 476, 476, 476, 476,
      476, 476, 476, 476, 476, 476, 476, 476, 476, 476,
      476, 476, 476, 476, 476, 476, 476, 119,  97,  33,
      103,   4,  59, 115, 210, 149, 169, 143, 175,  55,
      145,  89, 178,  37,  85,  18,  34, 239,   2,  73,
      112,   3,  25,  10,  15, 117, 209, 229, 150, 223,
      200,  78, 225,  54,   5, 215, 215, 190,  25,  23,
        0,  20, 233, 234,  14, 476,  33, 204, 476, 476,
      476, 476, 476, 476, 476, 476, 476, 476, 476, 476,
      476, 476, 476, 476, 476, 476, 476, 476, 476, 476,
      476, 476, 476, 476, 476, 476, 476, 476, 476, 476,
      476, 476, 476, 476, 476, 476, 476, 476, 476, 476,
      476, 476, 476, 476, 476, 476, 476, 476, 476, 476,
      476, 476, 476, 476, 476, 476, 476, 476, 476, 476,
      476, 476, 476, 476, 476, 476, 476, 476, 476, 476,
      476, 476, 476, 476, 476, 476, 476, 476, 476, 476,
      476, 476, 476, 476, 476, 476, 476, 476, 476, 476,
      476, 476, 476, 476, 476, 476, 476, 476, 476, 476,
      476, 476, 476, 476, 476, 476, 476, 476, 476, 476,
      476, 476, 476, 476, 476, 476, 476, 476, 476, 476,
      476, 476, 476, 476, 476, 476, 476, 476, 476, 476,
      476
    };
  const unsigned char second = (unsigned char)str[1];
  const unsigned char first = (unsigned char)str[0];
  unsigned int hash = len;

  hash += assoc[second];
  hash += assoc[first + 25];
  return hash;
}
|
||||
|
||||
#ifdef __GNUC__
__inline
#ifdef __GNUC_STDC_INLINE__
__attribute__ ((__gnu_inline__))
#endif
#endif
/*
 * gperf-generated membership test for the country-code TLD table.
 *
 * Returns the table's own copy of the key when the first len bytes of str
 * match an entry, and 0 otherwise.  Only len == 2 can match, because
 * MIN_WORD_LENGTH and MAX_WORD_LENGTH are both 2.  The comparison is
 * byte-exact; the table itself holds lower-case words only.
 */
const char *
in_cctld_set (const char *str, unsigned int len)
{
  enum
    {
      TOTAL_KEYWORDS = 252,
      MIN_WORD_LENGTH = 2,
      MAX_WORD_LENGTH = 2,
      MIN_HASH_VALUE = 4,
      MAX_HASH_VALUE = 475
    };

  /* Length of the word stored in each hash slot; 0 marks an empty slot. */
  static const unsigned char lengthtable[] =
    {
      0, 0, 0, 0, 2, 2, 2, 0, 0, 2, 2, 2, 0, 0,
      2, 2, 2, 0, 0, 2, 2, 0, 0, 0, 2, 2, 0, 2,
      0, 2, 2, 2, 2, 0, 2, 2, 2, 2, 0, 2, 2, 2,
      2, 2, 2, 2, 2, 2, 0, 0, 2, 0, 2, 0, 0, 2,
      2, 2, 2, 2, 2, 2, 2, 0, 2, 0, 2, 2, 0, 2,
      0, 2, 2, 0, 2, 2, 2, 2, 0, 0, 2, 2, 2, 0,
      2, 2, 2, 2, 0, 2, 2, 2, 2, 0, 0, 2, 2, 2,
      2, 2, 2, 2, 2, 0, 0, 2, 2, 2, 0, 2, 2, 2,
      2, 0, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 0, 2,
      2, 2, 0, 2, 2, 2, 2, 0, 0, 2, 2, 2, 0, 2,
      0, 2, 2, 0, 2, 2, 2, 2, 0, 0, 2, 2, 2, 2,
      0, 2, 2, 2, 0, 0, 2, 2, 2, 0, 0, 2, 2, 2,
      0, 2, 2, 2, 2, 0, 2, 2, 2, 2, 0, 0, 0, 2,
      2, 0, 0, 2, 2, 2, 0, 2, 0, 2, 2, 0, 0, 2,
      2, 2, 0, 2, 2, 0, 2, 0, 0, 2, 2, 2, 2, 0,
      2, 2, 2, 0, 0, 2, 0, 2, 0, 0, 2, 2, 2, 0,
      0, 2, 2, 2, 0, 2, 2, 2, 2, 0, 0, 0, 2, 2,
      2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 0, 2, 2, 2,
      2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2,
      2, 2, 0, 2, 0, 2, 2, 0, 2, 0, 2, 2, 0, 2,
      2, 0, 2, 0, 0, 0, 2, 2, 2, 0, 2, 2, 0, 0,
      0, 2, 2, 2, 0, 0, 2, 2, 2, 0, 0, 2, 2, 2,
      0, 0, 2, 2, 2, 0, 0, 0, 2, 0, 0, 0, 2, 0,
      0, 0, 0, 2, 2, 2, 0, 0, 2, 0, 2, 0, 0, 2,
      2, 2, 0, 0, 0, 0, 2, 0, 0, 0, 0, 2, 0, 0,
      2, 2, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 2, 0,
      0, 0, 2, 2, 2, 0, 2, 0, 2, 0, 2, 0, 2, 2,
      2, 0, 2, 2, 0, 0, 0, 2, 0, 0, 0, 0, 0, 2,
      2, 0, 0, 2, 0, 0, 0, 0, 2, 0, 2, 0, 0, 2,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 2, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0,
      0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2
    };

  /* Words indexed by hash value, laid out 14 per row like lengthtable. */
  static const char * const wordlist[] =
    {
      "",   "",   "",   "",   "sv", "sy", "se", "",   "",   "mv", "my", "me", "",   "",
      "bv", "by", "be", "",   "",   "cv", "cy", "",   "",   "",   "tv", "ms", "",   "sz",
      "",   "re", "bs", "ae", "mz", "",   "ws", "sc", "st", "bz", "",   "ye", "mc", "mt",
      "cz", "rs", "mq", "as", "bt", "tz", "",   "",   "cc", "",   "az", "",   "",   "tc",
      "tt", "sm", "lv", "ly", "ac", "at", "mm", "",   "aq", "",   "mf", "bm", "",   "yt",
      "",   "bf", "cm", "",   "ls", "wf", "cf", "tm", "",   "",   "mw", "tf", "am", "",
      "je", "bw", "af", "sr", "",   "lc", "lt", "so", "mr", "",   "",   "tw", "mo", "br",
      "rw", "sb", "aw", "bo", "cr", "",   "",   "sd", "co", "tr", "",   "bb", "md", "to",
      "ar", "",   "ro", "bd", "ao", "sg", "",   "mx", "cd", "sa", "mg", "de", "",   "td",
      "ma", "bg", "",   "cx", "ad", "ba", "cg", "",   "",   "jm", "ca", "tg", "",   "ax",
      "",   "lr", "ag", "",   "dz", "sk", "qa", "sn", "",   "",   "mk", "si", "mn", "lb",
      "",   "gy", "ge", "bn", "",   "",   "ck", "bi", "cn", "",   "",   "tk", "ci", "tn",
      "",   "jo", "gs", "sj", "an", "",   "dm", "la", "ai", "sl", "",   "",   "",   "bj",
      "ml", "",   "",   "mp", "gt", "bl", "",   "gq", "",   "tj", "cl", "",   "",   "py",
      "pe", "tl", "",   "lk", "tp", "",   "al", "",   "",   "li", "ie", "gm", "do", "",
      "ps", "gf", "sh", "",   "",   "ee", "",   "mh", "",   "",   "is", "ne", "bh", "",
      "",   "gw", "pt", "ch", "",   "es", "ky", "ke", "th", "",   "",   "",   "it", "gr",
      "uy", "iq", "ve", "su", "nz", "",   "ec", "et", "mu", "pm", "",   "gb", "nc", "pf",
      "kz", "us", "",   "gd", "cu", "im", "jp", "ht", "uz", "zm", "dk", "",   "ru", "pw",
      "au", "gg", "",   "vc", "",   "ga", "om", "",   "yu", "",   "nf", "pr", "",   "zw",
      "hm", "",   "km", "",   "",   "",   "fm", "ir", "dj", "",   "um", "io", "",   "",
      "",   "lu", "er", "gn", "",   "",   "kw", "gi", "nr", "",   "",   "id", "no", "pg",
      "",   "",   "hr", "pa", "kr", "",   "",   "",   "fr", "",   "",   "",   "fo", "",
      "",   "",   "",   "za", "eg", "gl", "",   "",   "gp", "",   "ng", "",   "",   "pk",
      "na", "pn", "",   "",   "",   "",   "kg", "",   "",   "",   "",   "in", "",   "",
      "ug", "vg", "",   "",   "ua", "va", "",   "",   "",   "",   "",   "",   "gh", "",
      "",   "",   "ni", "pl", "hk", "",   "hn", "",   "kn", "",   "fk", "",   "ki", "il",
      "uk", "",   "fi", "vn", "",   "",   "",   "vi", "",   "",   "",   "",   "",   "gu",
      "nl", "",   "",   "np", "",   "",   "",   "",   "fj", "",   "ph", "",   "",   "kp",
      "",   "",   "",   "",   "",   "",   "",   "",   "",   "",   "",   "",   "",   "",
      "",   "eh", "",   "",   "",   "",   "",   "",   "",   "",   "",   "",   "",   "",
      "",   "",   "",   "kh", "",   "",   "",   "",   "",   "",   "",   "",   "",   "",
      "",   "",   "eu", "",   "",   "",   "",   "",   "nu", "",   "",   "",   "",   "",
      "",   "",   "hu", "",   "",   "",   "",   "",   "",   "",   "",   "",   "",   "vu"
    };

  int slot;
  const char *candidate;

  /* Reject lengths that no stored word can have before hashing. */
  if (len < MIN_WORD_LENGTH || len > MAX_WORD_LENGTH)
    return 0;

  slot = cctld_hash (str, len);
  if (slot < 0 || slot > MAX_HASH_VALUE)
    return 0;

  /* An empty slot has length 0 and therefore never equals len here. */
  if (len != lengthtable[slot])
    return 0;

  candidate = wordlist[slot];
  if (*str != *candidate || memcmp (str + 1, candidate + 1, len - 1) != 0)
    return 0;

  return candidate;
}
|
||||
@@ -1,28 +1,746 @@
|
||||
/*
|
||||
* Phishing module: iana tld list.
|
||||
*
|
||||
* Copyright (C) 2007-2008 Sourcefire, Inc.
|
||||
*
|
||||
* Authors: Török Edvin
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2 as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
* MA 02110-1301, USA.
|
||||
*/
|
||||
/* ANSI-C code produced by gperf version 3.0.3 */
|
||||
/* Command-line: gperf -C -l -L ANSI-C -E -C -H tld_hash -N in_tld_set */
|
||||
/* Computed positions: -k'1-2,6' */
|
||||
|
||||
#ifndef IANA_TLD_H
|
||||
#define IANA_TLD_H
|
||||
#define iana_tld "(A[CDEFGILMNOQRSTUWXZ]|B[ABDEFGHIJMNORSTVWYZ]|C[ACDFGHIKLMNORUVXYZ]|D[EJKMOZ]|E[CEGRSTU]|F[IJKMOR]|G[ABDEFGHILMNPQRSTUWY]|H[KMNRTU]|I[DELMNOQRST]|J[EMOP]|K[EGHIMNPRWYZ]|L[ABCIKRSTUVY]|M[ACDEGHKLMNOPQRSTUVWXYZ]|N[ACEFGILOPRUZ]|OM|P[AEFGHKLMNRSTWY]|QA|R[EOSUW]|S[ABCDEGHIJKLMNORTUVYZ]|T[CDFGHJKLMNOPRTVWZ]|U[AGKMSYZ]|V[ACEGINU]|W[FS]|Y[ETU]|Z[AMW]|BIZ|CAT|COM|EDU|GOV|INT|MIL|NET|ORG|PRO|TEL|AERO|ARPA|ASIA|COOP|INFO|JOBS|MOBI|NAME|MUSEUM|TRAVEL|XN--ZCKZAH|XN--0ZWM56D|XN--DEBA0AD|XN--G6W251D|XN--JXALPDLP|XN--KGBECHTV|XN--9T4B11YI5A|XN--80AKHBYKNJ4F|XN--11B5BS3A9AJ6G|XN--HGBK6AJ7F53BBA)"
|
||||
#define iana_cctld "(A[CDEFGILMNOQRSTUWXZ]|B[ABDEFGHIJLMNORSTVWYZ]|C[ACDFGHIKLMNORUVXYZ]|D[EJKMOZ]|E[CEGHRSTU]|F[IJKMOR]|G[ABDEFGHILMNPQRSTUWY]|H[KMNRTU]|I[DELMNOQRST]|J[EMOP]|K[EGHIMNPRWYZ]|L[ABCIKRSTUVY]|M[ACDEFGHKLMNOPQRSTUVWXYZ]|N[ACEFGILOPRUZ]|OM|P[AEFGHKLMNRSTWY]|QA|R[EOSUW]|S[ABCDEGHIJKLMNORTUVYZ]|T[CDFGHJKLMNOPRTVWZ]|U[AGKMSYZ]|V[ACEGINU]|W[FS]|Y[ETU]|Z[AMW]|BIZ|CAT|COM|EDU|GOV|IN[TT]|MIL|NET|ORG|PRO|TEL|AERO|ARP[AA]|ASIA|COOP|INFO|JOBS|MOBI|NAME|MUSEUM)"
|
||||
#if !((' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) \
|
||||
&& ('%' == 37) && ('&' == 38) && ('\'' == 39) && ('(' == 40) \
|
||||
&& (')' == 41) && ('*' == 42) && ('+' == 43) && (',' == 44) \
|
||||
&& ('-' == 45) && ('.' == 46) && ('/' == 47) && ('0' == 48) \
|
||||
&& ('1' == 49) && ('2' == 50) && ('3' == 51) && ('4' == 52) \
|
||||
&& ('5' == 53) && ('6' == 54) && ('7' == 55) && ('8' == 56) \
|
||||
&& ('9' == 57) && (':' == 58) && (';' == 59) && ('<' == 60) \
|
||||
&& ('=' == 61) && ('>' == 62) && ('?' == 63) && ('A' == 65) \
|
||||
&& ('B' == 66) && ('C' == 67) && ('D' == 68) && ('E' == 69) \
|
||||
&& ('F' == 70) && ('G' == 71) && ('H' == 72) && ('I' == 73) \
|
||||
&& ('J' == 74) && ('K' == 75) && ('L' == 76) && ('M' == 77) \
|
||||
&& ('N' == 78) && ('O' == 79) && ('P' == 80) && ('Q' == 81) \
|
||||
&& ('R' == 82) && ('S' == 83) && ('T' == 84) && ('U' == 85) \
|
||||
&& ('V' == 86) && ('W' == 87) && ('X' == 88) && ('Y' == 89) \
|
||||
&& ('Z' == 90) && ('[' == 91) && ('\\' == 92) && (']' == 93) \
|
||||
&& ('^' == 94) && ('_' == 95) && ('a' == 97) && ('b' == 98) \
|
||||
&& ('c' == 99) && ('d' == 100) && ('e' == 101) && ('f' == 102) \
|
||||
&& ('g' == 103) && ('h' == 104) && ('i' == 105) && ('j' == 106) \
|
||||
&& ('k' == 107) && ('l' == 108) && ('m' == 109) && ('n' == 110) \
|
||||
&& ('o' == 111) && ('p' == 112) && ('q' == 113) && ('r' == 114) \
|
||||
&& ('s' == 115) && ('t' == 116) && ('u' == 117) && ('v' == 118) \
|
||||
&& ('w' == 119) && ('x' == 120) && ('y' == 121) && ('z' == 122) \
|
||||
&& ('{' == 123) && ('|' == 124) && ('}' == 125) && ('~' == 126))
|
||||
/* The character set is not based on ISO-646. */
|
||||
#error "gperf generated tables don't work with this execution character set. Please report a bug to <bug-gnu-gperf@gnu.org>."
|
||||
#endif
|
||||
|
||||
/* maximum key range = 983, duplicates = 0 */
|
||||
|
||||
#ifdef __GNUC__
__inline
#else
#ifdef __cplusplus
inline
#endif
#endif
/*
 * gperf-generated hash for the generic/country TLD table.
 *
 * Starts from len and adds table entries chosen by key characters:
 *   - always the first byte (index shifted by 25);
 *   - the second byte unless len is exactly 1;
 *   - the sixth byte for every len outside 1..5.
 * The bytes are read without any bounds check, so the caller has to make
 * sure str is long enough for the len it passes.
 */
static unsigned int
tld_hash (const char *str, unsigned int len)
{
  /* 281 entries: large enough for the biggest index, 255 + 25. */
  static const unsigned short assoc[] =
    {
      988, 988, 988, 988, 988, 988, 988, 988, 988, 988,
      988, 988, 988, 988, 988, 988, 988, 988, 988, 988,
      988, 988, 988, 988, 988, 988, 988, 988, 988, 988,
      988, 988, 988, 988, 988, 988, 988, 988, 988, 988,
      988, 988, 988, 988, 988, 988, 988, 988,   0,  15,
      988, 988, 988, 988,   0, 988, 988, 988, 988, 988,
      988, 988, 988, 988, 988, 988, 988, 988, 988, 988,
      988, 988, 988, 988, 988, 988, 988, 988, 988, 988,
      988, 988, 988, 988, 988, 988, 988, 988, 988, 988,
      988, 988, 988, 988, 988, 988, 988, 170, 328,  88,
        3,  50, 293, 205, 123, 430, 500, 238, 115, 320,
      375,  30, 413, 348,  70,  43, 475,  18,   6, 283,
       95,  58,  10, 220,   5, 485, 480,   8, 190, 390,
      225, 113, 420,  95,   0,  15,  50, 295,  20, 128,
      130,  80, 405, 470, 340,   0, 305, 415, 988, 988,
      988, 988, 988, 988, 988, 988, 988, 988, 988, 988,
      988, 988, 988, 988, 988, 988, 988, 988, 988, 988,
      988, 988, 988, 988, 988, 988, 988, 988, 988, 988,
      988, 988, 988, 988, 988, 988, 988, 988, 988, 988,
      988, 988, 988, 988, 988, 988, 988, 988, 988, 988,
      988, 988, 988, 988, 988, 988, 988, 988, 988, 988,
      988, 988, 988, 988, 988, 988, 988, 988, 988, 988,
      988, 988, 988, 988, 988, 988, 988, 988, 988, 988,
      988, 988, 988, 988, 988, 988, 988, 988, 988, 988,
      988, 988, 988, 988, 988, 988, 988, 988, 988, 988,
      988, 988, 988, 988, 988, 988, 988, 988, 988, 988,
      988, 988, 988, 988, 988, 988, 988, 988, 988, 988,
      988, 988, 988, 988, 988, 988, 988, 988, 988, 988,
      988
    };
  int sum = len;

  if (len != 1)
    {
      /* Anything that is not 1..5 also mixes in the sixth character. */
      if (len < 2 || len > 5)
        sum += assoc[(unsigned char)str[5]];
      sum += assoc[(unsigned char)str[1]];
    }
  sum += assoc[(unsigned char)str[0]+25];

  return sum;
}
|
||||
|
||||
#ifdef __GNUC__
|
||||
__inline
|
||||
#ifdef __GNUC_STDC_INLINE__
|
||||
__attribute__ ((__gnu_inline__))
|
||||
#endif
|
||||
#endif
|
||||
const char *
|
||||
in_tld_set (const char *str, unsigned int len)
|
||||
{
|
||||
enum
|
||||
{
|
||||
TOTAL_KEYWORDS = 280,
|
||||
MIN_WORD_LENGTH = 2,
|
||||
MAX_WORD_LENGTH = 18,
|
||||
MIN_HASH_VALUE = 5,
|
||||
MAX_HASH_VALUE = 987
|
||||
};
|
||||
|
||||
static const unsigned char lengthtable[] =
|
||||
{
|
||||
0, 0, 0, 0, 0, 2, 0, 0, 2, 0, 2, 0, 2, 2,
|
||||
0, 2, 0, 2, 0, 0, 2, 0, 2, 0, 0, 2, 0, 2,
|
||||
0, 0, 2, 0, 2, 0, 4, 2, 0, 2, 3, 4, 2, 0,
|
||||
2, 0, 0, 2, 0, 2, 0, 0, 0, 0, 2, 0, 0, 2,
|
||||
0, 4, 0, 0, 2, 0, 2, 0, 4, 2, 0, 2, 3, 0,
|
||||
0, 0, 2, 0, 0, 0, 0, 2, 0, 0, 2, 0, 2, 0,
|
||||
4, 2, 0, 2, 2, 0, 2, 0, 2, 0, 0, 2, 0, 2,
|
||||
0, 0, 2, 0, 2, 2, 0, 2, 0, 2, 0, 0, 0, 0,
|
||||
2, 0, 0, 2, 0, 2, 0, 0, 0, 0, 2, 3, 0, 2,
|
||||
0, 2, 0, 0, 2, 0, 2, 3, 0, 2, 0, 0, 2, 0,
|
||||
2, 0, 2, 0, 0, 2, 0, 4, 2, 0, 2, 0, 2, 0,
|
||||
0, 2, 0, 0, 0, 0, 2, 0, 2, 0, 0, 2, 0, 2,
|
||||
0, 0, 2, 0, 2, 2, 0, 0, 0, 2, 3, 0, 2, 0,
|
||||
2, 0, 0, 2, 0, 2, 0, 4, 2, 0, 2, 0, 0, 2,
|
||||
0, 2, 0, 0, 0, 0, 2, 0, 0, 2, 0, 2, 0, 0,
|
||||
2, 0, 2, 0, 0, 0, 0, 2, 0, 0, 2, 0, 2, 3,
|
||||
0, 2, 0, 0, 2, 0, 2, 0, 2, 0, 0, 2, 0, 0,
|
||||
0, 0, 2, 0, 2, 0, 0, 2, 0, 2, 2, 0, 2, 0,
|
||||
2, 0, 0, 2, 0, 2, 0, 0, 0, 0, 2, 0, 0, 2,
|
||||
0, 2, 0, 0, 2, 6, 2, 0, 0, 0, 0, 2, 0, 0,
|
||||
2, 0, 0, 0, 0, 2, 0, 2, 0, 0, 0, 0, 2, 0,
|
||||
0, 2, 0, 2, 0, 0, 2, 0, 2, 0, 0, 2, 0, 2,
|
||||
0, 0, 2, 0, 0, 0, 0, 2, 0, 0, 0, 0, 2, 0,
|
||||
2, 0, 0, 2, 0, 2, 0, 0, 2, 0, 2, 0, 0, 2,
|
||||
0, 2, 0, 0, 2, 0, 2, 0, 6, 2, 0, 2, 0, 0,
|
||||
2, 0, 0, 0, 0, 2, 0, 2, 0, 0, 2, 0, 2, 0,
|
||||
0, 2, 0, 2, 3, 0, 2, 0, 2, 0, 0, 2, 0, 2,
|
||||
0, 0, 0, 0, 2, 0, 0, 2, 11, 2, 0, 0, 0, 16,
|
||||
2, 0, 0, 0, 11, 2, 0, 0, 0, 0, 2, 0, 0, 0,
|
||||
0, 17, 0, 0, 2, 0, 2, 2, 0, 2, 0, 2, 0, 0,
|
||||
2, 0, 0, 0, 0, 2, 0, 2, 0, 0, 2, 0, 2, 3,
|
||||
0, 2, 11, 2, 0, 0, 2, 0, 2, 0, 0, 0, 0, 2,
|
||||
0, 0, 2, 0, 2, 0, 0, 0, 0, 2, 0, 0, 2, 0,
|
||||
2, 0, 0, 2, 0, 2, 0, 0, 0, 0, 2, 10, 0, 2,
|
||||
0, 2, 0, 0, 2, 0, 12, 0, 0, 2, 3, 2, 0, 0,
|
||||
2, 0, 2, 0, 0, 2, 0, 2, 0, 0, 2, 0, 2, 0,
|
||||
0, 2, 0, 2, 18, 0, 2, 0, 2, 0, 0, 2, 0, 2,
|
||||
0, 0, 2, 0, 2, 0, 0, 2, 0, 2, 2, 0, 0, 0,
|
||||
2, 0, 0, 2, 0, 2, 0, 0, 2, 0, 2, 0, 0, 2,
|
||||
0, 2, 0, 0, 2, 0, 2, 0, 0, 0, 0, 2, 0, 0,
|
||||
2, 0, 2, 0, 0, 0, 0, 2, 0, 0, 2, 0, 2, 0,
|
||||
0, 2, 0, 2, 0, 0, 2, 0, 2, 0, 0, 0, 0, 2,
|
||||
0, 0, 2, 0, 12, 0, 0, 0, 0, 2, 18, 0, 0, 0,
|
||||
2, 3, 4, 2, 0, 2, 0, 0, 0, 0, 2, 0, 0, 0,
|
||||
0, 2, 0, 0, 0, 0, 2, 0, 0, 0, 0, 2, 0, 0,
|
||||
2, 0, 2, 0, 0, 2, 0, 0, 0, 0, 0, 0, 2, 0,
|
||||
0, 2, 0, 0, 0, 0, 0, 0, 2, 3, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0,
|
||||
2, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0,
|
||||
2, 0, 2, 0, 0, 2, 0, 0, 0, 0, 0, 0, 2, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 2, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0,
|
||||
2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0,
|
||||
0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 2, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 14, 0, 0, 2,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 2
|
||||
};
|
||||
static const char * const wordlist[] =
|
||||
{
|
||||
"", "", "", "", "",
|
||||
"md",
|
||||
"", "",
|
||||
"mv",
|
||||
"",
|
||||
"cd",
|
||||
"",
|
||||
"mz",
|
||||
"cv",
|
||||
"",
|
||||
"ad",
|
||||
"",
|
||||
"cz",
|
||||
"", "",
|
||||
"mu",
|
||||
"",
|
||||
"az",
|
||||
"", "",
|
||||
"cu",
|
||||
"",
|
||||
"nz",
|
||||
"", "",
|
||||
"au",
|
||||
"",
|
||||
"mo",
|
||||
"",
|
||||
"mobi",
|
||||
"nu",
|
||||
"",
|
||||
"co",
|
||||
"com",
|
||||
"coop",
|
||||
"fo",
|
||||
"",
|
||||
"ao",
|
||||
"", "",
|
||||
"ms",
|
||||
"",
|
||||
"no",
|
||||
"", "", "", "",
|
||||
"me",
|
||||
"", "",
|
||||
"as",
|
||||
"",
|
||||
"asia",
|
||||
"", "",
|
||||
"my",
|
||||
"",
|
||||
"ae",
|
||||
"",
|
||||
"aero",
|
||||
"cy",
|
||||
"",
|
||||
"ne",
|
||||
"net",
|
||||
"", "", "",
|
||||
"mr",
|
||||
"", "", "", "",
|
||||
"cr",
|
||||
"", "",
|
||||
"fr",
|
||||
"",
|
||||
"ar",
|
||||
"",
|
||||
"arpa",
|
||||
"td",
|
||||
"",
|
||||
"nr",
|
||||
"tv",
|
||||
"",
|
||||
"mc",
|
||||
"",
|
||||
"tz",
|
||||
"", "",
|
||||
"cc",
|
||||
"",
|
||||
"mx",
|
||||
"", "",
|
||||
"ac",
|
||||
"",
|
||||
"cx",
|
||||
"lv",
|
||||
"",
|
||||
"nc",
|
||||
"",
|
||||
"ax",
|
||||
"", "", "", "",
|
||||
"to",
|
||||
"", "",
|
||||
"lu",
|
||||
"",
|
||||
"ml",
|
||||
"", "", "", "",
|
||||
"cl",
|
||||
"org",
|
||||
"",
|
||||
"mh",
|
||||
"",
|
||||
"al",
|
||||
"", "",
|
||||
"ch",
|
||||
"",
|
||||
"nl",
|
||||
"tel",
|
||||
"",
|
||||
"sd",
|
||||
"", "",
|
||||
"sv",
|
||||
"",
|
||||
"ls",
|
||||
"",
|
||||
"sz",
|
||||
"", "",
|
||||
"jo",
|
||||
"",
|
||||
"jobs",
|
||||
"ru",
|
||||
"",
|
||||
"su",
|
||||
"",
|
||||
"tr",
|
||||
"", "",
|
||||
"ly",
|
||||
"", "", "", "",
|
||||
"ro",
|
||||
"",
|
||||
"so",
|
||||
"", "",
|
||||
"je",
|
||||
"",
|
||||
"lr",
|
||||
"", "",
|
||||
"tc",
|
||||
"",
|
||||
"ma",
|
||||
"rs",
|
||||
"", "", "",
|
||||
"ca",
|
||||
"cat",
|
||||
"",
|
||||
"re",
|
||||
"",
|
||||
"se",
|
||||
"", "",
|
||||
"lc",
|
||||
"",
|
||||
"na",
|
||||
"",
|
||||
"name",
|
||||
"sy",
|
||||
"",
|
||||
"qa",
|
||||
"", "",
|
||||
"gd",
|
||||
"",
|
||||
"tl",
|
||||
"", "", "", "",
|
||||
"sr",
|
||||
"", "",
|
||||
"th",
|
||||
"",
|
||||
"mg",
|
||||
"", "",
|
||||
"gu",
|
||||
"",
|
||||
"cg",
|
||||
"", "", "", "",
|
||||
"ag",
|
||||
"", "",
|
||||
"sc",
|
||||
"",
|
||||
"ng",
|
||||
"gov",
|
||||
"",
|
||||
"bd",
|
||||
"", "",
|
||||
"bv",
|
||||
"",
|
||||
"id",
|
||||
"",
|
||||
"bz",
|
||||
"", "",
|
||||
"gs",
|
||||
"", "", "", "",
|
||||
"mk",
|
||||
"",
|
||||
"ge",
|
||||
"", "",
|
||||
"ck",
|
||||
"",
|
||||
"sl",
|
||||
"fk",
|
||||
"",
|
||||
"gy",
|
||||
"",
|
||||
"bo",
|
||||
"", "",
|
||||
"sh",
|
||||
"",
|
||||
"io",
|
||||
"", "", "", "",
|
||||
"gr",
|
||||
"", "",
|
||||
"bs",
|
||||
"",
|
||||
"la",
|
||||
"", "",
|
||||
"is",
|
||||
"travel",
|
||||
"be",
|
||||
"", "", "", "",
|
||||
"ie",
|
||||
"", "",
|
||||
"by",
|
||||
"", "", "", "",
|
||||
"mw",
|
||||
"",
|
||||
"tg",
|
||||
"", "", "", "",
|
||||
"br",
|
||||
"", "",
|
||||
"aw",
|
||||
"",
|
||||
"ir",
|
||||
"", "",
|
||||
"cf",
|
||||
"",
|
||||
"sa",
|
||||
"", "",
|
||||
"af",
|
||||
"",
|
||||
"gl",
|
||||
"", "",
|
||||
"nf",
|
||||
"", "", "", "",
|
||||
"gh",
|
||||
"", "", "", "",
|
||||
"tk",
|
||||
"",
|
||||
"mm",
|
||||
"", "",
|
||||
"yu",
|
||||
"",
|
||||
"cm",
|
||||
"", "",
|
||||
"fm",
|
||||
"",
|
||||
"am",
|
||||
"", "",
|
||||
"lk",
|
||||
"",
|
||||
"sg",
|
||||
"", "",
|
||||
"ps",
|
||||
"",
|
||||
"il",
|
||||
"",
|
||||
"museum",
|
||||
"bh",
|
||||
"",
|
||||
"pe",
|
||||
"", "",
|
||||
"mq",
|
||||
"", "", "", "",
|
||||
"py",
|
||||
"",
|
||||
"ye",
|
||||
"", "",
|
||||
"aq",
|
||||
"",
|
||||
"ga",
|
||||
"", "",
|
||||
"tw",
|
||||
"",
|
||||
"pr",
|
||||
"pro",
|
||||
"",
|
||||
"sk",
|
||||
"",
|
||||
"om",
|
||||
"", "",
|
||||
"tf",
|
||||
"",
|
||||
"mn",
|
||||
"", "", "", "",
|
||||
"cn",
|
||||
"", "",
|
||||
"ws",
|
||||
"xn--g6w251d",
|
||||
"an",
|
||||
"", "", "",
|
||||
"xn--80akhbyknj4f",
|
||||
"ba",
|
||||
"", "", "",
|
||||
"xn--0zwm56d",
|
||||
"gg",
|
||||
"", "", "", "",
|
||||
"tm",
|
||||
"", "", "", "",
|
||||
"xn--11b5bs3a9aj6g",
|
||||
"", "",
|
||||
"hu",
|
||||
"",
|
||||
"pl",
|
||||
"rw",
|
||||
"",
|
||||
"mp",
|
||||
"",
|
||||
"uz",
|
||||
"", "",
|
||||
"ph",
|
||||
"", "", "", "",
|
||||
"lb",
|
||||
"",
|
||||
"bg",
|
||||
"", "",
|
||||
"np",
|
||||
"",
|
||||
"kz",
|
||||
"mil",
|
||||
"",
|
||||
"jm",
|
||||
"xn--deba0ad",
|
||||
"ci",
|
||||
"", "",
|
||||
"fi",
|
||||
"",
|
||||
"ai",
|
||||
"", "", "", "",
|
||||
"ni",
|
||||
"", "",
|
||||
"us",
|
||||
"",
|
||||
"sm",
|
||||
"", "", "", "",
|
||||
"tn",
|
||||
"", "",
|
||||
"sb",
|
||||
"",
|
||||
"hr",
|
||||
"", "",
|
||||
"uy",
|
||||
"",
|
||||
"pa",
|
||||
"", "", "", "",
|
||||
"ke",
|
||||
"xn--zckzah",
|
||||
"",
|
||||
"gw",
|
||||
"",
|
||||
"mt",
|
||||
"", "",
|
||||
"ky",
|
||||
"",
|
||||
"xn--jxalpdlp",
|
||||
"", "",
|
||||
"gf",
|
||||
"edu",
|
||||
"at",
|
||||
"", "",
|
||||
"vu",
|
||||
"",
|
||||
"kr",
|
||||
"", "",
|
||||
"tp",
|
||||
"",
|
||||
"dz",
|
||||
"", "",
|
||||
"eu",
|
||||
"",
|
||||
"pg",
|
||||
"", "",
|
||||
"bw",
|
||||
"",
|
||||
"sn",
|
||||
"xn--hlcj6aya9esc7a",
|
||||
"",
|
||||
"fj",
|
||||
"",
|
||||
"gm",
|
||||
"", "",
|
||||
"bf",
|
||||
"",
|
||||
"do",
|
||||
"", "",
|
||||
"gb",
|
||||
"",
|
||||
"ve",
|
||||
"", "",
|
||||
"es",
|
||||
"",
|
||||
"li",
|
||||
"jp",
|
||||
"", "", "",
|
||||
"ee",
|
||||
"", "",
|
||||
"pk",
|
||||
"",
|
||||
"de",
|
||||
"", "",
|
||||
"gq",
|
||||
"",
|
||||
"bm",
|
||||
"", "",
|
||||
"kh",
|
||||
"",
|
||||
"im",
|
||||
"", "",
|
||||
"bb",
|
||||
"",
|
||||
"er",
|
||||
"", "", "", "",
|
||||
"tt",
|
||||
"", "",
|
||||
"vc",
|
||||
"",
|
||||
"si",
|
||||
"", "", "", "",
|
||||
"gn",
|
||||
"", "",
|
||||
"ec",
|
||||
"",
|
||||
"lt",
|
||||
"", "",
|
||||
"iq",
|
||||
"",
|
||||
"ua",
|
||||
"", "",
|
||||
"pw",
|
||||
"",
|
||||
"tj",
|
||||
"", "", "", "",
|
||||
"za",
|
||||
"", "",
|
||||
"pf",
|
||||
"",
|
||||
"xn--kgbechtv",
|
||||
"", "", "", "",
|
||||
"bn",
|
||||
"xn--hgbk6aj7f53bba",
|
||||
"", "", "",
|
||||
"in",
|
||||
"int",
|
||||
"info",
|
||||
"gp",
|
||||
"",
|
||||
"st",
|
||||
"", "", "", "",
|
||||
"ug",
|
||||
"", "", "", "",
|
||||
"pm",
|
||||
"", "", "", "",
|
||||
"gi",
|
||||
"", "", "", "",
|
||||
"kg",
|
||||
"", "",
|
||||
"hk",
|
||||
"",
|
||||
"sj",
|
||||
"", "",
|
||||
"wf",
|
||||
"", "", "", "", "", "",
|
||||
"va",
|
||||
"", "",
|
||||
"uk",
|
||||
"", "", "", "", "", "",
|
||||
"bi",
|
||||
"biz",
|
||||
"", "", "", "", "", "", "", "", "",
|
||||
"", "", "", "",
|
||||
"gt",
|
||||
"", "", "", "",
|
||||
"pn",
|
||||
"", "", "", "",
|
||||
"vg",
|
||||
"", "", "", "", "", "", "", "", "",
|
||||
"eg",
|
||||
"", "", "", "", "", "", "", "", "",
|
||||
"bt",
|
||||
"", "",
|
||||
"zw",
|
||||
"",
|
||||
"it",
|
||||
"", "",
|
||||
"kw",
|
||||
"", "", "", "", "", "",
|
||||
"hm",
|
||||
"", "", "", "", "", "", "", "", "",
|
||||
"bj",
|
||||
"", "",
|
||||
"dk",
|
||||
"", "", "", "", "", "", "", "", "",
|
||||
"", "",
|
||||
"zm",
|
||||
"", "", "", "",
|
||||
"km",
|
||||
"", "", "", "", "", "", "", "", "",
|
||||
"", "", "", "", "", "", "", "", "",
|
||||
"", "", "", "", "", "",
|
||||
"hn",
|
||||
"", "", "", "",
|
||||
"pt",
|
||||
"", "", "", "", "", "", "", "", "",
|
||||
"yt",
|
||||
"", "", "", "", "", "", "", "", "",
|
||||
"", "", "", "", "",
|
||||
"kn",
|
||||
"", "", "", "", "", "", "", "", "",
|
||||
"dm",
|
||||
"", "", "", "", "", "", "", "", "",
|
||||
"", "", "", "", "", "", "", "", "",
|
||||
"", "", "", "", "", "", "", "", "",
|
||||
"kp",
|
||||
"", "", "", "", "", "", "", "", "",
|
||||
"", "",
|
||||
"vn",
|
||||
"", "", "", "",
|
||||
"ki",
|
||||
"", "", "", "", "", "", "", "", "",
|
||||
"", "",
|
||||
"xn--9t4b11yi5a",
|
||||
"", "",
|
||||
"ht",
|
||||
"", "", "", "", "", "", "", "", "",
|
||||
"", "", "", "", "", "", "", "", "",
|
||||
"", "", "", "", "", "", "", "", "",
|
||||
"", "", "", "", "", "", "",
|
||||
"vi",
|
||||
"", "", "", "", "", "", "", "", "",
|
||||
"", "", "", "", "", "", "", "", "",
|
||||
"", "", "", "", "", "", "", "", "",
|
||||
"", "", "", "", "", "", "", "", "",
|
||||
"", "", "", "", "", "", "", "", "",
|
||||
"", "", "", "", "", "", "", "", "",
|
||||
"et",
|
||||
"", "", "", "", "", "", "", "", "",
|
||||
"", "", "", "", "", "", "", "", "",
|
||||
"", "", "", "", "", "", "", "", "",
|
||||
"", "",
|
||||
"dj"
|
||||
};
|
||||
|
||||
if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
|
||||
{
|
||||
int key = tld_hash (str, len);
|
||||
|
||||
if (key <= MAX_HASH_VALUE && key >= 0)
|
||||
if (len == lengthtable[key])
|
||||
{
|
||||
const char *s = wordlist[key];
|
||||
|
||||
if (*str == *s && !memcmp (str + 1, s + 1, len - 1))
|
||||
return s;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -49,16 +49,6 @@ int domainlist_match(const struct cl_engine* engine,char* real_url,const char* d
|
||||
{
|
||||
const char* info;
|
||||
int rc = engine->domainlist_matcher ? regex_list_match(engine->domainlist_matcher,real_url,display_url,hostOnly ? pre_fixup : NULL,hostOnly,&info,0) : 0;
|
||||
if(rc && info && info[0] && info[0] != ':') {/*match successful, and has custom flags*/
|
||||
if(strlen(info)==3 && isxdigit(info[0]) && isxdigit(info[1]) && isxdigit(info[2])) {
|
||||
unsigned short notwantedflags=0;
|
||||
sscanf(info,"%hx",&notwantedflags);
|
||||
*flags &= ~notwantedflags;/* filter unwanted phishcheck flags */
|
||||
}
|
||||
else {
|
||||
cli_warnmsg("Phishcheck:Unknown flag format in domain-list, 3 hex digits expected");
|
||||
}
|
||||
}
|
||||
return rc;
|
||||
}
|
||||
|
||||
@@ -79,13 +69,6 @@ int is_domainlist_ok(const struct cl_engine* engine)
|
||||
return (engine && engine->domainlist_matcher) ? is_regex_ok(engine->domainlist_matcher) : 1;
|
||||
}
|
||||
|
||||
void domainlist_cleanup(const struct cl_engine* engine)
|
||||
{
|
||||
if(engine && engine->domainlist_matcher) {
|
||||
regex_list_cleanup(engine->domainlist_matcher);
|
||||
}
|
||||
}
|
||||
|
||||
void domainlist_done(struct cl_engine* engine)
|
||||
{
|
||||
if(engine && engine->domainlist_matcher) {
|
||||
|
||||
@@ -69,13 +69,6 @@ int is_whitelist_ok(const struct cl_engine* engine)
|
||||
return (engine && engine->whitelist_matcher) ? is_regex_ok(engine->whitelist_matcher) : 1;
|
||||
}
|
||||
|
||||
void whitelist_cleanup(const struct cl_engine* engine)
|
||||
{
|
||||
if(engine && engine->whitelist_matcher) {
|
||||
regex_list_cleanup(engine->whitelist_matcher);
|
||||
}
|
||||
}
|
||||
|
||||
void whitelist_done(struct cl_engine* engine)
|
||||
{
|
||||
if(engine && engine->whitelist_matcher) {
|
||||
|
||||
@@ -39,6 +39,7 @@
|
||||
#include <ctype.h>
|
||||
|
||||
#include "clamav.h"
|
||||
#include "cltypes.h"
|
||||
#include "others.h"
|
||||
#include "mbox.h"
|
||||
#include "message.h"
|
||||
@@ -47,6 +48,7 @@
|
||||
#include "phish_domaincheck_db.h"
|
||||
#include "phish_whitelist.h"
|
||||
#include "iana_tld.h"
|
||||
#include "iana_cctld.h"
|
||||
|
||||
|
||||
#define DOMAIN_REAL 1
|
||||
@@ -140,8 +142,6 @@ static char empty_string[]="";
|
||||
#define CLOAKED_URL "^"ANY_CLOAK"(\\."ANY_CLOAK"){0,3}$"
|
||||
|
||||
static const char cloaked_host_regex[] = CLOAKED_URL;
|
||||
static const char tld_regex[] = "^"iana_tld"$";
|
||||
static const char cctld_regex[] = "^"iana_cctld"$";
|
||||
static const char dotnet[] = ".net";
|
||||
static const char adonet[] = "ado.net";
|
||||
static const char aspnet[] = "asp.net";
|
||||
@@ -151,7 +151,10 @@ static const char gt[]=">";
|
||||
static const char src_text[] = "src";
|
||||
static const char href_text[] = "href";
|
||||
static const char mailto[] = "mailto:";
|
||||
static const char mailto_proto[] = "mailto://";
|
||||
static const char https[]="https://";
|
||||
static const char http[]="http://";
|
||||
static const char ftp[] = "ftp://";
|
||||
|
||||
static const size_t href_text_len = sizeof(href_text);
|
||||
static const size_t src_text_len = sizeof(src_text);
|
||||
@@ -161,7 +164,10 @@ static const size_t aspnet_len = sizeof(aspnet)-1;
|
||||
static const size_t lt_len = sizeof(lt)-1;
|
||||
static const size_t gt_len = sizeof(gt)-1;
|
||||
static const size_t mailto_len = sizeof(mailto)-1;
|
||||
static const size_t mailto_proto_len = sizeof(mailto_proto)-1;
|
||||
static const size_t https_len = sizeof(https)-1;
|
||||
static const size_t http_len = sizeof(http)-1;
|
||||
static const size_t ftp_len = sizeof(ftp)-1;
|
||||
|
||||
/* for urls, including mailto: urls, and (broken) http:www... style urls*/
|
||||
/* refer to: http://www.w3.org/Addressing/URL/5_URI_BNF.html
|
||||
@@ -169,41 +175,13 @@ static const size_t https_len = sizeof(https)-1;
|
||||
* So the 'safe' char class has been split up
|
||||
* */
|
||||
/* character classes */
|
||||
#define URI_alpha "a-zA-Z"
|
||||
#define URI_digit "0-9"
|
||||
#define URI_safe_nodot "-$_@&"
|
||||
#define URI_safe "-$_@.&"
|
||||
#define URI_extra "!*\"'(),"
|
||||
|
||||
#define URI_hex "[0-9a-fA-f]"
|
||||
#define URI_escape "%"URI_hex"{2}"
|
||||
#define URI_xalpha "([" URI_safe URI_alpha URI_digit URI_extra "]|"URI_escape")" /* URI_safe has to be first, because it contains - */
|
||||
#define URI_xalpha_nodot "([" URI_safe_nodot URI_alpha URI_digit URI_extra "]|"URI_escape")"
|
||||
|
||||
#define URI_xalphas_nodot URI_xalpha_nodot"*"
|
||||
|
||||
#define URI_ialpha "["URI_alpha"]"URI_xalphas_nodot""
|
||||
#define URI_xpalpha URI_xalpha"|\\+"
|
||||
#define URI_xpalpha_nodot URI_xalpha_nodot"|\\+"
|
||||
#define URI_xpalphas_nodot "("URI_xpalpha_nodot")+"
|
||||
|
||||
#define URI_scheme URI_ialpha
|
||||
#define URI_tld iana_tld
|
||||
#define URI_path1 URI_xpalphas_nodot"\\.("URI_xpalphas_nodot"\\.)*"
|
||||
|
||||
#define URI_IP_digits "["URI_digit"]{1,3}"
|
||||
#define URI_path_start "[/?:]?"
|
||||
#define URI_numeric_path URI_IP_digits"(\\."URI_IP_digits"){3}"URI_path_start
|
||||
#define URI_numeric_URI "("URI_scheme":(//)?)?"URI_numeric_path
|
||||
#define URI_numeric_URI "(http|https|ftp:(//)?)?"URI_numeric_path
|
||||
#define URI_numeric_fragmentaddress URI_numeric_URI
|
||||
|
||||
#define URI_URI1 "("URI_scheme":(//)?)?"URI_path1
|
||||
#define URI_URI2 URI_tld
|
||||
|
||||
#define URI_fragmentaddress1 URI_URI1
|
||||
#define URI_fragmentaddress2 URI_URI2""URI_path_start
|
||||
|
||||
#define URI_CHECK_PROTOCOLS "(http|https|ftp|mailto)://.+"
|
||||
|
||||
/*Warning: take care when modifying this regex, it has been tweaked, and tuned, just don't break it please.
|
||||
* there is fragmentaddress1, and 2 to work around the ISO limitation of 509 bytes max length for string constants*/
|
||||
@@ -235,7 +213,6 @@ static int string_assign_concatenated(struct string* dest, const char* prefix, c
|
||||
static void string_assign_null(struct string* dest);
|
||||
static char *rfind(char *start, char c, size_t len);
|
||||
static char hex2int(const unsigned char* src);
|
||||
static int isTLD(const struct phishcheck* pchk,const char* str,int len);
|
||||
static enum phish_status phishingCheck(const struct cl_engine* engine,struct url_check* urls);
|
||||
static const char* phishing_ret_toString(enum phish_status rc);
|
||||
|
||||
@@ -416,7 +393,7 @@ static int get_host(const struct phishcheck* s,const char* URL,int isReal,int* p
|
||||
}
|
||||
|
||||
tld = strrchr(realhost,'.');
|
||||
rc = tld ? isTLD(s,tld,tld-realhost-1) : 0;
|
||||
rc = tld ? !!in_tld_set(tld,tld-realhost-1) : 0;
|
||||
if(rc < 0)
|
||||
return rc;
|
||||
if(rc)
|
||||
@@ -438,28 +415,6 @@ static int get_host(const struct phishcheck* s,const char* URL,int isReal,int* p
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int isCountryCode(const struct phishcheck* s,const char* str)
|
||||
{
|
||||
return str ? !cli_regexec(&s->preg_cctld,str,0,NULL,0) : 0;
|
||||
}
|
||||
|
||||
static int isTLD(const struct phishcheck* pchk,const char* str,int len)
|
||||
{
|
||||
if (!str)
|
||||
return 0;
|
||||
else {
|
||||
char* s = cli_malloc(len+1);
|
||||
int rc;
|
||||
|
||||
if(!s)
|
||||
return CL_EMEM;
|
||||
strncpy(s,str,len);
|
||||
s[len]='\0';
|
||||
rc = !cli_regexec(&pchk->preg_tld,s,0,NULL,0);
|
||||
free(s);
|
||||
return rc ? 1 : 0;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* memrchr isn't standard, so I use this
|
||||
@@ -486,7 +441,7 @@ static void get_domain(const struct phishcheck* pchk,struct string* dest,struct
|
||||
string_assign(dest,host);
|
||||
return;
|
||||
}
|
||||
if(isCountryCode(pchk,tld+1)) {
|
||||
if(in_cctld_set(tld+1, strlen(tld+1))) {
|
||||
const char* countrycode = tld+1;
|
||||
tld = rfind(host->data,'.',tld-host->data-1);
|
||||
if(!tld) {
|
||||
@@ -495,7 +450,7 @@ static void get_domain(const struct phishcheck* pchk,struct string* dest,struct
|
||||
string_assign(dest,host);
|
||||
return;
|
||||
}
|
||||
if(!isTLD(pchk,tld+1,countrycode-tld-2)) {
|
||||
if(!in_tld_set(tld+1, countrycode-tld-2)) {
|
||||
string_assign_ref(dest,host,tld+1);
|
||||
return;/*it was a name like: subdomain.domain.uk, return domain.uk*/
|
||||
}
|
||||
@@ -737,11 +692,7 @@ cleanupURL(struct string *URL,struct string *pre_URL, int isReal)
|
||||
/* @end points to last character we want to be part of the URL */
|
||||
end = host_begin + host_len - 1;
|
||||
}
|
||||
/* terminate URL with a slash, except when we're at end of string */
|
||||
if(host_begin[host_len]) {
|
||||
host_begin[host_len] = '/';
|
||||
end++;
|
||||
}
|
||||
host_begin[host_len] = '\0';
|
||||
/* convert hostname to lowercase, but only hostname! */
|
||||
str_make_lowercase(host_begin, host_len);
|
||||
/* some broken MUAs put > in the href, and then
|
||||
@@ -797,6 +748,40 @@ int phishingScan(message* m,const char* dir,cli_ctx* ctx,tag_arguments_t* hrefs)
|
||||
|
||||
if(!ctx->found_possibly_unwanted)
|
||||
*ctx->virname=NULL;
|
||||
#if 0
|
||||
FILE *f = fopen("/home/edwin/quarantine/urls","r");
|
||||
if(!f)
|
||||
abort();
|
||||
while(!feof(f)) {
|
||||
struct url_check urls;
|
||||
char line1[4096];
|
||||
char line2[4096];
|
||||
char line3[4096];
|
||||
|
||||
fgets(line1, sizeof(line1), f);
|
||||
fgets(line2, sizeof(line2), f);
|
||||
fgets(line3, sizeof(line3), f);
|
||||
if(strcmp(line3, "\n") != 0) {
|
||||
strcpy(line1, line2);
|
||||
strcpy(line2, line3);
|
||||
fgets(line3, sizeof(line3), f);
|
||||
while(strcmp(line3, "\n") != 0) {
|
||||
fgets(line3, sizeof(line3),f);
|
||||
}
|
||||
}
|
||||
urls.flags = CL_PHISH_ALL_CHECKS;
|
||||
urls.link_type = 0;
|
||||
string_init_c(&urls.realLink, line1);
|
||||
string_init_c(&urls.displayLink, line2);
|
||||
string_init_c(&urls.pre_fixup.pre_displayLink, NULL);
|
||||
urls.realLink.refcount=-1;
|
||||
urls.displayLink.refcount=-1;
|
||||
int rc = phishingCheck(ctx->engine, &urls);
|
||||
//printf("%d\n",rc);
|
||||
}
|
||||
fclose(f);
|
||||
return 0;
|
||||
#endif
|
||||
for(i=0;i<hrefs->count;i++)
|
||||
if(hrefs->contents[i]) {
|
||||
struct url_check urls;
|
||||
@@ -928,44 +913,7 @@ int phishing_init(struct cl_engine* engine)
|
||||
return CL_EFORMAT;
|
||||
}
|
||||
|
||||
if(build_regex(&pchk->preg_cctld,cctld_regex,1)) {
|
||||
free(pchk);
|
||||
engine->phishcheck = NULL;
|
||||
return CL_EFORMAT;
|
||||
}
|
||||
if(build_regex(&pchk->preg_tld,tld_regex,1)) {
|
||||
free_regex(&pchk->preg_cctld);
|
||||
free(pchk);
|
||||
engine->phishcheck = NULL;
|
||||
return CL_EFORMAT;
|
||||
}
|
||||
url_regex = str_compose("^ *(("URI_CHECK_PROTOCOLS")|(",URI_fragmentaddress1,URI_fragmentaddress2")) *$");
|
||||
if(!url_regex || build_regex(&pchk->preg,url_regex,1)) {
|
||||
free_regex(&pchk->preg_cctld);
|
||||
free_regex(&pchk->preg_tld);
|
||||
free(url_regex);
|
||||
free(pchk);
|
||||
engine->phishcheck = NULL;
|
||||
return CL_EFORMAT;
|
||||
}
|
||||
free(url_regex);
|
||||
realurl_regex = str_compose("^ *(("URI_CHECK_PROTOCOLS")|(",URI_path1,URI_fragmentaddress2")) *$");
|
||||
if(!realurl_regex || build_regex(&pchk->preg_realurl, realurl_regex,1)) {
|
||||
free_regex(&pchk->preg_cctld);
|
||||
free_regex(&pchk->preg_tld);
|
||||
free_regex(&pchk->preg);
|
||||
free(url_regex);
|
||||
free(realurl_regex);
|
||||
free(pchk);
|
||||
engine->phishcheck = NULL;
|
||||
return CL_EFORMAT;
|
||||
}
|
||||
free(realurl_regex);
|
||||
if(build_regex(&pchk->preg_numeric,numeric_url_regex,1)) {
|
||||
free_regex(&pchk->preg_cctld);
|
||||
free_regex(&pchk->preg_tld);
|
||||
free_regex(&pchk->preg);
|
||||
free_regex(&pchk->preg_realurl);
|
||||
free(pchk);
|
||||
engine->phishcheck = NULL;
|
||||
return CL_EFORMAT;
|
||||
@@ -980,12 +928,8 @@ void phishing_done(struct cl_engine* engine)
|
||||
struct phishcheck* pchk = engine->phishcheck;
|
||||
cli_dbgmsg("Cleaning up phishcheck\n");
|
||||
if(pchk && !pchk->is_disabled) {
|
||||
free_regex(&pchk->preg);
|
||||
free_regex(&pchk->preg_hexurl);
|
||||
free_regex(&pchk->preg_cctld);
|
||||
free_regex(&pchk->preg_tld);
|
||||
free_regex(&pchk->preg_numeric);
|
||||
free_regex(&pchk->preg_realurl);
|
||||
pchk->is_disabled = 1;
|
||||
}
|
||||
whitelist_done(engine);
|
||||
@@ -998,22 +942,165 @@ void phishing_done(struct cl_engine* engine)
|
||||
cli_dbgmsg("Phishcheck cleaned up\n");
|
||||
}
|
||||
|
||||
|
||||
/*
 * Character-class lookup tables, indexed by byte value (0..255).
 * An entry is 1 when the byte belongs to the class and 0 otherwise.
 * The comment above each table lists the accepted characters.
 */

/*ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz*/
static const uint8_t URI_alpha[256] = {
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
	0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};

/*!"$%&'()*,-0123456789@ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz*/
static const uint8_t URI_xalpha_nodot[256] = {
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
	0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};

/*!"$%&'()*+,-0123456789@ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz*/
static const uint8_t URI_xpalpha_nodot[256] = {
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
	0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};

/*
 * Returns 1 when every byte in [start, end) is in URI_xalpha_nodot,
 * 0 otherwise. An empty range is accepted (returns 1).
 */
static inline int validate_uri_xalphas_nodot(const char *start, const char *end)
{
	/* bytes are read as unsigned so values >= 0x80 index the table correctly */
	const unsigned char *limit = (const unsigned char *)end;
	const unsigned char *p;

	for(p = (const unsigned char *)start; p < limit; p++) {
		if(!URI_xalpha_nodot[*p])
			return 0;
	}
	return 1;
}

/*
 * Returns 1 when [start, end) is non-empty and every byte in it is in
 * URI_xpalpha_nodot, 0 otherwise.
 */
static inline int validate_uri_xpalphas_nodot(const char *start, const char *end)
{
	const unsigned char *first = (const unsigned char *)start;
	const unsigned char *limit = (const unsigned char *)end;
	const unsigned char *p;

	for(p = first; p < limit; p++) {
		if(!URI_xpalpha_nodot[*p])
			return 0;
	}
	/* must have at least one char */
	return p > first;
}

/*
 * Returns 1 when [start, end) is non-empty, its first byte is in URI_alpha
 * and all remaining bytes are in URI_xalpha_nodot; 0 otherwise.
 */
static inline int validate_uri_ialpha(const char *start, const char *end)
{
	if(start >= end || !URI_alpha[(unsigned char)*start])
		return 0;
	return validate_uri_xalphas_nodot(start + 1, end);
}
|
||||
|
||||
/*
|
||||
* Only those URLs are identified as URLs for which phishing detection can be performed.
|
||||
*/
|
||||
static int isURL(const struct phishcheck* pchk,const char* URL)
|
||||
static int isURL(const struct phishcheck* pchk,const char* URL, int accept_anyproto)
|
||||
{
|
||||
return URL ? !cli_regexec(&pchk->preg,URL,0,NULL,0) : 0;
|
||||
const char *start = NULL, *p, *q;
|
||||
if(!URL)
|
||||
return 0;
|
||||
|
||||
switch (URL[0]) {
|
||||
case 'h':
|
||||
if (strncmp(URL, https, https_len) == 0)
|
||||
start = URL + https_len;
|
||||
else if (strncmp(URL, http, http_len) == 0)
|
||||
start = URL + http_len;
|
||||
break;
|
||||
case 'f':
|
||||
if (strncmp(URL, ftp, ftp_len) == 0)
|
||||
start = URL + ftp_len;
|
||||
break;
|
||||
case 'm':
|
||||
if (strncmp(URL, mailto_proto, mailto_proto_len) == 0)
|
||||
start = URL + mailto_proto_len;
|
||||
break;
|
||||
}
|
||||
if(start) {
|
||||
if(start[0] == '\0')
|
||||
return 0;/* empty URL */
|
||||
/* has a valid protocol, it is a URL */
|
||||
return 1;
|
||||
}
|
||||
start = accept_anyproto ? strchr(URL, ':') : NULL;
|
||||
if(start) {
|
||||
/* validate URI scheme */
|
||||
if(validate_uri_ialpha(URL, start)) {
|
||||
if(start[1] == '/' && start[2] == '/')
|
||||
start += 3; /* skip :// */
|
||||
else
|
||||
start++;
|
||||
}
|
||||
else
|
||||
start = URL; /* scheme invalid */
|
||||
} else
|
||||
start = URL;
|
||||
p = start;
|
||||
do {
|
||||
q = strchr(p, '.');
|
||||
if(q) {
|
||||
if(!validate_uri_xpalphas_nodot(p, q))
|
||||
return 0;
|
||||
p = q+1;
|
||||
}
|
||||
} while(q);
|
||||
if (p == start) /* must have at least one dot in the URL */
|
||||
return 0;
|
||||
return !!in_tld_set(p, strlen(p));
|
||||
}
|
||||
|
||||
/*
|
||||
* Check if this is a real URL, which basically means to check if it has a known URL scheme (http,https,ftp).
|
||||
* This prevents false positives with outbind:// and blocked:: links.
|
||||
*/
|
||||
#if 0
|
||||
static int isRealURL(const struct phishcheck* pchk,const char* URL)
|
||||
{
|
||||
return URL ? !cli_regexec(&pchk->preg_realurl,URL,0,NULL,0) : 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
static int isNumericURL(const struct phishcheck* pchk,const char* URL)
|
||||
{
|
||||
@@ -1139,7 +1226,7 @@ static enum phish_status phishingCheck(const struct cl_engine* engine,struct url
|
||||
cli_dbgmsg("Phishcheck:URL after cleanup: %s->%s\n", urls->realLink.data,
|
||||
urls->displayLink.data);
|
||||
|
||||
if((!isURL(pchk, urls->displayLink.data) || !isRealURL(pchk, urls->realLink.data) ) &&
|
||||
if((!isURL(pchk, urls->displayLink.data, 1) || !isURL(pchk, urls->realLink.data, 0) ) &&
|
||||
( (phishy&PHISHY_NUMERIC_IP && !isNumericURL(pchk, urls->displayLink.data)) ||
|
||||
!(phishy&PHISHY_NUMERIC_IP))) {
|
||||
cli_dbgmsg("Displayed 'url' is not url:%s\n",urls->displayLink.data);
|
||||
|
||||
@@ -44,10 +44,6 @@ struct string {
|
||||
};
|
||||
|
||||
struct phishcheck {
|
||||
regex_t preg;
|
||||
regex_t preg_realurl;
|
||||
regex_t preg_tld;
|
||||
regex_t preg_cctld;
|
||||
regex_t preg_numeric;
|
||||
regex_t preg_hexurl;
|
||||
int is_disabled;
|
||||
|
||||
@@ -1839,6 +1839,12 @@ int cl_build(struct cl_engine *engine)
|
||||
}
|
||||
}
|
||||
|
||||
if((ret = cli_build_regex_list(engine->whitelist_matcher))) {
|
||||
return ret;
|
||||
}
|
||||
if((ret = cli_build_regex_list(engine->domainlist_matcher))) {
|
||||
return ret;
|
||||
}
|
||||
cli_md5db_build(engine->md5_mdb);
|
||||
cli_freeign(engine);
|
||||
cli_dconf_print(engine->dconf);
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -24,39 +24,37 @@
|
||||
#ifndef _REGEX_LIST_H
|
||||
#define _REGEX_LIST_H
|
||||
|
||||
#ifdef NDEBUG
|
||||
#define massert(x) (void)(0)
|
||||
#else
|
||||
/*debug version, massert enabled*/
|
||||
|
||||
#define __massert_fail(expr,file,line) (void)cli_errmsg("Assertion failed at %s:%d\n %s\n",file,line,expr)
|
||||
|
||||
#define massert(expr) ((void) ((expr) ? (void)0 : (__massert_fail (#expr,__FILE__,__LINE__))))
|
||||
#endif
|
||||
|
||||
#include "phishcheck.h"
|
||||
#include "readdb.h"
|
||||
#include "matcher.h"
|
||||
#include <zlib.h> /* for gzFile */
|
||||
struct node_stack {
|
||||
struct tree_node** data;
|
||||
size_t capacity;
|
||||
size_t cnt;
|
||||
|
||||
struct regex_list {
|
||||
const char *pattern;
|
||||
regex_t preg;
|
||||
struct regex_list *nxt;
|
||||
};
|
||||
|
||||
struct filter {
|
||||
uint32_t B[65536];
|
||||
uint32_t end_fast[256];
|
||||
uint32_t end[65536];
|
||||
unsigned long m;
|
||||
};
|
||||
|
||||
struct regex_matcher {
|
||||
struct cli_matcher* root_hosts;
|
||||
struct tree_node* root_regex;
|
||||
struct tree_node* root_regex_hostonly;
|
||||
struct node_stack node_stack;
|
||||
struct node_stack node_stack_alt;
|
||||
size_t root_hosts_cnt;
|
||||
int list_inited;
|
||||
int list_loaded;
|
||||
int list_built;
|
||||
struct hashtable suffix_hash;
|
||||
size_t suffix_cnt;
|
||||
struct regex_list **suffix_regexes;
|
||||
struct cli_matcher suffixes;
|
||||
struct filter filter;
|
||||
int list_inited:2;
|
||||
int list_loaded:2;
|
||||
int list_built:2;
|
||||
};
|
||||
|
||||
int regex_list_match(struct regex_matcher* matcher, char* real_url,const char* display_url,const struct pre_fixup_info* pre_fixup, int hostOnly,const char** info,int is_whitelist);
|
||||
int cli_build_regex_list(struct regex_matcher* matcher);
|
||||
int regex_list_match(struct regex_matcher* matcher, char* real_url,const char* display_url,const struct pre_fixup_info* pre_fixup, int hostOnly,const char **info, int is_whitelist);
|
||||
int init_regex_list(struct regex_matcher* matcher);
|
||||
int load_regex_matcher(struct regex_matcher* matcher,FILE* fd,unsigned int options,int is_whitelist,struct cli_dbio *dbio);
|
||||
void regex_list_cleanup(struct regex_matcher* matcher);
|
||||
|
||||
Reference in New Issue
Block a user