Files
clamav/contrib/entitynorm/entity_decl_parse.pl
Török Edvin b0b8398b48 * contrib/entitynorm:
* use fewer entities, browsers don't support all either.
		       	* update to generate code for new entconv.
		       	* no need for configure, use just a simple Makefile
			 (it is an internal tool)
  libclamav/entconv.c, hashtab.c, htmlnorm.c:
			* don't allocate memory for each entity_norm call.
			* don't touch length of mmaped area (bb #785)
			* update htmlnorm to use new entity_norm


git-svn: trunk@3515
2008-01-21 15:52:21 +00:00

42 lines
1.0 KiB
Perl

#!/usr/bin/perl
# (C) 2008 Török Edwin <edwin@clamav.net>
# parse <!ENTITY declarations and output them in the format
# used by generate_entitylist.c
# Format is EntityName,EntityValue.
# Only entities whose value is a single character reference <= 0xffff are
# emitted; larger values (TOOBIG) and values made of several character
# references (MULTIPLECHARS) are reported on stderr instead.
use strict;
use warnings;

# Largest entity value that is emitted; anything above is reported as TOOBIG.
use constant MAX_ENTITY_VALUE => 0xffff;

# Classify one input line (trailing newline already removed).
#
# Returns an empty list when the line produces no output. Otherwise returns
# a pair ($stream, $text):
#   $stream - 'out' if $text belongs on STDOUT, 'err' if it belongs on STDERR
#   $text   - the complete line to print, newline included
#
# Possible results:
#   out "Name,Value\n"            single character reference, value <= 0xffff
#   err "TOOBIG <line>\n"         single character reference, value > 0xffff
#   err "MULTIPLECHARS <name> <ref>\n"
#                                 value consists of several hex references
#   out "unknown1: <line>\n"      reference that is neither hex nor decimal
#   out "unknown2: <line>\n"      quoted value that is not a character reference
#   out "unknown3: <line>\n"      other <!ENTITY line without PUBLIC/SYSTEM
sub classify_entity_line {
    my ($line) = @_;

    if ($line =~ /<!ENTITY +([^ \t]+)[ \t]+\" *([^ \"]+) *\" *>/) {
        my ($name, $replacement) = ($1, $2);

        # A single character reference; the leading ampersand may itself be
        # written as "&#38;" (e.g. "&#38;#60;").
        if ($replacement =~ /^&(?:#38;)?#([^;]+);$/) {
            my $reference = $1;
            my $value;
            if ($reference =~ /^x([0-9a-fA-F]+)$/) {
                $value = hex($1);
            }
            elsif ($reference =~ /^[0-9]+$/) {
                $value = $reference;
            }
            else {
                return ('out', "unknown1: $line\n");
            }

            return ('err', "TOOBIG $line\n") if $value > MAX_ENTITY_VALUE;

            # The name is passed as an argument, never as part of the format,
            # so a '%' inside it cannot be taken for a conversion.
            return ('out', sprintf("%s,%d\n", $name, $value));
        }

        if ($replacement =~ /^(&#x[0-9a-fA-F]+;)+$/) {
            # $1 is the capture of a repeated group, so it holds only the
            # last reference of the sequence.
            return ('err', "MULTIPLECHARS $name $1\n");
        }

        return ('out', "unknown2: $line\n");
    }

    # Any other entity declaration is reported unless it is an external
    # (PUBLIC/SYSTEM) one.
    if ($line =~ /<!ENTITY/ && $line !~ /PUBLIC|SYSTEM/) {
        return ('out', "unknown3: $line\n");
    }

    return;
}

# Read the files named on the command line (or STDIN) line by line and print
# what classify_entity_line() returns to the matching stream.
sub main {
    while (my $line = <>) {
        chomp $line;
        my ($stream, $text) = classify_entity_line($line);
        next if !defined $stream;
        if ($stream eq 'err') {
            print STDERR $text;
        }
        else {
            print $text;
        }
    }
    return;
}

# Run only when executed as a script, so the file can also be loaded
# (e.g. by a test) without consuming any input.
main() unless caller;