add an #ifdef NOISY to pdf.c

Usage: uncomment #define NOISY, rebuild, and scan some PDF files
with an empty DB. It should print info messages for successful
extraction/decryption, and warnings where it fails.
This commit is contained in:
Török Edvin
2011-12-28 19:05:57 +02:00
parent 7719760b66
commit 4956690d99

View File

@@ -58,6 +58,7 @@ static char const rcsid[] = "$Id: pdf.c,v 1.61 2007/02/12 20:46:09 njh Exp $";
#include "sha256.h"
#include "textnorm.h"
#ifdef CL_DEBUG
/*#define SAVE_TMP
*Save the file being worked on in tmp */
@@ -122,6 +123,17 @@ struct pdf_struct {
unsigned keylen;
};
/*
 * Optional diagnostics for PDF constructs that cannot be parsed properly.
 * Uncomment the #define below and rebuild to enable them.
 *
 * With NOISY defined:
 *   noisy_msg(pdf, fmt, ...)  forwards to cli_infomsg() using pdf->ctx
 *   noisy_warnmsg(fmt, ...)   forwards to cli_warnmsg()
 *
 * Without NOISY both names expand to a bare (void) cast, so a call such as
 * noisy_warnmsg("x %d\n", n) becomes (void)("x %d\n", n): the arguments are
 * still evaluated as a comma expression, but nothing is printed.
 */
/*#define NOISY*/
#ifdef NOISY
/* pdf is parenthesized so that any pointer expression can be passed */
#define noisy_msg(pdf, ...) cli_infomsg((pdf)->ctx, __VA_ARGS__)
#define noisy_warnmsg cli_warnmsg
#else
#define noisy_msg (void)
#define noisy_warnmsg (void)
#endif
static const char *findNextNonWSBack(const char *q, const char *start)
{
while (q > start &&
@@ -416,10 +428,18 @@ static int filter_flatedecode(struct pdf_struct *pdf, struct pdf_obj *obj,
else
cli_dbgmsg("cli_pdf: after writing %lu bytes, got error %d inflating PDF stream in %u %u obj\n",
(unsigned long)nbytes, zstat, obj->id>>8, obj->id&0xff);
if(stream.msg)
noisy_warnmsg("cli_pdf: after writing %lu bytes, got error \"%s\" inflating PDF stream in %u %u obj\n",
(unsigned long)nbytes,
stream.msg, obj->id>>8, obj->id&0xff);
else
noisy_warnmsg("cli_pdf: after writing %lu bytes, got error %d inflating PDF stream in %u %u obj\n",
(unsigned long)nbytes, zstat, obj->id>>8, obj->id&0xff);
/* mark stream as bad only if not encrypted */
inflateEnd(&stream);
if (!nbytes) {
cli_dbgmsg("cli_pdf: dumping raw stream (probably encrypted)\n");
noisy_warnmsg("cli_pdf: dumping raw stream, probably encrypted and we failed to decrypt'n");
if (filter_writen(pdf, obj, fout, buf, len, sum) != len) {
cli_errmsg("cli_pdf: failed to write output file\n");
return CL_EWRITE;
@@ -593,6 +613,7 @@ static void aes_decrypt(const unsigned char *in, off_t *length, unsigned char *q
}
if (len < 32) {
cli_dbgmsg("cli_pdf: aes_decrypt: len is <32: %d\n", len);
noisy_warnmsg("cli_pdf: aes_decrypt: len is <32: %d\n", len);
return;
}
if (has_iv) {
@@ -618,6 +639,7 @@ static void aes_decrypt(const unsigned char *in, off_t *length, unsigned char *q
pad = q[-1];
if (pad > 0x10) {
cli_dbgmsg("cli_pdf: aes_decrypt: bad pad: %x (extra len: %d)\n", pad, len-16);
noisy_warnmsg("cli_pdf: aes_decrypt: bad pad: %x (extra len: %d)\n", pad, len-16);
*length -= len;
return;
}
@@ -625,6 +647,7 @@ static void aes_decrypt(const unsigned char *in, off_t *length, unsigned char *q
for (i=1;i<pad;i++) {
if (q[i] != pad) {
cli_dbgmsg("cli_pdf: aes_decrypt: bad pad: %x != %x\n",q[i],pad);
noisy_warnmsg("cli_pdf: aes_decrypt: bad pad: %x != %x\n",q[i],pad);
*length -= len;
return;
}
@@ -644,14 +667,18 @@ static char *decrypt_any(struct pdf_struct *pdf, uint32_t id, const char *in, of
cli_md5_ctx md5;
struct arc4_state arc4;
if (!length || !*length || !in)
if (!length || !*length || !in) {
noisy_warnmsg("decrypt failed for obj %u %u\n", id>>8, id&0xff);
return NULL;
}
n = pdf->keylen + 5;
if (enc_method == ENC_AESV2)
n += 4;
key = cli_malloc(n);
if (!key)
if (!key) {
noisy_warnmsg("decrypt_any: malloc failed\n");
return NULL;
}
memcpy(key, pdf->key, pdf->keylen);
q = key + pdf->keylen;
@@ -672,8 +699,10 @@ static char *decrypt_any(struct pdf_struct *pdf, uint32_t id, const char *in, of
n = 16;
q = cli_malloc(*length);
if (!q)
if (!q) {
noisy_warnmsg("decrypt_any: malloc failed\n");
return NULL;
}
switch (enc_method) {
case ENC_V2:
@@ -681,26 +710,33 @@ static char *decrypt_any(struct pdf_struct *pdf, uint32_t id, const char *in, of
memcpy(q, in, *length);
arc4_init(&arc4, result, n);
arc4_apply(&arc4, q, *length);
noisy_msg(pdf, "decrypted ARC4 data\n");
break;
case ENC_AESV2:
cli_dbgmsg("cli_pdf: enc is aesv2\n");
aes_decrypt(in, length, q, result, n, 1);
noisy_msg(pdf, "decrypted AES(v2) data\n");
break;
case ENC_AESV3:
cli_dbgmsg("cli_pdf: enc is aesv3\n");
aes_decrypt(in, length, q, pdf->key, pdf->keylen, 1);
noisy_msg(pdf, "decrypted AES(v3) data\n");
break;
case ENC_IDENTITY:
cli_dbgmsg("cli_pdf: enc is identity\n");
memcpy(q, in, *length);
noisy_msg(pdf, "identity encryption\n");
break;
case ENC_NONE:
cli_dbgmsg("cli_pdf: enc is none\n");
noisy_msg(pdf, "encryption is none\n");
free(q);
return NULL;
case ENC_UNKNOWN:
cli_dbgmsg("cli_pdf: enc is unknown\n");
free(q);
noisy_warnmsg("decrypt_any: unknown encryption method for obj %u %u\n",
id>>8,id&0xff);
return NULL;
}
return q;
@@ -759,7 +795,7 @@ static int pdf_scan_contents(int fd, struct pdf_struct *pdf)
char fullname[1024];
char outbuff[BUFSIZ];
char inbuf[BUFSIZ];
int fout, n;
int fout, n, rc;
enum cstate st = CSTATE_NONE;
snprintf(fullname, sizeof(fullname), "%s"PATHSEP"pdf%02u_c", pdf->dir, (pdf->files-1));
@@ -779,8 +815,13 @@ static int pdf_scan_contents(int fd, struct pdf_struct *pdf)
}
cli_writen(fout, s.out, s.out_pos);
lseek(fout, 0, SEEK_SET);
rc = cli_magic_scandesc(fout, pdf->ctx);
close(fout);
return CL_SUCCESS;
if (!pdf->ctx->engine->keeptmp)
if (cli_unlink(fullname) && rc != CL_VIRUS)
rc = CL_EUNLINK;
return rc;
}
static const char *pdf_getdict(const char *q0, int* len, const char *key);
@@ -847,6 +888,8 @@ static int pdf_extract_obj(struct pdf_struct *pdf, struct pdf_obj *obj)
if (length > pdf->size || obj->start + p_stream + length > pdf->size) {
cli_dbgmsg("cli_pdf: length out of file: %ld + %ld > %ld\n",
p_stream, length, pdf->size);
noisy_warnmsg("length out of file, truncated: %ld + %ld > %ld\n",
p_stream, length, pdf->size);
length = pdf->size - (obj->start + p_stream);
}
if (!(obj->flags & (1 << OBJ_FILTER_FLATE)) && length <= 0) {
@@ -898,9 +941,7 @@ static int pdf_extract_obj(struct pdf_struct *pdf, struct pdf_obj *obj)
else {
decrypted = decrypt_any(pdf, obj->id, flate_in, &length,
enc);
if (!decrypted)
cli_warnmsg("cli_pdf:decrypt_any: malloc failed\n");
else
if (decrypted)
flate_in = decrypted;
}
}
@@ -949,7 +990,9 @@ static int pdf_extract_obj(struct pdf_struct *pdf, struct pdf_obj *obj)
if (filter_writen(pdf, obj, fout, flate_in, ascii_decoded_size, &sum) != ascii_decoded_size)
rc = CL_EWRITE;
}
}
} else
noisy_warnmsg("cannot find stream bounds for obj %u %u\n", obj->id>>8, obj->id&0xff);
} else if (obj->flags & (1 << OBJ_JAVASCRIPT)) {
const char *q2;
const char *q = pdf->map+obj->start;
@@ -1001,8 +1044,10 @@ static int pdf_extract_obj(struct pdf_struct *pdf, struct pdf_obj *obj)
cli_dbgmsg("cli_pdf: encrypted string\n");
decrypted = decrypt_any(pdf, obj->id, out, &n,
pdf->enc_method_string);
if (decrypted)
if (decrypted) {
noisy_msg(pdf, "decrypted Javascript string from obj %u %u\n", obj->id>>8,obj->id&0xff);
out = decrypted;
}
}
if (filter_writen(pdf, obj, fout, out, n, &sum) != n) {
rc = CL_EWRITE;
@@ -1024,8 +1069,10 @@ static int pdf_extract_obj(struct pdf_struct *pdf, struct pdf_obj *obj)
out++;
n--;
decrypted = decrypt_any(pdf, obj->id, out, &n, pdf->enc_method_string);
if (decrypted)
if (decrypted) {
noisy_msg(pdf, "decrypted Javascript string from obj %u %u\n", obj->id>>8,obj->id&0xff);
out = decrypted;
}
}
decoded = cli_malloc(n);
if (!decoded) {
@@ -1068,6 +1115,7 @@ static int pdf_extract_obj(struct pdf_struct *pdf, struct pdf_obj *obj)
rc2 = pdf_scan_contents(fout, pdf);
if (rc2 == CL_VIRUS)
rc = rc2;
noisy_msg(pdf, "extracted text from obj %u %u\n", obj->id>>8, obj->id&0xff);
}
}
close(fout);
@@ -1634,6 +1682,7 @@ static void check_user_password(struct pdf_struct *pdf, int R, const char *O,
n = UE ? strlen(UE) : 0;
if (n != 32) {
cli_dbgmsg("cli_pdf: UE length is not 32: %d\n", n);
noisy_warnmsg("cli_pdf: UE length is not 32: %d\n", n);
} else {
pdf->keylen = 32;
pdf->key = cli_malloc(32);
@@ -1708,15 +1757,18 @@ static void check_user_password(struct pdf_struct *pdf, int R, const char *O,
password_empty = 1;
} else {
cli_dbgmsg("cli_pdf: invalid revision %d\n", R);
noisy_warnmsg("cli_pdf: invalid revision %d\n", R);
}
}
if (password_empty) {
cli_dbgmsg("cli_pdf: user password is empty\n");
noisy_msg(pdf, "cli_pdf: encrypted PDF found, user password is empty, will attempt to decrypt\n");
/* The key we computed above is the key used to encrypt the streams.
* We could decrypt it now if we wanted to */
pdf->flags |= 1 << DECRYPTABLE_PDF;
} else {
cli_dbgmsg("cli_pdf: user/owner password would be required for decryption\n");
noisy_warnmsg("cli_pdf: encrypted PDF found, user password is NOT empty, cannot decrypt!\n");
/* the key is not valid, we would need the user or the owner password to
* decrypt */
}
@@ -1759,11 +1811,13 @@ static void pdf_handle_enc(struct pdf_struct *pdf)
return;
if (!pdf->fileID) {
cli_dbgmsg("cli_pdf: pdf_handle_enc no file ID\n");
noisy_warnmsg("cli_pdf: pdf_handle_enc no file ID\n");
return;
}
obj = find_obj(pdf, pdf->objs, pdf->enc_objid);
if (!obj) {
cli_dbgmsg("cli_pdf: can't find encrypted object %d %d\n", pdf->enc_objid>>8, pdf->enc_objid&0xff);
noisy_warnmsg("cli_pdf: can't find encrypted object %d %d\n", pdf->enc_objid>>8, pdf->enc_objid&0xff);
return;
}
len = obj_size(pdf, obj, 1);
@@ -1778,12 +1832,14 @@ static void pdf_handle_enc(struct pdf_struct *pdf)
P = pdf_readint(q, len, "/P");
if (P == ~0u) {
cli_dbgmsg("cli_pdf: invalid P\n");
noisy_warnmsg("cli_pdf: invalid P\n");
break;
}
q2 = cli_memstr(q, len, "/Standard", 9);
if (!q2) {
cli_dbgmsg("cli_pdf: /Standard not found\n");
noisy_warnmsg("cli_pdf: /Standard not found\n");
break;
}
/* we can have both of these:
@@ -1801,6 +1857,7 @@ static void pdf_handle_enc(struct pdf_struct *pdf)
R = pdf_readint(q, len, "/R");
if (R == ~0u) {
cli_dbgmsg("cli_pdf: invalid R\n");
noisy_warnmsg("cli_pdf: invalid R\n");
break;
}
@@ -1846,6 +1903,7 @@ static void pdf_handle_enc(struct pdf_struct *pdf)
n = 0;
O = pdf_readstring(q, len, "/O", &n);
if (!O || n < oulen) {
cli_dbgmsg("cli_pdf: invalid O: %d\n", n);
cli_dbgmsg("cli_pdf: invalid O: %d\n", n);
if (O)
dbg_printhex("invalid O", O, n);
@@ -1857,6 +1915,7 @@ static void pdf_handle_enc(struct pdf_struct *pdf)
break;
if (i != n) {
dbg_printhex("too long O", O, n);
noisy_warnmsg("too long O", O, n);
break;
}
}
@@ -1865,6 +1924,7 @@ static void pdf_handle_enc(struct pdf_struct *pdf)
U = pdf_readstring(q, len, "/U", &n);
if (!U || n < oulen) {
cli_dbgmsg("cli_pdf: invalid U: %d\n", n);
noisy_warnmsg("cli_pdf: invalid U: %d\n", n);
if (U)
dbg_printhex("invalid U", U, n);
break;
@@ -1881,6 +1941,7 @@ static void pdf_handle_enc(struct pdf_struct *pdf)
cli_dbgmsg("cli_pdf: Encrypt R: %d, P %x, length: %d\n", R, P, length);
if (length % 8) {
cli_dbgmsg("cli_pdf: wrong key length, not multiple of 8\n");
noisy_warnmsg("cli_pdf: wrong key length, not multiple of 8\n");
break;
}
check_user_password(pdf, R, O, U, P, EM, UE, length, oulen);
@@ -1919,6 +1980,7 @@ int cli_pdf(const char *dir, cli_ctx *ctx, off_t offset)
pdfver = cli_memstr(pdfver, versize, "%PDF-", 5);
if (!pdfver) {
cli_dbgmsg("cli_pdf: no PDF- header found\n");
noisy_warnmsg("cli_pdf: no PDF- header found\n");
return CL_SUCCESS;
}
/* Check for PDF-1.[0-9]. Although 1.7 is highest now, allow for future