mirror of
https://github.com/Cisco-Talos/clamav.git
synced 2026-05-16 12:32:56 -04:00
Revert "pdf strings are now base64 encoded if utf conversion fails"
This reverts commit 6c3cc09415.
This commit is contained in:
@@ -3215,7 +3215,7 @@ static void Author_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfnam
|
||||
return;
|
||||
|
||||
if (!(pdf->stats.author))
|
||||
pdf->stats.author = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Author", NULL, &pdf->stats.author_base64);
|
||||
pdf->stats.author = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Author", NULL);
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -3231,7 +3231,7 @@ static void Creator_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfna
|
||||
return;
|
||||
|
||||
if (!(pdf->stats.creator))
|
||||
pdf->stats.creator = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Creator", NULL, &pdf->stats.creator_base64);
|
||||
pdf->stats.creator = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Creator", NULL);
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -3247,7 +3247,7 @@ static void ModificationDate_cb(struct pdf_struct *pdf, struct pdf_obj *obj, str
|
||||
return;
|
||||
|
||||
if (!(pdf->stats.modificationdate))
|
||||
pdf->stats.modificationdate = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/ModDate", NULL, &pdf->stats.modificationdate_base64);
|
||||
pdf->stats.modificationdate = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/ModDate", NULL);
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -3263,7 +3263,7 @@ static void CreationDate_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct
|
||||
return;
|
||||
|
||||
if (!(pdf->stats.creationdate))
|
||||
pdf->stats.creationdate = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/CreationDate", NULL, &pdf->stats.creationdate_base64);
|
||||
pdf->stats.creationdate = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/CreationDate", NULL);
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -3279,7 +3279,7 @@ static void Producer_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfn
|
||||
return;
|
||||
|
||||
if (!(pdf->stats.producer))
|
||||
pdf->stats.producer = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Producer", NULL, &pdf->stats.producer_base64);
|
||||
pdf->stats.producer = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Producer", NULL);
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -3295,7 +3295,7 @@ static void Title_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname
|
||||
return;
|
||||
|
||||
if (!(pdf->stats.title))
|
||||
pdf->stats.title = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Title", NULL, &pdf->stats.title_base64);
|
||||
pdf->stats.title = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Title", NULL);
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -3311,7 +3311,7 @@ static void Keywords_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfn
|
||||
return;
|
||||
|
||||
if (!(pdf->stats.keywords))
|
||||
pdf->stats.keywords = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Keywords", NULL, &pdf->stats.keywords_base64);
|
||||
pdf->stats.keywords = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Keywords", NULL);
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -3327,7 +3327,7 @@ static void Subject_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfna
|
||||
return;
|
||||
|
||||
if (!(pdf->stats.subject))
|
||||
pdf->stats.subject = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Subject", NULL, &pdf->stats.subject_base64);
|
||||
pdf->stats.subject = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Subject", NULL);
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -3511,46 +3511,22 @@ static void pdf_export_json(struct pdf_struct *pdf)
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
if (pdf->stats.author) {
|
||||
if (pdf->stats.author)
|
||||
cli_jsonstr(pdfobj, "Author", pdf->stats.author);
|
||||
if (pdf->stats.author_base64)
|
||||
cli_jsonbool(pdfobj, "Author_base64", 1);
|
||||
}
|
||||
if (pdf->stats.creator) {
|
||||
if (pdf->stats.creator)
|
||||
cli_jsonstr(pdfobj, "Creator", pdf->stats.creator);
|
||||
if (pdf->stats.creator_base64)
|
||||
cli_jsonbool(pdfobj, "Creator_base64", 1);
|
||||
}
|
||||
if (pdf->stats.producer) {
|
||||
if (pdf->stats.producer)
|
||||
cli_jsonstr(pdfobj, "Producer", pdf->stats.producer);
|
||||
if (pdf->stats.producer_base64)
|
||||
cli_jsonbool(pdfobj, "Producer_base64", 1);
|
||||
}
|
||||
if (pdf->stats.modificationdate) {
|
||||
if (pdf->stats.modificationdate)
|
||||
cli_jsonstr(pdfobj, "ModificationDate", pdf->stats.modificationdate);
|
||||
if (pdf->stats.modificationdate_base64)
|
||||
cli_jsonbool(pdfobj, "ModificationDate_base64", 1);
|
||||
}
|
||||
if (pdf->stats.creationdate) {
|
||||
if (pdf->stats.creationdate)
|
||||
cli_jsonstr(pdfobj, "CreationDate", pdf->stats.creationdate);
|
||||
if (pdf->stats.creationdate_base64)
|
||||
cli_jsonbool(pdfobj, "CreationDate_base64", 1);
|
||||
}
|
||||
if (pdf->stats.title) {
|
||||
if (pdf->stats.title)
|
||||
cli_jsonstr(pdfobj, "Title", pdf->stats.title);
|
||||
if (pdf->stats.title_base64)
|
||||
cli_jsonbool(pdfobj, "Title_base64", 1);
|
||||
}
|
||||
if (pdf->stats.subject) {
|
||||
if (pdf->stats.subject)
|
||||
cli_jsonstr(pdfobj, "Subject", pdf->stats.subject);
|
||||
if (pdf->stats.subject_base64)
|
||||
cli_jsonbool(pdfobj, "Subject_base64", 1);
|
||||
}
|
||||
if (pdf->stats.keywords) {
|
||||
if (pdf->stats.keywords)
|
||||
cli_jsonstr(pdfobj, "Keywords", pdf->stats.keywords);
|
||||
if (pdf->stats.keywords_base64)
|
||||
cli_jsonbool(pdfobj, "Keywords_base64", 1);
|
||||
}
|
||||
if (pdf->stats.ninvalidobjs)
|
||||
cli_jsonint(pdfobj, "InvalidObjectCount", pdf->stats.ninvalidobjs);
|
||||
if (pdf->stats.njs)
|
||||
|
||||
@@ -89,21 +89,13 @@ struct pdf_stats {
|
||||
int32_t nacroform; /* Number of AcroForm objects */
|
||||
int32_t nxfa; /* Number of XFA objects */
|
||||
char *author; /* Author of the PDF */
|
||||
int8_t author_base64; /* Author string is base64 encoded */
|
||||
char *creator; /* Application used to create the PDF */
|
||||
int8_t creator_base64; /* Creator string is base64 encoded */
|
||||
char *producer; /* Application used to produce the PDF */
|
||||
int8_t producer_base64; /* Application string is base64 encoded */
|
||||
char *creationdate; /* Date the PDF was created */
|
||||
int8_t creationdate_base64; /* Date of creation string is base64 encoded */
|
||||
char *modificationdate; /* Date the PDF was modified */
|
||||
int8_t modificationdate_base64; /* Date of modification string is base64 encoded */
|
||||
char *creationdate; /* Date the PDF was created */
|
||||
char *modificationdate; /* Date the PDF was modified */
|
||||
char *title; /* Title of the PDF */
|
||||
int8_t title_base64; /* Title string is base64 encoded */
|
||||
char *subject; /* Subject of the PDF */
|
||||
int8_t subject_base64; /* Subject string is base64 encoded */
|
||||
char *keywords; /* Keywords of the PDF */
|
||||
int8_t keywords_base64; /* Keywords string is base64 encoded */
|
||||
};
|
||||
|
||||
|
||||
@@ -152,7 +144,7 @@ int pdf_extract_obj(struct pdf_struct *pdf, struct pdf_obj *obj, uint32_t flags)
|
||||
int pdf_findobj(struct pdf_struct *pdf);
|
||||
struct pdf_obj *find_obj(struct pdf_struct *pdf, struct pdf_obj *obj, uint32_t objid);
|
||||
|
||||
char *pdf_parse_string(struct pdf_struct *pdf, struct pdf_obj *obj, const char *objstart, size_t objsize, const char *str, char **endchar, int8_t *base64);
|
||||
char *pdf_parse_string(struct pdf_struct *pdf, struct pdf_obj *obj, const char *objstart, size_t objsize, const char *str, char **endchar);
|
||||
struct pdf_array *pdf_parse_array(struct pdf_struct *pdf, struct pdf_obj *obj, size_t objsz, char *begin, char **endchar);
|
||||
struct pdf_dict *pdf_parse_dict(struct pdf_struct *pdf, struct pdf_obj *obj, size_t objsz, char *begin, char **endchar);
|
||||
int is_object_reference(char *begin, char **endchar, uint32_t *id);
|
||||
|
||||
@@ -188,6 +188,7 @@ char *pdf_convert_utf(char *begin, size_t sz)
|
||||
#endif
|
||||
free(buf);
|
||||
free(outbuf);
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
@@ -275,7 +276,7 @@ int is_object_reference(char *begin, char **endchar, uint32_t *id)
|
||||
return 0;
|
||||
}
|
||||
|
||||
char *pdf_parse_string(struct pdf_struct *pdf, struct pdf_obj *obj, const char *objstart, size_t objsize, const char *str, char **endchar, int8_t *base64)
|
||||
char *pdf_parse_string(struct pdf_struct *pdf, struct pdf_obj *obj, const char *objstart, size_t objsize, const char *str, char **endchar)
|
||||
{
|
||||
const char *q = objstart;
|
||||
char *p1, *p2;
|
||||
@@ -408,7 +409,7 @@ char *pdf_parse_string(struct pdf_struct *pdf, struct pdf_obj *obj, const char *
|
||||
switch (*p3) {
|
||||
case '(':
|
||||
case '<':
|
||||
res = pdf_parse_string(pdf, obj, p3, objsize2, NULL, NULL, NULL);
|
||||
res = pdf_parse_string(pdf, obj, p3, objsize2, NULL, NULL);
|
||||
free(begin);
|
||||
break;
|
||||
default:
|
||||
@@ -422,14 +423,8 @@ char *pdf_parse_string(struct pdf_struct *pdf, struct pdf_obj *obj, const char *
|
||||
res = likelyutf ? pdf_convert_utf(p3, objsize2) : NULL;
|
||||
|
||||
if (!(res)) {
|
||||
if (base64) {
|
||||
res = (char*)cl_base64_encode(p1, len);
|
||||
if (res)
|
||||
*base64 = 1;
|
||||
} else {
|
||||
res = begin;
|
||||
res[objsize2] = '\0';
|
||||
}
|
||||
res = begin;
|
||||
res[objsize2] = '\0';
|
||||
} else {
|
||||
free(begin);
|
||||
}
|
||||
@@ -518,11 +513,6 @@ char *pdf_parse_string(struct pdf_struct *pdf, struct pdf_obj *obj, const char *
|
||||
}
|
||||
|
||||
res = pdf_convert_utf(p1, len);
|
||||
if (!res && base64) {
|
||||
res = (char*)cl_base64_encode(p1, len);
|
||||
if (res)
|
||||
*base64 = 1;
|
||||
}
|
||||
|
||||
if (res && endchar)
|
||||
*endchar = p2;
|
||||
@@ -684,7 +674,7 @@ struct pdf_dict *pdf_parse_dict(struct pdf_struct *pdf, struct pdf_obj *obj, siz
|
||||
|
||||
switch (begin[0]) {
|
||||
case '(':
|
||||
val = pdf_parse_string(pdf, obj, begin, objsz, NULL, &p1, NULL);
|
||||
val = pdf_parse_string(pdf, obj, begin, objsz, NULL, &p1);
|
||||
begin = p1+2;
|
||||
break;
|
||||
case '[':
|
||||
@@ -700,7 +690,7 @@ struct pdf_dict *pdf_parse_dict(struct pdf_struct *pdf, struct pdf_obj *obj, siz
|
||||
}
|
||||
}
|
||||
|
||||
val = pdf_parse_string(pdf, obj, begin, objsz, NULL, &p1, NULL);
|
||||
val = pdf_parse_string(pdf, obj, begin, objsz, NULL, &p1);
|
||||
begin = p1+2;
|
||||
break;
|
||||
default:
|
||||
@@ -882,7 +872,7 @@ struct pdf_array *pdf_parse_array(struct pdf_struct *pdf, struct pdf_obj *obj, s
|
||||
|
||||
/* Not a dictionary. Intentionally fall through. */
|
||||
case '(':
|
||||
val = pdf_parse_string(pdf, obj, begin, objsz, NULL, &begin, NULL);
|
||||
val = pdf_parse_string(pdf, obj, begin, objsz, NULL, &begin);
|
||||
begin += 2;
|
||||
break;
|
||||
case '[':
|
||||
|
||||
Reference in New Issue
Block a user