Revert "pdf strings are now base64 encoded if utf conversion fails"

This reverts commit 6c3cc09415.
This commit is contained in:
Kevin Lin
2015-03-02 19:05:09 -05:00
parent 0a185b8253
commit e098bf4bd9
3 changed files with 27 additions and 69 deletions

View File

@@ -3215,7 +3215,7 @@ static void Author_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfnam
return;
if (!(pdf->stats.author))
pdf->stats.author = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Author", NULL, &pdf->stats.author_base64);
pdf->stats.author = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Author", NULL);
}
#endif
@@ -3231,7 +3231,7 @@ static void Creator_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfna
return;
if (!(pdf->stats.creator))
pdf->stats.creator = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Creator", NULL, &pdf->stats.creator_base64);
pdf->stats.creator = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Creator", NULL);
}
#endif
@@ -3247,7 +3247,7 @@ static void ModificationDate_cb(struct pdf_struct *pdf, struct pdf_obj *obj, str
return;
if (!(pdf->stats.modificationdate))
pdf->stats.modificationdate = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/ModDate", NULL, &pdf->stats.modificationdate_base64);
pdf->stats.modificationdate = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/ModDate", NULL);
}
#endif
@@ -3263,7 +3263,7 @@ static void CreationDate_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct
return;
if (!(pdf->stats.creationdate))
pdf->stats.creationdate = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/CreationDate", NULL, &pdf->stats.creationdate_base64);
pdf->stats.creationdate = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/CreationDate", NULL);
}
#endif
@@ -3279,7 +3279,7 @@ static void Producer_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfn
return;
if (!(pdf->stats.producer))
pdf->stats.producer = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Producer", NULL, &pdf->stats.producer_base64);
pdf->stats.producer = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Producer", NULL);
}
#endif
@@ -3295,7 +3295,7 @@ static void Title_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname
return;
if (!(pdf->stats.title))
pdf->stats.title = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Title", NULL, &pdf->stats.title_base64);
pdf->stats.title = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Title", NULL);
}
#endif
@@ -3311,7 +3311,7 @@ static void Keywords_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfn
return;
if (!(pdf->stats.keywords))
pdf->stats.keywords = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Keywords", NULL, &pdf->stats.keywords_base64);
pdf->stats.keywords = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Keywords", NULL);
}
#endif
@@ -3327,7 +3327,7 @@ static void Subject_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfna
return;
if (!(pdf->stats.subject))
pdf->stats.subject = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Subject", NULL, &pdf->stats.subject_base64);
pdf->stats.subject = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Subject", NULL);
}
#endif
@@ -3511,46 +3511,22 @@ static void pdf_export_json(struct pdf_struct *pdf)
goto cleanup;
}
if (pdf->stats.author) {
if (pdf->stats.author)
cli_jsonstr(pdfobj, "Author", pdf->stats.author);
if (pdf->stats.author_base64)
cli_jsonbool(pdfobj, "Author_base64", 1);
}
if (pdf->stats.creator) {
if (pdf->stats.creator)
cli_jsonstr(pdfobj, "Creator", pdf->stats.creator);
if (pdf->stats.creator_base64)
cli_jsonbool(pdfobj, "Creator_base64", 1);
}
if (pdf->stats.producer) {
if (pdf->stats.producer)
cli_jsonstr(pdfobj, "Producer", pdf->stats.producer);
if (pdf->stats.producer_base64)
cli_jsonbool(pdfobj, "Producer_base64", 1);
}
if (pdf->stats.modificationdate) {
if (pdf->stats.modificationdate)
cli_jsonstr(pdfobj, "ModificationDate", pdf->stats.modificationdate);
if (pdf->stats.modificationdate_base64)
cli_jsonbool(pdfobj, "ModificationDate_base64", 1);
}
if (pdf->stats.creationdate) {
if (pdf->stats.creationdate)
cli_jsonstr(pdfobj, "CreationDate", pdf->stats.creationdate);
if (pdf->stats.creationdate_base64)
cli_jsonbool(pdfobj, "CreationDate_base64", 1);
}
if (pdf->stats.title) {
if (pdf->stats.title)
cli_jsonstr(pdfobj, "Title", pdf->stats.title);
if (pdf->stats.title_base64)
cli_jsonbool(pdfobj, "Title_base64", 1);
}
if (pdf->stats.subject) {
if (pdf->stats.subject)
cli_jsonstr(pdfobj, "Subject", pdf->stats.subject);
if (pdf->stats.subject_base64)
cli_jsonbool(pdfobj, "Subject_base64", 1);
}
if (pdf->stats.keywords) {
if (pdf->stats.keywords)
cli_jsonstr(pdfobj, "Keywords", pdf->stats.keywords);
if (pdf->stats.keywords_base64)
cli_jsonbool(pdfobj, "Keywords_base64", 1);
}
if (pdf->stats.ninvalidobjs)
cli_jsonint(pdfobj, "InvalidObjectCount", pdf->stats.ninvalidobjs);
if (pdf->stats.njs)

View File

@@ -89,21 +89,13 @@ struct pdf_stats {
int32_t nacroform; /* Number of AcroForm objects */
int32_t nxfa; /* Number of XFA objects */
char *author; /* Author of the PDF */
int8_t author_base64; /* Author string is base64 encoded */
char *creator; /* Application used to create the PDF */
int8_t creator_base64; /* Author string is base64 encoded */
char *producer; /* Application used to produce the PDF */
int8_t producer_base64; /* Application string is base64 encoded */
char *creationdate; /* Date the PDF was created */
int8_t creationdate_base64; /* Date of creation string is base64 encoded */
char *modificationdate; /* Date the PDF was modified */
int8_t modificationdate_base64; /* Date of modification string is base64 encoded */
char *creationdate; /* Date the PDF was created */
char *modificationdate; /* Date the PDF was modified */
char *title; /* Title of the PDF */
int8_t title_base64; /* Title string is base64 encoded */
char *subject; /* Subject of the PDF */
int8_t subject_base64; /* Subject string is base64 encoded */
char *keywords; /* Keywords of the PDF */
int8_t keywords_base64; /* Keywords string is base64 encoded */
};
@@ -152,7 +144,7 @@ int pdf_extract_obj(struct pdf_struct *pdf, struct pdf_obj *obj, uint32_t flags)
int pdf_findobj(struct pdf_struct *pdf);
struct pdf_obj *find_obj(struct pdf_struct *pdf, struct pdf_obj *obj, uint32_t objid);
char *pdf_parse_string(struct pdf_struct *pdf, struct pdf_obj *obj, const char *objstart, size_t objsize, const char *str, char **endchar, int8_t *base64);
char *pdf_parse_string(struct pdf_struct *pdf, struct pdf_obj *obj, const char *objstart, size_t objsize, const char *str, char **endchar);
struct pdf_array *pdf_parse_array(struct pdf_struct *pdf, struct pdf_obj *obj, size_t objsz, char *begin, char **endchar);
struct pdf_dict *pdf_parse_dict(struct pdf_struct *pdf, struct pdf_obj *obj, size_t objsz, char *begin, char **endchar);
int is_object_reference(char *begin, char **endchar, uint32_t *id);

View File

@@ -188,6 +188,7 @@ char *pdf_convert_utf(char *begin, size_t sz)
#endif
free(buf);
free(outbuf);
return res;
}
@@ -275,7 +276,7 @@ int is_object_reference(char *begin, char **endchar, uint32_t *id)
return 0;
}
char *pdf_parse_string(struct pdf_struct *pdf, struct pdf_obj *obj, const char *objstart, size_t objsize, const char *str, char **endchar, int8_t *base64)
char *pdf_parse_string(struct pdf_struct *pdf, struct pdf_obj *obj, const char *objstart, size_t objsize, const char *str, char **endchar)
{
const char *q = objstart;
char *p1, *p2;
@@ -408,7 +409,7 @@ char *pdf_parse_string(struct pdf_struct *pdf, struct pdf_obj *obj, const char *
switch (*p3) {
case '(':
case '<':
res = pdf_parse_string(pdf, obj, p3, objsize2, NULL, NULL, NULL);
res = pdf_parse_string(pdf, obj, p3, objsize2, NULL, NULL);
free(begin);
break;
default:
@@ -422,14 +423,8 @@ char *pdf_parse_string(struct pdf_struct *pdf, struct pdf_obj *obj, const char *
res = likelyutf ? pdf_convert_utf(p3, objsize2) : NULL;
if (!(res)) {
if (base64) {
res = (char*)cl_base64_encode(p1, len);
if (res)
*base64 = 1;
} else {
res = begin;
res[objsize2] = '\0';
}
res = begin;
res[objsize2] = '\0';
} else {
free(begin);
}
@@ -518,11 +513,6 @@ char *pdf_parse_string(struct pdf_struct *pdf, struct pdf_obj *obj, const char *
}
res = pdf_convert_utf(p1, len);
if (!res && base64) {
res = (char*)cl_base64_encode(p1, len);
if (res)
*base64 = 1;
}
if (res && endchar)
*endchar = p2;
@@ -684,7 +674,7 @@ struct pdf_dict *pdf_parse_dict(struct pdf_struct *pdf, struct pdf_obj *obj, siz
switch (begin[0]) {
case '(':
val = pdf_parse_string(pdf, obj, begin, objsz, NULL, &p1, NULL);
val = pdf_parse_string(pdf, obj, begin, objsz, NULL, &p1);
begin = p1+2;
break;
case '[':
@@ -700,7 +690,7 @@ struct pdf_dict *pdf_parse_dict(struct pdf_struct *pdf, struct pdf_obj *obj, siz
}
}
val = pdf_parse_string(pdf, obj, begin, objsz, NULL, &p1, NULL);
val = pdf_parse_string(pdf, obj, begin, objsz, NULL, &p1);
begin = p1+2;
break;
default:
@@ -882,7 +872,7 @@ struct pdf_array *pdf_parse_array(struct pdf_struct *pdf, struct pdf_obj *obj, s
/* Not a dictionary. Intentionally fall through. */
case '(':
val = pdf_parse_string(pdf, obj, begin, objsz, NULL, &begin, NULL);
val = pdf_parse_string(pdf, obj, begin, objsz, NULL, &begin);
begin += 2;
break;
case '[':