mirror of
https://github.com/Cisco-Talos/clamav.git
synced 2026-02-02 19:11:25 -05:00
HWPML: added hwpml_keys for hwpml parsing
This commit is contained in:
@@ -23,8 +23,13 @@
|
||||
#include "clamav-config.h"
|
||||
#endif
|
||||
|
||||
#if HAVE_ICONV
|
||||
#include <iconv.h>
|
||||
#if HAVE_LIBXML2
|
||||
#ifdef _WIN32
|
||||
#ifndef LIBXML_WRITER_ENABLED
|
||||
#define LIBXML_WRITER_ENABLED 1
|
||||
#endif
|
||||
#endif
|
||||
#include <libxml/xmlreader.h>
|
||||
#endif
|
||||
|
||||
#include <sys/types.h>
|
||||
@@ -39,6 +44,8 @@
|
||||
#include "str.h"
|
||||
#include "others.h"
|
||||
#include "scanners.h"
|
||||
#include "msxml_parser.h"
|
||||
#include "msxml.h"
|
||||
#include "json_api.h"
|
||||
#include "hwp.h"
|
||||
#if HAVE_JSON
|
||||
@@ -47,6 +54,7 @@
|
||||
|
||||
#define HWP5_DEBUG 0
|
||||
#define HWP3_DEBUG 1
|
||||
#define HWPML_DEBUG 1
|
||||
#if HWP5_DEBUG
|
||||
#define hwp5_debug(...) cli_dbgmsg(__VA_ARGS__)
|
||||
#else
|
||||
@@ -57,6 +65,11 @@
|
||||
#else
|
||||
#define hwp3_debug(...) ;
|
||||
#endif
|
||||
/*
 * hwpml_debug(): debug tracing for the HWPML scanner.
 *
 * Forwards its arguments to cli_dbgmsg() when HWPML_DEBUG is non-zero.
 * When tracing is disabled it expands to a void expression instead of a
 * bare `;`, so that `if (c) hwpml_debug(...); else ...` still parses as a
 * single statement followed by its else branch.
 */
#if HWPML_DEBUG
#define hwpml_debug(...) cli_dbgmsg(__VA_ARGS__)
#else
#define hwpml_debug(...) ((void)0)
#endif
|
||||
|
||||
/*
 * Callback type accepted by decompress_and_callback() as its `cb` argument.
 * It is called with the caller-supplied cbdata pointer, an int `fd`
 * (presumably a file descriptor for the decompressed data - TODO confirm)
 * and the scan context, and returns an int.
 */
typedef int (*hwp_cb)(void *cbdata, int fd, cli_ctx *ctx);
|
||||
static int decompress_and_callback(cli_ctx *ctx, fmap_t *input, off_t at, size_t len, const char *parent, hwp_cb cb, void *cbdata)
|
||||
@@ -488,8 +501,8 @@ static inline int parsehwp3_docsummary(cli_ctx *ctx, off_t offset)
|
||||
return ret;
|
||||
}
|
||||
#else
|
||||
UNUSED(ctx);
|
||||
UNUSED(offset);
|
||||
UNUSEDPARAM(ctx);
|
||||
UNUSEDPARAM(offset);
|
||||
#endif
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
@@ -637,3 +650,80 @@ int cli_scanhwp3(cli_ctx *ctx)
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*** HWPML (hijacking the msxml parser) ***/
|
||||
|
||||
static const struct key_entry hwpml_keys[] = {
|
||||
{ "hwpml", "HWPML", MSXML_JSON_ROOT | MSXML_JSON_ATTRIB },
|
||||
|
||||
/* HEAD - Document Properties */
|
||||
{ "head", "Head", MSXML_JSON_ROOT },
|
||||
{ "docsummary", "DocumentProperties", MSXML_JSON_WRKPTR },
|
||||
{ "title", "Title", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
|
||||
{ "author", "Author", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
|
||||
{ "date", "Date", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
|
||||
{ "docsetting", "DocumentSettings", MSXML_JSON_WRKPTR },
|
||||
{ "beginnumber", "BeginNumber", MSXML_JSON_WRKPTR | MSXML_JSON_ATTRIB },
|
||||
{ "caretpos", "CaretPos", MSXML_JSON_WRKPTR | MSXML_JSON_ATTRIB },
|
||||
{ "bindatalist", "BinDataList", MSXML_JSON_WRKPTR },
|
||||
{ "binitem", "BinItem", MSXML_JSON_WRKPTR | MSXML_JSON_ATTRIB },
|
||||
{ "facenamelist", "FaceNameList", MSXML_IGNORE_ELEM }, /* fonts list */
|
||||
{ "borderfilllist", "BorderFillList", MSXML_IGNORE_ELEM }, /* borders list */
|
||||
{ "charshapelist", "CharShapeList", MSXML_IGNORE_ELEM }, /* character shapes */
|
||||
{ "tabdeflist", "TableDefList", MSXML_IGNORE_ELEM }, /* table defs */
|
||||
{ "numberinglist", "NumberingList", MSXML_IGNORE_ELEM }, /* numbering list */
|
||||
{ "parashapelist", "ParagraphShapeList", MSXML_IGNORE_ELEM }, /* paragraph shapes */
|
||||
{ "stylelist", "StyleList", MSXML_IGNORE_ELEM }, /* styles */
|
||||
{ "compatibledocument", "WordCompatibility", MSXML_IGNORE_ELEM }, /* word compatibility data */
|
||||
|
||||
/* BODY - Document Contents */
|
||||
{ "body", "Body", MSXML_IGNORE_ELEM }, /* document contents (we could build a document contents summary */
|
||||
|
||||
/* TAIL - Document Attachments */
|
||||
{ "tail", "Tail", MSXML_JSON_ROOT },
|
||||
{ "bindatastorage", "BinaryDataStorage", MSXML_JSON_WRKPTR },
|
||||
{ "bindata", "BinaryData", MSXML_SCAN_B64 | MSXML_JSON_WRKPTR | MSXML_JSON_ATTRIB },
|
||||
{ "scriptcode", "ScriptCodeStorage", MSXML_JSON_WRKPTR | MSXML_JSON_ATTRIB },
|
||||
{ "scriptheader", "ScriptHeader", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
|
||||
{ "scriptsource", "ScriptSource", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE }
|
||||
};
|
||||
static size_t num_hwpml_keys = sizeof(hwpml_keys) / sizeof(struct key_entry);
|
||||
|
||||
/**
 * Scan an HWPML document by driving the shared msxml parser with the
 * hwpml_keys element table.
 *
 * With libxml2 available, an xmlTextReader is created over the context's
 * fmap (read through msxml_read_cb) and passed to
 * cli_msxml_parse_document(). The reader is closed and freed before
 * returning.
 *
 * @param ctx  scan context; must not be NULL.
 * @return CL_ENULLARG if ctx is NULL (libxml2 builds);
 *         the value returned by cli_msxml_parse_document() when the reader
 *         was created;
 *         if the reader could not be created, the value returned by
 *         cli_json_parse_error() in HAVE_JSON builds, otherwise CL_SUCCESS;
 *         CL_SUCCESS in builds without libxml2, where only a debug message
 *         is emitted.
 */
int cli_scanhwpml(cli_ctx *ctx)
{
#if HAVE_LIBXML2
    struct msxml_cbdata cbdata;
    xmlTextReaderPtr reader = NULL;
    int ret = CL_SUCCESS;

    cli_dbgmsg("in cli_scanhwpml()\n");

    if (!ctx) {
        return CL_ENULLARG;
    }

    /* Start the read callback from a zeroed state, reading from the current fmap. */
    memset(&cbdata, 0, sizeof(cbdata));
    cbdata.map = *ctx->fmap;

    reader = xmlReaderForIO(msxml_read_cb, NULL, &cbdata, "hwpml.xml", NULL, CLAMAV_MIN_XMLREADER_FLAGS);
    if (!reader) {
        cli_dbgmsg("cli_scanhwpml: cannot initialize xmlReader\n");

#if HAVE_JSON
        ret = cli_json_parse_error(ctx->wrkproperty, "HWPML_ERROR_XML_READER_IO");
#endif
        return ret; /* libxml2 failed! */
    }

    /* NOTE(review): meaning of the trailing `1` is not visible here - confirm
     * against the cli_msxml_parse_document() prototype. */
    ret = cli_msxml_parse_document(ctx, reader, hwpml_keys, num_hwpml_keys, 1);

    xmlTextReaderClose(reader);
    xmlFreeTextReader(reader);
    return ret;
#else
    UNUSEDPARAM(ctx);
    cli_dbgmsg("in cli_scanhwpml()\n");
    cli_dbgmsg("cli_scanhwpml: scanning hwpml documents requires libxml2!\n");

    return CL_SUCCESS;
#endif
}
|
||||
|
||||
@@ -51,4 +51,7 @@ int cli_scanhwp5_stream(cli_ctx *ctx, hwp5_header_t *hwp5, char *name, int fd);
|
||||
/* HWP 3.0 - UNIQUE FORMAT */
|
||||
int cli_scanhwp3(cli_ctx *ctx);
|
||||
|
||||
/* HWPML - SINGLE XML DOCUMENT (similar to MSXML) */
|
||||
int cli_scanhwpml(cli_ctx *ctx);
|
||||
|
||||
#endif /* __HWP_H__ */
|
||||
|
||||
@@ -79,24 +79,6 @@ static const struct key_entry msxml_keys[] = {
|
||||
};
|
||||
static size_t num_msxml_keys = sizeof(msxml_keys) / sizeof(struct key_entry);
|
||||
|
||||
/*
 * Reader state kept in struct msxml_cbdata (its `state` member).
 * NOTE(review): the ENTITY_* names suggest the steps of recognising a
 * numeric character entity - confirm against the read callback.
 */
enum msxml_state {
    MSXML_STATE_NORMAL         = 0,
    MSXML_STATE_ENTITY_START_1 = 1,
    MSXML_STATE_ENTITY_START_2 = 2,
    MSXML_STATE_ENTITY_HEX     = 3,
    MSXML_STATE_ENTITY_DEC     = 4,
    MSXML_STATE_ENTITY_CLOSE   = 5,
    MSXML_STATE_ENTITY_NONE    = 6
};
|
||||
|
||||
struct msxml_cbdata {
|
||||
enum msxml_state state;
|
||||
fmap_t *map;
|
||||
const unsigned char *window;
|
||||
off_t winpos, mappos;
|
||||
size_t winsize;
|
||||
};
|
||||
|
||||
static inline size_t msxml_read_cb_new_window(struct msxml_cbdata *cbdata)
|
||||
{
|
||||
const unsigned char *new_window = NULL;
|
||||
|
||||
@@ -30,6 +30,25 @@
|
||||
|
||||
#include "others.h"
|
||||
|
||||
/*
 * Reader state stored in the `state` member of struct msxml_cbdata.
 * NOTE(review): the ENTITY_* names suggest the steps of recognising a
 * numeric character entity - confirm against msxml_read_cb().
 */
enum msxml_state {
    MSXML_STATE_NORMAL         = 0,
    MSXML_STATE_ENTITY_START_1 = 1,
    MSXML_STATE_ENTITY_START_2 = 2,
    MSXML_STATE_ENTITY_HEX     = 3,
    MSXML_STATE_ENTITY_DEC     = 4,
    MSXML_STATE_ENTITY_CLOSE   = 5,
    MSXML_STATE_ENTITY_NONE    = 6
};
|
||||
|
||||
struct msxml_cbdata {
|
||||
enum msxml_state state;
|
||||
fmap_t *map;
|
||||
const unsigned char *window;
|
||||
off_t winpos, mappos;
|
||||
size_t winsize;
|
||||
};
|
||||
|
||||
int msxml_read_cb(void *ctx, char *buffer, int len);
|
||||
int cli_scanmsxml(cli_ctx *ctx);
|
||||
|
||||
#endif /* __MSXML_H */
|
||||
|
||||
@@ -2275,6 +2275,9 @@ static int cli_scanraw(cli_ctx *ctx, cli_file_t type, uint8_t typercg, cli_file_
|
||||
case CL_TYPE_XML_XL:
|
||||
ret = cli_scanmsxml(ctx);
|
||||
break;
|
||||
case CL_TYPE_XML_HWP:
|
||||
ret = cli_scanhwpml(ctx);
|
||||
break;
|
||||
case CL_TYPE_RARSFX:
|
||||
if(type != CL_TYPE_RAR && have_rar && SCAN_ARCHIVE && (DCONF_ARCH & ARCH_CONF_RAR)) {
|
||||
char *tmpname = NULL;
|
||||
@@ -2681,7 +2684,8 @@ static int magic_scandesc(cli_ctx *ctx, cli_file_t type)
|
||||
type == CL_TYPE_OOXML_XL ||
|
||||
type == CL_TYPE_XML_WORD ||
|
||||
type == CL_TYPE_XML_XL ||
|
||||
type == CL_TYPE_HWP3) {
|
||||
type == CL_TYPE_HWP3 ||
|
||||
type == CL_TYPE_XML_HWP) {
|
||||
ctx->properties = json_object_new_object();
|
||||
if (NULL == ctx->properties) {
|
||||
cli_errmsg("magic_scandesc: no memory for json properties object\n");
|
||||
@@ -2851,6 +2855,10 @@ static int magic_scandesc(cli_ctx *ctx, cli_file_t type)
|
||||
ret = cli_scanmsxml(ctx);
|
||||
break;
|
||||
|
||||
case CL_TYPE_XML_HWP:
|
||||
ret = cli_scanhwpml(ctx);
|
||||
break;
|
||||
|
||||
case CL_TYPE_XDP:
|
||||
ret = cli_scanxdp(ctx);
|
||||
break;
|
||||
|
||||
Reference in New Issue
Block a user