mirror of
https://github.com/pdfme/pdfme.git
synced 2026-06-02 19:29:57 -04:00
* fix: two unbounded-cache memory leaks in common and schemas
Two module-level Map caches that never evict and store multi-MB strings
as keys, silently leaking for the entire lifetime of any consumer.
1. packages/common/src/expression.ts — parseDataCache
parseData() was memoized via a module-level parseDataCache keyed by
JSON.stringify(data). replacePlaceholders() calls it with a merged
{ ...schemaNameDefaults, ...variables } object where values may be
arbitrary strings from the caller. Whenever inputs contain base64
(image schemas with embedded data URLs, embedded fonts, large text),
the cache key is a multi-MB JSON string that gets pinned permanently;
every unique inputs state adds its own key, never collected. Parsing
is O(fields) and cheap, so removing the cache is strictly a win.
Regression test: packages/common/__tests__/expression.test.ts
'replacePlaceholders memory safety > does not retain call inputs in
a module-level cache' — runs 30 replacePlaceholders() calls with
unique ~500 KB payloads, captures a V8 heap snapshot via
v8.writeHeapSnapshot, aggregates string nodes >= 200 KB and asserts
the total retained size is below 2 MB. Pre-fix: ~30 MB retained
(FAILS). Post-fix: 0 bytes retained (passes).
2. packages/schemas/src/graphics/image.ts — getCacheKey
getCacheKey(schema, input) returned `${schema.type}${input}`, using
the full base64 bytes of the image as part of the cache key. Every
unique image processed by the PDF render path added a permanent Map
entry whose key byte length matched the image itself.
Replaced with a short fingerprint that samples the total length plus
three 16-char regions (first, middle, last). The middle-region
sample is essential: base64 PNGs share a common header and IEND
trailer, so distinct images of the same size would collide if only
first/last regions were sampled. Middle bytes are pixel data and
differ between distinct images with overwhelming probability. Keys
stay under 80 chars regardless of input size.
Regression tests: packages/schemas/__tests__/image.test.ts
- 'does not pin the full base64 input as a cache key' — asserts
key length < 100 chars. Pre-fix: 139 chars for a minimal PNG and
proportionally more for realistic images (FAILS).
- 'distinguishes different images via the fingerprint' — guards
against future over-shortening of the fingerprint that could
reintroduce collisions between distinct images.
Both leaks were originally identified via a V8 heap-snapshot diff taken
across a UI workload (typing + field tabbing) against a consumer app
with image schemas carrying base64 content. Before the fix, the top two
growing allocations by retained size were multi-MB string entries — one
per module-level cache in this PR — together accounting for hundreds of
MB of retained JS heap in a single 3-iteration run. After the fix, both
string entries disappear from the top 25 growing allocations and
aggregate JS heap is net flat / slightly shrinking across iterations.
No public API change. No behavioral change for consumers. Both caches
were module-local implementation details.
* fix(schemas): harden image cache key with FNV-1a hash; fix stale test comments
Addresses Greptile review on #1426:
- Replace 3-region sampling fingerprint in getCacheKey with an FNV-1a
32-bit hash over the full input. The old first-16 slice was a
constant data-URI prefix for any image of the same MIME type,
contributing no entropy; hashing every byte removes that weakness
at the same O(n) cost without retaining any slice as a Map key.
Key format is now `${type}:${len}:${fnv1a-hex}` (~40 chars).
- Rewrite stale comments in image.test.ts that referred to a
padding/mutation scheme the test never performs, and update the
fingerprint-format comment to match the new hash-based key.
- Add trailing newline to expression.test.ts.
All pre-existing and new tests still pass.
471 lines
14 KiB
TypeScript
471 lines
14 KiB
TypeScript
import * as acorn from 'acorn';
|
|
import type { Node as AcornNode, Identifier, Property } from 'estree';
|
|
import type { SchemaPageArray } from './types.js';
|
|
|
|
// Memoizes compiled placeholder evaluators, keyed by the trimmed source text
// found between a `{` … `}` pair. Entries are added by evaluatePlaceholders
// after an expression parses and validates; nothing in this file evicts them.
const expressionCache = new Map<string, (context: Record<string, unknown>) => unknown>();
|
|
|
|
/**
 * Build a copy of `data` in which every string value that is valid JSON is
 * replaced by its decoded form. Non-string values, and strings that fail to
 * parse, are carried over unchanged.
 *
 * This function is intentionally NOT memoized. An earlier version cached
 * results in a module-level `parseDataCache` Map keyed by
 * `JSON.stringify(data)`; that cache was never evicted and its keys grew to
 * several MB whenever `data` carried base64 content (e.g. image schemas),
 * pinning one such key per distinct input state for the app lifetime.
 * Decoding is O(fields), so recomputing on every call is the cheaper option.
 *
 * @param data - Flat map of field name to raw value.
 * @returns A new object with the same keys and JSON-decoded string values.
 */
const parseData = (data: Record<string, unknown>): Record<string, unknown> => {
  // Decode one value; anything that is not parseable JSON text passes through.
  const decode = (raw: unknown): unknown => {
    if (typeof raw !== 'string') {
      return raw;
    }
    try {
      return JSON.parse(raw) as unknown;
    } catch {
      return raw;
    }
  };

  const decodedPairs = Object.entries(data).map(
    ([name, raw]): [string, unknown] => [name, decode(raw)],
  );
  return Object.fromEntries(decodedPairs);
};
|
|
|
|
/** Left-pads the decimal form of `num` with zeros to at least two characters. */
const padZero = (num: number): string => {
  const digits = String(num);
  return digits.padStart(2, '0');
};

/** Formats the local calendar date of `date` as `YYYY/MM/DD`. */
const formatDate = (date: Date): string => {
  const year = date.getFullYear();
  // getMonth() is zero-based, so shift it to the 1–12 range for display.
  const month = padZero(date.getMonth() + 1);
  const day = padZero(date.getDate());
  return [year, month, day].join('/');
};

/** Formats the local date and time of `date` as `YYYY/MM/DD HH:mm`. */
const formatDateTime = (date: Date): string => {
  const hours = padZero(date.getHours());
  const minutes = padZero(date.getMinutes());
  return `${formatDate(date)} ${hours}:${minutes}`;
};
|
|
|
|
/**
 * `Object.assign`-like merge that guards against prototype pollution.
 *
 * Unlike `Object.assign`, `target` is not mutated: a shallow copy of it is
 * created and the sources are merged into that copy, left to right. From each
 * source only own enumerable keys are taken, and the keys `__proto__`,
 * `constructor` and `prototype` are never copied.
 *
 * @param target - Base object; must not be `null` or `undefined`.
 * @param sources - Objects to merge in; `null`/`undefined` entries are skipped.
 * @returns A new object holding the merged properties.
 * @throws TypeError when `target` is `null` or `undefined`.
 */
const safeAssign = (
  target: Record<string, unknown>,
  ...sources: Array<Record<string, unknown> | null | undefined>
): Record<string, unknown> => {
  if (target == null) {
    throw new TypeError('Cannot convert undefined or null to object');
  }

  const blockedKeys = ['__proto__', 'constructor', 'prototype'];
  const merged = { ...target };

  sources.forEach((source) => {
    if (source == null) {
      return;
    }
    // Object.keys yields own enumerable string keys only, so inherited
    // properties are never copied.
    Object.keys(source)
      .filter((name) => !blockedKeys.includes(name))
      .forEach((name) => {
        merged[name] = source[name];
      });
  });

  return merged;
};
|
|
|
|
/**
 * Restricted stand-in for the global `Object`, exposed to placeholder
 * expressions under the name `Object` (see `allowedGlobals`).
 *
 * Only read-only helpers are forwarded, plus a non-mutating `assign`.
 * Deliberately left out because of security concerns:
 *  - methods with side effects: `create`, `freeze`, `seal`
 *  - prototype / descriptor access: `getOwnPropertyDescriptor`,
 *    `getPrototypeOf`, `setPrototypeOf`, `defineProperty`, `defineProperties`,
 *    `getOwnPropertyNames`, `getOwnPropertySymbols`
 */
const safeObject = {
  // Enumeration helpers
  keys: Object.keys,
  values: Object.values,
  entries: Object.entries,
  fromEntries: Object.fromEntries,

  // Comparison / membership
  is: Object.is,
  // `Object.hasOwnProperty` is the method the `Object` constructor inherits
  // from `Object.prototype`, not a static helper taking (obj, key).
  hasOwnProperty: Object.hasOwnProperty,

  // Pollution-safe replacement for Object.assign
  assign: safeAssign,
};
|
|
|
|
/**
 * Whitelist of global names that placeholder expressions may reference.
 * `evaluateAST` consults this table only after a lookup in the caller-supplied
 * context has missed, so context variables shadow these names. Any identifier
 * found in neither place is rejected as undefined.
 */
const allowedGlobals: Record<string, unknown> = {
  // Built-in namespaces and constructors (`Object` is the restricted facade).
  Math,
  String,
  Number,
  Boolean,
  Array,
  Object: safeObject,
  Date,
  JSON,

  // Numeric parsing helpers
  isNaN,
  parseFloat,
  parseInt,

  // URI encoding / decoding helpers
  decodeURI,
  decodeURIComponent,
  encodeURI,
  encodeURIComponent,
};
|
|
|
|
/**
 * Throws when `propName` is a property name that placeholder expressions must
 * never access: prototype-chain entry points and the legacy accessor /
 * coercion methods.
 */
const assertAllowedPropertyName = (propName: string): void => {
  if (['constructor', '__proto__', 'prototype'].includes(propName)) {
    throw new Error('Access to prohibited property');
  }
  const prohibitedMethods = [
    // Prototype pollution helpers
    '__defineGetter__',
    '__defineSetter__',
    '__lookupGetter__',
    '__lookupSetter__',
    // Coercion hooks
    'toLocaleString',
    'valueOf',
  ];
  if (prohibitedMethods.includes(propName)) {
    throw new Error(`Access to prohibited method: ${propName}`);
  }
};

/**
 * Statically checks that a parsed placeholder expression only uses the
 * supported, side-effect-free subset of JavaScript syntax.
 *
 * Accepted node types: literals, identifiers, binary / logical / unary /
 * conditional expressions, member access, calls, array and object literals,
 * and arrow functions whose parameters are plain identifiers. Every child node
 * is validated recursively.
 *
 * Property names are checked for dot access (`a.constructor`) and for bracket
 * access with a string literal (`a['constructor']`). Keys computed at run time
 * cannot be checked here.
 *
 * @param node - Root of the (sub)tree to validate.
 * @throws Error on unsupported syntax or access to a prohibited property.
 */
const validateAST = (node: AcornNode): void => {
  switch (node.type) {
    case 'Literal':
    case 'Identifier':
      break;

    case 'BinaryExpression':
    case 'LogicalExpression':
      validateAST(node.left);
      validateAST(node.right);
      break;

    case 'UnaryExpression':
      validateAST(node.argument);
      break;

    case 'ConditionalExpression':
      validateAST(node.test);
      validateAST(node.consequent);
      validateAST(node.alternate);
      break;

    case 'MemberExpression': {
      validateAST(node.object);
      if (node.computed) {
        validateAST(node.property);
        // A string-literal key is known statically, so hold it to the same
        // rules as dot access instead of letting `a['valueOf']` slip through.
        if (node.property.type === 'Literal' && typeof node.property.value === 'string') {
          assertAllowedPropertyName(node.property.value);
        }
      } else {
        const propName = (node.property as Identifier).name;
        if (typeof propName === 'string') {
          assertAllowedPropertyName(propName);
        }
      }
      break;
    }

    case 'CallExpression':
      validateAST(node.callee);
      node.arguments.forEach(validateAST);
      break;

    case 'ArrayExpression':
      node.elements.forEach((elem) => {
        // Holes in sparse array literals are represented as null.
        if (elem) validateAST(elem);
      });
      break;

    case 'ObjectExpression':
      node.properties.forEach((prop) => {
        // Spread properties have no key/value pair; reject them explicitly
        // rather than crashing on an undefined child node.
        if (prop.type !== 'Property') {
          throw new Error(`Unsupported syntax in placeholder: ${prop.type}`);
        }
        // A literal `__proto__` key would set the new object's prototype.
        const literalKey =
          !prop.computed && prop.key.type === 'Identifier'
            ? prop.key.name
            : prop.key.type === 'Literal'
              ? prop.key.value
              : undefined;
        if (literalKey === '__proto__') {
          throw new Error('Access to prohibited property');
        }
        validateAST(prop.key);
        validateAST(prop.value);
      });
      break;

    case 'ArrowFunctionExpression':
      node.params.forEach((param) => {
        if (param.type !== 'Identifier') {
          throw new Error('Only identifier parameters are supported in arrow functions');
        }
        validateAST(param);
      });
      validateAST(node.body);
      break;

    default:
      throw new Error(`Unsupported syntax in placeholder: ${node.type}`);
  }
};
|
|
|
|
/**
 * Reads the property named by `memberNode` from an already-evaluated `target`.
 *
 * The key is the identifier name for dot access, or the evaluated key
 * expression for bracket access. Keys must be strings or numbers, and the
 * prototype-related names are rejected at run time so that dynamically built
 * keys cannot bypass the static validation.
 */
const readMemberProperty = (
  target: unknown,
  memberNode: Extract<AcornNode, { type: 'MemberExpression' }>,
  context: Record<string, unknown>,
): unknown => {
  const prop: unknown = memberNode.computed
    ? evaluateAST(memberNode.property, context)
    : (memberNode.property as Identifier).name;

  if (typeof prop !== 'string' && typeof prop !== 'number') {
    throw new Error('Invalid property access');
  }
  if (typeof prop === 'string') {
    if (['constructor', '__proto__', 'prototype'].includes(prop)) {
      throw new Error('Access to prohibited property');
    }
    // Block prototype pollution methods
    if (
      ['__defineGetter__', '__defineSetter__', '__lookupGetter__', '__lookupSetter__'].includes(
        prop,
      )
    ) {
      throw new Error(`Access to prohibited method: ${prop}`);
    }
  }
  return (target as Record<string, unknown>)[prop];
};

/**
 * Interprets a validated placeholder expression against `context`.
 *
 * Identifiers resolve to own properties of `context` first and then to
 * `allowedGlobals`; anything else is an error. Operators follow JavaScript
 * semantics, including short-circuiting for `&&` and `||`.
 *
 * @param node - Expression (sub)tree, expected to have passed `validateAST`.
 * @param context - Variables visible to the expression.
 * @returns The value the expression evaluates to.
 * @throws Error for undefined variables, unsupported syntax or operators,
 *   prohibited property access, and calls on non-functions. Errors raised by
 *   the evaluated operations themselves (e.g. reading a property of `null`)
 *   propagate unchanged.
 */
const evaluateAST = (node: AcornNode, context: Record<string, unknown>): unknown => {
  switch (node.type) {
    case 'Literal':
      return node.value;

    case 'Identifier': {
      if (Object.prototype.hasOwnProperty.call(context, node.name)) {
        return context[node.name];
      }
      if (Object.prototype.hasOwnProperty.call(allowedGlobals, node.name)) {
        return allowedGlobals[node.name];
      }
      throw new Error(`Undefined variable: ${node.name}`);
    }

    case 'BinaryExpression': {
      // Operands are typed as number only to satisfy the compiler; at run time
      // they keep whatever type they evaluated to (e.g. `+` concatenates strings).
      const left = evaluateAST(node.left, context) as number;
      const right = evaluateAST(node.right, context) as number;
      switch (node.operator) {
        case '+':
          return left + right;
        case '-':
          return left - right;
        case '*':
          return left * right;
        case '/':
          return left / right;
        case '%':
          return left % right;
        case '**':
          return left ** right;
        // Loose equality is intentional: it mirrors the operator the
        // expression author wrote.
        case '==':
          return left == right;
        case '!=':
          return left != right;
        case '===':
          return left === right;
        case '!==':
          return left !== right;
        case '<':
          return left < right;
        case '>':
          return left > right;
        case '<=':
          return left <= right;
        case '>=':
          return left >= right;
        default:
          throw new Error(`Unsupported operator: ${node.operator}`);
      }
    }

    case 'LogicalExpression': {
      const left = evaluateAST(node.left, context);
      // Evaluate the right operand lazily so guards such as `a && a.b` do not
      // fail while evaluating `a.b` when `a` is null or undefined.
      switch (node.operator) {
        case '&&':
          return left ? evaluateAST(node.right, context) : left;
        case '||':
          return left ? left : evaluateAST(node.right, context);
        default:
          throw new Error(`Unsupported operator: ${node.operator}`);
      }
    }

    case 'UnaryExpression': {
      const arg = evaluateAST(node.argument, context) as number;
      switch (node.operator) {
        case '+':
          return +arg;
        case '-':
          return -arg;
        case '!':
          return !arg;
        default:
          throw new Error(`Unsupported operator: ${node.operator}`);
      }
    }

    case 'ConditionalExpression':
      return evaluateAST(node.test, context)
        ? evaluateAST(node.consequent, context)
        : evaluateAST(node.alternate, context);

    case 'MemberExpression':
      return readMemberProperty(evaluateAST(node.object, context), node, context);

    case 'CallExpression': {
      const calleeNode = node.callee;

      if (calleeNode.type === 'MemberExpression') {
        // Evaluate the receiver exactly once and reuse it both for the method
        // lookup and as `this`. Re-evaluating it would double the work for
        // every link of a method chain and repeat any getter side effects.
        const receiver = evaluateAST(calleeNode.object, context);
        const method = readMemberProperty(receiver, calleeNode, context);
        const args = node.arguments.map((argNode) => evaluateAST(argNode, context));

        if (typeof method !== 'function') {
          throw new Error('Attempted to call a non-function');
        }
        // Methods may only be invoked on plain values; function receivers
        // (and therefore `.call` / `.apply` / `.bind`) are rejected.
        const receiverIsValue =
          receiver !== null &&
          (typeof receiver === 'object' ||
            typeof receiver === 'number' ||
            typeof receiver === 'string' ||
            typeof receiver === 'boolean');
        if (!receiverIsValue) {
          throw new Error('Invalid object in member function call');
        }
        return (method as (...args: unknown[]) => unknown).call(receiver, ...args);
      }

      const callee = evaluateAST(calleeNode, context);
      const args = node.arguments.map((argNode) => evaluateAST(argNode, context));
      if (typeof callee !== 'function') {
        throw new Error('Attempted to call a non-function');
      }
      return (callee as (...args: unknown[]) => unknown)(...args);
    }

    case 'ArrowFunctionExpression': {
      const params = node.params.map((param) => (param as Identifier).name);
      const body = node.body;

      return (...args: unknown[]) => {
        // Each invocation gets its own scope layered over the defining context.
        const newContext = { ...context };
        params.forEach((param, index) => {
          newContext[param] = args[index];
        });
        return evaluateAST(body, newContext);
      };
    }

    case 'ArrayExpression':
      // Holes in sparse array literals evaluate to null.
      return node.elements.map((elem) => (elem ? evaluateAST(elem, context) : null));

    case 'ObjectExpression': {
      const objResult: Record<string, unknown> = {};
      for (const prop of node.properties) {
        if (prop.type !== 'Property') {
          throw new Error(`Unsupported syntax in placeholder: ${prop.type}`);
        }

        let key: string;
        if (!prop.computed && prop.key.type === 'Identifier') {
          // `{ name: v }` — the identifier is the key itself.
          key = prop.key.name;
        } else {
          // `{ [expr]: v }` or a literal key — the key is the expression value.
          const evaluatedKey = evaluateAST(prop.key, context);
          if (typeof evaluatedKey !== 'string' && typeof evaluatedKey !== 'number') {
            throw new Error('Object property keys must be strings or numbers');
          }
          key = String(evaluatedKey);
        }

        // Assigning to `__proto__` would replace the result's prototype
        // instead of creating a property.
        if (key === '__proto__') {
          throw new Error('Access to prohibited property');
        }
        objResult[key] = evaluateAST(prop.value, context);
      }
      return objResult;
    }

    default:
      throw new Error(`Unsupported syntax in placeholder: ${node.type}`);
  }
};
|
|
|
|
/**
 * Expands every `{expression}` placeholder found in `content`.
 *
 * The text is scanned left to right. For each `{`, the matching `}` is located
 * by brace counting (so nested object literals stay inside one placeholder).
 * The trimmed text in between is parsed, validated and evaluated against
 * `context`, and the placeholder is replaced by `String(result)`. Compiled
 * evaluators are memoized in `expressionCache`, keyed by the expression text.
 *
 * A placeholder that fails to parse, validate or evaluate is copied to the
 * output verbatim, braces included.
 *
 * NOTE(review): `acorn.parseExpressionAt` is called at offset 0 and appears to
 * parse one leading expression without requiring that it consume the whole
 * string; confirm whether trailing text should be rejected.
 *
 * @param arg.content - Text that may contain placeholders.
 * @param arg.context - Variables visible to the expressions.
 * @returns `content` with all evaluable placeholders substituted.
 * @throws Error('Invalid placeholder') when a `{` has no matching `}`.
 */
const evaluatePlaceholders = (arg: {
  content: string;
  context: Record<string, unknown>;
}): string => {
  const { content, context } = arg;

  const pieces: string[] = [];
  let cursor = 0;

  while (cursor < content.length) {
    const open = content.indexOf('{', cursor);
    if (open === -1) {
      // No more placeholders: keep the remaining text as-is.
      pieces.push(content.slice(cursor));
      break;
    }
    pieces.push(content.slice(cursor, open));

    // Walk forward until the brace opened at `open` is balanced. On exit,
    // `close` points one past the matching `}` (or at end of input).
    let depth = 1;
    let close = open + 1;
    for (; close < content.length && depth > 0; close++) {
      const ch = content[close];
      if (ch === '{') {
        depth++;
      } else if (ch === '}') {
        depth--;
      }
    }
    if (depth !== 0) {
      throw new Error('Invalid placeholder');
    }

    const code = content.slice(open + 1, close - 1).trim();
    try {
      let evalFunc = expressionCache.get(code);
      if (evalFunc === undefined) {
        const ast = acorn.parseExpressionAt(code, 0, { ecmaVersion: 'latest' }) as AcornNode;
        validateAST(ast);
        const compiled = (ctx: Record<string, unknown>): unknown => evaluateAST(ast, ctx);
        // Only expressions that parsed and validated are cached.
        expressionCache.set(code, compiled);
        evalFunc = compiled;
      }
      pieces.push(String(evalFunc(context)));
    } catch {
      // Best effort: leave the unevaluable placeholder untouched in the output.
      pieces.push(content.slice(open, close));
    }

    cursor = close;
  }

  return pieces.join('');
};
|
|
|
|
/**
 * Substitutes `{expression}` placeholders in `content`.
 *
 * The evaluation context is assembled in this order (later entries win):
 *  1. `date` and `dateTime`, formatted from the current local time;
 *  2. one entry per schema name — the schema's `content` when it is
 *     `readOnly`, otherwise an empty string;
 *  3. the caller's `variables`.
 * String values from steps 2 and 3 are JSON-decoded where possible. Context
 * values that themselves contain both `{` and `}` are expanded once before
 * `content` is evaluated.
 *
 * @param arg.content - Template text; returned unchanged when it is empty, not
 *   a string, or lacks either brace character.
 * @param arg.variables - Caller-supplied values, keyed by name.
 * @param arg.schemas - Schema pages whose field names seed the context.
 * @returns The expanded text.
 */
export const replacePlaceholders = (arg: {
  content: string;
  variables: Record<string, unknown>;
  schemas: SchemaPageArray;
}): string => {
  const { content, variables, schemas } = arg;

  const mayContainPlaceholder =
    typeof content === 'string' && content.includes('{') && content.includes('}');
  if (!mayContainPlaceholder) {
    return content;
  }

  const now = new Date();

  const schemaDefaults = Object.fromEntries(
    schemas.flat().map((schema) => [schema.name, schema.readOnly ? schema.content || '' : '']),
  );
  const parsedInput = parseData({ ...schemaDefaults, ...variables });

  const context: Record<string, unknown> = {
    date: formatDate(now),
    dateTime: formatDateTime(now),
    ...parsedInput,
  };

  // Expand placeholders nested inside context values. The entry list is
  // captured up front; values resolved earlier are visible to later ones.
  for (const [name, value] of Object.entries(context)) {
    if (typeof value === 'string' && value.includes('{') && value.includes('}')) {
      context[name] = evaluatePlaceholders({ content: value, context });
    }
  }

  return evaluatePlaceholders({ content, context });
};
|