mirror of
https://github.com/pdfme/pdfme.git
synced 2026-06-18 19:29:25 -04:00
* fix: two unbounded-cache memory leaks in common and schemas
Two module-level Map caches never evict and store multi-MB strings
as keys, silently leaking memory for the entire lifetime of any consumer.
1. packages/common/src/expression.ts — parseDataCache
parseData() was memoized via a module-level parseDataCache keyed by
JSON.stringify(data). replacePlaceholders() calls it with a merged
{ ...schemaNameDefaults, ...variables } object where values may be
arbitrary strings from the caller. Whenever inputs contain base64
(image schemas with embedded data URLs, embedded fonts, large text),
the cache key is a multi-MB JSON string that gets pinned permanently;
every unique `inputs` state adds its own key, which is never collected. Parsing
is O(fields) and cheap, so removing the cache is strictly a win.
Regression test: packages/common/__tests__/expression.test.ts
'replacePlaceholders memory safety > does not retain call inputs in
a module-level cache' — runs 30 replacePlaceholders() calls with
unique ~500 KB payloads, captures a V8 heap snapshot via
v8.writeHeapSnapshot, aggregates string nodes >= 200 KB and asserts
the total retained size is below 2 MB. Pre-fix: ~30 MB retained
(FAILS). Post-fix: 0 bytes retained (passes).
2. packages/schemas/src/graphics/image.ts — getCacheKey
getCacheKey(schema, input) returned `${schema.type}${input}`, using
the full base64 bytes of the image as part of the cache key. Every
unique image processed by the PDF render path added a permanent Map
entry whose key byte length matched the image itself.
Replaced with a short fingerprint that samples the total length plus
three 16-char regions (first, middle, last). The middle-region
sample is essential: base64 PNGs share a common header and IEND
trailer, so distinct images of the same size would collide if only
first/last regions were sampled. Middle bytes are pixel data and
differ between distinct images with overwhelming probability. Keys
stay under 80 chars regardless of input size.
Regression tests: packages/schemas/__tests__/image.test.ts
- 'does not pin the full base64 input as a cache key' — asserts
key length < 100 chars. Pre-fix: 139 chars for a minimal PNG and
proportionally more for realistic images (FAILS).
- 'distinguishes different images via the fingerprint' — guards
against future over-shortening of the fingerprint that could
reintroduce collisions between distinct images.
Both leaks were originally identified via a V8 heap-snapshot diff taken
across a UI workload (typing + field tabbing) against a consumer app
with image schemas carrying base64 content. Before the fix, the top two
growing allocations by retained size were multi-MB string entries — one
per module-level cache in this PR — together accounting for hundreds of
MB of retained JS heap in a single 3-iteration run. After the fix, both
string entries disappear from the top 25 growing allocations and
aggregate JS heap is net flat / slightly shrinking across iterations.
No public API change. No behavioral change for consumers. Both caches
were module-local implementation details.
* fix(schemas): harden image cache key with FNV-1a hash; fix stale test comments
Addresses Greptile review on #1426:
- Replace 3-region sampling fingerprint in getCacheKey with an FNV-1a
32-bit hash over the full input. The old first-16 slice was a
constant data-URI prefix for any image of the same MIME type,
contributing no entropy; hashing every byte removes that weakness
at the same O(n) cost without retaining any slice as a Map key.
Key format is now `${type}:${len}:${fnv1a-hex}` (~40 chars).
- Rewrite stale comments in image.test.ts that referred to a
padding/mutation scheme the test never performs, and update the
fingerprint-format comment to match the new hash-based key.
- Add trailing newline to expression.test.ts.
All pre-existing and new tests still pass.
105 lines
3.7 KiB
TypeScript
105 lines
3.7 KiB
TypeScript
import { describe, it, expect } from 'vitest';
|
||
import { PDFDocument } from '@pdfme/pdf-lib';
|
||
import * as pdfLib from '@pdfme/pdf-lib';
|
||
import { BLANK_PDF, type Schema, type PDFRenderProps } from '@pdfme/common';
|
||
import { image } from '../src/index.js';
|
||
|
||
describe('image plugin memory-safety', () => {
  // Minimal but valid 1×1 PNG data URL shared by both cases. Image size is
  // irrelevant here: embedPng only has to succeed so that the render path
  // reaches the cache, and the cache key is derived from `value`.
  const ONE_PIXEL_PNG =
    'data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAAAXNSR0IArs4c6QAAAA1J' +
    'REFUGFdj+P///38ACfsD/QVDRcoAAAAASUVORK5CYII=';

  /** Returns a snapshot of the keys currently stored in the render cache. */
  const keysOf = (cache: Map<string | number, unknown>): string[] =>
    Array.from(cache.keys() as Iterable<string>);

  it('does not pin the full base64 input as a cache key', async () => {
    const doc = await PDFDocument.create();
    const firstPage = doc.addPage();
    const renderCache = new Map<string | number, unknown>();

    const schema = {
      name: 'pic',
      type: 'image',
      content: ONE_PIXEL_PNG,
      position: { x: 0, y: 0 },
      width: 50,
      height: 50,
    } as unknown as Schema;

    const renderProps = {
      value: ONE_PIXEL_PNG,
      schema,
      basePdf: BLANK_PDF,
      pdfLib,
      pdfDoc: doc,
      page: firstPage,
      options: {},
      _cache: renderCache,
    } as unknown as PDFRenderProps<Schema>;

    await image.pdf(renderProps);

    // A single pdf() call must leave exactly one entry behind.
    const keysAfterFirstRender = keysOf(renderCache);
    expect(keysAfterFirstRender.length).toBe(1);

    // Regression guard: the key has to be a fingerprint rather than the raw
    // input. Previously the key was `${schema.type}${value}`, so its length
    // tracked the input length. The 100-char ceiling trips on any return to
    // that behaviour, while the current `${type}:${len}:${fnv1a-hex}` format
    // stays well under 40 chars even for huge inputs.
    const fingerprint = keysAfterFirstRender[0];
    expect(fingerprint.length).toBeLessThan(100);

    // The schema type stays in the key so that different plugins sharing the
    // same cache Map cannot collide with each other.
    expect(fingerprint.startsWith('image')).toBe(true);

    // Rendering the identical input again must hit the existing entry instead
    // of adding one, which shows the fingerprint is deterministic.
    await image.pdf(renderProps);
    expect(keysOf(renderCache).length).toBe(1);
  });

  it('distinguishes different images via the fingerprint', async () => {
    const doc = await PDFDocument.create();
    const firstPage = doc.addPage();
    const renderCache = new Map<string | number, unknown>();

    const pngA = ONE_PIXEL_PNG;
    // Same length and the same leading/trailing characters as pngA; only a
    // short run in the middle differs. The key is a hash over every byte, so
    // any differing byte changes it with overwhelming probability and the
    // two inputs must still be told apart.
    const pngB =
      'data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAD8S7TTAAAAAXNSR0IArs4c6QAAAA1J' +
      'REFUGFdj+P///38ACfsD/QVDRcoAAAAASUVORK5CYII=';

    // Schema fields common to both images; `content` is added per image.
    const sharedSchemaFields = {
      name: 'pic',
      type: 'image',
      position: { x: 0, y: 0 },
      width: 50,
      height: 50,
    };

    const propsForA = {
      value: pngA,
      schema: { ...sharedSchemaFields, content: pngA } as unknown as Schema,
      basePdf: BLANK_PDF,
      pdfLib,
      pdfDoc: doc,
      page: firstPage,
      options: {},
      _cache: renderCache,
    } as unknown as PDFRenderProps<Schema>;

    // Derived from propsForA so both renders share the same document, page,
    // options object and cache; only the image payload changes.
    const propsForB = {
      ...propsForA,
      value: pngB,
      schema: { ...sharedSchemaFields, content: pngB } as unknown as Schema,
    };

    await image.pdf(propsForA);
    await image.pdf(propsForB);

    // Two distinct images have to yield two distinct cache entries.
    expect(keysOf(renderCache).length).toBe(2);
  });
});
|