diff --git a/backend/go/localvqe/Makefile b/backend/go/localvqe/Makefile index b607288fc..7b66e9371 100644 --- a/backend/go/localvqe/Makefile +++ b/backend/go/localvqe/Makefile @@ -9,7 +9,7 @@ JOBS?=$(shell nproc --ignore=1) # LocalVQE upstream version pin. Bump to a specific commit when picking up # a new release; `main` works for development but is not reproducible. LOCALVQE_REPO?=https://github.com/localai-org/LocalVQE -LOCALVQE_VERSION?=72bfb4c6 +LOCALVQE_VERSION?=b0f0378a450e87c871b85689554801601ca56d98 # LocalVQE handles CPU feature selection internally (it ships the multiple # libggml-cpu-*.so variants and its loader picks the best one at runtime @@ -27,7 +27,8 @@ endif # LocalVQE upstream supports CPU + Vulkan only. Other BUILD_TYPE values # fall through to the default CPU build — Vulkan is already as fast as the -# specialised GPU paths would be on this 1.3 M-parameter model. +# specialised GPU paths would be on these small (1.3 M–4.8 M parameter) +# models. ifeq ($(BUILD_TYPE),vulkan) CMAKE_ARGS+=-DGGML_VULKAN=ON -DLOCALVQE_VULKAN=ON else ifeq ($(OS),Darwin) diff --git a/backend/go/localvqe/golocalvqe.go b/backend/go/localvqe/golocalvqe.go index b0575c3be..5d7c862d5 100644 --- a/backend/go/localvqe/golocalvqe.go +++ b/backend/go/localvqe/golocalvqe.go @@ -3,7 +3,6 @@ package main import ( "encoding/binary" "fmt" - "io" "os" "path/filepath" "runtime" @@ -11,6 +10,7 @@ import ( "strings" "unsafe" + "github.com/go-audio/wav" "github.com/mudler/LocalAI/pkg/grpc/base" pb "github.com/mudler/LocalAI/pkg/grpc/proto" "github.com/mudler/xlog" @@ -46,24 +46,24 @@ const ( // through the options builder (CppOptionsNew + setters + CppNewWithOptions) // — the bare localvqe_new path doesn't expose backend / device selection. 
var ( - CppOptionsNew func() uintptr - CppOptionsFree func(opts uintptr) - CppOptionsSetModelPath func(opts uintptr, modelPath string) int32 - CppOptionsSetBackend func(opts uintptr, backend string) int32 - CppOptionsSetDevice func(opts uintptr, device int32) int32 - CppNewWithOptions func(opts uintptr) uintptr - CppFree func(ctx uintptr) - CppProcessF32 func(ctx uintptr, mic, ref uintptr, nSamples int32, out uintptr) int32 - CppProcessS16 func(ctx uintptr, mic, ref uintptr, nSamples int32, out uintptr) int32 - CppProcessFrameF32 func(ctx uintptr, mic, ref uintptr, hopSamples int32, out uintptr) int32 - CppProcessFrameS16 func(ctx uintptr, mic, ref uintptr, hopSamples int32, out uintptr) int32 - CppReset func(ctx uintptr) - CppLastError func(ctx uintptr) string - CppSampleRate func(ctx uintptr) int32 - CppHopLength func(ctx uintptr) int32 - CppFFTSize func(ctx uintptr) int32 - CppSetNoiseGate func(ctx uintptr, enabled int32, thresholdDBFS float32) int32 - CppGetNoiseGate func(ctx uintptr, enabledOut, thresholdDBFSOut uintptr) int32 + CppOptionsNew func() uintptr + CppOptionsFree func(opts uintptr) + CppOptionsSetModelPath func(opts uintptr, modelPath string) int32 + CppOptionsSetBackend func(opts uintptr, backend string) int32 + CppOptionsSetDevice func(opts uintptr, device int32) int32 + CppNewWithOptions func(opts uintptr) uintptr + CppFree func(ctx uintptr) + CppProcessF32 func(ctx uintptr, mic, ref uintptr, nSamples int32, out uintptr) int32 + CppProcessS16 func(ctx uintptr, mic, ref uintptr, nSamples int32, out uintptr) int32 + CppProcessFrameF32 func(ctx uintptr, mic, ref uintptr, hopSamples int32, out uintptr) int32 + CppProcessFrameS16 func(ctx uintptr, mic, ref uintptr, hopSamples int32, out uintptr) int32 + CppReset func(ctx uintptr) + CppLastError func(ctx uintptr) string + CppSampleRate func(ctx uintptr) int32 + CppHopLength func(ctx uintptr) int32 + CppFFTSize func(ctx uintptr) int32 + CppSetNoiseGate func(ctx uintptr, enabled int32, thresholdDBFS 
float32) int32 + CppGetNoiseGate func(ctx uintptr, enabledOut, thresholdDBFSOut uintptr) int32 ) // LocalVQE speaks gRPC against LocalVQE's flat C ABI. The streaming @@ -490,11 +490,14 @@ func (v *LocalVQE) applyStreamConfig(cfg *pb.AudioTransformStreamConfig) error { // ---- WAV I/O ---------------------------------------------------------- // -// Minimal mono PCM WAV reader/writer. Only handles the subset LocalVQE -// cares about (mono, 16-bit signed, no extensible chunks). For broader -// audio support the HTTP layer's `audio.NormalizeAudioFile` already -// converts arbitrary input to a canonical WAV before we see it; this -// reader just decodes the canonical shape. +// Reader/writer for the mono 16-bit PCM shape LocalVQE works with. Decoding +// goes through the shared go-audio/wav decoder (as the whisper and parakeet +// backends do) so RIFF chunk walking is handled robustly — an 18/40-byte +// extensible `fmt ` chunk, or JUNK/bext/LIST metadata before or after `data` +// (e.g. ffmpeg's trailing "Lavf" tag), is skipped rather than spliced into +// the PCM stream as an audible click. The HTTP layer normalises arbitrary +// input to WAV before we see it, but that WAV is ffmpeg output and is not +// guaranteed to be the canonical 44-byte layout. 
func readMonoWAVf32(path string) ([]float32, int, error) { f, err := os.Open(path) @@ -502,35 +505,26 @@ func readMonoWAVf32(path string) ([]float32, int, error) { return nil, 0, err } defer func() { _ = f.Close() }() - header := make([]byte, 44) - if _, err := io.ReadFull(f, header); err != nil { - return nil, 0, err + + buf, err := wav.NewDecoder(f).FullPCMBuffer() + if err != nil { + return nil, 0, fmt.Errorf("decode WAV: %w", err) } - if string(header[0:4]) != "RIFF" || string(header[8:12]) != "WAVE" { + if buf == nil || buf.Format == nil { return nil, 0, fmt.Errorf("not a WAV file") } - channels := binary.LittleEndian.Uint16(header[22:24]) - sampleRate := binary.LittleEndian.Uint32(header[24:28]) - bitsPerSample := binary.LittleEndian.Uint16(header[34:36]) - - if channels != 1 { - return nil, 0, fmt.Errorf("only mono WAV supported (got %d channels)", channels) + if buf.Format.NumChannels != 1 { + return nil, 0, fmt.Errorf("only mono WAV supported (got %d channels)", buf.Format.NumChannels) } - if bitsPerSample != 16 { - return nil, 0, fmt.Errorf("only 16-bit PCM supported (got %d bits)", bitsPerSample) + if buf.SourceBitDepth != 16 { + return nil, 0, fmt.Errorf("only 16-bit PCM supported (got %d bits)", buf.SourceBitDepth) } - - rest, err := io.ReadAll(f) - if err != nil { - return nil, 0, err + if len(buf.Data) == 0 { + return nil, 0, fmt.Errorf("WAV has no audio data") } - n := len(rest) / 2 - out := make([]float32, n) - for i := 0; i < n; i++ { - s := int16(binary.LittleEndian.Uint16(rest[i*2 : i*2+2])) - out[i] = float32(s) / 32768.0 - } - return out, int(sampleRate), nil + // AsFloat32Buffer normalises by 2^(bitDepth-1) == /32768 for 16-bit, + // matching the model's expected [-1, 1) input range. 
+ return buf.AsFloat32Buffer().Data, buf.Format.SampleRate, nil } func writeMonoWAVf32(path string, samples []float32, sampleRate int) error { @@ -546,13 +540,13 @@ func writeMonoWAVf32(path string, samples []float32, sampleRate int) error { binary.LittleEndian.PutUint32(header[4:8], 36+dataLen) copy(header[8:12], []byte("WAVE")) copy(header[12:16], []byte("fmt ")) - binary.LittleEndian.PutUint32(header[16:20], 16) // fmt chunk size - binary.LittleEndian.PutUint16(header[20:22], 1) // PCM - binary.LittleEndian.PutUint16(header[22:24], 1) // mono + binary.LittleEndian.PutUint32(header[16:20], 16) // fmt chunk size + binary.LittleEndian.PutUint16(header[20:22], 1) // PCM + binary.LittleEndian.PutUint16(header[22:24], 1) // mono binary.LittleEndian.PutUint32(header[24:28], uint32(sampleRate)) binary.LittleEndian.PutUint32(header[28:32], uint32(sampleRate*2)) // byte rate - binary.LittleEndian.PutUint16(header[32:34], 2) // block align - binary.LittleEndian.PutUint16(header[34:36], 16) // bits per sample + binary.LittleEndian.PutUint16(header[32:34], 2) // block align + binary.LittleEndian.PutUint16(header[34:36], 16) // bits per sample copy(header[36:40], []byte("data")) binary.LittleEndian.PutUint32(header[40:44], dataLen) if _, err := f.Write(header); err != nil { diff --git a/backend/go/localvqe/localvqe_test.go b/backend/go/localvqe/localvqe_test.go index 5053dfeb1..60541441e 100644 --- a/backend/go/localvqe/localvqe_test.go +++ b/backend/go/localvqe/localvqe_test.go @@ -1,7 +1,9 @@ package main import ( + "encoding/binary" "os" + "path/filepath" "testing" pb "github.com/mudler/LocalAI/pkg/grpc/proto" @@ -92,6 +94,147 @@ var _ = Describe("LocalVQE-cpp", func() { }) }) + Context("readMonoWAVf32 chunk parsing", func() { + // chunk builds a word-aligned RIFF sub-chunk (id + size + body + pad). 
+ chunk := func(id string, body []byte) []byte { + out := append([]byte(id), 0, 0, 0, 0) + binary.LittleEndian.PutUint32(out[4:8], uint32(len(body))) + out = append(out, body...) + if len(body)&1 == 1 { + out = append(out, 0) // pad byte for odd-sized chunks + } + return out + } + // fmtBody returns a PCM `fmt ` chunk body. extra bytes simulate the + // 18/40-byte extensible form (cbSize + extension). + fmtBody := func(channels, bits uint16, rate uint32, extra int) []byte { + b := make([]byte, 16+extra) + binary.LittleEndian.PutUint16(b[0:2], 1) // PCM + binary.LittleEndian.PutUint16(b[2:4], channels) + binary.LittleEndian.PutUint32(b[4:8], rate) + binary.LittleEndian.PutUint32(b[8:12], rate*uint32(channels)*uint32(bits)/8) + binary.LittleEndian.PutUint16(b[12:14], channels*bits/8) + binary.LittleEndian.PutUint16(b[14:16], bits) + if extra >= 2 { + binary.LittleEndian.PutUint16(b[16:18], uint16(extra-2)) // cbSize + } + return b + } + // pcm encodes int16 samples little-endian. + pcm := func(samples ...int16) []byte { + b := make([]byte, len(samples)*2) + for i, s := range samples { + binary.LittleEndian.PutUint16(b[i*2:i*2+2], uint16(s)) + } + return b + } + riff := func(chunks ...[]byte) []byte { + body := []byte("WAVE") + for _, c := range chunks { + body = append(body, c...) + } + out := append([]byte("RIFF"), 0, 0, 0, 0) + binary.LittleEndian.PutUint32(out[4:8], uint32(len(body))) + return append(out, body...) + } + writeWAV := func(b []byte) string { + p := filepath.Join(GinkgoT().TempDir(), "in.wav") + Expect(os.WriteFile(p, b, 0o600)).To(Succeed()) + return p + } + // A canonical sample run with distinct values so any off-by-one / + // misalignment shows up as wrong numbers, not just wrong length. 
+ samples := []int16{1000, -2000, 3000, -4000, 5000, -6000} + expectSamples := func(got []float32) { + Expect(got).To(HaveLen(len(samples))) + for i, s := range samples { + Expect(got[i]).To(BeNumerically("~", float32(s)/32768.0, 1e-6)) + } + } + + It("reads a canonical 44-byte WAV", func() { + p := writeWAV(riff(chunk("fmt ", fmtBody(1, 16, 16000, 0)), chunk("data", pcm(samples...)))) + out, sr, err := readMonoWAVf32(p) + Expect(err).ToNot(HaveOccurred()) + Expect(sr).To(Equal(16000)) + expectSamples(out) + }) + + It("ignores a LIST/JUNK chunk placed before data (no leading-impulse splice)", func() { + p := writeWAV(riff( + chunk("fmt ", fmtBody(1, 16, 16000, 0)), + chunk("JUNK", []byte("padding-bytes-here!")), // odd length → exercises pad + chunk("LIST", []byte("INFOISFTLavf60.0")), + chunk("data", pcm(samples...)), + )) + out, sr, err := readMonoWAVf32(p) + Expect(err).ToNot(HaveOccurred()) + Expect(sr).To(Equal(16000)) + expectSamples(out) // not corrupted by the preceding chunks + }) + + It("honours the data chunk size and drops a trailing metadata chunk", func() { + p := writeWAV(riff( + chunk("fmt ", fmtBody(1, 16, 16000, 0)), + chunk("data", pcm(samples...)), + chunk("LIST", []byte("INFOISFTLavf60.16.100")), // ffmpeg trailer tag + )) + out, _, err := readMonoWAVf32(p) + Expect(err).ToNot(HaveOccurred()) + expectSamples(out) // trailing LIST bytes not decoded as PCM + }) + + It("handles the 18-byte extensible fmt chunk", func() { + p := writeWAV(riff(chunk("fmt ", fmtBody(1, 16, 16000, 2)), chunk("data", pcm(samples...)))) + out, sr, err := readMonoWAVf32(p) + Expect(err).ToNot(HaveOccurred()) + Expect(sr).To(Equal(16000)) + expectSamples(out) + }) + + It("rejects non-mono input", func() { + p := writeWAV(riff(chunk("fmt ", fmtBody(2, 16, 16000, 0)), chunk("data", pcm(samples...)))) + _, _, err := readMonoWAVf32(p) + Expect(err).To(HaveOccurred()) + Expect(err.Error()).To(ContainSubstring("mono")) + }) + + It("rejects non-16-bit input", func() { + p := 
writeWAV(riff(chunk("fmt ", fmtBody(1, 8, 16000, 0)), chunk("data", pcm(samples...)))) + _, _, err := readMonoWAVf32(p) + Expect(err).To(HaveOccurred()) + Expect(err.Error()).To(ContainSubstring("16-bit")) + }) + + It("rejects a non-WAV file", func() { + p := writeWAV([]byte("not a riff file at all")) + _, _, err := readMonoWAVf32(p) + Expect(err).To(HaveOccurred()) + }) + + It("errors when the data chunk is missing", func() { + // fmt but no data: the decoder must fail rather than return an + // empty (or garbage) sample slice. The exact message is the + // decoder's, so just assert it errors. + p := writeWAV(riff(chunk("fmt ", fmtBody(1, 16, 16000, 0)))) + _, _, err := readMonoWAVf32(p) + Expect(err).To(HaveOccurred()) + }) + + It("round-trips through writeMonoWAVf32", func() { + p := filepath.Join(GinkgoT().TempDir(), "rt.wav") + in := []float32{0.1, -0.2, 0.3, -0.4} + Expect(writeMonoWAVf32(p, in, 16000)).To(Succeed()) + out, sr, err := readMonoWAVf32(p) + Expect(err).ToNot(HaveOccurred()) + Expect(sr).To(Equal(16000)) + Expect(out).To(HaveLen(len(in))) + for i := range in { + Expect(out[i]).To(BeNumerically("~", in[i], 1e-4)) + } + }) + }) + Context("model-gated integration (LOCALVQE_MODEL_PATH)", func() { It("load + sample rate + hop + fft", func() { path := modelPathOrSkip() diff --git a/core/http/middleware/security_headers.go b/core/http/middleware/security_headers.go index 9a3ae8d48..969a335a0 100644 --- a/core/http/middleware/security_headers.go +++ b/core/http/middleware/security_headers.go @@ -17,7 +17,10 @@ func SecurityHeaders() echo.MiddlewareFunc { "img-src 'self' data: blob: https:; " + "media-src 'self' data: blob:; " + "font-src 'self' data:; " + - "connect-src 'self' ws: wss: https:; " + + // blob: lets the waveform renderer XHR/fetch a freshly-created object + // URL (e.g. an uploaded clip before it has a server URL). XHR/fetch of + // blob: falls under connect-src, not media-src. 
+ "connect-src 'self' ws: wss: https: blob:; " + "frame-src 'self' blob:; " + "worker-src 'self' blob:; " + "object-src 'none'; " + diff --git a/core/http/middleware/security_headers_test.go b/core/http/middleware/security_headers_test.go index af43822ea..76430b093 100644 --- a/core/http/middleware/security_headers_test.go +++ b/core/http/middleware/security_headers_test.go @@ -32,6 +32,9 @@ var _ = Describe("SecurityHeaders", func() { Expect(csp).To(ContainSubstring("frame-ancestors 'self'")) Expect(csp).To(ContainSubstring("object-src 'none'")) Expect(csp).To(ContainSubstring("base-uri 'self'")) + // blob: must be in connect-src so the waveform renderer can XHR/fetch + // a freshly-created object URL (uploaded/enhanced clip). + Expect(csp).To(ContainSubstring("connect-src 'self' ws: wss: https: blob:")) }) It("sets X-Content-Type-Options: nosniff", func() { diff --git a/core/http/react-ui/coverage-baseline.txt b/core/http/react-ui/coverage-baseline.txt index 4f8b89534..b4be1a3b7 100644 --- a/core/http/react-ui/coverage-baseline.txt +++ b/core/http/react-ui/coverage-baseline.txt @@ -1 +1 @@ -38.29 \ No newline at end of file +39.86 \ No newline at end of file diff --git a/core/http/react-ui/e2e/agents.spec.js b/core/http/react-ui/e2e/agents.spec.js index 40fe2d99c..ebfebd153 100644 --- a/core/http/react-ui/e2e/agents.spec.js +++ b/core/http/react-ui/e2e/agents.spec.js @@ -20,5 +20,10 @@ test.describe('Agents page', () => { page.waitForURL(/\/app\/agents\/new$/), create.click(), ]) + // Wait for AgentCreate.jsx to actually render, not just for the URL to + // change. Ending the test the instant the route matched let the component + // mount race the coverage teardown — its ~400 lines were collected only + // when the render won, swinging total UI coverage ~1pp run-to-run. 
+ await expect(page.getByRole('heading', { name: 'Create Agent' })).toBeVisible() }) }) diff --git a/core/http/react-ui/e2e/audio-transform.spec.js b/core/http/react-ui/e2e/audio-transform.spec.js index 53fbeabe7..c428e95f8 100644 --- a/core/http/react-ui/e2e/audio-transform.spec.js +++ b/core/http/react-ui/e2e/audio-transform.spec.js @@ -66,6 +66,33 @@ function makeFakeWav(name) { return { name, mimeType: 'audio/wav', buffer: buf } } +// Build a WAV carrying a real sine tone, long enough that the spectrogram +// STFT produces several frames (a few thousand samples). Used to exercise the +// FFT / heatmap path, which the 4-sample silent fixture can't. +function makeToneWav(name, freq = 1000, seconds = 0.4, sampleRate = 16000) { + const samples = Math.floor(seconds * sampleRate) + const dataLen = samples * 2 + const buf = Buffer.alloc(44 + dataLen) + buf.write('RIFF', 0) + buf.writeUInt32LE(36 + dataLen, 4) + buf.write('WAVE', 8) + buf.write('fmt ', 12) + buf.writeUInt32LE(16, 16) + buf.writeUInt16LE(1, 20) + buf.writeUInt16LE(1, 22) + buf.writeUInt32LE(sampleRate, 24) + buf.writeUInt32LE(sampleRate * 2, 28) + buf.writeUInt16LE(2, 32) + buf.writeUInt16LE(16, 34) + buf.write('data', 36) + buf.writeUInt32LE(dataLen, 40) + for (let i = 0; i < samples; i++) { + const v = Math.round(Math.sin((2 * Math.PI * freq * i) / sampleRate) * 16000) + buf.writeInt16LE(v, 44 + i * 2) + } + return { name, mimeType: 'audio/wav', buffer: buf } +} + test.describe('Audio Transform', () => { test.beforeEach(async ({ page }) => { await mockCapabilities(page, [ @@ -169,6 +196,26 @@ test.describe('Audio Transform', () => { await expect(page.getByTestId('media-history-item')).toHaveCount(1) }) + test('renders an input spectrogram on upload and an output one after transform', async ({ page }) => { + mockAudioTransform(page, 'enhanced.wav') + + await page.goto('/app/transform') + await expect(page.getByRole('button', { name: 'localvqe' })).toBeVisible({ timeout: 10_000 }) + + // Choosing a clip 
should render its input spectrogram immediately — no + // backend round-trip needed (it's computed client-side from the bytes). + await page.locator('input[type="file"]').first().setInputFiles(makeToneWav('tone.wav')) + await expect(page.getByTestId('spectrogram-input')).toBeVisible({ timeout: 10_000 }) + + // Until a transform runs the output side shows a "compare" placeholder. + await expect(page.getByText(/Transform to compare/)).toBeVisible() + + await page.getByRole('button', { name: /Transform/ }).last().click() + + // After processing, the output spectrum panel appears alongside the input. + await expect(page.getByText('Output spectrum')).toBeVisible({ timeout: 10_000 }) + }) + test('shows an error banner when the backend returns 4xx', async ({ page }) => { await page.route('**/audio/transformations', (route) => { if (route.request().method() !== 'POST') return route.continue() diff --git a/core/http/react-ui/src/App.css b/core/http/react-ui/src/App.css index a71eda5f5..01528ed7b 100644 --- a/core/http/react-ui/src/App.css +++ b/core/http/react-ui/src/App.css @@ -6984,6 +6984,88 @@ select.input { color: var(--color-primary); } +/* Spectrogram (AudioTransform spectral view) */ +.audio-spectrogram-pair { + display: grid; + grid-template-columns: 1fr 1fr; + gap: var(--spacing-md); +} +@media (max-width: 720px) { + .audio-spectrogram-pair { + grid-template-columns: 1fr; + } +} +.audio-spectrogram { + display: flex; + flex-direction: column; + gap: var(--spacing-xs); + width: 100%; + min-width: 0; +} +.audio-spectrogram__label { + font-size: var(--text-sm); + color: var(--color-text-secondary); +} +.audio-spectrogram__canvas-wrap { + position: relative; + width: 100%; + background: var(--color-surface-sunken); + border: 1px solid var(--color-border-subtle); + border-radius: var(--radius-md); + overflow: hidden; +} +.audio-spectrogram__canvas-wrap--empty { + display: flex; + align-items: center; + justify-content: center; +} +.audio-spectrogram__hint { + color: 
var(--color-text-muted); + font-size: var(--text-sm); +} +.audio-spectrogram__loading { + position: absolute; + inset: 0; + display: flex; + align-items: center; + justify-content: center; + color: var(--color-text-muted); + font-size: var(--text-sm); +} +.audio-spectrogram__error { + padding: var(--spacing-md); + color: var(--color-error); + font-size: var(--text-sm); +} +.audio-spectrogram__axis { + position: absolute; + left: 6px; + font-size: 10px; + color: var(--color-text-muted); + background: var(--color-bg-overlay); + padding: 0 4px; + border-radius: var(--radius-sm); + pointer-events: none; + font-variant-numeric: tabular-nums; +} +.audio-spectrogram__axis--top { + top: 4px; +} +.audio-spectrogram__axis--bottom { + bottom: 4px; +} +.audio-spectrogram__duration { + position: absolute; + right: 8px; + bottom: 6px; + font-size: 11px; + color: var(--color-text-muted); + font-variant-numeric: tabular-nums; + background: var(--color-bg-overlay); + padding: 1px 6px; + border-radius: var(--radius-sm); +} + /* Audio Transform Studio tab */ .audio-transform-stack { display: flex; diff --git a/core/http/react-ui/src/components/audio/Spectrogram.jsx b/core/http/react-ui/src/components/audio/Spectrogram.jsx new file mode 100644 index 000000000..98907611c --- /dev/null +++ b/core/http/react-ui/src/components/audio/Spectrogram.jsx @@ -0,0 +1,105 @@ +import { useEffect, useRef } from 'react' +import useSpectrogram from '../../hooks/useSpectrogram' + +// Spectrogram — canvas heatmap of a clip's magnitude STFT (time × frequency). +// Time runs left→right, frequency low→high bottom→top, brighter = more energy. +// Used on the AudioTransform page to show input next to output so the user can +// see which bands the model attenuates (dark gaps that were bright in the +// input). Mirrors WaveformPlayer's canvas/label/overlay structure. 
+export default function Spectrogram({ src, label, height = 140, testId }) { + const canvasRef = useRef(null) + const { spectrogram, frames, bins, maxFreq, duration, error, loading } = useSpectrogram(src) + + useEffect(() => { + const canvas = canvasRef.current + if (!canvas) return + const dpr = window.devicePixelRatio || 1 + const cssW = canvas.clientWidth + const cssH = height + canvas.width = Math.floor(cssW * dpr) + canvas.height = Math.floor(cssH * dpr) + const ctx = canvas.getContext('2d') + ctx.setTransform(dpr, 0, 0, dpr, 0, 0) + ctx.clearRect(0, 0, cssW, cssH) + if (!spectrogram || !frames || !bins) return + + // Paint at native (frames × bins) resolution into an offscreen canvas, + // then let drawImage smooth-scale it up — far cheaper than filling + // cssW×cssH rects, and the GPU handles the interpolation. + const img = ctx.createImageData(frames, bins) + for (let f = 0; f < frames; f++) { + for (let b = 0; b < bins; b++) { + const [r, g, bl] = magma(spectrogram[f * bins + b]) + // Flip the frequency axis: image row 0 is the top = highest freq. + const o = ((bins - 1 - b) * frames + f) * 4 + img.data[o] = r + img.data[o + 1] = g + img.data[o + 2] = bl + img.data[o + 3] = 255 + } + } + const off = document.createElement('canvas') + off.width = frames + off.height = bins + off.getContext('2d').putImageData(img, 0, 0) + ctx.imageSmoothingEnabled = true + ctx.drawImage(off, 0, 0, cssW, cssH) + }, [spectrogram, frames, bins, height]) + + if (!src) return null + + return ( +
+ {label &&
{label}
} +
+ {error ? ( +
{error}
+ ) : ( + + )} + {maxFreq > 0 && !error && ( + <> + {fmtHz(maxFreq)} + 0 Hz + + )} + {duration > 0 && !error && ( + {duration.toFixed(1)}s + )} + {loading && !error &&
Analysing…
} +
+
+ ) +} + +function fmtHz(hz) { + if (hz >= 1000) return `${(hz / 1000).toFixed(hz % 1000 === 0 ? 0 : 1)} kHz` + return `${Math.round(hz)} Hz` +} + +// magma — compact perceptual colormap (black→purple→orange→white) sampled at 8 +// control points and linearly interpolated. Perceptually uniform maps read +// far better for spectral magnitude than a raw hue ramp. v is clamped to [0,1]. +const MAGMA = [ + [0, 0, 4], + [40, 11, 84], + [101, 21, 110], + [159, 42, 99], + [212, 72, 66], + [245, 125, 21], + [250, 193, 39], + [252, 253, 191], +] +function magma(v) { + const t = v <= 0 ? 0 : v >= 1 ? 1 : v + const x = t * (MAGMA.length - 1) + const i = Math.floor(x) + const frac = x - i + const a = MAGMA[i] + const b = MAGMA[Math.min(i + 1, MAGMA.length - 1)] + return [ + Math.round(a[0] + (b[0] - a[0]) * frac), + Math.round(a[1] + (b[1] - a[1]) * frac), + Math.round(a[2] + (b[2] - a[2]) * frac), + ] +} diff --git a/core/http/react-ui/src/hooks/useAudioPeaks.js b/core/http/react-ui/src/hooks/useAudioPeaks.js index 31dfa05eb..88ebea2dd 100644 --- a/core/http/react-ui/src/hooks/useAudioPeaks.js +++ b/core/http/react-ui/src/hooks/useAudioPeaks.js @@ -5,7 +5,7 @@ import { useEffect, useState } from 'react' // and most browsers cap concurrent AudioContexts at ~6. Keep one alive for // the lifetime of the tab and reuse it across decodes. 
let sharedCtx = null -function getSharedAudioContext() { +export function getSharedAudioContext() { if (sharedCtx) return sharedCtx const Ctx = window.AudioContext || window.webkitAudioContext if (!Ctx) return null diff --git a/core/http/react-ui/src/hooks/useSpectrogram.js b/core/http/react-ui/src/hooks/useSpectrogram.js new file mode 100644 index 000000000..c6f2c6f9c --- /dev/null +++ b/core/http/react-ui/src/hooks/useSpectrogram.js @@ -0,0 +1,107 @@ +import { useEffect, useState } from 'react' +import { getSharedAudioContext } from './useAudioPeaks' +import { fftRadix2 } from '../utils/fft' + +// Hann windows are reused across frames and across clips, so cache one per +// size. The window tapers each frame to suppress spectral leakage (the +// vertical smearing you'd otherwise get from hard frame edges). +const windowCache = new Map() +function hann(n) { + let w = windowCache.get(n) + if (w) return w + w = new Float32Array(n) + for (let i = 0; i < n; i++) w[i] = 0.5 - 0.5 * Math.cos((2 * Math.PI * i) / (n - 1)) + windowCache.set(n, w) + return w +} + +const EMPTY = { spectrogram: null, frames: 0, bins: 0, maxFreq: 0, duration: 0, error: null, loading: false } + +// useSpectrogram — decode an audio source (blob/data/http URL) and compute a +// magnitude STFT suitable for a spectrogram heatmap. Returns +// `{ spectrogram, frames, bins, maxFreq, duration, error, loading }` where +// `spectrogram` is a Float32Array of `frames * bins` values, row-major by +// frame, normalised so the dB floor maps to 0 and the loudest bin to 1. +// `bins` spans 0..Nyquist (`maxFreq`). +// +// fftSize/hop default to the LocalVQE frame geometry (512/256) so the picture +// lines up with how the model itself frames the audio. Long clips are +// strided down to at most `maxFrames` columns — the heatmap is only a few +// hundred px wide, so computing an FFT per native hop would be wasted work. 
+export default function useSpectrogram( + src, + { fftSize = 512, hop = 256, maxFrames = 900, dbFloor = -90 } = {}, +) { + const [state, setState] = useState(EMPTY) + + useEffect(() => { + setState(EMPTY) + if (!src) return + let cancelled = false + setState((s) => ({ ...s, loading: true })) + + async function run() { + try { + const resp = await fetch(src) + const raw = await resp.arrayBuffer() + const ctx = getSharedAudioContext() + if (!ctx) throw new Error('Web Audio API not available') + const audio = await ctx.decodeAudioData(raw.slice(0)) + if (cancelled) return + + const data = audio.getChannelData(0) + const bins = fftSize >> 1 + const win = hann(fftSize) + + // Frame count, then a stride so we never run more than maxFrames FFTs. + const rawFrames = data.length >= fftSize ? 1 + Math.floor((data.length - fftSize) / hop) : 1 + const stride = rawFrames > maxFrames ? Math.ceil(rawFrames / maxFrames) : 1 + const frames = Math.ceil(rawFrames / stride) + + const spec = new Float32Array(frames * bins) + const re = new Float64Array(fftSize) + const im = new Float64Array(fftSize) + let peakDb = dbFloor + + for (let f = 0; f < frames; f++) { + const start = f * stride * hop + for (let i = 0; i < fftSize; i++) { + const s = start + i + re[i] = s < data.length ? data[s] * win[i] : 0 + im[i] = 0 + } + fftRadix2(re, im) + for (let b = 0; b < bins; b++) { + const mag = Math.hypot(re[b], im[b]) / fftSize + let db = mag > 0 ? 20 * Math.log10(mag) : dbFloor + if (db < dbFloor) db = dbFloor + spec[f * bins + b] = db + if (db > peakDb) peakDb = db + } + } + + // Normalise dB into [0,1] against [dbFloor, peakDb]. 
+ const range = peakDb - dbFloor || 1 + for (let i = 0; i < spec.length; i++) spec[i] = (spec[i] - dbFloor) / range + + if (cancelled) return + setState({ + spectrogram: spec, + frames, + bins, + maxFreq: audio.sampleRate / 2, + duration: audio.duration, + error: null, + loading: false, + }) + } catch (e) { + if (!cancelled) setState((s) => ({ ...s, error: e?.message || 'Could not analyse audio', loading: false })) + } + } + + run() + return () => { cancelled = true } + }, [src, fftSize, hop, maxFrames, dbFloor]) + + return state +} diff --git a/core/http/react-ui/src/pages/AudioTransform.jsx b/core/http/react-ui/src/pages/AudioTransform.jsx index 99e3911c8..98526751d 100644 --- a/core/http/react-ui/src/pages/AudioTransform.jsx +++ b/core/http/react-ui/src/pages/AudioTransform.jsx @@ -5,6 +5,7 @@ import { CAP_AUDIO_TRANSFORM } from '../utils/capabilities' import LoadingSpinner from '../components/LoadingSpinner' import ErrorWithTraceLink from '../components/ErrorWithTraceLink' import WaveformPlayer from '../components/audio/WaveformPlayer' +import Spectrogram from '../components/audio/Spectrogram' import { audioTransformApi } from '../utils/api' import { useMediaCapture } from '../hooks/useMediaCapture' import useObjectUrl from '../hooks/useObjectUrl' @@ -261,6 +262,24 @@ export default function AudioTransform() { ) : (
+ {audioUrl && ( +
+ + {outputUrl ? ( + + ) : ( +
+
Output spectrum
+
+ Transform to compare attenuation +
+
+ )} +
+ )} {outputUrl && ( diff --git a/core/http/react-ui/src/utils/fft.js b/core/http/react-ui/src/utils/fft.js new file mode 100644 index 000000000..35ee4cff8 --- /dev/null +++ b/core/http/react-ui/src/utils/fft.js @@ -0,0 +1,47 @@ +// Minimal in-place iterative radix-2 Cooley–Tukey FFT. +// +// The AudioTransform spectrogram only needs forward transforms of short real +// frames (≤2048 samples), so a compact ~30-line implementation beats pulling +// in a dependency and shipping it in the bundle. `re` and `im` are mutated in +// place; `n = re.length` must be a power of two (the caller picks fftSize). +export function fftRadix2(re, im) { + const n = re.length + if (n <= 1) return + + // Bit-reversal permutation: reorder samples so the butterfly stage below can + // run in place. + for (let i = 1, j = 0; i < n; i++) { + let bit = n >> 1 + for (; j & bit; bit >>= 1) j ^= bit + j ^= bit + if (i < j) { + const tr = re[i]; re[i] = re[j]; re[j] = tr + const ti = im[i]; im[i] = im[j]; im[j] = ti + } + } + + // Butterflies, doubling the transform length each pass. + for (let len = 2; len <= n; len <<= 1) { + const half = len >> 1 + const ang = (-2 * Math.PI) / len + const wpr = Math.cos(ang) + const wpi = Math.sin(ang) + for (let i = 0; i < n; i += len) { + let wr = 1 + let wi = 0 + for (let k = 0; k < half; k++) { + const a = i + k + const b = a + half + const tr = wr * re[b] - wi * im[b] + const ti = wr * im[b] + wi * re[b] + re[b] = re[a] - tr + im[b] = im[a] - ti + re[a] += tr + im[a] += ti + const nwr = wr * wpr - wi * wpi + wi = wr * wpi + wi * wpr + wr = nwr + } + } + } +} diff --git a/docs/content/features/audio-transform.md b/docs/content/features/audio-transform.md index 61269b409..511b2e3d7 100644 --- a/docs/content/features/audio-transform.md +++ b/docs/content/features/audio-transform.md @@ -103,9 +103,11 @@ ends the session cleanly. ### Latency -LocalVQE has 16 ms algorithmic latency (one hop). 
At runtime, ~1.66 ms of CPU -time per frame on a modern desktop, leaving the rest of the budget for -network and downstream playback. +LocalVQE has 16 ms algorithmic latency (one hop). At runtime the per-frame CPU +cost depends on the model: ~1.6 ms for the compact 1.3 M models (v1.1/v1.2, +~9.7× realtime) and ~3.3 ms for the wider v1.3 4.8 M model (~4.7× realtime) on +a 4-thread modern desktop, leaving the rest of the budget for network and +downstream playback. ## Backend-specific tuning (LocalVQE) @@ -120,11 +122,16 @@ A reasonable starting point is `-50` dBFS. ## Configuring a model +LocalVQE ships several weight releases in the gallery: `localvqe-v1.3-4.8m` +(current default — best quality), `localvqe-v1.2-1.3m` and `localvqe-v1.1-1.3m` +(compact, ~¼ the per-hop cost — good for low-core or power-constrained hosts). +All share the same backend and request API; only the `model` filename differs. + ```yaml name: localvqe backend: localvqe parameters: - model: localvqe-v1.1-1.3M-f32.gguf + model: localvqe-v1.3-4.8M-f32.gguf # Backend-specific defaults can be set in Options[]; per-request # params[*] form fields override. diff --git a/flake.nix b/flake.nix index 9ffc94709..30f57a057 100644 --- a/flake.nix +++ b/flake.nix @@ -81,6 +81,11 @@ gotools # goimports go-tools # staticcheck + # Audio transforms: pkg/utils/ffmpeg_test.go shells out to the + # `ffmpeg` CLI, exercised by `make test-coverage` (the pre-commit + # gate). Headless build = the CLI without GUI/X deps. 
+ ffmpeg-headless + # Common dev conveniences git curl diff --git a/gallery/index.yaml b/gallery/index.yaml index 865eec9f0..97b0d472f 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -29903,6 +29903,68 @@ - filename: localvqe-v1.1-1.3M-f32.gguf sha256: c118227c6b433d6aa36d9e4b993e0f31aa60787ea38d301d04db917a4a2b0a84 uri: huggingface://LocalAI-io/LocalVQE/localvqe-v1.1-1.3M-f32.gguf +- name: localvqe-v1.2-1.3m + url: github:mudler/LocalAI/gallery/virtual.yaml@master + urls: + - https://github.com/localai-org/LocalVQE + - https://huggingface.co/LocalAI-io/LocalVQE + description: | + LocalVQE v1.2 (1.3 M parameters, F32) — compact joint acoustic echo + cancellation, noise suppression, and dereverberation for 16 kHz mono + speech. Shares the same DeepVQE-style architecture (arch_version 3) as + v1.3 but with narrower encoder/decoder widths, so it runs at ~9.7× + realtime (~1.6 ms per 16 ms frame on a 4-thread Zen4 CPU) — about half the + per-hop cost of v1.3. Widens the echo-search window to 1024 ms (v1.1 used + 512 ms). ~5 MB on disk. The budget-friendly choice for low-core or + power-constrained devices. + license: apache-2.0 + icon: https://avatars.githubusercontent.com/u/260893928 + tags: + - audio-transform + - aec + - acoustic-echo-cancellation + - noise-suppression + - dereverberation + - cpu + overrides: + backend: localvqe + parameters: + model: localvqe-v1.2-1.3M-f32.gguf + files: + - filename: localvqe-v1.2-1.3M-f32.gguf + sha256: 4856ecf5f522b23fb2bc5caeac81f323c0ef1c4c156a9c7d40a6adbe092ba9ce + uri: huggingface://LocalAI-io/LocalVQE/localvqe-v1.2-1.3M-f32.gguf +- name: localvqe-v1.3-4.8m + url: github:mudler/LocalAI/gallery/virtual.yaml@master + urls: + - https://github.com/localai-org/LocalVQE + - https://huggingface.co/LocalAI-io/LocalVQE + description: | + LocalVQE v1.3 (4.8 M parameters, F32) — current default release.
Joint + acoustic echo cancellation, noise suppression, and dereverberation for + 16 kHz mono speech, with a wider encoder/decoder trained from scratch + under a noise-floor-aware loss recipe. ~4.7× realtime (~3.3 ms per 16 ms + frame on a 4-thread Zen4 CPU); ~19 MB on disk. Improves doubletalk speech + quality (+0.25 deg MOS) and far-end echo cancellation (ERLE +5.2–9.3 dB) + over v1.2; on far-end-only scenes some users may still prefer v1.2's + gentler trade-off. Same 16 ms algorithmic latency as the compact models. + license: apache-2.0 + icon: https://avatars.githubusercontent.com/u/260893928 + tags: + - audio-transform + - aec + - acoustic-echo-cancellation + - noise-suppression + - dereverberation + - cpu + overrides: + backend: localvqe + parameters: + model: localvqe-v1.3-4.8M-f32.gguf + files: + - filename: localvqe-v1.3-4.8M-f32.gguf + sha256: c4f7912485c32cfc206c536f2f050b52513f2f613fdbc616391f6b26ab1d51ec + uri: huggingface://LocalAI-io/LocalVQE/localvqe-v1.3-4.8M-f32.gguf - name: tlacuilo-12b url: github:mudler/LocalAI/gallery/mistral-0.3.yaml@master urls: diff --git a/pkg/audio/audio.go b/pkg/audio/audio.go index 8a0b33586..1d3d9c17f 100644 --- a/pkg/audio/audio.go +++ b/pkg/audio/audio.go @@ -77,22 +77,59 @@ func NewWAVHeaderWithRate(pcmLen, sampleRate uint32) WAVHeader { // WAVHeaderSize is the size of a standard PCM WAV header in bytes. const WAVHeaderSize = 44 -// StripWAVHeader removes a WAV header from audio data, returning raw PCM. -// If the data is too short to contain a header, it is returned unchanged. +// wavDataChunk walks the RIFF sub-chunks of an in-memory WAV and returns the +// `data` chunk payload (a sub-slice of data, not a copy) plus the sample rate +// from `fmt `. ok is false when data isn't a RIFF/WAVE stream or carries no +// data chunk — callers then fall back to treating the input as raw PCM. 
// wavDataChunk walks the RIFF sub-chunks of an in-memory WAV and returns the
// `data` chunk payload (a sub-slice of data, not a copy) plus the sample rate
// read from the `fmt ` chunk. ok is false when data isn't a RIFF/WAVE stream
// or carries no data chunk — callers then fall back to treating the input as
// raw PCM.
//
// Walking the chunks rather than assuming the canonical 44-byte header is what
// keeps an 18/40-byte extensible `fmt `, or JUNK/LIST/bext metadata before or
// after `data` (e.g. ffmpeg's trailing "Lavf" tag), from being spliced into
// the PCM as an audible click.
//
// sampleRate is 0 when no `fmt ` chunk of at least 16 bytes precedes `data`.
// A declared chunk size that runs past the end of the buffer is clamped to
// the bytes that remain.
func wavDataChunk(data []byte) (pcm []byte, sampleRate int, ok bool) {
	if len(data) < 12 || string(data[0:4]) != "RIFF" || string(data[8:12]) != "WAVE" {
		return nil, 0, false
	}
	for off := 12; off+8 <= len(data); {
		id := string(data[off : off+4])
		size := int(binary.LittleEndian.Uint32(data[off+4 : off+8]))
		body := off + 8
		// Truncated/garbage size — clamp to what's left so a short final
		// chunk doesn't drop an otherwise valid data chunk. The comparison is
		// made against the remaining length instead of computing body+size:
		// where int is 32 bits that sum can wrap negative for sizes near
		// 0x7FFFFFFF, slip past the check and panic in the slice expression
		// below. size < 0 covers declared sizes >= 2^31 on such platforms.
		if remaining := len(data) - body; size < 0 || size > remaining {
			size = remaining
		}
		switch id {
		case "fmt ":
			// Sample rate sits at bytes 4..8 of the fmt body; require the
			// 16-byte minimum PCM layout before trusting it.
			if size >= 16 {
				sampleRate = int(binary.LittleEndian.Uint32(data[body+4 : body+8]))
			}
		case "data":
			return data[body : body+size], sampleRate, true
		}
		// Chunks are word-aligned: an odd size is followed by a pad byte.
		off = body + size + (size & 1)
	}
	return nil, 0, false
}

// StripWAVHeader removes a WAV header from audio data, returning raw PCM. If
// the data isn't a recognisable WAV (e.g. it's already raw PCM) it is returned
// unchanged. Locates the `data` chunk by walking the RIFF structure rather
// than assuming a fixed 44-byte header — see [wavDataChunk].
func StripWAVHeader(data []byte) []byte {
	if pcm, _, ok := wavDataChunk(data); ok {
		return pcm
	}
	return data
}
If the data isn't a recognisable WAV it is returned as-is +// with sampleRate=0. Walks the RIFF structure — see [wavDataChunk]. func ParseWAV(data []byte) (pcm []byte, sampleRate int) { - if len(data) <= WAVHeaderSize { - return data, 0 + if pcm, sr, ok := wavDataChunk(data); ok { + return pcm, sr } - sr := int(binary.LittleEndian.Uint32(data[24:28])) - return data[WAVHeaderSize:], sr + return data, 0 } diff --git a/pkg/audio/audio_test.go b/pkg/audio/audio_test.go index 836aa27ae..f13e51201 100644 --- a/pkg/audio/audio_test.go +++ b/pkg/audio/audio_test.go @@ -96,4 +96,81 @@ var _ = Describe("WAV utilities", func() { Expect(gotPCM).To(Equal(short)) }) }) + + Describe("non-canonical RIFF layouts", func() { + // chunk builds a word-aligned RIFF sub-chunk (id + size + body + pad). + chunk := func(id string, body []byte) []byte { + out := append([]byte(id), 0, 0, 0, 0) + binary.LittleEndian.PutUint32(out[4:8], uint32(len(body))) + out = append(out, body...) + if len(body)&1 == 1 { + out = append(out, 0) // pad byte for odd-sized chunks + } + return out + } + // fmtBody is a mono 16-bit PCM `fmt ` body; extra simulates the + // 18/40-byte extensible form (cbSize + extension). + fmtBody := func(rate uint32, extra int) []byte { + b := make([]byte, 16+extra) + binary.LittleEndian.PutUint16(b[0:2], 1) // PCM + binary.LittleEndian.PutUint16(b[2:4], 1) // mono + binary.LittleEndian.PutUint32(b[4:8], rate) // sample rate + binary.LittleEndian.PutUint32(b[8:12], rate*2) // byte rate + binary.LittleEndian.PutUint16(b[12:14], 2) // block align + binary.LittleEndian.PutUint16(b[14:16], 16) // bits per sample + if extra >= 2 { + binary.LittleEndian.PutUint16(b[16:18], uint16(extra-2)) // cbSize + } + return b + } + riff := func(chunks ...[]byte) []byte { + body := []byte("WAVE") + for _, c := range chunks { + body = append(body, c...) + } + out := append([]byte("RIFF"), 0, 0, 0, 0) + binary.LittleEndian.PutUint32(out[4:8], uint32(len(body))) + return append(out, body...) 
+ } + pcm := []byte{1, 2, 3, 4, 5, 6, 7, 8} + + It("ignores JUNK/LIST chunks before data (no leading splice)", func() { + w := riff( + chunk("fmt ", fmtBody(16000, 0)), + chunk("JUNK", []byte("padding-bytes-x")), // odd length → exercises pad + chunk("LIST", []byte("INFOISFTLavf")), + chunk("data", pcm), + ) + gotPCM, rate := ParseWAV(w) + Expect(rate).To(Equal(16000)) + Expect(gotPCM).To(Equal(pcm)) + Expect(StripWAVHeader(w)).To(Equal(pcm)) + }) + + It("honours the data chunk size and drops a trailing chunk", func() { + w := riff( + chunk("fmt ", fmtBody(24000, 0)), + chunk("data", pcm), + chunk("LIST", []byte("INFOISFTLavf60.16")), // ffmpeg trailer tag + ) + gotPCM, rate := ParseWAV(w) + Expect(rate).To(Equal(24000)) + Expect(gotPCM).To(Equal(pcm)) // trailing LIST not spliced in + }) + + It("handles an 18-byte extensible fmt chunk", func() { + w := riff(chunk("fmt ", fmtBody(16000, 2)), chunk("data", pcm)) + gotPCM, rate := ParseWAV(w) + Expect(rate).To(Equal(16000)) + Expect(gotPCM).To(Equal(pcm)) + }) + + It("returns non-WAV input unchanged", func() { + raw := []byte("this is definitely not a riff wave file") + gotPCM, rate := ParseWAV(raw) + Expect(rate).To(Equal(0)) + Expect(gotPCM).To(Equal(raw)) + Expect(StripWAVHeader(raw)).To(Equal(raw)) + }) + }) }) diff --git a/scripts/ui-coverage-check.sh b/scripts/ui-coverage-check.sh index 54532b48d..33a43748c 100755 --- a/scripts/ui-coverage-check.sh +++ b/scripts/ui-coverage-check.sh @@ -4,17 +4,20 @@ # # Compares the total line coverage in an nyc coverage-summary.json against a # committed baseline and fails (exit 1) if it dropped by more than -# UI_COVERAGE_TOLERANCE percentage points (default 0.8). The React UI e2e suite +# UI_COVERAGE_TOLERANCE percentage points (default 0.1). The React UI e2e suite # drives the real app, so a removed feature or deleted spec shows up as a # coverage drop here. # -# UI e2e line coverage is NOT deterministic: async/debounced paths (e.g. 
the -# VRAM estimate's 500ms debounce) mean identical specs vary run-to-run. With the -# V8 path's single-chunk coverage build (vite.config.js inlineDynamicImports) -# the observed wobble is ~0.5pp, similar to the old istanbul path. The tolerance -# absorbs that jitter — keep it just above the observed wobble so a real ~1pp -# regression still trips the gate. -# (The Go gate carries a smaller tolerance for the same reason — its e2e slice.) +# The tolerance exists only to absorb the irreducible measurement noise floor, +# NOT to permit regression. UI e2e coverage USED to swing ~1pp run-to-run, which +# forced a loose 0.8pp band — but that swing was a bug, not inherent jitter: a +# spec that navigated to a route and ended on the URL assertion let the target +# component's render race the coverage teardown, so ~400 lines were collected +# only when the render won (see e2e/agents.spec.js → AgentCreate). With that race +# fixed, repeated runs land within ~0.013pp (a handful of lines) of each other, +# so the band is tightened to 0.1pp — enough for the noise floor, tight enough +# that a real ~40-line regression still trips the gate. If a future run wobbles +# more, fix the racing spec (await a rendered element) rather than loosening this. # # When coverage rises meaningfully, regenerate and commit the baseline with: # make test-ui-coverage-baseline @@ -22,7 +25,7 @@ set -eu summary="${1:?usage: ui-coverage-check.sh SUMMARY_JSON BASELINE_FILE}" baseline_file="${2:?usage: ui-coverage-check.sh SUMMARY_JSON BASELINE_FILE}" -tolerance="${UI_COVERAGE_TOLERANCE:-0.8}" +tolerance="${UI_COVERAGE_TOLERANCE:-0.1}" if [ ! -f "$summary" ]; then echo "ui-coverage-check: coverage summary not found: $summary" >&2