From e05dece93c803545e346c1a4765596695d95a788 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Mon, 22 Jun 2026 13:32:02 +0000 Subject: [PATCH] feat(recon): honor LocalAI per-model threads in voice/face-detect backends LocalAI spawns one backend process per model and serves requests concurrently, so the engines' own min(hardware_concurrency, 8) default can oversubscribe cores. Forward the per-model Threads value from the gRPC LoadModel options into the engine via VOICEDETECT_THREADS / FACEDETECT_THREADS (read at backend construction) before the capi load. A non-positive Threads is treated as unset, leaving the engine default. Signed-off-by: Ettore Di Giacinto Assisted-by: Claude:claude-opus-4-8 [Claude Code] --- backend/go/face-detect/gofacedetect.go | 15 +++++++++++++++ backend/go/voice-detect/govoicedetect.go | 16 ++++++++++++++++ 2 files changed, 31 insertions(+) diff --git a/backend/go/face-detect/gofacedetect.go b/backend/go/face-detect/gofacedetect.go index 5577a2404..4ad6c067c 100644 --- a/backend/go/face-detect/gofacedetect.go +++ b/backend/go/face-detect/gofacedetect.go @@ -8,6 +8,7 @@ import ( "math" "os" "path/filepath" + "strconv" "strings" "time" "unsafe" @@ -66,6 +67,20 @@ func (f *FaceDetect) Load(opts *pb.ModelOptions) error { f.opts.modelName = filepath.Base(model) } + // Propagate LocalAI's per-model thread budget to the engine. LocalAI spawns + // one backend process per model and serves requests concurrently, so the + // engine's own min(hardware_concurrency, 8) default can oversubscribe cores. + // FACEDETECT_THREADS is read by the engine at backend construction, so it + // must be set before the capi load. A non-positive Threads means "unset": + // leave the env alone so the engine keeps its sane default. + threads := opts.Threads + if threads > 0 { + if err := os.Setenv("FACEDETECT_THREADS", strconv.Itoa(int(threads))); err != nil { + return fmt.Errorf("face-detect: set FACEDETECT_THREADS: %w", err) + } + xlog.Info("face-detect: applying LocalAI thread budget", "threads", threads) + } + xlog.Info("face-detect: loading model", "model", model, "verify_threshold", f.opts.verifyThreshold, "abi", CppAbiVersion()) diff --git a/backend/go/voice-detect/govoicedetect.go b/backend/go/voice-detect/govoicedetect.go index ea648e896..2bbe74bd0 100644 --- a/backend/go/voice-detect/govoicedetect.go +++ b/backend/go/voice-detect/govoicedetect.go @@ -5,7 +5,9 @@ import ( "errors" "fmt" "math" + "os" "path/filepath" + "strconv" "strings" "time" "unsafe" @@ -63,6 +65,20 @@ func (v *VoiceDetect) Load(opts *pb.ModelOptions) error { v.opts.modelName = filepath.Base(model) } + // Propagate LocalAI's per-model thread budget to the engine. LocalAI spawns + // one backend process per model and serves requests concurrently, so the + // engine's own min(hardware_concurrency, 8) default can oversubscribe cores. + // VOICEDETECT_THREADS is read by the engine at backend construction, so it + // must be set before the capi load. A non-positive Threads means "unset": + // leave the env alone so the engine keeps its sane default. + threads := opts.Threads + if threads > 0 { + if err := os.Setenv("VOICEDETECT_THREADS", strconv.Itoa(int(threads))); err != nil { + return fmt.Errorf("voice-detect: set VOICEDETECT_THREADS: %w", err) + } + xlog.Info("voice-detect: applying LocalAI thread budget", "threads", threads) + } + xlog.Info("voice-detect: loading model", "model", model, "verify_threshold", v.opts.verifyThreshold, "abi", CppAbiVersion())