diff --git a/backend/go/face-detect/gofacedetect.go b/backend/go/face-detect/gofacedetect.go index 5577a2404..4ad6c067c 100644 --- a/backend/go/face-detect/gofacedetect.go +++ b/backend/go/face-detect/gofacedetect.go @@ -8,6 +8,7 @@ import ( "math" "os" "path/filepath" + "strconv" "strings" "time" "unsafe" @@ -66,6 +67,20 @@ func (f *FaceDetect) Load(opts *pb.ModelOptions) error { f.opts.modelName = filepath.Base(model) } + // Propagate LocalAI's per-model thread budget to the engine. LocalAI spawns + // one backend process per model and serves requests concurrently, so the + // engine's own min(hardware_concurrency, 8) default can oversubscribe cores. + // FACEDETECT_THREADS is read by the engine at backend construction, so it + // must be set before the capi load. A non-positive Threads means "unset": + // leave the env alone so the engine keeps its sane default. + threads := opts.Threads + if threads > 0 { + if err := os.Setenv("FACEDETECT_THREADS", strconv.Itoa(int(threads))); err != nil { + return fmt.Errorf("face-detect: set FACEDETECT_THREADS: %w", err) + } + xlog.Info("face-detect: applying LocalAI thread budget", "threads", threads) + } + xlog.Info("face-detect: loading model", "model", model, "verify_threshold", f.opts.verifyThreshold, "abi", CppAbiVersion()) diff --git a/backend/go/voice-detect/govoicedetect.go b/backend/go/voice-detect/govoicedetect.go index ea648e896..2bbe74bd0 100644 --- a/backend/go/voice-detect/govoicedetect.go +++ b/backend/go/voice-detect/govoicedetect.go @@ -5,7 +5,9 @@ import ( "errors" "fmt" "math" + "os" "path/filepath" + "strconv" "strings" "time" "unsafe" @@ -63,6 +65,20 @@ func (v *VoiceDetect) Load(opts *pb.ModelOptions) error { v.opts.modelName = filepath.Base(model) } + // Propagate LocalAI's per-model thread budget to the engine. LocalAI spawns + // one backend process per model and serves requests concurrently, so the + // engine's own min(hardware_concurrency, 8) default can oversubscribe cores. + // VOICEDETECT_THREADS is read by the engine at backend construction, so it + // must be set before the capi load. A non-positive Threads means "unset": + // leave the env alone so the engine keeps its sane default. + threads := opts.Threads + if threads > 0 { + if err := os.Setenv("VOICEDETECT_THREADS", strconv.Itoa(int(threads))); err != nil { + return fmt.Errorf("voice-detect: set VOICEDETECT_THREADS: %w", err) + } + xlog.Info("voice-detect: applying LocalAI thread budget", "threads", threads) + } + xlog.Info("voice-detect: loading model", "model", model, "verify_threshold", v.opts.verifyThreshold, "abi", CppAbiVersion())