feat(recon): honor LocalAI per-model threads in voice/face-detect backends

LocalAI spawns one backend process per model and serves requests concurrently, so the engines' own min(hardware_concurrency, 8) default can oversubscribe cores. Forward the per-model Threads value from the gRPC LoadModel options into the engine via VOICEDETECT_THREADS / FACEDETECT_THREADS (read at backend construction) before the capi load. A non-positive Threads is treated as unset, leaving the engine default. Signed-off-by: Ettore Di Giacinto <mudler@localai.io> Assisted-by: Claude:claude-opus-4-8 [Claude Code]
2026-06-22 15:49:12 -04:00 · 2026-06-22 13:32:02 +00:00
parent 7c2a347e79
commit e05dece93c
2 changed files with 31 additions and 0 deletions
--- a/backend/go/face-detect/gofacedetect.go
+++ b/backend/go/face-detect/gofacedetect.go
@@ -8,6 +8,7 @@ import (
 	"math"
 	"os"
 	"path/filepath"
+	"strconv"
 	"strings"
 	"time"
 	"unsafe"
@@ -66,6 +67,20 @@ func (f *FaceDetect) Load(opts *pb.ModelOptions) error {
 		f.opts.modelName = filepath.Base(model)
 	}

+	// Propagate LocalAI's per-model thread budget to the engine. LocalAI spawns
+	// one backend process per model and serves requests concurrently, so the
+	// engine's own min(hardware_concurrency, 8) default can oversubscribe cores.
+	// FACEDETECT_THREADS is read by the engine at backend construction, so it
+	// must be set before the capi load. A non-positive Threads means "unset":
+	// leave the env alone so the engine keeps its sane default.
+	threads := opts.Threads
+	if threads > 0 {
+		if err := os.Setenv("FACEDETECT_THREADS", strconv.Itoa(int(threads))); err != nil {
+			return fmt.Errorf("face-detect: set FACEDETECT_THREADS: %w", err)
+		}
+		xlog.Info("face-detect: applying LocalAI thread budget", "threads", threads)
+	}
+
 	xlog.Info("face-detect: loading model", "model", model,
 		"verify_threshold", f.opts.verifyThreshold, "abi", CppAbiVersion())

--- a/backend/go/voice-detect/govoicedetect.go
+++ b/backend/go/voice-detect/govoicedetect.go
@@ -5,7 +5,9 @@ import (
 	"errors"
 	"fmt"
 	"math"
+	"os"
 	"path/filepath"
+	"strconv"
 	"strings"
 	"time"
 	"unsafe"
@@ -63,6 +65,20 @@ func (v *VoiceDetect) Load(opts *pb.ModelOptions) error {
 		v.opts.modelName = filepath.Base(model)
 	}

+	// Propagate LocalAI's per-model thread budget to the engine. LocalAI spawns
+	// one backend process per model and serves requests concurrently, so the
+	// engine's own min(hardware_concurrency, 8) default can oversubscribe cores.
+	// VOICEDETECT_THREADS is read by the engine at backend construction, so it
+	// must be set before the capi load. A non-positive Threads means "unset":
+	// leave the env alone so the engine keeps its sane default.
+	threads := opts.Threads
+	if threads > 0 {
+		if err := os.Setenv("VOICEDETECT_THREADS", strconv.Itoa(int(threads))); err != nil {
+			return fmt.Errorf("voice-detect: set VOICEDETECT_THREADS: %w", err)
+		}
+		xlog.Info("voice-detect: applying LocalAI thread budget", "threads", threads)
+	}
+
 	xlog.Info("voice-detect: loading model", "model", model,
 		"verify_threshold", v.opts.verifyThreshold, "abi", CppAbiVersion())