mirror of
https://github.com/mudler/LocalAI.git
synced 2026-06-22 15:49:12 -04:00
feat(recon): honor LocalAI per-model threads in voice/face-detect backends
LocalAI spawns one backend process per model and serves requests concurrently, so the engines' own min(hardware_concurrency, 8) default can oversubscribe cores. Forward the per-model Threads value from the gRPC LoadModel options into the engine via the VOICEDETECT_THREADS / FACEDETECT_THREADS environment variables, which the engine reads at backend construction and which must therefore be set before the capi load. A non-positive Threads is treated as unset, leaving the engine default.

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Assisted-by: Claude:claude-opus-4-8 [Claude Code]
This commit is contained in:
@@ -8,6 +8,7 @@ import (
|
||||
"math"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
"unsafe"
|
||||
@@ -66,6 +67,20 @@ func (f *FaceDetect) Load(opts *pb.ModelOptions) error {
|
||||
f.opts.modelName = filepath.Base(model)
|
||||
}
|
||||
|
||||
// Propagate LocalAI's per-model thread budget to the engine. LocalAI spawns
|
||||
// one backend process per model and serves requests concurrently, so the
|
||||
// engine's own min(hardware_concurrency, 8) default can oversubscribe cores.
|
||||
// FACEDETECT_THREADS is read by the engine at backend construction, so it
|
||||
// must be set before the capi load. A non-positive Threads means "unset":
|
||||
// leave the env alone so the engine keeps its sane default.
|
||||
threads := opts.Threads
|
||||
if threads > 0 {
|
||||
if err := os.Setenv("FACEDETECT_THREADS", strconv.Itoa(int(threads))); err != nil {
|
||||
return fmt.Errorf("face-detect: set FACEDETECT_THREADS: %w", err)
|
||||
}
|
||||
xlog.Info("face-detect: applying LocalAI thread budget", "threads", threads)
|
||||
}
|
||||
|
||||
xlog.Info("face-detect: loading model", "model", model,
|
||||
"verify_threshold", f.opts.verifyThreshold, "abi", CppAbiVersion())
|
||||
|
||||
|
||||
@@ -5,7 +5,9 @@ import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"math"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
"unsafe"
|
||||
@@ -63,6 +65,20 @@ func (v *VoiceDetect) Load(opts *pb.ModelOptions) error {
|
||||
v.opts.modelName = filepath.Base(model)
|
||||
}
|
||||
|
||||
// Propagate LocalAI's per-model thread budget to the engine. LocalAI spawns
|
||||
// one backend process per model and serves requests concurrently, so the
|
||||
// engine's own min(hardware_concurrency, 8) default can oversubscribe cores.
|
||||
// VOICEDETECT_THREADS is read by the engine at backend construction, so it
|
||||
// must be set before the capi load. A non-positive Threads means "unset":
|
||||
// leave the env alone so the engine keeps its sane default.
|
||||
threads := opts.Threads
|
||||
if threads > 0 {
|
||||
if err := os.Setenv("VOICEDETECT_THREADS", strconv.Itoa(int(threads))); err != nil {
|
||||
return fmt.Errorf("voice-detect: set VOICEDETECT_THREADS: %w", err)
|
||||
}
|
||||
xlog.Info("voice-detect: applying LocalAI thread budget", "threads", threads)
|
||||
}
|
||||
|
||||
xlog.Info("voice-detect: loading model", "model", model,
|
||||
"verify_threshold", v.opts.verifyThreshold, "abi", CppAbiVersion())
|
||||
|
||||
|
||||
Reference in New Issue
Block a user