From bd12103ed491d0e1c2bd5fe9b8b2d6d401413beb Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Fri, 13 Feb 2026 22:23:06 +0100 Subject: [PATCH] chore: compute capabilities once (#8555) Signed-off-by: Ettore Di Giacinto --- core/cli/run.go | 2 +- pkg/system/capabilities.go | 57 ++++++++++++++++++++++---------------- pkg/system/state.go | 4 +++ 3 files changed, 38 insertions(+), 25 deletions(-) diff --git a/core/cli/run.go b/core/cli/run.go index a67b35fad..4b845f9a4 100644 --- a/core/cli/run.go +++ b/core/cli/run.go @@ -83,7 +83,7 @@ type RunCMD struct { EnableTracing bool `env:"LOCALAI_ENABLE_TRACING,ENABLE_TRACING" help:"Enable API tracing" group:"api"` TracingMaxItems int `env:"LOCALAI_TRACING_MAX_ITEMS" default:"1024" help:"Maximum number of traces to keep" group:"api"` AgentJobRetentionDays int `env:"LOCALAI_AGENT_JOB_RETENTION_DAYS,AGENT_JOB_RETENTION_DAYS" default:"30" help:"Number of days to keep agent job history (default: 30)" group:"api"` - OpenResponsesStoreTTL string `env:"LOCALAI_OPEN_RESPONSES_STORE_TTL,OPEN_RESPONSES_STORE_TTL" default:"0" help:"TTL for Open Responses store (e.g., 1h, 30m, 0 = no expiration)" group:"api"` + OpenResponsesStoreTTL string `env:"LOCALAI_OPEN_RESPONSES_STORE_TTL,OPEN_RESPONSES_STORE_TTL" default:"0" help:"TTL for Open Responses store (e.g., 1h, 30m, 0 = no expiration)" group:"api"` Version bool } diff --git a/pkg/system/capabilities.go b/pkg/system/capabilities.go index 35f409017..fc26f617f 100644 --- a/pkg/system/capabilities.go +++ b/pkg/system/capabilities.go @@ -45,9 +45,8 @@ const ( ) var ( - cuda13DirExists bool - cuda12DirExists bool - capabilityLogged bool + cuda13DirExists bool + cuda12DirExists bool ) func init() { @@ -72,9 +71,15 @@ func (s *SystemState) Capability(capMap map[string]string) string { } func (s *SystemState) getSystemCapabilities() string { + + if s.systemCapabilities != "" { + return s.systemCapabilities + } + capability := os.Getenv(capabilityEnv) if capability != "" { xlog.Info("Using forced capability from environment variable", "capability", capability, "env", capabilityEnv) + s.systemCapabilities = capability return capability } @@ -88,26 +93,26 @@ func (s *SystemState) getSystemCapabilities() string { // This might be used by e.g. container images to specify which // backends to pull in automatically when installing meta backends. if _, err := os.Stat(capabilityRunFile); err == nil { - if !capabilityLogged { - capability, err := os.ReadFile(capabilityRunFile) - if err == nil { - xlog.Info("Using forced capability run file", "capabilityRunFile", capabilityRunFile, "capability", string(capability), "env", capabilityRunFileEnv) - capabilityLogged = true - return strings.Trim(strings.TrimSpace(string(capability)), "\n") - } + capability, err := os.ReadFile(capabilityRunFile) + if err == nil { + xlog.Info("Using forced capability run file", "capabilityRunFile", capabilityRunFile, "capability", string(capability), "env", capabilityRunFileEnv) + s.systemCapabilities = strings.Trim(strings.TrimSpace(string(capability)), "\n") + return s.systemCapabilities } } // If we are on mac and arm64, we will return metal if runtime.GOOS == "darwin" && runtime.GOARCH == "arm64" { xlog.Info("Using metal capability (arm64 on mac)", "env", capabilityEnv) - return metal + s.systemCapabilities = metal + return s.systemCapabilities } // If we are on mac and x86, we will return darwin-x86 if runtime.GOOS == "darwin" && runtime.GOARCH == "amd64" { xlog.Info("Using darwin-x86 capability (amd64 on mac)", "env", capabilityEnv) - return darwinX86 + s.systemCapabilities = darwinX86 + return s.systemCapabilities } // If arm64 on linux and a nvidia gpu is detected, we will return nvidia-l4t @@ -115,39 +120,43 @@ func (s *SystemState) getSystemCapabilities() string { if s.GPUVendor == Nvidia { xlog.Info("Using nvidia-l4t capability (arm64 on linux)", "env", capabilityEnv) if cuda13DirExists { - return nvidiaL4TCuda13 + s.systemCapabilities = nvidiaL4TCuda13 + return s.systemCapabilities } if cuda12DirExists { - return nvidiaL4TCuda12 + s.systemCapabilities = nvidiaL4TCuda12 + return s.systemCapabilities } - return nvidiaL4T + s.systemCapabilities = nvidiaL4T + return s.systemCapabilities } } if cuda13DirExists { - return nvidiaCuda13 + s.systemCapabilities = nvidiaCuda13 + return s.systemCapabilities } if cuda12DirExists { - return nvidiaCuda12 + s.systemCapabilities = nvidiaCuda12 + return s.systemCapabilities } if s.GPUVendor == "" { xlog.Info("Default capability (no GPU detected)", "env", capabilityEnv) - return defaultCapability + s.systemCapabilities = defaultCapability + return s.systemCapabilities } - if !capabilityLogged { - xlog.Info("Capability automatically detected", "capability", s.GPUVendor, "env", capabilityEnv) - capabilityLogged = true - } // If vram is less than 4GB, let's default to CPU but warn the user that they can override that via env if s.VRAM <= 4*1024*1024*1024 { xlog.Warn("VRAM is less than 4GB, defaulting to CPU", "env", capabilityEnv) - return defaultCapability + s.systemCapabilities = defaultCapability + return s.systemCapabilities } - return s.GPUVendor + s.systemCapabilities = s.GPUVendor + return s.systemCapabilities } // BackendPreferenceTokens returns a list of substrings that represent the preferred diff --git a/pkg/system/state.go b/pkg/system/state.go index 7c6d8b724..6e8d2a335 100644 --- a/pkg/system/state.go +++ b/pkg/system/state.go @@ -19,6 +19,8 @@ type SystemState struct { Backend Backend Model Model VRAM uint64 + + systemCapabilities string } type SystemStateOptions func(*SystemState) @@ -53,5 +55,7 @@ func GetSystemState(opts ...SystemStateOptions) (*SystemState, error) { state.VRAM, _ = xsysinfo.TotalAvailableVRAM() xlog.Debug("Total available VRAM", "vram", state.VRAM) + state.getSystemCapabilities() + return state, nil }