Files
LocalAI/pkg/system/capabilities.go
2026-01-07 16:18:27 +01:00

158 lines
4.9 KiB
Go

// Package system provides system detection utilities, including GPU/vendor detection
// and capability classification used to select optimal backends at runtime.
package system
import (
"os"
"path/filepath"
"runtime"
"strings"
"github.com/mudler/xlog"
)
// Capability identifiers that are not tied to a specific NVIDIA CUDA release.
const (
	// defaultCapability is the catch-all capability used when nothing more
	// specific applies.
	defaultCapability = "default"

	// GPU vendor / graphics API identifiers.
	nvidia = "nvidia"
	amd    = "amd"
	intel  = "intel"
	vulkan = "vulkan"

	// macOS identifiers: metal is reported on darwin/arm64, darwinX86 on
	// darwin/amd64.
	metal     = "metal"
	darwinX86 = "darwin-x86"
)

// NVIDIA-specific capability identifiers, split by platform and CUDA major
// version.
const (
	nvidiaCuda12 = "nvidia-cuda-12"
	nvidiaCuda13 = "nvidia-cuda-13"

	// L4T variants are reported on linux/arm64 hosts with an NVIDIA GPU.
	nvidiaL4T       = "nvidia-l4t"
	nvidiaL4TCuda12 = "nvidia-l4t-cuda-12"
	nvidiaL4TCuda13 = "nvidia-l4t-cuda-13"
)

// Knobs that let the operator override automatic capability detection.
const (
	// capabilityEnv names the environment variable whose non-empty value is
	// used verbatim as the capability.
	capabilityEnv = "LOCALAI_FORCE_META_BACKEND_CAPABILITY"

	// capabilityRunFileEnv names the environment variable that overrides the
	// location of the capability run file.
	capabilityRunFileEnv = "LOCALAI_FORCE_META_BACKEND_CAPABILITY_RUN_FILE"

	// defaultRunFile is the capability run file consulted when
	// capabilityRunFileEnv is unset.
	defaultRunFile = "/run/localai/capability"
)
var (
	// cuda13DirExists records whether /usr/local/cuda-13 could be stat'ed
	// when the package was initialised.
	cuda13DirExists bool
	// cuda12DirExists records whether /usr/local/cuda-12 could be stat'ed
	// when the package was initialised.
	cuda12DirExists bool
)

// init probes the CUDA 13 and CUDA 12 install locations once at package
// load and caches the results in cuda13DirExists and cuda12DirExists.
func init() {
	// The leading "/" is required: filepath.Join("usr", "local", ...) builds a
	// path relative to the process working directory, so the probe would only
	// find CUDA when the process happened to be started from "/".
	_, err := os.Stat(filepath.Join("/", "usr", "local", "cuda-13"))
	cuda13DirExists = err == nil

	_, err = os.Stat(filepath.Join("/", "usr", "local", "cuda-12"))
	cuda12DirExists = err == nil
}
// Capability returns the capability key to use out of capMap.
//
// The system capability is resolved via getSystemCapabilities. If that value
// is present as a key in capMap it is returned unchanged; otherwise the
// catch-all "default" capability is returned. Only the keys of capMap are
// consulted, never its values.
func (s *SystemState) Capability(capMap map[string]string) string {
	detected := s.getSystemCapabilities()

	if _, ok := capMap[detected]; !ok {
		// No entry for the detected capability: fall back to the catch-all.
		xlog.Debug("The requested capability was not found, using default capability", "reportedCapability", detected, "capMap", capMap)
		return defaultCapability
	}

	xlog.Debug("Using reported capability", "reportedCapability", detected, "capMap", capMap)
	return detected
}
// getSystemCapabilities resolves the capability string for the current
// system. Sources are consulted in order and the first match wins:
//
//  1. The LOCALAI_FORCE_META_BACKEND_CAPABILITY environment variable, when
//     non-empty (returned verbatim).
//  2. The capability run file (path taken from
//     LOCALAI_FORCE_META_BACKEND_CAPABILITY_RUN_FILE, falling back to
//     /run/localai/capability), when it can be read and is not blank. Its
//     content is returned with surrounding whitespace removed.
//  3. Platform rules: "metal" on darwin/arm64, "darwin-x86" on darwin/amd64,
//     and an "nvidia-l4t" variant on linux/arm64 when GPUVendor is "nvidia".
//  4. "nvidia-cuda-13" / "nvidia-cuda-12" when the matching CUDA directory
//     was found at package initialisation.
//  5. "default" when no GPU vendor is set or VRAM is at most 4 GiB;
//     otherwise the GPU vendor itself.
func (s *SystemState) getSystemCapabilities() string {
	if forced := os.Getenv(capabilityEnv); forced != "" {
		xlog.Info("Using forced capability from environment variable", "capability", forced, "env", capabilityEnv)
		return forced
	}

	runFile := defaultRunFile
	if override := os.Getenv(capabilityRunFileEnv); override != "" {
		runFile = override
	}

	// The run file might be used by e.g. container images to specify which
	// backends to pull in automatically when installing meta backends.
	// Reading directly (instead of Stat followed by ReadFile) avoids a second
	// syscall and the window in which the file could disappear in between.
	content, err := os.ReadFile(runFile)
	switch {
	case err == nil:
		// A blank file is treated like an unset environment variable: it does
		// not force anything and detection continues below.
		if forced := strings.TrimSpace(string(content)); forced != "" {
			xlog.Info("Using forced capability run file", "capabilityRunFile", runFile, "capability", forced, "env", capabilityRunFileEnv)
			return forced
		}
		xlog.Warn("Capability run file is empty, falling back to automatic detection", "capabilityRunFile", runFile, "env", capabilityRunFileEnv)
	case !os.IsNotExist(err):
		// A missing file is the normal case and stays silent; anything else
		// (permissions, path is a directory, ...) is worth surfacing.
		xlog.Warn("Unable to read capability run file, falling back to automatic detection", "capabilityRunFile", runFile, "error", err)
	}

	switch {
	case runtime.GOOS == "darwin" && runtime.GOARCH == "arm64":
		// If we are on mac and arm64, we will return metal
		xlog.Info("Using metal capability (arm64 on mac)", "env", capabilityEnv)
		return metal
	case runtime.GOOS == "darwin" && runtime.GOARCH == "amd64":
		// If we are on mac and x86, we will return darwin-x86
		xlog.Info("Using darwin-x86 capability (amd64 on mac)", "env", capabilityEnv)
		return darwinX86
	case runtime.GOOS == "linux" && runtime.GOARCH == "arm64" && s.GPUVendor == nvidia:
		// If arm64 on linux and a nvidia gpu is detected, we will return an
		// nvidia-l4t variant, preferring the newest CUDA directory found.
		xlog.Info("Using nvidia-l4t capability (arm64 on linux)", "env", capabilityEnv)
		if cuda13DirExists {
			return nvidiaL4TCuda13
		}
		if cuda12DirExists {
			return nvidiaL4TCuda12
		}
		return nvidiaL4T
	}

	// A CUDA directory takes precedence over the detected GPU vendor and the
	// VRAM check below.
	if cuda13DirExists {
		return nvidiaCuda13
	}
	if cuda12DirExists {
		return nvidiaCuda12
	}

	if s.GPUVendor == "" {
		xlog.Info("Default capability (no GPU detected)", "env", capabilityEnv)
		return defaultCapability
	}

	xlog.Info("Capability automatically detected", "capability", s.GPUVendor, "env", capabilityEnv)

	// With 4 GiB of VRAM or less, default to CPU but warn the user that they
	// can override that via env.
	if s.VRAM <= 4*1024*1024*1024 {
		xlog.Warn("VRAM is less than 4GB, defaulting to CPU", "env", capabilityEnv)
		return defaultCapability
	}

	return s.GPUVendor
}
// BackendPreferenceTokens reports, in order of preference, the substrings
// that identify suitable backend implementations for the current system
// capability. Callers can match these tokens against candidate backend names
// to choose among several concrete backends that share one alias (e.g.,
// "llama-cpp").
//
// The lower-cased system capability is matched by prefix against the known
// families; when none matches, only "cpu" is returned.
func (s *SystemState) BackendPreferenceTokens() []string {
	detected := strings.ToLower(s.getSystemCapabilities())

	// Ordered table: the first entry whose prefix matches wins. It is built
	// per call so every caller receives its own slice.
	preferences := []struct {
		prefix string
		tokens []string
	}{
		{nvidia, []string{"cuda", "vulkan", "cpu"}},
		{amd, []string{"rocm", "hip", "vulkan", "cpu"}},
		{intel, []string{"sycl", intel, "cpu"}},
		{metal, []string{"metal", "cpu"}},
		{darwinX86, []string{"darwin-x86", "cpu"}},
		{vulkan, []string{"vulkan", "cpu"}},
	}

	for _, pref := range preferences {
		if strings.HasPrefix(detected, pref.prefix) {
			return pref.tokens
		}
	}

	return []string{"cpu"}
}