From bca250e2bd42f55dd4fdc361e101491d8fb9217f Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Fri, 19 Jun 2026 22:02:14 +0000 Subject: [PATCH] =?UTF-8?q?feat(config):=20node-aware=20hardware=20default?= =?UTF-8?q?s=20=E2=80=94=20larger=20physical=20batch=20on=20Blackwell?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A larger physical batch (n_batch/n_ubatch) materially lifts MoE prefill on NVIDIA Blackwell consumer GPUs (sm_120/121, incl. GB10 / DGX Spark) — measured on a GB10 with Qwen3-Coder-30B-A3B, the prefill ceiling rises (ub512 ~2994 -> ub2048 ~3316 t/s) and saturates around 2048. The heuristic lives in core/config alongside the other config overriders (ApplyInferenceDefaults, guessDefaultsFromFile/NGPULayers) — they all fill the ModelConfig from heuristics, so hardware tuning is the same domain and stays in one place. It is parameterized on a GPU descriptor (not direct detection) so it works in both deployment shapes: - Single host: SetDefaults applies it with the LocalGPU. - Distributed: only the worker sees the GPU, so the worker reports its compute capability on registration (gpu_compute_capability -> BackendNode), and the router re-applies the SAME core/config heuristic for the SELECTED node before loading — fixing the case where the frontend has no GPU at all. Explicit `batch:` always wins (only managed default values are touched). xsysinfo gains NVIDIAComputeCapability() (detection only); all interpretation lives in core/config. Tests: core/config, pkg/xsysinfo, core/services/nodes. 
Assisted-by: Claude:opus-4.8 [Claude Code] Signed-off-by: Ettore Di Giacinto --- core/config/hardware_defaults.go | 118 ++++++++++++++++++ core/config/hardware_defaults_test.go | 59 +++++++++ core/config/model_config.go | 5 + core/http/endpoints/localai/nodes.go | 48 +++---- core/services/nodes/registry.go | 6 + core/services/nodes/router.go | 27 ++++ .../nodes/router_hardware_internal_test.go | 33 +++++ core/services/worker/registration.go | 5 + pkg/xsysinfo/computecap_internal_test.go | 23 ++++ pkg/xsysinfo/gpu.go | 98 +++++++++++++-- 10 files changed, 390 insertions(+), 32 deletions(-) create mode 100644 core/config/hardware_defaults.go create mode 100644 core/config/hardware_defaults_test.go create mode 100644 core/services/nodes/router_hardware_internal_test.go create mode 100644 pkg/xsysinfo/computecap_internal_test.go diff --git a/core/config/hardware_defaults.go b/core/config/hardware_defaults.go new file mode 100644 index 000000000..617e01632 --- /dev/null +++ b/core/config/hardware_defaults.go @@ -0,0 +1,118 @@ +package config + +import ( + "strconv" + "strings" + + "github.com/mudler/LocalAI/pkg/xsysinfo" + "github.com/mudler/xlog" +) + +// Hardware-driven model-config defaults. +// +// This sits alongside the other config overriders (ApplyInferenceDefaults for +// model families, guessDefaultsFromFile for GGUF/NGPULayers): they all +// heuristically fill ModelConfig values the user left unset. Hardware tuning is +// the same domain — "adjust the config from the device that will run it" — so +// it lives here rather than scattered into the backend or a separate package. +// +// The heuristics are parameterized on a GPU descriptor (not on direct +// detection) so they apply in both deployment shapes: SetDefaults passes the +// LocalGPU on a single host, and the distributed router passes the *selected +// node's* reported GPU before loading there (the frontend that loaded the +// config may have no GPU at all). 
+ +// GPU describes the device that will run a model. +type GPU struct { + // Vendor is "nvidia", "amd", … (matches xsysinfo vendor constants). + Vendor string + // ComputeCapability is the NVIDIA compute capability as "major.minor" + // (e.g. "12.1" for GB10 / DGX Spark). Empty for non-NVIDIA / unknown. + ComputeCapability string + // VRAM is total device memory in bytes (0 = unknown). + VRAM uint64 +} + +// Physical batch (n_batch / n_ubatch) defaults. +const ( + // DefaultPhysicalBatch is the conservative default when no hardware-specific + // tuning applies. Matches backend.DefaultBatchSize. + DefaultPhysicalBatch = 512 + // BlackwellPhysicalBatch is the default on NVIDIA Blackwell consumer GPUs + // (sm_12x: sm_120 RTX 50-series, sm_121 GB10 / DGX Spark). A larger physical + // batch materially lifts MoE prefill there (per-expert GEMM tiles fill + // better); measured on a GB10 with Qwen3-30B-A3B to saturate around 2048. + BlackwellPhysicalBatch = 2048 +) + +// IsNVIDIABlackwell reports whether the GPU's compute capability major is 12 or +// higher: the NVIDIA Blackwell consumer family (sm_12x) and anything newer. +// Datacenter Blackwell (B100/B200/GB200, sm_100 / cc 10.0) is intentionally not matched. +func (g GPU) IsNVIDIABlackwell() bool { + maj, _ := parseComputeCapability(g.ComputeCapability) + return maj >= 12 +} + +// PhysicalBatch returns the canonical physical batch (n_batch/n_ubatch) for the +// given hardware, used when the model config leaves batch unset. +func PhysicalBatch(g GPU) int { + if g.IsNVIDIABlackwell() { + return BlackwellPhysicalBatch + } + return DefaultPhysicalBatch +} + +// IsManagedPhysicalBatch reports whether n is a value PhysicalBatch assigns. +// Callers that re-tune a value chosen by an upstream host (the distributed router +// correcting the frontend's guess) use this to avoid clobbering an explicit user +// batch such as 1024; an explicit 512 or 2048 is indistinguishable from a default. 
+func IsManagedPhysicalBatch(n int) bool { + return n == DefaultPhysicalBatch || n == BlackwellPhysicalBatch +} + +// LocalGPU builds a GPU descriptor from local detection. Used by SetDefaults on +// a single host; the distributed router builds the descriptor from the selected +// node's reported info instead. +func LocalGPU() GPU { + vendor, _ := xsysinfo.DetectGPUVendor() + return GPU{ + Vendor: vendor, + ComputeCapability: xsysinfo.NVIDIAComputeCapability(), + } +} + +// ApplyHardwareDefaults fills ModelConfig values that depend on the target GPU +// and were left unset by the user. Currently: a larger physical batch on +// Blackwell. Explicit config always wins (we only touch zero values). +func ApplyHardwareDefaults(cfg *ModelConfig, gpu GPU) { + if cfg == nil { + return + } + if cfg.Batch == 0 && gpu.IsNVIDIABlackwell() { + cfg.Batch = BlackwellPhysicalBatch + xlog.Debug("[hardware_defaults] Blackwell GPU: defaulting physical batch", + "batch", cfg.Batch, "compute_cap", gpu.ComputeCapability) + } +} + +// parseComputeCapability splits a "major.minor" string into integer parts. +// Returns (-1, -1) when it can't be parsed. +func parseComputeCapability(cc string) (int, int) { + cc = strings.TrimSpace(cc) + if cc == "" { + return -1, -1 + } + majStr, minStr := cc, "0" + if dot := strings.IndexByte(cc, '.'); dot >= 0 { + majStr, minStr = cc[:dot], cc[dot+1:] + } + maj, err := strconv.Atoi(strings.TrimSpace(majStr)) + if err != nil { + return -1, -1 + } + min, err := strconv.Atoi(strings.TrimSpace(minStr)) + if err != nil { + min = 0 + } + return maj, min +} diff --git a/core/config/hardware_defaults_test.go b/core/config/hardware_defaults_test.go new file mode 100644 index 000000000..3d15ef14b --- /dev/null +++ b/core/config/hardware_defaults_test.go @@ -0,0 +1,59 @@ +package config_test + +import ( + . "github.com/mudler/LocalAI/core/config" + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" +) + +var _ = Describe("Hardware-driven config defaults", func() { + DescribeTable("GPU.IsNVIDIABlackwell (sm_12x consumer family)", + func(cc string, want bool) { + Expect(GPU{ComputeCapability: cc}.IsNVIDIABlackwell()).To(Equal(want)) + }, + Entry("GB10 12.1", "12.1", true), + Entry("RTX 50 12.0", "12.0", true), + Entry("future 13.0", "13.0", true), + Entry("Hopper 9.0", "9.0", false), + Entry("Ada 8.9", "8.9", false), + Entry("datacenter Blackwell sm_100 10.0", "10.0", false), + Entry("unknown", "", false), + ) + + Describe("PhysicalBatch / IsManagedPhysicalBatch", func() { + It("returns the Blackwell batch on Blackwell", func() { + Expect(PhysicalBatch(GPU{ComputeCapability: "12.1"})).To(Equal(BlackwellPhysicalBatch)) + }) + It("returns the default batch otherwise", func() { + Expect(PhysicalBatch(GPU{ComputeCapability: "9.0"})).To(Equal(DefaultPhysicalBatch)) + Expect(PhysicalBatch(GPU{})).To(Equal(DefaultPhysicalBatch)) + }) + It("recognizes managed defaults but not explicit values", func() { + Expect(IsManagedPhysicalBatch(DefaultPhysicalBatch)).To(BeTrue()) + Expect(IsManagedPhysicalBatch(BlackwellPhysicalBatch)).To(BeTrue()) + Expect(IsManagedPhysicalBatch(1024)).To(BeFalse()) + }) + }) + + Describe("ApplyHardwareDefaults", func() { + It("raises an unset batch to 2048 on Blackwell", func() { + cfg := &ModelConfig{} + ApplyHardwareDefaults(cfg, GPU{ComputeCapability: "12.1"}) + Expect(cfg.Batch).To(Equal(BlackwellPhysicalBatch)) + }) + It("leaves batch unset on non-Blackwell", func() { + cfg := &ModelConfig{} + ApplyHardwareDefaults(cfg, GPU{ComputeCapability: "9.0"}) + Expect(cfg.Batch).To(Equal(0)) + }) + It("never overrides an explicit batch", func() { + cfg := &ModelConfig{} + cfg.Batch = 1024 + ApplyHardwareDefaults(cfg, GPU{ComputeCapability: "12.1"}) + Expect(cfg.Batch).To(Equal(1024)) + }) + It("no-ops on nil", func() { + Expect(func() { ApplyHardwareDefaults(nil, GPU{ComputeCapability: "12.1"}) }).ToNot(Panic()) + }) + }) 
+}) diff --git a/core/config/model_config.go b/core/config/model_config.go index dfe151a64..b57395916 100644 --- a/core/config/model_config.go +++ b/core/config/model_config.go @@ -1111,6 +1111,11 @@ func (cfg *ModelConfig) SetDefaults(opts ...ConfigLoaderOption) { // This ensures gallery-installed and runtime-loaded models get optimal parameters. ApplyInferenceDefaults(cfg, cfg.Name, cfg.Model) + // Apply hardware-driven defaults (e.g. a larger physical batch on Blackwell). + // Uses the local GPU here; in distributed mode the router re-applies the same + // heuristics for the selected node's GPU before loading. Explicit config wins. + ApplyHardwareDefaults(cfg, LocalGPU()) + // https://github.com/ggerganov/llama.cpp/blob/75cd4c77292034ecec587ecb401366f57338f7c0/common/sampling.h#L22 defaultTopP := 0.95 defaultTopK := 40 diff --git a/core/http/endpoints/localai/nodes.go b/core/http/endpoints/localai/nodes.go index 5a6edab22..820cb137f 100644 --- a/core/http/endpoints/localai/nodes.go +++ b/core/http/endpoints/localai/nodes.go @@ -70,17 +70,20 @@ func GetNodeEndpoint(registry *nodes.NodeRegistry) echo.HandlerFunc { // RegisterNodeRequest is the request body for registering a new worker node. 
type RegisterNodeRequest struct { - Name string `json:"name"` - NodeType string `json:"node_type,omitempty"` // "backend" (default) or "agent" - Address string `json:"address"` - HTTPAddress string `json:"http_address,omitempty"` - Token string `json:"token,omitempty"` - TotalVRAM uint64 `json:"total_vram,omitempty"` - AvailableVRAM uint64 `json:"available_vram,omitempty"` - TotalRAM uint64 `json:"total_ram,omitempty"` - AvailableRAM uint64 `json:"available_ram,omitempty"` - GPUVendor string `json:"gpu_vendor,omitempty"` - Labels map[string]string `json:"labels,omitempty"` + Name string `json:"name"` + NodeType string `json:"node_type,omitempty"` // "backend" (default) or "agent" + Address string `json:"address"` + HTTPAddress string `json:"http_address,omitempty"` + Token string `json:"token,omitempty"` + TotalVRAM uint64 `json:"total_vram,omitempty"` + AvailableVRAM uint64 `json:"available_vram,omitempty"` + TotalRAM uint64 `json:"total_ram,omitempty"` + AvailableRAM uint64 `json:"available_ram,omitempty"` + GPUVendor string `json:"gpu_vendor,omitempty"` + // GPUComputeCapability is the worker GPU's compute capability ("major.minor", + // e.g. "12.1" for GB10). Used by the router for per-arch option tuning. + GPUComputeCapability string `json:"gpu_compute_capability,omitempty"` + Labels map[string]string `json:"labels,omitempty"` // MaxReplicasPerModel is the per-node cap on replicas of any single model. // Workers older than this field omit it; we coerce 0 → 1 below to preserve // historical single-replica behavior. 
@@ -152,17 +155,18 @@ func RegisterNodeEndpoint(registry *nodes.NodeRegistry, expectedToken string, au } node := &nodes.BackendNode{ - Name: req.Name, - NodeType: nodeType, - Address: req.Address, - HTTPAddress: req.HTTPAddress, - TokenHash: tokenHash, - TotalVRAM: req.TotalVRAM, - AvailableVRAM: req.AvailableVRAM, - TotalRAM: req.TotalRAM, - AvailableRAM: req.AvailableRAM, - GPUVendor: req.GPUVendor, - MaxReplicasPerModel: maxReplicasPerModel, + Name: req.Name, + NodeType: nodeType, + Address: req.Address, + HTTPAddress: req.HTTPAddress, + TokenHash: tokenHash, + TotalVRAM: req.TotalVRAM, + AvailableVRAM: req.AvailableVRAM, + TotalRAM: req.TotalRAM, + AvailableRAM: req.AvailableRAM, + GPUVendor: req.GPUVendor, + GPUComputeCapability: req.GPUComputeCapability, + MaxReplicasPerModel: maxReplicasPerModel, } ctx := c.Request().Context() diff --git a/core/services/nodes/registry.go b/core/services/nodes/registry.go index 3d34d086c..aafee13cb 100644 --- a/core/services/nodes/registry.go +++ b/core/services/nodes/registry.go @@ -36,6 +36,11 @@ type BackendNode struct { TotalRAM uint64 `gorm:"column:total_ram" json:"total_ram"` // Total system RAM in bytes (fallback when no GPU) AvailableRAM uint64 `gorm:"column:available_ram" json:"available_ram"` // Available system RAM in bytes GPUVendor string `gorm:"column:gpu_vendor;size:32" json:"gpu_vendor"` // nvidia, amd, intel, vulkan, unknown + // GPUComputeCapability is the worker GPU's compute capability as + // "major.minor" (e.g. "12.1" for GB10 / DGX Spark). Reported by the worker + // on registration; used by the router to pick per-arch options (e.g. a + // larger physical batch on Blackwell). Empty when unknown / non-NVIDIA. + GPUComputeCapability string `gorm:"column:gpu_compute_capability;size:16" json:"gpu_compute_capability"` // MaxReplicasPerModel caps how many replicas of any one model can run on // this node concurrently. 
Default 1 preserves the historical "one // (node, model)" assumption; set higher (via worker --max-replicas-per-model) @@ -69,6 +74,7 @@ const ( ColReservedVRAM = "reserved_vram" ColAvailableRAM = "available_ram" ColGPUVendor = "gpu_vendor" + ColGPUComputeCap = "gpu_compute_capability" ColLastHeartbeat = "last_heartbeat" ColMaxReplicasPerModel = "max_replicas_per_model" ) diff --git a/core/services/nodes/router.go b/core/services/nodes/router.go index e5ce52306..df778a689 100644 --- a/core/services/nodes/router.go +++ b/core/services/nodes/router.go @@ -12,6 +12,7 @@ import ( "strings" "time" + "github.com/mudler/LocalAI/core/config" "github.com/mudler/LocalAI/core/services/advisorylock" "github.com/mudler/LocalAI/core/services/nodes/prefixcache" "github.com/mudler/LocalAI/pkg/distributedhdr" @@ -138,6 +139,27 @@ type scheduleLoadResult struct { ReplicaIndex int } +// applyNodeHardwareDefaults tunes node-agnostic ModelOptions to the GPU of the +// node that was actually selected to run the model, reusing the same hardware +// heuristics as single-host config loading (core/config). On Blackwell it +// raises the physical batch; on non-Blackwell it resets a hardware-default that +// an upstream host (e.g. a Blackwell frontend in distributed mode) guessed higher. +// Only values the heuristics themselves manage (512 / 2048) are touched, so an +// explicit user batch such as 1024 is kept; an explicit 512 or 2048 is re-tuned like a default. +func applyNodeHardwareDefaults(opts *pb.ModelOptions, node *BackendNode) { + if opts == nil || node == nil { + return + } + if !config.IsManagedPhysicalBatch(int(opts.NBatch)) { + return + } + opts.NBatch = int32(config.PhysicalBatch(config.GPU{ + Vendor: node.GPUVendor, + ComputeCapability: node.GPUComputeCapability, + VRAM: node.TotalVRAM, + })) +} + // scheduleAndLoad is the shared core for loading a model on a new node. // Used by both Route() (for first-time loads) and ScheduleAndLoadModel() (for reconciler scale-ups). 
// @@ -153,6 +175,11 @@ func (r *SmartRouter) scheduleAndLoad(ctx context.Context, backendType, tracking return nil, fmt.Errorf("no available nodes: %w", err) } + // Tune node-agnostic options to the SELECTED node's GPU. Only now do we know + // which node (and its compute capability) will run the model — the frontend + // that built modelOpts may have no GPU at all in distributed mode. + applyNodeHardwareDefaults(modelOpts, node) + // Pre-stage model files via FileStager before loading loadOpts := modelOpts if r.fileStager != nil && modelOpts != nil { diff --git a/core/services/nodes/router_hardware_internal_test.go b/core/services/nodes/router_hardware_internal_test.go new file mode 100644 index 000000000..361e02700 --- /dev/null +++ b/core/services/nodes/router_hardware_internal_test.go @@ -0,0 +1,33 @@ +package nodes + +import ( + "github.com/mudler/LocalAI/core/config" + pb "github.com/mudler/LocalAI/pkg/grpc/proto" + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" +) + +var _ = Describe("applyNodeHardwareDefaults", func() { + It("raises a managed default batch on a Blackwell node", func() { + opts := &pb.ModelOptions{NBatch: config.DefaultPhysicalBatch} + applyNodeHardwareDefaults(opts, &BackendNode{GPUComputeCapability: "12.1"}) + Expect(opts.NBatch).To(BeEquivalentTo(config.BlackwellPhysicalBatch)) + }) + + It("resets a Blackwell guess on a non-Blackwell node", func() { + // frontend (Blackwell) guessed high, but the selected node is not Blackwell + opts := &pb.ModelOptions{NBatch: config.BlackwellPhysicalBatch} + applyNodeHardwareDefaults(opts, &BackendNode{GPUComputeCapability: "9.0"}) + Expect(opts.NBatch).To(BeEquivalentTo(config.DefaultPhysicalBatch)) + }) + + It("never overrides an explicit (non-managed) batch", func() { + opts := &pb.ModelOptions{NBatch: 1024} + applyNodeHardwareDefaults(opts, &BackendNode{GPUComputeCapability: "12.1"}) + Expect(opts.NBatch).To(BeEquivalentTo(int32(1024))) + }) + + It("no-ops on nil inputs", func() { + 
Expect(func() { applyNodeHardwareDefaults(nil, nil) }).ToNot(Panic()) + }) +}) diff --git a/core/services/worker/registration.go b/core/services/worker/registration.go index 87a8a7966..432cc845b 100644 --- a/core/services/worker/registration.go +++ b/core/services/worker/registration.go @@ -73,6 +73,10 @@ func (cfg *Config) registrationBody() map[string]any { // Detect GPU info for VRAM-aware scheduling totalVRAM, _ := xsysinfo.TotalAvailableVRAM() gpuVendor, _ := xsysinfo.DetectGPUVendor() + // Compute capability (e.g. "12.1" for GB10) lets the router pick per-arch + // options (e.g. larger physical batch on Blackwell). Detected on the worker + // because only the worker sees the GPU in distributed mode. + gpuComputeCap := xsysinfo.NVIDIAComputeCapability() maxReplicas := cfg.MaxReplicasPerModel if maxReplicas < 1 { @@ -85,6 +89,7 @@ func (cfg *Config) registrationBody() map[string]any { "total_vram": totalVRAM, "available_vram": totalVRAM, // initially all VRAM is available "gpu_vendor": gpuVendor, + "gpu_compute_capability": gpuComputeCap, "max_replicas_per_model": maxReplicas, } diff --git a/pkg/xsysinfo/computecap_internal_test.go b/pkg/xsysinfo/computecap_internal_test.go new file mode 100644 index 000000000..3bf2602d0 --- /dev/null +++ b/pkg/xsysinfo/computecap_internal_test.go @@ -0,0 +1,23 @@ +package xsysinfo + +import ( + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" +) + +var _ = Describe("parseComputeCap", func() { + DescribeTable("splits major.minor", + func(in string, maj, min int) { + m, n := parseComputeCap(in) + Expect(m).To(Equal(maj)) + Expect(n).To(Equal(min)) + }, + Entry("GB10 / DGX Spark", "12.1", 12, 1), + Entry("RTX 50-series", "12.0", 12, 0), + Entry("Hopper", "9.0", 9, 0), + Entry("major only", "12", 12, 0), + Entry("whitespace", " 12.1 ", 12, 1), + Entry("empty", "", -1, -1), + Entry("garbage", "abc", -1, -1), + ) +}) diff --git a/pkg/xsysinfo/gpu.go b/pkg/xsysinfo/gpu.go index a5575edb8..f0185ddeb 100644 --- a/pkg/xsysinfo/gpu.go +++ b/pkg/xsysinfo/gpu.go @@ -38,9 +38,9 @@ var UnifiedMemoryDevices = []string{ // GPUMemoryInfo contains real-time GPU memory usage information type GPUMemoryInfo struct { - Index int `json:"index"` - Name string `json:"name"` - Vendor string `json:"vendor"` + Index int `json:"index"` + Name string `json:"name"` + Vendor string `json:"vendor"` // BDF is the canonical PCI bus address (dddd:bb:dd.f) when known. // Populated by detection paths that can attribute the device to a // PCI location (clinfo, future amdgpu/nvidia paths); empty for @@ -307,6 +307,84 @@ func GetGPUAggregateInfo() GPUAggregateInfo { return aggregate } +var ( + computeCapOnce sync.Once + computeCapResult string +) + +// NVIDIAComputeCapability returns the highest NVIDIA GPU compute capability on +// this host as a "major.minor" string (e.g. "12.1" for GB10 / DGX Spark), or "" +// when nvidia-smi is unavailable or reports none. Detected once and cached. +// +// This runs where the GPU actually is. In distributed mode it is reported by +// each worker on registration so the router can make per-node decisions rather +// than guessing from the (possibly GPU-less) frontend host. 
+func NVIDIAComputeCapability() string { + computeCapOnce.Do(func() { + computeCapResult = detectNVIDIAComputeCapability() + }) + return computeCapResult +} + +func detectNVIDIAComputeCapability() string { + if _, err := exec.LookPath("nvidia-smi"); err != nil { + return "" + } + + cmd := exec.Command("nvidia-smi", "--query-gpu=compute_cap", "--format=csv,noheader") + + var stdout, stderr bytes.Buffer + cmd.Stdout = &stdout + cmd.Stderr = &stderr + + if err := cmd.Run(); err != nil { + xlog.Debug("nvidia-smi compute_cap query failed", "error", err, "stderr", stderr.String()) + return "" + } + + best := "" + bestMajor, bestMinor := -1, -1 + for _, line := range strings.Split(strings.TrimSpace(stdout.String()), "\n") { + line = strings.TrimSpace(line) + if line == "" { + continue + } + maj, min := parseComputeCap(line) + if maj < 0 { + continue + } + if maj > bestMajor || (maj == bestMajor && min > bestMinor) { + bestMajor, bestMinor, best = maj, min, line + } + } + if best != "" { + xlog.Debug("NVIDIA compute capability detected", "compute_cap", best) + } + return best +} + +// parseComputeCap splits a "major.minor" compute-capability string into its +// integer parts. Returns (-1, -1) if it can't be parsed. 
+func parseComputeCap(cc string) (int, int) { + cc = strings.TrimSpace(cc) + if cc == "" { + return -1, -1 + } + majStr, minStr := cc, "0" + if dot := strings.IndexByte(cc, '.'); dot >= 0 { + majStr, minStr = cc[:dot], cc[dot+1:] + } + maj, err := strconv.Atoi(strings.TrimSpace(majStr)) + if err != nil { + return -1, -1 + } + min, err := strconv.Atoi(strings.TrimSpace(minStr)) + if err != nil { + min = 0 + } + return maj, min +} + // getNVIDIAGPUMemory queries NVIDIA GPUs using nvidia-smi func getNVIDIAGPUMemory() []GPUMemoryInfo { // Check if nvidia-smi is available @@ -866,12 +944,12 @@ func getVulkanGPUMemory() []GPUMemoryInfo { } type vulkanGPUTextInfo struct { - index int - name string - deviceType string - totalVRAM uint64 - budgetVRAM uint64 - usageVRAM uint64 + index int + name string + deviceType string + totalVRAM uint64 + budgetVRAM uint64 + usageVRAM uint64 } func parseVulkanGPUMemoryText(r io.Reader) []GPUMemoryInfo { @@ -909,7 +987,7 @@ func parseVulkanGPUMemoryText(r io.Reader) []GPUMemoryInfo { } else if current.usageVRAM != 0 && current.budgetVRAM == 0 { current.budgetVRAM = current.totalVRAM - current.usageVRAM } else if current.usageVRAM == 0 && current.budgetVRAM == 0 { - current.usageVRAM = 0 + current.usageVRAM = 0 current.budgetVRAM = current.totalVRAM }