Files
LocalAI/core/config/hardware_defaults_test.go
Ettore Di Giacinto bca250e2bd feat(config): node-aware hardware defaults — larger physical batch on Blackwell
A larger physical batch (n_batch/n_ubatch) materially lifts MoE prefill on
NVIDIA Blackwell consumer GPUs (sm_120/121, incl. GB10 / DGX Spark) — measured
on a GB10 with Qwen3-Coder-30B-A3B, the prefill ceiling rises (ub512 ~2994 ->
ub2048 ~3316 t/s) and saturates around 2048.

The heuristic lives in core/config alongside the other config overriders
(ApplyInferenceDefaults, guessDefaultsFromFile/NGPULayers) — they all fill the
ModelConfig from heuristics, so hardware tuning is the same domain and stays in
one place. It is parameterized on a GPU descriptor (not direct detection) so it
works in both deployment shapes:

- Single host: SetDefaults applies it with the LocalGPU.
- Distributed: only the worker sees the GPU, so the worker reports its compute
  capability on registration (gpu_compute_capability -> BackendNode), and the
  router re-applies the SAME core/config heuristic for the SELECTED node before
  loading — fixing the case where the frontend has no GPU at all.

Explicit `batch:` always wins (only managed default values are touched).
xsysinfo gains NVIDIAComputeCapability() (detection only); all interpretation
lives in core/config. Tests: core/config, pkg/xsysinfo, core/services/nodes.

Assisted-by: Claude:opus-4.8 [Claude Code]
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2026-06-19 22:02:14 +00:00

60 lines
2.1 KiB
Go

package config_test
import (
. "github.com/mudler/LocalAI/core/config"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)
// Specs for the hardware-driven defaults exposed by core/config: Blackwell
// detection from a compute-capability string, physical batch selection, and
// the ApplyHardwareDefaults overrider.
var _ = Describe("Hardware-driven config defaults", func() {
	const (
		// Compute capabilities shared by the specs below; the detection
		// table expects the first to be Blackwell and the second not.
		blackwellCC = "12.1"
		hopperCC    = "9.0"

		// A batch size the specs treat as user-supplied: it is expected to
		// be reported as unmanaged and to survive ApplyHardwareDefaults.
		explicitBatch = 1024
	)

	// Each entry maps a compute-capability string to the expected result of
	// GPU.IsNVIDIABlackwell. Note that the table also pins a hypothetical
	// "13.0" as true and the datacenter "10.0" and the empty string as false.
	DescribeTable("GPU.IsNVIDIABlackwell (sm_12x consumer family)",
		func(cc string, want bool) {
			got := GPU{ComputeCapability: cc}.IsNVIDIABlackwell()
			Expect(got).To(Equal(want))
		},
		Entry("GB10 12.1", "12.1", true),
		Entry("RTX 50 12.0", "12.0", true),
		Entry("future 13.0", "13.0", true),
		Entry("Hopper 9.0", "9.0", false),
		Entry("Ada 8.9", "8.9", false),
		Entry("datacenter Blackwell sm_100 10.0", "10.0", false),
		Entry("unknown", "", false),
	)

	Describe("PhysicalBatch / IsManagedPhysicalBatch", func() {
		It("returns the Blackwell batch on Blackwell", func() {
			got := PhysicalBatch(GPU{ComputeCapability: blackwellCC})
			Expect(got).To(Equal(BlackwellPhysicalBatch))
		})

		It("returns the default batch otherwise", func() {
			// A known non-Blackwell GPU and a zero-value descriptor must both
			// fall back to the default.
			onHopper := PhysicalBatch(GPU{ComputeCapability: hopperCC})
			Expect(onHopper).To(Equal(DefaultPhysicalBatch))

			onUnknown := PhysicalBatch(GPU{})
			Expect(onUnknown).To(Equal(DefaultPhysicalBatch))
		})

		It("recognizes managed defaults but not explicit values", func() {
			Expect(IsManagedPhysicalBatch(DefaultPhysicalBatch)).To(BeTrue())
			Expect(IsManagedPhysicalBatch(BlackwellPhysicalBatch)).To(BeTrue())
			Expect(IsManagedPhysicalBatch(explicitBatch)).To(BeFalse())
		})
	})

	Describe("ApplyHardwareDefaults", func() {
		It("raises an unset batch to 2048 on Blackwell", func() {
			// The assertion compares against the exported constant rather
			// than the literal named in the spec title.
			cfg := &ModelConfig{}
			gpu := GPU{ComputeCapability: blackwellCC}

			ApplyHardwareDefaults(cfg, gpu)

			Expect(cfg.Batch).To(Equal(BlackwellPhysicalBatch))
		})

		It("leaves batch unset on non-Blackwell", func() {
			cfg := &ModelConfig{}
			gpu := GPU{ComputeCapability: hopperCC}

			ApplyHardwareDefaults(cfg, gpu)

			Expect(cfg.Batch).To(Equal(0))
		})

		It("never overrides an explicit batch", func() {
			cfg := &ModelConfig{}
			cfg.Batch = explicitBatch
			gpu := GPU{ComputeCapability: blackwellCC}

			ApplyHardwareDefaults(cfg, gpu)

			Expect(cfg.Batch).To(Equal(explicitBatch))
		})

		It("no-ops on nil", func() {
			// A nil config must be tolerated without panicking.
			applyToNil := func() {
				ApplyHardwareDefaults(nil, GPU{ComputeCapability: blackwellCC})
			}
			Expect(applyToNil).NotTo(Panic())
		})
	})
})