mirror of
https://github.com/mudler/LocalAI.git
synced 2026-06-20 14:49:09 -04:00
A larger physical batch (n_batch/n_ubatch) materially lifts MoE prefill on NVIDIA Blackwell consumer GPUs (sm_120/121, incl. GB10 / DGX Spark) — measured on a GB10 with Qwen3-Coder-30B-A3B, the prefill ceiling rises (ub512 ~2994 -> ub2048 ~3316 t/s) and saturates around 2048.

The heuristic lives in core/config alongside the other config overriders (ApplyInferenceDefaults, guessDefaultsFromFile/NGPULayers) — they all fill the ModelConfig from heuristics, so hardware tuning is the same domain and stays in one place.

It is parameterized on a GPU descriptor (not direct detection) so it works in both deployment shapes:
- Single host: SetDefaults applies it with the LocalGPU.
- Distributed: only the worker sees the GPU, so the worker reports its compute capability on registration (gpu_compute_capability -> BackendNode), and the router re-applies the SAME core/config heuristic for the SELECTED node before loading — fixing the case where the frontend has no GPU at all.

Explicit `batch:` always wins (only managed default values are touched).

xsysinfo gains NVIDIAComputeCapability() (detection only); all interpretation lives in core/config.

Tests: core/config, pkg/xsysinfo, core/services/nodes.

Assisted-by: Claude:opus-4.8 [Claude Code]
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
60 lines
2.1 KiB
Go
60 lines
2.1 KiB
Go
package config_test
|
|
|
|
import (
|
|
. "github.com/mudler/LocalAI/core/config"
|
|
. "github.com/onsi/ginkgo/v2"
|
|
. "github.com/onsi/gomega"
|
|
)
|
|
|
|
var _ = Describe("Hardware-driven config defaults", func() {
|
|
DescribeTable("GPU.IsNVIDIABlackwell (sm_12x consumer family)",
|
|
func(cc string, want bool) {
|
|
Expect(GPU{ComputeCapability: cc}.IsNVIDIABlackwell()).To(Equal(want))
|
|
},
|
|
Entry("GB10 12.1", "12.1", true),
|
|
Entry("RTX 50 12.0", "12.0", true),
|
|
Entry("future 13.0", "13.0", true),
|
|
Entry("Hopper 9.0", "9.0", false),
|
|
Entry("Ada 8.9", "8.9", false),
|
|
Entry("datacenter Blackwell sm_100 10.0", "10.0", false),
|
|
Entry("unknown", "", false),
|
|
)
|
|
|
|
Describe("PhysicalBatch / IsManagedPhysicalBatch", func() {
|
|
It("returns the Blackwell batch on Blackwell", func() {
|
|
Expect(PhysicalBatch(GPU{ComputeCapability: "12.1"})).To(Equal(BlackwellPhysicalBatch))
|
|
})
|
|
It("returns the default batch otherwise", func() {
|
|
Expect(PhysicalBatch(GPU{ComputeCapability: "9.0"})).To(Equal(DefaultPhysicalBatch))
|
|
Expect(PhysicalBatch(GPU{})).To(Equal(DefaultPhysicalBatch))
|
|
})
|
|
It("recognizes managed defaults but not explicit values", func() {
|
|
Expect(IsManagedPhysicalBatch(DefaultPhysicalBatch)).To(BeTrue())
|
|
Expect(IsManagedPhysicalBatch(BlackwellPhysicalBatch)).To(BeTrue())
|
|
Expect(IsManagedPhysicalBatch(1024)).To(BeFalse())
|
|
})
|
|
})
|
|
|
|
Describe("ApplyHardwareDefaults", func() {
|
|
It("raises an unset batch to 2048 on Blackwell", func() {
|
|
cfg := &ModelConfig{}
|
|
ApplyHardwareDefaults(cfg, GPU{ComputeCapability: "12.1"})
|
|
Expect(cfg.Batch).To(Equal(BlackwellPhysicalBatch))
|
|
})
|
|
It("leaves batch unset on non-Blackwell", func() {
|
|
cfg := &ModelConfig{}
|
|
ApplyHardwareDefaults(cfg, GPU{ComputeCapability: "9.0"})
|
|
Expect(cfg.Batch).To(Equal(0))
|
|
})
|
|
It("never overrides an explicit batch", func() {
|
|
cfg := &ModelConfig{}
|
|
cfg.Batch = 1024
|
|
ApplyHardwareDefaults(cfg, GPU{ComputeCapability: "12.1"})
|
|
Expect(cfg.Batch).To(Equal(1024))
|
|
})
|
|
It("no-ops on nil", func() {
|
|
Expect(func() { ApplyHardwareDefaults(nil, GPU{ComputeCapability: "12.1"}) }).ToNot(Panic())
|
|
})
|
|
})
|
|
})
|