diff --git a/core/config/hardware_defaults.go b/core/config/hardware_defaults.go
index 617e01632..d4c6b09f7 100644
--- a/core/config/hardware_defaults.go
+++ b/core/config/hardware_defaults.go
@@ -70,10 +70,11 @@ func IsManagedPhysicalBatch(n int) bool {
 	return n == DefaultPhysicalBatch || n == BlackwellPhysicalBatch
 }
 
-// LocalGPU builds a GPU descriptor from local detection. Used by SetDefaults on
-// a single host; the distributed router builds the descriptor from the selected
-// node's reported info instead.
-func LocalGPU() GPU {
+// localGPU builds a GPU descriptor from local detection; SetDefaults uses it on
+// a single host (the distributed router builds the descriptor from the selected
+// node's reported info instead). It is a package-level var, not a func, so tests
+// can inject a deterministic device — detection does a live nvidia-smi call.
+var localGPU = func() GPU {
 	vendor, _ := xsysinfo.DetectGPUVendor()
 	return GPU{
 		Vendor: vendor,
diff --git a/core/config/hardware_defaults_internal_test.go b/core/config/hardware_defaults_internal_test.go
new file mode 100644
index 000000000..52c674c2d
--- /dev/null
+++ b/core/config/hardware_defaults_internal_test.go
@@ -0,0 +1,37 @@
+package config
+
+import (
+	. "github.com/onsi/ginkgo/v2"
+	. "github.com/onsi/gomega"
+)
+
+// Single-instance path: SetDefaults applies hardware defaults from the local
+// GPU. Each spec swaps the localGPU seam for a fixed descriptor (restored in
+// AfterEach), so the path is deterministic without a real GPU.
+var _ = Describe("SetDefaults hardware defaults (single-instance)", func() {
+	var orig func() GPU
+	BeforeEach(func() { orig = localGPU })
+	AfterEach(func() { localGPU = orig })
+
+	It("sets the physical batch on a local Blackwell GPU", func() {
+		localGPU = func() GPU { return GPU{ComputeCapability: "12.1"} }
+		cfg := &ModelConfig{}
+		cfg.SetDefaults()
+		Expect(cfg.Batch).To(Equal(BlackwellPhysicalBatch))
+	})
+
+	It("leaves batch unset on a non-Blackwell local GPU", func() {
+		localGPU = func() GPU { return GPU{ComputeCapability: "8.9"} }
+		cfg := &ModelConfig{}
+		cfg.SetDefaults()
+		Expect(cfg.Batch).To(Equal(0))
+	})
+
+	It("never overrides an explicit batch", func() {
+		localGPU = func() GPU { return GPU{ComputeCapability: "12.1"} }
+		cfg := &ModelConfig{}
+		cfg.Batch = 1024
+		cfg.SetDefaults()
+		Expect(cfg.Batch).To(Equal(1024))
+	})
+})
diff --git a/core/config/model_config.go b/core/config/model_config.go
index b57395916..75136ec6c 100644
--- a/core/config/model_config.go
+++ b/core/config/model_config.go
@@ -1114,7 +1114,7 @@ func (cfg *ModelConfig) SetDefaults(opts ...ConfigLoaderOption) {
 	// Apply hardware-driven defaults (e.g. a larger physical batch on Blackwell).
 	// Uses the local GPU here; in distributed mode the router re-applies the same
 	// heuristics for the selected node's GPU before loading. Explicit config wins.
-	ApplyHardwareDefaults(cfg, LocalGPU())
+	ApplyHardwareDefaults(cfg, localGPU())
 
 	// https://github.com/ggerganov/llama.cpp/blob/75cd4c77292034ecec587ecb401366f57338f7c0/common/sampling.h#L22
 	defaultTopP := 0.95