mirror of
https://github.com/mudler/LocalAI.git
synced 2026-06-12 10:47:23 -04:00
* fix(gpu-detect): clinfo --json fallback for Intel discrete VRAM
ghw returns 0 VRAM for any i915-driven Intel GPU because the kernel
driver doesn't expose VRAM through the sysfs paths ghw checks (no
mem_info_vram_total — that's an amdgpu interface). xpu-smi, the
canonical Intel tool, isn't in the oneAPI base image (it lives in a
separate xpumanager package). The capability gate added in 19c92c70
("default to CPU if there is less than 4GB of GPU available") then
demotes the host to CPU even on a 16 GB Arc A770.
clinfo ships with the OpenCL ICD loader and is present in the oneAPI
base image, so plug it in as the last-resort Intel VRAM source:
xpu-smi -> intel_gpu_top -> clinfo --json
The parser drops UMA devices via HOST_UNIFIED_MEMORY=true so an iGPU
sibling can't double-count system RAM, and dedups by PCI BDF when
multiple ICDs enumerate the same physical device (POCL caps reported
GLOBAL_MEM_SIZE at 4 GiB; the largest non-capped value wins).
The subprocess is wrapped in a 2s timeout and its result is memoised with sync.OnceValue,
since GPU hardware is static for the process lifetime. The Intel branch
also short-circuits when ghw saw no Intel vendor, so NVIDIA-only hosts
don't pay the spawn cost.
Verified end-to-end on Intel Arc A770: ghw -> 0, clinfo path reports
16,225,243,136 bytes (15.11 GiB), capability gate now passes naturally
without LOCALAI_FORCE_META_BACKEND_CAPABILITY=intel.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
Signed-off-by: Richard Palethorpe <io@richiejp.com>
* feat(gpu-detect): live VRAM usage from DRM fdinfo
The clinfo fallback reports total VRAM correctly but leaves UsedVRAM
at 0 because OpenCL has no portable live-memory property — the UI
ends up showing 0% utilisation even when llama-cpp is actually
holding gigabytes in device memory.
Fill that gap with the standardised Linux DRM fdinfo interface
(Documentation/gpu/drm-usage-stats.rst, kernel ≥5.19). Walking
/proc/<pid>/fdinfo for any fd that points at /dev/dri/render* yields
drm-total-<region> / drm-resident-<region> keys; aggregate per
render-node, resolve the render node to a PCI BDF via
/sys/class/drm/<name>/device, and merge the result into the matching
GPUMemoryInfo by BDF.
Region naming is driver-defined — i915 uses "local0" for device-local
VRAM, amdgpu and xe use "vram0" — so a prefix-match on local/vram
covers all three DRM drivers that LocalAI cares about. system/gtt/
stolen regions are deliberately excluded since they're host RAM
mirrors and would double-count against system RAM.
GPUMemoryInfo gains an optional BDF field (`bdf,omitempty` in JSON)
so future vendor-specific detectors can plug into the same matcher.
Empty BDF skips the merge — non-PCI devices and detection paths that
don't surface PCI location keep their existing behaviour.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
Signed-off-by: Richard Palethorpe <io@richiejp.com>
---------
Signed-off-by: Richard Palethorpe <io@richiejp.com>
Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
143 lines
4.3 KiB
Go
143 lines
4.3 KiB
Go
package xsysinfo
|
|
|
|
import (
|
|
. "github.com/onsi/ginkgo/v2"
|
|
. "github.com/onsi/gomega"
|
|
)
|
|
|
|
// i915FdInfo is a captured /proc/<pid>/fdinfo/<fd> from a llama-cpp
|
|
// process holding an Intel Arc render-node fd. "local0" is i915's
|
|
// device-local VRAM region; "system0" is a host-visible buffer mirror.
// The parseDRMFdInfoVRAM table in this file expects 5396348*1024 bytes
// for this fixture, i.e. the drm-total-local0 value alone.
|
|
const i915FdInfo = `pos: 0
|
|
flags: 02100002
|
|
mnt_id: 16
|
|
ino: 1234
|
|
drm-driver: i915
|
|
drm-client-id: 42
|
|
drm-pdev: 0000:03:00.0
|
|
drm-total-system0: 312 KiB
|
|
drm-resident-system0: 312 KiB
|
|
drm-total-local0: 5396348 KiB
|
|
drm-resident-local0: 5396348 KiB
|
|
drm-total-stolen-local0: 0
|
|
drm-resident-stolen-local0: 0
|
|
drm-engine-render: 1234567 ns
|
|
drm-engine-copy: 2345 ns
|
|
drm-engine-video: 0 ns
|
|
drm-engine-capacity-video: 2
|
|
`
|
|
|
|
// amdgpuFdInfo mirrors the i915 schema with AMD's region names. amdgpu
|
|
// uses "vram0" for device-local and "gtt0" for host-pinned memory.
// Sizes in this fixture carry an explicit "B" unit suffix. The
// parseDRMFdInfoVRAM table in this file expects 8589934592 bytes for it,
// i.e. the drm-total-vram0 value alone, without the gtt0 figure.
|
|
const amdgpuFdInfo = `pos: 0
|
|
flags: 02100002
|
|
mnt_id: 16
|
|
drm-driver: amdgpu
|
|
drm-pdev: 0000:0a:00.0
|
|
drm-total-vram0: 8589934592 B
|
|
drm-resident-vram0: 8589934592 B
|
|
drm-total-gtt0: 1048576 B
|
|
drm-resident-gtt0: 1048576 B
|
|
drm-engine-gfx: 123456 ns
|
|
`
|
|
|
|
// systemOnlyFdInfo: a DRM client that only allocates host buffers
|
|
// (CPU-only fallback, GUI compositor, etc.). VRAM total must be 0.
// The fixture does list a local0 region, but with a value of 0; the
// 16384 KiB reported under system0 must not show up in the result.
|
|
const systemOnlyFdInfo = `drm-driver: i915
|
|
drm-total-system0: 16384 KiB
|
|
drm-resident-system0: 16384 KiB
|
|
drm-total-local0: 0
|
|
`
|
|
|
|
// noDRMFdInfo: fdinfo of a non-DRM fd (e.g. socket, pipe); it contains
|
|
// no drm-* keys at all. Parser must return 0 without panicking.
|
|
const noDRMFdInfo = `pos: 0
|
|
flags: 02100002
|
|
mnt_id: 16
|
|
ino: 5678
|
|
`
|
|
|
|
// bareBytesFdInfo: older kernels emit byte counts without a unit
|
|
// suffix. Must be parsed as raw bytes, not multiplied by 1024.
// The parseDRMFdInfoVRAM table in this file expects exactly 17179869184
// for this xe fixture.
|
|
const bareBytesFdInfo = `drm-driver: xe
|
|
drm-total-vram0: 17179869184
|
|
drm-resident-vram0: 17179869184
|
|
`
|
|
|
|
var _ = Describe("parseDRMFdInfoVRAM", func() {
|
|
DescribeTable("extracts device-local VRAM totals from fdinfo",
|
|
func(input string, want uint64) {
|
|
Expect(parseDRMFdInfoVRAM([]byte(input))).To(Equal(want))
|
|
},
|
|
Entry("empty input", "", uint64(0)),
|
|
Entry("non-DRM fdinfo", noDRMFdInfo, uint64(0)),
|
|
Entry("system-only client reports 0 VRAM", systemOnlyFdInfo, uint64(0)),
|
|
Entry("i915 local0 in KiB", i915FdInfo, uint64(5396348*1024)),
|
|
Entry("amdgpu vram0 in bytes", amdgpuFdInfo, uint64(8589934592)),
|
|
Entry("xe vram0 bare bytes", bareBytesFdInfo, uint64(17179869184)),
|
|
)
|
|
})
|
|
|
|
var _ = Describe("parseDRMFdInfoBytes", func() {
|
|
DescribeTable("parses sizes with and without unit suffixes",
|
|
func(in string, want uint64) {
|
|
Expect(parseDRMFdInfoBytes(in)).To(Equal(want))
|
|
},
|
|
Entry("bare bytes", "\t1024", uint64(1024)),
|
|
Entry("KiB", "\t1024 KiB", uint64(1024*1024)),
|
|
Entry("MiB", "\t512 MiB", uint64(512*1024*1024)),
|
|
Entry("GiB", "\t2 GiB", uint64(2*1024*1024*1024)),
|
|
Entry("unrecognised unit falls through to raw bytes", "\t1024 B", uint64(1024)),
|
|
Entry("empty", "", uint64(0)),
|
|
Entry("not a number", "\tnotanumber KiB", uint64(0)),
|
|
)
|
|
})
|
|
|
|
var _ = Describe("isVRAMRegion", func() {
|
|
DescribeTable("recognises device-local regions",
|
|
func(region string, want bool) {
|
|
Expect(isVRAMRegion(region)).To(Equal(want))
|
|
},
|
|
Entry("local0", "local0", true),
|
|
Entry("local1", "local1", true),
|
|
Entry("vram0", "vram0", true),
|
|
Entry("vram1", "vram1", true),
|
|
Entry("system0", "system0", false),
|
|
Entry("gtt0", "gtt0", false),
|
|
Entry("stolen-local0", "stolen-local0", false),
|
|
Entry("stolen-system0", "stolen-system0", false),
|
|
Entry("cpu", "cpu", false),
|
|
)
|
|
})
|
|
|
|
var _ = Describe("applyDRMUsage", func() {
|
|
const total = uint64(16225243136)
|
|
base := GPUMemoryInfo{Name: "Arc A770", TotalVRAM: total, FreeVRAM: total}
|
|
|
|
It("leaves defaults untouched when there is no usage", func() {
|
|
got := applyDRMUsage(base, 0)
|
|
Expect(got.UsedVRAM).To(Equal(uint64(0)))
|
|
Expect(got.FreeVRAM).To(Equal(total))
|
|
Expect(got.UsagePercent).To(Equal(float64(0)))
|
|
})
|
|
|
|
It("rederives free and percent from usage", func() {
|
|
used := uint64(5_396_348 * 1024)
|
|
got := applyDRMUsage(base, used)
|
|
Expect(got.UsedVRAM).To(Equal(used))
|
|
Expect(got.FreeVRAM).To(Equal(total - used))
|
|
Expect(got.UsagePercent).To(Equal(float64(used) / float64(total) * 100))
|
|
})
|
|
|
|
It("clamps over-commit to total", func() {
|
|
got := applyDRMUsage(base, total*2)
|
|
Expect(got.UsedVRAM).To(Equal(total))
|
|
Expect(got.FreeVRAM).To(Equal(uint64(0)))
|
|
})
|
|
|
|
It("guards against div-by-zero on zero total", func() {
|
|
got := applyDRMUsage(GPUMemoryInfo{}, 1024)
|
|
Expect(got.UsagePercent).To(Equal(float64(0)))
|
|
})
|
|
})
|