mirror of
https://github.com/mudler/LocalAI.git
synced 2026-06-07 00:06:51 -04:00
* fix(gpu-detect): clinfo --json fallback for Intel discrete VRAM
ghw returns 0 VRAM for any i915-driven Intel GPU because the kernel
driver doesn't expose VRAM through the sysfs paths ghw checks (no
mem_info_vram_total — that's an amdgpu interface). xpu-smi, the
canonical Intel tool, isn't in the oneAPI base image (it lives in a
separate xpumanager package). The capability gate added in 19c92c70
("default to CPU if there is less than 4GB of GPU available") then
demotes the host to CPU even on a 16 GB Arc A770.
clinfo ships with the OpenCL ICD loader and is present in the oneAPI
base image, so plug it in as the last-resort Intel VRAM source:
xpu-smi -> intel_gpu_top -> clinfo --json
The parser drops UMA devices via HOST_UNIFIED_MEMORY=true so an iGPU
sibling can't double-count system RAM, and dedups by PCI BDF when
multiple ICDs enumerate the same physical device (POCL caps reported
GLOBAL_MEM_SIZE at 4 GiB; the largest non-capped value wins).
The subprocess is wrapped in a 2s timeout and memoised with sync.OnceValue
— GPU hardware is static for the process lifetime. The Intel branch
also short-circuits when ghw saw no Intel vendor, so NVIDIA-only hosts
don't pay the spawn cost.
Verified end-to-end on Intel Arc A770: ghw -> 0, clinfo path reports
16,225,243,136 bytes (15.11 GiB), capability gate now passes naturally
without LOCALAI_FORCE_META_BACKEND_CAPABILITY=intel.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
Signed-off-by: Richard Palethorpe <io@richiejp.com>
* feat(gpu-detect): live VRAM usage from DRM fdinfo
The clinfo fallback reports total VRAM correctly but leaves UsedVRAM
at 0 because OpenCL has no portable live-memory property — the UI
ends up showing 0% utilisation even when llama-cpp is actually
holding gigabytes in device memory.
Fill that gap with the standardised Linux DRM fdinfo interface
(Documentation/gpu/drm-usage-stats.rst, kernel ≥5.19). Walking
/proc/<pid>/fdinfo for any fd that points at /dev/dri/render* yields
drm-total-<region> / drm-resident-<region> keys; aggregate per
render-node, resolve the render node to a PCI BDF via
/sys/class/drm/<name>/device, and merge the result into the matching
GPUMemoryInfo by BDF.
Region naming is driver-defined — i915 uses "local0" for device-local
VRAM, amdgpu and xe use "vram0" — so a prefix-match on local/vram
covers all three DRM drivers that LocalAI cares about. The system, gtt
and stolen regions are deliberately excluded since they're host RAM
mirrors and would double-count against system RAM.
GPUMemoryInfo gains an optional BDF field (`bdf,omitempty` in JSON)
so future vendor-specific detectors can plug into the same matcher.
Empty BDF skips the merge — non-PCI devices and detection paths that
don't surface PCI location keep their existing behaviour.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
Signed-off-by: Richard Palethorpe <io@richiejp.com>
---------
Signed-off-by: Richard Palethorpe <io@richiejp.com>
Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
192 lines
5.9 KiB
Go
192 lines
5.9 KiB
Go
package xsysinfo
|
|
|
|
import (
|
|
. "github.com/onsi/ginkgo/v2"
|
|
. "github.com/onsi/gomega"
|
|
)
|
|
|
|
// nvidiaRTX5070TiJSON is a clinfo-style JSON fixture describing a single
// GPU-type device with HOST_UNIFIED_MEMORY=false. It carries both the
// KHR bus-info string and the NVIDIA-specific integer PCI fields.
const nvidiaRTX5070TiJSON = `{
  "devices": [
    {
      "online": [
        {
          "CL_DEVICE_NAME": "NVIDIA GeForce RTX 5070 Ti",
          "CL_DEVICE_VENDOR": "NVIDIA Corporation",
          "CL_DEVICE_VENDOR_ID": 4318,
          "CL_DEVICE_TYPE": {"raw": 4, "type": ["CL_DEVICE_TYPE_GPU"]},
          "CL_DEVICE_HOST_UNIFIED_MEMORY": false,
          "CL_DEVICE_GLOBAL_MEM_SIZE": 16609378304,
          "CL_DEVICE_PCI_BUS_INFO_KHR": "PCI-E, 0000:01:00.0",
          "CL_DEVICE_PCI_BUS_ID_NV": 1,
          "CL_DEVICE_PCI_SLOT_ID_NV": 0,
          "CL_DEVICE_PCI_DOMAIN_ID_NV": 0
        }
      ]
    }
  ]
}`
|
|
|
|
// intelArcPlusIGPUJSON exercises the HOST_UNIFIED_MEMORY=true filter:
// one platform lists an Arc A770 (unified memory false) next to a
// UHD 770 iGPU (unified memory true). The iGPU sibling must be dropped
// to avoid double-counting system RAM as VRAM.
const intelArcPlusIGPUJSON = `{
  "devices": [
    {
      "online": [
        {
          "CL_DEVICE_NAME": "Intel(R) Arc(TM) A770 Graphics",
          "CL_DEVICE_VENDOR": "Intel(R) Corporation",
          "CL_DEVICE_VENDOR_ID": 32902,
          "CL_DEVICE_TYPE": {"raw": 4, "type": ["CL_DEVICE_TYPE_GPU"]},
          "CL_DEVICE_HOST_UNIFIED_MEMORY": false,
          "CL_DEVICE_GLOBAL_MEM_SIZE": 16225243136,
          "CL_DEVICE_PCI_BUS_INFO_KHR": "0000:03:00.0"
        },
        {
          "CL_DEVICE_NAME": "Intel(R) UHD Graphics 770",
          "CL_DEVICE_VENDOR": "Intel(R) Corporation",
          "CL_DEVICE_VENDOR_ID": 32902,
          "CL_DEVICE_TYPE": {"raw": 4, "type": ["CL_DEVICE_TYPE_GPU"]},
          "CL_DEVICE_HOST_UNIFIED_MEMORY": true,
          "CL_DEVICE_GLOBAL_MEM_SIZE": 25000000000,
          "CL_DEVICE_PCI_BUS_INFO_KHR": "0000:00:02.0"
        }
      ]
    }
  ]
}`
|
|
|
|
// dualICDSameDeviceJSON exercises BDF dedup: two platform entries report
// a device at the same PCI address (0000:03:00.0) with different
// GLOBAL_MEM_SIZE values, the second being exactly 4 GiB (POCL caps
// at 4 GiB for legacy alloc-size compatibility).
const dualICDSameDeviceJSON = `{
  "devices": [
    {
      "online": [
        {
          "CL_DEVICE_NAME": "Intel(R) Arc(TM) A770 Graphics",
          "CL_DEVICE_VENDOR_ID": 32902,
          "CL_DEVICE_TYPE": {"raw": 4, "type": ["CL_DEVICE_TYPE_GPU"]},
          "CL_DEVICE_HOST_UNIFIED_MEMORY": false,
          "CL_DEVICE_GLOBAL_MEM_SIZE": 16225243136,
          "CL_DEVICE_PCI_BUS_INFO_KHR": "0000:03:00.0"
        }
      ]
    },
    {
      "online": [
        {
          "CL_DEVICE_NAME": "pthread-Arc-A770",
          "CL_DEVICE_VENDOR_ID": 32902,
          "CL_DEVICE_TYPE": {"raw": 4, "type": ["CL_DEVICE_TYPE_GPU"]},
          "CL_DEVICE_HOST_UNIFIED_MEMORY": false,
          "CL_DEVICE_GLOBAL_MEM_SIZE": 4294967296,
          "CL_DEVICE_PCI_BUS_INFO_KHR": "0000:03:00.0"
        }
      ]
    }
  ]
}`
|
|
|
|
// cpuOnlyJSON: a POCL-only host whose sole device has type
// CL_DEVICE_TYPE_CPU. Filtered by CL_DEVICE_TYPE — without this guard
// CPU memory would be mistakenly reported as VRAM.
const cpuOnlyJSON = `{
  "devices": [
    {
      "online": [
        {
          "CL_DEVICE_NAME": "pthread-x86_64",
          "CL_DEVICE_VENDOR": "GenuineIntel",
          "CL_DEVICE_VENDOR_ID": 32902,
          "CL_DEVICE_TYPE": {"raw": 2, "type": ["CL_DEVICE_TYPE_CPU"]},
          "CL_DEVICE_HOST_UNIFIED_MEMORY": true,
          "CL_DEVICE_GLOBAL_MEM_SIZE": 33324494848
        }
      ]
    }
  ]
}`
|
|
|
|
// noBDFJSON: an ICD that reports no PCI fields at all (neither the KHR
// bus-info string nor the NVIDIA integer fields). The device is still
// counted but doesn't participate in dedup.
const noBDFJSON = `{
  "devices": [
    {
      "online": [
        {
          "CL_DEVICE_NAME": "Some Accelerator GPU",
          "CL_DEVICE_VENDOR_ID": 0,
          "CL_DEVICE_TYPE": {"raw": 4, "type": ["CL_DEVICE_TYPE_GPU"]},
          "CL_DEVICE_HOST_UNIFIED_MEMORY": false,
          "CL_DEVICE_GLOBAL_MEM_SIZE": 8589934592
        }
      ]
    }
  ]
}`
|
|
|
|
var _ = Describe("parseCLInfoJSON", func() {
|
|
DescribeTable("classifies and dedups clinfo devices",
|
|
func(input string, wantCount int, want []GPUMemoryInfo) {
|
|
got := parseCLInfoJSON([]byte(input))
|
|
Expect(got).To(HaveLen(wantCount))
|
|
for i, w := range want {
|
|
Expect(got[i].Name).To(Equal(w.Name))
|
|
Expect(got[i].Vendor).To(Equal(w.Vendor))
|
|
Expect(got[i].TotalVRAM).To(Equal(w.TotalVRAM))
|
|
}
|
|
},
|
|
Entry("empty object returns nothing", `{}`, 0, nil),
|
|
Entry("malformed JSON returns nothing without panicking", `{not valid`, 0, nil),
|
|
Entry("CPU-only platform is filtered out", cpuOnlyJSON, 0, nil),
|
|
Entry("NVIDIA dGPU is recognised by vendor ID and BDF",
|
|
nvidiaRTX5070TiJSON, 1, []GPUMemoryInfo{{
|
|
Name: "NVIDIA GeForce RTX 5070 Ti",
|
|
Vendor: VendorNVIDIA,
|
|
TotalVRAM: 16609378304,
|
|
}}),
|
|
Entry("Intel Arc with iGPU sibling: iGPU dropped, Arc reported",
|
|
intelArcPlusIGPUJSON, 1, []GPUMemoryInfo{{
|
|
Name: "Intel(R) Arc(TM) A770 Graphics",
|
|
Vendor: VendorIntel,
|
|
TotalVRAM: 16225243136,
|
|
}}),
|
|
Entry("Dual ICD enumerating same Arc: deduped, larger size wins",
|
|
dualICDSameDeviceJSON, 1, []GPUMemoryInfo{{
|
|
Name: "Intel(R) Arc(TM) A770 Graphics",
|
|
Vendor: VendorIntel,
|
|
TotalVRAM: 16225243136, // not the POCL 4 GiB cap
|
|
}}),
|
|
Entry("Device without PCI info is still counted",
|
|
noBDFJSON, 1, []GPUMemoryInfo{{
|
|
Name: "Some Accelerator GPU",
|
|
Vendor: VendorUnknown,
|
|
TotalVRAM: 8589934592,
|
|
}}),
|
|
)
|
|
})
|
|
|
|
var _ = Describe("normalizeBDF", func() {
|
|
DescribeTable("canonicalises PCI bus addresses",
|
|
func(in, want string) {
|
|
Expect(normalizeBDF(in)).To(Equal(want))
|
|
},
|
|
Entry("already canonical", "0000:03:00.0", "0000:03:00.0"),
|
|
Entry("missing domain", "03:00.0", "0000:03:00.0"),
|
|
Entry("uppercase hex", "AB:CD.0", "0000:ab:cd.0"),
|
|
)
|
|
})
|
|
|
|
var _ = Describe("clinfoBDF", func() {
|
|
It("synthesises a canonical BDF from NVIDIA pre-KHR integer fields", func() {
|
|
// Older NVIDIA OpenCL exposes BDF via three integer fields instead
|
|
// of the KHR string; the synthesised result must be canonical.
|
|
d := clinfoDevice{
|
|
PCIBusNV: 1,
|
|
PCISlotNV: 0,
|
|
PCIDomainNV: 0,
|
|
}
|
|
Expect(clinfoBDF(d)).To(Equal("0000:01:00.0"))
|
|
})
|
|
})
|