fix(intel): VRAM detection (#9944)

* fix(gpu-detect): clinfo --json fallback for Intel discrete VRAM

ghw returns 0 VRAM for any i915-driven Intel GPU because the kernel
driver doesn't expose VRAM through the sysfs paths ghw checks (no
mem_info_vram_total — that's an amdgpu interface). xpu-smi, the
canonical Intel tool, isn't in the oneAPI base image (it lives in a
separate xpumanager package). The capability gate added in 19c92c70
("default to CPU if there is less than 4GB of GPU available") then
demotes the host to CPU even on a 16 GB Arc A770.

clinfo ships with the OpenCL ICD loader and is present in the oneAPI
base image, so plug it in as the last-resort Intel VRAM source:

  xpu-smi -> intel_gpu_top -> clinfo --json

The parser drops UMA devices via HOST_UNIFIED_MEMORY=true so an iGPU
sibling can't double-count system RAM, and dedups by PCI BDF when
multiple ICDs enumerate the same physical device (POCL caps reported
GLOBAL_MEM_SIZE at 4 GiB; the largest non-capped value wins).

Subprocess is wrapped in a 2s timeout and memoised with sync.OnceValue
— GPU hardware is static for the process lifetime. The Intel branch
also short-circuits when ghw saw no Intel vendor, so NVIDIA-only hosts
don't pay the spawn cost.

Verified end-to-end on Intel Arc A770: ghw -> 0, clinfo path reports
16,225,243,136 bytes (15.11 GiB), capability gate now passes naturally
without LOCALAI_FORCE_META_BACKEND_CAPABILITY=intel.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
Signed-off-by: Richard Palethorpe <io@richiejp.com>

* feat(gpu-detect): live VRAM usage from DRM fdinfo

The clinfo fallback reports total VRAM correctly but leaves UsedVRAM
at 0 because OpenCL has no portable live-memory property — the UI
ends up showing 0% utilisation even when llama-cpp is actually
holding gigabytes in device memory.

Fill that gap with the standardised Linux DRM fdinfo interface
(Documentation/gpu/drm-usage-stats.rst, kernel ≥5.19). Walking
/proc/<pid>/fdinfo for any fd that points at /dev/dri/render* yields
drm-total-<region> / drm-resident-<region> keys; aggregate per
render-node, resolve the render node to a PCI BDF via
/sys/class/drm/<name>/device, and merge the result into the matching
GPUMemoryInfo by BDF.

Region naming is driver-defined — i915 uses "local0" for device-local
VRAM, amdgpu and xe use "vram0" — so a prefix-match on local/vram
covers all three DRM drivers that LocalAI cares about. system/gtt/
stolen regions are deliberately excluded since they're host RAM
mirrors and would double-count against system RAM.

GPUMemoryInfo gains an optional BDF field (`bdf,omitempty` in JSON)
so future vendor-specific detectors can plug into the same matcher.
Empty BDF skips the merge — non-PCI devices and detection paths that
don't surface PCI location keep their existing behaviour.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
Signed-off-by: Richard Palethorpe <io@richiejp.com>

---------

Signed-off-by: Richard Palethorpe <io@richiejp.com>
Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Richard Palethorpe
2026-05-25 08:29:00 +01:00
committed by GitHub
parent 6a80e23733
commit 90ea327178
6 changed files with 759 additions and 6 deletions

221
pkg/xsysinfo/clinfo.go Normal file
View File

@@ -0,0 +1,221 @@
package xsysinfo
import (
"bytes"
"context"
"encoding/json"
"fmt"
"os/exec"
"strings"
"sync"
"time"
"github.com/mudler/xlog"
)
const (
// clDeviceTypeGPU is the type string isGPU looks for inside the
// CL_DEVICE_TYPE "type" array.
clDeviceTypeGPU = "CL_DEVICE_TYPE_GPU"
// clinfoTimeout bounds how long runCLInfo lets the clinfo
// subprocess run before its context is cancelled.
clinfoTimeout = 2 * time.Second
)
// clinfoOutput is the subset of `clinfo --json` we read. clinfo emits
// one entry under "devices" per platform, in the same order as
// "platforms"; live devices are under "online".
type clinfoOutput struct {
// Devices holds one element per platform entry; only the "online"
// list of each entry is decoded.
Devices []struct {
// Online is the device list parseCLInfoJSON iterates over.
Online []clinfoDevice `json:"online"`
} `json:"devices"`
}
// clinfoDevice is the set of per-device properties decoded from one
// entry of an "online" list. Properties absent from the JSON keep
// their Go zero value.
type clinfoDevice struct {
// Name is whitespace-trimmed and copied into GPUMemoryInfo.Name.
Name string `json:"CL_DEVICE_NAME"`
// Vendor is the vendor string; clinfoVendor falls back to it when
// VendorID is not one of the IDs it recognises.
Vendor string `json:"CL_DEVICE_VENDOR"`
// VendorID is the numeric vendor ID matched first by clinfoVendor.
VendorID uint32 `json:"CL_DEVICE_VENDOR_ID"`
// Type carries the device-type strings inspected by isGPU.
Type clinfoTypeProp `json:"CL_DEVICE_TYPE"`
// HostUnifiedMemory devices are skipped by parseCLInfoJSON.
HostUnifiedMemory bool `json:"CL_DEVICE_HOST_UNIFIED_MEMORY"`
// GlobalMemSize is reported as TotalVRAM (bytes); zero-sized
// devices are skipped by parseCLInfoJSON.
GlobalMemSize uint64 `json:"CL_DEVICE_GLOBAL_MEM_SIZE"`
// PCIBusInfoKHR is the preferred PCI location source for clinfoBDF.
PCIBusInfoKHR string `json:"CL_DEVICE_PCI_BUS_INFO_KHR"`
// PCIDomainNV, PCIBusNV and PCISlotNV are the per-axis fallback
// clinfoBDF uses when the KHR string is missing or unusable.
PCIDomainNV int `json:"CL_DEVICE_PCI_DOMAIN_ID_NV"`
PCIBusNV int `json:"CL_DEVICE_PCI_BUS_ID_NV"`
PCISlotNV int `json:"CL_DEVICE_PCI_SLOT_ID_NV"`
}
// clinfoTypeProp matches against the type-string array rather than
// CL_DEVICE_TYPE.raw so a future CL_DEVICE_TYPE_CUSTOM can't sneak
// past as a GPU.
type clinfoTypeProp struct {
// Raw is the numeric "raw" value; it is decoded but isGPU does not
// consult it.
Raw uint32 `json:"raw"`
// Type is the list of type strings that isGPU scans for
// CL_DEVICE_TYPE_GPU.
Type []string `json:"type"`
}
// isGPU reports whether any of the decoded type strings is exactly
// CL_DEVICE_TYPE_GPU. An empty or missing list yields false.
func (t clinfoTypeProp) isGPU() bool {
	for i := 0; i < len(t.Type); i++ {
		if t.Type[i] == clDeviceTypeGPU {
			return true
		}
	}
	return false
}
// clinfoOnce caches the result for the process lifetime. GPU hardware
// doesn't change between calls and the subprocess is ~150 ms.
//
// sync.OnceValue hands every caller the same slice value, so the
// elements are shared state: a nil result (clinfo missing, failed or
// timed out on the first call) is cached as well.
var clinfoOnce = sync.OnceValue(runCLInfo)
// runCLInfo executes `clinfo --json` under clinfoTimeout and returns
// the GPUs parsed from its stdout. It returns nil when clinfo is not
// on PATH or when the command fails; the failure (with captured
// stderr) is logged at debug level only, since this is a best-effort
// probe.
func runCLInfo() []GPUMemoryInfo {
	_, lookErr := exec.LookPath("clinfo")
	if lookErr != nil {
		return nil
	}

	var (
		outBuf bytes.Buffer
		errBuf bytes.Buffer
	)
	ctx, cancel := context.WithTimeout(context.Background(), clinfoTimeout)
	defer cancel()

	cmd := exec.CommandContext(ctx, "clinfo", "--json")
	cmd.Stdout, cmd.Stderr = &outBuf, &errBuf

	runErr := cmd.Run()
	if runErr == nil {
		return parseCLInfoJSON(outBuf.Bytes())
	}
	xlog.Debug("clinfo failed", "error", runErr, "stderr", errBuf.String())
	return nil
}
// getCLInfoGPUMemory is a best-effort fallback for hosts where the
// vendor's own management binary (nvidia-smi / xpu-smi / rocm-smi)
// isn't installed but the OpenCL ICD is. Live used/free aren't exposed
// via standard CL_ properties; we synthesise them by attributing
// per-process VRAM allocations from the kernel DRM fdinfo interface
// to each clinfo-reported GPU via the shared PCI BDF.
//
// Returns nil when clinfo reported no usable GPU. Otherwise the result
// is a fresh slice on every call: the clinfo totals are memoised for
// the process lifetime, so live usage must never be written into the
// cached elements. Doing so would (a) leave stale UsedVRAM/FreeVRAM
// behind once usage drops back to zero, because applyDRMUsage leaves
// its input untouched for used == 0, and (b) race when two goroutines
// query concurrently.
func getCLInfoGPUMemory() []GPUMemoryInfo {
	cached := clinfoOnce()
	if len(cached) == 0 {
		return nil
	}

	// Work on a private copy so the memoised defaults
	// (UsedVRAM=0, FreeVRAM=TotalVRAM) stay pristine.
	gpus := make([]GPUMemoryInfo, len(cached))
	copy(gpus, cached)

	// Devices with an empty BDF look up the "" key, which the DRM
	// walker never populates, so they keep their defaults.
	usage := drmFdInfoUsageByBDF()
	for i := range gpus {
		gpus[i] = applyDRMUsage(gpus[i], usage[gpus[i].BDF])
	}
	return gpus
}
// applyDRMUsage overlays live VRAM accounting on a GPUMemoryInfo whose
// TotalVRAM came from a static source (e.g. clinfo). The caller is
// expected to have set TotalVRAM and FreeVRAM=TotalVRAM already.
//
// When either used or TotalVRAM is zero the input is returned
// unchanged. Otherwise UsedVRAM, FreeVRAM and UsagePercent are
// rederived from used, after clamping used to TotalVRAM.
func applyDRMUsage(g GPUMemoryInfo, used uint64) GPUMemoryInfo {
	total := g.TotalVRAM
	if total == 0 || used == 0 {
		return g
	}

	// Process-private DRM totals can momentarily exceed device VRAM
	// (over-commit via host memory mirror); clamp so the UI never
	// shows more than 100%.
	if used > total {
		used = total
	}

	g.UsedVRAM, g.FreeVRAM = used, total-used
	g.UsagePercent = float64(used) / float64(total) * 100
	return g
}
// parseCLInfoJSON returns one GPUMemoryInfo per discrete GPU found in
// raw `clinfo --json` output. Malformed JSON is logged at debug level
// and yields nil.
//
// Devices are skipped when they are not typed as a GPU, report host
// unified memory (iGPU/APU "VRAM" is system RAM and would double-count
// against the capability gate), or report a zero global memory size.
//
// When the same physical device is enumerated by multiple ICDs (Intel
// OpenCL + POCL, for example), entries are deduplicated by PCI BDF and
// the largest reported size wins — some ICDs cap at 4 GiB for legacy
// alloc-size compatibility. Devices without a BDF cannot be matched
// and are all kept.
//
// Result order is deterministic: devices with a BDF come first, in the
// order their BDF was first seen in the clinfo output, followed by the
// devices without a BDF in enumeration order. Index is the position in
// that list. (Ranging over the dedup map directly would shuffle the
// order, and with it every Index, from run to run on multi-GPU hosts.)
func parseCLInfoJSON(raw []byte) []GPUMemoryInfo {
	var out clinfoOutput
	if err := json.Unmarshal(raw, &out); err != nil {
		xlog.Debug("clinfo: failed to parse --json output", "error", err)
		return nil
	}

	var (
		withBDF []GPUMemoryInfo
		noBDF   []GPUMemoryInfo
	)
	// slotByBDF maps a BDF to its position in withBDF so a later,
	// larger report can replace the earlier one in place.
	slotByBDF := map[string]int{}

	for _, plat := range out.Devices {
		for _, d := range plat.Online {
			if !d.Type.isGPU() || d.HostUnifiedMemory || d.GlobalMemSize == 0 {
				continue
			}
			bdf := clinfoBDF(d)
			info := GPUMemoryInfo{
				Name:      strings.TrimSpace(d.Name),
				Vendor:    clinfoVendor(d.VendorID, d.Vendor),
				BDF:       bdf,
				TotalVRAM: d.GlobalMemSize,
				FreeVRAM:  d.GlobalMemSize,
			}
			if bdf == "" {
				noBDF = append(noBDF, info)
				continue
			}
			slot, seen := slotByBDF[bdf]
			if !seen {
				slotByBDF[bdf] = len(withBDF)
				withBDF = append(withBDF, info)
				continue
			}
			if info.TotalVRAM > withBDF[slot].TotalVRAM {
				withBDF[slot] = info
			}
		}
	}

	// Always return a non-nil slice on successful parse, as before.
	all := make([]GPUMemoryInfo, 0, len(withBDF)+len(noBDF))
	all = append(all, withBDF...)
	all = append(all, noBDF...)
	for i := range all {
		all[i].Index = i
	}
	return all
}
// clinfoVendor maps an OpenCL device to one of the package's Vendor*
// constants. The numeric vendor ID is authoritative; when it is not a
// recognised ID, the vendor name is searched case-insensitively for a
// known substring. VendorUnknown is returned when neither matches.
func clinfoVendor(vendorID uint32, name string) string {
	if vendorID == 0x10de {
		return VendorNVIDIA
	}
	// 0x1022 is the AMD CPU vendor ID, also reported by some APU OpenCL devices.
	if vendorID == 0x1002 || vendorID == 0x1022 {
		return VendorAMD
	}
	if vendorID == 0x8086 {
		return VendorIntel
	}
	if vendorID == 0x106B {
		return VendorApple
	}

	// Name fallback; earlier rows take precedence over later ones.
	lowered := strings.ToLower(name)
	hints := []struct {
		needle string
		vendor string
	}{
		{"nvidia", VendorNVIDIA},
		{"advanced micro devices", VendorAMD},
		{"amd", VendorAMD},
		{"intel", VendorIntel},
		{"apple", VendorApple},
	}
	for _, hint := range hints {
		if strings.Contains(lowered, hint.needle) {
			return hint.vendor
		}
	}
	return VendorUnknown
}
// clinfoBDF derives the device's canonical `dddd:bb:dd.f` PCI address,
// returning "" when no PCI location is reported.
//
// The KHR string is preferred. It appears as `"PCI-E, 0000:01:00.0"`
// on NVIDIA and as a bare `"0000:01:00.0"` on most others, so only the
// text after the last space is considered, and only when it contains
// one or two colons. Otherwise the NVIDIA per-axis integer fields are
// used, with the function number fixed at 0.
func clinfoBDF(d clinfoDevice) string {
	if khr := d.PCIBusInfoKHR; khr != "" {
		// LastIndex yields -1 when there is no space, which makes
		// the slice start at 0 and keeps the whole string.
		addr := khr[strings.LastIndex(khr, " ")+1:]
		switch strings.Count(addr, ":") {
		case 1, 2:
			return normalizeBDF(addr)
		}
	}

	// NVIDIA pre-KHR per-axis fields. An all-zero result is
	// indistinguishable from "fields absent", but no GPU sits at
	// 0000:00:00.0 so the false negative is harmless.
	if d.PCIDomainNV == 0 && d.PCIBusNV == 0 && d.PCISlotNV == 0 {
		return ""
	}
	return fmt.Sprintf("%04x:%02x:%02x.0", d.PCIDomainNV, d.PCIBusNV, d.PCISlotNV)
}
// normalizeBDF lower-cases a PCI address and, when it contains exactly
// one colon (i.e. the domain is missing), prefixes the default domain
// "0000:". Inputs with any other colon count are only lower-cased.
func normalizeBDF(s string) string {
	lowered := strings.ToLower(s)
	if strings.Count(lowered, ":") != 1 {
		return lowered
	}
	return "0000:" + lowered
}

191
pkg/xsysinfo/clinfo_test.go Normal file
View File

@@ -0,0 +1,191 @@
package xsysinfo
import (
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)
// nvidiaRTX5070TiJSON is a single-platform, single-device fixture for
// a discrete NVIDIA GPU. It carries both the KHR bus-info string in
// its "PCI-E, "-prefixed form and the NV per-axis PCI integer fields.
const nvidiaRTX5070TiJSON = `{
"devices": [
{
"online": [
{
"CL_DEVICE_NAME": "NVIDIA GeForce RTX 5070 Ti",
"CL_DEVICE_VENDOR": "NVIDIA Corporation",
"CL_DEVICE_VENDOR_ID": 4318,
"CL_DEVICE_TYPE": {"raw": 4, "type": ["CL_DEVICE_TYPE_GPU"]},
"CL_DEVICE_HOST_UNIFIED_MEMORY": false,
"CL_DEVICE_GLOBAL_MEM_SIZE": 16609378304,
"CL_DEVICE_PCI_BUS_INFO_KHR": "PCI-E, 0000:01:00.0",
"CL_DEVICE_PCI_BUS_ID_NV": 1,
"CL_DEVICE_PCI_SLOT_ID_NV": 0,
"CL_DEVICE_PCI_DOMAIN_ID_NV": 0
}
]
}
]
}`
// intelArcPlusIGPUJSON exercises the HOST_UNIFIED_MEMORY=true filter:
// the iGPU sibling on the same Intel platform must be dropped to
// avoid double-counting system RAM as VRAM. Both devices share one
// platform entry and have distinct BDFs, so only the unified-memory
// flag separates them.
const intelArcPlusIGPUJSON = `{
"devices": [
{
"online": [
{
"CL_DEVICE_NAME": "Intel(R) Arc(TM) A770 Graphics",
"CL_DEVICE_VENDOR": "Intel(R) Corporation",
"CL_DEVICE_VENDOR_ID": 32902,
"CL_DEVICE_TYPE": {"raw": 4, "type": ["CL_DEVICE_TYPE_GPU"]},
"CL_DEVICE_HOST_UNIFIED_MEMORY": false,
"CL_DEVICE_GLOBAL_MEM_SIZE": 16225243136,
"CL_DEVICE_PCI_BUS_INFO_KHR": "0000:03:00.0"
},
{
"CL_DEVICE_NAME": "Intel(R) UHD Graphics 770",
"CL_DEVICE_VENDOR": "Intel(R) Corporation",
"CL_DEVICE_VENDOR_ID": 32902,
"CL_DEVICE_TYPE": {"raw": 4, "type": ["CL_DEVICE_TYPE_GPU"]},
"CL_DEVICE_HOST_UNIFIED_MEMORY": true,
"CL_DEVICE_GLOBAL_MEM_SIZE": 25000000000,
"CL_DEVICE_PCI_BUS_INFO_KHR": "0000:00:02.0"
}
]
}
]
}`
// dualICDSameDeviceJSON exercises BDF dedup when two ICDs enumerate
// the same physical device with different reported sizes (POCL caps
// at 4 GiB for legacy alloc-size compatibility). The two entries sit
// on separate platforms but share BDF 0000:03:00.0; neither sets
// CL_DEVICE_VENDOR, so the vendor must come from the numeric ID.
const dualICDSameDeviceJSON = `{
"devices": [
{
"online": [
{
"CL_DEVICE_NAME": "Intel(R) Arc(TM) A770 Graphics",
"CL_DEVICE_VENDOR_ID": 32902,
"CL_DEVICE_TYPE": {"raw": 4, "type": ["CL_DEVICE_TYPE_GPU"]},
"CL_DEVICE_HOST_UNIFIED_MEMORY": false,
"CL_DEVICE_GLOBAL_MEM_SIZE": 16225243136,
"CL_DEVICE_PCI_BUS_INFO_KHR": "0000:03:00.0"
}
]
},
{
"online": [
{
"CL_DEVICE_NAME": "pthread-Arc-A770",
"CL_DEVICE_VENDOR_ID": 32902,
"CL_DEVICE_TYPE": {"raw": 4, "type": ["CL_DEVICE_TYPE_GPU"]},
"CL_DEVICE_HOST_UNIFIED_MEMORY": false,
"CL_DEVICE_GLOBAL_MEM_SIZE": 4294967296,
"CL_DEVICE_PCI_BUS_INFO_KHR": "0000:03:00.0"
}
]
}
]
}`
// cpuOnlyJSON: a POCL-only host. Filtered by CL_DEVICE_TYPE — without
// this guard CPU memory would be mistakenly reported as VRAM. The
// device also reports host unified memory and no PCI fields.
const cpuOnlyJSON = `{
"devices": [
{
"online": [
{
"CL_DEVICE_NAME": "pthread-x86_64",
"CL_DEVICE_VENDOR": "GenuineIntel",
"CL_DEVICE_VENDOR_ID": 32902,
"CL_DEVICE_TYPE": {"raw": 2, "type": ["CL_DEVICE_TYPE_CPU"]},
"CL_DEVICE_HOST_UNIFIED_MEMORY": true,
"CL_DEVICE_GLOBAL_MEM_SIZE": 33324494848
}
]
}
]
}`
// noBDFJSON: an ICD that reports no PCI fields at all. Device is
// still counted but doesn't participate in dedup. Vendor ID 0 and a
// missing vendor string make this the VendorUnknown case as well.
const noBDFJSON = `{
"devices": [
{
"online": [
{
"CL_DEVICE_NAME": "Some Accelerator GPU",
"CL_DEVICE_VENDOR_ID": 0,
"CL_DEVICE_TYPE": {"raw": 4, "type": ["CL_DEVICE_TYPE_GPU"]},
"CL_DEVICE_HOST_UNIFIED_MEMORY": false,
"CL_DEVICE_GLOBAL_MEM_SIZE": 8589934592
}
]
}
]
}`
// Table-driven spec for parseCLInfoJSON. Each entry supplies raw
// clinfo JSON, the expected number of returned GPUs, and the expected
// leading entries.
var _ = Describe("parseCLInfoJSON", func() {
DescribeTable("classifies and dedups clinfo devices",
func(input string, wantCount int, want []GPUMemoryInfo) {
got := parseCLInfoJSON([]byte(input))
Expect(got).To(HaveLen(wantCount))
// Only Name, Vendor and TotalVRAM are compared; BDF, Index and
// the usage fields of the result are not asserted here.
for i, w := range want {
Expect(got[i].Name).To(Equal(w.Name))
Expect(got[i].Vendor).To(Equal(w.Vendor))
Expect(got[i].TotalVRAM).To(Equal(w.TotalVRAM))
}
},
Entry("empty object returns nothing", `{}`, 0, nil),
Entry("malformed JSON returns nothing without panicking", `{not valid`, 0, nil),
Entry("CPU-only platform is filtered out", cpuOnlyJSON, 0, nil),
Entry("NVIDIA dGPU is recognised by vendor ID and BDF",
nvidiaRTX5070TiJSON, 1, []GPUMemoryInfo{{
Name: "NVIDIA GeForce RTX 5070 Ti",
Vendor: VendorNVIDIA,
TotalVRAM: 16609378304,
}}),
Entry("Intel Arc with iGPU sibling: iGPU dropped, Arc reported",
intelArcPlusIGPUJSON, 1, []GPUMemoryInfo{{
Name: "Intel(R) Arc(TM) A770 Graphics",
Vendor: VendorIntel,
TotalVRAM: 16225243136,
}}),
Entry("Dual ICD enumerating same Arc: deduped, larger size wins",
dualICDSameDeviceJSON, 1, []GPUMemoryInfo{{
Name: "Intel(R) Arc(TM) A770 Graphics",
Vendor: VendorIntel,
TotalVRAM: 16225243136, // not the POCL 4 GiB cap
}}),
Entry("Device without PCI info is still counted",
noBDFJSON, 1, []GPUMemoryInfo{{
Name: "Some Accelerator GPU",
Vendor: VendorUnknown,
TotalVRAM: 8589934592,
}}),
)
})
// Table-driven spec for normalizeBDF: canonical input is returned
// as-is, a missing domain is filled with "0000:", and hex digits are
// lower-cased.
var _ = Describe("normalizeBDF", func() {
DescribeTable("canonicalises PCI bus addresses",
func(in, want string) {
Expect(normalizeBDF(in)).To(Equal(want))
},
Entry("already canonical", "0000:03:00.0", "0000:03:00.0"),
Entry("missing domain", "03:00.0", "0000:03:00.0"),
Entry("uppercase hex", "AB:CD.0", "0000:ab:cd.0"),
)
})
// Spec for clinfoBDF's integer-field fallback: with no KHR string set,
// a non-zero bus number alone must produce a canonical address.
var _ = Describe("clinfoBDF", func() {
It("synthesises a canonical BDF from NVIDIA pre-KHR integer fields", func() {
// Older NVIDIA OpenCL exposes BDF via three integer fields instead
// of the KHR string; the synthesised result must be canonical.
d := clinfoDevice{
PCIBusNV: 1,
PCISlotNV: 0,
PCIDomainNV: 0,
}
Expect(clinfoBDF(d)).To(Equal("0000:01:00.0"))
})
})

147
pkg/xsysinfo/drmfdinfo.go Normal file
View File

@@ -0,0 +1,147 @@
package xsysinfo
import (
"bufio"
"bytes"
"os"
"path/filepath"
"strconv"
"strings"
)
// drmFdInfoUsageByBDF reports device-local memory currently bound by
// processes, keyed by the PCI BDF (dddd:bb:dd.f) of each GPU, so
// callers can match it against any GPU detection result.
//
// The per-render-node totals come from drmFdInfoUsageByRenderNode,
// which reads the kernel's standardised DRM fdinfo keys
// (Documentation/gpu/drm-usage-stats.rst):
//
//	drm-total-<region>: bytes the process has bound to <region>
//
// Region names are driver-defined: i915 uses "local*" for device-local
// VRAM, amdgpu and xe use "vram*". Only regions starting with "local"
// or "vram" are summed; "system*" / "gtt*" / "stolen-*" are excluded
// since they're host RAM mirrors.
//
// Each render node is resolved to a BDF with renderNodeBDF; nodes that
// do not resolve are dropped, and nodes that resolve to the same BDF
// are added together. The result is nil when no render-node usage was
// found at all (no process holds a DRM render fd, or nothing could be
// read).
func drmFdInfoUsageByBDF() map[string]uint64 {
	perNode := drmFdInfoUsageByRenderNode()
	if len(perNode) == 0 {
		return nil
	}

	perBDF := make(map[string]uint64, len(perNode))
	for node, bytesUsed := range perNode {
		if addr := renderNodeBDF(node); addr != "" {
			perBDF[addr] += bytesUsed
		}
	}
	return perBDF
}
// drmFdInfoUsageByRenderNode walks /proc/<pid>/fd for every process,
// finds fds whose link target starts with /dev/dri/render, and sums
// the VRAM reported by the matching /proc/<pid>/fdinfo/<fd> blob.
//
// The result is keyed by render-node basename (e.g. "renderD128").
// It is nil when no /proc/<pid>/fd directory is visible. Unreadable
// directories, links and fdinfo files are skipped silently: other
// users' processes and PIDs that exit mid-walk are expected.
//
// A DRM client is counted once per render node. Several fds can refer
// to the same client (dup(2), fds inherited across fork, fds passed
// between processes); each of them reports the same totals, and the
// kernel publishes a `drm-client-id` key precisely so userspace can
// avoid double accounting. fdinfo blobs without that key are counted
// individually, as they cannot be told apart.
func drmFdInfoUsageByRenderNode() map[string]uint64 {
	// Glob only fails on a malformed pattern; this one is constant.
	procs, _ := filepath.Glob("/proc/[0-9]*/fd")
	if len(procs) == 0 {
		return nil
	}

	const renderPrefix = "/dev/dri/render"
	out := map[string]uint64{}
	// seen holds "<render node>/<client id>" for every client that
	// has already been added to out.
	seen := map[string]struct{}{}

	for _, fdDir := range procs {
		entries, err := os.ReadDir(fdDir)
		if err != nil {
			// /proc race: process exited or unreadable. Skip silently.
			continue
		}
		fdInfoDir := filepath.Join(filepath.Dir(fdDir), "fdinfo")
		for _, entry := range entries {
			target, err := os.Readlink(filepath.Join(fdDir, entry.Name()))
			if err != nil || !strings.HasPrefix(target, renderPrefix) {
				continue
			}
			data, err := os.ReadFile(filepath.Join(fdInfoDir, entry.Name()))
			if err != nil {
				continue
			}
			renderName := strings.TrimPrefix(target, "/dev/dri/")
			if id := drmFdInfoClientID(data); id != "" {
				key := renderName + "/" + id
				if _, dup := seen[key]; dup {
					continue
				}
				seen[key] = struct{}{}
			}
			out[renderName] += parseDRMFdInfoVRAM(data)
		}
	}
	return out
}

// drmFdInfoClientID returns the whitespace-trimmed value of the
// `drm-client-id` key in an fdinfo blob, or "" when the key is absent.
func drmFdInfoClientID(data []byte) string {
	sc := bufio.NewScanner(bytes.NewReader(data))
	for sc.Scan() {
		key, value, ok := strings.Cut(sc.Text(), ":")
		if ok && key == "drm-client-id" {
			return strings.TrimSpace(value)
		}
	}
	return ""
}
// parseDRMFdInfoVRAM adds up the `drm-total-<region>` values of every
// VRAM region (as decided by isVRAMRegion) found in one fdinfo blob.
// Values may be "<number> <KiB|MiB|GiB>" or bare bytes; conversion is
// delegated to parseDRMFdInfoBytes. Lines without a colon, with a
// different key prefix, or naming a non-VRAM region contribute
// nothing.
func parseDRMFdInfoVRAM(data []byte) uint64 {
	const keyPrefix = "drm-total-"

	var sum uint64
	lines := bufio.NewScanner(bytes.NewReader(data))
	for lines.Scan() {
		key, value, found := strings.Cut(lines.Text(), ":")
		if !found || !strings.HasPrefix(key, keyPrefix) {
			continue
		}
		if isVRAMRegion(key[len(keyPrefix):]) {
			sum += parseDRMFdInfoBytes(value)
		}
	}
	return sum
}
// isVRAMRegion reports whether a DRM memory-region name denotes
// device-local memory, i.e. starts with "local" or "vram". The match
// is a case-sensitive prefix test, so "stolen-local0" does not count.
func isVRAMRegion(region string) bool {
	for _, prefix := range [...]string{"local", "vram"} {
		if strings.HasPrefix(region, prefix) {
			return true
		}
	}
	return false
}
// parseDRMFdInfoBytes converts an fdinfo value such as "\t312 KiB" to
// bytes. The first whitespace-separated field must be an unsigned
// decimal number, otherwise 0 is returned. An optional second field
// of KiB, MiB or GiB (case-insensitive) scales the number; any other
// unit, or none, leaves it as raw bytes.
func parseDRMFdInfoBytes(value string) uint64 {
	parts := strings.Fields(value)
	if len(parts) == 0 {
		return 0
	}
	count, err := strconv.ParseUint(parts[0], 10, 64)
	if err != nil {
		return 0
	}

	// Binary units are powers of two, so scaling is a left shift.
	var shift uint
	if len(parts) >= 2 {
		switch strings.ToLower(parts[1]) {
		case "kib":
			shift = 10
		case "mib":
			shift = 20
		case "gib":
			shift = 30
		}
	}
	return count << shift
}
// renderNodeBDF maps a DRM render-node basename (e.g. "renderD129")
// to the PCI BDF of its device by reading the
// /sys/class/drm/<name>/device symlink and taking the link target's
// last path element. It returns "" when the link cannot be read or
// when that element does not look like dddd:bb:dd.f (exactly two
// colons and one dot), which covers non-PCI devices. The result is
// lower-cased.
func renderNodeBDF(name string) string {
	target, err := os.Readlink("/sys/class/drm/" + name + "/device")
	if err != nil {
		return ""
	}

	addr := filepath.Base(target)
	wellFormed := strings.Count(addr, ":") == 2 && strings.Count(addr, ".") == 1
	if !wellFormed {
		return ""
	}
	return strings.ToLower(addr)
}

View File

@@ -0,0 +1,142 @@
package xsysinfo
import (
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)
// i915FdInfo is a captured /proc/<pid>/fdinfo/<fd> from a llama-cpp
// process holding an Intel Arc render-node fd. "local0" is i915's
// device-local VRAM region; system0 is host-visible buffer mirror.
// Only drm-total-local0 should be counted: the system0 and
// stolen-local0 totals and all drm-resident-* / drm-engine-* keys
// must be ignored.
const i915FdInfo = `pos: 0
flags: 02100002
mnt_id: 16
ino: 1234
drm-driver: i915
drm-client-id: 42
drm-pdev: 0000:03:00.0
drm-total-system0: 312 KiB
drm-resident-system0: 312 KiB
drm-total-local0: 5396348 KiB
drm-resident-local0: 5396348 KiB
drm-total-stolen-local0: 0
drm-resident-stolen-local0: 0
drm-engine-render: 1234567 ns
drm-engine-copy: 2345 ns
drm-engine-video: 0 ns
drm-engine-capacity-video: 2
`
// amdgpuFdInfo mirrors the i915 schema with AMD's region names. amdgpu
// uses "vram0" for device-local and "gtt0" for host-pinned memory.
// Values carry a "B" suffix, which the parser does not recognise as a
// scaling unit and therefore reads as raw bytes.
const amdgpuFdInfo = `pos: 0
flags: 02100002
mnt_id: 16
drm-driver: amdgpu
drm-pdev: 0000:0a:00.0
drm-total-vram0: 8589934592 B
drm-resident-vram0: 8589934592 B
drm-total-gtt0: 1048576 B
drm-resident-gtt0: 1048576 B
drm-engine-gfx: 123456 ns
`
// systemOnlyFdInfo: a DRM client that only allocates host buffers
// (CPU-only fallback, GUI compositor, etc.). VRAM total must be 0:
// the local0 key is present but reports zero, and system0 is not a
// VRAM region.
const systemOnlyFdInfo = `drm-driver: i915
drm-total-system0: 16384 KiB
drm-resident-system0: 16384 KiB
drm-total-local0: 0
`
// noDRMFdInfo: regular file fd (e.g. socket, pipe). Parser must return
// 0 without panicking. The blob contains no drm-* keys at all.
const noDRMFdInfo = `pos: 0
flags: 02100002
mnt_id: 16
ino: 5678
`
// bareBytesFdInfo: older kernels emit byte counts without a unit
// suffix. Must be parsed as raw bytes, not multiplied by 1024. Only
// the drm-total-vram0 line counts; drm-resident-vram0 is ignored.
const bareBytesFdInfo = `drm-driver: xe
drm-total-vram0: 17179869184
drm-resident-vram0: 17179869184
`
// Table-driven spec for parseDRMFdInfoVRAM: each entry feeds one
// fdinfo blob and expects the summed device-local total in bytes.
var _ = Describe("parseDRMFdInfoVRAM", func() {
DescribeTable("extracts device-local VRAM totals from fdinfo",
func(input string, want uint64) {
Expect(parseDRMFdInfoVRAM([]byte(input))).To(Equal(want))
},
Entry("empty input", "", uint64(0)),
Entry("non-DRM fdinfo", noDRMFdInfo, uint64(0)),
Entry("system-only client reports 0 VRAM", systemOnlyFdInfo, uint64(0)),
Entry("i915 local0 in KiB", i915FdInfo, uint64(5396348*1024)),
Entry("amdgpu vram0 in bytes", amdgpuFdInfo, uint64(8589934592)),
Entry("xe vram0 bare bytes", bareBytesFdInfo, uint64(17179869184)),
)
})
// Table-driven spec for parseDRMFdInfoBytes. Inputs start with a tab
// to mimic the text that follows the ':' in an fdinfo line.
var _ = Describe("parseDRMFdInfoBytes", func() {
DescribeTable("parses sizes with and without unit suffixes",
func(in string, want uint64) {
Expect(parseDRMFdInfoBytes(in)).To(Equal(want))
},
Entry("bare bytes", "\t1024", uint64(1024)),
Entry("KiB", "\t1024 KiB", uint64(1024*1024)),
Entry("MiB", "\t512 MiB", uint64(512*1024*1024)),
Entry("GiB", "\t2 GiB", uint64(2*1024*1024*1024)),
Entry("unrecognised unit falls through to raw bytes", "\t1024 B", uint64(1024)),
Entry("empty", "", uint64(0)),
Entry("not a number", "\tnotanumber KiB", uint64(0)),
)
})
// Table-driven spec for isVRAMRegion: "local*" and "vram*" names are
// accepted; system, gtt, stolen-* and unrelated names are rejected.
var _ = Describe("isVRAMRegion", func() {
DescribeTable("recognises device-local regions",
func(region string, want bool) {
Expect(isVRAMRegion(region)).To(Equal(want))
},
Entry("local0", "local0", true),
Entry("local1", "local1", true),
Entry("vram0", "vram0", true),
Entry("vram1", "vram1", true),
Entry("system0", "system0", false),
Entry("gtt0", "gtt0", false),
Entry("stolen-local0", "stolen-local0", false),
Entry("stolen-system0", "stolen-system0", false),
Entry("cpu", "cpu", false),
)
})
// Spec for applyDRMUsage covering the four paths: no usage, normal
// usage, over-commit clamping, and a zero TotalVRAM input.
var _ = Describe("applyDRMUsage", func() {
const total = uint64(16225243136)
// base mimics what the clinfo path produces: FreeVRAM preset to
// TotalVRAM and no usage recorded. It is passed by value, so the
// specs below cannot affect one another through it.
base := GPUMemoryInfo{Name: "Arc A770", TotalVRAM: total, FreeVRAM: total}
It("leaves defaults untouched when there is no usage", func() {
got := applyDRMUsage(base, 0)
Expect(got.UsedVRAM).To(Equal(uint64(0)))
Expect(got.FreeVRAM).To(Equal(total))
Expect(got.UsagePercent).To(Equal(float64(0)))
})
It("rederives free and percent from usage", func() {
used := uint64(5_396_348 * 1024)
got := applyDRMUsage(base, used)
Expect(got.UsedVRAM).To(Equal(used))
Expect(got.FreeVRAM).To(Equal(total - used))
Expect(got.UsagePercent).To(Equal(float64(used) / float64(total) * 100))
})
It("clamps over-commit to total", func() {
got := applyDRMUsage(base, total*2)
Expect(got.UsedVRAM).To(Equal(total))
Expect(got.FreeVRAM).To(Equal(uint64(0)))
})
It("guards against div-by-zero on zero total", func() {
// A zero-value GPUMemoryInfo has TotalVRAM == 0.
got := applyDRMUsage(GPUMemoryInfo{}, 1024)
Expect(got.UsagePercent).To(Equal(float64(0)))
})
})

View File

@@ -41,6 +41,13 @@ type GPUMemoryInfo struct {
Index int `json:"index"`
Name string `json:"name"`
Vendor string `json:"vendor"`
// BDF is the canonical PCI bus address (dddd:bb:dd.f) when known.
// Populated by detection paths that can attribute the device to a
// PCI location (clinfo, future amdgpu/nvidia paths); empty for
// non-PCI devices (Apple, integrated SoCs) or detection paths
// that don't surface it (nvidia-smi --query-gpu doesn't include
// pci.bus_id by default).
BDF string `json:"bdf,omitempty"`
TotalVRAM uint64 `json:"total_vram"` // Total VRAM in bytes
UsedVRAM uint64 `json:"used_vram"` // Used VRAM in bytes
FreeVRAM uint64 `json:"free_vram"` // Free VRAM in bytes
@@ -515,16 +522,48 @@ func getAMDGPUMemory() []GPUMemoryInfo {
return gpus
}
// getIntelGPUMemory queries Intel GPUs using xpu-smi or intel_gpu_top
// getIntelGPUMemory queries Intel GPUs via xpu-smi, intel_gpu_top, or
// clinfo (in that order). xpu-smi is the canonical Intel tool but
// requires the separate xpumanager package; clinfo ships with the
// OpenCL ICD loader and is present in most oneAPI base images, so it
// serves as the last-resort fallback.
func getIntelGPUMemory() []GPUMemoryInfo {
// Try xpu-smi first (Intel's official GPU management tool)
gpus := getIntelXPUSMI()
if len(gpus) > 0 {
if gpus := getIntelXPUSMI(); len(gpus) > 0 {
return gpus
}
if gpus := getIntelGPUTop(); len(gpus) > 0 {
return gpus
}
// clinfo enumerates every OpenCL platform, so guard the
// subprocess with the cached ghw GPU list: non-Intel hosts skip
// it entirely.
if !hasGHWVendor(VendorIntel) {
return nil
}
var out []GPUMemoryInfo
for _, g := range getCLInfoGPUMemory() {
if g.Vendor == VendorIntel {
out = append(out, g)
}
}
return out
}
// Fallback to intel_gpu_top
return getIntelGPUTop()
// hasGHWVendor reports whether any GPU returned by GPUs() carries a
// vendor name containing vendor, compared case-insensitively as a
// substring. Entries without device or vendor information are
// skipped, and an error from GPUs() is ignored: the list is then
// treated as whatever was returned alongside it. GPUs() keeps a
// package-level cache, so repeated calls are cheap.
func hasGHWVendor(vendor string) bool {
	detected, _ := GPUs()
	want := strings.ToUpper(vendor)
	for _, gpu := range detected {
		info := gpu.DeviceInfo
		if info != nil && info.Vendor != nil &&
			strings.Contains(strings.ToUpper(info.Vendor.Name), want) {
			return true
		}
	}
	return false
}
// getIntelXPUSMI queries Intel GPUs using xpu-smi

View File

@@ -0,0 +1,13 @@
package xsysinfo
import (
"testing"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)
// TestXsysinfo is the `go test` entry point for the package's Ginkgo
// specs: it wires Gomega assertion failures into Ginkgo's Fail and
// then runs every registered spec under the given suite name.
func TestXsysinfo(t *testing.T) {
// The fail handler must be registered before the specs run.
RegisterFailHandler(Fail)
RunSpecs(t, "xsysinfo test suite")
}