mirror of
https://github.com/mudler/LocalAI.git
synced 2026-06-12 02:38:19 -04:00
* fix(gpu-detect): clinfo --json fallback for Intel discrete VRAM
ghw returns 0 VRAM for any i915-driven Intel GPU because the kernel
driver doesn't expose VRAM through the sysfs paths ghw checks (no
mem_info_vram_total — that's an amdgpu interface). xpu-smi, the
canonical Intel tool, isn't in the oneAPI base image (it lives in a
separate xpumanager package). The capability gate added in 19c92c70
("default to CPU if there is less than 4GB of GPU available") then
demotes the host to CPU even on a 16 GB Arc A770.
clinfo ships with the OpenCL ICD loader and is present in the oneAPI
base image, so plug it in as the last-resort Intel VRAM source:
xpu-smi -> intel_gpu_top -> clinfo --json
The parser drops UMA devices via HOST_UNIFIED_MEMORY=true so an iGPU
sibling can't double-count system RAM, and dedups by PCI BDF when
multiple ICDs enumerate the same physical device (POCL caps reported
GLOBAL_MEM_SIZE at 4 GiB; the largest non-capped value wins).
Subprocess is wrapped in a 2s timeout and memoised with sync.OnceValue
— GPU hardware is static for the process lifetime. The Intel branch
also short-circuits when ghw saw no Intel vendor, so NVIDIA-only hosts
don't pay the spawn cost.
Verified end-to-end on Intel Arc A770: ghw -> 0, clinfo path reports
16,225,243,136 bytes (15.11 GiB), capability gate now passes naturally
without LOCALAI_FORCE_META_BACKEND_CAPABILITY=intel.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
Signed-off-by: Richard Palethorpe <io@richiejp.com>
* feat(gpu-detect): live VRAM usage from DRM fdinfo
The clinfo fallback reports total VRAM correctly but leaves UsedVRAM
at 0 because OpenCL has no portable live-memory property — the UI
ends up showing 0% utilisation even when llama-cpp is actually
holding gigabytes in device memory.
Fill that gap with the standardised Linux DRM fdinfo interface
(Documentation/gpu/drm-usage-stats.rst, kernel ≥5.19). Walking
/proc/<pid>/fdinfo for any fd that points at /dev/dri/render* yields
drm-total-<region> / drm-resident-<region> keys; aggregate per
render-node, resolve the render node to a PCI BDF via
/sys/class/drm/<name>/device, and merge the result into the matching
GPUMemoryInfo by BDF.
Region naming is driver-defined — i915 uses "local0" for device-local
VRAM, amdgpu and xe use "vram0" — so a prefix-match on local/vram
covers all three DRM drivers that LocalAI cares about. system/gtt/
stolen regions are deliberately excluded since they're host RAM
mirrors and would double-count against system RAM.
GPUMemoryInfo gains an optional BDF field (`bdf,omitempty` in JSON)
so future vendor-specific detectors can plug into the same matcher.
Empty BDF skips the merge — non-PCI devices and detection paths that
don't surface PCI location keep their existing behaviour.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
Signed-off-by: Richard Palethorpe <io@richiejp.com>
---------
Signed-off-by: Richard Palethorpe <io@richiejp.com>
Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
148 lines
3.9 KiB
Go
148 lines
3.9 KiB
Go
package xsysinfo
|
|
|
|
import (
|
|
"bufio"
|
|
"bytes"
|
|
"os"
|
|
"path/filepath"
|
|
"strconv"
|
|
"strings"
|
|
)
|
|
|
|
// drmFdInfoUsageByBDF walks /proc/<pid>/fdinfo/<fd> for every fd that
|
|
// points at /dev/dri/render* and aggregates per-GPU VRAM allocations.
|
|
// Keyed by the PCI BDF (dddd:bb:dd.f) of the render node so callers
|
|
// can match against any GPU detection result.
|
|
//
|
|
// The kernel exposes per-process DRM accounting via standardised
|
|
// fdinfo keys (Documentation/gpu/drm-usage-stats.rst, kernel ≥5.19):
|
|
//
|
|
// drm-total-<region>: bytes the process has bound to <region>
|
|
// drm-resident-<region>: bytes currently resident in <region>
|
|
//
|
|
// Region names are driver-defined: i915 uses "local*" for device-local
|
|
// VRAM, amdgpu and xe use "vram*". We sum any region whose name
|
|
// starts with "local" or "vram"; "system*" / "gtt*" / "stolen-*" are
|
|
// excluded since they're host RAM mirrors.
|
|
//
|
|
// Returns an empty map when no process holds a DRM render fd or the
|
|
// kernel doesn't emit the accounting keys (older kernels, exotic
|
|
// drivers). The walker is read-only and survives unreadable proc
|
|
// entries (other users' processes, transient PIDs).
|
|
func drmFdInfoUsageByBDF() map[string]uint64 {
|
|
byRender := drmFdInfoUsageByRenderNode()
|
|
if len(byRender) == 0 {
|
|
return nil
|
|
}
|
|
out := make(map[string]uint64, len(byRender))
|
|
for name, used := range byRender {
|
|
bdf := renderNodeBDF(name)
|
|
if bdf == "" {
|
|
continue
|
|
}
|
|
out[bdf] += used
|
|
}
|
|
return out
|
|
}
|
|
|
|
func drmFdInfoUsageByRenderNode() map[string]uint64 {
|
|
procs, _ := filepath.Glob("/proc/[0-9]*/fd")
|
|
if len(procs) == 0 {
|
|
return nil
|
|
}
|
|
out := map[string]uint64{}
|
|
for _, fdDir := range procs {
|
|
pidDir := filepath.Dir(fdDir)
|
|
entries, err := os.ReadDir(fdDir)
|
|
if err != nil {
|
|
// /proc race: process exited or unreadable. Skip silently.
|
|
continue
|
|
}
|
|
for _, entry := range entries {
|
|
target, err := os.Readlink(filepath.Join(fdDir, entry.Name()))
|
|
if err != nil {
|
|
continue
|
|
}
|
|
const renderPrefix = "/dev/dri/render"
|
|
if !strings.HasPrefix(target, renderPrefix) {
|
|
continue
|
|
}
|
|
renderName := strings.TrimPrefix(target, "/dev/dri/")
|
|
data, err := os.ReadFile(filepath.Join(pidDir, "fdinfo", entry.Name()))
|
|
if err != nil {
|
|
continue
|
|
}
|
|
out[renderName] += parseDRMFdInfoVRAM(data)
|
|
}
|
|
}
|
|
return out
|
|
}
|
|
|
|
// parseDRMFdInfoVRAM sums `drm-total-<region>` bytes across all VRAM
|
|
// regions in a single fdinfo blob. Values are formatted as
|
|
// "<number> <KiB|MiB|GiB>" or bare bytes; both are accepted.
|
|
func parseDRMFdInfoVRAM(data []byte) uint64 {
|
|
var total uint64
|
|
sc := bufio.NewScanner(bytes.NewReader(data))
|
|
for sc.Scan() {
|
|
line := sc.Text()
|
|
const prefix = "drm-total-"
|
|
if !strings.HasPrefix(line, prefix) {
|
|
continue
|
|
}
|
|
key, value, ok := strings.Cut(line, ":")
|
|
if !ok {
|
|
continue
|
|
}
|
|
region := strings.TrimPrefix(key, prefix)
|
|
if !isVRAMRegion(region) {
|
|
continue
|
|
}
|
|
total += parseDRMFdInfoBytes(value)
|
|
}
|
|
return total
|
|
}
|
|
|
|
// isVRAMRegion reports whether a DRM memory region name begins with
// "local" or "vram". The match is case-sensitive and prefix-only.
func isVRAMRegion(region string) bool {
	for _, prefix := range [...]string{"local", "vram"} {
		if strings.HasPrefix(region, prefix) {
			return true
		}
	}
	return false
}
|
|
|
|
// parseDRMFdInfoBytes converts the value part of an fdinfo line into a
// byte count. The value is split on whitespace: the first field must be
// an unsigned decimal integer, the optional second field a unit.
//
// Recognised units (case-insensitive) are KiB, MiB and GiB. A missing
// or unrecognised unit leaves the number unscaled. An empty value or an
// unparsable number yields 0. A scaled result that would not fit in a
// uint64 saturates at the maximum uint64 value instead of silently
// wrapping around to a small number.
func parseDRMFdInfoBytes(value string) uint64 {
	fields := strings.Fields(value)
	if len(fields) == 0 {
		return 0
	}
	n, err := strconv.ParseUint(fields[0], 10, 64)
	if err != nil {
		return 0
	}
	if len(fields) < 2 {
		return n
	}

	var shift uint
	switch strings.ToLower(fields[1]) {
	case "kib":
		shift = 10
	case "mib":
		shift = 20
	case "gib":
		shift = 30
	default:
		return n
	}

	const maxUint64 = ^uint64(0)
	if n > maxUint64>>shift {
		// n << shift would overflow; clamp rather than wrap.
		return maxUint64
	}
	return n << shift
}
|
|
|
|
// renderNodeBDF maps a DRM render-node basename (e.g. "renderD129") to
// the PCI BDF of the device behind it. It reads the symlink
// /sys/class/drm/<name>/device and takes the last path element of the
// link target.
//
// The element is accepted only if it has the dddd:bb:dd.f shape
// (exactly two ':' and one '.'), and is returned lower-cased. Returns
// "" when the symlink cannot be read or the element does not look like
// a BDF (non-PCI devices).
func renderNodeBDF(name string) string {
	target, err := os.Readlink("/sys/class/drm/" + name + "/device")
	if err != nil {
		return ""
	}

	leaf := filepath.Base(target)
	if strings.Count(leaf, ":") == 2 && strings.Count(leaf, ".") == 1 {
		return strings.ToLower(leaf)
	}
	return ""
}
|