mirror of
https://github.com/mudler/LocalAI.git
synced 2026-06-06 15:56:06 -04:00
* fix(gpu-detect): clinfo --json fallback for Intel discrete VRAM
ghw returns 0 VRAM for any i915-driven Intel GPU because the kernel
driver doesn't expose VRAM through the sysfs paths ghw checks (no
mem_info_vram_total — that's an amdgpu interface). xpu-smi, the
canonical Intel tool, isn't in the oneAPI base image (it lives in a
separate xpumanager package). The capability gate added in 19c92c70
("default to CPU if there is less than 4GB of GPU available") then
demotes the host to CPU even on a 16 GB Arc A770.
clinfo ships with the OpenCL ICD loader and is present in the oneAPI
base image, so plug it in as the last-resort Intel VRAM source:
xpu-smi -> intel_gpu_top -> clinfo --json
The parser drops UMA devices via HOST_UNIFIED_MEMORY=true so an iGPU
sibling can't double-count system RAM, and dedups by PCI BDF when
multiple ICDs enumerate the same physical device (POCL caps reported
GLOBAL_MEM_SIZE at 4 GiB; the largest non-capped value wins).
Subprocess is wrapped in a 2s timeout and memoised with sync.OnceValue
— GPU hardware is static for the process lifetime. The Intel branch
also short-circuits when ghw saw no Intel vendor, so NVIDIA-only hosts
don't pay the spawn cost.
Verified end-to-end on Intel Arc A770: ghw -> 0, clinfo path reports
16,225,243,136 bytes (15.11 GiB), capability gate now passes naturally
without LOCALAI_FORCE_META_BACKEND_CAPABILITY=intel.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
Signed-off-by: Richard Palethorpe <io@richiejp.com>
* feat(gpu-detect): live VRAM usage from DRM fdinfo
The clinfo fallback reports total VRAM correctly but leaves UsedVRAM
at 0 because OpenCL has no portable live-memory property — the UI
ends up showing 0% utilisation even when llama-cpp is actually
holding gigabytes in device memory.
Fill that gap with the standardised Linux DRM fdinfo interface
(Documentation/gpu/drm-usage-stats.rst, kernel ≥5.19). Walking
/proc/<pid>/fdinfo for any fd that points at /dev/dri/render* yields
drm-total-<region> / drm-resident-<region> keys; aggregate per
render-node, resolve the render node to a PCI BDF via
/sys/class/drm/<name>/device, and merge the result into the matching
GPUMemoryInfo by BDF.
Region naming is driver-defined — i915 uses "local0" for device-local
VRAM, amdgpu and xe use "vram0" — so a prefix-match on local/vram
covers all three DRM drivers that LocalAI cares about. system/gtt/
stolen regions are deliberately excluded since they're host RAM
mirrors and would double-count against system RAM.
GPUMemoryInfo gains an optional BDF field (`bdf,omitempty` in JSON)
so future vendor-specific detectors can plug into the same matcher.
Empty BDF skips the merge — non-PCI devices and detection paths that
don't surface PCI location keep their existing behaviour.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
Signed-off-by: Richard Palethorpe <io@richiejp.com>
---------
Signed-off-by: Richard Palethorpe <io@richiejp.com>
Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
222 lines
6.5 KiB
Go
222 lines
6.5 KiB
Go
package xsysinfo
|
|
|
|
import (
|
|
"bytes"
|
|
"context"
|
|
"encoding/json"
|
|
"fmt"
|
|
"os/exec"
|
|
"strings"
|
|
"sync"
|
|
"time"
|
|
|
|
"github.com/mudler/xlog"
|
|
)
|
|
|
|
const (
	// clDeviceTypeGPU is the entry looked for in a device's
	// CL_DEVICE_TYPE string list to recognise it as a GPU.
	clDeviceTypeGPU = "CL_DEVICE_TYPE_GPU"

	// clinfoTimeout is the deadline placed on the `clinfo --json`
	// subprocess.
	clinfoTimeout = 2 * time.Second
)
|
|
|
|
// clinfoOutput is the subset of `clinfo --json` we read. clinfo emits
|
|
// one entry under "devices" per platform, in the same order as
|
|
// "platforms"; live devices are under "online".
|
|
type clinfoOutput struct {
|
|
Devices []struct {
|
|
Online []clinfoDevice `json:"online"`
|
|
} `json:"devices"`
|
|
}
|
|
|
|
type clinfoDevice struct {
|
|
Name string `json:"CL_DEVICE_NAME"`
|
|
Vendor string `json:"CL_DEVICE_VENDOR"`
|
|
VendorID uint32 `json:"CL_DEVICE_VENDOR_ID"`
|
|
Type clinfoTypeProp `json:"CL_DEVICE_TYPE"`
|
|
HostUnifiedMemory bool `json:"CL_DEVICE_HOST_UNIFIED_MEMORY"`
|
|
GlobalMemSize uint64 `json:"CL_DEVICE_GLOBAL_MEM_SIZE"`
|
|
PCIBusInfoKHR string `json:"CL_DEVICE_PCI_BUS_INFO_KHR"`
|
|
PCIDomainNV int `json:"CL_DEVICE_PCI_DOMAIN_ID_NV"`
|
|
PCIBusNV int `json:"CL_DEVICE_PCI_BUS_ID_NV"`
|
|
PCISlotNV int `json:"CL_DEVICE_PCI_SLOT_ID_NV"`
|
|
}
|
|
|
|
// clinfoTypeProp decodes the CL_DEVICE_TYPE property. GPU detection is
// done against the Type string list instead of the Raw value, so that a
// future CL_DEVICE_TYPE_CUSTOM cannot be mistaken for a GPU.
type clinfoTypeProp struct {
	// Raw is the value found under the "raw" key.
	Raw uint32 `json:"raw"`
	// Type is the list of type strings found under the "type" key.
	Type []string `json:"type"`
}
|
|
|
|
func (t clinfoTypeProp) isGPU() bool {
|
|
for _, s := range t.Type {
|
|
if s == clDeviceTypeGPU {
|
|
return true
|
|
}
|
|
}
|
|
return false
|
|
}
|
|
|
|
// clinfoOnce caches the result for the process lifetime. GPU hardware
|
|
// doesn't change between calls and the subprocess is ~150 ms.
|
|
var clinfoOnce = sync.OnceValue(runCLInfo)
|
|
|
|
func runCLInfo() []GPUMemoryInfo {
|
|
if _, err := exec.LookPath("clinfo"); err != nil {
|
|
return nil
|
|
}
|
|
ctx, cancel := context.WithTimeout(context.Background(), clinfoTimeout)
|
|
defer cancel()
|
|
cmd := exec.CommandContext(ctx, "clinfo", "--json")
|
|
var stdout, stderr bytes.Buffer
|
|
cmd.Stdout = &stdout
|
|
cmd.Stderr = &stderr
|
|
if err := cmd.Run(); err != nil {
|
|
xlog.Debug("clinfo failed", "error", err, "stderr", stderr.String())
|
|
return nil
|
|
}
|
|
return parseCLInfoJSON(stdout.Bytes())
|
|
}
|
|
|
|
// getCLInfoGPUMemory is a best-effort fallback for hosts where the
|
|
// vendor's own management binary (nvidia-smi / xpu-smi / rocm-smi)
|
|
// isn't installed but the OpenCL ICD is. Live used/free aren't exposed
|
|
// via standard CL_ properties; we synthesise them by attributing
|
|
// per-process VRAM allocations from the kernel DRM fdinfo interface
|
|
// to each clinfo-reported GPU via the shared PCI BDF.
|
|
func getCLInfoGPUMemory() []GPUMemoryInfo {
|
|
gpus := clinfoOnce()
|
|
if len(gpus) == 0 {
|
|
return nil
|
|
}
|
|
usage := drmFdInfoUsageByBDF()
|
|
for i := range gpus {
|
|
gpus[i] = applyDRMUsage(gpus[i], usage[gpus[i].BDF])
|
|
}
|
|
return gpus
|
|
}
|
|
|
|
// applyDRMUsage stamps live VRAM accounting onto a GPUMemoryInfo
|
|
// whose TotalVRAM came from a static source (e.g. clinfo). Caller
|
|
// already populated TotalVRAM and FreeVRAM=TotalVRAM as defaults; if
|
|
// DRM accounting reports usage, we trust it and rederive free/percent.
|
|
func applyDRMUsage(g GPUMemoryInfo, used uint64) GPUMemoryInfo {
|
|
if used == 0 || g.TotalVRAM == 0 {
|
|
return g
|
|
}
|
|
if used > g.TotalVRAM {
|
|
// Process-private DRM total can momentarily exceed device
|
|
// VRAM (over-commit via host memory mirror). Clamp so the UI
|
|
// doesn't display absurd percentages.
|
|
used = g.TotalVRAM
|
|
}
|
|
g.UsedVRAM = used
|
|
g.FreeVRAM = g.TotalVRAM - used
|
|
g.UsagePercent = float64(used) / float64(g.TotalVRAM) * 100
|
|
return g
|
|
}
|
|
|
|
// parseCLInfoJSON returns one GPUMemoryInfo per discrete GPU. UMA
|
|
// devices (iGPU/APU) are dropped because their "VRAM" is system RAM
|
|
// and would double-count against the capability gate. When the same
|
|
// physical device is enumerated by multiple ICDs (Intel OpenCL + POCL,
|
|
// for example), the BDF dedup keeps the largest reported size — some
|
|
// ICDs cap at 4 GiB for legacy alloc-size compatibility.
|
|
func parseCLInfoJSON(raw []byte) []GPUMemoryInfo {
|
|
var out clinfoOutput
|
|
if err := json.Unmarshal(raw, &out); err != nil {
|
|
xlog.Debug("clinfo: failed to parse --json output", "error", err)
|
|
return nil
|
|
}
|
|
|
|
byBDF := map[string]GPUMemoryInfo{}
|
|
var noBDF []GPUMemoryInfo
|
|
|
|
for _, plat := range out.Devices {
|
|
for _, d := range plat.Online {
|
|
if !d.Type.isGPU() || d.HostUnifiedMemory || d.GlobalMemSize == 0 {
|
|
continue
|
|
}
|
|
bdf := clinfoBDF(d)
|
|
info := GPUMemoryInfo{
|
|
Name: strings.TrimSpace(d.Name),
|
|
Vendor: clinfoVendor(d.VendorID, d.Vendor),
|
|
BDF: bdf,
|
|
TotalVRAM: d.GlobalMemSize,
|
|
FreeVRAM: d.GlobalMemSize,
|
|
}
|
|
if bdf == "" {
|
|
noBDF = append(noBDF, info)
|
|
continue
|
|
}
|
|
if existing, ok := byBDF[bdf]; !ok || info.TotalVRAM > existing.TotalVRAM {
|
|
byBDF[bdf] = info
|
|
}
|
|
}
|
|
}
|
|
|
|
all := make([]GPUMemoryInfo, 0, len(byBDF)+len(noBDF))
|
|
for _, g := range byBDF {
|
|
all = append(all, g)
|
|
}
|
|
all = append(all, noBDF...)
|
|
for i := range all {
|
|
all[i].Index = i
|
|
}
|
|
return all
|
|
}
|
|
|
|
func clinfoVendor(vendorID uint32, name string) string {
|
|
switch vendorID {
|
|
case 0x10de:
|
|
return VendorNVIDIA
|
|
case 0x1002, 0x1022: // 0x1022 is the AMD CPU vendor ID, also reported by some APU OpenCL devices.
|
|
return VendorAMD
|
|
case 0x8086:
|
|
return VendorIntel
|
|
case 0x106B:
|
|
return VendorApple
|
|
}
|
|
n := strings.ToLower(name)
|
|
switch {
|
|
case strings.Contains(n, "nvidia"):
|
|
return VendorNVIDIA
|
|
case strings.Contains(n, "advanced micro devices"), strings.Contains(n, "amd"):
|
|
return VendorAMD
|
|
case strings.Contains(n, "intel"):
|
|
return VendorIntel
|
|
case strings.Contains(n, "apple"):
|
|
return VendorApple
|
|
}
|
|
return VendorUnknown
|
|
}
|
|
|
|
// clinfoBDF returns the device's canonical `dddd:bb:dd.f` PCI address,
|
|
// or "" when no PCI location is reported. The KHR form is `"PCI-E,
|
|
// 0000:01:00.0"` on NVIDIA and bare `"0000:01:00.0"` on most others.
|
|
func clinfoBDF(d clinfoDevice) string {
|
|
if d.PCIBusInfoKHR != "" {
|
|
s := d.PCIBusInfoKHR
|
|
if i := strings.LastIndex(s, " "); i >= 0 {
|
|
s = s[i+1:]
|
|
}
|
|
if c := strings.Count(s, ":"); c == 1 || c == 2 {
|
|
return normalizeBDF(s)
|
|
}
|
|
}
|
|
// NVIDIA pre-KHR per-axis fields. An all-zero result is
|
|
// indistinguishable from "fields absent", but no GPU sits at
|
|
// 0000:00:00.0 so the false negative is harmless.
|
|
if d.PCIBusNV != 0 || d.PCISlotNV != 0 || d.PCIDomainNV != 0 {
|
|
return fmt.Sprintf("%04x:%02x:%02x.0", d.PCIDomainNV, d.PCIBusNV, d.PCISlotNV)
|
|
}
|
|
return ""
|
|
}
|
|
|
|
// normalizeBDF lower-cases a PCI address and, when it contains exactly
// one colon (i.e. the domain part is missing), prefixes the default
// domain "0000:". Any other input is only lower-cased.
func normalizeBDF(s string) string {
	addr := s
	if strings.Count(addr, ":") == 1 {
		addr = "0000:" + addr
	}
	return strings.ToLower(addr)
}
|