Files
LocalAI/pkg/xsysinfo/gpu.go
2025-12-21 19:33:13 +01:00

658 lines
17 KiB
Go

package xsysinfo
import (
	"bytes"
	"context"
	"encoding/json"
	"os/exec"
	"strconv"
	"strings"
	"sync"
	"time"

	"github.com/jaypipes/ghw"
	"github.com/jaypipes/ghw/pkg/gpu"
	"github.com/mudler/xlog"
)
// GPU vendor constants. The probes in this file store one of these values in
// GPUMemoryInfo.Vendor to record which tool produced the entry.
const (
// VendorNVIDIA marks entries produced by the nvidia-smi probe.
VendorNVIDIA = "nvidia"
// VendorAMD marks entries produced by the rocm-smi probe.
VendorAMD = "amd"
// VendorIntel marks entries produced by the xpu-smi probe.
VendorIntel = "intel"
// VendorVulkan marks entries produced by the vulkaninfo fallback probe; it
// identifies the probe, not the hardware manufacturer.
VendorVulkan = "vulkan"
// VendorUnknown is not referenced by the code in this file.
VendorUnknown = "unknown"
)
// UnifiedMemoryDevices is a list of GPU device name patterns that use unified memory
// (shared with system RAM). When these devices are detected and report N/A for VRAM,
// we fall back to system RAM information.
//
// isUnifiedMemoryDevice matches each pattern as a case-insensitive substring
// of the device name, so "GB10" already covers every name that "NVIDIA GB10"
// would match.
var UnifiedMemoryDevices = []string{
"NVIDIA GB10",
"GB10",
// Add more unified memory devices here as needed
}
// GPUMemoryInfo contains real-time GPU memory usage information
// for a single device, as reported by one of the vendor probes in this file.
type GPUMemoryInfo struct {
// Index is the device index. GetGPUMemoryUsage renumbers AMD and Intel
// entries so they continue after the entries collected before them.
Index int `json:"index"`
// Name is the device name; the rocm-smi probe always sets it to "AMD GPU".
Name string `json:"name"`
// Vendor is one of the Vendor* constants and identifies the probe used.
Vendor string `json:"vendor"`
TotalVRAM uint64 `json:"total_vram"` // Total VRAM in bytes
UsedVRAM uint64 `json:"used_vram"` // Used VRAM in bytes
FreeVRAM uint64 `json:"free_vram"` // Free VRAM in bytes
UsagePercent float64 `json:"usage_percent"` // Usage as percentage (0-100)
}
// GPUAggregateInfo contains aggregate GPU information across all GPUs
// returned by GetGPUMemoryUsage.
type GPUAggregateInfo struct {
// TotalVRAM, UsedVRAM and FreeVRAM are the sums of the per-GPU values.
TotalVRAM uint64 `json:"total_vram"`
UsedVRAM uint64 `json:"used_vram"`
FreeVRAM uint64 `json:"free_vram"`
// UsagePercent is UsedVRAM / TotalVRAM * 100; it stays 0 when TotalVRAM is 0.
UsagePercent float64 `json:"usage_percent"`
// GPUCount is the number of GPU entries that were summed.
GPUCount int `json:"gpu_count"`
}
// AggregateMemoryInfo contains aggregate memory information (unified for GPU/RAM)
// GetResourceInfo fills it from GPU totals when GPUs are detected and from
// system RAM otherwise.
type AggregateMemoryInfo struct {
TotalMemory uint64 `json:"total_memory"`
UsedMemory uint64 `json:"used_memory"`
FreeMemory uint64 `json:"free_memory"`
UsagePercent float64 `json:"usage_percent"`
// GPUCount is 0 when the values describe system RAM.
GPUCount int `json:"gpu_count"`
}
// ResourceInfo represents unified memory resource information
// as returned by GetResourceInfo.
type ResourceInfo struct {
Type string `json:"type"` // "gpu" or "ram"
// Available is false only when no GPU was detected and reading system RAM
// failed.
Available bool `json:"available"`
// GPUs is set when Type is "gpu" and omitted from JSON otherwise.
GPUs []GPUMemoryInfo `json:"gpus,omitempty"`
// RAM is set when Type is "ram" and the RAM query succeeded.
RAM *SystemRAMInfo `json:"ram,omitempty"`
// Aggregate carries the totals for whichever resource Type names.
Aggregate AggregateMemoryInfo `json:"aggregate"`
}
// Cached result of the ghw GPU probe; populated once by GPUs.
var (
// gpuCache holds the graphics cards returned by the first successful probe.
gpuCache []*gpu.GraphicsCard
// gpuCacheOnce ensures the probe in GPUs runs a single time.
gpuCacheOnce sync.Once
// gpuCacheErr holds the error from the probe, if it failed.
gpuCacheErr error
)
// GPUs returns the graphics cards detected by ghw.
//
// The probe runs only on the first call; its cards or its error are stored in
// package-level variables and every later call returns the same pair.
func GPUs() ([]*gpu.GraphicsCard, error) {
	gpuCacheOnce.Do(func() {
		info, probeErr := ghw.GPU()
		if probeErr == nil {
			gpuCache = info.GraphicsCards
			return
		}
		gpuCacheErr = probeErr
	})
	return gpuCache, gpuCacheErr
}
// TotalAvailableVRAM sums the usable memory, in bytes, of every card returned
// by GPUs.
//
// Cards that are nil, have no node, have no memory information, or report a
// non-positive usable size contribute nothing. An error from GPUs is returned
// unchanged together with a zero total.
func TotalAvailableVRAM() (uint64, error) {
	cards, err := GPUs()
	if err != nil {
		return 0, err
	}

	var sum uint64
	for _, card := range cards {
		if card == nil || card.Node == nil || card.Node.Memory == nil {
			continue
		}
		if usable := card.Node.Memory.TotalUsableBytes; usable > 0 {
			sum += uint64(usable)
		}
	}
	return sum, nil
}
// HasGPU reports whether a GPU is present.
//
// With an empty vendor it reports whether GPUs returned at least one card.
// Otherwise it reports whether the String() description of any card contains
// vendor as a case-sensitive substring. It returns false when GPUs fails.
func HasGPU(vendor string) bool {
	gpus, err := GPUs()
	if err != nil {
		return false
	}
	if vendor == "" {
		return len(gpus) > 0
	}
	for _, card := range gpus {
		// Skip nil entries, as TotalAvailableVRAM does, instead of calling a
		// method on a nil card.
		if card == nil {
			continue
		}
		if strings.Contains(card.String(), vendor) {
			return true
		}
	}
	return false
}
// isUnifiedMemoryDevice reports whether gpuName contains any entry of
// UnifiedMemoryDevices, comparing case-insensitively.
func isUnifiedMemoryDevice(gpuName string) bool {
	haystack := strings.ToUpper(gpuName)
	for i := range UnifiedMemoryDevices {
		if needle := strings.ToUpper(UnifiedMemoryDevices[i]); strings.Contains(haystack, needle) {
			return true
		}
	}
	return false
}
// GetGPUMemoryUsage collects memory usage for every GPU the vendor tools report.
//
// Probes run in the order NVIDIA, AMD, Intel. NVIDIA entries keep the index the
// tool reported; AMD and Intel entries are renumbered to continue after the
// entries gathered so far. The Vulkan probe is consulted only when the
// vendor probes found nothing. The result is empty when no probe
// produced data.
func GetGPUMemoryUsage() []GPUMemoryInfo {
	var detected []GPUMemoryInfo

	// NVIDIA goes first and is appended as reported.
	detected = append(detected, getNVIDIAGPUMemory()...)

	// XXX: Note - the AMD and Intel probes could not be tested on real
	// hardware and were written with the help of AI, so they may not work.
	for _, probe := range []func() []GPUMemoryInfo{getAMDGPUMemory, getIntelGPUMemory} {
		found := probe()
		offset := len(detected)
		for i := range found {
			found[i].Index = offset + i
		}
		detected = append(detected, found...)
	}

	if len(detected) > 0 {
		return detected
	}

	// Vulkan is a detection-only fallback with limited real-time data.
	return append(detected, getVulkanGPUMemory()...)
}
// GetGPUAggregateInfo sums total, used and free VRAM over every GPU returned
// by GetGPUMemoryUsage and derives the overall usage percentage.
// UsagePercent stays 0 when the summed total is 0.
func GetGPUAggregateInfo() GPUAggregateInfo {
	devices := GetGPUMemoryUsage()

	summary := GPUAggregateInfo{GPUCount: len(devices)}
	for i := range devices {
		summary.TotalVRAM += devices[i].TotalVRAM
		summary.UsedVRAM += devices[i].UsedVRAM
		summary.FreeVRAM += devices[i].FreeVRAM
	}

	if summary.TotalVRAM == 0 {
		return summary
	}
	summary.UsagePercent = float64(summary.UsedVRAM) / float64(summary.TotalVRAM) * 100
	return summary
}
// getNVIDIAGPUMemory queries NVIDIA GPUs using nvidia-smi.
//
// It returns nil when nvidia-smi is not on PATH or exits with an error.
// Otherwise it returns one entry per parsable output line; see
// parseNVIDIASMILine for how a line is interpreted.
func getNVIDIAGPUMemory() []GPUMemoryInfo {
	// Check if nvidia-smi is available
	if _, err := exec.LookPath("nvidia-smi"); err != nil {
		return nil
	}

	cmd := exec.Command("nvidia-smi",
		"--query-gpu=index,name,memory.total,memory.used,memory.free",
		"--format=csv,noheader,nounits")
	var stdout, stderr bytes.Buffer
	cmd.Stdout = &stdout
	cmd.Stderr = &stderr
	if err := cmd.Run(); err != nil {
		xlog.Debug("nvidia-smi failed", "error", err, "stderr", stderr.String())
		return nil
	}

	var gpus []GPUMemoryInfo
	for _, line := range strings.Split(strings.TrimSpace(stdout.String()), "\n") {
		if info, ok := parseNVIDIASMILine(line); ok {
			gpus = append(gpus, info)
		}
	}
	return gpus
}

// parseNVIDIASMILine converts one "index, name, total, used, free" CSV line
// from nvidia-smi into a GPUMemoryInfo. ok is false for empty lines and lines
// with fewer than five fields.
//
// When any memory field is "[N/A]" the device is still reported: a device
// matching UnifiedMemoryDevices gets the system RAM figures, any other device
// (or a failed RAM query) gets zero memory values.
func parseNVIDIASMILine(line string) (info GPUMemoryInfo, ok bool) {
	if line == "" {
		return GPUMemoryInfo{}, false
	}
	parts := strings.Split(line, ", ")
	if len(parts) < 5 {
		return GPUMemoryInfo{}, false
	}

	// The index is the first field and the three memory values are the last
	// three. Everything in between is the device name, so a name that itself
	// contains ", " cannot shift the memory columns.
	memStart := len(parts) - 3
	idxStr := strings.TrimSpace(parts[0])
	name := strings.TrimSpace(strings.Join(parts[1:memStart], ", "))
	totalStr := strings.TrimSpace(parts[memStart])
	usedStr := strings.TrimSpace(parts[memStart+1])
	freeStr := strings.TrimSpace(parts[memStart+2])

	idx, err := strconv.Atoi(idxStr)
	if err != nil {
		// Keep the entry (best effort) but make the fallback to index 0 visible.
		xlog.Debug("nvidia-smi returned an unparsable GPU index", "value", idxStr, "device", name, "error", err)
		idx = 0
	}

	info = GPUMemoryInfo{Index: idx, Name: name, Vendor: VendorNVIDIA}

	// Check if memory values are N/A (unified memory devices like GB10)
	isNA := totalStr == "[N/A]" || usedStr == "[N/A]" || freeStr == "[N/A]"
	switch {
	case isNA && isUnifiedMemoryDevice(name):
		// Unified memory device - fall back to system RAM
		sysInfo, err := GetSystemRAMInfo()
		if err != nil {
			xlog.Debug("failed to get system RAM for unified memory device", "error", err, "device", name)
			// Still report the GPU, with zero memory info.
			return info, true
		}
		info.TotalVRAM = sysInfo.Total
		info.UsedVRAM = sysInfo.Used
		info.FreeVRAM = sysInfo.Free
		xlog.Debug("using system RAM for unified memory GPU", "device", name, "system_ram_bytes", info.TotalVRAM)
	case isNA:
		// Unknown device with N/A values - report it without memory info.
		xlog.Debug("nvidia-smi returned N/A for unknown device", "device", name)
		return info, true
	default:
		// Normal GPU with dedicated VRAM
		info.TotalVRAM = parseNVIDIAMiB("memory.total", totalStr, name)
		info.UsedVRAM = parseNVIDIAMiB("memory.used", usedStr, name)
		info.FreeVRAM = parseNVIDIAMiB("memory.free", freeStr, name)
	}

	if info.TotalVRAM > 0 {
		info.UsagePercent = float64(info.UsedVRAM) / float64(info.TotalVRAM) * 100
	}
	return info, true
}

// parseNVIDIAMiB parses a unit-less nvidia-smi memory value and converts it to
// bytes by multiplying by 1024*1024. Unparsable, negative or NaN values are
// logged and reported as 0.
func parseNVIDIAMiB(field, raw, device string) uint64 {
	mib, err := strconv.ParseFloat(raw, 64)
	// !(mib >= 0) is true for negatives and for NaN, neither of which can be
	// converted to uint64 meaningfully.
	if err != nil || !(mib >= 0) {
		xlog.Debug("nvidia-smi returned an unparsable memory value", "device", device, "field", field, "value", raw, "error", err)
		return 0
	}
	return uint64(mib * 1024 * 1024)
}
// getAMDGPUMemory queries AMD GPUs using rocm-smi.
//
// It runs "rocm-smi --showmeminfo vram --csv" and returns one entry per data
// row. It returns nil when rocm-smi is not on PATH or exits with an error.
// The first output line is treated as the CSV header and is used to locate the
// "used" and "total" columns; see rocmSMIMemoryColumns. Every entry is named
// "AMD GPU" because this output carries no device name.
func getAMDGPUMemory() []GPUMemoryInfo {
	// Check if rocm-smi is available
	if _, err := exec.LookPath("rocm-smi"); err != nil {
		return nil
	}

	cmd := exec.Command("rocm-smi", "--showmeminfo", "vram", "--csv")
	var stdout, stderr bytes.Buffer
	cmd.Stdout = &stdout
	cmd.Stderr = &stderr
	if err := cmd.Run(); err != nil {
		xlog.Debug("rocm-smi failed", "error", err, "stderr", stderr.String())
		return nil
	}

	// strings.Split always yields at least one element, so lines[0] is safe.
	lines := strings.Split(strings.TrimSpace(stdout.String()), "\n")
	usedCol, totalCol, fromHeader := rocmSMIMemoryColumns(lines[0])

	var gpus []GPUMemoryInfo
	for _, line := range lines[1:] {
		if line == "" {
			continue
		}
		parts := strings.Split(line, ",")
		if len(parts) < 3 || usedCol >= len(parts) || totalCol >= len(parts) {
			continue
		}

		idx := parseROCmDeviceIndex(parts[0])

		// Parse memory values (in bytes or MB depending on rocm-smi version)
		usedBytes, usedErr := strconv.ParseUint(strings.TrimSpace(parts[usedCol]), 10, 64)
		if usedErr != nil {
			usedBytes = 0
		}
		totalBytes, totalErr := strconv.ParseUint(strings.TrimSpace(parts[totalCol]), 10, 64)
		if totalErr != nil {
			totalBytes = 0
		}
		if usedErr != nil || totalErr != nil {
			// Best effort: keep the device, but make the bad row visible.
			xlog.Debug("rocm-smi returned an unparsable memory value", "line", line, "used_error", usedErr, "total_error", totalErr)
		}

		// Without a recognizable header the column order is a guess. Used
		// memory can never exceed total memory, so a violation means the
		// guess was the wrong way round.
		if !fromHeader && usedErr == nil && totalErr == nil && usedBytes > totalBytes {
			usedBytes, totalBytes = totalBytes, usedBytes
		}

		// If values seem like MB, convert to bytes
		if totalBytes < 1000000 {
			usedBytes *= 1024 * 1024
			totalBytes *= 1024 * 1024
		}

		freeBytes := uint64(0)
		if totalBytes > usedBytes {
			freeBytes = totalBytes - usedBytes
		}
		usagePercent := 0.0
		if totalBytes > 0 {
			usagePercent = float64(usedBytes) / float64(totalBytes) * 100
		}

		gpus = append(gpus, GPUMemoryInfo{
			Index:        idx,
			Name:         "AMD GPU",
			Vendor:       VendorAMD,
			TotalVRAM:    totalBytes,
			UsedVRAM:     usedBytes,
			FreeVRAM:     freeBytes,
			UsagePercent: usagePercent,
		})
	}
	return gpus
}

// rocmSMIMemoryColumns locates the used-memory and total-memory columns from
// the rocm-smi CSV header. A column whose name contains "used" is the used
// column (such names may also contain "total"); a column containing "total"
// but not "used" is the total column. When the header does not identify both,
// ok is false and the legacy positions (used=1, total=2) are returned.
func rocmSMIMemoryColumns(header string) (usedCol, totalCol int, ok bool) {
	usedCol, totalCol = -1, -1
	for i, col := range strings.Split(header, ",") {
		name := strings.ToLower(col)
		switch {
		case strings.Contains(name, "used"):
			if usedCol < 0 {
				usedCol = i
			}
		case strings.Contains(name, "total"):
			if totalCol < 0 {
				totalCol = i
			}
		}
	}
	if usedCol < 0 || totalCol < 0 {
		return 1, 2, false
	}
	return usedCol, totalCol, true
}

// parseROCmDeviceIndex extracts the numeric index from a rocm-smi device
// field written as "GPU[N]" or "cardN". Any other form, or a non-numeric N,
// yields 0.
func parseROCmDeviceIndex(field string) int {
	id := strings.TrimSpace(field)
	switch {
	case strings.HasPrefix(id, "GPU["):
		id = strings.TrimSuffix(strings.TrimPrefix(id, "GPU["), "]")
	case strings.HasPrefix(id, "card"):
		id = strings.TrimPrefix(id, "card")
	default:
		return 0
	}
	idx, err := strconv.Atoi(id)
	if err != nil {
		xlog.Debug("rocm-smi returned an unparsable device index", "value", field, "error", err)
		return 0
	}
	return idx
}
// getIntelGPUMemory returns Intel GPU memory information, preferring the
// xpu-smi probe and consulting the intel_gpu_top probe only when xpu-smi
// yields no entries.
func getIntelGPUMemory() []GPUMemoryInfo {
	if viaXPUSMI := getIntelXPUSMI(); len(viaXPUSMI) != 0 {
		return viaXPUSMI
	}
	return getIntelGPUTop()
}
// getIntelXPUSMI queries Intel GPUs using xpu-smi.
//
// It lists devices with "xpu-smi discovery --json" and, for each device, asks
// xpuSMIUsedMemory for the used memory. It returns nil when xpu-smi is not on
// PATH, when discovery fails, or when the discovery output is not valid JSON.
// NOTE(review): the JSON field names are taken as-is from the original code
// and have not been checked against real xpu-smi output.
func getIntelXPUSMI() []GPUMemoryInfo {
	if _, err := exec.LookPath("xpu-smi"); err != nil {
		return nil
	}

	// Get device list
	cmd := exec.Command("xpu-smi", "discovery", "--json")
	var stdout, stderr bytes.Buffer
	cmd.Stdout = &stdout
	cmd.Stderr = &stderr
	if err := cmd.Run(); err != nil {
		xlog.Debug("xpu-smi discovery failed", "error", err, "stderr", stderr.String())
		return nil
	}

	// Parse JSON output
	var result struct {
		DeviceList []struct {
			DeviceID                int    `json:"device_id"`
			DeviceName              string `json:"device_name"`
			VendorName              string `json:"vendor_name"`
			MemoryPhysicalSizeBytes uint64 `json:"memory_physical_size_byte"`
		} `json:"device_list"`
	}
	if err := json.Unmarshal(stdout.Bytes(), &result); err != nil {
		xlog.Debug("failed to parse xpu-smi discovery output", "error", err)
		return nil
	}

	var gpus []GPUMemoryInfo
	for _, device := range result.DeviceList {
		usedBytes := xpuSMIUsedMemory(device.DeviceID)
		totalBytes := device.MemoryPhysicalSizeBytes

		freeBytes := uint64(0)
		if totalBytes > usedBytes {
			freeBytes = totalBytes - usedBytes
		}
		usagePercent := 0.0
		if totalBytes > 0 {
			usagePercent = float64(usedBytes) / float64(totalBytes) * 100
		}

		gpus = append(gpus, GPUMemoryInfo{
			Index:        device.DeviceID,
			Name:         device.DeviceName,
			Vendor:       VendorIntel,
			TotalVRAM:    totalBytes,
			UsedVRAM:     usedBytes,
			FreeVRAM:     freeBytes,
			UsagePercent: usagePercent,
		})
	}
	return gpus
}

// xpuSMIUsedMemory returns the "memory_used" value from
// "xpu-smi stats -d <deviceID> --json". A failed command or unparsable output
// is logged at debug level and reported as 0, so the device is still listed.
func xpuSMIUsedMemory(deviceID int) uint64 {
	cmd := exec.Command("xpu-smi", "stats", "-d", strconv.Itoa(deviceID), "--json")
	var stdout, stderr bytes.Buffer
	cmd.Stdout = &stdout
	cmd.Stderr = &stderr
	if err := cmd.Run(); err != nil {
		xlog.Debug("xpu-smi stats failed", "error", err, "device_id", deviceID, "stderr", stderr.String())
		return 0
	}

	var stats struct {
		DeviceID   int    `json:"device_id"`
		MemoryUsed uint64 `json:"memory_used"`
	}
	if err := json.Unmarshal(stdout.Bytes(), &stats); err != nil {
		xlog.Debug("failed to parse xpu-smi stats output", "error", err, "device_id", deviceID)
		return 0
	}
	return stats.MemoryUsed
}
// getIntelGPUTop queries Intel GPUs using intel_gpu_top.
//
// The function currently always returns nil: it only checks that the tool
// runs and that its output parses, because the parsed data carries no memory
// figures that are used here.
//
// The command is run under a timeout. "-s 1" sets the refresh period to 1 ms;
// it does not limit the number of samples, so without a deadline the tool
// would keep sampling and this call would never return.
func getIntelGPUTop() []GPUMemoryInfo {
	if _, err := exec.LookPath("intel_gpu_top"); err != nil {
		return nil
	}

	// Upper bound on how long the sampler may run before it is killed.
	const sampleTimeout = time.Second
	ctx, cancel := context.WithTimeout(context.Background(), sampleTimeout)
	defer cancel()

	// -J selects JSON output, -s 1 a 1 ms refresh period.
	cmd := exec.CommandContext(ctx, "intel_gpu_top", "-J", "-s", "1")
	var stdout, stderr bytes.Buffer
	cmd.Stdout = &stdout
	cmd.Stderr = &stderr
	if err := cmd.Run(); err != nil {
		// A killed-on-deadline run also lands here; timed_out tells the two
		// cases apart in the log.
		xlog.Debug("intel_gpu_top failed", "error", err, "timed_out", ctx.Err() != nil, "stderr", stderr.String())
		return nil
	}

	// Take the last line that starts a JSON object.
	lines := strings.Split(strings.TrimSpace(stdout.String()), "\n")
	var lastJSON string
	for i := len(lines) - 1; i >= 0; i-- {
		if strings.HasPrefix(strings.TrimSpace(lines[i]), "{") {
			lastJSON = lines[i]
			break
		}
	}
	if lastJSON == "" {
		return nil
	}

	var result struct {
		Engines map[string]interface{} `json:"engines"`
		// Memory info if available
	}
	if err := json.Unmarshal([]byte(lastJSON), &result); err != nil {
		xlog.Debug("failed to parse intel_gpu_top output", "error", err)
		return nil
	}

	// intel_gpu_top doesn't always provide memory info
	// Return empty if we can't get useful data
	return nil
}
// GetResourceInfo returns GPU info if available, otherwise system RAM info.
//
// When GetGPUMemoryUsage reports at least one GPU, the result has Type "gpu"
// and Aggregate holds the sums over exactly the GPUs listed in the result.
// The totals are computed from that single probe rather than by probing a
// second time, so the vendor tools run once per call and the aggregate cannot
// disagree with the list. UsagePercent stays 0 when the summed total is 0.
//
// With no GPU, the result has Type "ram". Available is false, and Aggregate
// is zero, only when the system RAM query fails.
func GetResourceInfo() ResourceInfo {
	gpus := GetGPUMemoryUsage()
	if len(gpus) > 0 {
		// GPU available - aggregate the entries we already have.
		aggregate := AggregateMemoryInfo{GPUCount: len(gpus)}
		for i := range gpus {
			aggregate.TotalMemory += gpus[i].TotalVRAM
			aggregate.UsedMemory += gpus[i].UsedVRAM
			aggregate.FreeMemory += gpus[i].FreeVRAM
		}
		if aggregate.TotalMemory > 0 {
			aggregate.UsagePercent = float64(aggregate.UsedMemory) / float64(aggregate.TotalMemory) * 100
		}
		return ResourceInfo{
			Type:      "gpu",
			Available: true,
			GPUs:      gpus,
			RAM:       nil,
			Aggregate: aggregate,
		}
	}

	// No GPU - fall back to system RAM
	ramInfo, err := GetSystemRAMInfo()
	if err != nil {
		xlog.Debug("failed to get system RAM info", "error", err)
		return ResourceInfo{
			Type:      "ram",
			Available: false,
			Aggregate: AggregateMemoryInfo{},
		}
	}

	return ResourceInfo{
		Type:      "ram",
		Available: true,
		GPUs:      nil,
		RAM:       ramInfo,
		Aggregate: AggregateMemoryInfo{
			TotalMemory:  ramInfo.Total,
			UsedMemory:   ramInfo.Used,
			FreeMemory:   ramInfo.Free,
			UsagePercent: ramInfo.UsagePercent,
			GPUCount:     0,
		},
	}
}
// GetResourceAggregateInfo returns only the Aggregate part of GetResourceInfo:
// GPU totals when a GPU is available, system RAM totals otherwise.
// The memory reclaimer uses it to check memory usage.
func GetResourceAggregateInfo() AggregateMemoryInfo {
	return GetResourceInfo().Aggregate
}
// getVulkanGPUMemory lists GPUs through "vulkaninfo --json" as a fallback.
//
// Vulkan exposes heap sizes but no live usage, so each entry reports
// UsedVRAM 0 and FreeVRAM equal to TotalVRAM. TotalVRAM is the sum of the
// heaps whose flags have bit 0 set. Devices of type
// VK_PHYSICAL_DEVICE_TYPE_CPU and devices with no such heap are skipped.
// Index is the device's position in the vulkaninfo list, counting skipped
// devices. It returns nil when vulkaninfo is missing, fails, or prints
// output that does not parse.
func getVulkanGPUMemory() []GPUMemoryInfo {
	if _, lookErr := exec.LookPath("vulkaninfo"); lookErr != nil {
		return nil
	}

	var outBuf, errBuf bytes.Buffer
	vkCmd := exec.Command("vulkaninfo", "--json")
	vkCmd.Stdout, vkCmd.Stderr = &outBuf, &errBuf
	if runErr := vkCmd.Run(); runErr != nil {
		xlog.Debug("vulkaninfo failed", "error", runErr, "stderr", errBuf.String())
		return nil
	}

	// Shape of the parts of the vulkaninfo report that are read here.
	type memoryHeap struct {
		Flags int    `json:"flags"`
		Size  uint64 `json:"size"`
	}
	type physicalDevice struct {
		DeviceName                       string `json:"deviceName"`
		DeviceType                       string `json:"deviceType"`
		VkPhysicalDeviceMemoryProperties struct {
			MemoryHeaps []memoryHeap `json:"memoryHeaps"`
		} `json:"VkPhysicalDeviceMemoryProperties"`
	}
	var report struct {
		VkPhysicalDevices []physicalDevice `json:"VkPhysicalDevices"`
	}
	if parseErr := json.Unmarshal(outBuf.Bytes(), &report); parseErr != nil {
		xlog.Debug("failed to parse vulkaninfo output", "error", parseErr)
		return nil
	}

	// Flag 1 = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT
	const deviceLocalBit = 1

	var found []GPUMemoryInfo
	for pos := range report.VkPhysicalDevices {
		dev := &report.VkPhysicalDevices[pos]
		// CPU implementations are not GPUs.
		if dev.DeviceType == "VK_PHYSICAL_DEVICE_TYPE_CPU" {
			continue
		}

		var localBytes uint64
		for _, heap := range dev.VkPhysicalDeviceMemoryProperties.MemoryHeaps {
			if heap.Flags&deviceLocalBit == 0 {
				continue
			}
			localBytes += heap.Size
		}
		if localBytes == 0 {
			continue
		}

		found = append(found, GPUMemoryInfo{
			Index:        pos,
			Name:         dev.DeviceName,
			Vendor:       VendorVulkan,
			TotalVRAM:    localBytes,
			UsedVRAM:     0, // Vulkan doesn't provide real-time usage
			FreeVRAM:     localBytes,
			UsagePercent: 0,
		})
	}
	return found
}