mirror of
https://github.com/nicolargo/glances.git
synced 2025-12-23 22:18:31 -05:00
Merge work done by @meatposes
This commit is contained in:
@@ -182,6 +182,10 @@ mem_critical=90
|
||||
temperature_careful=60
|
||||
temperature_warning=70
|
||||
temperature_critical=80
|
||||
# Ignore specific GPU devices by ID (comma-separated)
|
||||
# Use 'xpu-smi discovery' to see device IDs
|
||||
# Example: ignore device 2 (typically the iGPU)
|
||||
#ignore_devices=2
|
||||
|
||||
[mem]
|
||||
disable=False
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
#
|
||||
# Copyright (C) 2020 Kirby Banman <kirby.banman@gmail.com>
|
||||
# Copyright (C) 2024 Nicolas Hennion <nicolashennion@gmail.com>
|
||||
# Intel GPU support added (poorly) 2025 by <computerdork@verion.net>
|
||||
#
|
||||
# SPDX-License-Identifier: LGPL-3.0-only
|
||||
#
|
||||
@@ -12,11 +13,13 @@
|
||||
Currently supported:
|
||||
- NVIDIA GPU (need pynvml lib)
|
||||
- AMD GPU (no lib needed)
|
||||
- Intel GPU (need xpu-smi or xpumcli, requires root/sudo rights for utilization)
|
||||
"""
|
||||
|
||||
from glances.globals import to_fahrenheit
|
||||
from glances.logger import logger
|
||||
from glances.plugins.gpu.cards.amd import AmdGPU
|
||||
from glances.plugins.gpu.cards.intel import IntelGPU
|
||||
from glances.plugins.gpu.cards.nvidia import NvidiaGPU
|
||||
from glances.plugins.plugin.model import GlancesPluginModel
|
||||
|
||||
@@ -90,13 +93,24 @@ class GpuPlugin(GlancesPluginModel):
|
||||
logger.debug(f'AMD GPU initialization error: {e}')
|
||||
self.amd = None
|
||||
|
||||
# Init the Intel GPU API
|
||||
try:
|
||||
self.intel = IntelGPU(config=config)
|
||||
except Exception as e:
|
||||
logger.debug(f'Intel GPU initialization error: {e}')
|
||||
self.intel = None
|
||||
|
||||
# We want to display the stat in the curse interface
|
||||
self.display_curse = True
|
||||
|
||||
def exit(self):
    """Overwrite the exit method to close the GPU API.

    The AMD and Intel backends are set to None when their initialisation
    fails, and the NVIDIA backend is guarded the same way, so each backend
    is closed only if it is available, and only once.
    """
    # Close every backend that was successfully initialised.
    if self.nvidia:
        self.nvidia.exit()
    if self.amd:
        self.amd.exit()
    if self.intel:
        self.intel.exit()

    # Call the father exit method
    super().exit()
|
||||
@@ -117,6 +131,8 @@ class GpuPlugin(GlancesPluginModel):
|
||||
stats.extend(self.nvidia.get_device_stats())
|
||||
if self.amd:
|
||||
stats.extend(self.amd.get_device_stats())
|
||||
if self.intel:
|
||||
stats.extend(self.intel.get_device_stats())
|
||||
|
||||
# !!!
|
||||
# Uncomment to test on computer without Nvidia GPU
|
||||
|
||||
281
glances/plugins/gpu/cards/intel.py
Normal file
281
glances/plugins/gpu/cards/intel.py
Normal file
@@ -0,0 +1,281 @@
|
||||
#
|
||||
# This file is part of Glances.
|
||||
#
|
||||
# Intel GPU support added (poorly) 2025 by <computerdork@verion.net>
|
||||
#
|
||||
# SPDX-License-Identifier: LGPL-3.0-only
|
||||
#
|
||||
|
||||
"""Intel GPU card for Glances."""
|
||||
|
||||
import glob
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
import time
|
||||
from collections import defaultdict
|
||||
|
||||
from glances.logger import logger
|
||||
|
||||
|
||||
class IntelGPU:
    """Intel GPU card (Arc, Xe) using xpu-smi/xpumcli + fdinfo.

    Memory utilization and temperature come from the JSON output of the
    ``xpu-smi`` (or older ``xpumcli``) command line tool. Engine utilization
    is derived from the ``drm-cycles-*`` / ``drm-total-cycles-*`` counters
    exposed in ``/proc/<pid>/fdinfo/<fd>``, sampled between two refreshes.

    Attributes:
        ready: True once at least one device was discovered.
        device_count: Number of devices returned by the discovery command.
        pci_to_id: Maps a lower-cased PCI address to the tool's device ID.
        fdinfo_last: Previous fdinfo sample per PCI address
            (``{'cycles': {...}, 'time': float}``).
        device_names: Display name per device ID, filled from discovery.
        ignore_devices: Set of device IDs excluded from the stats.
        xpumcli_cmd: Name of the detected command line tool, or None.
    """

    # Command line tools to probe, newest first.
    _TOOL_CANDIDATES = ('xpu-smi', 'xpumcli')

    # fdinfo patterns, compiled once instead of once per fdinfo file.
    _PDEV_RE = re.compile(r'drm-pdev:\s*([0-9a-f:\.]+)')
    _CLIENT_ID_RE = re.compile(r'drm-client-id:\s*(\d+)')
    _CYCLES_RE = re.compile(r'drm-cycles-(\w+):\s+(\d+)')
    _TOTAL_CYCLES_RE = re.compile(r'drm-total-cycles-(\w+):\s+(\d+)')

    def __init__(self, config=None):
        """Init Intel GPU detection.

        Args:
            config: Optional Glances configuration object; only its
                ``get_value('gpu', 'ignore_devices', default='')`` is used.
        """
        self.ready = False
        self.device_count = 0
        self.pci_to_id = {}
        self.fdinfo_last = {}
        self.device_names = {}
        self.config = config

        # Parse ignore_devices from config
        self.ignore_devices = self._parse_ignore_devices(config)

        # Detect which command is available: xpu-smi (newer) or xpumcli (older)
        self.xpumcli_cmd = self._detect_tool()
        if not self.xpumcli_cmd:
            logger.debug("Neither xpu-smi nor xpumcli found, Intel GPU support disabled")
            return

        # Get Intel GPU device list
        try:
            devices = self._discover_devices()
            self.device_count = len(devices)

            # Build the PCI address mapping and remember the display names so
            # that the discovery command is not re-run on every refresh.
            for device in devices:
                device_id = device.get('device_id')
                if device_id is None:
                    continue
                pci_addr = (device.get('pci_bdf_address') or '').lower()
                if pci_addr:
                    self.pci_to_id[pci_addr] = device_id
                self.device_names[device_id] = self._clean_device_name(device)

            if self.device_count > 0:
                self.ready = True
                logger.debug(f"Intel GPU support initialized: {self.device_count} device(s)")
        except Exception as e:
            logger.debug(f"Intel GPU initialization failed: {e}")

    @staticmethod
    def _parse_ignore_devices(config):
        """Return the set of device IDs listed in [gpu] ignore_devices.

        Invalid entries are logged and skipped individually, so one typo does
        not discard the whole list.
        """
        ignored = set()
        if not config:
            return ignored

        try:
            raw = config.get_value('gpu', 'ignore_devices', default='')
            tokens = [token.strip() for token in raw.split(',')] if raw else []
        except Exception as e:
            logger.debug(f"Error parsing ignore_devices: {e}")
            return ignored

        for token in tokens:
            if not token:
                continue
            try:
                ignored.add(int(token))
            except ValueError:
                logger.debug(f"Error parsing ignore_devices: invalid device ID {token!r}")

        if ignored:
            logger.debug(f"Intel GPU ignoring devices: {ignored}")
        return ignored

    @classmethod
    def _detect_tool(cls):
        """Return the first candidate tool whose ``--version`` succeeds, else None."""
        for cmd in cls._TOOL_CANDIDATES:
            try:
                result = subprocess.run([cmd, '--version'], capture_output=True, timeout=2)
            except (subprocess.TimeoutExpired, OSError):
                # OSError covers a missing binary as well as a non-executable one.
                continue
            if result.returncode == 0:
                logger.debug(f"Found Intel GPU tool: {cmd}")
                return cmd
        return None

    def _discover_devices(self):
        """Run ``<tool> discovery -j`` and return its ``device_list``.

        Returns an empty list when the command exits with a non-zero status.
        Subprocess and JSON errors are propagated to the caller.
        """
        result = subprocess.run([self.xpumcli_cmd, 'discovery', '-j'], capture_output=True, text=True, timeout=5)
        if result.returncode != 0:
            return []
        return json.loads(result.stdout).get('device_list', [])

    @staticmethod
    def _clean_device_name(device):
        """Build a short display name from one discovery ``device_list`` entry."""
        name = device.get('device_name', 'Intel GPU')
        if not isinstance(name, str):
            name = 'Intel GPU'

        # Clean up name
        name = name.replace('Intel(R) ', '').replace('Graphics ', '')
        if name and name != 'Graphics':
            return name

        # Fallback to PCI device ID
        pci_id = device.get('pci_device_id', '')
        if isinstance(pci_id, str) and pci_id.startswith('0x'):
            return pci_id[2:]
        return 'Intel GPU'

    @staticmethod
    def _positive_or_none(value):
        """Return value when it is a number greater than zero, else None."""
        try:
            return value if value > 0 else None
        except TypeError:
            # Missing (None) or non-numeric metric: report it as unavailable
            # instead of dropping the whole device.
            return None

    def get_device_stats(self):
        """Get Intel GPU stats.

        Returns list of dicts with GPU stats (one per non-ignored device whose
        stats command succeeded). Returns an empty list when not ready.
        """
        if not self.ready:
            return []

        stats = []

        # Get GPU utilization from fdinfo
        intel_util = self._get_fdinfo_utilization()

        # Query each Intel GPU
        for xpu_device_id in range(self.device_count):
            # Skip ignored devices
            if xpu_device_id in self.ignore_devices:
                logger.debug(f"Skipping ignored Intel GPU device {xpu_device_id}")
                continue

            try:
                result = subprocess.run(
                    [self.xpumcli_cmd, 'stats', '-j', '-d', str(xpu_device_id)],
                    capture_output=True,
                    text=True,
                    timeout=5,
                )
                if result.returncode != 0:
                    continue

                data = json.loads(result.stdout)
                device_level = data.get('device_level', [])

                # NOTE(review): the temperature below is the memory temperature
                # metric, not a GPU core temperature - confirm this is intended.
                stats.append(
                    {
                        'key': 'gpu_id',
                        'gpu_id': f'intel{xpu_device_id}',
                        'name': self._get_device_name(xpu_device_id),
                        # Non-positive or invalid values are reported as None
                        'mem': self._positive_or_none(
                            self._extract_metric(device_level, 'XPUM_STATS_MEMORY_UTILIZATION')
                        ),
                        'proc': intel_util.get(xpu_device_id, 0.0),
                        'temperature': self._positive_or_none(
                            self._extract_metric(device_level, 'XPUM_STATS_MEMORY_TEMPERATURE')
                        ),
                        'fan_speed': None,  # Not available
                    }
                )
            except Exception as e:
                logger.debug(f"Error getting Intel GPU {xpu_device_id} stats: {e}")

        return stats

    def _get_device_name(self, device_id):
        """Get Intel GPU device name.

        Names collected during initialisation are served from the cache; the
        discovery command is only re-run for a device ID that is not cached.
        Returns 'Intel GPU' when the name cannot be determined.
        """
        if device_id in self.device_names:
            return self.device_names[device_id]

        try:
            for device in self._discover_devices():
                if device.get('device_id') == device_id:
                    name = self._clean_device_name(device)
                    self.device_names[device_id] = name
                    return name
        except Exception as e:
            logger.debug(f"Error getting Intel GPU {device_id} name: {e}")
        return 'Intel GPU'

    def _extract_metric(self, device_level, metric_type):
        """Extract metric from xpumcli device_level array.

        Returns the ``value`` of the first entry whose ``metrics_type`` equals
        metric_type, or 0 when there is no such entry.
        """
        for metric in device_level or ():
            if metric.get('metrics_type') == metric_type:
                return metric.get('value', 0)
        return 0

    def _accumulate_fdinfo(self, content, pci_to_cycles, seen_clients):
        """Add the counters of one fdinfo file to pci_to_cycles.

        Args:
            content: Text of a ``/proc/<pid>/fdinfo/<fd>`` file.
            pci_to_cycles: ``defaultdict`` of ``defaultdict(int)`` keyed by PCI
                address, then by ``<engine>_cycles`` / ``<engine>_total``.
            seen_clients: Set of ``(pci_addr, drm-client-id)`` already counted
                during this pass; updated in place.
        """
        pci_match = self._PDEV_RE.search(content)
        if not pci_match or 'drm-cycles-' not in content:
            return

        # Only process Intel GPUs we know about
        pci_addr = pci_match.group(1).lower()
        if pci_addr not in self.pci_to_id:
            return

        # The same DRM client shows up once per duplicated/shared file
        # descriptor; count each client only once or the busy cycles (and so
        # the utilization) are inflated.
        client_match = self._CLIENT_ID_RE.search(content)
        if client_match:
            client_key = (pci_addr, client_match.group(1))
            if client_key in seen_clients:
                return
            seen_clients.add(client_key)

        device_cycles = pci_to_cycles[pci_addr]

        # Busy cycles are summed over the clients...
        for engine, value in self._CYCLES_RE.findall(content):
            device_cycles[engine + '_cycles'] += int(value)

        # ...while for the total cycles the largest value seen is kept.
        for engine, value in self._TOTAL_CYCLES_RE.findall(content):
            key = engine + '_total'
            device_cycles[key] = max(device_cycles[key], int(value))

    def _read_fdinfo_cycles(self):
        """Scan /proc/*/fdinfo/* and return the cycle counters per PCI address."""
        pci_to_cycles = defaultdict(lambda: defaultdict(int))
        seen_clients = set()

        for proc_dir in glob.glob('/proc/[0-9]*'):
            fdinfo_dir = os.path.join(proc_dir, 'fdinfo')
            try:
                fd_names = os.listdir(fdinfo_dir)
            except OSError:
                # Process vanished or is not readable by the current user.
                continue

            for fd_name in fd_names:
                try:
                    with open(os.path.join(fdinfo_dir, fd_name)) as f:
                        content = f.read()
                except (OSError, ValueError):
                    # ValueError covers undecodable content; skip this file only.
                    continue
                self._accumulate_fdinfo(content, pci_to_cycles, seen_clients)

        return pci_to_cycles

    def _compute_utilization(self, pci_to_cycles, current_time):
        """Turn cycle counters into a utilization percentage per device ID.

        The result for a device is the highest per-engine ratio
        ``delta busy cycles / delta total cycles`` since the previous sample,
        clamped to [0, 100]. The first sample of a device, and samples taken
        less than 0.1 s after the previous one, report 0.0.
        """
        utilization = {}

        for pci_addr, cycles in pci_to_cycles.items():
            device_id = self.pci_to_id.get(pci_addr)
            if device_id is None:
                continue

            last = self.fdinfo_last.get(pci_addr)
            if last is None:
                # First measurement - store baseline
                self.fdinfo_last[pci_addr] = {'cycles': dict(cycles), 'time': current_time}
                utilization[device_id] = 0.0
                continue

            if current_time - last['time'] < 0.1:
                # Too close to the previous sample; keep the old baseline.
                utilization[device_id] = 0.0
                continue

            # Calculate max utilization across all engines. Strip only the
            # trailing '_cycles' / '_total' suffix to recover the engine name.
            max_util = 0.0
            previous = last['cycles']
            for engine in {key.rsplit('_', 1)[0] for key in cycles}:
                busy_key = engine + '_cycles'
                total_key = engine + '_total'
                delta_cycles = cycles.get(busy_key, 0) - previous.get(busy_key, 0)
                delta_total = cycles.get(total_key, 0) - previous.get(total_key, 0)
                if delta_total > 0:
                    max_util = max(max_util, (delta_cycles / delta_total) * 100.0)

            utilization[device_id] = min(100.0, max(0.0, max_util))

            # Update last measurement
            self.fdinfo_last[pci_addr] = {'cycles': dict(cycles), 'time': current_time}

        # Fill in 0% for devices with no activity
        for device_id in range(self.device_count):
            utilization.setdefault(device_id, 0.0)

        return utilization

    def _get_fdinfo_utilization(self):
        """Get Intel GPU utilization from /proc/*/fdinfo/*.

        Returns dict of {device_id: utilization_percent}

        Requires root/CAP_SYS_PTRACE to see all processes.
        """
        current_time = time.time()
        return self._compute_utilization(self._read_fdinfo_cycles(), current_time)

    def exit(self):
        """Cleanup (nothing to release: no handle is kept open)."""
        pass
|
||||
Reference in New Issue
Block a user