Files
arnis/analyze_performance.py
2026-01-27 22:50:00 +01:00

184 lines
7.4 KiB
Python

#!/usr/bin/env python3
"""Analyze performance data from Windows Performance Monitor CSV exports."""
import csv
from datetime import datetime
from pathlib import Path
def _clean_column_name(col):
    """Map a localized (German) PDH counter path to a short metric key.

    Order matters: the first matching rule wins, mirroring the counters
    this script was recorded with. 'Verf' alone is checked because the
    umlaut in 'Verfügbare' may be mangled by the export encoding.
    """
    if 'Verfügbare MB' in col or 'Verf' in col:
        return 'available_mb'
    if 'Zugesicherte' in col:
        return 'committed_pct'
    if 'Bytes geschrieben' in col:
        return 'disk_write_bytes_sec'
    if 'Arbeitsseiten' in col:
        # Counters for the GUI process carry 'arnis-windows' in their path.
        return 'gui_working_set' if 'arnis-windows' in col else 'working_set'
    if 'Private Bytes' in col:
        return 'gui_private_bytes' if 'arnis-windows' in col else 'private_bytes'
    if 'Prozessorzeit' in col:
        if 'arnis-windows' in col:
            return 'gui_cpu_pct'
        if 'Prozessorinformationen' not in col:
            return 'cpu_pct'
        # Per-core 'Prozessorinformationen' counters keep their raw name.
        return col[:30]
    if 'Threadanzahl' in col:
        return 'gui_thread_count' if 'arnis-windows' in col else 'thread_count'
    if 'PDH-CSV' in col:
        # The first PDH column header encodes the timestamp format/timezone.
        return 'timestamp'
    return col[:30]  # truncate unrecognized long counter paths


def parse_pdh_csv(filepath):
    """Parse a Windows Performance Monitor (PDH) CSV export.

    Args:
        filepath: path to the CSV file (str or Path).

    Returns:
        List of dicts, one per sample row. Keys are the short metric names
        from _clean_column_name(); values are datetime (for 'timestamp'),
        float, None (empty cell), or the raw string if conversion fails.
    """
    data = []
    # utf-8-sig strips the BOM that PDH exports typically start with.
    with open(filepath, 'r', encoding='utf-8-sig', errors='replace') as f:
        reader = csv.reader(f)
        header = next(reader, None)
        if header is None:  # empty file: no header row at all
            return data
        clean_cols = [_clean_column_name(col) for col in header]
        for row in reader:
            # Skip blank rows and rows with an empty timestamp cell.
            if not row or not row[0].strip():
                continue
            entry = {}
            # zip() stops at the shorter sequence, so extra cells beyond the
            # header width are ignored (as the original enumerate/break did).
            for col_name, val in zip(clean_cols, row):
                if col_name == 'timestamp':
                    try:
                        entry[col_name] = datetime.strptime(
                            val.strip(), '%m/%d/%Y %H:%M:%S.%f')
                    except ValueError:
                        # Keep the raw string if the timestamp format differs.
                        entry[col_name] = val
                elif not val.strip():
                    entry[col_name] = None  # empty cell = no sample
                else:
                    try:
                        entry[col_name] = float(val)
                    except ValueError:
                        entry[col_name] = val  # non-numeric cell kept verbatim
            data.append(entry)
    return data
def analyze_run(data, name):
    """Print summary statistics for one monitoring run and return key metrics.

    Args:
        data: list of sample dicts as produced by parse_pdh_csv().
              Metric values are assumed numeric; rows where conversion
              failed (string values) would break max()/sum() — TODO confirm
              the exports are clean.
        name: label printed in the section header.

    Returns:
        dict with 'duration' (seconds), 'max_working_set_gb',
        'max_private_bytes_gb', 'avg_cpu', 'max_cpu'. Missing metrics
        are reported as 0.
    """
    gib = 1024 ** 3
    print(f"\n{'=' * 60}")
    print(f" {name}")
    print(f"{'=' * 60}")

    # Wall-clock span between the first and last successfully parsed sample.
    timestamps = [d.get('timestamp') for d in data
                  if isinstance(d.get('timestamp'), datetime)]
    duration = 0.0
    if timestamps:
        duration = (timestamps[-1] - timestamps[0]).total_seconds()
        print(f"Duration: {duration:.1f}s ({duration/60:.1f} min)")

    # Memory usage (working set) - prefer the backend counter over the GUI's.
    working_sets = [d['working_set'] for d in data
                    if d.get('working_set') is not None]
    gui_ws = [d['gui_working_set'] for d in data
              if d.get('gui_working_set') is not None]
    if working_sets:
        max_ws = max(working_sets) / gib
        avg_ws = sum(working_sets) / len(working_sets) / gib
        print(f"Backend Working Set: max={max_ws:.2f} GB, avg={avg_ws:.2f} GB")
    if gui_ws:
        print(f"GUI Working Set: max={max(gui_ws) / gib:.2f} GB")
        if not working_sets:
            # Before the backend starts, the GUI process is the only memory
            # signal, so fall back to it for the returned peak.
            working_sets = gui_ws

    # Private bytes (backend process).
    private = [d['private_bytes'] for d in data
               if d.get('private_bytes') is not None]
    if private:
        max_private = max(private) / gib
        avg_private = sum(private) / len(private) / gib
        print(f"Private Bytes: max={max_private:.2f} GB, avg={avg_private:.2f} GB")

    # System-wide available memory (counter is reported in MB).
    avail = [d['available_mb'] for d in data
             if d.get('available_mb') is not None]
    if avail:
        print(f"System Available Memory: min={min(avail) / 1024:.2f} GB, "
              f"max={max(avail) / 1024:.2f} GB")

    # CPU usage (backend process, percent of one logical processor).
    cpu = [d['cpu_pct'] for d in data if d.get('cpu_pct') is not None]
    if cpu:
        print(f"CPU %: max={max(cpu):.1f}%, avg={sum(cpu) / len(cpu):.1f}%")

    # Thread count (backend process).
    threads = [d['thread_count'] for d in data
               if d.get('thread_count') is not None]
    if threads:
        print(f"Thread count: max={int(max(threads))}")

    # Disk write throughput (bytes/sec counter, reported as MB/s).
    disk = [d['disk_write_bytes_sec'] for d in data
            if d.get('disk_write_bytes_sec') is not None]
    if disk:
        mib = 1024 ** 2
        max_disk = max(disk) / mib
        avg_disk = sum(disk) / len(disk) / mib
        print(f"Disk Write: max={max_disk:.1f} MB/s, avg={avg_disk:.1f} MB/s")

    return {
        'duration': duration,
        'max_working_set_gb': max(working_sets) / gib if working_sets else 0,
        'max_private_bytes_gb': max(private) / gib if private else 0,
        'avg_cpu': sum(cpu) / len(cpu) if cpu else 0,
        'max_cpu': max(cpu) if cpu else 0,
    }
def main():
    """Entry point: analyze both CSV exports and print a comparison."""
    print("Performance Analysis: BEFORE vs AFTER Parallel Processing")
    print("=" * 60)

    def load(path, label, missing_msg):
        # Parse and summarize one run, or report the file as missing.
        if not path.exists():
            print(missing_msg)
            return None
        return analyze_run(parse_pdh_csv(path), label)

    before_stats = load(Path("arnis_before.csv"), "BEFORE (Sequential)",
                        "arnis_before.csv not found")
    after_stats = load(Path("arnis_after.csv"), "AFTER (Parallel)",
                       "arnis_after.csv not found")

    # Side-by-side comparison requires both runs to be present.
    if not (before_stats and after_stats):
        return
    print(f"\n{'='*60}")
    print(" COMPARISON")
    print(f"{'='*60}")
    b_dur, a_dur = before_stats['duration'], after_stats['duration']
    time_ratio = a_dur / b_dur if b_dur > 0 else 0
    print(f"Duration: {b_dur:.1f}s -> {a_dur:.1f}s "
          f"({time_ratio:.2f}x, {a_dur - b_dur:+.1f}s)")
    b_mem = before_stats['max_working_set_gb']
    a_mem = after_stats['max_working_set_gb']
    mem_ratio = a_mem / b_mem if b_mem > 0 else 0
    print(f"Peak Memory: {b_mem:.2f} GB -> {a_mem:.2f} GB ({mem_ratio:.2f}x)")
    b_cpu, a_cpu = before_stats['avg_cpu'], after_stats['avg_cpu']
    print(f"Avg CPU: {b_cpu:.1f}% -> {a_cpu:.1f}% ({a_cpu - b_cpu:+.1f}%)")
# Run the analysis only when executed as a script (not on import).
if __name__ == '__main__':
    main()