Compare commits

...

4 Commits

Author SHA1 Message Date
Alex Cheema
f31d51ecc0 Merge branch 'main' into alexcheema/fix-memory-reporting 2026-02-16 05:34:40 -08:00
Alex Cheema
859f593883 fix: exclude distributed test script from pytest collection and apply formatting
The start_distributed_test.py script calls sys.exit() at module level,
crashing pytest collection. Add --ignore to pytest addopts to skip it.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-16 05:07:34 -08:00
Alex Cheema
9c3c569d9f Merge remote-tracking branch 'origin/main' into alexcheema/fix-memory-reporting 2026-02-16 04:50:14 -08:00
Alex Cheema
8c57df8b37 fix: enable psutil fallback for memory monitoring when macmon is missing on macOS
On Darwin, the psutil memory poller was disabled (memory_poll_rate=None),
relying entirely on macmon. When macmon is not installed, no memory data
was reported, causing nodes to show zero memory in the cluster state and
blocking shard placement.

Now falls back to psutil-based memory polling when macmon is not found.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-15 08:34:54 -08:00
2 changed files with 7 additions and 1 deletion

View File

@@ -132,7 +132,7 @@ markers = [
env = [
"EXO_TESTS=1"
]
-addopts = "-m 'not slow'"
+addopts = "-m 'not slow' --ignore=tests/start_distributed_test.py"
filterwarnings = [
"ignore:builtin type Swig:DeprecationWarning",
]

View File

@@ -388,6 +388,12 @@ class InfoGatherer:
if IS_DARWIN:
if (macmon_path := shutil.which("macmon")) is not None:
tg.start_soon(self._monitor_macmon, macmon_path)
else:
# macmon not installed — fall back to psutil for memory
logger.warning(
"macmon not found, falling back to psutil for memory monitoring"
)
self.memory_poll_rate = 1
tg.start_soon(self._monitor_system_profiler_thunderbolt_data)
tg.start_soon(self._monitor_thunderbolt_bridge_status)
tg.start_soon(self._monitor_rdma_ctl_status)