Files
LocalAI/backend/python/common/parent_watch_test.py
Ettore Di Giacinto 94e3e06b8b fix(process): extend parent-death backstop to C++ and Python backends
The Go parent-death watcher (pkg/grpc/parentwatch.go, commit 772b435d5)
only protects backends that route through pkg/grpc. C++ and Python
backends don't, so the originally-reported case — the llama.cpp gRPC
worker surviving a non-graceful LocalAI death — was still uncovered.

Extend the same best-effort backstop to both languages, reusing the
exact mechanism and semantics:

- capture getppid() at startup, skip if already orphaned (<=1)
- a background thread polls getppid() and self-exits on reparenting
  (getppid() != orig || == 1), portable across Linux/macOS, no-op on
  Windows
- same env vars: LOCALAI_BACKEND_PARENT_WATCH (default on; falsy
  false/0/no/off disable) and LOCALAI_BACKEND_PARENT_WATCH_INTERVAL
  (default 2s; accepts Go-style durations like 500ms/2s/1m)

C++: implemented in backend/cpp/llama-cpp (the reported, most-used C++
backend) as a dependency-free header parent_watch.h, wired into
grpc-server.cpp's main() and copied at build time via prepare.sh. C++
backends have no shared server scaffolding, so other C++ backends
(ds4, ik-llama-cpp, privacy-filter, ...) are not yet covered and would
each need the same one-line include+call as follow-ups.

Python: implemented once in the shared common/parent_watch.py and armed
from common/grpc_auth.py's get_auth_interceptors() — the single helper
every one of the 35 Python backends invokes while building its gRPC
server — so all Python backends (and future ones) are covered with no
per-backend edits and no duplicated implementation.

Tests (real process-tree reparent detection, mirroring the Go test):
- backend/cpp/llama-cpp/parent_watch_test.cpp (via run-unit-tests.sh)
- backend/python/common/parent_watch_test.py (python -m unittest)

Co-Authored-By: Claude Sonnet 5 <noreply@anthropic.com>
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2026-07-02 07:31:31 +00:00

151 lines
5.2 KiB
Python

"""Unit tests for the parent-death watcher (parent_watch.py).
Run standalone (Python standard library only, no backend venv needed):
python3 -m unittest parent_watch_test
The core test (test_detects_reparent) builds a genuine two-level process tree
(test -> middle -> grandchild) with os.fork, lets the middle process die, and
asserts the grandchild's parent_watch._watch detects the reparenting and
self-terminates — mirroring the Go test in pkg/grpc/parentwatch_test.go and the
C++ test in backend/cpp/llama-cpp/parent_watch_test.cpp.
"""
import os
import sys
import tempfile
import threading
import time
import unittest
import parent_watch
class TestParentWatchEnvParsing(unittest.TestCase):
def setUp(self):
self._saved = {
k: os.environ.get(k)
for k in (parent_watch.ENV_PARENT_WATCH, parent_watch.ENV_PARENT_WATCH_INTERVAL)
}
for k in self._saved:
os.environ.pop(k, None)
def tearDown(self):
for k, v in self._saved.items():
if v is None:
os.environ.pop(k, None)
else:
os.environ[k] = v
def test_interval_default(self):
self.assertEqual(parent_watch._interval_seconds(), 2.0)
def test_interval_units(self):
cases = {"500ms": 0.5, "2s": 2.0, "1m": 60.0, "3": 3.0, "0.5s": 0.5}
for raw, expected in cases.items():
os.environ[parent_watch.ENV_PARENT_WATCH_INTERVAL] = raw
self.assertAlmostEqual(parent_watch._interval_seconds(), expected, msg=raw)
def test_interval_garbage_falls_back(self):
os.environ[parent_watch.ENV_PARENT_WATCH_INTERVAL] = "garbage"
self.assertEqual(parent_watch._interval_seconds(), 2.0)
@unittest.skipIf(os.name == "nt" or sys.platform.startswith("win"), "POSIX only")
def test_enabled_default(self):
self.assertTrue(parent_watch._enabled())
@unittest.skipIf(os.name == "nt" or sys.platform.startswith("win"), "POSIX only")
def test_disabled_by_falsey(self):
for val in ("false", "0", "no", "off", "OFF", " False "):
os.environ[parent_watch.ENV_PARENT_WATCH] = val
self.assertFalse(parent_watch._enabled(), msg=val)
@unittest.skipIf(os.name == "nt" or sys.platform.startswith("win"), "POSIX only")
def test_enabled_by_truthy(self):
for val in ("true", "1", "yes", "on"):
os.environ[parent_watch.ENV_PARENT_WATCH] = val
self.assertTrue(parent_watch._enabled(), msg=val)
@unittest.skipIf(os.name == "nt" or sys.platform.startswith("win"), "fork/reparent is POSIX only")
class TestParentWatchReparent(unittest.TestCase):
def _wait_for_file(self, path, timeout=10.0):
deadline = time.time() + timeout
while time.time() < deadline:
if os.path.exists(path):
return True
time.sleep(0.02)
return False
def test_detects_reparent(self):
tmpdir = tempfile.mkdtemp(prefix="parentwatch_test_")
ready_file = os.path.join(tmpdir, "ready")
exited_file = os.path.join(tmpdir, "exited")
middle = os.fork()
if middle == 0:
# ---- middle process ----
grandchild = os.fork()
if grandchild == 0:
# ---- grandchild process: arm the REAL watcher against middle ----
orig_ppid = os.getppid()
def on_death():
with open(exited_file, "w") as f:
f.write("1")
os._exit(7)
threading.Thread(
target=parent_watch._watch,
args=(orig_ppid, 0.05, on_death),
daemon=True,
).start()
# Safety valve: never linger if something goes wrong.
def bail():
time.sleep(30)
os._exit(2)
threading.Thread(target=bail, daemon=True).start()
# Signal readiness only after the watcher captured orig_ppid.
with open(ready_file, "w") as f:
f.write(str(os.getpid()))
while True:
time.sleep(1)
else:
# middle: wait until grandchild is ready, then exit to orphan it.
if not self._wait_for_file(ready_file):
os._exit(5)
os._exit(0)
# ---- test (top) process ----
os.waitpid(middle, 0) # reap middle only; grandchild is orphaned
self.assertTrue(os.path.exists(ready_file), "grandchild never signaled readiness")
self.assertTrue(
self._wait_for_file(exited_file),
"watcher did not detect parent death within timeout",
)
# Best-effort cleanup: kill the grandchild if it somehow survived.
try:
with open(ready_file) as f:
pid = int(f.read().strip())
if pid > 1:
os.kill(pid, 9)
except (OSError, ValueError):
pass
for p in (ready_file, exited_file):
try:
os.remove(p)
except OSError:
pass
try:
os.rmdir(tmpdir)
except OSError:
pass
if __name__ == "__main__":
unittest.main()