mirror of
https://github.com/mudler/LocalAI.git
synced 2026-07-02 04:16:56 -04:00
The Go parent-death watcher (pkg/grpc/parentwatch.go, commit 772b435d5)
only protects backends that route through pkg/grpc. C++ and Python
backends don't, so the originally-reported case — the llama.cpp gRPC
worker surviving a non-graceful LocalAI death — was still uncovered.
Extend the same best-effort backstop to both languages, reusing the
exact mechanism and semantics:
- capture getppid() at startup, skip if already orphaned (<=1)
- a background thread polls getppid() and self-exits on reparenting
(getppid() != orig || == 1), portable across Linux/macOS, no-op on
Windows
- same env vars: LOCALAI_BACKEND_PARENT_WATCH (default on; falsy
false/0/no/off disable) and LOCALAI_BACKEND_PARENT_WATCH_INTERVAL
(default 2s; accepts Go-style durations like 500ms/2s/1m)
C++: implemented in backend/cpp/llama-cpp (the reported, most-used C++
backend) as a dependency-free header parent_watch.h, wired into
grpc-server.cpp's main() and copied at build time via prepare.sh. C++
backends have no shared server scaffolding, so other C++ backends
(ds4, ik-llama-cpp, privacy-filter, ...) are not yet covered and would
each need the same one-line include+call as follow-ups.
Python: implemented once in the shared common/parent_watch.py and armed
from common/grpc_auth.py's get_auth_interceptors() — the single helper
every one of the 35 Python backends invokes while building its gRPC
server — so all Python backends (and future ones) are covered with no
per-backend edits and no duplicated implementation.
Tests (real process-tree reparent detection, mirroring the Go test):
- backend/cpp/llama-cpp/parent_watch_test.cpp (via run-unit-tests.sh)
- backend/python/common/parent_watch_test.py (python -m unittest)
Co-Authored-By: Claude Sonnet 5 <noreply@anthropic.com>
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
150 lines
5.6 KiB
Python
150 lines
5.6 KiB
Python
"""Parent-death watcher (best-effort backstop) for LocalAI Python backends.
|
|
|
|
LocalAI spawns each backend as a child process and, on a clean shutdown, tears
|
|
it down itself (SIGTERM -> grace -> SIGKILL). That graceful path only runs when
|
|
LocalAI receives a catchable signal and lives long enough to run its handlers.
|
|
If LocalAI is SIGKILLed (e.g. a supervising process's grace period elapses
|
|
first), that teardown never runs and this backend would be reparented to init
|
|
and linger, holding GPU/VRAM and its listen port.
|
|
|
|
The watcher here is a best-effort backstop for exactly that case: it does NOT
|
|
replace the graceful teardown, it only covers the "parent vanished without
|
|
cleaning up" path. It detects reparenting: when the process that spawned this
|
|
backend dies, the kernel reparents us to the nearest sub-reaper or to init
|
|
(PID 1), so os.getppid() stops matching the value captured at startup. This
|
|
getppid() approach is portable across Linux/macOS (unlike the Linux-only
|
|
PR_SET_PDEATHSIG), which is why it is used here, mirroring the Go backends'
|
|
pkg/grpc/parentwatch.go and the C++ backends' parent_watch.h. It is disabled on
|
|
Windows, which has no equivalent orphan-reparenting semantics.
|
|
|
|
Env vars (shared verbatim across the Go, C++ and Python backends):
|
|
LOCALAI_BACKEND_PARENT_WATCH enabled by default; a falsey value
|
|
("false"/"0"/"no"/"off", case-insensitive)
|
|
disables it.
|
|
LOCALAI_BACKEND_PARENT_WATCH_INTERVAL poll interval as a Go-style duration
|
|
string ("500ms", "2s", "1m") or a bare
|
|
number of seconds. Defaults to 2s.
|
|
"""
|
|
|
|
import os
|
|
import sys
|
|
import threading
|
|
|
|
# Names of the environment variables that configure the watcher: the on/off
# switch and the poll interval.
ENV_PARENT_WATCH = "LOCALAI_BACKEND_PARENT_WATCH"
ENV_PARENT_WATCH_INTERVAL = "LOCALAI_BACKEND_PARENT_WATCH_INTERVAL"

# Poll interval, in seconds, used when no usable interval is configured.
_DEFAULT_INTERVAL_SECONDS = 2.0

# Process-wide "already armed" flag and the lock that protects it, so that
# repeated calls (e.g. get_auth_interceptors invoked more than once) only ever
# arm a single watcher thread per process.
_started_lock = threading.Lock()
_started = False
|
|
|
|
|
|
def _enabled():
    """Return True when the parent-death watcher should be armed here.

    The watcher is on by default. It is off on Windows, and whenever
    LOCALAI_BACKEND_PARENT_WATCH is set to "false", "0", "no" or "off"
    (surrounding whitespace and letter case are ignored).
    """
    # The getppid() heuristic depends on orphans being reparented to a
    # well-known init PID, which Windows does not do.
    on_windows = os.name == "nt" or sys.platform.startswith("win")
    if on_windows:
        return False
    setting = os.environ.get(ENV_PARENT_WATCH, "")
    return setting.strip().lower() not in ("false", "0", "no", "off")
|
|
|
|
|
|
def _interval_seconds():
    """Return the configured poll interval in seconds, or the default.

    Reads LOCALAI_BACKEND_PARENT_WATCH_INTERVAL. The value is a decimal number
    followed by an optional unit: "ms", "s" or "m" (case-insensitive), e.g.
    "500ms", "2s", "1m". A bare number is interpreted as seconds.

    Returns:
        The interval as a float number of seconds. _DEFAULT_INTERVAL_SECONDS
        is returned when the variable is unset or empty, has no numeric
        prefix, does not parse, carries an unknown unit, is not positive, or
        is too large to be slept on.
    """
    raw = os.environ.get(ENV_PARENT_WATCH_INTERVAL, "").strip()
    if not raw:
        return _DEFAULT_INTERVAL_SECONDS

    # Split the numeric prefix (optional leading sign, digits, dots) from the
    # unit suffix.
    i = 0
    while i < len(raw) and (
        raw[i].isdigit() or raw[i] == "." or (i == 0 and raw[i] in "+-")
    ):
        i += 1
    if i == 0:
        return _DEFAULT_INTERVAL_SECONDS

    try:
        num = float(raw[:i])
    except ValueError:
        # e.g. "+", "." or "1.2.3": looks numeric but is not a number.
        return _DEFAULT_INTERVAL_SECONDS

    unit = raw[i:].lower()
    if unit == "ms":
        seconds = num / 1000.0
    elif unit in ("s", ""):
        seconds = num
    elif unit == "m":
        seconds = num * 60.0
    else:
        return _DEFAULT_INTERVAL_SECONDS

    # float() turns an over-long digit string into inf instead of raising, and
    # time.sleep() raises OverflowError for inf or absurdly large values; that
    # would kill the watcher thread and silently disable the backstop. Treat
    # anything beyond the largest timeout the platform's blocking calls accept
    # like any other invalid value. The chained comparison also rejects NaN.
    if 0 < seconds <= threading.TIMEOUT_MAX:
        return seconds
    return _DEFAULT_INTERVAL_SECONDS
|
|
|
|
|
|
def _parent_died(orig_ppid):
    """Return True once this process no longer has orig_ppid as its parent.

    When the process that spawned us exits, the orphan is handed to the
    nearest sub-reaper or to init (PID 1), so os.getppid() stops returning
    the value captured at startup. A current parent of 1 is always reported
    as "died", whatever orig_ppid is.
    """
    current = os.getppid()
    if current == 1:
        return True
    return current != orig_ppid
|
|
|
|
|
|
def _watch(orig_ppid, interval, on_death):
    """Block until the original parent is gone, then invoke on_death once.

    Sleeps `interval` seconds before every check (including the first one) and
    uses _parent_died(orig_ppid) as the check. Returns after on_death()
    returns. Because it blocks, run it on its own (daemon) thread.
    """
    import time

    time.sleep(interval)
    while not _parent_died(orig_ppid):
        time.sleep(interval)
    on_death()
|
|
|
|
|
|
def start_parent_death_watcher():
    """Install the best-effort safety net described in this module's docstring.

    Starts one daemon thread that polls os.getppid() and terminates this
    process with os._exit(1) once the parent captured here is gone.

    No-op when disabled, on Windows, when already orphaned at startup
    (os.getppid() <= 1), or if already started. This is a backstop alongside —
    never a replacement for — LocalAI's graceful teardown.
    """
    global _started
    if not _enabled():
        return
    with _started_lock:
        if _started:
            return
        orig_ppid = os.getppid()
        # A parent of 1 (or less) at startup means we were already orphaned (or
        # launched directly under init) — there is no original parent to watch.
        if orig_ppid <= 1:
            return
        interval = _interval_seconds()

        def on_death():
            """Log why we are exiting, then terminate the process immediately."""
            try:
                print(
                    "backend parent process (pid {}) exited without stopping this "
                    "backend; self-terminating to avoid orphaning".format(orig_ppid),
                    file=sys.stderr,
                    flush=True,
                )
            finally:
                # The exit must not depend on the log line succeeding: if
                # stderr is a pipe that was being read by the parent that just
                # died, the write fails (e.g. BrokenPipeError), and without
                # this `finally` the exception would end the watcher thread
                # and leave the orphan running.
                #
                # Immediate, non-cleanup exit: this is a shutdown safety net
                # and the normal graceful path is already gone.
                os._exit(1)

        thread = threading.Thread(
            target=_watch,
            args=(orig_ppid, interval, on_death),
            name="parent-death-watcher",
            daemon=True,
        )
        thread.start()
        # Only mark as started once the thread is actually running, so a failed
        # start can be retried by a later call.
        _started = True
|