From 94e3e06b8bed8ad27e05153103911be74d1c62f1 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 2 Jul 2026 06:58:55 +0000 Subject: [PATCH] fix(process): extend parent-death backstop to C++ and Python backends MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Go parent-death watcher (pkg/grpc/parentwatch.go, commit 772b435d5) only protects backends that route through pkg/grpc. C++ and Python backends don't, so the originally-reported case — the llama.cpp gRPC worker surviving a non-graceful LocalAI death — was still uncovered. Extend the same best-effort backstop to both languages, reusing the exact mechanism and semantics: - capture getppid() at startup, skip if already orphaned (<=1) - a background thread polls getppid() and self-exits on reparenting (getppid() != orig || == 1), portable across Linux/macOS, no-op on Windows - same env vars: LOCALAI_BACKEND_PARENT_WATCH (default on; falsy false/0/no/off disable) and LOCALAI_BACKEND_PARENT_WATCH_INTERVAL (default 2s; accepts Go-style durations like 500ms/2s/1m) C++: implemented in backend/cpp/llama-cpp (the reported, most-used C++ backend) as a dependency-free header parent_watch.h, wired into grpc-server.cpp's main() and copied at build time via prepare.sh. C++ backends have no shared server scaffolding, so other C++ backends (ds4, ik-llama-cpp, privacy-filter, ...) are not yet covered and would each need the same one-line include+call as follow-ups. Python: implemented once in the shared common/parent_watch.py and armed from common/grpc_auth.py's get_auth_interceptors() — the single helper every one of the 35 Python backends invokes while building its gRPC server — so all Python backends (and future ones) are covered with no per-backend edits and no duplicated implementation. 
Tests (real process-tree reparent detection, mirroring the Go test): - backend/cpp/llama-cpp/parent_watch_test.cpp (via run-unit-tests.sh) - backend/python/common/parent_watch_test.py (python -m unittest) Co-Authored-By: Claude Sonnet 5 Signed-off-by: Ettore Di Giacinto --- backend/cpp/llama-cpp/CMakeLists.txt | 9 + backend/cpp/llama-cpp/grpc-server.cpp | 6 + backend/cpp/llama-cpp/parent_watch.h | 179 ++++++++++++++++++ backend/cpp/llama-cpp/parent_watch_test.cpp | 197 ++++++++++++++++++++ backend/cpp/llama-cpp/prepare.sh | 4 + backend/cpp/run-unit-tests.sh | 2 +- backend/python/common/grpc_auth.py | 9 + backend/python/common/parent_watch.py | 149 +++++++++++++++ backend/python/common/parent_watch_test.py | 150 +++++++++++++++ 9 files changed, 704 insertions(+), 1 deletion(-) create mode 100644 backend/cpp/llama-cpp/parent_watch.h create mode 100644 backend/cpp/llama-cpp/parent_watch_test.cpp create mode 100644 backend/python/common/parent_watch.py create mode 100644 backend/python/common/parent_watch_test.py diff --git a/backend/cpp/llama-cpp/CMakeLists.txt b/backend/cpp/llama-cpp/CMakeLists.txt index 8b8d2e2d5..47852d400 100644 --- a/backend/cpp/llama-cpp/CMakeLists.txt +++ b/backend/cpp/llama-cpp/CMakeLists.txt @@ -101,4 +101,13 @@ if(LLAMA_GRPC_BUILD_TESTS) target_link_libraries(message_content_test PRIVATE ${_LLAMA_COMMON_TARGET}) target_compile_features(message_content_test PRIVATE cxx_std_17) add_test(NAME message_content_test COMMAND message_content_test) + + # Parent-death watcher test (parent_watch.h) — standard library only, but + # needs a threading runtime for std::thread. 
+ find_package(Threads REQUIRED) + add_executable(parent_watch_test parent_watch_test.cpp parent_watch.h) + target_include_directories(parent_watch_test PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}) + target_link_libraries(parent_watch_test PRIVATE Threads::Threads) + target_compile_features(parent_watch_test PRIVATE cxx_std_17) + add_test(NAME parent_watch_test COMMAND parent_watch_test) endif() diff --git a/backend/cpp/llama-cpp/grpc-server.cpp b/backend/cpp/llama-cpp/grpc-server.cpp index a02d461f4..660c4367e 100644 --- a/backend/cpp/llama-cpp/grpc-server.cpp +++ b/backend/cpp/llama-cpp/grpc-server.cpp @@ -75,6 +75,8 @@ #include #endif +#include "parent_watch.h" // best-effort parent-death backstop (see header) + using grpc::Server; using grpc::ServerBuilder; @@ -3442,6 +3444,10 @@ int main(int argc, char** argv) { } } + // Best-effort backstop: self-terminate if the LocalAI process that spawned + // us dies without cleaning us up (see parent_watch.h). + llama_grpc::start_parent_death_watcher(); + server_context ctx_server; BackendServiceImpl service(ctx_server); diff --git a/backend/cpp/llama-cpp/parent_watch.h b/backend/cpp/llama-cpp/parent_watch.h new file mode 100644 index 000000000..b2eb34e74 --- /dev/null +++ b/backend/cpp/llama-cpp/parent_watch.h @@ -0,0 +1,179 @@ +// Parent-death watcher (best-effort backstop) for the llama.cpp gRPC backend. +// +// LocalAI spawns this backend as a child process and, on a clean shutdown, +// tears it down itself (SIGTERM -> grace -> SIGKILL). That graceful path only +// runs when LocalAI receives a catchable signal and lives long enough to run +// its handlers. If LocalAI is SIGKILLed (e.g. a supervising process's grace +// period elapses first), that teardown never runs and this backend would be +// reparented to init and linger, holding VRAM and its listen port. 
+//
+// The watcher here is a best-effort backstop for exactly that case: it does
+// NOT replace the graceful teardown, it only covers the "parent vanished
+// without cleaning up" path. It detects reparenting: when the process that
+// spawned this backend dies, the kernel reparents us to the nearest sub-reaper
+// or to init (PID 1), so getppid() stops matching the value captured at
+// startup. This getppid() approach is portable across Linux/macOS (unlike the
+// Linux-only PR_SET_PDEATHSIG), which is why it is used here, mirroring the Go
+// backends' pkg/grpc/parentwatch.go. It is disabled on Windows, which has no
+// equivalent orphan-reparenting semantics.
+//
+// This header is intentionally dependency-free (C++ standard library only) so
+// it can be exercised by a standalone unit test (parent_watch_test.cpp) without
+// building the full llama.cpp + gRPC backend.
+#ifndef LLAMA_GRPC_PARENT_WATCH_H
+#define LLAMA_GRPC_PARENT_WATCH_H
+
+#include <algorithm>  // std::transform
+#include <cctype>     // std::tolower, std::isdigit
+#include <chrono>     // std::chrono::milliseconds
+#include <cstdio>     // fprintf, fflush, stderr
+#include <cstdlib>    // std::getenv
+#include <functional> // std::function
+#include <string>     // std::string, std::stod
+#include <thread>     // std::thread, std::this_thread::sleep_for
+
+#if !defined(_WIN32)
+#include <unistd.h> // getppid(2), _exit(2)
+#endif
+
+namespace llama_grpc {
+
+// Env var names are shared verbatim with the Go and Python backends for
+// consistency across languages.
+inline const char *kEnvParentWatch() { return "LOCALAI_BACKEND_PARENT_WATCH"; }
+inline const char *kEnvParentWatchInterval() { return "LOCALAI_BACKEND_PARENT_WATCH_INTERVAL"; }
+
+// Default poll interval in milliseconds. Matches the Go side's 2 * time.Second.
+inline long parent_watch_default_interval_ms() { return 2000; }
+
+namespace detail {
+inline std::string trim_lower(const std::string &in, bool lower) {
+    size_t a = in.find_first_not_of(" \t\r\n");
+    size_t b = in.find_last_not_of(" \t\r\n");
+    if (a == std::string::npos) {
+        return "";
+    }
+    std::string s = in.substr(a, b - a + 1);
+    if (lower) {
+        std::transform(s.begin(), s.end(), s.begin(),
+                       [](unsigned char c) { return std::tolower(c); });
+    }
+    return s;
+}
+} // namespace detail
+
+// parent_watch_enabled reports whether the watcher should run. Enabled by
+// default; a falsey value ("false"/"0"/"no"/"off", case-insensitive) disables
+// it, matching the Go implementation's exact semantics.
+inline bool parent_watch_enabled() {
+#if defined(_WIN32)
+    return false;
+#else
+    const char *v = std::getenv(kEnvParentWatch());
+    if (v == nullptr || v[0] == '\0') {
+        return true;
+    }
+    const std::string s = detail::trim_lower(v, true);
+    return !(s == "false" || s == "0" || s == "no" || s == "off");
+#endif
+}
+
+// parent_watch_interval_ms returns the poll interval in milliseconds. Accepts
+// Go-style duration strings ("500ms", "2s", "1m") for cross-language parity, or
+// a bare number interpreted as seconds. Unset, malformed, sub-millisecond or
+// out-of-range values yield parent_watch_default_interval_ms().
+inline long parent_watch_interval_ms() {
+    const long def = parent_watch_default_interval_ms();
+    const char *v = std::getenv(kEnvParentWatchInterval());
+    if (v == nullptr || v[0] == '\0') {
+        return def;
+    }
+    const std::string s = detail::trim_lower(v, false);
+    if (s.empty()) {
+        return def;
+    }
+    size_t i = 0;
+    while (i < s.size() && (std::isdigit((unsigned char)s[i]) || s[i] == '.')) {
+        i++;
+    }
+    if (i == 0) {
+        return def;
+    }
+    double num = 0.0;
+    try {
+        num = std::stod(s.substr(0, i));
+    } catch (...) {
+        return def;
+    }
+    const std::string unit = s.substr(i);
+    double ms; // range-check before casting: out-of-range double -> long is UB
+    if (unit == "ms") {
+        ms = num;
+    } else if (unit == "s" || unit.empty()) {
+        ms = num * 1000.0;
+    } else if (unit == "m") {
+        ms = num * 60000.0;
+    } else {
+        return def; // unrecognized unit
+    }
+    return (ms >= 1.0 && ms <= 2147483647.0) ? (long)ms : def; // bound fits a 32-bit long too
+}
+
+#if !defined(_WIN32)
+// parent_died reports whether this process has been reparented away from the
+// parent it had when the watcher started. Reparenting is the standard POSIX
+// signal that the original parent (here, the LocalAI process that spawned this
+// backend) has exited: the orphan is handed to the nearest sub-reaper or to
+// init (PID 1), so getppid() no longer matches the value captured at startup.
+inline bool parent_died(pid_t orig_ppid) {
+    const pid_t ppid = getppid();
+    return ppid != orig_ppid || ppid == 1;
+}
+
+// watch_parent_death polls until parent_died reports the original parent is
+// gone, then invokes on_death. It blocks, so run it on its own thread.
+inline void watch_parent_death(pid_t orig_ppid, long interval_ms,
+                               const std::function<void()> &on_death) {
+    for (;;) {
+        std::this_thread::sleep_for(std::chrono::milliseconds(interval_ms));
+        if (parent_died(orig_ppid)) {
+            on_death();
+            return;
+        }
+    }
+}
+#endif
+
+// start_parent_death_watcher installs the best-effort safety net described in
+// the file header on the calling backend process. It is a no-op when disabled,
+// on Windows, or when the process is already orphaned at startup
+// (getppid() <= 1). This is a backstop alongside — never a replacement for —
+// LocalAI's graceful teardown.
+inline void start_parent_death_watcher() {
+#if !defined(_WIN32)
+    if (!parent_watch_enabled()) {
+        return;
+    }
+    const pid_t orig_ppid = getppid();
+    // A parent of 1 (or less) at startup means we were already orphaned (or
+    // launched directly under init) — there is no original parent to watch for.
+    if (orig_ppid <= 1) {
+        return;
+    }
+    const long interval_ms = parent_watch_interval_ms();
+    std::thread([orig_ppid, interval_ms]() {
+        watch_parent_death(orig_ppid, interval_ms, [orig_ppid]() {
+            fprintf(stderr,
+                    "backend parent process (pid %d) exited without stopping "
+                    "this backend; self-terminating to avoid orphaning\n",
+                    (int)orig_ppid);
+            fflush(stderr);
+            _exit(1);
+        });
+    }).detach();
+#endif
+}
+
+} // namespace llama_grpc
+
+#endif // LLAMA_GRPC_PARENT_WATCH_H
diff --git a/backend/cpp/llama-cpp/parent_watch_test.cpp b/backend/cpp/llama-cpp/parent_watch_test.cpp
new file mode 100644
index 000000000..9a7fd8074
--- /dev/null
+++ b/backend/cpp/llama-cpp/parent_watch_test.cpp
@@ -0,0 +1,197 @@
+// Unit tests for the parent-death watcher (parent_watch.h).
+//
+// Build & run standalone (C++ standard library only, no nlohmann/json needed):
+//   g++ -std=c++17 -pthread parent_watch_test.cpp -o t && ./t
+//
+// The core test (test_detects_reparent) builds a genuine two-level process tree
+// (test -> middle -> grandchild), lets the middle process die, and asserts the
+// grandchild's watch_parent_death detects the reparenting and self-terminates —
+// mirroring the Go test in pkg/grpc/parentwatch_test.go, but with fork(2).
+//
+// On Windows only the fork-based reparent test is compiled out (the watcher is
+// unsupported there), matching parent_watch.h's platform gating.
+
+#include <cstdio>  // fprintf, stderr
+#include <cstdlib> // setenv, unsetenv, mkdtemp
+#include <string>  // std::string, std::to_string
+
+#include "parent_watch.h"
+
+static int failures = 0;
+
+static void check(bool ok, const std::string &name) {
+    if (!ok) {
+        failures++;
+        fprintf(stderr, "FAIL: %s\n", name.c_str());
+    } else {
+        fprintf(stderr, "ok: %s\n", name.c_str());
+    }
+}
+
+// Env-parsing tests are platform-independent and always run.
+static void test_env_parsing() {
+    using namespace llama_grpc;
+
+    // Interval: default when unset.
+    unsetenv("LOCALAI_BACKEND_PARENT_WATCH_INTERVAL");
+    check(parent_watch_interval_ms() == 2000, "interval default 2000ms");
+
+    setenv("LOCALAI_BACKEND_PARENT_WATCH_INTERVAL", "500ms", 1);
+    check(parent_watch_interval_ms() == 500, "interval 500ms");
+
+    setenv("LOCALAI_BACKEND_PARENT_WATCH_INTERVAL", "2s", 1);
+    check(parent_watch_interval_ms() == 2000, "interval 2s");
+
+    setenv("LOCALAI_BACKEND_PARENT_WATCH_INTERVAL", "1m", 1);
+    check(parent_watch_interval_ms() == 60000, "interval 1m");
+
+    setenv("LOCALAI_BACKEND_PARENT_WATCH_INTERVAL", "3", 1); // bare number -> seconds
+    check(parent_watch_interval_ms() == 3000, "interval bare 3 -> 3000ms");
+
+    setenv("LOCALAI_BACKEND_PARENT_WATCH_INTERVAL", "garbage", 1);
+    check(parent_watch_interval_ms() == 2000, "interval garbage -> default");
+    unsetenv("LOCALAI_BACKEND_PARENT_WATCH_INTERVAL");
+
+#if !defined(_WIN32)
+    // Enabled semantics (POSIX only; always false on Windows).
+    unsetenv("LOCALAI_BACKEND_PARENT_WATCH");
+    check(parent_watch_enabled(), "enabled by default");
+
+    for (const char *falsey : {"false", "0", "no", "off", "OFF", " False "}) {
+        setenv("LOCALAI_BACKEND_PARENT_WATCH", falsey, 1);
+        check(!parent_watch_enabled(), std::string("disabled by '") + falsey + "'");
+    }
+    setenv("LOCALAI_BACKEND_PARENT_WATCH", "true", 1);
+    check(parent_watch_enabled(), "enabled by 'true'");
+    setenv("LOCALAI_BACKEND_PARENT_WATCH", "1", 1);
+    check(parent_watch_enabled(), "enabled by '1'");
+    unsetenv("LOCALAI_BACKEND_PARENT_WATCH");
+#endif
+}
+
+#if !defined(_WIN32)
+
+#include <signal.h>    // kill, SIGKILL
+#include <sys/stat.h>  // stat
+#include <sys/types.h> // pid_t
+#include <sys/wait.h>  // waitpid
+#include <unistd.h>    // fork, getppid, getpid, pause, sleep, usleep, unlink, rmdir
+
+static bool file_exists(const std::string &p) {
+    struct stat st;
+    return ::stat(p.c_str(), &st) == 0;
+}
+
+static bool wait_for_file(const std::string &p, int timeout_ms) {
+    int waited = 0;
+    while (waited < timeout_ms) {
+        if (file_exists(p)) {
+            return true;
+        }
+        usleep(20 * 1000);
+        waited += 20;
+    }
+    return false;
+}
+
+static void write_file(const std::string &p, const std::string &content) {
+    FILE *f = fopen(p.c_str(), "w");
+    if (f) {
+        fwrite(content.data(), 1, content.size(), f);
+        fclose(f);
+    }
+}
+
+// Builds test -> middle -> grandchild via fork(2). The grandchild arms the REAL
+// watch_parent_death against middle; middle exits, orphaning the grandchild;
+// the watcher must detect the reparenting and self-terminate.
+static void test_detects_reparent() {
+    char tmpl[] = "/tmp/parentwatch_test_XXXXXX";
+    char *dir = mkdtemp(tmpl);
+    if (dir == nullptr) {
+        check(false, "mkdtemp");
+        return;
+    }
+    const std::string ready_file = std::string(dir) + "/ready";
+    const std::string exited_file = std::string(dir) + "/exited";
+
+    pid_t middle = fork();
+    if (middle < 0) {
+        check(false, "fork middle");
+        return;
+    }
+
+    if (middle == 0) {
+        // ---- middle process ----
+        pid_t grandchild = fork();
+        if (grandchild < 0) {
+            _exit(4);
+        }
+        if (grandchild == 0) {
+            // ---- grandchild process ----
+            pid_t orig_ppid = getppid(); // == middle
+            std::thread([&]() {
+                llama_grpc::watch_parent_death(orig_ppid, 50 /*ms*/, [&]() {
+                    write_file(exited_file, "1");
+                    _exit(7);
+                });
+            }).detach();
+
+            // Safety valve: never linger if something goes wrong.
+            std::thread([]() {
+                sleep(30); // not usleep(): POSIX only defines it below 1,000,000 us
+                _exit(2);
+            }).detach();
+
+            // Signal readiness only after the watcher captured orig_ppid.
+            write_file(ready_file, std::to_string(getpid()));
+            for (;;) {
+                pause();
+            }
+        }
+        // middle: wait until grandchild is ready, then exit to orphan it.
+        if (!wait_for_file(ready_file, 10000)) {
+            _exit(5);
+        }
+        _exit(0);
+    }
+
+    // ---- test (top) process ----
+    int status = 0;
+    waitpid(middle, &status, 0); // reap middle only; grandchild is orphaned
+
+    check(file_exists(ready_file), "grandchild signaled readiness");
+
+    bool detected = wait_for_file(exited_file, 10000);
+    check(detected, "watcher detected parent death and self-terminated");
+
+    // Best-effort cleanup: kill the grandchild if it somehow survived.
+ if (file_exists(ready_file)) { + FILE *f = fopen(ready_file.c_str(), "r"); + if (f) { + int pid = 0; + if (fscanf(f, "%d", &pid) == 1 && pid > 1) { + kill(pid, SIGKILL); + } + fclose(f); + } + } + unlink(ready_file.c_str()); + unlink(exited_file.c_str()); + rmdir(dir); +} + +#endif // !_WIN32 + +int main() { + test_env_parsing(); +#if !defined(_WIN32) + test_detects_reparent(); +#endif + if (failures == 0) { + fprintf(stderr, "\nAll parent_watch tests passed.\n"); + return 0; + } + fprintf(stderr, "\n%d parent_watch test(s) failed.\n", failures); + return 1; +} diff --git a/backend/cpp/llama-cpp/prepare.sh b/backend/cpp/llama-cpp/prepare.sh index 4da45ea9d..27c664e8c 100644 --- a/backend/cpp/llama-cpp/prepare.sh +++ b/backend/cpp/llama-cpp/prepare.sh @@ -22,6 +22,10 @@ cp -r grpc-server.cpp llama.cpp/tools/grpc-server/ # unit test (compiled only when -DLLAMA_GRPC_BUILD_TESTS=ON). cp -r message_content.h llama.cpp/tools/grpc-server/ cp -r message_content_test.cpp llama.cpp/tools/grpc-server/ +# Parent-death watcher (included by grpc-server.cpp) and its standalone unit +# test (run via backend/cpp/run-unit-tests.sh; also buildable under ctest). +cp -r parent_watch.h llama.cpp/tools/grpc-server/ +cp -r parent_watch_test.cpp llama.cpp/tools/grpc-server/ cp -rfv llama.cpp/vendor/nlohmann/json.hpp llama.cpp/tools/grpc-server/ cp -rfv llama.cpp/vendor/cpp-httplib/httplib.h llama.cpp/tools/grpc-server/ diff --git a/backend/cpp/run-unit-tests.sh b/backend/cpp/run-unit-tests.sh index 3f63faa40..603d13a91 100755 --- a/backend/cpp/run-unit-tests.sh +++ b/backend/cpp/run-unit-tests.sh @@ -54,7 +54,7 @@ for test_src in "${tests[@]}"; do name="$(basename "$test_src" .cpp)" bin="$(mktemp -d)/$name" echo "==> $test_src" - if ! "$CXX" -std=c++17 -Wall -Wextra \ + if ! 
"$CXX" -std=c++17 -Wall -Wextra -pthread \ -I"$JSON_INC" -I"$(dirname "$test_src")" \ "$test_src" -o "$bin"; then echo "COMPILE FAILED: $test_src" >&2 diff --git a/backend/python/common/grpc_auth.py b/backend/python/common/grpc_auth.py index eda138ab4..9ed866abb 100644 --- a/backend/python/common/grpc_auth.py +++ b/backend/python/common/grpc_auth.py @@ -11,6 +11,8 @@ import os import grpc +from parent_watch import start_parent_death_watcher + class _AbortHandler(grpc.RpcMethodHandler): """A method handler that immediately aborts with UNAUTHENTICATED.""" @@ -70,6 +72,13 @@ def get_auth_interceptors(*, aio: bool = False): Returns an empty list when LOCALAI_GRPC_AUTH_TOKEN is not set. """ + # Arm the best-effort parent-death backstop here: this is the single helper + # every LocalAI Python backend invokes exactly once while building its gRPC + # server (mirroring how the Go watcher arms in pkg/grpc's shared serve path). + # start_parent_death_watcher() is idempotent and a no-op when disabled or on + # unsupported platforms — see parent_watch.py. + start_parent_death_watcher() + token = os.environ.get("LOCALAI_GRPC_AUTH_TOKEN", "") if not token: return [] diff --git a/backend/python/common/parent_watch.py b/backend/python/common/parent_watch.py new file mode 100644 index 000000000..c2f7f6a7a --- /dev/null +++ b/backend/python/common/parent_watch.py @@ -0,0 +1,149 @@ +"""Parent-death watcher (best-effort backstop) for LocalAI Python backends. + +LocalAI spawns each backend as a child process and, on a clean shutdown, tears +it down itself (SIGTERM -> grace -> SIGKILL). That graceful path only runs when +LocalAI receives a catchable signal and lives long enough to run its handlers. +If LocalAI is SIGKILLed (e.g. a supervising process's grace period elapses +first), that teardown never runs and this backend would be reparented to init +and linger, holding GPU/VRAM and its listen port. 
+ +The watcher here is a best-effort backstop for exactly that case: it does NOT +replace the graceful teardown, it only covers the "parent vanished without +cleaning up" path. It detects reparenting: when the process that spawned this +backend dies, the kernel reparents us to the nearest sub-reaper or to init +(PID 1), so os.getppid() stops matching the value captured at startup. This +getppid() approach is portable across Linux/macOS (unlike the Linux-only +PR_SET_PDEATHSIG), which is why it is used here, mirroring the Go backends' +pkg/grpc/parentwatch.go and the C++ backends' parent_watch.h. It is disabled on +Windows, which has no equivalent orphan-reparenting semantics. + +Env vars (shared verbatim across the Go, C++ and Python backends): + LOCALAI_BACKEND_PARENT_WATCH enabled by default; a falsey value + ("false"/"0"/"no"/"off", case-insensitive) + disables it. + LOCALAI_BACKEND_PARENT_WATCH_INTERVAL poll interval as a Go-style duration + string ("500ms", "2s", "1m") or a bare + number of seconds. Defaults to 2s. +""" + +import os +import sys +import threading + +ENV_PARENT_WATCH = "LOCALAI_BACKEND_PARENT_WATCH" +ENV_PARENT_WATCH_INTERVAL = "LOCALAI_BACKEND_PARENT_WATCH_INTERVAL" + +_DEFAULT_INTERVAL_SECONDS = 2.0 + +# Guard so repeated calls (e.g. get_auth_interceptors invoked more than once) +# only ever arm a single watcher thread per process. +_started = False +_started_lock = threading.Lock() + + +def _enabled(): + """Report whether the watcher should run in this process.""" + # Windows does not reparent orphans to a well-known init PID, so the + # getppid() heuristic used here doesn't apply there. + if os.name == "nt" or sys.platform.startswith("win"): + return False + val = os.environ.get(ENV_PARENT_WATCH, "").strip().lower() + if val in ("false", "0", "no", "off"): + return False + return True + + +def _interval_seconds(): + """Return the configured poll interval in seconds, or the default. 
+ + Accepts Go-style duration strings ("500ms", "2s", "1m") for cross-language + parity, or a bare number interpreted as seconds. + """ + raw = os.environ.get(ENV_PARENT_WATCH_INTERVAL, "").strip() + if not raw: + return _DEFAULT_INTERVAL_SECONDS + # Split numeric prefix from unit suffix. + i = 0 + while i < len(raw) and (raw[i].isdigit() or raw[i] == "." or (i == 0 and raw[i] in "+-")): + i += 1 + if i == 0: + return _DEFAULT_INTERVAL_SECONDS + try: + num = float(raw[:i]) + except ValueError: + return _DEFAULT_INTERVAL_SECONDS + unit = raw[i:].lower() + if unit == "ms": + seconds = num / 1000.0 + elif unit in ("s", ""): + seconds = num + elif unit == "m": + seconds = num * 60.0 + else: + return _DEFAULT_INTERVAL_SECONDS + return seconds if seconds > 0 else _DEFAULT_INTERVAL_SECONDS + + +def _parent_died(orig_ppid): + """Report whether this process has been reparented away from orig_ppid. + + Reparenting is the standard POSIX signal that the original parent (here, the + LocalAI process that spawned this backend) has exited: the orphan is handed + to the nearest sub-reaper or to init (PID 1), so os.getppid() no longer + matches the value captured at startup. + """ + ppid = os.getppid() + return ppid != orig_ppid or ppid == 1 + + +def _watch(orig_ppid, interval, on_death): + """Poll until _parent_died reports the original parent is gone, then call + on_death. Blocks, so run it on its own (daemon) thread.""" + import time + + while True: + time.sleep(interval) + if _parent_died(orig_ppid): + on_death() + return + + +def start_parent_death_watcher(): + """Install the best-effort safety net described in this module's docstring. + + No-op when disabled, on Windows, when already orphaned at startup + (os.getppid() <= 1), or if already started. This is a backstop alongside — + never a replacement for — LocalAI's graceful teardown. 
+    """
+    global _started
+    if not _enabled():
+        return
+    with _started_lock:
+        if _started:
+            return
+        orig_ppid = os.getppid()
+        # A parent of 1 (or less) at startup means we were already orphaned (or
+        # launched directly under init) — there is no original parent to watch.
+        if orig_ppid <= 1:
+            return
+        interval = _interval_seconds()
+
+        def on_death():
+            try:
+                print(
+                    "backend parent process (pid {}) exited without stopping this "
+                    "backend; self-terminating to avoid orphaning".format(orig_ppid),
+                    file=sys.stderr, flush=True,
+                )
+            finally:
+                # Always exit, even if stderr (maybe a pipe to the dead parent) raised.
+                os._exit(1)
+
+        thread = threading.Thread(
+            target=_watch,
+            args=(orig_ppid, interval, on_death),
+            name="parent-death-watcher",
+            daemon=True,
+        )
+        thread.start()
+        _started = True
diff --git a/backend/python/common/parent_watch_test.py b/backend/python/common/parent_watch_test.py
new file mode 100644
index 000000000..da37eb3ec
--- /dev/null
+++ b/backend/python/common/parent_watch_test.py
@@ -0,0 +1,150 @@
+"""Unit tests for the parent-death watcher (parent_watch.py).
+
+Run standalone (Python standard library only, no backend venv needed):
+    python3 -m unittest parent_watch_test
+
+The core test (test_detects_reparent) builds a genuine two-level process tree
+(test -> middle -> grandchild) with os.fork, lets the middle process die, and
+asserts the grandchild's parent_watch._watch detects the reparenting and
+self-terminates — mirroring the Go test in pkg/grpc/parentwatch_test.go and the
+C++ test in backend/cpp/llama-cpp/parent_watch_test.cpp.
+""" + +import os +import sys +import tempfile +import threading +import time +import unittest + +import parent_watch + + +class TestParentWatchEnvParsing(unittest.TestCase): + def setUp(self): + self._saved = { + k: os.environ.get(k) + for k in (parent_watch.ENV_PARENT_WATCH, parent_watch.ENV_PARENT_WATCH_INTERVAL) + } + for k in self._saved: + os.environ.pop(k, None) + + def tearDown(self): + for k, v in self._saved.items(): + if v is None: + os.environ.pop(k, None) + else: + os.environ[k] = v + + def test_interval_default(self): + self.assertEqual(parent_watch._interval_seconds(), 2.0) + + def test_interval_units(self): + cases = {"500ms": 0.5, "2s": 2.0, "1m": 60.0, "3": 3.0, "0.5s": 0.5} + for raw, expected in cases.items(): + os.environ[parent_watch.ENV_PARENT_WATCH_INTERVAL] = raw + self.assertAlmostEqual(parent_watch._interval_seconds(), expected, msg=raw) + + def test_interval_garbage_falls_back(self): + os.environ[parent_watch.ENV_PARENT_WATCH_INTERVAL] = "garbage" + self.assertEqual(parent_watch._interval_seconds(), 2.0) + + @unittest.skipIf(os.name == "nt" or sys.platform.startswith("win"), "POSIX only") + def test_enabled_default(self): + self.assertTrue(parent_watch._enabled()) + + @unittest.skipIf(os.name == "nt" or sys.platform.startswith("win"), "POSIX only") + def test_disabled_by_falsey(self): + for val in ("false", "0", "no", "off", "OFF", " False "): + os.environ[parent_watch.ENV_PARENT_WATCH] = val + self.assertFalse(parent_watch._enabled(), msg=val) + + @unittest.skipIf(os.name == "nt" or sys.platform.startswith("win"), "POSIX only") + def test_enabled_by_truthy(self): + for val in ("true", "1", "yes", "on"): + os.environ[parent_watch.ENV_PARENT_WATCH] = val + self.assertTrue(parent_watch._enabled(), msg=val) + + +@unittest.skipIf(os.name == "nt" or sys.platform.startswith("win"), "fork/reparent is POSIX only") +class TestParentWatchReparent(unittest.TestCase): + def _wait_for_file(self, path, timeout=10.0): + deadline = time.time() + timeout 
+ while time.time() < deadline: + if os.path.exists(path): + return True + time.sleep(0.02) + return False + + def test_detects_reparent(self): + tmpdir = tempfile.mkdtemp(prefix="parentwatch_test_") + ready_file = os.path.join(tmpdir, "ready") + exited_file = os.path.join(tmpdir, "exited") + + middle = os.fork() + if middle == 0: + # ---- middle process ---- + grandchild = os.fork() + if grandchild == 0: + # ---- grandchild process: arm the REAL watcher against middle ---- + orig_ppid = os.getppid() + + def on_death(): + with open(exited_file, "w") as f: + f.write("1") + os._exit(7) + + threading.Thread( + target=parent_watch._watch, + args=(orig_ppid, 0.05, on_death), + daemon=True, + ).start() + + # Safety valve: never linger if something goes wrong. + def bail(): + time.sleep(30) + os._exit(2) + + threading.Thread(target=bail, daemon=True).start() + + # Signal readiness only after the watcher captured orig_ppid. + with open(ready_file, "w") as f: + f.write(str(os.getpid())) + while True: + time.sleep(1) + else: + # middle: wait until grandchild is ready, then exit to orphan it. + if not self._wait_for_file(ready_file): + os._exit(5) + os._exit(0) + + # ---- test (top) process ---- + os.waitpid(middle, 0) # reap middle only; grandchild is orphaned + + self.assertTrue(os.path.exists(ready_file), "grandchild never signaled readiness") + self.assertTrue( + self._wait_for_file(exited_file), + "watcher did not detect parent death within timeout", + ) + + # Best-effort cleanup: kill the grandchild if it somehow survived. + try: + with open(ready_file) as f: + pid = int(f.read().strip()) + if pid > 1: + os.kill(pid, 9) + except (OSError, ValueError): + pass + for p in (ready_file, exited_file): + try: + os.remove(p) + except OSError: + pass + try: + os.rmdir(tmpdir) + except OSError: + pass + + +if __name__ == "__main__": + unittest.main()