mirror of
https://github.com/mudler/LocalAI.git
synced 2026-07-02 04:16:56 -04:00
The Go parent-death watcher (pkg/grpc/parentwatch.go, commit 772b435d5)
only protects backends that route through pkg/grpc. C++ and Python
backends don't, so the originally-reported case — the llama.cpp gRPC
worker surviving a non-graceful LocalAI death — was still uncovered.
Extend the same best-effort backstop to both languages, reusing the
exact mechanism and semantics:
- capture getppid() at startup, skip if already orphaned (<=1)
- a background thread polls getppid() and self-exits on reparenting
(getppid() != orig || == 1), portable across Linux/macOS, no-op on
Windows
- same env vars: LOCALAI_BACKEND_PARENT_WATCH (default on; falsy
false/0/no/off disable) and LOCALAI_BACKEND_PARENT_WATCH_INTERVAL
(default 2s; accepts Go-style durations like 500ms/2s/1m)
C++: implemented in backend/cpp/llama-cpp (the reported, most-used C++
backend) as a dependency-free header parent_watch.h, wired into
grpc-server.cpp's main() and copied at build time via prepare.sh. C++
backends have no shared server scaffolding, so other C++ backends
(ds4, ik-llama-cpp, privacy-filter, ...) are not yet covered and would
each need the same one-line include+call as follow-ups.
Python: implemented once in the shared common/parent_watch.py and armed
from common/grpc_auth.py's get_auth_interceptors() — the single helper
every one of the 35 Python backends invokes while building its gRPC
server — so all Python backends (and future ones) are covered with no
per-backend edits and no duplicated implementation.
Tests (real process-tree reparent detection, mirroring the Go test):
- backend/cpp/llama-cpp/parent_watch_test.cpp (via run-unit-tests.sh)
- backend/python/common/parent_watch_test.py (python -m unittest)
Co-Authored-By: Claude Sonnet 5 <noreply@anthropic.com>
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
180 lines
6.5 KiB
C++
180 lines
6.5 KiB
C++
// Parent-death watcher (best-effort backstop) for the llama.cpp gRPC backend.
//
// LocalAI spawns this backend as a child process and, on a clean shutdown,
// tears it down itself (SIGTERM -> grace -> SIGKILL). That graceful path only
// runs when LocalAI receives a catchable signal and lives long enough to run
// its handlers. If LocalAI is SIGKILLed (e.g. a supervising process's grace
// period elapses first), that teardown never runs and this backend would be
// reparented to init and linger, holding VRAM and its listen port.
//
// The watcher here is a best-effort backstop for exactly that case: it does
// NOT replace the graceful teardown, it only covers the "parent vanished
// without cleaning up" path. It detects reparenting: when the process that
// spawned this backend dies, the kernel reparents us to the nearest sub-reaper
// or to init (PID 1), so getppid() stops matching the value captured at
// startup. This getppid() approach is portable across Linux/macOS (unlike the
// Linux-only PR_SET_PDEATHSIG), which is why it is used here, mirroring the Go
// backends' pkg/grpc/parentwatch.go. It is disabled on Windows, which has no
// equivalent orphan-reparenting semantics.
//
// This header is intentionally dependency-free (C++ standard library only) so
// it can be exercised by a standalone unit test (parent_watch_test.cpp) without
// building the full llama.cpp + gRPC backend.
|
|
#ifndef LLAMA_GRPC_PARENT_WATCH_H
|
|
#define LLAMA_GRPC_PARENT_WATCH_H
|
|
|
|
#include <algorithm>
|
|
#include <cctype>
|
|
#include <chrono>
|
|
#include <cstdio>
|
|
#include <cstdlib>
|
|
#include <functional>
|
|
#include <string>
|
|
#include <thread>
|
|
|
|
#if !defined(_WIN32)
|
|
#include <unistd.h> // getppid(2), _exit(2)
|
|
#endif
|
|
|
|
namespace llama_grpc {
|
|
|
|
// Env var names are shared verbatim with the Go and Python backends for
|
|
// consistency across languages.
|
|
inline const char *kEnvParentWatch() {
    // Environment variable consulted to decide whether the watcher runs.
    const char *name = "LOCALAI_BACKEND_PARENT_WATCH";
    return name;
}
|
|
inline const char *kEnvParentWatchInterval() {
    // Environment variable consulted for the watcher's poll interval.
    const char *name = "LOCALAI_BACKEND_PARENT_WATCH_INTERVAL";
    return name;
}
|
|
|
|
// Default poll interval in milliseconds. Matches the Go side's 2 * time.Second.
|
|
inline long parent_watch_default_interval_ms() {
    // Two seconds, expressed in milliseconds.
    const long seconds = 2;
    const long ms_per_second = 1000;
    return seconds * ms_per_second;
}
|
|
|
|
namespace detail {

// Returns a copy of `in` with leading and trailing blanks (space, tab, CR, LF)
// removed. When `lower` is true the remaining characters are additionally
// folded to lower case via std::tolower. An empty or all-blank input yields an
// empty string.
inline std::string trim_lower(const std::string &in, bool lower) {
    const char blanks[] = " \t\r\n";

    const std::string::size_type first = in.find_first_not_of(blanks);
    if (first == std::string::npos) {
        return std::string();
    }
    const std::string::size_type last = in.find_last_not_of(blanks);

    std::string out(in, first, last - first + 1);
    if (!lower) {
        return out;
    }
    for (char &ch : out) {
        // Go through unsigned char: std::tolower is undefined for negative
        // values other than EOF.
        ch = static_cast<char>(std::tolower(static_cast<unsigned char>(ch)));
    }
    return out;
}

} // namespace detail
|
|
|
|
// parent_watch_enabled reports whether the watcher should run. Enabled by
|
|
// default; a falsey value ("false"/"0"/"no"/"off", case-insensitive) disables
|
|
// it, matching the Go implementation's exact semantics.
|
|
inline bool parent_watch_enabled() {
|
|
#if defined(_WIN32)
|
|
return false;
|
|
#else
|
|
const char *v = std::getenv(kEnvParentWatch());
|
|
if (v == nullptr || v[0] == '\0') {
|
|
return true;
|
|
}
|
|
const std::string s = detail::trim_lower(v, true);
|
|
return !(s == "false" || s == "0" || s == "no" || s == "off");
|
|
#endif
|
|
}
|
|
|
|
// parent_watch_interval_ms returns the poll interval in milliseconds. Accepts
|
|
// Go-style duration strings ("500ms", "2s", "1m") for cross-language parity, or
|
|
// a bare number interpreted as seconds. Defaults to
|
|
// parent_watch_default_interval_ms().
|
|
inline long parent_watch_interval_ms() {
|
|
const long def = parent_watch_default_interval_ms();
|
|
const char *v = std::getenv(kEnvParentWatchInterval());
|
|
if (v == nullptr || v[0] == '\0') {
|
|
return def;
|
|
}
|
|
const std::string s = detail::trim_lower(v, false);
|
|
if (s.empty()) {
|
|
return def;
|
|
}
|
|
size_t i = 0;
|
|
while (i < s.size() && (std::isdigit((unsigned char)s[i]) || s[i] == '.')) {
|
|
i++;
|
|
}
|
|
if (i == 0) {
|
|
return def;
|
|
}
|
|
double num = 0.0;
|
|
try {
|
|
num = std::stod(s.substr(0, i));
|
|
} catch (...) {
|
|
return def;
|
|
}
|
|
const std::string unit = s.substr(i);
|
|
long ms;
|
|
if (unit == "ms") {
|
|
ms = (long)num;
|
|
} else if (unit == "s" || unit.empty()) {
|
|
ms = (long)(num * 1000.0);
|
|
} else if (unit == "m") {
|
|
ms = (long)(num * 60000.0);
|
|
} else {
|
|
return def; // unrecognized unit
|
|
}
|
|
return ms > 0 ? ms : def;
|
|
}
|
|
|
|
#if !defined(_WIN32)
|
|
// parent_died tells whether this process no longer belongs to the parent it
// had when the watcher started. When the original parent (the LocalAI process
// that spawned this backend) exits, the orphan is handed to the nearest
// sub-reaper or to init (PID 1), so the current getppid() either differs from
// `orig_ppid` or equals 1. Either condition yields true.
inline bool parent_died(pid_t orig_ppid) {
    const pid_t current = getppid();
    if (current == 1) {
        return true; // now owned by init
    }
    return current != orig_ppid;
}
|
|
|
|
// watch_parent_death polls until parent_died reports the original parent is
|
|
// gone, then invokes on_death. It blocks, so run it on its own thread.
|
|
inline void watch_parent_death(pid_t orig_ppid, long interval_ms,
|
|
const std::function<void()> &on_death) {
|
|
for (;;) {
|
|
std::this_thread::sleep_for(std::chrono::milliseconds(interval_ms));
|
|
if (parent_died(orig_ppid)) {
|
|
on_death();
|
|
return;
|
|
}
|
|
}
|
|
}
|
|
#endif
|
|
|
|
// start_parent_death_watcher installs the best-effort safety net described in
|
|
// the file header on the calling backend process. It is a no-op when disabled,
|
|
// on Windows, or when the process is already orphaned at startup
|
|
// (getppid() <= 1). This is a backstop alongside — never a replacement for —
|
|
// LocalAI's graceful teardown.
|
|
inline void start_parent_death_watcher() {
|
|
#if !defined(_WIN32)
|
|
if (!parent_watch_enabled()) {
|
|
return;
|
|
}
|
|
const pid_t orig_ppid = getppid();
|
|
// A parent of 1 (or less) at startup means we were already orphaned (or
|
|
// launched directly under init) — there is no original parent to watch for.
|
|
if (orig_ppid <= 1) {
|
|
return;
|
|
}
|
|
const long interval_ms = parent_watch_interval_ms();
|
|
std::thread([orig_ppid, interval_ms]() {
|
|
watch_parent_death(orig_ppid, interval_ms, [orig_ppid]() {
|
|
fprintf(stderr,
|
|
"backend parent process (pid %d) exited without stopping "
|
|
"this backend; self-terminating to avoid orphaning\n",
|
|
(int)orig_ppid);
|
|
fflush(stderr);
|
|
_exit(1);
|
|
});
|
|
}).detach();
|
|
#endif
|
|
}
|
|
|
|
} // namespace llama_grpc
|
|
|
|
#endif // LLAMA_GRPC_PARENT_WATCH_H
|