mirror of
https://github.com/mudler/LocalAI.git
synced 2026-07-02 04:16:56 -04:00
The Go parent-death watcher (pkg/grpc/parentwatch.go, commit 772b435d5)
only protects backends that route through pkg/grpc. C++ and Python
backends don't, so the originally-reported case — the llama.cpp gRPC
worker surviving a non-graceful LocalAI death — was still uncovered.
Extend the same best-effort backstop to both languages, reusing the
exact mechanism and semantics:
- capture getppid() at startup, skip if already orphaned (<=1)
- a background thread polls getppid() and self-exits on reparenting
(getppid() != orig || getppid() == 1), portable across Linux/macOS, no-op on
Windows
- same env vars: LOCALAI_BACKEND_PARENT_WATCH (default on; the falsy values
false/0/no/off disable it) and LOCALAI_BACKEND_PARENT_WATCH_INTERVAL
(default 2s; accepts Go-style durations like 500ms/2s/1m)
C++: implemented in backend/cpp/llama-cpp (the reported, most-used C++
backend) as a dependency-free header parent_watch.h, wired into
grpc-server.cpp's main() and copied at build time via prepare.sh. C++
backends have no shared server scaffolding, so other C++ backends
(ds4, ik-llama-cpp, privacy-filter, ...) are not yet covered and would
each need the same one-line include+call as follow-ups.
Python: implemented once in the shared common/parent_watch.py and armed
from common/grpc_auth.py's get_auth_interceptors() — the single helper
every one of the 35 Python backends invokes while building its gRPC
server — so all Python backends (and future ones) are covered with no
per-backend edits and no duplicated implementation.
Tests (real process-tree reparent detection, mirroring the Go test):
- backend/cpp/llama-cpp/parent_watch_test.cpp (via run-unit-tests.sh)
- backend/python/common/parent_watch_test.py (python -m unittest)
Co-Authored-By: Claude Sonnet 5 <noreply@anthropic.com>
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
88 lines
3.1 KiB
Python
88 lines
3.1 KiB
Python
"""Shared gRPC bearer token authentication interceptor for LocalAI Python backends.
|
|
|
|
When the environment variable LOCALAI_GRPC_AUTH_TOKEN is set, requests without
|
|
a valid Bearer token in the 'authorization' metadata header are rejected with
|
|
UNAUTHENTICATED. When the variable is empty or unset, no authentication is
|
|
performed (backward compatible).
|
|
"""
|
|
|
|
import hmac
|
|
import os
|
|
|
|
import grpc
|
|
|
|
from parent_watch import start_parent_death_watcher
|
|
|
|
|
|
class _AbortHandler(grpc.RpcMethodHandler):
    """RPC method handler whose only behavior is to reject the call.

    The handler advertises itself as unary-unary with no (de)serializers and
    wires its unary-unary behavior to ``_abort``, which aborts the call with
    UNAUTHENTICATED. All other behavior slots are left as ``None``.
    """

    def __init__(self):
        # Declared as a plain unary-unary handler (no streaming either way).
        self.request_streaming = self.response_streaming = False
        # No (de)serialization is configured for the rejected call.
        self.request_deserializer = self.response_serializer = None
        # Only the unary-unary slot is populated; the rest stay empty.
        self.unary_stream = self.stream_unary = self.stream_stream = None
        self.unary_unary = self._abort

    @staticmethod
    def _abort(request, context):
        """Abort the call with UNAUTHENTICATED; ``request`` is ignored."""
        context.abort(grpc.StatusCode.UNAUTHENTICATED, "invalid token")
|
|
|
|
|
|
class TokenAuthInterceptor(grpc.ServerInterceptor):
    """Sync gRPC server interceptor that validates a bearer token.

    A call is let through only when its 'authorization' metadata value equals
    ``"Bearer <token>"``; otherwise a handler that aborts with UNAUTHENTICATED
    is returned in place of the real one.
    """

    def __init__(self, token: str):
        """Remember the expected token and build the reusable abort handler.

        Args:
            token: The secret that clients must send after ``"Bearer "``.
        """
        self._token = token
        self._abort_handler = _AbortHandler()

    @staticmethod
    def _to_bytes(value):
        """Return ``value`` as bytes when it is a ``str``, else unchanged.

        ``hmac.compare_digest`` only accepts ``str`` arguments that are pure
        ASCII, so a non-ASCII token would raise ``TypeError`` on every call.
        Comparing UTF-8 bytes avoids that. ``surrogatepass`` keeps the
        encoding total, so no ``str`` value can make the encode step fail.
        """
        if isinstance(value, str):
            return value.encode("utf-8", "surrogatepass")
        return value

    def intercept_service(self, continuation, handler_call_details):
        """Authorize one incoming call.

        Args:
            continuation: Callable that yields the next handler in the chain.
            handler_call_details: Call details carrying ``invocation_metadata``.

        Returns:
            The result of ``continuation(handler_call_details)`` when the
            bearer token matches, otherwise the UNAUTHENTICATED abort handler.
        """
        # Treat absent metadata as "no headers" rather than failing in dict().
        metadata = dict(handler_call_details.invocation_metadata or ())
        auth = metadata.get("authorization", "")
        expected = "Bearer " + self._token
        # Constant-time comparison on bytes (see _to_bytes for why bytes).
        if not hmac.compare_digest(self._to_bytes(auth), self._to_bytes(expected)):
            return self._abort_handler
        return continuation(handler_call_details)
|
|
|
|
|
|
class AsyncTokenAuthInterceptor(grpc.aio.ServerInterceptor):
    """Async gRPC server interceptor that validates a bearer token.

    A call is let through only when its 'authorization' metadata value equals
    ``"Bearer <token>"``; otherwise a handler that aborts with UNAUTHENTICATED
    is returned in place of the real one.
    """

    def __init__(self, token: str):
        """Remember the expected token and build the reusable abort handler.

        Args:
            token: The secret that clients must send after ``"Bearer "``.
        """
        self._token = token
        # The abort handler holds no per-call state, so one instance is reused
        # for every rejected call instead of allocating a new one each time.
        self._abort_handler = _AbortHandler()

    @staticmethod
    def _to_bytes(value):
        """Return ``value`` as bytes when it is a ``str``, else unchanged.

        ``hmac.compare_digest`` only accepts ``str`` arguments that are pure
        ASCII, so a non-ASCII token would raise ``TypeError`` on every call.
        Comparing UTF-8 bytes avoids that. ``surrogatepass`` keeps the
        encoding total, so no ``str`` value can make the encode step fail.
        """
        if isinstance(value, str):
            return value.encode("utf-8", "surrogatepass")
        return value

    async def intercept_service(self, continuation, handler_call_details):
        """Authorize one incoming call.

        Args:
            continuation: Awaitable callable yielding the next handler.
            handler_call_details: Call details carrying ``invocation_metadata``.

        Returns:
            The awaited result of ``continuation(handler_call_details)`` when
            the bearer token matches, otherwise the UNAUTHENTICATED abort
            handler.
        """
        # Treat absent metadata as "no headers" rather than failing in dict().
        metadata = dict(handler_call_details.invocation_metadata or ())
        auth = metadata.get("authorization", "")
        expected = "Bearer " + self._token
        # Constant-time comparison on bytes (see _to_bytes for why bytes).
        if not hmac.compare_digest(self._to_bytes(auth), self._to_bytes(expected)):
            return self._abort_handler
        return await continuation(handler_call_details)
|
|
|
|
|
|
def get_auth_interceptors(*, aio: bool = False):
    """Build the bearer-token interceptor list for a backend's gRPC server.

    Args:
        aio: When True, the returned interceptor is the async variant meant
            for grpc.aio.server(); when False (default), the sync variant
            meant for grpc.server().

    Returns:
        A one-element list holding the matching interceptor, or an empty list
        when LOCALAI_GRPC_AUTH_TOKEN is unset or empty.
    """
    # The parent-death backstop is armed from here because this helper is the
    # one call every LocalAI Python backend makes while building its gRPC
    # server (the Go watcher is armed from pkg/grpc's shared serve path in the
    # same way). Per parent_watch.py, start_parent_death_watcher() is
    # idempotent and does nothing when disabled or on unsupported platforms.
    start_parent_death_watcher()

    secret = os.environ.get("LOCALAI_GRPC_AUTH_TOKEN", "")
    if not secret:
        # No token configured: authentication is off.
        return []

    interceptor_cls = AsyncTokenAuthInterceptor if aio else TokenAuthInterceptor
    return [interceptor_cls(secret)]
|