mirror of
https://github.com/mudler/LocalAI.git
synced 2026-07-01 20:07:18 -04:00
Compare commits
9 Commits
fix/distri
...
fix/backen
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
772b435d52 | ||
|
|
703ea32de6 | ||
|
|
751db06e35 | ||
|
|
f46c0e9c83 | ||
|
|
0d8adfc59a | ||
|
|
43f2615e19 | ||
|
|
875c539ad5 | ||
|
|
d641ded194 | ||
|
|
40445fff05 |
@@ -1,5 +1,5 @@
|
||||
|
||||
IK_LLAMA_VERSION?=f74a6fb87b315b2c3154166e075360e15021a61d
|
||||
IK_LLAMA_VERSION?=29431b31c89e79c10f8736e8f2742485ba1713d6
|
||||
LLAMA_REPO?=https://github.com/ikawrakow/ik_llama.cpp
|
||||
|
||||
CMAKE_ARGS?=
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
|
||||
LLAMA_VERSION?=6f4f53f2b7da54fcdbbecaaa734337c337ad6176
|
||||
LLAMA_VERSION?=0eca4d490e591d4e93058d07540cf47278a72577
|
||||
LLAMA_REPO?=https://github.com/ggerganov/llama.cpp
|
||||
|
||||
CMAKE_ARGS?=
|
||||
|
||||
@@ -8,7 +8,7 @@ JOBS?=$(shell nproc --ignore=1)
|
||||
|
||||
# CrispASR version (release tag)
|
||||
CRISPASR_REPO?=https://github.com/CrispStrobe/CrispASR
|
||||
CRISPASR_VERSION?=3b93758f9725d400eca82976f895e4cec3f31260
|
||||
CRISPASR_VERSION?=8fd9db8fec8cb5e929d23d3267ed5817794feb1a
|
||||
SO_TARGET?=libgocrispasr.so
|
||||
|
||||
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF
|
||||
|
||||
@@ -8,7 +8,7 @@ JOBS?=$(shell nproc --ignore=1)
|
||||
|
||||
# stablediffusion.cpp (ggml)
|
||||
STABLEDIFFUSION_GGML_REPO?=https://github.com/leejet/stable-diffusion.cpp
|
||||
STABLEDIFFUSION_GGML_VERSION?=3b6c9ca97cfcda8e68e719e6670d06379fcbe943
|
||||
STABLEDIFFUSION_GGML_VERSION?=484baa41e5e006c52dcd4addc38c830b9489745f
|
||||
|
||||
CMAKE_ARGS+=-DGGML_MAX_NAME=128
|
||||
|
||||
|
||||
@@ -798,6 +798,7 @@ void sd_img_gen_params_set_seed(sd_img_gen_params_t *params, int64_t seed) {
|
||||
int gen_image(sd_img_gen_params_t *p, int steps, char *dst, float cfg_scale, char *src_image, float strength, char *mask_image, char* ref_images[], int ref_images_count) {
|
||||
|
||||
sd_image_t* results;
|
||||
int num_results_out = 0;
|
||||
|
||||
std::vector<int> skip_layers = {7, 8, 9};
|
||||
|
||||
@@ -994,10 +995,14 @@ int gen_image(sd_img_gen_params_t *p, int steps, char *dst, float cfg_scale, cha
|
||||
sd_ctx_params_to_str(&ctx_params),
|
||||
sd_img_gen_params_to_str(p));
|
||||
|
||||
results = generate_image(sd_c, p);
|
||||
bool gen_ok = generate_image(sd_c, p, &results, &num_results_out);
|
||||
|
||||
std::free(p);
|
||||
|
||||
if (!gen_ok || num_results_out == 0) {
|
||||
results = NULL;
|
||||
}
|
||||
|
||||
if (results == NULL) {
|
||||
fprintf (stderr, "NO results\n");
|
||||
if (input_image_buffer) free(input_image_buffer);
|
||||
|
||||
@@ -8,7 +8,7 @@ JOBS?=$(shell nproc --ignore=1)
|
||||
|
||||
# whisper.cpp version
|
||||
WHISPER_REPO?=https://github.com/ggml-org/whisper.cpp
|
||||
WHISPER_CPP_VERSION?=0ae02cdb2c7317b50991367c165736ce42ed96ac
|
||||
WHISPER_CPP_VERSION?=0874de3e8e8e48361dba85c7fe6d176f008bf158
|
||||
SO_TARGET?=libgowhisper.so
|
||||
|
||||
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF
|
||||
|
||||
@@ -104,7 +104,7 @@ if [ "$(uname -s)" = "Darwin" ]; then
|
||||
# can rewrite it. Darwin therefore follows vllm-metal and can lag the Linux
|
||||
# vllm pin (requirements-cublas13-after.txt, bumped independently against
|
||||
# vllm/vllm) until vllm-metal supports a newer vLLM.
|
||||
VLLM_METAL_VERSION="v0.3.0.dev20260628073537"
|
||||
VLLM_METAL_VERSION="v0.3.0.dev20260630095652"
|
||||
|
||||
# The coupled vLLM source version is whatever this vllm-metal release builds
|
||||
# against -- it declares it in its own installer as `vllm_v=`. Derive it from
|
||||
|
||||
@@ -3,8 +3,8 @@
|
||||
# on a cu130 host. Pull the cu130-flavoured wheel from vLLM's per-tag index
|
||||
# instead — the cublas13 case in install.sh adds --index-strategy=unsafe-best-match
|
||||
# so uv consults this index alongside PyPI.
|
||||
--extra-index-url https://wheels.vllm.ai/0.23.0/cu130
|
||||
--extra-index-url https://wheels.vllm.ai/0.24.0/cu130
|
||||
# VERSION COUPLING: darwin/Apple-Silicon builds use vllm-metal (see install.sh),
|
||||
# which pins this exact vLLM version. Bumping vllm here means coordinating with a
|
||||
# vllm-metal release that supports the new version, or macOS/Metal builds break.
|
||||
vllm==0.23.0
|
||||
vllm==0.24.0
|
||||
|
||||
@@ -1,3 +1,3 @@
|
||||
{
|
||||
"version": "v4.5.5"
|
||||
"version": "v4.5.6"
|
||||
}
|
||||
|
||||
105
pkg/grpc/parentwatch.go
Normal file
105
pkg/grpc/parentwatch.go
Normal file
@@ -0,0 +1,105 @@
|
||||
package grpc
|
||||
|
||||
import (
|
||||
"log"
|
||||
"os"
|
||||
"runtime"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
// Backend worker processes (the per-model gRPC servers LocalAI spawns) are
|
||||
// deliberately placed in their own process group by the process manager so
|
||||
// LocalAI's graceful shutdown can signal the whole group. That graceful path
|
||||
// (SIGTERM -> grace -> SIGKILL, driven by pkg/signals + pkg/model) only runs
|
||||
// when LocalAI itself receives a catchable signal and lives long enough to run
|
||||
// its handlers. If LocalAI is SIGKILLed (e.g. a supervising process's
|
||||
// graceful-shutdown grace period elapses first), that teardown never runs and
|
||||
// this backend would be reparented to init and linger, holding VRAM and its
|
||||
// listen port.
|
||||
//
|
||||
// The watcher below is a best-effort backstop for exactly that case: it does
|
||||
// NOT replace the graceful teardown, it only covers the "parent vanished
|
||||
// without cleaning up" path. It works by detecting reparenting: when the
|
||||
// process that spawned this backend dies, the kernel reparents us to the
|
||||
// nearest sub-reaper or to init (PID 1), so getppid() stops matching the value
|
||||
// we captured at startup. This getppid() approach is portable across
|
||||
// Linux/macOS (unlike Linux-only PR_SET_PDEATHSIG), which is why it's used
|
||||
// here rather than a kernel parent-death signal.
|
||||
const (
	// EnvBackendParentWatch is the environment variable that switches the
	// parent-death watcher on or off. The watcher is on unless the variable
	// holds one of the falsey spellings "false", "0", "no" or "off"; that is
	// useful when a backend is run standalone for debugging from a shell
	// whose lifetime should not decide when the backend exits.
	EnvBackendParentWatch = "LOCALAI_BACKEND_PARENT_WATCH"

	// EnvBackendParentWatchInterval is the environment variable that
	// overrides how often the parent is polled. Its value is a Go duration
	// string such as "500ms"; defaultParentWatchInterval applies otherwise.
	EnvBackendParentWatchInterval = "LOCALAI_BACKEND_PARENT_WATCH_INTERVAL"

	// defaultParentWatchInterval is the poll interval used when no usable
	// override is configured.
	defaultParentWatchInterval = 2 * time.Second
)
|
||||
|
||||
// parentWatchEnabled reports whether the watcher should run in this process.
|
||||
func parentWatchEnabled() bool {
|
||||
switch strings.ToLower(strings.TrimSpace(os.Getenv(EnvBackendParentWatch))) {
|
||||
case "false", "0", "no", "off":
|
||||
return false
|
||||
}
|
||||
// Windows does not reparent orphans to a well-known init PID, so the
|
||||
// getppid() heuristic used here doesn't apply there.
|
||||
return runtime.GOOS != "windows"
|
||||
}
|
||||
|
||||
// parentWatchInterval returns the configured poll interval, or the default.
|
||||
func parentWatchInterval() time.Duration {
|
||||
if v := os.Getenv(EnvBackendParentWatchInterval); v != "" {
|
||||
if d, err := time.ParseDuration(v); err == nil && d > 0 {
|
||||
return d
|
||||
}
|
||||
}
|
||||
return defaultParentWatchInterval
|
||||
}
|
||||
|
||||
// parentDied reports whether this process no longer belongs to the parent
// identified by origPPID. When a parent exits, POSIX systems hand the orphan
// to the nearest sub-reaper or to init (PID 1), so the current getppid()
// either differs from origPPID or equals 1.
func parentDied(origPPID int) bool {
	current := os.Getppid()
	if current == 1 {
		// Being owned by init always counts as orphaned, whatever the
		// caller passed in as the original parent.
		return true
	}
	return current != origPPID
}
|
||||
|
||||
// watchParentDeath polls until parentDied reports the original parent is gone,
|
||||
// then invokes onDeath. It blocks, so run it in its own goroutine.
|
||||
func watchParentDeath(origPPID int, interval time.Duration, onDeath func()) {
|
||||
ticker := time.NewTicker(interval)
|
||||
defer ticker.Stop()
|
||||
for range ticker.C {
|
||||
if parentDied(origPPID) {
|
||||
onDeath()
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// startParentDeathWatcher installs the best-effort safety net described above
|
||||
// on the calling backend process. It is a no-op when disabled or on platforms
|
||||
// where the mechanism doesn't apply. This is a backstop alongside — never a
|
||||
// replacement for — LocalAI's graceful SIGTERM->grace->SIGKILL teardown.
|
||||
func startParentDeathWatcher() {
|
||||
if !parentWatchEnabled() {
|
||||
return
|
||||
}
|
||||
origPPID := os.Getppid()
|
||||
// A parent of 1 at startup means we were already orphaned (or launched
|
||||
// directly under init) — there's no original parent to watch for.
|
||||
if origPPID <= 1 {
|
||||
return
|
||||
}
|
||||
interval := parentWatchInterval()
|
||||
go watchParentDeath(origPPID, interval, func() {
|
||||
log.Printf("backend parent process (pid %d) exited without stopping this backend; self-terminating to avoid orphaning", origPPID)
|
||||
os.Exit(1)
|
||||
})
|
||||
}
|
||||
168
pkg/grpc/parentwatch_test.go
Normal file
168
pkg/grpc/parentwatch_test.go
Normal file
@@ -0,0 +1,168 @@
|
||||
//go:build !windows
|
||||
|
||||
package grpc
|
||||
|
||||
import (
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
"strconv"
|
||||
"syscall"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
// Environment variables used when this test binary re-executes itself in a
// helper role (dispatched from init). They are set only on the spawned
// child/grandchild processes and never on the ordinary `go test` invocation.
const (
	// envRole selects which helper role the re-executed binary plays.
	envRole = "LOCALAI_PARENTWATCH_TEST_ROLE"
	// envReady is the path the grandchild writes its PID to once its watcher
	// is armed.
	envReady = "LOCALAI_PARENTWATCH_TEST_READY"
	// envExited is the path the grandchild writes to when it detects that it
	// has been reparented.
	envExited = "LOCALAI_PARENTWATCH_TEST_EXITED"
)
|
||||
|
||||
// init dispatches the helper roles when this test binary is re-executed with a
|
||||
// role set. It runs before the testing/Ginkgo machinery, and is a no-op during
|
||||
// a normal test run (role unset).
|
||||
func init() {
|
||||
switch os.Getenv(envRole) {
|
||||
case "middle":
|
||||
runMiddleRole()
|
||||
case "grandchild":
|
||||
runGrandchildRole()
|
||||
}
|
||||
}
|
||||
|
||||
// childEnv returns the current environment with the parentwatch test role set
|
||||
// to the given value (replacing any inherited role), leaving the ready/exited
|
||||
// file paths inherited.
|
||||
func childEnv(role string) []string {
|
||||
out := make([]string, 0, len(os.Environ())+1)
|
||||
for _, kv := range os.Environ() {
|
||||
if len(kv) > len(envRole) && kv[:len(envRole)+1] == envRole+"=" {
|
||||
continue
|
||||
}
|
||||
out = append(out, kv)
|
||||
}
|
||||
return append(out, envRole+"="+role)
|
||||
}
|
||||
|
||||
// runGrandchildRole arms the REAL watchParentDeath against its current parent
|
||||
// (the "middle" process), signals readiness, then blocks. When middle exits and
|
||||
// we are reparented, the watcher fires and we record it before exiting.
|
||||
func runGrandchildRole() {
|
||||
exitedFile := os.Getenv(envExited)
|
||||
readyFile := os.Getenv(envReady)
|
||||
|
||||
origPPID := os.Getppid()
|
||||
go watchParentDeath(origPPID, 50*time.Millisecond, func() {
|
||||
_ = os.WriteFile(exitedFile, []byte("1"), 0o644)
|
||||
os.Exit(7)
|
||||
})
|
||||
|
||||
// Safety valve: never linger if something goes wrong with the test.
|
||||
go func() {
|
||||
time.Sleep(30 * time.Second)
|
||||
os.Exit(2)
|
||||
}()
|
||||
|
||||
// Signal readiness only after the watcher captured origPPID, so middle
|
||||
// won't exit before we've recorded it as our original parent.
|
||||
_ = os.WriteFile(readyFile, []byte(strconv.Itoa(os.Getpid())), 0o644)
|
||||
|
||||
select {} // block until the watcher terminates us
|
||||
}
|
||||
|
||||
// runMiddleRole spawns the grandchild (which arms the watcher against us),
|
||||
// waits until it is ready, then exits — orphaning the grandchild so it gets
|
||||
// reparented, which is what the watcher must detect.
|
||||
func runMiddleRole() {
|
||||
readyFile := os.Getenv(envReady)
|
||||
|
||||
self, err := os.Executable()
|
||||
if err != nil {
|
||||
os.Exit(3)
|
||||
}
|
||||
cmd := exec.Command(self)
|
||||
cmd.Env = childEnv("grandchild")
|
||||
// Own process group, mirroring how real backends are spawned, and discard
|
||||
// std streams so the grandchild doesn't keep any parent pipe open.
|
||||
cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true}
|
||||
if err := cmd.Start(); err != nil {
|
||||
os.Exit(4)
|
||||
}
|
||||
|
||||
if !waitForFile(readyFile, 10*time.Second) {
|
||||
os.Exit(5)
|
||||
}
|
||||
os.Exit(0) // orphan the grandchild
|
||||
}
|
||||
|
||||
// waitForFile polls every 20ms until path can be stat'ed or timeout has
// elapsed, and reports whether the file was seen.
//
// The file is always checked at least once, even with a zero or negative
// timeout, and it is checked one last time after the deadline has passed, so a
// file that shows up during the final sleep is still reported as present.
func waitForFile(path string, timeout time.Duration) bool {
	const pollEvery = 20 * time.Millisecond

	deadline := time.Now().Add(timeout)
	for {
		if _, err := os.Stat(path); err == nil {
			return true
		}
		// Test the deadline only after a failed stat, which guarantees the
		// final post-deadline check described above.
		if !time.Now().Before(deadline) {
			return false
		}
		time.Sleep(pollEvery)
	}
}
|
||||
|
||||
// TestParentDeathWatcherDetectsReparent builds a genuine two-level process tree
|
||||
// (test -> middle -> grandchild), lets the middle process die, and asserts the
|
||||
// grandchild's watchParentDeath detects the reparenting and self-terminates.
|
||||
func TestParentDeathWatcherDetectsReparent(t *testing.T) {
|
||||
if runtime.GOOS == "windows" {
|
||||
t.Skip("parent-death watcher is not supported on windows")
|
||||
}
|
||||
|
||||
dir := t.TempDir()
|
||||
readyFile := filepath.Join(dir, "ready")
|
||||
exitedFile := filepath.Join(dir, "exited")
|
||||
|
||||
self, err := os.Executable()
|
||||
if err != nil {
|
||||
t.Fatalf("cannot resolve test executable: %v", err)
|
||||
}
|
||||
|
||||
middle := exec.Command(self)
|
||||
middle.Env = append(childEnv("middle"),
|
||||
envReady+"="+readyFile,
|
||||
envExited+"="+exitedFile,
|
||||
)
|
||||
// Discard the helpers' output; keep the test log clean.
|
||||
middle.Stdout = nil
|
||||
middle.Stderr = nil
|
||||
|
||||
if err := middle.Start(); err != nil {
|
||||
t.Fatalf("failed to start middle helper: %v", err)
|
||||
}
|
||||
// Wait only for the middle process; the grandchild is intentionally left
|
||||
// orphaned. No pipes are shared, so this returns as soon as middle exits.
|
||||
if err := middle.Wait(); err != nil {
|
||||
t.Fatalf("middle helper exited with error: %v", err)
|
||||
}
|
||||
|
||||
// The grandchild must have armed the watcher (and thus captured middle as
|
||||
// its parent) before middle exited.
|
||||
if _, err := os.Stat(readyFile); err != nil {
|
||||
t.Fatalf("grandchild never signaled readiness: %v", err)
|
||||
}
|
||||
|
||||
// Best-effort cleanup in case the watcher somehow doesn't fire.
|
||||
t.Cleanup(func() {
|
||||
if b, err := os.ReadFile(readyFile); err == nil {
|
||||
if pid, err := strconv.Atoi(string(b)); err == nil {
|
||||
_ = syscall.Kill(pid, syscall.SIGKILL)
|
||||
}
|
||||
}
|
||||
})
|
||||
|
||||
// Now that middle is gone, the grandchild has been reparented; the watcher
|
||||
// must notice and write the exited marker.
|
||||
if !waitForFile(exitedFile, 10*time.Second) {
|
||||
t.Fatalf("watcher did not detect parent death within timeout")
|
||||
}
|
||||
}
|
||||
@@ -939,6 +939,9 @@ func StartServer(address string, model AIModel) error {
|
||||
s := grpc.NewServer(serverOpts()...)
|
||||
pb.RegisterBackendServer(s, &server{llm: model})
|
||||
log.Printf("gRPC Server listening at %v", lis.Addr())
|
||||
// Safety net: self-terminate if the LocalAI process that spawned this
|
||||
// backend dies without running its graceful teardown (see parentwatch.go).
|
||||
startParentDeathWatcher()
|
||||
if err := s.Serve(lis); err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -954,6 +957,9 @@ func RunServer(address string, model AIModel) (func() error, error) {
|
||||
s := grpc.NewServer(serverOpts()...)
|
||||
pb.RegisterBackendServer(s, &server{llm: model})
|
||||
log.Printf("gRPC Server listening at %v", lis.Addr())
|
||||
// Safety net: self-terminate if the LocalAI process that spawned this
|
||||
// backend dies without running its graceful teardown (see parentwatch.go).
|
||||
startParentDeathWatcher()
|
||||
if err = s.Serve(lis); err != nil {
|
||||
return func() error {
|
||||
return lis.Close()
|
||||
|
||||
Reference in New Issue
Block a user