diff --git a/go.mod b/go.mod index 58c091138..b897944b8 100644 --- a/go.mod +++ b/go.mod @@ -38,7 +38,7 @@ require ( github.com/modelcontextprotocol/go-sdk v1.5.0 github.com/mudler/cogito v0.9.5-0.20260315222927-63abdec7189b github.com/mudler/edgevpn v0.31.1 - github.com/mudler/go-processmanager v0.1.0 + github.com/mudler/go-processmanager v0.1.1 github.com/mudler/memory v0.0.0-20260406210934-424c1ecf2cf8 github.com/mudler/xlog v0.0.6 github.com/nats-io/nats.go v1.50.0 diff --git a/go.sum b/go.sum index 4c0886111..cc82e6759 100644 --- a/go.sum +++ b/go.sum @@ -711,8 +711,8 @@ github.com/mudler/edgevpn v0.31.1 h1:7qegiDWd0kAg6ljhNHxqvp8hbo/6BbzSdbb7/2WZfiY github.com/mudler/edgevpn v0.31.1/go.mod h1:ftV5B0nKFzm4R8vR80UYnCb2nf7lxCRgAALxUEEgCf8= github.com/mudler/go-piper v0.0.0-20241023091659-2494246fd9fc h1:RxwneJl1VgvikiX28EkpdAyL4yQVnJMrbquKospjHyA= github.com/mudler/go-piper v0.0.0-20241023091659-2494246fd9fc/go.mod h1:O7SwdSWMilAWhBZMK9N9Y/oBDyMMzshE3ju8Xkexwig= -github.com/mudler/go-processmanager v0.1.0 h1:fcSKgF9U/a1Z7KofAFeZnke5YseadCI5GqL9oT0LS3E= -github.com/mudler/go-processmanager v0.1.0/go.mod h1:h6kmHUZeafr+k5hRYpGLMzJFH4hItHffgpRo2QIkP+o= +github.com/mudler/go-processmanager v0.1.1 h1:c/1NRZOZpW8HuFv9RhBG57nQu1oDMRomEHedwBFMlrw= +github.com/mudler/go-processmanager v0.1.1/go.mod h1:h6kmHUZeafr+k5hRYpGLMzJFH4hItHffgpRo2QIkP+o= github.com/mudler/localrecall v0.5.9-0.20260415164846-8ad831f840fc h1:p1ucQ2rbU4mhG2Xl1Emg5Q6QCYCjI+fvMF9KTek/+sY= github.com/mudler/localrecall v0.5.9-0.20260415164846-8ad831f840fc/go.mod h1:xuPtgL9zUyiQLmspYzO3kaboYrGbWmwi8BQPt1aCAcs= github.com/mudler/memory v0.0.0-20260406210934-424c1ecf2cf8 h1:Ry8RiWy8fZ6Ff4E7dPmjRsBrnHOnPeOOj2LhCgyjQu0= diff --git a/pkg/model/process.go b/pkg/model/process.go index 21cb53728..8d75b97b2 100644 --- a/pkg/model/process.go +++ b/pkg/model/process.go @@ -204,5 +204,30 @@ func (ml *ModelLoader) startProcess(grpcProcess, id string, serverAddress string } }() + // Surface backend exits in the log. 
Without this, a crash (SIGSEGV
+	// from a missing shared library, a Python ImportError, etc.) is
+	// invisible at every log level — the only signal is a delayed
+	// "connection refused" from the gRPC dial, which doesn't say
+	// whether the child is alive.
+	go func() {
+		<-grpcControlProcess.Done()
+		fields := []any{
+			"id", id,
+			"address", serverAddress,
+			"process", filepath.Base(grpcProcess),
+		}
+		code, codeErr := grpcControlProcess.ExitCode()
+		// 0 and 143 (128 + SIGTERM, sent on graceful stop / model unload) are expected;
+		// anything else is a likely crash. A failed exit-code lookup is logged with its error.
+		switch {
+		case codeErr != nil:
+			xlog.Warn("Backend process exited unexpectedly", append(fields, "error", codeErr)...)
+		case code == "0" || code == "143":
+			xlog.Info("Backend process exited", append(fields, "exitCode", code)...)
+		default:
+			xlog.Warn("Backend process exited unexpectedly", append(fields, "exitCode", code)...)
+		}
+	}()
+
 	return grpcControlProcess, nil
 }