mirror of
https://github.com/mudler/LocalAI.git
synced 2026-07-01 03:46:41 -04:00
fix(watchdog): don't log optional Free() as an error when backend returns Unimplemented (#10602) (#10607)
* fix(watchdog): don't log optional Free() as an error when backend returns Unimplemented (#10602) When the watchdog evicts a model, deleteProcess calls the backend's gRPC Free() to release VRAM before stopping the process. Free is optional: backends that don't override it -- the generated UnimplementedBackendServer stub, many Python/external backends, or a federation proxy in distributed mode -- return gRPC Unimplemented. That is expected, not a failure: VRAM is reclaimed when the local process is stopped, or by the remote unloader for remote backends. Logging it as "WARN Error freeing GPU resources" made a benign, optional RPC look like a fault (the alarming line in #10602, seen in distributed mode where the model is remote and Free hits a stub). Treat gRPC Unimplemented from Free() as a no-op logged at Debug; genuine failures still Warn. Free() is still attempted for every backend, so any backend that does implement it is unaffected. Add a reusable grpcerrors.IsUnimplemented helper following the package's existing code-based detection idiom (prefer the typed status code, fall back to the message across non-gRPC boundaries), with table tests. Assisted-by: Claude:claude-opus-4-8 [Claude Code] Signed-off-by: Adira Denis Muhando <dennisadira@gmail.com> * fix(watchdog): log a non-Unimplemented Free() failure at error level Per review: now that the expected gRPC Unimplemented case is split out and logged at Debug, any remaining Free() error is a genuine failure to release VRAM, so surface it at error level instead of warn. Assisted-by: Claude:claude-opus-4-8 [Claude Code] Signed-off-by: Adira Denis Muhando <dennisadira@gmail.com> --------- Signed-off-by: Adira Denis Muhando <dennisadira@gmail.com>
This commit is contained in:
@@ -58,6 +58,23 @@ func IsLiveTranscriptionUnsupported(err error) bool {
|
||||
return strings.Contains(strings.ToLower(err.Error()), "unimplemented")
|
||||
}
|
||||
|
||||
// IsUnimplemented reports whether err is a gRPC Unimplemented status — the
|
||||
// signal a backend gives for an RPC it does not implement. The generated
|
||||
// UnimplementedBackendServer stub returns exactly this for any RPC a backend
|
||||
// (e.g. a Python or external backend) has not overridden, so callers can treat
|
||||
// an optional RPC as a no-op rather than a failure. Prefers the typed status
|
||||
// code and falls back to the message for paths that lose the status (e.g. errors
|
||||
// wrapped across non-gRPC boundaries).
|
||||
func IsUnimplemented(err error) bool {
|
||||
if err == nil {
|
||||
return false
|
||||
}
|
||||
if status.Code(err) == codes.Unimplemented {
|
||||
return true
|
||||
}
|
||||
return strings.Contains(strings.ToLower(err.Error()), "unimplemented")
|
||||
}
|
||||
|
||||
// StreamTranscriptionUnsupported returns the canonical error a backend returns
|
||||
// when it (or the loaded model) cannot serve the server-streaming
|
||||
// AudioTranscriptionStream RPC. It carries codes.Unimplemented like the live
|
||||
|
||||
@@ -55,6 +55,18 @@ var _ = Describe("grpcerrors", func() {
|
||||
Expect(grpcerrors.IsModelNotLoaded(err)).To(BeFalse())
|
||||
})
|
||||
|
||||
DescribeTable("IsUnimplemented",
|
||||
func(err error, want bool) {
|
||||
Expect(grpcerrors.IsUnimplemented(err)).To(Equal(want))
|
||||
},
|
||||
Entry("nil", nil, false),
|
||||
Entry("typed code", status.Error(codes.Unimplemented, "method Free not implemented"), true),
|
||||
Entry("stale stub message (Unknown code)", errors.New("rpc error: code = Unimplemented desc = "), true),
|
||||
Entry("unrelated error", errors.New("context deadline exceeded"), false),
|
||||
Entry("unrelated grpc code", status.Error(codes.Unavailable, "connection refused"), false),
|
||||
Entry("model not loaded is NOT unimplemented", grpcerrors.ModelNotLoaded("parakeet-cpp"), false),
|
||||
)
|
||||
|
||||
It("StreamTranscriptionUnsupported carries Unimplemented and is not ModelNotLoaded", func() {
|
||||
err := grpcerrors.StreamTranscriptionUnsupported("parakeet-cpp", "not a streaming model")
|
||||
Expect(status.Code(err)).To(Equal(codes.Unimplemented))
|
||||
|
||||
@@ -11,6 +11,7 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/hpcloud/tail"
|
||||
"github.com/mudler/LocalAI/pkg/grpc/grpcerrors"
|
||||
"github.com/mudler/LocalAI/pkg/signals"
|
||||
process "github.com/mudler/go-processmanager"
|
||||
"github.com/mudler/xlog"
|
||||
@@ -52,10 +53,21 @@ func (ml *ModelLoader) deleteProcess(s string) error {
|
||||
hook(s)
|
||||
}
|
||||
|
||||
// Free GPU resources before stopping the process to ensure VRAM is released
|
||||
// Free GPU resources before stopping the process to ensure VRAM is released.
|
||||
// Free is optional: backends that don't override it (the generated stub, many
|
||||
// Python/external backends, or a federation proxy in distributed mode) return
|
||||
// gRPC Unimplemented. That is expected, not a failure — VRAM is reclaimed when
|
||||
// the process is stopped below, or by the remote unloader for remote backends —
|
||||
// so don't surface it as an error.
|
||||
xlog.Debug("Calling Free() to release GPU resources", "model", s)
|
||||
if err := model.GRPC(false, ml.wd).Free(context.Background()); err != nil {
|
||||
xlog.Warn("Error freeing GPU resources", "error", err, "model", s)
|
||||
if grpcerrors.IsUnimplemented(err) {
|
||||
xlog.Debug("Backend does not implement Free(); GPU release handled on process stop", "model", s)
|
||||
} else {
|
||||
// Now that the expected Unimplemented case is filtered out above, a
|
||||
// remaining error is a genuine failure to release VRAM — surface it.
|
||||
xlog.Error("Error freeing GPU resources", "error", err, "model", s)
|
||||
}
|
||||
}
|
||||
|
||||
process := model.Process()
|
||||
|
||||
Reference in New Issue
Block a user