mirror of
https://github.com/mudler/LocalAI.git
synced 2026-06-06 07:46:15 -04:00
* fix(distributed): self-heal stale 'model not loaded' routing In distributed mode the registry can list a model as loaded on a node while the worker has evicted it (autonomous LRU eviction, an out-of-band unload, etc.) yet the backend process survives. The router's cached-node check only verifies the process is alive (probeHealth), so it routes there and inference fails with "<backend>: model not loaded" — and stays broken until the controller restarts and rebuilds its registry. InFlightTrackingClient now reconciles this: when a tracked inference call returns a model-not-loaded error, it drops the stale replica row (RemoveNodeModel) so the next request reloads the model on a healthy node instead of routing back to the evicted one. The original error is returned unchanged; only the registry is corrected. Assisted-by: Claude:claude-opus-4-8 go vet Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactor(distributed): typed model-not-loaded error via gRPC status code Replace the controller-side error-string match with a shared, code-aware helper. Go error types don't survive the gRPC boundary, so the signal is carried as a status code (FailedPrecondition): - pkg/grpc/grpcerrors: ModelNotLoaded(backend) constructor + IsModelNotLoaded(err) checker (status-code first, message fallback for backends not yet migrated). - InFlightTrackingClient.reconcile now uses grpcerrors.IsModelNotLoaded. - Migrate the Go backends that emit this error (parakeet-cpp, cloud-proxy, rfdetr-cpp) to the typed constructor. Acting on a false positive is harmless (the model is just reloaded). Assisted-by: Claude:claude-opus-4-8 go vet Signed-off-by: Ettore Di Giacinto <mudler@localai.io> --------- Signed-off-by: Ettore Di Giacinto <mudler@localai.io> Co-authored-by: Ettore Di Giacinto <mudler@localai.io>
38 lines
1.2 KiB
Go
38 lines
1.2 KiB
Go
package grpcerrors_test
|
|
|
|
import (
|
|
"errors"
|
|
"testing"
|
|
|
|
. "github.com/onsi/ginkgo/v2"
|
|
. "github.com/onsi/gomega"
|
|
|
|
"github.com/mudler/LocalAI/pkg/grpc/grpcerrors"
|
|
"google.golang.org/grpc/codes"
|
|
"google.golang.org/grpc/status"
|
|
)
|
|
|
|
func TestGRPCErrors(t *testing.T) {
|
|
RegisterFailHandler(Fail)
|
|
RunSpecs(t, "grpcerrors test suite")
|
|
}
|
|
|
|
var _ = Describe("grpcerrors", func() {
|
|
DescribeTable("IsModelNotLoaded",
|
|
func(err error, want bool) {
|
|
Expect(grpcerrors.IsModelNotLoaded(err)).To(Equal(want))
|
|
},
|
|
Entry("nil", nil, false),
|
|
Entry("typed via constructor", grpcerrors.ModelNotLoaded("parakeet-cpp"), true),
|
|
Entry("typed code only", status.Error(codes.FailedPrecondition, "anything"), true),
|
|
Entry("legacy message (Unknown code)", errors.New("parakeet-cpp: model not loaded"), true),
|
|
Entry("legacy message mixed case", errors.New("Backend: Model Not Loaded"), true),
|
|
Entry("unrelated error", errors.New("context deadline exceeded"), false),
|
|
Entry("unrelated grpc code", status.Error(codes.Unavailable, "connection refused"), false),
|
|
)
|
|
|
|
It("ModelNotLoaded carries FailedPrecondition", func() {
|
|
Expect(status.Code(grpcerrors.ModelNotLoaded("whisper"))).To(Equal(codes.FailedPrecondition))
|
|
})
|
|
})
|