mirror of
https://github.com/mudler/LocalAI.git
synced 2026-06-05 07:16:10 -04:00
refactor(distributed): typed model-not-loaded error via gRPC status code
Replace the controller-side error-string match with a shared, code-aware helper. Go error types don't survive the gRPC boundary, so the signal is carried as a status code (FailedPrecondition):
- pkg/grpc/grpcerrors: ModelNotLoaded(backend) constructor + IsModelNotLoaded(err) checker (status code first, message fallback for backends not yet migrated).
- InFlightTrackingClient.reconcile now uses grpcerrors.IsModelNotLoaded.
- Migrate the Go backends that emit this error (parakeet-cpp, cloud-proxy, rfdetr-cpp) to the typed constructor.
Acting on a false positive is harmless (the model is just reloaded).
Assisted-by: Claude:claude-opus-4-8 go vet
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
@@ -14,6 +14,7 @@ import (
|
||||
"github.com/mudler/xlog"
|
||||
|
||||
"github.com/mudler/LocalAI/pkg/grpc/base"
|
||||
"github.com/mudler/LocalAI/pkg/grpc/grpcerrors"
|
||||
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
|
||||
"github.com/mudler/LocalAI/pkg/httpclient"
|
||||
)
|
||||
@@ -145,7 +146,7 @@ func resolveAPIKey(envName, filePath string) (string, error) {
|
||||
func (c *CloudProxy) PredictRich(opts *pb.PredictOptions) (reply *pb.Reply, err error) {
|
||||
cfg := c.cfg.Load()
|
||||
if cfg == nil {
|
||||
return nil, errors.New("cloud-proxy: model not loaded")
|
||||
return nil, grpcerrors.ModelNotLoaded("cloud-proxy")
|
||||
}
|
||||
if cfg.mode != modeTranslate {
|
||||
return nil, fmt.Errorf("cloud-proxy: Predict only valid in translate mode (have %s)", cfg.mode)
|
||||
@@ -175,7 +176,7 @@ func (c *CloudProxy) PredictRich(opts *pb.PredictOptions) (reply *pb.Reply, err
|
||||
func (c *CloudProxy) PredictStreamRich(opts *pb.PredictOptions, results chan<- *pb.Reply) (err error) {
|
||||
cfg := c.cfg.Load()
|
||||
if cfg == nil {
|
||||
return errors.New("cloud-proxy: model not loaded")
|
||||
return grpcerrors.ModelNotLoaded("cloud-proxy")
|
||||
}
|
||||
if cfg.mode != modeTranslate {
|
||||
return fmt.Errorf("cloud-proxy: PredictStream only valid in translate mode (have %s)", cfg.mode)
|
||||
@@ -269,7 +270,7 @@ func (c *CloudProxy) Forward(ctx context.Context, in <-chan *pb.ForwardRequest,
|
||||
|
||||
cfg := c.cfg.Load()
|
||||
if cfg == nil {
|
||||
return errors.New("cloud-proxy: model not loaded")
|
||||
return grpcerrors.ModelNotLoaded("cloud-proxy")
|
||||
}
|
||||
if cfg.mode != modePassthrough {
|
||||
return fmt.Errorf("cloud-proxy: Forward only valid in passthrough mode (have %s)", cfg.mode)
|
||||
|
||||
@@ -15,6 +15,7 @@ import (
|
||||
|
||||
"github.com/go-audio/wav"
|
||||
"github.com/mudler/LocalAI/pkg/grpc/base"
|
||||
"github.com/mudler/LocalAI/pkg/grpc/grpcerrors"
|
||||
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
|
||||
"github.com/mudler/LocalAI/pkg/utils"
|
||||
"github.com/mudler/xlog"
|
||||
@@ -230,7 +231,7 @@ func (p *ParakeetCpp) runBatch(reqs []*batchRequest) {
|
||||
// (L2).
|
||||
func (p *ParakeetCpp) AudioTranscription(ctx context.Context, opts *pb.TranscriptRequest) (pb.TranscriptResult, error) {
|
||||
if p.ctxPtr == 0 {
|
||||
return pb.TranscriptResult{}, errors.New("parakeet-cpp: model not loaded")
|
||||
return pb.TranscriptResult{}, grpcerrors.ModelNotLoaded("parakeet-cpp")
|
||||
}
|
||||
if opts.Dst == "" {
|
||||
return pb.TranscriptResult{}, errors.New("parakeet-cpp: TranscriptRequest.dst (audio path) is required")
|
||||
@@ -351,7 +352,7 @@ func (p *ParakeetCpp) AudioTranscriptionStream(ctx context.Context, opts *pb.Tra
|
||||
defer close(results)
|
||||
|
||||
if p.ctxPtr == 0 {
|
||||
return errors.New("parakeet-cpp: model not loaded")
|
||||
return grpcerrors.ModelNotLoaded("parakeet-cpp")
|
||||
}
|
||||
if opts.Dst == "" {
|
||||
return errors.New("parakeet-cpp: TranscriptRequest.dst (audio path) is required")
|
||||
|
||||
@@ -2,11 +2,11 @@ package nodes
|
||||
|
||||
import (
|
||||
"context"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/mudler/LocalAI/pkg/grpc"
|
||||
"github.com/mudler/LocalAI/pkg/grpc/grpcerrors"
|
||||
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
|
||||
"github.com/mudler/xlog"
|
||||
ggrpc "google.golang.org/grpc"
|
||||
@@ -72,7 +72,7 @@ func (c *InFlightTrackingClient) track(ctx context.Context) func() {
|
||||
// model stays unreachable until the controller restarts. The original error is
|
||||
// returned unchanged.
|
||||
func (c *InFlightTrackingClient) reconcile(err error) error {
|
||||
if !isModelNotLoaded(err) {
|
||||
if !grpcerrors.IsModelNotLoaded(err) {
|
||||
return err
|
||||
}
|
||||
rmCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
|
||||
@@ -87,12 +87,6 @@ func (c *InFlightTrackingClient) reconcile(err error) error {
|
||||
return err
|
||||
}
|
||||
|
||||
// isModelNotLoaded reports whether err looks like a backend "model not loaded"
// response. Backends phrase it as "<backend>: model not loaded"; the check is a
// case-insensitive substring match, so the phrase may appear anywhere in the
// error text. A nil error never matches.
func isModelNotLoaded(err error) bool {
	if err == nil {
		return false
	}
	msg := strings.ToLower(err.Error())
	return strings.Contains(msg, "model not loaded")
}
|
||||
|
||||
// --- Tracked inference methods ---
|
||||
|
||||
func (c *InFlightTrackingClient) Predict(ctx context.Context, in *pb.PredictOptions, opts ...ggrpc.CallOption) (*pb.Reply, error) {
|
||||
|
||||
35
pkg/grpc/grpcerrors/errors.go
Normal file
35
pkg/grpc/grpcerrors/errors.go
Normal file
@@ -0,0 +1,35 @@
|
||||
// Package grpcerrors defines well-known error signals shared between backends
|
||||
// (which produce them) and the router (which consumes them). Go error types do
|
||||
// not survive the gRPC boundary, so these conditions are carried as gRPC status
|
||||
// codes and detected primarily via the code; matching the error message is kept
// only as a fallback for backends that have not adopted the typed errors yet.
|
||||
package grpcerrors
|
||||
|
||||
import (
|
||||
"strings"
|
||||
|
||||
"google.golang.org/grpc/codes"
|
||||
"google.golang.org/grpc/status"
|
||||
)
|
||||
|
||||
// ModelNotLoaded returns the canonical error a backend returns when it has no
|
||||
// model loaded for the request. It carries codes.FailedPrecondition so callers
|
||||
// can detect it across the gRPC boundary without matching the message string.
|
||||
func ModelNotLoaded(backend string) error {
|
||||
return status.Errorf(codes.FailedPrecondition, "%s: model not loaded", backend)
|
||||
}
|
||||
|
||||
// IsModelNotLoaded reports whether err signals that the backend has no model
|
||||
// loaded. It prefers the typed gRPC status code (FailedPrecondition) and falls
|
||||
// back to the message for backends that have not yet adopted ModelNotLoaded.
|
||||
//
|
||||
// Acting on a false positive is harmless: the only consequence upstream is that
|
||||
// the model is reloaded, which is idempotent.
|
||||
func IsModelNotLoaded(err error) bool {
|
||||
if err == nil {
|
||||
return false
|
||||
}
|
||||
if status.Code(err) == codes.FailedPrecondition {
|
||||
return true
|
||||
}
|
||||
return strings.Contains(strings.ToLower(err.Error()), "model not loaded")
|
||||
}
|
||||
37
pkg/grpc/grpcerrors/errors_test.go
Normal file
37
pkg/grpc/grpcerrors/errors_test.go
Normal file
@@ -0,0 +1,37 @@
|
||||
package grpcerrors_test
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"testing"
|
||||
|
||||
. "github.com/onsi/ginkgo/v2"
|
||||
. "github.com/onsi/gomega"
|
||||
|
||||
"github.com/mudler/LocalAI/pkg/grpc/grpcerrors"
|
||||
"google.golang.org/grpc/codes"
|
||||
"google.golang.org/grpc/status"
|
||||
)
|
||||
|
||||
func TestGRPCErrors(t *testing.T) {
|
||||
RegisterFailHandler(Fail)
|
||||
RunSpecs(t, "grpcerrors test suite")
|
||||
}
|
||||
|
||||
var _ = Describe("grpcerrors", func() {
|
||||
DescribeTable("IsModelNotLoaded",
|
||||
func(err error, want bool) {
|
||||
Expect(grpcerrors.IsModelNotLoaded(err)).To(Equal(want))
|
||||
},
|
||||
Entry("nil", nil, false),
|
||||
Entry("typed via constructor", grpcerrors.ModelNotLoaded("parakeet-cpp"), true),
|
||||
Entry("typed code only", status.Error(codes.FailedPrecondition, "anything"), true),
|
||||
Entry("legacy message (Unknown code)", errors.New("parakeet-cpp: model not loaded"), true),
|
||||
Entry("legacy message mixed case", errors.New("Backend: Model Not Loaded"), true),
|
||||
Entry("unrelated error", errors.New("context deadline exceeded"), false),
|
||||
Entry("unrelated grpc code", status.Error(codes.Unavailable, "connection refused"), false),
|
||||
)
|
||||
|
||||
It("ModelNotLoaded carries FailedPrecondition", func() {
|
||||
Expect(status.Code(grpcerrors.ModelNotLoaded("whisper"))).To(Equal(codes.FailedPrecondition))
|
||||
})
|
||||
})
|
||||
Reference in New Issue
Block a user