Files
LocalAI/pkg/grpc/forward_test.go
Richard Palethorpe 6a80e23733 feat(middleware): Model routing, PII filtering, Cloud model proxies (#9802)
Add a routing middleware stack and a cloud-proxy backend.

* cloud-proxy: a Go gRPC backend that forwards OpenAI- and
  Anthropic-shaped chat requests to upstream providers, with an
  optional translate mode (OpenAI request -> Anthropic /v1/messages
  -> OpenAI response) and full tool-calling support.

* routing: admission control, content-aware model routing
  (embedding cache + classifier + rerank + Arch-Router score),
  PII detection/redaction (regex + NER) with streaming filter and
  OpenAI/Anthropic adapters, and a per-user/per-key billing recorder
  backed by GORM or in-memory storage.

* middleware: UsageMiddleware records usage via the billing recorder,
  plus admission, route-model, usage-stamp and trace middlewares.

* observability: BackendTrace ring buffer stores full request bodies
  (capped), MITM proxy emits structured trace events, and router
  classifier decisions surface at /api/router/decide.

* gallery: Arch-Router-1.5B (Q4_K_M and Q8_0).

* UI: cloud-proxy model-editor fields, classifier system-prompt and
  score-normalization config, and a Traces page rendering request
  bodies.

Assisted-by: claude-code:claude-opus-4-7 [Read] [Edit] [Bash]

Signed-off-by: Richard Palethorpe <io@richiejp.com>
2026-05-25 09:28:27 +02:00

95 lines
2.8 KiB
Go

package grpc
import (
"context"
"errors"
"io"
"github.com/mudler/LocalAI/pkg/grpc/base"
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)
// echoForwardModel is a minimal AIModel test double: its Forward method
// echoes each request's body chunk back as a reply, which lets the specs
// exercise the in-process bidi plumbing without standing up a real HTTP
// upstream.
//
// In this file it defines only Forward; the rest of its method set comes
// from the embedded base.SingleThread, a type that Provide also accepts on
// its own in the "UnimplementedBase" spec.
type echoForwardModel struct {
base.SingleThread
}
// Forward echoes the request stream back to the caller.
//
// Before the first body chunk is echoed it emits a single preamble reply
// carrying Status 200 and a "Content-Type: text/event-stream" header. After
// that, every request received on in produces exactly one reply on out whose
// BodyChunk is the request's BodyChunk. If in is closed before any request
// arrives, nothing is sent at all.
//
// out is always closed when Forward returns. The context is ignored and the
// returned error is always nil.
func (m *echoForwardModel) Forward(_ context.Context, in <-chan *pb.ForwardRequest, out chan<- *pb.ForwardReply) error {
	defer close(out)

	// Peel off the first request so the preamble is only produced when
	// there is at least one request to echo.
	req, open := <-in
	if !open {
		return nil
	}

	out <- &pb.ForwardReply{
		Status:  200,
		Headers: []*pb.ForwardHeader{{Name: "Content-Type", Value: "text/event-stream"}},
	}

	// Echo the request already in hand, then keep receiving until the
	// caller closes in.
	for ; open; req, open = <-in {
		out <- &pb.ForwardReply{BodyChunk: req.BodyChunk}
	}
	return nil
}
// Specs for the Forward RPC when the client is wired to an in-process
// backend registered via Provide rather than to a real gRPC socket.
var _ = Describe("Forward RPC (in-process)", func() {
	It("round-trips status, headers, and body chunks", func() {
		// Provide registers an AIModel under a virtual address so
		// NewClient routes via the in-process embedBackend instead of
		// dialing a real socket.
		addr := "test://forward-echo"
		Provide(addr, &echoForwardModel{})
		c := NewClient(addr, true, nil, false)

		stream, err := c.Forward(context.Background())
		Expect(err).NotTo(HaveOccurred())

		// Two requests in total: the first carries path/method/headers
		// together with the first body chunk, the second carries only the
		// remaining body chunk.
		Expect(stream.Send(&pb.ForwardRequest{
			Path:      "/v1/chat/completions",
			Method:    "POST",
			Headers:   []*pb.ForwardHeader{{Name: "Authorization", Value: "Bearer x"}},
			BodyChunk: []byte(`{"hello":`),
		})).To(Succeed())
		Expect(stream.Send(&pb.ForwardRequest{BodyChunk: []byte(`"world"}`)})).To(Succeed())
		Expect(stream.CloseSend()).To(Succeed())

		// First reply carries status + headers.
		first, err := stream.Recv()
		Expect(err).NotTo(HaveOccurred())
		Expect(first.Status).To(Equal(int32(200)))
		Expect(first.Headers).To(HaveLen(1))
		Expect(first.Headers[0].Name).To(Equal("Content-Type"))
		// Check the value too, so a header whose value is dropped or
		// mangled in transit does not go unnoticed.
		Expect(first.Headers[0].Value).To(Equal("text/event-stream"))

		// Body echoes back, one reply per request chunk. io.EOF marks the
		// clean end of the reply stream.
		var body []byte
		for {
			r, err := stream.Recv()
			if errors.Is(err, io.EOF) {
				break
			}
			Expect(err).NotTo(HaveOccurred())
			body = append(body, r.BodyChunk...)
		}
		Expect(string(body)).To(Equal(`{"hello":"world"}`))
	})

	It("UnimplementedBase returns an error on Forward", func() {
		// The default base.Base.Forward returns "unimplemented" — any
		// backend that doesn't opt in should surface that to callers
		// rather than silently succeed.
		addr := "test://forward-base"
		Provide(addr, &base.SingleThread{})
		c := NewClient(addr, true, nil, false)

		stream, err := c.Forward(context.Background())
		Expect(err).NotTo(HaveOccurred())
		Expect(stream.CloseSend()).To(Succeed())

		_, err = stream.Recv()
		Expect(err).To(HaveOccurred())
		// io.EOF is also a non-nil error, and it is exactly what a cleanly
		// finished stream yields (the round-trip spec above relies on it).
		// Accepting it here would let a Forward that silently succeeds
		// pass this spec, so it is rejected explicitly.
		Expect(errors.Is(err, io.EOF)).To(BeFalse(),
			"Recv returned io.EOF: Forward ended cleanly instead of reporting an error")
	})
})