From fc618dcee6befb61dfb3867989bb4f358786aa8d Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Wed, 24 Jun 2026 10:13:37 +0200 Subject: [PATCH] fix(distributed): track in-flight for SoundDetection requests (#10475) The distributed router wraps backend clients in InFlightTrackingClient so the eviction logic knows which replicas are actively serving. Every inference method must be wrapped: track() increments in-flight on entry and decrements (plus fires onFirstComplete, which releases the load-time reservation) on return. SoundDetection was added after the tracking client and never got a wrapper, so its calls fell through to the embedded passthrough Backend. The increment/decrement never ran and, critically, onFirstComplete never fired, so the reservation set at model load was never released - leaving in-flight stuck at 1 and the replica permanently ineligible for eviction. Wrap SoundDetection like the other non-LLM methods and cover it in the "non-LLM inference methods track in-flight" table test. Assisted-by: Claude:claude-opus-4-8 [Claude Code] Signed-off-by: Ettore Di Giacinto Co-authored-by: Ettore Di Giacinto --- core/services/nodes/inflight.go | 6 ++++++ core/services/nodes/inflight_test.go | 7 +++++++ 2 files changed, 13 insertions(+) diff --git a/core/services/nodes/inflight.go b/core/services/nodes/inflight.go index 85b10f71c..b51ef6001 100644 --- a/core/services/nodes/inflight.go +++ b/core/services/nodes/inflight.go @@ -218,6 +218,12 @@ func (c *InFlightTrackingClient) Score(ctx context.Context, in *pb.ScoreRequest, return res, c.reconcile(err) } +func (c *InFlightTrackingClient) SoundDetection(ctx context.Context, in *pb.SoundDetectionRequest, opts ...ggrpc.CallOption) (*pb.SoundDetectionResponse, error) { + defer c.track(ctx)() + res, err := c.Backend.SoundDetection(ctx, in, opts...) + return res, c.reconcile(err) +} + func (c *InFlightTrackingClient) AudioEncode(ctx context.Context, in *pb.AudioEncodeRequest, opts ...ggrpc.CallOption) (*pb.AudioEncodeResult, error) { defer c.track(ctx)() res, err := c.Backend.AudioEncode(ctx, in, opts...) diff --git a/core/services/nodes/inflight_test.go b/core/services/nodes/inflight_test.go index 2eb90f9c6..5fc9820e7 100644 --- a/core/services/nodes/inflight_test.go +++ b/core/services/nodes/inflight_test.go @@ -408,6 +408,13 @@ var _ = Describe("InFlightTrackingClient", func() { return err }) }) + + It("SoundDetection", func() { + assertTracked(func() error { + _, err := client.SoundDetection(context.Background(), &pb.SoundDetectionRequest{}) + return err + }) + }) }) Describe("stale model reload (self-heal)", func() {