Files
LocalAI/tests/e2e/distributed/model_routing_test.go
LocalAI [bot] 170d55c67d fix(distributed): honor NodeSelector in cached-replica lookup, stop empty-backend reconciler scaleups (#9652)
* fix(distributed): honor NodeSelector in cached-replica lookup, stop empty-backend reconciler scaleups

Two distinct bugs were causing tight retry loops in the distributed scheduler:

1. FindAndLockNodeWithModel ignored the model's NodeSelector. When a model
   was loaded on multiple nodes and only some matched the current selector,
   the function returned the lowest-in_flight node — even one the selector
   excluded. Route()'s post-check then fell through to scheduleNewModel,
   which targeted the matching node where the model was already at
   MaxReplicasPerModel capacity. Eviction couldn't help (the only loaded
   model on that node was the one being requested, and it was busy), so
   every request looped through "evicting LRU" → "all models busy".

   Fix: thread an optional candidateNodeIDs filter through
   FindAndLockNodeWithModel. Route() resolves the selector once via a new
   resolveSelectorCandidates helper and passes the matching IDs to both
   the cached-replica lookup and scheduleNewModel. The same helper
   replaces the inline selector block in scheduleNewModel.

2. ScheduleAndLoadModel (reconciler scale-up path) fell back to
   scheduleNewModel with backendType="" when no replica had ever been
   loaded for a model. The worker rejected the resulting backend.install
   ("backend name is empty") on every reconciler tick (~30s).

   Fix: remove the broken fallback. When GetModelLoadInfo has nothing
   stored, return a clear error instead of firing a doomed NATS install.
   The reconciler's existing scale-up failure log surfaces it once per
   tick; the model auto-replicates as soon as Route() serves it once and
   stores load info.

Also downgrade the post-LoadModel-failure StopGRPC error to Debug — that
cleanup attempt usually hits "model not found" because LoadModel failed
before registering the process, and the outer "Failed to load model"
error already carries the real reason.

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Assisted-by: claude-code:claude-opus-4-7 [Read] [Edit] [Bash]

* test(distributed): cover selector-aware FindAndLockNodeWithModel and reconciler scaleup guard

Two regression tests for the bugs fixed in the previous commit:

1. FindAndLockNodeWithModel — registry-level integration tests verify the
   candidateNodeIDs filter:
   - Returns the included node even when an excluded node has lower
     in_flight (the original selector-mismatch loop scenario).
   - Returns not-found when the model is loaded only on excluded nodes,
     forcing Route() to fall through to a fresh schedule instead of
     reusing the excluded replica.

2. ScheduleAndLoadModel — mock-based test verifies the reconciler scale-up
   path returns an error and does NOT fire backend.install when no replica
   has been loaded yet. fakeUnloader gains an installCalls slice so this
   negative assertion is direct.

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Assisted-by: claude-code:claude-opus-4-7 [Read] [Edit] [Bash]

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Co-authored-by: Ettore Di Giacinto <mudler@localai.io>
2026-05-04 09:42:14 +02:00

124 lines
4.4 KiB
Go

package distributed_test
import (
"context"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/services/nodes"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
pgdriver "gorm.io/driver/postgres"
"gorm.io/gorm"
"gorm.io/gorm/logger"
)
// Integration specs for distributed model routing: registry-backed replica
// lookup, in-flight accounting, and the non-distributed fallback.
var _ = Describe("Model Routing", Label("Distributed"), func() {
	var (
		infra    *TestInfra
		db       *gorm.DB
		registry *nodes.NodeRegistry
	)

	BeforeEach(func() {
		infra = SetupInfra("localai_routing_test")

		var err error
		db, err = gorm.Open(pgdriver.Open(infra.PGURL), &gorm.Config{
			Logger: logger.Default.LogMode(logger.Silent),
		})
		Expect(err).ToNot(HaveOccurred())

		registry, err = nodes.NewNodeRegistry(db)
		Expect(err).ToNot(HaveOccurred())
	})

	Context("ModelRouterAdapter from SmartRouter", func() {
		It("should create ModelRouterAdapter from SmartRouter", func() {
			smart := nodes.NewSmartRouter(registry, nodes.SmartRouterOptions{})
			Expect(smart).ToNot(BeNil())

			wrapped := nodes.NewModelRouterAdapter(smart)
			Expect(wrapped).ToNot(BeNil())

			// The adapter should provide a ModelRouter callback
			Expect(wrapped.AsModelRouter()).ToNot(BeNil())
		})

		It("should release in-flight counter on model unload", func() {
			ctx := context.Background()

			// Register a node with a loaded model
			gpuNode := &nodes.BackendNode{
				Name: "gpu-1", Address: "h1:50051",
			}
			Expect(registry.Register(ctx, gpuNode, true)).To(Succeed())
			Expect(registry.SetNodeModel(ctx, gpuNode.ID, "llama3", 0, "loaded", "", 0)).To(Succeed())
			Expect(registry.IncrementInFlight(ctx, gpuNode.ID, "llama3", 0)).To(Succeed())
			Expect(registry.IncrementInFlight(ctx, gpuNode.ID, "llama3", 0)).To(Succeed())

			// Verify in-flight count
			loaded, err := registry.GetNodeModels(ctx, gpuNode.ID)
			Expect(err).ToNot(HaveOccurred())
			Expect(loaded[0].InFlight).To(Equal(2))

			// FindAndLockNodeWithModel should return this node and atomically increment in-flight
			lockedNode, lockedModel, err := registry.FindAndLockNodeWithModel(ctx, "llama3", nil)
			Expect(err).ToNot(HaveOccurred())
			Expect(lockedNode.ID).To(Equal(gpuNode.ID))
			Expect(lockedModel.ModelName).To(Equal("llama3"))
			Expect(lockedModel.InFlight).To(Equal(2), "InFlight returned is the pre-increment snapshot from the query")

			// Verify the DB now has in_flight = 3 (2 manual + 1 from FindAndLock)
			loaded, err = registry.GetNodeModels(ctx, gpuNode.ID)
			Expect(err).ToNot(HaveOccurred())
			Expect(loaded[0].InFlight).To(Equal(3))

			// Simulate decrement (what Release does)
			Expect(registry.DecrementInFlight(ctx, gpuNode.ID, "llama3", 0)).To(Succeed())
			loaded, _ = registry.GetNodeModels(ctx, gpuNode.ID)
			Expect(loaded[0].InFlight).To(Equal(2))

			// The ModelRouterAdapter.ReleaseModel calls the stored Release function;
			// ReleaseModel on an unknown model should be a no-op (no panic).
			wrapped := nodes.NewModelRouterAdapter(nodes.NewSmartRouter(registry, nodes.SmartRouterOptions{}))
			Expect(func() { wrapped.ReleaseModel("nonexistent-model") }).ToNot(Panic())
		})

		It("should use SmartRouter to find nodes with a model", func() {
			ctx := context.Background()

			// Register multiple nodes
			first := &nodes.BackendNode{
				Name: "node-a", Address: "h1:50051",
			}
			second := &nodes.BackendNode{
				Name: "node-b", Address: "h2:50051",
			}
			Expect(registry.Register(ctx, first, true)).To(Succeed())
			Expect(registry.Register(ctx, second, true)).To(Succeed())

			// Load model on node1 only
			Expect(registry.SetNodeModel(ctx, first.ID, "llama3", 0, "loaded", "", 0)).To(Succeed())

			// Verify routing can find the model on exactly that node
			hits, err := registry.FindNodesWithModel(ctx, "llama3")
			Expect(err).ToNot(HaveOccurred())
			Expect(hits).To(HaveLen(1))
			Expect(hits[0].ID).To(Equal(first.ID))
		})
	})

	Context("Without --distributed", func() {
		It("should fall through to local loading without --distributed", func() {
			appCfg := config.NewApplicationConfig()
			Expect(appCfg.Distributed.Enabled).To(BeFalse())
			// Without distributed mode, no SmartRouter is created.
			// The ModelLoader uses its local process management.
			// This test documents the design decision.
			Expect(appCfg.Distributed.NatsURL).To(BeEmpty())
		})
	})
})