From 0e2b84d8e3628dc24569072ea57873cda1e7338b Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Fri, 22 May 2026 19:39:54 +0000 Subject: [PATCH] feat(distributed): add configurable NATS backend install/upgrade timeouts Adds BackendInstallTimeout and BackendUpgradeTimeout to DistributedConfig with 15m defaults, following the existing MCPToolTimeout / WorkerWaitTimeout pattern. These will replace the hardcoded literals in RemoteUnloaderAdapter so admin-driven backend installs across the cluster survive long OCI image pulls that previously timed out at 3m. Signed-off-by: Ettore Di Giacinto --- core/config/distributed_config.go | 29 +++++++++++++++++++++ core/config/distributed_config_test.go | 36 ++++++++++++++++++++++++++ 2 files changed, 65 insertions(+) create mode 100644 core/config/distributed_config_test.go diff --git a/core/config/distributed_config.go b/core/config/distributed_config.go index 0b77d1ffd..f427c23b8 100644 --- a/core/config/distributed_config.go +++ b/core/config/distributed_config.go @@ -42,6 +42,9 @@ type DistributedConfig struct { MCPCIJobTimeout time.Duration // MCP CI job execution timeout (default 10m) + BackendInstallTimeout time.Duration // NATS round-trip timeout for backend.install (default 15m) + BackendUpgradeTimeout time.Duration // NATS round-trip timeout for backend.upgrade (default 15m) + MaxUploadSize int64 // Maximum upload body size in bytes (default 50 GB) AgentWorkerConcurrency int `yaml:"agent_worker_concurrency" json:"agent_worker_concurrency" env:"LOCALAI_AGENT_WORKER_CONCURRENCY"` @@ -75,6 +78,8 @@ func (c DistributedConfig) Validate() error { "health-check-interval": c.HealthCheckInterval, "stale-node-threshold": c.StaleNodeThreshold, "mcp-ci-job-timeout": c.MCPCIJobTimeout, + "backend-install-timeout": c.BackendInstallTimeout, + "backend-upgrade-timeout": c.BackendUpgradeTimeout, } { if d < 0 { return fmt.Errorf("%s must not be negative", name) @@ -137,6 +142,18 @@ func WithStorageSecretKey(key string) AppOption { 
} } +func WithBackendInstallTimeout(d time.Duration) AppOption { + return func(o *ApplicationConfig) { + o.Distributed.BackendInstallTimeout = d + } +} + +func WithBackendUpgradeTimeout(d time.Duration) AppOption { + return func(o *ApplicationConfig) { + o.Distributed.BackendUpgradeTimeout = d + } +} + var EnableAutoApproveNodes = func(o *ApplicationConfig) { o.Distributed.AutoApproveNodes = true } @@ -150,11 +167,23 @@ const ( DefaultHealthCheckInterval = 15 * time.Second DefaultStaleNodeThreshold = 60 * time.Second DefaultMCPCIJobTimeout = 10 * time.Minute + DefaultBackendInstallTimeout = 15 * time.Minute + DefaultBackendUpgradeTimeout = 15 * time.Minute ) // DefaultMaxUploadSize is the default maximum upload body size (50 GB). const DefaultMaxUploadSize int64 = 50 << 30 +// BackendInstallTimeoutOrDefault returns BackendInstallTimeout, or DefaultBackendInstallTimeout (15m) when it is zero (unset). +func (c DistributedConfig) BackendInstallTimeoutOrDefault() time.Duration { + return cmp.Or(c.BackendInstallTimeout, DefaultBackendInstallTimeout) +} + +// BackendUpgradeTimeoutOrDefault returns BackendUpgradeTimeout, or DefaultBackendUpgradeTimeout (15m) when it is zero (unset). +func (c DistributedConfig) BackendUpgradeTimeoutOrDefault() time.Duration { + return cmp.Or(c.BackendUpgradeTimeout, DefaultBackendUpgradeTimeout) +} + // MCPToolTimeoutOrDefault returns the configured timeout or the default. func (c DistributedConfig) MCPToolTimeoutOrDefault() time.Duration { return cmp.Or(c.MCPToolTimeout, DefaultMCPToolTimeout) diff --git a/core/config/distributed_config_test.go b/core/config/distributed_config_test.go new file mode 100644 index 000000000..8fae7ef72 --- /dev/null +++ b/core/config/distributed_config_test.go @@ -0,0 +1,36 @@ +package config_test + +import ( + "time" + + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" + + "github.com/mudler/LocalAI/core/config" +) + +var _ = Describe("DistributedConfig backend NATS timeouts", func() { + Context("BackendInstallTimeoutOrDefault", func() { + It("returns 15 minutes when unset", func() { + c := config.DistributedConfig{} + Expect(c.BackendInstallTimeoutOrDefault()).To(Equal(15 * time.Minute)) + }) + + It("returns the configured value when set", func() { + c := config.DistributedConfig{BackendInstallTimeout: 42 * time.Minute} + Expect(c.BackendInstallTimeoutOrDefault()).To(Equal(42 * time.Minute)) + }) + }) + + Context("BackendUpgradeTimeoutOrDefault", func() { + It("returns 15 minutes when unset", func() { + c := config.DistributedConfig{} + Expect(c.BackendUpgradeTimeoutOrDefault()).To(Equal(15 * time.Minute)) + }) + + It("returns the configured value when set", func() { + c := config.DistributedConfig{BackendUpgradeTimeout: 30 * time.Minute} + Expect(c.BackendUpgradeTimeoutOrDefault()).To(Equal(30 * time.Minute)) + }) + }) +})