mirror of
https://github.com/mudler/LocalAI.git
synced 2026-06-14 19:58:44 -04:00
* feat(distributed): NATS JWT auth, TLS/mTLS options, and e2e coverage Mint per-node NATS user JWTs at registration when LOCALAI_NATS_ACCOUNT_SEED is set, and connect workers with scoped credentials from the register response. Add optional LOCALAI_NATS_TLS_CA/CERT/KEY for private CA and mTLS alongside tls:// URLs, plus test-e2e-distributed and NatsJWT container e2e specs. Document JWT setup (nats-auth-setup.sh) and TLS env vars in distributed-mode. Assisted-by: Grok:grok grok-build Signed-off-by: Richard Palethorpe <io@richiejp.com> * fix(distributed): correct NATS JWT scoping and harden client auth The JWT-auth path added in 46467cc7 had several gaps that fail silently under LOCALAI_NATS_REQUIRE_AUTH: - Agent-worker minted JWTs did not allow the subjects the agent worker actually subscribes to (jobs.mcp-ci.new and nodes.<id>.backend.stop), so MCP-CI jobs and backend-stop session cleanup were silently dropped. Scope the agent permission set to those subjects. - NATS subscription permission violations were swallowed (Subscribe returned a live-but-dead subscription). Confirm subscriptions with a server round-trip so a denial surfaces synchronously, and log async permission errors. - The backend worker connected anonymously when given a JWT without its paired seed; reject the unpaired credential instead. - The documented service-user permissions in nats-auth-setup.sh omitted prefixcache.>, which the frontend publishes and subscribes; add it. Also: add a credential-provider hook to the messaging client (consumed by the follow-up credential-lifecycle change), drop the always-nil error from NatsMessagingOptions, run go mod tidy (jwt/v2 and nkeys are now direct), and gofmt the feature's files. Tests: an agent-JWT e2e spec that connects to the enforcing NATS server and exercises every subscription the agent worker makes, plus permission allow-list coverage unit tests. Assisted-by: Claude:claude-opus-4-8 [Claude Code] Signed-off-by: Richard Palethorpe <io@richiejp.com> * feat(distributed): acquire and auto-refresh worker NATS credentials Workers fetched NATS credentials once at startup, which broke two cases under JWT auth: a worker that registered while still pending admin approval never received a minted JWT (it connected unauthenticated and gave up), and a long-running worker's 24h JWT expired with no way to renew it. Introduce workerregistry.NATSCredentialManager, built on idempotent re-registration (the frontend preserves the node row and mints a fresh JWT each call): - Acquire re-registers through admin approval until the node is approved and credentials are minted (or returns the first success when auth is not required, preserving anonymous-NATS behavior). - RefreshLoop re-registers before the JWT expires (~75% of its lifetime), updating the credentials served to the connection. - Both are bounded (default 100 attempts / consecutive failures) and return an error on exhaustion, so an unapprovable or unrenewable worker exits non-zero and surfaces the problem instead of hanging or drifting toward an expired credential. The messaging client gains WithUserJWTProvider, fetching credentials on each (re)connect so the connection transparently adopts a refreshed JWT when the server expires the old one. RegisterFull exposes the approval status and full response; Register delegates to it. Both the backend worker and the agent worker are wired to this: explicit env credentials are used as-is, minted credentials are acquired-with-wait and refreshed, and a permanent refresh failure shuts the worker down so it restarts and re-acquires. Tests cover Acquire (wait-through-pending, bounded give-up, context cancel), RefreshLoop (refresh-before-expiry, bounded failure, no-expiry exit) and jwtExpiry decoding. Docs updated in distributed-mode.md. Assisted-by: Claude:claude-opus-4-8 [Claude Code] Signed-off-by: Richard Palethorpe <io@richiejp.com> --------- Signed-off-by: Richard Palethorpe <io@richiejp.com>
100 lines
4.2 KiB
Go
100 lines
4.2 KiB
Go
package distributed_test
|
|
|
|
import (
|
|
"time"
|
|
|
|
"github.com/mudler/LocalAI/core/services/messaging"
|
|
"github.com/mudler/LocalAI/pkg/natsauth"
|
|
|
|
. "github.com/onsi/ginkgo/v2"
|
|
. "github.com/onsi/gomega"
|
|
)
|
|
|
|
var _ = Describe("NATS JWT Auth", Label("Distributed", "NatsJWT"), func() {
|
|
var infra *JWTTestInfra
|
|
|
|
BeforeEach(func() {
|
|
infra = SetupJWTInfra()
|
|
})
|
|
|
|
It("connects with a minted backend worker JWT and publishes on allowed subjects", func() {
|
|
// Backend workers may publish under nodes.<id>.files.> (see pkg/natsauth permissions).
|
|
subject := nodeSubjectPrefix(infra.NodeID) + ".files.in"
|
|
Expect(infra.NC.Publish(subject, map[string]string{"path": "/tmp/model"})).To(Succeed())
|
|
Expect(infra.NC.Conn().FlushTimeout(2 * time.Second)).To(Succeed())
|
|
Expect(infra.NC.Conn().IsConnected()).To(BeTrue())
|
|
})
|
|
|
|
It("allows backend subscribe on the node prefix", func() {
|
|
wild := nodeSubjectPrefix(infra.NodeID) + ".>"
|
|
sub, err := infra.NC.Subscribe(wild, func(_ []byte) {})
|
|
Expect(err).ToNot(HaveOccurred())
|
|
defer func() { _ = sub.Unsubscribe() }()
|
|
Expect(infra.NC.Conn().FlushTimeout(2 * time.Second)).To(Succeed())
|
|
Expect(infra.NC.Conn().IsConnected()).To(BeTrue())
|
|
})
|
|
|
|
It("rejects anonymous publish on the JWT-enabled server", func() {
|
|
anon, err := messaging.New(infra.NatsURL)
|
|
Expect(err).ToNot(HaveOccurred())
|
|
defer anon.Close()
|
|
|
|
err = anon.Publish("nodes.any.files.x", map[string]string{"x": "1"})
|
|
Expect(err).ToNot(HaveOccurred())
|
|
Expect(anon.Conn().FlushTimeout(2 * time.Second)).To(HaveOccurred())
|
|
})
|
|
|
|
It("denies backend publish to another node's subjects", func() {
|
|
other := nodeSubjectPrefix("other-node-id") + ".files.stage"
|
|
Expect(infra.NC.Publish(other, map[string]string{"stage": "nope"})).To(Succeed())
|
|
Eventually(func() error {
|
|
_ = infra.NC.Conn().FlushTimeout(500 * time.Millisecond)
|
|
return infra.NC.Conn().LastError()
|
|
}, "3s", "50ms").Should(HaveOccurred())
|
|
})
|
|
|
|
It("mints agent JWT without backend.install in claims", func() {
|
|
cfg := natsauth.Config{AccountSeed: infra.AccountSeed}
|
|
token, _, err := cfg.MintWorkerJWT("agent-node-1", "agent")
|
|
Expect(err).ToNot(HaveOccurred())
|
|
|
|
claims, err := natsauth.DecodeUserClaims(token)
|
|
Expect(err).ToNot(HaveOccurred())
|
|
Expect(claims.Permissions.Sub.Allow).To(ContainElement("agent.execute"))
|
|
for _, subj := range claims.Permissions.Sub.Allow {
|
|
Expect(subj).NotTo(ContainSubstring("backend.install"))
|
|
}
|
|
})
|
|
|
|
// Regression guard for the silent permission gaps: decoding the JWT claims
|
|
// (above) only proves the agent JWT is *restrictive*, not that it is
|
|
// *sufficient*. Stand a real agent connection up against the enforcing
|
|
// server and exercise every subscription core/cli/agent_worker.go actually
|
|
// makes — a denied SUB now surfaces synchronously via confirmSubscription,
|
|
// so a missing allow rule fails this test instead of silently dropping
|
|
// backend.stop / MCP-CI deliveries at runtime.
|
|
It("lets an agent-minted JWT establish all the subscriptions the agent worker uses", func() {
|
|
const nodeID = "agent-node-subs"
|
|
cfg := natsauth.Config{AccountSeed: infra.AccountSeed, WorkerJWTTTL: time.Hour}
|
|
token, seed, err := cfg.MintWorkerJWT(nodeID, "agent")
|
|
Expect(err).ToNot(HaveOccurred())
|
|
|
|
nc, err := messaging.New(infra.NatsURL, messaging.WithUserJWT(token, seed))
|
|
Expect(err).ToNot(HaveOccurred())
|
|
DeferCleanup(nc.Close)
|
|
|
|
// Mirror core/cli/agent_worker.go exactly.
|
|
_, err = nc.QueueSubscribeReply(messaging.SubjectMCPToolExecute, messaging.QueueAgentWorkers, func([]byte, func([]byte)) {})
|
|
Expect(err).ToNot(HaveOccurred(), "agent JWT must allow %s", messaging.SubjectMCPToolExecute)
|
|
|
|
_, err = nc.QueueSubscribeReply(messaging.SubjectMCPDiscovery, messaging.QueueAgentWorkers, func([]byte, func([]byte)) {})
|
|
Expect(err).ToNot(HaveOccurred(), "agent JWT must allow %s", messaging.SubjectMCPDiscovery)
|
|
|
|
_, err = nc.QueueSubscribe(messaging.SubjectMCPCIJobsNew, messaging.QueueWorkers, func([]byte) {})
|
|
Expect(err).ToNot(HaveOccurred(), "agent JWT must allow %s (MCP CI jobs)", messaging.SubjectMCPCIJobsNew)
|
|
|
|
_, err = nc.Subscribe(messaging.SubjectNodeBackendStop(nodeID), func([]byte) {})
|
|
Expect(err).ToNot(HaveOccurred(), "agent JWT must allow %s (MCP session cleanup)", messaging.SubjectNodeBackendStop(nodeID))
|
|
})
|
|
})
|