mirror of
https://github.com/mudler/LocalAI.git
synced 2026-06-14 19:58:44 -04:00
* feat(distributed): NATS JWT auth, TLS/mTLS options, and e2e coverage Mint per-node NATS user JWTs at registration when LOCALAI_NATS_ACCOUNT_SEED is set, and connect workers with scoped credentials from the register response. Add optional LOCALAI_NATS_TLS_CA/CERT/KEY for private CA and mTLS alongside tls:// URLs, plus test-e2e-distributed and NatsJWT container e2e specs. Document JWT setup (nats-auth-setup.sh) and TLS env vars in distributed-mode. Assisted-by: Grok:grok grok-build Signed-off-by: Richard Palethorpe <io@richiejp.com> * fix(distributed): correct NATS JWT scoping and harden client auth The JWT-auth path added in 46467cc7 had several gaps that fail silently under LOCALAI_NATS_REQUIRE_AUTH: - Agent-worker minted JWTs did not allow the subjects the agent worker actually subscribes to (jobs.mcp-ci.new and nodes.<id>.backend.stop), so MCP-CI jobs and backend-stop session cleanup were silently dropped. Scope the agent permission set to those subjects. - NATS subscription permission violations were swallowed (Subscribe returned a live-but-dead subscription). Confirm subscriptions with a server round-trip so a denial surfaces synchronously, and log async permission errors. - The backend worker connected anonymously when given a JWT without its paired seed; reject the unpaired credential instead. - The documented service-user permissions in nats-auth-setup.sh omitted prefixcache.>, which the frontend publishes and subscribes; add it. Also: add a credential-provider hook to the messaging client (consumed by the follow-up credential-lifecycle change), drop the always-nil error from NatsMessagingOptions, run go mod tidy (jwt/v2 and nkeys are now direct), and gofmt the feature's files. Tests: an agent-JWT e2e spec that connects to the enforcing NATS server and exercises every subscription the agent worker makes, plus permission allow-list coverage unit tests. 
Assisted-by: Claude:claude-opus-4-8 [Claude Code] Signed-off-by: Richard Palethorpe <io@richiejp.com> * feat(distributed): acquire and auto-refresh worker NATS credentials Workers fetched NATS credentials once at startup, which broke two cases under JWT auth: a worker that registered while still pending admin approval never received a minted JWT (it connected unauthenticated and gave up), and a long-running worker's 24h JWT expired with no way to renew it. Introduce workerregistry.NATSCredentialManager, built on idempotent re-registration (the frontend preserves the node row and mints a fresh JWT each call): - Acquire re-registers through admin approval until the node is approved and credentials are minted (or returns the first success when auth is not required, preserving anonymous-NATS behavior). - RefreshLoop re-registers before the JWT expires (~75% of its lifetime), updating the credentials served to the connection. - Both are bounded (default 100 attempts / consecutive failures) and return an error on exhaustion, so an unapprovable or unrenewable worker exits non-zero and surfaces the problem instead of hanging or drifting toward an expired credential. The messaging client gains WithUserJWTProvider, fetching credentials on each (re)connect so the connection transparently adopts a refreshed JWT when the server expires the old one. RegisterFull exposes the approval status and full response; Register delegates to it. Both the backend worker and the agent worker are wired to this: explicit env credentials are used as-is, minted credentials are acquired-with-wait and refreshed, and a permanent refresh failure shuts the worker down so it restarts and re-acquires. Tests cover Acquire (wait-through-pending, bounded give-up, context cancel), RefreshLoop (refresh-before-expiry, bounded failure, no-expiry exit) and jwtExpiry decoding. Docs updated in distributed-mode.md. 
Assisted-by: Claude:claude-opus-4-8 [Claude Code] Signed-off-by: Richard Palethorpe <io@richiejp.com> --------- Signed-off-by: Richard Palethorpe <io@richiejp.com>
156 lines
4.1 KiB
Go
156 lines
4.1 KiB
Go
package distributed_test
|
|
|
|
import (
|
|
"bytes"
|
|
"context"
|
|
"fmt"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/mudler/LocalAI/core/services/messaging"
|
|
"github.com/mudler/LocalAI/pkg/natsauth"
|
|
"github.com/nats-io/jwt/v2"
|
|
"github.com/nats-io/nkeys"
|
|
|
|
. "github.com/onsi/ginkgo/v2"
|
|
. "github.com/onsi/gomega"
|
|
|
|
"github.com/testcontainers/testcontainers-go"
|
|
tcnats "github.com/testcontainers/testcontainers-go/modules/nats"
|
|
)
|
|
|
|
// JWTTestInfra holds a NATS server configured with JWT auth and minted worker credentials.
|
|
type JWTTestInfra struct {
|
|
*TestInfra
|
|
AccountSeed string
|
|
NodeID string
|
|
WorkerJWT string
|
|
WorkerSeed string
|
|
}
|
|
|
|
// SetupJWTInfra starts NATS with an in-memory JWT resolver and returns worker credentials
|
|
// minted the same way as node registration (pkg/natsauth).
|
|
func SetupJWTInfra() *JWTTestInfra {
|
|
GinkgoHelper()
|
|
|
|
infra := &JWTTestInfra{TestInfra: &TestInfra{Ctx: context.Background()}}
|
|
|
|
operatorJWT, accountJWT, accountSeed, err := jwtResolverMaterial()
|
|
Expect(err).ToNot(HaveOccurred())
|
|
infra.AccountSeed = accountSeed
|
|
|
|
conf := fmt.Sprintf(`listen: 0.0.0.0:4222
|
|
|
|
operator: %s
|
|
|
|
resolver: MEMORY
|
|
resolver_preload: {
|
|
%s: %s
|
|
}
|
|
`, operatorJWT, accountPublicKeyFromSeed(accountSeed), accountJWT)
|
|
|
|
var natsContainer *tcnats.NATSContainer
|
|
// Override default testcontainers -js: JetStream fails without a system account in JWT mode.
|
|
natsContainer, err = tcnats.Run(infra.Ctx, "nats:2-alpine",
|
|
tcnats.WithConfigFile(bytes.NewBufferString(conf)),
|
|
testcontainers.WithCmd("-c", "/etc/nats.conf"),
|
|
)
|
|
Expect(err).ToNot(HaveOccurred())
|
|
infra.NATSContainer = natsContainer
|
|
|
|
infra.NatsURL, err = infra.NATSContainer.ConnectionString(infra.Ctx)
|
|
Expect(err).ToNot(HaveOccurred())
|
|
|
|
infra.NodeID = "550e8400-e29b-41d4-a716-446655440000"
|
|
cfg := natsauth.Config{AccountSeed: infra.AccountSeed, WorkerJWTTTL: time.Hour}
|
|
infra.WorkerJWT, infra.WorkerSeed, err = cfg.MintWorkerJWT(infra.NodeID, "backend")
|
|
Expect(err).ToNot(HaveOccurred())
|
|
|
|
infra.NC, err = messaging.New(infra.NatsURL, messaging.WithUserJWT(infra.WorkerJWT, infra.WorkerSeed))
|
|
Expect(err).ToNot(HaveOccurred())
|
|
|
|
DeferCleanup(func() {
|
|
if infra.NC != nil {
|
|
infra.NC.Close()
|
|
}
|
|
if infra.NATSContainer != nil {
|
|
_ = infra.NATSContainer.Terminate(context.Background())
|
|
}
|
|
})
|
|
|
|
return infra
|
|
}
|
|
|
|
// jwtResolverMaterial builds operator + account JWTs for a MEMORY resolver.
|
|
// Follows the NATS JWT tutorial: self-signed account, then operator re-sign, with the
|
|
// account identity key listed as a signing key so MintWorkerJWT can use the account seed.
|
|
func jwtResolverMaterial() (operatorJWT, accountJWT, accountSeed string, err error) {
|
|
okp, err := nkeys.CreateOperator()
|
|
if err != nil {
|
|
return "", "", "", err
|
|
}
|
|
opk, err := okp.PublicKey()
|
|
if err != nil {
|
|
return "", "", "", err
|
|
}
|
|
oc := jwt.NewOperatorClaims(opk)
|
|
oc.Name = "localai-test-operator"
|
|
oskp, err := nkeys.CreateOperator()
|
|
if err != nil {
|
|
return "", "", "", err
|
|
}
|
|
ospk, err := oskp.PublicKey()
|
|
if err != nil {
|
|
return "", "", "", err
|
|
}
|
|
oc.SigningKeys.Add(ospk)
|
|
operatorJWT, err = oc.Encode(okp)
|
|
if err != nil {
|
|
return "", "", "", err
|
|
}
|
|
|
|
akp, err := nkeys.CreateAccount()
|
|
if err != nil {
|
|
return "", "", "", err
|
|
}
|
|
seed, err := akp.Seed()
|
|
if err != nil {
|
|
return "", "", "", err
|
|
}
|
|
accountSeed = string(seed)
|
|
|
|
apk, err := akp.PublicKey()
|
|
if err != nil {
|
|
return "", "", "", err
|
|
}
|
|
ac := jwt.NewAccountClaims(apk)
|
|
ac.Name = "localai-test-account"
|
|
ac.SigningKeys.Add(apk)
|
|
accountJWT, err = ac.Encode(akp)
|
|
if err != nil {
|
|
return "", "", "", err
|
|
}
|
|
ac, err = jwt.DecodeAccountClaims(accountJWT)
|
|
if err != nil {
|
|
return "", "", "", err
|
|
}
|
|
accountJWT, err = ac.Encode(oskp)
|
|
if err != nil {
|
|
return "", "", "", err
|
|
}
|
|
return operatorJWT, accountJWT, accountSeed, nil
|
|
}
|
|
|
|
func accountPublicKeyFromSeed(accountSeed string) string {
|
|
akp, err := nkeys.FromSeed([]byte(accountSeed))
|
|
Expect(err).ToNot(HaveOccurred())
|
|
pk, err := akp.PublicKey()
|
|
Expect(err).ToNot(HaveOccurred())
|
|
return pk
|
|
}
|
|
|
|
// nodeSubjectPrefix returns the sanitized nodes.* prefix for a node ID.
//
// Each '.', '*', '>', space, tab and newline in nodeID is replaced with '-'
// and the result is appended to "nodes.". An empty nodeID yields "nodes.".
func nodeSubjectPrefix(nodeID string) string {
	tok := strings.NewReplacer(
		".", "-",
		"*", "-",
		">", "-",
		" ", "-",
		"\t", "-",
		"\n", "-",
	).Replace(nodeID)
	return "nodes." + tok
}