Files
LocalAI/docker-compose.distributed.yaml
LocalAI [bot] f3d829e2ef feat(distributed): add LOCALAI_DISTRIBUTED_SHARED_MODELS to skip staging on shared volumes (#10556) (#10566)
In distributed mode, even when the frontend and workers share the same
models directory via a shared volume mount, starting a model on a worker
re-staged (re-downloaded) it: stageModelFiles always uploads model files
into a tracking-key-namespaced subdir on the worker, and the staging probe
only checks that staged location, so a file already present on the shared
volume at the canonical path was never reused.

Add a config switch LOCALAI_DISTRIBUTED_SHARED_MODELS (default false). When
enabled, the operator asserts that all nodes mount the SAME models directory
at the SAME path, so staging is unnecessary: the frontend's absolute model
paths are already valid on the worker. In that mode stageModelFiles returns
the cloned opts unchanged without uploading, leaving the path fields pointing
at their canonical absolute paths so the worker loads them directly from the
shared volume.

The value is plumbed from DistributedConfig through SmartRouterOptions into
the SmartRouter. Docs and docker-compose.distributed.yaml updated.


Assisted-by: Claude:claude-opus-4-8 [Claude Code]

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Co-authored-by: Ettore Di Giacinto <mudler@localai.io>
2026-06-28 01:23:07 +02:00

226 lines
8.0 KiB
YAML

# Docker Compose for LocalAI Distributed Mode
#
# Starts a full distributed stack: PostgreSQL, NATS, a LocalAI frontend,
# one generic worker node (worker-1), and one agent worker (agent-worker-1).
#
# Model files are transferred from the frontend to backend nodes via HTTP
# — no shared volumes needed between frontend and backends.
#
# Usage:
# docker compose -f docker-compose.distributed.yaml up
#
# See docs: https://localai.io/features/distributed-mode/
services:
  # --- Infrastructure ---

  # PostgreSQL with pgvector. The frontend's auth and agent-pool database
  # URLs point at this service (postgres:5432, database "localai").
  postgres:
    image: quay.io/mudler/localrecall:v0.5.5-postgresql  # PostgreSQL with pgvector
    environment:
      POSTGRES_DB: localai
      POSTGRES_USER: localai
      POSTGRES_PASSWORD: localai  # Change this in production!
    volumes:
      - postgres_data:/var/lib/postgresql
    # The frontend only starts once this check passes
    # (depends_on -> condition: service_healthy).
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -U localai"]
      interval: 5s
      timeout: 3s
      retries: 10

  # NATS server that the frontend, workers and agent workers connect to
  # via LOCALAI_NATS_URL.
  nats:
    image: nats:2-alpine
    ports:
      - "4222:4222"  # Client connections
      - "8222:8222"  # HTTP monitoring (optional, useful for debugging)
    command: ["--js", "-m", "8222"]  # Enable JetStream + monitoring
# --- LocalAI Frontend ---
  # Stateless API server that routes requests to backend nodes.
  # Add more replicas behind a load balancer for HA.
  localai:
    # image: localai/localai:latest-cpu
    build:
      context: .
      dockerfile: Dockerfile
      args:
        - IMAGE_TYPE=core
        - BASE_IMAGE=ubuntu:24.04
    ports:
      - "8080:8080"
    environment:
      # Distributed mode
      LOCALAI_DISTRIBUTED: "true"
      LOCALAI_NATS_URL: "nats://nats:4222"
      LOCALAI_AGENT_POOL_EMBEDDING_MODEL: "granite-embedding-107m-multilingual"
      LOCALAI_AGENT_POOL_VECTOR_ENGINE: "postgres"
      LOCALAI_AGENT_POOL_DATABASE_URL: "postgresql://localai:localai@postgres:5432/localai?sslmode=disable"
      LOCALAI_REGISTRATION_TOKEN: "changeme"  # Change this in production!
      # Shared-models mode (optional, off unless set): enable only when every
      # node mounts the SAME models directory at the SAME path (see
      # "Shared Volume Mode" below). The router then skips file staging and
      # workers load models directly from the shared volume instead of
      # re-downloading them.
      # LOCALAI_DISTRIBUTED_SHARED_MODELS: "true"
      # Auth (required for distributed mode — must use PostgreSQL)
      LOCALAI_AUTH: "true"
      LOCALAI_AUTH_DATABASE_URL: "postgresql://localai:localai@postgres:5432/localai?sslmode=disable"
      # Force pure-Go DNS resolver. The default cgo resolver follows the
      # container's nsswitch.conf and ends up forwarding to host
      # systemd-resolved (127.0.0.53), which isn't reachable from inside
      # the container — failing every postgres/nats hostname lookup at
      # boot. The pure-Go path reads /etc/resolv.conf directly and uses
      # Docker's embedded DNS at 127.0.0.11.
      GODEBUG: "netdns=go"
      # Paths
      MODELS_PATH: /models
    volumes:
      - frontend_models:/models
      - frontend_data:/data
    # Wait for a healthy database; NATS only needs to have been started.
    depends_on:
      postgres:
        condition: service_healthy
      nats:
        condition: service_started
# --- Worker Node ---
  # A generic worker that self-registers with the frontend.
  # The same LocalAI image is used — no separate image needed.
  # The SmartRouter dynamically tells workers which backend to install via NATS.
  #
  # Model files are transferred from the frontend via HTTP file staging.
  # The worker has its own independent models volume.
  worker-1:
    # image: localai/localai:latest-cpu
    build:
      context: .
      dockerfile: Dockerfile
      args:
        - IMAGE_TYPE=core
        - BASE_IMAGE=ubuntu:24.04
    command:
      - worker
    # The image's default HEALTHCHECK targets the server's /readyz on 8080.
    # Workers don't run the OpenAI API server — their HTTP file transfer
    # server runs on the gRPC base port - 1 (50050 here) and exposes /readyz.
    # Override the env var so the inherited HEALTHCHECK probes the right port.
    environment:
      HEALTHCHECK_ENDPOINT: "http://localhost:50050/readyz"
      LOCALAI_SERVE_ADDR: "0.0.0.0:50051"
      # Both advertised addresses use this service's name as the host part.
      LOCALAI_ADVERTISE_ADDR: "worker-1:50051"
      LOCALAI_ADVERTISE_HTTP_ADDR: "worker-1:50050"
      DEBUG: "true"
      LOCALAI_REGISTER_TO: "http://localai:8080"
      LOCALAI_NODE_NAME: "worker-1"
      LOCALAI_REGISTRATION_TOKEN: "changeme"  # Must match frontend token
      LOCALAI_HEARTBEAT_INTERVAL: "10s"
      LOCALAI_NATS_URL: "nats://nats:4222"
      GODEBUG: "netdns=go"  # See note in localai service
      MODELS_PATH: /models
    volumes:
      - worker_1_models:/models
    depends_on:
      localai:
        condition: service_started
      nats:
        condition: service_started
# --- GPU Support (NVIDIA) ---
# Uncomment the following and change the image to a CUDA variant
# (e.g., localai/localai:latest-gpu-nvidia-cuda-12) to enable GPU.
#
# NVIDIA_DRIVER_CAPABILITIES must include `utility` so nvidia-smi / NVML
# are available inside the container; without it the worker cannot report
# free VRAM and the Nodes page will show 0 free / total used.
# `init: true` avoids zombie-reap races that make nvidia-smi flaky.
#
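# (worker-1 already defines an `environment:` block — add
# NVIDIA_DRIVER_CAPABILITIES to it instead of declaring a second one.)
#
#   init: true
#   environment:
#     NVIDIA_DRIVER_CAPABILITIES: "compute,utility"
#   deploy:
#     resources:
#       reservations:
#         devices:
#           - driver: nvidia.com/gpu
#             count: all
#             capabilities: [gpu, utility]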
# --- Shared Volume Mode (optional) ---
# If all services run on the same Docker host, you can skip the model file
# transfer by sharing a single models volume. Replace the volumes above with:
#
#   localai:
#     volumes:
#       - shared_models:/models
#       - frontend_data:/data
#
#   worker-1:
#     volumes:
#       - shared_models:/models
#
# Then add to the top-level volumes section:
#   shared_models:
#
# With shared volumes the model files are already present on every worker at
# the same path. Set LOCALAI_DISTRIBUTED_SHARED_MODELS=true on the frontend
# (see its environment above) so the router skips file staging and the
# worker loads the model directly from the shared path instead of
# re-downloading it into a per-model subdirectory.
# --- Adding More Workers ---
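# Copy the worker-1 service above and change:
#   - Service name (e.g., worker-2)
#   - LOCALAI_NODE_NAME (must be unique)
#   - LOCALAI_ADVERTISE_ADDR and LOCALAI_ADVERTISE_HTTP_ADDR (host part must
#     match the new service name, e.g., worker-2:50051 / worker-2:50050)
#   - The models volume (e.g., worker_2_models:/models, also declared in the
#     top-level volumes section) — unless you use Shared Volume Mode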
#
# Workers are generic — no backend type needed. The SmartRouter
# will dynamically install the required backend via NATS when
# a model request arrives.
# --- Agent Worker ---
  # Dedicated process for agent chat execution.
  # Receives chat jobs from NATS, runs cogito LLM calls via the LocalAI API,
  # and publishes results back via NATS for SSE delivery.
  # No database access needed — config and skills are sent in the NATS payload.
  agent-worker-1:
    # image: localai/localai:latest-cpu
    build:
      context: .
      dockerfile: Dockerfile
      args:
        - IMAGE_TYPE=core
        - BASE_IMAGE=ubuntu:24.04
    # Install Docker CLI and start agent-worker.
    # The Docker socket is mounted from the host so that MCP stdio servers
    # using "docker run" commands can spawn containers on the host Docker.
    entrypoint: ["/bin/sh", "-c"]
    command:
      # Single shell script passed to `/bin/sh -c`. The install step's output
      # is discarded and its exit status is not checked, so the agent worker
      # is started even if the Docker CLI could not be installed.
      - |
        apt-get update -qq && apt-get install -y -qq docker.io >/dev/null 2>&1
        exec /entrypoint.sh agent-worker
    # The agent worker is NATS-only — no HTTP server to probe. Disable the
    # image's inherited HEALTHCHECK so the container doesn't show unhealthy.
    healthcheck:
      disable: true
    environment:
      LOCALAI_NATS_URL: "nats://nats:4222"
      LOCALAI_REGISTER_TO: "http://localai:8080"
      LOCALAI_NODE_NAME: "agent-worker-1"
      LOCALAI_REGISTRATION_TOKEN: "changeme"  # Must match frontend token
      GODEBUG: "netdns=go"  # See note in localai service
    volumes:
      # WARNING: access to the host Docker socket lets this container control
      # the host's Docker daemon. Only mount it on hosts you trust to run
      # agent-launched containers.
      - /var/run/docker.sock:/var/run/docker.sock
    depends_on:
      localai:
        condition: service_started
      nats:
        condition: service_started
# Named volumes referenced by the services above. Empty entries use the
# default volume driver and settings.
volumes:
  postgres_data:
  frontend_models:
  frontend_data:
  worker_1_models: