mirror of
https://github.com/mudler/LocalAI.git
synced 2026-03-31 13:15:51 -04:00
* feat: add distributed mode (experimental) Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fix data races, mutexes, transactions Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactorings Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fixups Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fix events and tool stream in agent chat Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * use ginkgo Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fix(cron): compute correctly time boundaries avoiding re-triggering Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * enhancements, refactorings Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * do not flood of healthy checks Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * do not list obvious backends as text backends Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * tests fixups Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Drop redundant healthcheck Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * enhancements, refactorings Signed-off-by: Ettore Di Giacinto <mudler@localai.io> --------- Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
192 lines
6.0 KiB
YAML
192 lines
6.0 KiB
YAML
# Docker Compose for LocalAI Distributed Mode
#
# Starts a full distributed stack: PostgreSQL, NATS, a LocalAI frontend,
# and one llama-cpp backend node.
#
# Model files are transferred from the frontend to backend nodes via HTTP
# — no shared volumes needed between frontend and backends.
#
# Usage:
#   docker compose -f docker-compose.distributed.yaml up
#
# See docs: https://localai.io/features/distributed-mode/
services:
  # --- Infrastructure ---

  postgres:
    image: quay.io/mudler/localrecall:v0.5.5-postgresql  # PostgreSQL with pgvector
    environment:
      POSTGRES_DB: localai
      POSTGRES_USER: localai
      POSTGRES_PASSWORD: localai  # NOTE(review): change for production deployments
    volumes:
      - postgres_data:/var/lib/postgresql
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -U localai"]
      interval: 5s
      timeout: 3s
      retries: 10

  nats:
    image: nats:2-alpine
    ports:
      - "4222:4222"  # Client connections
      - "8222:8222"  # HTTP monitoring (optional, useful for debugging)
    command: ["--js", "-m", "8222"]  # Enable JetStream + monitoring

  # --- LocalAI Frontend ---
  # Stateless API server that routes requests to backend nodes.
  # Add more replicas behind a load balancer for HA.

  localai:
    # image: localai/localai:latest-cpu
    build:
      context: .
      dockerfile: Dockerfile
      args:
        - IMAGE_TYPE=core
        - BASE_IMAGE=ubuntu:24.04
    ports:
      - "8080:8080"
    environment:
      # Distributed mode
      LOCALAI_DISTRIBUTED: "true"
      LOCALAI_NATS_URL: "nats://nats:4222"
      LOCALAI_AGENT_POOL_EMBEDDING_MODEL: "granite-embedding-107m-multilingual"
      LOCALAI_AGENT_POOL_VECTOR_ENGINE: "postgres"
      LOCALAI_AGENT_POOL_DATABASE_URL: "postgresql://localai:localai@postgres:5432/localai?sslmode=disable"
      LOCALAI_REGISTRATION_TOKEN: "changeme"  # Change this in production!
      # Auth (required for distributed mode — must use PostgreSQL)
      LOCALAI_AUTH: "true"
      LOCALAI_AUTH_DATABASE_URL: "postgresql://localai:localai@postgres:5432/localai?sslmode=disable"
      # Paths
      MODELS_PATH: /models
    volumes:
      - frontend_models:/models
      - frontend_data:/data
    depends_on:
      postgres:
        condition: service_healthy
      nats:
        condition: service_started

  # --- Worker Node ---
  # A generic worker that self-registers with the frontend.
  # The same LocalAI image is used — no separate image needed.
  # The SmartRouter dynamically tells workers which backend to install via NATS.
  #
  # Model files are transferred from the frontend via HTTP file staging.
  # The worker has its own independent models volume.

  worker-1:
    # image: localai/localai:latest-cpu
    build:
      context: .
      dockerfile: Dockerfile
      args:
        - IMAGE_TYPE=core
        - BASE_IMAGE=ubuntu:24.04
    command:
      - worker
    environment:
      LOCALAI_SERVE_ADDR: "0.0.0.0:50051"
      LOCALAI_ADVERTISE_ADDR: "worker-1:50051"
      LOCALAI_ADVERTISE_HTTP_ADDR: "worker-1:50050"
      DEBUG: "true"
      LOCALAI_REGISTER_TO: "http://localai:8080"
      LOCALAI_NODE_NAME: "worker-1"
      LOCALAI_REGISTRATION_TOKEN: "changeme"  # Must match frontend token
      LOCALAI_HEARTBEAT_INTERVAL: "10s"
      LOCALAI_NATS_URL: "nats://nats:4222"
      MODELS_PATH: /models
    volumes:
      - worker_1_models:/models
    depends_on:
      localai:
        condition: service_started
      nats:
        condition: service_started

    # --- GPU Support (NVIDIA) ---
    # Uncomment the following and change the image to a CUDA variant
    # (e.g., localai/localai:latest-gpu-nvidia-cuda-12) to enable GPU:
    #
    # deploy:
    #   resources:
    #     reservations:
    #       devices:
    #         - driver: nvidia
    #           count: all
    #           capabilities: [gpu]

  # --- Shared Volume Mode (optional) ---
  # If all services run on the same Docker host, you can skip gRPC file transfer
  # by sharing a single models volume. Replace the volumes above with:
  #
  # localai:
  #   volumes:
  #     - shared_models:/models
  #     - frontend_data:/data
  #
  # backend-llama-cpp:
  #   volumes:
  #     - shared_models:/models
  #
  # Then add to the volumes section:
  # shared_models:
  #
  # With shared volumes, model files are already available on the backend —
  # gRPC file staging becomes a no-op (paths match).

  # --- Adding More Workers ---
  # Copy the worker-1 service above and change:
  #   - Service name (e.g., worker-2)
  #   - LOCALAI_NODE_NAME (must be unique)
  #   - LOCALAI_ADVERTISE_ADDR (must match service name)
  #
  # Workers are generic — no backend type needed. The SmartRouter
  # will dynamically install the required backend via NATS when
  # a model request arrives.

  # --- Agent Worker ---
  # Dedicated process for agent chat execution.
  # Receives chat jobs from NATS, runs cogito LLM calls via the LocalAI API,
  # and publishes results back via NATS for SSE delivery.
  # No database access needed — config and skills are sent in the NATS payload.

  agent-worker-1:
    # image: localai/localai:latest-cpu
    build:
      context: .
      dockerfile: Dockerfile
      args:
        - IMAGE_TYPE=core
        - BASE_IMAGE=ubuntu:24.04
    # Install Docker CLI and start agent-worker.
    # The Docker socket is mounted from the host so that MCP stdio servers
    # using "docker run" commands can spawn containers on the host Docker.
    entrypoint: ["/bin/sh", "-c"]
    command:
      - |
        apt-get update -qq && apt-get install -y -qq docker.io >/dev/null 2>&1
        exec /entrypoint.sh agent-worker
    environment:
      LOCALAI_NATS_URL: "nats://nats:4222"
      LOCALAI_REGISTER_TO: "http://localai:8080"
      LOCALAI_NODE_NAME: "agent-worker-1"
      LOCALAI_REGISTRATION_TOKEN: "changeme"  # Must match frontend token
    volumes:
      - /var/run/docker.sock:/var/run/docker.sock
    depends_on:
      localai:
        condition: service_started
      nats:
        condition: service_started
# Named volumes — each frontend/worker keeps an independent models store
# (model files are staged between nodes over the network, not shared volumes).
volumes:
  postgres_data:
  frontend_models:
  frontend_data:
  worker_1_models: