## Set number of threads.
## Note: prefer the number of physical cores. Overbooking the CPU degrades performance notably.
# LOCALAI_THREADS=14

## Specify a different bind address (defaults to ":8080")
# LOCALAI_ADDRESS=127.0.0.1:8080

## Default models context size
# LOCALAI_CONTEXT_SIZE=512
#
## Define galleries.
## models will to install will be visible in `/models/available`
# LOCALAI_GALLERIES=[{"name":"localai", "url":"github:mudler/LocalAI/gallery/index.yaml@master"}]

## CORS settings
# LOCALAI_CORS=true
# LOCALAI_CORS_ALLOW_ORIGINS=*

## Default path for models
#
# LOCALAI_MODELS_PATH=/models

## Enable debug mode
# LOCALAI_LOG_LEVEL=debug

## Disables COMPEL (Diffusers)
# COMPEL=0

## Disables SD_EMBED (Diffusers)
# SD_EMBED=0

## Enable/Disable single backend (useful if only one GPU is available)
# LOCALAI_SINGLE_ACTIVE_BACKEND=true

# Forces shutdown of the backends if busy (only if LOCALAI_SINGLE_ACTIVE_BACKEND is set)
# LOCALAI_FORCE_BACKEND_SHUTDOWN=true

## Path where to store generated images
# LOCALAI_IMAGE_PATH=/tmp/generated/images

## Specify a default upload limit in MB (whisper)
# LOCALAI_UPLOAD_LIMIT=15

## List of external GRPC backends (note on the container image this variable is already set to use extra backends available in extra/)
# LOCALAI_EXTERNAL_GRPC_BACKENDS=my-backend:127.0.0.1:9000,my-backend2:/usr/bin/backend.py

### Advanced settings ###
### Those are not really used by LocalAI, but from components in the stack ###
##
### Preload libraries
# LD_PRELOAD=

### Huggingface cache for models
# HUGGINGFACE_HUB_CACHE=/usr/local/huggingface

### Python backends GRPC max workers
### Default number of workers for GRPC Python backends.
### This actually controls wether a backend can process multiple requests or not.
# PYTHON_GRPC_MAX_WORKERS=1

### Define the number of parallel LLAMA.cpp workers (Defaults to 1)
# LLAMACPP_PARALLEL=1

### Define a list of GRPC Servers for llama-cpp workers to distribute the load
# https://github.com/ggerganov/llama.cpp/pull/6829
# https://github.com/ggerganov/llama.cpp/blob/master/tools/rpc/README.md
# LLAMACPP_GRPC_SERVERS=""

### Enable to run parallel requests
# LOCALAI_PARALLEL_REQUESTS=true

# Enable to allow p2p mode
# LOCALAI_P2P=true

# Enable to use federated mode
# LOCALAI_FEDERATED=true

# Enable to start federation server
# FEDERATED_SERVER=true

# Define to use federation token
# TOKEN=""

### Watchdog settings
###
# Enables watchdog to kill backends that are inactive for too much time
# LOCALAI_WATCHDOG_IDLE=true
#
# Time in duration format (e.g. 1h30m) after which a backend is considered idle
# LOCALAI_WATCHDOG_IDLE_TIMEOUT=5m
#
# Enables watchdog to kill backends that are busy for too much time
# LOCALAI_WATCHDOG_BUSY=true
#
# Time in duration format (e.g. 1h30m) after which a backend is considered busy
# LOCALAI_WATCHDOG_BUSY_TIMEOUT=5m
