feat: Add --data-path CLI flag for persistent data separation (#8888)

feat: add --data-path CLI flag for persistent data separation

- Add LOCALAI_DATA_PATH environment variable and --data-path CLI flag
- Default data path: /data (separate from configuration directory)
- Automatic migration on startup: moves agent_tasks.json, agent_jobs.json, collections/, and assets/ from old config dir to new data path
- Backward compatible: preserves old behavior if LOCALAI_DATA_PATH is not set
- Agent state and job directories now use DataPath with proper fallback chain
- Update documentation with new flag and docker-compose example

This separates mutable persistent data (collectiondb, agents, assets, skills) from configuration files, enabling better volume mounting and data persistence in containerized deployments.

Signed-off-by: localai-bot <localai-bot@noreply.github.com>
Co-authored-by: localai-bot <localai-bot@noreply.github.com>
This commit is contained in:
LocalAI [bot]
2026-03-09 14:11:15 +01:00
committed by GitHub
parent 95aef32492
commit d200401e86
7 changed files with 77 additions and 5 deletions

View File

@@ -70,6 +70,17 @@ func New(opts ...config.AppOption) (*Application, error) {
}
}
// Create and migrate data directory
if options.DataPath != "" {
if err := os.MkdirAll(options.DataPath, 0750); err != nil {
return nil, fmt.Errorf("unable to create DataPath: %q", err)
}
// Migrate data from DynamicConfigsDir to DataPath if needed
if options.DynamicConfigsDir != "" && options.DataPath != options.DynamicConfigsDir {
migrateDataFiles(options.DynamicConfigsDir, options.DataPath)
}
}
if err := coreStartup.InstallModels(options.Context, application.GalleryService(), options.Galleries, options.BackendGalleries, options.SystemState, application.ModelLoader(), options.EnforcePredownloadScans, options.AutoloadBackendGalleries, nil, options.ModelsURL...); err != nil {
xlog.Error("error installing models", "error", err)
}
@@ -414,3 +425,43 @@ func initializeWatchdog(application *Application, options *config.ApplicationCon
}()
}
}
// migrateDataFiles moves persistent data files from the old config directory
// to the new data directory. Only moves files that exist in src but not in dst.
func migrateDataFiles(srcDir, dstDir string) {
// Files and directories to migrate
items := []string{
"agent_tasks.json",
"agent_jobs.json",
"collections",
"assets",
}
migrated := false
for _, item := range items {
srcPath := filepath.Join(srcDir, item)
dstPath := filepath.Join(dstDir, item)
// Only migrate if source exists and destination does not
if _, err := os.Stat(srcPath); os.IsNotExist(err) {
continue
}
if _, err := os.Stat(dstPath); err == nil {
continue // destination already exists, skip
}
if err := os.Rename(srcPath, dstPath); err != nil {
xlog.Warn("Failed to migrate data file, will copy instead", "src", srcPath, "dst", dstPath, "error", err)
// os.Rename fails across filesystems, fall back to leaving in place
// and log a warning for the user to manually move
xlog.Warn("Data file remains in old location, please move manually", "src", srcPath, "dst", dstPath)
continue
}
migrated = true
xlog.Info("Migrated data file to new data path", "src", srcPath, "dst", dstPath)
}
if migrated {
xlog.Info("Data migration complete", "from", srcDir, "to", dstDir)
}
}

View File

@@ -27,6 +27,7 @@ type RunCMD struct {
ModelsPath string `env:"LOCALAI_MODELS_PATH,MODELS_PATH" type:"path" default:"${basepath}/models" help:"Path containing models used for inferencing" group:"storage"`
GeneratedContentPath string `env:"LOCALAI_GENERATED_CONTENT_PATH,GENERATED_CONTENT_PATH" type:"path" default:"/tmp/generated/content" help:"Location for generated content (e.g. images, audio, videos)" group:"storage"`
UploadPath string `env:"LOCALAI_UPLOAD_PATH,UPLOAD_PATH" type:"path" default:"/tmp/localai/upload" help:"Path to store uploads from files api" group:"storage"`
DataPath string `env:"LOCALAI_DATA_PATH" type:"path" default:"${basepath}/data" help:"Path for persistent data (collectiondb, agent state, tasks, jobs). Separates mutable data from configuration" group:"storage"`
LocalaiConfigDir string `env:"LOCALAI_CONFIG_DIR" type:"path" default:"${basepath}/configuration" help:"Directory for dynamic loading of certain configuration files (currently api_keys.json and external_backends.json)" group:"storage"`
LocalaiConfigDirPollInterval time.Duration `env:"LOCALAI_CONFIG_DIR_POLL_INTERVAL" help:"Typically the config path picks up changes automatically, but if your system has broken fsnotify events, set this to an interval to poll the LocalAI Config Dir (example: 1m)" group:"storage"`
// The alias on this option is there to preserve functionality with the old `--config-file` parameter
@@ -146,6 +147,7 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error {
config.WithDebug(ctx.Debug || (ctx.LogLevel != nil && *ctx.LogLevel == "debug")),
config.WithGeneratedContentDir(r.GeneratedContentPath),
config.WithUploadDir(r.UploadPath),
config.WithDataPath(r.DataPath),
config.WithDynamicConfigDir(r.LocalaiConfigDir),
config.WithDynamicConfigDirPollInterval(r.LocalaiConfigDirPollInterval),
config.WithF16(r.F16),

View File

@@ -24,6 +24,7 @@ type ApplicationConfig struct {
GeneratedContentDir string
UploadDir string
DataPath string // Persistent data directory for collectiondb, agents, etc.
DynamicConfigsDir string
DynamicConfigsDirPollInterval time.Duration
@@ -492,6 +493,12 @@ func WithUploadDir(uploadDir string) AppOption {
}
}
func WithDataPath(dataPath string) AppOption {
return func(o *ApplicationConfig) {
o.DataPath = dataPath
}
}
func WithDynamicConfigDir(dynamicConfigsDir string) AppOption {
return func(o *ApplicationConfig) {
o.DynamicConfigsDir = dynamicConfigsDir

View File

@@ -93,11 +93,16 @@ func NewAgentJobService(
retentionDays = 30 // Default
}
// Determine storage directory: DataPath > DynamicConfigsDir
tasksFile := ""
jobsFile := ""
if appConfig.DynamicConfigsDir != "" {
tasksFile = filepath.Join(appConfig.DynamicConfigsDir, "agent_tasks.json")
jobsFile = filepath.Join(appConfig.DynamicConfigsDir, "agent_jobs.json")
dataDir := appConfig.DataPath
if dataDir == "" {
dataDir = appConfig.DynamicConfigsDir
}
if dataDir != "" {
tasksFile = filepath.Join(dataDir, "agent_tasks.json")
jobsFile = filepath.Join(dataDir, "agent_jobs.json")
}
return &AgentJobService{

View File

@@ -64,8 +64,11 @@ func (s *AgentPoolService) Start(ctx context.Context) error {
apiKey = s.appConfig.ApiKeys[0]
}
// State dir defaults to DynamicConfigsDir (LocalAI configuration folder)
// State dir: explicit config > DataPath > DynamicConfigsDir > fallback
stateDir := cfg.StateDir
if stateDir == "" {
stateDir = s.appConfig.DataPath
}
if stateDir == "" {
stateDir = s.appConfig.DynamicConfigsDir
}

View File

@@ -60,7 +60,7 @@ All agent-related settings can be configured via environment variables:
| `LOCALAI_AGENT_POOL_TRANSCRIPTION_MODEL` | _(empty)_ | Default transcription (speech-to-text) model for agents |
| `LOCALAI_AGENT_POOL_TRANSCRIPTION_LANGUAGE` | _(empty)_ | Default transcription language for agents |
| `LOCALAI_AGENT_POOL_TTS_MODEL` | _(empty)_ | Default TTS (text-to-speech) model for agents |
| `LOCALAI_AGENT_POOL_STATE_DIR` | _(config dir)_ | Directory for persisting agent state |
| `LOCALAI_AGENT_POOL_STATE_DIR` | _(data path)_ | Directory for persisting agent state. Defaults to `LOCALAI_DATA_PATH` if set, otherwise falls back to `LOCALAI_CONFIG_DIR` |
| `LOCALAI_AGENT_POOL_TIMEOUT` | `5m` | Default timeout for agent operations |
| `LOCALAI_AGENT_POOL_ENABLE_SKILLS` | `false` | Enable the skills service |
| `LOCALAI_AGENT_POOL_VECTOR_ENGINE` | `chromem` | Vector engine for knowledge base (`chromem` or `postgres`) |
@@ -96,15 +96,18 @@ services:
- 8080:8080
environment:
- MODELS_PATH=/models
- LOCALAI_DATA_PATH=/data
- LOCALAI_AGENT_POOL_DEFAULT_MODEL=hermes-3-llama3.1-8b
- LOCALAI_AGENT_POOL_EMBEDDING_MODEL=granite-embedding-107m-multilingual
- LOCALAI_AGENT_POOL_ENABLE_SKILLS=true
- LOCALAI_AGENT_POOL_ENABLE_LOGS=true
volumes:
- models:/models
- localai_data:/data
- localai_config:/etc/localai
volumes:
models:
localai_data:
localai_config:
```

View File

@@ -22,6 +22,7 @@ Complete reference for all LocalAI command-line interface (CLI) parameters and e
| Parameter | Default | Description | Environment Variable |
|-----------|---------|-------------|----------------------|
| `--models-path` | `BASEPATH/models` | Path containing models used for inferencing | `$LOCALAI_MODELS_PATH`, `$MODELS_PATH` |
| `--data-path` | `BASEPATH/data` | Path for persistent data (collectiondb, agent state, tasks, jobs). Separates mutable data from configuration | `$LOCALAI_DATA_PATH` |
| `--generated-content-path` | `/tmp/generated/content` | Location for assets generated by backends (e.g. stablediffusion, images, audio, videos) | `$LOCALAI_GENERATED_CONTENT_PATH`, `$GENERATED_CONTENT_PATH` |
| `--upload-path` | `/tmp/localai/upload` | Path to store uploads from files API | `$LOCALAI_UPLOAD_PATH`, `$UPLOAD_PATH` |
| `--localai-config-dir` | `BASEPATH/configuration` | Directory for dynamic loading of certain configuration files (currently runtime_settings.json, api_keys.json, and external_backends.json). See [Runtime Settings]({{%relref "features/runtime-settings" %}}) for web-based configuration. | `$LOCALAI_CONFIG_DIR` |