From d200401e86e01ed82478fec6822f52b753fc802e Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Mon, 9 Mar 2026 14:11:15 +0100 Subject: [PATCH] feat: Add --data-path CLI flag for persistent data separation (#8888) feat: add --data-path CLI flag for persistent data separation - Add LOCALAI_DATA_PATH environment variable and --data-path CLI flag - Default data path: ${basepath}/data (separate from the configuration directory); the docker-compose example sets it to /data - Automatic migration on startup: moves agent_tasks.json, agent_jobs.json, collections/, and assets/ from the old config dir to the new data path when they exist in the old location and not in the new one; items that cannot be renamed (e.g. across filesystems) are left in place and a warning is logged - Backward compatible: when DataPath is empty, the configuration directory is used as before - Agent state and job directories now use DataPath with proper fallback chain - Update documentation with new flag and docker-compose example This separates mutable persistent data (collectiondb, agents, assets, skills) from configuration files, enabling better volume mounting and data persistence in containerized deployments. 
Signed-off-by: localai-bot Co-authored-by: localai-bot --- core/application/startup.go | 51 +++++++++++++++++++++++++ core/cli/run.go | 2 + core/config/application_config.go | 7 ++++ core/services/agent_jobs.go | 11 ++++-- core/services/agent_pool.go | 5 ++- docs/content/features/agents.md | 5 ++- docs/content/reference/cli-reference.md | 1 + 7 files changed, 77 insertions(+), 5 deletions(-) diff --git a/core/application/startup.go b/core/application/startup.go index 9683d4e13..19966ecb2 100644 --- a/core/application/startup.go +++ b/core/application/startup.go @@ -70,6 +70,17 @@ func New(opts ...config.AppOption) (*Application, error) { } } + // Create and migrate data directory + if options.DataPath != "" { + if err := os.MkdirAll(options.DataPath, 0750); err != nil { + return nil, fmt.Errorf("unable to create DataPath: %q", err) + } + // Migrate data from DynamicConfigsDir to DataPath if needed + if options.DynamicConfigsDir != "" && options.DataPath != options.DynamicConfigsDir { + migrateDataFiles(options.DynamicConfigsDir, options.DataPath) + } + } + if err := coreStartup.InstallModels(options.Context, application.GalleryService(), options.Galleries, options.BackendGalleries, options.SystemState, application.ModelLoader(), options.EnforcePredownloadScans, options.AutoloadBackendGalleries, nil, options.ModelsURL...); err != nil { xlog.Error("error installing models", "error", err) } @@ -414,3 +425,43 @@ func initializeWatchdog(application *Application, options *config.ApplicationCon }() } } + +// migrateDataFiles moves persistent data files from the old config directory +// to the new data directory. Only moves files that exist in src but not in dst. 
+func migrateDataFiles(srcDir, dstDir string) { + // Files and directories to migrate + items := []string{ + "agent_tasks.json", + "agent_jobs.json", + "collections", + "assets", + } + + migrated := false + for _, item := range items { + srcPath := filepath.Join(srcDir, item) + dstPath := filepath.Join(dstDir, item) + + // Only migrate if source exists and destination does not + if _, err := os.Stat(srcPath); os.IsNotExist(err) { + continue + } + if _, err := os.Stat(dstPath); err == nil { + continue // destination already exists, skip + } + + if err := os.Rename(srcPath, dstPath); err != nil { + xlog.Warn("Failed to migrate data file, will copy instead", "src", srcPath, "dst", dstPath, "error", err) + // os.Rename fails across filesystems, fall back to leaving in place + // and log a warning for the user to manually move + xlog.Warn("Data file remains in old location, please move manually", "src", srcPath, "dst", dstPath) + continue + } + migrated = true + xlog.Info("Migrated data file to new data path", "src", srcPath, "dst", dstPath) + } + + if migrated { + xlog.Info("Data migration complete", "from", srcDir, "to", dstDir) + } +} diff --git a/core/cli/run.go b/core/cli/run.go index 8544d2211..57aa53207 100644 --- a/core/cli/run.go +++ b/core/cli/run.go @@ -27,6 +27,7 @@ type RunCMD struct { ModelsPath string `env:"LOCALAI_MODELS_PATH,MODELS_PATH" type:"path" default:"${basepath}/models" help:"Path containing models used for inferencing" group:"storage"` GeneratedContentPath string `env:"LOCALAI_GENERATED_CONTENT_PATH,GENERATED_CONTENT_PATH" type:"path" default:"/tmp/generated/content" help:"Location for generated content (e.g. 
images, audio, videos)" group:"storage"` UploadPath string `env:"LOCALAI_UPLOAD_PATH,UPLOAD_PATH" type:"path" default:"/tmp/localai/upload" help:"Path to store uploads from files api" group:"storage"` + DataPath string `env:"LOCALAI_DATA_PATH" type:"path" default:"${basepath}/data" help:"Path for persistent data (collectiondb, agent state, tasks, jobs). Separates mutable data from configuration" group:"storage"` LocalaiConfigDir string `env:"LOCALAI_CONFIG_DIR" type:"path" default:"${basepath}/configuration" help:"Directory for dynamic loading of certain configuration files (currently api_keys.json and external_backends.json)" group:"storage"` LocalaiConfigDirPollInterval time.Duration `env:"LOCALAI_CONFIG_DIR_POLL_INTERVAL" help:"Typically the config path picks up changes automatically, but if your system has broken fsnotify events, set this to an interval to poll the LocalAI Config Dir (example: 1m)" group:"storage"` // The alias on this option is there to preserve functionality with the old `--config-file` parameter @@ -146,6 +147,7 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error { config.WithDebug(ctx.Debug || (ctx.LogLevel != nil && *ctx.LogLevel == "debug")), config.WithGeneratedContentDir(r.GeneratedContentPath), config.WithUploadDir(r.UploadPath), + config.WithDataPath(r.DataPath), config.WithDynamicConfigDir(r.LocalaiConfigDir), config.WithDynamicConfigDirPollInterval(r.LocalaiConfigDirPollInterval), config.WithF16(r.F16), diff --git a/core/config/application_config.go b/core/config/application_config.go index 63fc3de2b..6f1a86622 100644 --- a/core/config/application_config.go +++ b/core/config/application_config.go @@ -24,6 +24,7 @@ type ApplicationConfig struct { GeneratedContentDir string UploadDir string + DataPath string // Persistent data directory for collectiondb, agents, etc. 
DynamicConfigsDir string DynamicConfigsDirPollInterval time.Duration @@ -492,6 +493,12 @@ func WithUploadDir(uploadDir string) AppOption { } } +func WithDataPath(dataPath string) AppOption { + return func(o *ApplicationConfig) { + o.DataPath = dataPath + } +} + func WithDynamicConfigDir(dynamicConfigsDir string) AppOption { return func(o *ApplicationConfig) { o.DynamicConfigsDir = dynamicConfigsDir diff --git a/core/services/agent_jobs.go b/core/services/agent_jobs.go index ab7271de7..8cf9777e8 100644 --- a/core/services/agent_jobs.go +++ b/core/services/agent_jobs.go @@ -93,11 +93,16 @@ func NewAgentJobService( retentionDays = 30 // Default } + // Determine storage directory: DataPath > DynamicConfigsDir tasksFile := "" jobsFile := "" - if appConfig.DynamicConfigsDir != "" { - tasksFile = filepath.Join(appConfig.DynamicConfigsDir, "agent_tasks.json") - jobsFile = filepath.Join(appConfig.DynamicConfigsDir, "agent_jobs.json") + dataDir := appConfig.DataPath + if dataDir == "" { + dataDir = appConfig.DynamicConfigsDir + } + if dataDir != "" { + tasksFile = filepath.Join(dataDir, "agent_tasks.json") + jobsFile = filepath.Join(dataDir, "agent_jobs.json") } return &AgentJobService{ diff --git a/core/services/agent_pool.go b/core/services/agent_pool.go index 106a637b9..324edf316 100644 --- a/core/services/agent_pool.go +++ b/core/services/agent_pool.go @@ -64,8 +64,11 @@ func (s *AgentPoolService) Start(ctx context.Context) error { apiKey = s.appConfig.ApiKeys[0] } - // State dir defaults to DynamicConfigsDir (LocalAI configuration folder) + // State dir: explicit config > DataPath > DynamicConfigsDir > fallback stateDir := cfg.StateDir + if stateDir == "" { + stateDir = s.appConfig.DataPath + } if stateDir == "" { stateDir = s.appConfig.DynamicConfigsDir } diff --git a/docs/content/features/agents.md b/docs/content/features/agents.md index ab83cc876..e8c4a86c9 100644 --- a/docs/content/features/agents.md +++ b/docs/content/features/agents.md @@ -60,7 +60,7 @@ All 
agent-related settings can be configured via environment variables: | `LOCALAI_AGENT_POOL_TRANSCRIPTION_MODEL` | _(empty)_ | Default transcription (speech-to-text) model for agents | | `LOCALAI_AGENT_POOL_TRANSCRIPTION_LANGUAGE` | _(empty)_ | Default transcription language for agents | | `LOCALAI_AGENT_POOL_TTS_MODEL` | _(empty)_ | Default TTS (text-to-speech) model for agents | -| `LOCALAI_AGENT_POOL_STATE_DIR` | _(config dir)_ | Directory for persisting agent state | +| `LOCALAI_AGENT_POOL_STATE_DIR` | _(data path)_ | Directory for persisting agent state. Defaults to `LOCALAI_DATA_PATH` if set, otherwise falls back to `LOCALAI_CONFIG_DIR` | | `LOCALAI_AGENT_POOL_TIMEOUT` | `5m` | Default timeout for agent operations | | `LOCALAI_AGENT_POOL_ENABLE_SKILLS` | `false` | Enable the skills service | | `LOCALAI_AGENT_POOL_VECTOR_ENGINE` | `chromem` | Vector engine for knowledge base (`chromem` or `postgres`) | @@ -96,15 +96,18 @@ services: - 8080:8080 environment: - MODELS_PATH=/models + - LOCALAI_DATA_PATH=/data - LOCALAI_AGENT_POOL_DEFAULT_MODEL=hermes-3-llama3.1-8b - LOCALAI_AGENT_POOL_EMBEDDING_MODEL=granite-embedding-107m-multilingual - LOCALAI_AGENT_POOL_ENABLE_SKILLS=true - LOCALAI_AGENT_POOL_ENABLE_LOGS=true volumes: - models:/models + - localai_data:/data - localai_config:/etc/localai volumes: models: + localai_data: localai_config: ``` diff --git a/docs/content/reference/cli-reference.md b/docs/content/reference/cli-reference.md index 2b24020a8..575521ffa 100644 --- a/docs/content/reference/cli-reference.md +++ b/docs/content/reference/cli-reference.md @@ -22,6 +22,7 @@ Complete reference for all LocalAI command-line interface (CLI) parameters and e | Parameter | Default | Description | Environment Variable | |-----------|---------|-------------|----------------------| | `--models-path` | `BASEPATH/models` | Path containing models used for inferencing | `$LOCALAI_MODELS_PATH`, `$MODELS_PATH` | +| `--data-path` | `BASEPATH/data` | Path for persistent data 
(collectiondb, agent state, tasks, jobs). Separates mutable data from configuration | `$LOCALAI_DATA_PATH` | | `--generated-content-path` | `/tmp/generated/content` | Location for assets generated by backends (e.g. stablediffusion, images, audio, videos) | `$LOCALAI_GENERATED_CONTENT_PATH`, `$GENERATED_CONTENT_PATH` | | `--upload-path` | `/tmp/localai/upload` | Path to store uploads from files API | `$LOCALAI_UPLOAD_PATH`, `$UPLOAD_PATH` | | `--localai-config-dir` | `BASEPATH/configuration` | Directory for dynamic loading of certain configuration files (currently runtime_settings.json, api_keys.json, and external_backends.json). See [Runtime Settings]({{%relref "features/runtime-settings" %}}) for web-based configuration. | `$LOCALAI_CONFIG_DIR` |