diff --git a/core/config/meta/registry.go b/core/config/meta/registry.go index ca10f604c..84fc9afda 100644 --- a/core/config/meta/registry.go +++ b/core/config/meta/registry.go @@ -286,6 +286,15 @@ func DefaultRegistry() map[string]FieldMetaOverride { Order: 45, }, + // --- Alias --- + "alias": { + Section: "alias", + Label: "Alias target", + Description: "Redirect all traffic for this model to another configured model. When set, every other field on this config is ignored and requests are served by the target model.", + Component: "model-select", + Order: 0, + }, + // --- Pipeline --- "pipeline.llm": { Section: "pipeline", diff --git a/core/config/meta/registry_test.go b/core/config/meta/registry_test.go new file mode 100644 index 000000000..e9d998609 --- /dev/null +++ b/core/config/meta/registry_test.go @@ -0,0 +1,28 @@ +package meta_test + +import ( + "github.com/mudler/LocalAI/core/config/meta" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" +) + +var _ = Describe("alias field metadata", func() { + It("registers the alias field as a model-select in the alias section", func() { + reg := meta.DefaultRegistry() + f, ok := reg["alias"] + Expect(ok).To(BeTrue(), "alias field should have a registry override") + Expect(f.Section).To(Equal("alias")) + Expect(f.Component).To(Equal("model-select")) + }) + + It("defines an alias section", func() { + var found bool + for _, s := range meta.DefaultSections() { + if s.ID == "alias" { + found = true + } + } + Expect(found).To(BeTrue(), "DefaultSections should include an alias section") + }) +}) diff --git a/core/config/meta/types.go b/core/config/meta/types.go index a86b8bb69..a29e66967 100644 --- a/core/config/meta/types.go +++ b/core/config/meta/types.go @@ -69,6 +69,7 @@ type FieldMetaOverride struct { func DefaultSections() []Section { return []Section{ {ID: "general", Label: "General", Icon: "settings", Order: 0}, + {ID: "alias", Label: "Alias", Icon: "git-merge", Order: 5}, {ID: "llm", Label: "LLM", Icon: 
"cpu", Order: 10}, {ID: "parameters", Label: "Parameters", Icon: "sliders", Order: 20}, {ID: "templates", Label: "Templates", Icon: "file-text", Order: 30}, diff --git a/core/config/model_config.go b/core/config/model_config.go index 75136ec6c..50836b99e 100644 --- a/core/config/model_config.go +++ b/core/config/model_config.go @@ -37,6 +37,12 @@ type ModelConfig struct { schema.PredictionOptions `yaml:"parameters,omitempty" json:"parameters,omitempty"` Name string `yaml:"name,omitempty" json:"name,omitempty"` + // Alias, when set, makes this config a pure redirect: every request for + // Name is served by the model named here. All other fields are ignored. + // The target must be an existing, non-alias model (warned at load, rejected + // at request and create/swap time). See docs/content/features/model-aliases.md. + Alias string `yaml:"alias,omitempty" json:"alias,omitempty"` + F16 *bool `yaml:"f16,omitempty" json:"f16,omitempty"` Threads *int `yaml:"threads,omitempty" json:"threads,omitempty"` Debug *bool `yaml:"debug,omitempty" json:"debug,omitempty"` @@ -391,6 +397,10 @@ func (c *ModelConfig) HasRouter() bool { return len(c.Router.Candidates) > 0 } +// IsAlias reports whether this config is a pure redirect to another model. +// Value receiver so it is callable on non-addressable config values too. +func (c ModelConfig) IsAlias() bool { return c.Alias != "" } + // @Description PII filtering configuration. PII redaction is per-model so // that local models don't pay the latency or behaviour change of regex // scanning, while cloud-bound traffic (cloud-proxy backend) can default to @@ -1248,6 +1258,22 @@ func (cfg *ModelConfig) SetDefaults(opts ...ConfigLoaderOption) { } func (c *ModelConfig) Validate() (bool, error) { + // An alias is a pure redirect: validate only its own shape here. Target + // existence and the no-chain rule need the full config set: the loader warns + // at load time; request resolution and the create/swap endpoints reject. 
+ if c.IsAlias() { + if c.Name == "" { + return false, fmt.Errorf("alias config requires a name") + } + if c.Alias == c.Name { + return false, fmt.Errorf("alias %q cannot point to itself", c.Name) + } + if c.Backend != "" || c.Model != "" { + return false, fmt.Errorf("alias config %q must not set backend or parameters.model: an alias is a pure redirect", c.Name) + } + return true, nil + } + downloadedFileNames := []string{} for _, f := range c.DownloadFiles { downloadedFileNames = append(downloadedFileNames, f.Filename) diff --git a/core/config/model_config_loader.go b/core/config/model_config_loader.go index 89f4bc5cb..e2f43e83f 100644 --- a/core/config/model_config_loader.go +++ b/core/config/model_config_loader.go @@ -294,6 +294,44 @@ func (bcl *ModelConfigLoader) UpdateModelConfig(m string, updater func(*ModelCon } } +// ResolveAlias follows a one-hop alias to its target config. Returns +// (resolved, wasAlias, err). Non-alias configs return (cfg, false, nil) +// unchanged. Strict: the target must exist and must not itself be an alias +// (chains are rejected). The returned config is a copy of the target. +func (bcl *ModelConfigLoader) ResolveAlias(cfg *ModelConfig) (*ModelConfig, bool, error) { + if cfg == nil || !cfg.IsAlias() { + return cfg, false, nil + } + target, exists := bcl.GetModelConfig(cfg.Alias) + if !exists { + return nil, true, fmt.Errorf("alias %q points to unknown model %q", cfg.Name, cfg.Alias) + } + if target.IsAlias() { + return nil, true, fmt.Errorf("alias %q points to another alias %q (chains are not allowed)", cfg.Name, cfg.Alias) + } + return &target, true, nil +} + +// ValidateAliasTarget checks an alias config's target at create/swap time: +// the target must exist, must not be an alias, and must not be disabled. +// Returns nil for non-alias configs. 
+func (bcl *ModelConfigLoader) ValidateAliasTarget(cfg *ModelConfig) error { + if cfg == nil || !cfg.IsAlias() { + return nil + } + target, exists := bcl.GetModelConfig(cfg.Alias) + if !exists { + return fmt.Errorf("alias target %q does not exist", cfg.Alias) + } + if target.IsAlias() { + return fmt.Errorf("alias target %q is itself an alias (chains are not allowed)", cfg.Alias) + } + if target.IsDisabled() { + return fmt.Errorf("alias target %q is disabled", cfg.Alias) + } + return nil +} + // Preload prepare models if they are not local but url or huggingface repositories func (bcl *ModelConfigLoader) Preload(modelPath string) error { bcl.Lock() @@ -475,5 +513,21 @@ func (bcl *ModelConfigLoader) LoadModelConfigsFromPath(path string, opts ...Conf } } + // Surface aliases whose targets are missing or themselves aliases. These + // resolve to a clear request-time error; warning here gives operators + // visibility without failing startup. + for name, c := range bcl.configs { + if !c.IsAlias() { + continue + } + target, ok := bcl.configs[c.Alias] + switch { + case !ok: + xlog.Warn("alias points to unknown model", "alias", name, "target", c.Alias) + case target.IsAlias(): + xlog.Warn("alias points to another alias (chains are not allowed)", "alias", name, "target", c.Alias) + } + } + return nil } diff --git a/core/config/model_config_loader_test.go b/core/config/model_config_loader_test.go index 924a4d1e4..06ab65a20 100644 --- a/core/config/model_config_loader_test.go +++ b/core/config/model_config_loader_test.go @@ -61,3 +61,51 @@ var _ = Describe("ModelConfigLoader.GetModelsConflictingWith", func() { Expect(bcl.GetModelsConflictingWith("a")).To(ConsistOf("b")) }) }) + +var _ = Describe("ModelConfigLoader alias resolution", func() { + var loader *ModelConfigLoader + + BeforeEach(func() { + loader = NewModelConfigLoader("") + loader.configs["real"] = ModelConfig{Name: "real", Backend: "llama-cpp"} + loader.configs["gpt-4"] = ModelConfig{Name: "gpt-4", Alias: "real"} + 
loader.configs["chain"] = ModelConfig{Name: "chain", Alias: "gpt-4"} + loader.configs["dangling"] = ModelConfig{Name: "dangling", Alias: "nope"} + }) + + It("returns non-alias configs unchanged", func() { + cfg := loader.configs["real"] + got, was, err := loader.ResolveAlias(&cfg) + Expect(err).ToNot(HaveOccurred()) + Expect(was).To(BeFalse()) + Expect(got.Name).To(Equal("real")) + }) + + It("resolves an alias to its target", func() { + cfg := loader.configs["gpt-4"] + got, was, err := loader.ResolveAlias(&cfg) + Expect(err).ToNot(HaveOccurred()) + Expect(was).To(BeTrue()) + Expect(got.Name).To(Equal("real")) + }) + + It("rejects an alias chain", func() { + cfg := loader.configs["chain"] + _, was, err := loader.ResolveAlias(&cfg) + Expect(was).To(BeTrue()) + Expect(err).To(MatchError(ContainSubstring("chains are not allowed"))) + }) + + It("rejects a dangling alias", func() { + cfg := loader.configs["dangling"] + _, _, err := loader.ResolveAlias(&cfg) + Expect(err).To(MatchError(ContainSubstring("unknown model"))) + }) + + It("ValidateAliasTarget passes for a real target and fails for a chain", func() { + good := loader.configs["gpt-4"] + Expect(loader.ValidateAliasTarget(&good)).ToNot(HaveOccurred()) + bad := loader.configs["chain"] + Expect(loader.ValidateAliasTarget(&bad)).To(MatchError(ContainSubstring("itself an alias"))) + }) +}) diff --git a/core/config/model_config_test.go b/core/config/model_config_test.go index 7f256354d..2f2f3fd82 100644 --- a/core/config/model_config_test.go +++ b/core/config/model_config_test.go @@ -787,3 +787,32 @@ var _ = Describe("pattern detector config", func() { Expect(err).To(MatchError(ContainSubstring("pattern \"EMAILish\""))) }) }) + +var _ = Describe("ModelConfig alias", func() { + It("reports IsAlias when alias is set", func() { + c := ModelConfig{Name: "gpt-4", Alias: "my-llama-3"} + Expect(c.IsAlias()).To(BeTrue()) + Expect(ModelConfig{Name: "real"}.IsAlias()).To(BeFalse()) + }) + + It("validates a minimal alias config", 
func() { + c := ModelConfig{Name: "gpt-4", Alias: "my-llama-3"} + ok, err := c.Validate() + Expect(err).ToNot(HaveOccurred()) + Expect(ok).To(BeTrue()) + }) + + It("rejects an alias pointing to itself", func() { + c := ModelConfig{Name: "loop", Alias: "loop"} + ok, err := c.Validate() + Expect(ok).To(BeFalse()) + Expect(err).To(MatchError(ContainSubstring("itself"))) + }) + + It("rejects an alias that also sets a backend", func() { + c := ModelConfig{Name: "gpt-4", Alias: "my-llama-3", Backend: "llama-cpp"} + ok, err := c.Validate() + Expect(ok).To(BeFalse()) + Expect(err).To(MatchError(ContainSubstring("pure redirect"))) + }) +}) diff --git a/core/http/endpoints/localai/aliases.go b/core/http/endpoints/localai/aliases.go new file mode 100644 index 000000000..923e22c63 --- /dev/null +++ b/core/http/endpoints/localai/aliases.go @@ -0,0 +1,33 @@ +package localai + +import ( + "net/http" + + "github.com/labstack/echo/v4" + "github.com/mudler/LocalAI/core/config" +) + +// AliasInfo is one alias -> target pair. +type AliasInfo struct { + Name string `json:"name"` + Target string `json:"target"` +} + +// ListAliasesEndpoint returns every configured model alias and its target. +// +// @Summary List model aliases +// @Tags models +// @Success 200 {array} AliasInfo +// @Router /api/aliases [get] +func ListAliasesEndpoint(cl *config.ModelConfigLoader) echo.HandlerFunc { + return func(c echo.Context) error { + // Non-nil so an empty result marshals as [] rather than null. 
+ out := []AliasInfo{} + for _, cfg := range cl.GetAllModelsConfigs() { + if cfg.IsAlias() { + out = append(out, AliasInfo{Name: cfg.Name, Target: cfg.Alias}) + } + } + return c.JSON(http.StatusOK, out) + } +} diff --git a/core/http/endpoints/localai/aliases_test.go b/core/http/endpoints/localai/aliases_test.go new file mode 100644 index 000000000..e1c44898a --- /dev/null +++ b/core/http/endpoints/localai/aliases_test.go @@ -0,0 +1,57 @@ +package localai_test + +import ( + "net/http" + "net/http/httptest" + "os" + "path/filepath" + + "github.com/labstack/echo/v4" + "github.com/mudler/LocalAI/core/config" + . "github.com/mudler/LocalAI/core/http/endpoints/localai" + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" +) + +var _ = Describe("ListAliasesEndpoint", func() { + var tempDir string + + BeforeEach(func() { + var err error + tempDir, err = os.MkdirTemp("", "localai-aliases-test") + Expect(err).ToNot(HaveOccurred()) + }) + AfterEach(func() { + _ = os.RemoveAll(tempDir) + }) + + It("returns only alias configs as name/target pairs", func() { + // Seed one real model and one alias pointing at it. + Expect(os.WriteFile( + filepath.Join(tempDir, "real.yaml"), + []byte("name: real\nbackend: llama-cpp\nmodel: foo\n"), + 0644, + )).To(Succeed()) + Expect(os.WriteFile( + filepath.Join(tempDir, "gpt-4.yaml"), + []byte("name: gpt-4\nalias: real\n"), + 0644, + )).To(Succeed()) + + loader := config.NewModelConfigLoader(tempDir) + Expect(loader.LoadModelConfigsFromPath(tempDir)).To(Succeed()) + + app := echo.New() + app.GET("/api/aliases", ListAliasesEndpoint(loader)) + + req := httptest.NewRequest("GET", "/api/aliases", nil) + rec := httptest.NewRecorder() + app.ServeHTTP(rec, req) + + Expect(rec.Code).To(Equal(http.StatusOK)) + Expect(rec.Body.String()).To(ContainSubstring(`"name":"gpt-4"`)) + Expect(rec.Body.String()).To(ContainSubstring(`"target":"real"`)) + // The real model must not appear as an alias entry. 
+ Expect(rec.Body.String()).ToNot(ContainSubstring(`"name":"real"`)) + }) +}) diff --git a/core/http/endpoints/localai/import_model.go b/core/http/endpoints/localai/import_model.go index dc225abdd..54a80a9cc 100644 --- a/core/http/endpoints/localai/import_model.go +++ b/core/http/endpoints/localai/import_model.go @@ -181,6 +181,12 @@ func ImportModelEndpoint(cl *config.ModelConfigLoader, appConfig *config.Applica return c.JSON(http.StatusBadRequest, ModelResponse{Success: false, Error: msg}) } + // Reject aliases whose target is missing, chained, or disabled so a + // dangling alias can't be persisted and surface as a runtime error later. + if err := cl.ValidateAliasTarget(&modelConfig); err != nil { + return c.JSON(http.StatusBadRequest, ModelResponse{Success: false, Error: err.Error()}) + } + // Create the configuration file configPath := filepath.Join(appConfig.SystemState.Model.ModelsPath, modelConfig.Name+".yaml") if err := utils.VerifyPath(modelConfig.Name+".yaml", appConfig.SystemState.Model.ModelsPath); err != nil { diff --git a/core/http/endpoints/mcp/localai_assistant_test.go b/core/http/endpoints/mcp/localai_assistant_test.go index 26cd2878f..8de7355c6 100644 --- a/core/http/endpoints/mcp/localai_assistant_test.go +++ b/core/http/endpoints/mcp/localai_assistant_test.go @@ -51,6 +51,12 @@ func (stubClient) EditModelConfig(_ context.Context, _ string, _ map[string]any) return nil } func (stubClient) ReloadModels(_ context.Context) error { return nil } +func (stubClient) SetAlias(_ context.Context, _, _ string) error { + return nil +} +func (stubClient) ListAliases(_ context.Context) ([]localaitools.AliasInfo, error) { + return nil, nil +} func (stubClient) ListBackends(_ context.Context) ([]localaitools.Backend, error) { return []localaitools.Backend{{Name: "stub-backend", Installed: true}}, nil } diff --git a/core/http/middleware/request.go b/core/http/middleware/request.go index ff0d929ac..74f7e8565 100644 --- a/core/http/middleware/request.go +++ 
b/core/http/middleware/request.go @@ -167,6 +167,27 @@ func (re *RequestExtractor) SetModelAndConfig(initializer func() schema.LocalAIR } } + // Resolve a model alias to its target before the disabled check and + // before storing MODEL_CONFIG, so every modality (chat, embeddings, + // tts, image, ...) inherits redirection. The response keeps echoing + // the alias name (input.ModelName is left unchanged); usage accounting + // records requested=alias / served=target. + if cfg != nil && cfg.IsAlias() { + resolved, _, aliasErr := re.modelConfigLoader.ResolveAlias(cfg) + if aliasErr != nil { + return c.JSON(http.StatusBadRequest, schema.ErrorResponse{ + Error: &schema.APIError{ + Message: aliasErr.Error(), + Code: http.StatusBadRequest, + Type: "invalid_request_error", + }, + }) + } + c.Set(ContextKeyRequestedModel, modelName) + c.Set(ContextKeyServedModel, resolved.Name) + cfg = resolved + } + // Check if the model is disabled if cfg != nil && cfg.IsDisabled() { return c.JSON(http.StatusForbidden, schema.ErrorResponse{ diff --git a/core/http/middleware/request_test.go b/core/http/middleware/request_test.go index fe9fc926c..010379714 100644 --- a/core/http/middleware/request_test.go +++ b/core/http/middleware/request_test.go @@ -151,6 +151,107 @@ var _ = Describe("SetModelAndConfig middleware", func() { }) }) +// --------------------------------------------------------------------------- +// SetModelAndConfig - model alias resolution +// --------------------------------------------------------------------------- +// +// An alias config (`alias: <target>`) is a pure redirect: the middleware must +// swap MODEL_CONFIG to the target config before the disabled check and before +// storing it, while leaving the response-facing model name as the alias. It +// also stamps routing.requested_model = alias and routing.served_model = +// target so usage accounting records both identities. 
+var _ = Describe("SetModelAndConfig alias resolution", func() { + var ( + modelDir string + capturedConfig *config.ModelConfig + capturedReq any + capturedServed any + app *echo.Echo + ) + + BeforeEach(func() { + var err error + modelDir, err = os.MkdirTemp("", "localai-alias-*") + Expect(err).ToNot(HaveOccurred()) + }) + + AfterEach(func() { + _ = os.RemoveAll(modelDir) + }) + + // buildApp seeds the loader from every YAML in modelDir (so an alias's + // target is present in the loader map) and wires a handler that captures + // the resolved config plus the stamped identity keys. + buildApp := func() *echo.Echo { + ss := &system.SystemState{Model: system.Model{ModelsPath: modelDir}} + appConfig := config.NewApplicationConfig() + appConfig.SystemState = ss + + mcl := config.NewModelConfigLoader(modelDir) + Expect(mcl.LoadModelConfigsFromPath(modelDir)).To(Succeed()) + ml := model.NewModelLoader(ss) + re := NewRequestExtractor(mcl, ml, appConfig) + + capturedConfig = nil + capturedReq = nil + capturedServed = nil + e := echo.New() + e.POST("/v1/chat/completions", + func(c echo.Context) error { + if cfg, ok := c.Get(CONTEXT_LOCALS_KEY_MODEL_CONFIG).(*config.ModelConfig); ok { + capturedConfig = cfg + } + capturedReq = c.Get(ContextKeyRequestedModel) + capturedServed = c.Get(ContextKeyServedModel) + return c.String(http.StatusOK, "ok") + }, + re.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.OpenAIRequest) }), + ) + return e + } + + It("serves the target config but keeps the alias name and stamps identity", func() { + Expect(os.WriteFile(filepath.Join(modelDir, "real.yaml"), + []byte("name: real\nbackend: llama-cpp\n"), 0644)).To(Succeed()) + Expect(os.WriteFile(filepath.Join(modelDir, "gpt-4.yaml"), + []byte("name: gpt-4\nalias: real\n"), 0644)).To(Succeed()) + app = buildApp() + + req := httptest.NewRequest(http.MethodPost, "/v1/chat/completions", + strings.NewReader(`{"model":"gpt-4","messages":[{"role":"user","content":"hi"}]}`)) + 
req.Header.Set("Content-Type", "application/json") + rec := httptest.NewRecorder() + app.ServeHTTP(rec, req) + + Expect(rec.Code).To(Equal(http.StatusOK)) + Expect(capturedConfig).ToNot(BeNil()) + // MODEL_CONFIG must be the target, not the alias stub. + Expect(capturedConfig.Name).To(Equal("real")) + Expect(capturedConfig.IsAlias()).To(BeFalse()) + // Identity stamps: requested = alias, served = target. + Expect(capturedReq).To(Equal("gpt-4")) + Expect(capturedServed).To(Equal("real")) + }) + + It("returns 400 when the alias target is missing", func() { + Expect(os.WriteFile(filepath.Join(modelDir, "gpt-4.yaml"), + []byte("name: gpt-4\nalias: nope\n"), 0644)).To(Succeed()) + app = buildApp() + + req := httptest.NewRequest(http.MethodPost, "/v1/chat/completions", + strings.NewReader(`{"model":"gpt-4","messages":[{"role":"user","content":"hi"}]}`)) + req.Header.Set("Content-Type", "application/json") + rec := httptest.NewRecorder() + app.ServeHTTP(rec, req) + + Expect(rec.Code).To(Equal(http.StatusBadRequest)) + var resp schema.ErrorResponse + Expect(json.Unmarshal(rec.Body.Bytes(), &resp)).To(Succeed()) + Expect(resp.Error).ToNot(BeNil()) + Expect(resp.Error.Type).To(Equal("invalid_request_error")) + }) +}) + // --------------------------------------------------------------------------- // MergeOpenResponsesConfig — tool_choice parsing // --------------------------------------------------------------------------- diff --git a/core/http/middleware/route_model.go b/core/http/middleware/route_model.go index 7ff286af4..470bd05f5 100644 --- a/core/http/middleware/route_model.go +++ b/core/http/middleware/route_model.go @@ -189,7 +189,12 @@ func RouteModel(loader *config.ModelConfigLoader, appConfig *config.ApplicationC } c.Set(CONTEXT_LOCALS_KEY_MODEL_CONFIG, result.ChosenConfig) - c.Set(ContextKeyRequestedModel, result.RouterModel) + // Preserve an upstream requested model (e.g. 
an alias that points + // at this router model) so accounting keeps the name the client + // actually sent. Served always reflects the final candidate. + if c.Get(ContextKeyRequestedModel) == nil { + c.Set(ContextKeyRequestedModel, result.RouterModel) + } c.Set(ContextKeyServedModel, result.ChosenModel) if store != nil { diff --git a/core/http/react-ui/e2e/alias-template.spec.js b/core/http/react-ui/e2e/alias-template.spec.js new file mode 100644 index 000000000..f3b1a0ca0 --- /dev/null +++ b/core/http/react-ui/e2e/alias-template.spec.js @@ -0,0 +1,77 @@ +import { test, expect } from './coverage-fixtures.js' + +// Alias / Routing template + Manage alias badge regression tests. +// +// An alias is a model config with `alias: <target>` that redirects traffic to +// the target model. This covers the two discoverability surfaces: +// - the create-flow template gallery exposes an "Alias / Routing" card that +// seeds a minimal name + alias config +// - the Manage Models tab renders a read-only "alias -> target" badge on +// rows that resolve to an alias (looked up via GET /api/aliases, since the +// capabilities row payload doesn't carry the alias field) + +// Minimal metadata so the editor renders the alias field once the template +// loads. Mirrors the config-meta registry (core/config/meta/registry.go), +// which surfaces `alias` as a model-select component. 
+const ALIAS_METADATA = { + sections: [ + { id: 'general', label: 'General', icon: 'settings', order: 0 }, + { id: 'other', label: 'Other', icon: 'more-horizontal', order: 100 }, + ], + fields: [ + { path: 'name', yaml_key: 'name', go_type: 'string', ui_type: 'string', + section: 'general', label: 'Model Name', component: 'input', order: 0 }, + { path: 'alias', yaml_key: 'alias', go_type: 'string', ui_type: 'string', + section: 'general', label: 'Alias', component: 'model-select', autocomplete_provider: 'models', + description: 'Redirect this model name to another configured model.', order: 1 }, + ], +} + +test.describe('Alias template - create flow', () => { + test.beforeEach(async ({ page }) => { + await page.route('**/api/auth/status', (route) => + route.fulfill({ contentType: 'application/json', body: JSON.stringify({ authEnabled: false, staticApiKeyRequired: false, providers: [] }) })) + await page.route('**/api/models/config-metadata*', (route) => + route.fulfill({ contentType: 'application/json', body: JSON.stringify(ALIAS_METADATA) })) + await page.route('**/api/models/config-metadata/autocomplete/**', (route) => + route.fulfill({ contentType: 'application/json', body: JSON.stringify({ values: [] }) })) + + page.on('pageerror', (err) => { + throw new Error(`uncaught page error: ${err.message}`) + }) + }) + + test('template gallery exposes the Alias / Routing card', async ({ page }) => { + await page.goto('/app/model-editor') + await expect(page.getByRole('button', { name: /Alias \/ Routing/i })).toBeVisible({ timeout: 10_000 }) + }) + + test('alias template loads the editor with the alias field', async ({ page }) => { + await page.goto('/app/model-editor?template=alias') + await expect(page.getByText(/Unexpected Application Error/i)).toHaveCount(0) + await expect(page.locator('h1.page-title')).toBeVisible({ timeout: 10_000 }) + await expect(page.getByText('Alias').first()).toBeVisible() + }) +}) + +test.describe('Manage - alias badge', () => { + 
test.beforeEach(async ({ page }) => { + await page.route('**/api/auth/status', (route) => + route.fulfill({ contentType: 'application/json', body: JSON.stringify({ authEnabled: false, staticApiKeyRequired: false, providers: [] }) })) + await page.route('**/api/models/capabilities', (route) => + route.fulfill({ contentType: 'application/json', body: JSON.stringify({ data: [ + { id: 'fast-llm', capabilities: ['chat'], backend: 'llama-cpp' }, + { id: 'gpt-4', capabilities: ['chat'], backend: 'llama-cpp' }, + ] }) })) + await page.route('**/api/aliases', (route) => + route.fulfill({ contentType: 'application/json', body: JSON.stringify([{ name: 'gpt-4', target: 'fast-llm' }]) })) + }) + + test('renders a read-only alias -> target badge on aliased rows', async ({ page }) => { + await page.goto('/app/manage') + await expect(page.locator('.table')).toBeVisible({ timeout: 10_000 }) + + // The aliased row shows the target; the plain model row does not. + await expect(page.getByText('alias -> fast-llm')).toBeVisible({ timeout: 10_000 }) + }) +}) diff --git a/core/http/react-ui/src/pages/Manage.jsx b/core/http/react-ui/src/pages/Manage.jsx index 48d18c33c..16d04f709 100644 --- a/core/http/react-ui/src/pages/Manage.jsx +++ b/core/http/react-ui/src/pages/Manage.jsx @@ -133,6 +133,10 @@ export default function Manage() { const { enrichModel, enrichBackend } = useGalleryEnrichment() const { operations } = useOperations() const [loadedModelIds, setLoadedModelIds] = useState(new Set()) + // Map of alias name -> target. The capabilities endpoint that feeds the row + // list doesn't carry the alias field, so we fetch it once and look rows up by + // name to render the read-only "alias -> target" badge. 
+ const [aliasTargets, setAliasTargets] = useState({}) const [backends, setBackends] = useState([]) const [backendsLoading, setBackendsLoading] = useState(true) const [reloading, setReloading] = useState(false) @@ -228,12 +232,24 @@ export default function Manage() { } }, []) + const fetchAliases = useCallback(async () => { + try { + const data = await modelsApi.listAliases() + const map = {} + for (const a of Array.isArray(data) ? data : []) map[a.name] = a.target + setAliasTargets(map) + } catch { + setAliasTargets({}) + } + }, []) + useEffect(() => { fetchLoadedModels() fetchBackends() + fetchAliases() // Detect distributed mode (nodes API returns 503 when not enabled) nodesApi.list().then(() => setDistributedMode(true)).catch(() => {}) - }, [fetchLoadedModels, fetchBackends]) + }, [fetchLoadedModels, fetchBackends, fetchAliases]) // Auto-refresh the Models tab every 10s in distributed mode so ghost models // (loaded on a worker but absent from this frontend's in-memory cache) @@ -636,6 +652,11 @@ export default function Manage() { Pinned )} + {aliasTargets[model.id] && ( + ${aliasTargets[model.id]}`}> + alias -> {aliasTargets[model.id]} + + )} diff --git a/core/http/react-ui/src/utils/api.js b/core/http/react-ui/src/utils/api.js index a8ffa2f04..20bb90363 100644 --- a/core/http/react-ui/src/utils/api.js +++ b/core/http/react-ui/src/utils/api.js @@ -84,6 +84,7 @@ export const modelsApi = { list: (params) => fetchJSON(buildUrl(API_CONFIG.endpoints.models, params)), listV1: () => fetchJSON(API_CONFIG.endpoints.modelsList), listCapabilities: () => fetchJSON(API_CONFIG.endpoints.modelsCapabilities), + listAliases: () => fetchJSON(API_CONFIG.endpoints.modelsAliases), install: (id) => postJSON(API_CONFIG.endpoints.installModel(id), {}), delete: (id) => postJSON(API_CONFIG.endpoints.deleteModel(id), {}), estimate: (id, contexts) => fetchJSON( diff --git a/core/http/react-ui/src/utils/config.js b/core/http/react-ui/src/utils/config.js index cf83d590f..65797fe41 100644 
--- a/core/http/react-ui/src/utils/config.js +++ b/core/http/react-ui/src/utils/config.js @@ -95,6 +95,7 @@ export const API_CONFIG = { modelsList: '/v1/models', modelsCapabilities: '/api/models/capabilities', + modelsAliases: '/api/aliases', // Realtime / WebRTC realtimeCalls: '/v1/realtime/calls', diff --git a/core/http/react-ui/src/utils/modelTemplates.js b/core/http/react-ui/src/utils/modelTemplates.js index 54d34aecc..c3675f9db 100644 --- a/core/http/react-ui/src/utils/modelTemplates.js +++ b/core/http/react-ui/src/utils/modelTemplates.js @@ -142,6 +142,16 @@ const MODEL_TEMPLATES = [ ], }, }, + { + id: 'alias', + label: 'Alias / Routing', + icon: 'fa-arrow-right-arrow-left', + description: 'Point a model name at another configured model. Clients keep calling the alias; you swap the target anytime.', + fields: { + 'name': '', + 'alias': '', + }, + }, { id: 'mitm', label: 'MITM Intercept', diff --git a/core/http/routes/localai.go b/core/http/routes/localai.go index a66801556..1df1d5d8c 100644 --- a/core/http/routes/localai.go +++ b/core/http/routes/localai.go @@ -80,6 +80,9 @@ func RegisterLocalAIRoutes(router *echo.Echo, // Custom model edit endpoint router.POST("/models/edit/:name", localai.EditModelEndpoint(cl, ml, appConfig), adminMiddleware) + // List model aliases endpoint + router.GET("/api/aliases", localai.ListAliasesEndpoint(cl), adminMiddleware) + // Toggle model enable/disable endpoint router.PUT("/models/toggle-state/:name/:action", localai.ToggleStateModelEndpoint(cl, ml, appConfig), adminMiddleware) @@ -303,6 +306,7 @@ func RegisterLocalAIRoutes(router *echo.Echo, "edit": "/models/edit/:name", "import": "/models/import", "reload": "/models/reload", + "list_aliases": "/api/aliases", }, "ai_functions": map[string]string{ "tts": "/tts", diff --git a/core/services/modeladmin/config.go b/core/services/modeladmin/config.go index c01e2fb4c..f4fc53d97 100644 --- a/core/services/modeladmin/config.go +++ b/core/services/modeladmin/config.go @@ -130,6 
+130,9 @@ func (s *ConfigService) PatchConfig(_ context.Context, name string, patch map[st } return nil, ErrInvalidConfig } + if err := s.Loader.ValidateAliasTarget(&updated); err != nil { + return nil, fmt.Errorf("%w: %v", ErrInvalidConfig, err) + } if err := writeFileAtomic(configPath, yamlData, 0644); err != nil { return nil, fmt.Errorf("write config file: %w", err) } @@ -215,6 +218,9 @@ func (s *ConfigService) EditYAML(_ context.Context, name string, body []byte, ml if valid, _ := req.Validate(); !valid { return nil, ErrInvalidConfig } + if err := s.Loader.ValidateAliasTarget(&req); err != nil { + return nil, fmt.Errorf("%w: %v", ErrInvalidConfig, err) + } configPath := existing.GetModelConfigFile() modelsPath := s.modelsPath() diff --git a/core/services/modeladmin/config_test.go b/core/services/modeladmin/config_test.go index d4157047d..36569c19b 100644 --- a/core/services/modeladmin/config_test.go +++ b/core/services/modeladmin/config_test.go @@ -211,5 +211,23 @@ var _ = Describe("ConfigService", func() { _, err := svc.EditYAML(ctx, "alpha", nil, nil) Expect(err).To(MatchError(ErrEmptyBody)) }) + + It("rejects editing a config into an alias with a missing target", func() { + writeModelYAML(svc, dir, "base", map[string]any{"backend": "llama-cpp"}) + + body := []byte("name: base\nalias: ghost\n") + _, err := svc.EditYAML(ctx, "base", body, nil) + Expect(err).To(MatchError(ErrInvalidConfig)) + Expect(err.Error()).To(ContainSubstring("ghost")) + }) + + It("accepts editing a config into an alias with a real target", func() { + writeModelYAML(svc, dir, "base", map[string]any{"backend": "llama-cpp"}) + writeModelYAML(svc, dir, "target", map[string]any{"backend": "llama-cpp"}) + + body := []byte("name: base\nalias: target\n") + _, err := svc.EditYAML(ctx, "base", body, nil) + Expect(err).ToNot(HaveOccurred()) + }) }) }) diff --git a/docs/content/features/model-aliases.md b/docs/content/features/model-aliases.md new file mode 100644 index 000000000..8c4bd977d --- 
/dev/null +++ b/docs/content/features/model-aliases.md @@ -0,0 +1,81 @@ + ++++ +disableToc = false +title = "Model Aliases" +weight = 24 +url = "/features/model-aliases/" ++++ + +A **model alias** is a model name that redirects all traffic to another +configured model. Declare `gpt-4` as an alias of `my-llama-3` and every client +calling `gpt-4` is served by `my-llama-3` with no client reconfiguration: the +clients keep their existing model name while you control what answers them on +the server side. + +## Declaring an alias + +Create a minimal config file in your models directory: + +```yaml +name: gpt-4 +alias: my-llama-3 +``` + +That is the whole config: a `name` (the alias clients call) and an `alias` key +(the target that actually serves the request). + +## Rules and behavior + +- The target (`my-llama-3`) must be an existing, non-alias, enabled model. You + cannot point an alias at a missing model, a disabled model, or another alias + (no chains). +- Aliases are 1:1. One alias maps to exactly one target. +- The target can be swapped live by editing the config file, calling the API, + using the UI, or asking the assistant. No restart is required. +- Both `gpt-4` and `my-llama-3` appear in `GET /v1/models`. +- Responses echo the requested alias: a call to `gpt-4` returns `gpt-4` in the + response `model` field, not the target name. +- Usage accounting records both sides: requested `gpt-4`, served `my-llama-3`. +- Aliases work for every modality (chat, embeddings, audio, images, and so on). + +## Managing aliases + +You can create, swap, and remove aliases from any of the management surfaces. + +### Web UI + +Open **Add Model** and pick the **Alias / Routing** template, then set a name +and a target. To re-point an existing alias, edit it and change the target. 
+ +### REST API + +- Create: `POST /models/import` +- Swap the target: `PATCH /api/models/config-json/:name` +- List all aliases: `GET /api/aliases` +- Delete: `POST /models/delete/:name` + +### Assistant and MCP + +The LocalAI Assistant (and the MCP server) expose the same operations as tools: +`set_alias`, `list_aliases`, and `delete_model`. + +{{% notice note %}} +**You cannot turn an existing real model into an alias.** If you run `set_alias` +(or `PATCH /api/models/config-json/:name`) against a name that is already a real, +non-alias model, the request is **rejected**. An alias is a pure redirect, so it +must not carry a `backend` or `parameters.model`; a real model does, and merging +an `alias` onto it produces an invalid config that validation refuses with +`alias config ... must not set backend or parameters.model`. This is intentional: +it stops a stray `set_alias` call from clobbering a model that is serving. + +To add an alias, point a **new** name at the target instead of reusing an +existing model's name. Re-pointing an **existing alias** at a different target +is fully supported and is the live-swap path: the alias config has no backend of +its own, so swapping its target stays a valid pure redirect. +{{% /notice %}} + +## Limits + +Aliases are a static 1:1 redirect. For classifier-based or load-balanced +selection across several downstream models, use the intelligent router in the +[Middleware]({{%relref "features/middleware" %}}) feature instead. diff --git a/pkg/mcp/localaitools/client.go b/pkg/mcp/localaitools/client.go index 5ac519aca..f6f6114be 100644 --- a/pkg/mcp/localaitools/client.go +++ b/pkg/mcp/localaitools/client.go @@ -38,6 +38,14 @@ type LocalAIClient interface { ReloadModels(ctx context.Context) error ImportModelURI(ctx context.Context, req ImportModelURIRequest) (*ImportModelURIResponse, error) + // ---- Model aliases ---- + // SetAlias creates the alias `name` pointing at `target`, or swaps an + // existing alias's target. 
The server validates that `target` is an + // existing, non-alias, enabled model. Deletion reuses DeleteModel. + SetAlias(ctx context.Context, name, target string) error + // ListAliases returns every configured alias and its target. + ListAliases(ctx context.Context) ([]AliasInfo, error) + // ---- Backends ---- // ListBackends returns installed backends. The shape stays a thin // localaitools.Backend rather than gallery.SystemBackend because the diff --git a/pkg/mcp/localaitools/coverage_test.go b/pkg/mcp/localaitools/coverage_test.go index ddf5e9c1d..39a2ab544 100644 --- a/pkg/mcp/localaitools/coverage_test.go +++ b/pkg/mcp/localaitools/coverage_test.go @@ -41,6 +41,7 @@ var toolToHTTPRoute = map[string]string{ ToolGetPIIEvents: "GET /api/pii/events", ToolGetMiddlewareStatus: "GET /api/middleware/status", ToolGetRouterDecisions: "GET /api/router/decisions", + ToolListAliases: "GET /api/aliases", // Mutating tools. ToolInstallModel: "POST /models/apply", @@ -53,6 +54,7 @@ var toolToHTTPRoute = map[string]string{ ToolToggleModelState: "PUT /models/toggle-state/:name/:action", ToolToggleModelPinned: "PUT /models/toggle-pinned/:name/:action", ToolSetBranding: "POST /api/settings (instance_name, instance_tagline)", + ToolSetAlias: "PATCH /api/models/config-json/:name (swap) or POST /models/import (create)", } // allKnownTools is the union of expectedFullCatalog (defined in diff --git a/pkg/mcp/localaitools/dto.go b/pkg/mcp/localaitools/dto.go index 77e9a9065..f8aa98eee 100644 --- a/pkg/mcp/localaitools/dto.go +++ b/pkg/mcp/localaitools/dto.go @@ -52,6 +52,14 @@ type ModelConfigView struct { JSON map[string]any `json:"json,omitempty" jsonschema:"Parsed JSON view of the same config (convenience for diffing)."` } +// AliasInfo is one alias -> target pair, the shape list_aliases returns and +// GET /api/aliases emits. Kept aligned with localai.AliasInfo so the +// MCP wire output matches the REST endpoint by construction. 
+type AliasInfo struct { + Name string `json:"name"` + Target string `json:"target"` +} + // InstallModelRequest is the input for install_model. type InstallModelRequest struct { GalleryName string `json:"gallery_name,omitempty" jsonschema:"The gallery the model lives in (from gallery_search). Optional when ModelName is unique across galleries."` diff --git a/pkg/mcp/localaitools/fakes_test.go b/pkg/mcp/localaitools/fakes_test.go index 3d76ae8b9..388245ad2 100644 --- a/pkg/mcp/localaitools/fakes_test.go +++ b/pkg/mcp/localaitools/fakes_test.go @@ -32,6 +32,8 @@ type fakeClient struct { importModelURI func(ImportModelURIRequest) (*ImportModelURIResponse, error) deleteModel func(string) error editModelConfig func(string, map[string]any) error + setAlias func(string, string) error + listAliases func() ([]AliasInfo, error) reloadModels func() error listBackends func() ([]Backend, error) listKnownBackends func() ([]schema.KnownBackend, error) @@ -143,6 +145,22 @@ func (f *fakeClient) EditModelConfig(_ context.Context, name string, patch map[s return nil } +func (f *fakeClient) SetAlias(_ context.Context, name, target string) error { + f.record("SetAlias", []any{name, target}) + if f.setAlias != nil { + return f.setAlias(name, target) + } + return nil +} + +func (f *fakeClient) ListAliases(_ context.Context) ([]AliasInfo, error) { + f.record("ListAliases", nil) + if f.listAliases != nil { + return f.listAliases() + } + return []AliasInfo{}, nil +} + func (f *fakeClient) ReloadModels(_ context.Context) error { f.record("ReloadModels", nil) if f.reloadModels != nil { diff --git a/pkg/mcp/localaitools/httpapi/client.go b/pkg/mcp/localaitools/httpapi/client.go index d2947a5b1..90ec332e2 100644 --- a/pkg/mcp/localaitools/httpapi/client.go +++ b/pkg/mcp/localaitools/httpapi/client.go @@ -338,6 +338,42 @@ func (c *Client) ReloadModels(ctx context.Context) error { return c.do(ctx, http.MethodPost, routeModelsReload, nil, nil) } +// ---- Model aliases ---- + +// SetAlias is 
swap-first: it PATCHes the alias config (a deep-merge that +// validates the target and preserves any other fields), and only creates a +// fresh config when the PATCH reports the model doesn't exist yet. We prefer +// PATCH over POST /models/import for existing names because import rewrites +// the whole file, whereas PATCH gives a reliable 404 not-found signal +// (ErrHTTPNotFound) to branch on and never clobbers an existing config. +func (c *Client) SetAlias(ctx context.Context, name, target string) error { + if name == "" { + return errors.New("name is required") + } + if target == "" { + return errors.New("target is required") + } + err := c.do(ctx, http.MethodPatch, routeModelConfigJSON(name), map[string]any{"alias": target}, nil) + if err == nil { + return nil + } + if !errors.Is(err, ErrHTTPNotFound) { + return err + } + // No such config yet: create it. The import endpoint validates the alias + // target server-side, same as the PATCH path. + return c.do(ctx, http.MethodPost, routeModelImport, map[string]any{"name": name, "alias": target}, nil) +} + +func (c *Client) ListAliases(ctx context.Context) ([]localaitools.AliasInfo, error) { + // /api/aliases returns []{name,target} directly - pass it through. 
+ var out []localaitools.AliasInfo + if err := c.do(ctx, http.MethodGet, routeAliases, nil, &out); err != nil { + return nil, err + } + return out, nil +} + // ---- Backends ---- func (c *Client) ListBackends(ctx context.Context) ([]localaitools.Backend, error) { diff --git a/pkg/mcp/localaitools/httpapi/client_test.go b/pkg/mcp/localaitools/httpapi/client_test.go index 6e6fc3972..319ceffee 100644 --- a/pkg/mcp/localaitools/httpapi/client_test.go +++ b/pkg/mcp/localaitools/httpapi/client_test.go @@ -166,6 +166,92 @@ var _ = Describe("httpapi.Client against the LocalAI admin REST surface", func() }) }) +var _ = Describe("Model aliases", func() { + Describe("ListAliases", func() { + It("passes the GET /api/aliases payload through unchanged", func() { + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + Expect(r.Method).To(Equal(http.MethodGet)) + Expect(r.URL.Path).To(Equal("/api/aliases")) + _ = json.NewEncoder(w).Encode([]map[string]any{ + {"name": "gpt-4", "target": "qwen"}, + }) + })) + DeferCleanup(srv.Close) + + out, err := New(srv.URL, "").ListAliases(context.Background()) + Expect(err).ToNot(HaveOccurred()) + Expect(out).To(HaveLen(1)) + Expect(out[0].Name).To(Equal("gpt-4")) + Expect(out[0].Target).To(Equal("qwen")) + }) + }) + + Describe("SetAlias", func() { + It("swaps an existing alias via PATCH without falling back to import", func() { + var patched, imported bool + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + switch { + case r.Method == http.MethodPatch && r.URL.Path == "/api/models/config-json/gpt-4": + patched = true + var body map[string]any + Expect(json.NewDecoder(r.Body).Decode(&body)).To(Succeed()) + Expect(body).To(HaveKeyWithValue("alias", "qwen")) + _ = json.NewEncoder(w).Encode(map[string]any{"success": true}) + case r.URL.Path == "/models/import": + imported = true + w.WriteHeader(http.StatusOK) + default: + http.Error(w, "unexpected", http.StatusTeapot) 
+ } + })) + DeferCleanup(srv.Close) + + Expect(New(srv.URL, "").SetAlias(context.Background(), "gpt-4", "qwen")).To(Succeed()) + Expect(patched).To(BeTrue(), "PATCH should be attempted first") + Expect(imported).To(BeFalse(), "import must not run when PATCH succeeds") + }) + + It("creates a fresh alias via import when PATCH reports the model is missing", func() { + var imported bool + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + switch { + case r.Method == http.MethodPatch: + http.Error(w, "model configuration not found", http.StatusNotFound) + case r.Method == http.MethodPost && r.URL.Path == "/models/import": + imported = true + var body map[string]any + Expect(json.NewDecoder(r.Body).Decode(&body)).To(Succeed()) + Expect(body).To(HaveKeyWithValue("name", "gpt-4")) + Expect(body).To(HaveKeyWithValue("alias", "qwen")) + _ = json.NewEncoder(w).Encode(map[string]any{"success": true}) + default: + http.Error(w, "unexpected", http.StatusTeapot) + } + })) + DeferCleanup(srv.Close) + + Expect(New(srv.URL, "").SetAlias(context.Background(), "gpt-4", "qwen")).To(Succeed()) + Expect(imported).To(BeTrue(), "import should create the alias on a 404") + }) + + It("surfaces a non-404 PATCH error without attempting import", func() { + var imported bool + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path == "/models/import" { + imported = true + } + http.Error(w, "target is an alias", http.StatusBadRequest) + })) + DeferCleanup(srv.Close) + + err := New(srv.URL, "").SetAlias(context.Background(), "gpt-4", "bad") + Expect(err).To(HaveOccurred()) + Expect(err.Error()).To(ContainSubstring("target is an alias")) + Expect(imported).To(BeFalse(), "a 400 swap error must not trigger create") + }) + }) +}) + var _ = Describe("ErrHTTPNotFound", func() { Context("on a clean 404 status", func() { var ( diff --git a/pkg/mcp/localaitools/httpapi/routes.go 
b/pkg/mcp/localaitools/httpapi/routes.go index 79504dc1b..cc552b728 100644 --- a/pkg/mcp/localaitools/httpapi/routes.go +++ b/pkg/mcp/localaitools/httpapi/routes.go @@ -16,6 +16,8 @@ const ( routeModelsAvail = "/models/available" routeModelsGall = "/models/galleries" routeModelsImport = "/models/import-uri" + routeModelImport = "/models/import" + routeAliases = "/api/aliases" routeModelsReload = "/models/reload" routeBackends = "/backends" routeBackendsKnown = "/backends/known" diff --git a/pkg/mcp/localaitools/inproc/client.go b/pkg/mcp/localaitools/inproc/client.go index 6e047d751..e62934ccc 100644 --- a/pkg/mcp/localaitools/inproc/client.go +++ b/pkg/mcp/localaitools/inproc/client.go @@ -9,6 +9,8 @@ import ( "encoding/json" "errors" "fmt" + "os" + "path/filepath" "github.com/google/uuid" "github.com/mudler/LocalAI/core/config" @@ -25,7 +27,9 @@ import ( localaitools "github.com/mudler/LocalAI/pkg/mcp/localaitools" "github.com/mudler/LocalAI/pkg/model" "github.com/mudler/LocalAI/pkg/system" + "github.com/mudler/LocalAI/pkg/utils" "github.com/mudler/LocalAI/pkg/vram" + "gopkg.in/yaml.v3" ) // Client implements localaitools.LocalAIClient by calling LocalAI services @@ -298,6 +302,80 @@ func (c *Client) ReloadModels(_ context.Context) error { return c.ConfigLoader.LoadModelConfigsFromPath(c.SystemState.Model.ModelsPath) } +// ---- Model aliases ---- + +// SetAlias is swap-first to match the httpapi client: PatchConfig swaps an +// existing alias's target (validating it and preserving other fields) and +// returns ErrNotFound when the config doesn't exist yet, which is the signal +// to create it. createAlias mirrors the create path of ImportModelEndpoint. 
+func (c *Client) SetAlias(ctx context.Context, name, target string) error { + if name == "" { + return errors.New("name is required") + } + if target == "" { + return errors.New("target is required") + } + _, err := c.modelAdmin.PatchConfig(ctx, name, map[string]any{"alias": target}) + if err == nil { + return nil + } + if !errors.Is(err, modeladmin.ErrNotFound) { + return err + } + return c.createAlias(name, target) +} + +// createAlias writes a fresh `{name, alias}` config to disk and reloads, +// mirroring localai.ImportModelEndpoint's create path: validate, validate the +// alias target, verify the path is trusted, write, reload, best-effort preload. +func (c *Client) createAlias(name, target string) error { + if c.SystemState == nil { + return errors.New("system state not available") + } + cfg := config.ModelConfig{Name: name, Alias: target} + if valid, vErr := cfg.Validate(); !valid { + if vErr != nil { + return vErr + } + return errors.New("invalid alias configuration") + } + if err := c.ConfigLoader.ValidateAliasTarget(&cfg); err != nil { + return err + } + modelsPath := c.SystemState.Model.ModelsPath + if err := utils.VerifyPath(name+".yaml", modelsPath); err != nil { + return fmt.Errorf("model path not trusted: %w", err) + } + // Marshal only the user-provided fields (not the full struct with Go + // zero values), matching what the import endpoint persists for an alias. + yamlData, err := yaml.Marshal(map[string]any{"name": name, "alias": target}) + if err != nil { + return fmt.Errorf("marshal alias config: %w", err) + } + // 0600: the LocalAI process is the sole reader/writer of model configs, + // and a tighter mode keeps the gosec G306 scan clean for this new write. 
+ if err := os.WriteFile(filepath.Join(modelsPath, name+".yaml"), yamlData, 0600); err != nil { + return fmt.Errorf("write alias config: %w", err) + } + if err := c.ConfigLoader.LoadModelConfigsFromPath(modelsPath, c.AppConfig.ToConfigLoaderOptions()...); err != nil { + return fmt.Errorf("reload configs: %w", err) + } + // Preload is best-effort - a failure here doesn't undo the create. + _ = c.ConfigLoader.Preload(modelsPath) + return nil +} + +func (c *Client) ListAliases(_ context.Context) ([]localaitools.AliasInfo, error) { + // Mirror localai.ListAliasesEndpoint: every config whose Alias is set. + out := []localaitools.AliasInfo{} + for _, cfg := range c.ConfigLoader.GetAllModelsConfigs() { + if cfg.IsAlias() { + out = append(out, localaitools.AliasInfo{Name: cfg.Name, Target: cfg.Alias}) + } + } + return out, nil +} + // ---- Backends ---- func (c *Client) ListBackends(_ context.Context) ([]localaitools.Backend, error) { diff --git a/pkg/mcp/localaitools/inproc/client_test.go b/pkg/mcp/localaitools/inproc/client_test.go index 1da00602a..e385897c7 100644 --- a/pkg/mcp/localaitools/inproc/client_test.go +++ b/pkg/mcp/localaitools/inproc/client_test.go @@ -3,6 +3,8 @@ package inproc import ( "context" "errors" + "os" + "path/filepath" "time" . 
"github.com/onsi/ginkgo/v2" @@ -47,3 +49,78 @@ var _ = Describe("inproc.Client cancellation", func() { Expect(errors.Is(err, context.Canceled)).To(BeTrue(), "got: %v", err) }) }) + +var _ = Describe("inproc.Client model aliases", func() { + var ( + ctx context.Context + tempDir string + cl *config.ModelConfigLoader + c *Client + seedModel func(name, body string) + ) + + BeforeEach(func() { + ctx = context.Background() + tempDir = GinkgoT().TempDir() + systemState, err := system.GetSystemState(system.WithModelPath(tempDir)) + Expect(err).ToNot(HaveOccurred()) + appConfig := config.NewApplicationConfig(config.WithSystemState(systemState)) + cl = config.NewModelConfigLoader(tempDir) + // Gallery/model loaders are unused by the alias methods, so nil is fine. + c = New(appConfig, systemState, cl, nil, nil) + + seedModel = func(name, body string) { + Expect(os.WriteFile(filepath.Join(tempDir, name+".yaml"), []byte(body), 0644)).To(Succeed()) + Expect(cl.LoadModelConfigsFromPath(tempDir)).To(Succeed()) + } + }) + + Describe("ListAliases", func() { + It("returns only configs whose alias field is set", func() { + seedModel("real", "name: real\nbackend: llama-cpp\n") + seedModel("gpt-4", "name: gpt-4\nalias: real\n") + + out, err := c.ListAliases(ctx) + Expect(err).ToNot(HaveOccurred()) + Expect(out).To(ConsistOf(localaitools.AliasInfo{Name: "gpt-4", Target: "real"})) + }) + + It("returns an empty slice when there are no aliases", func() { + seedModel("real", "name: real\nbackend: llama-cpp\n") + out, err := c.ListAliases(ctx) + Expect(err).ToNot(HaveOccurred()) + Expect(out).To(BeEmpty()) + }) + }) + + Describe("SetAlias", func() { + It("creates a new alias config on disk when the name is unused", func() { + seedModel("real", "name: real\nbackend: llama-cpp\n") + + Expect(c.SetAlias(ctx, "gpt-4", "real")).To(Succeed()) + + Expect(filepath.Join(tempDir, "gpt-4.yaml")).To(BeAnExistingFile()) + out, err := c.ListAliases(ctx) + Expect(err).ToNot(HaveOccurred()) + 
Expect(out).To(ConsistOf(localaitools.AliasInfo{Name: "gpt-4", Target: "real"})) + }) + + It("swaps an existing alias's target in place", func() { + seedModel("real", "name: real\nbackend: llama-cpp\n") + seedModel("other", "name: other\nbackend: llama-cpp\n") + seedModel("gpt-4", "name: gpt-4\nalias: real\n") + + Expect(c.SetAlias(ctx, "gpt-4", "other")).To(Succeed()) + + out, err := c.ListAliases(ctx) + Expect(err).ToNot(HaveOccurred()) + Expect(out).To(ConsistOf(localaitools.AliasInfo{Name: "gpt-4", Target: "other"})) + }) + + It("rejects an alias whose target does not exist", func() { + err := c.SetAlias(ctx, "gpt-4", "missing") + Expect(err).To(HaveOccurred()) + Expect(filepath.Join(tempDir, "gpt-4.yaml")).ToNot(BeAnExistingFile()) + }) + }) +}) diff --git a/pkg/mcp/localaitools/server.go b/pkg/mcp/localaitools/server.go index fd9f5da00..4b662f66b 100644 --- a/pkg/mcp/localaitools/server.go +++ b/pkg/mcp/localaitools/server.go @@ -43,6 +43,7 @@ func NewServer(client LocalAIClient, opts Options) *mcp.Server { }) registerModelTools(srv, client, opts) + registerAliasTools(srv, client, opts) registerBackendTools(srv, client, opts) registerConfigTools(srv, client, opts) registerSystemTools(srv, client, opts) diff --git a/pkg/mcp/localaitools/server_test.go b/pkg/mcp/localaitools/server_test.go index eb1579449..052ca1e8b 100644 --- a/pkg/mcp/localaitools/server_test.go +++ b/pkg/mcp/localaitools/server_test.go @@ -88,10 +88,12 @@ var expectedFullCatalog = sortedStrings( ToolInstallModel, ToolListBackends, ToolListGalleries, + ToolListAliases, ToolListInstalledModels, ToolListKnownBackends, ToolListNodes, ToolReloadModels, + ToolSetAlias, ToolSetBranding, ToolSystemInfo, ToolToggleModelPinned, @@ -110,6 +112,7 @@ var expectedReadOnlyCatalog = sortedStrings( ToolGetPIIEvents, ToolGetRouterDecisions, ToolGetUsageStats, + ToolListAliases, ToolListBackends, ToolListGalleries, ToolListInstalledModels, @@ -165,6 +168,8 @@ var _ = Describe("Tool dispatch", func() { 
{ToolReloadModels, struct{}{}, "ReloadModels"}, {ToolToggleModelState, map[string]any{"name": "foo", "action": "enable"}, "ToggleModelState"}, {ToolToggleModelPinned, map[string]any{"name": "foo", "action": "pin"}, "ToggleModelPinned"}, + {ToolSetAlias, map[string]any{"name": "gpt-4", "target": "real"}, "SetAlias"}, + {ToolListAliases, struct{}{}, "ListAliases"}, } for _, c := range cases { diff --git a/pkg/mcp/localaitools/tools.go b/pkg/mcp/localaitools/tools.go index c7bf620c3..263bd791e 100644 --- a/pkg/mcp/localaitools/tools.go +++ b/pkg/mcp/localaitools/tools.go @@ -36,6 +36,11 @@ const ( ToolToggleModelState = "toggle_model_state" ToolToggleModelPinned = "toggle_model_pinned" ToolSetBranding = "set_branding" + ToolSetAlias = "set_alias" + + // ToolListAliases is read-only but lives here so the alias tools stay + // grouped; the catalog tests assert its read-only placement. + ToolListAliases = "list_aliases" ) // DefaultServerName is the MCP Implementation.Name surfaced when diff --git a/pkg/mcp/localaitools/tools_aliases.go b/pkg/mcp/localaitools/tools_aliases.go new file mode 100644 index 000000000..6b75619c1 --- /dev/null +++ b/pkg/mcp/localaitools/tools_aliases.go @@ -0,0 +1,48 @@ +package localaitools + +import ( + "context" + + "github.com/modelcontextprotocol/go-sdk/mcp" +) + +// registerAliasTools wires the conversational alias-management tools. An +// alias redirects all traffic for one model name to another configured +// model; list_aliases enumerates them, set_alias creates or swaps the +// target. Deletion reuses the existing delete_model tool, which works on +// any config including an alias. 
+func registerAliasTools(s *mcp.Server, client LocalAIClient, opts Options) { + mcp.AddTool(s, &mcp.Tool{ + Name: ToolListAliases, + Description: "List every configured model alias and the target model it routes to.", + }, func(ctx context.Context, _ *mcp.CallToolRequest, _ struct{}) (*mcp.CallToolResult, any, error) { + aliases, err := client.ListAliases(ctx) + if err != nil { + return errorResult(err), nil, nil + } + return jsonResult(aliases), nil, nil + }) + + if opts.DisableMutating { + return + } + + mcp.AddTool(s, &mcp.Tool{ + Name: ToolSetAlias, + Description: "Create a model alias (name -> target) or swap an existing alias's target. The target must be an existing, non-alias, enabled model. Requires user confirmation per safety rule 1.", + }, func(ctx context.Context, _ *mcp.CallToolRequest, args struct { + Name string `json:"name" jsonschema:"The alias name clients will call."` + Target string `json:"target" jsonschema:"The existing model the alias routes to."` + }) (*mcp.CallToolResult, any, error) { + if args.Name == "" { + return errorResultf("name is required"), nil, nil + } + if args.Target == "" { + return errorResultf("target is required"), nil, nil + } + if err := client.SetAlias(ctx, args.Name, args.Target); err != nil { + return errorResult(err), nil, nil + } + return jsonResult(AliasInfo{Name: args.Name, Target: args.Target}), nil, nil + }) +} diff --git a/swagger/docs.go b/swagger/docs.go index 19cb95fd2..20a1f5a3f 100644 --- a/swagger/docs.go +++ b/swagger/docs.go @@ -500,6 +500,25 @@ const docTemplate = `{ } } }, + "/api/aliases": { + "get": { + "tags": [ + "models" + ], + "summary": "List model aliases", + "responses": { + "200": { + "description": "OK", + "schema": { + "type": "array", + "items": { + "$ref": "#/definitions/localai.AliasInfo" + } + } + } + } + } + }, "/api/backend-logs": { "get": { "description": "Returns a sorted list of model IDs that have captured backend process output", @@ -3486,6 +3505,17 @@ const docTemplate = `{ } } 
}, + "localai.AliasInfo": { + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "target": { + "type": "string" + } + } + }, "localai.BrandingResponse": { "type": "object", "properties": { diff --git a/swagger/swagger.json b/swagger/swagger.json index e23b81cea..09e03581b 100644 --- a/swagger/swagger.json +++ b/swagger/swagger.json @@ -497,6 +497,25 @@ } } }, + "/api/aliases": { + "get": { + "tags": [ + "models" + ], + "summary": "List model aliases", + "responses": { + "200": { + "description": "OK", + "schema": { + "type": "array", + "items": { + "$ref": "#/definitions/localai.AliasInfo" + } + } + } + } + } + }, "/api/backend-logs": { "get": { "description": "Returns a sorted list of model IDs that have captured backend process output", @@ -3483,6 +3502,17 @@ } } }, + "localai.AliasInfo": { + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "target": { + "type": "string" + } + } + }, "localai.BrandingResponse": { "type": "object", "properties": { diff --git a/swagger/swagger.yaml b/swagger/swagger.yaml index 719b72f6c..a25674539 100644 --- a/swagger/swagger.yaml +++ b/swagger/swagger.yaml @@ -281,6 +281,13 @@ definitions: type: string type: array type: object + localai.AliasInfo: + properties: + name: + type: string + target: + type: string + type: object localai.BrandingResponse: properties: favicon_url: @@ -2780,6 +2787,18 @@ paths: summary: Execute an agent task by name tags: - agent-jobs + /api/aliases: + get: + responses: + "200": + description: OK + schema: + items: + $ref: '#/definitions/localai.AliasInfo' + type: array + summary: List model aliases + tags: + - models /api/backend-logs: get: description: Returns a sorted list of model IDs that have captured backend process