Files
LocalAI/pkg/mcp/localaitools/httpapi/client.go
Ettore Di Giacinto b1a99436c7 feat(branding): admin-configurable instance name, tagline, and assets (#9635)
Adds a whitelabeling feature so an operator can replace the LocalAI
instance name, tagline, square logo, horizontal logo, and favicon from
the admin Settings page. Defaults fall back to the bundled assets so
existing installs are unaffected.

The public GET /api/branding endpoint is reachable pre-auth so the
login screen can render the configured branding before sign-in.
Mutating routes (POST/DELETE /api/branding/asset/:kind) remain
admin-only. Text fields (instance_name, instance_tagline) ride the
existing /api/settings flow; binary assets get a dedicated multipart
upload route that persists files under DynamicConfigsDir/branding/.
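As a sketch, a pre-auth client can render the branding with a plain
GET (the listen address below is an illustrative assumption; the field
names match the brandingResponse shape in the client code):

    package main

    import (
        "encoding/json"
        "fmt"
        "net/http"
    )

    func main() {
        // No Authorization header: /api/branding is public by design.
        resp, err := http.Get("http://localhost:8080/api/branding")
        if err != nil {
            panic(err)
        }
        defer resp.Body.Close()
        var b struct {
            InstanceName    string `json:"instance_name"`
            InstanceTagline string `json:"instance_tagline"`
        }
        if err := json.NewDecoder(resp.Body).Decode(&b); err != nil {
            panic(err)
        }
        fmt.Println(b.InstanceName, "/", b.InstanceTagline)
    }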

To prevent the Settings page's stale local state from clobbering an
upload on save, UpdateSettingsEndpoint preserves the on-disk asset
filename fields regardless of the request body; the
/api/branding/asset/* routes are the sole writers for those fields.

The MCP catalog gains get_branding and set_branding tools (text fields
only; file upload stays UI-only) plus a configure_branding skill prompt.

Wiring this up surfaced the same restart-loss class of bug in several
existing fields whose RuntimeSettings entries were never read by the
startup loader. Fix loadRuntimeSettingsFromFile() to load:

  - branding (instance_name, instance_tagline, *_file basenames)
  - auto_upgrade_backends, prefer_development_backends
  - localai_assistant_enabled
  - open_responses_store_ttl
  - the 7 existing AgentPool fields (enabled, default/embedding model,
    chunking sizes, enable_logs, collection_db_path)

Also exposes 3 new AgentPool runtime settings (vector_engine,
database_url, agent_hub_url) via /api/settings + the Settings UI, with
the same load-on-startup wiring. The file watcher's manual-edit path
is intentionally left unchanged: the in-process API endpoints already
update appConfig directly, so the watcher is redundant for the
supported flows, and covering manual edits is a separate refactor.

15 TDD specs cover the loader behaviour (1 branding + 11 adjacent + 3
new agent-pool); 2 specs cover the persistence helpers and the
clobber-prevention contract.


Assisted-by: claude-code:claude-opus-4-7

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2026-05-02 15:51:36 +02:00

532 lines
17 KiB
Go

// Package httpapi provides a LocalAIClient that talks to a remote LocalAI
// instance over its REST API. Used by the standalone "local-ai mcp-server"
// subcommand to control a remote deployment over stdio.
package httpapi

import (
	"bytes"
	"context"
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"net/http"
	"strings"
	"time"

	"github.com/mudler/LocalAI/core/config"
	"github.com/mudler/LocalAI/core/gallery"
	"github.com/mudler/LocalAI/core/schema"
	"github.com/mudler/LocalAI/core/services/modeladmin"
	localaitools "github.com/mudler/LocalAI/pkg/mcp/localaitools"
	"github.com/mudler/LocalAI/pkg/vram"
)
// Client is a thin REST wrapper. It maps each LocalAIClient method to the
// matching admin endpoint. Errors from non-2xx responses include the body for
// the MCP layer to surface verbatim to the LLM.
type Client struct {
	BaseURL    string
	APIKey     string
	HTTPClient *http.Client
}

// New returns a Client targeting baseURL with an optional bearer token.
func New(baseURL, apiKey string) *Client {
	return &Client{
		BaseURL: strings.TrimRight(baseURL, "/"),
		APIKey:  apiKey,
		HTTPClient: &http.Client{
			Timeout: 60 * time.Second,
		},
	}
}
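
// A minimal usage sketch (the base URL and the key's env var are
// placeholders, not part of this package's contract):
//
//	c := New("http://localhost:8080", os.Getenv("LOCALAI_API_KEY"))
//	galleries, err := c.ListGalleries(context.Background())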
// Compile-time assertion.
var _ localaitools.LocalAIClient = (*Client)(nil)

// HTTPError is returned by do() for non-2xx responses. Callers should use
// errors.Is(err, ErrHTTPNotFound) instead of substring-matching on
// err.Error() — the latter is brittle to status-code formatting changes.
type HTTPError struct {
	Method     string
	Path       string
	StatusCode int
	Body       string
}

func (e *HTTPError) Error() string {
	return fmt.Sprintf("%s %s: %d %s: %s", e.Method, e.Path, e.StatusCode, http.StatusText(e.StatusCode), strings.TrimSpace(e.Body))
}

// ErrHTTPNotFound is the sentinel for "the resource you asked for doesn't
// exist". Match it via errors.Is on an *HTTPError.
var ErrHTTPNotFound = errors.New("httpapi: not found")

// Is supports errors.Is(*HTTPError, ErrHTTPNotFound). The 500-with-text
// branch is a transitional fallback for /models/jobs/:uuid which today
// returns a 500 carrying "could not find any status for ID" instead of a
// proper 404. Drop the branch when the server is fixed.
func (e *HTTPError) Is(target error) bool {
	if target != ErrHTTPNotFound {
		return false
	}
	if e.StatusCode == http.StatusNotFound {
		return true
	}
	return e.StatusCode == http.StatusInternalServerError && strings.Contains(e.Body, "could not find")
}
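
// For example, a caller can branch on "already gone" without substring
// checks (a sketch using this file's DeleteModel):
//
//	if err := c.DeleteModel(ctx, name); errors.Is(err, ErrHTTPNotFound) {
//		// the model was already absent; safe to treat as success
//	}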
// ---- HTTP helpers ----

func (c *Client) do(ctx context.Context, method, path string, body any, out any) error {
	var rdr io.Reader
	if body != nil {
		raw, err := json.Marshal(body)
		if err != nil {
			return fmt.Errorf("marshal body: %w", err)
		}
		rdr = bytes.NewReader(raw)
	}
	req, err := http.NewRequestWithContext(ctx, method, c.BaseURL+path, rdr)
	if err != nil {
		return err
	}
	if body != nil {
		req.Header.Set("Content-Type", "application/json")
	}
	req.Header.Set("Accept", "application/json")
	if c.APIKey != "" {
		req.Header.Set("Authorization", "Bearer "+c.APIKey)
	}
	resp, err := c.HTTPClient.Do(req)
	if err != nil {
		return err
	}
	defer resp.Body.Close()
	respBody, _ := io.ReadAll(resp.Body)
	if resp.StatusCode < 200 || resp.StatusCode >= 300 {
		return &HTTPError{Method: method, Path: path, StatusCode: resp.StatusCode, Body: string(respBody)}
	}
	if out == nil {
		return nil
	}
	if err := json.Unmarshal(respBody, out); err != nil {
		return fmt.Errorf("decode %s %s response: %w (body=%q)", method, path, err, truncate(string(respBody), 200))
	}
	return nil
}

func truncate(s string, n int) string {
	if len(s) <= n {
		return s
	}
	return s[:n] + "..."
}
// ---- Models / gallery (read) ----

func (c *Client) GallerySearch(ctx context.Context, q localaitools.GallerySearchQuery) ([]gallery.Metadata, error) {
	// /models/available already returns []gallery.Metadata — pass it
	// through after applying the LLM-supplied filters client-side.
	var metas []gallery.Metadata
	if err := c.do(ctx, http.MethodGet, routeModelsAvail, nil, &metas); err != nil {
		return nil, err
	}
	limit := q.Limit
	if limit <= 0 {
		limit = 20
	}
	out := make([]gallery.Metadata, 0, limit)
	needle := strings.ToLower(q.Query)
	tag := strings.ToLower(q.Tag)
	for _, m := range metas {
		if q.Gallery != "" && m.Gallery.Name != q.Gallery {
			continue
		}
		if needle != "" && !contains(m.Name, needle) && !contains(m.Description, needle) && !containsTagsAny(m.Tags, needle) {
			continue
		}
		if tag != "" && !containsTagExact(m.Tags, tag) {
			continue
		}
		out = append(out, m)
		if len(out) >= limit {
			break
		}
	}
	return out, nil
}
func (c *Client) ListInstalledModels(ctx context.Context, capability localaitools.Capability) ([]localaitools.InstalledModel, error) {
	_ = capability // Capability filtering is unavailable over the welcome HTTP shape today; see TODO below.
	// /v1/models is the OpenAI-compat shape; we use the LocalAI welcome JSON
	// for richer info.
	var welcome struct {
		ModelsConfig []struct {
			Name    string `json:"name"`
			Backend string `json:"backend"`
		} `json:"ModelsConfig"`
	}
	if err := c.do(ctx, http.MethodGet, routeWelcome, nil, &welcome); err != nil {
		return nil, err
	}
	// Capability filtering is unavailable over HTTP without a dedicated endpoint
	// — for now we return everything and let the LLM filter from the names. A
	// follow-up should add a /api/models?capability=chat endpoint.
	out := make([]localaitools.InstalledModel, 0, len(welcome.ModelsConfig))
	for _, m := range welcome.ModelsConfig {
		out = append(out, localaitools.InstalledModel{Name: m.Name, Backend: m.Backend})
	}
	return out, nil
}

func (c *Client) ListGalleries(ctx context.Context) ([]config.Gallery, error) {
	// /models/galleries returns []config.Gallery directly.
	var out []config.Gallery
	if err := c.do(ctx, http.MethodGet, routeModelsGall, nil, &out); err != nil {
		return nil, err
	}
	return out, nil
}
func (c *Client) GetJobStatus(ctx context.Context, jobID string) (*localaitools.JobStatus, error) {
	if jobID == "" {
		return nil, errors.New("job id is required")
	}
	var raw struct {
		Processed          bool    `json:"processed"`
		Cancelled          bool    `json:"cancelled"`
		Progress           float64 `json:"progress"`
		Message            string  `json:"message"`
		FileSize           string  `json:"file_size"`
		DownloadedSize     string  `json:"downloaded_size"`
		Error              string  `json:"error,omitempty"`
		GalleryElementName string  `json:"gallery_element_name"`
	}
	if err := c.do(ctx, http.MethodGet, routeJobStatus(jobID), nil, &raw); err != nil {
		// "no such job" is not a real failure — surface (nil, nil) so the
		// LLM can stop polling without treating the response as an error.
		if errors.Is(err, ErrHTTPNotFound) {
			return nil, nil
		}
		return nil, err
	}
	return &localaitools.JobStatus{
		ID:                 jobID,
		Processed:          raw.Processed,
		Cancelled:          raw.Cancelled,
		Progress:           raw.Progress,
		TotalFileSize:      raw.FileSize,
		DownloadedFileSize: raw.DownloadedSize,
		Message:            raw.Message,
		ErrorMessage:       raw.Error,
	}, nil
}
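
// A caller-side polling sketch against this contract (the interval is
// an arbitrary illustrative choice):
//
//	for {
//		st, err := c.GetJobStatus(ctx, jobID)
//		if err != nil {
//			return err
//		}
//		if st == nil || st.Processed || st.Cancelled {
//			break // done, cancelled, or no longer tracked server-side
//		}
//		time.Sleep(2 * time.Second)
//	}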
// GetModelConfig is intentionally a stub for the HTTP client: LocalAI's
// /models/edit/:name endpoint returns rendered HTML, not JSON, so the
// standalone CLI's `get_model_config` tool surfaces a clear error to the
// LLM. Tracked under the localai-assistant follow-ups (see
// .agents/localai-assistant-mcp.md) — once a JSON-only
// GET /api/models/config-yaml/:name endpoint lands on the server, this
// method calls it and the stub goes away.
//
// FIXME(localai-assistant): wire to a JSON read-back endpoint.
func (c *Client) GetModelConfig(_ context.Context, _ string) (*localaitools.ModelConfigView, error) {
	return nil, errors.New("get_model_config over HTTP not yet supported by this client; use the in-process inproc client or REST /models/edit/{name}")
}
// ---- Models / gallery (write) ----

func (c *Client) InstallModel(ctx context.Context, req localaitools.InstallModelRequest) (string, error) {
	body := map[string]any{"id": req.ModelName}
	if req.GalleryName != "" {
		body["id"] = req.GalleryName + "@" + req.ModelName
	}
	body["name"] = req.ModelName
	if len(req.Overrides) > 0 {
		body["overrides"] = req.Overrides
	}
	var resp struct {
		ID        string `json:"uuid"`
		StatusURL string `json:"status"`
	}
	if err := c.do(ctx, http.MethodPost, routeModelsApply, body, &resp); err != nil {
		return "", err
	}
	return resp.ID, nil
}
func (c *Client) ImportModelURI(ctx context.Context, req localaitools.ImportModelURIRequest) (*localaitools.ImportModelURIResponse, error) {
	if req.URI == "" {
		return nil, errors.New("uri is required")
	}
	body := map[string]any{"uri": req.URI}
	if req.BackendPreference != "" {
		// Server expects preferences as a JSON object; wrap the backend
		// preference accordingly.
		body["preferences"] = map[string]string{"backend": req.BackendPreference}
	}
	rawReq, err := json.Marshal(body)
	if err != nil {
		return nil, fmt.Errorf("marshal body: %w", err)
	}
	httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, c.BaseURL+routeModelsImport, bytes.NewReader(rawReq))
	if err != nil {
		return nil, err
	}
	httpReq.Header.Set("Content-Type", "application/json")
	httpReq.Header.Set("Accept", "application/json")
	if c.APIKey != "" {
		httpReq.Header.Set("Authorization", "Bearer "+c.APIKey)
	}
	resp, err := c.HTTPClient.Do(httpReq)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()
	respBody, _ := io.ReadAll(resp.Body)
	// 400 with `error: "ambiguous import"` is not a transport error — it's the
	// disambiguation signal. Translate it back into AmbiguousBackend so the
	// MCP layer surface stays identical regardless of in-process vs HTTP.
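	// For reference, a 400 body in that shape might look like (values are
	// illustrative, derived from the amb struct below rather than server docs):
	//
	//	{"error": "ambiguous import", "modality": "text-to-text",
	//	 "candidates": ["llama-cpp", "vllm"], "hint": "retry with a backend preference"}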
	if resp.StatusCode == http.StatusBadRequest {
		var amb struct {
			Error      string   `json:"error"`
			Detail     string   `json:"detail"`
			Modality   string   `json:"modality"`
			Candidates []string `json:"candidates"`
			Hint       string   `json:"hint"`
		}
		if json.Unmarshal(respBody, &amb) == nil && amb.Error == "ambiguous import" {
			return &localaitools.ImportModelURIResponse{
				AmbiguousBackend:  true,
				Modality:          amb.Modality,
				BackendCandidates: amb.Candidates,
				Hint:              amb.Hint,
			}, nil
		}
	}
	if resp.StatusCode < 200 || resp.StatusCode >= 300 {
		return nil, fmt.Errorf("POST %s: %d %s: %s", routeModelsImport, resp.StatusCode, http.StatusText(resp.StatusCode), strings.TrimSpace(string(respBody)))
	}
	var raw struct {
		ID string `json:"uuid"`
	}
	if err := json.Unmarshal(respBody, &raw); err != nil {
		return nil, fmt.Errorf("decode import response: %w", err)
	}
	return &localaitools.ImportModelURIResponse{JobID: raw.ID}, nil
}
func (c *Client) DeleteModel(ctx context.Context, name string) error {
	return c.do(ctx, http.MethodPost, routeModelDelete(name), nil, nil)
}

func (c *Client) EditModelConfig(ctx context.Context, name string, patch map[string]any) error {
	return c.do(ctx, http.MethodPatch, routeModelConfigJSON(name), patch, nil)
}

func (c *Client) ReloadModels(ctx context.Context) error {
	return c.do(ctx, http.MethodPost, routeModelsReload, nil, nil)
}
// ---- Backends ----

func (c *Client) ListBackends(ctx context.Context) ([]localaitools.Backend, error) {
	var raw []struct {
		Name      string `json:"name"`
		Installed bool   `json:"installed"`
	}
	if err := c.do(ctx, http.MethodGet, routeBackends, nil, &raw); err != nil {
		return nil, err
	}
	out := make([]localaitools.Backend, 0, len(raw))
	for _, b := range raw {
		out = append(out, localaitools.Backend{Name: b.Name, Installed: b.Installed})
	}
	return out, nil
}

func (c *Client) ListKnownBackends(ctx context.Context) ([]schema.KnownBackend, error) {
	// /backends/known emits []schema.KnownBackend directly — pass through.
	var out []schema.KnownBackend
	if err := c.do(ctx, http.MethodGet, routeBackendsKnown, nil, &out); err != nil {
		return nil, err
	}
	return out, nil
}

func (c *Client) InstallBackend(ctx context.Context, req localaitools.InstallBackendRequest) (string, error) {
	body := map[string]any{"id": req.BackendName}
	if req.GalleryName != "" {
		body["id"] = req.GalleryName + "@" + req.BackendName
	}
	body["name"] = req.BackendName
	var resp struct {
		ID string `json:"uuid"`
	}
	if err := c.do(ctx, http.MethodPost, routeBackendsApply, body, &resp); err != nil {
		return "", err
	}
	return resp.ID, nil
}

func (c *Client) UpgradeBackend(ctx context.Context, name string) (string, error) {
	var resp struct {
		ID string `json:"uuid"`
	}
	if err := c.do(ctx, http.MethodPost, routeBackendUpgrade(name), nil, &resp); err != nil {
		return "", err
	}
	return resp.ID, nil
}
// ---- System ----

func (c *Client) SystemInfo(ctx context.Context) (*localaitools.SystemInfo, error) {
	var welcome struct {
		Version           string          `json:"Version"`
		LoadedModels      []any           `json:"LoadedModels"`
		InstalledBackends map[string]bool `json:"InstalledBackends"`
	}
	if err := c.do(ctx, http.MethodGet, routeWelcome, nil, &welcome); err != nil {
		return nil, err
	}
	info := &localaitools.SystemInfo{Version: welcome.Version}
	for name := range welcome.InstalledBackends {
		info.InstalledBackends = append(info.InstalledBackends, name)
	}
	// LoadedModels shape varies; we don't attempt to decode it client-side.
	return info, nil
}

func (c *Client) ListNodes(ctx context.Context) ([]localaitools.Node, error) {
	var raw []struct {
		ID          string `json:"id"`
		Address     string `json:"address"`
		HTTPAddress string `json:"http_address"`
		Status      string `json:"status"`
	}
	if err := c.do(ctx, http.MethodGet, routeNodes, nil, &raw); err != nil {
		// Treat 404/disabled as "no nodes" to keep parity with single-process.
		if errors.Is(err, ErrHTTPNotFound) {
			return []localaitools.Node{}, nil
		}
		return nil, err
	}
	out := make([]localaitools.Node, 0, len(raw))
	for _, n := range raw {
		out = append(out, localaitools.Node{
			ID:          n.ID,
			Address:     n.Address,
			HTTPAddress: n.HTTPAddress,
			Healthy:     n.Status == "healthy",
		})
	}
	return out, nil
}

func (c *Client) VRAMEstimate(ctx context.Context, req localaitools.VRAMEstimateRequest) (*vram.EstimateResult, error) {
	body := map[string]any{"model": req.ModelName}
	if req.ContextSize > 0 {
		body["context_size"] = req.ContextSize
	}
	if req.GPULayers != 0 {
		body["gpu_layers"] = req.GPULayers
	}
	if req.KVQuantBits > 0 {
		body["kv_quant_bits"] = req.KVQuantBits
	}
	// /api/models/vram-estimate returns a wrapper carrying vram.EstimateResult
	// (size_bytes/size_display/vram_bytes/vram_display) plus context-note
	// fields. Decode directly into EstimateResult — the LLM gets the
	// pre-formatted display strings, identical to REST.
	var out vram.EstimateResult
	if err := c.do(ctx, http.MethodPost, routeVRAMEstimate, body, &out); err != nil {
		return nil, err
	}
	return &out, nil
}
// ---- State ----

func (c *Client) ToggleModelState(ctx context.Context, name string, action modeladmin.Action) error {
	return c.do(ctx, http.MethodPut, routeToggleModelState(name, string(action)), nil, nil)
}

func (c *Client) ToggleModelPinned(ctx context.Context, name string, action modeladmin.Action) error {
	return c.do(ctx, http.MethodPut, routeToggleModelPinned(name, string(action)), nil, nil)
}
// ---- Branding ----

// brandingResponse mirrors the JSON shape emitted by GET /api/branding.
// We don't import the server-side type here so the MCP HTTP client stays
// independent of the localai endpoint package.
type brandingResponse struct {
	InstanceName      string `json:"instance_name"`
	InstanceTagline   string `json:"instance_tagline"`
	LogoURL           string `json:"logo_url"`
	LogoHorizontalURL string `json:"logo_horizontal_url"`
	FaviconURL        string `json:"favicon_url"`
}
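
// For reference, a payload in this shape (values are illustrative; the
// URLs point at wherever the server serves the stored assets from):
//
//	{
//		"instance_name": "LocalAI",
//		"instance_tagline": "Self-hosted, community-driven",
//		"logo_url": "/branding/logo.png",
//		"logo_horizontal_url": "/branding/logo-horizontal.png",
//		"favicon_url": "/branding/favicon.ico"
//	}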
func (c *Client) GetBranding(ctx context.Context) (*localaitools.Branding, error) {
	var raw brandingResponse
	if err := c.do(ctx, http.MethodGet, routeBranding, nil, &raw); err != nil {
		return nil, err
	}
	return (*localaitools.Branding)(&raw), nil
}

func (c *Client) SetBranding(ctx context.Context, req localaitools.SetBrandingRequest) (*localaitools.Branding, error) {
	// Text fields ride the existing /api/settings POST, which maps the
	// pointer fields onto RuntimeSettings.InstanceName / InstanceTagline.
	body := map[string]any{}
	if req.InstanceName != nil {
		body["instance_name"] = *req.InstanceName
	}
	if req.InstanceTagline != nil {
		body["instance_tagline"] = *req.InstanceTagline
	}
	if len(body) == 0 {
		return c.GetBranding(ctx)
	}
	if err := c.do(ctx, http.MethodPost, routeSettings, body, nil); err != nil {
		return nil, err
	}
	return c.GetBranding(ctx)
}
// ---- helpers ----

func contains(haystack, lowerNeedle string) bool {
	return strings.Contains(strings.ToLower(haystack), lowerNeedle)
}

func containsTagsAny(tags []string, lowerNeedle string) bool {
	for _, t := range tags {
		if strings.Contains(strings.ToLower(t), lowerNeedle) {
			return true
		}
	}
	return false
}

func containsTagExact(tags []string, lowerNeedle string) bool {
	for _, t := range tags {
		if strings.EqualFold(t, lowerNeedle) {
			return true
		}
	}
	return false
}