Files
LocalAI/pkg/mcp/localaitools/httpapi/client.go
Ettore Di Giacinto b1a99436c7 feat(branding): admin-configurable instance name, tagline, and assets (#9635)
Adds a whitelabeling feature so an operator can replace the LocalAI
instance name, tagline, square logo, horizontal logo, and favicon from
the admin Settings page. Defaults fall back to the bundled assets so
existing installs are unaffected.

The public GET /api/branding endpoint is reachable pre-auth so the
login screen can render the configured branding before sign-in.
Mutating routes (POST/DELETE /api/branding/asset/:kind) remain
admin-only. Text fields (instance_name, instance_tagline) ride the
existing /api/settings flow; binary assets get a dedicated multipart
upload route that persists files under DynamicConfigsDir/branding/.
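As a sketch, a pre-auth client can render the branding with a plain
GET (the listen address below is an illustrative assumption; the field
names match the brandingResponse shape in the client code):

    package main

    import (
        "encoding/json"
        "fmt"
        "net/http"
    )

    func main() {
        // No Authorization header: /api/branding is public by design.
        resp, err := http.Get("http://localhost:8080/api/branding")
        if err != nil {
            panic(err)
        }
        defer resp.Body.Close()
        var b struct {
            InstanceName    string `json:"instance_name"`
            InstanceTagline string `json:"instance_tagline"`
        }
        if err := json.NewDecoder(resp.Body).Decode(&b); err != nil {
            panic(err)
        }
        fmt.Println(b.InstanceName, "/", b.InstanceTagline)
    }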

To prevent the Settings page's stale local state from clobbering an
upload on save, UpdateSettingsEndpoint preserves the on-disk asset
filename fields regardless of the request body; the
/api/branding/asset/* routes are the sole writers for those fields.

The MCP catalog gains get_branding and set_branding tools (text fields
only; file upload stays UI-only) plus a configure_branding skill prompt.

Wiring this up surfaced the same restart-loss class of bug in several
existing fields whose RuntimeSettings entries were never read by the
startup loader. Fix loadRuntimeSettingsFromFile() to load:

  - branding (instance_name, instance_tagline, *_file basenames)
  - auto_upgrade_backends, prefer_development_backends
  - localai_assistant_enabled
  - open_responses_store_ttl
  - the 7 existing AgentPool fields (enabled, default/embedding model,
    chunking sizes, enable_logs, collection_db_path)

Also exposes 3 new AgentPool runtime settings (vector_engine,
database_url, agent_hub_url) via /api/settings + the Settings UI, with
the same load-on-startup wiring. The file watcher's manual-edit path
is intentionally left unchanged: the in-process API endpoints already
update appConfig directly, so the watcher is redundant for the
supported flows, and covering manual edits is a separate refactor.

15 TDD specs cover the loader behaviour (1 branding + 11 adjacent + 3
new agent-pool); 2 specs cover the persistence helpers and the
clobber-prevention contract.


Assisted-by: claude-code:claude-opus-4-7

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2026-05-02 15:51:36 +02:00

532 lines
17 KiB
Go

// Package httpapi provides a LocalAIClient that talks to a remote LocalAI
// instance over its REST API. Used by the standalone "local-ai mcp-server"
// subcommand to control a remote deployment over stdio.
package httpapi

import (
	"bytes"
	"context"
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"net/http"
	"strings"
	"time"

	"github.com/mudler/LocalAI/core/config"
	"github.com/mudler/LocalAI/core/gallery"
	"github.com/mudler/LocalAI/core/schema"
	"github.com/mudler/LocalAI/core/services/modeladmin"
	localaitools "github.com/mudler/LocalAI/pkg/mcp/localaitools"
	"github.com/mudler/LocalAI/pkg/vram"
)
// Client is a thin REST wrapper. It maps each LocalAIClient method to the
// matching admin endpoint. Errors from non-2xx responses include the body for
// the MCP layer to surface verbatim to the LLM.
type Client struct {
	BaseURL    string
	APIKey     string
	HTTPClient *http.Client
}

// New returns a Client targeting baseURL with an optional bearer token.
func New(baseURL, apiKey string) *Client {
	return &Client{
		BaseURL: strings.TrimRight(baseURL, "/"),
		APIKey:  apiKey,
		HTTPClient: &http.Client{
			Timeout: 60 * time.Second,
		},
	}
}
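
// A minimal usage sketch (the base URL and the key's env var are
// placeholders, not part of this package's contract):
//
//	c := New("http://localhost:8080", os.Getenv("LOCALAI_API_KEY"))
//	galleries, err := c.ListGalleries(context.Background())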
// Compile-time assertion.
var _ localaitools.LocalAIClient = (*Client)(nil)

// HTTPError is returned by do() for non-2xx responses. Callers should use
// errors.Is(err, ErrHTTPNotFound) instead of substring-matching on
// err.Error() — the latter is brittle to status-code formatting changes.
type HTTPError struct {
	Method     string
	Path       string
	StatusCode int
	Body       string
}

func (e *HTTPError) Error() string {
	return fmt.Sprintf("%s %s: %d %s: %s", e.Method, e.Path, e.StatusCode, http.StatusText(e.StatusCode), strings.TrimSpace(e.Body))
}

// ErrHTTPNotFound is the sentinel for "the resource you asked for doesn't
// exist". Match it via errors.Is on an *HTTPError.
var ErrHTTPNotFound = errors.New("httpapi: not found")

// Is supports errors.Is(*HTTPError, ErrHTTPNotFound). The 500-with-text
// branch is a transitional fallback for /models/jobs/:uuid which today
// returns a 500 carrying "could not find any status for ID" instead of a
// proper 404. Drop the branch when the server is fixed.
func (e *HTTPError) Is(target error) bool {
	if target != ErrHTTPNotFound {
		return false
	}
	if e.StatusCode == http.StatusNotFound {
		return true
	}
	return e.StatusCode == http.StatusInternalServerError && strings.Contains(e.Body, "could not find")
}
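
// For example, a caller can branch on "already gone" without substring
// checks (a sketch using this file's DeleteModel):
//
//	if err := c.DeleteModel(ctx, name); errors.Is(err, ErrHTTPNotFound) {
//		// the model was already absent; safe to treat as success
//	}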
// ---- HTTP helpers ----

func (c *Client) do(ctx context.Context, method, path string, body any, out any) error {
	var rdr io.Reader
	if body != nil {
		raw, err := json.Marshal(body)
		if err != nil {
			return fmt.Errorf("marshal body: %w", err)
		}
		rdr = bytes.NewReader(raw)
	}
	req, err := http.NewRequestWithContext(ctx, method, c.BaseURL+path, rdr)
	if err != nil {
		return err
	}
	if body != nil {
		req.Header.Set("Content-Type", "application/json")
	}
	req.Header.Set("Accept", "application/json")
	if c.APIKey != "" {
		req.Header.Set("Authorization", "Bearer "+c.APIKey)
	}
	resp, err := c.HTTPClient.Do(req)
	if err != nil {
		return err
	}
	defer resp.Body.Close()
	respBody, _ := io.ReadAll(resp.Body)
	if resp.StatusCode < 200 || resp.StatusCode >= 300 {
		return &HTTPError{Method: method, Path: path, StatusCode: resp.StatusCode, Body: string(respBody)}
	}
	if out == nil {
		return nil
	}
	if err := json.Unmarshal(respBody, out); err != nil {
		return fmt.Errorf("decode %s %s response: %w (body=%q)", method, path, err, truncate(string(respBody), 200))
	}
	return nil
}

func truncate(s string, n int) string {
	if len(s) <= n {
		return s
	}
	return s[:n] + "..."
}
// ---- Models / gallery (read) ----

func (c *Client) GallerySearch(ctx context.Context, q localaitools.GallerySearchQuery) ([]gallery.Metadata, error) {
	// /models/available already returns []gallery.Metadata — pass it
	// through after applying the LLM-supplied filters client-side.
	var metas []gallery.Metadata
	if err := c.do(ctx, http.MethodGet, routeModelsAvail, nil, &metas); err != nil {
		return nil, err
	}
	limit := q.Limit
	if limit <= 0 {
		limit = 20
	}
	out := make([]gallery.Metadata, 0, limit)
	needle := strings.ToLower(q.Query)
	tag := strings.ToLower(q.Tag)
	for _, m := range metas {
		if q.Gallery != "" && m.Gallery.Name != q.Gallery {
			continue
		}
		if needle != "" && !contains(m.Name, needle) && !contains(m.Description, needle) && !containsTagsAny(m.Tags, needle) {
			continue
		}
		if tag != "" && !containsTagExact(m.Tags, tag) {
			continue
		}
		out = append(out, m)
		if len(out) >= limit {
			break
		}
	}
	return out, nil
}
func (c *Client) ListInstalledModels(ctx context.Context, capability localaitools.Capability) ([]localaitools.InstalledModel, error) {
	_ = capability // Capability filtering is unavailable over the welcome HTTP shape today; see TODO below.
	// /v1/models is the OpenAI-compat shape; we use the LocalAI welcome JSON
	// for richer info.
	var welcome struct {
		ModelsConfig []struct {
			Name    string `json:"name"`
			Backend string `json:"backend"`
		} `json:"ModelsConfig"`
	}
	if err := c.do(ctx, http.MethodGet, routeWelcome, nil, &welcome); err != nil {
		return nil, err
	}
	// Capability filtering is unavailable over HTTP without a dedicated endpoint
	// — for now we return everything and let the LLM filter from the names. A
	// follow-up should add a /api/models?capability=chat endpoint.
	out := make([]localaitools.InstalledModel, 0, len(welcome.ModelsConfig))
	for _, m := range welcome.ModelsConfig {
		out = append(out, localaitools.InstalledModel{Name: m.Name, Backend: m.Backend})
	}
	return out, nil
}

func (c *Client) ListGalleries(ctx context.Context) ([]config.Gallery, error) {
	// /models/galleries returns []config.Gallery directly.
	var out []config.Gallery
	if err := c.do(ctx, http.MethodGet, routeModelsGall, nil, &out); err != nil {
		return nil, err
	}
	return out, nil
}
func (c *Client) GetJobStatus(ctx context.Context, jobID string) (*localaitools.JobStatus, error) {
	if jobID == "" {
		return nil, errors.New("job id is required")
	}
	var raw struct {
		Processed          bool    `json:"processed"`
		Cancelled          bool    `json:"cancelled"`
		Progress           float64 `json:"progress"`
		Message            string  `json:"message"`
		FileSize           string  `json:"file_size"`
		DownloadedSize     string  `json:"downloaded_size"`
		Error              string  `json:"error,omitempty"`
		GalleryElementName string  `json:"gallery_element_name"`
	}
	if err := c.do(ctx, http.MethodGet, routeJobStatus(jobID), nil, &raw); err != nil {
		// "no such job" is not a real failure — surface (nil, nil) so the
		// LLM can stop polling without treating the response as an error.
		if errors.Is(err, ErrHTTPNotFound) {
			return nil, nil
		}
		return nil, err
	}
	return &localaitools.JobStatus{
		ID:                 jobID,
		Processed:          raw.Processed,
		Cancelled:          raw.Cancelled,
		Progress:           raw.Progress,
		TotalFileSize:      raw.FileSize,
		DownloadedFileSize: raw.DownloadedSize,
		Message:            raw.Message,
		ErrorMessage:       raw.Error,
	}, nil
}
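
// A caller-side polling sketch against this contract (the interval is
// an arbitrary illustrative choice):
//
//	for {
//		st, err := c.GetJobStatus(ctx, jobID)
//		if err != nil {
//			return err
//		}
//		if st == nil || st.Processed || st.Cancelled {
//			break // done, cancelled, or no longer tracked server-side
//		}
//		time.Sleep(2 * time.Second)
//	}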
// GetModelConfig is intentionally a stub for the HTTP client: LocalAI's
// /models/edit/:name endpoint returns rendered HTML, not JSON, so the
// standalone CLI's `get_model_config` tool surfaces a clear error to the
// LLM. Tracked under the localai-assistant follow-ups (see
// .agents/localai-assistant-mcp.md) — once a JSON-only
// GET /api/models/config-yaml/:name endpoint lands on the server, this
// method calls it and the stub goes away.
//
// FIXME(localai-assistant): wire to a JSON read-back endpoint.
func (c *Client) GetModelConfig(_ context.Context, _ string) (*localaitools.ModelConfigView, error) {
	return nil, errors.New("get_model_config over HTTP not yet supported by this client; use the in-process inproc client or REST /models/edit/{name}")
}
// ---- Models / gallery (write) ----

func (c *Client) InstallModel(ctx context.Context, req localaitools.InstallModelRequest) (string, error) {
	body := map[string]any{"id": req.ModelName}
	if req.GalleryName != "" {
		body["id"] = req.GalleryName + "@" + req.ModelName
	}
	body["name"] = req.ModelName
	if len(req.Overrides) > 0 {
		body["overrides"] = req.Overrides
	}
	var resp struct {
		ID        string `json:"uuid"`
		StatusURL string `json:"status"`
	}
	if err := c.do(ctx, http.MethodPost, routeModelsApply, body, &resp); err != nil {
		return "", err
	}
	return resp.ID, nil
}
func (c *Client) ImportModelURI(ctx context.Context, req localaitools.ImportModelURIRequest) (*localaitools.ImportModelURIResponse, error) {
	if req.URI == "" {
		return nil, errors.New("uri is required")
	}
	body := map[string]any{"uri": req.URI}
	if req.BackendPreference != "" {
		// Server expects preferences as a JSON object; wrap the backend
		// preference accordingly.
		body["preferences"] = map[string]string{"backend": req.BackendPreference}
	}
	rawReq, err := json.Marshal(body)
	if err != nil {
		return nil, fmt.Errorf("marshal body: %w", err)
	}
	httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, c.BaseURL+routeModelsImport, bytes.NewReader(rawReq))
	if err != nil {
		return nil, err
	}
	httpReq.Header.Set("Content-Type", "application/json")
	httpReq.Header.Set("Accept", "application/json")
	if c.APIKey != "" {
		httpReq.Header.Set("Authorization", "Bearer "+c.APIKey)
	}
	resp, err := c.HTTPClient.Do(httpReq)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()
	respBody, _ := io.ReadAll(resp.Body)
	// 400 with `error: "ambiguous import"` is not a transport error — it's the
	// disambiguation signal. Translate it back into AmbiguousBackend so the
	// MCP layer surface stays identical regardless of in-process vs HTTP.
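	// For reference, a 400 body in that shape might look like (values are
	// illustrative, derived from the amb struct below rather than server docs):
	//
	//	{"error": "ambiguous import", "modality": "text-to-text",
	//	 "candidates": ["llama-cpp", "vllm"], "hint": "retry with a backend preference"}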
	if resp.StatusCode == http.StatusBadRequest {
		var amb struct {
			Error      string   `json:"error"`
			Detail     string   `json:"detail"`
			Modality   string   `json:"modality"`
			Candidates []string `json:"candidates"`
			Hint       string   `json:"hint"`
		}
		if json.Unmarshal(respBody, &amb) == nil && amb.Error == "ambiguous import" {
			return &localaitools.ImportModelURIResponse{
				AmbiguousBackend:  true,
				Modality:          amb.Modality,
				BackendCandidates: amb.Candidates,
				Hint:              amb.Hint,
			}, nil
		}
	}
	if resp.StatusCode < 200 || resp.StatusCode >= 300 {
		return nil, fmt.Errorf("POST %s: %d %s: %s", routeModelsImport, resp.StatusCode, http.StatusText(resp.StatusCode), strings.TrimSpace(string(respBody)))
	}
	var raw struct {
		ID string `json:"uuid"`
	}
	if err := json.Unmarshal(respBody, &raw); err != nil {
		return nil, fmt.Errorf("decode import response: %w", err)
	}
	return &localaitools.ImportModelURIResponse{JobID: raw.ID}, nil
}
func (c *Client) DeleteModel(ctx context.Context, name string) error {
	return c.do(ctx, http.MethodPost, routeModelDelete(name), nil, nil)
}

func (c *Client) EditModelConfig(ctx context.Context, name string, patch map[string]any) error {
	return c.do(ctx, http.MethodPatch, routeModelConfigJSON(name), patch, nil)
}

func (c *Client) ReloadModels(ctx context.Context) error {
	return c.do(ctx, http.MethodPost, routeModelsReload, nil, nil)
}
// ---- Backends ----

func (c *Client) ListBackends(ctx context.Context) ([]localaitools.Backend, error) {
	var raw []struct {
		Name      string `json:"name"`
		Installed bool   `json:"installed"`
	}
	if err := c.do(ctx, http.MethodGet, routeBackends, nil, &raw); err != nil {
		return nil, err
	}
	out := make([]localaitools.Backend, 0, len(raw))
	for _, b := range raw {
		out = append(out, localaitools.Backend{Name: b.Name, Installed: b.Installed})
	}
	return out, nil
}

func (c *Client) ListKnownBackends(ctx context.Context) ([]schema.KnownBackend, error) {
	// /backends/known emits []schema.KnownBackend directly — pass through.
	var out []schema.KnownBackend
	if err := c.do(ctx, http.MethodGet, routeBackendsKnown, nil, &out); err != nil {
		return nil, err
	}
	return out, nil
}

func (c *Client) InstallBackend(ctx context.Context, req localaitools.InstallBackendRequest) (string, error) {
	body := map[string]any{"id": req.BackendName}
	if req.GalleryName != "" {
		body["id"] = req.GalleryName + "@" + req.BackendName
	}
	body["name"] = req.BackendName
	var resp struct {
		ID string `json:"uuid"`
	}
	if err := c.do(ctx, http.MethodPost, routeBackendsApply, body, &resp); err != nil {
		return "", err
	}
	return resp.ID, nil
}

func (c *Client) UpgradeBackend(ctx context.Context, name string) (string, error) {
	var resp struct {
		ID string `json:"uuid"`
	}
	if err := c.do(ctx, http.MethodPost, routeBackendUpgrade(name), nil, &resp); err != nil {
		return "", err
	}
	return resp.ID, nil
}
// ---- System ----

func (c *Client) SystemInfo(ctx context.Context) (*localaitools.SystemInfo, error) {
	var welcome struct {
		Version           string          `json:"Version"`
		LoadedModels      []any           `json:"LoadedModels"`
		InstalledBackends map[string]bool `json:"InstalledBackends"`
	}
	if err := c.do(ctx, http.MethodGet, routeWelcome, nil, &welcome); err != nil {
		return nil, err
	}
	info := &localaitools.SystemInfo{Version: welcome.Version}
	for name := range welcome.InstalledBackends {
		info.InstalledBackends = append(info.InstalledBackends, name)
	}
	// LoadedModels shape varies; we don't attempt to decode it client-side.
	return info, nil
}

func (c *Client) ListNodes(ctx context.Context) ([]localaitools.Node, error) {
	var raw []struct {
		ID          string `json:"id"`
		Address     string `json:"address"`
		HTTPAddress string `json:"http_address"`
		Status      string `json:"status"`
	}
	if err := c.do(ctx, http.MethodGet, routeNodes, nil, &raw); err != nil {
		// Treat 404/disabled as "no nodes" to keep parity with single-process.
		if errors.Is(err, ErrHTTPNotFound) {
			return []localaitools.Node{}, nil
		}
		return nil, err
	}
	out := make([]localaitools.Node, 0, len(raw))
	for _, n := range raw {
		out = append(out, localaitools.Node{
			ID:          n.ID,
			Address:     n.Address,
			HTTPAddress: n.HTTPAddress,
			Healthy:     n.Status == "healthy",
		})
	}
	return out, nil
}

func (c *Client) VRAMEstimate(ctx context.Context, req localaitools.VRAMEstimateRequest) (*vram.EstimateResult, error) {
	body := map[string]any{"model": req.ModelName}
	if req.ContextSize > 0 {
		body["context_size"] = req.ContextSize
	}
	if req.GPULayers != 0 {
		body["gpu_layers"] = req.GPULayers
	}
	if req.KVQuantBits > 0 {
		body["kv_quant_bits"] = req.KVQuantBits
	}
	// /api/models/vram-estimate returns a wrapper carrying vram.EstimateResult
	// (size_bytes/size_display/vram_bytes/vram_display) plus context-note
	// fields. Decode directly into EstimateResult — the LLM gets the
	// pre-formatted display strings, identical to REST.
	var out vram.EstimateResult
	if err := c.do(ctx, http.MethodPost, routeVRAMEstimate, body, &out); err != nil {
		return nil, err
	}
	return &out, nil
}
// ---- State ----

func (c *Client) ToggleModelState(ctx context.Context, name string, action modeladmin.Action) error {
	return c.do(ctx, http.MethodPut, routeToggleModelState(name, string(action)), nil, nil)
}

func (c *Client) ToggleModelPinned(ctx context.Context, name string, action modeladmin.Action) error {
	return c.do(ctx, http.MethodPut, routeToggleModelPinned(name, string(action)), nil, nil)
}
// ---- Branding ----

// brandingResponse mirrors the JSON shape emitted by GET /api/branding.
// We don't import the server-side type here so the MCP HTTP client stays
// independent of the localai endpoint package.
type brandingResponse struct {
	InstanceName      string `json:"instance_name"`
	InstanceTagline   string `json:"instance_tagline"`
	LogoURL           string `json:"logo_url"`
	LogoHorizontalURL string `json:"logo_horizontal_url"`
	FaviconURL        string `json:"favicon_url"`
}
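
// For reference, a payload in this shape (values are illustrative; the
// URLs point at wherever the server serves the stored assets from):
//
//	{
//		"instance_name": "LocalAI",
//		"instance_tagline": "Self-hosted, community-driven",
//		"logo_url": "/branding/logo.png",
//		"logo_horizontal_url": "/branding/logo-horizontal.png",
//		"favicon_url": "/branding/favicon.ico"
//	}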
func (c *Client) GetBranding(ctx context.Context) (*localaitools.Branding, error) {
	var raw brandingResponse
	if err := c.do(ctx, http.MethodGet, routeBranding, nil, &raw); err != nil {
		return nil, err
	}
	return (*localaitools.Branding)(&raw), nil
}

func (c *Client) SetBranding(ctx context.Context, req localaitools.SetBrandingRequest) (*localaitools.Branding, error) {
	// Text fields ride the existing /api/settings POST, which maps the
	// pointer fields onto RuntimeSettings.InstanceName / InstanceTagline.
	body := map[string]any{}
	if req.InstanceName != nil {
		body["instance_name"] = *req.InstanceName
	}
	if req.InstanceTagline != nil {
		body["instance_tagline"] = *req.InstanceTagline
	}
	if len(body) == 0 {
		return c.GetBranding(ctx)
	}
	if err := c.do(ctx, http.MethodPost, routeSettings, body, nil); err != nil {
		return nil, err
	}
	return c.GetBranding(ctx)
}
// ---- helpers ----

func contains(haystack, lowerNeedle string) bool {
	return strings.Contains(strings.ToLower(haystack), lowerNeedle)
}

func containsTagsAny(tags []string, lowerNeedle string) bool {
	for _, t := range tags {
		if strings.Contains(strings.ToLower(t), lowerNeedle) {
			return true
		}
	}
	return false
}

func containsTagExact(tags []string, lowerNeedle string) bool {
	for _, t := range tags {
		if strings.EqualFold(t, lowerNeedle) {
			return true
		}
	}
	return false
}