Files
LocalAI/pkg/mcp/localaitools/tools_models.go
Richard Palethorpe eb32cd9073 feat(realtime): eager blocking pipeline warm-up + /backend/load API (#10662)
Realtime sessions previously lazy-loaded each pipeline sub-model (VAD,
transcription, LLM, TTS) on first use, so every cold session paid a
per-request model-load stall and load errors only surfaced mid-stream.

Warm the whole pipeline eagerly at session start, blocking until it has
loaded (this includes the voice-gate speaker-recognition model, on which
an enforced gate blocks every utterance; compaction's summary_model
stays lazy since it only runs off the response path):
- Add backend.PreloadModel / PreloadModelByName as the single load path
  for every modality (no transcription special-case; backend-omitted
  configs are deprecated).
- The realtime session blocks on Model.Warmup and returns a
  model_load_error to the client if any stage fails to load;
  updateSession warms in the background. Opt out per pipeline with
  pipeline.disable_warmup, exposed as a UI toggle via the
  config-metadata registry.

Add a LocalAI-native POST /backend/load (and /v1/backend/load) that
pre-loads a model -- expanding realtime pipelines into their sub-models
-- as the inverse of /backend/shutdown. There is one preload engine
(backend.PreloadStages): the realtime Warmup methods, /backend/load and
the --load-to-memory startup flag all use it, so --load-to-memory now
also expands pipeline models and records load-failure traces. Pipeline
sub-model alias resolution is likewise shared
(ModelConfigLoader.LoadResolvedModelConfig). Surface the endpoint
everywhere an admin manages models:
- MCP admin tool load_model (httpapi + inproc clients, safety/catalog
  prompts, catalog/dispatch tests).
- "Load into memory" action in the React models UI.
- Swagger regenerated; docs moved to the general backend-monitor page
  since it is not realtime-specific.

Fix a Traces UI crash ("json: unsupported value: -Inf"): audio-snippet
RMS/peak now floor at a finite dBFS, and backend-trace data is sanitized
to drop non-finite floats before marshaling. The sanitizer is
copy-on-write -- it runs on every RecordBackendTrace, so containers are
only re-allocated on the paths that actually changed.

Migrate core/http/openresponses_test.go onto the prebuilt mock-backend
the rest of the http suite already uses -- it was the last spec still
pointing at a real HuggingFace model, so it 404'd wherever no vision
backend was built -- and fix its item_reference specs to send the
spec's "id" field instead of "item_id", which the handler never
accepted.

Assisted-by: Claude:claude-opus-4-8 Claude Code

Signed-off-by: Richard Palethorpe <io@richiejp.com>
2026-07-03 18:00:37 +02:00

130 lines
5.4 KiB
Go

package localaitools
import (
"context"
"github.com/modelcontextprotocol/go-sdk/mcp"
)
// registerModelTools adds the model-management tools to the MCP server s,
// backing each one with the corresponding method on client.
//
// The read-only tools (gallery search, installed-model listing, gallery
// listing, job status) are always registered. The tools registered after the
// opts.DisableMutating check (load, install, import, delete) are skipped when
// that option is set.
//
// Every handler returns a nil Go error: failures are carried in the result
// built by errorResult / errorResultf, and successes in the result built by
// jsonResult.
func registerModelTools(s *mcp.Server, client LocalAIClient, opts Options) {
	// Gallery search.
	searchTool := &mcp.Tool{
		Name:        ToolGallerySearch,
		Description: "Search configured galleries for installable models. Returns name, gallery, description, license and tags. Always run this before install_model.",
	}
	mcp.AddTool(s, searchTool, func(ctx context.Context, _ *mcp.CallToolRequest, query GallerySearchQuery) (*mcp.CallToolResult, any, error) {
		found, err := client.GallerySearch(ctx, query)
		if err != nil {
			return errorResult(err), nil, nil
		}
		return jsonResult(found), nil, nil
	})

	// Installed models, optionally narrowed by capability.
	listInstalledTool := &mcp.Tool{
		Name:        ToolListInstalledModels,
		Description: "List models currently installed on this LocalAI. Optional capability filter (chat, completion, embeddings, image, tts, transcript, rerank, vad).",
	}
	mcp.AddTool(s, listInstalledTool, func(ctx context.Context, _ *mcp.CallToolRequest, in struct {
		Capability Capability `json:"capability,omitempty" jsonschema:"Filter to models advertising this capability. One of: chat, completion, embeddings, image, tts, transcript, rerank, vad. Empty value = no filter."`
	}) (*mcp.CallToolResult, any, error) {
		installed, err := client.ListInstalledModels(ctx, in.Capability)
		if err != nil {
			return errorResult(err), nil, nil
		}
		return jsonResult(installed), nil, nil
	})

	// Configured galleries; the tool takes no arguments.
	listGalleriesTool := &mcp.Tool{
		Name:        ToolListGalleries,
		Description: "List configured model galleries.",
	}
	mcp.AddTool(s, listGalleriesTool, func(ctx context.Context, _ *mcp.CallToolRequest, _ struct{}) (*mcp.CallToolResult, any, error) {
		configured, err := client.ListGalleries(ctx)
		if err != nil {
			return errorResult(err), nil, nil
		}
		return jsonResult(configured), nil, nil
	})

	// Job status polling.
	jobStatusTool := &mcp.Tool{
		Name:        ToolGetJobStatus,
		Description: "Poll the status of an install/delete/upgrade job by id. Returns processed, progress, message, and error fields.",
	}
	mcp.AddTool(s, jobStatusTool, func(ctx context.Context, _ *mcp.CallToolRequest, in struct {
		JobID string `json:"job_id" jsonschema:"The job id returned by install_model / install_backend / upgrade_backend / delete_model."`
	}) (*mcp.CallToolResult, any, error) {
		if len(in.JobID) == 0 {
			return errorResultf("job_id is required"), nil, nil
		}
		job, err := client.GetJobStatus(ctx, in.JobID)
		switch {
		case err != nil:
			return errorResult(err), nil, nil
		case job == nil:
			// The client reported no error but also returned no status.
			return errorResultf("no job with id %q", in.JobID), nil, nil
		default:
			return jsonResult(job), nil, nil
		}
	})

	// Everything registered past this point is skipped when mutating tools
	// are disabled.
	if opts.DisableMutating {
		return
	}

	// Load a model into memory.
	loadTool := &mcp.Tool{
		Name:        ToolLoadModel,
		Description: "Pre-load a model into memory by name so the first request pays no cold-start cost (the inverse of shutting a model down). For a realtime pipeline model every configured sub-model (VAD, transcription, LLM, TTS, sound_detection, voice_recognition) is loaded. Returns the model names that became resident. Requires user confirmation per safety rule 1.",
	}
	mcp.AddTool(s, loadTool, func(ctx context.Context, _ *mcp.CallToolRequest, in struct {
		Model string `json:"model" jsonschema:"The installed model name to load into memory."`
	}) (*mcp.CallToolResult, any, error) {
		if len(in.Model) == 0 {
			return errorResultf("model is required"), nil, nil
		}
		resident, err := client.LoadModel(ctx, in.Model)
		if err != nil {
			return errorResult(err), nil, nil
		}
		payload := map[string]any{"loaded": resident}
		return jsonResult(payload), nil, nil
	})

	// Install a model from a gallery.
	installTool := &mcp.Tool{
		Name:        ToolInstallModel,
		Description: "Install a model from a gallery. Requires explicit user confirmation per safety rule 1. Returns a job id; poll with get_job_status.",
	}
	mcp.AddTool(s, installTool, func(ctx context.Context, _ *mcp.CallToolRequest, req InstallModelRequest) (*mcp.CallToolResult, any, error) {
		// The SDK schema validator enforces only that the field is present,
		// not that it is non-empty. Rejecting the empty string here keeps the
		// error identical whichever LocalAIClient implementation is in use.
		if len(req.ModelName) == 0 {
			return errorResultf("model_name is required"), nil, nil
		}
		id, err := client.InstallModel(ctx, req)
		if err != nil {
			return errorResult(err), nil, nil
		}
		payload := map[string]any{"job_id": id}
		return jsonResult(payload), nil, nil
	})

	// Import a model from a URI.
	importTool := &mcp.Tool{
		Name:        ToolImportModelURI,
		Description: "Import a model from a URI (HuggingFace link, OCI image, file path, or HTTP URL). The importer auto-detects the backend; when multiple backends could handle the source, the response sets ambiguous_backend=true and lists candidates. Surface them to the user, then call again with backend_preference set. Requires user confirmation per safety rule 1.",
	}
	mcp.AddTool(s, importTool, func(ctx context.Context, _ *mcp.CallToolRequest, req ImportModelURIRequest) (*mcp.CallToolResult, any, error) {
		if len(req.URI) == 0 {
			return errorResultf("uri is required"), nil, nil
		}
		imported, err := client.ImportModelURI(ctx, req)
		if err != nil {
			return errorResult(err), nil, nil
		}
		return jsonResult(imported), nil, nil
	})

	// Delete an installed model.
	deleteTool := &mcp.Tool{
		Name:        ToolDeleteModel,
		Description: "Delete an installed model by name. Requires explicit user confirmation per safety rule 1.",
	}
	mcp.AddTool(s, deleteTool, func(ctx context.Context, _ *mcp.CallToolRequest, in struct {
		Name string `json:"name" jsonschema:"The installed model name."`
	}) (*mcp.CallToolResult, any, error) {
		if len(in.Name) == 0 {
			return errorResultf("name is required"), nil, nil
		}
		err := client.DeleteModel(ctx, in.Name)
		if err != nil {
			return errorResult(err), nil, nil
		}
		// On success the response echoes back the name that was removed.
		payload := map[string]any{"deleted": in.Name}
		return jsonResult(payload), nil, nil
	})
}