mirror of
https://github.com/mudler/LocalAI.git
synced 2026-07-04 05:16:42 -04:00
Realtime sessions previously lazy-loaded each pipeline sub-model (VAD,
transcription, LLM, TTS) on first use, so every cold session paid a
per-request model-load stall and load errors only surfaced mid-stream.
Warm the whole pipeline eagerly at session start, blocking until every
stage has loaded (including the voice-gate speaker-recognition model, on
which an enforced gate blocks each utterance; compaction's summary_model
stays lazy since it only runs off the response path):
- Add backend.PreloadModel / PreloadModelByName as the single load path
for every modality (no transcription special-case; backend-omitted
configs are deprecated).
- The realtime session blocks on Model.Warmup and returns a
model_load_error to the client if any stage fails to load;
updateSession warms in the background. Opt out per pipeline with
pipeline.disable_warmup, exposed as a UI toggle via the
config-metadata registry.
Add a LocalAI-native POST /backend/load (and /v1/backend/load) that
pre-loads a model -- expanding realtime pipelines into their sub-models
-- as the inverse of /backend/shutdown. There is one preload engine
(backend.PreloadStages): the realtime Warmup methods, /backend/load and
the --load-to-memory startup flag all use it, so --load-to-memory now
also expands pipeline models and records load-failure traces. Pipeline
sub-model alias resolution is likewise shared
(ModelConfigLoader.LoadResolvedModelConfig). Surface the endpoint
everywhere an admin manages models:
- MCP admin tool load_model (httpapi + inproc clients, safety/catalog
prompts, catalog/dispatch tests).
- "Load into memory" action in the React models UI.
- Swagger regenerated; docs moved to the general backend-monitor page
since it is not realtime-specific.
Fix a Traces UI crash ("json: unsupported value: -Inf"): audio-snippet
RMS/peak now floor at a finite dBFS, and backend-trace data is sanitized
to drop non-finite floats before marshaling. The sanitizer is
copy-on-write -- it runs on every RecordBackendTrace, so containers are
only re-allocated on the paths that actually changed.
Migrate core/http/openresponses_test.go onto the prebuilt mock-backend
the rest of the http suite already uses -- it was the last spec still
pointing at a real HuggingFace model, so it 404'd wherever no vision
backend was built -- and fix its item_reference specs to send the
spec's "id" field instead of "item_id", which the handler never
accepted.
Assisted-by: Claude:claude-opus-4-8 Claude Code
Signed-off-by: Richard Palethorpe <io@richiejp.com>
99 lines
4.4 KiB
Go
99 lines
4.4 KiB
Go
package localaitools
|
|
|
|
import (
|
|
. "github.com/onsi/ginkgo/v2"
|
|
. "github.com/onsi/gomega"
|
|
)
|
|
|
|
// toolToHTTPRoute is the canonical mapping between MCP tools and the
|
|
// LocalAI admin REST endpoints they wrap. The httpapi.Client MUST hit the
|
|
// listed route for the tool; the inproc.Client may bypass HTTP and call
|
|
// services directly, but the on-the-wire shape is documented here so the
|
|
// two sides stay aligned.
|
|
//
|
|
// Updating the map is REQUIRED when:
|
|
// - You add a Tool* constant (tools.go).
|
|
// - You change which REST endpoint the httpapi.Client calls.
|
|
//
|
|
// The TestToolHTTPRouteMappingComplete spec below FAILS until every Tool*
|
|
// is in the map. That is the drift detector — see
|
|
// .agents/localai-assistant-mcp.md for the contributor contract.
|
|
//
|
|
// "(none)" is a deliberate sentinel for tools whose data is not exposed
|
|
// over a single REST endpoint (e.g. system_info aggregates data the
|
|
// inproc client picks up directly from services). The httpapi.Client may
|
|
// approximate via the welcome JSON; the test still requires an entry so
|
|
// the contributor explicitly acknowledges the asymmetry.
|
|
var toolToHTTPRoute = map[string]string{
|
|
// Read-only tools.
|
|
ToolGallerySearch: "GET /models/available",
|
|
ToolListInstalledModels: "GET / (welcome JSON, ModelsConfig field)",
|
|
ToolListGalleries: "GET /models/galleries",
|
|
ToolGetJobStatus: "GET /models/jobs/:uuid",
|
|
ToolGetModelConfig: "(none) — no JSON-only REST yet; httpapi.Client returns a documented stub",
|
|
ToolListBackends: "GET /backends",
|
|
ToolListKnownBackends: "GET /backends/known",
|
|
ToolSystemInfo: "GET / (welcome JSON)",
|
|
ToolListNodes: "GET /api/nodes",
|
|
ToolVRAMEstimate: "POST /api/models/vram-estimate",
|
|
ToolGetBranding: "GET /api/branding",
|
|
ToolGetUsageStats: "GET /api/usage (or /api/usage/all when all=true)",
|
|
ToolGetPIIEvents: "GET /api/pii/events",
|
|
ToolGetMiddlewareStatus: "GET /api/middleware/status",
|
|
ToolGetRouterDecisions: "GET /api/router/decisions",
|
|
ToolListAliases: "GET /api/aliases",
|
|
|
|
// Mutating tools.
|
|
ToolInstallModel: "POST /models/apply",
|
|
ToolImportModelURI: "POST /models/import-uri",
|
|
ToolDeleteModel: "POST /models/delete/:name",
|
|
ToolEditModelConfig: "PATCH /api/models/config-json/:name",
|
|
ToolReloadModels: "POST /models/reload",
|
|
ToolLoadModel: "POST /backend/load",
|
|
ToolInstallBackend: "POST /backends/apply",
|
|
ToolUpgradeBackend: "POST /backends/upgrade/:name",
|
|
ToolToggleModelState: "PUT /models/toggle-state/:name/:action",
|
|
ToolToggleModelPinned: "PUT /models/toggle-pinned/:name/:action",
|
|
ToolSetBranding: "POST /api/settings (instance_name, instance_tagline)",
|
|
ToolSetAlias: "PATCH /api/models/config-json/:name (swap) or POST /models/import (create)",
|
|
}
|
|
|
|
// allKnownTools is the union of expectedFullCatalog (defined in
|
|
// server_test.go). Keeping a single source of truth — the slice from
|
|
// server_test — and asserting the route map covers every entry catches
|
|
// the case "you added a Tool* but forgot to register it as MCP" indirectly
|
|
// (it'd be missing from expectedFullCatalog, which has its own assertion
|
|
// in TestServerRegistersExpectedToolCatalog).
|
|
var _ = Describe("Tool ↔ HTTP route coverage map", func() {
|
|
It("has an entry for every Tool* in the published catalog", func() {
|
|
for _, name := range expectedFullCatalog {
|
|
_, ok := toolToHTTPRoute[name]
|
|
Expect(ok).To(BeTrue(),
|
|
"Tool %q is in expectedFullCatalog but not in toolToHTTPRoute. "+
|
|
"When adding an MCP tool, update toolToHTTPRoute in coverage_test.go "+
|
|
"with the REST endpoint the httpapi.Client calls (or '(none)' with a reason).",
|
|
name)
|
|
}
|
|
})
|
|
|
|
It("does not document tools that no longer exist in the catalog", func() {
|
|
catalog := map[string]struct{}{}
|
|
for _, name := range expectedFullCatalog {
|
|
catalog[name] = struct{}{}
|
|
}
|
|
for name := range toolToHTTPRoute {
|
|
_, ok := catalog[name]
|
|
Expect(ok).To(BeTrue(),
|
|
"toolToHTTPRoute documents %q but the tool is not registered. "+
|
|
"Remove the stale entry.",
|
|
name)
|
|
}
|
|
})
|
|
|
|
// Deliberate non-test: we don't enumerate admin REST routes here. That
|
|
// would require booting Application or parsing core/http/routes/localai.go,
|
|
// both of which are brittle. The contract for "new admin REST endpoint
|
|
// → MCP tool" is enforced by the PR checklist in
|
|
// .agents/api-endpoints-and-auth.md, not by this test.
|
|
})
|