Compare commits

..

3 Commits

Author SHA1 Message Date
Ettore Di Giacinto
659636195c deterministic builds
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2026-04-01 19:45:31 +00:00
Ettore Di Giacinto
a7a142b651 refactor, macOS fixes
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2026-04-01 19:42:16 +00:00
Ettore Di Giacinto
e502e51d78 feat(llama.cpp): add turboquant support
This PR adds the patchset from the great work of @TheTom in
https://github.com/TheTom/llama-cpp-turboquant and creates a pipeline
that automatically updates the patches against upstream.

It also creates the necessary scaffolding for doing the same with other
patch sources.

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2026-04-01 17:57:03 +00:00
119 changed files with 617 additions and 8500 deletions

View File

@@ -133,7 +133,6 @@ func getRealReadme(ctx context.Context, repository string) (string, error) {
result, err := cogito.ExecuteTools(llm, fragment,
cogito.WithIterations(3),
cogito.WithMaxAttempts(3),
cogito.DisableSinkState,
cogito.WithTools(&HFReadmeTool{client: hfapi.NewClient()}))
if err != nil {
return "", err

View File

@@ -79,20 +79,7 @@ func generateYAMLEntry(model ProcessedModel, quantization string) string {
description = cleanTextContent(description)
formattedDescription := formatTextContent(description)
// Strip name and description from config file since they are
// already present at the gallery entry level and should not
// appear under overrides.
configFileContent := modelConfig.ConfigFile
var cfgMap map[string]any
if err := yaml.Unmarshal([]byte(configFileContent), &cfgMap); err == nil {
delete(cfgMap, "name")
delete(cfgMap, "description")
if cleaned, err := yaml.Marshal(cfgMap); err == nil {
configFileContent = string(cleaned)
}
}
configFile := formatTextContent(configFileContent)
configFile := formatTextContent(modelConfig.ConfigFile)
filesYAML, _ := yaml.Marshal(modelConfig.Files)

View File

@@ -17,7 +17,7 @@ func runSyntheticMode() error {
fmt.Printf("Generating %d synthetic models for testing...\n", numModels)
var models []ProcessedModel
for range numModels {
for i := range numModels {
model := generator.GenerateProcessedModel()
models = append(models, model)
fmt.Printf("Generated synthetic model: %s\n", model.ModelID)

View File

@@ -14,6 +14,10 @@ jobs:
variable: "LLAMA_VERSION"
branch: "master"
file: "backend/cpp/llama-cpp/Makefile"
- repository: "TheTom/llama-cpp-turboquant"
variable: "TURBOQUANT_VERSION"
branch: "feature/turboquant-kv-cache"
file: "backend/cpp/llama-cpp/Makefile"
- repository: "ggml-org/whisper.cpp"
variable: "WHISPER_CPP_VERSION"
branch: "master"
@@ -63,6 +67,3 @@ jobs:
branch: "update/${{ matrix.variable }}"
body: ${{ steps.bump.outputs.message }}
signoff: true

View File

@@ -55,7 +55,7 @@ jobs:
- name: Run gallery agent
env:
#OPENAI_MODEL: ${{ secrets.OPENAI_MODEL }}
OPENAI_MODEL: Qwen3.5-2B-GGUF
OPENAI_MODE: Qwen3.5-2B-GGUF
OPENAI_BASE_URL: "http://localhost:8080"
OPENAI_KEY: ${{ secrets.OPENAI_KEY }}
#OPENAI_BASE_URL: ${{ secrets.OPENAI_BASE_URL }}

View File

@@ -42,38 +42,16 @@ Created and maintained by [Ettore Di Giacinto](https://github.com/mudler).
> [:book: Documentation](https://localai.io/) | [:speech_balloon: Discord](https://discord.gg/uJAeKSAGDy) | [💻 Quickstart](https://localai.io/basics/getting_started/) | [🖼️ Models](https://models.localai.io/) | [❓FAQ](https://localai.io/faq/)
## Guided tour
## Screenshots
### Chat, Model gallery
https://github.com/user-attachments/assets/08cbb692-57da-48f7-963d-2e7b43883c18
<details>
<summary>
Click to see more!
</summary>
#### User and auth
https://github.com/user-attachments/assets/228fa9ad-81a3-4d43-bfb9-31557e14a36c
#### Agents
### Agents
https://github.com/user-attachments/assets/6270b331-e21d-4087-a540-6290006b381a
#### Usage metrics per user
https://github.com/user-attachments/assets/cbb03379-23b4-4e3d-bd26-d152f057007f
#### Fine-tuning and Quantization
https://github.com/user-attachments/assets/5ba4ace9-d3df-4795-b7d4-b0b404ea71ee
#### WebRTC
https://github.com/user-attachments/assets/ed88e34c-fed3-4b83-8a67-4716a9feeb7b
</details>
## Quickstart
### macOS

View File

@@ -1,7 +1,9 @@
LLAMA_VERSION?=b8635075ffe27b135c49afb9a8b5c434bd42c502
LLAMA_VERSION?=0fcb3760b2b9a3a496ef14621a7e4dad7a8df90f
LLAMA_REPO?=https://github.com/ggerganov/llama.cpp
TURBOQUANT_VERSION?=8ad0f00e9a38df6c29fc10363341dde300f92ae4
CMAKE_ARGS?=
BUILD_TYPE?=
NATIVE?=false

View File

@@ -40,41 +40,45 @@ using grpc::ServerBuilder;
using grpc::ServerContext;
using grpc::Status;
// gRPC bearer token auth for distributed mode.
// gRPC bearer token auth via AuthMetadataProcessor for distributed mode.
// Reads LOCALAI_GRPC_AUTH_TOKEN from the environment. When set, rejects
// requests without a matching "authorization: Bearer <token>" metadata header.
class TokenAuthMetadataProcessor : public grpc::AuthMetadataProcessor {
public:
explicit TokenAuthMetadataProcessor(const std::string& token) : token_(token) {}
// Cached auth token — empty means auth is disabled.
static std::string g_grpc_auth_token;
bool IsBlocking() const override { return false; }
// Constant-time byte comparison, hand-rolled so no OpenSSL dependency is needed.
// Every one of the n bytes is always visited; the XOR of each byte pair is
// OR-ed into an accumulator, so the result is 0 only when both buffers match.
static int ct_memcmp(const void* a, const void* b, size_t n) {
    const auto* lhs = static_cast<const unsigned char*>(a);
    const auto* rhs = static_cast<const unsigned char*>(b);
    unsigned char diff = 0;
    while (n-- > 0) {
        diff = static_cast<unsigned char>(diff | (*lhs++ ^ *rhs++));
    }
    return diff;
}
// Returns OK when auth is disabled or the token matches.
static grpc::Status checkAuth(grpc::ServerContext* context) {
if (g_grpc_auth_token.empty()) {
return grpc::Status::OK;
}
auto metadata = context->client_metadata();
auto it = metadata.find("authorization");
if (it != metadata.end()) {
std::string expected = "Bearer " + g_grpc_auth_token;
std::string got(it->second.data(), it->second.size());
if (expected.size() == got.size() &&
ct_memcmp(expected.data(), got.data(), expected.size()) == 0) {
return grpc::Status::OK;
grpc::Status Process(const InputMetadata& auth_metadata,
grpc::AuthContext* /*context*/,
OutputMetadata* /*consumed_auth_metadata*/,
OutputMetadata* /*response_metadata*/) override {
auto it = auth_metadata.find("authorization");
if (it != auth_metadata.end()) {
std::string expected = "Bearer " + token_;
std::string got(it->second.data(), it->second.size());
// Constant-time comparison
if (expected.size() == got.size() && ct_memcmp(expected.data(), got.data(), expected.size()) == 0) {
return grpc::Status::OK;
}
}
return grpc::Status(grpc::StatusCode::UNAUTHENTICATED, "invalid token");
}
return grpc::Status(grpc::StatusCode::UNAUTHENTICATED, "invalid token");
}
private:
std::string token_;
// Compares two n-byte buffers without an early exit (no OpenSSL needed).
// Returns 0 when the buffers are identical, otherwise the OR of all per-byte
// XOR differences (a value in 1..255).
static int ct_memcmp(const void* a, const void* b, size_t n) {
    const unsigned char* const left = static_cast<const unsigned char*>(a);
    const unsigned char* const right = static_cast<const unsigned char*>(b);
    unsigned int acc = 0;
    for (size_t idx = 0; idx != n; ++idx) {
        acc |= static_cast<unsigned int>(left[idx] ^ right[idx]);
    }
    return static_cast<int>(acc);
}
};
// END LocalAI
@@ -753,17 +757,13 @@ private:
public:
BackendServiceImpl(server_context& ctx) : ctx_server(ctx) {}
grpc::Status Health(ServerContext* context, const backend::HealthMessage* /*request*/, backend::Reply* reply) override {
auto auth = checkAuth(context);
if (!auth.ok()) return auth;
grpc::Status Health(ServerContext* /*context*/, const backend::HealthMessage* /*request*/, backend::Reply* reply) override {
// Implement Health RPC
reply->set_message("OK");
return Status::OK;
}
grpc::Status LoadModel(ServerContext* context, const backend::ModelOptions* request, backend::Result* result) override {
auto auth = checkAuth(context);
if (!auth.ok()) return auth;
grpc::Status LoadModel(ServerContext* /*context*/, const backend::ModelOptions* request, backend::Result* result) override {
// Implement LoadModel RPC
common_params params;
params_parse(ctx_server, request, params);
@@ -962,8 +962,6 @@ public:
}
grpc::Status PredictStream(grpc::ServerContext* context, const backend::PredictOptions* request, grpc::ServerWriter<backend::Reply>* writer) override {
auto auth = checkAuth(context);
if (!auth.ok()) return auth;
if (params_base.model.path.empty()) {
return grpc::Status(grpc::StatusCode::FAILED_PRECONDITION, "Model not loaded");
}
@@ -1608,18 +1606,8 @@ public:
auto attach_chat_deltas = [](backend::Reply & reply, server_task_result * raw_result) {
// Try streaming partial result first
auto* partial = dynamic_cast<server_task_result_cmpl_partial*>(raw_result);
if (partial) {
if (!partial->oaicompat_msg_diffs.empty()) {
populate_chat_deltas_from_diffs(reply, partial->oaicompat_msg_diffs);
} else if (partial->is_updated) {
// Autoparser is active but hasn't classified this chunk yet
// (PEG parser warming up). Clear the raw message so the Go
// side doesn't try to parse partial tag tokens (e.g. "<|channel>"
// before the full "<|channel>thought\n" is received).
// This matches llama.cpp server behavior which only emits SSE
// chunks when the parser produces diffs.
reply.set_message("");
}
if (partial && !partial->oaicompat_msg_diffs.empty()) {
populate_chat_deltas_from_diffs(reply, partial->oaicompat_msg_diffs);
return;
}
// Try final result
@@ -1677,8 +1665,6 @@ public:
}
grpc::Status Predict(ServerContext* context, const backend::PredictOptions* request, backend::Reply* reply) override {
auto auth = checkAuth(context);
if (!auth.ok()) return auth;
if (params_base.model.path.empty()) {
return grpc::Status(grpc::StatusCode::FAILED_PRECONDITION, "Model not loaded");
}
@@ -2397,8 +2383,6 @@ public:
}
grpc::Status Embedding(ServerContext* context, const backend::PredictOptions* request, backend::EmbeddingResult* embeddingResult) override {
auto auth = checkAuth(context);
if (!auth.ok()) return auth;
if (params_base.model.path.empty()) {
return grpc::Status(grpc::StatusCode::FAILED_PRECONDITION, "Model not loaded");
}
@@ -2579,9 +2563,7 @@ public:
return grpc::Status::OK;
}
grpc::Status TokenizeString(ServerContext* context, const backend::PredictOptions* request, backend::TokenizationResponse* response) override {
auto auth = checkAuth(context);
if (!auth.ok()) return auth;
grpc::Status TokenizeString(ServerContext* /*context*/, const backend::PredictOptions* request, backend::TokenizationResponse* response) override {
if (params_base.model.path.empty()) {
return grpc::Status(grpc::StatusCode::FAILED_PRECONDITION, "Model not loaded");
}
@@ -2821,14 +2803,19 @@ int main(int argc, char** argv) {
BackendServiceImpl service(ctx_server);
ServerBuilder builder;
builder.AddListeningPort(server_address, grpc::InsecureServerCredentials());
// Initialize bearer token auth if LOCALAI_GRPC_AUTH_TOKEN is set
// Add bearer token auth via AuthMetadataProcessor if LOCALAI_GRPC_AUTH_TOKEN is set
const char* auth_token = std::getenv("LOCALAI_GRPC_AUTH_TOKEN");
std::shared_ptr<grpc::ServerCredentials> creds;
if (auth_token != nullptr && auth_token[0] != '\0') {
g_grpc_auth_token = auth_token;
creds = grpc::InsecureServerCredentials();
creds->SetAuthMetadataProcessor(
std::make_shared<TokenAuthMetadataProcessor>(auth_token));
std::cout << "gRPC auth enabled via LOCALAI_GRPC_AUTH_TOKEN" << std::endl;
} else {
creds = grpc::InsecureServerCredentials();
}
builder.AddListeningPort(server_address, creds);
builder.RegisterService(&service);
builder.SetMaxMessageSize(50 * 1024 * 1024); // 50MB
builder.SetMaxSendMessageSize(50 * 1024 * 1024); // 50MB

View File

@@ -0,0 +1,14 @@
# Patch sources for the llama-cpp backend.
# Each source declares a fork whose commits are extracted as patches
# and applied on top of upstream llama.cpp during the build.
# See scripts/patch_utils/apply_patches.sh for the generic patch engine.
#
# version_var: Makefile variable with the pinned fork commit SHA
# base_var: Makefile variable with the upstream base commit SHA
# Both are read from version_file (relative to backend dir) to compute the diff.
sources:
- name: turboquant
repo: https://github.com/TheTom/llama-cpp-turboquant.git
version_var: TURBOQUANT_VERSION
base_var: LLAMA_VERSION
version_file: Makefile

View File

@@ -1,17 +1,13 @@
#!/bin/bash
## Patches
## Apply patches from the `patches` directory
if [ -d "patches" ]; then
for patch in $(ls patches); do
echo "Applying patch $patch"
patch -d llama.cpp/ -p1 < patches/$patch
done
fi
set -e
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
REPO_ROOT="$SCRIPT_DIR/../../.."
## Apply patches from sources and/or local .patch files
"$REPO_ROOT/scripts/patch_utils/apply_patches.sh" "$SCRIPT_DIR" llama.cpp
## Copy server files into grpc-server build directory
for file in $(ls llama.cpp/tools/server/); do
cp -rfv llama.cpp/tools/server/$file llama.cpp/tools/grpc-server/
done
@@ -28,4 +24,3 @@ else
echo "add_subdirectory(grpc-server)" >> llama.cpp/tools/CMakeLists.txt
fi
set -e

View File

@@ -8,7 +8,7 @@ JOBS?=$(shell nproc --ignore=1)
# stablediffusion.cpp (ggml)
STABLEDIFFUSION_GGML_REPO?=https://github.com/leejet/stable-diffusion.cpp
STABLEDIFFUSION_GGML_VERSION?=87ecb95cbc65dc8e58e3d88f4f4a59a0939796f5
STABLEDIFFUSION_GGML_VERSION?=09b12d5f6d51d862749e8e0ee8baac8f012089e2
CMAKE_ARGS+=-DGGML_MAX_NAME=128

View File

@@ -36,27 +36,6 @@ type TokenUsage struct {
Completion int
TimingPromptProcessing float64
TimingTokenGeneration float64
ChatDeltas []*proto.ChatDelta // per-chunk deltas from C++ autoparser (only set during streaming)
}
// HasChatDeltaContent returns true if any chat delta carries content or reasoning text.
// Used to decide whether to prefer C++ autoparser deltas over Go-side tag extraction.
//
// ChatDeltas is a slice of pointers, so nil entries are skipped rather than
// dereferenced; a nil or empty slice yields false.
func (t TokenUsage) HasChatDeltaContent() bool {
	for _, d := range t.ChatDeltas {
		if d == nil {
			continue
		}
		if d.Content != "" || d.ReasoningContent != "" {
			return true
		}
	}
	return false
}
// ChatDeltaReasoningAndContent extracts accumulated reasoning and content from chat deltas.
//
// The Content and ReasoningContent of every delta are concatenated, in slice
// order, into two separate strings. Nil entries in ChatDeltas are skipped
// rather than dereferenced; a nil or empty slice yields two empty strings.
func (t TokenUsage) ChatDeltaReasoningAndContent() (reasoning, content string) {
	for _, d := range t.ChatDeltas {
		if d == nil {
			continue
		}
		content += d.Content
		reasoning += d.ReasoningContent
	}
	return reasoning, content
}
// ModelInferenceFunc is a test-friendly indirection to call model inference logic.
@@ -192,9 +171,6 @@ func ModelInference(ctx context.Context, s string, messages schema.Messages, ima
allChatDeltas = append(allChatDeltas, reply.ChatDeltas...)
}
// Attach per-chunk chat deltas to tokenUsage so the callback can use them
tokenUsage.ChatDeltas = reply.ChatDeltas
// Parse logprobs from reply if present (collect from last chunk that has them)
if len(reply.Logprobs) > 0 {
var parsedLogprobs schema.Logprobs
@@ -224,9 +200,6 @@ func ModelInference(ctx context.Context, s string, messages schema.Messages, ima
if len(msg) == 0 {
tokenCallback("", tokenUsage)
}
// Clear per-chunk deltas so they don't leak to the next chunk
tokenUsage.ChatDeltas = nil
})
if len(allChatDeltas) > 0 {
xlog.Debug("[ChatDeltas] streaming completed, accumulated deltas from C++ autoparser", "total_deltas", len(allChatDeltas))

View File

@@ -4,7 +4,6 @@ import (
. "github.com/mudler/LocalAI/core/backend"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/schema"
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
@@ -108,111 +107,3 @@ var _ = Describe("LLM tests", func() {
})
})
})
// Specs for the two ChatDelta helper methods on TokenUsage:
// HasChatDeltaContent and ChatDeltaReasoningAndContent.
var _ = Describe("TokenUsage ChatDelta helpers", func() {
// HasChatDeltaContent must be true only when at least one delta has a
// non-empty Content or ReasoningContent.
Describe("HasChatDeltaContent", func() {
It("should return false when ChatDeltas is nil", func() {
usage := TokenUsage{}
Expect(usage.HasChatDeltaContent()).To(BeFalse())
})
It("should return false when ChatDeltas is empty", func() {
usage := TokenUsage{ChatDeltas: []*pb.ChatDelta{}}
Expect(usage.HasChatDeltaContent()).To(BeFalse())
})
It("should return false when all deltas have empty content and reasoning", func() {
usage := TokenUsage{
ChatDeltas: []*pb.ChatDelta{
{Content: "", ReasoningContent: ""},
{Content: ""},
},
}
Expect(usage.HasChatDeltaContent()).To(BeFalse())
})
It("should return true when a delta has content", func() {
usage := TokenUsage{
ChatDeltas: []*pb.ChatDelta{
{Content: "hello"},
},
}
Expect(usage.HasChatDeltaContent()).To(BeTrue())
})
It("should return true when a delta has reasoning content", func() {
usage := TokenUsage{
ChatDeltas: []*pb.ChatDelta{
{ReasoningContent: "thinking..."},
},
}
Expect(usage.HasChatDeltaContent()).To(BeTrue())
})
It("should return true when a delta has both content and reasoning", func() {
usage := TokenUsage{
ChatDeltas: []*pb.ChatDelta{
{Content: "hello", ReasoningContent: "thinking..."},
},
}
Expect(usage.HasChatDeltaContent()).To(BeTrue())
})
})
// ChatDeltaReasoningAndContent must concatenate the two text channels
// independently, preserving the order of the deltas.
Describe("ChatDeltaReasoningAndContent", func() {
It("should return empty strings when ChatDeltas is nil", func() {
usage := TokenUsage{}
reasoning, content := usage.ChatDeltaReasoningAndContent()
Expect(reasoning).To(BeEmpty())
Expect(content).To(BeEmpty())
})
It("should concatenate content from multiple deltas", func() {
usage := TokenUsage{
ChatDeltas: []*pb.ChatDelta{
{Content: "Hello"},
{Content: " world"},
},
}
reasoning, content := usage.ChatDeltaReasoningAndContent()
Expect(content).To(Equal("Hello world"))
Expect(reasoning).To(BeEmpty())
})
It("should concatenate reasoning from multiple deltas", func() {
usage := TokenUsage{
ChatDeltas: []*pb.ChatDelta{
{ReasoningContent: "step 1"},
{ReasoningContent: " step 2"},
},
}
reasoning, content := usage.ChatDeltaReasoningAndContent()
Expect(reasoning).To(Equal("step 1 step 2"))
Expect(content).To(BeEmpty())
})
It("should separate reasoning and content from mixed deltas", func() {
usage := TokenUsage{
ChatDeltas: []*pb.ChatDelta{
{ReasoningContent: "thinking"},
{Content: "answer"},
},
}
reasoning, content := usage.ChatDeltaReasoningAndContent()
Expect(reasoning).To(Equal("thinking"))
Expect(content).To(Equal("answer"))
})
// No separator is inserted between deltas: "r1"+"r2" and "a"+"b".
It("should handle deltas with both fields set", func() {
usage := TokenUsage{
ChatDeltas: []*pb.ChatDelta{
{Content: "a", ReasoningContent: "r1"},
{Content: "b", ReasoningContent: "r2"},
},
}
reasoning, content := usage.ChatDeltaReasoningAndContent()
Expect(reasoning).To(Equal("r1r2"))
Expect(content).To(Equal("ab"))
})
})
})

View File

@@ -512,9 +512,11 @@ func (s *backendSupervisor) stopBackend(backend string) {
// Network I/O outside the lock
client := grpc.NewClientWithToken(bp.addr, false, nil, false, s.cmd.RegistrationToken)
xlog.Debug("Calling Free() before stopping backend", "backend", backend)
if err := client.Free(context.Background()); err != nil {
xlog.Warn("Free() failed (best-effort)", "backend", backend, "error", err)
if freeFunc, ok := client.(interface{ Free(context.Context) error }); ok {
xlog.Debug("Calling Free() before stopping backend", "backend", backend)
if err := freeFunc.Free(context.Background()); err != nil {
xlog.Warn("Free() failed (best-effort)", "backend", backend, "error", err)
}
}
xlog.Info("Stopping backend process", "backend", backend, "addr", bp.addr)
@@ -690,13 +692,13 @@ func (s *backendSupervisor) subscribeLifecycleEvents() {
// backend.delete — stop backend + delete files (request-reply)
s.nats.SubscribeReply(messaging.SubjectNodeBackendDelete(s.nodeID), func(data []byte, reply func([]byte)) {
xlog.Info("Received NATS backend.delete event")
var req messaging.BackendDeleteRequest
if err := json.Unmarshal(data, &req); err != nil {
resp := messaging.BackendDeleteReply{Success: false, Error: fmt.Sprintf("invalid request: %v", err)}
replyJSON(reply, resp)
return
}
xlog.Info("Received NATS backend.delete event", "backend", req.Backend)
// Stop if running this backend
if s.isRunning(req.Backend) {
@@ -772,8 +774,10 @@ func (s *backendSupervisor) subscribeLifecycleEvents() {
if targetAddr != "" {
// Best-effort gRPC Free()
client := grpc.NewClientWithToken(targetAddr, false, nil, false, s.cmd.RegistrationToken)
if err := client.Free(context.Background()); err != nil {
xlog.Warn("Free() failed during model.unload", "error", err, "addr", targetAddr)
if freeFunc, ok := client.(interface{ Free(context.Context) error }); ok {
if err := freeFunc.Free(context.Background()); err != nil {
xlog.Warn("Free() failed during model.unload", "error", err, "addr", targetAddr)
}
}
}

View File

@@ -14,7 +14,6 @@
"qwen2-vl": {"min_p":0.1,"repeat_penalty":1,"temperature":1.5,"top_k":-1,"top_p":0.95},
"qwen2": {"min_p":0,"repeat_penalty":1,"temperature":0.7,"top_k":20,"top_p":0.8},
"qwq": {"min_p":0,"repeat_penalty":1,"temperature":0.6,"top_k":40,"top_p":0.95},
"gemma-4": {"min_p":0,"presence_penalty":0,"repeat_penalty":1,"temperature":1,"top_k":64,"top_p":0.95},
"gemma-3n": {"min_p":0,"repeat_penalty":1,"temperature":1,"top_k":64,"top_p":0.95},
"gemma-3": {"min_p":0,"repeat_penalty":1,"temperature":1,"top_k":64,"top_p":0.95},
"medgemma": {"min_p":0,"repeat_penalty":1,"temperature":1,"top_k":64,"top_p":0.95},
@@ -54,5 +53,5 @@
"grok": {"min_p":0.01,"repeat_penalty":1,"temperature":1,"top_k":-1,"top_p":0.95},
"mimo": {"min_p":0.01,"repeat_penalty":1,"temperature":0.7,"top_k":-1,"top_p":0.95}
},
"patterns": ["qwen3.5","qwen3-coder","qwen3-next","qwen3-vl","qwen3","qwen2.5-coder","qwen2.5-vl","qwen2.5-omni","qwen2.5-math","qwen2.5","qwen2-vl","qwen2","qwq","gemma-4","gemma-3n","gemma-3","medgemma","gemma-2","llama-4","llama-3.3","llama-3.2","llama-3.1","llama-3","phi-4","phi-3","mistral-nemo","mistral-small","mistral-large","magistral","ministral","devstral","pixtral","deepseek-r1","deepseek-v3","deepseek-ocr","glm-5","glm-4","nemotron","minimax-m2.5","minimax","gpt-oss","granite-4","kimi-k2","kimi","lfm2","smollm","olmo","falcon","ernie","seed","grok","mimo"]
"patterns": ["qwen3.5","qwen3-coder","qwen3-next","qwen3-vl","qwen3","qwen2.5-coder","qwen2.5-vl","qwen2.5-omni","qwen2.5-math","qwen2.5","qwen2-vl","qwen2","qwq","gemma-3n","gemma-3","medgemma","gemma-2","llama-4","llama-3.3","llama-3.2","llama-3.1","llama-3","phi-4","phi-3","mistral-nemo","mistral-small","mistral-large","magistral","ministral","devstral","pixtral","deepseek-r1","deepseek-v3","deepseek-ocr","glm-5","glm-4","nemotron","minimax-m2.5","minimax","gpt-oss","granite-4","kimi-k2","kimi","lfm2","smollm","olmo","falcon","ernie","seed","grok","mimo"]
}

View File

@@ -1,132 +0,0 @@
package meta
import (
"reflect"
"sort"
"sync"
)
// Package-level cache used by BuildConfigMetadata.
var (
// cachedMetadata is nil until BuildConfigMetadata has built the metadata once.
cachedMetadata *ConfigMetadata
// cacheMu guards reads and writes of cachedMetadata.
cacheMu sync.RWMutex
)
// BuildConfigMetadata returns the ConfigMetadata for the given struct type
// (ModelConfig), built by reflecting on it and merging DefaultRegistry().
//
// The result is built at most once and kept in memory. Every later call
// returns that same pointer, so modelConfigType is only consulted by the call
// that actually populates the cache.
func BuildConfigMetadata(modelConfigType reflect.Type) *ConfigMetadata {
	// Fast path: take a snapshot of the cache under the read lock.
	cacheMu.RLock()
	snapshot := cachedMetadata
	cacheMu.RUnlock()
	if snapshot != nil {
		return snapshot
	}

	// Slow path: re-check under the write lock, since another goroutine may
	// have filled the cache between the two lock acquisitions.
	cacheMu.Lock()
	defer cacheMu.Unlock()
	if cachedMetadata == nil {
		cachedMetadata = buildConfigMetadataUncached(modelConfigType, DefaultRegistry())
	}
	return cachedMetadata
}
// buildConfigMetadataUncached performs the real metadata construction, with no caching.
//
// Steps:
//  1. walk modelConfigType to get one FieldMeta per discovered field;
//  2. apply the registry override whose key equals the field's Path, if any;
//  3. stable-sort fields by their section's Order, then by the field's Order
//     (a section ID missing from DefaultSections sorts with order 0);
//  4. keep only the default sections that at least one field refers to,
//     in the order DefaultSections returns them.
func buildConfigMetadataUncached(modelConfigType reflect.Type, registry map[string]FieldMetaOverride) *ConfigMetadata {
	fields := WalkModelConfig(modelConfigType)
	for idx := range fields {
		if override, found := registry[fields[idx].Path]; found {
			applyOverride(&fields[idx], override)
		}
	}

	allSections := DefaultSections()
	rank := make(map[string]int, len(allSections))
	for _, sec := range allSections {
		rank[sec.ID] = sec.Order
	}

	sort.SliceStable(fields, func(a, b int) bool {
		rankA, rankB := rank[fields[a].Section], rank[fields[b].Section]
		if rankA == rankB {
			return fields[a].Order < fields[b].Order
		}
		return rankA < rankB
	})

	inUse := make(map[string]bool)
	for idx := range fields {
		inUse[fields[idx].Section] = true
	}

	var sections []Section
	for _, sec := range allSections {
		if !inUse[sec.ID] {
			continue
		}
		sections = append(sections, sec)
	}

	return &ConfigMetadata{
		Sections: sections,
		Fields:   fields,
	}
}
// applyOverride merges non-zero override values into the field.
//
// Only members of o that differ from their zero value are copied onto f, so an
// override can replace a value but never reset it: strings are skipped when
// empty, the values compared against nil (Default, Min, Max, Step, Options)
// are skipped when nil, and Order is skipped when 0.
func applyOverride(f *FieldMeta, o FieldMetaOverride) {
if o.Section != "" {
f.Section = o.Section
}
if o.Label != "" {
f.Label = o.Label
}
if o.Description != "" {
f.Description = o.Description
}
if o.Component != "" {
f.Component = o.Component
}
if o.Placeholder != "" {
f.Placeholder = o.Placeholder
}
if o.Default != nil {
f.Default = o.Default
}
if o.Min != nil {
f.Min = o.Min
}
if o.Max != nil {
f.Max = o.Max
}
if o.Step != nil {
f.Step = o.Step
}
if o.Options != nil {
f.Options = o.Options
}
if o.AutocompleteProvider != "" {
f.AutocompleteProvider = o.AutocompleteProvider
}
// The boolean flags are one-way: an override can switch them on, but a
// false value in o leaves whatever f already had.
if o.VRAMImpact {
f.VRAMImpact = true
}
if o.Advanced {
f.Advanced = true
}
// Order 0 is indistinguishable from "not set", so an override cannot force
// a field's Order to 0.
if o.Order != 0 {
f.Order = o.Order
}
}
// BuildForTest builds metadata without caching, for use in tests.
// Unlike BuildConfigMetadata it takes the override registry as an argument and
// neither reads nor writes the package-level cache; it simply forwards to
// buildConfigMetadataUncached.
func BuildForTest(modelConfigType reflect.Type, registry map[string]FieldMetaOverride) *ConfigMetadata {
return buildConfigMetadataUncached(modelConfigType, registry)
}

View File

@@ -1,211 +0,0 @@
package meta_test
import (
"reflect"
"testing"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/config/meta"
)
// TestBuildConfigMetadata is a smoke test: building metadata for ModelConfig
// with the default registry must yield at least one section and one field,
// and the sections must be returned in non-decreasing Order.
func TestBuildConfigMetadata(t *testing.T) {
	md := meta.BuildForTest(reflect.TypeOf(config.ModelConfig{}), meta.DefaultRegistry())

	if sectionCount := len(md.Sections); sectionCount == 0 {
		t.Fatal("expected sections, got 0")
	}
	if fieldCount := len(md.Fields); fieldCount == 0 {
		t.Fatal("expected fields, got 0")
	}

	// Compare each section with its predecessor.
	for idx := 1; idx < len(md.Sections); idx++ {
		prev, cur := md.Sections[idx-1], md.Sections[idx]
		if cur.Order >= prev.Order {
			continue
		}
		t.Errorf("sections not ordered: %s (order=%d) before %s (order=%d)",
			prev.ID, prev.Order,
			cur.ID, cur.Order)
	}
}
// TestRegistryOverrides builds metadata with a registry containing a single
// override for the "name" field and checks that its Label, Description,
// Component and Order all show up on the resulting field.
func TestRegistryOverrides(t *testing.T) {
	customRegistry := map[string]meta.FieldMetaOverride{
		"name": {
			Label:       "My Custom Label",
			Description: "Custom description",
			Component:   "textarea",
			Order:       999,
		},
	}
	md := meta.BuildForTest(reflect.TypeOf(config.ModelConfig{}), customRegistry)

	// Locate the "name" field; if several share the path, the last one wins.
	var got meta.FieldMeta
	found := false
	for _, field := range md.Fields {
		if field.Path == "name" {
			got, found = field, true
		}
	}
	if !found {
		t.Fatal("field 'name' not found")
	}

	if got.Label != "My Custom Label" {
		t.Errorf("expected label 'My Custom Label', got %q", got.Label)
	}
	if got.Description != "Custom description" {
		t.Errorf("expected description 'Custom description', got %q", got.Description)
	}
	if got.Component != "textarea" {
		t.Errorf("expected component 'textarea', got %q", got.Component)
	}
	if got.Order != 999 {
		t.Errorf("expected order 999, got %d", got.Order)
	}
}
// TestUnregisteredFieldsGetDefaults builds metadata with an empty registry and
// verifies that "context_size" is still discovered, with a non-empty label,
// UIType "int" and a non-empty component.
func TestUnregisteredFieldsGetDefaults(t *testing.T) {
	noOverrides := map[string]meta.FieldMetaOverride{}
	md := meta.BuildForTest(reflect.TypeOf(config.ModelConfig{}), noOverrides)

	index := make(map[string]meta.FieldMeta, len(md.Fields))
	for _, field := range md.Fields {
		index[field.Path] = field
	}

	got, found := index["context_size"]
	if !found {
		t.Fatal("field 'context_size' not found")
	}

	// With no override in play, these values come from the struct walk alone.
	if got.Label == "" {
		t.Error("expected auto-generated label, got empty")
	}
	if got.UIType != "int" {
		t.Errorf("expected UIType 'int', got %q", got.UIType)
	}
	if got.Component == "" {
		t.Error("expected auto-generated component, got empty")
	}
}
// TestDefaultRegistryOverridesApply spot-checks a handful of fields that the
// default registry is expected to enrich, comparing label, description and the
// VRAM-impact flag against hard-coded expectations.
func TestDefaultRegistryOverridesApply(t *testing.T) {
	md := meta.BuildForTest(reflect.TypeOf(config.ModelConfig{}), meta.DefaultRegistry())

	index := make(map[string]meta.FieldMeta, len(md.Fields))
	for _, field := range md.Fields {
		index[field.Path] = field
	}

	type expectation struct {
		path        string
		label       string
		description string
		vramImpact  bool
	}
	cases := []expectation{
		{path: "context_size", label: "Context Size", description: "Maximum context window in tokens", vramImpact: true},
		{path: "gpu_layers", label: "GPU Layers", description: "Number of layers to offload to GPU (-1 = all)", vramImpact: true},
		{path: "backend", label: "Backend", description: "The inference backend to use (e.g. llama-cpp, vllm, diffusers)", vramImpact: false},
		{path: "parameters.temperature", label: "Temperature", description: "Sampling temperature (higher = more creative, lower = more deterministic)", vramImpact: false},
		{path: "template.chat", label: "Chat Template", description: "Go template for chat completion requests", vramImpact: false},
	}

	for _, want := range cases {
		got, found := index[want.path]
		if !found {
			// A missing field is reported, then the remaining cases still run.
			t.Errorf("field %q not found", want.path)
			continue
		}
		if got.Label != want.label {
			t.Errorf("field %q: expected label %q, got %q", want.path, want.label, got.Label)
		}
		if got.Description != want.description {
			t.Errorf("field %q: expected description %q, got %q", want.path, want.description, got.Description)
		}
		if got.VRAMImpact != want.vramImpact {
			t.Errorf("field %q: expected vramImpact=%v, got %v", want.path, want.vramImpact, got.VRAMImpact)
		}
	}
}
// TestStaticOptionsFields checks that the fields expected to carry a static
// option list have Options populated and no AutocompleteProvider set.
func TestStaticOptionsFields(t *testing.T) {
	md := meta.BuildForTest(reflect.TypeOf(config.ModelConfig{}), meta.DefaultRegistry())

	index := make(map[string]meta.FieldMeta, len(md.Fields))
	for _, field := range md.Fields {
		index[field.Path] = field
	}

	withStaticOptions := []string{
		"quantization",
		"cache_type_k",
		"cache_type_v",
		"diffusers.pipeline_type",
		"diffusers.scheduler_type",
	}
	for _, fieldPath := range withStaticOptions {
		got, found := index[fieldPath]
		if !found {
			t.Errorf("field %q not found", fieldPath)
			continue
		}
		if len(got.Options) == 0 {
			t.Errorf("field %q: expected Options to be populated", fieldPath)
		}
		if got.AutocompleteProvider != "" {
			t.Errorf("field %q: expected no AutocompleteProvider, got %q", fieldPath, got.AutocompleteProvider)
		}
	}
}
// TestDynamicProviderFields checks that the fields expected to use a runtime
// autocomplete provider name the right provider and carry no static Options.
func TestDynamicProviderFields(t *testing.T) {
	md := meta.BuildForTest(reflect.TypeOf(config.ModelConfig{}), meta.DefaultRegistry())

	index := make(map[string]meta.FieldMeta, len(md.Fields))
	for _, field := range md.Fields {
		index[field.Path] = field
	}

	// field path -> provider the field must reference
	wantProvider := map[string]string{
		"backend":                meta.ProviderBackends,
		"pipeline.llm":           meta.ProviderModelsChat,
		"pipeline.tts":           meta.ProviderModelsTTS,
		"pipeline.transcription": meta.ProviderModelsTranscript,
		"pipeline.vad":           meta.ProviderModelsVAD,
	}
	for fieldPath, provider := range wantProvider {
		got, found := index[fieldPath]
		if !found {
			t.Errorf("field %q not found", fieldPath)
			continue
		}
		if got.AutocompleteProvider != provider {
			t.Errorf("field %q: expected AutocompleteProvider %q, got %q", fieldPath, provider, got.AutocompleteProvider)
		}
		if optionCount := len(got.Options); optionCount != 0 {
			t.Errorf("field %q: expected no Options, got %d", fieldPath, optionCount)
		}
	}
}
// TestVRAMImpactFields verifies that at least one field is flagged as
// VRAM-impacting and that "context_size" and "gpu_layers" are among them.
func TestVRAMImpactFields(t *testing.T) {
	md := meta.BuildForTest(reflect.TypeOf(config.ModelConfig{}), meta.DefaultRegistry())

	// Paths that must be flagged; each is struck off as soon as it is seen.
	missing := map[string]bool{"context_size": true, "gpu_layers": true}
	flaggedCount := 0
	for _, field := range md.Fields {
		if !field.VRAMImpact {
			continue
		}
		flaggedCount++
		delete(missing, field.Path)
	}

	if flaggedCount == 0 {
		t.Error("expected some VRAM impact fields, got 0")
	}
	for fieldPath := range missing {
		t.Errorf("expected VRAM impact field %q not found", fieldPath)
	}
}

View File

@@ -1,63 +0,0 @@
package meta
// Dynamic autocomplete provider constants (runtime lookup required).
// Each value is an identifier stored in a field's AutocompleteProvider.
// NOTE(review): the "models:<kind>" values look like the plain "models"
// provider narrowed to one model kind — confirm against the code that
// resolves these identifiers.
const (
ProviderBackends = "backends"
ProviderModels = "models"
ProviderModelsChat = "models:chat"
ProviderModelsTTS = "models:tts"
ProviderModelsTranscript = "models:transcript"
ProviderModelsVAD = "models:vad"
)
// Static option lists embedded directly in field metadata.

// QuantizationOptions lists the selectable quantization identifiers.
// Each Label is the upper-cased form of its Value.
// NOTE(review): presumably attached to the "quantization" field by the
// default registry — confirm there.
var QuantizationOptions = []FieldOption{
{Value: "q4_0", Label: "Q4_0"},
{Value: "q4_1", Label: "Q4_1"},
{Value: "q5_0", Label: "Q5_0"},
{Value: "q5_1", Label: "Q5_1"},
{Value: "q8_0", Label: "Q8_0"},
{Value: "q2_K", Label: "Q2_K"},
{Value: "q3_K_S", Label: "Q3_K_S"},
{Value: "q3_K_M", Label: "Q3_K_M"},
{Value: "q3_K_L", Label: "Q3_K_L"},
{Value: "q4_K_S", Label: "Q4_K_S"},
{Value: "q4_K_M", Label: "Q4_K_M"},
{Value: "q5_K_S", Label: "Q5_K_S"},
{Value: "q5_K_M", Label: "Q5_K_M"},
{Value: "q6_K", Label: "Q6_K"},
}
// CacheTypeOptions lists the selectable cache data types: two float formats
// (f16, f32) followed by quantized formats.
// NOTE(review): presumably shared by the "cache_type_k" and "cache_type_v"
// fields — confirm in the default registry.
var CacheTypeOptions = []FieldOption{
{Value: "f16", Label: "F16"},
{Value: "f32", Label: "F32"},
{Value: "q8_0", Label: "Q8_0"},
{Value: "q4_0", Label: "Q4_0"},
{Value: "q4_1", Label: "Q4_1"},
{Value: "q5_0", Label: "Q5_0"},
{Value: "q5_1", Label: "Q5_1"},
}
// DiffusersPipelineOptions is the fixed choice list attached to the
// "diffusers.pipeline_type" field by DefaultRegistry. Value and Label are the
// same pipeline class name.
var DiffusersPipelineOptions = []FieldOption{
{Value: "StableDiffusionPipeline", Label: "StableDiffusionPipeline"},
{Value: "StableDiffusionImg2ImgPipeline", Label: "StableDiffusionImg2ImgPipeline"},
{Value: "StableDiffusionXLPipeline", Label: "StableDiffusionXLPipeline"},
{Value: "StableDiffusionXLImg2ImgPipeline", Label: "StableDiffusionXLImg2ImgPipeline"},
{Value: "StableDiffusionDepth2ImgPipeline", Label: "StableDiffusionDepth2ImgPipeline"},
{Value: "DiffusionPipeline", Label: "DiffusionPipeline"},
{Value: "StableVideoDiffusionPipeline", Label: "StableVideoDiffusionPipeline"},
}
// DiffusersSchedulerOptions is the fixed choice list attached to the
// "diffusers.scheduler_type" field by DefaultRegistry.
var DiffusersSchedulerOptions = []FieldOption{
{Value: "ddim", Label: "DDIM"},
{Value: "ddpm", Label: "DDPM"},
{Value: "pndm", Label: "PNDM"},
{Value: "lms", Label: "LMS"},
{Value: "euler", Label: "Euler"},
{Value: "euler_a", Label: "Euler A"},
{Value: "dpm_multistep", Label: "DPM Multistep"},
{Value: "dpm_singlestep", Label: "DPM Singlestep"},
{Value: "heun", Label: "Heun"},
{Value: "unipc", Label: "UniPC"},
}

View File

@@ -1,241 +0,0 @@
package meta
import (
"reflect"
"strings"
"unicode"
)
// WalkModelConfig is the entry point of the reflection walk: given a struct
// type (or a pointer to one; config.ModelConfig is the expected input) it
// returns a FieldMeta for every exported, YAML-tagged leaf field, with
// defaults derived purely from type information. The result is nil when no
// field is discovered.
func WalkModelConfig(t reflect.Type) []FieldMeta {
	root := t
	if root.Kind() == reflect.Pointer {
		root = root.Elem()
	}

	var collected []FieldMeta
	walkStruct(root, "", &collected)
	return collected
}
// walkStruct appends to out one FieldMeta per leaf field reachable from the
// struct type t (a pointer to a struct is accepted; any other kind is a no-op).
// prefix is the dot-path accumulated so far, including its trailing dot
// (e.g. "function.grammar."), or "" at the top level.
//
// Per field, in order of precedence:
//   - unexported fields and fields tagged yaml:"-" are ignored;
//   - `yaml:",inline"` struct fields are flattened into the current prefix;
//   - fields without a yaml key are flattened only when they are anonymous
//     embedded structs, otherwise ignored;
//   - named struct fields that are not special types are descended into with
//     "<prefix><key>." as the new prefix;
//   - everything else is recorded as a leaf.
func walkStruct(t reflect.Type, prefix string, out *[]FieldMeta) {
	// unwrap strips a single level of pointer indirection, if present.
	unwrap := func(rt reflect.Type) reflect.Type {
		if rt.Kind() == reflect.Pointer {
			return rt.Elem()
		}
		return rt
	}

	t = unwrap(t)
	if t.Kind() != reflect.Struct {
		return
	}

	for sf := range t.Fields() {
		rawTag := sf.Tag.Get("yaml")
		if !sf.IsExported() || rawTag == "-" {
			continue
		}

		key, opts := parseTag(rawTag)
		elem := unwrap(sf.Type)
		elemIsStruct := elem.Kind() == reflect.Struct

		switch {
		case opts.contains("inline"):
			// Inline embedding (e.g. LLMConfig `yaml:",inline"`): children
			// share the parent's prefix. Non-struct inline fields are dropped.
			if elemIsStruct {
				walkStruct(elem, prefix, out)
			}

		case key == "":
			// No yaml key: an anonymous embedded struct is treated as inline,
			// a plain named field is skipped.
			if sf.Anonymous && elemIsStruct {
				walkStruct(elem, prefix, out)
			}

		case elemIsStruct && !isSpecialType(elem):
			// Ordinary nested struct: recurse with an extended prefix.
			walkStruct(elem, prefix+key+".", out)

		default:
			// Leaf field. Order is the index the entry gets in out, i.e. the
			// discovery order across the whole walk.
			uiType, component := inferUIType(sf.Type)
			*out = append(*out, FieldMeta{
				Path:      prefix + key,
				YAMLKey:   key,
				GoType:    sf.Type.String(),
				UIType:    uiType,
				Pointer:   sf.Type.Kind() == reflect.Pointer,
				Section:   inferSection(prefix),
				Label:     labelFromKey(key),
				Component: component,
				Order:     len(*out),
			})
		}
	}
}
// isSpecialType returns true for struct types that should be treated as leaf
// values rather than recursed into (e.g. custom JSON marshalers).
func isSpecialType(t reflect.Type) bool {
if t.Kind() == reflect.Pointer {
t = t.Elem()
}
name := t.Name()
// LogprobsValue, URI types are leaf values despite being structs
switch name {
case "LogprobsValue", "URI":
return true
}
return false
}
// inferUIType maps a Go reflect.Type to the UI type string and the default
// component used to edit it. A pointer type is classified by its element.
//
// Mapping:
//   - bool                -> "bool",     "toggle"
//   - signed/unsigned int -> "int",      "number"
//   - float32/float64     -> "float",    "number"
//   - string              -> "string",   "input"
//   - []string            -> "[]string", "string-list"
//   - []struct / []*struct-> "[]object", "json-editor"
//   - any other slice     -> "[]any",    "json-editor"
//   - map                 -> "map",      "map-editor"
//   - special struct      -> depends on the type name (see below)
//   - any other struct    -> "object",   "json-editor"
//   - everything else     -> "any",      "input"
func inferUIType(t reflect.Type) (uiType, component string) {
	if t.Kind() == reflect.Pointer {
		t = t.Elem()
	}
	switch t.Kind() {
	case reflect.Bool:
		return "bool", "toggle"
	case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
		return "int", "number"
	case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64:
		return "int", "number"
	case reflect.Float32, reflect.Float64:
		return "float", "number"
	case reflect.String:
		return "string", "input"
	case reflect.Slice:
		elem := t.Elem()
		if elem.Kind() == reflect.String {
			return "[]string", "string-list"
		}
		if elem.Kind() == reflect.Pointer {
			elem = elem.Elem()
		}
		if elem.Kind() == reflect.Struct {
			return "[]object", "json-editor"
		}
		return "[]any", "json-editor"
	case reflect.Map:
		return "map", "map-editor"
	case reflect.Struct:
		// Special types are leaves, but they are not all boolean-like:
		// previously every special struct was reported as a toggle, which
		// would present a struct-typed URI as a checkbox.
		if isSpecialType(t) {
			if t.Name() == "URI" {
				return "string", "input"
			}
			return "bool", "toggle" // LogprobsValue
		}
		return "object", "json-editor"
	default:
		return "any", "input"
	}
}
// inferSection maps a dot-path prefix (as built by walkStruct, e.g.
// "function.grammar.") to the ID of the config section its fields belong to.
// Only the first path segment matters. An empty prefix means a top-level
// field and yields "general"; unrecognised segments yield "other".
func inferSection(prefix string) string {
	if prefix == "" {
		return "general"
	}

	// Drop the trailing dot, then keep everything before the first remaining dot.
	top, _, _ := strings.Cut(strings.TrimSuffix(prefix, "."), ".")

	switch top {
	case "parameters", "reasoning", "diffusers", "tts", "pipeline", "grpc", "agent", "mcp":
		// Section ID is identical to the yaml key.
		return top
	case "template":
		return "templates"
	case "function":
		return "functions"
	case "limit_mm_per_prompt":
		return "llm"
	}
	// Includes "feature_flags" and anything not listed above.
	return "other"
}
// labelFromKey turns a snake_case yaml key into a space-separated label with
// the first rune of every word upper-cased, e.g. "context_size" becomes
// "Context Size". The remaining runes of each word are left untouched, and
// empty words (from leading, trailing or doubled underscores) stay empty.
func labelFromKey(key string) string {
	words := strings.Split(key, "_")
	for idx := range words {
		if words[idx] == "" {
			continue
		}
		letters := []rune(words[idx])
		letters[0] = unicode.ToUpper(letters[0])
		words[idx] = string(letters)
	}
	return strings.Join(words, " ")
}
// tagOptions holds the part of a yaml struct tag that follows the key, i.e.
// the comma-separated option list ("inline", "omitempty", ...).
type tagOptions string

// contains reports whether optName appears as one of the comma-separated
// options. The list is consumed left to right and scanning stops as soon as
// nothing remains, so an empty list never matches anything.
func (o tagOptions) contains(optName string) bool {
	remaining := string(o)
	for len(remaining) > 0 {
		current, rest, _ := strings.Cut(remaining, ",")
		if current == optName {
			return true
		}
		remaining = rest
	}
	return false
}
// parseTag separates a yaml struct tag into its key (the text before the first
// comma) and its options (everything after that comma). A tag without a comma
// is all key; an empty tag yields an empty key and no options.
func parseTag(tag string) (string, tagOptions) {
	// strings.Cut returns (tag, "") when there is no comma, which also covers
	// the empty tag, so no special cases are needed.
	key, rest, _ := strings.Cut(tag, ",")
	return key, tagOptions(rest)
}

View File

@@ -1,208 +0,0 @@
package meta_test
import (
"reflect"
"testing"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/config/meta"
)
// TestWalkModelConfig verifies that walking config.ModelConfig discovers
// top-level fields, inlined LLMConfig fields (without prefix), and nested
// fields under their expected dot-path prefixes.
func TestWalkModelConfig(t *testing.T) {
	fields := meta.WalkModelConfig(reflect.TypeOf(config.ModelConfig{}))
	if len(fields) == 0 {
		t.Fatal("expected fields from ModelConfig, got 0")
	}

	// Index the result by path so each expectation is a single lookup.
	byPath := make(map[string]meta.FieldMeta, len(fields))
	for _, field := range fields {
		byPath[field.Path] = field
	}
	has := func(path string) bool {
		_, found := byPath[path]
		return found
	}

	// Top-level fields.
	topLevel := []string{"name", "backend", "cuda", "step"}
	for _, path := range topLevel {
		if !has(path) {
			t.Errorf("expected field %q not found", path)
		}
	}

	// Inline LLMConfig fields appear at top level (no prefix).
	inlined := []string{"context_size", "gpu_layers", "threads", "mmap"}
	for _, path := range inlined {
		if !has(path) {
			t.Errorf("expected inline LLMConfig field %q not found", path)
		}
	}

	// Nested struct fields carry the prefix of their parents.
	nested := []string{
		"template.chat",
		"template.completion",
		"template.use_tokenizer_template",
		"function.grammar.parallel_calls",
		"function.grammar.mixed_mode",
		"diffusers.pipeline_type",
		"diffusers.cuda",
		"pipeline.llm",
		"pipeline.tts",
		"reasoning.disable",
		"agent.max_iterations",
		"grpc.attempts",
	}
	for _, path := range nested {
		if !has(path) {
			t.Errorf("expected nested field %q not found", path)
		}
	}

	// PredictionOptions fields live under "parameters.".
	parameters := []string{
		"parameters.temperature",
		"parameters.top_p",
		"parameters.top_k",
		"parameters.max_tokens",
		"parameters.seed",
	}
	for _, path := range parameters {
		if !has(path) {
			t.Errorf("expected parameters field %q not found", path)
		}
	}

	// TTSConfig fields live under "tts.".
	if !has("tts.voice") {
		t.Error("expected tts.voice field not found")
	}
}
// TestSkipsYAMLDashFields verifies that no discovered field carries the path
// "modelConfigFile" or "modelTemplate", the names this test uses for fields
// excluded from the walk via a yaml:"-" tag.
func TestSkipsYAMLDashFields(t *testing.T) {
	fields := meta.WalkModelConfig(reflect.TypeOf(config.ModelConfig{}))

	// The original test also built a path-indexed map here that was never
	// read; the direct scan below is all the check needs.
	for _, f := range fields {
		if f.Path == "modelConfigFile" || f.Path == "modelTemplate" {
			t.Errorf("field %q should have been skipped (yaml:\"-\")", f.Path)
		}
	}
}
// TestTypeMapping checks, for a sample of fields, the UI type inferred from
// the Go type and whether the field is recorded as a pointer.
func TestTypeMapping(t *testing.T) {
	discovered := meta.WalkModelConfig(reflect.TypeOf(config.ModelConfig{}))
	index := make(map[string]meta.FieldMeta, len(discovered))
	for _, field := range discovered {
		index[field.Path] = field
	}

	type expectation struct {
		path    string
		uiType  string
		pointer bool
	}
	cases := []expectation{
		{path: "name", uiType: "string", pointer: false},
		{path: "cuda", uiType: "bool", pointer: false},
		{path: "context_size", uiType: "int", pointer: true},
		{path: "gpu_layers", uiType: "int", pointer: true},
		{path: "threads", uiType: "int", pointer: true},
		{path: "f16", uiType: "bool", pointer: true},
		{path: "mmap", uiType: "bool", pointer: true},
		{path: "stopwords", uiType: "[]string", pointer: false},
		{path: "roles", uiType: "map", pointer: false},
		{path: "parameters.temperature", uiType: "float", pointer: true},
		{path: "parameters.top_k", uiType: "int", pointer: true},
		{path: "function.grammar.parallel_calls", uiType: "bool", pointer: false},
	}

	for _, want := range cases {
		got, found := index[want.path]
		if !found {
			t.Errorf("field %q not found", want.path)
			continue
		}
		if got.UIType != want.uiType {
			t.Errorf("field %q: expected UIType %q, got %q", want.path, want.uiType, got.UIType)
		}
		if got.Pointer != want.pointer {
			t.Errorf("field %q: expected Pointer=%v, got %v", want.path, want.pointer, got.Pointer)
		}
	}
}
// TestSectionAssignment checks that the section inferred by the reflection
// walk matches the field's top-level path segment.
func TestSectionAssignment(t *testing.T) {
	discovered := meta.WalkModelConfig(reflect.TypeOf(config.ModelConfig{}))
	index := make(map[string]meta.FieldMeta, len(discovered))
	for _, field := range discovered {
		index[field.Path] = field
	}

	type expectation struct {
		path    string
		section string
	}
	cases := []expectation{
		{path: "name", section: "general"},
		{path: "backend", section: "general"},
		// inline LLMConfig -> no prefix -> general
		{path: "context_size", section: "general"},
		{path: "parameters.temperature", section: "parameters"},
		{path: "template.chat", section: "templates"},
		{path: "function.grammar.parallel_calls", section: "functions"},
		{path: "diffusers.cuda", section: "diffusers"},
		{path: "pipeline.llm", section: "pipeline"},
		{path: "reasoning.disable", section: "reasoning"},
		{path: "agent.max_iterations", section: "agent"},
		{path: "grpc.attempts", section: "grpc"},
	}

	for _, want := range cases {
		got, found := index[want.path]
		switch {
		case !found:
			t.Errorf("field %q not found", want.path)
		case got.Section != want.section:
			t.Errorf("field %q: expected section %q, got %q", want.path, want.section, got.Section)
		}
	}
}
// TestLabelGeneration checks the labels auto-generated from yaml keys by the
// reflection walk (each underscore-separated word gets a capitalised first
// letter, nothing else is changed).
func TestLabelGeneration(t *testing.T) {
	discovered := meta.WalkModelConfig(reflect.TypeOf(config.ModelConfig{}))
	index := make(map[string]meta.FieldMeta, len(discovered))
	for _, field := range discovered {
		index[field.Path] = field
	}

	type expectation struct {
		path  string
		label string
	}
	cases := []expectation{
		{path: "context_size", label: "Context Size"},
		{path: "gpu_layers", label: "Gpu Layers"},
		{path: "name", label: "Name"},
		{path: "cuda", label: "Cuda"},
	}

	for _, want := range cases {
		got, found := index[want.path]
		switch {
		case !found:
			t.Errorf("field %q not found", want.path)
		case got.Label != want.label:
			t.Errorf("field %q: expected label %q, got %q", want.path, want.label, got.Label)
		}
	}
}
// TestFieldCount is a coarse guard against the walk silently dropping large
// parts of the config: it fails when fewer than 80 fields are discovered and
// always logs the actual count.
func TestFieldCount(t *testing.T) {
	total := len(meta.WalkModelConfig(reflect.TypeOf(config.ModelConfig{})))

	// The config is expected to expose well over 100 fields; 80 is the
	// minimum accepted here.
	if total < 80 {
		t.Errorf("expected at least 80 fields, got %d", total)
	}
	t.Logf("Total fields discovered: %d", total)
}

View File

@@ -1,314 +0,0 @@
package meta
// DefaultRegistry returns enrichment overrides for the ~30 most commonly used
// config fields. Fields not listed here still appear with auto-generated
// labels and type-inferred components.
//
// The map is keyed by the field's dot-path (the same path WalkModelConfig
// produces, e.g. "parameters.temperature"). A fresh map is built on every
// call. Order values are grouped in blocks per section (general 0-6, llm
// 10-23, parameters 30-35, templates 40-43, pipeline 60-63, functions 70-72,
// diffusers 80-82).
func DefaultRegistry() map[string]FieldMetaOverride {
// f64 returns a pointer to its argument, for the optional Min/Max/Step bounds.
f64 := func(v float64) *float64 { return &v }
return map[string]FieldMetaOverride{
// --- General ---
"name": {
Section: "general",
Label: "Model Name",
Description: "Unique identifier for this model configuration",
Component: "input",
// NOTE(review): FieldMetaOverride documents that only non-zero fields
// override, so Order: 0 presumably leaves the reflection-derived order
// in place — confirm against the merge code.
Order: 0,
},
"backend": {
Section: "general",
Label: "Backend",
Description: "The inference backend to use (e.g. llama-cpp, vllm, diffusers)",
Component: "select",
AutocompleteProvider: ProviderBackends,
Order: 1,
},
"description": {
Section: "general",
Label: "Description",
Description: "Human-readable description of what this model does",
Component: "textarea",
Order: 2,
},
"usage": {
Section: "general",
Label: "Usage",
Description: "Usage instructions or notes",
Component: "textarea",
Advanced: true,
Order: 3,
},
"cuda": {
Section: "general",
Label: "CUDA",
Description: "Explicitly enable CUDA acceleration",
Order: 5,
},
"known_usecases": {
Section: "general",
Label: "Known Use Cases",
Description: "Capabilities this model supports (e.g. FLAG_CHAT, FLAG_COMPLETION)",
Component: "string-list",
Order: 6,
},
// --- LLM ---
// These keys have no path prefix, so the reflection walk files them under
// "general"; the overrides below move them to the "llm" section.
"context_size": {
Section: "llm",
Label: "Context Size",
Description: "Maximum context window in tokens",
Component: "number",
VRAMImpact: true,
Order: 10,
},
"gpu_layers": {
Section: "llm",
Label: "GPU Layers",
Description: "Number of layers to offload to GPU (-1 = all)",
Component: "number",
Min: f64(-1),
VRAMImpact: true,
Order: 11,
},
"threads": {
Section: "llm",
Label: "Threads",
Description: "Number of CPU threads for inference",
Component: "number",
Min: f64(1),
Order: 12,
},
"f16": {
Section: "llm",
Label: "F16",
Description: "Use 16-bit floating point for key/value cache",
Order: 13,
},
"mmap": {
Section: "llm",
Label: "Memory Map",
Description: "Use memory-mapped files for model loading",
Order: 14,
},
"mmlock": {
Section: "llm",
Label: "Memory Lock",
Description: "Lock model memory to prevent swapping",
Advanced: true,
Order: 15,
},
"low_vram": {
Section: "llm",
Label: "Low VRAM",
Description: "Optimize for systems with limited GPU memory",
VRAMImpact: true,
Order: 16,
},
"embeddings": {
Section: "llm",
Label: "Embeddings",
Description: "Enable embedding generation mode",
Order: 17,
},
"quantization": {
Section: "llm",
Label: "Quantization",
Description: "Quantization method (e.g. q4_0, q5_1, q8_0)",
Component: "select",
Options: QuantizationOptions,
Advanced: true,
Order: 20,
},
"flash_attention": {
Section: "llm",
Label: "Flash Attention",
Description: "Enable flash attention for faster inference",
Component: "input",
Advanced: true,
Order: 21,
},
"cache_type_k": {
Section: "llm",
Label: "KV Cache Type (K)",
Description: "Quantization type for key cache (e.g. f16, q8_0, q4_0)",
Component: "select",
Options: CacheTypeOptions,
VRAMImpact: true,
Advanced: true,
Order: 22,
},
"cache_type_v": {
Section: "llm",
Label: "KV Cache Type (V)",
Description: "Quantization type for value cache",
Component: "select",
Options: CacheTypeOptions,
VRAMImpact: true,
Advanced: true,
Order: 23,
},
// --- Parameters ---
"parameters.temperature": {
Section: "parameters",
Label: "Temperature",
Description: "Sampling temperature (higher = more creative, lower = more deterministic)",
Component: "slider",
Min: f64(0),
Max: f64(2),
Step: f64(0.05),
Order: 30,
},
"parameters.top_p": {
Section: "parameters",
Label: "Top P",
Description: "Nucleus sampling threshold",
Component: "slider",
Min: f64(0),
Max: f64(1),
Step: f64(0.01),
Order: 31,
},
"parameters.top_k": {
Section: "parameters",
Label: "Top K",
Description: "Top-K sampling: consider only the K most likely tokens",
Component: "number",
Min: f64(0),
Order: 32,
},
"parameters.max_tokens": {
Section: "parameters",
Label: "Max Tokens",
Description: "Maximum number of tokens to generate (0 = unlimited)",
Component: "number",
Min: f64(0),
Order: 33,
},
"parameters.repeat_penalty": {
Section: "parameters",
Label: "Repeat Penalty",
Description: "Penalize repeated tokens (1.0 = no penalty)",
Component: "number",
Min: f64(0),
Advanced: true,
Order: 34,
},
"parameters.seed": {
Section: "parameters",
Label: "Seed",
Description: "Random seed (-1 = random)",
Component: "number",
Advanced: true,
Order: 35,
},
// --- Templates ---
"template.chat": {
Section: "templates",
Label: "Chat Template",
Description: "Go template for chat completion requests",
Component: "code-editor",
Order: 40,
},
"template.chat_message": {
Section: "templates",
Label: "Chat Message Template",
Description: "Go template for individual chat messages",
Component: "code-editor",
Order: 41,
},
"template.completion": {
Section: "templates",
Label: "Completion Template",
Description: "Go template for completion requests",
Component: "code-editor",
Order: 42,
},
"template.use_tokenizer_template": {
Section: "templates",
Label: "Use Tokenizer Template",
Description: "Use the chat template from the model's tokenizer config",
Order: 43,
},
// --- Pipeline ---
// Pipeline fields reference other models, so their choices come from a
// runtime autocomplete provider rather than a static Options list.
"pipeline.llm": {
Section: "pipeline",
Label: "LLM Model",
Description: "Model to use for LLM inference in the pipeline",
Component: "model-select",
AutocompleteProvider: ProviderModelsChat,
Order: 60,
},
"pipeline.tts": {
Section: "pipeline",
Label: "TTS Model",
Description: "Model to use for text-to-speech in the pipeline",
Component: "model-select",
AutocompleteProvider: ProviderModelsTTS,
Order: 61,
},
"pipeline.transcription": {
Section: "pipeline",
Label: "Transcription Model",
Description: "Model to use for speech-to-text in the pipeline",
Component: "model-select",
AutocompleteProvider: ProviderModelsTranscript,
Order: 62,
},
"pipeline.vad": {
Section: "pipeline",
Label: "VAD Model",
Description: "Model to use for voice activity detection in the pipeline",
Component: "model-select",
AutocompleteProvider: ProviderModelsVAD,
Order: 63,
},
// --- Functions ---
"function.grammar.parallel_calls": {
Section: "functions",
Label: "Parallel Calls",
Description: "Allow the LLM to return multiple function calls in one response",
Order: 70,
},
"function.grammar.mixed_mode": {
Section: "functions",
Label: "Mixed Mode",
Description: "Allow the LLM to return both text and function calls",
Order: 71,
},
"function.grammar.disable": {
Section: "functions",
Label: "Disable Grammar",
Description: "Disable grammar-constrained generation for function calls",
Advanced: true,
Order: 72,
},
// --- Diffusers ---
"diffusers.pipeline_type": {
Section: "diffusers",
Label: "Pipeline Type",
Description: "Diffusers pipeline type (e.g. StableDiffusionPipeline)",
Component: "select",
Options: DiffusersPipelineOptions,
Order: 80,
},
"diffusers.scheduler_type": {
Section: "diffusers",
Label: "Scheduler Type",
Description: "Noise scheduler type",
Component: "select",
Options: DiffusersSchedulerOptions,
Order: 81,
},
"diffusers.cuda": {
Section: "diffusers",
Label: "CUDA",
Description: "Enable CUDA for diffusers",
Order: 82,
},
}
}

View File

@@ -1,83 +0,0 @@
package meta
// FieldMeta describes a single configuration field for UI rendering and agent discovery.
//
// The reflection walk (walkStruct) fills Path, YAMLKey, GoType, UIType,
// Pointer, Section, Label, Component and Order; the remaining fields are left
// at their zero value by the walk and can be supplied through a
// FieldMetaOverride.
type FieldMeta struct {
Path string `json:"path"` // dot-path: "context_size", "function.grammar.parallel_calls"
YAMLKey string `json:"yaml_key"` // leaf yaml key
GoType string `json:"go_type"` // "*int", "string", "[]string"
UIType string `json:"ui_type"` // "string", "int", "float", "bool", "[]string", "map", "object"
Pointer bool `json:"pointer,omitempty"` // true = nil means "not set"
Section string `json:"section"` // "general", "llm", "templates", etc.
Label string `json:"label"` // human-readable label
Description string `json:"description,omitempty"` // help text
Component string `json:"component"` // "input", "number", "toggle", "select", "slider", etc.
Placeholder string `json:"placeholder,omitempty"`
Default any `json:"default,omitempty"`
Min *float64 `json:"min,omitempty"` // optional numeric lower bound; nil = unbounded
Max *float64 `json:"max,omitempty"` // optional numeric upper bound; nil = unbounded
Step *float64 `json:"step,omitempty"` // optional numeric increment; nil = unspecified
Options []FieldOption `json:"options,omitempty"` // static choices for select-style components
AutocompleteProvider string `json:"autocomplete_provider,omitempty"` // "backends", "models:chat", etc.
VRAMImpact bool `json:"vram_impact,omitempty"`
Advanced bool `json:"advanced,omitempty"`
Order int `json:"order"` // display order; the walk sets it to the field's discovery index
}
// FieldOption represents a choice in a select/enum field.
type FieldOption struct {
Value string `json:"value"` // value stored in the configuration
Label string `json:"label"` // text shown to the user for this choice
}
// Section groups related fields in the UI.
type Section struct {
ID string `json:"id"` // identifier referenced by FieldMeta.Section, e.g. "general"
Label string `json:"label"` // human-readable section title
Icon string `json:"icon,omitempty"` // optional icon name
Order int `json:"order"` // display position; DefaultSections lists sections in ascending Order
}
// ConfigMetadata is the top-level response for the metadata API.
// It pairs the section list with the flat list of field descriptors; each
// field names its section through FieldMeta.Section.
type ConfigMetadata struct {
Sections []Section `json:"sections"`
Fields []FieldMeta `json:"fields"`
}
// FieldMetaOverride holds registry overrides that are merged on top of
// the reflection-discovered defaults. Only non-zero fields override.
//
// NOTE(review): under that rule a zero value cannot be expressed as an
// override — VRAMImpact/Advanced cannot be forced to false and Order cannot
// be forced to 0. The merge code is not part of this file; confirm there.
type FieldMetaOverride struct {
Section string
Label string
Description string
Component string
Placeholder string
Default any
Min *float64 // nil = no override
Max *float64 // nil = no override
Step *float64 // nil = no override
Options []FieldOption
AutocompleteProvider string
VRAMImpact bool
Advanced bool
Order int
}
// DefaultSections returns the well-known config sections, listed in ascending
// display order. A new slice is built on every call.
func DefaultSections() []Section {
	// section is a positional shorthand for a fully keyed Section literal.
	section := func(id, label, icon string, order int) Section {
		return Section{ID: id, Label: label, Icon: icon, Order: order}
	}

	return []Section{
		section("general", "General", "settings", 0),
		section("llm", "LLM", "cpu", 10),
		section("parameters", "Parameters", "sliders", 20),
		section("templates", "Templates", "file-text", 30),
		section("functions", "Functions / Tools", "tool", 40),
		section("reasoning", "Reasoning", "brain", 45),
		section("diffusers", "Diffusers", "image", 50),
		section("tts", "TTS", "volume-2", 55),
		section("pipeline", "Pipeline", "git-merge", 60),
		section("grpc", "gRPC", "server", 65),
		section("agent", "Agent", "bot", 70),
		section("mcp", "MCP", "plug", 75),
		section("other", "Other", "more-horizontal", 100),
	}
}

View File

@@ -300,29 +300,14 @@ func DeleteBackendFromSystem(systemState *system.SystemState, name string) error
backend, ok := backends.Get(name)
if !ok {
// Not found by direct key — try matching by gallery name (metadata.Name)
// The UI may send gallery-style names like "localai@llama-cpp" which
// don't match the directory-based keys used in the backends map.
for _, b := range backends {
if b.Metadata != nil && b.Metadata.Name == name && !b.IsMeta {
backend = b
ok = true
break
}
}
if !ok {
return fmt.Errorf("backend %q: %w", name, ErrBackendNotFound)
}
return fmt.Errorf("backend %q: %w", name, ErrBackendNotFound)
}
if backend.IsSystem {
return fmt.Errorf("system backend %q cannot be deleted", name)
}
// Use the backend's actual Name (directory key) for path resolution,
// not the caller-supplied name which may be a gallery-style name.
dirName := backend.Name
backendDirectory := filepath.Join(systemState.Backend.BackendsPath, dirName)
backendDirectory := filepath.Join(systemState.Backend.BackendsPath, name)
// check if the backend dir exists
if _, err := os.Stat(backendDirectory); os.IsNotExist(err) {
@@ -340,7 +325,7 @@ func DeleteBackendFromSystem(systemState *system.SystemState, name string) error
if err != nil {
return err
}
if metadata != nil && (metadata.Alias == name || metadata.Alias == dirName) {
if metadata != nil && metadata.Alias == name {
backendDirectory = filepath.Join(systemState.Backend.BackendsPath, backend.Name())
foundBackend = true
break

View File

@@ -52,42 +52,9 @@ var quietPaths = []string{"/api/operations", "/api/resources", "/healthz", "/rea
// @license.name MIT
// @license.url https://raw.githubusercontent.com/mudler/LocalAI/master/LICENSE
// @BasePath /
// @schemes http https
// @securityDefinitions.apikey BearerAuth
// @in header
// @name Authorization
// @tag.name inference
// @tag.description Chat completions, text completions, edits, and responses (OpenAI-compatible)
// @tag.name embeddings
// @tag.description Vector embeddings (OpenAI-compatible)
// @tag.name audio
// @tag.description Text-to-speech, transcription, voice activity detection, sound generation
// @tag.name images
// @tag.description Image generation and inpainting
// @tag.name video
// @tag.description Video generation from prompts
// @tag.name detection
// @tag.description Object detection in images
// @tag.name tokenize
// @tag.description Tokenization and token metrics
// @tag.name models
// @tag.description Model gallery browsing, installation, deletion, and listing
// @tag.name backends
// @tag.description Backend gallery browsing, installation, deletion, and listing
// @tag.name config
// @tag.description Model configuration metadata, autocomplete, PATCH updates, VRAM estimation
// @tag.name monitoring
// @tag.description Prometheus metrics, backend status, system information
// @tag.name mcp
// @tag.description Model Context Protocol — tool-augmented chat with MCP servers
// @tag.name agent-jobs
// @tag.description Agent task and job management
// @tag.name p2p
// @tag.description Peer-to-peer networking nodes and tokens
// @tag.name rerank
// @tag.description Document reranking
// @tag.name instructions
// @tag.description API instruction discovery — browse instruction areas and get endpoint guides
func API(application *application.Application) (*echo.Echo, error) {
e := echo.New()
@@ -393,7 +360,7 @@ func API(application *application.Application) (*echo.Echo, error) {
routes.RegisterOpenResponsesRoutes(e, requestExtractor, application)
if !application.ApplicationConfig().DisableWebUI {
routes.RegisterUIAPIRoutes(e, application.ModelConfigLoader(), application.ModelLoader(), application.ApplicationConfig(), application.GalleryService(), opcache, application, adminMiddleware)
routes.RegisterUIRoutes(e, application.ModelConfigLoader(), application.ApplicationConfig(), application.GalleryService(), adminMiddleware)
routes.RegisterUIRoutes(e, application.ModelConfigLoader(), application.ModelLoader(), application.ApplicationConfig(), application.GalleryService(), adminMiddleware)
// Serve React SPA from / with SPA fallback via 404 handler
reactFS, fsErr := fs.Sub(reactUI, "react-ui/dist")

View File

@@ -21,7 +21,6 @@ import (
// MessagesEndpoint is the Anthropic Messages API endpoint
// https://docs.anthropic.com/claude/reference/messages_post
// @Summary Generate a message response for the given messages and model.
// @Tags inference
// @Param request body schema.AnthropicRequest true "query params"
// @Success 200 {object} schema.AnthropicResponse "Response"
// @Router /v1/messages [post]
@@ -358,7 +357,7 @@ func handleAnthropicStream(c echo.Context, id string, input *schema.AnthropicReq
// Send initial content_block_start event
contentBlockStart := schema.AnthropicStreamEvent{
Type: "content_block_start",
Index: intPtr(currentBlockIndex),
Index: currentBlockIndex,
ContentBlock: &schema.AnthropicContentBlock{Type: "text", Text: ""},
}
sendAnthropicSSE(c, contentBlockStart)
@@ -377,7 +376,7 @@ func handleAnthropicStream(c echo.Context, id string, input *schema.AnthropicReq
if !inToolCall && currentBlockIndex == 0 {
sendAnthropicSSE(c, schema.AnthropicStreamEvent{
Type: "content_block_stop",
Index: intPtr(currentBlockIndex),
Index: currentBlockIndex,
})
currentBlockIndex++
inToolCall = true
@@ -387,7 +386,7 @@ func handleAnthropicStream(c echo.Context, id string, input *schema.AnthropicReq
tc := toolCalls[i]
sendAnthropicSSE(c, schema.AnthropicStreamEvent{
Type: "content_block_start",
Index: intPtr(currentBlockIndex),
Index: currentBlockIndex,
ContentBlock: &schema.AnthropicContentBlock{
Type: "tool_use",
ID: fmt.Sprintf("toolu_%s_%d", id, i),
@@ -396,7 +395,7 @@ func handleAnthropicStream(c echo.Context, id string, input *schema.AnthropicReq
})
sendAnthropicSSE(c, schema.AnthropicStreamEvent{
Type: "content_block_delta",
Index: intPtr(currentBlockIndex),
Index: currentBlockIndex,
Delta: &schema.AnthropicStreamDelta{
Type: "input_json_delta",
PartialJSON: tc.Arguments,
@@ -404,7 +403,7 @@ func handleAnthropicStream(c echo.Context, id string, input *schema.AnthropicReq
})
sendAnthropicSSE(c, schema.AnthropicStreamEvent{
Type: "content_block_stop",
Index: intPtr(currentBlockIndex),
Index: currentBlockIndex,
})
currentBlockIndex++
}
@@ -417,7 +416,7 @@ func handleAnthropicStream(c echo.Context, id string, input *schema.AnthropicReq
if !inToolCall {
sendAnthropicSSE(c, schema.AnthropicStreamEvent{
Type: "content_block_delta",
Index: intPtr(0),
Index: 0,
Delta: &schema.AnthropicStreamDelta{
Type: "text_delta",
Text: token,
@@ -517,7 +516,7 @@ func handleAnthropicStream(c echo.Context, id string, input *schema.AnthropicReq
// Close the text content block
sendAnthropicSSE(c, schema.AnthropicStreamEvent{
Type: "content_block_stop",
Index: intPtr(currentBlockIndex),
Index: currentBlockIndex,
})
currentBlockIndex++
inToolCall = true
@@ -529,7 +528,7 @@ func handleAnthropicStream(c echo.Context, id string, input *schema.AnthropicReq
}
sendAnthropicSSE(c, schema.AnthropicStreamEvent{
Type: "content_block_start",
Index: intPtr(currentBlockIndex),
Index: currentBlockIndex,
ContentBlock: &schema.AnthropicContentBlock{
Type: "tool_use",
ID: toolCallID,
@@ -538,7 +537,7 @@ func handleAnthropicStream(c echo.Context, id string, input *schema.AnthropicReq
})
sendAnthropicSSE(c, schema.AnthropicStreamEvent{
Type: "content_block_delta",
Index: intPtr(currentBlockIndex),
Index: currentBlockIndex,
Delta: &schema.AnthropicStreamDelta{
Type: "input_json_delta",
PartialJSON: fc.Arguments,
@@ -546,7 +545,7 @@ func handleAnthropicStream(c echo.Context, id string, input *schema.AnthropicReq
})
sendAnthropicSSE(c, schema.AnthropicStreamEvent{
Type: "content_block_stop",
Index: intPtr(currentBlockIndex),
Index: currentBlockIndex,
})
currentBlockIndex++
toolCallsEmitted++
@@ -558,7 +557,7 @@ func handleAnthropicStream(c echo.Context, id string, input *schema.AnthropicReq
if !inToolCall {
sendAnthropicSSE(c, schema.AnthropicStreamEvent{
Type: "content_block_stop",
Index: intPtr(0),
Index: 0,
})
}
@@ -599,8 +598,6 @@ func convertFuncsToOpenAITools(funcs functions.Functions) []functions.Tool {
return tools
}
// intPtr returns a pointer to a fresh int holding the value of i.
func intPtr(i int) *int {
	boxed := new(int)
	*boxed = i
	return boxed
}
func sendAnthropicSSE(c echo.Context, event schema.AnthropicStreamEvent) {
data, err := json.Marshal(event)
if err != nil {

View File

@@ -15,7 +15,6 @@ import (
// SoundGenerationEndpoint is the ElevenLabs SoundGeneration endpoint https://elevenlabs.io/docs/api-reference/sound-generation
// @Summary Generates audio from the input text.
// @Tags audio
// @Param request body schema.ElevenLabsSoundGenerationRequest true "query params"
// @Success 200 {string} binary "Response"
// @Router /v1/sound-generation [post]

View File

@@ -15,7 +15,6 @@ import (
// TTSEndpoint is the OpenAI Speech API endpoint https://platform.openai.com/docs/api-reference/audio/createSpeech
// @Summary Generates audio from the input text.
// @Tags audio
// @Param voice-id path string true "Account ID"
// @Param request body schema.TTSRequest true "query params"
// @Success 200 {string} binary "Response"

View File

@@ -15,7 +15,6 @@ import (
// JINARerankEndpoint acts like the Jina reranker endpoint (https://jina.ai/reranker/)
// @Summary Reranks a list of phrases by relevance to a given text query.
// @Tags rerank
// @Param request body schema.JINARerankRequest true "query params"
// @Success 200 {object} schema.JINARerankResponse "Response"
// @Router /v1/rerank [post]

View File

@@ -30,15 +30,6 @@ func getJobService(app *application.Application, c echo.Context) *agentpool.Agen
return jobSvc
}
// CreateTaskEndpoint creates a new agent task definition.
// @Summary Create a new agent task
// @Tags agent-jobs
// @Accept json
// @Produce json
// @Param request body schema.Task true "Task definition"
// @Success 201 {object} map[string]string "id"
// @Failure 400 {object} map[string]string "error"
// @Router /api/agent/tasks [post]
func CreateTaskEndpoint(app *application.Application) echo.HandlerFunc {
return func(c echo.Context) error {
var task schema.Task
@@ -55,17 +46,6 @@ func CreateTaskEndpoint(app *application.Application) echo.HandlerFunc {
}
}
// UpdateTaskEndpoint updates an existing agent task.
// @Summary Update an agent task
// @Tags agent-jobs
// @Accept json
// @Produce json
// @Param id path string true "Task ID"
// @Param request body schema.Task true "Updated task definition"
// @Success 200 {object} map[string]string "message"
// @Failure 400 {object} map[string]string "error"
// @Failure 404 {object} map[string]string "error"
// @Router /api/agent/tasks/{id} [put]
func UpdateTaskEndpoint(app *application.Application) echo.HandlerFunc {
return func(c echo.Context) error {
id := c.Param("id")
@@ -85,14 +65,6 @@ func UpdateTaskEndpoint(app *application.Application) echo.HandlerFunc {
}
}
// DeleteTaskEndpoint deletes an agent task.
// @Summary Delete an agent task
// @Tags agent-jobs
// @Produce json
// @Param id path string true "Task ID"
// @Success 200 {object} map[string]string "message"
// @Failure 404 {object} map[string]string "error"
// @Router /api/agent/tasks/{id} [delete]
func DeleteTaskEndpoint(app *application.Application) echo.HandlerFunc {
return func(c echo.Context) error {
id := c.Param("id")
@@ -107,13 +79,6 @@ func DeleteTaskEndpoint(app *application.Application) echo.HandlerFunc {
}
}
// ListTasksEndpoint lists all agent tasks for the current user.
// @Summary List agent tasks
// @Tags agent-jobs
// @Produce json
// @Param all_users query string false "Set to 'true' for admin cross-user listing"
// @Success 200 {object} []schema.Task "tasks"
// @Router /api/agent/tasks [get]
func ListTasksEndpoint(app *application.Application) echo.HandlerFunc {
return func(c echo.Context) error {
jobSvc := getJobService(app, c)
@@ -156,14 +121,6 @@ func ListTasksEndpoint(app *application.Application) echo.HandlerFunc {
}
}
// GetTaskEndpoint returns a single agent task by ID.
// @Summary Get an agent task
// @Tags agent-jobs
// @Produce json
// @Param id path string true "Task ID"
// @Success 200 {object} schema.Task "task"
// @Failure 404 {object} map[string]string "error"
// @Router /api/agent/tasks/{id} [get]
func GetTaskEndpoint(app *application.Application) echo.HandlerFunc {
return func(c echo.Context) error {
id := c.Param("id")
@@ -176,15 +133,6 @@ func GetTaskEndpoint(app *application.Application) echo.HandlerFunc {
}
}
// ExecuteJobEndpoint creates and runs a new job for a task.
// @Summary Execute an agent job
// @Tags agent-jobs
// @Accept json
// @Produce json
// @Param request body schema.JobExecutionRequest true "Job execution request"
// @Success 201 {object} schema.JobExecutionResponse "job created"
// @Failure 400 {object} map[string]string "error"
// @Router /api/agent/jobs/execute [post]
func ExecuteJobEndpoint(app *application.Application) echo.HandlerFunc {
return func(c echo.Context) error {
var req schema.JobExecutionRequest
@@ -220,14 +168,6 @@ func ExecuteJobEndpoint(app *application.Application) echo.HandlerFunc {
}
}
// GetJobEndpoint returns a single job by ID.
// @Summary Get an agent job
// @Tags agent-jobs
// @Produce json
// @Param id path string true "Job ID"
// @Success 200 {object} schema.Job "job"
// @Failure 404 {object} map[string]string "error"
// @Router /api/agent/jobs/{id} [get]
func GetJobEndpoint(app *application.Application) echo.HandlerFunc {
return func(c echo.Context) error {
id := c.Param("id")
@@ -240,16 +180,6 @@ func GetJobEndpoint(app *application.Application) echo.HandlerFunc {
}
}
// ListJobsEndpoint lists jobs, optionally filtered by task or status.
// @Summary List agent jobs
// @Tags agent-jobs
// @Produce json
// @Param task_id query string false "Filter by task ID"
// @Param status query string false "Filter by status (pending, running, completed, failed, cancelled)"
// @Param limit query integer false "Max number of jobs to return"
// @Param all_users query string false "Set to 'true' for admin cross-user listing"
// @Success 200 {object} []schema.Job "jobs"
// @Router /api/agent/jobs [get]
func ListJobsEndpoint(app *application.Application) echo.HandlerFunc {
return func(c echo.Context) error {
var taskID *string
@@ -311,15 +241,6 @@ func ListJobsEndpoint(app *application.Application) echo.HandlerFunc {
}
}
// CancelJobEndpoint cancels a running job.
// @Summary Cancel an agent job
// @Tags agent-jobs
// @Produce json
// @Param id path string true "Job ID"
// @Success 200 {object} map[string]string "message"
// @Failure 400 {object} map[string]string "error"
// @Failure 404 {object} map[string]string "error"
// @Router /api/agent/jobs/{id}/cancel [post]
func CancelJobEndpoint(app *application.Application) echo.HandlerFunc {
return func(c echo.Context) error {
id := c.Param("id")
@@ -334,14 +255,6 @@ func CancelJobEndpoint(app *application.Application) echo.HandlerFunc {
}
}
// DeleteJobEndpoint deletes a job by ID.
// @Summary Delete an agent job
// @Tags agent-jobs
// @Produce json
// @Param id path string true "Job ID"
// @Success 200 {object} map[string]string "message"
// @Failure 404 {object} map[string]string "error"
// @Router /api/agent/jobs/{id} [delete]
func DeleteJobEndpoint(app *application.Application) echo.HandlerFunc {
return func(c echo.Context) error {
id := c.Param("id")
@@ -356,17 +269,6 @@ func DeleteJobEndpoint(app *application.Application) echo.HandlerFunc {
}
}
// ExecuteTaskByNameEndpoint looks up a task by name and executes it.
// @Summary Execute an agent task by name
// @Tags agent-jobs
// @Accept json
// @Produce json
// @Param name path string true "Task name"
// @Param parameters body object false "Optional template parameters"
// @Success 201 {object} schema.JobExecutionResponse "job created"
// @Failure 400 {object} map[string]string "error"
// @Failure 404 {object} map[string]string "error"
// @Router /api/agent/tasks/{name}/execute [post]
func ExecuteTaskByNameEndpoint(app *application.Application) echo.HandlerFunc {
return func(c echo.Context) error {
name := c.Param("name")

View File

@@ -1,489 +0,0 @@
package localai
import (
"encoding/json"
"fmt"
"net/http"
"sort"
"strings"
"sync"
"github.com/labstack/echo/v4"
"github.com/mudler/LocalAI/swagger"
"github.com/mudler/xlog"
)
// swaggerDefsPrefix is the prefix carried by "$ref" values that point at an
// entry of the spec's top-level "definitions" map; the text after the prefix
// is the definition name.
const swaggerDefsPrefix = "#/definitions/"
// instructionDef describes one instruction area: a named group of API
// operations selected by the swagger tags those operations carry.
type instructionDef struct {
// Name identifies the area. It is the last segment of the area's
// /api/instructions/{name} URL and the key the area is looked up by.
Name string `json:"name"`
// Description is a one-line summary of what the area covers.
Description string `json:"description"`
// Tags lists the swagger tags whose operations belong to this area.
Tags []string `json:"tags"`
// Intro is optional free text written right below the title of the
// markdown guide. The json:"-" tag keeps it out of JSON encodings of
// this struct.
Intro string `json:"-"` // brief context not in swagger
}
// instructionDefs is the fixed catalogue of instruction areas served under
// /api/instructions. Names must stay unique because GetAPIInstructionEndpoint
// indexes this slice by Name. Entries without an Intro render a guide that
// goes straight from the title to the endpoint sections.
var instructionDefs = []instructionDef{
{
Name: "chat-inference",
Description: "OpenAI-compatible chat completions, text completions, and embeddings",
Tags: []string{"inference", "embeddings"},
Intro: "Set \"stream\": true for SSE streaming. Supports tool/function calling when the model config has function templates configured.",
},
{
Name: "audio",
Description: "Text-to-speech, voice activity detection, transcription, and sound generation",
Tags: []string{"audio"},
},
{
Name: "images",
Description: "Image generation and inpainting",
Tags: []string{"images"},
},
{
Name: "model-management",
Description: "Browse the gallery, install, delete, and manage models and backends",
Tags: []string{"models", "backends"},
},
{
Name: "config-management",
Description: "Discover, read, and modify model configuration fields with VRAM estimation",
Tags: []string{"config"},
Intro: "Fields with static options include an \"options\" array in metadata. Fields with dynamic values have an \"autocomplete_provider\" for runtime lookup.",
},
{
Name: "monitoring",
Description: "System metrics, backend status, API and backend traces, backend process logs, and system information",
Tags: []string{"monitoring"},
Intro: "Includes real-time backend log streaming via WebSocket at /ws/backend-logs/:modelId.",
},
{
Name: "mcp",
Description: "Model Context Protocol — tool-augmented chat with MCP servers",
Tags: []string{"mcp"},
Intro: "The model's config must define MCP servers. The endpoint handles tool execution automatically.",
},
{
Name: "agents",
Description: "Agent task and job management for CI/automation workflows",
Tags: []string{"agent-jobs"},
},
{
Name: "video",
Description: "Video generation from text prompts",
Tags: []string{"video"},
},
}
// swaggerState holds the parsed swagger spec. It is populated at most once,
// by init, and callers check ready before using spec.
type swaggerState struct {
once sync.Once // guards the one-time parse performed by init
spec map[string]any // full parsed swagger JSON
ready bool // true only after the spec was parsed successfully
}
// swState is the package-level swagger state used by the instruction handlers.
var swState swaggerState
// init parses swagger.SwaggerJSON into s.spec the first time it is called;
// every later call is a no-op. When parsing fails the error is logged and
// s.ready stays false, so callers must check s.ready afterwards.
func (s *swaggerState) init() {
	s.once.Do(func() {
		var parsed map[string]any
		err := json.Unmarshal(swagger.SwaggerJSON, &parsed)
		if err == nil {
			s.spec, s.ready = parsed, true
			return
		}
		xlog.Error("failed to parse embedded swagger spec", "err", err)
	})
}
// filterSwaggerByTags builds a reduced view of spec. The result always has a
// "paths" map holding only the operations that carry at least one of tags,
// and, when any exist, a "definitions" map holding every definition those
// operations reference directly or through other definitions. Operation and
// definition values are shared with spec rather than copied.
func filterSwaggerByTags(spec map[string]any, tags []string) map[string]any {
	wanted := make(map[string]bool, len(tags))
	for _, tag := range tags {
		wanted[tag] = true
	}

	// matches reports whether an operation carries one of the wanted tags.
	matches := func(op map[string]any) bool {
		opTags, _ := op["tags"].([]any)
		for _, raw := range opTags {
			if name, isString := raw.(string); isString && wanted[name] {
				return true
			}
		}
		return false
	}

	allPaths, _ := spec["paths"].(map[string]any)
	allDefs, _ := spec["definitions"].(map[string]any)

	keptPaths := make(map[string]any)
	for route, rawMethods := range allPaths {
		methods, isMap := rawMethods.(map[string]any)
		if !isMap {
			continue
		}
		kept := make(map[string]any)
		for verb, rawOp := range methods {
			if op, isOp := rawOp.(map[string]any); isOp && matches(op) {
				kept[verb] = op
			}
		}
		if len(kept) > 0 {
			keptPaths[route] = kept
		}
	}

	// Start from the definitions the kept operations name directly, then
	// follow references found inside those definitions until nothing new
	// turns up. Each definition is expanded once.
	reachable := make(map[string]bool)
	collectRefs(keptPaths, reachable)

	pending := make([]string, 0, len(reachable))
	for name := range reachable {
		pending = append(pending, name)
	}
	for len(pending) > 0 {
		name := pending[len(pending)-1]
		pending = pending[:len(pending)-1]

		def, known := allDefs[name]
		if !known {
			continue
		}
		nested := make(map[string]bool)
		collectRefs(def, nested)
		for ref := range nested {
			if !reachable[ref] {
				reachable[ref] = true
				pending = append(pending, ref)
			}
		}
	}

	// References to names missing from the spec are dropped here.
	keptDefs := make(map[string]any)
	for name := range reachable {
		if def, known := allDefs[name]; known {
			keptDefs[name] = def
		}
	}

	out := map[string]any{"paths": keptPaths}
	if len(keptDefs) > 0 {
		out["definitions"] = keptDefs
	}
	return out
}
// collectRefs recursively walks a decoded JSON value (maps and slices) and
// records in refs the name of every definition targeted by a "$ref" string
// that starts with swaggerDefsPrefix. Scalars and other "$ref" forms are
// ignored.
func collectRefs(v any, refs map[string]bool) {
	switch node := v.(type) {
	case []any:
		for _, item := range node {
			collectRefs(item, refs)
		}
	case map[string]any:
		if target, isString := node["$ref"].(string); isString && strings.HasPrefix(target, swaggerDefsPrefix) {
			refs[strings.TrimPrefix(target, swaggerDefsPrefix)] = true
		}
		for _, item := range node {
			collectRefs(item, refs)
		}
	}
}
// swaggerToMarkdown renders a filtered swagger fragment as a markdown guide.
//
// The document starts with "# <skillName>", followed by intro when it is not
// empty. Each operation then gets a "## METHOD path" section with its
// summary, a table of non-body parameters, the fields of any body parameter
// whose schema is a "$ref" to a definition, and one entry per response code.
// Paths and response codes are emitted in sorted order; methods follow
// sortMethods.
func swaggerToMarkdown(skillName, intro string, fragment map[string]any) string {
	var out strings.Builder

	out.WriteString("# " + skillName + "\n")
	if intro != "" {
		out.WriteString("\n" + intro + "\n")
	}

	pathMap, _ := fragment["paths"].(map[string]any)
	defs, _ := fragment["definitions"].(map[string]any)

	// Map iteration order is random, so sort the routes first.
	routes := make([]string, 0, len(pathMap))
	for route := range pathMap {
		routes = append(routes, route)
	}
	sort.Strings(routes)

	for _, route := range routes {
		ops, isMap := pathMap[route].(map[string]any)
		if !isMap {
			continue
		}

		for _, verb := range sortMethods(ops) {
			op, isOp := ops[verb].(map[string]any)
			if !isOp {
				continue
			}

			fmt.Fprintf(&out, "\n## %s %s\n", strings.ToUpper(verb), route)
			if summary, _ := op["summary"].(string); summary != "" {
				out.WriteString(summary + "\n")
			}

			// Parameters
			rawParams, _ := op["parameters"].([]any)
			bodyParams, otherParams := splitParams(rawParams)

			if len(otherParams) > 0 {
				out.WriteString("\n**Parameters:**\n")
				out.WriteString("| Name | In | Type | Required | Description |\n")
				out.WriteString("|------|----|------|----------|-------------|\n")
				for _, raw := range otherParams {
					param, isParam := raw.(map[string]any)
					if !isParam {
						continue
					}
					name, _ := param["name"].(string)
					location, _ := param["in"].(string)
					kind, _ := param["type"].(string)
					required, _ := param["required"].(bool)
					desc, _ := param["description"].(string)
					fmt.Fprintf(&out, "| %s | %s | %s | %v | %s |\n", name, location, kind, required, desc)
				}
			}

			// Body parameters are only rendered when their schema
			// references a named definition.
			for _, raw := range bodyParams {
				param, isParam := raw.(map[string]any)
				if !isParam {
					continue
				}
				schema, _ := param["schema"].(map[string]any)
				if ref := resolveRefName(schema); ref != "" {
					fmt.Fprintf(&out, "\n**Request body** (`%s`):\n", ref)
					renderSchemaFields(&out, ref, defs)
				}
			}

			// Responses
			responses, _ := op["responses"].(map[string]any)
			codes := make([]string, 0, len(responses))
			for code := range responses {
				codes = append(codes, code)
			}
			sort.Strings(codes)

			for _, code := range codes {
				resp, isResp := responses[code].(map[string]any)
				if !isResp {
					continue
				}
				desc, _ := resp["description"].(string)
				schema, _ := resp["schema"].(map[string]any)

				switch ref := resolveRefName(schema); {
				case ref != "":
					fmt.Fprintf(&out, "\n**Response %s** (`%s`): %s\n", code, ref, desc)
					renderSchemaFields(&out, ref, defs)
				case desc != "":
					fmt.Fprintf(&out, "\n**Response %s**: %s\n", code, desc)
				}
			}
		}
	}

	return out.String()
}
// sortMethods returns the keys of methods in a deterministic order: get,
// post, put, patch, delete first (in that order), followed by every other
// key in ascending alphabetical order.
//
// The alphabetical tie-break matters: the keys come from map iteration,
// which is randomised, and sort.Slice is not stable, so comparing by rank
// alone would let methods outside the table (head, options, ...) come out in
// a different order on every call.
func sortMethods(methods map[string]any) []string {
	const unranked = 99
	rank := map[string]int{"get": 0, "post": 1, "put": 2, "patch": 3, "delete": 4}
	rankOf := func(method string) int {
		if r, known := rank[method]; known {
			return r
		}
		return unranked
	}

	keys := make([]string, 0, len(methods))
	for k := range methods {
		keys = append(keys, k)
	}
	sort.Slice(keys, func(i, j int) bool {
		ri, rj := rankOf(keys[i]), rankOf(keys[j])
		if ri != rj {
			return ri < rj
		}
		return keys[i] < keys[j]
	})
	return keys
}
// splitParams partitions swagger parameter objects by their "in" field:
// those with "in" equal to "body" go to body, all others (including ones
// with no "in" at all) go to nonBody. Entries that are not JSON objects are
// dropped. Relative order is preserved within each group.
func splitParams(params []any) (body, nonBody []any) {
	for _, raw := range params {
		fields, isMap := raw.(map[string]any)
		if !isMap {
			continue
		}
		switch location, _ := fields["in"].(string); location {
		case "body":
			body = append(body, raw)
		default:
			nonBody = append(nonBody, raw)
		}
	}
	return body, nonBody
}
// resolveRefName returns the definition name a schema points at through a
// "$ref" of the form swaggerDefsPrefix+name. It returns "" when schema is
// nil, has no string "$ref", or the "$ref" does not start with the prefix.
func resolveRefName(schema map[string]any) string {
	// Indexing a nil map is safe and yields the zero value, so a nil schema
	// falls through to the empty result.
	target, _ := schema["$ref"].(string)
	if !strings.HasPrefix(target, swaggerDefsPrefix) {
		return ""
	}
	return strings.TrimPrefix(target, swaggerDefsPrefix)
}
// renderSchemaFields appends to b a markdown table with one row per property
// of the definition named defName, sorted by property name. Nothing is
// written when the definition is missing, is not an object, or declares no
// properties.
func renderSchemaFields(b *strings.Builder, defName string, defs map[string]any) {
	// Lookups on nil maps yield zero values, so a nil defs or a missing
	// definition simply ends up with no properties.
	def, _ := defs[defName].(map[string]any)
	props, _ := def["properties"].(map[string]any)
	if len(props) == 0 {
		return
	}

	names := make([]string, 0, len(props))
	for name := range props {
		names = append(names, name)
	}
	sort.Strings(names)

	b.WriteString("| Field | Type | Description |\n")
	b.WriteString("|-------|------|-------------|\n")
	for _, name := range names {
		prop, isMap := props[name].(map[string]any)
		if !isMap {
			continue
		}
		desc, _ := prop["description"].(string)
		fmt.Fprintf(b, "| %s | %s | %s |\n", name, schemaTypeString(prop), desc)
	}
}
// schemaTypeString returns a short, human-readable type label for a schema
// property: the referenced definition name for a "$ref"; "[]<elem>" for an
// array, where <elem> is the items' definition name or type, or "any" when
// neither is known; the declared "type" otherwise; and "object" when no
// type is declared.
func schemaTypeString(prop map[string]any) string {
	if ref := resolveRefName(prop); ref != "" {
		return ref
	}

	switch kind, _ := prop["type"].(string); kind {
	case "":
		return "object"
	case "array":
		// A missing "items" leaves elem nil; both lookups below then
		// yield "" and the generic element label is used.
		elem, _ := prop["items"].(map[string]any)
		if ref := resolveRefName(elem); ref != "" {
			return "[]" + ref
		}
		if elemKind, _ := elem["type"].(string); elemKind != "" {
			return "[]" + elemKind
		}
		return "[]any"
	default:
		return kind
	}
}
// APIInstructionResponse is the JSON body returned for a single instruction
// area when the request asks for ?format=json.
type APIInstructionResponse struct {
// Name, Description and Tags are copied from the matching instructionDef.
Name string `json:"name"`
Description string `json:"description"`
Tags []string `json:"tags"`
// SwaggerFragment is the spec filtered down to the area's tags; it is
// omitted from the JSON when empty.
SwaggerFragment map[string]any `json:"swagger_fragment,omitempty"`
}
// ListAPIInstructionsEndpoint returns a handler that lists every entry of
// instructionDefs in compact form (name, description, tags and the URL of
// its detailed guide) together with a usage hint.
// @Summary List available API instruction areas
// @Description Returns a compact list of instruction areas with descriptions and URLs for detailed guides
// @Tags instructions
// @Produce json
// @Success 200 {object} map[string]any "instructions list with hint"
// @Router /api/instructions [get]
func ListAPIInstructionsEndpoint() echo.HandlerFunc {
	// compactInstruction is the per-area item of the listing.
	type compactInstruction struct {
		Name        string   `json:"name"`
		Description string   `json:"description"`
		Tags        []string `json:"tags"`
		URL         string   `json:"url"`
	}

	return func(c echo.Context) error {
		listing := make([]compactInstruction, 0, len(instructionDefs))
		for _, def := range instructionDefs {
			listing = append(listing, compactInstruction{
				Name:        def.Name,
				Description: def.Description,
				Tags:        def.Tags,
				URL:         "/api/instructions/" + def.Name,
			})
		}

		payload := map[string]any{
			"instructions": listing,
			"hint":         "Fetch GET {url} for a markdown API guide. Add ?format=json for a raw OpenAPI fragment.",
		}
		return c.JSON(http.StatusOK, payload)
	}
}
// GetAPIInstructionEndpoint returns a handler that serves the documentation
// of one instruction area, selected by the "name" path parameter. The
// default response is a markdown guide; with ?format=json it is an
// APIInstructionResponse carrying the filtered swagger fragment. Unknown
// names yield 404, and an unparsable swagger spec yields 500.
// @Summary Get an instruction's API guide or OpenAPI fragment
// @Description Returns a markdown guide (default) or filtered OpenAPI fragment (format=json) for a named instruction
// @Tags instructions
// @Produce json
// @Produce text/markdown
// @Param name path string true "Instruction name (e.g. chat-inference, config-management)"
// @Param format query string false "Response format: json for OpenAPI fragment, omit for markdown"
// @Success 200 {object} APIInstructionResponse "instruction documentation"
// @Failure 404 {object} map[string]string "instruction not found"
// @Router /api/instructions/{name} [get]
func GetAPIInstructionEndpoint() echo.HandlerFunc {
	// Build the name index once, when the handler is created.
	index := make(map[string]*instructionDef, len(instructionDefs))
	for i := range instructionDefs {
		def := &instructionDefs[i]
		index[def.Name] = def
	}

	return func(c echo.Context) error {
		name := c.Param("name")
		inst, found := index[name]
		if !found {
			return c.JSON(http.StatusNotFound, map[string]any{"error": "instruction not found: " + name})
		}

		// The spec is parsed on first use; ready stays false if that failed.
		if swState.init(); !swState.ready {
			return c.JSON(http.StatusInternalServerError, map[string]any{"error": "swagger spec not available"})
		}

		fragment := filterSwaggerByTags(swState.spec, inst.Tags)

		if c.QueryParam("format") != "json" {
			guide := swaggerToMarkdown(inst.Name, inst.Intro, fragment)
			return c.Blob(http.StatusOK, "text/markdown; charset=utf-8", []byte(guide))
		}

		return c.JSON(http.StatusOK, APIInstructionResponse{
			Name:            inst.Name,
			Description:     inst.Description,
			Tags:            inst.Tags,
			SwaggerFragment: fragment,
		})
	}
}

View File

@@ -1,222 +0,0 @@
package localai_test
import (
"encoding/json"
"io"
"net/http"
"net/http/httptest"
"strings"
"github.com/labstack/echo/v4"
. "github.com/mudler/LocalAI/core/http/endpoints/localai"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)
// Specs for the /api/instructions endpoints. Each spec drives the handlers
// through a fresh echo instance using httptest, without starting a server.
var _ = Describe("API Instructions Endpoints", func() {
var app *echo.Echo
// Register both handlers on a new router before every spec.
BeforeEach(func() {
app = echo.New()
app.GET("/api/instructions", ListAPIInstructionsEndpoint())
app.GET("/api/instructions/:name", GetAPIInstructionEndpoint())
})
Context("GET /api/instructions", func() {
It("should return all instruction definitions", func() {
req := httptest.NewRequest(http.MethodGet, "/api/instructions", nil)
rec := httptest.NewRecorder()
app.ServeHTTP(rec, req)
Expect(rec.Code).To(Equal(http.StatusOK))
var resp map[string]any
err := json.Unmarshal(rec.Body.Bytes(), &resp)
Expect(err).NotTo(HaveOccurred())
Expect(resp).To(HaveKey("hint"))
Expect(resp).To(HaveKey("instructions"))
instructions, ok := resp["instructions"].([]any)
Expect(ok).To(BeTrue())
// Hard-coded count of instruction areas; update it whenever an area
// is added or removed.
Expect(instructions).To(HaveLen(9))
// Verify each instruction has required fields and correct URL format
for _, s := range instructions {
inst, ok := s.(map[string]any)
Expect(ok).To(BeTrue())
Expect(inst["name"]).NotTo(BeEmpty())
Expect(inst["description"]).NotTo(BeEmpty())
Expect(inst["tags"]).NotTo(BeNil())
Expect(inst["url"]).To(HavePrefix("/api/instructions/"))
Expect(inst["url"]).To(Equal("/api/instructions/" + inst["name"].(string)))
}
})
It("should include known instruction names", func() {
req := httptest.NewRequest(http.MethodGet, "/api/instructions", nil)
rec := httptest.NewRecorder()
app.ServeHTTP(rec, req)
var resp map[string]any
Expect(json.Unmarshal(rec.Body.Bytes(), &resp)).To(Succeed())
instructions := resp["instructions"].([]any)
names := make([]string, len(instructions))
for i, s := range instructions {
names[i] = s.(map[string]any)["name"].(string)
}
// Only a subset of the names is checked, so the spec does not need
// to change for every new area.
Expect(names).To(ContainElements(
"chat-inference",
"config-management",
"model-management",
"monitoring",
"agents",
))
})
})
Context("GET /api/instructions/:name", func() {
It("should return 404 for unknown instruction", func() {
req := httptest.NewRequest(http.MethodGet, "/api/instructions/nonexistent", nil)
rec := httptest.NewRecorder()
app.ServeHTTP(rec, req)
Expect(rec.Code).To(Equal(http.StatusNotFound))
var resp map[string]any
Expect(json.Unmarshal(rec.Body.Bytes(), &resp)).To(Succeed())
Expect(resp["error"]).To(ContainSubstring("instruction not found"))
})
It("should return markdown by default", func() {
req := httptest.NewRequest(http.MethodGet, "/api/instructions/chat-inference", nil)
rec := httptest.NewRecorder()
app.ServeHTTP(rec, req)
Expect(rec.Code).To(Equal(http.StatusOK))
Expect(rec.Header().Get("Content-Type")).To(ContainSubstring("text/markdown"))
body, err := io.ReadAll(rec.Body)
Expect(err).NotTo(HaveOccurred())
md := string(body)
Expect(md).To(HavePrefix("# chat-inference"))
// Should contain at least one endpoint heading
Expect(md).To(MatchRegexp(`## (GET|POST|PUT|PATCH|DELETE) /`))
})
It("should include intro text for instructions that have one", func() {
req := httptest.NewRequest(http.MethodGet, "/api/instructions/chat-inference", nil)
rec := httptest.NewRecorder()
app.ServeHTTP(rec, req)
body, _ := io.ReadAll(rec.Body)
// chat-inference has an intro about streaming
Expect(string(body)).To(ContainSubstring("stream"))
})
It("should return JSON fragment when format=json", func() {
req := httptest.NewRequest(http.MethodGet, "/api/instructions/chat-inference?format=json", nil)
rec := httptest.NewRecorder()
app.ServeHTTP(rec, req)
Expect(rec.Code).To(Equal(http.StatusOK))
var resp map[string]any
Expect(json.Unmarshal(rec.Body.Bytes(), &resp)).To(Succeed())
Expect(resp["name"]).To(Equal("chat-inference"))
Expect(resp["tags"]).To(ContainElements("inference", "embeddings"))
fragment, ok := resp["swagger_fragment"].(map[string]any)
Expect(ok).To(BeTrue())
Expect(fragment).To(HaveKey("paths"))
paths, ok := fragment["paths"].(map[string]any)
Expect(ok).To(BeTrue())
Expect(paths).NotTo(BeEmpty())
})
It("should include referenced definitions in JSON fragment", func() {
req := httptest.NewRequest(http.MethodGet, "/api/instructions/chat-inference?format=json", nil)
rec := httptest.NewRecorder()
app.ServeHTTP(rec, req)
var resp map[string]any
Expect(json.Unmarshal(rec.Body.Bytes(), &resp)).To(Succeed())
fragment := resp["swagger_fragment"].(map[string]any)
Expect(fragment).To(HaveKey("definitions"))
defs, ok := fragment["definitions"].(map[string]any)
Expect(ok).To(BeTrue())
Expect(defs).NotTo(BeEmpty())
})
It("should only include paths matching the instruction tags in JSON fragment", func() {
req := httptest.NewRequest(http.MethodGet, "/api/instructions/config-management?format=json", nil)
rec := httptest.NewRecorder()
app.ServeHTTP(rec, req)
var resp map[string]any
Expect(json.Unmarshal(rec.Body.Bytes(), &resp)).To(Succeed())
fragment := resp["swagger_fragment"].(map[string]any)
paths := fragment["paths"].(map[string]any)
Expect(paths).NotTo(BeEmpty())
// Every operation in every path should have the "config" tag
for _, methods := range paths {
methodMap := methods.(map[string]any)
for _, opRaw := range methodMap {
op := opRaw.(map[string]any)
tags, _ := op["tags"].([]any)
tagStrs := make([]string, len(tags))
for i, t := range tags {
tagStrs[i] = t.(string)
}
Expect(tagStrs).To(ContainElement("config"))
}
}
})
// Guards against nondeterministic rendering: the same request issued
// twice must produce byte-identical markdown.
It("should produce stable output across calls", func() {
req1 := httptest.NewRequest(http.MethodGet, "/api/instructions/chat-inference", nil)
rec1 := httptest.NewRecorder()
app.ServeHTTP(rec1, req1)
req2 := httptest.NewRequest(http.MethodGet, "/api/instructions/chat-inference", nil)
rec2 := httptest.NewRecorder()
app.ServeHTTP(rec2, req2)
body1, _ := io.ReadAll(rec1.Body)
body2, _ := io.ReadAll(rec2.Body)
Expect(string(body1)).To(Equal(string(body2)))
})
It("should return markdown for every defined instruction", func() {
// First get the list
listReq := httptest.NewRequest(http.MethodGet, "/api/instructions", nil)
listRec := httptest.NewRecorder()
app.ServeHTTP(listRec, listReq)
var listResp map[string]any
Expect(json.Unmarshal(listRec.Body.Bytes(), &listResp)).To(Succeed())
instructions := listResp["instructions"].([]any)
// Then fetch the guide of each listed area by name.
for _, s := range instructions {
name := s.(map[string]any)["name"].(string)
req := httptest.NewRequest(http.MethodGet, "/api/instructions/"+name, nil)
rec := httptest.NewRecorder()
app.ServeHTTP(rec, req)
Expect(rec.Code).To(Equal(http.StatusOK),
"instruction %q should return 200", name)
body, _ := io.ReadAll(rec.Body)
Expect(strings.TrimSpace(string(body))).NotTo(BeEmpty(),
"instruction %q should return non-empty markdown", name)
}
})
})
})

View File

@@ -37,7 +37,6 @@ func CreateBackendEndpointService(galleries []config.Gallery, systemState *syste
// GetOpStatusEndpoint returns the job status
// @Summary Returns the job status
// @Tags backends
// @Success 200 {object} galleryop.OpStatus "Response"
// @Router /backends/jobs/{uuid} [get]
func (mgs *BackendEndpointService) GetOpStatusEndpoint() echo.HandlerFunc {
@@ -52,7 +51,6 @@ func (mgs *BackendEndpointService) GetOpStatusEndpoint() echo.HandlerFunc {
// GetAllStatusEndpoint returns all the jobs status progress
// @Summary Returns all the jobs status progress
// @Tags backends
// @Success 200 {object} map[string]galleryop.OpStatus "Response"
// @Router /backends/jobs [get]
func (mgs *BackendEndpointService) GetAllStatusEndpoint() echo.HandlerFunc {
@@ -63,7 +61,6 @@ func (mgs *BackendEndpointService) GetAllStatusEndpoint() echo.HandlerFunc {
// ApplyBackendEndpoint installs a new backend to a LocalAI instance
// @Summary Install backends to LocalAI.
// @Tags backends
// @Param request body GalleryBackend true "query params"
// @Success 200 {object} schema.BackendResponse "Response"
// @Router /backends/apply [post]
@@ -91,7 +88,6 @@ func (mgs *BackendEndpointService) ApplyBackendEndpoint() echo.HandlerFunc {
// DeleteBackendEndpoint deletes a backend from a LocalAI instance
// @Summary delete backends from LocalAI.
// @Tags backends
// @Param name path string true "Backend name"
// @Success 200 {object} schema.BackendResponse "Response"
// @Router /backends/delete/{name} [post]
@@ -116,7 +112,6 @@ func (mgs *BackendEndpointService) DeleteBackendEndpoint() echo.HandlerFunc {
// ListBackendsEndpoint list the available backends configured in LocalAI
// @Summary List all Backends
// @Tags backends
// @Success 200 {object} []gallery.GalleryBackend "Response"
// @Router /backends [get]
func (mgs *BackendEndpointService) ListBackendsEndpoint() echo.HandlerFunc {
@@ -131,7 +126,6 @@ func (mgs *BackendEndpointService) ListBackendsEndpoint() echo.HandlerFunc {
// ListBackendGalleriesEndpoint lists the available backend galleries configured in LocalAI
// @Summary List all Galleries
// @Tags backends
// @Success 200 {object} []config.Gallery "Response"
// @Router /backends/galleries [get]
// NOTE: This is different (and much simpler!) than above! This JUST lists the model galleries that have been loaded, not their contents!
@@ -148,7 +142,6 @@ func (mgs *BackendEndpointService) ListBackendGalleriesEndpoint() echo.HandlerFu
// ListAvailableBackendsEndpoint list the available backends in the galleries configured in LocalAI
// @Summary List all available Backends
// @Tags backends
// @Success 200 {object} []gallery.GalleryBackend "Response"
// @Router /backends/available [get]
func (mgs *BackendEndpointService) ListAvailableBackendsEndpoint(systemState *system.SystemState) echo.HandlerFunc {

View File

@@ -1,179 +0,0 @@
package localai
import (
"encoding/json"
"fmt"
"net/http"
"net/url"
"sync"
"time"
"github.com/gorilla/websocket"
"github.com/labstack/echo/v4"
"github.com/mudler/LocalAI/pkg/model"
"github.com/mudler/xlog"
)
// backendLogsUpgrader upgrades backend-log requests to WebSocket. Its origin
// check accepts requests that carry no Origin header and requests whose
// Origin host equals the request's Host header; an Origin that cannot be
// parsed is rejected.
var backendLogsUpgrader = websocket.Upgrader{
	CheckOrigin: func(r *http.Request) bool {
		origin := r.Header.Get("Origin")
		if origin == "" {
			// no origin header = same-origin or non-browser
			return true
		}
		parsed, err := url.Parse(origin)
		return err == nil && parsed.Host == r.Host
	},
}
// backendLogsConn wraps a websocket connection with a mutex so that writes
// issued through writeJSON and writePing never overlap.
type backendLogsConn struct {
*websocket.Conn
mu sync.Mutex // held for the duration of every write made via this wrapper
}
// writeJSON encodes v as JSON and sends it as a single text message, holding
// the write mutex and giving the write a 30-second deadline. An encoding
// failure is returned wrapped as "marshal error" and nothing is sent.
func (c *backendLogsConn) writeJSON(v any) error {
	c.mu.Lock()
	defer c.mu.Unlock()

	c.Conn.SetWriteDeadline(time.Now().Add(30 * time.Second))
	payload, marshalErr := json.Marshal(v)
	if marshalErr == nil {
		return c.Conn.WriteMessage(websocket.TextMessage, payload)
	}
	return fmt.Errorf("marshal error: %w", marshalErr)
}
// writePing sends a ping control message with an empty payload, holding the
// write mutex and giving the write a 30-second deadline.
func (c *backendLogsConn) writePing() error {
	c.mu.Lock()
	defer c.mu.Unlock()

	deadline := time.Now().Add(30 * time.Second)
	c.Conn.SetWriteDeadline(deadline)
	return c.Conn.WriteMessage(websocket.PingMessage, nil)
}
// ListBackendLogsEndpoint returns a handler that responds with the model IDs
// reported by the model loader's backend log store, encoded as JSON.
// @Summary List models with backend logs
// @Description Returns a sorted list of model IDs that have captured backend process output
// @Tags monitoring
// @Produce json
// @Success 200 {array} string "Model IDs with logs"
// @Router /api/backend-logs [get]
func ListBackendLogsEndpoint(ml *model.ModelLoader) echo.HandlerFunc {
	return func(c echo.Context) error {
		modelIDs := ml.BackendLogs().ListModels()
		return c.JSON(http.StatusOK, modelIDs)
	}
}
// GetBackendLogsEndpoint returns a handler that responds with the log lines
// stored for the model named by the "modelId" path parameter, encoded as
// JSON.
// @Summary Get backend logs for a model
// @Description Returns all captured log lines (stdout/stderr) for the specified model's backend process
// @Tags monitoring
// @Produce json
// @Param modelId path string true "Model ID"
// @Success 200 {array} model.BackendLogLine "Log lines"
// @Router /api/backend-logs/{modelId} [get]
func GetBackendLogsEndpoint(ml *model.ModelLoader) echo.HandlerFunc {
	return func(c echo.Context) error {
		lines := ml.BackendLogs().GetLines(c.Param("modelId"))
		return c.JSON(http.StatusOK, lines)
	}
}
// ClearBackendLogsEndpoint returns a handler that clears the log lines
// stored for the model named by the "modelId" path parameter and replies
// with 204 No Content.
// @Summary Clear backend logs for a model
// @Description Removes all captured log lines for the specified model's backend process
// @Tags monitoring
// @Param modelId path string true "Model ID"
// @Success 204 "Logs cleared"
// @Router /api/backend-logs/{modelId}/clear [post]
func ClearBackendLogsEndpoint(ml *model.ModelLoader) echo.HandlerFunc {
	return func(c echo.Context) error {
		modelID := c.Param("modelId")
		ml.BackendLogs().Clear(modelID)
		return c.NoContent(http.StatusNoContent)
	}
}
// BackendLogsWebSocketEndpoint returns a handler that upgrades the request to
// a WebSocket and streams the backend logs of the model named by the
// "modelId" path parameter.
//
// After the upgrade it sends one {"type": "initial", "lines": [...]} message
// with the lines already stored, then one {"type": "line", "line": ...}
// message per line received from the subscription. A ping is sent every 30
// seconds and the read deadline (90 seconds) is renewed on every pong. The
// handler returns the upgrade error if the upgrade fails; after a successful
// upgrade it always returns nil, whether the stream ended because the
// subscription channel closed, a write failed, or reading from the client
// failed.
// @Summary Stream backend logs via WebSocket
// @Description Opens a WebSocket connection for real-time backend log streaming. Sends an initial batch of existing lines (type "initial"), then streams new lines as they appear (type "line"). Supports ping/pong keepalive.
// @Tags monitoring
// @Param modelId path string true "Model ID"
// @Router /ws/backend-logs/{modelId} [get]
func BackendLogsWebSocketEndpoint(ml *model.ModelLoader) echo.HandlerFunc {
return func(c echo.Context) error {
modelID := c.Param("modelId")
ws, err := backendLogsUpgrader.Upgrade(c.Response(), c.Request(), nil)
if err != nil {
return err
}
defer ws.Close()
// Incoming messages are only read to detect disconnects, so cap their size.
ws.SetReadLimit(4096)
// Set up ping/pong for keepalive: the read deadline is pushed forward by
// 90 seconds each time a pong arrives.
ws.SetReadDeadline(time.Now().Add(90 * time.Second))
ws.SetPongHandler(func(string) error {
ws.SetReadDeadline(time.Now().Add(90 * time.Second))
return nil
})
// All writes below go through conn so they are serialized by its mutex.
conn := &backendLogsConn{Conn: ws}
// Send existing lines as initial batch
existingLines := ml.BackendLogs().GetLines(modelID)
initialMsg := map[string]any{
"type": "initial",
"lines": existingLines,
}
if err := conn.writeJSON(initialMsg); err != nil {
xlog.Debug("WebSocket backend-logs initial write failed", "error", err)
return nil
}
// Subscribe to new lines
// NOTE(review): the snapshot above is taken before subscribing, so a line
// logged between GetLines and Subscribe may appear in neither the initial
// batch nor the stream — confirm against the BackendLogs implementation.
lineCh, unsubscribe := ml.BackendLogs().Subscribe(modelID)
defer unsubscribe()
// Handle close from client side: this goroutine discards whatever the
// client sends and closes closeCh on the first read error. The deferred
// ws.Close() above is what is expected to unblock ReadMessage when the
// handler returns first.
closeCh := make(chan struct{})
go func() {
for {
_, _, err := ws.ReadMessage()
if err != nil {
close(closeCh)
return
}
}
}()
// Ping ticker for keepalive
pingTicker := time.NewTicker(30 * time.Second)
defer pingTicker.Stop()
// Forward new lines to WebSocket
for {
select {
case line, ok := <-lineCh:
if !ok {
// Subscription channel was closed: nothing more to stream.
return nil
}
lineMsg := map[string]any{
"type": "line",
"line": line,
}
if err := conn.writeJSON(lineMsg); err != nil {
xlog.Debug("WebSocket backend-logs write error", "error", err)
return nil
}
case <-pingTicker.C:
if err := conn.writePing(); err != nil {
return nil
}
case <-closeCh:
return nil
}
}
}
}

View File

@@ -1,196 +0,0 @@
package localai_test
import (
"encoding/json"
"net/http"
"net/http/httptest"
"os"
"path/filepath"
"strings"
"time"
"github.com/gorilla/websocket"
"github.com/labstack/echo/v4"
. "github.com/mudler/LocalAI/core/http/endpoints/localai"
"github.com/mudler/LocalAI/pkg/model"
"github.com/mudler/LocalAI/pkg/system"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)
// Specs for the backend-logs REST and WebSocket handlers. Each spec runs
// against a fresh Echo app wired to a ModelLoader rooted in a temp directory.
var _ = Describe("Backend Logs Endpoints", func() {
var (
app *echo.Echo
tempDir string
modelLoader *model.ModelLoader
)
// Build an isolated models directory, loader and router before every spec.
BeforeEach(func() {
var err error
tempDir, err = os.MkdirTemp("", "backend-logs-test-*")
Expect(err).NotTo(HaveOccurred())
modelsPath := filepath.Join(tempDir, "models")
Expect(os.MkdirAll(modelsPath, 0750)).To(Succeed())
systemState, err := system.GetSystemState(
system.WithModelPath(modelsPath),
)
Expect(err).NotTo(HaveOccurred())
modelLoader = model.NewModelLoader(systemState)
app = echo.New()
app.GET("/api/backend-logs", ListBackendLogsEndpoint(modelLoader))
app.GET("/api/backend-logs/:modelId", GetBackendLogsEndpoint(modelLoader))
app.POST("/api/backend-logs/:modelId/clear", ClearBackendLogsEndpoint(modelLoader))
app.GET("/ws/backend-logs/:modelId", BackendLogsWebSocketEndpoint(modelLoader))
})
// Remove the temp directory created in BeforeEach.
AfterEach(func() {
os.RemoveAll(tempDir)
})
// REST specs drive the router in-process through httptest recorders.
Context("REST endpoints", func() {
It("should return empty list of models with logs", func() {
req := httptest.NewRequest(http.MethodGet, "/api/backend-logs", nil)
rec := httptest.NewRecorder()
app.ServeHTTP(rec, req)
Expect(rec.Code).To(Equal(http.StatusOK))
var models []string
Expect(json.Unmarshal(rec.Body.Bytes(), &models)).To(Succeed())
Expect(models).To(BeEmpty())
})
It("should list models that have logs", func() {
// Seed one line so the model shows up in the listing.
modelLoader.BackendLogs().AppendLine("my-model", "stdout", "hello")
req := httptest.NewRequest(http.MethodGet, "/api/backend-logs", nil)
rec := httptest.NewRecorder()
app.ServeHTTP(rec, req)
Expect(rec.Code).To(Equal(http.StatusOK))
var models []string
Expect(json.Unmarshal(rec.Body.Bytes(), &models)).To(Succeed())
Expect(models).To(ContainElement("my-model"))
})
It("should return log lines for a model", func() {
// Two lines on different streams; the response must keep order and stream.
modelLoader.BackendLogs().AppendLine("my-model", "stdout", "line one")
modelLoader.BackendLogs().AppendLine("my-model", "stderr", "line two")
req := httptest.NewRequest(http.MethodGet, "/api/backend-logs/my-model", nil)
rec := httptest.NewRecorder()
app.ServeHTTP(rec, req)
Expect(rec.Code).To(Equal(http.StatusOK))
var lines []model.BackendLogLine
Expect(json.Unmarshal(rec.Body.Bytes(), &lines)).To(Succeed())
Expect(lines).To(HaveLen(2))
Expect(lines[0].Text).To(Equal("line one"))
Expect(lines[0].Stream).To(Equal("stdout"))
Expect(lines[1].Text).To(Equal("line two"))
Expect(lines[1].Stream).To(Equal("stderr"))
})
It("should return empty log lines for unknown model", func() {
// Only the status code is asserted here; the body is not inspected.
req := httptest.NewRequest(http.MethodGet, "/api/backend-logs/unknown-model", nil)
rec := httptest.NewRecorder()
app.ServeHTTP(rec, req)
Expect(rec.Code).To(Equal(http.StatusOK))
})
It("should clear logs for a model", func() {
modelLoader.BackendLogs().AppendLine("my-model", "stdout", "hello")
req := httptest.NewRequest(http.MethodPost, "/api/backend-logs/my-model/clear", nil)
rec := httptest.NewRecorder()
app.ServeHTTP(rec, req)
Expect(rec.Code).To(Equal(http.StatusNoContent))
// Verify logs are cleared
req = httptest.NewRequest(http.MethodGet, "/api/backend-logs/my-model", nil)
rec = httptest.NewRecorder()
app.ServeHTTP(rec, req)
var lines []model.BackendLogLine
Expect(json.Unmarshal(rec.Body.Bytes(), &lines)).To(Succeed())
Expect(lines).To(BeEmpty())
})
})
// WebSocket specs need a real listener, so they start an httptest.Server.
Context("WebSocket endpoint", func() {
It("should send initial lines and stream new lines", func() {
// Seed some existing lines before connecting
modelLoader.BackendLogs().AppendLine("ws-model", "stdout", "existing line")
// Start a real HTTP server for WebSocket
srv := httptest.NewServer(app)
defer srv.Close()
// Dial the WebSocket
// srv.URL starts with "http"; swapping that prefix for "ws" gives the ws:// URL.
wsURL := "ws" + strings.TrimPrefix(srv.URL, "http") + "/ws/backend-logs/ws-model"
dialer := websocket.Dialer{HandshakeTimeout: 2 * time.Second}
conn, _, err := dialer.Dial(wsURL, nil)
Expect(err).NotTo(HaveOccurred())
defer conn.Close()
// Read the initial message
var initialMsg map[string]any
err = conn.ReadJSON(&initialMsg)
Expect(err).NotTo(HaveOccurred())
Expect(initialMsg["type"]).To(Equal("initial"))
initialLines, ok := initialMsg["lines"].([]any)
Expect(ok).To(BeTrue())
Expect(initialLines).To(HaveLen(1))
firstLine := initialLines[0].(map[string]any)
Expect(firstLine["text"]).To(Equal("existing line"))
// Now append a new line and verify it streams through
modelLoader.BackendLogs().AppendLine("ws-model", "stderr", "streamed line")
var lineMsg map[string]any
// Bound the wait so a missing message fails the spec instead of hanging.
conn.SetReadDeadline(time.Now().Add(2 * time.Second))
err = conn.ReadJSON(&lineMsg)
Expect(err).NotTo(HaveOccurred())
Expect(lineMsg["type"]).To(Equal("line"))
lineData, ok := lineMsg["line"].(map[string]any)
Expect(ok).To(BeTrue())
Expect(lineData["text"]).To(Equal("streamed line"))
Expect(lineData["stream"]).To(Equal("stderr"))
})
It("should handle connection close gracefully", func() {
srv := httptest.NewServer(app)
defer srv.Close()
wsURL := "ws" + strings.TrimPrefix(srv.URL, "http") + "/ws/backend-logs/close-model"
dialer := websocket.Dialer{HandshakeTimeout: 2 * time.Second}
conn, _, err := dialer.Dial(wsURL, nil)
Expect(err).NotTo(HaveOccurred())
// Read initial message
var initialMsg map[string]any
err = conn.ReadJSON(&initialMsg)
Expect(err).NotTo(HaveOccurred())
Expect(initialMsg["type"]).To(Equal("initial"))
// Close the connection from client side
conn.Close()
// Give the server goroutine time to detect the close
time.Sleep(50 * time.Millisecond)
// No panic or hang — the test passing is the assertion
})
})
})

View File

@@ -8,7 +8,6 @@ import (
// BackendMonitorEndpoint returns the status of the specified backend
// @Summary Backend monitor endpoint
// @Tags monitoring
// @Param request body schema.BackendMonitorRequest true "Backend statistics request"
// @Success 200 {object} proto.StatusResponse "Response"
// @Router /backend/monitor [get]
@@ -30,8 +29,7 @@ func BackendMonitorEndpoint(bm *monitoring.BackendMonitorService) echo.HandlerFu
}
// BackendShutdownEndpoint shuts down the specified backend
// @Summary Backend shutdown endpoint
// @Tags monitoring
// @Summary Backend monitor endpoint
// @Param request body schema.BackendMonitorRequest true "Backend statistics request"
// @Router /backend/shutdown [post]
func BackendShutdownEndpoint(bm *monitoring.BackendMonitorService) echo.HandlerFunc {

View File

@@ -1,242 +0,0 @@
package localai
import (
"encoding/json"
"fmt"
"io"
"net/http"
"net/url"
"os"
"reflect"
"sort"
"strings"
"dario.cat/mergo"
"github.com/labstack/echo/v4"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/config/meta"
"github.com/mudler/LocalAI/core/gallery"
"github.com/mudler/LocalAI/core/services/galleryop"
"github.com/mudler/LocalAI/pkg/model"
"github.com/mudler/LocalAI/pkg/utils"
"github.com/mudler/xlog"
"gopkg.in/yaml.v3"
)
// ConfigMetadataEndpoint serves metadata describing model config fields.
// The response depends on the ?section query parameter:
// when it is absent or empty, a lightweight index of sections is returned;
// when it is "all", the complete metadata built from config.ModelConfig is returned;
// otherwise only the fields whose Section equals the value are returned,
// or a 404 if no field matches.
// @Summary List model configuration field metadata
// @Description Returns config field metadata. Use ?section=<id> to filter by section, or omit for a section index.
// @Tags config
// @Produce json
// @Param section query string false "Section ID to filter (e.g. 'general', 'llm', 'parameters') or 'all' for everything"
// @Success 200 {object} map[string]any "Section index or filtered field metadata"
// @Router /api/models/config-metadata [get]
func ConfigMetadataEndpoint() echo.HandlerFunc {
	// sectionInfo is one entry of the section index.
	type sectionInfo struct {
		ID    string `json:"id"`
		Label string `json:"label"`
		URL   string `json:"url"`
	}

	return func(c echo.Context) error {
		requested := c.QueryParam("section")

		// Index mode: list the sections without building the field metadata.
		if requested == "" {
			defs := meta.DefaultSections()
			index := make([]sectionInfo, 0, len(defs))
			for _, def := range defs {
				index = append(index, sectionInfo{
					ID:    def.ID,
					Label: def.Label,
					URL:   "/api/models/config-metadata?section=" + def.ID,
				})
			}
			return c.JSON(http.StatusOK, map[string]any{
				"hint":     "Fetch a section URL to see its fields. Use ?section=all for everything.",
				"sections": index,
			})
		}

		metadata := meta.BuildConfigMetadata(reflect.TypeOf(config.ModelConfig{}))
		if requested == "all" {
			return c.JSON(http.StatusOK, metadata)
		}

		// Keep only the fields that belong to the requested section.
		var matching []meta.FieldMeta
		for _, field := range metadata.Fields {
			if field.Section != requested {
				continue
			}
			matching = append(matching, field)
		}
		if len(matching) > 0 {
			return c.JSON(http.StatusOK, matching)
		}
		return c.JSON(http.StatusNotFound, map[string]any{"error": "unknown section: " + requested})
	}
}
// AutocompleteEndpoint handles dynamic autocomplete lookups for config fields.
// Static option lists (quantizations, cache types, diffusers pipelines/schedulers)
// are embedded directly in the field metadata Options; only truly dynamic values
// that require runtime lookup are served here.
//
// The "provider" path parameter selects the lookup:
// meta.ProviderBackends lists the installed system backends,
// meta.ProviderModels lists configured models plus loose model files,
// and "models:<capability>" lists configured models filtered by usecase
// (chat, tts, vad, transcript; any other capability applies no filter).
// Unknown providers get a 404. On success the body is {"values": [...]},
// sorted, and always a JSON array (never null) even when nothing matches.
// @Summary Get dynamic autocomplete values for a config field
// @Description Returns runtime-resolved values for dynamic providers (backends, models)
// @Tags config
// @Produce json
// @Param provider path string true "Provider name (backends, models, models:chat, models:tts, models:transcript, models:vad)"
// @Success 200 {object} map[string]any "values array"
// @Router /api/models/config-metadata/autocomplete/{provider} [get]
func AutocompleteEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) echo.HandlerFunc {
	return func(c echo.Context) error {
		provider := c.Param("provider")

		// Start from an empty, non-nil slice: a nil slice would be encoded as
		// JSON null, which breaks clients that expect "values" to be an array.
		values := []string{}

		switch {
		case provider == meta.ProviderBackends:
			installedBackends, err := gallery.ListSystemBackends(appConfig.SystemState)
			if err != nil {
				// Best effort: answer with an empty list, but leave a trace.
				xlog.Debug("Autocomplete: failed to list system backends", "error", err)
				break
			}
			for name := range installedBackends {
				values = append(values, name)
			}
		case provider == meta.ProviderModels:
			for _, cfg := range cl.GetAllModelsConfigs() {
				values = append(values, cfg.Name)
			}
			modelsWithoutConfig, err := galleryop.ListModels(cl, ml, config.NoFilterFn, galleryop.LOOSE_ONLY)
			if err != nil {
				// Best effort: keep whatever was returned, but leave a trace.
				xlog.Debug("Autocomplete: failed to list models without config", "error", err)
			}
			values = append(values, modelsWithoutConfig...)
		case strings.HasPrefix(provider, "models:"):
			capability := strings.TrimPrefix(provider, "models:")
			var filterFn config.ModelConfigFilterFn
			switch capability {
			case "chat":
				filterFn = config.BuildUsecaseFilterFn(config.FLAG_CHAT)
			case "tts":
				filterFn = config.BuildUsecaseFilterFn(config.FLAG_TTS)
			case "vad":
				filterFn = config.BuildUsecaseFilterFn(config.FLAG_VAD)
			case "transcript":
				filterFn = config.BuildUsecaseFilterFn(config.FLAG_TRANSCRIPT)
			default:
				// Unrecognized capabilities fall back to the unfiltered list.
				filterFn = config.NoFilterFn
			}
			for _, cfg := range cl.GetModelConfigsByFilter(filterFn) {
				values = append(values, cfg.Name)
			}
		default:
			return c.JSON(http.StatusNotFound, map[string]any{"error": "unknown provider: " + provider})
		}

		// Every successful branch returns its values in sorted order.
		sort.Strings(values)
		return c.JSON(http.StatusOK, map[string]any{"values": values})
	}
}
// PatchConfigEndpoint applies a partial JSON update to a stored model config.
// The existing config is round-tripped through JSON into a generic map, the
// request body is merged on top with override semantics, and the result is
// decoded back into a config.ModelConfig. Only a config that validates and
// whose file path passes utils.VerifyPath against the models path is written
// back as YAML; the config loader is then reloaded and preloaded.
// Every failure is reported as {"error": "..."} with a matching status code.
// @Summary Partially update a model configuration
// @Description Deep-merges the JSON patch body into the existing model config
// @Tags config
// @Accept json
// @Produce json
// @Param name path string true "Model name"
// @Success 200 {object} map[string]any "success message"
// @Router /api/models/config-json/{name} [patch]
func PatchConfigEndpoint(cl *config.ModelConfigLoader, _ *model.ModelLoader, appConfig *config.ApplicationConfig) echo.HandlerFunc {
	return func(c echo.Context) error {
		// reject writes the uniform error payload used by every failure path.
		reject := func(status int, reason string) error {
			return c.JSON(status, map[string]any{"error": reason})
		}

		// Use the unescaped form of the name when unescaping succeeds.
		name := c.Param("name")
		if unescaped, unescapeErr := url.PathUnescape(name); unescapeErr == nil {
			name = unescaped
		}
		if name == "" {
			return reject(http.StatusBadRequest, "model name is required")
		}

		current, found := cl.GetModelConfig(name)
		if !found {
			return reject(http.StatusNotFound, "model configuration not found")
		}

		rawPatch, readErr := io.ReadAll(c.Request().Body)
		if readErr != nil || len(rawPatch) == 0 {
			return reject(http.StatusBadRequest, "request body is empty or unreadable")
		}
		var patch map[string]any
		if err := json.Unmarshal(rawPatch, &patch); err != nil {
			return reject(http.StatusBadRequest, "invalid JSON: "+err.Error())
		}

		// Turn the typed config into a generic map so it can be merged.
		currentJSON, marshalErr := json.Marshal(current)
		if marshalErr != nil {
			return reject(http.StatusInternalServerError, "failed to marshal existing config")
		}
		var merged map[string]any
		if err := json.Unmarshal(currentJSON, &merged); err != nil {
			return reject(http.StatusInternalServerError, "failed to parse existing config")
		}
		if err := mergo.Merge(&merged, patch, mergo.WithOverride); err != nil {
			return reject(http.StatusInternalServerError, "failed to merge configs: "+err.Error())
		}

		// Back to the typed config so it can be validated.
		mergedJSON, remarshalErr := json.Marshal(merged)
		if remarshalErr != nil {
			return reject(http.StatusInternalServerError, "failed to marshal merged config")
		}
		var updated config.ModelConfig
		if err := json.Unmarshal(mergedJSON, &updated); err != nil {
			return reject(http.StatusBadRequest, "merged config is invalid: "+err.Error())
		}
		if ok, validationErr := updated.Validate(); !ok {
			reason := "validation failed"
			if validationErr != nil {
				reason = validationErr.Error()
			}
			return reject(http.StatusBadRequest, reason)
		}

		// Refuse to write when the path check against the models path fails.
		targetFile := current.GetModelConfigFile()
		if err := utils.VerifyPath(targetFile, appConfig.SystemState.Model.ModelsPath); err != nil {
			return reject(http.StatusForbidden, "config path not trusted: "+err.Error())
		}

		encoded, yamlErr := yaml.Marshal(updated)
		if yamlErr != nil {
			return reject(http.StatusInternalServerError, "failed to marshal YAML")
		}
		if err := os.WriteFile(targetFile, encoded, 0644); err != nil {
			return reject(http.StatusInternalServerError, "failed to write config file")
		}

		// Reload so the in-memory loader reflects what is now on disk.
		if err := cl.LoadModelConfigsFromPath(appConfig.SystemState.Model.ModelsPath, appConfig.ToConfigLoaderOptions()...); err != nil {
			return reject(http.StatusInternalServerError, "failed to reload configs: "+err.Error())
		}
		// A preload failure is logged but does not fail the request.
		if err := cl.Preload(appConfig.SystemState.Model.ModelsPath); err != nil {
			xlog.Warn("Failed to preload after PATCH", "error", err)
		}

		return c.JSON(http.StatusOK, map[string]any{
			"success": true,
			"message": fmt.Sprintf("Model '%s' updated successfully", name),
		})
	}
}

View File

@@ -1,243 +0,0 @@
package localai_test
import (
"bytes"
"encoding/json"
"net/http"
"net/http/httptest"
"os"
"path/filepath"
"github.com/labstack/echo/v4"
"github.com/mudler/LocalAI/core/config"
. "github.com/mudler/LocalAI/core/http/endpoints/localai"
"github.com/mudler/LocalAI/pkg/model"
"github.com/mudler/LocalAI/pkg/system"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)
// Specs for the config-metadata, autocomplete and PATCH config handlers.
// Each spec gets its own temp directory that serves as the models path.
var _ = Describe("Config Metadata Endpoints", func() {
var (
app *echo.Echo
tempDir string
configLoader *config.ModelConfigLoader
modelLoader *model.ModelLoader
appConfig *config.ApplicationConfig
)
// Build fresh loaders, application config and routes before every spec.
BeforeEach(func() {
var err error
tempDir, err = os.MkdirTemp("", "config-meta-test-*")
Expect(err).NotTo(HaveOccurred())
systemState, err := system.GetSystemState(
system.WithModelPath(tempDir),
)
Expect(err).NotTo(HaveOccurred())
appConfig = config.NewApplicationConfig(
config.WithSystemState(systemState),
)
configLoader = config.NewModelConfigLoader(tempDir)
modelLoader = model.NewModelLoader(systemState)
app = echo.New()
app.GET("/api/models/config-metadata", ConfigMetadataEndpoint())
app.GET("/api/models/config-metadata/autocomplete/:provider", AutocompleteEndpoint(configLoader, modelLoader, appConfig))
app.PATCH("/api/models/config-json/:name", PatchConfigEndpoint(configLoader, modelLoader, appConfig))
})
// Remove the temp directory created in BeforeEach.
AfterEach(func() {
os.RemoveAll(tempDir)
})
Context("GET /api/models/config-metadata", func() {
It("should return section index when no section param", func() {
req := httptest.NewRequest(http.MethodGet, "/api/models/config-metadata", nil)
rec := httptest.NewRecorder()
app.ServeHTTP(rec, req)
Expect(rec.Code).To(Equal(http.StatusOK))
var resp map[string]any
Expect(json.Unmarshal(rec.Body.Bytes(), &resp)).To(Succeed())
Expect(resp).To(HaveKey("hint"))
Expect(resp).To(HaveKey("sections"))
sections, ok := resp["sections"].([]any)
Expect(ok).To(BeTrue())
Expect(sections).NotTo(BeEmpty())
// Verify known section IDs are present
ids := make([]string, len(sections))
for i, s := range sections {
sec := s.(map[string]any)
Expect(sec).To(HaveKey("id"))
Expect(sec).To(HaveKey("label"))
Expect(sec).To(HaveKey("url"))
ids[i] = sec["id"].(string)
}
Expect(ids).To(ContainElements("general", "parameters"))
})
It("should return all fields when section=all", func() {
req := httptest.NewRequest(http.MethodGet, "/api/models/config-metadata?section=all", nil)
rec := httptest.NewRecorder()
app.ServeHTTP(rec, req)
Expect(rec.Code).To(Equal(http.StatusOK))
var resp map[string]any
Expect(json.Unmarshal(rec.Body.Bytes(), &resp)).To(Succeed())
Expect(resp).To(HaveKey("fields"))
fields, ok := resp["fields"].([]any)
Expect(ok).To(BeTrue())
// Lower bound only: the spec requires at least 80 fields, not an exact count.
Expect(len(fields)).To(BeNumerically(">=", 80))
})
It("should filter by section", func() {
req := httptest.NewRequest(http.MethodGet, "/api/models/config-metadata?section=general", nil)
rec := httptest.NewRecorder()
app.ServeHTTP(rec, req)
Expect(rec.Code).To(Equal(http.StatusOK))
var fields []map[string]any
Expect(json.Unmarshal(rec.Body.Bytes(), &fields)).To(Succeed())
Expect(fields).NotTo(BeEmpty())
for _, f := range fields {
Expect(f["section"]).To(Equal("general"))
}
})
It("should return 404 for unknown section", func() {
req := httptest.NewRequest(http.MethodGet, "/api/models/config-metadata?section=nonexistent", nil)
rec := httptest.NewRecorder()
app.ServeHTTP(rec, req)
Expect(rec.Code).To(Equal(http.StatusNotFound))
})
})
Context("GET /api/models/config-metadata/autocomplete/:provider", func() {
It("should return values for backends provider", func() {
// Only the presence of the "values" key is asserted, not its contents.
req := httptest.NewRequest(http.MethodGet, "/api/models/config-metadata/autocomplete/backends", nil)
rec := httptest.NewRecorder()
app.ServeHTTP(rec, req)
Expect(rec.Code).To(Equal(http.StatusOK))
var resp map[string]any
Expect(json.Unmarshal(rec.Body.Bytes(), &resp)).To(Succeed())
Expect(resp).To(HaveKey("values"))
})
It("should return model names for models provider", func() {
// Seed a model config
seedConfig := `name: test-model
backend: llama-cpp
`
Expect(os.WriteFile(filepath.Join(tempDir, "test-model.yaml"), []byte(seedConfig), 0644)).To(Succeed())
Expect(configLoader.LoadModelConfigsFromPath(tempDir)).To(Succeed())
req := httptest.NewRequest(http.MethodGet, "/api/models/config-metadata/autocomplete/models", nil)
rec := httptest.NewRecorder()
app.ServeHTTP(rec, req)
Expect(rec.Code).To(Equal(http.StatusOK))
var resp map[string]any
Expect(json.Unmarshal(rec.Body.Bytes(), &resp)).To(Succeed())
values, ok := resp["values"].([]any)
Expect(ok).To(BeTrue())
Expect(values).To(ContainElement("test-model"))
})
It("should return 404 for unknown provider", func() {
req := httptest.NewRequest(http.MethodGet, "/api/models/config-metadata/autocomplete/unknown", nil)
rec := httptest.NewRecorder()
app.ServeHTTP(rec, req)
Expect(rec.Code).To(Equal(http.StatusNotFound))
})
})
Context("PATCH /api/models/config-json/:name", func() {
It("should return 404 for nonexistent model", func() {
body := bytes.NewBufferString(`{"backend": "bar"}`)
req := httptest.NewRequest(http.MethodPatch, "/api/models/config-json/nonexistent", body)
req.Header.Set("Content-Type", "application/json")
rec := httptest.NewRecorder()
app.ServeHTTP(rec, req)
Expect(rec.Code).To(Equal(http.StatusNotFound))
})
It("should return 400 for empty body", func() {
// Seed a model config
seedConfig := `name: test-model
backend: llama-cpp
`
Expect(os.WriteFile(filepath.Join(tempDir, "test-model.yaml"), []byte(seedConfig), 0644)).To(Succeed())
Expect(configLoader.LoadModelConfigsFromPath(tempDir)).To(Succeed())
// A nil body exercises the empty-body check in the handler.
req := httptest.NewRequest(http.MethodPatch, "/api/models/config-json/test-model", nil)
req.Header.Set("Content-Type", "application/json")
rec := httptest.NewRecorder()
app.ServeHTTP(rec, req)
Expect(rec.Code).To(Equal(http.StatusBadRequest))
})
It("should return 400 for invalid JSON", func() {
seedConfig := `name: test-model
backend: llama-cpp
`
Expect(os.WriteFile(filepath.Join(tempDir, "test-model.yaml"), []byte(seedConfig), 0644)).To(Succeed())
Expect(configLoader.LoadModelConfigsFromPath(tempDir)).To(Succeed())
body := bytes.NewBufferString(`not json`)
req := httptest.NewRequest(http.MethodPatch, "/api/models/config-json/test-model", body)
req.Header.Set("Content-Type", "application/json")
rec := httptest.NewRecorder()
app.ServeHTTP(rec, req)
Expect(rec.Code).To(Equal(http.StatusBadRequest))
})
It("should merge a field update and persist to disk", func() {
seedConfig := `name: test-model
backend: llama-cpp
`
configPath := filepath.Join(tempDir, "test-model.yaml")
Expect(os.WriteFile(configPath, []byte(seedConfig), 0644)).To(Succeed())
Expect(configLoader.LoadModelConfigsFromPath(tempDir)).To(Succeed())
// Patch a single field; the loader and the file on disk must both reflect it.
body := bytes.NewBufferString(`{"backend": "vllm"}`)
req := httptest.NewRequest(http.MethodPatch, "/api/models/config-json/test-model", body)
req.Header.Set("Content-Type", "application/json")
rec := httptest.NewRecorder()
app.ServeHTTP(rec, req)
Expect(rec.Code).To(Equal(http.StatusOK))
var resp map[string]any
Expect(json.Unmarshal(rec.Body.Bytes(), &resp)).To(Succeed())
Expect(resp["success"]).To(BeTrue())
// Verify the reloaded config has the updated value
updatedConfig, exists := configLoader.GetModelConfig("test-model")
Expect(exists).To(BeTrue())
Expect(updatedConfig.Backend).To(Equal("vllm"))
// Verify the file on disk was updated
data, err := os.ReadFile(configPath)
Expect(err).NotTo(HaveOccurred())
Expect(string(data)).To(ContainSubstring("vllm"))
})
})
})

View File

@@ -13,7 +13,6 @@ import (
// DetectionEndpoint is the LocalAI Detection endpoint https://localai.io/docs/api-reference/detection
// @Summary Detects objects in the input image.
// @Tags detection
// @Param request body schema.DetectionRequest true "query params"
// @Success 200 {object} schema.DetectionResponse "Response"
// @Router /v1/detection [post]

View File

@@ -40,7 +40,6 @@ func CreateModelGalleryEndpointService(galleries []config.Gallery, backendGaller
// GetOpStatusEndpoint returns the job status
// @Summary Returns the job status
// @Tags models
// @Success 200 {object} galleryop.OpStatus "Response"
// @Router /models/jobs/{uuid} [get]
func (mgs *ModelGalleryEndpointService) GetOpStatusEndpoint() echo.HandlerFunc {
@@ -55,7 +54,6 @@ func (mgs *ModelGalleryEndpointService) GetOpStatusEndpoint() echo.HandlerFunc {
// GetAllStatusEndpoint returns all the jobs status progress
// @Summary Returns all the jobs status progress
// @Tags models
// @Success 200 {object} map[string]galleryop.OpStatus "Response"
// @Router /models/jobs [get]
func (mgs *ModelGalleryEndpointService) GetAllStatusEndpoint() echo.HandlerFunc {
@@ -66,7 +64,6 @@ func (mgs *ModelGalleryEndpointService) GetAllStatusEndpoint() echo.HandlerFunc
// ApplyModelGalleryEndpoint installs a new model to a LocalAI instance from the model gallery
// @Summary Install models to LocalAI.
// @Tags models
// @Param request body GalleryModel true "query params"
// @Success 200 {object} schema.GalleryResponse "Response"
// @Router /models/apply [post]
@@ -96,7 +93,6 @@ func (mgs *ModelGalleryEndpointService) ApplyModelGalleryEndpoint() echo.Handler
// DeleteModelGalleryEndpoint lets delete models from a LocalAI instance
// @Summary delete models to LocalAI.
// @Tags models
// @Param name path string true "Model name"
// @Success 200 {object} schema.GalleryResponse "Response"
// @Router /models/delete/{name} [post]
@@ -122,8 +118,7 @@ func (mgs *ModelGalleryEndpointService) DeleteModelGalleryEndpoint() echo.Handle
// ListModelFromGalleryEndpoint list the available models for installation from the active galleries
// @Summary List installable models.
// @Tags models
// @Success 200 {object} []gallery.Metadata "Response"
// @Success 200 {object} []gallery.GalleryModel "Response"
// @Router /models/available [get]
func (mgs *ModelGalleryEndpointService) ListModelFromGalleryEndpoint(systemState *system.SystemState) echo.HandlerFunc {
return func(c echo.Context) error {
@@ -154,7 +149,6 @@ func (mgs *ModelGalleryEndpointService) ListModelFromGalleryEndpoint(systemState
// ListModelGalleriesEndpoint list the available galleries configured in LocalAI
// @Summary List all Galleries
// @Tags models
// @Success 200 {object} []config.Gallery "Response"
// @Router /models/galleries [get]
// NOTE: This is different (and much simpler!) than above! This JUST lists the model galleries that have been loaded, not their contents!

View File

@@ -16,7 +16,6 @@ import (
// TokenMetricsEndpoint is an endpoint to get TokensProcessed Per Second for Active SlotID
//
// @Summary Get TokenMetrics for Active Slot.
// @Tags tokenize
// @Accept json
// @Produce audio/x-wav
// @Success 200 {string} binary "generated audio/wav file"

View File

@@ -53,7 +53,6 @@ type MCPErrorEvent struct {
// which handles MCP tool injection and server-side execution.
// Both streaming and non-streaming modes use standard OpenAI response format.
// @Summary MCP chat completions with automatic tool execution
// @Tags mcp
// @Param request body schema.OpenAIRequest true "query params"
// @Success 200 {object} schema.OpenAIResponse "Response"
// @Router /v1/mcp/chat/completions [post]

View File

@@ -10,9 +10,7 @@ import (
// LocalAIMetricsEndpoint returns the metrics endpoint for LocalAI
// @Summary Prometheus metrics endpoint
// @Tags monitoring
// @Produce text/plain
// @Success 200 {string} string "Prometheus metrics"
// @Param request body config.Gallery true "Gallery details"
// @Router /metrics [get]
func LocalAIMetricsEndpoint() echo.HandlerFunc {
return echo.WrapHandler(promhttp.Handler())

View File

@@ -9,7 +9,6 @@ import (
// ShowP2PNodes returns the P2P Nodes
// @Summary Returns available P2P nodes
// @Tags p2p
// @Success 200 {object} []schema.P2PNodesResponse "Response"
// @Router /api/p2p [get]
func ShowP2PNodes(appConfig *config.ApplicationConfig) echo.HandlerFunc {
@@ -25,7 +24,6 @@ func ShowP2PNodes(appConfig *config.ApplicationConfig) echo.HandlerFunc {
// ShowP2PToken returns the P2P token
// @Summary Show the P2P token
// @Tags p2p
// @Success 200 {string} string "Response"
// @Router /api/p2p/token [get]
func ShowP2PToken(appConfig *config.ApplicationConfig) echo.HandlerFunc {

View File

@@ -9,7 +9,6 @@ import (
// SystemInformations returns the system informations
// @Summary Show the LocalAI instance information
// @Tags monitoring
// @Success 200 {object} schema.SystemInformationResponse "Response"
// @Router /system [get]
func SystemInformations(ml *model.ModelLoader, appConfig *config.ApplicationConfig) echo.HandlerFunc {

View File

@@ -11,7 +11,6 @@ import (
// TokenizeEndpoint exposes a REST API to tokenize the content
// @Summary Tokenize the input.
// @Tags tokenize
// @Param request body schema.TokenizeRequest true "Request"
// @Success 200 {object} schema.TokenizeResponse "Response"
// @Router /v1/tokenize [post]

View File

@@ -1,59 +0,0 @@
package localai
import (
"github.com/labstack/echo/v4"
"github.com/mudler/LocalAI/core/http/middleware"
"github.com/mudler/LocalAI/core/trace"
)
// GetAPITracesEndpoint builds the handler that responds with whatever
// middleware.GetTraces() returns, encoded as JSON with status 200.
// @Summary List API request/response traces
// @Description Returns captured API exchange traces (request/response pairs) in reverse chronological order
// @Tags monitoring
// @Produce json
// @Success 200 {object} map[string]any "Traced API exchanges"
// @Router /api/traces [get]
func GetAPITracesEndpoint() echo.HandlerFunc {
	handler := func(ctx echo.Context) error {
		traces := middleware.GetTraces()
		return ctx.JSON(200, traces)
	}
	return handler
}
// ClearAPITracesEndpoint builds the handler that calls
// middleware.ClearTraces() and answers 204 with no body.
// @Summary Clear API traces
// @Description Removes all captured API request/response traces from the buffer
// @Tags monitoring
// @Success 204 "Traces cleared"
// @Router /api/traces/clear [post]
func ClearAPITracesEndpoint() echo.HandlerFunc {
	handler := func(ctx echo.Context) error {
		middleware.ClearTraces()
		// Nothing to return once the traces are cleared.
		return ctx.NoContent(204)
	}
	return handler
}
// GetBackendTracesEndpoint builds the handler that responds with whatever
// trace.GetBackendTraces() returns, encoded as JSON with status 200.
// @Summary List backend operation traces
// @Description Returns captured backend traces (LLM calls, embeddings, TTS, etc.) in reverse chronological order
// @Tags monitoring
// @Produce json
// @Success 200 {object} map[string]any "Backend operation traces"
// @Router /api/backend-traces [get]
func GetBackendTracesEndpoint() echo.HandlerFunc {
	handler := func(ctx echo.Context) error {
		backendTraces := trace.GetBackendTraces()
		return ctx.JSON(200, backendTraces)
	}
	return handler
}
// ClearBackendTracesEndpoint builds the handler that calls
// trace.ClearBackendTraces() and answers 204 with no body.
// @Summary Clear backend traces
// @Description Removes all captured backend operation traces from the buffer
// @Tags monitoring
// @Success 204 "Traces cleared"
// @Router /api/backend-traces/clear [post]
func ClearBackendTracesEndpoint() echo.HandlerFunc {
	handler := func(ctx echo.Context) error {
		trace.ClearBackendTraces()
		// Nothing to return once the traces are cleared.
		return ctx.NoContent(204)
	}
	return handler
}

View File

@@ -1,55 +0,0 @@
package localai_test
import (
"net/http"
"net/http/httptest"
"github.com/labstack/echo/v4"
. "github.com/mudler/LocalAI/core/http/endpoints/localai"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)
// Smoke specs for the four trace handlers: each one only checks the HTTP
// status code returned by the route, not the response body.
var _ = Describe("Traces Endpoints", func() {
var app *echo.Echo
// Register the trace routes on a fresh Echo instance before every spec.
BeforeEach(func() {
app = echo.New()
app.GET("/api/traces", GetAPITracesEndpoint())
app.POST("/api/traces/clear", ClearAPITracesEndpoint())
app.GET("/api/backend-traces", GetBackendTracesEndpoint())
app.POST("/api/backend-traces/clear", ClearBackendTracesEndpoint())
})
It("should return API traces", func() {
req := httptest.NewRequest(http.MethodGet, "/api/traces", nil)
rec := httptest.NewRecorder()
app.ServeHTTP(rec, req)
Expect(rec.Code).To(Equal(http.StatusOK))
})
It("should clear API traces", func() {
req := httptest.NewRequest(http.MethodPost, "/api/traces/clear", nil)
rec := httptest.NewRecorder()
app.ServeHTTP(rec, req)
Expect(rec.Code).To(Equal(http.StatusNoContent))
})
It("should return backend traces", func() {
req := httptest.NewRequest(http.MethodGet, "/api/backend-traces", nil)
rec := httptest.NewRecorder()
app.ServeHTTP(rec, req)
Expect(rec.Code).To(Equal(http.StatusOK))
})
It("should clear backend traces", func() {
req := httptest.NewRequest(http.MethodPost, "/api/backend-traces/clear", nil)
rec := httptest.NewRecorder()
app.ServeHTTP(rec, req)
Expect(rec.Code).To(Equal(http.StatusNoContent))
})
})

View File

@@ -17,7 +17,6 @@ import (
// TTSEndpoint is the OpenAI Speech API endpoint https://platform.openai.com/docs/api-reference/audio/createSpeech
//
// @Summary Generates audio from the input text.
// @Tags audio
// @Accept json
// @Produce audio/x-wav
// @Param request body schema.TTSRequest true "query params"

View File

@@ -12,7 +12,6 @@ import (
// VADEndpoint is Voice-Activation-Detection endpoint
// @Summary Detect voice fragments in an audio stream
// @Tags audio
// @Accept json
// @Param request body schema.VADRequest true "query params"
// @Success 200 {object} proto.VADResponse "Response"

View File

@@ -62,7 +62,6 @@ func downloadFile(url string) (string, error) {
*/
// VideoEndpoint
// @Summary Creates a video given a prompt.
// @Tags video
// @Param request body schema.VideoRequest true "query params"
// @Success 200 {object} schema.OpenAIResponse "Response"
// @Router /video [post]

View File

@@ -1,145 +0,0 @@
package localai
import (
"context"
"fmt"
"net/http"
"path/filepath"
"strings"
"time"
"github.com/labstack/echo/v4"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/pkg/vram"
)
// vramEstimateRequest is the JSON body bound by the handler that
// VRAMEstimateEndpoint returns. Model selects the model configuration to look
// up; the remaining fields are copied into vram.EstimateOptions.
type vramEstimateRequest struct {
Model string `json:"model"` // model name (must be installed)
ContextSize uint32 `json:"context_size,omitempty"` // context length to estimate for (default 8192)
GPULayers int `json:"gpu_layers,omitempty"` // number of layers to offload to GPU (0 = all)
KVQuantBits int `json:"kv_quant_bits,omitempty"` // KV cache quantization bits (0 = fp16)
}
// vramEstimateResponse is the success payload of the VRAM estimate handler.
// It embeds vram.EstimateResult. The two extra fields are filled in only when
// the handler fell back to its default context size and the first GGUF file's
// metadata reports a positive maximum context length; otherwise they are
// omitted from the JSON.
type vramEstimateResponse struct {
vram.EstimateResult
ContextNote string `json:"context_note,omitempty"` // note when context_size was defaulted
ModelMaxContext uint64 `json:"model_max_context,omitempty"` // model's trained maximum context length
}
// resolveModelURI normalises a model reference for the size resolver.
// A value that already carries a scheme (contains "://") is passed through
// as-is; anything else is treated as a path relative to modelsPath and is
// returned as a file:// URI pointing at the joined location.
func resolveModelURI(uri, modelsPath string) string {
	hasScheme := strings.Contains(uri, "://")
	if !hasScheme {
		onDisk := filepath.Join(modelsPath, uri)
		return "file://" + onDisk
	}
	return uri
}
// addWeightFile registers uri as an estimation input when vram.IsWeightFile
// accepts it and its resolved form has not been recorded in seen yet.
// The resolved URI is appended to files with a zero size, and the first
// entry that vram.IsGGUF accepts is remembered (resolved) in firstGGUF.
func addWeightFile(uri, modelsPath string, files *[]vram.FileInput, firstGGUF *string, seen map[string]bool) {
	if !vram.IsWeightFile(uri) {
		return
	}

	target := resolveModelURI(uri, modelsPath)
	if alreadyAdded := seen[target]; alreadyAdded {
		return
	}
	seen[target] = true

	entry := vram.FileInput{URI: target, Size: 0}
	*files = append(*files, entry)

	// Only the first GGUF is tracked; later ones leave firstGGUF untouched.
	ggufStillUnset := *firstGGUF == ""
	if ggufStillUnset && vram.IsGGUF(uri) {
		*firstGGUF = target
	}
}
// VRAMEstimateEndpoint returns a handler that estimates VRAM usage for an
// installed model configuration. For uninstalled models (gallery URLs), use
// the gallery-level estimates in /api/models instead.
//
// The handler answers 400 when the body cannot be bound or the model name is
// empty, 404 when no configuration exists for the model, 200 with a "message"
// field when the configuration references no weight files, 500 when the
// estimator fails, and 200 with a vramEstimateResponse otherwise.
//
// The context length comes from the request, then from a positive
// context_size in the model configuration, and finally from a built-in
// default of 8192. The estimate runs under a 10 second timeout derived from
// the request context, so it stops when the client disconnects.
// @Summary Estimate VRAM usage for a model
// @Description Estimates VRAM based on model weight files, context size, and GPU layers
// @Tags config
// @Accept json
// @Produce json
// @Param request body vramEstimateRequest true "VRAM estimation parameters"
// @Success 200 {object} vramEstimateResponse "VRAM estimate"
// @Router /api/models/vram-estimate [post]
func VRAMEstimateEndpoint(cl *config.ModelConfigLoader, appConfig *config.ApplicationConfig) echo.HandlerFunc {
	// Context length assumed when neither the request nor the model
	// configuration supplies a usable value.
	const defaultContextLength = 8192

	return func(c echo.Context) error {
		var req vramEstimateRequest
		if err := c.Bind(&req); err != nil {
			return c.JSON(http.StatusBadRequest, map[string]any{"error": "invalid request body"})
		}
		if req.Model == "" {
			return c.JSON(http.StatusBadRequest, map[string]any{"error": "model name is required"})
		}

		modelConfig, exists := cl.GetModelConfig(req.Model)
		if !exists {
			return c.JSON(http.StatusNotFound, map[string]any{"error": "model configuration not found"})
		}

		// Collect every distinct weight file the configuration references:
		// download entries first, then the main model, then the projector.
		modelsPath := appConfig.SystemState.Model.ModelsPath
		var files []vram.FileInput
		var firstGGUF string
		seen := make(map[string]bool)
		for _, f := range modelConfig.DownloadFiles {
			addWeightFile(string(f.URI), modelsPath, &files, &firstGGUF, seen)
		}
		if modelConfig.Model != "" {
			addWeightFile(modelConfig.Model, modelsPath, &files, &firstGGUF, seen)
		}
		if modelConfig.MMProj != "" {
			addWeightFile(modelConfig.MMProj, modelsPath, &files, &firstGGUF, seen)
		}
		if len(files) == 0 {
			return c.JSON(http.StatusOK, map[string]any{
				"message": "no weight files found for estimation",
			})
		}

		opts := vram.EstimateOptions{
			ContextLength: req.ContextSize,
			GPULayers:     req.GPULayers,
			KVQuantBits:   req.KVQuantBits,
		}
		contextDefaulted := false
		if opts.ContextLength == 0 {
			// Only a positive configured size is usable: converting zero or a
			// negative value to uint32 would yield 0 or a wrapped huge number.
			if modelConfig.ContextSize != nil && *modelConfig.ContextSize > 0 {
				opts.ContextLength = uint32(*modelConfig.ContextSize)
			} else {
				opts.ContextLength = defaultContextLength
				contextDefaulted = true
			}
		}

		// Derive the deadline from the request so the work is abandoned when
		// the client goes away, not only when the timeout expires.
		ctx, cancel := context.WithTimeout(c.Request().Context(), 10*time.Second)
		defer cancel()

		result, err := vram.Estimate(ctx, files, opts, vram.DefaultCachedSizeResolver(), vram.DefaultCachedGGUFReader())
		if err != nil {
			return c.JSON(http.StatusInternalServerError, map[string]any{"error": err.Error()})
		}

		resp := vramEstimateResponse{EstimateResult: result}

		// When the default context was used, read the GGUF metadata to report
		// the model's trained maximum context length so callers know the
		// estimate may be conservative. A metadata read failure is not fatal:
		// the estimate is still returned, just without the note.
		if contextDefaulted && firstGGUF != "" {
			ggufMeta, err := vram.DefaultCachedGGUFReader().ReadMetadata(ctx, firstGGUF)
			if err == nil && ggufMeta != nil && ggufMeta.MaximumContextLength > 0 {
				resp.ModelMaxContext = ggufMeta.MaximumContextLength
				resp.ContextNote = fmt.Sprintf(
					"Estimate used default context_size=%d. The model's trained maximum context is %d; VRAM usage will be higher at larger context sizes.",
					defaultContextLength,
					ggufMeta.MaximumContextLength,
				)
			}
		}

		return c.JSON(http.StatusOK, resp)
	}
}

View File

@@ -1,133 +0,0 @@
package localai_test
import (
"bytes"
"encoding/json"
"net/http"
"net/http/httptest"
"os"
"path/filepath"
"github.com/labstack/echo/v4"
"github.com/mudler/LocalAI/core/config"
. "github.com/mudler/LocalAI/core/http/endpoints/localai"
"github.com/mudler/LocalAI/pkg/system"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)
// Specs for the VRAM estimate handler. Each spec runs against a fresh temp
// directory that serves both as the models path and as the location of the
// model configuration files.
var _ = Describe("VRAM Estimate Endpoint", func() {
	var (
		app          *echo.Echo
		tempDir      string
		configLoader *config.ModelConfigLoader
		appConfig    *config.ApplicationConfig
	)

	// post submits payload as a JSON body to the estimate route.
	post := func(payload string) *httptest.ResponseRecorder {
		request := httptest.NewRequest(http.MethodPost, "/api/models/vram-estimate", bytes.NewBufferString(payload))
		request.Header.Set("Content-Type", "application/json")
		recorder := httptest.NewRecorder()
		app.ServeHTTP(recorder, request)
		return recorder
	}

	// decode parses the recorded body as a JSON object.
	decode := func(recorder *httptest.ResponseRecorder) map[string]any {
		var payload map[string]any
		Expect(json.Unmarshal(recorder.Body.Bytes(), &payload)).To(Succeed())
		return payload
	}

	// seedModel writes test-model.yaml with the given content and reloads the
	// configurations from the temp directory.
	seedModel := func(content string) {
		Expect(os.WriteFile(filepath.Join(tempDir, "test-model.yaml"), []byte(content), 0644)).To(Succeed())
		Expect(configLoader.LoadModelConfigsFromPath(tempDir)).To(Succeed())
	}

	BeforeEach(func() {
		dir, err := os.MkdirTemp("", "vram-test-*")
		Expect(err).NotTo(HaveOccurred())
		tempDir = dir

		state, err := system.GetSystemState(system.WithModelPath(tempDir))
		Expect(err).NotTo(HaveOccurred())

		appConfig = config.NewApplicationConfig(config.WithSystemState(state))
		configLoader = config.NewModelConfigLoader(tempDir)

		app = echo.New()
		app.POST("/api/models/vram-estimate", VRAMEstimateEndpoint(configLoader, appConfig))
	})

	AfterEach(func() {
		os.RemoveAll(tempDir)
	})

	It("should return 400 for invalid request body", func() {
		rec := post(`not json`)
		Expect(rec.Code).To(Equal(http.StatusBadRequest))
	})

	It("should return 400 when model name is missing", func() {
		rec := post(`{"context_size": 4096}`)
		Expect(rec.Code).To(Equal(http.StatusBadRequest))

		resp := decode(rec)
		Expect(resp["error"]).To(ContainSubstring("model name is required"))
	})

	It("should return 404 when model config does not exist", func() {
		rec := post(`{"model": "nonexistent"}`)
		Expect(rec.Code).To(Equal(http.StatusNotFound))
	})

	It("should return no-weight-files message when model has no weight files", func() {
		seedModel("name: test-model\nbackend: llama-cpp\n")

		rec := post(`{"model": "test-model"}`)
		Expect(rec.Code).To(Equal(http.StatusOK))

		resp := decode(rec)
		Expect(resp["message"]).To(ContainSubstring("no weight files"))
	})

	It("should return an estimate for a model with a weight file on disk", func() {
		// The file is 1 MiB of zeroes, not a valid GGUF; the size resolver
		// stats it and Estimate falls back to size-only estimation.
		placeholder := make([]byte, 1024*1024)
		Expect(os.WriteFile(filepath.Join(tempDir, "model.gguf"), placeholder, 0644)).To(Succeed())
		seedModel("name: test-model\nbackend: llama-cpp\nparameters:\n model: model.gguf\n")

		rec := post(`{"model": "test-model", "context_size": 4096}`)
		Expect(rec.Code).To(Equal(http.StatusOK))

		resp := decode(rec)

		// JSON numbers decode as float64; both estimates must be present and
		// strictly positive.
		sizeBytes, isNumber := resp["sizeBytes"].(float64)
		Expect(isNumber).To(BeTrue(), "sizeBytes should be a number, got: %v (response: %s)", resp["sizeBytes"], rec.Body.String())
		Expect(sizeBytes).To(BeNumerically(">", 0))

		vramBytes, isNumber := resp["vramBytes"].(float64)
		Expect(isNumber).To(BeTrue(), "vramBytes should be a number")
		Expect(vramBytes).To(BeNumerically(">", 0))

		Expect(resp["sizeDisplay"]).NotTo(BeEmpty())
		Expect(resp["vramDisplay"]).NotTo(BeEmpty())
	})
})

View File

@@ -55,7 +55,6 @@ func mergeToolCallDeltas(existing []schema.ToolCall, deltas []schema.ToolCall) [
// ChatEndpoint is the OpenAI Completion API endpoint https://platform.openai.com/docs/api-reference/chat/create
// @Summary Generate a chat completions for a given prompt and model.
// @Tags inference
// @Param request body schema.OpenAIRequest true "query params"
// @Success 200 {object} schema.OpenAIResponse "Response"
// @Router /v1/chat/completions [post]
@@ -82,23 +81,7 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
extractor := reason.NewReasoningExtractor(thinkingStartToken, config.ReasoningConfig)
_, _, _, err := ComputeChoices(req, s, config, cl, startupOptions, loader, func(s string, c *[]schema.Choice) {}, func(s string, tokenUsage backend.TokenUsage) bool {
var reasoningDelta, contentDelta string
// Always keep the Go-side extractor in sync with raw tokens so it
// can serve as fallback for backends without an autoparser (e.g. vLLM).
goReasoning, goContent := extractor.ProcessToken(s)
// When C++ autoparser chat deltas are available, prefer them — they
// handle model-specific formats (Gemma 4, etc.) without Go-side tags.
// Otherwise fall back to Go-side extraction.
if tokenUsage.HasChatDeltaContent() {
rawReasoning, cd := tokenUsage.ChatDeltaReasoningAndContent()
contentDelta = cd
reasoningDelta = extractor.ProcessChatDeltaReasoning(rawReasoning)
} else {
reasoningDelta = goReasoning
contentDelta = goContent
}
reasoningDelta, contentDelta := extractor.ProcessToken(s)
usage := schema.OpenAIUsage{
PromptTokens: tokenUsage.Prompt,
@@ -150,19 +133,7 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
_, tokenUsage, chatDeltas, err := ComputeChoices(req, prompt, config, cl, startupOptions, loader, func(s string, c *[]schema.Choice) {}, func(s string, usage backend.TokenUsage) bool {
result += s
var reasoningDelta, contentDelta string
goReasoning, goContent := extractor.ProcessToken(s)
if usage.HasChatDeltaContent() {
rawReasoning, cd := usage.ChatDeltaReasoningAndContent()
contentDelta = cd
reasoningDelta = extractor.ProcessChatDeltaReasoning(rawReasoning)
} else {
reasoningDelta = goReasoning
contentDelta = goContent
}
reasoningDelta, contentDelta := extractor.ProcessToken(s)
// Emit reasoning deltas in their own SSE chunks before any tool-call chunks
// (OpenAI spec: reasoning and tool_calls never share a delta)
@@ -992,24 +963,6 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
return err
}
// For non-tool requests: prefer C++ autoparser chat deltas over
// Go-side tag extraction (which can mangle output when thinkingStartToken
// differs from the model's actual reasoning tags, e.g. Gemma 4).
if !shouldUseFn && len(chatDeltas) > 0 {
deltaContent := functions.ContentFromChatDeltas(chatDeltas)
deltaReasoning := functions.ReasoningFromChatDeltas(chatDeltas)
if deltaContent != "" || deltaReasoning != "" {
xlog.Debug("[ChatDeltas] non-SSE no-tools: overriding result with C++ autoparser deltas",
"content_len", len(deltaContent), "reasoning_len", len(deltaReasoning))
stopReason := FinishReasonStop
message := &schema.Message{Role: "assistant", Content: &deltaContent}
if deltaReasoning != "" {
message.Reasoning = &deltaReasoning
}
result = []schema.Choice{{FinishReason: &stopReason, Index: 0, Message: message}}
}
}
// Tool parsing is deferred here (only when shouldUseFn) so chat deltas are available
if shouldUseFn {
var funcResults []functions.FuncCallResults

View File

@@ -21,7 +21,6 @@ import (
// CompletionEndpoint is the OpenAI Completion API endpoint https://platform.openai.com/docs/api-reference/completions
// @Summary Generate completions for a given prompt and model.
// @Tags inference
// @Param request body schema.OpenAIRequest true "query params"
// @Success 200 {object} schema.OpenAIResponse "Response"
// @Router /v1/completions [post]

View File

@@ -20,7 +20,6 @@ import (
// EditEndpoint is the OpenAI edit API endpoint
// @Summary OpenAI edit endpoint
// @Tags inference
// @Param request body schema.OpenAIRequest true "query params"
// @Success 200 {object} schema.OpenAIResponse "Response"
// @Router /v1/edits [post]

View File

@@ -42,7 +42,6 @@ func embeddingItem(embeddings []float32, index int, encodingFormat string) schem
// EmbeddingsEndpoint is the OpenAI Embeddings API endpoint https://platform.openai.com/docs/api-reference/embeddings
// @Summary Get a vector representation of a given input that can be easily consumed by machine learning models and algorithms.
// @Tags embeddings
// @Param request body schema.OpenAIRequest true "query params"
// @Success 200 {object} schema.OpenAIResponse "Response"
// @Router /v1/embeddings [post]

View File

@@ -68,7 +68,6 @@ func downloadFile(url string) (string, error) {
*/
// ImageEndpoint is the OpenAI Image generation API endpoint https://platform.openai.com/docs/api-reference/images/create
// @Summary Creates an image given a prompt.
// @Tags images
// @Param request body schema.OpenAIRequest true "query params"
// @Success 200 {object} schema.OpenAIResponse "Response"
// @Router /v1/images/generations [post]

View File

@@ -398,124 +398,5 @@ var _ = Describe("ComputeChoices", func() {
Expect(choices).To(HaveLen(1))
Expect(streamedTokens).To(Equal([]string{"Hello", " world"}))
})
// Replaces backend.ModelInferenceFunc with a stub that emits two tokens, each
// carrying its own ChatDeltas in the TokenUsage, and returns both deltas in the
// final LLMResponse. The spec checks that the token callback receives the
// deltas chunk by chunk and that ComputeChoices also returns the two
// accumulated deltas.
It("should pass chat deltas through TokenUsage during streaming", func() {
// One entry per callback invocation that carried at least one delta.
var receivedDeltas [][]*pb.ChatDelta
backend.ModelInferenceFunc = func(
ctx context.Context, s string, messages schema.Messages,
images, videos, audios []string,
loader *model.ModelLoader, c *config.ModelConfig, cl *config.ModelConfigLoader,
o *config.ApplicationConfig,
tokenCallback func(string, backend.TokenUsage) bool,
tools, toolChoice string,
logprobs, topLogprobs *int,
logitBias map[string]float64,
metadata map[string]string,
) (func() (backend.LLMResponse, error), error) {
predFunc := func() (backend.LLMResponse, error) {
if tokenCallback != nil {
// Simulate C++ autoparser sending reasoning in chat deltas
tokenCallback("<|channel>thought\nthinking\n<channel|>", backend.TokenUsage{
Prompt: 5,
ChatDeltas: []*pb.ChatDelta{
{ReasoningContent: "thinking"},
},
})
tokenCallback("Hello!", backend.TokenUsage{
Prompt: 5, Completion: 3,
ChatDeltas: []*pb.ChatDelta{
{Content: "Hello!"},
},
})
}
return backend.LLMResponse{
Response: "<|channel>thought\nthinking\n<channel|>Hello!",
Usage: backend.TokenUsage{Prompt: 5, Completion: 3},
ChatDeltas: []*pb.ChatDelta{
{ReasoningContent: "thinking"},
{Content: "Hello!"},
},
}, nil
}
return predFunc, nil
}
choices, _, deltas, err := ComputeChoices(
makeReq(), "test", cfg, nil, appCfg, nil,
func(s string, c *[]schema.Choice) {
*c = append(*c, schema.Choice{Text: s})
},
func(s string, usage backend.TokenUsage) bool {
// Capture chat deltas received per-chunk
if len(usage.ChatDeltas) > 0 {
receivedDeltas = append(receivedDeltas, usage.ChatDeltas)
}
return true
},
)
Expect(err).ToNot(HaveOccurred())
Expect(choices).To(HaveLen(1))
// Verify per-chunk deltas were received during streaming
Expect(receivedDeltas).To(HaveLen(2))
Expect(receivedDeltas[0][0].ReasoningContent).To(Equal("thinking"))
Expect(receivedDeltas[1][0].Content).To(Equal("Hello!"))
// Verify final accumulated deltas are also returned
Expect(deltas).To(HaveLen(2))
Expect(deltas[0].ReasoningContent).To(Equal("thinking"))
Expect(deltas[1].Content).To(Equal("Hello!"))
})
// Stubs backend.ModelInferenceFunc to emit one token with a content ChatDelta
// and one token with an empty TokenUsage. The spec counts how many callback
// invocations report HasChatDeltaContent and expects exactly one of each kind,
// with the delta-bearing chunk yielding "parsed-content" and no reasoning.
It("should prefer chat deltas over raw text when HasChatDeltaContent is true", func() {
// Verify that the callback can distinguish between
// chunks with and without chat deltas
var withDeltas, withoutDeltas int
backend.ModelInferenceFunc = func(
ctx context.Context, s string, messages schema.Messages,
images, videos, audios []string,
loader *model.ModelLoader, c *config.ModelConfig, cl *config.ModelConfigLoader,
o *config.ApplicationConfig,
tokenCallback func(string, backend.TokenUsage) bool,
tools, toolChoice string,
logprobs, topLogprobs *int,
logitBias map[string]float64,
metadata map[string]string,
) (func() (backend.LLMResponse, error), error) {
predFunc := func() (backend.LLMResponse, error) {
if tokenCallback != nil {
// Chunk with chat deltas (C++ autoparser active)
tokenCallback("raw-text", backend.TokenUsage{
ChatDeltas: []*pb.ChatDelta{{Content: "parsed-content"}},
})
// Chunk without chat deltas (fallback)
tokenCallback("fallback-text", backend.TokenUsage{})
}
return backend.LLMResponse{Response: "raw-textfallback-text"}, nil
}
return predFunc, nil
}
_, _, _, err := ComputeChoices(
makeReq(), "test", cfg, nil, appCfg, nil,
func(s string, c *[]schema.Choice) {
*c = append(*c, schema.Choice{Text: s})
},
func(s string, usage backend.TokenUsage) bool {
if usage.HasChatDeltaContent() {
withDeltas++
r, c := usage.ChatDeltaReasoningAndContent()
Expect(c).To(Equal("parsed-content"))
Expect(r).To(BeEmpty())
} else {
withoutDeltas++
}
return true
},
)
Expect(err).ToNot(HaveOccurred())
Expect(withDeltas).To(Equal(1))
Expect(withoutDeltas).To(Equal(1))
})
})
})

View File

@@ -12,7 +12,6 @@ import (
// ListModelsEndpoint is the OpenAI Models API endpoint https://platform.openai.com/docs/api-reference/models
// @Summary List and describe the various models available in the API.
// @Tags models
// @Success 200 {object} schema.ModelsDataResponse "Response"
// @Router /v1/models [get]
func ListModelsEndpoint(bcl *config.ModelConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig, db ...*gorm.DB) echo.HandlerFunc {

View File

@@ -20,7 +20,6 @@ import (
// TranscriptEndpoint is the OpenAI Whisper API endpoint https://platform.openai.com/docs/api-reference/audio/create
// @Summary Transcribes audio into the input language.
// @Tags audio
// @accept multipart/form-data
// @Param model formData string true "model"
// @Param file formData file true "file"

View File

@@ -25,7 +25,6 @@ import (
// ResponsesEndpoint is the Open Responses API endpoint
// https://www.openresponses.org/specification
// @Summary Create a response using the Open Responses API
// @Tags inference
// @Param request body schema.OpenResponsesRequest true "Request body"
// @Success 200 {object} schema.ORResponseResource "Response"
// @Router /v1/responses [post]
@@ -1820,17 +1819,7 @@ func handleOpenResponsesStream(c echo.Context, responseID string, createdAt int6
// If no tool calls detected yet, handle reasoning and text
if !inToolCallMode {
var reasoningDelta, contentDelta string
goReasoning, goContent := extractor.ProcessToken(token)
if tokenUsage.HasChatDeltaContent() {
rawReasoning, cd := tokenUsage.ChatDeltaReasoningAndContent()
contentDelta = cd
reasoningDelta = extractor.ProcessChatDeltaReasoning(rawReasoning)
} else {
reasoningDelta = goReasoning
contentDelta = goContent
}
reasoningDelta, contentDelta := extractor.ProcessToken(token)
// Handle reasoning item
if extractor.Reasoning() != "" {
@@ -2349,18 +2338,7 @@ func handleOpenResponsesStream(c echo.Context, responseID string, createdAt int6
// Stream text deltas with reasoning extraction
tokenCallback := func(token string, tokenUsage backend.TokenUsage) bool {
accumulatedText += token
var reasoningDelta, contentDelta string
goReasoning, goContent := extractor.ProcessToken(token)
if tokenUsage.HasChatDeltaContent() {
rawReasoning, cd := tokenUsage.ChatDeltaReasoningAndContent()
contentDelta = cd
reasoningDelta = extractor.ProcessChatDeltaReasoning(rawReasoning)
} else {
reasoningDelta = goReasoning
contentDelta = goContent
}
reasoningDelta, contentDelta := extractor.ProcessToken(token)
// Handle reasoning item
if extractor.Reasoning() != "" {
@@ -2953,7 +2931,6 @@ func convertORToolsToOpenAIFormat(orTools []schema.ORFunctionTool) []functions.T
// GetResponseEndpoint returns a handler for GET /responses/:id
// This endpoint is used for polling background responses or resuming streaming
// @Summary Get a response by ID
// @Tags inference
// @Description Retrieve a response by ID. Can be used for polling background responses or resuming streaming responses.
// @Param id path string true "Response ID"
// @Param stream query string false "Set to 'true' to resume streaming"
@@ -3095,7 +3072,6 @@ func handleStreamResume(c echo.Context, store *ResponseStore, responseID string,
// CancelResponseEndpoint returns a handler for POST /responses/:id/cancel
// This endpoint cancels a background response if it's still in progress
// @Summary Cancel a response
// @Tags inference
// @Description Cancel a background response if it's still in progress
// @Param id path string true "Response ID"
// @Success 200 {object} schema.ORResponseResource "Response"

View File

@@ -2,8 +2,8 @@ import { Navigate } from 'react-router-dom'
import { useAuth } from '../context/AuthContext'
export default function RequireAuth({ children }) {
const { authEnabled, staticApiKeyRequired, user, loading } = useAuth()
const { authEnabled, user, loading } = useAuth()
if (loading) return null
if ((authEnabled || staticApiKeyRequired) && !user) return <Navigate to="/login" replace />
if (authEnabled && !user) return <Navigate to="/login" replace />
return children
}

View File

@@ -7,7 +7,6 @@ export function AuthProvider({ children }) {
const [state, setState] = useState({
loading: true,
authEnabled: false,
staticApiKeyRequired: false,
user: null,
permissions: {},
})
@@ -21,13 +20,12 @@ export function AuthProvider({ children }) {
setState({
loading: false,
authEnabled: data.authEnabled || false,
staticApiKeyRequired: data.staticApiKeyRequired || false,
user,
permissions,
})
})
.catch(() => {
setState({ loading: false, authEnabled: false, staticApiKeyRequired: false, user: null, permissions: {} })
setState({ loading: false, authEnabled: false, user: null, permissions: {} })
})
}
@@ -47,20 +45,17 @@ export function AuthProvider({ children }) {
const refresh = () => fetchStatus()
const noAuthRequired = !state.authEnabled && !state.staticApiKeyRequired
const hasFeature = (name) => {
if (state.user?.role === 'admin' || noAuthRequired) return true
if (state.user?.role === 'admin' || !state.authEnabled) return true
return !!state.permissions[name]
}
const value = {
loading: state.loading,
authEnabled: state.authEnabled,
staticApiKeyRequired: state.staticApiKeyRequired,
user: state.user,
permissions: state.permissions,
isAdmin: state.user?.role === 'admin' || noAuthRequired,
isAdmin: state.user?.role === 'admin' || !state.authEnabled,
hasFeature,
logout,
refresh,

View File

@@ -2,9 +2,9 @@ import { useState, useCallback, useRef, useEffect } from 'react'
import { API_CONFIG } from '../utils/config'
import { apiUrl } from '../utils/basePath'
const thinkingTagRegex = /<thinking>([\s\S]*?)<\/thinking>|<think>([\s\S]*?)<\/think>|<\|channel>thought([\s\S]*?)<channel\|>/g
const openThinkTagRegex = /<thinking>|<think>|<\|channel>thought/
const closeThinkTagRegex = /<\/thinking>|<\/think>|<channel\|>/
const thinkingTagRegex = /<thinking>([\s\S]*?)<\/thinking>|<think>([\s\S]*?)<\/think>/g
const openThinkTagRegex = /<thinking>|<think>/
const closeThinkTagRegex = /<\/thinking>|<\/think>/
async function extractHttpError(response) {
let errorMsg = `HTTP ${response.status}`
@@ -23,7 +23,7 @@ function extractThinking(text) {
thinkingTagRegex.lastIndex = 0
while ((match = thinkingTagRegex.exec(text)) !== null) {
regularContent += text.slice(lastIdx, match.index)
thinkingContent += match[1] || match[2] || match[3] || ''
thinkingContent += match[1] || match[2] || ''
lastIdx = match.index + match[0].length
}
regularContent += text.slice(lastIdx)
@@ -578,9 +578,9 @@ export function useChat(initialModel = '') {
}
if (insideThinkTag) {
const lastOpen = Math.max(rawContent.lastIndexOf('<thinking>'), rawContent.lastIndexOf('<think>'), rawContent.lastIndexOf('<|channel>thought'))
const lastOpen = Math.max(rawContent.lastIndexOf('<thinking>'), rawContent.lastIndexOf('<think>'))
if (lastOpen >= 0) {
const partial = rawContent.slice(lastOpen).replace(/<thinking>|<think>|<\|channel>thought/, '')
const partial = rawContent.slice(lastOpen).replace(/<thinking>|<think>/, '')
setStreamingReasoning(partial)
const beforeThink = rawContent.slice(0, lastOpen)
const { regularContent: contentBeforeThink } = extractThinking(beforeThink)

View File

@@ -8,7 +8,7 @@ export default function Login() {
const navigate = useNavigate()
const { code: urlInviteCode } = useParams()
const [searchParams] = useSearchParams()
const { authEnabled, staticApiKeyRequired, user, loading: authLoading, refresh } = useAuth()
const { authEnabled, user, loading: authLoading, refresh } = useAuth()
const [providers, setProviders] = useState([])
const [hasUsers, setHasUsers] = useState(true)
const [registrationMode, setRegistrationMode] = useState('open')
@@ -66,7 +66,7 @@ export default function Login() {
// Redirect if auth is disabled or user is already logged in
useEffect(() => {
if (!authLoading && ((!authEnabled && !staticApiKeyRequired) || user)) {
if (!authLoading && (!authEnabled || user)) {
navigate('/app', { replace: true })
}
}, [authLoading, authEnabled, user, navigate])
@@ -176,40 +176,6 @@ export default function Login() {
if (authLoading || statusLoading) return null
// Legacy API key-only mode: show a simplified login with just the token input
if (staticApiKeyRequired && !authEnabled) {
return (
<div className="login-page">
<div className="card login-card">
<div className="login-header">
<img src={apiUrl('/static/logo.png')} alt="LocalAI" className="login-logo" />
<p className="login-subtitle">Enter your API key to continue</p>
</div>
{error && (
<div className="login-alert login-alert-error">{error}</div>
)}
<form onSubmit={handleTokenLogin}>
<div className="form-group">
<input
className="input"
type="password"
value={token}
onChange={(e) => { setToken(e.target.value); setError('') }}
placeholder="Enter API key..."
autoFocus
/>
</div>
<button type="submit" className="btn btn-primary login-btn-full" disabled={submitting}>
{submitting ? 'Signing in...' : 'Sign In'}
</button>
</form>
</div>
</div>
)
}
const hasGitHub = providers.includes('github')
const hasOIDC = providers.includes('oidc')
const hasLocal = providers.includes('local')

View File

@@ -157,11 +157,10 @@ func RegisterAuthRoutes(e *echo.Echo, app *application.Application) {
}
resp := map[string]any{
"authEnabled": authEnabled,
"staticApiKeyRequired": !authEnabled && len(appConfig.ApiKeys) > 0,
"providers": providers,
"hasUsers": hasUsers,
"registrationMode": registrationMode,
"authEnabled": authEnabled,
"providers": providers,
"hasUsers": hasUsers,
"registrationMode": registrationMode,
}
// Include current user if authenticated

View File

@@ -45,10 +45,9 @@ func newTestAuthApp(db *gorm.DB, appConfig *config.ApplicationConfig) *echo.Echo
}
resp := map[string]any{
"authEnabled": authEnabled,
"staticApiKeyRequired": !authEnabled && len(appConfig.ApiKeys) > 0,
"providers": providers,
"hasUsers": hasUsers,
"authEnabled": authEnabled,
"providers": providers,
"hasUsers": hasUsers,
}
user := auth.GetUser(c)
@@ -408,29 +407,6 @@ var _ = Describe("Auth Routes", Label("auth"), func() {
json.Unmarshal(rec.Body.Bytes(), &resp)
Expect(resp["hasUsers"]).To(BeFalse())
})
It("returns staticApiKeyRequired=true when no DB but API keys configured", func() {
cfg := config.NewApplicationConfig()
config.WithApiKeys([]string{"test-key-123"})(cfg)
app := newTestAuthApp(nil, cfg)
rec := doAuthRequest(app, "GET", "/api/auth/status", nil)
Expect(rec.Code).To(Equal(http.StatusOK))
var resp map[string]any
json.Unmarshal(rec.Body.Bytes(), &resp)
Expect(resp["authEnabled"]).To(BeFalse())
Expect(resp["staticApiKeyRequired"]).To(BeTrue())
})
It("returns staticApiKeyRequired=false when no DB and no API keys", func() {
app := newTestAuthApp(nil, config.NewApplicationConfig())
rec := doAuthRequest(app, "GET", "/api/auth/status", nil)
Expect(rec.Code).To(Equal(http.StatusOK))
var resp map[string]any
json.Unmarshal(rec.Body.Bytes(), &resp)
Expect(resp["staticApiKeyRequired"]).To(BeFalse())
})
})
Context("POST /api/auth/logout", func() {

View File

@@ -29,9 +29,7 @@ func RegisterLocalAIRoutes(router *echo.Echo,
mcpJobsMw echo.MiddlewareFunc,
mcpMw echo.MiddlewareFunc) {
router.GET("/swagger/*", echoswagger.EchoWrapHandler(func(c *echoswagger.Config) {
c.URLs = []string{"doc.json"}
}))
router.GET("/swagger/*", echoswagger.WrapHandler) // default
// LocalAI API endpoints
if !appConfig.DisableGalleryEndpoint {
@@ -126,19 +124,6 @@ func RegisterLocalAIRoutes(router *echo.Echo,
router.GET("/v1/backend/monitor", localai.BackendMonitorEndpoint(backendMonitorService), adminMiddleware)
router.POST("/v1/backend/shutdown", localai.BackendShutdownEndpoint(backendMonitorService), adminMiddleware)
// Traces and backend logs (monitoring)
router.GET("/api/traces", localai.GetAPITracesEndpoint(), adminMiddleware)
router.POST("/api/traces/clear", localai.ClearAPITracesEndpoint(), adminMiddleware)
router.GET("/api/backend-traces", localai.GetBackendTracesEndpoint(), adminMiddleware)
router.POST("/api/backend-traces/clear", localai.ClearBackendTracesEndpoint(), adminMiddleware)
// Backend logs — standalone only (distributed mode uses node-proxied routes)
if !appConfig.Distributed.Enabled {
router.GET("/api/backend-logs", localai.ListBackendLogsEndpoint(ml), adminMiddleware)
router.GET("/api/backend-logs/:modelId", localai.GetBackendLogsEndpoint(ml), adminMiddleware)
router.POST("/api/backend-logs/:modelId/clear", localai.ClearBackendLogsEndpoint(ml), adminMiddleware)
router.GET("/ws/backend-logs/:modelId", localai.BackendLogsWebSocketEndpoint(ml), adminMiddleware)
}
// p2p
router.GET("/api/p2p", localai.ShowP2PNodes(appConfig), adminMiddleware)
router.GET("/api/p2p/token", localai.ShowP2PToken(appConfig), adminMiddleware)
@@ -149,127 +134,6 @@ func RegisterLocalAIRoutes(router *echo.Echo,
}{Version: internal.PrintableVersion()})
})
// Agent discovery endpoint
router.GET("/.well-known/localai.json", func(c echo.Context) error {
monitoringRoutes := map[string]string{
"metrics": "/metrics",
"backend_monitor": "/backend/monitor",
"backend_shutdown": "/backend/shutdown",
"system": "/system",
"version": "/version",
"traces": "/api/traces",
"traces_clear": "/api/traces/clear",
"backend_traces": "/api/backend-traces",
"backend_traces_clear": "/api/backend-traces/clear",
}
if !appConfig.Distributed.Enabled {
monitoringRoutes["backend_logs"] = "/api/backend-logs"
monitoringRoutes["backend_logs_model"] = "/api/backend-logs/:modelId"
monitoringRoutes["backend_logs_clear"] = "/api/backend-logs/:modelId/clear"
monitoringRoutes["backend_logs_ws"] = "/ws/backend-logs/:modelId"
} else {
monitoringRoutes["node_backend_logs"] = "/api/nodes/:id/backend-logs"
monitoringRoutes["node_backend_logs_model"] = "/api/nodes/:id/backend-logs/:modelId"
monitoringRoutes["node_backend_logs_ws"] = "/ws/nodes/:id/backend-logs/:modelId"
}
return c.JSON(200, map[string]any{
"version": internal.PrintableVersion(),
// Flat endpoint list for backwards compatibility
"endpoints": map[string]any{
"models": "/v1/models",
"chat_completions": "/v1/chat/completions",
"completions": "/v1/completions",
"embeddings": "/v1/embeddings",
"config_metadata": "/api/models/config-metadata",
"config_json": "/api/models/config-json/:name",
"config_patch": "/api/models/config-json/:name",
"autocomplete": "/api/models/config-metadata/autocomplete/:provider",
"vram_estimate": "/api/models/vram-estimate",
"tts": "/tts",
"transcription": "/v1/audio/transcriptions",
"image_generation": "/v1/images/generations",
"swagger": "/swagger/index.html",
"instructions": "/api/instructions",
},
// Categorized endpoint groups for structured discovery
"endpoint_groups": map[string]any{
"openai_compatible": map[string]string{
"models": "/v1/models",
"chat_completions": "/v1/chat/completions",
"completions": "/v1/completions",
"embeddings": "/v1/embeddings",
"transcription": "/v1/audio/transcriptions",
"image_generation": "/v1/images/generations",
},
"config_management": map[string]string{
"config_metadata": "/api/models/config-metadata",
"config_json": "/api/models/config-json/:name",
"config_patch": "/api/models/config-json/:name",
"autocomplete": "/api/models/config-metadata/autocomplete/:provider",
"vram_estimate": "/api/models/vram-estimate",
},
"model_management": map[string]string{
"list_gallery": "/models/available",
"install": "/models/apply",
"delete": "/models/delete/:name",
"edit": "/models/edit/:name",
"import": "/models/import",
"reload": "/models/reload",
},
"ai_functions": map[string]string{
"tts": "/tts",
"vad": "/vad",
"video": "/video",
"detection": "/v1/detection",
"tokenize": "/v1/tokenize",
},
"monitoring": monitoringRoutes,
"mcp": map[string]string{
"chat_completions": "/v1/mcp/chat/completions",
"servers": "/v1/mcp/servers/:model",
"prompts": "/v1/mcp/prompts/:model",
"resources": "/v1/mcp/resources/:model",
},
"p2p": map[string]string{
"nodes": "/api/p2p",
"token": "/api/p2p/token",
},
"agents": map[string]string{
"tasks": "/api/agent/tasks",
"jobs": "/api/agent/jobs",
"execute": "/api/agent/jobs/execute",
},
"settings": map[string]string{
"get": "/api/settings",
"update": "/api/settings",
},
"stores": map[string]string{
"set": "/stores/set",
"get": "/stores/get",
"find": "/stores/find",
"delete": "/stores/delete",
},
"docs": map[string]string{
"swagger": "/swagger/index.html",
"instructions": "/api/instructions",
},
},
"capabilities": map[string]bool{
"config_metadata": true,
"config_patch": true,
"vram_estimate": true,
"mcp": !appConfig.DisableMCP,
"agents": appConfig.AgentPool.Enabled,
"p2p": appConfig.P2PToken != "",
"tracing": true,
},
})
})
// API instructions for agent discovery (no auth — agents should discover these without credentials)
router.GET("/api/instructions", localai.ListAPIInstructionsEndpoint())
router.GET("/api/instructions/:name", localai.GetAPIInstructionEndpoint())
router.GET("/api/features", func(c echo.Context) error {
return c.JSON(200, map[string]bool{
"agents": appConfig.AgentPool.Enabled,

View File

@@ -2,15 +2,41 @@ package routes
import (
"cmp"
"encoding/json"
"fmt"
"net/http"
"net/url"
"slices"
"sync"
"time"
"github.com/gorilla/websocket"
"github.com/labstack/echo/v4"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/http/middleware"
"github.com/mudler/LocalAI/core/services/galleryop"
"github.com/mudler/LocalAI/core/trace"
"github.com/mudler/LocalAI/pkg/model"
"github.com/mudler/xlog"
)
// backendLogsUpgrader upgrades backend-log HTTP requests to WebSocket
// connections. A request that carries an Origin header is accepted only when
// that origin parses as a URL whose host equals the request's Host header.
var backendLogsUpgrader = websocket.Upgrader{
	CheckOrigin: func(req *http.Request) bool {
		rawOrigin := req.Header.Get("Origin")
		if rawOrigin != "" {
			parsed, parseErr := url.Parse(rawOrigin)
			// Unparsable origins are rejected; otherwise hosts must match exactly.
			return parseErr == nil && parsed.Host == req.Host
		}
		// no origin header = same-origin or non-browser
		return true
	},
}
func RegisterUIRoutes(app *echo.Echo,
cl *config.ModelConfigLoader,
ml *model.ModelLoader,
appConfig *config.ApplicationConfig,
galleryService *galleryop.GalleryService,
adminMiddleware echo.MiddlewareFunc) {
@@ -52,4 +78,142 @@ func RegisterUIRoutes(app *echo.Echo,
return c.JSON(200, models)
})
app.GET("/api/traces", func(c echo.Context) error {
return c.JSON(200, middleware.GetTraces())
}, adminMiddleware)
app.POST("/api/traces/clear", func(c echo.Context) error {
middleware.ClearTraces()
return c.NoContent(204)
}, adminMiddleware)
app.GET("/api/backend-traces", func(c echo.Context) error {
return c.JSON(200, trace.GetBackendTraces())
}, adminMiddleware)
app.POST("/api/backend-traces/clear", func(c echo.Context) error {
trace.ClearBackendTraces()
return c.NoContent(204)
}, adminMiddleware)
// Backend logs endpoints — only in standalone mode.
// In distributed mode, backend processes run on workers and logs are
// streamed via /api/nodes/:id/backend-logs and /ws/nodes/:id/backend-logs/:modelId.
if !appConfig.Distributed.Enabled {
app.GET("/api/backend-logs", func(c echo.Context) error {
return c.JSON(200, ml.BackendLogs().ListModels())
}, adminMiddleware)
app.GET("/api/backend-logs/:modelId", func(c echo.Context) error {
modelID := c.Param("modelId")
return c.JSON(200, ml.BackendLogs().GetLines(modelID))
}, adminMiddleware)
app.POST("/api/backend-logs/:modelId/clear", func(c echo.Context) error {
ml.BackendLogs().Clear(c.Param("modelId"))
return c.NoContent(204)
}, adminMiddleware)
// Backend logs WebSocket endpoint for real-time streaming.
// Protocol, as implemented below: after the upgrade the server sends one
// {"type": "initial", "lines": [...]} message with the lines currently stored
// for the model, then one {"type": "line", "line": ...} message per new line.
// Client messages are read and discarded; they are only used to detect
// disconnects.
app.GET("/ws/backend-logs/:modelId", func(c echo.Context) error {
modelID := c.Param("modelId")
ws, err := backendLogsUpgrader.Upgrade(c.Response(), c.Request(), nil)
if err != nil {
return err
}
defer ws.Close()
// Cap the size of any message the client may send at 4096 bytes.
ws.SetReadLimit(4096)
// Set up ping/pong for keepalive: the read deadline is 90s and is pushed
// forward on every pong, while pings go out every 30s (see pingTicker).
ws.SetReadDeadline(time.Now().Add(90 * time.Second))
ws.SetPongHandler(func(string) error {
ws.SetReadDeadline(time.Now().Add(90 * time.Second))
return nil
})
// All writes below go through conn so they are serialized by its mutex.
conn := &backendLogsConn{Conn: ws}
// Send existing lines as initial batch
existingLines := ml.BackendLogs().GetLines(modelID)
initialMsg := map[string]any{
"type": "initial",
"lines": existingLines,
}
if err := conn.writeJSON(initialMsg); err != nil {
xlog.Debug("WebSocket backend-logs initial write failed", "error", err)
return nil
}
// Subscribe to new lines
// NOTE(review): the subscription starts after the snapshot above was taken,
// so lines produced in between may not reach this client unless Subscribe
// replays them — confirm against the BackendLogs implementation.
lineCh, unsubscribe := ml.BackendLogs().Subscribe(modelID)
defer unsubscribe()
// Handle close from client side. The reader goroutine drops every message and
// closes closeCh on the first read error (client close, read-deadline expiry,
// or the deferred ws.Close() above), then exits.
closeCh := make(chan struct{})
go func() {
for {
_, _, err := ws.ReadMessage()
if err != nil {
close(closeCh)
return
}
}
}()
// Ping ticker for keepalive
pingTicker := time.NewTicker(30 * time.Second)
defer pingTicker.Stop()
// Forward new lines to WebSocket until the subscription channel is closed,
// a write fails, or the reader goroutine reports that the client is gone.
for {
select {
case line, ok := <-lineCh:
if !ok {
return nil
}
lineMsg := map[string]any{
"type": "line",
"line": line,
}
if err := conn.writeJSON(lineMsg); err != nil {
xlog.Debug("WebSocket backend-logs write error", "error", err)
return nil
}
case <-pingTicker.C:
if err := conn.writePing(); err != nil {
return nil
}
case <-closeCh:
return nil
}
}
}, adminMiddleware)
}
}
// backendLogsConn wraps a websocket connection with a mutex for safe concurrent writes.
// Writes are expected to go through writeJSON and writePing, which both take
// the mutex; calling write methods on the embedded Conn directly bypasses it.
type backendLogsConn struct {
*websocket.Conn
mu sync.Mutex // held for the full duration of each writeJSON / writePing call
}
// writeJSON encodes v as JSON and sends it as a single WebSocket text message.
//
// The value is marshaled before the mutex is taken, so encoding cost is not
// paid while other writers (such as writePing) are blocked, and a marshal
// failure leaves the connection's write deadline untouched. The write itself
// is given a 30 second deadline.
//
// It returns a wrapped error when v cannot be marshaled, or the error reported
// by the underlying connection when setting the deadline or writing fails.
func (c *backendLogsConn) writeJSON(v any) error {
	data, err := json.Marshal(v)
	if err != nil {
		return fmt.Errorf("marshal error: %w", err)
	}

	c.mu.Lock()
	defer c.mu.Unlock()
	if err := c.Conn.SetWriteDeadline(time.Now().Add(30 * time.Second)); err != nil {
		return err
	}
	return c.Conn.WriteMessage(websocket.TextMessage, data)
}
// writePing sends a WebSocket ping control message with a 30 second write
// deadline. It takes the connection mutex so the ping cannot interleave with a
// writeJSON call in progress.
//
// It returns the error reported by the underlying connection when setting the
// deadline or writing the ping fails.
func (c *backendLogsConn) writePing() error {
	c.mu.Lock()
	defer c.mu.Unlock()
	// Surface a failed deadline update instead of writing with a stale deadline.
	if err := c.Conn.SetWriteDeadline(time.Now().Add(30 * time.Second)); err != nil {
		return err
	}
	return c.Conn.WriteMessage(websocket.PingMessage, nil)
}

View File

@@ -690,18 +690,6 @@ func RegisterUIAPIRoutes(app *echo.Echo, cl *config.ModelConfigLoader, ml *model
return c.JSON(http.StatusOK, modelConfig)
}, adminMiddleware)
// Config metadata API - returns field metadata for all ~170 config fields
app.GET("/api/models/config-metadata", localai.ConfigMetadataEndpoint(), adminMiddleware)
// Autocomplete providers for config fields (dynamic values only)
app.GET("/api/models/config-metadata/autocomplete/:provider", localai.AutocompleteEndpoint(cl, ml, appConfig), adminMiddleware)
// PATCH config endpoint - partial update using nested JSON merge
app.PATCH("/api/models/config-json/:name", localai.PatchConfigEndpoint(cl, ml, appConfig), adminMiddleware)
// VRAM estimation endpoint
app.POST("/api/models/vram-estimate", localai.VRAMEstimateEndpoint(cl, appConfig), adminMiddleware)
// Get installed model YAML config for the React model editor
app.GET("/api/models/edit/:name", func(c echo.Context) error {
modelName := c.Param("name")
@@ -1325,4 +1313,3 @@ func RegisterUIAPIRoutes(app *echo.Echo, cl *config.ModelConfigLoader, ml *model
})
}, adminMiddleware)
}

View File

@@ -10,18 +10,21 @@ type Task struct {
Name string `json:"name"` // User-friendly name
Description string `json:"description"` // Optional description
Model string `json:"model"` // Model name (must have MCP config)
Prompt string `json:"prompt"` // Template prompt (supports Go template .param syntax)
Prompt string `json:"prompt"` // Template prompt (supports {{.param}} syntax)
CreatedAt time.Time `json:"created_at"`
UpdatedAt time.Time `json:"updated_at"`
Enabled bool `json:"enabled"` // Can be disabled without deletion
Cron string `json:"cron,omitempty"` // Optional cron expression
CronParameters map[string]string `json:"cron_parameters,omitempty"` // Parameters to use when executing cron jobs
// Webhook configuration (for notifications).
// Supports multiple webhook endpoints.
// Webhook configuration (for notifications)
// Support multiple webhook endpoints
// Webhooks can handle both success and failure cases using template variables:
// .Job (Job object), .Task (Task object), .Result (if successful),
// .Error (if failed), .Status (job status string).
// - {{.Job}} - Job object with all fields
// - {{.Task}} - Task object
// - {{.Result}} - Job result (if successful)
// - {{.Error}} - Error message (if failed, empty string if successful)
// - {{.Status}} - Job status string
Webhooks []WebhookConfig `json:"webhooks,omitempty"` // Webhook configs for job completion notifications
// Multimedia sources (for cron jobs)
@@ -36,8 +39,13 @@ type WebhookConfig struct {
Method string `json:"method"` // HTTP method (POST, PUT, PATCH) - default: POST
Headers map[string]string `json:"headers,omitempty"` // Custom headers (e.g., Authorization)
PayloadTemplate string `json:"payload_template,omitempty"` // Optional template for payload
// If PayloadTemplate is empty, uses default JSON structure.
// Available template variables: .Job, .Task, .Result, .Error, .Status.
// If PayloadTemplate is empty, uses default JSON structure
// Available template variables:
// - {{.Job}} - Job object with all fields
// - {{.Task}} - Task object
// - {{.Result}} - Job result (if successful)
// - {{.Error}} - Error message (if failed, empty string if successful)
// - {{.Status}} - Job status string
}
// MultimediaSourceConfig represents configuration for fetching multimedia content
@@ -118,9 +126,9 @@ type JobExecutionRequest struct {
// JobExecutionResponse represents the response after creating a job
type JobExecutionResponse struct {
JobID string `json:"job_id"` // unique job identifier
Status string `json:"status"` // initial status (pending)
URL string `json:"url"` // URL to poll for job status
JobID string `json:"job_id"`
Status string `json:"status"`
URL string `json:"url"` // URL to check job status
}
// TasksFile represents the structure of agent_tasks.json

View File

@@ -78,7 +78,7 @@ type AnthropicMessage struct {
// AnthropicContentBlock represents a content block in an Anthropic message
type AnthropicContentBlock struct {
Type string `json:"type"`
Text string `json:"text"`
Text string `json:"text,omitempty"`
Source *AnthropicImageSource `json:"source,omitempty"`
ID string `json:"id,omitempty"`
Name string `json:"name,omitempty"`
@@ -116,7 +116,7 @@ type AnthropicUsage struct {
// AnthropicStreamEvent represents a streaming event from the Anthropic API
type AnthropicStreamEvent struct {
Type string `json:"type"`
Index *int `json:"index,omitempty"`
Index int `json:"index,omitempty"`
ContentBlock *AnthropicContentBlock `json:"content_block,omitempty"`
Delta *AnthropicStreamDelta `json:"delta,omitempty"`
Message *AnthropicStreamMessage `json:"message,omitempty"`

View File

@@ -33,31 +33,31 @@ type GalleryResponse struct {
type VideoRequest struct {
BasicModelRequest
Prompt string `json:"prompt" yaml:"prompt"` // text description of the video to generate
NegativePrompt string `json:"negative_prompt" yaml:"negative_prompt"` // things to avoid in the output
StartImage string `json:"start_image" yaml:"start_image"` // URL or base64 of the first frame
EndImage string `json:"end_image" yaml:"end_image"` // URL or base64 of the last frame
Width int32 `json:"width" yaml:"width"` // output width in pixels
Height int32 `json:"height" yaml:"height"` // output height in pixels
NumFrames int32 `json:"num_frames" yaml:"num_frames"` // total number of frames to generate
FPS int32 `json:"fps" yaml:"fps"` // frames per second
Seconds string `json:"seconds,omitempty" yaml:"seconds,omitempty"` // duration in seconds (alternative to num_frames)
Size string `json:"size,omitempty" yaml:"size,omitempty"` // WxH shorthand (e.g. "512x512")
InputReference string `json:"input_reference,omitempty" yaml:"input_reference,omitempty"` // reference image or video URL
Seed int32 `json:"seed" yaml:"seed"` // random seed for reproducibility
CFGScale float32 `json:"cfg_scale" yaml:"cfg_scale"` // classifier-free guidance scale
Step int32 `json:"step" yaml:"step"` // number of diffusion steps
ResponseFormat string `json:"response_format" yaml:"response_format"` // output format (url or b64_json)
Prompt string `json:"prompt" yaml:"prompt"`
NegativePrompt string `json:"negative_prompt" yaml:"negative_prompt"`
StartImage string `json:"start_image" yaml:"start_image"`
EndImage string `json:"end_image" yaml:"end_image"`
Width int32 `json:"width" yaml:"width"`
Height int32 `json:"height" yaml:"height"`
NumFrames int32 `json:"num_frames" yaml:"num_frames"`
FPS int32 `json:"fps" yaml:"fps"`
Seconds string `json:"seconds,omitempty" yaml:"seconds,omitempty"`
Size string `json:"size,omitempty" yaml:"size,omitempty"`
InputReference string `json:"input_reference,omitempty" yaml:"input_reference,omitempty"`
Seed int32 `json:"seed" yaml:"seed"`
CFGScale float32 `json:"cfg_scale" yaml:"cfg_scale"`
Step int32 `json:"step" yaml:"step"`
ResponseFormat string `json:"response_format" yaml:"response_format"`
}
// @Description TTS request body
type TTSRequest struct {
BasicModelRequest
Input string `json:"input" yaml:"input"` // text input
Voice string `json:"voice" yaml:"voice"` // voice audio file or speaker id
Backend string `json:"backend" yaml:"backend"` // backend engine override
Language string `json:"language,omitempty" yaml:"language,omitempty"` // (optional) language to use with TTS model
Format string `json:"response_format,omitempty" yaml:"response_format,omitempty"` // (optional) output format
Input string `json:"input" yaml:"input"` // text input
Voice string `json:"voice" yaml:"voice"` // voice audio file or speaker id
Backend string `json:"backend" yaml:"backend"`
Language string `json:"language,omitempty" yaml:"language,omitempty"` // (optional) language to use with TTS model
Format string `json:"response_format,omitempty" yaml:"response_format,omitempty"` // (optional) output format
Stream bool `json:"stream,omitempty" yaml:"stream,omitempty"` // (optional) enable streaming TTS
SampleRate int `json:"sample_rate,omitempty" yaml:"sample_rate,omitempty"` // (optional) desired output sample rate
}
@@ -65,7 +65,7 @@ type TTSRequest struct {
// @Description VAD request body
type VADRequest struct {
BasicModelRequest
Audio []float32 `json:"audio" yaml:"audio"` // raw audio samples as float32 PCM
Audio []float32 `json:"audio" yaml:"audio"` // model name or full path
}
type VADSegment struct {
@@ -146,13 +146,13 @@ type SysInfoModel struct {
}
type SystemInformationResponse struct {
Backends []string `json:"backends"` // available backend engines
Models []SysInfoModel `json:"loaded_models"` // currently loaded models
Backends []string `json:"backends"`
Models []SysInfoModel `json:"loaded_models"`
}
type DetectionRequest struct {
BasicModelRequest
Image string `json:"image"` // URL or base64-encoded image to analyze
Image string `json:"image"`
}
type DetectionResponse struct {

View File

@@ -2,9 +2,9 @@ package schema
type TokenizeRequest struct {
BasicModelRequest
Content string `json:"content"` // text to tokenize
Content string `json:"content"`
}
type TokenizeResponse struct {
Tokens []int32 `json:"tokens"` // token IDs
Tokens []int32 `json:"tokens"`
}

View File

@@ -231,9 +231,6 @@ func (c *fakeBackendClient) QuantizationProgress(_ context.Context, _ *pb.Quanti
func (c *fakeBackendClient) StopQuantization(_ context.Context, _ *pb.QuantizationStopRequest, _ ...ggrpc.CallOption) (*pb.Result, error) {
return nil, nil
}
func (c *fakeBackendClient) Free(_ context.Context) error {
return nil
}
// --- fakeBackendClientFactory ---

View File

@@ -175,10 +175,6 @@ func (f *fakeGRPCBackend) StopQuantization(_ context.Context, _ *pb.Quantization
return &pb.Result{}, nil
}
func (f *fakeGRPCBackend) Free(_ context.Context) error {
return nil
}
// --- Tests ---
var _ = Describe("InFlightTrackingClient", func() {

View File

@@ -11,7 +11,6 @@ import (
"github.com/mudler/LocalAI/core/services/galleryop"
"github.com/mudler/LocalAI/pkg/model"
"github.com/mudler/xlog"
"github.com/nats-io/nats.go"
)
// DistributedModelManager wraps a local ModelManager and adds NATS fan-out
@@ -85,13 +84,6 @@ func (d *DistributedBackendManager) DeleteBackend(name string) error {
continue
}
if _, delErr := d.adapter.DeleteBackend(node.ID, name); delErr != nil {
if errors.Is(delErr, nats.ErrNoResponders) {
// Node's NATS subscription is gone — likely restarted with a new ID.
// Mark it unhealthy so future fan-outs skip it.
xlog.Warn("No NATS responders for node, marking unhealthy", "node", node.Name, "nodeID", node.ID)
d.registry.MarkUnhealthy(context.Background(), node.ID)
continue
}
xlog.Warn("Failed to propagate backend deletion to worker", "node", node.Name, "backend", name, "error", delErr)
errs = append(errs, fmt.Errorf("node %s: %w", node.Name, delErr))
}
@@ -113,11 +105,6 @@ func (d *DistributedBackendManager) ListBackends() (gallery.SystemBackends, erro
}
reply, err := d.adapter.ListBackends(node.ID)
if err != nil {
if errors.Is(err, nats.ErrNoResponders) {
xlog.Warn("No NATS responders for node, marking unhealthy", "node", node.Name, "nodeID", node.ID)
d.registry.MarkUnhealthy(context.Background(), node.ID)
continue
}
xlog.Warn("Failed to list backends on worker", "node", node.Name, "error", err)
continue
}
@@ -158,11 +145,6 @@ func (d *DistributedBackendManager) InstallBackend(ctx context.Context, op *gall
}
reply, err := d.adapter.InstallBackend(node.ID, backendName, "", string(galleriesJSON))
if err != nil {
if errors.Is(err, nats.ErrNoResponders) {
xlog.Warn("No NATS responders for node, marking unhealthy", "node", node.Name, "nodeID", node.ID)
d.registry.MarkUnhealthy(context.Background(), node.ID)
continue
}
xlog.Warn("Failed to install backend on worker", "node", node.Name, "backend", backendName, "error", err)
continue
}

View File

@@ -1,6 +1,6 @@
+++
disableToc = false
title = "GPU Acceleration"
title = "GPU acceleration"
weight = 9
url = "/features/gpu-acceleration/"
+++

View File

@@ -27,7 +27,8 @@ LocalAI provides a comprehensive set of features for running AI models locally.
- **[Realtime API](openai-realtime/)** - Low-latency multi-modal conversations (voice+text) over WebSocket
- **[Constrained Grammars](constrained_grammars/)** - Control model output format with BNF grammars
- **[GPU Acceleration](GPU-acceleration/)** - Optimize performance with GPU support
- **[Distribution](distribution/)** - Scale inference across multiple nodes (P2P federation or production distributed mode)
- **[Distributed Inference](distributed_inferencing/)** - Scale inference across multiple nodes
- **[Distributed Mode](distributed-mode/)** - Horizontal scaling with PostgreSQL, NATS, and remote backend nodes
- **[P2P API](p2p/)** - Monitor and manage P2P worker and federated nodes
- **[Model Context Protocol (MCP)](mcp/)** - Enable agentic capabilities with MCP integration
- **[Agents](agents/)** - Autonomous AI agents with tools, knowledge base, and skills

View File

@@ -1,6 +1,6 @@
+++
disableToc = false
title = "Agents"
title = "🤖 Agents"
weight = 21
url = '/features/agents'
+++

View File

@@ -1,223 +0,0 @@
+++
title = "API Discovery & Instructions"
weight = 27
toc = true
description = "Programmatic API discovery for agents, tools, and automation"
tags = ["API", "Agents", "Instructions", "Configuration", "Advanced"]
categories = ["Features"]
+++
LocalAI exposes a set of discovery endpoints that let external agents, coding assistants, and automation tools programmatically learn what the instance can do and how to control it — without reading documentation ahead of time.
## Quick start
```bash
# 1. Discover what's available
curl http://localhost:8080/.well-known/localai.json
# 2. Browse instruction areas
curl http://localhost:8080/api/instructions
# 3. Get an API guide for a specific instruction
curl http://localhost:8080/api/instructions/config-management
```
## Well-Known Discovery Endpoint
`GET /.well-known/localai.json`
Returns the instance version, all available endpoint URLs (flat and categorized), and runtime capabilities.
**Example response (abbreviated):**
```json
{
"version": "v2.28.0",
"endpoints": {
"chat_completions": "/v1/chat/completions",
"models": "/v1/models",
"config_metadata": "/api/models/config-metadata",
"instructions": "/api/instructions",
"swagger": "/swagger/index.html"
},
"endpoint_groups": {
"openai_compatible": { "chat_completions": "/v1/chat/completions", "..." : "..." },
"config_management": { "config_metadata": "/api/models/config-metadata", "..." : "..." },
"model_management": { "..." : "..." },
"monitoring": { "..." : "..." }
},
"capabilities": {
"config_metadata": true,
"config_patch": true,
"vram_estimate": true,
"mcp": true,
"agents": false,
"p2p": false
}
}
```
The `capabilities` object reflects the current runtime configuration — for example, `mcp` is only `true` if MCP is enabled, and `agents` is `true` only if the agent pool is running.
## Instructions API
Instructions are curated groups of related API endpoints. Each instruction maps to one or more Swagger tags and provides a focused, LLM-readable guide.
### List all instructions
`GET /api/instructions`
```bash
curl http://localhost:8080/api/instructions
```
Returns a compact list of instruction areas:
```json
{
"instructions": [
{
"name": "chat-inference",
"description": "OpenAI-compatible chat completions, text completions, and embeddings",
"tags": ["inference", "embeddings"],
"url": "/api/instructions/chat-inference"
},
{
"name": "config-management",
"description": "Discover, read, and modify model configuration fields with VRAM estimation",
"tags": ["config"],
"url": "/api/instructions/config-management"
}
],
"hint": "Fetch GET {url} for a markdown API guide. Add ?format=json for a raw OpenAPI fragment."
}
```
**Available instructions:**
| Instruction | Description |
|-------------|-------------|
| `chat-inference` | Chat completions, text completions, embeddings (OpenAI-compatible) |
| `audio` | Text-to-speech, transcription, voice activity detection, sound generation |
| `images` | Image generation and inpainting |
| `model-management` | Browse gallery, install, delete, manage models and backends |
| `config-management` | Discover, read, and modify model config fields with VRAM estimation |
| `monitoring` | System metrics, backend status, system information |
| `mcp` | Model Context Protocol — tool-augmented chat with MCP servers |
| `agents` | Agent task and job management |
| `video` | Video generation from text prompts |
### Get an instruction guide
`GET /api/instructions/:name`
By default, returns a **markdown guide** suitable for LLMs and humans:
```bash
curl http://localhost:8080/api/instructions/config-management
```
Add `?format=json` to get a raw **OpenAPI fragment** (filtered Swagger spec with only the relevant paths and definitions):
```bash
curl "http://localhost:8080/api/instructions/config-management?format=json"
```
## Configuration Management APIs
These endpoints let agents discover model configuration fields, read current settings, modify them, and estimate VRAM usage.
### Config metadata
`GET /api/models/config-metadata`
Returns structured metadata for all model configuration fields, organized by section. Each field includes its YAML path, Go type, UI type, label, description, default value, validation constraints, and available options.
```bash
# All fields
curl http://localhost:8080/api/models/config-metadata
# Filter by section
curl "http://localhost:8080/api/models/config-metadata?section=parameters"
```
### Autocomplete values
`GET /api/models/config-metadata/autocomplete/:provider`
Returns runtime values for dynamic fields. Providers include `backends`, `models`, `models:chat`, `models:tts`, `models:transcript`, `models:vad`.
```bash
# List available backends
curl http://localhost:8080/api/models/config-metadata/autocomplete/backends
# List chat-capable models
curl http://localhost:8080/api/models/config-metadata/autocomplete/models:chat
```
### Read model config
`GET /api/models/config-json/:name`
Returns the full model configuration as JSON:
```bash
curl http://localhost:8080/api/models/config-json/my-model
```
### Update model config
`PATCH /api/models/config-json/:name`
Deep-merges a JSON patch into the existing model configuration. Only include the fields you want to change:
```bash
curl -X PATCH http://localhost:8080/api/models/config-json/my-model \
-H "Content-Type: application/json" \
-d '{"context_size": 16384, "gpu_layers": 40}'
```
The endpoint validates the merged config and writes it to disk as YAML.
{{% notice context="warning" %}}
Config management endpoints require **admin authentication** when API keys are configured. The discovery and instructions endpoints are unauthenticated.
{{% /notice %}}
### VRAM estimation
`POST /api/models/vram-estimate`
Estimates VRAM usage for an installed model based on its weight files, context size, and GPU layer offloading:
```bash
curl -X POST http://localhost:8080/api/models/vram-estimate \
-H "Content-Type: application/json" \
-d '{"model": "my-model", "context_size": 8192}'
```
```json
{
"sizeBytes": 4368438272,
"sizeDisplay": "4.4 GB",
"vramBytes": 6123456789,
"vramDisplay": "6.1 GB",
"context_note": "Estimate used default context_size=8192. The model's trained maximum context is 131072; VRAM usage will be higher at larger context sizes.",
"model_max_context": 131072
}
```
Optional parameters: `gpu_layers` (number of layers to offload, 0 = all), `kv_quant_bits` (KV cache quantization, 0 = fp16).
## Integration guide
A recommended workflow for agent/tool builders:
1. **Discover**: Fetch `/.well-known/localai.json` to learn available endpoints and capabilities
2. **Browse instructions**: Fetch `/api/instructions` for an overview of instruction areas
3. **Deep dive**: Fetch `/api/instructions/:name` for a markdown API guide on a specific area
4. **Explore config**: Use `/api/models/config-metadata` to understand configuration fields
5. **Interact**: Use the standard OpenAI-compatible endpoints for inference, and the config management endpoints for runtime tuning
## Swagger UI
The full interactive API documentation is available at `/swagger/index.html`. All annotated endpoints can be explored and tested directly from the browser.

View File

@@ -1,6 +1,6 @@
+++
disableToc = false
title = "Audio to Text"
title = "🔈 Audio to text"
weight = 16
url = "/features/audio-to-text/"
+++

View File

@@ -1,6 +1,6 @@
+++
disableToc = false
title = "Authentication & Authorization"
title = "🔐 Authentication & Authorization"
weight = 26
url = '/features/authentication'
+++

View File

@@ -1,5 +1,5 @@
---
title: "Backends"
title: "⚙️ Backends"
description: "Learn how to use, manage, and develop backends in LocalAI"
weight: 4
url: "/backends/"

View File

@@ -1,6 +1,6 @@
+++
disableToc = false
title = "Constrained Grammars"
title = "✍️ Constrained Grammars"
weight = 15
url = "/features/constrained_grammars/"
+++

View File

@@ -5,7 +5,7 @@ weight = 14
url = "/features/distributed-mode/"
+++
Distributed mode enables horizontal scaling of LocalAI across multiple machines using **PostgreSQL** for state and node registry, and **NATS** for real-time coordination. Unlike the [P2P/federation approach]({{% relref "features/distributed_inferencing" %}}), distributed mode is designed for production deployments and Kubernetes environments where you need centralized management, health monitoring, and deterministic routing.
Distributed mode enables horizontal scaling of LocalAI across multiple machines using **PostgreSQL** for state and node registry, and **NATS** for real-time coordination. Unlike the [P2P/federation approach](/features/distribute/), distributed mode is designed for production deployments and Kubernetes environments where you need centralized management, health monitoring, and deterministic routing.
{{% notice note %}}
Distributed mode requires authentication enabled with a **PostgreSQL** database — SQLite is not supported. This is because the node registry, job store, and other distributed state are stored in PostgreSQL tables.

View File

@@ -1,12 +1,12 @@
+++
disableToc = false
title = "P2P / Federated Inference"
title = "🆕🖧 Distributed Inference"
weight = 15
url = "/features/distribute/"
+++
{{% notice tip %}}
Looking for production-grade horizontal scaling with PostgreSQL and NATS? See [Distributed Mode]({{% relref "features/distributed-mode" %}}).
Looking for production-grade horizontal scaling with PostgreSQL and NATS? See [Distributed Mode](/features/distributed-mode/).
{{% /notice %}}
This functionality enables LocalAI to distribute inference requests across multiple worker nodes, improving efficiency and performance. Nodes discover each other automatically and connect over P2P using a shared token, which keeps communication between the nodes of the network secure and private.

View File

@@ -1,34 +0,0 @@
+++
disableToc = false
title = "Distribution"
weight = 13
url = "/features/distribution/"
+++
LocalAI supports distributing inference workloads across multiple machines. There are two approaches, each suited to different use cases:
## Distributed Mode (PostgreSQL + NATS)
Production-grade horizontal scaling with centralized management. Frontends are stateless LocalAI instances behind a load balancer; workers self-register and receive backends dynamically via NATS. State lives in PostgreSQL.
**Best for:** production deployments, Kubernetes, managed infrastructure.
[Read more]({{% relref "features/distributed-mode" %}})
## P2P / Federated Inference
Peer-to-peer networking via libp2p. Share a token to form a cluster with automatic discovery — no central server required. Supports federated load balancing and worker-mode weight sharding.
**Best for:** ad-hoc clusters, community sharing, quick experimentation.
[Read more]({{% relref "features/distributed_inferencing" %}})
## Quick Comparison
| | P2P / Federation | Distributed Mode |
|---|---|---|
| **Discovery** | Automatic via libp2p token | Self-registration to frontend URL |
| **State storage** | In-memory / ledger | PostgreSQL |
| **Coordination** | Gossip protocol | NATS messaging |
| **Node management** | Automatic | REST API + WebUI |
| **Setup complexity** | Minimal (share a token) | Requires PostgreSQL + NATS |

View File

@@ -1,6 +1,6 @@
+++
disableToc = false
title = "Embeddings"
title = "🧠 Embeddings"
weight = 13
url = "/features/embeddings/"
+++

View File

@@ -1,7 +1,7 @@
+++
disableToc = false
title = "GPT Vision"
title = "🥽 GPT Vision"
weight = 14
url = "/features/gpt-vision/"
+++

View File

@@ -1,7 +1,7 @@
+++
disableToc = false
title = "Image Generation"
title = "🎨 Image generation"
weight = 12
url = "/features/image-generation/"
+++

View File

@@ -1,5 +1,5 @@
+++
title = "Model Context Protocol (MCP)"
title = "🔗 Model Context Protocol (MCP)"
weight = 20
toc = true
description = "Agentic capabilities with Model Context Protocol integration"

View File

@@ -1,7 +1,7 @@
+++
disableToc = false
title = "Model Gallery"
title = "🖼️ Model gallery"
weight = 18
url = '/models'
+++

View File

@@ -1,6 +1,6 @@
+++
disableToc = false
title = "Object Detection"
title = "🔍 Object detection"
weight = 13
url = "/features/object-detection/"
+++

View File

@@ -1,7 +1,7 @@
+++
disableToc = false
title = "OpenAI Functions and Tools"
title = "🔥 OpenAI functions and tools"
weight = 17
url = "/features/openai-functions/"
+++

View File

@@ -1,7 +1,7 @@
+++
disableToc = false
title = "Reranker"
title = "📈 Reranker"
weight = 11
url = "/features/reranker/"
+++

View File

@@ -1,6 +1,6 @@
+++
disableToc = false
title = "Runtime Settings"
title = "⚙️ Runtime Settings"
weight = 25
url = '/features/runtime-settings'
+++

View File

@@ -1,7 +1,7 @@
+++
disableToc = false
title = "Stores"
title = "💾 Stores"
weight = 18
url = '/stores'
+++

View File

@@ -1,7 +1,7 @@
+++
disableToc = false
title = "Text Generation (GPT)"
title = "📖 Text generation (GPT)"
weight = 10
url = "/features/text-generation/"
+++

View File

@@ -1,7 +1,7 @@
+++
disableToc = false
title = "Text to Audio (TTS)"
title = "🗣 Text to audio (TTS)"
weight = 11
url = "/features/text-to-audio/"
+++

Some files were not shown because too many files have changed in this diff Show More