mirror of
https://github.com/mudler/LocalAI.git
synced 2026-06-10 01:36:57 -04:00
Compare commits
23 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
7e4df67556 | ||
|
|
5b24b4dacc | ||
|
|
52fdb46892 | ||
|
|
b389f0fe5f | ||
|
|
74281be340 | ||
|
|
cacf2f7a2c | ||
|
|
4a2cc64d07 | ||
|
|
4647770316 | ||
|
|
3c9b9529c0 | ||
|
|
fc2bd0986c | ||
|
|
a473a32678 | ||
|
|
3e220373b0 | ||
|
|
fbcd886a47 | ||
|
|
e1a782b70f | ||
|
|
73cfedc023 | ||
|
|
b982c977d5 | ||
|
|
532ca1b3a2 | ||
|
|
00ad55b590 | ||
|
|
4c58fd302f | ||
|
|
66582e7035 | ||
|
|
1d13949588 | ||
|
|
c8ad67bbca | ||
|
|
1c92b00918 |
@@ -1,10 +1,10 @@
|
||||
# ds4 backend Makefile.
|
||||
#
|
||||
# Upstream pin lives below as DS4_VERSION?=072bc0feb187be5f374c08b16d0045e1ad7bc9bc
|
||||
# Upstream pin lives below as DS4_VERSION?=22393e770ea8eb7501d8718d6f66c6374004e03f
|
||||
# (.github/bump_deps.sh) can find and update it - matches the
|
||||
# llama-cpp / ik-llama-cpp / turboquant convention.
|
||||
|
||||
DS4_VERSION?=072bc0feb187be5f374c08b16d0045e1ad7bc9bc
|
||||
DS4_VERSION?=22393e770ea8eb7501d8718d6f66c6374004e03f
|
||||
DS4_REPO?=https://github.com/antirez/ds4
|
||||
|
||||
CURRENT_MAKEFILE_DIR := $(dir $(abspath $(lastword $(MAKEFILE_LIST))))
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
|
||||
IK_LLAMA_VERSION?=3bf7e836c2c5a895e8d12d3eb7e398ae7ab2f9ce
|
||||
IK_LLAMA_VERSION?=8960c5ba5ee9db30ba838304373aa4dbec9f7cbd
|
||||
LLAMA_REPO?=https://github.com/ikawrakow/ik_llama.cpp
|
||||
|
||||
CMAKE_ARGS?=
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
|
||||
LLAMA_VERSION?=aa50b2c2ae91326d5aad956ceeb015d1d48e626b
|
||||
LLAMA_VERSION?=751ebd17a58a8a513994509214373bb9e6a3d66c
|
||||
LLAMA_REPO?=https://github.com/ggerganov/llama.cpp
|
||||
|
||||
CMAKE_ARGS?=
|
||||
|
||||
@@ -573,8 +573,12 @@ static void params_parse(server_context& /*ctx_server*/, const backend::ModelOpt
|
||||
// checkpoint_min_step: minimum spacing between context checkpoints in
|
||||
// tokens (0 disables the minimum). Match upstream's default (256). This
|
||||
// field was renamed from `checkpoint_every_nt` in llama.cpp; the semantics
|
||||
// also shifted from a fixed cadence to a minimum spacing.
|
||||
// also shifted from a fixed cadence to a minimum spacing. The turboquant
|
||||
// fork branched before the field existed, so skip it on the legacy path
|
||||
// (LOCALAI_LEGACY_LLAMA_CPP_SPEC is injected by patch-grpc-server.sh).
|
||||
#ifndef LOCALAI_LEGACY_LLAMA_CPP_SPEC
|
||||
params.checkpoint_min_step = 256;
|
||||
#endif
|
||||
|
||||
// Decode options. Options take the form optname:optvalue; boolean options may be given as just optname.
|
||||
for (int i = 0; i < request->options_size(); i++) {
|
||||
@@ -748,11 +752,18 @@ static void params_parse(server_context& /*ctx_server*/, const backend::ModelOpt
|
||||
params.cache_idle_slots = false;
|
||||
}
|
||||
|
||||
#ifndef LOCALAI_LEGACY_LLAMA_CPP_SPEC
|
||||
// --- minimum context-checkpoint spacing (upstream -cms / --checkpoint-min-step) ---
|
||||
// 0 disables the minimum-spacing gate. Old option names (`checkpoint_every_nt`,
|
||||
// `checkpoint_every_n_tokens`) are kept as aliases for backward compatibility
|
||||
// with existing user configs: upstream renamed the field and shifted its
|
||||
// semantics from a fixed cadence to a minimum spacing.
|
||||
//
|
||||
// Gated out for the turboquant fork, which lacks common_params::
|
||||
// checkpoint_min_step. The leading `}` closing the cache_idle_slots
|
||||
// branch is removed with this block; the next `} else if` (n_ubatch)
|
||||
// then closes cache_idle_slots, so braces stay balanced under both
|
||||
// preprocessor branches.
|
||||
} else if (!strcmp(optname, "checkpoint_min_step") || !strcmp(optname, "checkpoint_min_spacing") ||
|
||||
!strcmp(optname, "checkpoint_every_nt") || !strcmp(optname, "checkpoint_every_n_tokens")) {
|
||||
if (optval != NULL) {
|
||||
@@ -762,6 +773,7 @@ static void params_parse(server_context& /*ctx_server*/, const backend::ModelOpt
|
||||
// If conversion fails, keep default value (256)
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
// --- physical batch size (upstream -ub / --ubatch-size) ---
|
||||
// Note: line ~482 already aliases n_ubatch to n_batch as a default; this
|
||||
@@ -1165,9 +1177,15 @@ static void params_parse(server_context& /*ctx_server*/, const backend::ModelOpt
|
||||
params.tensor_buft_overrides.push_back({nullptr, nullptr});
|
||||
}
|
||||
}
|
||||
// The draft tensor_buft_overrides are only populated under the modern
|
||||
// (post-#22838) layout, whose population code is itself gated by
|
||||
// LOCALAI_LEGACY_LLAMA_CPP_SPEC above. The turboquant fork lacks
|
||||
// common_params_speculative::draft entirely, so skip the sentinel there too.
|
||||
#ifndef LOCALAI_LEGACY_LLAMA_CPP_SPEC
|
||||
if (!params.speculative.draft.tensor_buft_overrides.empty()) {
|
||||
params.speculative.draft.tensor_buft_overrides.push_back({nullptr, nullptr});
|
||||
}
|
||||
#endif
|
||||
|
||||
// TODO: Add yarn
|
||||
|
||||
|
||||
@@ -124,8 +124,11 @@ fi
|
||||
# 5. Define LOCALAI_LEGACY_LLAMA_CPP_SPEC at the top of the file so the
|
||||
# grpc-server option parser skips the new option-handler blocks (ngram_mod,
|
||||
# ngram_map_k, ngram_map_k4v, ngram_cache, draft.cache_type_*, draft.cpuparams*,
|
||||
# draft.tensor_buft_overrides) introduced for the post-#22838 layout. Those
|
||||
# blocks reference struct fields that simply do not exist in the fork.
|
||||
# draft.tensor_buft_overrides) introduced for the post-#22838 layout, the
|
||||
# draft.tensor_buft_overrides sentinel termination, and the
|
||||
# common_params::checkpoint_min_step default/option (added with the
|
||||
# 35c9b1f3 bump). Those blocks reference struct fields that simply do not
|
||||
# exist in the fork.
|
||||
if grep -q '^#define LOCALAI_LEGACY_LLAMA_CPP_SPEC' "$SRC"; then
|
||||
echo "==> $SRC already defines LOCALAI_LEGACY_LLAMA_CPP_SPEC, skipping"
|
||||
else
|
||||
|
||||
@@ -11,7 +11,7 @@ JOBS?=$(shell nproc --ignore=1)
|
||||
# build; leaving this on `master` always picks up the latest C-API surface
|
||||
# (incl. the per-detection accessor functions used by gorfdetrcpp.go).
|
||||
RFDETR_REPO?=https://github.com/mudler/rf-detr.cpp.git
|
||||
RFDETR_VERSION?=main
|
||||
RFDETR_VERSION?=65c0ffcc9a9bc9dae38252f63d0417c9845a6cf7
|
||||
|
||||
ifeq ($(NATIVE),false)
|
||||
CMAKE_ARGS+=-DGGML_NATIVE=OFF
|
||||
|
||||
@@ -8,7 +8,7 @@ JOBS?=$(shell nproc --ignore=1)
|
||||
|
||||
# stablediffusion.cpp (ggml)
|
||||
STABLEDIFFUSION_GGML_REPO?=https://github.com/leejet/stable-diffusion.cpp
|
||||
STABLEDIFFUSION_GGML_VERSION?=29ab511fc75f89fbab148665eab1a8e10a139a72
|
||||
STABLEDIFFUSION_GGML_VERSION?=0e4ee04488159b81d95a9ffcd983a077fd5dcb77
|
||||
|
||||
CMAKE_ARGS+=-DGGML_MAX_NAME=128
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@ JOBS?=$(shell nproc --ignore=1)
|
||||
|
||||
# whisper.cpp version
|
||||
WHISPER_REPO?=https://github.com/ggml-org/whisper.cpp
|
||||
WHISPER_CPP_VERSION?=6dcdd6536456158667747f724d6bd3a2ceaa8d88
|
||||
WHISPER_CPP_VERSION?=f24588a272ae8e23280d9c220536437164e6ed28
|
||||
SO_TARGET?=libgowhisper.so
|
||||
|
||||
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF
|
||||
|
||||
@@ -3,5 +3,5 @@
|
||||
# on a cu130 host. Pull the cu130-flavoured wheel from vLLM's per-tag index
|
||||
# instead — the cublas13 case in install.sh adds --index-strategy=unsafe-best-match
|
||||
# so uv consults this index alongside PyPI.
|
||||
--extra-index-url https://wheels.vllm.ai/0.21.0/cu130
|
||||
vllm==0.21.0
|
||||
--extra-index-url https://wheels.vllm.ai/0.22.0/cu130
|
||||
vllm==0.22.0
|
||||
|
||||
@@ -90,6 +90,8 @@ type Application struct {
|
||||
// LocalAI Assistant in-process MCP server. nil when DisableLocalAIAssistant
|
||||
// is set; otherwise initialised in start() after galleryService.
|
||||
localAIAssistant *mcpTools.LocalAIAssistantHolder
|
||||
|
||||
shutdownOnce sync.Once
|
||||
}
|
||||
|
||||
func newApplication(appConfig *config.ApplicationConfig) *Application {
|
||||
@@ -320,6 +322,24 @@ func (a *Application) IsDistributed() bool {
|
||||
return a.distributed != nil
|
||||
}
|
||||
|
||||
// Shutdown stops backend gRPC processes and distributed services
|
||||
// synchronously on the caller's stack. The context-cancel goroutine wired
|
||||
// in New does the same work asynchronously, which races test-binary exit
|
||||
// and CLI shutdown — orphaning spawned mock-backend / llama.cpp / etc.
|
||||
// children to init. Callers that need a guarantee that cleanup has
|
||||
// finished before they proceed (AfterSuite/AfterEach, signal handlers)
|
||||
// must call this. Safe to call multiple times.
|
||||
func (a *Application) Shutdown() error {
|
||||
var err error
|
||||
a.shutdownOnce.Do(func() {
|
||||
a.distributed.Shutdown()
|
||||
if a.modelLoader != nil {
|
||||
err = a.modelLoader.StopAllGRPC()
|
||||
}
|
||||
})
|
||||
return err
|
||||
}
|
||||
|
||||
// waitForHealthyWorker blocks until at least one healthy backend worker is registered.
|
||||
// This prevents the agent pool from failing during startup when workers haven't connected yet.
|
||||
func (a *Application) waitForHealthyWorker() {
|
||||
|
||||
@@ -449,13 +449,15 @@ func New(opts ...config.AppOption) (*Application, error) {
|
||||
|
||||
application.ModelLoader().SetBackendLoggingEnabled(options.EnableBackendLogging)
|
||||
|
||||
// turn off any process that was started by GRPC if the context is canceled
|
||||
// Safety-net cleanup if the application context is cancelled without
|
||||
// the caller invoking Shutdown directly. This is fire-and-forget — it
|
||||
// races binary exit and is unreliable in tests; the deterministic path
|
||||
// is application.Shutdown(), which Shutdown's sync.Once dedupes with
|
||||
// this goroutine.
|
||||
go func() {
|
||||
<-options.Context.Done()
|
||||
xlog.Debug("Context canceled, shutting down")
|
||||
application.distributed.Shutdown()
|
||||
err := application.ModelLoader().StopAllGRPC()
|
||||
if err != nil {
|
||||
if err := application.Shutdown(); err != nil {
|
||||
xlog.Error("error while stopping all grpc backends", "error", err)
|
||||
}
|
||||
}()
|
||||
|
||||
@@ -4,6 +4,7 @@ import (
|
||||
"encoding/json"
|
||||
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
"github.com/mudler/LocalAI/pkg/reasoning"
|
||||
|
||||
. "github.com/onsi/ginkgo/v2"
|
||||
. "github.com/onsi/gomega"
|
||||
@@ -42,3 +43,35 @@ var _ = Describe("grpcModelOpts EngineArgs", func() {
|
||||
Expect(opts.EngineArgs).To(BeEmpty())
|
||||
})
|
||||
})
|
||||
|
||||
// Guards the DisableReasoning -> enable_thinking metadata conversion that the
|
||||
// per-request reasoning_effort feature (issue #10072) relies on: the request
|
||||
// merge sets ReasoningConfig.DisableReasoning, and gRPCPredictOpts is where it
|
||||
// becomes the gRPC PredictOptions.Metadata the backend reads.
|
||||
var _ = Describe("gRPCPredictOpts enable_thinking metadata", func() {
|
||||
// withReasoning builds a fully-defaulted config (gRPCPredictOpts dereferences
|
||||
// many pointer fields) and overrides only the reasoning toggle.
|
||||
withReasoning := func(disable *bool) config.ModelConfig {
|
||||
cfg := config.ModelConfig{}
|
||||
cfg.SetDefaults()
|
||||
cfg.ReasoningConfig = reasoning.Config{DisableReasoning: disable}
|
||||
return cfg
|
||||
}
|
||||
disabled := true
|
||||
enabled := false
|
||||
|
||||
It("emits enable_thinking=false when reasoning is disabled", func() {
|
||||
opts := gRPCPredictOpts(withReasoning(&disabled), "/tmp/models")
|
||||
Expect(opts.Metadata).To(HaveKeyWithValue("enable_thinking", "false"))
|
||||
})
|
||||
|
||||
It("emits enable_thinking=true when reasoning is enabled", func() {
|
||||
opts := gRPCPredictOpts(withReasoning(&enabled), "/tmp/models")
|
||||
Expect(opts.Metadata).To(HaveKeyWithValue("enable_thinking", "true"))
|
||||
})
|
||||
|
||||
It("omits enable_thinking when reasoning is unset", func() {
|
||||
opts := gRPCPredictOpts(withReasoning(nil), "/tmp/models")
|
||||
Expect(opts.Metadata).ToNot(HaveKey("enable_thinking"))
|
||||
})
|
||||
})
|
||||
|
||||
@@ -577,12 +577,8 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error {
|
||||
}
|
||||
|
||||
signals.RegisterGracefulTerminationHandler(func() {
|
||||
if err := app.ModelLoader().StopAllGRPC(); err != nil {
|
||||
xlog.Error("error while stopping all grpc backends", "error", err)
|
||||
}
|
||||
// Clean up distributed services (idempotent — safe if already called)
|
||||
if d := app.Distributed(); d != nil {
|
||||
d.Shutdown()
|
||||
if err := app.Shutdown(); err != nil {
|
||||
xlog.Error("error while shutting down application", "error", err)
|
||||
}
|
||||
})
|
||||
|
||||
|
||||
@@ -732,6 +732,17 @@ func (cfg *ModelConfig) SetDefaults(opts ...ConfigLoaderOption) {
|
||||
cfg.Proxy.Mode = ProxyModePassthrough
|
||||
}
|
||||
|
||||
// When templating is delegated to the backend (use_tokenizer_template),
|
||||
// the backend also owns tool-call grammar generation and parsing. Sending
|
||||
// a LocalAI-generated grammar alongside overrides the backend's native
|
||||
// (name-first) tool pipeline and makes it stream the tool-call JSON back as
|
||||
// plain content (issue #10052). The GGUF auto-import path already couples
|
||||
// these two flags; enforce it here so gallery and hand-written configs that
|
||||
// set use_tokenizer_template directly stay consistent.
|
||||
if cfg.TemplateConfig.UseTokenizerTemplate {
|
||||
cfg.FunctionsConfig.GrammarConfig.NoGrammar = true
|
||||
}
|
||||
|
||||
// Apply model-family-specific inference defaults before generic fallbacks.
|
||||
// This ensures gallery-installed and runtime-loaded models get optimal parameters.
|
||||
ApplyInferenceDefaults(cfg, cfg.Name, cfg.Model)
|
||||
|
||||
@@ -471,4 +471,33 @@ concurrency_groups:
|
||||
Expect(configs[0].GetConcurrencyGroups()).To(Equal([]string{"vram-heavy", "120b"}))
|
||||
})
|
||||
})
|
||||
|
||||
// When templating is delegated to the backend (use_tokenizer_template),
|
||||
// the backend also owns tool-call grammar generation and parsing. A
|
||||
// LocalAI-generated grammar sent alongside would override the backend's
|
||||
// native (name-first) tool pipeline and make it stream the tool-call JSON
|
||||
// back as plain content (issue #10052). SetDefaults must therefore couple
|
||||
// the two: tokenizer template implies grammar generation is disabled.
|
||||
Context("use_tokenizer_template couples with grammar disable (issue #10052)", func() {
|
||||
It("disables Go grammar generation when the tokenizer template is used", func() {
|
||||
cfg := &ModelConfig{
|
||||
TemplateConfig: TemplateConfig{UseTokenizerTemplate: true},
|
||||
}
|
||||
Expect(cfg.FunctionsConfig.GrammarConfig.NoGrammar).To(BeFalse())
|
||||
|
||||
cfg.SetDefaults()
|
||||
|
||||
Expect(cfg.FunctionsConfig.GrammarConfig.NoGrammar).To(BeTrue(),
|
||||
"use_tokenizer_template must imply grammar.disable so tools go to the backend's native pipeline")
|
||||
})
|
||||
|
||||
It("leaves grammar generation enabled when the tokenizer template is not used", func() {
|
||||
cfg := &ModelConfig{}
|
||||
|
||||
cfg.SetDefaults()
|
||||
|
||||
Expect(cfg.FunctionsConfig.GrammarConfig.NoGrammar).To(BeFalse(),
|
||||
"models that template in Go still rely on the Go-generated grammar")
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
@@ -308,6 +308,11 @@ var _ = Describe("API test", func() {
|
||||
var cancel context.CancelFunc
|
||||
var tmpdir string
|
||||
var modelDir string
|
||||
// localAIApp captures the Application so AfterEach can synchronously
|
||||
// stop the spawned gRPC backend processes. application.New cancels
|
||||
// them asynchronously on context cancel, which races with test-binary
|
||||
// exit and leaks mock-backend children to init.
|
||||
var localAIApp *application.Application
|
||||
|
||||
commonOpts := []config.AppOption{
|
||||
config.WithDebug(true),
|
||||
@@ -736,14 +741,14 @@ parameters:
|
||||
)
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
|
||||
application, err := application.New(
|
||||
localAIApp, err = application.New(
|
||||
append(commonOpts,
|
||||
config.WithContext(c),
|
||||
config.WithSystemState(systemState),
|
||||
)...)
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
application.ModelLoader().SetExternalBackend("mock-backend", mockBackendPath)
|
||||
app, err = API(application)
|
||||
localAIApp.ModelLoader().SetExternalBackend("mock-backend", mockBackendPath)
|
||||
app, err = API(localAIApp)
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
go func() {
|
||||
if err := app.Start("127.0.0.1:9090"); err != nil && err != http.ErrServerClosed {
|
||||
@@ -765,6 +770,11 @@ parameters:
|
||||
}, "2m").ShouldNot(HaveOccurred())
|
||||
})
|
||||
AfterEach(func() {
|
||||
// Synchronous shutdown — context-cancel cleanup is async and races
|
||||
// test-binary exit, orphaning mock-backend children to init.
|
||||
if localAIApp != nil {
|
||||
_ = localAIApp.Shutdown()
|
||||
}
|
||||
cancel()
|
||||
if app != nil {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
|
||||
@@ -976,15 +986,15 @@ parameters:
|
||||
)
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
|
||||
application, err := application.New(
|
||||
localAIApp, err = application.New(
|
||||
append(commonOpts,
|
||||
config.WithContext(c),
|
||||
config.WithSystemState(systemState),
|
||||
config.WithConfigFile(configFile))...,
|
||||
)
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
application.ModelLoader().SetExternalBackend("mock-backend", mockBackendPath)
|
||||
app, err = API(application)
|
||||
localAIApp.ModelLoader().SetExternalBackend("mock-backend", mockBackendPath)
|
||||
app, err = API(localAIApp)
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
|
||||
go func() {
|
||||
@@ -1005,6 +1015,11 @@ parameters:
|
||||
}, "2m").ShouldNot(HaveOccurred())
|
||||
})
|
||||
AfterEach(func() {
|
||||
// Synchronous shutdown — context-cancel cleanup is async and races
|
||||
// test-binary exit, orphaning mock-backend children to init.
|
||||
if localAIApp != nil {
|
||||
_ = localAIApp.Shutdown()
|
||||
}
|
||||
cancel()
|
||||
if app != nil {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
|
||||
|
||||
@@ -341,6 +341,19 @@ func processStreamWithTools(
|
||||
}
|
||||
}
|
||||
|
||||
// Issue #9722: when the C++ autoparser is already producing tool
|
||||
// calls (it delivers them via ChatDeltas, which are flushed at
|
||||
// end-of-stream by ToolCallsFromChatDeltas -> buildDeferredToolCallChunks),
|
||||
// skip the Go-side iterative parser below. Running both parsers makes
|
||||
// the same logical tool call surface at multiple `index` values.
|
||||
// The deferred flush is guarded by lastEmittedCount, so the race where
|
||||
// the Go parser already emitted before this flag flipped also stays
|
||||
// single-emission. Backends without an autoparser (e.g. vLLM) keep
|
||||
// hasChatDeltaToolCalls=false and are unaffected.
|
||||
if hasChatDeltaToolCalls {
|
||||
return true
|
||||
}
|
||||
|
||||
// Try incremental XML parsing for streaming support using iterative parser
|
||||
// This allows emitting partial tool calls as they're being generated
|
||||
cleanedResult := functions.CleanupLLMResult(result, cfg.FunctionsConfig)
|
||||
|
||||
@@ -310,6 +310,26 @@ func mergeOpenAIRequestAndModelConfig(config *config.ModelConfig, input *schema.
|
||||
config.Temperature = input.Temperature
|
||||
}
|
||||
|
||||
// Map the per-request reasoning_effort onto the reasoning toggle the
|
||||
// backend reads (enable_thinking metadata, set in gRPCPredictOpts).
|
||||
// "none" disables thinking for this request - the use case from #10072,
|
||||
// running a single Qwen3-style model and turning reasoning off per
|
||||
// request. Any explicit effort level enables thinking, UNLESS the model
|
||||
// config explicitly disabled it (DisableReasoning==true wins): an
|
||||
// operator who deliberately turned reasoning off should not be overridden
|
||||
// by a request. A value of "none" always disables, since that never
|
||||
// conflicts with a config that also disables.
|
||||
switch strings.ToLower(input.ReasoningEffort) {
|
||||
case "none":
|
||||
disable := true
|
||||
config.ReasoningConfig.DisableReasoning = &disable
|
||||
case "minimal", "low", "medium", "high":
|
||||
if config.ReasoningConfig.DisableReasoning == nil || !*config.ReasoningConfig.DisableReasoning {
|
||||
enable := false
|
||||
config.ReasoningConfig.DisableReasoning = &enable
|
||||
}
|
||||
}
|
||||
|
||||
// Collapse the modern max_completion_tokens alias into the
|
||||
// legacy Maxtokens field so downstream code reads exactly one.
|
||||
// MaxCompletionTokens wins on conflict — it's the canonical
|
||||
|
||||
@@ -597,3 +597,137 @@ var _ = Describe("SetModelAndConfig tool_choice parsing (chat completions)", fun
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
// These tests cover the per-request reasoning_effort -> enable_thinking mapping.
|
||||
// The merge lives in mergeOpenAIRequestAndModelConfig (called from
|
||||
// SetOpenAIRequest), so they drive the full middleware chain like the
|
||||
// production /v1/chat/completions route does. The block builds its own app per
|
||||
// test so the model config can be varied (some cases need reasoning.disable set
|
||||
// in the model YAML to assert that an explicit config disable wins).
|
||||
//
|
||||
// Mapping under test (issue #10072):
|
||||
// - reasoning_effort=none -> DisableReasoning=true
|
||||
// - reasoning_effort=minimal/low/medium/high -> DisableReasoning=false, UNLESS the
|
||||
// model config explicitly set true
|
||||
// - empty / unrecognized -> no change
|
||||
var _ = Describe("SetModelAndConfig reasoning_effort parsing (chat completions)", func() {
|
||||
var modelDir string
|
||||
|
||||
BeforeEach(func() {
|
||||
var err error
|
||||
modelDir, err = os.MkdirTemp("", "localai-test-models-*")
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
})
|
||||
|
||||
AfterEach(func() {
|
||||
_ = os.RemoveAll(modelDir)
|
||||
})
|
||||
|
||||
// buildApp writes a model config with the given YAML body and returns an app
|
||||
// plus a pointer to the captured per-request config.
|
||||
buildApp := func(cfgYAML string) (*echo.Echo, **config.ModelConfig) {
|
||||
Expect(os.WriteFile(filepath.Join(modelDir, "test-model.yaml"), []byte(cfgYAML), 0644)).To(Succeed())
|
||||
|
||||
ss := &system.SystemState{Model: system.Model{ModelsPath: modelDir}}
|
||||
appConfig := config.NewApplicationConfig()
|
||||
appConfig.SystemState = ss
|
||||
mcl := config.NewModelConfigLoader(modelDir)
|
||||
ml := model.NewModelLoader(ss)
|
||||
re := NewRequestExtractor(mcl, ml, appConfig)
|
||||
|
||||
captured := new(*config.ModelConfig)
|
||||
app := echo.New()
|
||||
app.POST("/v1/chat/completions",
|
||||
func(c echo.Context) error {
|
||||
if cfg, ok := c.Get(CONTEXT_LOCALS_KEY_MODEL_CONFIG).(*config.ModelConfig); ok {
|
||||
*captured = cfg
|
||||
}
|
||||
return c.String(http.StatusOK, "ok")
|
||||
},
|
||||
re.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.OpenAIRequest) }),
|
||||
func(next echo.HandlerFunc) echo.HandlerFunc {
|
||||
return func(c echo.Context) error {
|
||||
if err := re.SetOpenAIRequest(c); err != nil {
|
||||
return err
|
||||
}
|
||||
return next(c)
|
||||
}
|
||||
},
|
||||
)
|
||||
return app, captured
|
||||
}
|
||||
|
||||
chatReq := func(effort string) string {
|
||||
return `{"model":"test-model",` +
|
||||
`"messages":[{"role":"user","content":"hi"}],` +
|
||||
`"reasoning_effort":` + effort + `}`
|
||||
}
|
||||
|
||||
plainCfg := "name: test-model\nbackend: llama-cpp\n"
|
||||
|
||||
It("disables thinking for reasoning_effort=none", func() {
|
||||
app, captured := buildApp(plainCfg)
|
||||
rec := postJSON(app, "/v1/chat/completions", chatReq(`"none"`))
|
||||
|
||||
Expect(rec.Code).To(Equal(http.StatusOK))
|
||||
Expect(*captured).ToNot(BeNil())
|
||||
Expect((*captured).ReasoningConfig.DisableReasoning).ToNot(BeNil())
|
||||
Expect(*(*captured).ReasoningConfig.DisableReasoning).To(BeTrue())
|
||||
})
|
||||
|
||||
It("enables thinking for reasoning_effort=high when config is unset", func() {
|
||||
app, captured := buildApp(plainCfg)
|
||||
rec := postJSON(app, "/v1/chat/completions", chatReq(`"high"`))
|
||||
|
||||
Expect(rec.Code).To(Equal(http.StatusOK))
|
||||
Expect(*captured).ToNot(BeNil())
|
||||
Expect((*captured).ReasoningConfig.DisableReasoning).ToNot(BeNil())
|
||||
Expect(*(*captured).ReasoningConfig.DisableReasoning).To(BeFalse())
|
||||
})
|
||||
|
||||
It("enables thinking for reasoning_effort=high when config explicitly set false", func() {
|
||||
app, captured := buildApp(plainCfg + "reasoning:\n disable: false\n")
|
||||
rec := postJSON(app, "/v1/chat/completions", chatReq(`"high"`))
|
||||
|
||||
Expect(rec.Code).To(Equal(http.StatusOK))
|
||||
Expect(*captured).ToNot(BeNil())
|
||||
Expect((*captured).ReasoningConfig.DisableReasoning).ToNot(BeNil())
|
||||
Expect(*(*captured).ReasoningConfig.DisableReasoning).To(BeFalse())
|
||||
})
|
||||
|
||||
It("config wins: reasoning_effort=high cannot re-enable when config explicitly disabled", func() {
|
||||
app, captured := buildApp(plainCfg + "reasoning:\n disable: true\n")
|
||||
rec := postJSON(app, "/v1/chat/completions", chatReq(`"high"`))
|
||||
|
||||
Expect(rec.Code).To(Equal(http.StatusOK))
|
||||
Expect(*captured).ToNot(BeNil())
|
||||
Expect((*captured).ReasoningConfig.DisableReasoning).ToNot(BeNil())
|
||||
Expect(*(*captured).ReasoningConfig.DisableReasoning).To(BeTrue())
|
||||
})
|
||||
|
||||
It("is a no-op when reasoning_effort is empty", func() {
|
||||
app, captured := buildApp(plainCfg)
|
||||
rec := postJSON(app, "/v1/chat/completions",
|
||||
`{"model":"test-model","messages":[{"role":"user","content":"hi"}]}`)
|
||||
|
||||
Expect(rec.Code).To(Equal(http.StatusOK))
|
||||
Expect(*captured).ToNot(BeNil())
|
||||
Expect((*captured).ReasoningConfig.DisableReasoning).To(BeNil())
|
||||
})
|
||||
|
||||
It("is case-insensitive (None disables, HIGH enables)", func() {
|
||||
app, captured := buildApp(plainCfg)
|
||||
rec := postJSON(app, "/v1/chat/completions", chatReq(`"None"`))
|
||||
Expect(rec.Code).To(Equal(http.StatusOK))
|
||||
Expect(*captured).ToNot(BeNil())
|
||||
Expect((*captured).ReasoningConfig.DisableReasoning).ToNot(BeNil())
|
||||
Expect(*(*captured).ReasoningConfig.DisableReasoning).To(BeTrue())
|
||||
|
||||
app2, captured2 := buildApp(plainCfg)
|
||||
rec2 := postJSON(app2, "/v1/chat/completions", chatReq(`"HIGH"`))
|
||||
Expect(rec2.Code).To(Equal(http.StatusOK))
|
||||
Expect(*captured2).ToNot(BeNil())
|
||||
Expect((*captured2).ReasoningConfig.DisableReasoning).ToNot(BeNil())
|
||||
Expect(*(*captured2).ReasoningConfig.DisableReasoning).To(BeFalse())
|
||||
})
|
||||
})
|
||||
|
||||
@@ -1,28 +1,52 @@
|
||||
import { test, expect } from './coverage-fixtures.js'
|
||||
import { test, expect } from "./coverage-fixtures.js";
|
||||
|
||||
const MOCK_MODELS_RESPONSE = {
|
||||
models: [
|
||||
{ name: 'llama-model', description: 'A llama model', backend: 'llama-cpp', installed: false, tags: ['chat'] },
|
||||
{ name: 'whisper-model', description: 'A whisper model', backend: 'whisper', installed: true, tags: ['transcript'] },
|
||||
{ name: 'stablediffusion-model', description: 'An image model', backend: 'stablediffusion', installed: false, tags: ['sd'] },
|
||||
{ name: 'unknown-model', description: 'No backend', backend: '', installed: false, tags: [] },
|
||||
{
|
||||
name: "llama-model",
|
||||
description: "A llama model",
|
||||
backend: "llama-cpp",
|
||||
installed: false,
|
||||
tags: ["chat"],
|
||||
},
|
||||
{
|
||||
name: "whisper-model",
|
||||
description: "A whisper model",
|
||||
backend: "whisper",
|
||||
installed: true,
|
||||
tags: ["transcript"],
|
||||
},
|
||||
{
|
||||
name: "stablediffusion-model",
|
||||
description: "An image model",
|
||||
backend: "stablediffusion",
|
||||
installed: false,
|
||||
tags: ["sd"],
|
||||
},
|
||||
{
|
||||
name: "unknown-model",
|
||||
description: "No backend",
|
||||
backend: "",
|
||||
installed: false,
|
||||
tags: [],
|
||||
},
|
||||
],
|
||||
allBackends: ['llama-cpp', 'stablediffusion', 'whisper'],
|
||||
allTags: ['chat', 'sd', 'transcript'],
|
||||
allBackends: ["llama-cpp", "stablediffusion", "whisper"],
|
||||
allTags: ["chat", "sd", "transcript"],
|
||||
availableModels: 4,
|
||||
installedModels: 1,
|
||||
totalPages: 1,
|
||||
currentPage: 1,
|
||||
}
|
||||
};
|
||||
|
||||
const MOCK_GPU_RESOURCES_RESPONSE = {
|
||||
type: 'gpu',
|
||||
type: "gpu",
|
||||
available: true,
|
||||
gpus: [
|
||||
{
|
||||
index: 0,
|
||||
name: 'Mock GPU',
|
||||
vendor: 'nvidia',
|
||||
name: "Mock GPU",
|
||||
vendor: "nvidia",
|
||||
total_vram: 12 * 1024 * 1024 * 1024,
|
||||
used_vram: 2 * 1024 * 1024 * 1024,
|
||||
free_vram: 10 * 1024 * 1024 * 1024,
|
||||
@@ -36,272 +60,374 @@ const MOCK_GPU_RESOURCES_RESPONSE = {
|
||||
usage_percent: 16.7,
|
||||
gpu_count: 1,
|
||||
},
|
||||
}
|
||||
};
|
||||
|
||||
const MOCK_ESTIMATES = {
|
||||
'llama-model': {
|
||||
"llama-model": {
|
||||
sizeBytes: 4 * 1024 * 1024 * 1024,
|
||||
sizeDisplay: '4.00 GB',
|
||||
sizeDisplay: "4.00 GB",
|
||||
estimates: {
|
||||
'8192': {
|
||||
8192: {
|
||||
vramBytes: 8 * 1024 * 1024 * 1024,
|
||||
vramDisplay: '8.00 GB',
|
||||
vramDisplay: "8.00 GB",
|
||||
},
|
||||
},
|
||||
},
|
||||
'whisper-model': {
|
||||
"whisper-model": {
|
||||
sizeBytes: 1 * 1024 * 1024 * 1024,
|
||||
sizeDisplay: '1.00 GB',
|
||||
sizeDisplay: "1.00 GB",
|
||||
estimates: {
|
||||
'8192': {
|
||||
8192: {
|
||||
vramBytes: 2 * 1024 * 1024 * 1024,
|
||||
vramDisplay: '2.00 GB',
|
||||
vramDisplay: "2.00 GB",
|
||||
},
|
||||
},
|
||||
},
|
||||
'stablediffusion-model': {
|
||||
"stablediffusion-model": {
|
||||
sizeBytes: 8 * 1024 * 1024 * 1024,
|
||||
sizeDisplay: '8.00 GB',
|
||||
sizeDisplay: "8.00 GB",
|
||||
estimates: {
|
||||
'8192': {
|
||||
8192: {
|
||||
vramBytes: 16 * 1024 * 1024 * 1024,
|
||||
vramDisplay: '16.00 GB',
|
||||
vramDisplay: "16.00 GB",
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
};
|
||||
|
||||
test.describe('Models Gallery - Backend Features', () => {
|
||||
test.describe("Models Gallery - Backend Features", () => {
|
||||
test.beforeEach(async ({ page }) => {
|
||||
await page.route('**/api/models*', (route) => {
|
||||
await page.route("**/api/models*", (route) => {
|
||||
route.fulfill({
|
||||
contentType: 'application/json',
|
||||
contentType: "application/json",
|
||||
body: JSON.stringify(MOCK_MODELS_RESPONSE),
|
||||
})
|
||||
})
|
||||
await page.goto('/app/models')
|
||||
});
|
||||
});
|
||||
await page.goto("/app/models");
|
||||
// Wait for the table to render
|
||||
await expect(page.locator('th', { hasText: 'Backend' })).toBeVisible({ timeout: 10_000 })
|
||||
})
|
||||
await expect(page.locator("th", { hasText: "Backend" })).toBeVisible({
|
||||
timeout: 10_000,
|
||||
});
|
||||
});
|
||||
|
||||
test('backend column header is visible', async ({ page }) => {
|
||||
await expect(page.locator('th', { hasText: 'Backend' })).toBeVisible()
|
||||
})
|
||||
test("backend column header is visible", async ({ page }) => {
|
||||
await expect(page.locator("th", { hasText: "Backend" })).toBeVisible();
|
||||
});
|
||||
|
||||
test('backend badges shown in table rows', async ({ page }) => {
|
||||
const table = page.locator('table')
|
||||
await expect(table.locator('.badge', { hasText: 'llama-cpp' })).toBeVisible()
|
||||
await expect(table.locator('.badge', { hasText: /^whisper$/ })).toBeVisible()
|
||||
})
|
||||
test("backend badges shown in table rows", async ({ page }) => {
|
||||
const table = page.locator("table");
|
||||
await expect(
|
||||
table.locator(".badge", { hasText: "llama-cpp" }),
|
||||
).toBeVisible();
|
||||
await expect(
|
||||
table.locator(".badge", { hasText: /^whisper$/ }),
|
||||
).toBeVisible();
|
||||
});
|
||||
|
||||
test('backend dropdown is visible', async ({ page }) => {
|
||||
await expect(page.locator('button', { hasText: 'All Backends' })).toBeVisible()
|
||||
})
|
||||
test("backend dropdown is visible", async ({ page }) => {
|
||||
await expect(
|
||||
page.locator("button", { hasText: "All Backends" }),
|
||||
).toBeVisible();
|
||||
});
|
||||
|
||||
test('clicking backend dropdown opens searchable panel', async ({ page }) => {
|
||||
await page.locator('button', { hasText: 'All Backends' }).click()
|
||||
await expect(page.locator('input[placeholder="Search backends..."]')).toBeVisible()
|
||||
})
|
||||
test("clicking backend dropdown opens searchable panel", async ({ page }) => {
|
||||
await page.locator("button", { hasText: "All Backends" }).click();
|
||||
await expect(
|
||||
page.locator('input[placeholder="Search backends..."]'),
|
||||
).toBeVisible();
|
||||
});
|
||||
|
||||
test('typing in search filters dropdown options', async ({ page }) => {
|
||||
await page.locator('button', { hasText: 'All Backends' }).click()
|
||||
const searchInput = page.locator('input[placeholder="Search backends..."]')
|
||||
await searchInput.fill('llama')
|
||||
test("typing in search filters dropdown options", async ({ page }) => {
|
||||
await page.locator("button", { hasText: "All Backends" }).click();
|
||||
const searchInput = page.locator('input[placeholder="Search backends..."]');
|
||||
await searchInput.fill("llama");
|
||||
|
||||
// llama-cpp option should be visible, whisper should not
|
||||
const dropdown = page.locator('input[placeholder="Search backends..."]').locator('..') .locator('..')
|
||||
await expect(dropdown.locator('text=llama-cpp')).toBeVisible()
|
||||
await expect(dropdown.locator('text=whisper')).not.toBeVisible()
|
||||
})
|
||||
const dropdown = page
|
||||
.locator('input[placeholder="Search backends..."]')
|
||||
.locator("..")
|
||||
.locator("..");
|
||||
await expect(dropdown.locator("text=llama-cpp")).toBeVisible();
|
||||
await expect(dropdown.locator("text=whisper")).not.toBeVisible();
|
||||
});
|
||||
|
||||
test('selecting a backend updates the dropdown label', async ({ page }) => {
|
||||
await page.locator('button', { hasText: 'All Backends' }).click()
|
||||
test("selecting a backend updates the dropdown label", async ({ page }) => {
|
||||
await page.locator("button", { hasText: "All Backends" }).click();
|
||||
// Click the llama-cpp option within the dropdown (not the table badge)
|
||||
const dropdown = page.locator('input[placeholder="Search backends..."]').locator('..').locator('..')
|
||||
await dropdown.locator('text=llama-cpp').click()
|
||||
const dropdown = page
|
||||
.locator('input[placeholder="Search backends..."]')
|
||||
.locator("..")
|
||||
.locator("..");
|
||||
await dropdown.locator("text=llama-cpp").click();
|
||||
|
||||
// The dropdown button should now show the selected backend instead of "All Backends"
|
||||
await expect(page.locator('button span', { hasText: 'llama-cpp' })).toBeVisible()
|
||||
})
|
||||
await expect(
|
||||
page.locator("button span", { hasText: "llama-cpp" }),
|
||||
).toBeVisible();
|
||||
});
|
||||
|
||||
test('expanded row shows backend in detail', async ({ page }) => {
|
||||
test("expanded row shows backend in detail", async ({ page }) => {
|
||||
// Click the first model row to expand it
|
||||
await page.locator('tr', { hasText: 'llama-model' }).click()
|
||||
await page.locator("tr", { hasText: "llama-model" }).click();
|
||||
|
||||
// The detail view should show Backend label and value
|
||||
const detail = page.locator('td[colspan="8"]')
|
||||
await expect(detail.locator('text=Backend')).toBeVisible()
|
||||
await expect(detail.locator('text=llama-cpp')).toBeVisible()
|
||||
})
|
||||
})
|
||||
const detail = page.locator('td[colspan="8"]');
|
||||
await expect(detail.locator("text=Backend")).toBeVisible();
|
||||
await expect(detail.locator("text=llama-cpp")).toBeVisible();
|
||||
});
|
||||
});
|
||||
|
||||
const BACKEND_USECASES_MOCK = {
|
||||
'llama-cpp': ['chat', 'embeddings', 'vision'],
|
||||
'whisper': ['transcript'],
|
||||
'stablediffusion': ['image'],
|
||||
}
|
||||
"llama-cpp": ["chat", "embeddings", "vision"],
|
||||
whisper: ["transcript"],
|
||||
stablediffusion: ["image"],
|
||||
};
|
||||
|
||||
test.describe('Models Gallery - Multi-select Filters', () => {
|
||||
const EMPTY_FILTERED_RESPONSE = {
|
||||
...MOCK_MODELS_RESPONSE,
|
||||
models: [],
|
||||
availableModels: 0,
|
||||
totalPages: 1,
|
||||
currentPage: 1,
|
||||
};
|
||||
|
||||
test.describe("Models Gallery - Multi-select Filters", () => {
|
||||
test.beforeEach(async ({ page }) => {
|
||||
await page.route('**/api/models*', (route) => {
|
||||
await page.route("**/api/models*", (route) => {
|
||||
route.fulfill({
|
||||
contentType: 'application/json',
|
||||
contentType: "application/json",
|
||||
body: JSON.stringify(MOCK_MODELS_RESPONSE),
|
||||
})
|
||||
})
|
||||
await page.route('**/api/backends/usecases', (route) => {
|
||||
});
|
||||
});
|
||||
await page.route("**/api/backends/usecases", (route) => {
|
||||
route.fulfill({
|
||||
contentType: 'application/json',
|
||||
contentType: "application/json",
|
||||
body: JSON.stringify(BACKEND_USECASES_MOCK),
|
||||
})
|
||||
})
|
||||
await page.goto('/app/models')
|
||||
await expect(page.locator('th', { hasText: 'Backend' })).toBeVisible({ timeout: 10_000 })
|
||||
})
|
||||
});
|
||||
});
|
||||
await page.goto("/app/models");
|
||||
await expect(page.locator("th", { hasText: "Backend" })).toBeVisible({
|
||||
timeout: 10_000,
|
||||
});
|
||||
});
|
||||
|
||||
test('multi-select toggle: click Chat, TTS, then Chat again', async ({ page }) => {
|
||||
const chatBtn = page.locator('.filter-btn', { hasText: 'Chat' })
|
||||
const ttsBtn = page.locator('.filter-btn', { hasText: 'TTS' })
|
||||
test("multi-select toggle: click Chat, TTS, then Chat again", async ({
|
||||
page,
|
||||
}) => {
|
||||
const chatBtn = page.locator(".filter-btn", { hasText: "Chat" });
|
||||
const ttsBtn = page.locator(".filter-btn", { hasText: "TTS" });
|
||||
|
||||
await chatBtn.click()
|
||||
await expect(chatBtn).toHaveClass(/active/)
|
||||
await chatBtn.click();
|
||||
await expect(chatBtn).toHaveClass(/active/);
|
||||
|
||||
await ttsBtn.click()
|
||||
await expect(chatBtn).toHaveClass(/active/)
|
||||
await expect(ttsBtn).toHaveClass(/active/)
|
||||
await ttsBtn.click();
|
||||
await expect(chatBtn).toHaveClass(/active/);
|
||||
await expect(ttsBtn).toHaveClass(/active/);
|
||||
|
||||
// Click Chat again to deselect it
|
||||
await chatBtn.click()
|
||||
await expect(chatBtn).not.toHaveClass(/active/)
|
||||
await expect(ttsBtn).toHaveClass(/active/)
|
||||
})
|
||||
await chatBtn.click();
|
||||
await expect(chatBtn).not.toHaveClass(/active/);
|
||||
await expect(ttsBtn).toHaveClass(/active/);
|
||||
});
|
||||
|
||||
test('"All" clears selection', async ({ page }) => {
|
||||
const chatBtn = page.locator('.filter-btn', { hasText: 'Chat' })
|
||||
const allBtn = page.locator('.filter-btn', { hasText: 'All' })
|
||||
const chatBtn = page.locator(".filter-btn", { hasText: "Chat" });
|
||||
const allBtn = page.locator(".filter-btn", { hasText: "All" });
|
||||
|
||||
await chatBtn.click()
|
||||
await expect(chatBtn).toHaveClass(/active/)
|
||||
await chatBtn.click();
|
||||
await expect(chatBtn).toHaveClass(/active/);
|
||||
|
||||
await allBtn.click()
|
||||
await expect(allBtn).toHaveClass(/active/)
|
||||
await expect(chatBtn).not.toHaveClass(/active/)
|
||||
})
|
||||
await allBtn.click();
|
||||
await expect(allBtn).toHaveClass(/active/);
|
||||
await expect(chatBtn).not.toHaveClass(/active/);
|
||||
});
|
||||
|
||||
test('query param sent correctly with multiple filters', async ({ page }) => {
|
||||
const chatBtn = page.locator('.filter-btn', { hasText: 'Chat' })
|
||||
const ttsBtn = page.locator('.filter-btn', { hasText: 'TTS' })
|
||||
test("query param sent correctly with multiple filters", async ({ page }) => {
|
||||
const chatBtn = page.locator(".filter-btn", { hasText: "Chat" });
|
||||
const ttsBtn = page.locator(".filter-btn", { hasText: "TTS" });
|
||||
|
||||
// Click Chat and wait for its request to settle
|
||||
await chatBtn.click()
|
||||
await page.waitForResponse(resp => resp.url().includes('/api/models'))
|
||||
await chatBtn.click();
|
||||
await page.waitForResponse((resp) => resp.url().includes("/api/models"));
|
||||
|
||||
// Now click TTS and capture the resulting request
|
||||
const [request] = await Promise.all([
|
||||
page.waitForRequest(req => {
|
||||
if (!req.url().includes('/api/models')) return false
|
||||
const u = new URL(req.url())
|
||||
const tag = u.searchParams.get('tag')
|
||||
return tag && tag.split(',').length >= 2
|
||||
page.waitForRequest((req) => {
|
||||
if (!req.url().includes("/api/models")) return false;
|
||||
const u = new URL(req.url());
|
||||
const tag = u.searchParams.get("tag");
|
||||
return tag && tag.split(",").length >= 2;
|
||||
}),
|
||||
ttsBtn.click(),
|
||||
])
|
||||
]);
|
||||
|
||||
const url = new URL(request.url())
|
||||
const tags = url.searchParams.get('tag').split(',').sort()
|
||||
expect(tags).toEqual(['chat', 'tts'])
|
||||
})
|
||||
const url = new URL(request.url());
|
||||
const tags = url.searchParams.get("tag").split(",").sort();
|
||||
expect(tags).toEqual(["chat", "tts"]);
|
||||
});
|
||||
|
||||
test('backend greys out unavailable filters', async ({ page }) => {
|
||||
test("backend greys out unavailable filters", async ({ page }) => {
|
||||
// Select llama-cpp backend via dropdown
|
||||
await page.locator('button', { hasText: 'All Backends' }).click()
|
||||
const dropdown = page.locator('input[placeholder="Search backends..."]').locator('..').locator('..')
|
||||
await dropdown.locator('text=llama-cpp').click()
|
||||
await page.locator("button", { hasText: "All Backends" }).click();
|
||||
const dropdown = page
|
||||
.locator('input[placeholder="Search backends..."]')
|
||||
.locator("..")
|
||||
.locator("..");
|
||||
await dropdown.locator("text=llama-cpp").click();
|
||||
|
||||
// Wait for filter state to update
|
||||
const ttsBtn = page.locator('.filter-btn', { hasText: 'TTS' })
|
||||
const sttBtn = page.locator('.filter-btn', { hasText: 'STT' })
|
||||
const imageBtn = page.locator('.filter-btn', { hasText: 'Image' })
|
||||
const ttsBtn = page.locator(".filter-btn", { hasText: "TTS" });
|
||||
const sttBtn = page.locator(".filter-btn", { hasText: "STT" });
|
||||
const imageBtn = page.locator(".filter-btn", { hasText: "Image" });
|
||||
|
||||
// TTS, STT, Image should be disabled for llama-cpp
|
||||
await expect(ttsBtn).toBeDisabled()
|
||||
await expect(sttBtn).toBeDisabled()
|
||||
await expect(imageBtn).toBeDisabled()
|
||||
await expect(ttsBtn).toBeDisabled();
|
||||
await expect(sttBtn).toBeDisabled();
|
||||
await expect(imageBtn).toBeDisabled();
|
||||
|
||||
// Chat, Embeddings, Vision should remain enabled
|
||||
const chatBtn = page.locator('.filter-btn', { hasText: 'Chat' })
|
||||
const embBtn = page.locator('.filter-btn', { hasText: 'Embeddings' })
|
||||
const visBtn = page.locator('.filter-btn', { hasText: 'Vision' })
|
||||
await expect(chatBtn).toBeEnabled()
|
||||
await expect(embBtn).toBeEnabled()
|
||||
await expect(visBtn).toBeEnabled()
|
||||
})
|
||||
const chatBtn = page.locator(".filter-btn", { hasText: "Chat" });
|
||||
const embBtn = page.locator(".filter-btn", { hasText: "Embeddings" });
|
||||
const visBtn = page.locator(".filter-btn", { hasText: "Vision" });
|
||||
await expect(chatBtn).toBeEnabled();
|
||||
await expect(embBtn).toBeEnabled();
|
||||
await expect(visBtn).toBeEnabled();
|
||||
});
|
||||
|
||||
test('backend clears incompatible filters', async ({ page }) => {
|
||||
test("backend clears incompatible filters", async ({ page }) => {
|
||||
// Select TTS filter first
|
||||
const ttsBtn = page.locator('.filter-btn', { hasText: 'TTS' })
|
||||
await ttsBtn.click()
|
||||
await expect(ttsBtn).toHaveClass(/active/)
|
||||
const ttsBtn = page.locator(".filter-btn", { hasText: "TTS" });
|
||||
await ttsBtn.click();
|
||||
await expect(ttsBtn).toHaveClass(/active/);
|
||||
|
||||
// Now select llama-cpp backend (which doesn't support TTS)
|
||||
await page.locator('button', { hasText: 'All Backends' }).click()
|
||||
const dropdown = page.locator('input[placeholder="Search backends..."]').locator('..').locator('..')
|
||||
await dropdown.locator('text=llama-cpp').click()
|
||||
await page.locator("button", { hasText: "All Backends" }).click();
|
||||
const dropdown = page
|
||||
.locator('input[placeholder="Search backends..."]')
|
||||
.locator("..")
|
||||
.locator("..");
|
||||
await dropdown.locator("text=llama-cpp").click();
|
||||
|
||||
// TTS should be auto-removed from selection
|
||||
await expect(ttsBtn).not.toHaveClass(/active/)
|
||||
})
|
||||
})
|
||||
await expect(ttsBtn).not.toHaveClass(/active/);
|
||||
});
|
||||
});
|
||||
|
||||
test.describe('Models Gallery - Fits In GPU Filter', () => {
|
||||
test.describe("Models Gallery - Fits In GPU Filter", () => {
|
||||
test.beforeEach(async ({ page }) => {
|
||||
await page.route('**/api/models*', (route) => {
|
||||
await page.route("**/api/models*", (route) => {
|
||||
route.fulfill({
|
||||
contentType: 'application/json',
|
||||
contentType: "application/json",
|
||||
body: JSON.stringify(MOCK_MODELS_RESPONSE),
|
||||
})
|
||||
})
|
||||
});
|
||||
});
|
||||
|
||||
await page.route('**/api/resources', (route) => {
|
||||
await page.route("**/api/resources", (route) => {
|
||||
route.fulfill({
|
||||
contentType: 'application/json',
|
||||
contentType: "application/json",
|
||||
body: JSON.stringify(MOCK_GPU_RESOURCES_RESPONSE),
|
||||
})
|
||||
})
|
||||
});
|
||||
});
|
||||
|
||||
await page.route('**/api/models/estimate/*', (route) => {
|
||||
const url = new URL(route.request().url())
|
||||
const id = decodeURIComponent(url.pathname.split('/').pop() || '')
|
||||
await page.route("**/api/models/estimate/*", (route) => {
|
||||
const url = new URL(route.request().url());
|
||||
const id = decodeURIComponent(url.pathname.split("/").pop() || "");
|
||||
route.fulfill({
|
||||
contentType: 'application/json',
|
||||
contentType: "application/json",
|
||||
body: JSON.stringify(MOCK_ESTIMATES[id] || {}),
|
||||
})
|
||||
})
|
||||
});
|
||||
});
|
||||
|
||||
await page.goto('/app/models')
|
||||
await expect(page.locator('th', { hasText: 'Backend' })).toBeVisible({ timeout: 10_000 })
|
||||
})
|
||||
await page.goto("/app/models");
|
||||
await expect(page.locator("th", { hasText: "Backend" })).toBeVisible({
|
||||
timeout: 10_000,
|
||||
});
|
||||
});
|
||||
|
||||
test('fits toggle is visible when GPU resources are available', async ({ page }) => {
|
||||
await expect(page.getByText('Fits in GPU')).toBeVisible()
|
||||
})
|
||||
test("fits toggle is visible when GPU resources are available", async ({
|
||||
page,
|
||||
}) => {
|
||||
await expect(page.getByText("Fits in GPU")).toBeVisible();
|
||||
});
|
||||
|
||||
test('enabling fits filter hides models that exceed available VRAM', async ({ page }) => {
|
||||
await expect(page.locator('tr', { hasText: 'stablediffusion-model' })).toBeVisible()
|
||||
test("enabling fits filter hides models that exceed available VRAM", async ({
|
||||
page,
|
||||
}) => {
|
||||
await expect(
|
||||
page.locator("tr", { hasText: "stablediffusion-model" }),
|
||||
).toBeVisible();
|
||||
|
||||
// The shared <Toggle> visually hides its native input (opacity:0;w:0;h:0),
|
||||
// so .check() can't interact with it directly — click the visible track.
|
||||
await page.locator('label.filter-bar-group__toggle', { hasText: 'Fits in GPU' }).locator('.toggle__track').click()
|
||||
await page
|
||||
.locator("label.filter-bar-group__toggle", { hasText: "Fits in GPU" })
|
||||
.locator(".toggle__track")
|
||||
.click();
|
||||
|
||||
await expect(page.locator('tr', { hasText: 'stablediffusion-model' })).toHaveCount(0)
|
||||
await expect(page.locator('tr', { hasText: 'llama-model' })).toBeVisible()
|
||||
await expect(
|
||||
page.locator("tr", { hasText: "stablediffusion-model" }),
|
||||
).toHaveCount(0);
|
||||
await expect(page.locator("tr", { hasText: "llama-model" })).toBeVisible();
|
||||
// Unknown estimate stays visible until an explicit non-fit verdict exists.
|
||||
await expect(page.locator('tr', { hasText: 'unknown-model' })).toBeVisible()
|
||||
})
|
||||
await expect(
|
||||
page.locator("tr", { hasText: "unknown-model" }),
|
||||
).toBeVisible();
|
||||
});
|
||||
|
||||
test('fits filter state persists after reload', async ({ page }) => {
|
||||
await page.locator('label.filter-bar-group__toggle', { hasText: 'Fits in GPU' }).locator('.toggle__track').click()
|
||||
await page.reload()
|
||||
await expect(page.getByLabel('Fits in GPU')).toBeChecked()
|
||||
})
|
||||
})
|
||||
test("fits filter state persists after reload", async ({ page }) => {
|
||||
await page
|
||||
.locator("label.filter-bar-group__toggle", { hasText: "Fits in GPU" })
|
||||
.locator(".toggle__track")
|
||||
.click();
|
||||
await page.reload();
|
||||
await expect(page.getByLabel("Fits in GPU")).toBeChecked();
|
||||
});
|
||||
});
|
||||
|
||||
test.describe("Models Gallery - Empty State", () => {
|
||||
test.beforeEach(async ({ page }) => {
|
||||
await page.route("**/api/models*", (route) => {
|
||||
const url = new URL(route.request().url());
|
||||
const tag = url.searchParams.get("tag");
|
||||
const body =
|
||||
tag === "chat" ? EMPTY_FILTERED_RESPONSE : MOCK_MODELS_RESPONSE;
|
||||
|
||||
route.fulfill({
|
||||
contentType: "application/json",
|
||||
body: JSON.stringify(body),
|
||||
});
|
||||
});
|
||||
|
||||
await page.goto("/app/models");
|
||||
await expect(page.locator("th", { hasText: "Backend" })).toBeVisible({
|
||||
timeout: 10_000,
|
||||
});
|
||||
});
|
||||
|
||||
test("shows empty state for filtered-out results and clear filters restores the gallery", async ({
|
||||
page,
|
||||
}) => {
|
||||
const chatBtn = page.locator(".filter-btn", { hasText: "Chat" });
|
||||
const allBtn = page.locator(".filter-btn", { hasText: "All" });
|
||||
|
||||
await chatBtn.click();
|
||||
|
||||
await expect(page.locator(".empty-state-title")).toHaveText(
|
||||
"No models found",
|
||||
);
|
||||
await expect(page.locator(".empty-state-text")).toHaveText(
|
||||
"No models match your current search or filters.",
|
||||
);
|
||||
|
||||
const clearBtn = page.getByRole("button", { name: "Clear filters" });
|
||||
await expect(clearBtn).toBeVisible();
|
||||
await expect(page.locator("tr", { hasText: "llama-model" })).toHaveCount(0);
|
||||
|
||||
await clearBtn.click();
|
||||
|
||||
await expect(allBtn).toHaveClass(/active/);
|
||||
await expect(chatBtn).not.toHaveClass(/active/);
|
||||
await expect(page.locator(".empty-state")).toHaveCount(0);
|
||||
await expect(page.locator("tr", { hasText: "llama-model" })).toBeVisible();
|
||||
});
|
||||
});
|
||||
|
||||
@@ -412,7 +412,10 @@ These load-time options control how the backend parses `<think>` reasoning block
|
||||
| `prefill_assistant` | bool | `true` | When `false`, the trailing assistant message is not pre-filled by the chat template. |
|
||||
|
||||
{{% notice note %}}
|
||||
This is the load-time reasoning configuration. The orthogonal per-request `enable_thinking` chat-template kwarg (set via the YAML `reasoning.disable` field) toggles thinking on/off per call without restarting the model.
|
||||
This is the load-time reasoning configuration. The orthogonal per-request `enable_thinking` chat-template kwarg toggles thinking on/off per call without restarting the model. It can be driven either by the YAML `reasoning.disable` field (model default) or per request via the OpenAI `reasoning_effort` field on `/v1/chat/completions`:
|
||||
|
||||
- `reasoning_effort: "none"` disables thinking for that request (`enable_thinking=false`) - useful to run a single reasoning model like Qwen3 for low-latency tasks while still enabling reasoning on other requests.
|
||||
- `reasoning_effort: "minimal" | "low" | "medium" | "high"` enables thinking, unless the model config explicitly set `reasoning.disable: true` (an operator's explicit disable wins and is never re-enabled by a request).
|
||||
{{% /notice %}}
|
||||
|
||||
### Multimodal Backend Options
|
||||
|
||||
@@ -1,3 +1,3 @@
|
||||
{
|
||||
"version": "v4.3.1"
|
||||
"version": "v4.3.4"
|
||||
}
|
||||
|
||||
@@ -1,4 +1,37 @@
|
||||
---
|
||||
- name: "lfm2.5-8b-a1b"
|
||||
url: "github:mudler/LocalAI/gallery/virtual.yaml@master"
|
||||
urls:
|
||||
- https://huggingface.co/LiquidAI/LFM2.5-8B-A1B-GGUF
|
||||
description: "Try LFM •\nDocs •\nLEAP •\nDiscord\n\n# LFM2.5-8B-A1B\n\nLFM2.5 is a new family of hybrid models designed for on-device deployment. It builds on the LFM2 architecture with extended pre-training and reinforcement learning.\n\n - **On-device personal assistant**: Designed to power real-life applications, chaining tool calls, and following complex instructions on all devices.\n - **Compressed performance**: Competitive with much larger dense and MoE models on instruction following and agentic tasks.\n - **Unmatched throughput**: Fastest in its size class on both CPU and GPU inference, with day-one support for llama.cpp, MLX, vLLM, and SGLang.\n\nFind more information about LFM2.5-8B-A1B in our blog post.\n\n**AA-Omniscience Index (higher is better) rewards correct answers and penalizes hallucinations. Scores range from -100 to 100. See more results on Artificial Analysis.*\n\n## \U0001F5D2️ Model Details\n\nLFM2.5-8B-A1B is a general-purpose text-only model with the following features:\n\n...\n"
|
||||
license: "other"
|
||||
tags:
|
||||
- llm
|
||||
- gguf
|
||||
icon: https://cdn-uploads.huggingface.co/production/uploads/61b8e2ba285851687028d395/qUZVGkns1bg3sZUShBbhv.png
|
||||
overrides:
|
||||
backend: llama-cpp
|
||||
function:
|
||||
automatic_tool_parsing_fallback: true
|
||||
grammar:
|
||||
disable: true
|
||||
known_usecases:
|
||||
- chat
|
||||
options:
|
||||
- use_jinja:true
|
||||
parameters:
|
||||
min_p: 0.15
|
||||
model: llama-cpp/models/LFM2.5-8B-A1B-GGUF/LFM2.5-8B-A1B-Q4_K_M.gguf
|
||||
repeat_penalty: 1.05
|
||||
temperature: 0.1
|
||||
top_k: 50
|
||||
top_p: 0.1
|
||||
template:
|
||||
use_tokenizer_template: true
|
||||
files:
|
||||
- filename: llama-cpp/models/LFM2.5-8B-A1B-GGUF/LFM2.5-8B-A1B-Q4_K_M.gguf
|
||||
uri: https://huggingface.co/LiquidAI/LFM2.5-8B-A1B-GGUF/resolve/main/LFM2.5-8B-A1B-Q4_K_M.gguf
|
||||
sha256: 4923ec14f06b968b74d663e5949867d2d9c3bf13a20b8be1a9f9af39989b2bb0
|
||||
- name: "qwopus3.5-9b-coder-mtp"
|
||||
url: "github:mudler/LocalAI/gallery/virtual.yaml@master"
|
||||
urls:
|
||||
|
||||
@@ -17,6 +17,13 @@ config_file: |
|
||||
# "pure content" PEG parser that leaks reasoning tags into content.
|
||||
options:
|
||||
- use_jinja:true
|
||||
# With use_tokenizer_template the backend (llama.cpp) owns tool-call
|
||||
# grammar generation and parsing too. Disabling LocalAI's own grammar lets
|
||||
# llama.cpp's native name-first tool pipeline run; otherwise the generated
|
||||
# grammar overrides it and the tool-call JSON leaks into content (#10052).
|
||||
function:
|
||||
grammar:
|
||||
disable: true
|
||||
template:
|
||||
use_tokenizer_template: true
|
||||
name: qwen3
|
||||
|
||||
@@ -155,12 +155,22 @@ func (sc *JSONSchemaConverter) visit(schema map[string]any, name string, rootSch
|
||||
propName string
|
||||
propSchema map[string]any
|
||||
}) int {
|
||||
aOrder := propOrder[a.propName]
|
||||
bOrder := propOrder[b.propName]
|
||||
if aOrder != 0 && bOrder != 0 {
|
||||
// Use presence in the order map (not a non-zero sentinel) so that
|
||||
// the first listed key — index 0 — is honored. Keys present in
|
||||
// properties_order sort by their index and ahead of any key that
|
||||
// isn't listed; unlisted keys keep a stable alphabetical order.
|
||||
aOrder, aOK := propOrder[a.propName]
|
||||
bOrder, bOK := propOrder[b.propName]
|
||||
switch {
|
||||
case aOK && bOK:
|
||||
return cmp.Compare(aOrder, bOrder)
|
||||
case aOK:
|
||||
return -1
|
||||
case bOK:
|
||||
return 1
|
||||
default:
|
||||
return cmp.Compare(a.propName, b.propName)
|
||||
}
|
||||
return cmp.Compare(a.propName, b.propName)
|
||||
})
|
||||
|
||||
var rule strings.Builder
|
||||
|
||||
@@ -547,3 +547,61 @@ realvalue
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
var _ = Describe("JSON schema property ordering (issue #10052)", func() {
|
||||
// A function-call shaped schema. The grammar must honor the configured
|
||||
// properties_order. Before the fix, the sort guard `aOrder != 0 && bOrder != 0`
|
||||
// treated the first listed key (index 0) as "unset" and fell back to
|
||||
// alphabetical order, so "arguments" was emitted before "name" even when
|
||||
// properties_order put name first.
|
||||
const schema = `{
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"name": {"type": "string"},
|
||||
"arguments": {"type": "object", "properties": {"cmd": {"type": "string"}}}
|
||||
}
|
||||
}`
|
||||
|
||||
// keyIndex finds the position of an object-key literal (escaped as \"key\"
|
||||
// in GBNF), which only appears where the key is emitted in the rule — not
|
||||
// in derived rule names like root-name.
|
||||
keyIndex := func(grammar, key string) int {
|
||||
return strings.Index(grammar, `\"`+key+`\"`)
|
||||
}
|
||||
|
||||
It("honors properties_order with name listed first (index 0)", func() {
|
||||
grammar, err := NewJSONSchemaConverter("name,arguments").GrammarFromBytes([]byte(schema))
|
||||
Expect(err).To(BeNil())
|
||||
ni := keyIndex(grammar, "name")
|
||||
ai := keyIndex(grammar, "arguments")
|
||||
Expect(ni).To(BeNumerically(">=", 0))
|
||||
Expect(ai).To(BeNumerically(">=", 0))
|
||||
Expect(ni).To(BeNumerically("<", ai),
|
||||
"properties_order lists name first, so the grammar must emit \"name\" before \"arguments\"")
|
||||
})
|
||||
|
||||
It("keeps alphabetical order when properties_order is empty", func() {
|
||||
grammar, err := NewJSONSchemaConverter("").GrammarFromBytes([]byte(schema))
|
||||
Expect(err).To(BeNil())
|
||||
// No explicit order: keys fall back to alphabetical, so "arguments"
|
||||
// precedes "name". This is the documented default and must not change.
|
||||
Expect(keyIndex(grammar, "arguments")).To(BeNumerically("<", keyIndex(grammar, "name")))
|
||||
})
|
||||
|
||||
It("sorts keys present in properties_order ahead of unlisted keys", func() {
|
||||
const schemaWithExtra = `{
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"name": {"type": "string"},
|
||||
"arguments": {"type": "object", "properties": {"cmd": {"type": "string"}}},
|
||||
"aaa_unlisted": {"type": "string"}
|
||||
}
|
||||
}`
|
||||
// "aaa_unlisted" is alphabetically first but not in the order list, so
|
||||
// it must still come after the listed name/arguments keys.
|
||||
grammar, err := NewJSONSchemaConverter("name,arguments").GrammarFromBytes([]byte(schemaWithExtra))
|
||||
Expect(err).To(BeNil())
|
||||
Expect(keyIndex(grammar, "name")).To(BeNumerically("<", keyIndex(grammar, "arguments")))
|
||||
Expect(keyIndex(grammar, "arguments")).To(BeNumerically("<", keyIndex(grammar, "aaa_unlisted")))
|
||||
})
|
||||
})
|
||||
|
||||
@@ -628,6 +628,36 @@ func buildContent(before string, parser *ChatMsgParser) string {
|
||||
// This provides better streaming and partial parsing support.
|
||||
// When format is nil or when format is set, tries "find scope/tool start, split, parse suffix"
|
||||
// first (llama.cpp PEG order) so that content before the tool block does not cause parse failure.
|
||||
// validToolNameRe matches a plausible function name. OpenAI tool names are
|
||||
// limited to letters, digits, underscores and hyphens; dots appear in some
|
||||
// providers' namespaced names. Anything else (whitespace, braces, brackets,
|
||||
// quotes, colons) signals the XML auto-detector grabbed a JSON blob or prose
|
||||
// rather than a real name.
|
||||
var validToolNameRe = regexp.MustCompile(`^[A-Za-z0-9_.\-]+$`)
|
||||
|
||||
// plausibleToolName reports whether name looks like a real function name.
|
||||
func plausibleToolName(name string) bool {
|
||||
return validToolNameRe.MatchString(strings.TrimSpace(name))
|
||||
}
|
||||
|
||||
// filterPlausibleToolCalls drops auto-detected tool calls whose name is not a
|
||||
// plausible function name. This guards against a format (notably glm-4.5, whose
|
||||
// tool block is <tool_call>name...</tool_call>) mis-claiming a Hermes-style
|
||||
// <tool_call>JSON</tool_call> block and returning the whole JSON object — or
|
||||
// any leading prose / array — as the function name. Dropping the misparse lets
|
||||
// auto-detection fall through to the next format and ultimately to JSON
|
||||
// parsing, which handles Hermes correctly. Replaces the narrower leading-"{"
|
||||
// check (PR #9940); see issue #9722.
|
||||
func filterPlausibleToolCalls(calls []FuncCallResults) []FuncCallResults {
|
||||
out := calls[:0:0]
|
||||
for _, c := range calls {
|
||||
if plausibleToolName(c.Name) {
|
||||
out = append(out, c)
|
||||
}
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
func ParseXMLIterative(s string, format *XMLToolCallFormat, isPartial bool) ([]FuncCallResults, error) {
|
||||
// Try split-on-scope first so reasoning/content before tool block is skipped
|
||||
if format != nil {
|
||||
@@ -639,7 +669,12 @@ func ParseXMLIterative(s string, format *XMLToolCallFormat, isPartial bool) ([]F
|
||||
for _, fmtPreset := range formats {
|
||||
if fmtPreset.format != nil {
|
||||
if pr, ok := tryParseXMLFromScopeStart(s, fmtPreset.format, isPartial); ok {
|
||||
return pr.ToolCalls, nil
|
||||
// Auto-detect: discard misparsed (non-name) results so a
|
||||
// format that grabbed a JSON blob doesn't win; fall through
|
||||
// to the next format.
|
||||
if valid := filterPlausibleToolCalls(pr.ToolCalls); len(valid) > 0 {
|
||||
return valid, nil
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -659,14 +694,19 @@ func ParseXMLIterative(s string, format *XMLToolCallFormat, isPartial bool) ([]F
|
||||
if err != nil {
|
||||
// Check if it's a partial exception (recoverable)
|
||||
if _, ok := err.(*ChatMsgPartialException); ok {
|
||||
// Partial parse, return what we have
|
||||
return parser.ToolCalls(), nil
|
||||
// Partial parse, return what we have — unless every
|
||||
// result is a misparse, in which case try the next format.
|
||||
if valid := filterPlausibleToolCalls(parser.ToolCalls()); len(valid) > 0 {
|
||||
return valid, nil
|
||||
}
|
||||
}
|
||||
// Try next format
|
||||
continue
|
||||
}
|
||||
if success && len(parser.ToolCalls()) > 0 {
|
||||
return parser.ToolCalls(), nil
|
||||
if valid := filterPlausibleToolCalls(parser.ToolCalls()); len(valid) > 0 {
|
||||
return valid, nil
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
56
pkg/functions/parse_glm_9722_test.go
Normal file
56
pkg/functions/parse_glm_9722_test.go
Normal file
@@ -0,0 +1,56 @@
|
||||
package functions
|
||||
|
||||
import (
|
||||
"regexp"
|
||||
|
||||
. "github.com/onsi/ginkgo/v2"
|
||||
. "github.com/onsi/gomega"
|
||||
)
|
||||
|
||||
// Robust fix for the glm-4.5 XML auto-detect false positive (relates to #9722
|
||||
// / supersedes the brittle leading-"{" filter in #9940). When the XML
|
||||
// auto-detector mis-identifies a Hermes-style <tool_call>JSON</tool_call> block
|
||||
// as glm-4.5, it extracts the block body as the function NAME. A real function
|
||||
// name is [A-Za-z0-9_.-]+; anything with braces, brackets, whitespace, quotes
|
||||
// or colons is a misparse and must not be returned (so JSON parsing can take
|
||||
// over). This is stronger than checking only for a leading "{": it also rejects
|
||||
// leading prose, JSON arrays, and brace-less garbage.
|
||||
var _ = Describe("glm-4.5 auto-detect name validation (#9722/#9940)", func() {
|
||||
// plausible mirrors the contract: a returned auto-detected tool name
|
||||
// must look like a real function name.
|
||||
plausible := regexp.MustCompile(`^[A-Za-z0-9_.\-]+$`)
|
||||
|
||||
DescribeTable("auto-detect must not emit a misparsed tool name",
|
||||
func(input string) {
|
||||
results, err := ParseXMLIterative(input, nil, false)
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
for _, r := range results {
|
||||
Expect(plausible.MatchString(r.Name)).To(BeTrue(),
|
||||
"auto-detected XML tool name must look like a function name, got: %q", r.Name)
|
||||
}
|
||||
},
|
||||
Entry("canonical Hermes JSON", "<tool_call>\n{\"name\": \"bash\", \"arguments\": {\"script\": \"ls\"}}\n</tool_call>"),
|
||||
Entry("leading prose then JSON", "<tool_call>\nSure: {\"name\": \"bash\", \"arguments\": {\"script\": \"ls\"}}\n</tool_call>"),
|
||||
Entry("JSON array (parallel calls)", "<tool_call>\n[{\"name\": \"bash\", \"arguments\": {}}]\n</tool_call>"),
|
||||
Entry("brace-less garbage", "<tool_call>\nname: bash, arguments: {}\n</tool_call>"),
|
||||
)
|
||||
|
||||
// No-regression: a genuine glm-4.5 tool call must still be auto-detected.
|
||||
It("still parses a legitimate glm-4.5 tool call", func() {
|
||||
legit := "<tool_call>get_weather\n<arg_key>city</arg_key>\n<arg_value>NYC</arg_value>\n</tool_call>"
|
||||
results, err := ParseXMLIterative(legit, nil, false)
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(results).To(HaveLen(1))
|
||||
Expect(results[0].Name).To(Equal("get_weather"))
|
||||
})
|
||||
|
||||
// A user who explicitly forces the glm-4.5 format keeps the raw behaviour
|
||||
// (no name filtering) — only auto-detection is guarded.
|
||||
It("does not filter when the glm-4.5 format is explicitly forced", func() {
|
||||
input := "<tool_call>\n{\"name\": \"bash\", \"arguments\": {}}\n</tool_call>"
|
||||
forced, err := ParseXMLIterative(input, GetXMLFormatPreset("glm-4.5"), false)
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(forced).ToNot(BeEmpty(),
|
||||
"explicit format must be trusted verbatim, even if it yields a JSON-blob name")
|
||||
})
|
||||
})
|
||||
@@ -53,6 +53,13 @@ type ModelLoader struct {
|
||||
modelRouter ModelRouter // distributed mode: route to remote node
|
||||
backendLogs *BackendLogStore
|
||||
backendLoggingEnabled atomic.Bool
|
||||
// stoppingProcs marks backend processes that LocalAI is stopping on
|
||||
// purpose (model unload / graceful shutdown), keyed by the
|
||||
// *process.Process pointer. The exit-watcher goroutine in startProcess
|
||||
// consults it to decide whether an exit is an expected stop or a crash —
|
||||
// the exit code can't, since a child killed by our own SIGTERM/SIGKILL
|
||||
// reports -1, indistinguishable from a signal-induced crash.
|
||||
stoppingProcs sync.Map
|
||||
}
|
||||
|
||||
// NewModelLoader creates a new ModelLoader instance.
|
||||
|
||||
@@ -75,6 +75,9 @@ func (ml *ModelLoader) deleteProcess(s string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Mark the stop as intentional so the exit-watcher logs it as an
|
||||
// expected stop, not a crash (signal-terminated children report -1).
|
||||
ml.stoppingProcs.Store(process, struct{}{})
|
||||
err := process.Stop()
|
||||
if err != nil {
|
||||
xlog.Error("(deleteProcess) error while deleting process", "error", err, "model", s)
|
||||
@@ -171,8 +174,16 @@ func (ml *ModelLoader) startProcess(grpcProcess, id string, serverAddress string
|
||||
xlog.Debug("GRPC Service state dir", "dir", grpcControlProcess.StateDir())
|
||||
|
||||
signals.RegisterGracefulTerminationHandler(func() {
|
||||
err := grpcControlProcess.Stop()
|
||||
if err != nil {
|
||||
// StopAllGRPC (the deleteProcess path) is registered earlier and runs
|
||||
// first for store-tracked backends, stopping this process and removing
|
||||
// its pidfile. Calling Stop again then fails with "failed to read PID".
|
||||
// Skip when it's already gone; this handler still covers processes that
|
||||
// StopAllGRPC doesn't track (e.g. worker-supervised backends).
|
||||
if !grpcControlProcess.IsAlive() {
|
||||
return
|
||||
}
|
||||
ml.stoppingProcs.Store(grpcControlProcess, struct{}{})
|
||||
if err := grpcControlProcess.Stop(); err != nil {
|
||||
xlog.Error("error while shutting down grpc process", "error", err)
|
||||
}
|
||||
})
|
||||
@@ -211,20 +222,27 @@ func (ml *ModelLoader) startProcess(grpcProcess, id string, serverAddress string
|
||||
// whether the child is alive.
|
||||
go func() {
|
||||
<-grpcControlProcess.Done()
|
||||
// LoadAndDelete both reads the intentional-stop marker and frees the
|
||||
// map entry so it doesn't accumulate across the process's lifetime.
|
||||
_, intentional := ml.stoppingProcs.LoadAndDelete(grpcControlProcess)
|
||||
fields := []any{
|
||||
"id", id,
|
||||
"address", serverAddress,
|
||||
"process", filepath.Base(grpcProcess),
|
||||
}
|
||||
code, codeErr := grpcControlProcess.ExitCode()
|
||||
if codeErr == nil {
|
||||
// Report the raw exit code without interpreting it: a child killed by
|
||||
// our own SIGTERM/SIGKILL surfaces as -1 (Go reports -1 for signal
|
||||
// termination, not the shell's 128+signal convention), so the code
|
||||
// alone can't tell an intended stop from a crash. The stoppingProcs
|
||||
// marker is the reliable signal for that, so it picks the log level.
|
||||
if code, codeErr := grpcControlProcess.ExitCode(); codeErr == nil {
|
||||
fields = append(fields, "exitCode", code)
|
||||
}
|
||||
// 143 = 128 + SIGTERM, the signal sent during graceful stop / model unload.
|
||||
// Treat that and a clean 0 as expected; everything else is a likely crash.
|
||||
if codeErr == nil && (code == "0" || code == "143") {
|
||||
xlog.Info("Backend process exited", fields...)
|
||||
if intentional {
|
||||
xlog.Info("Backend process stopped", fields...)
|
||||
} else {
|
||||
// A stop we didn't initiate — a SIGSEGV from a missing shared
|
||||
// library, a Python ImportError, an OOM kill, an unexpected self-exit.
|
||||
xlog.Warn("Backend process exited unexpectedly", fields...)
|
||||
}
|
||||
}()
|
||||
|
||||
157
pkg/utils/path_test.go
Normal file
157
pkg/utils/path_test.go
Normal file
@@ -0,0 +1,157 @@
|
||||
package utils_test
|
||||
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
|
||||
. "github.com/mudler/LocalAI/pkg/utils"
|
||||
. "github.com/onsi/ginkgo/v2"
|
||||
. "github.com/onsi/gomega"
|
||||
)
|
||||
|
||||
var _ = Describe("utils/path tests", func() {
|
||||
Describe("VerifyPath", func() {
|
||||
It("accepts a simple file directly inside the base path", func() {
|
||||
Expect(VerifyPath("model.bin", "/srv/models")).To(Succeed())
|
||||
})
|
||||
|
||||
It("accepts a nested subdirectory inside the base path", func() {
|
||||
Expect(VerifyPath("subdir/model.bin", "/srv/models")).To(Succeed())
|
||||
})
|
||||
|
||||
It("accepts traversal sequences that stay inside the base", func() {
|
||||
// "a/b/../c" collapses to "a/c", still strictly inside the base,
|
||||
// so the verifier should permit it.
|
||||
Expect(VerifyPath("a/b/../c", "/srv/models")).To(Succeed())
|
||||
})
|
||||
|
||||
It("rejects a single parent-traversal that escapes the base", func() {
|
||||
Expect(VerifyPath("../etc/passwd", "/srv/models")).ToNot(Succeed())
|
||||
})
|
||||
|
||||
It("rejects compound traversal that climbs above the base", func() {
|
||||
Expect(VerifyPath("a/../../etc/passwd", "/srv/models")).ToNot(Succeed())
|
||||
})
|
||||
|
||||
It("rejects a deeply-escaping path that lands on the filesystem root", func() {
|
||||
Expect(VerifyPath("../../etc/passwd", "/srv/models")).ToNot(Succeed())
|
||||
})
|
||||
|
||||
It("rejects the base path itself", func() {
|
||||
// Documents that VerifyPath requires a strict descendant: an
|
||||
// empty user input resolves to the base directory and is
|
||||
// rejected, which is the safer default for a download helper
|
||||
// that expects a target file inside the base.
|
||||
Expect(VerifyPath("", "/srv/models")).ToNot(Succeed())
|
||||
})
|
||||
|
||||
It("treats an absolute-looking user input as relative to the base", func() {
|
||||
// filepath.Join discards no segments here: the result is
|
||||
// "/srv/models/etc/passwd", which is still inside the base.
|
||||
// This protects callers that forward untrusted user paths
|
||||
// directly to the verifier.
|
||||
Expect(VerifyPath("/etc/passwd", "/srv/models")).To(Succeed())
|
||||
})
|
||||
|
||||
It("is purely lexical and does not follow symlinks", func() {
|
||||
// VerifyPath uses filepath.Clean, not filepath.EvalSymlinks,
|
||||
// so a symlink that escapes the base is not detected here.
|
||||
// Callers who must defend against symlink escapes need to
|
||||
// EvalSymlinks before delegating to VerifyPath. This test
|
||||
// pins the current contract so the trade-off stays explicit.
|
||||
tmpDir := GinkgoT().TempDir()
|
||||
base := filepath.Join(tmpDir, "base")
|
||||
outside := filepath.Join(tmpDir, "outside")
|
||||
Expect(os.Mkdir(base, 0o755)).To(Succeed())
|
||||
Expect(os.Mkdir(outside, 0o755)).To(Succeed())
|
||||
Expect(os.WriteFile(filepath.Join(outside, "secret.txt"), []byte("x"), 0o600)).To(Succeed())
|
||||
Expect(os.Symlink(outside, filepath.Join(base, "escape"))).To(Succeed())
|
||||
|
||||
Expect(VerifyPath("escape/secret.txt", base)).To(Succeed())
|
||||
})
|
||||
})
|
||||
|
||||
Describe("InTrustedRoot", func() {
|
||||
It("accepts a strict descendant of the trusted root", func() {
|
||||
Expect(InTrustedRoot("/srv/models/file", "/srv/models")).To(Succeed())
|
||||
})
|
||||
|
||||
It("accepts a deeply nested descendant", func() {
|
||||
Expect(InTrustedRoot("/srv/models/a/b/c/file", "/srv/models")).To(Succeed())
|
||||
})
|
||||
|
||||
It("rejects the trusted root itself", func() {
|
||||
// The implementation walks up before comparing, so the input
|
||||
// path must have at least one component beneath the root.
|
||||
Expect(InTrustedRoot("/srv/models", "/srv/models")).ToNot(Succeed())
|
||||
})
|
||||
|
||||
It("rejects a sibling directory that shares the parent", func() {
|
||||
Expect(InTrustedRoot("/srv/other/file", "/srv/models")).ToNot(Succeed())
|
||||
})
|
||||
|
||||
It("rejects an unrelated absolute path", func() {
|
||||
Expect(InTrustedRoot("/etc/passwd", "/srv/models")).ToNot(Succeed())
|
||||
})
|
||||
})
|
||||
|
||||
Describe("SanitizeFileName", func() {
|
||||
It("returns the original name when nothing is unsafe", func() {
|
||||
Expect(SanitizeFileName("model.bin")).To(Equal("model.bin"))
|
||||
})
|
||||
|
||||
It("strips leading directory components", func() {
|
||||
Expect(SanitizeFileName("subdir/model.bin")).To(Equal("model.bin"))
|
||||
})
|
||||
|
||||
It("strips absolute path prefixes", func() {
|
||||
Expect(SanitizeFileName("/etc/passwd")).To(Equal("passwd"))
|
||||
})
|
||||
|
||||
It("collapses parent-traversal sequences and keeps only the leaf", func() {
|
||||
Expect(SanitizeFileName("../etc/passwd")).To(Equal("passwd"))
|
||||
})
|
||||
|
||||
It("removes embedded .. sequences that Clean+Base alone do not catch", func() {
|
||||
// After Clean+Base "foo..bar" survives unchanged; the explicit
|
||||
// ReplaceAll on ".." in the implementation is the last line of
|
||||
// defence against filenames that look benign but still contain
|
||||
// traversal markers.
|
||||
Expect(SanitizeFileName("foo..bar")).To(Equal("foobar"))
|
||||
})
|
||||
|
||||
It("returns an empty string when the input is only a parent reference", func() {
|
||||
Expect(SanitizeFileName("..")).To(Equal(""))
|
||||
})
|
||||
})
|
||||
|
||||
Describe("GenerateUniqueFileName", func() {
|
||||
It("returns the bare filename when no collision exists", func() {
|
||||
tmpDir := GinkgoT().TempDir()
|
||||
Expect(GenerateUniqueFileName(tmpDir, "model", ".bin")).To(Equal("model.bin"))
|
||||
})
|
||||
|
||||
It("suffixes with _2 when the bare filename already exists", func() {
|
||||
tmpDir := GinkgoT().TempDir()
|
||||
Expect(os.WriteFile(filepath.Join(tmpDir, "model.bin"), nil, 0o600)).To(Succeed())
|
||||
|
||||
Expect(GenerateUniqueFileName(tmpDir, "model", ".bin")).To(Equal("model_2.bin"))
|
||||
})
|
||||
|
||||
It("advances the counter past every existing collision", func() {
|
||||
tmpDir := GinkgoT().TempDir()
|
||||
for _, name := range []string{"model.bin", "model_2.bin", "model_3.bin"} {
|
||||
Expect(os.WriteFile(filepath.Join(tmpDir, name), nil, 0o600)).To(Succeed())
|
||||
}
|
||||
|
||||
Expect(GenerateUniqueFileName(tmpDir, "model", ".bin")).To(Equal("model_4.bin"))
|
||||
})
|
||||
|
||||
It("preserves an empty extension when generating the suffixed name", func() {
|
||||
tmpDir := GinkgoT().TempDir()
|
||||
Expect(os.WriteFile(filepath.Join(tmpDir, "README"), nil, 0o600)).To(Succeed())
|
||||
|
||||
Expect(GenerateUniqueFileName(tmpDir, "README", "")).To(Equal("README_2"))
|
||||
})
|
||||
})
|
||||
})
|
||||
@@ -10,7 +10,7 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/labstack/echo/v4"
|
||||
"github.com/mudler/LocalAI/core/application"
|
||||
localaiapp "github.com/mudler/LocalAI/core/application"
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
httpapi "github.com/mudler/LocalAI/core/http"
|
||||
"github.com/mudler/LocalAI/pkg/system"
|
||||
@@ -41,6 +41,7 @@ var (
|
||||
cloudProxyPath string
|
||||
mcpServerURL string
|
||||
mcpServerShutdown func()
|
||||
localAIApp *localaiapp.Application
|
||||
|
||||
// Cloud-proxy fake upstreams. Live for the whole suite so the four
|
||||
// cloud-proxy model YAMLs can point at their URLs at startup time.
|
||||
@@ -390,7 +391,7 @@ var _ = BeforeSuite(func() {
|
||||
// Create application instance (GeneratedContentDir so sound-generation/TTS can write files the handler sends)
|
||||
generatedDir := filepath.Join(tmpDir, "generated")
|
||||
Expect(os.MkdirAll(generatedDir, 0750)).To(Succeed())
|
||||
application, err := application.New(
|
||||
localAIApp, err = localaiapp.New(
|
||||
config.WithContext(appCtx),
|
||||
config.WithSystemState(systemState),
|
||||
config.WithDebug(true),
|
||||
@@ -399,14 +400,14 @@ var _ = BeforeSuite(func() {
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
|
||||
// Register mock backend (always available for non-realtime tests).
|
||||
application.ModelLoader().SetExternalBackend("mock-backend", mockBackendPath)
|
||||
application.ModelLoader().SetExternalBackend("opus", mockBackendPath)
|
||||
localAIApp.ModelLoader().SetExternalBackend("mock-backend", mockBackendPath)
|
||||
localAIApp.ModelLoader().SetExternalBackend("opus", mockBackendPath)
|
||||
if cloudProxyPath != "" {
|
||||
application.ModelLoader().SetExternalBackend("cloud-proxy", cloudProxyPath)
|
||||
localAIApp.ModelLoader().SetExternalBackend("cloud-proxy", cloudProxyPath)
|
||||
}
|
||||
|
||||
// Create HTTP app
|
||||
app, err = httpapi.API(application)
|
||||
app, err = httpapi.API(localAIApp)
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
|
||||
// Get free port
|
||||
@@ -436,6 +437,14 @@ var _ = BeforeSuite(func() {
|
||||
})
|
||||
|
||||
var _ = AfterSuite(func() {
|
||||
// Synchronous shutdown — the context-cancel goroutine in application.New
|
||||
// runs the same cleanup asynchronously, which races test-binary exit and
|
||||
// orphans spawned mock-backend children to init.
|
||||
if localAIApp != nil {
|
||||
if err := localAIApp.Shutdown(); err != nil {
|
||||
xlog.Error("error shutting down application", "error", err)
|
||||
}
|
||||
}
|
||||
if appCancel != nil {
|
||||
appCancel()
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user