mirror of
https://github.com/mudler/LocalAI.git
synced 2026-06-16 20:59:07 -04:00
* fix(router): score classifier production-readiness Conversation trimming runs through the classifier model's chat template and trims by exact token count, sized to the model's n_batch which is now scaled to context so long probes can't crash the backend. Missing chat_message templates are a hard error at router build time. Router- facing factories (Embedder/Scorer/Reranker/TokenCounter) re-resolve ModelConfig per call so a model installed post-startup doesn't bind a stub Backend="" config and silently fall into the loader's auto- iterate path. New 'vector_store' backend trace recorded inside localVectorStore on every Search/Insert — including the backend-load-failure path that previously vanished into an xlog.Warn — with outcome tagging (hit/miss/empty_store/backend_load_error/find_error/insert_error/ok). Companion cleanup drops misleading similarity:0 and input_tokens_count:0 from non-hit and text-mode traces. Gallery local-store-development aliases to 'local-store' so the master image satisfies pkg/model.LocalStoreBackend lookups from the embedding cache. Misc: llama-cpp TokenizeString reads the correct 'prompt' JSON key (the original bug); ModelTokenize nil-guard; non-fatal mitm proxy startup; PII 'route_local' renamed to 'allow' with docs/UI in sync; model-editor footer no longer eats the edit area on small screens; several config-editor template/dropdown/section fixes. Tests: e2e router specs (casual/code-hint + long-conversation trim), vector_store trace specs, lazy-factory specs, gallery dev-alias resolution, Playwright trace badge + scroll regression. Assisted-by: Claude:claude-opus-4-7 [Claude Code] Signed-off-by: Richard Palethorpe <io@richiejp.com> * feat(backend): auto-size batch to context for embedding and rerank models Embedding and rerank models pool over the whole input in a single physical batch (n_ubatch). With batch left at the 512 default, the backend rejects longer inputs with "input is too large to process", silently capping a large-context embedder (e.g. 8k/32k) at 512 tokens. Size n_batch to the context for these single-pass usecases, mirroring the existing FLAG_SCORE behaviour; an explicit batch: still wins. Extracts EffectiveContextSize/EffectiveBatchSize from grpcModelOpts so the effective decode window has one home for other callers to reuse. Adds an e2e-aio regression test that embeds a >512-token input. The AIO embedding model is switched to nomic-embed-text-v1.5 (2048 context) because the previous granite model was capped at 512 tokens and could not exercise the larger batch. Assisted-by: claude-code:claude-opus-4-8 [Claude Code] Signed-off-by: Richard Palethorpe <io@richiejp.com> * fix(gallery): raise arch-router scoring output cap via parallel:64 Scoring decodes the whole prompt+candidate in a single llama_decode and reads one logit row per candidate token. The vendored llama.cpp server caps causal output rows at n_parallel, so the default of 1 aborts with GGML_ASSERT(n_outputs_max <= cparams.n_outputs_max) on multi-token route labels. Set options: [parallel:64] on both arch-router quant entries to lift the cap; kv_unified (the grpc-server default) keeps the full context per sequence, so this does not split the KV cache. Assisted-by: claude-code:claude-opus-4-8 [Claude Code] Signed-off-by: Richard Palethorpe <io@richiejp.com> --------- Signed-off-by: Richard Palethorpe <io@richiejp.com>
533 lines
17 KiB
Go
533 lines
17 KiB
Go
package config
|
|
|
|
import (
|
|
"io"
|
|
"net/http"
|
|
"os"
|
|
|
|
. "github.com/onsi/ginkgo/v2"
|
|
. "github.com/onsi/gomega"
|
|
)
|
|
|
|
var _ = Describe("Test cases for config related functions", func() {
|
|
Context("ModelID", func() {
|
|
It("returns Name when set", func() {
|
|
c := ModelConfig{Name: "my-name"}
|
|
c.Model = "my-model"
|
|
Expect(c.ModelID()).To(Equal("my-name"))
|
|
})
|
|
It("falls back to Model when Name is empty", func() {
|
|
c := ModelConfig{}
|
|
c.Model = "my-model"
|
|
Expect(c.ModelID()).To(Equal("my-model"))
|
|
})
|
|
It("returns empty string when both are empty", func() {
|
|
c := ModelConfig{}
|
|
Expect(c.ModelID()).To(Equal(""))
|
|
})
|
|
})
|
|
|
|
Context("Test Read configuration functions", func() {
|
|
It("Test Validate", func() {
|
|
tmp, err := os.CreateTemp("", "config.yaml")
|
|
Expect(err).To(BeNil())
|
|
defer os.Remove(tmp.Name())
|
|
_, err = tmp.WriteString(
|
|
`backend: "../foo-bar"
|
|
name: "foo"
|
|
parameters:
|
|
model: "foo-bar"
|
|
known_usecases:
|
|
- chat
|
|
- COMPLETION
|
|
`)
|
|
Expect(err).ToNot(HaveOccurred())
|
|
configs, err := readModelConfigsFromFile(tmp.Name())
|
|
config := configs[0]
|
|
Expect(err).To(BeNil())
|
|
Expect(config).ToNot(BeNil())
|
|
valid, err := config.Validate()
|
|
Expect(err).To(HaveOccurred())
|
|
Expect(valid).To(BeFalse())
|
|
Expect(config.KnownUsecases).ToNot(BeNil())
|
|
})
|
|
It("Test Validate", func() {
|
|
tmp, err := os.CreateTemp("", "config.yaml")
|
|
Expect(err).To(BeNil())
|
|
defer os.Remove(tmp.Name())
|
|
_, err = tmp.WriteString(
|
|
`name: bar-baz
|
|
backend: "foo-bar"
|
|
parameters:
|
|
model: "foo-bar"`)
|
|
Expect(err).ToNot(HaveOccurred())
|
|
configs, err := readModelConfigsFromFile(tmp.Name())
|
|
config := configs[0]
|
|
Expect(err).To(BeNil())
|
|
Expect(config).ToNot(BeNil())
|
|
// two configs in config.yaml
|
|
Expect(config.Name).To(Equal("bar-baz"))
|
|
valid, err := config.Validate()
|
|
Expect(err).To(BeNil())
|
|
Expect(valid).To(BeTrue())
|
|
|
|
// llama-cpp configs can't mix the score usecase with
|
|
// chat/completion/embeddings — Score bypasses the slot
|
|
// loop and would race the llama_context. The check fires
|
|
// at load and save time; here we exercise it directly.
|
|
scoreFlag := FLAG_SCORE | FLAG_CHAT
|
|
conflicting := ModelConfig{
|
|
Name: "router-but-also-chat",
|
|
Backend: "llama-cpp",
|
|
KnownUsecases: &scoreFlag,
|
|
}
|
|
valid, err = conflicting.Validate()
|
|
Expect(valid).To(BeFalse())
|
|
Expect(err).To(HaveOccurred())
|
|
Expect(err.Error()).To(ContainSubstring("score is incompatible"))
|
|
|
|
scoreOnly := FLAG_SCORE
|
|
dedicated := ModelConfig{
|
|
Name: "router-only",
|
|
Backend: "llama-cpp",
|
|
KnownUsecases: &scoreOnly,
|
|
}
|
|
valid, err = dedicated.Validate()
|
|
Expect(valid).To(BeTrue())
|
|
Expect(err).NotTo(HaveOccurred())
|
|
|
|
// The constraint is llama-cpp-specific; other backends
|
|
// may safely combine.
|
|
scoreAndChat := FLAG_SCORE | FLAG_CHAT
|
|
otherBackend := ModelConfig{
|
|
Name: "vllm-router-and-chat",
|
|
Backend: "vllm",
|
|
KnownUsecases: &scoreAndChat,
|
|
}
|
|
valid, err = otherBackend.Validate()
|
|
Expect(valid).To(BeTrue())
|
|
Expect(err).NotTo(HaveOccurred())
|
|
|
|
// Cloud-proxy: api_key_env and api_key_file are mutually
|
|
// exclusive — picking both is a config bug we catch at
|
|
// load/save rather than at backend-load time.
|
|
bothKeys := ModelConfig{
|
|
Name: "both-keys",
|
|
Backend: "cloud-proxy",
|
|
Proxy: ProxyConfig{
|
|
UpstreamURL: "https://example.com/v1",
|
|
APIKeyEnv: "OPENAI_KEY",
|
|
APIKeyFile: "/run/secrets/openai",
|
|
},
|
|
}
|
|
valid, err = bothKeys.Validate()
|
|
Expect(valid).To(BeFalse())
|
|
Expect(err).To(MatchError(ContainSubstring("mutually exclusive")))
|
|
|
|
// Translate mode requires a provider — without one, the
|
|
// backend has no way to pick a wire format.
|
|
translateNoProvider := ModelConfig{
|
|
Name: "translate-no-provider",
|
|
Backend: "cloud-proxy",
|
|
Proxy: ProxyConfig{UpstreamURL: "https://example.com/v1", Mode: ProxyModeTranslate},
|
|
}
|
|
valid, err = translateNoProvider.Validate()
|
|
Expect(valid).To(BeFalse())
|
|
Expect(err).To(MatchError(ContainSubstring("translate mode requires provider")))
|
|
|
|
// Unknown mode is rejected.
|
|
badMode := ModelConfig{
|
|
Name: "bad-mode",
|
|
Backend: "cloud-proxy",
|
|
Proxy: ProxyConfig{UpstreamURL: "https://example.com/v1", Mode: "rewrite"},
|
|
}
|
|
valid, err = badMode.Validate()
|
|
Expect(valid).To(BeFalse())
|
|
Expect(err).To(MatchError(ContainSubstring("unknown mode")))
|
|
|
|
// Passthrough (default) with one key source is happy.
|
|
passthroughOK := ModelConfig{
|
|
Name: "passthrough-ok",
|
|
Backend: "cloud-proxy",
|
|
Proxy: ProxyConfig{UpstreamURL: "https://example.com/v1", APIKeyEnv: "OPENAI_KEY"},
|
|
}
|
|
valid, err = passthroughOK.Validate()
|
|
Expect(valid).To(BeTrue())
|
|
Expect(err).NotTo(HaveOccurred())
|
|
|
|
// router.score_normalization: load-time rejection of an
|
|
// unknown value. The classifier consumes it lazily, so
|
|
// without this validation a YAML typo wouldn't surface
|
|
// until the first router request panicked deep in
|
|
// NewScoreClassifier.
|
|
badNorm := ModelConfig{
|
|
Name: "bad-norm",
|
|
Router: RouterConfig{
|
|
ScoreNormalization: "men", // typo of "mean"
|
|
},
|
|
}
|
|
valid, err = badNorm.Validate()
|
|
Expect(valid).To(BeFalse())
|
|
Expect(err).To(MatchError(ContainSubstring("unknown score_normalization")))
|
|
|
|
// Accepted values pass.
|
|
for _, mode := range []string{"", ScoreNormalizationRaw, ScoreNormalizationMean} {
|
|
goodNorm := ModelConfig{
|
|
Name: "good-norm-" + mode,
|
|
Router: RouterConfig{ScoreNormalization: mode},
|
|
}
|
|
valid, err = goodNorm.Validate()
|
|
Expect(valid).To(BeTrue(), "score_normalization=%q should be accepted", mode)
|
|
Expect(err).NotTo(HaveOccurred())
|
|
}
|
|
|
|
// router.classifier_system_template: parse-time rejection
|
|
// of malformed Go templates. Same reasoning as above —
|
|
// without this the parse error wouldn't surface until
|
|
// the first router request panicked in NewScoreClassifier.
|
|
badTmpl := ModelConfig{
|
|
Name: "bad-tmpl",
|
|
Router: RouterConfig{
|
|
ClassifierSystemTemplate: "Routes: {{range .Policies",
|
|
},
|
|
}
|
|
valid, err = badTmpl.Validate()
|
|
Expect(valid).To(BeFalse())
|
|
Expect(err).To(MatchError(ContainSubstring("classifier_system_template parse error")))
|
|
|
|
// Well-formed template passes.
|
|
goodTmpl := ModelConfig{
|
|
Name: "good-tmpl",
|
|
Router: RouterConfig{
|
|
ClassifierSystemTemplate: `Routes: {{range .Policies}}{{.Label}} {{end}}`,
|
|
},
|
|
}
|
|
valid, err = goodTmpl.Validate()
|
|
Expect(valid).To(BeTrue())
|
|
Expect(err).NotTo(HaveOccurred())
|
|
|
|
// download https://raw.githubusercontent.com/mudler/LocalAI/v2.25.0/embedded/models/hermes-2-pro-mistral.yaml
|
|
httpClient := http.Client{}
|
|
resp, err := httpClient.Get("https://raw.githubusercontent.com/mudler/LocalAI/v2.25.0/embedded/models/hermes-2-pro-mistral.yaml")
|
|
Expect(err).To(BeNil())
|
|
defer resp.Body.Close()
|
|
tmp, err = os.CreateTemp("", "config.yaml")
|
|
Expect(err).To(BeNil())
|
|
defer os.Remove(tmp.Name())
|
|
_, err = io.Copy(tmp, resp.Body)
|
|
Expect(err).To(BeNil())
|
|
configs, err = readModelConfigsFromFile(tmp.Name())
|
|
config = configs[0]
|
|
Expect(err).To(BeNil())
|
|
Expect(config).ToNot(BeNil())
|
|
// two configs in config.yaml
|
|
Expect(config.Name).To(Equal("hermes-2-pro-mistral"))
|
|
valid, err = config.Validate()
|
|
Expect(err).To(BeNil())
|
|
Expect(valid).To(BeTrue())
|
|
})
|
|
})
|
|
It("Properly handles backend usecase matching", func() {
|
|
|
|
a := ModelConfig{
|
|
Name: "a",
|
|
}
|
|
Expect(a.HasUsecases(FLAG_ANY)).To(BeTrue()) // FLAG_ANY just means the config _exists_ essentially.
|
|
|
|
b := ModelConfig{
|
|
Name: "b",
|
|
Backend: "stablediffusion",
|
|
}
|
|
Expect(b.HasUsecases(FLAG_ANY)).To(BeTrue())
|
|
Expect(b.HasUsecases(FLAG_IMAGE)).To(BeTrue())
|
|
Expect(b.HasUsecases(FLAG_CHAT)).To(BeFalse())
|
|
|
|
c := ModelConfig{
|
|
Name: "c",
|
|
Backend: "llama-cpp",
|
|
TemplateConfig: TemplateConfig{
|
|
Chat: "chat",
|
|
},
|
|
}
|
|
Expect(c.HasUsecases(FLAG_ANY)).To(BeTrue())
|
|
Expect(c.HasUsecases(FLAG_IMAGE)).To(BeFalse())
|
|
Expect(c.HasUsecases(FLAG_COMPLETION)).To(BeFalse())
|
|
Expect(c.HasUsecases(FLAG_CHAT)).To(BeTrue())
|
|
|
|
d := ModelConfig{
|
|
Name: "d",
|
|
Backend: "llama-cpp",
|
|
TemplateConfig: TemplateConfig{
|
|
Chat: "chat",
|
|
Completion: "completion",
|
|
},
|
|
}
|
|
Expect(d.HasUsecases(FLAG_ANY)).To(BeTrue())
|
|
Expect(d.HasUsecases(FLAG_IMAGE)).To(BeFalse())
|
|
Expect(d.HasUsecases(FLAG_COMPLETION)).To(BeTrue())
|
|
Expect(d.HasUsecases(FLAG_CHAT)).To(BeTrue())
|
|
|
|
trueValue := true
|
|
e := ModelConfig{
|
|
Name: "e",
|
|
Backend: "llama-cpp",
|
|
TemplateConfig: TemplateConfig{
|
|
Completion: "completion",
|
|
},
|
|
Embeddings: &trueValue,
|
|
}
|
|
|
|
Expect(e.HasUsecases(FLAG_ANY)).To(BeTrue())
|
|
Expect(e.HasUsecases(FLAG_IMAGE)).To(BeFalse())
|
|
Expect(e.HasUsecases(FLAG_COMPLETION)).To(BeTrue())
|
|
Expect(e.HasUsecases(FLAG_CHAT)).To(BeFalse())
|
|
Expect(e.HasUsecases(FLAG_EMBEDDINGS)).To(BeTrue())
|
|
|
|
// Router models are chat dispatchers: no chat template of their
|
|
// own, but invoked through the chat endpoint, so they default to
|
|
// chat-capable.
|
|
r := ModelConfig{
|
|
Name: "r",
|
|
Router: RouterConfig{
|
|
Candidates: []RouterCandidate{{Model: "downstream", Labels: []string{"general"}}},
|
|
},
|
|
}
|
|
Expect(r.HasUsecases(FLAG_ANY)).To(BeTrue())
|
|
Expect(r.HasUsecases(FLAG_CHAT)).To(BeTrue())
|
|
|
|
f := ModelConfig{
|
|
Name: "f",
|
|
Backend: "piper",
|
|
}
|
|
Expect(f.HasUsecases(FLAG_ANY)).To(BeTrue())
|
|
Expect(f.HasUsecases(FLAG_TTS)).To(BeTrue())
|
|
Expect(f.HasUsecases(FLAG_CHAT)).To(BeFalse())
|
|
|
|
g := ModelConfig{
|
|
Name: "g",
|
|
Backend: "whisper",
|
|
}
|
|
Expect(g.HasUsecases(FLAG_ANY)).To(BeTrue())
|
|
Expect(g.HasUsecases(FLAG_TRANSCRIPT)).To(BeTrue())
|
|
Expect(g.HasUsecases(FLAG_TTS)).To(BeFalse())
|
|
|
|
h := ModelConfig{
|
|
Name: "h",
|
|
Backend: "transformers-musicgen",
|
|
}
|
|
Expect(h.HasUsecases(FLAG_ANY)).To(BeTrue())
|
|
Expect(h.HasUsecases(FLAG_TRANSCRIPT)).To(BeFalse())
|
|
Expect(h.HasUsecases(FLAG_TTS)).To(BeTrue())
|
|
Expect(h.HasUsecases(FLAG_SOUND_GENERATION)).To(BeTrue())
|
|
|
|
knownUsecases := FLAG_CHAT | FLAG_COMPLETION
|
|
i := ModelConfig{
|
|
Name: "i",
|
|
Backend: "whisper",
|
|
// Earlier test checks parsing, this just needs to set final values
|
|
KnownUsecases: &knownUsecases,
|
|
}
|
|
Expect(i.HasUsecases(FLAG_ANY)).To(BeTrue())
|
|
Expect(i.HasUsecases(FLAG_TRANSCRIPT)).To(BeTrue())
|
|
Expect(i.HasUsecases(FLAG_TTS)).To(BeFalse())
|
|
Expect(i.HasUsecases(FLAG_COMPLETION)).To(BeTrue())
|
|
Expect(i.HasUsecases(FLAG_CHAT)).To(BeTrue())
|
|
|
|
// Declared `known_usecases: [score]` is authoritative — the
|
|
// guessing heuristic must NOT add chat on top, even though the
|
|
// inherited chatml template would otherwise satisfy the chat
|
|
// heuristic. Score means "this model is reserved for the
|
|
// router classifier"; surfacing it as a chat model defeats the
|
|
// reservation and reintroduces the slot contention the load-time
|
|
// score/chat conflict check exists to prevent.
|
|
scoreReserved := FLAG_SCORE
|
|
j := ModelConfig{
|
|
Name: "arch-router",
|
|
Backend: "llama-cpp",
|
|
KnownUsecases: &scoreReserved,
|
|
TemplateConfig: TemplateConfig{
|
|
Chat: "inherited from chatml",
|
|
ChatMessage: "inherited from chatml",
|
|
Completion: "inherited from chatml",
|
|
},
|
|
}
|
|
Expect(j.HasUsecases(FLAG_SCORE)).To(BeTrue())
|
|
Expect(j.HasUsecases(FLAG_CHAT)).To(BeFalse())
|
|
Expect(j.HasUsecases(FLAG_COMPLETION)).To(BeFalse())
|
|
Expect(j.HasUsecases(FLAG_EMBEDDINGS)).To(BeFalse())
|
|
})
|
|
It("Test Validate with invalid MCP config", func() {
|
|
tmp, err := os.CreateTemp("", "config.yaml")
|
|
Expect(err).To(BeNil())
|
|
defer os.Remove(tmp.Name())
|
|
_, err = tmp.WriteString(
|
|
`name: test-mcp
|
|
backend: "llama-cpp"
|
|
mcp:
|
|
stdio: |
|
|
{
|
|
"mcpServers": {
|
|
"ddg": {
|
|
"command": "/docker/docker",
|
|
"args": ["run", "-i"]
|
|
}
|
|
"weather": {
|
|
"command": "/docker/docker",
|
|
"args": ["run", "-i"]
|
|
}
|
|
}
|
|
}`)
|
|
Expect(err).ToNot(HaveOccurred())
|
|
configs, err := readModelConfigsFromFile(tmp.Name())
|
|
config := configs[0]
|
|
Expect(err).To(BeNil())
|
|
Expect(config).ToNot(BeNil())
|
|
valid, err := config.Validate()
|
|
Expect(err).To(HaveOccurred())
|
|
Expect(valid).To(BeFalse())
|
|
Expect(err.Error()).To(ContainSubstring("invalid MCP configuration"))
|
|
})
|
|
It("Test Validate with valid MCP config", func() {
|
|
tmp, err := os.CreateTemp("", "config.yaml")
|
|
Expect(err).To(BeNil())
|
|
defer os.Remove(tmp.Name())
|
|
_, err = tmp.WriteString(
|
|
`name: test-mcp-valid
|
|
backend: "llama-cpp"
|
|
mcp:
|
|
stdio: |
|
|
{
|
|
"mcpServers": {
|
|
"ddg": {
|
|
"command": "/docker/docker",
|
|
"args": ["run", "-i"]
|
|
},
|
|
"weather": {
|
|
"command": "/docker/docker",
|
|
"args": ["run", "-i"]
|
|
}
|
|
}
|
|
}`)
|
|
Expect(err).ToNot(HaveOccurred())
|
|
configs, err := readModelConfigsFromFile(tmp.Name())
|
|
config := configs[0]
|
|
Expect(err).To(BeNil())
|
|
Expect(config).ToNot(BeNil())
|
|
valid, err := config.Validate()
|
|
Expect(err).To(BeNil())
|
|
Expect(valid).To(BeTrue())
|
|
})
|
|
It("Test Validate rejects unmarshalable engine_args", func() {
|
|
// chan values cannot be JSON-marshalled. A valid YAML config could
|
|
// not produce one, but a Go caller stuffing a bad value would, and
|
|
// silently dropping it would change runtime behaviour.
|
|
cfg := &ModelConfig{
|
|
Backend: "vllm",
|
|
LLMConfig: LLMConfig{
|
|
EngineArgs: map[string]any{
|
|
"speculative_config": make(chan int),
|
|
},
|
|
},
|
|
}
|
|
valid, err := cfg.Validate()
|
|
Expect(valid).To(BeFalse())
|
|
Expect(err).ToNot(BeNil())
|
|
Expect(err.Error()).To(ContainSubstring("engine_args is not JSON-serialisable"))
|
|
})
|
|
It("Test Validate accepts well-formed engine_args", func() {
|
|
cfg := &ModelConfig{
|
|
Backend: "vllm",
|
|
LLMConfig: LLMConfig{
|
|
EngineArgs: map[string]any{
|
|
"data_parallel_size": 8,
|
|
"speculative_config": map[string]any{
|
|
"method": "ngram",
|
|
"num_speculative_tokens": 4,
|
|
},
|
|
},
|
|
},
|
|
}
|
|
valid, err := cfg.Validate()
|
|
Expect(err).To(BeNil())
|
|
Expect(valid).To(BeTrue())
|
|
})
|
|
Context("ConcurrencyGroups", func() {
|
|
It("returns nil when no groups are configured", func() {
|
|
cfg := &ModelConfig{Name: "no-groups"}
|
|
Expect(cfg.GetConcurrencyGroups()).To(BeNil())
|
|
})
|
|
It("returns nil when all entries are blank", func() {
|
|
cfg := &ModelConfig{
|
|
Name: "blanks",
|
|
ConcurrencyGroups: []string{"", " ", "\t"},
|
|
}
|
|
Expect(cfg.GetConcurrencyGroups()).To(BeNil())
|
|
})
|
|
It("trims whitespace, drops empty entries, and dedupes", func() {
|
|
cfg := &ModelConfig{
|
|
Name: "messy",
|
|
ConcurrencyGroups: []string{" vram-heavy ", "", "vram-heavy", "vision", " vision "},
|
|
}
|
|
Expect(cfg.GetConcurrencyGroups()).To(Equal([]string{"vram-heavy", "vision"}))
|
|
})
|
|
It("returns a defensive copy", func() {
|
|
cfg := &ModelConfig{
|
|
Name: "copy",
|
|
ConcurrencyGroups: []string{"heavy"},
|
|
}
|
|
got := cfg.GetConcurrencyGroups()
|
|
got[0] = "tampered"
|
|
Expect(cfg.GetConcurrencyGroups()).To(Equal([]string{"heavy"}))
|
|
})
|
|
It("parses concurrency_groups from YAML", func() {
|
|
tmp, err := os.CreateTemp("", "concgroups.yaml")
|
|
Expect(err).To(BeNil())
|
|
defer func() { _ = os.Remove(tmp.Name()) }()
|
|
_, err = tmp.WriteString(
|
|
`name: heavy-a
|
|
backend: llama-cpp
|
|
parameters:
|
|
model: heavy-a.gguf
|
|
concurrency_groups:
|
|
- vram-heavy
|
|
- "120b"
|
|
`)
|
|
Expect(err).ToNot(HaveOccurred())
|
|
configs, err := readModelConfigsFromFile(tmp.Name())
|
|
Expect(err).To(BeNil())
|
|
Expect(configs).To(HaveLen(1))
|
|
Expect(configs[0].ConcurrencyGroups).To(Equal([]string{"vram-heavy", "120b"}))
|
|
Expect(configs[0].GetConcurrencyGroups()).To(Equal([]string{"vram-heavy", "120b"}))
|
|
})
|
|
})
|
|
|
|
// When templating is delegated to the backend (use_tokenizer_template),
|
|
// the backend also owns tool-call grammar generation and parsing. A
|
|
// LocalAI-generated grammar sent alongside would override the backend's
|
|
// native (name-first) tool pipeline and make it stream the tool-call JSON
|
|
// back as plain content (issue #10052). SetDefaults must therefore couple
|
|
// the two: tokenizer template implies grammar generation is disabled.
|
|
Context("use_tokenizer_template couples with grammar disable (issue #10052)", func() {
|
|
It("disables Go grammar generation when the tokenizer template is used", func() {
|
|
cfg := &ModelConfig{
|
|
TemplateConfig: TemplateConfig{UseTokenizerTemplate: true},
|
|
}
|
|
Expect(cfg.FunctionsConfig.GrammarConfig.NoGrammar).To(BeFalse())
|
|
|
|
cfg.SetDefaults()
|
|
|
|
Expect(cfg.FunctionsConfig.GrammarConfig.NoGrammar).To(BeTrue(),
|
|
"use_tokenizer_template must imply grammar.disable so tools go to the backend's native pipeline")
|
|
})
|
|
|
|
It("leaves grammar generation enabled when the tokenizer template is not used", func() {
|
|
cfg := &ModelConfig{}
|
|
|
|
cfg.SetDefaults()
|
|
|
|
Expect(cfg.FunctionsConfig.GrammarConfig.NoGrammar).To(BeFalse(),
|
|
"models that template in Go still rely on the Go-generated grammar")
|
|
})
|
|
})
|
|
})
|