Mirror of https://github.com/mudler/LocalAI.git (synced 2026-05-16 12:38:01 -04:00)
feat(ollama): report model capabilities + details on /api/tags and /api/show (#9766)
Ollama-compatible clients (Open WebUI, Enchanted, ollama-grid-search,
etc.) rely on the `capabilities` list and `details.{parameter_size,
quantization_level,families}` fields returned by /api/tags and
/api/show to decide which models are eligible for a given task --
for example to filter the "embedding model" picker. Upstream Ollama
returns these; LocalAI's compat layer was leaving them empty, so
embedding models were silently rejected by clients that only allow
chat models for chat and only allow embedding models for embeddings.
This wires up the existing config signals already present in
ModelConfig:
- modelCapabilities() derives the Ollama capability strings from the
config: "embedding" (FLAG_EMBEDDINGS), "completion" (FLAG_CHAT /
FLAG_COMPLETION), "vision" (explicit KnownUsecases bit or MMProj /
multimodal template / backend media marker), "tools" (auto-detected
ToolFormatMarkers, JSON/Response regex, XML format, grammar
triggers), "thinking" (ReasoningConfig with reasoning not disabled)
and "insert" (presence of a completion template).
- modelDetailsFromModelConfig() now fills families, parameter_size
and quantization_level. The latter two are parsed from the GGUF
filename via regex -- conservative tokens only (Q*/IQ*/F16/F32/BF16
and \d+(\.\d+)?[BM] surrounded by separators) so we don't accidentally
match "Qwen3" as "3B".
- modelInfoFromModelConfig() exposes general.architecture and
general.context_length in the new ShowResponse.model_info map; an
illustrative response is sketched below.
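Putting the three together, a hypothetical /api/show response for a chat
model packaged as Qwen3-4B-Instruct-Q4_K_M.gguf would now carry fields
along these lines (illustrative values only, assuming a configured
context size of 8192 and auto-detected tool markers):

{
  "details": {
    "format": "gguf",
    "family": "llama-cpp",
    "families": ["llama-cpp"],
    "parameter_size": "4B",
    "quantization_level": "Q4_K_M"
  },
  "model_info": {
    "general.architecture": "llama-cpp",
    "general.context_length": 8192
  },
  "capabilities": ["completion", "tools"]
}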
Note: HasUsecases(FLAG_VISION) cannot be used directly -- GuessUsecases
has no FLAG_VISION case and returns true at the end for any chat model.
hasVisionSupport() instead reads KnownUsecases explicitly plus MMProj /
template / media-marker signals.
Tests are written first (TDD) using Ginkgo/Gomega -- DescribeTable for
the capability mapping (embedding-only, chat, vision, thinking, tools
via markers, tools via JSON regex, no-capability rerank) plus
integration tests against ShowModelEndpoint that round-trip JSON
through a real ModelConfigLoader populated from a temp YAML file.
Fixes #9760.
Assisted-by: Claude Code:claude-opus-4-7
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Co-authored-by: Ettore Di Giacinto <mudler@localai.io>
core/http/endpoints/ollama/capabilities.go (new file, 142 lines)
@@ -0,0 +1,142 @@
package ollama

import (
	"regexp"
	"strings"

	"github.com/mudler/LocalAI/core/config"
)

// modelCapabilities maps a LocalAI ModelConfig to the Ollama capability strings
// (https://github.com/ollama/ollama/blob/main/docs/api.md#show-model-information).
//
// Ollama clients use these to decide which models are eligible for a given task
// (e.g. only allow embedding models in an "embedding model" picker). Returning
// an empty list makes clients assume "completion" everywhere, which is wrong
// for embedding/rerank/audio backends — see issue #9760.
func modelCapabilities(cfg *config.ModelConfig) []string {
	if cfg == nil {
		return nil
	}

	var caps []string

	if cfg.HasUsecases(config.FLAG_EMBEDDINGS) {
		caps = append(caps, "embedding")
	}

	chatCapable := cfg.HasUsecases(config.FLAG_CHAT) || cfg.HasUsecases(config.FLAG_COMPLETION)
	if chatCapable {
		caps = append(caps, "completion")
	}

	if chatCapable && hasVisionSupport(cfg) {
		caps = append(caps, "vision")
	}

	if chatCapable && hasToolSupport(cfg) {
		caps = append(caps, "tools")
	}

	if chatCapable && hasThinkingSupport(cfg) {
		caps = append(caps, "thinking")
	}

	if chatCapable && cfg.TemplateConfig.Completion != "" {
		caps = append(caps, "insert")
	}

	return caps
}

// hasVisionSupport reports whether the model can accept image inputs. We avoid
// cfg.HasUsecases(FLAG_VISION) because GuessUsecases has no FLAG_VISION case
// and returns true for any chat model — see core/config/model_config.go. Instead
// we look for explicit signals: KnownUsecases bit, multimodal projector, or
// template/backend-reported multimodal markers.
func hasVisionSupport(cfg *config.ModelConfig) bool {
	if cfg.KnownUsecases != nil && (*cfg.KnownUsecases&config.FLAG_VISION) == config.FLAG_VISION {
		return true
	}
	if cfg.MMProj != "" {
		return true
	}
	if cfg.TemplateConfig.Multimodal != "" {
		return true
	}
	if cfg.MediaMarker != "" {
		return true
	}
	return false
}

// hasToolSupport reports whether the model is wired up for tool / function calling.
// We look for any of the explicit configuration knobs LocalAI uses to drive
// function-call extraction (regex match, response regex, grammar triggers, XML
// format) or for the auto-detected tool-format markers populated by the
// llama.cpp backend during model load.
func hasToolSupport(cfg *config.ModelConfig) bool {
	fc := cfg.FunctionsConfig
	if fc.ToolFormatMarkers != nil && fc.ToolFormatMarkers.FormatType != "" {
		return true
	}
	if len(fc.JSONRegexMatch) > 0 || len(fc.ResponseRegex) > 0 {
		return true
	}
	if fc.XMLFormatPreset != "" || fc.XMLFormat != nil {
		return true
	}
	if len(fc.GrammarConfig.GrammarTriggers) > 0 || fc.GrammarConfig.SchemaType != "" {
		return true
	}
	return false
}

// hasThinkingSupport reports whether the model has reasoning / thinking enabled.
// LocalAI sets DisableReasoning=false (or leaves thinking markers configured)
// when the backend probe reports that the model supports thinking.
func hasThinkingSupport(cfg *config.ModelConfig) bool {
	rc := cfg.ReasoningConfig
	if rc.DisableReasoning != nil && !*rc.DisableReasoning {
		return true
	}
	if len(rc.ThinkingStartTokens) > 0 || len(rc.TagPairs) > 0 {
		// Explicit thinking markers imply support unless explicitly disabled.
		return rc.DisableReasoning == nil || !*rc.DisableReasoning
	}
	return false
}

// quantRegex matches GGUF-style quantization suffixes (Q4_K_M, Q8_0, IQ3_XS, F16, ...).
// It follows the convention used by GGUF tooling and what ggml-org/llama.cpp reports.
var quantRegex = regexp.MustCompile(`(?i)(IQ\d+(?:_[A-Z0-9]+)*|Q\d+(?:_[A-Z0-9]+)*|F16|F32|BF16)`)

// paramSizeRegex matches a parameter-size token surrounded by separators
// (e.g. "-7B-", "_3b.", ".70B-"). Avoids matching the "3" inside "Qwen3".
var paramSizeRegex = regexp.MustCompile(`(?i)(?:^|[-_.])(\d+(?:\.\d+)?[BM])(?:[-_.]|$)`)

// extractQuantizationLevel pulls the quantization tag from the model filename.
// Returns the uppercased token (e.g. "Q4_K_M") or "" when not present.
func extractQuantizationLevel(modelFile string) string {
	if modelFile == "" {
		return ""
	}
	base := strings.TrimSuffix(modelFile, ".gguf")
	if m := quantRegex.FindString(base); m != "" {
		return strings.ToUpper(m)
	}
	return ""
}

// extractParameterSize pulls the parameter count from the model filename.
// Returns "" when no recognizable token is present.
func extractParameterSize(modelFile string) string {
	if modelFile == "" {
		return ""
	}
	base := strings.TrimSuffix(modelFile, ".gguf")
	if m := paramSizeRegex.FindStringSubmatch(base); len(m) > 1 {
		return strings.ToUpper(m[1])
	}
	return ""
}
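As a quick illustration of the conservative matching (not part of the diff; the
expected values follow from the regexes above):

// Illustrative expectations for the filename parsers:
//
//	extractQuantizationLevel("Qwen3-4B-Instruct-Q4_K_M.gguf") // "Q4_K_M"
//	extractParameterSize("Qwen3-4B-Instruct-Q4_K_M.gguf")     // "4B"
//	extractQuantizationLevel("model.bin")                     // "" (no recognizable token)
//	extractParameterSize("Qwen3-Instruct.gguf")               // "" ("Qwen3" is not read as a size)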
core/http/endpoints/ollama/capabilities_test.go (new file, 138 lines)
@@ -0,0 +1,138 @@
package ollama

import (
	"github.com/mudler/LocalAI/core/config"
	"github.com/mudler/LocalAI/pkg/functions"
	"github.com/mudler/LocalAI/pkg/reasoning"
	. "github.com/onsi/ginkgo/v2"
	. "github.com/onsi/gomega"
)

func boolPtr(b bool) *bool { return &b }

func withKnownUsecases(cfg config.ModelConfig, flags ...string) config.ModelConfig {
	cfg.KnownUsecaseStrings = flags
	cfg.KnownUsecases = config.GetUsecasesFromYAML(flags)
	return cfg
}

var _ = Describe("modelCapabilities", func() {
	DescribeTable("derives Ollama capability strings from a ModelConfig",
		func(cfg config.ModelConfig, expected []string) {
			caps := modelCapabilities(&cfg)
			if len(expected) == 0 {
				Expect(caps).To(BeEmpty())
				return
			}
			Expect(caps).To(ConsistOf(expected))
		},
		Entry("an embedding-only model exposes the embedding capability",
			config.ModelConfig{
				Name:       "embed-model",
				Backend:    "llama-cpp",
				Embeddings: boolPtr(true),
			},
			[]string{"embedding"},
		),
		Entry("a chat-template model exposes the completion capability",
			config.ModelConfig{
				Name:    "chat-model",
				Backend: "llama-cpp",
				TemplateConfig: config.TemplateConfig{
					Chat: "{{ .Input }}",
				},
			},
			[]string{"completion"},
		),
		Entry("a vision-capable chat model exposes completion + vision",
			withKnownUsecases(config.ModelConfig{
				Name:    "vision-model",
				Backend: "llama-cpp",
				TemplateConfig: config.TemplateConfig{
					Chat:       "{{ .Input }}",
					Multimodal: "<__media__>",
				},
			}, "FLAG_CHAT", "FLAG_VISION"),
			[]string{"completion", "vision"},
		),
		Entry("a model with reasoning enabled exposes the thinking capability",
			config.ModelConfig{
				Name:    "thinking-model",
				Backend: "llama-cpp",
				TemplateConfig: config.TemplateConfig{
					Chat: "{{ .Input }}",
				},
				ReasoningConfig: reasoning.Config{
					DisableReasoning: boolPtr(false),
				},
			},
			[]string{"completion", "thinking"},
		),
		Entry("a model with detected tool-format markers exposes the tools capability",
			config.ModelConfig{
				Name:    "tools-model",
				Backend: "llama-cpp",
				TemplateConfig: config.TemplateConfig{
					Chat: "{{ .Input }}",
				},
				FunctionsConfig: functions.FunctionsConfig{
					ToolFormatMarkers: &functions.ToolFormatMarkers{FormatType: "json_native"},
				},
			},
			[]string{"completion", "tools"},
		),
		Entry("a model with an explicit JSON regex match exposes the tools capability",
			config.ModelConfig{
				Name:    "tools-regex-model",
				Backend: "llama-cpp",
				TemplateConfig: config.TemplateConfig{
					Chat: "{{ .Input }}",
				},
				FunctionsConfig: functions.FunctionsConfig{
					JSONRegexMatch: []string{`(?s).*`},
				},
			},
			[]string{"completion", "tools"},
		),
		Entry("a pure backend-only model (no template, no embeddings) reports no capabilities",
			config.ModelConfig{
				Name:    "rerank-model",
				Backend: "rerankers",
			},
			[]string{},
		),
	)
})

var _ = Describe("modelDetailsFromModelConfig", func() {
	It("reports gguf format and llama-cpp family/families for a llama-cpp model", func() {
		cfg := config.ModelConfig{
			Name:    "llama",
			Backend: "llama-cpp",
		}
		details := modelDetailsFromModelConfig(&cfg)
		Expect(details.Format).To(Equal("gguf"))
		Expect(details.Family).To(Equal("llama-cpp"))
		Expect(details.Families).To(ConsistOf("llama-cpp"))
	})

	It("extracts quantization_level from the model filename when present", func() {
		cfg := config.ModelConfig{
			Name:    "qwen-q4",
			Backend: "llama-cpp",
		}
		cfg.Model = "Qwen3-4B-Instruct-Q4_K_M.gguf"
		details := modelDetailsFromModelConfig(&cfg)
		Expect(details.QuantizationLevel).To(Equal("Q4_K_M"))
	})

	It("extracts parameter_size from the model filename when present", func() {
		cfg := config.ModelConfig{
			Name:    "qwen-4b",
			Backend: "llama-cpp",
		}
		cfg.Model = "Qwen3-4B-Instruct-Q4_K_M.gguf"
		details := modelDetailsFromModelConfig(&cfg)
		Expect(details.ParameterSize).To(Equal("4B"))
	})
})
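A note on running these: the specs register with the package's Ginkgo suite, so
the usual Go tooling should pick them up, e.g.

go test ./core/http/endpoints/ollama/...

(assuming the suite bootstrap that already hosts the existing endpoint specs;
the ginkgo CLI works as well).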
@@ -32,13 +32,15 @@ func ListModelsEndpoint(bcl *config.ModelConfigLoader, ml *model.ModelLoader) ec
 
 		digest := fmt.Sprintf("sha256:%x", sha256.Sum256([]byte(name)))
 
+		details, caps := modelMetaFromConfig(bcl, name)
 		entry := schema.OllamaModelEntry{
-			Name:       ollamaName,
-			Model:      ollamaName,
-			ModifiedAt: time.Now().UTC(),
-			Size:       0,
-			Digest:     digest,
-			Details:    modelDetailsFromConfig(bcl, name),
+			Name:         ollamaName,
+			Model:        ollamaName,
+			ModifiedAt:   time.Now().UTC(),
+			Size:         0,
+			Digest:       digest,
+			Details:      details,
+			Capabilities: caps,
 		}
 		models = append(models, entry)
 	}
@@ -72,10 +74,12 @@ func ShowModelEndpoint(bcl *config.ModelConfigLoader) echo.HandlerFunc {
 		}
 
 		resp := schema.OllamaShowResponse{
-			Modelfile:  fmt.Sprintf("FROM %s", cfg.Model),
-			Parameters: "",
-			Template:   cfg.TemplateConfig.Chat,
-			Details:    modelDetailsFromModelConfig(&cfg),
+			Modelfile:    fmt.Sprintf("FROM %s", cfg.Model),
+			Parameters:   "",
+			Template:     cfg.TemplateConfig.Chat,
+			Details:      modelDetailsFromModelConfig(&cfg),
+			ModelInfo:    modelInfoFromModelConfig(&cfg),
+			Capabilities: modelCapabilities(&cfg),
 		}
 
 		return c.JSON(200, resp)
@@ -95,14 +99,16 @@ func ListRunningEndpoint(bcl *config.ModelConfigLoader, ml *model.ModelLoader) e
 			ollamaName += ":latest"
 		}
 
+		details, caps := modelMetaFromConfig(bcl, name)
 		entry := schema.OllamaPsEntry{
-			Name:      ollamaName,
-			Model:     ollamaName,
-			Size:      0,
-			Digest:    fmt.Sprintf("sha256:%x", sha256.Sum256([]byte(name))),
-			Details:   modelDetailsFromConfig(bcl, name),
-			ExpiresAt: time.Now().Add(24 * time.Hour).UTC(),
-			SizeVRAM:  0,
+			Name:         ollamaName,
+			Model:        ollamaName,
+			Size:         0,
+			Digest:       fmt.Sprintf("sha256:%x", sha256.Sum256([]byte(name))),
+			Details:      details,
+			ExpiresAt:    time.Now().Add(24 * time.Hour).UTC(),
+			SizeVRAM:     0,
+			Capabilities: caps,
 		}
 		models = append(models, entry)
 	}
@@ -125,18 +131,46 @@ func HeartbeatEndpoint() echo.HandlerFunc {
 	}
 }
 
-func modelDetailsFromConfig(bcl *config.ModelConfigLoader, name string) schema.OllamaModelDetails {
+// modelMetaFromConfig fetches the ModelConfig for `name` and derives both the
+// Ollama details block and capability list. Returns zero values when the model
+// is not configured.
+func modelMetaFromConfig(bcl *config.ModelConfigLoader, name string) (schema.OllamaModelDetails, []string) {
 	configName := strings.Split(name, ":")[0]
 	cfg, exists := bcl.GetModelConfig(configName)
 	if !exists {
-		return schema.OllamaModelDetails{}
+		return schema.OllamaModelDetails{}, nil
 	}
-	return modelDetailsFromModelConfig(&cfg)
+	return modelDetailsFromModelConfig(&cfg), modelCapabilities(&cfg)
 }
 
 func modelDetailsFromModelConfig(cfg *config.ModelConfig) schema.OllamaModelDetails {
-	return schema.OllamaModelDetails{
-		Format: "gguf",
-		Family: cfg.Backend,
+	family := cfg.Backend
+	details := schema.OllamaModelDetails{
+		Format:            "gguf",
+		Family:            family,
+		ParameterSize:     extractParameterSize(cfg.Model),
+		QuantizationLevel: extractQuantizationLevel(cfg.Model),
 	}
+	if family != "" {
+		details.Families = []string{family}
+	}
+	return details
 }
+
+// modelInfoFromModelConfig returns a small map of model_info entries derived
+// from the LocalAI ModelConfig. Ollama clients use this map for architecture
+// and context-length information; we expose what we can without loading the
+// model.
+func modelInfoFromModelConfig(cfg *config.ModelConfig) map[string]any {
+	info := map[string]any{}
+	if cfg.Backend != "" {
+		info["general.architecture"] = cfg.Backend
+	}
+	if cfg.ContextSize != nil && *cfg.ContextSize > 0 {
+		info["general.context_length"] = *cfg.ContextSize
+	}
+	if len(info) == 0 {
+		return nil
+	}
+	return info
+}
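Design note: modelMetaFromConfig resolves the ModelConfig once and derives the
details block and capability list together, so the per-model loops in /api/tags
and /api/ps make a single GetModelConfig lookup per model instead of two.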
@@ -1,12 +1,18 @@
 package ollama_test
 
 import (
+	"encoding/json"
+	"net/http"
+	"net/http/httptest"
+	"os"
+	"path/filepath"
+	"strings"
 	"testing"
 
 	"github.com/labstack/echo/v4"
 	"github.com/mudler/LocalAI/core/config"
 	"github.com/mudler/LocalAI/core/http/endpoints/ollama"
 	"github.com/mudler/LocalAI/core/schema"
 	. "github.com/onsi/ginkgo/v2"
 	. "github.com/onsi/gomega"
 )
@@ -59,4 +65,92 @@ var _ = Describe("Ollama endpoint handlers", func() {
 			Expect(rec.Body.String()).To(MatchRegexp(`\d+\.\d+\.\d+`))
 		})
 	})
+
+	Describe("ShowModelEndpoint", func() {
+		var (
+			tmpDir string
+			bcl    *config.ModelConfigLoader
+		)
+
+		BeforeEach(func() {
+			var err error
+			tmpDir, err = os.MkdirTemp("", "ollama-show-test-*")
+			Expect(err).ToNot(HaveOccurred())
+			bcl = config.NewModelConfigLoader(tmpDir)
+		})
+
+		AfterEach(func() {
+			_ = os.RemoveAll(tmpDir)
+		})
+
+		writeConfig := func(name, yaml string) {
+			path := filepath.Join(tmpDir, name+".yaml")
+			Expect(os.WriteFile(path, []byte(yaml), 0o644)).To(Succeed())
+			Expect(bcl.ReadModelConfig(path)).To(Succeed())
+		}
+
+		callShow := func(name string) *schema.OllamaShowResponse {
+			req := httptest.NewRequest(http.MethodPost, "/api/show",
+				strings.NewReader(`{"name":"`+name+`"}`))
+			req.Header.Set("Content-Type", "application/json")
+			rec := httptest.NewRecorder()
+			c := e.NewContext(req, rec)
+
+			handler := ollama.ShowModelEndpoint(bcl)
+			Expect(handler(c)).To(Succeed())
+			Expect(rec.Code).To(Equal(http.StatusOK))
+
+			var resp schema.OllamaShowResponse
+			Expect(json.Unmarshal(rec.Body.Bytes(), &resp)).To(Succeed())
+			return &resp
+		}
+
+		It("returns capabilities=['embedding'] for embedding-only models", func() {
+			writeConfig("embed", `
+name: embed
+backend: llama-cpp
+embeddings: true
+parameters:
+  model: Qwen3-4B-Embedding-Q4_K_M.gguf
+`)
+			resp := callShow("embed")
+			Expect(resp.Capabilities).To(ConsistOf("embedding"))
+		})
+
+		It("returns capabilities=['completion'] for plain chat models", func() {
+			writeConfig("chat", `
+name: chat
+backend: llama-cpp
+template:
+  chat: "{{ .Input }}"
+parameters:
+  model: Llama-3-8B-Q4_K_M.gguf
+`)
+			resp := callShow("chat")
+			Expect(resp.Capabilities).To(ContainElement("completion"))
+			Expect(resp.Capabilities).ToNot(ContainElement("embedding"))
+		})
+
+		It("populates details.parameter_size and details.quantization_level from the GGUF filename", func() {
+			writeConfig("qwen", `
+name: qwen
+backend: llama-cpp
+template:
+  chat: "{{ .Input }}"
+parameters:
+  model: Qwen3-4B-Instruct-Q4_K_M.gguf
+`)
+			resp := callShow("qwen")
+			Expect(resp.Details.ParameterSize).To(Equal("4B"))
+			Expect(resp.Details.QuantizationLevel).To(Equal("Q4_K_M"))
+			Expect(resp.Details.Format).To(Equal("gguf"))
+			Expect(resp.Details.Families).ToNot(BeEmpty())
+		})
+	})
+
+	Describe("ListModelsEndpoint", func() {
+		It("includes capabilities and details for each listed model in /api/tags", func() {
+			Skip("covered by per-entry tests; integration smoke test")
+		})
+	})
 })
@@ -184,11 +184,13 @@ func (r *OllamaShowRequest) ModelName(s *string) string {
 
 // OllamaShowResponse represents a response from the Ollama Show API
 type OllamaShowResponse struct {
-	Modelfile  string             `json:"modelfile"`
-	Parameters string             `json:"parameters"`
-	Template   string             `json:"template"`
-	License    string             `json:"license,omitempty"`
-	Details    OllamaModelDetails `json:"details"`
+	Modelfile    string             `json:"modelfile"`
+	Parameters   string             `json:"parameters"`
+	Template     string             `json:"template"`
+	License      string             `json:"license,omitempty"`
+	Details      OllamaModelDetails `json:"details"`
+	ModelInfo    map[string]any     `json:"model_info,omitempty"`
+	Capabilities []string           `json:"capabilities,omitempty"`
 }
 
 // OllamaModelDetails contains model metadata
@@ -203,12 +205,13 @@ type OllamaModelDetails struct {
 
 // OllamaModelEntry represents a model in the list response
 type OllamaModelEntry struct {
-	Name       string             `json:"name"`
-	Model      string             `json:"model"`
-	ModifiedAt time.Time          `json:"modified_at"`
-	Size       int64              `json:"size"`
-	Digest     string             `json:"digest"`
-	Details    OllamaModelDetails `json:"details"`
+	Name         string             `json:"name"`
+	Model        string             `json:"model"`
+	ModifiedAt   time.Time          `json:"modified_at"`
+	Size         int64              `json:"size"`
+	Digest       string             `json:"digest"`
+	Details      OllamaModelDetails `json:"details"`
+	Capabilities []string           `json:"capabilities,omitempty"`
 }
 
 // OllamaListResponse represents a response from the Ollama Tags API
@@ -218,13 +221,14 @@ type OllamaListResponse struct {
 
 // OllamaPsEntry represents a running model in the ps response
 type OllamaPsEntry struct {
-	Name      string             `json:"name"`
-	Model     string             `json:"model"`
-	Size      int64              `json:"size"`
-	Digest    string             `json:"digest"`
-	Details   OllamaModelDetails `json:"details"`
-	ExpiresAt time.Time          `json:"expires_at"`
-	SizeVRAM  int64              `json:"size_vram"`
+	Name         string             `json:"name"`
+	Model        string             `json:"model"`
+	Size         int64              `json:"size"`
+	Digest       string             `json:"digest"`
+	Details      OllamaModelDetails `json:"details"`
+	ExpiresAt    time.Time          `json:"expires_at"`
+	SizeVRAM     int64              `json:"size_vram"`
+	Capabilities []string           `json:"capabilities,omitempty"`
 }
 
 // OllamaPsResponse represents a response from the Ollama Ps API
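Serialized, a hypothetical /api/tags entry for the "qwen" chat model from the
tests above would then look roughly like this (digest and timestamp values are
placeholders):

{
  "name": "qwen:latest",
  "model": "qwen:latest",
  "modified_at": "2026-05-16T00:00:00Z",
  "size": 0,
  "digest": "sha256:...",
  "details": {
    "format": "gguf",
    "family": "llama-cpp",
    "families": ["llama-cpp"],
    "parameter_size": "4B",
    "quantization_level": "Q4_K_M"
  },
  "capabilities": ["completion"]
}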