diff --git a/core/http/endpoints/ollama/capabilities.go b/core/http/endpoints/ollama/capabilities.go
new file mode 100644
index 000000000..96c24651d
--- /dev/null
+++ b/core/http/endpoints/ollama/capabilities.go
@@ -0,0 +1,142 @@
+package ollama
+
+import (
+    "regexp"
+    "strings"
+
+    "github.com/mudler/LocalAI/core/config"
+)
+
+// modelCapabilities maps a LocalAI ModelConfig to the Ollama capability strings
+// (https://github.com/ollama/ollama/blob/main/docs/api.md#show-model-information).
+//
+// Ollama clients use these to decide which models are eligible for a given task
+// (e.g. only allow embedding models in an "embedding model" picker). Returning
+// an empty list makes clients assume "completion" everywhere, which is wrong
+// for embedding/rerank/audio backends (see issue #9760).
+func modelCapabilities(cfg *config.ModelConfig) []string {
+    if cfg == nil {
+        return nil
+    }
+
+    var caps []string
+
+    if cfg.HasUsecases(config.FLAG_EMBEDDINGS) {
+        caps = append(caps, "embedding")
+    }
+
+    chatCapable := cfg.HasUsecases(config.FLAG_CHAT) || cfg.HasUsecases(config.FLAG_COMPLETION)
+    if chatCapable {
+        caps = append(caps, "completion")
+    }
+
+    if chatCapable && hasVisionSupport(cfg) {
+        caps = append(caps, "vision")
+    }
+
+    if chatCapable && hasToolSupport(cfg) {
+        caps = append(caps, "tools")
+    }
+
+    if chatCapable && hasThinkingSupport(cfg) {
+        caps = append(caps, "thinking")
+    }
+
+    if chatCapable && cfg.TemplateConfig.Completion != "" {
+        caps = append(caps, "insert")
+    }
+
+    return caps
+}
+
+// hasVisionSupport reports whether the model can accept image inputs. We avoid
+// cfg.HasUsecases(FLAG_VISION) because GuessUsecases has no FLAG_VISION case
+// and returns true for any chat model (see core/config/model_config.go). Instead
+// we look for explicit signals: the KnownUsecases bit, a multimodal projector,
+// or template/backend-reported multimodal markers.
+func hasVisionSupport(cfg *config.ModelConfig) bool {
+    if cfg.KnownUsecases != nil && (*cfg.KnownUsecases&config.FLAG_VISION) == config.FLAG_VISION {
+        return true
+    }
+    if cfg.MMProj != "" {
+        return true
+    }
+    if cfg.TemplateConfig.Multimodal != "" {
+        return true
+    }
+    if cfg.MediaMarker != "" {
+        return true
+    }
+    return false
+}
+
+// hasToolSupport reports whether the model is wired up for tool / function calling.
+// We look for any of the explicit configuration knobs LocalAI uses to drive
+// function-call extraction (regex match, response regex, grammar triggers, XML
+// format) or for the auto-detected tool-format markers populated by the
+// llama.cpp backend during model load.
+func hasToolSupport(cfg *config.ModelConfig) bool {
+    fc := cfg.FunctionsConfig
+    if fc.ToolFormatMarkers != nil && fc.ToolFormatMarkers.FormatType != "" {
+        return true
+    }
+    if len(fc.JSONRegexMatch) > 0 || len(fc.ResponseRegex) > 0 {
+        return true
+    }
+    if fc.XMLFormatPreset != "" || fc.XMLFormat != nil {
+        return true
+    }
+    if len(fc.GrammarConfig.GrammarTriggers) > 0 || fc.GrammarConfig.SchemaType != "" {
+        return true
+    }
+    return false
+}
+
+// hasThinkingSupport reports whether the model has reasoning / thinking enabled.
+// LocalAI sets DisableReasoning=false (or leaves thinking markers configured)
+// when the backend probe reports that the model supports thinking.
+func hasThinkingSupport(cfg *config.ModelConfig) bool {
+    rc := cfg.ReasoningConfig
+    if rc.DisableReasoning != nil && !*rc.DisableReasoning {
+        return true
+    }
+    if len(rc.ThinkingStartTokens) > 0 || len(rc.TagPairs) > 0 {
+        // Explicit thinking markers imply support unless explicitly disabled.
+        return rc.DisableReasoning == nil || !*rc.DisableReasoning
+    }
+    return false
+}
+
+// quantRegex matches GGUF-style quantization suffixes (Q4_K_M, Q8_0, IQ3_XS, F16, ...),
+// following the convention used by GGUF tooling and reported by ggml-org/llama.cpp.
+var quantRegex = regexp.MustCompile(`(?i)(IQ\d+(?:_[A-Z0-9]+)*|Q\d+(?:_[A-Z0-9]+)*|F16|F32|BF16)`)
+
+// paramSizeRegex matches a parameter-size token surrounded by separators
+// (e.g. "-7B-", "_3b.", ".70B-"), so digits embedded in a model name
+// (e.g. the "3" in "Qwen3") are never mistaken for a size.
+var paramSizeRegex = regexp.MustCompile(`(?i)(?:^|[-_.])(\d+(?:\.\d+)?[BM])(?:[-_.]|$)`)
+
+// extractQuantizationLevel pulls the quantization tag from the model filename.
+// It returns the uppercased token (e.g. "Q4_K_M"), or "" when none is present.
+func extractQuantizationLevel(modelFile string) string {
+    if modelFile == "" {
+        return ""
+    }
+    base := strings.TrimSuffix(modelFile, ".gguf")
+    if m := quantRegex.FindString(base); m != "" {
+        return strings.ToUpper(m)
+    }
+    return ""
+}
+
+// extractParameterSize pulls the parameter count from the model filename.
+// It returns "" when no recognizable token is present.
+func extractParameterSize(modelFile string) string {
+    if modelFile == "" {
+        return ""
+    }
+    base := strings.TrimSuffix(modelFile, ".gguf")
+    if m := paramSizeRegex.FindStringSubmatch(base); len(m) > 1 {
+        return strings.ToUpper(m[1])
+    }
+    return ""
+}
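
For reference, a minimal sketch of what the mapping yields for a typical chat model with function-call extraction configured. Illustrative only: it assumes code running inside the `ollama` package with the same imports as the test file, and the field shapes mirror the test entries in capabilities_test.go below.

    // Hypothetical config: chat template plus an explicit JSON regex
    // for function-call extraction (same shapes as the tests below).
    cfg := &config.ModelConfig{
        Name:    "qwen",
        Backend: "llama-cpp",
        TemplateConfig: config.TemplateConfig{
            Chat: "{{ .Input }}",
        },
        FunctionsConfig: functions.FunctionsConfig{
            JSONRegexMatch: []string{`(?s).*`},
        },
    }
    modelCapabilities(cfg) // ["completion", "tools"]

    // Filename parsing, as pinned down by the tests below:
    extractQuantizationLevel("Qwen3-4B-Instruct-Q4_K_M.gguf") // "Q4_K_M"
    extractParameterSize("Qwen3-4B-Instruct-Q4_K_M.gguf")     // "4B"
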
diff --git a/core/http/endpoints/ollama/capabilities_test.go b/core/http/endpoints/ollama/capabilities_test.go
new file mode 100644
index 000000000..b787b2034
--- /dev/null
+++ b/core/http/endpoints/ollama/capabilities_test.go
@@ -0,0 +1,138 @@
+package ollama
+
+import (
+    "github.com/mudler/LocalAI/core/config"
+    "github.com/mudler/LocalAI/pkg/functions"
+    "github.com/mudler/LocalAI/pkg/reasoning"
+    . "github.com/onsi/ginkgo/v2"
+    . "github.com/onsi/gomega"
+)
+
+func boolPtr(b bool) *bool { return &b }
+
+func withKnownUsecases(cfg config.ModelConfig, flags ...string) config.ModelConfig {
+    cfg.KnownUsecaseStrings = flags
+    cfg.KnownUsecases = config.GetUsecasesFromYAML(flags)
+    return cfg
+}
+
+var _ = Describe("modelCapabilities", func() {
+    DescribeTable("derives Ollama capability strings from a ModelConfig",
+        func(cfg config.ModelConfig, expected []string) {
+            caps := modelCapabilities(&cfg)
+            if len(expected) == 0 {
+                Expect(caps).To(BeEmpty())
+                return
+            }
+            Expect(caps).To(ConsistOf(expected))
+        },
+        Entry("an embedding-only model exposes the embedding capability",
+            config.ModelConfig{
+                Name:       "embed-model",
+                Backend:    "llama-cpp",
+                Embeddings: boolPtr(true),
+            },
+            []string{"embedding"},
+        ),
+        Entry("a chat-template model exposes the completion capability",
+            config.ModelConfig{
+                Name:    "chat-model",
+                Backend: "llama-cpp",
+                TemplateConfig: config.TemplateConfig{
+                    Chat: "{{ .Input }}",
+                },
+            },
+            []string{"completion"},
+        ),
+        Entry("a vision-capable chat model exposes completion + vision",
+            withKnownUsecases(config.ModelConfig{
+                Name:    "vision-model",
+                Backend: "llama-cpp",
+                TemplateConfig: config.TemplateConfig{
+                    Chat:       "{{ .Input }}",
+                    Multimodal: "<__media__>",
+                },
+            }, "FLAG_CHAT", "FLAG_VISION"),
+            []string{"completion", "vision"},
+        ),
+        Entry("a model with reasoning enabled exposes the thinking capability",
+            config.ModelConfig{
+                Name:    "thinking-model",
+                Backend: "llama-cpp",
+                TemplateConfig: config.TemplateConfig{
+                    Chat: "{{ .Input }}",
+                },
+                ReasoningConfig: reasoning.Config{
+                    DisableReasoning: boolPtr(false),
+                },
+            },
+            []string{"completion", "thinking"},
+        ),
+        Entry("a model with detected tool-format markers exposes the tools capability",
+            config.ModelConfig{
+                Name:    "tools-model",
+                Backend: "llama-cpp",
+                TemplateConfig: config.TemplateConfig{
+                    Chat: "{{ .Input }}",
+                },
+                FunctionsConfig: functions.FunctionsConfig{
+                    ToolFormatMarkers: &functions.ToolFormatMarkers{FormatType: "json_native"},
+                },
+            },
+            []string{"completion", "tools"},
+        ),
+        Entry("a model with an explicit JSON regex match exposes the tools capability",
+            config.ModelConfig{
+                Name:    "tools-regex-model",
+                Backend: "llama-cpp",
+                TemplateConfig: config.TemplateConfig{
+                    Chat: "{{ .Input }}",
+                },
+                FunctionsConfig: functions.FunctionsConfig{
+                    JSONRegexMatch: []string{`(?s).*`},
+                },
+            },
+            []string{"completion", "tools"},
+        ),
+        Entry("a pure backend-only model (no template, no embeddings) reports no capabilities",
+            config.ModelConfig{
+                Name:    "rerank-model",
+                Backend: "rerankers",
+            },
+            []string{},
+        ),
+    )
+})
+
+var _ = Describe("modelDetailsFromModelConfig", func() {
+    It("reports gguf format and llama-cpp family/families for a llama-cpp model", func() {
+        cfg := config.ModelConfig{
+            Name:    "llama",
+            Backend: "llama-cpp",
+        }
+        details := modelDetailsFromModelConfig(&cfg)
+        Expect(details.Format).To(Equal("gguf"))
+        Expect(details.Family).To(Equal("llama-cpp"))
+        Expect(details.Families).To(ConsistOf("llama-cpp"))
+    })
+
+    It("extracts quantization_level from the model filename when present", func() {
+        cfg := config.ModelConfig{
+            Name:    "qwen-q4",
+            Backend: "llama-cpp",
+        }
+        cfg.Model = "Qwen3-4B-Instruct-Q4_K_M.gguf"
+        details := modelDetailsFromModelConfig(&cfg)
+        Expect(details.QuantizationLevel).To(Equal("Q4_K_M"))
+    })
+
+    It("extracts parameter_size from the model filename when present", func() {
+        cfg := config.ModelConfig{
+            Name:    "qwen-4b",
+            Backend: "llama-cpp",
+        }
+        cfg.Model = "Qwen3-4B-Instruct-Q4_K_M.gguf"
+        details := modelDetailsFromModelConfig(&cfg)
+        Expect(details.ParameterSize).To(Equal("4B"))
+    })
+})
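
These specs hang off the package's existing Ginkgo suite, so the standard invocation should pick them up (assuming the suite bootstrap already present in this package):

    go test ./core/http/endpoints/ollama/...
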
"github.com/onsi/gomega" +) + +func boolPtr(b bool) *bool { return &b } + +func withKnownUsecases(cfg config.ModelConfig, flags ...string) config.ModelConfig { + cfg.KnownUsecaseStrings = flags + cfg.KnownUsecases = config.GetUsecasesFromYAML(flags) + return cfg +} + +var _ = Describe("modelCapabilities", func() { + DescribeTable("derives Ollama capability strings from a ModelConfig", + func(cfg config.ModelConfig, expected []string) { + caps := modelCapabilities(&cfg) + if len(expected) == 0 { + Expect(caps).To(BeEmpty()) + return + } + Expect(caps).To(ConsistOf(expected)) + }, + Entry("an embedding-only model exposes the embedding capability", + config.ModelConfig{ + Name: "embed-model", + Backend: "llama-cpp", + Embeddings: boolPtr(true), + }, + []string{"embedding"}, + ), + Entry("a chat-template model exposes the completion capability", + config.ModelConfig{ + Name: "chat-model", + Backend: "llama-cpp", + TemplateConfig: config.TemplateConfig{ + Chat: "{{ .Input }}", + }, + }, + []string{"completion"}, + ), + Entry("a vision-capable chat model exposes completion + vision", + withKnownUsecases(config.ModelConfig{ + Name: "vision-model", + Backend: "llama-cpp", + TemplateConfig: config.TemplateConfig{ + Chat: "{{ .Input }}", + Multimodal: "<__media__>", + }, + }, "FLAG_CHAT", "FLAG_VISION"), + []string{"completion", "vision"}, + ), + Entry("a model with reasoning enabled exposes the thinking capability", + config.ModelConfig{ + Name: "thinking-model", + Backend: "llama-cpp", + TemplateConfig: config.TemplateConfig{ + Chat: "{{ .Input }}", + }, + ReasoningConfig: reasoning.Config{ + DisableReasoning: boolPtr(false), + }, + }, + []string{"completion", "thinking"}, + ), + Entry("a model with detected tool-format markers exposes the tools capability", + config.ModelConfig{ + Name: "tools-model", + Backend: "llama-cpp", + TemplateConfig: config.TemplateConfig{ + Chat: "{{ .Input }}", + }, + FunctionsConfig: functions.FunctionsConfig{ + ToolFormatMarkers: &functions.ToolFormatMarkers{FormatType: "json_native"}, + }, + }, + []string{"completion", "tools"}, + ), + Entry("a model with an explicit JSON regex match exposes the tools capability", + config.ModelConfig{ + Name: "tools-regex-model", + Backend: "llama-cpp", + TemplateConfig: config.TemplateConfig{ + Chat: "{{ .Input }}", + }, + FunctionsConfig: functions.FunctionsConfig{ + JSONRegexMatch: []string{`(?s).*`}, + }, + }, + []string{"completion", "tools"}, + ), + Entry("a pure backend-only model (no template, no embeddings) reports no capabilities", + config.ModelConfig{ + Name: "rerank-model", + Backend: "rerankers", + }, + []string{}, + ), + ) +}) + +var _ = Describe("modelDetailsFromModelConfig", func() { + It("reports gguf format and llama-cpp family/families for a llama-cpp model", func() { + cfg := config.ModelConfig{ + Name: "llama", + Backend: "llama-cpp", + } + details := modelDetailsFromModelConfig(&cfg) + Expect(details.Format).To(Equal("gguf")) + Expect(details.Family).To(Equal("llama-cpp")) + Expect(details.Families).To(ConsistOf("llama-cpp")) + }) + + It("extracts quantization_level from the model filename when present", func() { + cfg := config.ModelConfig{ + Name: "qwen-q4", + Backend: "llama-cpp", + } + cfg.Model = "Qwen3-4B-Instruct-Q4_K_M.gguf" + details := modelDetailsFromModelConfig(&cfg) + Expect(details.QuantizationLevel).To(Equal("Q4_K_M")) + }) + + It("extracts parameter_size from the model filename when present", func() { + cfg := config.ModelConfig{ + Name: "qwen-4b", + Backend: "llama-cpp", + } + cfg.Model = 
"Qwen3-4B-Instruct-Q4_K_M.gguf" + details := modelDetailsFromModelConfig(&cfg) + Expect(details.ParameterSize).To(Equal("4B")) + }) +}) diff --git a/core/http/endpoints/ollama/models.go b/core/http/endpoints/ollama/models.go index eb68494d5..60e58b9ea 100644 --- a/core/http/endpoints/ollama/models.go +++ b/core/http/endpoints/ollama/models.go @@ -32,13 +32,15 @@ func ListModelsEndpoint(bcl *config.ModelConfigLoader, ml *model.ModelLoader) ec digest := fmt.Sprintf("sha256:%x", sha256.Sum256([]byte(name))) + details, caps := modelMetaFromConfig(bcl, name) entry := schema.OllamaModelEntry{ - Name: ollamaName, - Model: ollamaName, - ModifiedAt: time.Now().UTC(), - Size: 0, - Digest: digest, - Details: modelDetailsFromConfig(bcl, name), + Name: ollamaName, + Model: ollamaName, + ModifiedAt: time.Now().UTC(), + Size: 0, + Digest: digest, + Details: details, + Capabilities: caps, } models = append(models, entry) } @@ -72,10 +74,12 @@ func ShowModelEndpoint(bcl *config.ModelConfigLoader) echo.HandlerFunc { } resp := schema.OllamaShowResponse{ - Modelfile: fmt.Sprintf("FROM %s", cfg.Model), - Parameters: "", - Template: cfg.TemplateConfig.Chat, - Details: modelDetailsFromModelConfig(&cfg), + Modelfile: fmt.Sprintf("FROM %s", cfg.Model), + Parameters: "", + Template: cfg.TemplateConfig.Chat, + Details: modelDetailsFromModelConfig(&cfg), + ModelInfo: modelInfoFromModelConfig(&cfg), + Capabilities: modelCapabilities(&cfg), } return c.JSON(200, resp) @@ -95,14 +99,16 @@ func ListRunningEndpoint(bcl *config.ModelConfigLoader, ml *model.ModelLoader) e ollamaName += ":latest" } + details, caps := modelMetaFromConfig(bcl, name) entry := schema.OllamaPsEntry{ - Name: ollamaName, - Model: ollamaName, - Size: 0, - Digest: fmt.Sprintf("sha256:%x", sha256.Sum256([]byte(name))), - Details: modelDetailsFromConfig(bcl, name), - ExpiresAt: time.Now().Add(24 * time.Hour).UTC(), - SizeVRAM: 0, + Name: ollamaName, + Model: ollamaName, + Size: 0, + Digest: fmt.Sprintf("sha256:%x", sha256.Sum256([]byte(name))), + Details: details, + ExpiresAt: time.Now().Add(24 * time.Hour).UTC(), + SizeVRAM: 0, + Capabilities: caps, } models = append(models, entry) } @@ -125,18 +131,46 @@ func HeartbeatEndpoint() echo.HandlerFunc { } } -func modelDetailsFromConfig(bcl *config.ModelConfigLoader, name string) schema.OllamaModelDetails { +// modelMetaFromConfig fetches the ModelConfig for `name` and derives both the +// Ollama details block and capability list. Returns zero values when the model +// is not configured. +func modelMetaFromConfig(bcl *config.ModelConfigLoader, name string) (schema.OllamaModelDetails, []string) { configName := strings.Split(name, ":")[0] cfg, exists := bcl.GetModelConfig(configName) if !exists { - return schema.OllamaModelDetails{} + return schema.OllamaModelDetails{}, nil } - return modelDetailsFromModelConfig(&cfg) + return modelDetailsFromModelConfig(&cfg), modelCapabilities(&cfg) } func modelDetailsFromModelConfig(cfg *config.ModelConfig) schema.OllamaModelDetails { - return schema.OllamaModelDetails{ - Format: "gguf", - Family: cfg.Backend, + family := cfg.Backend + details := schema.OllamaModelDetails{ + Format: "gguf", + Family: family, + ParameterSize: extractParameterSize(cfg.Model), + QuantizationLevel: extractQuantizationLevel(cfg.Model), } + if family != "" { + details.Families = []string{family} + } + return details +} + +// modelInfoFromModelConfig returns a small map of model_info entries derived +// from the LocalAI ModelConfig. 
diff --git a/core/schema/ollama.go b/core/schema/ollama.go
index d3b0f7062..68deaf416 100644
--- a/core/schema/ollama.go
+++ b/core/schema/ollama.go
@@ -184,11 +184,13 @@ func (r *OllamaShowRequest) ModelName(s *string) string {
 
 // OllamaShowResponse represents a response from the Ollama Show API
 type OllamaShowResponse struct {
-    Modelfile  string             `json:"modelfile"`
-    Parameters string             `json:"parameters"`
-    Template   string             `json:"template"`
-    License    string             `json:"license,omitempty"`
-    Details    OllamaModelDetails `json:"details"`
+    Modelfile    string             `json:"modelfile"`
+    Parameters   string             `json:"parameters"`
+    Template     string             `json:"template"`
+    License      string             `json:"license,omitempty"`
+    Details      OllamaModelDetails `json:"details"`
+    ModelInfo    map[string]any     `json:"model_info,omitempty"`
+    Capabilities []string           `json:"capabilities,omitempty"`
 }
 
 // OllamaModelDetails contains model metadata
@@ -203,12 +205,13 @@ type OllamaModelDetails struct {
 
 // OllamaModelEntry represents a model in the list response
 type OllamaModelEntry struct {
-    Name       string             `json:"name"`
-    Model      string             `json:"model"`
-    ModifiedAt time.Time          `json:"modified_at"`
-    Size       int64              `json:"size"`
-    Digest     string             `json:"digest"`
-    Details    OllamaModelDetails `json:"details"`
+    Name         string             `json:"name"`
+    Model        string             `json:"model"`
+    ModifiedAt   time.Time          `json:"modified_at"`
+    Size         int64              `json:"size"`
+    Digest       string             `json:"digest"`
+    Details      OllamaModelDetails `json:"details"`
+    Capabilities []string           `json:"capabilities,omitempty"`
 }
 
 // OllamaListResponse represents a response from the Ollama Tags API
@@ -218,13 +221,14 @@ type OllamaListResponse struct {
 
 // OllamaPsEntry represents a running model in the ps response
 type OllamaPsEntry struct {
-    Name      string             `json:"name"`
-    Model     string             `json:"model"`
-    Size      int64              `json:"size"`
-    Digest    string             `json:"digest"`
-    Details   OllamaModelDetails `json:"details"`
-    ExpiresAt time.Time          `json:"expires_at"`
-    SizeVRAM  int64              `json:"size_vram"`
+    Name         string             `json:"name"`
+    Model        string             `json:"model"`
+    Size         int64              `json:"size"`
+    Digest       string             `json:"digest"`
+    Details      OllamaModelDetails `json:"details"`
+    ExpiresAt    time.Time          `json:"expires_at"`
+    SizeVRAM     int64              `json:"size_vram"`
+    Capabilities []string           `json:"capabilities,omitempty"`
 }
 
 // OllamaPsResponse represents a response from the Ollama Ps API
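
Taken together, an /api/show response for a plain chat model can now look roughly like the following. This is a hand-written illustration, not a captured payload; the details keys follow the Ollama schema referenced above, and the model_info values are invented.

    {
      "modelfile": "FROM Qwen3-4B-Instruct-Q4_K_M.gguf",
      "parameters": "",
      "template": "{{ .Input }}",
      "details": {
        "format": "gguf",
        "family": "llama-cpp",
        "families": ["llama-cpp"],
        "parameter_size": "4B",
        "quantization_level": "Q4_K_M"
      },
      "model_info": {
        "general.architecture": "llama-cpp",
        "general.context_length": 8192
      },
      "capabilities": ["completion"]
    }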