diff --git a/core/http/endpoints/ollama/capabilities.go b/core/http/endpoints/ollama/capabilities.go
new file mode 100644
index 000000000..96c24651d
--- /dev/null
+++ b/core/http/endpoints/ollama/capabilities.go
@@ -0,0 +1,142 @@
+package ollama
+
+import (
+    "regexp"
+    "strings"
+
+    "github.com/mudler/LocalAI/core/config"
+)
+
+// modelCapabilities maps a LocalAI ModelConfig to the Ollama capability strings
+// (https://github.com/ollama/ollama/blob/main/docs/api.md#show-model-information).
+//
+// Ollama clients use these to decide which models are eligible for a given task
+// (e.g. only allow embedding models in an "embedding model" picker). Returning
+// an empty list makes clients assume "completion" everywhere, which is wrong
+// for embedding/rerank/audio backends (see issue #9760).
+func modelCapabilities(cfg *config.ModelConfig) []string {
+    if cfg == nil {
+        return nil
+    }
+
+    var caps []string
+
+    if cfg.HasUsecases(config.FLAG_EMBEDDINGS) {
+        caps = append(caps, "embedding")
+    }
+
+    chatCapable := cfg.HasUsecases(config.FLAG_CHAT) || cfg.HasUsecases(config.FLAG_COMPLETION)
+    if chatCapable {
+        caps = append(caps, "completion")
+    }
+
+    if chatCapable && hasVisionSupport(cfg) {
+        caps = append(caps, "vision")
+    }
+
+    if chatCapable && hasToolSupport(cfg) {
+        caps = append(caps, "tools")
+    }
+
+    if chatCapable && hasThinkingSupport(cfg) {
+        caps = append(caps, "thinking")
+    }
+
+    if chatCapable && cfg.TemplateConfig.Completion != "" {
+        caps = append(caps, "insert")
+    }
+
+    return caps
+}
+
+// hasVisionSupport reports whether the model can accept image inputs. We avoid
+// cfg.HasUsecases(FLAG_VISION) because GuessUsecases has no FLAG_VISION case
+// and returns true for any chat model (see core/config/model_config.go). Instead
+// we look for explicit signals: the KnownUsecases bit, a multimodal projector,
+// or template/backend-reported multimodal markers.
+func hasVisionSupport(cfg *config.ModelConfig) bool {
+    if cfg.KnownUsecases != nil && (*cfg.KnownUsecases&config.FLAG_VISION) == config.FLAG_VISION {
+        return true
+    }
+    if cfg.MMProj != "" {
+        return true
+    }
+    if cfg.TemplateConfig.Multimodal != "" {
+        return true
+    }
+    if cfg.MediaMarker != "" {
+        return true
+    }
+    return false
+}
+
+// hasToolSupport reports whether the model is wired up for tool / function calling.
+// We look for any of the explicit configuration knobs LocalAI uses to drive
+// function-call extraction (regex match, response regex, grammar triggers, XML
+// format) or for the auto-detected tool-format markers populated by the
+// llama.cpp backend during model load.
+func hasToolSupport(cfg *config.ModelConfig) bool {
+    fc := cfg.FunctionsConfig
+    if fc.ToolFormatMarkers != nil && fc.ToolFormatMarkers.FormatType != "" {
+        return true
+    }
+    if len(fc.JSONRegexMatch) > 0 || len(fc.ResponseRegex) > 0 {
+        return true
+    }
+    if fc.XMLFormatPreset != "" || fc.XMLFormat != nil {
+        return true
+    }
+    if len(fc.GrammarConfig.GrammarTriggers) > 0 || fc.GrammarConfig.SchemaType != "" {
+        return true
+    }
+    return false
+}
+
+// hasThinkingSupport reports whether the model has reasoning / thinking enabled.
+// LocalAI sets DisableReasoning=false (or leaves thinking markers configured)
+// when the backend probe reports that the model supports thinking.
+func hasThinkingSupport(cfg *config.ModelConfig) bool {
+    rc := cfg.ReasoningConfig
+    if rc.DisableReasoning != nil && !*rc.DisableReasoning {
+        return true
+    }
+    if len(rc.ThinkingStartTokens) > 0 || len(rc.TagPairs) > 0 {
+        // Explicit thinking markers imply support unless explicitly disabled.
+        return rc.DisableReasoning == nil || !*rc.DisableReasoning
+    }
+    return false
+}
+
+// quantRegex matches GGUF-style quantization suffixes (Q4_K_M, Q8_0, IQ3_XS, F16, ...),
+// following the convention used by GGUF tooling and reported by ggml-org/llama.cpp.
+var quantRegex = regexp.MustCompile(`(?i)(IQ\d+(?:_[A-Z0-9]+)*|Q\d+(?:_[A-Z0-9]+)*|F16|F32|BF16)`)
+
+// paramSizeRegex matches a parameter-size token surrounded by separators
+// (e.g. "-7B-", "_3b.", ".70B-"), so digits embedded in a model name
+// (e.g. the "3" in "Qwen3") are never mistaken for a size.
+var paramSizeRegex = regexp.MustCompile(`(?i)(?:^|[-_.])(\d+(?:\.\d+)?[BM])(?:[-_.]|$)`)
+
+// extractQuantizationLevel pulls the quantization tag from the model filename.
+// It returns the uppercased token (e.g. "Q4_K_M"), or "" when none is present.
+func extractQuantizationLevel(modelFile string) string {
+    if modelFile == "" {
+        return ""
+    }
+    base := strings.TrimSuffix(modelFile, ".gguf")
+    if m := quantRegex.FindString(base); m != "" {
+        return strings.ToUpper(m)
+    }
+    return ""
+}
+
+// extractParameterSize pulls the parameter count from the model filename.
+// It returns "" when no recognizable token is present.
+func extractParameterSize(modelFile string) string {
+    if modelFile == "" {
+        return ""
+    }
+    base := strings.TrimSuffix(modelFile, ".gguf")
+    if m := paramSizeRegex.FindStringSubmatch(base); len(m) > 1 {
+        return strings.ToUpper(m[1])
+    }
+    return ""
+}
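
For reference, a minimal sketch of what the mapping yields for a typical chat model with function-call extraction configured. Illustrative only: it assumes code running inside the `ollama` package with the same imports as the test file, and the field shapes mirror the test entries in capabilities_test.go below.

    // Hypothetical config: chat template plus an explicit JSON regex
    // for function-call extraction (same shapes as the tests below).
    cfg := &config.ModelConfig{
        Name:    "qwen",
        Backend: "llama-cpp",
        TemplateConfig: config.TemplateConfig{
            Chat: "{{ .Input }}",
        },
        FunctionsConfig: functions.FunctionsConfig{
            JSONRegexMatch: []string{`(?s).*`},
        },
    }
    modelCapabilities(cfg) // ["completion", "tools"]

    // Filename parsing, as pinned down by the tests below:
    extractQuantizationLevel("Qwen3-4B-Instruct-Q4_K_M.gguf") // "Q4_K_M"
    extractParameterSize("Qwen3-4B-Instruct-Q4_K_M.gguf")     // "4B"
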
diff --git a/core/http/endpoints/ollama/capabilities_test.go b/core/http/endpoints/ollama/capabilities_test.go
new file mode 100644
index 000000000..b787b2034
--- /dev/null
+++ b/core/http/endpoints/ollama/capabilities_test.go
@@ -0,0 +1,138 @@
+package ollama
+
+import (
+    "github.com/mudler/LocalAI/core/config"
+    "github.com/mudler/LocalAI/pkg/functions"
+    "github.com/mudler/LocalAI/pkg/reasoning"
+    . "github.com/onsi/ginkgo/v2"
+    . "github.com/onsi/gomega"
+)
+
+func boolPtr(b bool) *bool { return &b }
+
+func withKnownUsecases(cfg config.ModelConfig, flags ...string) config.ModelConfig {
+    cfg.KnownUsecaseStrings = flags
+    cfg.KnownUsecases = config.GetUsecasesFromYAML(flags)
+    return cfg
+}
+
+var _ = Describe("modelCapabilities", func() {
+    DescribeTable("derives Ollama capability strings from a ModelConfig",
+        func(cfg config.ModelConfig, expected []string) {
+            caps := modelCapabilities(&cfg)
+            if len(expected) == 0 {
+                Expect(caps).To(BeEmpty())
+                return
+            }
+            Expect(caps).To(ConsistOf(expected))
+        },
+        Entry("an embedding-only model exposes the embedding capability",
+            config.ModelConfig{
+                Name:       "embed-model",
+                Backend:    "llama-cpp",
+                Embeddings: boolPtr(true),
+            },
+            []string{"embedding"},
+        ),
+        Entry("a chat-template model exposes the completion capability",
+            config.ModelConfig{
+                Name:    "chat-model",
+                Backend: "llama-cpp",
+                TemplateConfig: config.TemplateConfig{
+                    Chat: "{{ .Input }}",
+                },
+            },
+            []string{"completion"},
+        ),
+        Entry("a vision-capable chat model exposes completion + vision",
+            withKnownUsecases(config.ModelConfig{
+                Name:    "vision-model",
+                Backend: "llama-cpp",
+                TemplateConfig: config.TemplateConfig{
+                    Chat:       "{{ .Input }}",
+                    Multimodal: "<__media__>",
+                },
+            }, "FLAG_CHAT", "FLAG_VISION"),
+            []string{"completion", "vision"},
+        ),
+        Entry("a model with reasoning enabled exposes the thinking capability",
+            config.ModelConfig{
+                Name:    "thinking-model",
+                Backend: "llama-cpp",
+                TemplateConfig: config.TemplateConfig{
+                    Chat: "{{ .Input }}",
+                },
+                ReasoningConfig: reasoning.Config{
+                    DisableReasoning: boolPtr(false),
+                },
+            },
+            []string{"completion", "thinking"},
+        ),
+        Entry("a model with detected tool-format markers exposes the tools capability",
+            config.ModelConfig{
+                Name:    "tools-model",
+                Backend: "llama-cpp",
+                TemplateConfig: config.TemplateConfig{
+                    Chat: "{{ .Input }}",
+                },
+                FunctionsConfig: functions.FunctionsConfig{
+                    ToolFormatMarkers: &functions.ToolFormatMarkers{FormatType: "json_native"},
+                },
+            },
+            []string{"completion", "tools"},
+        ),
+        Entry("a model with an explicit JSON regex match exposes the tools capability",
+            config.ModelConfig{
+                Name:    "tools-regex-model",
+                Backend: "llama-cpp",
+                TemplateConfig: config.TemplateConfig{
+                    Chat: "{{ .Input }}",
+                },
+                FunctionsConfig: functions.FunctionsConfig{
+                    JSONRegexMatch: []string{`(?s).*`},
+                },
+            },
+            []string{"completion", "tools"},
+        ),
+        Entry("a pure backend-only model (no template, no embeddings) reports no capabilities",
+            config.ModelConfig{
+                Name:    "rerank-model",
+                Backend: "rerankers",
+            },
+            []string{},
+        ),
+    )
+})
+
+var _ = Describe("modelDetailsFromModelConfig", func() {
+    It("reports gguf format and llama-cpp family/families for a llama-cpp model", func() {
+        cfg := config.ModelConfig{
+            Name:    "llama",
+            Backend: "llama-cpp",
+        }
+        details := modelDetailsFromModelConfig(&cfg)
+        Expect(details.Format).To(Equal("gguf"))
+        Expect(details.Family).To(Equal("llama-cpp"))
+        Expect(details.Families).To(ConsistOf("llama-cpp"))
+    })
+
+    It("extracts quantization_level from the model filename when present", func() {
+        cfg := config.ModelConfig{
+            Name:    "qwen-q4",
+            Backend: "llama-cpp",
+        }
+        cfg.Model = "Qwen3-4B-Instruct-Q4_K_M.gguf"
+        details := modelDetailsFromModelConfig(&cfg)
+        Expect(details.QuantizationLevel).To(Equal("Q4_K_M"))
+    })
+
+    It("extracts parameter_size from the model filename when present", func() {
+        cfg := config.ModelConfig{
+            Name:    "qwen-4b",
+            Backend: "llama-cpp",
+        }
+        cfg.Model = "Qwen3-4B-Instruct-Q4_K_M.gguf"
+        details := modelDetailsFromModelConfig(&cfg)
+        Expect(details.ParameterSize).To(Equal("4B"))
+    })
+})
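
These specs hang off the package's existing Ginkgo suite, so the standard invocation should pick them up (assuming the suite bootstrap already present in this package):

    go test ./core/http/endpoints/ollama/...
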
"github.com/onsi/gomega" +) + +func boolPtr(b bool) *bool { return &b } + +func withKnownUsecases(cfg config.ModelConfig, flags ...string) config.ModelConfig { + cfg.KnownUsecaseStrings = flags + cfg.KnownUsecases = config.GetUsecasesFromYAML(flags) + return cfg +} + +var _ = Describe("modelCapabilities", func() { + DescribeTable("derives Ollama capability strings from a ModelConfig", + func(cfg config.ModelConfig, expected []string) { + caps := modelCapabilities(&cfg) + if len(expected) == 0 { + Expect(caps).To(BeEmpty()) + return + } + Expect(caps).To(ConsistOf(expected)) + }, + Entry("an embedding-only model exposes the embedding capability", + config.ModelConfig{ + Name: "embed-model", + Backend: "llama-cpp", + Embeddings: boolPtr(true), + }, + []string{"embedding"}, + ), + Entry("a chat-template model exposes the completion capability", + config.ModelConfig{ + Name: "chat-model", + Backend: "llama-cpp", + TemplateConfig: config.TemplateConfig{ + Chat: "{{ .Input }}", + }, + }, + []string{"completion"}, + ), + Entry("a vision-capable chat model exposes completion + vision", + withKnownUsecases(config.ModelConfig{ + Name: "vision-model", + Backend: "llama-cpp", + TemplateConfig: config.TemplateConfig{ + Chat: "{{ .Input }}", + Multimodal: "<__media__>", + }, + }, "FLAG_CHAT", "FLAG_VISION"), + []string{"completion", "vision"}, + ), + Entry("a model with reasoning enabled exposes the thinking capability", + config.ModelConfig{ + Name: "thinking-model", + Backend: "llama-cpp", + TemplateConfig: config.TemplateConfig{ + Chat: "{{ .Input }}", + }, + ReasoningConfig: reasoning.Config{ + DisableReasoning: boolPtr(false), + }, + }, + []string{"completion", "thinking"}, + ), + Entry("a model with detected tool-format markers exposes the tools capability", + config.ModelConfig{ + Name: "tools-model", + Backend: "llama-cpp", + TemplateConfig: config.TemplateConfig{ + Chat: "{{ .Input }}", + }, + FunctionsConfig: functions.FunctionsConfig{ + ToolFormatMarkers: &functions.ToolFormatMarkers{FormatType: "json_native"}, + }, + }, + []string{"completion", "tools"}, + ), + Entry("a model with an explicit JSON regex match exposes the tools capability", + config.ModelConfig{ + Name: "tools-regex-model", + Backend: "llama-cpp", + TemplateConfig: config.TemplateConfig{ + Chat: "{{ .Input }}", + }, + FunctionsConfig: functions.FunctionsConfig{ + JSONRegexMatch: []string{`(?s).*`}, + }, + }, + []string{"completion", "tools"}, + ), + Entry("a pure backend-only model (no template, no embeddings) reports no capabilities", + config.ModelConfig{ + Name: "rerank-model", + Backend: "rerankers", + }, + []string{}, + ), + ) +}) + +var _ = Describe("modelDetailsFromModelConfig", func() { + It("reports gguf format and llama-cpp family/families for a llama-cpp model", func() { + cfg := config.ModelConfig{ + Name: "llama", + Backend: "llama-cpp", + } + details := modelDetailsFromModelConfig(&cfg) + Expect(details.Format).To(Equal("gguf")) + Expect(details.Family).To(Equal("llama-cpp")) + Expect(details.Families).To(ConsistOf("llama-cpp")) + }) + + It("extracts quantization_level from the model filename when present", func() { + cfg := config.ModelConfig{ + Name: "qwen-q4", + Backend: "llama-cpp", + } + cfg.Model = "Qwen3-4B-Instruct-Q4_K_M.gguf" + details := modelDetailsFromModelConfig(&cfg) + Expect(details.QuantizationLevel).To(Equal("Q4_K_M")) + }) + + It("extracts parameter_size from the model filename when present", func() { + cfg := config.ModelConfig{ + Name: "qwen-4b", + Backend: "llama-cpp", + } + cfg.Model = 
"Qwen3-4B-Instruct-Q4_K_M.gguf" + details := modelDetailsFromModelConfig(&cfg) + Expect(details.ParameterSize).To(Equal("4B")) + }) +}) diff --git a/core/http/endpoints/ollama/models.go b/core/http/endpoints/ollama/models.go index eb68494d5..60e58b9ea 100644 --- a/core/http/endpoints/ollama/models.go +++ b/core/http/endpoints/ollama/models.go @@ -32,13 +32,15 @@ func ListModelsEndpoint(bcl *config.ModelConfigLoader, ml *model.ModelLoader) ec digest := fmt.Sprintf("sha256:%x", sha256.Sum256([]byte(name))) + details, caps := modelMetaFromConfig(bcl, name) entry := schema.OllamaModelEntry{ - Name: ollamaName, - Model: ollamaName, - ModifiedAt: time.Now().UTC(), - Size: 0, - Digest: digest, - Details: modelDetailsFromConfig(bcl, name), + Name: ollamaName, + Model: ollamaName, + ModifiedAt: time.Now().UTC(), + Size: 0, + Digest: digest, + Details: details, + Capabilities: caps, } models = append(models, entry) } @@ -72,10 +74,12 @@ func ShowModelEndpoint(bcl *config.ModelConfigLoader) echo.HandlerFunc { } resp := schema.OllamaShowResponse{ - Modelfile: fmt.Sprintf("FROM %s", cfg.Model), - Parameters: "", - Template: cfg.TemplateConfig.Chat, - Details: modelDetailsFromModelConfig(&cfg), + Modelfile: fmt.Sprintf("FROM %s", cfg.Model), + Parameters: "", + Template: cfg.TemplateConfig.Chat, + Details: modelDetailsFromModelConfig(&cfg), + ModelInfo: modelInfoFromModelConfig(&cfg), + Capabilities: modelCapabilities(&cfg), } return c.JSON(200, resp) @@ -95,14 +99,16 @@ func ListRunningEndpoint(bcl *config.ModelConfigLoader, ml *model.ModelLoader) e ollamaName += ":latest" } + details, caps := modelMetaFromConfig(bcl, name) entry := schema.OllamaPsEntry{ - Name: ollamaName, - Model: ollamaName, - Size: 0, - Digest: fmt.Sprintf("sha256:%x", sha256.Sum256([]byte(name))), - Details: modelDetailsFromConfig(bcl, name), - ExpiresAt: time.Now().Add(24 * time.Hour).UTC(), - SizeVRAM: 0, + Name: ollamaName, + Model: ollamaName, + Size: 0, + Digest: fmt.Sprintf("sha256:%x", sha256.Sum256([]byte(name))), + Details: details, + ExpiresAt: time.Now().Add(24 * time.Hour).UTC(), + SizeVRAM: 0, + Capabilities: caps, } models = append(models, entry) } @@ -125,18 +131,46 @@ func HeartbeatEndpoint() echo.HandlerFunc { } } -func modelDetailsFromConfig(bcl *config.ModelConfigLoader, name string) schema.OllamaModelDetails { +// modelMetaFromConfig fetches the ModelConfig for `name` and derives both the +// Ollama details block and capability list. Returns zero values when the model +// is not configured. +func modelMetaFromConfig(bcl *config.ModelConfigLoader, name string) (schema.OllamaModelDetails, []string) { configName := strings.Split(name, ":")[0] cfg, exists := bcl.GetModelConfig(configName) if !exists { - return schema.OllamaModelDetails{} + return schema.OllamaModelDetails{}, nil } - return modelDetailsFromModelConfig(&cfg) + return modelDetailsFromModelConfig(&cfg), modelCapabilities(&cfg) } func modelDetailsFromModelConfig(cfg *config.ModelConfig) schema.OllamaModelDetails { - return schema.OllamaModelDetails{ - Format: "gguf", - Family: cfg.Backend, + family := cfg.Backend + details := schema.OllamaModelDetails{ + Format: "gguf", + Family: family, + ParameterSize: extractParameterSize(cfg.Model), + QuantizationLevel: extractQuantizationLevel(cfg.Model), } + if family != "" { + details.Families = []string{family} + } + return details +} + +// modelInfoFromModelConfig returns a small map of model_info entries derived +// from the LocalAI ModelConfig. 
diff --git a/core/schema/ollama.go b/core/schema/ollama.go
index d3b0f7062..68deaf416 100644
--- a/core/schema/ollama.go
+++ b/core/schema/ollama.go
@@ -184,11 +184,13 @@ func (r *OllamaShowRequest) ModelName(s *string) string {
 
 // OllamaShowResponse represents a response from the Ollama Show API
 type OllamaShowResponse struct {
-    Modelfile  string             `json:"modelfile"`
-    Parameters string             `json:"parameters"`
-    Template   string             `json:"template"`
-    License    string             `json:"license,omitempty"`
-    Details    OllamaModelDetails `json:"details"`
+    Modelfile    string             `json:"modelfile"`
+    Parameters   string             `json:"parameters"`
+    Template     string             `json:"template"`
+    License      string             `json:"license,omitempty"`
+    Details      OllamaModelDetails `json:"details"`
+    ModelInfo    map[string]any     `json:"model_info,omitempty"`
+    Capabilities []string           `json:"capabilities,omitempty"`
 }
 
 // OllamaModelDetails contains model metadata
@@ -203,12 +205,13 @@ type OllamaModelDetails struct {
 
 // OllamaModelEntry represents a model in the list response
 type OllamaModelEntry struct {
-    Name       string             `json:"name"`
-    Model      string             `json:"model"`
-    ModifiedAt time.Time          `json:"modified_at"`
-    Size       int64              `json:"size"`
-    Digest     string             `json:"digest"`
-    Details    OllamaModelDetails `json:"details"`
+    Name         string             `json:"name"`
+    Model        string             `json:"model"`
+    ModifiedAt   time.Time          `json:"modified_at"`
+    Size         int64              `json:"size"`
+    Digest       string             `json:"digest"`
+    Details      OllamaModelDetails `json:"details"`
+    Capabilities []string           `json:"capabilities,omitempty"`
 }
 
 // OllamaListResponse represents a response from the Ollama Tags API
@@ -218,13 +221,14 @@ type OllamaListResponse struct {
 
 // OllamaPsEntry represents a running model in the ps response
 type OllamaPsEntry struct {
-    Name      string             `json:"name"`
-    Model     string             `json:"model"`
-    Size      int64              `json:"size"`
-    Digest    string             `json:"digest"`
-    Details   OllamaModelDetails `json:"details"`
-    ExpiresAt time.Time          `json:"expires_at"`
-    SizeVRAM  int64              `json:"size_vram"`
+    Name         string             `json:"name"`
+    Model        string             `json:"model"`
+    Size         int64              `json:"size"`
+    Digest       string             `json:"digest"`
+    Details      OllamaModelDetails `json:"details"`
+    ExpiresAt    time.Time          `json:"expires_at"`
+    SizeVRAM     int64              `json:"size_vram"`
+    Capabilities []string           `json:"capabilities,omitempty"`
 }
 
 // OllamaPsResponse represents a response from the Ollama Ps API
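
Taken together, an /api/show response for a plain chat model can now look roughly like the following. This is a hand-written illustration, not a captured payload; the details keys follow the Ollama schema referenced above, and the model_info values are invented.

    {
      "modelfile": "FROM Qwen3-4B-Instruct-Q4_K_M.gguf",
      "parameters": "",
      "template": "{{ .Input }}",
      "details": {
        "format": "gguf",
        "family": "llama-cpp",
        "families": ["llama-cpp"],
        "parameter_size": "4B",
        "quantization_level": "Q4_K_M"
      },
      "model_info": {
        "general.architecture": "llama-cpp",
        "general.context_length": 8192
      },
      "capabilities": ["completion"]
    }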