mirror of
https://github.com/mudler/LocalAI.git
synced 2026-05-17 21:21:23 -04:00
Ollama-compatible clients (Open WebUI, Enchanted, ollama-grid-search,
etc.) rely on the `capabilities` list and `details.{parameter_size,
quantization_level,families}` fields returned by /api/tags and
/api/show to decide which models are eligible for a given task --
for example to filter the "embedding model" picker. Upstream Ollama
returns these; LocalAI's compat layer was leaving them empty, so
embedding models were silently rejected by clients that only allow
chat models for chat and only allow embedding models for embeddings.
This wires up the existing config signals already present in
ModelConfig:
- modelCapabilities() derives the Ollama capability strings from the
config: "embedding" (FLAG_EMBEDDINGS), "completion" (FLAG_CHAT /
FLAG_COMPLETION), "vision" (explicit KnownUsecases bit or MMProj /
multimodal template / backend media marker), "tools" (auto-detected
ToolFormatMarkers, JSON/Response regex, XML format, grammar
triggers), "thinking" (ReasoningConfig with reasoning not disabled)
and "insert" (presence of a completion template).
- modelDetailsFromModelConfig() now fills families, parameter_size
and quantization_level. The latter two are parsed from the GGUF
filename via regex -- conservative tokens only (Q*/IQ*/F16/F32/BF16
and \d+(\.\d+)?[BM] surrounded by separators) so we don't accidentally
match "Qwen3" as "3B".
- modelInfoFromModelConfig() exposes general.architecture and
general.context_length in the new ShowResponse.model_info map.
Note: HasUsecases(FLAG_VISION) cannot be used directly -- GuessUsecases
has no FLAG_VISION case, so it falls through to its final "return true"
for any chat model. hasVisionSupport() instead checks KnownUsecases
explicitly, plus the MMProj / template / media-marker signals.
Tests are written first (TDD) using Ginkgo/Gomega -- DescribeTable for
the capability mapping (embedding-only, chat, vision, thinking, tools
via markers, tools via JSON regex, no-capability rerank) plus
integration tests against ShowModelEndpoint that round-trip JSON
through a real ModelConfigLoader populated from a temp YAML file.
Fixes #9760.
Assisted-by: Claude Code:claude-opus-4-7
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Co-authored-by: Ettore Di Giacinto <mudler@localai.io>
177 lines
5.0 KiB
Go
177 lines
5.0 KiB
Go
package ollama
|
|
|
|
import (
|
|
"crypto/sha256"
|
|
"fmt"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/labstack/echo/v4"
|
|
"github.com/mudler/LocalAI/core/config"
|
|
"github.com/mudler/LocalAI/core/schema"
|
|
"github.com/mudler/LocalAI/core/services/galleryop"
|
|
"github.com/mudler/LocalAI/pkg/model"
|
|
)
|
|
|
|
// ollamaCompatVersion is the version string returned by VersionEndpoint.
const ollamaCompatVersion = "0.9.0"
|
|
|
|
// ListModelsEndpoint handles Ollama-compatible GET /api/tags
|
|
func ListModelsEndpoint(bcl *config.ModelConfigLoader, ml *model.ModelLoader) echo.HandlerFunc {
|
|
return func(c echo.Context) error {
|
|
modelNames, err := galleryop.ListModels(bcl, ml, nil, galleryop.SKIP_IF_CONFIGURED)
|
|
if err != nil {
|
|
return ollamaError(c, 500, fmt.Sprintf("failed to list models: %v", err))
|
|
}
|
|
|
|
var models []schema.OllamaModelEntry
|
|
for _, name := range modelNames {
|
|
ollamaName := name
|
|
if !strings.Contains(ollamaName, ":") {
|
|
ollamaName += ":latest"
|
|
}
|
|
|
|
digest := fmt.Sprintf("sha256:%x", sha256.Sum256([]byte(name)))
|
|
|
|
details, caps := modelMetaFromConfig(bcl, name)
|
|
entry := schema.OllamaModelEntry{
|
|
Name: ollamaName,
|
|
Model: ollamaName,
|
|
ModifiedAt: time.Now().UTC(),
|
|
Size: 0,
|
|
Digest: digest,
|
|
Details: details,
|
|
Capabilities: caps,
|
|
}
|
|
models = append(models, entry)
|
|
}
|
|
|
|
return c.JSON(200, schema.OllamaListResponse{Models: models})
|
|
}
|
|
}
|
|
|
|
// ShowModelEndpoint handles Ollama-compatible POST /api/show
|
|
func ShowModelEndpoint(bcl *config.ModelConfigLoader) echo.HandlerFunc {
|
|
return func(c echo.Context) error {
|
|
var req schema.OllamaShowRequest
|
|
if err := c.Bind(&req); err != nil {
|
|
return ollamaError(c, 400, "invalid request body")
|
|
}
|
|
|
|
name := req.Name
|
|
if name == "" {
|
|
name = req.Model
|
|
}
|
|
if name == "" {
|
|
return ollamaError(c, 400, "name is required")
|
|
}
|
|
|
|
// Strip tag suffix for config lookup
|
|
configName := strings.Split(name, ":")[0]
|
|
|
|
cfg, exists := bcl.GetModelConfig(configName)
|
|
if !exists {
|
|
return ollamaError(c, 404, fmt.Sprintf("model '%s' not found", name))
|
|
}
|
|
|
|
resp := schema.OllamaShowResponse{
|
|
Modelfile: fmt.Sprintf("FROM %s", cfg.Model),
|
|
Parameters: "",
|
|
Template: cfg.TemplateConfig.Chat,
|
|
Details: modelDetailsFromModelConfig(&cfg),
|
|
ModelInfo: modelInfoFromModelConfig(&cfg),
|
|
Capabilities: modelCapabilities(&cfg),
|
|
}
|
|
|
|
return c.JSON(200, resp)
|
|
}
|
|
}
|
|
|
|
// ListRunningEndpoint handles Ollama-compatible GET /api/ps
|
|
func ListRunningEndpoint(bcl *config.ModelConfigLoader, ml *model.ModelLoader) echo.HandlerFunc {
|
|
return func(c echo.Context) error {
|
|
loadedModels := ml.ListLoadedModels()
|
|
|
|
var models []schema.OllamaPsEntry
|
|
for _, m := range loadedModels {
|
|
name := m.ID
|
|
ollamaName := name
|
|
if !strings.Contains(ollamaName, ":") {
|
|
ollamaName += ":latest"
|
|
}
|
|
|
|
details, caps := modelMetaFromConfig(bcl, name)
|
|
entry := schema.OllamaPsEntry{
|
|
Name: ollamaName,
|
|
Model: ollamaName,
|
|
Size: 0,
|
|
Digest: fmt.Sprintf("sha256:%x", sha256.Sum256([]byte(name))),
|
|
Details: details,
|
|
ExpiresAt: time.Now().Add(24 * time.Hour).UTC(),
|
|
SizeVRAM: 0,
|
|
Capabilities: caps,
|
|
}
|
|
models = append(models, entry)
|
|
}
|
|
|
|
return c.JSON(200, schema.OllamaPsResponse{Models: models})
|
|
}
|
|
}
|
|
|
|
// VersionEndpoint handles Ollama-compatible GET /api/version
|
|
func VersionEndpoint() echo.HandlerFunc {
|
|
return func(c echo.Context) error {
|
|
return c.JSON(200, schema.OllamaVersionResponse{Version: ollamaCompatVersion})
|
|
}
|
|
}
|
|
|
|
// HeartbeatEndpoint handles the Ollama root health check
|
|
func HeartbeatEndpoint() echo.HandlerFunc {
|
|
return func(c echo.Context) error {
|
|
return c.String(200, "Ollama is running")
|
|
}
|
|
}
|
|
|
|
// modelMetaFromConfig fetches the ModelConfig for `name` and derives both the
|
|
// Ollama details block and capability list. Returns zero values when the model
|
|
// is not configured.
|
|
func modelMetaFromConfig(bcl *config.ModelConfigLoader, name string) (schema.OllamaModelDetails, []string) {
|
|
configName := strings.Split(name, ":")[0]
|
|
cfg, exists := bcl.GetModelConfig(configName)
|
|
if !exists {
|
|
return schema.OllamaModelDetails{}, nil
|
|
}
|
|
return modelDetailsFromModelConfig(&cfg), modelCapabilities(&cfg)
|
|
}
|
|
|
|
func modelDetailsFromModelConfig(cfg *config.ModelConfig) schema.OllamaModelDetails {
|
|
family := cfg.Backend
|
|
details := schema.OllamaModelDetails{
|
|
Format: "gguf",
|
|
Family: family,
|
|
ParameterSize: extractParameterSize(cfg.Model),
|
|
QuantizationLevel: extractQuantizationLevel(cfg.Model),
|
|
}
|
|
if family != "" {
|
|
details.Families = []string{family}
|
|
}
|
|
return details
|
|
}
|
|
|
|
// modelInfoFromModelConfig returns a small map of model_info entries derived
|
|
// from the LocalAI ModelConfig. Ollama clients use this map for architecture
|
|
// and context-length information; we expose what we can without loading the
|
|
// model.
|
|
func modelInfoFromModelConfig(cfg *config.ModelConfig) map[string]any {
|
|
info := map[string]any{}
|
|
if cfg.Backend != "" {
|
|
info["general.architecture"] = cfg.Backend
|
|
}
|
|
if cfg.ContextSize != nil && *cfg.ContextSize > 0 {
|
|
info["general.context_length"] = *cfg.ContextSize
|
|
}
|
|
if len(info) == 0 {
|
|
return nil
|
|
}
|
|
return info
|
|
}
|