feat(gallery): Speed up load times and clean gallery entries (#9211)

* feat: Rework VRAM estimation and use known_usecases in gallery

Signed-off-by: Richard Palethorpe <io@richiejp.com>
Assisted-by: Claude:claude-opus-4-7[1m] [Claude Code]

* chore(gallery): regenerate gallery index and add known_usecases to model entries

Signed-off-by: Richard Palethorpe <io@richiejp.com>

---------

Signed-off-by: Richard Palethorpe <io@richiejp.com>
Richard Palethorpe
2026-05-06 13:51:38 +01:00
committed by GitHub
parent 6d56bf98fe
commit 969005b2a1
47 changed files with 17089 additions and 5345 deletions


@@ -17,6 +17,7 @@ import (
"github.com/mudler/LocalAI/core/services/jobs"
"github.com/mudler/LocalAI/core/services/nodes"
"github.com/mudler/LocalAI/core/services/storage"
"github.com/mudler/LocalAI/pkg/vram"
coreStartup "github.com/mudler/LocalAI/core/startup"
"github.com/mudler/LocalAI/internal"
@@ -251,6 +252,10 @@ func New(opts ...config.AppOption) (*Application, error) {
go uc.Run(options.Context)
}
// Wire gallery generation counter into VRAM caches so they invalidate
// when gallery data refreshes instead of using a fixed TTL.
vram.SetGalleryGenerationFunc(gallery.GalleryGeneration)
if options.ConfigFile != "" {
if err := application.ModelConfigLoader().LoadMultipleModelConfigsSingleFile(options.ConfigFile, configLoaderOpts...); err != nil {
xlog.Error("error loading config file", "error", err)


@@ -0,0 +1,480 @@
package config
import (
"slices"
"strings"
)
// Usecase name constants — the canonical string values used in gallery entries,
// model configs (known_usecases), and UsecaseInfoMap keys.
const (
UsecaseChat = "chat"
UsecaseCompletion = "completion"
UsecaseEdit = "edit"
UsecaseVision = "vision"
UsecaseEmbeddings = "embeddings"
UsecaseTokenize = "tokenize"
UsecaseImage = "image"
UsecaseVideo = "video"
UsecaseTranscript = "transcript"
UsecaseTTS = "tts"
UsecaseSoundGeneration = "sound_generation"
UsecaseRerank = "rerank"
UsecaseDetection = "detection"
UsecaseVAD = "vad"
UsecaseAudioTransform = "audio_transform"
UsecaseDiarization = "diarization"
)
// GRPCMethod identifies a Backend service RPC from backend.proto.
type GRPCMethod string
const (
MethodPredict GRPCMethod = "Predict"
MethodPredictStream GRPCMethod = "PredictStream"
MethodEmbedding GRPCMethod = "Embedding"
MethodGenerateImage GRPCMethod = "GenerateImage"
MethodGenerateVideo GRPCMethod = "GenerateVideo"
MethodAudioTranscription GRPCMethod = "AudioTranscription"
MethodTTS GRPCMethod = "TTS"
MethodTTSStream GRPCMethod = "TTSStream"
MethodSoundGeneration GRPCMethod = "SoundGeneration"
MethodTokenizeString GRPCMethod = "TokenizeString"
MethodDetect GRPCMethod = "Detect"
MethodRerank GRPCMethod = "Rerank"
MethodVAD GRPCMethod = "VAD"
MethodAudioTransform GRPCMethod = "AudioTransform"
MethodDiarize GRPCMethod = "Diarize"
)
// UsecaseInfo describes a single known_usecase value and how it maps
// to the gRPC backend API.
type UsecaseInfo struct {
// Flag is the ModelConfigUsecase bitmask value.
Flag ModelConfigUsecase
// GRPCMethod is the primary Backend service RPC this usecase maps to.
GRPCMethod GRPCMethod
// IsModifier is true when this usecase doesn't map to its own gRPC RPC
// but modifies how another RPC behaves (e.g., vision uses Predict with images).
IsModifier bool
// DependsOn names the usecase(s) this modifier requires (e.g., "chat").
DependsOn string
// Description is a human/LLM-readable explanation of what this usecase means.
Description string
}
// UsecaseInfoMap maps each known_usecase string to its gRPC and semantic info.
var UsecaseInfoMap = map[string]UsecaseInfo{
UsecaseChat: {
Flag: FLAG_CHAT,
GRPCMethod: MethodPredict,
Description: "Conversational/instruction-following via the Predict RPC with chat templates.",
},
UsecaseCompletion: {
Flag: FLAG_COMPLETION,
GRPCMethod: MethodPredict,
Description: "Text completion via the Predict RPC with a completion template.",
},
UsecaseEdit: {
Flag: FLAG_EDIT,
GRPCMethod: MethodPredict,
Description: "Text editing via the Predict RPC with an edit template.",
},
UsecaseVision: {
Flag: FLAG_VISION,
GRPCMethod: MethodPredict,
IsModifier: true,
DependsOn: UsecaseChat,
Description: "The model accepts images alongside text in the Predict RPC. For llama-cpp this requires an mmproj file.",
},
UsecaseEmbeddings: {
Flag: FLAG_EMBEDDINGS,
GRPCMethod: MethodEmbedding,
Description: "Vector embedding generation via the Embedding RPC.",
},
UsecaseTokenize: {
Flag: FLAG_TOKENIZE,
GRPCMethod: MethodTokenizeString,
Description: "Tokenization via the TokenizeString RPC without running inference.",
},
UsecaseImage: {
Flag: FLAG_IMAGE,
GRPCMethod: MethodGenerateImage,
Description: "Image generation via the GenerateImage RPC (Stable Diffusion, Flux, etc.).",
},
UsecaseVideo: {
Flag: FLAG_VIDEO,
GRPCMethod: MethodGenerateVideo,
Description: "Video generation via the GenerateVideo RPC.",
},
UsecaseTranscript: {
Flag: FLAG_TRANSCRIPT,
GRPCMethod: MethodAudioTranscription,
Description: "Speech-to-text via the AudioTranscription RPC.",
},
UsecaseTTS: {
Flag: FLAG_TTS,
GRPCMethod: MethodTTS,
Description: "Text-to-speech via the TTS RPC.",
},
UsecaseSoundGeneration: {
Flag: FLAG_SOUND_GENERATION,
GRPCMethod: MethodSoundGeneration,
Description: "Music/sound generation via the SoundGeneration RPC (not speech).",
},
UsecaseRerank: {
Flag: FLAG_RERANK,
GRPCMethod: MethodRerank,
Description: "Document reranking via the Rerank RPC.",
},
UsecaseDetection: {
Flag: FLAG_DETECTION,
GRPCMethod: MethodDetect,
Description: "Object detection via the Detect RPC with bounding boxes.",
},
UsecaseVAD: {
Flag: FLAG_VAD,
GRPCMethod: MethodVAD,
Description: "Voice activity detection via the VAD RPC.",
},
UsecaseAudioTransform: {
Flag: FLAG_AUDIO_TRANSFORM,
GRPCMethod: MethodAudioTransform,
Description: "Audio-in / audio-out transformations (echo cancellation, noise suppression, dereverberation, voice conversion) via the AudioTransform RPC.",
},
UsecaseDiarization: {
Flag: FLAG_DIARIZATION,
GRPCMethod: MethodDiarize,
Description: "Speaker diarization (who-spoke-when, per-speaker segments) via the Diarize RPC.",
},
}
// BackendCapability describes which gRPC methods and usecases a backend supports.
// Derived from reviewing actual implementations in backend/go/ and backend/python/.
type BackendCapability struct {
// GRPCMethods lists the Backend service RPCs this backend implements.
GRPCMethods []GRPCMethod
// PossibleUsecases lists all usecase strings this backend can support.
PossibleUsecases []string
// DefaultUsecases lists the conservative defaults to assume when a model declares no usecases of its own.
DefaultUsecases []string
// AcceptsImages indicates multimodal image input in Predict.
AcceptsImages bool
// AcceptsVideos indicates multimodal video input in Predict.
AcceptsVideos bool
// AcceptsAudios indicates multimodal audio input in Predict.
AcceptsAudios bool
// Description is a human-readable summary of the backend.
Description string
}
// BackendCapabilities maps each backend name (as used in model configs and gallery
// entries) to its verified capabilities. This is the single source of truth for
// what each backend supports.
//
// Backend names use hyphens (e.g., "llama-cpp") matching the gallery convention.
// Use NormalizeBackendName() for names with dots (e.g., "llama.cpp").
var BackendCapabilities = map[string]BackendCapability{
// --- LLM / text generation backends ---
"llama-cpp": {
GRPCMethods: []GRPCMethod{MethodPredict, MethodPredictStream, MethodEmbedding, MethodTokenizeString},
PossibleUsecases: []string{UsecaseChat, UsecaseCompletion, UsecaseEdit, UsecaseEmbeddings, UsecaseTokenize, UsecaseVision},
DefaultUsecases: []string{UsecaseChat},
AcceptsImages: true, // requires mmproj
Description: "llama.cpp GGUF models — LLM inference with optional vision via mmproj",
},
"vllm": {
GRPCMethods: []GRPCMethod{MethodPredict, MethodPredictStream, MethodEmbedding},
PossibleUsecases: []string{UsecaseChat, UsecaseCompletion, UsecaseEmbeddings, UsecaseVision},
DefaultUsecases: []string{UsecaseChat},
AcceptsImages: true,
AcceptsVideos: true,
Description: "vLLM engine — high-throughput LLM serving with optional multimodal",
},
"vllm-omni": {
GRPCMethods: []GRPCMethod{MethodPredict, MethodPredictStream, MethodGenerateImage, MethodGenerateVideo, MethodTTS},
PossibleUsecases: []string{UsecaseChat, UsecaseCompletion, UsecaseImage, UsecaseVideo, UsecaseTTS, UsecaseVision},
DefaultUsecases: []string{UsecaseChat},
AcceptsImages: true,
AcceptsVideos: true,
AcceptsAudios: true,
Description: "vLLM omni-modal — supports text, image, video generation and TTS",
},
"transformers": {
GRPCMethods: []GRPCMethod{MethodPredict, MethodPredictStream, MethodEmbedding, MethodTTS, MethodSoundGeneration},
PossibleUsecases: []string{UsecaseChat, UsecaseCompletion, UsecaseEmbeddings, UsecaseTTS, UsecaseSoundGeneration},
DefaultUsecases: []string{UsecaseChat},
Description: "HuggingFace transformers — general-purpose Python inference",
},
"mlx": {
GRPCMethods: []GRPCMethod{MethodPredict, MethodPredictStream, MethodEmbedding},
PossibleUsecases: []string{UsecaseChat, UsecaseCompletion, UsecaseEmbeddings},
DefaultUsecases: []string{UsecaseChat},
Description: "Apple MLX framework — optimized for Apple Silicon",
},
"mlx-distributed": {
GRPCMethods: []GRPCMethod{MethodPredict, MethodPredictStream, MethodEmbedding},
PossibleUsecases: []string{UsecaseChat, UsecaseCompletion, UsecaseEmbeddings},
DefaultUsecases: []string{UsecaseChat},
Description: "MLX distributed inference across multiple Apple Silicon devices",
},
"mlx-vlm": {
GRPCMethods: []GRPCMethod{MethodPredict, MethodPredictStream, MethodEmbedding},
PossibleUsecases: []string{UsecaseChat, UsecaseCompletion, UsecaseEmbeddings, UsecaseVision},
DefaultUsecases: []string{UsecaseChat, UsecaseVision},
AcceptsImages: true,
AcceptsAudios: true,
Description: "MLX vision-language models with multimodal input",
},
"mlx-audio": {
GRPCMethods: []GRPCMethod{MethodPredict, MethodTTS},
PossibleUsecases: []string{UsecaseChat, UsecaseCompletion, UsecaseTTS},
DefaultUsecases: []string{UsecaseChat},
Description: "MLX audio models — text generation and TTS",
},
// --- Image/video generation backends ---
"diffusers": {
GRPCMethods: []GRPCMethod{MethodGenerateImage, MethodGenerateVideo},
PossibleUsecases: []string{UsecaseImage, UsecaseVideo},
DefaultUsecases: []string{UsecaseImage},
Description: "HuggingFace diffusers — Stable Diffusion, Flux, video generation",
},
"stablediffusion": {
GRPCMethods: []GRPCMethod{MethodGenerateImage},
PossibleUsecases: []string{UsecaseImage},
DefaultUsecases: []string{UsecaseImage},
Description: "Stable Diffusion native backend",
},
"stablediffusion-ggml": {
GRPCMethods: []GRPCMethod{MethodGenerateImage},
PossibleUsecases: []string{UsecaseImage},
DefaultUsecases: []string{UsecaseImage},
Description: "Stable Diffusion via GGML quantized models",
},
// --- Speech-to-text backends ---
"whisper": {
GRPCMethods: []GRPCMethod{MethodAudioTranscription, MethodVAD},
PossibleUsecases: []string{UsecaseTranscript, UsecaseVAD},
DefaultUsecases: []string{UsecaseTranscript},
Description: "OpenAI Whisper — speech recognition and voice activity detection",
},
"faster-whisper": {
GRPCMethods: []GRPCMethod{MethodAudioTranscription},
PossibleUsecases: []string{UsecaseTranscript},
DefaultUsecases: []string{UsecaseTranscript},
Description: "CTranslate2-accelerated Whisper for faster transcription",
},
"whisperx": {
GRPCMethods: []GRPCMethod{MethodAudioTranscription},
PossibleUsecases: []string{UsecaseTranscript},
DefaultUsecases: []string{UsecaseTranscript},
Description: "WhisperX — Whisper with word-level timestamps and speaker diarization",
},
"moonshine": {
GRPCMethods: []GRPCMethod{MethodAudioTranscription},
PossibleUsecases: []string{UsecaseTranscript},
DefaultUsecases: []string{UsecaseTranscript},
Description: "Moonshine speech recognition",
},
"nemo": {
GRPCMethods: []GRPCMethod{MethodAudioTranscription},
PossibleUsecases: []string{UsecaseTranscript},
DefaultUsecases: []string{UsecaseTranscript},
Description: "NVIDIA NeMo speech recognition",
},
"qwen-asr": {
GRPCMethods: []GRPCMethod{MethodAudioTranscription},
PossibleUsecases: []string{UsecaseTranscript},
DefaultUsecases: []string{UsecaseTranscript},
Description: "Qwen automatic speech recognition",
},
"voxtral": {
GRPCMethods: []GRPCMethod{MethodAudioTranscription},
PossibleUsecases: []string{UsecaseTranscript},
DefaultUsecases: []string{UsecaseTranscript},
Description: "Voxtral speech recognition",
},
"vibevoice": {
GRPCMethods: []GRPCMethod{MethodAudioTranscription, MethodTTS},
PossibleUsecases: []string{UsecaseTranscript, UsecaseTTS},
DefaultUsecases: []string{UsecaseTranscript, UsecaseTTS},
Description: "VibeVoice — bidirectional speech (transcription and synthesis)",
},
// --- TTS backends ---
"piper": {
GRPCMethods: []GRPCMethod{MethodTTS},
PossibleUsecases: []string{UsecaseTTS},
DefaultUsecases: []string{UsecaseTTS},
Description: "Piper — fast neural TTS optimized for Raspberry Pi",
},
"kokoro": {
GRPCMethods: []GRPCMethod{MethodTTS},
PossibleUsecases: []string{UsecaseTTS},
DefaultUsecases: []string{UsecaseTTS},
Description: "Kokoro TTS",
},
"coqui": {
GRPCMethods: []GRPCMethod{MethodTTS},
PossibleUsecases: []string{UsecaseTTS},
DefaultUsecases: []string{UsecaseTTS},
Description: "Coqui TTS — multi-speaker neural synthesis",
},
"kitten-tts": {
GRPCMethods: []GRPCMethod{MethodTTS},
PossibleUsecases: []string{UsecaseTTS},
DefaultUsecases: []string{UsecaseTTS},
Description: "Kitten TTS",
},
"outetts": {
GRPCMethods: []GRPCMethod{MethodTTS},
PossibleUsecases: []string{UsecaseTTS},
DefaultUsecases: []string{UsecaseTTS},
Description: "OuteTTS",
},
"pocket-tts": {
GRPCMethods: []GRPCMethod{MethodTTS},
PossibleUsecases: []string{UsecaseTTS},
DefaultUsecases: []string{UsecaseTTS},
Description: "Pocket TTS — lightweight text-to-speech",
},
"qwen-tts": {
GRPCMethods: []GRPCMethod{MethodTTS},
PossibleUsecases: []string{UsecaseTTS},
DefaultUsecases: []string{UsecaseTTS},
Description: "Qwen TTS",
},
"faster-qwen3-tts": {
GRPCMethods: []GRPCMethod{MethodTTS},
PossibleUsecases: []string{UsecaseTTS},
DefaultUsecases: []string{UsecaseTTS},
Description: "Faster Qwen3 TTS — accelerated Qwen TTS",
},
"fish-speech": {
GRPCMethods: []GRPCMethod{MethodTTS},
PossibleUsecases: []string{UsecaseTTS},
DefaultUsecases: []string{UsecaseTTS},
Description: "Fish Speech TTS",
},
"neutts": {
GRPCMethods: []GRPCMethod{MethodTTS},
PossibleUsecases: []string{UsecaseTTS},
DefaultUsecases: []string{UsecaseTTS},
Description: "NeuTTS — neural text-to-speech",
},
"chatterbox": {
GRPCMethods: []GRPCMethod{MethodTTS},
PossibleUsecases: []string{UsecaseTTS},
DefaultUsecases: []string{UsecaseTTS},
Description: "Chatterbox TTS",
},
"voxcpm": {
GRPCMethods: []GRPCMethod{MethodTTS, MethodTTSStream},
PossibleUsecases: []string{UsecaseTTS},
DefaultUsecases: []string{UsecaseTTS},
Description: "VoxCPM TTS with streaming support",
},
// --- Sound generation backends ---
"ace-step": {
GRPCMethods: []GRPCMethod{MethodTTS, MethodSoundGeneration},
PossibleUsecases: []string{UsecaseTTS, UsecaseSoundGeneration},
DefaultUsecases: []string{UsecaseSoundGeneration},
Description: "ACE-Step — music and sound generation",
},
"acestep-cpp": {
GRPCMethods: []GRPCMethod{MethodSoundGeneration},
PossibleUsecases: []string{UsecaseSoundGeneration},
DefaultUsecases: []string{UsecaseSoundGeneration},
Description: "ACE-Step C++ — native sound generation",
},
"transformers-musicgen": {
GRPCMethods: []GRPCMethod{MethodTTS, MethodSoundGeneration},
PossibleUsecases: []string{UsecaseTTS, UsecaseSoundGeneration},
DefaultUsecases: []string{UsecaseSoundGeneration},
Description: "Meta MusicGen via transformers — music generation from text",
},
// --- Audio transform backends ---
"localvqe": {
GRPCMethods: []GRPCMethod{MethodAudioTransform},
PossibleUsecases: []string{UsecaseAudioTransform},
DefaultUsecases: []string{UsecaseAudioTransform},
Description: "LocalVQE — joint AEC, noise suppression, and dereverberation for 16 kHz mono speech",
},
// --- Utility backends ---
"rerankers": {
GRPCMethods: []GRPCMethod{MethodRerank},
PossibleUsecases: []string{UsecaseRerank},
DefaultUsecases: []string{UsecaseRerank},
Description: "Cross-encoder reranking models",
},
"rfdetr": {
GRPCMethods: []GRPCMethod{MethodDetect},
PossibleUsecases: []string{UsecaseDetection},
DefaultUsecases: []string{UsecaseDetection},
Description: "RF-DETR object detection",
},
"silero-vad": {
GRPCMethods: []GRPCMethod{MethodVAD},
PossibleUsecases: []string{UsecaseVAD},
DefaultUsecases: []string{UsecaseVAD},
Description: "Silero VAD — voice activity detection",
},
}
// NormalizeBackendName converts backend names to the canonical hyphenated form
// used in gallery entries (e.g., "llama.cpp" → "llama-cpp").
func NormalizeBackendName(backend string) string {
return strings.ReplaceAll(backend, ".", "-")
}
// GetBackendCapability returns the capability info for a backend, or nil if unknown.
// Handles backend name normalization.
func GetBackendCapability(backend string) *BackendCapability {
if cap, ok := BackendCapabilities[NormalizeBackendName(backend)]; ok {
return &cap
}
return nil
}
// PossibleUsecasesForBackend returns all usecases a backend can support.
// Returns nil if the backend is unknown.
func PossibleUsecasesForBackend(backend string) []string {
if cap := GetBackendCapability(backend); cap != nil {
return cap.PossibleUsecases
}
return nil
}
// DefaultUsecasesForBackendCap returns the conservative default usecases for a backend.
// Returns nil if the backend is unknown.
func DefaultUsecasesForBackendCap(backend string) []string {
if cap := GetBackendCapability(backend); cap != nil {
return cap.DefaultUsecases
}
return nil
}
// IsValidUsecaseForBackend checks whether a usecase is in a backend's possible set.
// Returns true for unknown backends (permissive fallback).
func IsValidUsecaseForBackend(backend, usecase string) bool {
cap := GetBackendCapability(backend)
if cap == nil {
return true // unknown backend — don't restrict
}
return slices.Contains(cap.PossibleUsecases, usecase)
}
// AllBackendNames returns a sorted list of all known backend names.
func AllBackendNames() []string {
names := make([]string, 0, len(BackendCapabilities))
for name := range BackendCapabilities {
names = append(names, name)
}
slices.Sort(names)
return names
}
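
To make the lookup surface concrete, a minimal sketch of querying these tables from another package. The import path is assumed from the repo layout; adjust to the actual module:

```go
package main

import (
	"fmt"

	// Assumed import path for the package above.
	"github.com/mudler/LocalAI/core/config"
)

func main() {
	// Dotted names are normalized, so "llama.cpp" resolves to "llama-cpp".
	if bc := config.GetBackendCapability("llama.cpp"); bc != nil {
		fmt.Println(bc.PossibleUsecases) // [chat completion edit embeddings tokenize vision]
	}

	// Validation is strict for known backends, permissive for unknown ones.
	fmt.Println(config.IsValidUsecaseForBackend("piper", config.UsecaseChat)) // false
	fmt.Println(config.IsValidUsecaseForBackend("brand-new-backend", "anything")) // true

	// Each usecase string maps back to a gRPC method on the Backend service.
	info := config.UsecaseInfoMap[config.UsecaseVision]
	fmt.Println(info.GRPCMethod, info.IsModifier, info.DependsOn) // Predict true chat
}
```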


@@ -0,0 +1,95 @@
package config
import (
"slices"
"strings"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)
var _ = Describe("BackendCapabilities", func() {
It("every backend declares possible/default usecases and gRPC methods", func() {
for name, cap := range BackendCapabilities {
Expect(cap.PossibleUsecases).NotTo(BeEmpty(), "backend %q has no possible usecases", name)
Expect(cap.DefaultUsecases).NotTo(BeEmpty(), "backend %q has no default usecases", name)
Expect(cap.GRPCMethods).NotTo(BeEmpty(), "backend %q has no gRPC methods", name)
}
})
It("default usecases are a subset of possible usecases", func() {
for name, cap := range BackendCapabilities {
for _, d := range cap.DefaultUsecases {
Expect(cap.PossibleUsecases).To(ContainElement(d), "backend %q: default %q not in possible %v", name, d, cap.PossibleUsecases)
}
}
})
It("every backend's possible usecases map to a known FLAG_*", func() {
allFlags := GetAllModelConfigUsecases()
for name, cap := range BackendCapabilities {
for _, u := range cap.PossibleUsecases {
info, ok := UsecaseInfoMap[u]
Expect(ok).To(BeTrue(), "backend %q: usecase %q not in UsecaseInfoMap", name, u)
flagName := "FLAG_" + strings.ToUpper(u)
if _, ok := allFlags[flagName]; ok {
continue
}
// Some usecase names don't transform exactly to FLAG_<UPPER>; fall back to flag value lookup.
found := false
for _, flag := range allFlags {
if flag == info.Flag {
found = true
break
}
}
Expect(found).To(BeTrue(), "backend %q: usecase %q flag %d not in GetAllModelConfigUsecases", name, u, info.Flag)
}
}
})
It("every UsecaseInfoMap entry has a non-zero flag and a gRPC method", func() {
for name, info := range UsecaseInfoMap {
Expect(info.Flag).NotTo(Equal(FLAG_ANY), "usecase %q has FLAG_ANY (zero) — should have a real flag", name)
Expect(info.GRPCMethod).NotTo(BeEmpty(), "usecase %q has no gRPC method", name)
}
})
})
var _ = Describe("GetBackendCapability", func() {
It("returns the capability for a known backend", func() {
cap := GetBackendCapability("llama-cpp")
Expect(cap).NotTo(BeNil())
Expect(cap.PossibleUsecases).To(ContainElement("chat"))
})
It("normalizes hyphenated names so llama.cpp resolves to llama-cpp", func() {
Expect(GetBackendCapability("llama.cpp")).NotTo(BeNil())
})
It("returns nil for unknown backends", func() {
Expect(GetBackendCapability("nonexistent")).To(BeNil())
})
})
var _ = Describe("IsValidUsecaseForBackend", func() {
It("accepts a backend's declared usecases", func() {
Expect(IsValidUsecaseForBackend("piper", "tts")).To(BeTrue())
})
It("rejects usecases outside a backend's possible set", func() {
Expect(IsValidUsecaseForBackend("piper", "chat")).To(BeFalse())
})
It("is permissive for unknown backends", func() {
Expect(IsValidUsecaseForBackend("unknown", "anything")).To(BeTrue())
})
})
var _ = Describe("AllBackendNames", func() {
It("returns 30+ backends in sorted order", func() {
names := AllBackendNames()
Expect(len(names)).To(BeNumerically(">=", 30))
Expect(slices.IsSorted(names)).To(BeTrue())
})
})


@@ -630,16 +630,45 @@ const (
FLAG_TOKENIZE ModelConfigUsecase = 0b001000000000
FLAG_VAD ModelConfigUsecase = 0b010000000000
FLAG_VIDEO ModelConfigUsecase = 0b100000000000
FLAG_DETECTION ModelConfigUsecase = 0b1000000000000
FLAG_FACE_RECOGNITION ModelConfigUsecase = 0b10000000000000
FLAG_SPEAKER_RECOGNITION ModelConfigUsecase = 0b100000000000000
FLAG_AUDIO_TRANSFORM ModelConfigUsecase = 0b1000000000000000
FLAG_DIARIZATION ModelConfigUsecase = 0b10000000000000000
FLAG_DETECTION ModelConfigUsecase = 0b1000000000000
FLAG_VISION ModelConfigUsecase = 0b10000000000000
FLAG_FACE_RECOGNITION ModelConfigUsecase = 0b100000000000000
FLAG_SPEAKER_RECOGNITION ModelConfigUsecase = 0b1000000000000000
FLAG_AUDIO_TRANSFORM ModelConfigUsecase = 0b10000000000000000
FLAG_DIARIZATION ModelConfigUsecase = 0b100000000000000000
// Common Subsets
FLAG_LLM ModelConfigUsecase = FLAG_CHAT | FLAG_COMPLETION | FLAG_EDIT
)
// ModalityGroups defines groups of usecases that belong to the same modality.
// Flags within the same group are NOT orthogonal (e.g., chat and completion are
// both text/language). A model is multimodal when its usecases span 2+ groups.
var ModalityGroups = []ModelConfigUsecase{
FLAG_CHAT | FLAG_COMPLETION | FLAG_EDIT, // text/language
FLAG_VISION | FLAG_DETECTION, // visual understanding
FLAG_TRANSCRIPT, // speech input
FLAG_TTS | FLAG_SOUND_GENERATION, // audio output
FLAG_AUDIO_TRANSFORM, // audio in/out transforms
FLAG_IMAGE | FLAG_VIDEO, // visual generation
}
// IsMultimodal returns true if the given usecases span two or more orthogonal
// modality groups. For example chat+vision is multimodal, but chat+completion
// is not (both belong to the text/language group).
func IsMultimodal(usecases ModelConfigUsecase) bool {
groupCount := 0
for _, group := range ModalityGroups {
if usecases&group != 0 {
groupCount++
if groupCount >= 2 {
return true
}
}
}
return false
}
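A few illustrative calls, written as a Go example test; the expected results follow directly from the ModalityGroups above:

```go
package config

import "fmt"

// ExampleIsMultimodal is an illustrative sketch of the group semantics.
func ExampleIsMultimodal() {
	fmt.Println(IsMultimodal(FLAG_CHAT | FLAG_VISION))     // text + visual understanding
	fmt.Println(IsMultimodal(FLAG_CHAT | FLAG_COMPLETION)) // both in the text/language group
	fmt.Println(IsMultimodal(FLAG_TTS | FLAG_TRANSCRIPT))  // audio output + speech input
	// Output:
	// true
	// false
	// true
}
```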
func GetAllModelConfigUsecases() map[string]ModelConfigUsecase {
return map[string]ModelConfigUsecase{
// Note: FLAG_ANY is intentionally excluded from this map
@@ -657,7 +686,8 @@ func GetAllModelConfigUsecases() map[string]ModelConfigUsecase {
"FLAG_VAD": FLAG_VAD,
"FLAG_LLM": FLAG_LLM,
"FLAG_VIDEO": FLAG_VIDEO,
"FLAG_DETECTION": FLAG_DETECTION,
"FLAG_DETECTION": FLAG_DETECTION,
"FLAG_VISION": FLAG_VISION,
"FLAG_FACE_RECOGNITION": FLAG_FACE_RECOGNITION,
"FLAG_SPEAKER_RECOGNITION": FLAG_SPEAKER_RECOGNITION,
"FLAG_AUDIO_TRANSFORM": FLAG_AUDIO_TRANSFORM,


@@ -7,6 +7,8 @@ import (
"path/filepath"
"slices"
"strings"
"sync"
"sync/atomic"
"time"
"github.com/lithammer/fuzzysearch/fuzzy"
@@ -92,6 +94,34 @@ func (gm GalleryElements[T]) Search(term string) GalleryElements[T] {
return filteredModels
}
// FilterGalleryModelsByUsecase returns models whose known_usecases include all
// the bits set in usecase. For example, passing FLAG_CHAT matches any model
// with the chat usecase; passing FLAG_CHAT|FLAG_VISION matches only models
// that have both.
func FilterGalleryModelsByUsecase(models GalleryElements[*GalleryModel], usecase config.ModelConfigUsecase) GalleryElements[*GalleryModel] {
var filtered GalleryElements[*GalleryModel]
for _, m := range models {
u := m.GetKnownUsecases()
if u != nil && (*u&usecase) == usecase {
filtered = append(filtered, m)
}
}
return filtered
}
// FilterGalleryModelsByMultimodal returns models whose known_usecases span two
// or more orthogonal modality groups (e.g. chat+vision, tts+transcript).
func FilterGalleryModelsByMultimodal(models GalleryElements[*GalleryModel]) GalleryElements[*GalleryModel] {
var filtered GalleryElements[*GalleryModel]
for _, m := range models {
u := m.GetKnownUsecases()
if u != nil && config.IsMultimodal(*u) {
filtered = append(filtered, m)
}
}
return filtered
}
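The subset test `(*u & usecase) == usecase` requires every requested bit to be present. A sketch with hypothetical flag sets (import path assumed):

```go
package gallery_test

import (
	"fmt"

	// Assumed import path for the config package.
	"github.com/mudler/LocalAI/core/config"
)

// Example_usecaseSubset sketches the subset semantics used by
// FilterGalleryModelsByUsecase, with hypothetical flag sets.
func Example_usecaseSubset() {
	have := config.FLAG_CHAT | config.FLAG_VISION // a model's known usecases
	want := config.FLAG_CHAT | config.FLAG_VISION // filter asks for chat AND vision
	fmt.Println((have & want) == want)

	wantTTS := config.FLAG_CHAT | config.FLAG_TTS // filter asks for chat AND tts
	fmt.Println((have & wantTTS) == wantTTS)
	// Output:
	// true
	// false
}
```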
func (gm GalleryElements[T]) FilterByTag(tag string) GalleryElements[T] {
var filtered GalleryElements[T]
for _, m := range gm {
@@ -267,6 +297,77 @@ func AvailableGalleryModels(galleries []config.Gallery, systemState *system.Syst
return models, nil
}
var (
availableModelsMu sync.RWMutex
availableModelsCache GalleryElements[*GalleryModel]
refreshing atomic.Bool
galleryGeneration atomic.Uint64
)
// GalleryGeneration returns a counter that increments each time the gallery
// model list is refreshed from upstream. VRAM estimation caches use this to
// invalidate entries when the gallery data changes.
func GalleryGeneration() uint64 { return galleryGeneration.Load() }
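On the consumer side, a generation-stamped cache can replace a TTL: stamp each entry with the generation at compute time and treat it as stale once the counter moves. The vram package's actual cache internals are not shown in this diff, so the names below are hypothetical:

```go
package vram

// galleryGenFunc is a hypothetical stand-in for the function wired via
// SetGalleryGenerationFunc in the application startup code.
var galleryGenFunc func() uint64

// cachedEstimate is a hypothetical generation-stamped cache entry.
type cachedEstimate struct {
	gen   uint64 // gallery generation at the time of caching
	bytes uint64 // cached VRAM estimate
}

// valid reports whether the entry was computed against the current gallery
// generation; a refresh bumps the counter and invalidates it, no TTL needed.
func (c cachedEstimate) valid() bool {
	return galleryGenFunc != nil && c.gen == galleryGenFunc()
}
```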
// AvailableGalleryModelsCached returns gallery models from an in-memory cache.
// Local-only fields (installed status) are refreshed on every call. A background
// goroutine is triggered to re-fetch the full model list (including network
// calls) so subsequent requests pick up changes without blocking the caller.
// The first call with an empty cache blocks until the initial load completes.
func AvailableGalleryModelsCached(galleries []config.Gallery, systemState *system.SystemState) (GalleryElements[*GalleryModel], error) {
availableModelsMu.RLock()
cached := availableModelsCache
availableModelsMu.RUnlock()
if cached != nil {
// Refresh installed status under write lock to avoid races with
// concurrent readers and the background refresh goroutine.
availableModelsMu.Lock()
for _, m := range cached {
_, err := os.Stat(filepath.Join(systemState.Model.ModelsPath, fmt.Sprintf("%s.yaml", m.GetName())))
m.SetInstalled(err == nil)
}
availableModelsMu.Unlock()
// Trigger a background refresh if one is not already running.
triggerGalleryRefresh(galleries, systemState)
return cached, nil
}
// No cache yet — must do a blocking load.
models, err := AvailableGalleryModels(galleries, systemState)
if err != nil {
return nil, err
}
availableModelsMu.Lock()
availableModelsCache = models
galleryGeneration.Add(1)
availableModelsMu.Unlock()
return models, nil
}
// triggerGalleryRefresh starts a background goroutine that refreshes the
// gallery model cache. Only one refresh runs at a time; concurrent calls
// are no-ops.
func triggerGalleryRefresh(galleries []config.Gallery, systemState *system.SystemState) {
if !refreshing.CompareAndSwap(false, true) {
return
}
go func() {
defer refreshing.Store(false)
models, err := AvailableGalleryModels(galleries, systemState)
if err != nil {
xlog.Error("background gallery refresh failed", "error", err)
return
}
availableModelsMu.Lock()
availableModelsCache = models
galleryGeneration.Add(1)
availableModelsMu.Unlock()
}()
}
// List available backends
func AvailableBackends(galleries []config.Gallery, systemState *system.SystemState) (GalleryElements[*GalleryBackend], error) {
return availableBackendsWithFilter(galleries, systemState, true)


@@ -581,4 +581,42 @@ var _ = Describe("Gallery", func() {
Expect(mergedParams["model"]).To(Equal("nanbeige4.1-3b-q4_k_m.gguf"))
})
})
Describe("GetKnownUsecases", func() {
It("uses explicit known_usecases from overrides when present", func() {
m := &GalleryModel{
Metadata: Metadata{Backend: "stablediffusion-ggml"},
Overrides: map[string]any{
"known_usecases": []any{"chat"},
},
}
u := m.GetKnownUsecases()
Expect(u).NotTo(BeNil())
// Override wins over the backend's image default.
Expect(*u & config.FLAG_CHAT).To(Equal(config.FLAG_CHAT))
Expect(*u & config.FLAG_IMAGE).To(Equal(config.ModelConfigUsecase(0)))
})
It("falls back to backend defaults when no override is set", func() {
m := &GalleryModel{Metadata: Metadata{Backend: "stablediffusion-ggml"}}
u := m.GetKnownUsecases()
Expect(u).NotTo(BeNil())
Expect(*u & config.FLAG_IMAGE).To(Equal(config.FLAG_IMAGE))
})
It("returns nil when neither overrides nor a known backend provide usecases", func() {
m := &GalleryModel{}
Expect(m.GetKnownUsecases()).To(BeNil())
})
It("filters models without explicit known_usecases via backend defaults", func() {
models := GalleryElements[*GalleryModel]{
&GalleryModel{Metadata: Metadata{Name: "sd-model", Backend: "stablediffusion-ggml"}},
&GalleryModel{Metadata: Metadata{Name: "whisper-model", Backend: "whisper"}},
}
filtered := FilterGalleryModelsByUsecase(models, config.FLAG_IMAGE)
Expect(filtered).To(HaveLen(1))
Expect(filtered[0].Name).To(Equal("sd-model"))
})
})
})


@@ -97,7 +97,7 @@ func (i *DiffuserImporter) Import(details Details) (gallery.ModelConfig, error)
modelConfig := config.ModelConfig{
Name: name,
Description: description,
KnownUsecaseStrings: []string{"image"},
KnownUsecaseStrings: []string{config.UsecaseImage},
Backend: backend,
PredictionOptions: schema.PredictionOptions{
BasicModelRequest: schema.BasicModelRequest{


@@ -135,7 +135,7 @@ func (i *LlamaCPPImporter) Import(details Details) (gallery.ModelConfig, error)
modelConfig := config.ModelConfig{
Name: name,
Description: description,
KnownUsecaseStrings: []string{"chat"},
KnownUsecaseStrings: []string{config.UsecaseChat},
Options: []string{"use_jinja:true"},
Backend: backend,
TemplateConfig: config.TemplateConfig{


@@ -45,7 +45,7 @@ func ImportLocalPath(dirPath, name string) (*config.ModelConfig, error) {
cfg := &config.ModelConfig{
Name: name,
Backend: "llama-cpp",
KnownUsecaseStrings: []string{"chat"},
KnownUsecaseStrings: []string{config.UsecaseChat},
Options: []string{"use_jinja:true"},
}
cfg.Model = relPath(ggufFile)
@@ -104,7 +104,7 @@ func ImportLocalPath(dirPath, name string) (*config.ModelConfig, error) {
cfg := &config.ModelConfig{
Name: name,
Backend: "transformers",
KnownUsecaseStrings: []string{"chat"},
KnownUsecaseStrings: []string{config.UsecaseChat},
}
cfg.Model = baseModel
cfg.TemplateConfig.UseTokenizerTemplate = true
@@ -120,7 +120,7 @@ func ImportLocalPath(dirPath, name string) (*config.ModelConfig, error) {
cfg := &config.ModelConfig{
Name: name,
Backend: "transformers",
KnownUsecaseStrings: []string{"chat"},
KnownUsecaseStrings: []string{config.UsecaseChat},
}
cfg.Model = baseModel
cfg.TemplateConfig.UseTokenizerTemplate = true
@@ -135,7 +135,7 @@ func ImportLocalPath(dirPath, name string) (*config.ModelConfig, error) {
cfg := &config.ModelConfig{
Name: name,
Backend: "transformers",
KnownUsecaseStrings: []string{"chat"},
KnownUsecaseStrings: []string{config.UsecaseChat},
}
cfg.Model = relPath(dirPath)
cfg.TemplateConfig.UseTokenizerTemplate = true


@@ -73,7 +73,7 @@ func (i *MLXImporter) Import(details Details) (gallery.ModelConfig, error) {
modelConfig := config.ModelConfig{
Name: name,
Description: description,
KnownUsecaseStrings: []string{"chat"},
KnownUsecaseStrings: []string{config.UsecaseChat},
Backend: backend,
PredictionOptions: schema.PredictionOptions{
BasicModelRequest: schema.BasicModelRequest{


@@ -87,7 +87,7 @@ func (i *TransformersImporter) Import(details Details) (gallery.ModelConfig, err
modelConfig := config.ModelConfig{
Name: name,
Description: description,
KnownUsecaseStrings: []string{"chat"},
KnownUsecaseStrings: []string{config.UsecaseChat},
Backend: backend,
PredictionOptions: schema.PredictionOptions{
BasicModelRequest: schema.BasicModelRequest{


@@ -77,7 +77,7 @@ func (i *VLLMImporter) Import(details Details) (gallery.ModelConfig, error) {
modelConfig := config.ModelConfig{
Name: name,
Description: description,
KnownUsecaseStrings: []string{"chat"},
KnownUsecaseStrings: []string{config.UsecaseChat},
Backend: backend,
PredictionOptions: schema.PredictionOptions{
BasicModelRequest: schema.BasicModelRequest{


@@ -52,3 +52,39 @@ func (m *GalleryModel) GetTags() []string {
func (m *GalleryModel) GetDescription() string {
return m.Description
}
// GetKnownUsecases returns the usecase flags declared by the gallery entry,
// falling back to the resolved backend's default usecases when the entry has
// none of its own. Returns nil only when neither source provides any.
//
// Why the fallback: many gallery entries omit known_usecases because their
// backend has only one sensible mode (e.g. stablediffusion-ggml is always
// image generation). Without this fallback such models silently disappear
// from usecase-based filtering in the UI.
func (m *GalleryModel) GetKnownUsecases() *config.ModelConfigUsecase {
if strs := overrideUsecaseStrings(m.Overrides); len(strs) > 0 {
return config.GetUsecasesFromYAML(strs)
}
if defaults := config.DefaultUsecasesForBackendCap(m.Backend); len(defaults) > 0 {
return config.GetUsecasesFromYAML(defaults)
}
return nil
}
func overrideUsecaseStrings(overrides map[string]any) []string {
raw, ok := overrides["known_usecases"]
if !ok {
return nil
}
list, ok := raw.([]any)
if !ok {
return nil
}
strs := make([]string, 0, len(list))
for _, v := range list {
if s, ok := v.(string); ok {
strs = append(strs, s)
}
}
return strs
}
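For reference, a short in-package sketch of what this helper extracts from decoded YAML/JSON overrides (the mixed-type list is illustrative):

```go
// Illustrative input, matching what YAML/JSON decoding produces for overrides.
overrides := map[string]any{
	"known_usecases": []any{"chat", "vision", 42}, // the non-string 42 is skipped
}
strs := overrideUsecaseStrings(overrides)
// strs == []string{"chat", "vision"}
_ = strs
```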


@@ -116,13 +116,13 @@ func AutocompleteEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, a
capability := strings.TrimPrefix(provider, "models:")
var filterFn config.ModelConfigFilterFn
switch capability {
case "chat":
case config.UsecaseChat:
filterFn = config.BuildUsecaseFilterFn(config.FLAG_CHAT)
case "tts":
case config.UsecaseTTS:
filterFn = config.BuildUsecaseFilterFn(config.FLAG_TTS)
case "vad":
case config.UsecaseVAD:
filterFn = config.BuildUsecaseFilterFn(config.FLAG_VAD)
case "transcript":
case config.UsecaseTranscript:
filterFn = config.BuildUsecaseFilterFn(config.FLAG_TRANSCRIPT)
default:
filterFn = config.NoFilterFn


@@ -77,18 +77,17 @@ func ImportModelURIEndpoint(cl *config.ModelConfigLoader, appConfig *config.Appl
}
estCtx, cancel := context.WithTimeout(c.Request().Context(), 5*time.Second)
defer cancel()
result, err := vram.EstimateModel(estCtx, vram.ModelEstimateInput{
Files: files,
Options: vram.EstimateOptions{ContextLength: 8192},
})
result, err := vram.EstimateModelMultiContext(estCtx, vram.ModelEstimateInput{
Files: files,
}, []uint32{8192})
if err == nil {
if result.SizeBytes > 0 {
resp.EstimatedSizeBytes = result.SizeBytes
resp.EstimatedSizeDisplay = result.SizeDisplay
}
if result.VRAMBytes > 0 {
resp.EstimatedVRAMBytes = result.VRAMBytes
resp.EstimatedVRAMDisplay = result.VRAMDisplay
if v := result.VRAMForContext(8192); v > 0 {
resp.EstimatedVRAMBytes = v
resp.EstimatedVRAMDisplay = vram.FormatBytes(v)
}
}
}
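
For several context sizes the same call amortizes the file inspection. A hedged sketch reusing `files` and `estCtx` from the handler above, and only the APIs this diff introduces (EstimateModelMultiContext, VRAMForContext, FormatBytes):

```go
// Estimate once, then read VRAM at each requested context length.
sizes := []uint32{8192, 32768, 131072}
result, err := vram.EstimateModelMultiContext(estCtx, vram.ModelEstimateInput{
	Files: files,
}, sizes)
if err == nil {
	for _, cs := range sizes {
		if v := result.VRAMForContext(cs); v > 0 {
			fmt.Printf("ctx=%d → %s\n", cs, vram.FormatBytes(v)) // fmt used for illustration
		}
	}
}
```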


@@ -9,10 +9,9 @@ import (
)
// VRAMEstimateEndpoint returns a handler that estimates VRAM usage for an
// installed model configuration. For uninstalled models (gallery URLs), use
// the gallery-level estimates in /api/models instead.
// installed model configuration at multiple context sizes.
// @Summary Estimate VRAM usage for a model
// @Description Estimates VRAM based on model weight files, context size, and GPU layers
// @Description Estimates VRAM based on model weight files at multiple context sizes
// @Tags config
// @Accept json
// @Produce json


@@ -121,13 +121,13 @@ var _ = Describe("VRAM Estimate Endpoint", func() {
Expect(json.Unmarshal(rec.Body.Bytes(), &resp)).To(Succeed())
// The response should have non-zero size and vram estimates.
// JSON numbers unmarshal as float64.
sizeBytes, ok := resp["sizeBytes"].(float64)
Expect(ok).To(BeTrue(), "sizeBytes should be a number, got: %v (response: %s)", resp["sizeBytes"], rec.Body.String())
sizeBytes, ok := resp["size_bytes"].(float64)
Expect(ok).To(BeTrue(), "size_bytes should be a number, got: %v (response: %s)", resp["size_bytes"], rec.Body.String())
Expect(sizeBytes).To(BeNumerically(">", 0))
vramBytes, ok := resp["vramBytes"].(float64)
Expect(ok).To(BeTrue(), "vramBytes should be a number")
vramBytes, ok := resp["vram_bytes"].(float64)
Expect(ok).To(BeTrue(), "vram_bytes should be a number")
Expect(vramBytes).To(BeNumerically(">", 0))
Expect(resp["sizeDisplay"]).NotTo(BeEmpty())
Expect(resp["vramDisplay"]).NotTo(BeEmpty())
Expect(resp["size_display"]).NotTo(BeEmpty())
Expect(resp["vram_display"]).NotTo(BeEmpty())
})
})


@@ -2,13 +2,13 @@ import { test, expect } from '@playwright/test'
const MOCK_MODELS_RESPONSE = {
models: [
{ name: 'llama-model', description: 'A llama model', backend: 'llama-cpp', installed: false, tags: ['llm'] },
{ name: 'whisper-model', description: 'A whisper model', backend: 'whisper', installed: true, tags: ['stt'] },
{ name: 'llama-model', description: 'A llama model', backend: 'llama-cpp', installed: false, tags: ['chat'] },
{ name: 'whisper-model', description: 'A whisper model', backend: 'whisper', installed: true, tags: ['transcript'] },
{ name: 'stablediffusion-model', description: 'An image model', backend: 'stablediffusion', installed: false, tags: ['sd'] },
{ name: 'unknown-model', description: 'No backend', backend: '', installed: false, tags: [] },
],
allBackends: ['llama-cpp', 'stablediffusion', 'whisper'],
allTags: ['llm', 'sd', 'stt'],
allTags: ['chat', 'sd', 'transcript'],
availableModels: 4,
installedModels: 1,
totalPages: 1,
@@ -78,3 +78,121 @@ test.describe('Models Gallery - Backend Features', () => {
await expect(detail.locator('text=llama-cpp')).toBeVisible()
})
})
const BACKEND_USECASES_MOCK = {
'llama-cpp': ['chat', 'embeddings', 'vision'],
'whisper': ['transcript'],
'stablediffusion': ['image'],
}
test.describe('Models Gallery - Multi-select Filters', () => {
test.beforeEach(async ({ page }) => {
await page.route('**/api/models*', (route) => {
route.fulfill({
contentType: 'application/json',
body: JSON.stringify(MOCK_MODELS_RESPONSE),
})
})
await page.route('**/api/backends/usecases', (route) => {
route.fulfill({
contentType: 'application/json',
body: JSON.stringify(BACKEND_USECASES_MOCK),
})
})
await page.goto('/app/models')
await expect(page.locator('th', { hasText: 'Backend' })).toBeVisible({ timeout: 10_000 })
})
test('multi-select toggle: click Chat, TTS, then Chat again', async ({ page }) => {
const chatBtn = page.locator('.filter-btn', { hasText: 'Chat' })
const ttsBtn = page.locator('.filter-btn', { hasText: 'TTS' })
await chatBtn.click()
await expect(chatBtn).toHaveClass(/active/)
await ttsBtn.click()
await expect(chatBtn).toHaveClass(/active/)
await expect(ttsBtn).toHaveClass(/active/)
// Click Chat again to deselect it
await chatBtn.click()
await expect(chatBtn).not.toHaveClass(/active/)
await expect(ttsBtn).toHaveClass(/active/)
})
test('"All" clears selection', async ({ page }) => {
const chatBtn = page.locator('.filter-btn', { hasText: 'Chat' })
const allBtn = page.locator('.filter-btn', { hasText: 'All' })
await chatBtn.click()
await expect(chatBtn).toHaveClass(/active/)
await allBtn.click()
await expect(allBtn).toHaveClass(/active/)
await expect(chatBtn).not.toHaveClass(/active/)
})
test('query param sent correctly with multiple filters', async ({ page }) => {
const chatBtn = page.locator('.filter-btn', { hasText: 'Chat' })
const ttsBtn = page.locator('.filter-btn', { hasText: 'TTS' })
// Click Chat and wait for its request to settle
await chatBtn.click()
await page.waitForResponse(resp => resp.url().includes('/api/models'))
// Now click TTS and capture the resulting request
const [request] = await Promise.all([
page.waitForRequest(req => {
if (!req.url().includes('/api/models')) return false
const u = new URL(req.url())
const tag = u.searchParams.get('tag')
return tag && tag.split(',').length >= 2
}),
ttsBtn.click(),
])
const url = new URL(request.url())
const tags = url.searchParams.get('tag').split(',').sort()
expect(tags).toEqual(['chat', 'tts'])
})
test('backend greys out unavailable filters', async ({ page }) => {
// Select llama-cpp backend via dropdown
await page.locator('button', { hasText: 'All Backends' }).click()
const dropdown = page.locator('input[placeholder="Search backends..."]').locator('..').locator('..')
await dropdown.locator('text=llama-cpp').click()
// Wait for filter state to update
const ttsBtn = page.locator('.filter-btn', { hasText: 'TTS' })
const sttBtn = page.locator('.filter-btn', { hasText: 'STT' })
const imageBtn = page.locator('.filter-btn', { hasText: 'Image' })
// TTS, STT, Image should be disabled for llama-cpp
await expect(ttsBtn).toBeDisabled()
await expect(sttBtn).toBeDisabled()
await expect(imageBtn).toBeDisabled()
// Chat, Embeddings, Vision should remain enabled
const chatBtn = page.locator('.filter-btn', { hasText: 'Chat' })
const embBtn = page.locator('.filter-btn', { hasText: 'Embeddings' })
const visBtn = page.locator('.filter-btn', { hasText: 'Vision' })
await expect(chatBtn).toBeEnabled()
await expect(embBtn).toBeEnabled()
await expect(visBtn).toBeEnabled()
})
test('backend clears incompatible filters', async ({ page }) => {
// Select TTS filter first
const ttsBtn = page.locator('.filter-btn', { hasText: 'TTS' })
await ttsBtn.click()
await expect(ttsBtn).toHaveClass(/active/)
// Now select llama-cpp backend (which doesn't support TTS)
await page.locator('button', { hasText: 'All Backends' }).click()
const dropdown = page.locator('input[placeholder="Search backends..."]').locator('..').locator('..')
await dropdown.locator('text=llama-cpp').click()
// TTS should be auto-removed from selection
await expect(ttsBtn).not.toHaveClass(/active/)
})
})


@@ -20,6 +20,7 @@
"vision": "Vision",
"tts": "TTS",
"stt": "STT",
"diarization": "Diarisierung",
"embedding": "Embedding",
"rerank": "Rerank",
"allBackends": "Alle Backends",


@@ -14,14 +14,20 @@
},
"filters": {
"all": "All",
"llm": "LLM",
"llm": "Chat",
"image": "Image",
"video": "Video",
"multimodal": "Multimodal",
"vision": "Vision",
"tts": "TTS",
"stt": "STT",
"embedding": "Embedding",
"diarization": "Diarization",
"soundGen": "Sound",
"audioTransform": "Audio FX",
"embedding": "Embeddings",
"rerank": "Rerank",
"detection": "Detection",
"vad": "VAD",
"allBackends": "All Backends",
"searchBackends": "Search backends..."
},


@@ -20,6 +20,7 @@
"vision": "Visión",
"tts": "TTS",
"stt": "STT",
"diarization": "Diarización",
"embedding": "Embedding",
"rerank": "Rerank",
"allBackends": "Todos los backends",


@@ -20,6 +20,7 @@
"vision": "Visione",
"tts": "TTS",
"stt": "STT",
"diarization": "Diarizzazione",
"embedding": "Embedding",
"rerank": "Rerank",
"allBackends": "Tutti i backend",


@@ -20,6 +20,7 @@
"vision": "视觉",
"tts": "TTS",
"stt": "STT",
"diarization": "说话人分离",
"embedding": "嵌入",
"rerank": "重排",
"allBackends": "所有后端",


@@ -296,11 +296,11 @@ export default function Backends() {
const FILTERS = [
{ key: '', label: 'All', icon: 'fa-layer-group' },
{ key: 'llm', label: 'LLM', icon: 'fa-brain' },
{ key: 'chat', label: 'Chat', icon: 'fa-brain' },
{ key: 'image', label: 'Image', icon: 'fa-image' },
{ key: 'video', label: 'Video', icon: 'fa-video' },
{ key: 'tts', label: 'TTS', icon: 'fa-microphone' },
{ key: 'stt', label: 'STT', icon: 'fa-headphones' },
{ key: 'transcript', label: 'STT', icon: 'fa-headphones' },
{ key: 'vision', label: 'Vision', icon: 'fa-eye' },
]


@@ -11,16 +11,26 @@ import GalleryLoader from '../components/GalleryLoader'
import React from 'react'
const CONTEXT_SIZES = [8192, 16384, 32768, 65536, 131072, 262144]
const CONTEXT_LABELS = ['8K', '16K', '32K', '64K', '128K', '256K']
const FILTERS = [
{ key: '', labelKey: 'filters.all', icon: 'fa-layer-group' },
{ key: 'llm', labelKey: 'filters.llm', icon: 'fa-brain' },
{ key: 'sd', labelKey: 'filters.image', icon: 'fa-image' },
{ key: 'chat', labelKey: 'filters.llm', icon: 'fa-brain' },
{ key: 'image', labelKey: 'filters.image', icon: 'fa-image' },
{ key: 'video', labelKey: 'filters.video', icon: 'fa-video' },
{ key: 'multimodal', labelKey: 'filters.multimodal', icon: 'fa-shapes' },
{ key: 'vision', labelKey: 'filters.vision', icon: 'fa-eye' },
{ key: 'tts', labelKey: 'filters.tts', icon: 'fa-microphone' },
{ key: 'stt', labelKey: 'filters.stt', icon: 'fa-headphones' },
{ key: 'embedding', labelKey: 'filters.embedding', icon: 'fa-vector-square' },
{ key: 'reranker', labelKey: 'filters.rerank', icon: 'fa-sort' },
{ key: 'transcript', labelKey: 'filters.stt', icon: 'fa-headphones' },
{ key: 'diarization', labelKey: 'filters.diarization', icon: 'fa-users' },
{ key: 'sound_generation', labelKey: 'filters.soundGen', icon: 'fa-music' },
{ key: 'audio_transform', labelKey: 'filters.audioTransform', icon: 'fa-sliders' },
{ key: 'embeddings', labelKey: 'filters.embedding', icon: 'fa-vector-square' },
{ key: 'rerank', labelKey: 'filters.rerank', icon: 'fa-sort' },
{ key: 'detection', labelKey: 'filters.detection', icon: 'fa-bullseye' },
{ key: 'vad', labelKey: 'filters.vad', icon: 'fa-wave-square' },
]
export default function Models() {
@@ -34,7 +44,7 @@ export default function Models() {
const [page, setPage] = useState(1)
const [totalPages, setTotalPages] = useState(1)
const [search, setSearch] = useState('')
const [filter, setFilter] = useState('')
const [filters, setFilters] = useState([])
const [sort, setSort] = useState('')
const [order, setOrder] = useState('asc')
const [installing, setInstalling] = useState(new Map())
@@ -43,6 +53,9 @@ export default function Models() {
const [stats, setStats] = useState({ total: 0, installed: 0, repositories: 0 })
const [backendFilter, setBackendFilter] = useState('')
const [allBackends, setAllBackends] = useState([])
const [backendUsecases, setBackendUsecases] = useState({})
const [estimates, setEstimates] = useState({})
const [contextSize, setContextSize] = useState(CONTEXT_SIZES[0])
const [confirmDialog, setConfirmDialog] = useState(null)
// Total GPU memory for "fits" check
@@ -52,14 +65,14 @@ export default function Models() {
try {
setLoading(true)
const searchVal = params.search !== undefined ? params.search : search
const filterVal = params.filter !== undefined ? params.filter : filter
const filtersVal = params.filters !== undefined ? params.filters : filters
const sortVal = params.sort !== undefined ? params.sort : sort
const backendVal = params.backendFilter !== undefined ? params.backendFilter : backendFilter
const queryParams = {
page: params.page || page,
items: 9,
}
if (filterVal) queryParams.tag = filterVal
if (filtersVal.length > 0) queryParams.tag = filtersVal.join(',')
if (searchVal) queryParams.term = searchVal
if (backendVal) queryParams.backend = backendVal
if (sortVal) {
@@ -79,11 +92,27 @@ export default function Models() {
} finally {
setLoading(false)
}
}, [page, search, filter, sort, order, backendFilter, addToast, t])
}, [page, search, filters, sort, order, backendFilter, addToast, t])
useEffect(() => {
fetchModels()
}, [page, filter, sort, order, backendFilter])
}, [page, filters, sort, order, backendFilter])
// Fetch backend→usecase mapping once on mount
useEffect(() => {
modelsApi.backendUsecases().then(setBackendUsecases).catch(() => {})
}, [])
// When backend changes, remove selected filters that aren't available
useEffect(() => {
if (backendFilter && backendUsecases[backendFilter]) {
setFilters(prev => {
const possible = backendUsecases[backendFilter]
const filtered = prev.filter(k => k === 'multimodal' || possible.includes(k))
return filtered.length !== prev.length ? filtered : prev
})
}
}, [backendFilter, backendUsecases])
// Re-fetch when operations change (install/delete completion)
useEffect(() => {
@@ -95,11 +124,42 @@ export default function Models() {
fetchModels({ search: value, page: 1 })
})
// Fetch VRAM/size estimates asynchronously for visible models.
useEffect(() => {
if (models.length === 0) return
let cancelled = false
models.forEach(model => {
const id = model.name || model.id
if (estimates[id]) return
modelsApi.estimate(id, CONTEXT_SIZES).then(est => {
if (cancelled) return
if (est && (est.sizeBytes || est.estimates)) {
setEstimates(prev => ({ ...prev, [id]: est }))
}
}).catch(() => {})
})
return () => { cancelled = true }
}, [models])
const handleSearch = (value) => {
setSearch(value)
debouncedFetch(value)
}
const toggleFilter = (key) => {
if (key === '') { setFilters([]); setPage(1); return }
setFilters(prev =>
prev.includes(key) ? prev.filter(k => k !== key) : [...prev, key]
)
setPage(1)
}
const isFilterAvailable = (key) => {
if (!backendFilter || key === '' || key === 'multimodal') return true
const possible = backendUsecases[backendFilter]
return !possible || possible.includes(key)
}
const handleSort = (col) => {
if (sort === col) {
setOrder(o => o === 'asc' ? 'desc' : 'asc')
@@ -221,16 +281,23 @@ export default function Models() {
{/* Filter buttons */}
<div className="filter-bar">
{FILTERS.map(f => (
<button
key={f.key}
className={`filter-btn ${filter === f.key ? 'active' : ''}`}
onClick={() => { setFilter(f.key); setPage(1) }}
>
<i className={`fas ${f.icon}`} style={{ marginRight: 4 }} />
{t(f.labelKey)}
</button>
))}
{FILTERS.map(f => {
const isAll = f.key === ''
const active = isAll ? filters.length === 0 : filters.includes(f.key)
const available = isFilterAvailable(f.key)
return (
<button
key={f.key}
className={`filter-btn ${active ? 'active' : ''}`}
disabled={!available}
style={!available ? { opacity: 0.4, cursor: 'not-allowed' } : undefined}
onClick={() => toggleFilter(f.key)}
>
<i className={`fas ${f.icon}`} style={{ marginRight: 4 }} />
{t(f.labelKey)}
</button>
)
})}
{allBackends.length > 0 && (
<SearchableSelect
value={backendFilter}
@@ -244,6 +311,25 @@ export default function Models() {
)}
</div>
{/* Context size slider for VRAM estimates */}
<div style={{ display: 'flex', alignItems: 'center', gap: 'var(--spacing-sm)', marginBottom: 'var(--spacing-md)', fontSize: '0.8125rem' }}>
<label style={{ color: 'var(--color-text-muted)', whiteSpace: 'nowrap' }}>
<i className="fas fa-memory" style={{ marginRight: 4 }} />
Context:
</label>
<input
type="range"
min={0}
max={CONTEXT_SIZES.length - 1}
value={CONTEXT_SIZES.indexOf(contextSize)}
onChange={(e) => setContextSize(CONTEXT_SIZES[e.target.value])}
style={{ width: 140, accentColor: 'var(--color-primary)' }}
/>
<span style={{ fontWeight: 600, minWidth: '3em' }}>
{CONTEXT_LABELS[CONTEXT_SIZES.indexOf(contextSize)]}
</span>
</div>
{/* Table */}
{loading ? (
<GalleryLoader />
@@ -252,12 +338,12 @@ export default function Models() {
<div className="empty-state-icon"><i className="fas fa-search" /></div>
<h2 className="empty-state-title">{t('empty.title')}</h2>
<p className="empty-state-text">
{search || filter || backendFilter ? t('empty.withFilters') : t('empty.noFilters')}
{search || filters.length > 0 || backendFilter ? t('empty.withFilters') : t('empty.noFilters')}
</p>
{(search || filter || backendFilter) && (
{(search || filters.length > 0 || backendFilter) && (
<button
className="btn btn-secondary btn-sm"
onClick={() => { handleSearch(''); setFilter(''); setBackendFilter(''); setPage(1) }}
onClick={() => { handleSearch(''); setFilters([]); setBackendFilter(''); setPage(1) }}
>
<i className="fas fa-times" /> {t('search.clearFilters')}
</button>
@@ -286,9 +372,14 @@ export default function Models() {
<tbody>
{models.map((model, idx) => {
const name = model.name || model.id
const estData = estimates[name]
const sizeDisplay = estData?.sizeDisplay
const ctxEst = estData?.estimates?.[String(contextSize)]
const vramDisplay = ctxEst?.vramDisplay
const vramBytes = ctxEst?.vramBytes
const installing = isInstalling(name)
const progress = getOperationProgress(name)
const fit = fitsGpu(model.estimated_vram_bytes)
const fit = fitsGpu(vramBytes)
const isExpanded = expandedRow === idx
return (
@@ -355,15 +446,15 @@ export default function Models() {
{/* Size / VRAM */}
<td>
<div style={{ display: 'flex', flexDirection: 'column', gap: '2px' }}>
{(model.estimated_size_display || model.estimated_vram_display) ? (
{(sizeDisplay || vramDisplay) ? (
<>
<span style={{ fontSize: '0.75rem', color: 'var(--color-text-secondary)' }}>
{model.estimated_size_display && model.estimated_size_display !== '0 B' && (
<span>{t('table.size', { size: model.estimated_size_display })}</span>
{sizeDisplay && sizeDisplay !== '0 B' && (
<span>{t('table.size', { size: sizeDisplay })}</span>
)}
{model.estimated_size_display && model.estimated_size_display !== '0 B' && model.estimated_vram_display && model.estimated_vram_display !== '0 B' && ' · '}
{model.estimated_vram_display && model.estimated_vram_display !== '0 B' && (
<span>{t('table.vram', { vram: model.estimated_vram_display })}</span>
{sizeDisplay && sizeDisplay !== '0 B' && vramDisplay && vramDisplay !== '0 B' && ' · '}
{vramDisplay && vramDisplay !== '0 B' && (
<span>{t('table.vram', { vram: vramDisplay })}</span>
)}
</span>
{fit !== null && (
@@ -437,7 +528,7 @@ export default function Models() {
{isExpanded && (
<tr>
<td colSpan="8" style={{ padding: 0 }}>
<ModelDetail model={model} fit={fit} expandedFiles={expandedFiles} setExpandedFiles={setExpandedFiles} t={t} />
<ModelDetail model={model} fit={fit} sizeDisplay={sizeDisplay} vramDisplay={vramDisplay} expandedFiles={expandedFiles} setExpandedFiles={setExpandedFiles} t={t} />
</td>
</tr>
)}
@@ -490,7 +581,7 @@ function DetailRow({ label, children }) {
)
}
function ModelDetail({ model, fit, expandedFiles, setExpandedFiles, t }) {
function ModelDetail({ model, fit, sizeDisplay, vramDisplay, expandedFiles, setExpandedFiles, t }) {
const files = model.additionalFiles || model.files || []
return (
<div style={{ padding: 'var(--spacing-md) var(--spacing-lg)', background: 'var(--color-bg-primary)', borderTop: '1px solid var(--color-border-subtle)' }}>
@@ -516,12 +607,12 @@ function ModelDetail({ model, fit, expandedFiles, setExpandedFiles, t }) {
)}
</DetailRow>
<DetailRow label={t('detail.size')}>
{model.estimated_size_display && model.estimated_size_display !== '0 B' ? model.estimated_size_display : null}
{sizeDisplay && sizeDisplay !== '0 B' ? sizeDisplay : null}
</DetailRow>
<DetailRow label={t('detail.vram')}>
{model.estimated_vram_display && model.estimated_vram_display !== '0 B' ? (
{vramDisplay && vramDisplay !== '0 B' ? (
<span style={{ display: 'flex', alignItems: 'center', gap: 'var(--spacing-sm)' }}>
{model.estimated_vram_display}
{vramDisplay}
{fit !== null && (
<span style={{ fontSize: '0.75rem', color: fit ? 'var(--color-success)' : 'var(--color-error)' }}>
<i className="fas fa-microchip" /> {fit ? t('detail.fitsGpu') : t('detail.mayNotFitGpu')}


@@ -86,6 +86,10 @@ export const modelsApi = {
listCapabilities: () => fetchJSON(API_CONFIG.endpoints.modelsCapabilities),
install: (id) => postJSON(API_CONFIG.endpoints.installModel(id), {}),
delete: (id) => postJSON(API_CONFIG.endpoints.deleteModel(id), {}),
estimate: (id, contexts) => fetchJSON(
buildUrl(API_CONFIG.endpoints.modelEstimate(id),
contexts?.length ? { contexts: contexts.join(',') } : {})
),
getConfig: (id) => postJSON(API_CONFIG.endpoints.modelConfig(id), {}),
getConfigJson: (name) => fetchJSON(API_CONFIG.endpoints.modelConfigJson(name)),
getJob: (uid) => fetchJSON(API_CONFIG.endpoints.modelJob(uid)),
@@ -116,6 +120,7 @@ export const modelsApi = {
method: 'PATCH',
body: JSON.stringify(patch),
}),
backendUsecases: () => fetchJSON('/api/backends/usecases'),
}
// Backends API


@@ -9,6 +9,7 @@ export const API_CONFIG = {
models: '/api/models',
installModel: (id) => `/api/models/install/${id}`,
deleteModel: (id) => `/api/models/delete/${id}`,
modelEstimate: (id) => `/api/models/estimate/${id}`,
modelConfig: (id) => `/api/models/config/${id}`,
modelConfigJson: (name) => `/api/models/config-json/${name}`,
configMetadata: '/api/models/config-metadata',


@@ -9,11 +9,9 @@ import (
"math"
"net/http"
"net/url"
"path"
"slices"
"strconv"
"strings"
"sync"
"time"
"github.com/google/uuid"
@@ -37,8 +35,81 @@ const (
licenseSortFieldName = "license"
statusSortFieldName = "status"
ascSortOrder = "asc"
multimodalFilterKey = "multimodal"
)
// usecaseFilters maps UI filter keys to ModelConfigUsecase flags for
// capability-based gallery filtering.
var usecaseFilters = map[string]config.ModelConfigUsecase{
config.UsecaseChat: config.FLAG_CHAT,
config.UsecaseImage: config.FLAG_IMAGE,
config.UsecaseVideo: config.FLAG_VIDEO,
config.UsecaseVision: config.FLAG_VISION,
config.UsecaseTTS: config.FLAG_TTS,
config.UsecaseTranscript: config.FLAG_TRANSCRIPT,
config.UsecaseSoundGeneration: config.FLAG_SOUND_GENERATION,
config.UsecaseEmbeddings: config.FLAG_EMBEDDINGS,
config.UsecaseRerank: config.FLAG_RERANK,
config.UsecaseDetection: config.FLAG_DETECTION,
config.UsecaseVAD: config.FLAG_VAD,
config.UsecaseAudioTransform: config.FLAG_AUDIO_TRANSFORM,
config.UsecaseDiarization: config.FLAG_DIARIZATION,
}
// extractHFRepo tries to find a HuggingFace repo ID from model overrides or URLs.
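// For example (illustrative, assuming vram.ExtractHFRepoID recognizes
// standard huggingface.co download URLs), a URL such as
// "https://huggingface.co/TheBloke/Llama-2-7B-GGUF/resolve/main/model.gguf"
// would yield the repo ID "TheBloke/Llama-2-7B-GGUF".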
func extractHFRepo(overrides map[string]any, urls []string) string {
if overrides != nil {
if params, ok := overrides["parameters"].(map[string]any); ok {
if modelRef, ok := params["model"].(string); ok {
if repoID, ok := vram.ExtractHFRepoID(modelRef); ok {
return repoID
}
}
}
}
for _, u := range urls {
if repoID, ok := vram.ExtractHFRepoID(u); ok {
return repoID
}
}
return ""
}
// buildEstimateInput creates a vram.ModelEstimateInput from gallery model metadata.
func buildEstimateInput(m *gallery.GalleryModel) vram.ModelEstimateInput {
var input vram.ModelEstimateInput
input.Size = m.Size
if hfRepoID := extractHFRepo(m.Overrides, m.URLs); hfRepoID != "" {
input.HFRepo = hfRepoID
}
for _, f := range m.AdditionalFiles {
if vram.IsWeightFile(f.URI) {
input.Files = append(input.Files, vram.FileInput{URI: f.URI, Size: 0})
}
}
return input
}
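// Illustrative input (values hypothetical): for a gallery entry with
// Size "7.2GB", an override whose parameters.model points at a HuggingFace
// repo, and two .gguf additional files, the result carries Size, HFRepo,
// and both weight files with Size 0, presumably so the estimator resolves
// their byte sizes itself.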
// parseContextSizes parses a comma-separated list of context sizes from a query param.
// Returns a default of [8192] if the param is empty or unparseable.
func parseContextSizes(raw string) []uint32 {
if raw == "" {
return []uint32{8192}
}
var sizes []uint32
for _, s := range strings.Split(raw, ",") {
s = strings.TrimSpace(s)
if v, err := strconv.ParseUint(s, 10, 32); err == nil && v > 0 {
sizes = append(sizes, uint32(v))
}
}
if len(sizes) == 0 {
return []uint32{8192}
}
return sizes
}
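// A minimal table-driven sketch of the behavior above; illustrative only,
// not part of the commit, and assumes a _test.go file with the standard
// "testing" and "slices" imports.
func TestParseContextSizes(t *testing.T) {
	cases := []struct {
		raw  string
		want []uint32
	}{
		{"", []uint32{8192}},                  // empty -> default
		{"4096,16384", []uint32{4096, 16384}}, // happy path
		{"abc,0,-1", []uint32{8192}},          // nothing parseable -> default
		{" 4096 , junk", []uint32{4096}},      // valid entries kept, whitespace trimmed
	}
	for _, c := range cases {
		if got := parseContextSizes(c.raw); !slices.Equal(got, c.want) {
			t.Errorf("parseContextSizes(%q) = %v, want %v", c.raw, got, c.want)
		}
	}
}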
// getDirectorySize calculates the total size of files in a directory
// metaParentOf returns the name of the auto-resolving (meta) backend that
// declares `name` as one of its hardware-specific variants in its
@@ -260,7 +331,7 @@ func RegisterUIAPIRoutes(app *echo.Echo, cl *config.ModelConfigLoader, ml *model
items = "9"
}
models, err := gallery.AvailableGalleryModels(appConfig.Galleries, appConfig.SystemState)
models, err := gallery.AvailableGalleryModelsCached(appConfig.Galleries, appConfig.SystemState)
if err != nil {
xlog.Error("could not list models from galleries", "error", err)
return c.JSON(http.StatusInternalServerError, map[string]any{
@@ -294,8 +365,30 @@ func RegisterUIAPIRoutes(app *echo.Echo, cl *config.ModelConfigLoader, ml *model
}
slices.Sort(backendNames)
// Filter by usecase tags (comma-separated for multi-select).
if tag != "" {
models = gallery.GalleryElements[*gallery.GalleryModel](models).FilterByTag(tag)
var combinedFlag config.ModelConfigUsecase
hasMultimodal := false
var plainTags []string
for _, t := range strings.Split(tag, ",") {
t = strings.TrimSpace(t)
if t == multimodalFilterKey {
hasMultimodal = true
} else if flag, ok := usecaseFilters[t]; ok {
combinedFlag |= flag
} else if t != "" {
plainTags = append(plainTags, t)
}
}
if hasMultimodal {
models = gallery.FilterGalleryModelsByMultimodal(models)
}
if combinedFlag != config.FLAG_ANY {
models = gallery.FilterGalleryModelsByUsecase(models, combinedFlag)
}
for _, pt := range plainTags {
models = gallery.GalleryElements[*gallery.GalleryModel](models).FilterByTag(pt)
}
}
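// Worked example (illustrative): ?tag=chat,vision,multimodal,finance sets
// hasMultimodal, folds FLAG_CHAT|FLAG_VISION into combinedFlag for a single
// bitmask pass, and leaves the unrecognized "finance" to the legacy
// substring tag filter.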
if term != "" {
models = gallery.GalleryElements[*gallery.GalleryModel](models).Search(term)
@@ -355,41 +448,6 @@ func RegisterUIAPIRoutes(app *echo.Echo, cl *config.ModelConfigLoader, ml *model
modelsJSON := make([]map[string]any, 0, len(models))
seenIDs := make(map[string]bool)
weightExts := map[string]bool{".gguf": true, ".safetensors": true, ".bin": true, ".pt": true}
extractHFRepo := func(overrides map[string]any, urls []string) string {
// Try overrides.parameters.model first
if overrides != nil {
if params, ok := overrides["parameters"].(map[string]any); ok {
if modelRef, ok := params["model"].(string); ok {
if repoID, ok := vram.ExtractHFRepoID(modelRef); ok {
return repoID
}
}
}
}
// Fall back to the first HuggingFace URL in the metadata urls list
for _, u := range urls {
if repoID, ok := vram.ExtractHFRepoID(u); ok {
return repoID
}
}
return ""
}
hasWeightFiles := func(files []gallery.File) bool {
for _, f := range files {
ext := strings.ToLower(path.Ext(path.Base(f.URI)))
if weightExts[ext] {
return true
}
}
return false
}
const hfEstimateTimeout = 10 * time.Second
const estimateConcurrency = 3
sem := make(chan struct{}, estimateConcurrency)
var wg sync.WaitGroup
for _, m := range models {
modelID := m.ID()
@@ -431,63 +489,9 @@ func RegisterUIAPIRoutes(app *echo.Echo, cl *config.ModelConfigLoader, ml *model
"backend": m.Backend,
}
// Build EstimateModel input from available metadata
var estimateInput vram.ModelEstimateInput
estimateInput.Options = vram.EstimateOptions{ContextLength: 8192}
estimateInput.Size = m.Size
if hfRepoID := extractHFRepo(m.Overrides, m.URLs); hfRepoID != "" {
estimateInput.HFRepo = hfRepoID
}
if hasWeightFiles(m.AdditionalFiles) {
files := make([]gallery.File, len(m.AdditionalFiles))
copy(files, m.AdditionalFiles)
for _, f := range files {
ext := strings.ToLower(path.Ext(path.Base(f.URI)))
if weightExts[ext] {
estimateInput.Files = append(estimateInput.Files, vram.FileInput{URI: f.URI, Size: 0})
}
}
}
// Run estimation (async for file-based and HF repo, sync for size string only)
needsAsync := len(estimateInput.Files) > 0 || estimateInput.HFRepo != ""
if needsAsync {
input := estimateInput
wg.Go(func() {
sem <- struct{}{}
defer func() { <-sem }()
ctx, cancel := context.WithTimeout(context.Background(), hfEstimateTimeout)
defer cancel()
result, err := vram.EstimateModel(ctx, input)
if err == nil {
if result.SizeBytes > 0 {
obj["estimated_size_bytes"] = result.SizeBytes
obj["estimated_size_display"] = result.SizeDisplay
}
if result.VRAMBytes > 0 {
obj["estimated_vram_bytes"] = result.VRAMBytes
obj["estimated_vram_display"] = result.VRAMDisplay
}
}
})
} else if estimateInput.Size != "" {
result, _ := vram.EstimateModel(context.Background(), estimateInput)
if result.SizeBytes > 0 {
obj["estimated_size_bytes"] = result.SizeBytes
obj["estimated_size_display"] = result.SizeDisplay
}
if result.VRAMBytes > 0 {
obj["estimated_vram_bytes"] = result.VRAMBytes
obj["estimated_vram_display"] = result.VRAMDisplay
}
}
modelsJSON = append(modelsJSON, obj)
}
wg.Wait()
prevPage := pageNum - 1
nextPage := pageNum + 1
if prevPage < 1 {
@@ -639,6 +643,65 @@ func RegisterUIAPIRoutes(app *echo.Echo, cl *config.ModelConfigLoader, ml *model
})
})
// Returns a mapping of backend names to the usecase filter keys they support.
// Used by the gallery frontend to grey out usecase filter buttons when a
// backend is selected.
app.GET("/api/backends/usecases", func(c echo.Context) error {
result := make(map[string][]string, len(config.BackendCapabilities))
for name, cap := range config.BackendCapabilities {
var keys []string
for _, uc := range cap.PossibleUsecases {
if _, ok := usecaseFilters[uc]; ok {
keys = append(keys, uc)
}
}
slices.Sort(keys)
result[name] = keys
}
return c.JSON(200, result)
}, adminMiddleware)
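// Illustrative response shape (backend names and capability sets are
// assumptions, not taken from this commit):
//   {"llama-cpp": ["chat", "embeddings", "rerank", "vision"],
//    "whisper": ["transcript", "vad"]}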
// Returns VRAM/size estimates for a single gallery model at multiple
// context sizes. The frontend calls this per-model so the gallery page
// can load instantly and fill in estimates asynchronously.
// Query params:
// contexts - comma-separated context sizes (default: 8192)
app.GET("/api/models/estimate/:id", func(c echo.Context) error {
modelID, err := url.QueryUnescape(c.Param("id"))
if err != nil {
return c.JSON(http.StatusBadRequest, map[string]any{"error": "invalid model ID"})
}
contextSizes := parseContextSizes(c.QueryParam("contexts"))
// Look up the model from the gallery to build the estimate input.
models, err := gallery.AvailableGalleryModelsCached(appConfig.Galleries, appConfig.SystemState)
if err != nil {
return c.JSON(http.StatusInternalServerError, map[string]any{"error": err.Error()})
}
model := gallery.FindGalleryElement(models, modelID)
if model == nil {
return c.JSON(http.StatusNotFound, map[string]any{"error": "model not found"})
}
input := buildEstimateInput(model)
if len(input.Files) == 0 && input.HFRepo == "" && input.Size == "" {
return c.JSON(200, vram.MultiContextEstimate{})
}
ctx, cancel := context.WithTimeout(c.Request().Context(), 10*time.Second)
defer cancel()
result, err := vram.EstimateModelMultiContext(ctx, input, contextSizes)
if err != nil {
xlog.Debug("model estimate failed", "model", modelID, "error", err)
return c.JSON(200, vram.MultiContextEstimate{})
}
return c.JSON(200, result)
}, adminMiddleware)
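// Illustrative request (model id and sizes hypothetical):
//   GET /api/models/estimate/localai%40example-model?contexts=4096,8192
// The response is a vram.MultiContextEstimate carrying the on-disk size plus
// one VRAM figure per requested context; the exact JSON field names depend
// on struct tags in pkg/vram that are not shown in this diff.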
app.POST("/api/models/install/:id", func(c echo.Context) error {
galleryID := c.Param("id")
// URL decode the gallery ID (e.g., "localai%40model" -> "localai@model")
@@ -742,7 +805,7 @@ func RegisterUIAPIRoutes(app *echo.Echo, cl *config.ModelConfigLoader, ml *model
}
xlog.Debug("API job submitted to get config", "galleryID", galleryID)
models, err := gallery.AvailableGalleryModels(appConfig.Galleries, appConfig.SystemState)
models, err := gallery.AvailableGalleryModelsCached(appConfig.Galleries, appConfig.SystemState)
if err != nil {
return c.JSON(http.StatusInternalServerError, map[string]any{
"error": err.Error(),

View File

@@ -43,17 +43,16 @@ func EstimateVRAM(ctx context.Context, req VRAMRequest, cl *config.ModelConfigLo
modelsPath := sysState.Model.ModelsPath
var files []vram.FileInput
var firstGGUF string
seen := make(map[string]bool)
for _, f := range cfg.DownloadFiles {
addWeightFile(string(f.URI), modelsPath, &files, &firstGGUF, seen)
addWeightFile(string(f.URI), modelsPath, &files, seen)
}
if cfg.Model != "" {
addWeightFile(cfg.Model, modelsPath, &files, &firstGGUF, seen)
addWeightFile(cfg.Model, modelsPath, &files, seen)
}
if cfg.MMProj != "" {
addWeightFile(cfg.MMProj, modelsPath, &files, &firstGGUF, seen)
addWeightFile(cfg.MMProj, modelsPath, &files, seen)
}
if len(files) == 0 {
@@ -64,39 +63,46 @@ func EstimateVRAM(ctx context.Context, req VRAMRequest, cl *config.ModelConfigLo
}
contextDefaulted := false
opts := vram.EstimateOptions{
ContextLength: req.ContextSize,
GPULayers: req.GPULayers,
KVQuantBits: req.KVQuantBits,
}
if opts.ContextLength == 0 {
ctxLen := req.ContextSize
if ctxLen == 0 {
if cfg.ContextSize != nil {
opts.ContextLength = uint32(*cfg.ContextSize)
ctxLen = uint32(*cfg.ContextSize)
} else {
opts.ContextLength = 8192
ctxLen = 8192
contextDefaulted = true
}
}
opts := vram.EstimateOptions{
GPULayers: req.GPULayers,
KVQuantBits: req.KVQuantBits,
}
subCtx, cancel := context.WithTimeout(ctx, 10*time.Second)
defer cancel()
result, err := vram.Estimate(subCtx, files, opts, vram.DefaultCachedSizeResolver(), vram.DefaultCachedGGUFReader())
multi, err := vram.EstimateMultiContext(subCtx, files, []uint32{ctxLen}, opts, vram.DefaultCachedSizeResolver(), vram.DefaultCachedGGUFReader())
if err != nil {
return nil, fmt.Errorf("vram estimate: %w", err)
}
resp := &VRAMResponse{EstimateResult: result}
at := multi.Estimates[fmt.Sprint(ctxLen)]
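// (Estimates is keyed by the decimal string form of the context length,
// matching the fmt.Sprint conversion used for the lookup.)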
resp := &VRAMResponse{
EstimateResult: vram.EstimateResult{
SizeBytes: multi.SizeBytes,
SizeDisplay: multi.SizeDisplay,
ContextLength: at.ContextLength,
VRAMBytes: at.VRAMBytes,
VRAMDisplay: at.VRAMDisplay,
},
ModelMaxContext: multi.ModelMaxContext,
}
if contextDefaulted && firstGGUF != "" {
ggufMeta, err := vram.DefaultCachedGGUFReader().ReadMetadata(subCtx, firstGGUF)
if err == nil && ggufMeta != nil && ggufMeta.MaximumContextLength > 0 {
resp.ModelMaxContext = ggufMeta.MaximumContextLength
resp.ContextNote = fmt.Sprintf(
"Estimate used default context_size=8192. The model's trained maximum context is %d; VRAM usage will be higher at larger context sizes.",
ggufMeta.MaximumContextLength,
)
}
if contextDefaulted && multi.ModelMaxContext > 0 {
resp.ContextNote = fmt.Sprintf(
"Estimate used default context_size=8192. The model's trained maximum context is %d; VRAM usage will be higher at larger context sizes.",
multi.ModelMaxContext,
)
}
return resp, nil
}
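// Sketch of the multi-context call shape used above (values hypothetical):
//   multi, err := vram.EstimateMultiContext(ctx, files, []uint32{4096, 8192},
//       opts, vram.DefaultCachedSizeResolver(), vram.DefaultCachedGGUFReader())
//   multi.SizeBytes         // context-independent on-disk size
//   multi.Estimates["8192"] // per-context VRAM estimate
//   multi.ModelMaxContext   // trained maximum, when GGUF metadata exposes it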
@@ -111,8 +117,8 @@ func resolveModelURI(uri, modelsPath string) string {
return "file://" + filepath.Join(modelsPath, uri)
}
// addWeightFile appends a resolved weight file to files and tracks the first GGUF.
func addWeightFile(uri, modelsPath string, files *[]vram.FileInput, firstGGUF *string, seen map[string]bool) {
// addWeightFile appends a resolved weight file to files.
func addWeightFile(uri, modelsPath string, files *[]vram.FileInput, seen map[string]bool) {
if !vram.IsWeightFile(uri) {
return
}
@@ -122,7 +128,4 @@ func addWeightFile(uri, modelsPath string, files *[]vram.FileInput, firstGGUF *s
}
seen[resolved] = true
*files = append(*files, vram.FileInput{URI: resolved, Size: 0})
if *firstGGUF == "" && vram.IsGGUF(uri) {
*firstGGUF = resolved
}
}

View File

@@ -628,10 +628,14 @@ func (r *SmartRouter) estimateModelVRAM(ctx context.Context, opts *pb.ModelOptio
estCtx, cancel := context.WithTimeout(ctx, 10*time.Second)
defer cancel()
ctxSize := uint32(opts.ContextSize)
if ctxSize == 0 {
ctxSize = 8192
}
input := vram.ModelEstimateInput{
Options: vram.EstimateOptions{
ContextLength: uint32(opts.ContextSize),
GPULayers: int(opts.NGPULayers),
GPULayers: int(opts.NGPULayers),
},
}
@@ -649,28 +653,15 @@ func (r *SmartRouter) estimateModelVRAM(ctx context.Context, opts *pb.ModelOptio
}
}
// If model file exists, get its size as fallback
if opts.ModelFile != "" && len(input.Files) == 0 {
if info, err := os.Stat(opts.ModelFile); err == nil {
return vram.EstimateFromSize(uint64(info.Size())).VRAMBytes
}
}
if len(input.Files) == 0 && input.HFRepo == "" && input.Size == "" {
return 0
}
result, err := vram.EstimateModel(estCtx, input)
if err != nil || result.VRAMBytes == 0 {
// Last resort: try model file size
if opts.ModelFile != "" {
if info, statErr := os.Stat(opts.ModelFile); statErr == nil {
return vram.EstimateFromSize(uint64(info.Size())).VRAMBytes
}
}
result, err := vram.EstimateModelMultiContext(estCtx, input, []uint32{ctxSize})
if err != nil {
return 0
}
return result.VRAMBytes
return result.VRAMForContext(ctxSize)
}
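// Usage sketch (illustrative): a caller requesting a 16k context would do
//   bytes := r.estimateModelVRAM(ctx, &pb.ModelOptions{ContextSize: 16384})
// and treat a zero return as "no estimate available", since both the
// empty-input check and estimation errors above return 0.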
// installBackendOnNode sends a NATS backend.install request-reply to the node.