mirror of
https://github.com/mudler/LocalAI.git
synced 2026-05-17 04:56:52 -04:00
feat(gallery): Speed up load times and clean gallery entries (#9211)
* feat: Rework VRAM estimation and use known_usecases in gallery Signed-off-by: Richard Palethorpe <io@richiejp.com> Assisted-by: Claude:claude-opus-4-7[1m] [Claude Code] * chore(gallery): regenerate gallery index and add known_usecases to model entries Signed-off-by: Richard Palethorpe <io@richiejp.com> --------- Signed-off-by: Richard Palethorpe <io@richiejp.com>
This commit is contained in:
committed by
GitHub
parent
6d56bf98fe
commit
969005b2a1
@@ -17,6 +17,7 @@ import (
|
||||
"github.com/mudler/LocalAI/core/services/jobs"
|
||||
"github.com/mudler/LocalAI/core/services/nodes"
|
||||
"github.com/mudler/LocalAI/core/services/storage"
|
||||
"github.com/mudler/LocalAI/pkg/vram"
|
||||
coreStartup "github.com/mudler/LocalAI/core/startup"
|
||||
"github.com/mudler/LocalAI/internal"
|
||||
|
||||
@@ -251,6 +252,10 @@ func New(opts ...config.AppOption) (*Application, error) {
|
||||
go uc.Run(options.Context)
|
||||
}
|
||||
|
||||
// Wire gallery generation counter into VRAM caches so they invalidate
|
||||
// when gallery data refreshes instead of using a fixed TTL.
|
||||
vram.SetGalleryGenerationFunc(gallery.GalleryGeneration)
|
||||
|
||||
if options.ConfigFile != "" {
|
||||
if err := application.ModelConfigLoader().LoadMultipleModelConfigsSingleFile(options.ConfigFile, configLoaderOpts...); err != nil {
|
||||
xlog.Error("error loading config file", "error", err)
|
||||
|
||||
480
core/config/backend_capabilities.go
Normal file
480
core/config/backend_capabilities.go
Normal file
@@ -0,0 +1,480 @@
|
||||
package config
|
||||
|
||||
import (
|
||||
"slices"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// Usecase name constants — the canonical string values used in gallery entries,
// model configs (known_usecases), and UsecaseInfoMap keys.
//
// These strings are serialized into YAML/JSON, so changing a value here is a
// breaking change for existing gallery entries and model configs.
const (
	UsecaseChat            = "chat"
	UsecaseCompletion      = "completion"
	UsecaseEdit            = "edit"
	UsecaseVision          = "vision"
	UsecaseEmbeddings      = "embeddings"
	UsecaseTokenize        = "tokenize"
	UsecaseImage           = "image"
	UsecaseVideo           = "video"
	UsecaseTranscript      = "transcript"
	UsecaseTTS             = "tts"
	UsecaseSoundGeneration = "sound_generation"
	UsecaseRerank          = "rerank"
	UsecaseDetection       = "detection"
	UsecaseVAD             = "vad"
	UsecaseAudioTransform  = "audio_transform"
	UsecaseDiarization     = "diarization"
)
|
||||
|
||||
// GRPCMethod identifies a Backend service RPC from backend.proto.
type GRPCMethod string

// The set of Backend service RPC names. Values must match the method names
// declared in backend.proto exactly.
const (
	MethodPredict            GRPCMethod = "Predict"
	MethodPredictStream      GRPCMethod = "PredictStream"
	MethodEmbedding          GRPCMethod = "Embedding"
	MethodGenerateImage      GRPCMethod = "GenerateImage"
	MethodGenerateVideo      GRPCMethod = "GenerateVideo"
	MethodAudioTranscription GRPCMethod = "AudioTranscription"
	MethodTTS                GRPCMethod = "TTS"
	MethodTTSStream          GRPCMethod = "TTSStream"
	MethodSoundGeneration    GRPCMethod = "SoundGeneration"
	MethodTokenizeString     GRPCMethod = "TokenizeString"
	MethodDetect             GRPCMethod = "Detect"
	MethodRerank             GRPCMethod = "Rerank"
	MethodVAD                GRPCMethod = "VAD"
	MethodAudioTransform     GRPCMethod = "AudioTransform"
	MethodDiarize            GRPCMethod = "Diarize"
)
|
||||
|
||||
// UsecaseInfo describes a single known_usecase value and how it maps
// to the gRPC backend API.
type UsecaseInfo struct {
	// Flag is the ModelConfigUsecase bitmask value.
	Flag ModelConfigUsecase
	// GRPCMethod is the primary Backend service RPC this usecase maps to.
	GRPCMethod GRPCMethod
	// IsModifier is true when this usecase doesn't map to its own gRPC RPC
	// but modifies how another RPC behaves (e.g., vision uses Predict with images).
	IsModifier bool
	// DependsOn names the usecase(s) this modifier requires (e.g., "chat").
	// NOTE(review): the field is a single string even though the comment says
	// "usecase(s)" — confirm whether multiple dependencies are ever needed.
	DependsOn string
	// Description is a human/LLM-readable explanation of what this usecase means.
	Description string
}
|
||||
|
||||
// UsecaseInfoMap maps each known_usecase string to its gRPC and semantic info.
// Every Usecase* constant above has exactly one entry here.
var UsecaseInfoMap = map[string]UsecaseInfo{
	// --- Text generation (Predict RPC) ---
	UsecaseChat: {
		Flag:        FLAG_CHAT,
		GRPCMethod:  MethodPredict,
		Description: "Conversational/instruction-following via the Predict RPC with chat templates.",
	},
	UsecaseCompletion: {
		Flag:        FLAG_COMPLETION,
		GRPCMethod:  MethodPredict,
		Description: "Text completion via the Predict RPC with a completion template.",
	},
	UsecaseEdit: {
		Flag:        FLAG_EDIT,
		GRPCMethod:  MethodPredict,
		Description: "Text editing via the Predict RPC with an edit template.",
	},
	// Vision is a modifier on Predict rather than a standalone RPC.
	UsecaseVision: {
		Flag:        FLAG_VISION,
		GRPCMethod:  MethodPredict,
		IsModifier:  true,
		DependsOn:   UsecaseChat,
		Description: "The model accepts images alongside text in the Predict RPC. For llama-cpp this requires an mmproj file.",
	},
	// --- Embedding / tokenization ---
	UsecaseEmbeddings: {
		Flag:        FLAG_EMBEDDINGS,
		GRPCMethod:  MethodEmbedding,
		Description: "Vector embedding generation via the Embedding RPC.",
	},
	UsecaseTokenize: {
		Flag:        FLAG_TOKENIZE,
		GRPCMethod:  MethodTokenizeString,
		Description: "Tokenization via the TokenizeString RPC without running inference.",
	},
	// --- Visual generation ---
	UsecaseImage: {
		Flag:        FLAG_IMAGE,
		GRPCMethod:  MethodGenerateImage,
		Description: "Image generation via the GenerateImage RPC (Stable Diffusion, Flux, etc.).",
	},
	UsecaseVideo: {
		Flag:        FLAG_VIDEO,
		GRPCMethod:  MethodGenerateVideo,
		Description: "Video generation via the GenerateVideo RPC.",
	},
	// --- Audio ---
	UsecaseTranscript: {
		Flag:        FLAG_TRANSCRIPT,
		GRPCMethod:  MethodAudioTranscription,
		Description: "Speech-to-text via the AudioTranscription RPC.",
	},
	UsecaseTTS: {
		Flag:        FLAG_TTS,
		GRPCMethod:  MethodTTS,
		Description: "Text-to-speech via the TTS RPC.",
	},
	UsecaseSoundGeneration: {
		Flag:        FLAG_SOUND_GENERATION,
		GRPCMethod:  MethodSoundGeneration,
		Description: "Music/sound generation via the SoundGeneration RPC (not speech).",
	},
	// --- Utility / analysis ---
	UsecaseRerank: {
		Flag:        FLAG_RERANK,
		GRPCMethod:  MethodRerank,
		Description: "Document reranking via the Rerank RPC.",
	},
	UsecaseDetection: {
		Flag:        FLAG_DETECTION,
		GRPCMethod:  MethodDetect,
		Description: "Object detection via the Detect RPC with bounding boxes.",
	},
	UsecaseVAD: {
		Flag:        FLAG_VAD,
		GRPCMethod:  MethodVAD,
		Description: "Voice activity detection via the VAD RPC.",
	},
	UsecaseAudioTransform: {
		Flag:        FLAG_AUDIO_TRANSFORM,
		GRPCMethod:  MethodAudioTransform,
		Description: "Audio-in / audio-out transformations (echo cancellation, noise suppression, dereverberation, voice conversion) via the AudioTransform RPC.",
	},
	UsecaseDiarization: {
		Flag:        FLAG_DIARIZATION,
		GRPCMethod:  MethodDiarize,
		Description: "Speaker diarization (who-spoke-when, per-speaker segments) via the Diarize RPC.",
	},
}
|
||||
|
||||
// BackendCapability describes which gRPC methods and usecases a backend supports.
// Derived from reviewing actual implementations in backend/go/ and backend/python/.
type BackendCapability struct {
	// GRPCMethods lists the Backend service RPCs this backend implements.
	GRPCMethods []GRPCMethod
	// PossibleUsecases lists all usecase strings this backend can support.
	// Entries should be keys of UsecaseInfoMap.
	PossibleUsecases []string
	// DefaultUsecases lists the conservative safe defaults.
	// Expected to be a subset of PossibleUsecases (enforced by tests).
	DefaultUsecases []string
	// AcceptsImages indicates multimodal image input in Predict.
	AcceptsImages bool
	// AcceptsVideos indicates multimodal video input in Predict.
	AcceptsVideos bool
	// AcceptsAudios indicates multimodal audio input in Predict.
	AcceptsAudios bool
	// Description is a human-readable summary of the backend.
	Description string
}
|
||||
|
||||
// BackendCapabilities maps each backend name (as used in model configs and gallery
// entries) to its verified capabilities. This is the single source of truth for
// what each backend supports.
//
// Backend names use hyphens (e.g., "llama-cpp") matching the gallery convention.
// Use NormalizeBackendName() for names with dots (e.g., "llama.cpp").
var BackendCapabilities = map[string]BackendCapability{
	// --- LLM / text generation backends ---
	"llama-cpp": {
		GRPCMethods:      []GRPCMethod{MethodPredict, MethodPredictStream, MethodEmbedding, MethodTokenizeString},
		PossibleUsecases: []string{UsecaseChat, UsecaseCompletion, UsecaseEdit, UsecaseEmbeddings, UsecaseTokenize, UsecaseVision},
		DefaultUsecases:  []string{UsecaseChat},
		AcceptsImages:    true, // requires mmproj
		Description:      "llama.cpp GGUF models — LLM inference with optional vision via mmproj",
	},
	"vllm": {
		GRPCMethods:      []GRPCMethod{MethodPredict, MethodPredictStream, MethodEmbedding},
		PossibleUsecases: []string{UsecaseChat, UsecaseCompletion, UsecaseEmbeddings, UsecaseVision},
		DefaultUsecases:  []string{UsecaseChat},
		AcceptsImages:    true,
		AcceptsVideos:    true,
		Description:      "vLLM engine — high-throughput LLM serving with optional multimodal",
	},
	"vllm-omni": {
		GRPCMethods:      []GRPCMethod{MethodPredict, MethodPredictStream, MethodGenerateImage, MethodGenerateVideo, MethodTTS},
		PossibleUsecases: []string{UsecaseChat, UsecaseCompletion, UsecaseImage, UsecaseVideo, UsecaseTTS, UsecaseVision},
		DefaultUsecases:  []string{UsecaseChat},
		AcceptsImages:    true,
		AcceptsVideos:    true,
		AcceptsAudios:    true,
		Description:      "vLLM omni-modal — supports text, image, video generation and TTS",
	},
	"transformers": {
		GRPCMethods:      []GRPCMethod{MethodPredict, MethodPredictStream, MethodEmbedding, MethodTTS, MethodSoundGeneration},
		PossibleUsecases: []string{UsecaseChat, UsecaseCompletion, UsecaseEmbeddings, UsecaseTTS, UsecaseSoundGeneration},
		DefaultUsecases:  []string{UsecaseChat},
		Description:      "HuggingFace transformers — general-purpose Python inference",
	},
	"mlx": {
		GRPCMethods:      []GRPCMethod{MethodPredict, MethodPredictStream, MethodEmbedding},
		PossibleUsecases: []string{UsecaseChat, UsecaseCompletion, UsecaseEmbeddings},
		DefaultUsecases:  []string{UsecaseChat},
		Description:      "Apple MLX framework — optimized for Apple Silicon",
	},
	"mlx-distributed": {
		GRPCMethods:      []GRPCMethod{MethodPredict, MethodPredictStream, MethodEmbedding},
		PossibleUsecases: []string{UsecaseChat, UsecaseCompletion, UsecaseEmbeddings},
		DefaultUsecases:  []string{UsecaseChat},
		Description:      "MLX distributed inference across multiple Apple Silicon devices",
	},
	"mlx-vlm": {
		GRPCMethods:      []GRPCMethod{MethodPredict, MethodPredictStream, MethodEmbedding},
		PossibleUsecases: []string{UsecaseChat, UsecaseCompletion, UsecaseEmbeddings, UsecaseVision},
		DefaultUsecases:  []string{UsecaseChat, UsecaseVision},
		AcceptsImages:    true,
		AcceptsAudios:    true,
		Description:      "MLX vision-language models with multimodal input",
	},
	"mlx-audio": {
		GRPCMethods:      []GRPCMethod{MethodPredict, MethodTTS},
		PossibleUsecases: []string{UsecaseChat, UsecaseCompletion, UsecaseTTS},
		DefaultUsecases:  []string{UsecaseChat},
		Description:      "MLX audio models — text generation and TTS",
	},

	// --- Image/video generation backends ---
	"diffusers": {
		GRPCMethods:      []GRPCMethod{MethodGenerateImage, MethodGenerateVideo},
		PossibleUsecases: []string{UsecaseImage, UsecaseVideo},
		DefaultUsecases:  []string{UsecaseImage},
		Description:      "HuggingFace diffusers — Stable Diffusion, Flux, video generation",
	},
	"stablediffusion": {
		GRPCMethods:      []GRPCMethod{MethodGenerateImage},
		PossibleUsecases: []string{UsecaseImage},
		DefaultUsecases:  []string{UsecaseImage},
		Description:      "Stable Diffusion native backend",
	},
	"stablediffusion-ggml": {
		GRPCMethods:      []GRPCMethod{MethodGenerateImage},
		PossibleUsecases: []string{UsecaseImage},
		DefaultUsecases:  []string{UsecaseImage},
		Description:      "Stable Diffusion via GGML quantized models",
	},

	// --- Speech-to-text backends ---
	"whisper": {
		GRPCMethods:      []GRPCMethod{MethodAudioTranscription, MethodVAD},
		PossibleUsecases: []string{UsecaseTranscript, UsecaseVAD},
		DefaultUsecases:  []string{UsecaseTranscript},
		Description:      "OpenAI Whisper — speech recognition and voice activity detection",
	},
	"faster-whisper": {
		GRPCMethods:      []GRPCMethod{MethodAudioTranscription},
		PossibleUsecases: []string{UsecaseTranscript},
		DefaultUsecases:  []string{UsecaseTranscript},
		Description:      "CTranslate2-accelerated Whisper for faster transcription",
	},
	// NOTE(review): the description mentions speaker diarization, but neither
	// MethodDiarize nor UsecaseDiarization is declared — confirm intended.
	"whisperx": {
		GRPCMethods:      []GRPCMethod{MethodAudioTranscription},
		PossibleUsecases: []string{UsecaseTranscript},
		DefaultUsecases:  []string{UsecaseTranscript},
		Description:      "WhisperX — Whisper with word-level timestamps and speaker diarization",
	},
	"moonshine": {
		GRPCMethods:      []GRPCMethod{MethodAudioTranscription},
		PossibleUsecases: []string{UsecaseTranscript},
		DefaultUsecases:  []string{UsecaseTranscript},
		Description:      "Moonshine speech recognition",
	},
	"nemo": {
		GRPCMethods:      []GRPCMethod{MethodAudioTranscription},
		PossibleUsecases: []string{UsecaseTranscript},
		DefaultUsecases:  []string{UsecaseTranscript},
		Description:      "NVIDIA NeMo speech recognition",
	},
	"qwen-asr": {
		GRPCMethods:      []GRPCMethod{MethodAudioTranscription},
		PossibleUsecases: []string{UsecaseTranscript},
		DefaultUsecases:  []string{UsecaseTranscript},
		Description:      "Qwen automatic speech recognition",
	},
	"voxtral": {
		GRPCMethods:      []GRPCMethod{MethodAudioTranscription},
		PossibleUsecases: []string{UsecaseTranscript},
		DefaultUsecases:  []string{UsecaseTranscript},
		Description:      "Voxtral speech recognition",
	},
	"vibevoice": {
		GRPCMethods:      []GRPCMethod{MethodAudioTranscription, MethodTTS},
		PossibleUsecases: []string{UsecaseTranscript, UsecaseTTS},
		DefaultUsecases:  []string{UsecaseTranscript, UsecaseTTS},
		Description:      "VibeVoice — bidirectional speech (transcription and synthesis)",
	},

	// --- TTS backends ---
	"piper": {
		GRPCMethods:      []GRPCMethod{MethodTTS},
		PossibleUsecases: []string{UsecaseTTS},
		DefaultUsecases:  []string{UsecaseTTS},
		Description:      "Piper — fast neural TTS optimized for Raspberry Pi",
	},
	"kokoro": {
		GRPCMethods:      []GRPCMethod{MethodTTS},
		PossibleUsecases: []string{UsecaseTTS},
		DefaultUsecases:  []string{UsecaseTTS},
		Description:      "Kokoro TTS",
	},
	"coqui": {
		GRPCMethods:      []GRPCMethod{MethodTTS},
		PossibleUsecases: []string{UsecaseTTS},
		DefaultUsecases:  []string{UsecaseTTS},
		Description:      "Coqui TTS — multi-speaker neural synthesis",
	},
	"kitten-tts": {
		GRPCMethods:      []GRPCMethod{MethodTTS},
		PossibleUsecases: []string{UsecaseTTS},
		DefaultUsecases:  []string{UsecaseTTS},
		Description:      "Kitten TTS",
	},
	"outetts": {
		GRPCMethods:      []GRPCMethod{MethodTTS},
		PossibleUsecases: []string{UsecaseTTS},
		DefaultUsecases:  []string{UsecaseTTS},
		Description:      "OuteTTS",
	},
	"pocket-tts": {
		GRPCMethods:      []GRPCMethod{MethodTTS},
		PossibleUsecases: []string{UsecaseTTS},
		DefaultUsecases:  []string{UsecaseTTS},
		Description:      "Pocket TTS — lightweight text-to-speech",
	},
	"qwen-tts": {
		GRPCMethods:      []GRPCMethod{MethodTTS},
		PossibleUsecases: []string{UsecaseTTS},
		DefaultUsecases:  []string{UsecaseTTS},
		Description:      "Qwen TTS",
	},
	"faster-qwen3-tts": {
		GRPCMethods:      []GRPCMethod{MethodTTS},
		PossibleUsecases: []string{UsecaseTTS},
		DefaultUsecases:  []string{UsecaseTTS},
		Description:      "Faster Qwen3 TTS — accelerated Qwen TTS",
	},
	"fish-speech": {
		GRPCMethods:      []GRPCMethod{MethodTTS},
		PossibleUsecases: []string{UsecaseTTS},
		DefaultUsecases:  []string{UsecaseTTS},
		Description:      "Fish Speech TTS",
	},
	"neutts": {
		GRPCMethods:      []GRPCMethod{MethodTTS},
		PossibleUsecases: []string{UsecaseTTS},
		DefaultUsecases:  []string{UsecaseTTS},
		Description:      "NeuTTS — neural text-to-speech",
	},
	"chatterbox": {
		GRPCMethods:      []GRPCMethod{MethodTTS},
		PossibleUsecases: []string{UsecaseTTS},
		DefaultUsecases:  []string{UsecaseTTS},
		Description:      "Chatterbox TTS",
	},
	"voxcpm": {
		GRPCMethods:      []GRPCMethod{MethodTTS, MethodTTSStream},
		PossibleUsecases: []string{UsecaseTTS},
		DefaultUsecases:  []string{UsecaseTTS},
		Description:      "VoxCPM TTS with streaming support",
	},

	// --- Sound generation backends ---
	"ace-step": {
		GRPCMethods:      []GRPCMethod{MethodTTS, MethodSoundGeneration},
		PossibleUsecases: []string{UsecaseTTS, UsecaseSoundGeneration},
		DefaultUsecases:  []string{UsecaseSoundGeneration},
		Description:      "ACE-Step — music and sound generation",
	},
	"acestep-cpp": {
		GRPCMethods:      []GRPCMethod{MethodSoundGeneration},
		PossibleUsecases: []string{UsecaseSoundGeneration},
		DefaultUsecases:  []string{UsecaseSoundGeneration},
		Description:      "ACE-Step C++ — native sound generation",
	},
	"transformers-musicgen": {
		GRPCMethods:      []GRPCMethod{MethodTTS, MethodSoundGeneration},
		PossibleUsecases: []string{UsecaseTTS, UsecaseSoundGeneration},
		DefaultUsecases:  []string{UsecaseSoundGeneration},
		Description:      "Meta MusicGen via transformers — music generation from text",
	},

	// --- Audio transform backends ---
	"localvqe": {
		GRPCMethods:      []GRPCMethod{MethodAudioTransform},
		PossibleUsecases: []string{UsecaseAudioTransform},
		DefaultUsecases:  []string{UsecaseAudioTransform},
		Description:      "LocalVQE — joint AEC, noise suppression, and dereverberation for 16 kHz mono speech",
	},

	// --- Utility backends ---
	"rerankers": {
		GRPCMethods:      []GRPCMethod{MethodRerank},
		PossibleUsecases: []string{UsecaseRerank},
		DefaultUsecases:  []string{UsecaseRerank},
		Description:      "Cross-encoder reranking models",
	},
	"rfdetr": {
		GRPCMethods:      []GRPCMethod{MethodDetect},
		PossibleUsecases: []string{UsecaseDetection},
		DefaultUsecases:  []string{UsecaseDetection},
		Description:      "RF-DETR object detection",
	},
	"silero-vad": {
		GRPCMethods:      []GRPCMethod{MethodVAD},
		PossibleUsecases: []string{UsecaseVAD},
		DefaultUsecases:  []string{UsecaseVAD},
		Description:      "Silero VAD — voice activity detection",
	},
}
|
||||
|
||||
// NormalizeBackendName converts backend names to the canonical hyphenated form
// used in gallery entries (e.g., "llama.cpp" → "llama-cpp").
func NormalizeBackendName(backend string) string {
	// Every "." segment separator becomes a "-".
	return strings.Join(strings.Split(backend, "."), "-")
}
|
||||
|
||||
// GetBackendCapability returns the capability info for a backend, or nil if unknown.
|
||||
// Handles backend name normalization.
|
||||
func GetBackendCapability(backend string) *BackendCapability {
|
||||
if cap, ok := BackendCapabilities[NormalizeBackendName(backend)]; ok {
|
||||
return &cap
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// PossibleUsecasesForBackend returns all usecases a backend can support.
|
||||
// Returns nil if the backend is unknown.
|
||||
func PossibleUsecasesForBackend(backend string) []string {
|
||||
if cap := GetBackendCapability(backend); cap != nil {
|
||||
return cap.PossibleUsecases
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// DefaultUsecasesForBackend returns the conservative default usecases.
|
||||
// Returns nil if the backend is unknown.
|
||||
func DefaultUsecasesForBackendCap(backend string) []string {
|
||||
if cap := GetBackendCapability(backend); cap != nil {
|
||||
return cap.DefaultUsecases
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// IsValidUsecaseForBackend checks whether a usecase is in a backend's possible set.
|
||||
// Returns true for unknown backends (permissive fallback).
|
||||
func IsValidUsecaseForBackend(backend, usecase string) bool {
|
||||
cap := GetBackendCapability(backend)
|
||||
if cap == nil {
|
||||
return true // unknown backend — don't restrict
|
||||
}
|
||||
return slices.Contains(cap.PossibleUsecases, usecase)
|
||||
}
|
||||
|
||||
// AllBackendNames returns a sorted list of all known backend names.
|
||||
func AllBackendNames() []string {
|
||||
names := make([]string, 0, len(BackendCapabilities))
|
||||
for name := range BackendCapabilities {
|
||||
names = append(names, name)
|
||||
}
|
||||
slices.Sort(names)
|
||||
return names
|
||||
}
|
||||
95
core/config/backend_capabilities_test.go
Normal file
95
core/config/backend_capabilities_test.go
Normal file
@@ -0,0 +1,95 @@
|
||||
package config
|
||||
|
||||
import (
|
||||
"slices"
|
||||
"strings"
|
||||
|
||||
. "github.com/onsi/ginkgo/v2"
|
||||
. "github.com/onsi/gomega"
|
||||
)
|
||||
|
||||
// Invariant checks over the static BackendCapabilities and UsecaseInfoMap tables.
var _ = Describe("BackendCapabilities", func() {
	It("every backend declares possible/default usecases and gRPC methods", func() {
		for name, cap := range BackendCapabilities {
			Expect(cap.PossibleUsecases).NotTo(BeEmpty(), "backend %q has no possible usecases", name)
			Expect(cap.DefaultUsecases).NotTo(BeEmpty(), "backend %q has no default usecases", name)
			Expect(cap.GRPCMethods).NotTo(BeEmpty(), "backend %q has no gRPC methods", name)
		}
	})

	It("default usecases are a subset of possible usecases", func() {
		for name, cap := range BackendCapabilities {
			for _, d := range cap.DefaultUsecases {
				Expect(cap.PossibleUsecases).To(ContainElement(d), "backend %q: default %q not in possible %v", name, d, cap.PossibleUsecases)
			}
		}
	})

	It("every backend's possible usecases map to a known FLAG_*", func() {
		allFlags := GetAllModelConfigUsecases()
		for name, cap := range BackendCapabilities {
			for _, u := range cap.PossibleUsecases {
				info, ok := UsecaseInfoMap[u]
				Expect(ok).To(BeTrue(), "backend %q: usecase %q not in UsecaseInfoMap", name, u)
				// Fast path: most usecase names upper-case directly to a flag name.
				flagName := "FLAG_" + strings.ToUpper(u)
				if _, ok := allFlags[flagName]; ok {
					continue
				}
				// Some usecase names don't transform exactly to FLAG_<UPPER>; fall back to flag value lookup.
				found := false
				for _, flag := range allFlags {
					if flag == info.Flag {
						found = true
						break
					}
				}
				Expect(found).To(BeTrue(), "backend %q: usecase %q flag %d not in GetAllModelConfigUsecases", name, u, info.Flag)
			}
		}
	})

	It("every UsecaseInfoMap entry has a non-zero flag and a gRPC method", func() {
		for name, info := range UsecaseInfoMap {
			Expect(info.Flag).NotTo(Equal(FLAG_ANY), "usecase %q has FLAG_ANY (zero) — should have a real flag", name)
			Expect(info.GRPCMethod).NotTo(BeEmpty(), "usecase %q has no gRPC method", name)
		}
	})
})
|
||||
|
||||
// Lookup behavior of GetBackendCapability, including name normalization.
var _ = Describe("GetBackendCapability", func() {
	It("returns the capability for a known backend", func() {
		cap := GetBackendCapability("llama-cpp")
		Expect(cap).NotTo(BeNil())
		Expect(cap.PossibleUsecases).To(ContainElement("chat"))
	})

	It("normalizes hyphenated names so llama.cpp resolves to llama-cpp", func() {
		Expect(GetBackendCapability("llama.cpp")).NotTo(BeNil())
	})

	It("returns nil for unknown backends", func() {
		Expect(GetBackendCapability("nonexistent")).To(BeNil())
	})
})
|
||||
|
||||
// Validation semantics: known backends restrict usecases, unknown ones do not.
var _ = Describe("IsValidUsecaseForBackend", func() {
	It("accepts a backend's declared usecases", func() {
		Expect(IsValidUsecaseForBackend("piper", "tts")).To(BeTrue())
	})

	It("rejects usecases outside a backend's possible set", func() {
		Expect(IsValidUsecaseForBackend("piper", "chat")).To(BeFalse())
	})

	It("is permissive for unknown backends", func() {
		Expect(IsValidUsecaseForBackend("unknown", "anything")).To(BeTrue())
	})
})
|
||||
|
||||
// Sanity check on the enumeration helper: size floor and deterministic order.
var _ = Describe("AllBackendNames", func() {
	It("returns 30+ backends in sorted order", func() {
		names := AllBackendNames()
		Expect(len(names)).To(BeNumerically(">=", 30))
		Expect(slices.IsSorted(names)).To(BeTrue())
	})
})
|
||||
@@ -630,16 +630,45 @@ const (
|
||||
FLAG_TOKENIZE ModelConfigUsecase = 0b001000000000
|
||||
FLAG_VAD ModelConfigUsecase = 0b010000000000
|
||||
FLAG_VIDEO ModelConfigUsecase = 0b100000000000
|
||||
FLAG_DETECTION ModelConfigUsecase = 0b1000000000000
|
||||
FLAG_FACE_RECOGNITION ModelConfigUsecase = 0b10000000000000
|
||||
FLAG_SPEAKER_RECOGNITION ModelConfigUsecase = 0b100000000000000
|
||||
FLAG_AUDIO_TRANSFORM ModelConfigUsecase = 0b1000000000000000
|
||||
FLAG_DIARIZATION ModelConfigUsecase = 0b10000000000000000
|
||||
FLAG_DETECTION ModelConfigUsecase = 0b1000000000000
|
||||
FLAG_VISION ModelConfigUsecase = 0b10000000000000
|
||||
FLAG_FACE_RECOGNITION ModelConfigUsecase = 0b100000000000000
|
||||
FLAG_SPEAKER_RECOGNITION ModelConfigUsecase = 0b1000000000000000
|
||||
FLAG_AUDIO_TRANSFORM ModelConfigUsecase = 0b10000000000000000
|
||||
FLAG_DIARIZATION ModelConfigUsecase = 0b100000000000000000
|
||||
|
||||
// Common Subsets
|
||||
FLAG_LLM ModelConfigUsecase = FLAG_CHAT | FLAG_COMPLETION | FLAG_EDIT
|
||||
)
|
||||
|
||||
// ModalityGroups defines groups of usecases that belong to the same modality.
// Flags within the same group are NOT orthogonal (e.g., chat and completion are
// both text/language). A model is multimodal when its usecases span 2+ groups.
//
// NOTE(review): FLAG_EMBEDDINGS, FLAG_TOKENIZE, FLAG_VAD, FLAG_RERANK and
// FLAG_DIARIZATION are not assigned to any group, so they never contribute to
// multimodality — confirm that is intended.
var ModalityGroups = []ModelConfigUsecase{
	FLAG_CHAT | FLAG_COMPLETION | FLAG_EDIT, // text/language
	FLAG_VISION | FLAG_DETECTION,            // visual understanding
	FLAG_TRANSCRIPT,                         // speech input
	FLAG_TTS | FLAG_SOUND_GENERATION,        // audio output
	FLAG_AUDIO_TRANSFORM,                    // audio in/out transforms
	FLAG_IMAGE | FLAG_VIDEO,                 // visual generation
}
|
||||
|
||||
// IsMultimodal returns true if the given usecases span two or more orthogonal
|
||||
// modality groups. For example chat+vision is multimodal, but chat+completion
|
||||
// is not (both belong to the text/language group).
|
||||
func IsMultimodal(usecases ModelConfigUsecase) bool {
|
||||
groupCount := 0
|
||||
for _, group := range ModalityGroups {
|
||||
if usecases&group != 0 {
|
||||
groupCount++
|
||||
if groupCount >= 2 {
|
||||
return true
|
||||
}
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func GetAllModelConfigUsecases() map[string]ModelConfigUsecase {
|
||||
return map[string]ModelConfigUsecase{
|
||||
// Note: FLAG_ANY is intentionally excluded from this map
|
||||
@@ -657,7 +686,8 @@ func GetAllModelConfigUsecases() map[string]ModelConfigUsecase {
|
||||
"FLAG_VAD": FLAG_VAD,
|
||||
"FLAG_LLM": FLAG_LLM,
|
||||
"FLAG_VIDEO": FLAG_VIDEO,
|
||||
"FLAG_DETECTION": FLAG_DETECTION,
|
||||
"FLAG_DETECTION": FLAG_DETECTION,
|
||||
"FLAG_VISION": FLAG_VISION,
|
||||
"FLAG_FACE_RECOGNITION": FLAG_FACE_RECOGNITION,
|
||||
"FLAG_SPEAKER_RECOGNITION": FLAG_SPEAKER_RECOGNITION,
|
||||
"FLAG_AUDIO_TRANSFORM": FLAG_AUDIO_TRANSFORM,
|
||||
|
||||
@@ -7,6 +7,8 @@ import (
|
||||
"path/filepath"
|
||||
"slices"
|
||||
"strings"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"github.com/lithammer/fuzzysearch/fuzzy"
|
||||
@@ -92,6 +94,34 @@ func (gm GalleryElements[T]) Search(term string) GalleryElements[T] {
|
||||
return filteredModels
|
||||
}
|
||||
|
||||
// FilterGalleryModelsByUsecase returns models whose known_usecases include all
|
||||
// the bits set in usecase. For example, passing FLAG_CHAT matches any model
|
||||
// with the chat usecase; passing FLAG_CHAT|FLAG_VISION matches only models
|
||||
// that have both.
|
||||
func FilterGalleryModelsByUsecase(models GalleryElements[*GalleryModel], usecase config.ModelConfigUsecase) GalleryElements[*GalleryModel] {
|
||||
var filtered GalleryElements[*GalleryModel]
|
||||
for _, m := range models {
|
||||
u := m.GetKnownUsecases()
|
||||
if u != nil && (*u&usecase) == usecase {
|
||||
filtered = append(filtered, m)
|
||||
}
|
||||
}
|
||||
return filtered
|
||||
}
|
||||
|
||||
// FilterGalleryModelsByMultimodal returns models whose known_usecases span two
|
||||
// or more orthogonal modality groups (e.g. chat+vision, tts+transcript).
|
||||
func FilterGalleryModelsByMultimodal(models GalleryElements[*GalleryModel]) GalleryElements[*GalleryModel] {
|
||||
var filtered GalleryElements[*GalleryModel]
|
||||
for _, m := range models {
|
||||
u := m.GetKnownUsecases()
|
||||
if u != nil && config.IsMultimodal(*u) {
|
||||
filtered = append(filtered, m)
|
||||
}
|
||||
}
|
||||
return filtered
|
||||
}
|
||||
|
||||
func (gm GalleryElements[T]) FilterByTag(tag string) GalleryElements[T] {
|
||||
var filtered GalleryElements[T]
|
||||
for _, m := range gm {
|
||||
@@ -267,6 +297,77 @@ func AvailableGalleryModels(galleries []config.Gallery, systemState *system.Syst
|
||||
return models, nil
|
||||
}
|
||||
|
||||
var (
	// availableModelsMu guards availableModelsCache (and serializes the
	// installed-status rewrite in AvailableGalleryModelsCached).
	availableModelsMu sync.RWMutex
	// availableModelsCache is the last fetched gallery model list; nil until
	// the first successful load.
	availableModelsCache GalleryElements[*GalleryModel]
	// refreshing is set while a background refresh goroutine is in flight so
	// only one runs at a time.
	refreshing atomic.Bool
	// galleryGeneration counts cache replacements; exposed via GalleryGeneration.
	galleryGeneration atomic.Uint64
)
|
||||
|
||||
// GalleryGeneration returns a counter that increments each time the gallery
|
||||
// model list is refreshed from upstream. VRAM estimation caches use this to
|
||||
// invalidate entries when the gallery data changes.
|
||||
func GalleryGeneration() uint64 { return galleryGeneration.Load() }
|
||||
|
||||
// AvailableGalleryModelsCached returns gallery models from an in-memory cache.
// Local-only fields (installed status) are refreshed on every call. A background
// goroutine is triggered to re-fetch the full model list (including network
// calls) so subsequent requests pick up changes without blocking the caller.
// The first call with an empty cache blocks until the initial load completes.
func AvailableGalleryModelsCached(galleries []config.Gallery, systemState *system.SystemState) (GalleryElements[*GalleryModel], error) {
	// Snapshot the cache pointer under the read lock; the slice itself is
	// shared with other callers.
	availableModelsMu.RLock()
	cached := availableModelsCache
	availableModelsMu.RUnlock()

	if cached != nil {
		// Refresh installed status under write lock to avoid races with
		// concurrent readers and the background refresh goroutine.
		// A model counts as installed when its <name>.yaml exists in the
		// models path.
		availableModelsMu.Lock()
		for _, m := range cached {
			_, err := os.Stat(filepath.Join(systemState.Model.ModelsPath, fmt.Sprintf("%s.yaml", m.GetName())))
			m.SetInstalled(err == nil)
		}
		availableModelsMu.Unlock()
		// Trigger a background refresh if one is not already running.
		triggerGalleryRefresh(galleries, systemState)
		return cached, nil
	}

	// No cache yet — must do a blocking load.
	models, err := AvailableGalleryModels(galleries, systemState)
	if err != nil {
		return nil, err
	}

	// Publish the initial cache and bump the generation so dependent caches
	// (VRAM estimation) invalidate.
	availableModelsMu.Lock()
	availableModelsCache = models
	galleryGeneration.Add(1)
	availableModelsMu.Unlock()

	return models, nil
}
|
||||
|
||||
// triggerGalleryRefresh starts a background goroutine that refreshes the
|
||||
// gallery model cache. Only one refresh runs at a time; concurrent calls
|
||||
// are no-ops.
|
||||
func triggerGalleryRefresh(galleries []config.Gallery, systemState *system.SystemState) {
|
||||
if !refreshing.CompareAndSwap(false, true) {
|
||||
return
|
||||
}
|
||||
go func() {
|
||||
defer refreshing.Store(false)
|
||||
models, err := AvailableGalleryModels(galleries, systemState)
|
||||
if err != nil {
|
||||
xlog.Error("background gallery refresh failed", "error", err)
|
||||
return
|
||||
}
|
||||
availableModelsMu.Lock()
|
||||
availableModelsCache = models
|
||||
galleryGeneration.Add(1)
|
||||
availableModelsMu.Unlock()
|
||||
}()
|
||||
}
|
||||
|
||||
// List available backends
|
||||
func AvailableBackends(galleries []config.Gallery, systemState *system.SystemState) (GalleryElements[*GalleryBackend], error) {
|
||||
return availableBackendsWithFilter(galleries, systemState, true)
|
||||
|
||||
@@ -581,4 +581,42 @@ var _ = Describe("Gallery", func() {
|
||||
Expect(mergedParams["model"]).To(Equal("nanbeige4.1-3b-q4_k_m.gguf"))
|
||||
})
|
||||
})
|
||||
|
||||
Describe("GetKnownUsecases", func() {
|
||||
It("uses explicit known_usecases from overrides when present", func() {
|
||||
m := &GalleryModel{
|
||||
Metadata: Metadata{Backend: "stablediffusion-ggml"},
|
||||
Overrides: map[string]any{
|
||||
"known_usecases": []any{"chat"},
|
||||
},
|
||||
}
|
||||
u := m.GetKnownUsecases()
|
||||
Expect(u).NotTo(BeNil())
|
||||
// Override wins over the backend's image default.
|
||||
Expect(*u & config.FLAG_CHAT).To(Equal(config.FLAG_CHAT))
|
||||
Expect(*u & config.FLAG_IMAGE).To(Equal(config.ModelConfigUsecase(0)))
|
||||
})
|
||||
|
||||
It("falls back to backend defaults when no override is set", func() {
|
||||
m := &GalleryModel{Metadata: Metadata{Backend: "stablediffusion-ggml"}}
|
||||
u := m.GetKnownUsecases()
|
||||
Expect(u).NotTo(BeNil())
|
||||
Expect(*u & config.FLAG_IMAGE).To(Equal(config.FLAG_IMAGE))
|
||||
})
|
||||
|
||||
It("returns nil when neither overrides nor a known backend provide usecases", func() {
|
||||
m := &GalleryModel{}
|
||||
Expect(m.GetKnownUsecases()).To(BeNil())
|
||||
})
|
||||
|
||||
It("filters models without explicit known_usecases via backend defaults", func() {
|
||||
models := GalleryElements[*GalleryModel]{
|
||||
&GalleryModel{Metadata: Metadata{Name: "sd-model", Backend: "stablediffusion-ggml"}},
|
||||
&GalleryModel{Metadata: Metadata{Name: "whisper-model", Backend: "whisper"}},
|
||||
}
|
||||
filtered := FilterGalleryModelsByUsecase(models, config.FLAG_IMAGE)
|
||||
Expect(filtered).To(HaveLen(1))
|
||||
Expect(filtered[0].Name).To(Equal("sd-model"))
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
@@ -97,7 +97,7 @@ func (i *DiffuserImporter) Import(details Details) (gallery.ModelConfig, error)
|
||||
modelConfig := config.ModelConfig{
|
||||
Name: name,
|
||||
Description: description,
|
||||
KnownUsecaseStrings: []string{"image"},
|
||||
KnownUsecaseStrings: []string{config.UsecaseImage},
|
||||
Backend: backend,
|
||||
PredictionOptions: schema.PredictionOptions{
|
||||
BasicModelRequest: schema.BasicModelRequest{
|
||||
|
||||
@@ -135,7 +135,7 @@ func (i *LlamaCPPImporter) Import(details Details) (gallery.ModelConfig, error)
|
||||
modelConfig := config.ModelConfig{
|
||||
Name: name,
|
||||
Description: description,
|
||||
KnownUsecaseStrings: []string{"chat"},
|
||||
KnownUsecaseStrings: []string{config.UsecaseChat},
|
||||
Options: []string{"use_jinja:true"},
|
||||
Backend: backend,
|
||||
TemplateConfig: config.TemplateConfig{
|
||||
|
||||
@@ -45,7 +45,7 @@ func ImportLocalPath(dirPath, name string) (*config.ModelConfig, error) {
|
||||
cfg := &config.ModelConfig{
|
||||
Name: name,
|
||||
Backend: "llama-cpp",
|
||||
KnownUsecaseStrings: []string{"chat"},
|
||||
KnownUsecaseStrings: []string{config.UsecaseChat},
|
||||
Options: []string{"use_jinja:true"},
|
||||
}
|
||||
cfg.Model = relPath(ggufFile)
|
||||
@@ -104,7 +104,7 @@ func ImportLocalPath(dirPath, name string) (*config.ModelConfig, error) {
|
||||
cfg := &config.ModelConfig{
|
||||
Name: name,
|
||||
Backend: "transformers",
|
||||
KnownUsecaseStrings: []string{"chat"},
|
||||
KnownUsecaseStrings: []string{config.UsecaseChat},
|
||||
}
|
||||
cfg.Model = baseModel
|
||||
cfg.TemplateConfig.UseTokenizerTemplate = true
|
||||
@@ -120,7 +120,7 @@ func ImportLocalPath(dirPath, name string) (*config.ModelConfig, error) {
|
||||
cfg := &config.ModelConfig{
|
||||
Name: name,
|
||||
Backend: "transformers",
|
||||
KnownUsecaseStrings: []string{"chat"},
|
||||
KnownUsecaseStrings: []string{config.UsecaseChat},
|
||||
}
|
||||
cfg.Model = baseModel
|
||||
cfg.TemplateConfig.UseTokenizerTemplate = true
|
||||
@@ -135,7 +135,7 @@ func ImportLocalPath(dirPath, name string) (*config.ModelConfig, error) {
|
||||
cfg := &config.ModelConfig{
|
||||
Name: name,
|
||||
Backend: "transformers",
|
||||
KnownUsecaseStrings: []string{"chat"},
|
||||
KnownUsecaseStrings: []string{config.UsecaseChat},
|
||||
}
|
||||
cfg.Model = relPath(dirPath)
|
||||
cfg.TemplateConfig.UseTokenizerTemplate = true
|
||||
|
||||
@@ -73,7 +73,7 @@ func (i *MLXImporter) Import(details Details) (gallery.ModelConfig, error) {
|
||||
modelConfig := config.ModelConfig{
|
||||
Name: name,
|
||||
Description: description,
|
||||
KnownUsecaseStrings: []string{"chat"},
|
||||
KnownUsecaseStrings: []string{config.UsecaseChat},
|
||||
Backend: backend,
|
||||
PredictionOptions: schema.PredictionOptions{
|
||||
BasicModelRequest: schema.BasicModelRequest{
|
||||
|
||||
@@ -87,7 +87,7 @@ func (i *TransformersImporter) Import(details Details) (gallery.ModelConfig, err
|
||||
modelConfig := config.ModelConfig{
|
||||
Name: name,
|
||||
Description: description,
|
||||
KnownUsecaseStrings: []string{"chat"},
|
||||
KnownUsecaseStrings: []string{config.UsecaseChat},
|
||||
Backend: backend,
|
||||
PredictionOptions: schema.PredictionOptions{
|
||||
BasicModelRequest: schema.BasicModelRequest{
|
||||
|
||||
@@ -77,7 +77,7 @@ func (i *VLLMImporter) Import(details Details) (gallery.ModelConfig, error) {
|
||||
modelConfig := config.ModelConfig{
|
||||
Name: name,
|
||||
Description: description,
|
||||
KnownUsecaseStrings: []string{"chat"},
|
||||
KnownUsecaseStrings: []string{config.UsecaseChat},
|
||||
Backend: backend,
|
||||
PredictionOptions: schema.PredictionOptions{
|
||||
BasicModelRequest: schema.BasicModelRequest{
|
||||
|
||||
@@ -52,3 +52,39 @@ func (m *GalleryModel) GetTags() []string {
|
||||
func (m *GalleryModel) GetDescription() string {
|
||||
return m.Description
|
||||
}
|
||||
|
||||
// GetKnownUsecases returns the usecase flags declared by the gallery entry,
|
||||
// falling back to the resolved backend's default usecases when the entry has
|
||||
// none of its own. Returns nil only when neither source provides any.
|
||||
//
|
||||
// Why the fallback: many gallery entries omit known_usecases because their
|
||||
// backend has only one sensible mode (e.g. stablediffusion-ggml is always
|
||||
// image generation). Without this fallback such models silently disappear
|
||||
// from usecase-based filtering in the UI.
|
||||
func (m *GalleryModel) GetKnownUsecases() *config.ModelConfigUsecase {
|
||||
if strs := overrideUsecaseStrings(m.Overrides); len(strs) > 0 {
|
||||
return config.GetUsecasesFromYAML(strs)
|
||||
}
|
||||
if defaults := config.DefaultUsecasesForBackendCap(m.Backend); len(defaults) > 0 {
|
||||
return config.GetUsecasesFromYAML(defaults)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func overrideUsecaseStrings(overrides map[string]any) []string {
|
||||
raw, ok := overrides["known_usecases"]
|
||||
if !ok {
|
||||
return nil
|
||||
}
|
||||
list, ok := raw.([]any)
|
||||
if !ok {
|
||||
return nil
|
||||
}
|
||||
strs := make([]string, 0, len(list))
|
||||
for _, v := range list {
|
||||
if s, ok := v.(string); ok {
|
||||
strs = append(strs, s)
|
||||
}
|
||||
}
|
||||
return strs
|
||||
}
|
||||
|
||||
@@ -116,13 +116,13 @@ func AutocompleteEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, a
|
||||
capability := strings.TrimPrefix(provider, "models:")
|
||||
var filterFn config.ModelConfigFilterFn
|
||||
switch capability {
|
||||
case "chat":
|
||||
case config.UsecaseChat:
|
||||
filterFn = config.BuildUsecaseFilterFn(config.FLAG_CHAT)
|
||||
case "tts":
|
||||
case config.UsecaseTTS:
|
||||
filterFn = config.BuildUsecaseFilterFn(config.FLAG_TTS)
|
||||
case "vad":
|
||||
case config.UsecaseVAD:
|
||||
filterFn = config.BuildUsecaseFilterFn(config.FLAG_VAD)
|
||||
case "transcript":
|
||||
case config.UsecaseTranscript:
|
||||
filterFn = config.BuildUsecaseFilterFn(config.FLAG_TRANSCRIPT)
|
||||
default:
|
||||
filterFn = config.NoFilterFn
|
||||
|
||||
@@ -77,18 +77,17 @@ func ImportModelURIEndpoint(cl *config.ModelConfigLoader, appConfig *config.Appl
|
||||
}
|
||||
estCtx, cancel := context.WithTimeout(c.Request().Context(), 5*time.Second)
|
||||
defer cancel()
|
||||
result, err := vram.EstimateModel(estCtx, vram.ModelEstimateInput{
|
||||
Files: files,
|
||||
Options: vram.EstimateOptions{ContextLength: 8192},
|
||||
})
|
||||
result, err := vram.EstimateModelMultiContext(estCtx, vram.ModelEstimateInput{
|
||||
Files: files,
|
||||
}, []uint32{8192})
|
||||
if err == nil {
|
||||
if result.SizeBytes > 0 {
|
||||
resp.EstimatedSizeBytes = result.SizeBytes
|
||||
resp.EstimatedSizeDisplay = result.SizeDisplay
|
||||
}
|
||||
if result.VRAMBytes > 0 {
|
||||
resp.EstimatedVRAMBytes = result.VRAMBytes
|
||||
resp.EstimatedVRAMDisplay = result.VRAMDisplay
|
||||
if v := result.VRAMForContext(8192); v > 0 {
|
||||
resp.EstimatedVRAMBytes = v
|
||||
resp.EstimatedVRAMDisplay = vram.FormatBytes(v)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -9,10 +9,9 @@ import (
|
||||
)
|
||||
|
||||
// VRAMEstimateEndpoint returns a handler that estimates VRAM usage for an
|
||||
// installed model configuration. For uninstalled models (gallery URLs), use
|
||||
// the gallery-level estimates in /api/models instead.
|
||||
// installed model configuration at multiple context sizes.
|
||||
// @Summary Estimate VRAM usage for a model
|
||||
// @Description Estimates VRAM based on model weight files, context size, and GPU layers
|
||||
// @Description Estimates VRAM based on model weight files at multiple context sizes
|
||||
// @Tags config
|
||||
// @Accept json
|
||||
// @Produce json
|
||||
|
||||
@@ -121,13 +121,13 @@ var _ = Describe("VRAM Estimate Endpoint", func() {
|
||||
Expect(json.Unmarshal(rec.Body.Bytes(), &resp)).To(Succeed())
|
||||
// The response should have non-zero size and vram estimates.
|
||||
// JSON numbers unmarshal as float64.
|
||||
sizeBytes, ok := resp["sizeBytes"].(float64)
|
||||
Expect(ok).To(BeTrue(), "sizeBytes should be a number, got: %v (response: %s)", resp["sizeBytes"], rec.Body.String())
|
||||
sizeBytes, ok := resp["size_bytes"].(float64)
|
||||
Expect(ok).To(BeTrue(), "size_bytes should be a number, got: %v (response: %s)", resp["size_bytes"], rec.Body.String())
|
||||
Expect(sizeBytes).To(BeNumerically(">", 0))
|
||||
vramBytes, ok := resp["vramBytes"].(float64)
|
||||
Expect(ok).To(BeTrue(), "vramBytes should be a number")
|
||||
vramBytes, ok := resp["vram_bytes"].(float64)
|
||||
Expect(ok).To(BeTrue(), "vram_bytes should be a number")
|
||||
Expect(vramBytes).To(BeNumerically(">", 0))
|
||||
Expect(resp["sizeDisplay"]).NotTo(BeEmpty())
|
||||
Expect(resp["vramDisplay"]).NotTo(BeEmpty())
|
||||
Expect(resp["size_display"]).NotTo(BeEmpty())
|
||||
Expect(resp["vram_display"]).NotTo(BeEmpty())
|
||||
})
|
||||
})
|
||||
|
||||
@@ -2,13 +2,13 @@ import { test, expect } from '@playwright/test'
|
||||
|
||||
const MOCK_MODELS_RESPONSE = {
|
||||
models: [
|
||||
{ name: 'llama-model', description: 'A llama model', backend: 'llama-cpp', installed: false, tags: ['llm'] },
|
||||
{ name: 'whisper-model', description: 'A whisper model', backend: 'whisper', installed: true, tags: ['stt'] },
|
||||
{ name: 'llama-model', description: 'A llama model', backend: 'llama-cpp', installed: false, tags: ['chat'] },
|
||||
{ name: 'whisper-model', description: 'A whisper model', backend: 'whisper', installed: true, tags: ['transcript'] },
|
||||
{ name: 'stablediffusion-model', description: 'An image model', backend: 'stablediffusion', installed: false, tags: ['sd'] },
|
||||
{ name: 'unknown-model', description: 'No backend', backend: '', installed: false, tags: [] },
|
||||
],
|
||||
allBackends: ['llama-cpp', 'stablediffusion', 'whisper'],
|
||||
allTags: ['llm', 'sd', 'stt'],
|
||||
allTags: ['chat', 'sd', 'transcript'],
|
||||
availableModels: 4,
|
||||
installedModels: 1,
|
||||
totalPages: 1,
|
||||
@@ -78,3 +78,121 @@ test.describe('Models Gallery - Backend Features', () => {
|
||||
await expect(detail.locator('text=llama-cpp')).toBeVisible()
|
||||
})
|
||||
})
|
||||
|
||||
const BACKEND_USECASES_MOCK = {
|
||||
'llama-cpp': ['chat', 'embeddings', 'vision'],
|
||||
'whisper': ['transcript'],
|
||||
'stablediffusion': ['image'],
|
||||
}
|
||||
|
||||
test.describe('Models Gallery - Multi-select Filters', () => {
|
||||
test.beforeEach(async ({ page }) => {
|
||||
await page.route('**/api/models*', (route) => {
|
||||
route.fulfill({
|
||||
contentType: 'application/json',
|
||||
body: JSON.stringify(MOCK_MODELS_RESPONSE),
|
||||
})
|
||||
})
|
||||
await page.route('**/api/backends/usecases', (route) => {
|
||||
route.fulfill({
|
||||
contentType: 'application/json',
|
||||
body: JSON.stringify(BACKEND_USECASES_MOCK),
|
||||
})
|
||||
})
|
||||
await page.goto('/app/models')
|
||||
await expect(page.locator('th', { hasText: 'Backend' })).toBeVisible({ timeout: 10_000 })
|
||||
})
|
||||
|
||||
test('multi-select toggle: click Chat, TTS, then Chat again', async ({ page }) => {
|
||||
const chatBtn = page.locator('.filter-btn', { hasText: 'Chat' })
|
||||
const ttsBtn = page.locator('.filter-btn', { hasText: 'TTS' })
|
||||
|
||||
await chatBtn.click()
|
||||
await expect(chatBtn).toHaveClass(/active/)
|
||||
|
||||
await ttsBtn.click()
|
||||
await expect(chatBtn).toHaveClass(/active/)
|
||||
await expect(ttsBtn).toHaveClass(/active/)
|
||||
|
||||
// Click Chat again to deselect it
|
||||
await chatBtn.click()
|
||||
await expect(chatBtn).not.toHaveClass(/active/)
|
||||
await expect(ttsBtn).toHaveClass(/active/)
|
||||
})
|
||||
|
||||
test('"All" clears selection', async ({ page }) => {
|
||||
const chatBtn = page.locator('.filter-btn', { hasText: 'Chat' })
|
||||
const allBtn = page.locator('.filter-btn', { hasText: 'All' })
|
||||
|
||||
await chatBtn.click()
|
||||
await expect(chatBtn).toHaveClass(/active/)
|
||||
|
||||
await allBtn.click()
|
||||
await expect(allBtn).toHaveClass(/active/)
|
||||
await expect(chatBtn).not.toHaveClass(/active/)
|
||||
})
|
||||
|
||||
test('query param sent correctly with multiple filters', async ({ page }) => {
|
||||
const chatBtn = page.locator('.filter-btn', { hasText: 'Chat' })
|
||||
const ttsBtn = page.locator('.filter-btn', { hasText: 'TTS' })
|
||||
|
||||
// Click Chat and wait for its request to settle
|
||||
await chatBtn.click()
|
||||
await page.waitForResponse(resp => resp.url().includes('/api/models'))
|
||||
|
||||
// Now click TTS and capture the resulting request
|
||||
const [request] = await Promise.all([
|
||||
page.waitForRequest(req => {
|
||||
if (!req.url().includes('/api/models')) return false
|
||||
const u = new URL(req.url())
|
||||
const tag = u.searchParams.get('tag')
|
||||
return tag && tag.split(',').length >= 2
|
||||
}),
|
||||
ttsBtn.click(),
|
||||
])
|
||||
|
||||
const url = new URL(request.url())
|
||||
const tags = url.searchParams.get('tag').split(',').sort()
|
||||
expect(tags).toEqual(['chat', 'tts'])
|
||||
})
|
||||
|
||||
test('backend greys out unavailable filters', async ({ page }) => {
|
||||
// Select llama-cpp backend via dropdown
|
||||
await page.locator('button', { hasText: 'All Backends' }).click()
|
||||
const dropdown = page.locator('input[placeholder="Search backends..."]').locator('..').locator('..')
|
||||
await dropdown.locator('text=llama-cpp').click()
|
||||
|
||||
// Wait for filter state to update
|
||||
const ttsBtn = page.locator('.filter-btn', { hasText: 'TTS' })
|
||||
const sttBtn = page.locator('.filter-btn', { hasText: 'STT' })
|
||||
const imageBtn = page.locator('.filter-btn', { hasText: 'Image' })
|
||||
|
||||
// TTS, STT, Image should be disabled for llama-cpp
|
||||
await expect(ttsBtn).toBeDisabled()
|
||||
await expect(sttBtn).toBeDisabled()
|
||||
await expect(imageBtn).toBeDisabled()
|
||||
|
||||
// Chat, Embeddings, Vision should remain enabled
|
||||
const chatBtn = page.locator('.filter-btn', { hasText: 'Chat' })
|
||||
const embBtn = page.locator('.filter-btn', { hasText: 'Embeddings' })
|
||||
const visBtn = page.locator('.filter-btn', { hasText: 'Vision' })
|
||||
await expect(chatBtn).toBeEnabled()
|
||||
await expect(embBtn).toBeEnabled()
|
||||
await expect(visBtn).toBeEnabled()
|
||||
})
|
||||
|
||||
test('backend clears incompatible filters', async ({ page }) => {
|
||||
// Select TTS filter first
|
||||
const ttsBtn = page.locator('.filter-btn', { hasText: 'TTS' })
|
||||
await ttsBtn.click()
|
||||
await expect(ttsBtn).toHaveClass(/active/)
|
||||
|
||||
// Now select llama-cpp backend (which doesn't support TTS)
|
||||
await page.locator('button', { hasText: 'All Backends' }).click()
|
||||
const dropdown = page.locator('input[placeholder="Search backends..."]').locator('..').locator('..')
|
||||
await dropdown.locator('text=llama-cpp').click()
|
||||
|
||||
// TTS should be auto-removed from selection
|
||||
await expect(ttsBtn).not.toHaveClass(/active/)
|
||||
})
|
||||
})
|
||||
|
||||
@@ -20,6 +20,7 @@
|
||||
"vision": "Vision",
|
||||
"tts": "TTS",
|
||||
"stt": "STT",
|
||||
"diarization": "Diarisierung",
|
||||
"embedding": "Embedding",
|
||||
"rerank": "Rerank",
|
||||
"allBackends": "Alle Backends",
|
||||
|
||||
@@ -14,14 +14,20 @@
|
||||
},
|
||||
"filters": {
|
||||
"all": "All",
|
||||
"llm": "LLM",
|
||||
"llm": "Chat",
|
||||
"image": "Image",
|
||||
"video": "Video",
|
||||
"multimodal": "Multimodal",
|
||||
"vision": "Vision",
|
||||
"tts": "TTS",
|
||||
"stt": "STT",
|
||||
"embedding": "Embedding",
|
||||
"diarization": "Diarization",
|
||||
"soundGen": "Sound",
|
||||
"audioTransform": "Audio FX",
|
||||
"embedding": "Embeddings",
|
||||
"rerank": "Rerank",
|
||||
"detection": "Detection",
|
||||
"vad": "VAD",
|
||||
"allBackends": "All Backends",
|
||||
"searchBackends": "Search backends..."
|
||||
},
|
||||
|
||||
@@ -20,6 +20,7 @@
|
||||
"vision": "Visión",
|
||||
"tts": "TTS",
|
||||
"stt": "STT",
|
||||
"diarization": "Diarización",
|
||||
"embedding": "Embedding",
|
||||
"rerank": "Rerank",
|
||||
"allBackends": "Todos los backends",
|
||||
|
||||
@@ -20,6 +20,7 @@
|
||||
"vision": "Visione",
|
||||
"tts": "TTS",
|
||||
"stt": "STT",
|
||||
"diarization": "Diarizzazione",
|
||||
"embedding": "Embedding",
|
||||
"rerank": "Rerank",
|
||||
"allBackends": "Tutti i backend",
|
||||
|
||||
@@ -20,6 +20,7 @@
|
||||
"vision": "视觉",
|
||||
"tts": "TTS",
|
||||
"stt": "STT",
|
||||
"diarization": "说话人分离",
|
||||
"embedding": "嵌入",
|
||||
"rerank": "重排",
|
||||
"allBackends": "所有后端",
|
||||
|
||||
@@ -296,11 +296,11 @@ export default function Backends() {
|
||||
|
||||
const FILTERS = [
|
||||
{ key: '', label: 'All', icon: 'fa-layer-group' },
|
||||
{ key: 'llm', label: 'LLM', icon: 'fa-brain' },
|
||||
{ key: 'chat', label: 'Chat', icon: 'fa-brain' },
|
||||
{ key: 'image', label: 'Image', icon: 'fa-image' },
|
||||
{ key: 'video', label: 'Video', icon: 'fa-video' },
|
||||
{ key: 'tts', label: 'TTS', icon: 'fa-microphone' },
|
||||
{ key: 'stt', label: 'STT', icon: 'fa-headphones' },
|
||||
{ key: 'transcript', label: 'STT', icon: 'fa-headphones' },
|
||||
{ key: 'vision', label: 'Vision', icon: 'fa-eye' },
|
||||
]
|
||||
|
||||
|
||||
@@ -11,16 +11,26 @@ import GalleryLoader from '../components/GalleryLoader'
|
||||
import React from 'react'
|
||||
|
||||
|
||||
const CONTEXT_SIZES = [8192, 16384, 32768, 65536, 131072, 262144]
|
||||
const CONTEXT_LABELS = ['8K', '16K', '32K', '64K', '128K', '256K']
|
||||
|
||||
|
||||
const FILTERS = [
|
||||
{ key: '', labelKey: 'filters.all', icon: 'fa-layer-group' },
|
||||
{ key: 'llm', labelKey: 'filters.llm', icon: 'fa-brain' },
|
||||
{ key: 'sd', labelKey: 'filters.image', icon: 'fa-image' },
|
||||
{ key: 'chat', labelKey: 'filters.llm', icon: 'fa-brain' },
|
||||
{ key: 'image', labelKey: 'filters.image', icon: 'fa-image' },
|
||||
{ key: 'video', labelKey: 'filters.video', icon: 'fa-video' },
|
||||
{ key: 'multimodal', labelKey: 'filters.multimodal', icon: 'fa-shapes' },
|
||||
{ key: 'vision', labelKey: 'filters.vision', icon: 'fa-eye' },
|
||||
{ key: 'tts', labelKey: 'filters.tts', icon: 'fa-microphone' },
|
||||
{ key: 'stt', labelKey: 'filters.stt', icon: 'fa-headphones' },
|
||||
{ key: 'embedding', labelKey: 'filters.embedding', icon: 'fa-vector-square' },
|
||||
{ key: 'reranker', labelKey: 'filters.rerank', icon: 'fa-sort' },
|
||||
{ key: 'transcript', labelKey: 'filters.stt', icon: 'fa-headphones' },
|
||||
{ key: 'diarization', labelKey: 'filters.diarization', icon: 'fa-users' },
|
||||
{ key: 'sound_generation', labelKey: 'filters.soundGen', icon: 'fa-music' },
|
||||
{ key: 'audio_transform', labelKey: 'filters.audioTransform', icon: 'fa-sliders' },
|
||||
{ key: 'embeddings', labelKey: 'filters.embedding', icon: 'fa-vector-square' },
|
||||
{ key: 'rerank', labelKey: 'filters.rerank', icon: 'fa-sort' },
|
||||
{ key: 'detection', labelKey: 'filters.detection', icon: 'fa-bullseye' },
|
||||
{ key: 'vad', labelKey: 'filters.vad', icon: 'fa-wave-square' },
|
||||
]
|
||||
|
||||
export default function Models() {
|
||||
@@ -34,7 +44,7 @@ export default function Models() {
|
||||
const [page, setPage] = useState(1)
|
||||
const [totalPages, setTotalPages] = useState(1)
|
||||
const [search, setSearch] = useState('')
|
||||
const [filter, setFilter] = useState('')
|
||||
const [filters, setFilters] = useState([])
|
||||
const [sort, setSort] = useState('')
|
||||
const [order, setOrder] = useState('asc')
|
||||
const [installing, setInstalling] = useState(new Map())
|
||||
@@ -43,6 +53,9 @@ export default function Models() {
|
||||
const [stats, setStats] = useState({ total: 0, installed: 0, repositories: 0 })
|
||||
const [backendFilter, setBackendFilter] = useState('')
|
||||
const [allBackends, setAllBackends] = useState([])
|
||||
const [backendUsecases, setBackendUsecases] = useState({})
|
||||
const [estimates, setEstimates] = useState({})
|
||||
const [contextSize, setContextSize] = useState(CONTEXT_SIZES[0])
|
||||
const [confirmDialog, setConfirmDialog] = useState(null)
|
||||
|
||||
// Total GPU memory for "fits" check
|
||||
@@ -52,14 +65,14 @@ export default function Models() {
|
||||
try {
|
||||
setLoading(true)
|
||||
const searchVal = params.search !== undefined ? params.search : search
|
||||
const filterVal = params.filter !== undefined ? params.filter : filter
|
||||
const filtersVal = params.filters !== undefined ? params.filters : filters
|
||||
const sortVal = params.sort !== undefined ? params.sort : sort
|
||||
const backendVal = params.backendFilter !== undefined ? params.backendFilter : backendFilter
|
||||
const queryParams = {
|
||||
page: params.page || page,
|
||||
items: 9,
|
||||
}
|
||||
if (filterVal) queryParams.tag = filterVal
|
||||
if (filtersVal.length > 0) queryParams.tag = filtersVal.join(',')
|
||||
if (searchVal) queryParams.term = searchVal
|
||||
if (backendVal) queryParams.backend = backendVal
|
||||
if (sortVal) {
|
||||
@@ -79,11 +92,27 @@ export default function Models() {
|
||||
} finally {
|
||||
setLoading(false)
|
||||
}
|
||||
}, [page, search, filter, sort, order, backendFilter, addToast, t])
|
||||
}, [page, search, filters, sort, order, backendFilter, addToast, t])
|
||||
|
||||
useEffect(() => {
|
||||
fetchModels()
|
||||
}, [page, filter, sort, order, backendFilter])
|
||||
}, [page, filters, sort, order, backendFilter])
|
||||
|
||||
// Fetch backend→usecase mapping once on mount
|
||||
useEffect(() => {
|
||||
modelsApi.backendUsecases().then(setBackendUsecases).catch(() => {})
|
||||
}, [])
|
||||
|
||||
// When backend changes, remove selected filters that aren't available
|
||||
useEffect(() => {
|
||||
if (backendFilter && backendUsecases[backendFilter]) {
|
||||
setFilters(prev => {
|
||||
const possible = backendUsecases[backendFilter]
|
||||
const filtered = prev.filter(k => k === 'multimodal' || possible.includes(k))
|
||||
return filtered.length !== prev.length ? filtered : prev
|
||||
})
|
||||
}
|
||||
}, [backendFilter, backendUsecases])
|
||||
|
||||
// Re-fetch when operations change (install/delete completion)
|
||||
useEffect(() => {
|
||||
@@ -95,11 +124,42 @@ export default function Models() {
|
||||
fetchModels({ search: value, page: 1 })
|
||||
})
|
||||
|
||||
// Fetch VRAM/size estimates asynchronously for visible models.
|
||||
useEffect(() => {
|
||||
if (models.length === 0) return
|
||||
let cancelled = false
|
||||
models.forEach(model => {
|
||||
const id = model.name || model.id
|
||||
if (estimates[id]) return
|
||||
modelsApi.estimate(id, CONTEXT_SIZES).then(est => {
|
||||
if (cancelled) return
|
||||
if (est && (est.sizeBytes || est.estimates)) {
|
||||
setEstimates(prev => ({ ...prev, [id]: est }))
|
||||
}
|
||||
}).catch(() => {})
|
||||
})
|
||||
return () => { cancelled = true }
|
||||
}, [models])
|
||||
|
||||
const handleSearch = (value) => {
|
||||
setSearch(value)
|
||||
debouncedFetch(value)
|
||||
}
|
||||
|
||||
const toggleFilter = (key) => {
|
||||
if (key === '') { setFilters([]); setPage(1); return }
|
||||
setFilters(prev =>
|
||||
prev.includes(key) ? prev.filter(k => k !== key) : [...prev, key]
|
||||
)
|
||||
setPage(1)
|
||||
}
|
||||
|
||||
const isFilterAvailable = (key) => {
|
||||
if (!backendFilter || key === '' || key === 'multimodal') return true
|
||||
const possible = backendUsecases[backendFilter]
|
||||
return !possible || possible.includes(key)
|
||||
}
|
||||
|
||||
const handleSort = (col) => {
|
||||
if (sort === col) {
|
||||
setOrder(o => o === 'asc' ? 'desc' : 'asc')
|
||||
@@ -221,16 +281,23 @@ export default function Models() {
|
||||
|
||||
{/* Filter buttons */}
|
||||
<div className="filter-bar">
|
||||
{FILTERS.map(f => (
|
||||
<button
|
||||
key={f.key}
|
||||
className={`filter-btn ${filter === f.key ? 'active' : ''}`}
|
||||
onClick={() => { setFilter(f.key); setPage(1) }}
|
||||
>
|
||||
<i className={`fas ${f.icon}`} style={{ marginRight: 4 }} />
|
||||
{t(f.labelKey)}
|
||||
</button>
|
||||
))}
|
||||
{FILTERS.map(f => {
|
||||
const isAll = f.key === ''
|
||||
const active = isAll ? filters.length === 0 : filters.includes(f.key)
|
||||
const available = isFilterAvailable(f.key)
|
||||
return (
|
||||
<button
|
||||
key={f.key}
|
||||
className={`filter-btn ${active ? 'active' : ''}`}
|
||||
disabled={!available}
|
||||
style={!available ? { opacity: 0.4, cursor: 'not-allowed' } : undefined}
|
||||
onClick={() => toggleFilter(f.key)}
|
||||
>
|
||||
<i className={`fas ${f.icon}`} style={{ marginRight: 4 }} />
|
||||
{t(f.labelKey)}
|
||||
</button>
|
||||
)
|
||||
})}
|
||||
{allBackends.length > 0 && (
|
||||
<SearchableSelect
|
||||
value={backendFilter}
|
||||
@@ -244,6 +311,25 @@ export default function Models() {
|
||||
)}
|
||||
</div>
|
||||
|
||||
{/* Context size slider for VRAM estimates */}
|
||||
<div style={{ display: 'flex', alignItems: 'center', gap: 'var(--spacing-sm)', marginBottom: 'var(--spacing-md)', fontSize: '0.8125rem' }}>
|
||||
<label style={{ color: 'var(--color-text-muted)', whiteSpace: 'nowrap' }}>
|
||||
<i className="fas fa-memory" style={{ marginRight: 4 }} />
|
||||
Context:
|
||||
</label>
|
||||
<input
|
||||
type="range"
|
||||
min={0}
|
||||
max={CONTEXT_SIZES.length - 1}
|
||||
value={CONTEXT_SIZES.indexOf(contextSize)}
|
||||
onChange={(e) => setContextSize(CONTEXT_SIZES[e.target.value])}
|
||||
style={{ width: 140, accentColor: 'var(--color-primary)' }}
|
||||
/>
|
||||
<span style={{ fontWeight: 600, minWidth: '3em' }}>
|
||||
{CONTEXT_LABELS[CONTEXT_SIZES.indexOf(contextSize)]}
|
||||
</span>
|
||||
</div>
|
||||
|
||||
{/* Table */}
|
||||
{loading ? (
|
||||
<GalleryLoader />
|
||||
@@ -252,12 +338,12 @@ export default function Models() {
|
||||
<div className="empty-state-icon"><i className="fas fa-search" /></div>
|
||||
<h2 className="empty-state-title">{t('empty.title')}</h2>
|
||||
<p className="empty-state-text">
|
||||
{search || filter || backendFilter ? t('empty.withFilters') : t('empty.noFilters')}
|
||||
{search || filters.length > 0 || backendFilter ? t('empty.withFilters') : t('empty.noFilters')}
|
||||
</p>
|
||||
{(search || filter || backendFilter) && (
|
||||
{(search || filters.length > 0 || backendFilter) && (
|
||||
<button
|
||||
className="btn btn-secondary btn-sm"
|
||||
onClick={() => { handleSearch(''); setFilter(''); setBackendFilter(''); setPage(1) }}
|
||||
onClick={() => { handleSearch(''); setFilters([]); setBackendFilter(''); setPage(1) }}
|
||||
>
|
||||
<i className="fas fa-times" /> {t('search.clearFilters')}
|
||||
</button>
|
||||
@@ -286,9 +372,14 @@ export default function Models() {
|
||||
<tbody>
|
||||
{models.map((model, idx) => {
|
||||
const name = model.name || model.id
|
||||
const estData = estimates[name]
|
||||
const sizeDisplay = estData?.sizeDisplay
|
||||
const ctxEst = estData?.estimates?.[String(contextSize)]
|
||||
const vramDisplay = ctxEst?.vramDisplay
|
||||
const vramBytes = ctxEst?.vramBytes
|
||||
const installing = isInstalling(name)
|
||||
const progress = getOperationProgress(name)
|
||||
const fit = fitsGpu(model.estimated_vram_bytes)
|
||||
const fit = fitsGpu(vramBytes)
|
||||
const isExpanded = expandedRow === idx
|
||||
|
||||
return (
|
||||
@@ -355,15 +446,15 @@ export default function Models() {
|
||||
{/* Size / VRAM */}
|
||||
<td>
|
||||
<div style={{ display: 'flex', flexDirection: 'column', gap: '2px' }}>
|
||||
{(model.estimated_size_display || model.estimated_vram_display) ? (
|
||||
{(sizeDisplay || vramDisplay) ? (
|
||||
<>
|
||||
<span style={{ fontSize: '0.75rem', color: 'var(--color-text-secondary)' }}>
|
||||
{model.estimated_size_display && model.estimated_size_display !== '0 B' && (
|
||||
<span>{t('table.size', { size: model.estimated_size_display })}</span>
|
||||
{sizeDisplay && sizeDisplay !== '0 B' && (
|
||||
<span>{t('table.size', { size: sizeDisplay })}</span>
|
||||
)}
|
||||
{model.estimated_size_display && model.estimated_size_display !== '0 B' && model.estimated_vram_display && model.estimated_vram_display !== '0 B' && ' · '}
|
||||
{model.estimated_vram_display && model.estimated_vram_display !== '0 B' && (
|
||||
<span>{t('table.vram', { vram: model.estimated_vram_display })}</span>
|
||||
{sizeDisplay && sizeDisplay !== '0 B' && vramDisplay && vramDisplay !== '0 B' && ' · '}
|
||||
{vramDisplay && vramDisplay !== '0 B' && (
|
||||
<span>{t('table.vram', { vram: vramDisplay })}</span>
|
||||
)}
|
||||
</span>
|
||||
{fit !== null && (
|
||||
@@ -437,7 +528,7 @@ export default function Models() {
|
||||
{isExpanded && (
|
||||
<tr>
|
||||
<td colSpan="8" style={{ padding: 0 }}>
|
||||
<ModelDetail model={model} fit={fit} expandedFiles={expandedFiles} setExpandedFiles={setExpandedFiles} t={t} />
|
||||
<ModelDetail model={model} fit={fit} sizeDisplay={sizeDisplay} vramDisplay={vramDisplay} expandedFiles={expandedFiles} setExpandedFiles={setExpandedFiles} t={t} />
|
||||
</td>
|
||||
</tr>
|
||||
)}
|
||||
@@ -490,7 +581,7 @@ function DetailRow({ label, children }) {
|
||||
)
|
||||
}
|
||||
|
||||
function ModelDetail({ model, fit, expandedFiles, setExpandedFiles, t }) {
|
||||
function ModelDetail({ model, fit, sizeDisplay, vramDisplay, expandedFiles, setExpandedFiles, t }) {
|
||||
const files = model.additionalFiles || model.files || []
|
||||
return (
|
||||
<div style={{ padding: 'var(--spacing-md) var(--spacing-lg)', background: 'var(--color-bg-primary)', borderTop: '1px solid var(--color-border-subtle)' }}>
|
||||
@@ -516,12 +607,12 @@ function ModelDetail({ model, fit, expandedFiles, setExpandedFiles, t }) {
|
||||
)}
|
||||
</DetailRow>
|
||||
<DetailRow label={t('detail.size')}>
|
||||
{model.estimated_size_display && model.estimated_size_display !== '0 B' ? model.estimated_size_display : null}
|
||||
{sizeDisplay && sizeDisplay !== '0 B' ? sizeDisplay : null}
|
||||
</DetailRow>
|
||||
<DetailRow label={t('detail.vram')}>
|
||||
{model.estimated_vram_display && model.estimated_vram_display !== '0 B' ? (
|
||||
{vramDisplay && vramDisplay !== '0 B' ? (
|
||||
<span style={{ display: 'flex', alignItems: 'center', gap: 'var(--spacing-sm)' }}>
|
||||
{model.estimated_vram_display}
|
||||
{vramDisplay}
|
||||
{fit !== null && (
|
||||
<span style={{ fontSize: '0.75rem', color: fit ? 'var(--color-success)' : 'var(--color-error)' }}>
|
||||
<i className="fas fa-microchip" /> {fit ? t('detail.fitsGpu') : t('detail.mayNotFitGpu')}
|
||||
|
||||
5
core/http/react-ui/src/utils/api.js
vendored
5
core/http/react-ui/src/utils/api.js
vendored
@@ -86,6 +86,10 @@ export const modelsApi = {
|
||||
listCapabilities: () => fetchJSON(API_CONFIG.endpoints.modelsCapabilities),
|
||||
install: (id) => postJSON(API_CONFIG.endpoints.installModel(id), {}),
|
||||
delete: (id) => postJSON(API_CONFIG.endpoints.deleteModel(id), {}),
|
||||
estimate: (id, contexts) => fetchJSON(
|
||||
buildUrl(API_CONFIG.endpoints.modelEstimate(id),
|
||||
contexts?.length ? { contexts: contexts.join(',') } : {})
|
||||
),
|
||||
getConfig: (id) => postJSON(API_CONFIG.endpoints.modelConfig(id), {}),
|
||||
getConfigJson: (name) => fetchJSON(API_CONFIG.endpoints.modelConfigJson(name)),
|
||||
getJob: (uid) => fetchJSON(API_CONFIG.endpoints.modelJob(uid)),
|
||||
@@ -116,6 +120,7 @@ export const modelsApi = {
|
||||
method: 'PATCH',
|
||||
body: JSON.stringify(patch),
|
||||
}),
|
||||
backendUsecases: () => fetchJSON('/api/backends/usecases'),
|
||||
}
|
||||
|
||||
// Backends API
|
||||
|
||||
1
core/http/react-ui/src/utils/config.js
vendored
1
core/http/react-ui/src/utils/config.js
vendored
@@ -9,6 +9,7 @@ export const API_CONFIG = {
|
||||
models: '/api/models',
|
||||
installModel: (id) => `/api/models/install/${id}`,
|
||||
deleteModel: (id) => `/api/models/delete/${id}`,
|
||||
modelEstimate: (id) => `/api/models/estimate/${id}`,
|
||||
modelConfig: (id) => `/api/models/config/${id}`,
|
||||
modelConfigJson: (name) => `/api/models/config-json/${name}`,
|
||||
configMetadata: '/api/models/config-metadata',
|
||||
|
||||
@@ -9,11 +9,9 @@ import (
|
||||
"math"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"path"
|
||||
"slices"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/google/uuid"
|
||||
@@ -37,8 +35,81 @@ const (
|
||||
licenseSortFieldName = "license"
|
||||
statusSortFieldName = "status"
|
||||
ascSortOrder = "asc"
|
||||
multimodalFilterKey = "multimodal"
|
||||
)
|
||||
|
||||
// usecaseFilters maps UI filter keys to ModelConfigUsecase flags for
|
||||
// capability-based gallery filtering.
|
||||
var usecaseFilters = map[string]config.ModelConfigUsecase{
|
||||
config.UsecaseChat: config.FLAG_CHAT,
|
||||
config.UsecaseImage: config.FLAG_IMAGE,
|
||||
config.UsecaseVideo: config.FLAG_VIDEO,
|
||||
config.UsecaseVision: config.FLAG_VISION,
|
||||
config.UsecaseTTS: config.FLAG_TTS,
|
||||
config.UsecaseTranscript: config.FLAG_TRANSCRIPT,
|
||||
config.UsecaseSoundGeneration: config.FLAG_SOUND_GENERATION,
|
||||
config.UsecaseEmbeddings: config.FLAG_EMBEDDINGS,
|
||||
config.UsecaseRerank: config.FLAG_RERANK,
|
||||
config.UsecaseDetection: config.FLAG_DETECTION,
|
||||
config.UsecaseVAD: config.FLAG_VAD,
|
||||
config.UsecaseAudioTransform: config.FLAG_AUDIO_TRANSFORM,
|
||||
config.UsecaseDiarization: config.FLAG_DIARIZATION,
|
||||
}
|
||||
|
||||
|
||||
// extractHFRepo tries to find a HuggingFace repo ID from model overrides or URLs.
|
||||
func extractHFRepo(overrides map[string]any, urls []string) string {
|
||||
if overrides != nil {
|
||||
if params, ok := overrides["parameters"].(map[string]any); ok {
|
||||
if modelRef, ok := params["model"].(string); ok {
|
||||
if repoID, ok := vram.ExtractHFRepoID(modelRef); ok {
|
||||
return repoID
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
for _, u := range urls {
|
||||
if repoID, ok := vram.ExtractHFRepoID(u); ok {
|
||||
return repoID
|
||||
}
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
// buildEstimateInput creates a vram.ModelEstimateInput from gallery model metadata.
|
||||
func buildEstimateInput(m *gallery.GalleryModel) vram.ModelEstimateInput {
|
||||
var input vram.ModelEstimateInput
|
||||
input.Size = m.Size
|
||||
if hfRepoID := extractHFRepo(m.Overrides, m.URLs); hfRepoID != "" {
|
||||
input.HFRepo = hfRepoID
|
||||
}
|
||||
for _, f := range m.AdditionalFiles {
|
||||
if vram.IsWeightFile(f.URI) {
|
||||
input.Files = append(input.Files, vram.FileInput{URI: f.URI, Size: 0})
|
||||
}
|
||||
}
|
||||
return input
|
||||
}
|
||||
|
||||
// parseContextSizes parses a comma-separated list of context sizes from a query param.
|
||||
// Returns a default of [8192] if the param is empty or unparseable.
|
||||
func parseContextSizes(raw string) []uint32 {
|
||||
if raw == "" {
|
||||
return []uint32{8192}
|
||||
}
|
||||
var sizes []uint32
|
||||
for _, s := range strings.Split(raw, ",") {
|
||||
s = strings.TrimSpace(s)
|
||||
if v, err := strconv.ParseUint(s, 10, 32); err == nil && v > 0 {
|
||||
sizes = append(sizes, uint32(v))
|
||||
}
|
||||
}
|
||||
if len(sizes) == 0 {
|
||||
return []uint32{8192}
|
||||
}
|
||||
return sizes
|
||||
}
|
||||
|
||||
// getDirectorySize calculates the total size of files in a directory
|
||||
// metaParentOf returns the name of the auto-resolving (meta) backend that
|
||||
// declares `name` as one of its hardware-specific variants in its
|
||||
@@ -260,7 +331,7 @@ func RegisterUIAPIRoutes(app *echo.Echo, cl *config.ModelConfigLoader, ml *model
|
||||
items = "9"
|
||||
}
|
||||
|
||||
models, err := gallery.AvailableGalleryModels(appConfig.Galleries, appConfig.SystemState)
|
||||
models, err := gallery.AvailableGalleryModelsCached(appConfig.Galleries, appConfig.SystemState)
|
||||
if err != nil {
|
||||
xlog.Error("could not list models from galleries", "error", err)
|
||||
return c.JSON(http.StatusInternalServerError, map[string]any{
|
||||
@@ -294,8 +365,30 @@ func RegisterUIAPIRoutes(app *echo.Echo, cl *config.ModelConfigLoader, ml *model
|
||||
}
|
||||
slices.Sort(backendNames)
|
||||
|
||||
// Filter by usecase tags (comma-separated for multi-select).
|
||||
if tag != "" {
|
||||
models = gallery.GalleryElements[*gallery.GalleryModel](models).FilterByTag(tag)
|
||||
var combinedFlag config.ModelConfigUsecase
|
||||
hasMultimodal := false
|
||||
var plainTags []string
|
||||
for _, t := range strings.Split(tag, ",") {
|
||||
t = strings.TrimSpace(t)
|
||||
if t == multimodalFilterKey {
|
||||
hasMultimodal = true
|
||||
} else if flag, ok := usecaseFilters[t]; ok {
|
||||
combinedFlag |= flag
|
||||
} else if t != "" {
|
||||
plainTags = append(plainTags, t)
|
||||
}
|
||||
}
|
||||
if hasMultimodal {
|
||||
models = gallery.FilterGalleryModelsByMultimodal(models)
|
||||
}
|
||||
if combinedFlag != config.FLAG_ANY {
|
||||
models = gallery.FilterGalleryModelsByUsecase(models, combinedFlag)
|
||||
}
|
||||
for _, pt := range plainTags {
|
||||
models = gallery.GalleryElements[*gallery.GalleryModel](models).FilterByTag(pt)
|
||||
}
|
||||
}
|
||||
if term != "" {
|
||||
models = gallery.GalleryElements[*gallery.GalleryModel](models).Search(term)
|
||||
@@ -355,41 +448,6 @@ func RegisterUIAPIRoutes(app *echo.Echo, cl *config.ModelConfigLoader, ml *model
|
||||
modelsJSON := make([]map[string]any, 0, len(models))
|
||||
seenIDs := make(map[string]bool)
|
||||
|
||||
weightExts := map[string]bool{".gguf": true, ".safetensors": true, ".bin": true, ".pt": true}
|
||||
extractHFRepo := func(overrides map[string]any, urls []string) string {
|
||||
// Try overrides.parameters.model first
|
||||
if overrides != nil {
|
||||
if params, ok := overrides["parameters"].(map[string]any); ok {
|
||||
if modelRef, ok := params["model"].(string); ok {
|
||||
if repoID, ok := vram.ExtractHFRepoID(modelRef); ok {
|
||||
return repoID
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// Fall back to the first HuggingFace URL in the metadata urls list
|
||||
for _, u := range urls {
|
||||
if repoID, ok := vram.ExtractHFRepoID(u); ok {
|
||||
return repoID
|
||||
}
|
||||
}
|
||||
return ""
|
||||
}
|
||||
hasWeightFiles := func(files []gallery.File) bool {
|
||||
for _, f := range files {
|
||||
ext := strings.ToLower(path.Ext(path.Base(f.URI)))
|
||||
if weightExts[ext] {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
const hfEstimateTimeout = 10 * time.Second
|
||||
const estimateConcurrency = 3
|
||||
sem := make(chan struct{}, estimateConcurrency)
|
||||
var wg sync.WaitGroup
|
||||
|
||||
for _, m := range models {
|
||||
modelID := m.ID()
|
||||
|
||||
@@ -431,63 +489,9 @@ func RegisterUIAPIRoutes(app *echo.Echo, cl *config.ModelConfigLoader, ml *model
|
||||
"backend": m.Backend,
|
||||
}
|
||||
|
||||
// Build EstimateModel input from available metadata
|
||||
var estimateInput vram.ModelEstimateInput
|
||||
estimateInput.Options = vram.EstimateOptions{ContextLength: 8192}
|
||||
estimateInput.Size = m.Size
|
||||
if hfRepoID := extractHFRepo(m.Overrides, m.URLs); hfRepoID != "" {
|
||||
estimateInput.HFRepo = hfRepoID
|
||||
}
|
||||
|
||||
if hasWeightFiles(m.AdditionalFiles) {
|
||||
files := make([]gallery.File, len(m.AdditionalFiles))
|
||||
copy(files, m.AdditionalFiles)
|
||||
for _, f := range files {
|
||||
ext := strings.ToLower(path.Ext(path.Base(f.URI)))
|
||||
if weightExts[ext] {
|
||||
estimateInput.Files = append(estimateInput.Files, vram.FileInput{URI: f.URI, Size: 0})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Run estimation (async for file-based and HF repo, sync for size string only)
|
||||
needsAsync := len(estimateInput.Files) > 0 || estimateInput.HFRepo != ""
|
||||
if needsAsync {
|
||||
input := estimateInput
|
||||
wg.Go(func() {
|
||||
sem <- struct{}{}
|
||||
defer func() { <-sem }()
|
||||
ctx, cancel := context.WithTimeout(context.Background(), hfEstimateTimeout)
|
||||
defer cancel()
|
||||
result, err := vram.EstimateModel(ctx, input)
|
||||
if err == nil {
|
||||
if result.SizeBytes > 0 {
|
||||
obj["estimated_size_bytes"] = result.SizeBytes
|
||||
obj["estimated_size_display"] = result.SizeDisplay
|
||||
}
|
||||
if result.VRAMBytes > 0 {
|
||||
obj["estimated_vram_bytes"] = result.VRAMBytes
|
||||
obj["estimated_vram_display"] = result.VRAMDisplay
|
||||
}
|
||||
}
|
||||
})
|
||||
} else if estimateInput.Size != "" {
|
||||
result, _ := vram.EstimateModel(context.Background(), estimateInput)
|
||||
if result.SizeBytes > 0 {
|
||||
obj["estimated_size_bytes"] = result.SizeBytes
|
||||
obj["estimated_size_display"] = result.SizeDisplay
|
||||
}
|
||||
if result.VRAMBytes > 0 {
|
||||
obj["estimated_vram_bytes"] = result.VRAMBytes
|
||||
obj["estimated_vram_display"] = result.VRAMDisplay
|
||||
}
|
||||
}
|
||||
|
||||
modelsJSON = append(modelsJSON, obj)
|
||||
}
|
||||
|
||||
wg.Wait()
|
||||
|
||||
prevPage := pageNum - 1
|
||||
nextPage := pageNum + 1
|
||||
if prevPage < 1 {
|
||||
@@ -639,6 +643,65 @@ func RegisterUIAPIRoutes(app *echo.Echo, cl *config.ModelConfigLoader, ml *model
|
||||
})
|
||||
})
|
||||
|
||||
// Returns a mapping of backend names to the usecase filter keys they support.
|
||||
// Used by the gallery frontend to grey out usecase filter buttons when a
|
||||
// backend is selected.
|
||||
app.GET("/api/backends/usecases", func(c echo.Context) error {
|
||||
result := make(map[string][]string, len(config.BackendCapabilities))
|
||||
for name, cap := range config.BackendCapabilities {
|
||||
var keys []string
|
||||
for _, uc := range cap.PossibleUsecases {
|
||||
if _, ok := usecaseFilters[uc]; ok {
|
||||
keys = append(keys, uc)
|
||||
}
|
||||
}
|
||||
slices.Sort(keys)
|
||||
result[name] = keys
|
||||
}
|
||||
|
||||
return c.JSON(200, result)
|
||||
}, adminMiddleware)
|
||||
|
||||
// Returns VRAM/size estimates for a single gallery model at multiple
|
||||
// context sizes. The frontend calls this per-model so the gallery page
|
||||
// can load instantly and fill in estimates asynchronously.
|
||||
// Query params:
|
||||
// contexts - comma-separated context sizes (default: 8192)
|
||||
app.GET("/api/models/estimate/:id", func(c echo.Context) error {
|
||||
modelID, err := url.QueryUnescape(c.Param("id"))
|
||||
if err != nil {
|
||||
return c.JSON(http.StatusBadRequest, map[string]any{"error": "invalid model ID"})
|
||||
}
|
||||
|
||||
contextSizes := parseContextSizes(c.QueryParam("contexts"))
|
||||
|
||||
// Look up the model from the gallery to build the estimate input.
|
||||
models, err := gallery.AvailableGalleryModelsCached(appConfig.Galleries, appConfig.SystemState)
|
||||
if err != nil {
|
||||
return c.JSON(http.StatusInternalServerError, map[string]any{"error": err.Error()})
|
||||
}
|
||||
|
||||
model := gallery.FindGalleryElement(models, modelID)
|
||||
if model == nil {
|
||||
return c.JSON(http.StatusNotFound, map[string]any{"error": "model not found"})
|
||||
}
|
||||
|
||||
input := buildEstimateInput(model)
|
||||
if len(input.Files) == 0 && input.HFRepo == "" && input.Size == "" {
|
||||
return c.JSON(200, vram.MultiContextEstimate{})
|
||||
}
|
||||
|
||||
ctx, cancel := context.WithTimeout(c.Request().Context(), 10*time.Second)
|
||||
defer cancel()
|
||||
result, err := vram.EstimateModelMultiContext(ctx, input, contextSizes)
|
||||
if err != nil {
|
||||
xlog.Debug("model estimate failed", "model", modelID, "error", err)
|
||||
return c.JSON(200, vram.MultiContextEstimate{})
|
||||
}
|
||||
|
||||
return c.JSON(200, result)
|
||||
}, adminMiddleware)
|
||||
|
||||
app.POST("/api/models/install/:id", func(c echo.Context) error {
|
||||
galleryID := c.Param("id")
|
||||
// URL decode the gallery ID (e.g., "localai%40model" -> "localai@model")
|
||||
@@ -742,7 +805,7 @@ func RegisterUIAPIRoutes(app *echo.Echo, cl *config.ModelConfigLoader, ml *model
|
||||
}
|
||||
xlog.Debug("API job submitted to get config", "galleryID", galleryID)
|
||||
|
||||
models, err := gallery.AvailableGalleryModels(appConfig.Galleries, appConfig.SystemState)
|
||||
models, err := gallery.AvailableGalleryModelsCached(appConfig.Galleries, appConfig.SystemState)
|
||||
if err != nil {
|
||||
return c.JSON(http.StatusInternalServerError, map[string]any{
|
||||
"error": err.Error(),
|
||||
|
||||
@@ -43,17 +43,16 @@ func EstimateVRAM(ctx context.Context, req VRAMRequest, cl *config.ModelConfigLo
|
||||
modelsPath := sysState.Model.ModelsPath
|
||||
|
||||
var files []vram.FileInput
|
||||
var firstGGUF string
|
||||
seen := make(map[string]bool)
|
||||
|
||||
for _, f := range cfg.DownloadFiles {
|
||||
addWeightFile(string(f.URI), modelsPath, &files, &firstGGUF, seen)
|
||||
addWeightFile(string(f.URI), modelsPath, &files, seen)
|
||||
}
|
||||
if cfg.Model != "" {
|
||||
addWeightFile(cfg.Model, modelsPath, &files, &firstGGUF, seen)
|
||||
addWeightFile(cfg.Model, modelsPath, &files, seen)
|
||||
}
|
||||
if cfg.MMProj != "" {
|
||||
addWeightFile(cfg.MMProj, modelsPath, &files, &firstGGUF, seen)
|
||||
addWeightFile(cfg.MMProj, modelsPath, &files, seen)
|
||||
}
|
||||
|
||||
if len(files) == 0 {
|
||||
@@ -64,39 +63,46 @@ func EstimateVRAM(ctx context.Context, req VRAMRequest, cl *config.ModelConfigLo
|
||||
}
|
||||
|
||||
contextDefaulted := false
|
||||
opts := vram.EstimateOptions{
|
||||
ContextLength: req.ContextSize,
|
||||
GPULayers: req.GPULayers,
|
||||
KVQuantBits: req.KVQuantBits,
|
||||
}
|
||||
if opts.ContextLength == 0 {
|
||||
ctxLen := req.ContextSize
|
||||
if ctxLen == 0 {
|
||||
if cfg.ContextSize != nil {
|
||||
opts.ContextLength = uint32(*cfg.ContextSize)
|
||||
ctxLen = uint32(*cfg.ContextSize)
|
||||
} else {
|
||||
opts.ContextLength = 8192
|
||||
ctxLen = 8192
|
||||
contextDefaulted = true
|
||||
}
|
||||
}
|
||||
|
||||
opts := vram.EstimateOptions{
|
||||
GPULayers: req.GPULayers,
|
||||
KVQuantBits: req.KVQuantBits,
|
||||
}
|
||||
|
||||
subCtx, cancel := context.WithTimeout(ctx, 10*time.Second)
|
||||
defer cancel()
|
||||
|
||||
result, err := vram.Estimate(subCtx, files, opts, vram.DefaultCachedSizeResolver(), vram.DefaultCachedGGUFReader())
|
||||
multi, err := vram.EstimateMultiContext(subCtx, files, []uint32{ctxLen}, opts, vram.DefaultCachedSizeResolver(), vram.DefaultCachedGGUFReader())
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("vram estimate: %w", err)
|
||||
}
|
||||
|
||||
resp := &VRAMResponse{EstimateResult: result}
|
||||
at := multi.Estimates[fmt.Sprint(ctxLen)]
|
||||
resp := &VRAMResponse{
|
||||
EstimateResult: vram.EstimateResult{
|
||||
SizeBytes: multi.SizeBytes,
|
||||
SizeDisplay: multi.SizeDisplay,
|
||||
ContextLength: at.ContextLength,
|
||||
VRAMBytes: at.VRAMBytes,
|
||||
VRAMDisplay: at.VRAMDisplay,
|
||||
},
|
||||
ModelMaxContext: multi.ModelMaxContext,
|
||||
}
|
||||
|
||||
if contextDefaulted && firstGGUF != "" {
|
||||
ggufMeta, err := vram.DefaultCachedGGUFReader().ReadMetadata(subCtx, firstGGUF)
|
||||
if err == nil && ggufMeta != nil && ggufMeta.MaximumContextLength > 0 {
|
||||
resp.ModelMaxContext = ggufMeta.MaximumContextLength
|
||||
resp.ContextNote = fmt.Sprintf(
|
||||
"Estimate used default context_size=8192. The model's trained maximum context is %d; VRAM usage will be higher at larger context sizes.",
|
||||
ggufMeta.MaximumContextLength,
|
||||
)
|
||||
}
|
||||
if contextDefaulted && multi.ModelMaxContext > 0 {
|
||||
resp.ContextNote = fmt.Sprintf(
|
||||
"Estimate used default context_size=8192. The model's trained maximum context is %d; VRAM usage will be higher at larger context sizes.",
|
||||
multi.ModelMaxContext,
|
||||
)
|
||||
}
|
||||
return resp, nil
|
||||
}
|
||||
@@ -111,8 +117,8 @@ func resolveModelURI(uri, modelsPath string) string {
|
||||
return "file://" + filepath.Join(modelsPath, uri)
|
||||
}
|
||||
|
||||
// addWeightFile appends a resolved weight file to files and tracks the first GGUF.
|
||||
func addWeightFile(uri, modelsPath string, files *[]vram.FileInput, firstGGUF *string, seen map[string]bool) {
|
||||
// addWeightFile appends a resolved weight file to files.
|
||||
func addWeightFile(uri, modelsPath string, files *[]vram.FileInput, seen map[string]bool) {
|
||||
if !vram.IsWeightFile(uri) {
|
||||
return
|
||||
}
|
||||
@@ -122,7 +128,4 @@ func addWeightFile(uri, modelsPath string, files *[]vram.FileInput, firstGGUF *s
|
||||
}
|
||||
seen[resolved] = true
|
||||
*files = append(*files, vram.FileInput{URI: resolved, Size: 0})
|
||||
if *firstGGUF == "" && vram.IsGGUF(uri) {
|
||||
*firstGGUF = resolved
|
||||
}
|
||||
}
|
||||
|
||||
@@ -628,10 +628,14 @@ func (r *SmartRouter) estimateModelVRAM(ctx context.Context, opts *pb.ModelOptio
|
||||
estCtx, cancel := context.WithTimeout(ctx, 10*time.Second)
|
||||
defer cancel()
|
||||
|
||||
ctxSize := uint32(opts.ContextSize)
|
||||
if ctxSize == 0 {
|
||||
ctxSize = 8192
|
||||
}
|
||||
|
||||
input := vram.ModelEstimateInput{
|
||||
Options: vram.EstimateOptions{
|
||||
ContextLength: uint32(opts.ContextSize),
|
||||
GPULayers: int(opts.NGPULayers),
|
||||
GPULayers: int(opts.NGPULayers),
|
||||
},
|
||||
}
|
||||
|
||||
@@ -649,28 +653,15 @@ func (r *SmartRouter) estimateModelVRAM(ctx context.Context, opts *pb.ModelOptio
|
||||
}
|
||||
}
|
||||
|
||||
// If model file exists, get its size as fallback
|
||||
if opts.ModelFile != "" && len(input.Files) == 0 {
|
||||
if info, err := os.Stat(opts.ModelFile); err == nil {
|
||||
return vram.EstimateFromSize(uint64(info.Size())).VRAMBytes
|
||||
}
|
||||
}
|
||||
|
||||
if len(input.Files) == 0 && input.HFRepo == "" && input.Size == "" {
|
||||
return 0
|
||||
}
|
||||
|
||||
result, err := vram.EstimateModel(estCtx, input)
|
||||
if err != nil || result.VRAMBytes == 0 {
|
||||
// Last resort: try model file size
|
||||
if opts.ModelFile != "" {
|
||||
if info, statErr := os.Stat(opts.ModelFile); statErr == nil {
|
||||
return vram.EstimateFromSize(uint64(info.Size())).VRAMBytes
|
||||
}
|
||||
}
|
||||
result, err := vram.EstimateModelMultiContext(estCtx, input, []uint32{ctxSize})
|
||||
if err != nil {
|
||||
return 0
|
||||
}
|
||||
return result.VRAMBytes
|
||||
return result.VRAMForContext(ctxSize)
|
||||
}
|
||||
|
||||
// installBackendOnNode sends a NATS backend.install request-reply to the node.
|
||||
|
||||
Reference in New Issue
Block a user