feat(gallery): Speed up load times and clean gallery entries (#9211)

* feat: Rework VRAM estimation and use known_usecases in gallery

Signed-off-by: Richard Palethorpe <io@richiejp.com>
Assisted-by: Claude:claude-opus-4-7[1m] [Claude Code]

* chore(gallery): regenerate gallery index and add known_usecases to model entries

Signed-off-by: Richard Palethorpe <io@richiejp.com>

---------

Signed-off-by: Richard Palethorpe <io@richiejp.com>
This commit is contained in:
Richard Palethorpe
2026-05-06 13:51:38 +01:00
committed by GitHub
parent 6d56bf98fe
commit 969005b2a1
47 changed files with 17089 additions and 5345 deletions

View File

@@ -116,13 +116,13 @@ func AutocompleteEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, a
capability := strings.TrimPrefix(provider, "models:")
var filterFn config.ModelConfigFilterFn
switch capability {
case "chat":
case config.UsecaseChat:
filterFn = config.BuildUsecaseFilterFn(config.FLAG_CHAT)
case "tts":
case config.UsecaseTTS:
filterFn = config.BuildUsecaseFilterFn(config.FLAG_TTS)
case "vad":
case config.UsecaseVAD:
filterFn = config.BuildUsecaseFilterFn(config.FLAG_VAD)
case "transcript":
case config.UsecaseTranscript:
filterFn = config.BuildUsecaseFilterFn(config.FLAG_TRANSCRIPT)
default:
filterFn = config.NoFilterFn

View File

@@ -77,18 +77,17 @@ func ImportModelURIEndpoint(cl *config.ModelConfigLoader, appConfig *config.Appl
}
estCtx, cancel := context.WithTimeout(c.Request().Context(), 5*time.Second)
defer cancel()
result, err := vram.EstimateModel(estCtx, vram.ModelEstimateInput{
Files: files,
Options: vram.EstimateOptions{ContextLength: 8192},
})
result, err := vram.EstimateModelMultiContext(estCtx, vram.ModelEstimateInput{
Files: files,
}, []uint32{8192})
if err == nil {
if result.SizeBytes > 0 {
resp.EstimatedSizeBytes = result.SizeBytes
resp.EstimatedSizeDisplay = result.SizeDisplay
}
if result.VRAMBytes > 0 {
resp.EstimatedVRAMBytes = result.VRAMBytes
resp.EstimatedVRAMDisplay = result.VRAMDisplay
if v := result.VRAMForContext(8192); v > 0 {
resp.EstimatedVRAMBytes = v
resp.EstimatedVRAMDisplay = vram.FormatBytes(v)
}
}
}

View File

@@ -9,10 +9,9 @@ import (
)
// VRAMEstimateEndpoint returns a handler that estimates VRAM usage for an
// installed model configuration. For uninstalled models (gallery URLs), use
// the gallery-level estimates in /api/models instead.
// installed model configuration at multiple context sizes.
// @Summary Estimate VRAM usage for a model
// @Description Estimates VRAM based on model weight files, context size, and GPU layers
// @Description Estimates VRAM based on model weight files at multiple context sizes
// @Tags config
// @Accept json
// @Produce json

View File

@@ -121,13 +121,13 @@ var _ = Describe("VRAM Estimate Endpoint", func() {
Expect(json.Unmarshal(rec.Body.Bytes(), &resp)).To(Succeed())
// The response should have non-zero size and vram estimates.
// JSON numbers unmarshal as float64.
sizeBytes, ok := resp["sizeBytes"].(float64)
Expect(ok).To(BeTrue(), "sizeBytes should be a number, got: %v (response: %s)", resp["sizeBytes"], rec.Body.String())
sizeBytes, ok := resp["size_bytes"].(float64)
Expect(ok).To(BeTrue(), "size_bytes should be a number, got: %v (response: %s)", resp["size_bytes"], rec.Body.String())
Expect(sizeBytes).To(BeNumerically(">", 0))
vramBytes, ok := resp["vramBytes"].(float64)
Expect(ok).To(BeTrue(), "vramBytes should be a number")
vramBytes, ok := resp["vram_bytes"].(float64)
Expect(ok).To(BeTrue(), "vram_bytes should be a number")
Expect(vramBytes).To(BeNumerically(">", 0))
Expect(resp["sizeDisplay"]).NotTo(BeEmpty())
Expect(resp["vramDisplay"]).NotTo(BeEmpty())
Expect(resp["size_display"]).NotTo(BeEmpty())
Expect(resp["vram_display"]).NotTo(BeEmpty())
})
})

View File

@@ -2,13 +2,13 @@ import { test, expect } from '@playwright/test'
const MOCK_MODELS_RESPONSE = {
models: [
{ name: 'llama-model', description: 'A llama model', backend: 'llama-cpp', installed: false, tags: ['llm'] },
{ name: 'whisper-model', description: 'A whisper model', backend: 'whisper', installed: true, tags: ['stt'] },
{ name: 'llama-model', description: 'A llama model', backend: 'llama-cpp', installed: false, tags: ['chat'] },
{ name: 'whisper-model', description: 'A whisper model', backend: 'whisper', installed: true, tags: ['transcript'] },
{ name: 'stablediffusion-model', description: 'An image model', backend: 'stablediffusion', installed: false, tags: ['sd'] },
{ name: 'unknown-model', description: 'No backend', backend: '', installed: false, tags: [] },
],
allBackends: ['llama-cpp', 'stablediffusion', 'whisper'],
allTags: ['llm', 'sd', 'stt'],
allTags: ['chat', 'sd', 'transcript'],
availableModels: 4,
installedModels: 1,
totalPages: 1,
@@ -78,3 +78,121 @@ test.describe('Models Gallery - Backend Features', () => {
await expect(detail.locator('text=llama-cpp')).toBeVisible()
})
})
// Backend name -> usecase filter keys, served by the mocked
// /api/backends/usecases route below.
const BACKEND_USECASES_MOCK = {
  'llama-cpp': ['chat', 'embeddings', 'vision'],
  'whisper': ['transcript'],
  'stablediffusion': ['image'],
}

// Exercises the multi-select usecase filter bar of the models gallery and its
// interaction with the backend dropdown. Both API routes are mocked, so no
// server is required.
test.describe('Models Gallery - Multi-select Filters', () => {
  test.beforeEach(async ({ page }) => {
    await page.route('**/api/models*', (route) => {
      route.fulfill({
        contentType: 'application/json',
        body: JSON.stringify(MOCK_MODELS_RESPONSE),
      })
    })
    await page.route('**/api/backends/usecases', (route) => {
      route.fulfill({
        contentType: 'application/json',
        body: JSON.stringify(BACKEND_USECASES_MOCK),
      })
    })
    await page.goto('/app/models')
    // The table header only renders once the first models response is in.
    await expect(page.locator('th', { hasText: 'Backend' })).toBeVisible({ timeout: 10_000 })
  })

  test('multi-select toggle: click Chat, TTS, then Chat again', async ({ page }) => {
    const chatBtn = page.locator('.filter-btn', { hasText: 'Chat' })
    const ttsBtn = page.locator('.filter-btn', { hasText: 'TTS' })

    await chatBtn.click()
    await expect(chatBtn).toHaveClass(/active/)

    await ttsBtn.click()
    await expect(chatBtn).toHaveClass(/active/)
    await expect(ttsBtn).toHaveClass(/active/)

    // Click Chat again to deselect it
    await chatBtn.click()
    await expect(chatBtn).not.toHaveClass(/active/)
    await expect(ttsBtn).toHaveClass(/active/)
  })

  test('"All" clears selection', async ({ page }) => {
    const chatBtn = page.locator('.filter-btn', { hasText: 'Chat' })
    const allBtn = page.locator('.filter-btn', { hasText: 'All' })

    await chatBtn.click()
    await expect(chatBtn).toHaveClass(/active/)

    await allBtn.click()
    await expect(allBtn).toHaveClass(/active/)
    await expect(chatBtn).not.toHaveClass(/active/)
  })

  test('query param sent correctly with multiple filters', async ({ page }) => {
    const chatBtn = page.locator('.filter-btn', { hasText: 'Chat' })
    const ttsBtn = page.locator('.filter-btn', { hasText: 'TTS' })

    // Click Chat and wait for its request to settle. The wait is registered
    // together with the click: the route is fulfilled from a mock, so a wait
    // that is only created after click() resolves can miss the response.
    await Promise.all([
      page.waitForResponse(resp => resp.url().includes('/api/models')),
      chatBtn.click(),
    ])

    // Now click TTS and capture the resulting request
    const [request] = await Promise.all([
      page.waitForRequest(req => {
        if (!req.url().includes('/api/models')) return false
        const u = new URL(req.url())
        const tag = u.searchParams.get('tag')
        return tag && tag.split(',').length >= 2
      }),
      ttsBtn.click(),
    ])

    // Selection order is not part of the contract, so compare sorted.
    const url = new URL(request.url())
    const tags = url.searchParams.get('tag').split(',').sort()
    expect(tags).toEqual(['chat', 'tts'])
  })

  test('backend greys out unavailable filters', async ({ page }) => {
    // Select llama-cpp backend via dropdown
    await page.locator('button', { hasText: 'All Backends' }).click()
    const dropdown = page.locator('input[placeholder="Search backends..."]').locator('..').locator('..')
    await dropdown.locator('text=llama-cpp').click()

    // Wait for filter state to update
    const ttsBtn = page.locator('.filter-btn', { hasText: 'TTS' })
    const sttBtn = page.locator('.filter-btn', { hasText: 'STT' })
    const imageBtn = page.locator('.filter-btn', { hasText: 'Image' })

    // TTS, STT, Image should be disabled for llama-cpp
    await expect(ttsBtn).toBeDisabled()
    await expect(sttBtn).toBeDisabled()
    await expect(imageBtn).toBeDisabled()

    // Chat, Embeddings, Vision should remain enabled
    const chatBtn = page.locator('.filter-btn', { hasText: 'Chat' })
    const embBtn = page.locator('.filter-btn', { hasText: 'Embeddings' })
    const visBtn = page.locator('.filter-btn', { hasText: 'Vision' })
    await expect(chatBtn).toBeEnabled()
    await expect(embBtn).toBeEnabled()
    await expect(visBtn).toBeEnabled()
  })

  test('backend clears incompatible filters', async ({ page }) => {
    // Select TTS filter first
    const ttsBtn = page.locator('.filter-btn', { hasText: 'TTS' })
    await ttsBtn.click()
    await expect(ttsBtn).toHaveClass(/active/)

    // Now select llama-cpp backend (which doesn't support TTS)
    await page.locator('button', { hasText: 'All Backends' }).click()
    const dropdown = page.locator('input[placeholder="Search backends..."]').locator('..').locator('..')
    await dropdown.locator('text=llama-cpp').click()

    // TTS should be auto-removed from selection
    await expect(ttsBtn).not.toHaveClass(/active/)
  })
})

View File

@@ -20,6 +20,7 @@
"vision": "Vision",
"tts": "TTS",
"stt": "STT",
"diarization": "Diarisierung",
"embedding": "Embedding",
"rerank": "Rerank",
"allBackends": "Alle Backends",

View File

@@ -14,14 +14,20 @@
},
"filters": {
"all": "All",
"llm": "LLM",
"llm": "Chat",
"image": "Image",
"video": "Video",
"multimodal": "Multimodal",
"vision": "Vision",
"tts": "TTS",
"stt": "STT",
"embedding": "Embedding",
"diarization": "Diarization",
"soundGen": "Sound",
"audioTransform": "Audio FX",
"embedding": "Embeddings",
"rerank": "Rerank",
"detection": "Detection",
"vad": "VAD",
"allBackends": "All Backends",
"searchBackends": "Search backends..."
},

View File

@@ -20,6 +20,7 @@
"vision": "Visión",
"tts": "TTS",
"stt": "STT",
"diarization": "Diarización",
"embedding": "Embedding",
"rerank": "Rerank",
"allBackends": "Todos los backends",

View File

@@ -20,6 +20,7 @@
"vision": "Visione",
"tts": "TTS",
"stt": "STT",
"diarization": "Diarizzazione",
"embedding": "Embedding",
"rerank": "Rerank",
"allBackends": "Tutti i backend",

View File

@@ -20,6 +20,7 @@
"vision": "视觉",
"tts": "TTS",
"stt": "STT",
"diarization": "说话人分离",
"embedding": "嵌入",
"rerank": "重排",
"allBackends": "所有后端",

View File

@@ -296,11 +296,11 @@ export default function Backends() {
const FILTERS = [
{ key: '', label: 'All', icon: 'fa-layer-group' },
{ key: 'llm', label: 'LLM', icon: 'fa-brain' },
{ key: 'chat', label: 'Chat', icon: 'fa-brain' },
{ key: 'image', label: 'Image', icon: 'fa-image' },
{ key: 'video', label: 'Video', icon: 'fa-video' },
{ key: 'tts', label: 'TTS', icon: 'fa-microphone' },
{ key: 'stt', label: 'STT', icon: 'fa-headphones' },
{ key: 'transcript', label: 'STT', icon: 'fa-headphones' },
{ key: 'vision', label: 'Vision', icon: 'fa-eye' },
]

View File

@@ -11,16 +11,26 @@ import GalleryLoader from '../components/GalleryLoader'
import React from 'react'
// Context sizes selectable with the VRAM-estimate slider, and the label shown
// for each one. The two arrays are index-aligned: the label for a size is
// looked up via CONTEXT_SIZES.indexOf(size), so keep them the same length.
const CONTEXT_SIZES = [8192, 16384, 32768, 65536, 131072, 262144]
const CONTEXT_LABELS = ['8K', '16K', '32K', '64K', '128K', '256K']
const FILTERS = [
{ key: '', labelKey: 'filters.all', icon: 'fa-layer-group' },
{ key: 'llm', labelKey: 'filters.llm', icon: 'fa-brain' },
{ key: 'sd', labelKey: 'filters.image', icon: 'fa-image' },
{ key: 'chat', labelKey: 'filters.llm', icon: 'fa-brain' },
{ key: 'image', labelKey: 'filters.image', icon: 'fa-image' },
{ key: 'video', labelKey: 'filters.video', icon: 'fa-video' },
{ key: 'multimodal', labelKey: 'filters.multimodal', icon: 'fa-shapes' },
{ key: 'vision', labelKey: 'filters.vision', icon: 'fa-eye' },
{ key: 'tts', labelKey: 'filters.tts', icon: 'fa-microphone' },
{ key: 'stt', labelKey: 'filters.stt', icon: 'fa-headphones' },
{ key: 'embedding', labelKey: 'filters.embedding', icon: 'fa-vector-square' },
{ key: 'reranker', labelKey: 'filters.rerank', icon: 'fa-sort' },
{ key: 'transcript', labelKey: 'filters.stt', icon: 'fa-headphones' },
{ key: 'diarization', labelKey: 'filters.diarization', icon: 'fa-users' },
{ key: 'sound_generation', labelKey: 'filters.soundGen', icon: 'fa-music' },
{ key: 'audio_transform', labelKey: 'filters.audioTransform', icon: 'fa-sliders' },
{ key: 'embeddings', labelKey: 'filters.embedding', icon: 'fa-vector-square' },
{ key: 'rerank', labelKey: 'filters.rerank', icon: 'fa-sort' },
{ key: 'detection', labelKey: 'filters.detection', icon: 'fa-bullseye' },
{ key: 'vad', labelKey: 'filters.vad', icon: 'fa-wave-square' },
]
export default function Models() {
@@ -34,7 +44,7 @@ export default function Models() {
const [page, setPage] = useState(1)
const [totalPages, setTotalPages] = useState(1)
const [search, setSearch] = useState('')
const [filter, setFilter] = useState('')
const [filters, setFilters] = useState([])
const [sort, setSort] = useState('')
const [order, setOrder] = useState('asc')
const [installing, setInstalling] = useState(new Map())
@@ -43,6 +53,9 @@ export default function Models() {
const [stats, setStats] = useState({ total: 0, installed: 0, repositories: 0 })
const [backendFilter, setBackendFilter] = useState('')
const [allBackends, setAllBackends] = useState([])
const [backendUsecases, setBackendUsecases] = useState({})
const [estimates, setEstimates] = useState({})
const [contextSize, setContextSize] = useState(CONTEXT_SIZES[0])
const [confirmDialog, setConfirmDialog] = useState(null)
// Total GPU memory for "fits" check
@@ -52,14 +65,14 @@ export default function Models() {
try {
setLoading(true)
const searchVal = params.search !== undefined ? params.search : search
const filterVal = params.filter !== undefined ? params.filter : filter
const filtersVal = params.filters !== undefined ? params.filters : filters
const sortVal = params.sort !== undefined ? params.sort : sort
const backendVal = params.backendFilter !== undefined ? params.backendFilter : backendFilter
const queryParams = {
page: params.page || page,
items: 9,
}
if (filterVal) queryParams.tag = filterVal
if (filtersVal.length > 0) queryParams.tag = filtersVal.join(',')
if (searchVal) queryParams.term = searchVal
if (backendVal) queryParams.backend = backendVal
if (sortVal) {
@@ -79,11 +92,27 @@ export default function Models() {
} finally {
setLoading(false)
}
}, [page, search, filter, sort, order, backendFilter, addToast, t])
}, [page, search, filters, sort, order, backendFilter, addToast, t])
useEffect(() => {
fetchModels()
}, [page, filter, sort, order, backendFilter])
}, [page, filters, sort, order, backendFilter])
// Fetch backend→usecase mapping once on mount
useEffect(() => {
modelsApi.backendUsecases().then(setBackendUsecases).catch(() => {})
}, [])
// When backend changes, remove selected filters that aren't available
useEffect(() => {
if (backendFilter && backendUsecases[backendFilter]) {
setFilters(prev => {
const possible = backendUsecases[backendFilter]
const filtered = prev.filter(k => k === 'multimodal' || possible.includes(k))
return filtered.length !== prev.length ? filtered : prev
})
}
}, [backendFilter, backendUsecases])
// Re-fetch when operations change (install/delete completion)
useEffect(() => {
@@ -95,11 +124,42 @@ export default function Models() {
fetchModels({ search: value, page: 1 })
})
// Fetch VRAM/size estimates asynchronously for visible models.
// One request is issued per model in the current `models` list, asking for
// every entry of CONTEXT_SIZES at once; results are merged into `estimates`
// keyed by model name/id. Request failures are ignored on purpose so a missing
// estimate never blocks the listing.
useEffect(() => {
if (models.length === 0) return
// Set by the cleanup below so responses that arrive after `models` changed
// (or after unmount) are dropped instead of written into state.
let cancelled = false
models.forEach(model => {
const id = model.name || model.id
// Skip models that already have an estimate from an earlier run.
if (estimates[id]) return
modelsApi.estimate(id, CONTEXT_SIZES).then(est => {
if (cancelled) return
// NOTE(review): assumes the payload exposes camelCase `sizeBytes` /
// `estimates` keys — confirm against the server's JSON field names.
if (est && (est.sizeBytes || est.estimates)) {
setEstimates(prev => ({ ...prev, [id]: est }))
}
}).catch(() => {})
})
return () => { cancelled = true }
// Only `models` is listed as a dependency: the effect re-runs when the
// visible list changes, not every time an estimate is stored.
}, [models])
const handleSearch = (value) => {
setSearch(value)
debouncedFetch(value)
}
// Toggles one usecase filter in the multi-select bar. The empty key is the
// "All" button and clears the whole selection. Every toggle sends the user
// back to the first page.
const toggleFilter = (key) => {
  if (key === '') {
    setFilters([])
  } else {
    setFilters(selected => {
      if (!selected.includes(key)) return [...selected, key]
      return selected.filter(existing => existing !== key)
    })
  }
  setPage(1)
}
// Reports whether a filter button should be clickable for the currently
// selected backend. "All" (empty key) and "multimodal" are never restricted,
// and a backend without a usecase list restricts nothing.
const isFilterAvailable = (key) => {
  const unrestricted = !backendFilter || key === '' || key === 'multimodal'
  if (unrestricted) return true
  const supported = backendUsecases[backendFilter]
  if (!supported) return true
  return supported.includes(key)
}
const handleSort = (col) => {
if (sort === col) {
setOrder(o => o === 'asc' ? 'desc' : 'asc')
@@ -221,16 +281,23 @@ export default function Models() {
{/* Filter buttons */}
<div className="filter-bar">
{FILTERS.map(f => (
<button
key={f.key}
className={`filter-btn ${filter === f.key ? 'active' : ''}`}
onClick={() => { setFilter(f.key); setPage(1) }}
>
<i className={`fas ${f.icon}`} style={{ marginRight: 4 }} />
{t(f.labelKey)}
</button>
))}
{FILTERS.map(f => {
const isAll = f.key === ''
const active = isAll ? filters.length === 0 : filters.includes(f.key)
const available = isFilterAvailable(f.key)
return (
<button
key={f.key}
className={`filter-btn ${active ? 'active' : ''}`}
disabled={!available}
style={!available ? { opacity: 0.4, cursor: 'not-allowed' } : undefined}
onClick={() => toggleFilter(f.key)}
>
<i className={`fas ${f.icon}`} style={{ marginRight: 4 }} />
{t(f.labelKey)}
</button>
)
})}
{allBackends.length > 0 && (
<SearchableSelect
value={backendFilter}
@@ -244,6 +311,25 @@ export default function Models() {
)}
</div>
{/* Context size slider for VRAM estimates */}
<div style={{ display: 'flex', alignItems: 'center', gap: 'var(--spacing-sm)', marginBottom: 'var(--spacing-md)', fontSize: '0.8125rem' }}>
<label style={{ color: 'var(--color-text-muted)', whiteSpace: 'nowrap' }}>
<i className="fas fa-memory" style={{ marginRight: 4 }} />
Context:
</label>
<input
type="range"
min={0}
max={CONTEXT_SIZES.length - 1}
value={CONTEXT_SIZES.indexOf(contextSize)}
onChange={(e) => setContextSize(CONTEXT_SIZES[e.target.value])}
style={{ width: 140, accentColor: 'var(--color-primary)' }}
/>
<span style={{ fontWeight: 600, minWidth: '3em' }}>
{CONTEXT_LABELS[CONTEXT_SIZES.indexOf(contextSize)]}
</span>
</div>
{/* Table */}
{loading ? (
<GalleryLoader />
@@ -252,12 +338,12 @@ export default function Models() {
<div className="empty-state-icon"><i className="fas fa-search" /></div>
<h2 className="empty-state-title">{t('empty.title')}</h2>
<p className="empty-state-text">
{search || filter || backendFilter ? t('empty.withFilters') : t('empty.noFilters')}
{search || filters.length > 0 || backendFilter ? t('empty.withFilters') : t('empty.noFilters')}
</p>
{(search || filter || backendFilter) && (
{(search || filters.length > 0 || backendFilter) && (
<button
className="btn btn-secondary btn-sm"
onClick={() => { handleSearch(''); setFilter(''); setBackendFilter(''); setPage(1) }}
onClick={() => { handleSearch(''); setFilters([]); setBackendFilter(''); setPage(1) }}
>
<i className="fas fa-times" /> {t('search.clearFilters')}
</button>
@@ -286,9 +372,14 @@ export default function Models() {
<tbody>
{models.map((model, idx) => {
const name = model.name || model.id
const estData = estimates[name]
const sizeDisplay = estData?.sizeDisplay
const ctxEst = estData?.estimates?.[String(contextSize)]
const vramDisplay = ctxEst?.vramDisplay
const vramBytes = ctxEst?.vramBytes
const installing = isInstalling(name)
const progress = getOperationProgress(name)
const fit = fitsGpu(model.estimated_vram_bytes)
const fit = fitsGpu(vramBytes)
const isExpanded = expandedRow === idx
return (
@@ -355,15 +446,15 @@ export default function Models() {
{/* Size / VRAM */}
<td>
<div style={{ display: 'flex', flexDirection: 'column', gap: '2px' }}>
{(model.estimated_size_display || model.estimated_vram_display) ? (
{(sizeDisplay || vramDisplay) ? (
<>
<span style={{ fontSize: '0.75rem', color: 'var(--color-text-secondary)' }}>
{model.estimated_size_display && model.estimated_size_display !== '0 B' && (
<span>{t('table.size', { size: model.estimated_size_display })}</span>
{sizeDisplay && sizeDisplay !== '0 B' && (
<span>{t('table.size', { size: sizeDisplay })}</span>
)}
{model.estimated_size_display && model.estimated_size_display !== '0 B' && model.estimated_vram_display && model.estimated_vram_display !== '0 B' && ' · '}
{model.estimated_vram_display && model.estimated_vram_display !== '0 B' && (
<span>{t('table.vram', { vram: model.estimated_vram_display })}</span>
{sizeDisplay && sizeDisplay !== '0 B' && vramDisplay && vramDisplay !== '0 B' && ' · '}
{vramDisplay && vramDisplay !== '0 B' && (
<span>{t('table.vram', { vram: vramDisplay })}</span>
)}
</span>
{fit !== null && (
@@ -437,7 +528,7 @@ export default function Models() {
{isExpanded && (
<tr>
<td colSpan="8" style={{ padding: 0 }}>
<ModelDetail model={model} fit={fit} expandedFiles={expandedFiles} setExpandedFiles={setExpandedFiles} t={t} />
<ModelDetail model={model} fit={fit} sizeDisplay={sizeDisplay} vramDisplay={vramDisplay} expandedFiles={expandedFiles} setExpandedFiles={setExpandedFiles} t={t} />
</td>
</tr>
)}
@@ -490,7 +581,7 @@ function DetailRow({ label, children }) {
)
}
function ModelDetail({ model, fit, expandedFiles, setExpandedFiles, t }) {
function ModelDetail({ model, fit, sizeDisplay, vramDisplay, expandedFiles, setExpandedFiles, t }) {
const files = model.additionalFiles || model.files || []
return (
<div style={{ padding: 'var(--spacing-md) var(--spacing-lg)', background: 'var(--color-bg-primary)', borderTop: '1px solid var(--color-border-subtle)' }}>
@@ -516,12 +607,12 @@ function ModelDetail({ model, fit, expandedFiles, setExpandedFiles, t }) {
)}
</DetailRow>
<DetailRow label={t('detail.size')}>
{model.estimated_size_display && model.estimated_size_display !== '0 B' ? model.estimated_size_display : null}
{sizeDisplay && sizeDisplay !== '0 B' ? sizeDisplay : null}
</DetailRow>
<DetailRow label={t('detail.vram')}>
{model.estimated_vram_display && model.estimated_vram_display !== '0 B' ? (
{vramDisplay && vramDisplay !== '0 B' ? (
<span style={{ display: 'flex', alignItems: 'center', gap: 'var(--spacing-sm)' }}>
{model.estimated_vram_display}
{vramDisplay}
{fit !== null && (
<span style={{ fontSize: '0.75rem', color: fit ? 'var(--color-success)' : 'var(--color-error)' }}>
<i className="fas fa-microchip" /> {fit ? t('detail.fitsGpu') : t('detail.mayNotFitGpu')}

View File

@@ -86,6 +86,10 @@ export const modelsApi = {
listCapabilities: () => fetchJSON(API_CONFIG.endpoints.modelsCapabilities),
install: (id) => postJSON(API_CONFIG.endpoints.installModel(id), {}),
delete: (id) => postJSON(API_CONFIG.endpoints.deleteModel(id), {}),
estimate: (id, contexts) => fetchJSON(
buildUrl(API_CONFIG.endpoints.modelEstimate(id),
contexts?.length ? { contexts: contexts.join(',') } : {})
),
getConfig: (id) => postJSON(API_CONFIG.endpoints.modelConfig(id), {}),
getConfigJson: (name) => fetchJSON(API_CONFIG.endpoints.modelConfigJson(name)),
getJob: (uid) => fetchJSON(API_CONFIG.endpoints.modelJob(uid)),
@@ -116,6 +120,7 @@ export const modelsApi = {
method: 'PATCH',
body: JSON.stringify(patch),
}),
backendUsecases: () => fetchJSON('/api/backends/usecases'),
}
// Backends API

View File

@@ -9,6 +9,7 @@ export const API_CONFIG = {
models: '/api/models',
installModel: (id) => `/api/models/install/${id}`,
deleteModel: (id) => `/api/models/delete/${id}`,
modelEstimate: (id) => `/api/models/estimate/${id}`,
modelConfig: (id) => `/api/models/config/${id}`,
modelConfigJson: (name) => `/api/models/config-json/${name}`,
configMetadata: '/api/models/config-metadata',

View File

@@ -9,11 +9,9 @@ import (
"math"
"net/http"
"net/url"
"path"
"slices"
"strconv"
"strings"
"sync"
"time"
"github.com/google/uuid"
@@ -37,8 +35,81 @@ const (
licenseSortFieldName = "license"
statusSortFieldName = "status"
ascSortOrder = "asc"
multimodalFilterKey = "multimodal"
)
// usecaseFilters translates the filter keys sent by the gallery UI into the
// ModelConfigUsecase flag used for capability-based filtering. A key that is
// absent from this map is not a usecase filter. Entries are listed
// alphabetically by key constant.
var usecaseFilters = map[string]config.ModelConfigUsecase{
	config.UsecaseAudioTransform:  config.FLAG_AUDIO_TRANSFORM,
	config.UsecaseChat:            config.FLAG_CHAT,
	config.UsecaseDetection:       config.FLAG_DETECTION,
	config.UsecaseDiarization:     config.FLAG_DIARIZATION,
	config.UsecaseEmbeddings:      config.FLAG_EMBEDDINGS,
	config.UsecaseImage:           config.FLAG_IMAGE,
	config.UsecaseRerank:          config.FLAG_RERANK,
	config.UsecaseSoundGeneration: config.FLAG_SOUND_GENERATION,
	config.UsecaseTranscript:      config.FLAG_TRANSCRIPT,
	config.UsecaseTTS:             config.FLAG_TTS,
	config.UsecaseVAD:             config.FLAG_VAD,
	config.UsecaseVideo:           config.FLAG_VIDEO,
	config.UsecaseVision:          config.FLAG_VISION,
}
// extractHFRepo looks for a HuggingFace repo ID in a gallery entry.
//
// The string at overrides["parameters"]["model"] is tried first, then each
// entry of urls in order. The first candidate that vram.ExtractHFRepoID
// recognises wins; the empty string is returned when none does.
func extractHFRepo(overrides map[string]any, urls []string) string {
	// Indexing a nil map is safe in Go, so neither lookup needs a nil guard:
	// a missing level simply makes the type assertion fail.
	params, _ := overrides["parameters"].(map[string]any)
	if modelRef, isString := params["model"].(string); isString {
		if repoID, found := vram.ExtractHFRepoID(modelRef); found {
			return repoID
		}
	}

	for i := range urls {
		if repoID, found := vram.ExtractHFRepoID(urls[i]); found {
			return repoID
		}
	}
	return ""
}
// buildEstimateInput assembles the vram.ModelEstimateInput for a gallery
// model: its declared size, the HuggingFace repo found by extractHFRepo (left
// empty when there is none), and one file entry per additional file that
// vram.IsWeightFile accepts. File sizes are passed as 0.
func buildEstimateInput(m *gallery.GalleryModel) vram.ModelEstimateInput {
	input := vram.ModelEstimateInput{
		Size:   m.Size,
		HFRepo: extractHFRepo(m.Overrides, m.URLs),
	}
	for _, file := range m.AdditionalFiles {
		if !vram.IsWeightFile(file.URI) {
			continue
		}
		input.Files = append(input.Files, vram.FileInput{URI: file.URI, Size: 0})
	}
	return input
}
// parseContextSizes parses a comma-separated list of context sizes taken from
// a query parameter.
//
// Each entry is whitespace-trimmed. Entries that are not a positive integer
// fitting in 32 bits are skipped. Because the value is caller-controlled,
// repeated sizes are collapsed to their first occurrence (input order is
// otherwise preserved) and at most maxContextSizes distinct sizes are kept, so
// a single request cannot ask for an unbounded number of estimates.
//
// Returns [8192] when raw is empty or contains no valid size.
func parseContextSizes(raw string) []uint32 {
	const (
		defaultContextSize = 8192
		maxContextSizes    = 32
	)

	if raw == "" {
		return []uint32{defaultContextSize}
	}

	var sizes []uint32
	for _, field := range strings.Split(raw, ",") {
		v, err := strconv.ParseUint(strings.TrimSpace(field), 10, 32)
		if err != nil || v == 0 {
			continue
		}
		size := uint32(v)
		// The list is tiny (bounded by maxContextSizes), so a linear scan is
		// cheaper and simpler than a set.
		if slices.Contains(sizes, size) {
			continue
		}
		sizes = append(sizes, size)
		if len(sizes) == maxContextSizes {
			break
		}
	}

	if len(sizes) == 0 {
		return []uint32{defaultContextSize}
	}
	return sizes
}
// getDirectorySize calculates the total size of files in a directory
// metaParentOf returns the name of the auto-resolving (meta) backend that
// declares `name` as one of its hardware-specific variants in its
@@ -260,7 +331,7 @@ func RegisterUIAPIRoutes(app *echo.Echo, cl *config.ModelConfigLoader, ml *model
items = "9"
}
models, err := gallery.AvailableGalleryModels(appConfig.Galleries, appConfig.SystemState)
models, err := gallery.AvailableGalleryModelsCached(appConfig.Galleries, appConfig.SystemState)
if err != nil {
xlog.Error("could not list models from galleries", "error", err)
return c.JSON(http.StatusInternalServerError, map[string]any{
@@ -294,8 +365,30 @@ func RegisterUIAPIRoutes(app *echo.Echo, cl *config.ModelConfigLoader, ml *model
}
slices.Sort(backendNames)
// Filter by usecase tags (comma-separated for multi-select).
if tag != "" {
models = gallery.GalleryElements[*gallery.GalleryModel](models).FilterByTag(tag)
var combinedFlag config.ModelConfigUsecase
hasMultimodal := false
var plainTags []string
for _, t := range strings.Split(tag, ",") {
t = strings.TrimSpace(t)
if t == multimodalFilterKey {
hasMultimodal = true
} else if flag, ok := usecaseFilters[t]; ok {
combinedFlag |= flag
} else if t != "" {
plainTags = append(plainTags, t)
}
}
if hasMultimodal {
models = gallery.FilterGalleryModelsByMultimodal(models)
}
if combinedFlag != config.FLAG_ANY {
models = gallery.FilterGalleryModelsByUsecase(models, combinedFlag)
}
for _, pt := range plainTags {
models = gallery.GalleryElements[*gallery.GalleryModel](models).FilterByTag(pt)
}
}
if term != "" {
models = gallery.GalleryElements[*gallery.GalleryModel](models).Search(term)
@@ -355,41 +448,6 @@ func RegisterUIAPIRoutes(app *echo.Echo, cl *config.ModelConfigLoader, ml *model
modelsJSON := make([]map[string]any, 0, len(models))
seenIDs := make(map[string]bool)
weightExts := map[string]bool{".gguf": true, ".safetensors": true, ".bin": true, ".pt": true}
extractHFRepo := func(overrides map[string]any, urls []string) string {
// Try overrides.parameters.model first
if overrides != nil {
if params, ok := overrides["parameters"].(map[string]any); ok {
if modelRef, ok := params["model"].(string); ok {
if repoID, ok := vram.ExtractHFRepoID(modelRef); ok {
return repoID
}
}
}
}
// Fall back to the first HuggingFace URL in the metadata urls list
for _, u := range urls {
if repoID, ok := vram.ExtractHFRepoID(u); ok {
return repoID
}
}
return ""
}
hasWeightFiles := func(files []gallery.File) bool {
for _, f := range files {
ext := strings.ToLower(path.Ext(path.Base(f.URI)))
if weightExts[ext] {
return true
}
}
return false
}
const hfEstimateTimeout = 10 * time.Second
const estimateConcurrency = 3
sem := make(chan struct{}, estimateConcurrency)
var wg sync.WaitGroup
for _, m := range models {
modelID := m.ID()
@@ -431,63 +489,9 @@ func RegisterUIAPIRoutes(app *echo.Echo, cl *config.ModelConfigLoader, ml *model
"backend": m.Backend,
}
// Build EstimateModel input from available metadata
var estimateInput vram.ModelEstimateInput
estimateInput.Options = vram.EstimateOptions{ContextLength: 8192}
estimateInput.Size = m.Size
if hfRepoID := extractHFRepo(m.Overrides, m.URLs); hfRepoID != "" {
estimateInput.HFRepo = hfRepoID
}
if hasWeightFiles(m.AdditionalFiles) {
files := make([]gallery.File, len(m.AdditionalFiles))
copy(files, m.AdditionalFiles)
for _, f := range files {
ext := strings.ToLower(path.Ext(path.Base(f.URI)))
if weightExts[ext] {
estimateInput.Files = append(estimateInput.Files, vram.FileInput{URI: f.URI, Size: 0})
}
}
}
// Run estimation (async for file-based and HF repo, sync for size string only)
needsAsync := len(estimateInput.Files) > 0 || estimateInput.HFRepo != ""
if needsAsync {
input := estimateInput
wg.Go(func() {
sem <- struct{}{}
defer func() { <-sem }()
ctx, cancel := context.WithTimeout(context.Background(), hfEstimateTimeout)
defer cancel()
result, err := vram.EstimateModel(ctx, input)
if err == nil {
if result.SizeBytes > 0 {
obj["estimated_size_bytes"] = result.SizeBytes
obj["estimated_size_display"] = result.SizeDisplay
}
if result.VRAMBytes > 0 {
obj["estimated_vram_bytes"] = result.VRAMBytes
obj["estimated_vram_display"] = result.VRAMDisplay
}
}
})
} else if estimateInput.Size != "" {
result, _ := vram.EstimateModel(context.Background(), estimateInput)
if result.SizeBytes > 0 {
obj["estimated_size_bytes"] = result.SizeBytes
obj["estimated_size_display"] = result.SizeDisplay
}
if result.VRAMBytes > 0 {
obj["estimated_vram_bytes"] = result.VRAMBytes
obj["estimated_vram_display"] = result.VRAMDisplay
}
}
modelsJSON = append(modelsJSON, obj)
}
wg.Wait()
prevPage := pageNum - 1
nextPage := pageNum + 1
if prevPage < 1 {
@@ -639,6 +643,65 @@ func RegisterUIAPIRoutes(app *echo.Echo, cl *config.ModelConfigLoader, ml *model
})
})
// Maps every known backend to the sorted list of usecase filter keys it can
// serve. The gallery frontend uses this to grey out filter buttons once a
// backend is selected. Usecases that have no entry in usecaseFilters are left
// out.
// NOTE(review): a backend with no matching usecase keeps a nil slice here,
// which presumably serialises as JSON null rather than [] — confirm that is
// what the frontend expects.
app.GET("/api/backends/usecases", func(c echo.Context) error {
	byBackend := make(map[string][]string, len(config.BackendCapabilities))
	for backendName, capability := range config.BackendCapabilities {
		var filterKeys []string
		for _, usecase := range capability.PossibleUsecases {
			if _, known := usecaseFilters[usecase]; !known {
				continue
			}
			filterKeys = append(filterKeys, usecase)
		}
		slices.Sort(filterKeys)
		byBackend[backendName] = filterKeys
	}
	return c.JSON(http.StatusOK, byBackend)
}, adminMiddleware)
// Serves size/VRAM estimates for one gallery model at several context sizes.
// The frontend requests this per model, so the gallery listing itself does not
// wait on estimation and the numbers are filled in afterwards.
//
// Query params:
//   contexts - comma-separated context sizes (default: 8192)
//
// Responses: 400 when the id cannot be unescaped, 500 when the gallery cannot
// be listed, 404 when the model is unknown. A model with nothing to estimate
// from, or an estimation failure, yields 200 with an empty estimate so the UI
// simply shows no numbers.
app.GET("/api/models/estimate/:id", func(c echo.Context) error {
	modelID, err := url.QueryUnescape(c.Param("id"))
	if err != nil {
		return c.JSON(http.StatusBadRequest, map[string]any{"error": "invalid model ID"})
	}

	contextSizes := parseContextSizes(c.QueryParam("contexts"))

	// Resolve the gallery entry that the estimate input is derived from.
	available, err := gallery.AvailableGalleryModelsCached(appConfig.Galleries, appConfig.SystemState)
	if err != nil {
		return c.JSON(http.StatusInternalServerError, map[string]any{"error": err.Error()})
	}
	model := gallery.FindGalleryElement(available, modelID)
	if model == nil {
		return c.JSON(http.StatusNotFound, map[string]any{"error": "model not found"})
	}

	input := buildEstimateInput(model)
	hasSource := len(input.Files) > 0 || input.HFRepo != "" || input.Size != ""
	if !hasSource {
		return c.JSON(http.StatusOK, vram.MultiContextEstimate{})
	}

	// Bound the estimation and tie it to the request so it stops when the
	// client disconnects.
	ctx, cancel := context.WithTimeout(c.Request().Context(), 10*time.Second)
	defer cancel()

	result, err := vram.EstimateModelMultiContext(ctx, input, contextSizes)
	if err != nil {
		xlog.Debug("model estimate failed", "model", modelID, "error", err)
		return c.JSON(http.StatusOK, vram.MultiContextEstimate{})
	}
	return c.JSON(http.StatusOK, result)
}, adminMiddleware)
app.POST("/api/models/install/:id", func(c echo.Context) error {
galleryID := c.Param("id")
// URL decode the gallery ID (e.g., "localai%40model" -> "localai@model")
@@ -742,7 +805,7 @@ func RegisterUIAPIRoutes(app *echo.Echo, cl *config.ModelConfigLoader, ml *model
}
xlog.Debug("API job submitted to get config", "galleryID", galleryID)
models, err := gallery.AvailableGalleryModels(appConfig.Galleries, appConfig.SystemState)
models, err := gallery.AvailableGalleryModelsCached(appConfig.Galleries, appConfig.SystemState)
if err != nil {
return c.JSON(http.StatusInternalServerError, map[string]any{
"error": err.Error(),