UI: add 'Fits in my GPU' filter on Install Models (#10017)

* feat(ui): add GPU fit filter on models install page

* Delete docs/vram-fits-filter-backend-optionals.md

Signed-off-by: Siddharth More <siddimore@gmail.com>

---------

Signed-off-by: Siddharth More <siddimore@gmail.com>
This commit is contained in:
Siddharth More
2026-05-27 06:17:44 -07:00
committed by GitHub
parent 4d01298048
commit c9a1a7e6a0
7 changed files with 152 additions and 5 deletions

View File

@@ -15,6 +15,62 @@ const MOCK_MODELS_RESPONSE = {
currentPage: 1,
}
// Canned body for the mocked `**/api/resources` route: a single NVIDIA GPU
// with 12 GiB of memory, 2 GiB of it in use. The per-GPU entry and the
// aggregate section report the same totals.
const MOCK_GPU_RESOURCES_RESPONSE = (() => {
  // Convert a whole number of GiB into bytes.
  const gib = (count) => count * 1024 ** 3
  const memory = { total: gib(12), used: gib(2), free: gib(10) }
  const usagePercent = 16.7

  return {
    type: 'gpu',
    available: true,
    gpus: [
      {
        index: 0,
        name: 'Mock GPU',
        vendor: 'nvidia',
        total_vram: memory.total,
        used_vram: memory.used,
        free_vram: memory.free,
        usage_percent: usagePercent,
      },
    ],
    aggregate: {
      total_memory: memory.total,
      used_memory: memory.used,
      free_memory: memory.free,
      usage_percent: usagePercent,
      gpu_count: 1,
    },
  }
})()
// Canned size/VRAM estimates, keyed by model id. Each entry carries one
// estimate under the '8192' context key, with byte counts and their
// two-decimal "GB" display strings derived from the same GiB figure.
const MOCK_ESTIMATES = Object.fromEntries(
  [
    // [model id, size in GiB, VRAM in GiB for the '8192' estimate]
    ['llama-model', 4, 8],
    ['whisper-model', 1, 2],
    ['stablediffusion-model', 8, 16],
  ].map(([modelId, sizeGiB, vramGiB]) => [
    modelId,
    {
      sizeBytes: sizeGiB * 1024 * 1024 * 1024,
      sizeDisplay: `${sizeGiB.toFixed(2)} GB`,
      estimates: {
        '8192': {
          vramBytes: vramGiB * 1024 * 1024 * 1024,
          vramDisplay: `${vramGiB.toFixed(2)} GB`,
        },
      },
    },
  ]),
)
test.describe('Models Gallery - Backend Features', () => {
test.beforeEach(async ({ page }) => {
await page.route('**/api/models*', (route) => {
@@ -196,3 +252,54 @@ test.describe('Models Gallery - Multi-select Filters', () => {
await expect(ttsBtn).not.toHaveClass(/active/)
})
})
// End-to-end checks for the "Fits in my GPU" checkbox on the models page.
// All backend calls are stubbed with the MOCK_* fixtures defined in this file.
test.describe('Models Gallery - Fits In GPU Filter', () => {
  // English label of the filter checkbox.
  const FITS_LABEL = 'Fits in my GPU'

  // Answer a routed request with `payload` serialized as JSON.
  const replyWithJson = (route, payload) => {
    route.fulfill({
      contentType: 'application/json',
      body: JSON.stringify(payload),
    })
  }

  // Locator for the table row(s) whose text contains `modelName`.
  const rowFor = (page, modelName) => page.locator('tr', { hasText: modelName })

  test.beforeEach(async ({ page }) => {
    // Stub the gallery listing, the GPU resources and the per-model estimates.
    await page.route('**/api/models*', (route) => replyWithJson(route, MOCK_MODELS_RESPONSE))
    await page.route('**/api/resources', (route) => replyWithJson(route, MOCK_GPU_RESOURCES_RESPONSE))
    await page.route('**/api/models/estimate/*', (route) => {
      // The model id is the last (URL-encoded) path segment of the request.
      const { pathname } = new URL(route.request().url())
      const segments = pathname.split('/')
      const id = decodeURIComponent(segments[segments.length - 1] || '')
      // Ids without a fixture get an empty estimate object.
      replyWithJson(route, MOCK_ESTIMATES[id] || {})
    })

    await page.goto('/app/models')
    // The "Backend" column header is the signal that the table has rendered.
    await expect(page.locator('th', { hasText: 'Backend' })).toBeVisible({ timeout: 10_000 })
  })

  test('fits checkbox is visible when GPU resources are available', async ({ page }) => {
    await expect(page.getByText(FITS_LABEL)).toBeVisible()
  })

  test('enabling fits filter hides models that exceed available VRAM', async ({ page }) => {
    // stablediffusion-model is mocked at 16 GiB of VRAM against a 12 GiB GPU.
    const oversizedRow = rowFor(page, 'stablediffusion-model')
    await expect(oversizedRow).toBeVisible()

    await page.getByLabel(FITS_LABEL).check()

    await expect(oversizedRow).toHaveCount(0)
    await expect(rowFor(page, 'llama-model')).toBeVisible()
    // Unknown estimate stays visible until an explicit non-fit verdict exists.
    await expect(rowFor(page, 'unknown-model')).toBeVisible()
  })

  test('fits filter state persists after reload', async ({ page }) => {
    await page.getByLabel(FITS_LABEL).check()
    await page.reload()
    await expect(page.getByLabel(FITS_LABEL)).toBeChecked()
  })
})

View File

@@ -23,6 +23,7 @@
"diarization": "Diarisierung",
"embedding": "Embedding",
"rerank": "Rerank",
"fitsGpu": "Passt in meine GPU",
"allBackends": "Alle Backends",
"searchBackends": "Backends suchen..."
},

View File

@@ -29,6 +29,7 @@
"rerank": "Rerank",
"detection": "Detection",
"vad": "VAD",
"fitsGpu": "Fits in my GPU",
"allBackends": "All Backends",
"searchBackends": "Search backends..."
},

View File

@@ -23,6 +23,7 @@
"diarization": "Diarización",
"embedding": "Embedding",
"rerank": "Rerank",
"fitsGpu": "Cabe en mi GPU",
"allBackends": "Todos los backends",
"searchBackends": "Buscar backends..."
},

View File

@@ -23,6 +23,7 @@
"diarization": "Diarizzazione",
"embedding": "Embedding",
"rerank": "Rerank",
"fitsGpu": "Entra nella mia GPU",
"allBackends": "Tutti i backend",
"searchBackends": "Cerca backend..."
},

View File

@@ -23,6 +23,7 @@
"diarization": "说话人分离",
"embedding": "嵌入",
"rerank": "重排",
"fitsGpu": "适合我的 GPU",
"allBackends": "所有后端",
"searchBackends": "搜索后端..."
},

View File

@@ -14,6 +14,7 @@ import React from 'react'
// Selectable context sizes, smallest first.
const CONTEXT_SIZES = [8192, 16384, 32768, 65536, 131072, 262144]
// Short display label for each size, index-aligned with CONTEXT_SIZES
// (8192 -> '8K', ..., 262144 -> '256K').
const CONTEXT_LABELS = CONTEXT_SIZES.map((size) => `${size / 1024}K`)
// localStorage key under which the "fits in my GPU" toggle is stored.
const FITS_FILTER_STORAGE_KEY = 'localai-models-fits-filter'
const FILTERS = [
@@ -59,6 +60,13 @@ export default function Models() {
const [estimates, setEstimates] = useState({})
const [contextSize, setContextSize] = useState(CONTEXT_SIZES[0])
const [confirmDialog, setConfirmDialog] = useState(null)
// "Fits in my GPU" toggle. The lazy initializer restores the stored value
// ('1' means enabled); if reading localStorage throws, the toggle starts off.
const [fitsFilter, setFitsFilter] = useState(() => {
  let stored = null
  try {
    stored = localStorage.getItem(FITS_FILTER_STORAGE_KEY)
  } catch {
    // Storage is unreadable: keep `stored` as null so the result is false.
  }
  return stored === '1'
})
// Total GPU memory for "fits" check
const totalGpuMemory = resources?.aggregate?.total_memory || 0
@@ -240,6 +248,23 @@ export default function Models() {
return vramBytes <= totalGpuMemory * 0.95
}
// Mirror the toggle into localStorage ('1' = on, '0' = off) each time it changes.
useEffect(() => {
  const serialized = fitsFilter ? '1' : '0'
  try {
    localStorage.setItem(FITS_FILTER_STORAGE_KEY, serialized)
  } catch {
    // Ignore storage errors (e.g., private browsing restrictions).
  }
}, [fitsFilter])
// Models to render after applying the "fits in my GPU" toggle.
const visibleModels = models.filter((model) => {
  // Toggle off: every model passes through untouched.
  if (!fitsFilter) return true

  // Estimates are keyed by model name, falling back to the id.
  const estimateKey = model.name || model.id
  const perContext = estimates[estimateKey]?.estimates
  const verdict = fitsGpu(perContext?.[String(contextSize)]?.vramBytes)

  // Only an explicit `false` hides a row, so models whose estimate is still
  // loading remain visible.
  return verdict !== false
})
return (
<div className="page page--wide">
<div className="page-header" style={{ display: 'flex', justifyContent: 'space-between', alignItems: 'flex-start' }}>
@@ -330,22 +355,32 @@ export default function Models() {
<span style={{ fontWeight: 600, minWidth: '3em' }}>
{CONTEXT_LABELS[CONTEXT_SIZES.indexOf(contextSize)]}
</span>
{totalGpuMemory > 0 && (
<label style={{ marginLeft: 'auto', display: 'inline-flex', alignItems: 'center', gap: 'var(--spacing-xs)', color: 'var(--color-text-secondary)', cursor: 'pointer' }}>
<input
type="checkbox"
checked={fitsFilter}
onChange={(e) => setFitsFilter(e.target.checked)}
/>
<span>{t('filters.fitsGpu')}</span>
</label>
)}
</div>
{/* Table */}
{loading ? (
<GalleryLoader />
) : models.length === 0 ? (
) : visibleModels.length === 0 ? (
<div className="empty-state">
<div className="empty-state-icon"><i className="fas fa-search" /></div>
<h2 className="empty-state-title">{t('empty.title')}</h2>
<p className="empty-state-text">
{search || filters.length > 0 || backendFilter ? t('empty.withFilters') : t('empty.noFilters')}
{search || filters.length > 0 || backendFilter || fitsFilter ? t('empty.withFilters') : t('empty.noFilters')}
</p>
{(search || filters.length > 0 || backendFilter) && (
{(search || filters.length > 0 || backendFilter || fitsFilter) && (
<button
className="btn btn-secondary btn-sm"
onClick={() => { handleSearch(''); setFilters([]); setBackendFilter(''); setPage(1) }}
onClick={() => { handleSearch(''); setFilters([]); setBackendFilter(''); setFitsFilter(false); setPage(1) }}
>
<i className="fas fa-times" /> {t('search.clearFilters')}
</button>
@@ -372,7 +407,7 @@ export default function Models() {
</tr>
</thead>
<tbody>
{models.map((model, idx) => {
{visibleModels.map((model, idx) => {
const name = model.name || model.id
const estData = estimates[name]
const sizeDisplay = estData?.sizeDisplay