diff --git a/core/http/react-ui/e2e/models-gallery.spec.js b/core/http/react-ui/e2e/models-gallery.spec.js index b365c409c..5b3f4242f 100644 --- a/core/http/react-ui/e2e/models-gallery.spec.js +++ b/core/http/react-ui/e2e/models-gallery.spec.js @@ -15,6 +15,62 @@ const MOCK_MODELS_RESPONSE = { currentPage: 1, } +const MOCK_GPU_RESOURCES_RESPONSE = { + type: 'gpu', + available: true, + gpus: [ + { + index: 0, + name: 'Mock GPU', + vendor: 'nvidia', + total_vram: 12 * 1024 * 1024 * 1024, + used_vram: 2 * 1024 * 1024 * 1024, + free_vram: 10 * 1024 * 1024 * 1024, + usage_percent: 16.7, + }, + ], + aggregate: { + total_memory: 12 * 1024 * 1024 * 1024, + used_memory: 2 * 1024 * 1024 * 1024, + free_memory: 10 * 1024 * 1024 * 1024, + usage_percent: 16.7, + gpu_count: 1, + }, +} + +const MOCK_ESTIMATES = { + 'llama-model': { + sizeBytes: 4 * 1024 * 1024 * 1024, + sizeDisplay: '4.00 GB', + estimates: { + '8192': { + vramBytes: 8 * 1024 * 1024 * 1024, + vramDisplay: '8.00 GB', + }, + }, + }, + 'whisper-model': { + sizeBytes: 1 * 1024 * 1024 * 1024, + sizeDisplay: '1.00 GB', + estimates: { + '8192': { + vramBytes: 2 * 1024 * 1024 * 1024, + vramDisplay: '2.00 GB', + }, + }, + }, + 'stablediffusion-model': { + sizeBytes: 8 * 1024 * 1024 * 1024, + sizeDisplay: '8.00 GB', + estimates: { + '8192': { + vramBytes: 16 * 1024 * 1024 * 1024, + vramDisplay: '16.00 GB', + }, + }, + }, +} + test.describe('Models Gallery - Backend Features', () => { test.beforeEach(async ({ page }) => { await page.route('**/api/models*', (route) => { @@ -196,3 +252,54 @@ test.describe('Models Gallery - Multi-select Filters', () => { await expect(ttsBtn).not.toHaveClass(/active/) }) }) + +test.describe('Models Gallery - Fits In GPU Filter', () => { + test.beforeEach(async ({ page }) => { + await page.route('**/api/models*', (route) => { + route.fulfill({ + contentType: 'application/json', + body: JSON.stringify(MOCK_MODELS_RESPONSE), + }) + }) + + await page.route('**/api/resources', (route) => { + route.fulfill({ + contentType: 'application/json', + body: JSON.stringify(MOCK_GPU_RESOURCES_RESPONSE), + }) + }) + + await page.route('**/api/models/estimate/*', (route) => { + const url = new URL(route.request().url()) + const id = decodeURIComponent(url.pathname.split('/').pop() || '') + route.fulfill({ + contentType: 'application/json', + body: JSON.stringify(MOCK_ESTIMATES[id] || {}), + }) + }) + + await page.goto('/app/models') + await expect(page.locator('th', { hasText: 'Backend' })).toBeVisible({ timeout: 10_000 }) + }) + + test('fits checkbox is visible when GPU resources are available', async ({ page }) => { + await expect(page.getByText('Fits in my GPU')).toBeVisible() + }) + + test('enabling fits filter hides models that exceed available VRAM', async ({ page }) => { + await expect(page.locator('tr', { hasText: 'stablediffusion-model' })).toBeVisible() + + await page.getByLabel('Fits in my GPU').check() + + await expect(page.locator('tr', { hasText: 'stablediffusion-model' })).toHaveCount(0) + await expect(page.locator('tr', { hasText: 'llama-model' })).toBeVisible() + // Unknown estimate stays visible until an explicit non-fit verdict exists. + await expect(page.locator('tr', { hasText: 'unknown-model' })).toBeVisible() + }) + + test('fits filter state persists after reload', async ({ page }) => { + await page.getByLabel('Fits in my GPU').check() + await page.reload() + await expect(page.getByLabel('Fits in my GPU')).toBeChecked() + }) +}) diff --git a/core/http/react-ui/public/locales/de/models.json b/core/http/react-ui/public/locales/de/models.json index e125a8108..affe4ae1d 100644 --- a/core/http/react-ui/public/locales/de/models.json +++ b/core/http/react-ui/public/locales/de/models.json @@ -23,6 +23,7 @@ "diarization": "Diarisierung", "embedding": "Embedding", "rerank": "Rerank", + "fitsGpu": "Passt in meine GPU", "allBackends": "Alle Backends", "searchBackends": "Backends suchen..." }, diff --git a/core/http/react-ui/public/locales/en/models.json b/core/http/react-ui/public/locales/en/models.json index b503dd187..0e3832ce8 100644 --- a/core/http/react-ui/public/locales/en/models.json +++ b/core/http/react-ui/public/locales/en/models.json @@ -29,6 +29,7 @@ "rerank": "Rerank", "detection": "Detection", "vad": "VAD", + "fitsGpu": "Fits in my GPU", "allBackends": "All Backends", "searchBackends": "Search backends..." }, diff --git a/core/http/react-ui/public/locales/es/models.json b/core/http/react-ui/public/locales/es/models.json index 21c521670..8a3f9399f 100644 --- a/core/http/react-ui/public/locales/es/models.json +++ b/core/http/react-ui/public/locales/es/models.json @@ -23,6 +23,7 @@ "diarization": "Diarización", "embedding": "Embedding", "rerank": "Rerank", + "fitsGpu": "Cabe en mi GPU", "allBackends": "Todos los backends", "searchBackends": "Buscar backends..." }, diff --git a/core/http/react-ui/public/locales/it/models.json b/core/http/react-ui/public/locales/it/models.json index 17e81695d..273e4cb62 100644 --- a/core/http/react-ui/public/locales/it/models.json +++ b/core/http/react-ui/public/locales/it/models.json @@ -23,6 +23,7 @@ "diarization": "Diarizzazione", "embedding": "Embedding", "rerank": "Rerank", + "fitsGpu": "Entra nella mia GPU", "allBackends": "Tutti i backend", "searchBackends": "Cerca backend..." }, diff --git a/core/http/react-ui/public/locales/zh-CN/models.json b/core/http/react-ui/public/locales/zh-CN/models.json index 3d8628dda..19025fd37 100644 --- a/core/http/react-ui/public/locales/zh-CN/models.json +++ b/core/http/react-ui/public/locales/zh-CN/models.json @@ -23,6 +23,7 @@ "diarization": "说话人分离", "embedding": "嵌入", "rerank": "重排", + "fitsGpu": "适合我的 GPU", "allBackends": "所有后端", "searchBackends": "搜索后端..." }, diff --git a/core/http/react-ui/src/pages/Models.jsx b/core/http/react-ui/src/pages/Models.jsx index 20c64b3ac..6ce87f804 100644 --- a/core/http/react-ui/src/pages/Models.jsx +++ b/core/http/react-ui/src/pages/Models.jsx @@ -14,6 +14,7 @@ import React from 'react' const CONTEXT_SIZES = [8192, 16384, 32768, 65536, 131072, 262144] const CONTEXT_LABELS = ['8K', '16K', '32K', '64K', '128K', '256K'] +const FITS_FILTER_STORAGE_KEY = 'localai-models-fits-filter' const FILTERS = [ @@ -59,6 +60,13 @@ export default function Models() { const [estimates, setEstimates] = useState({}) const [contextSize, setContextSize] = useState(CONTEXT_SIZES[0]) const [confirmDialog, setConfirmDialog] = useState(null) + const [fitsFilter, setFitsFilter] = useState(() => { + try { + return localStorage.getItem(FITS_FILTER_STORAGE_KEY) === '1' + } catch { + return false + } + }) // Total GPU memory for "fits" check const totalGpuMemory = resources?.aggregate?.total_memory || 0 @@ -240,6 +248,23 @@ export default function Models() { return vramBytes <= totalGpuMemory * 0.95 } + useEffect(() => { + try { + localStorage.setItem(FITS_FILTER_STORAGE_KEY, fitsFilter ? '1' : '0') + } catch { + // Ignore storage errors (e.g., private browsing restrictions). + } + }, [fitsFilter]) + + const visibleModels = models.filter((model) => { + if (!fitsFilter) return true + const name = model.name || model.id + const vramBytes = estimates[name]?.estimates?.[String(contextSize)]?.vramBytes + const fit = fitsGpu(vramBytes) + // Keep models visible while estimate is still loading; hide only explicit non-fits. + return fit !== false + }) + return (
@@ -330,22 +355,32 @@ export default function Models() { {CONTEXT_LABELS[CONTEXT_SIZES.indexOf(contextSize)]} + {totalGpuMemory > 0 && ( + + )}
{/* Table */} {loading ? ( - ) : models.length === 0 ? ( + ) : visibleModels.length === 0 ? (

{t('empty.title')}

- {search || filters.length > 0 || backendFilter ? t('empty.withFilters') : t('empty.noFilters')} + {search || filters.length > 0 || backendFilter || fitsFilter ? t('empty.withFilters') : t('empty.noFilters')}

- {(search || filters.length > 0 || backendFilter) && ( + {(search || filters.length > 0 || backendFilter || fitsFilter) && ( @@ -372,7 +407,7 @@ export default function Models() { - {models.map((model, idx) => { + {visibleModels.map((model, idx) => { const name = model.name || model.id const estData = estimates[name] const sizeDisplay = estData?.sizeDisplay