UI: add 'Fits in my GPU' filter on Install Models (#10017)

* feat(ui): add GPU fit filter on models install page

* Delete docs/vram-fits-filter-backend-optionals.md

Signed-off-by: Siddharth More <siddimore@gmail.com>

---------

Signed-off-by: Siddharth More <siddimore@gmail.com>
2026-07-20 13:13:59 -04:00 · 2026-05-27 06:17:44 -07:00
parent 4d01298048
commit c9a1a7e6a0
7 changed files with 152 additions and 5 deletions
--- a/core/http/react-ui/e2e/models-gallery.spec.js
+++ b/core/http/react-ui/e2e/models-gallery.spec.js
@@ -15,6 +15,62 @@ const MOCK_MODELS_RESPONSE = {
  currentPage: 1,
 }

+const MOCK_GPU_RESOURCES_RESPONSE = { // Mock body served for **/api/resources in the "Fits In GPU Filter" tests.
+  type: 'gpu',
+  available: true,
+  gpus: [
+    {
+      index: 0,
+      name: 'Mock GPU',
+      vendor: 'nvidia',
+      total_vram: 12 * 1024 * 1024 * 1024, // 12 * 2^30 (12 GiB)
+      used_vram: 2 * 1024 * 1024 * 1024, // 2 * 2^30 (2 GiB)
+      free_vram: 10 * 1024 * 1024 * 1024, // 10 * 2^30 (10 GiB)
+      usage_percent: 16.7, // 2 / 12, rounded to one decimal
+    },
+  ],
+  aggregate: {
+    total_memory: 12 * 1024 * 1024 * 1024, // Models.jsx reads aggregate.total_memory as totalGpuMemory for the "fits" check
+    used_memory: 2 * 1024 * 1024 * 1024,
+    free_memory: 10 * 1024 * 1024 * 1024,
+    usage_percent: 16.7,
+    gpu_count: 1, // matches the single entry in `gpus`
+  },
+}
+
+const MOCK_ESTIMATES = { // Per-model estimate fixtures, looked up by the last path segment of **/api/models/estimate/* requests.
+  'llama-model': {
+    sizeBytes: 4 * 1024 * 1024 * 1024,
+    sizeDisplay: '4.00 GB',
+    estimates: {
+      '8192': { // keyed by context size as a string; 8192 is CONTEXT_SIZES[0], the initial contextSize in Models.jsx
+        vramBytes: 8 * 1024 * 1024 * 1024, // 8 GiB: below the mocked 12 GiB total, so the test expects this row to stay
+        vramDisplay: '8.00 GB',
+      },
+    },
+  },
+  'whisper-model': {
+    sizeBytes: 1 * 1024 * 1024 * 1024,
+    sizeDisplay: '1.00 GB',
+    estimates: {
+      '8192': {
+        vramBytes: 2 * 1024 * 1024 * 1024, // 2 GiB
+        vramDisplay: '2.00 GB',
+      },
+    },
+  },
+  'stablediffusion-model': {
+    sizeBytes: 8 * 1024 * 1024 * 1024,
+    sizeDisplay: '8.00 GB',
+    estimates: {
+      '8192': {
+        vramBytes: 16 * 1024 * 1024 * 1024, // 16 GiB: above the mocked 12 GiB total, so the test expects this row to be hidden
+        vramDisplay: '16.00 GB',
+      },
+    },
+  },
+}
+
 test.describe('Models Gallery - Backend Features', () => {
  test.beforeEach(async ({ page }) => {
    await page.route('**/api/models*', (route) => {
@@ -196,3 +252,54 @@ test.describe('Models Gallery - Multi-select Filters', () => {
    await expect(ttsBtn).not.toHaveClass(/active/)
  })
 })
+
+test.describe('Models Gallery - Fits In GPU Filter', () => { // E2E coverage for the "Fits in my GPU" checkbox on /app/models.
+  test.beforeEach(async ({ page }) => {
+    await page.route('**/api/models*', (route) => { // stub the gallery listing with the shared mock response
+      route.fulfill({
+        contentType: 'application/json',
+        body: JSON.stringify(MOCK_MODELS_RESPONSE),
+      })
+    })
+
+    await page.route('**/api/resources', (route) => { // stub GPU resources: one GPU, 12 GiB aggregate total
+      route.fulfill({
+        contentType: 'application/json',
+        body: JSON.stringify(MOCK_GPU_RESOURCES_RESPONSE),
+      })
+    })
+
+    await page.route('**/api/models/estimate/*', (route) => { // stub per-model estimates
+      const url = new URL(route.request().url())
+      const id = decodeURIComponent(url.pathname.split('/').pop() || '') // last path segment is used as the fixture key
+      route.fulfill({
+        contentType: 'application/json',
+        body: JSON.stringify(MOCK_ESTIMATES[id] || {}), // ids without a fixture get an empty object (no estimate)
+      })
+    })
+
+    await page.goto('/app/models')
+    await expect(page.locator('th', { hasText: 'Backend' })).toBeVisible({ timeout: 10_000 }) // wait for the "Backend" header cell before each test body runs
+  })
+
+  test('fits checkbox is visible when GPU resources are available', async ({ page }) => {
+    await expect(page.getByText('Fits in my GPU')).toBeVisible() // text comes from the en locale key filters.fitsGpu
+  })
+
+  test('enabling fits filter hides models that exceed available VRAM', async ({ page }) => {
+    await expect(page.locator('tr', { hasText: 'stablediffusion-model' })).toBeVisible() // row is present while the filter is off
+
+    await page.getByLabel('Fits in my GPU').check()
+
+    await expect(page.locator('tr', { hasText: 'stablediffusion-model' })).toHaveCount(0) // 16 GiB estimate vs 12 GiB total: filtered out
+    await expect(page.locator('tr', { hasText: 'llama-model' })).toBeVisible() // 8 GiB estimate vs 12 GiB total: kept
+    // Unknown estimate stays visible until an explicit non-fit verdict exists.
+    await expect(page.locator('tr', { hasText: 'unknown-model' })).toBeVisible() // no MOCK_ESTIMATES entry; presumably listed in MOCK_MODELS_RESPONSE (defined outside this hunk)
+  })
+
+  test('fits filter state persists after reload', async ({ page }) => {
+    await page.getByLabel('Fits in my GPU').check()
+    await page.reload() // Models.jsx restores the checkbox from localStorage on mount
+    await expect(page.getByLabel('Fits in my GPU')).toBeChecked()
+  })
+})
--- a/core/http/react-ui/public/locales/de/models.json
+++ b/core/http/react-ui/public/locales/de/models.json
@@ -23,6 +23,7 @@
    "diarization": "Diarisierung",
    "embedding": "Embedding",
    "rerank": "Rerank",
+    "fitsGpu": "Passt in meine GPU",
    "allBackends": "Alle Backends",
    "searchBackends": "Backends suchen..."
  },
--- a/core/http/react-ui/public/locales/en/models.json
+++ b/core/http/react-ui/public/locales/en/models.json
@@ -29,6 +29,7 @@
    "rerank": "Rerank",
    "detection": "Detection",
    "vad": "VAD",
+    "fitsGpu": "Fits in my GPU",
    "allBackends": "All Backends",
    "searchBackends": "Search backends..."
  },
--- a/core/http/react-ui/public/locales/es/models.json
+++ b/core/http/react-ui/public/locales/es/models.json
@@ -23,6 +23,7 @@
    "diarization": "Diarización",
    "embedding": "Embedding",
    "rerank": "Rerank",
+    "fitsGpu": "Cabe en mi GPU",
    "allBackends": "Todos los backends",
    "searchBackends": "Buscar backends..."
  },
--- a/core/http/react-ui/public/locales/it/models.json
+++ b/core/http/react-ui/public/locales/it/models.json
@@ -23,6 +23,7 @@
    "diarization": "Diarizzazione",
    "embedding": "Embedding",
    "rerank": "Rerank",
+    "fitsGpu": "Entra nella mia GPU",
    "allBackends": "Tutti i backend",
    "searchBackends": "Cerca backend..."
  },
--- a/core/http/react-ui/public/locales/zh-CN/models.json
+++ b/core/http/react-ui/public/locales/zh-CN/models.json
@@ -23,6 +23,7 @@
    "diarization": "说话人分离",
    "embedding": "嵌入",
    "rerank": "重排",
+    "fitsGpu": "适合我的 GPU",
    "allBackends": "所有后端",
    "searchBackends": "搜索后端..."
  },
--- a/core/http/react-ui/src/pages/Models.jsx
+++ b/core/http/react-ui/src/pages/Models.jsx
@@ -14,6 +14,7 @@ import React from 'react'

 const CONTEXT_SIZES = [8192, 16384, 32768, 65536, 131072, 262144]
 const CONTEXT_LABELS = ['8K', '16K', '32K', '64K', '128K', '256K']
+const FITS_FILTER_STORAGE_KEY = 'localai-models-fits-filter' // localStorage key for the "Fits in my GPU" checkbox; stored value is '1' (on) or '0' (off)


 const FILTERS = [
@@ -59,6 +60,13 @@ export default function Models() {
  const [estimates, setEstimates] = useState({})
  const [contextSize, setContextSize] = useState(CONTEXT_SIZES[0])
  const [confirmDialog, setConfirmDialog] = useState(null)
+  const [fitsFilter, setFitsFilter] = useState(() => { // initializer function: restore the persisted checkbox state
+    try {
+      return localStorage.getItem(FITS_FILTER_STORAGE_KEY) === '1' // any other value, including a missing key, means off
+    } catch {
+      return false // reading storage threw; start with the filter off
+    }
+  })

  // Total GPU memory for "fits" check
  const totalGpuMemory = resources?.aggregate?.total_memory || 0
@@ -240,6 +248,23 @@ export default function Models() {
    return vramBytes <= totalGpuMemory * 0.95
  }

+  useEffect(() => { // write the checkbox state back to localStorage whenever fitsFilter changes
+    try {
+      localStorage.setItem(FITS_FILTER_STORAGE_KEY, fitsFilter ? '1' : '0')
+    } catch {
+      // Ignore storage errors (e.g., private browsing restrictions).
+    }
+  }, [fitsFilter])
+
+  const visibleModels = models.filter((model) => { // client-side filter applied to the models currently held in state
+    if (!fitsFilter) return true // filter off: keep every model
+    const name = model.name || model.id // same key expression the table rows use to look up `estimates`
+    const vramBytes = estimates[name]?.estimates?.[String(contextSize)]?.vramBytes // undefined when no estimate exists for the selected context size
+    const fit = fitsGpu(vramBytes) // NOTE(review): only the final comparison of fitsGpu is visible here; assumes it yields a non-false value when the estimate is missing - confirm
+    // Keep models visible while estimate is still loading; hide only explicit non-fits.
+    return fit !== false
+  })
+
  return (
    <div className="page page--wide">
      <div className="page-header" style={{ display: 'flex', justifyContent: 'space-between', alignItems: 'flex-start' }}>
@@ -330,22 +355,32 @@ export default function Models() {
        <span style={{ fontWeight: 600, minWidth: '3em' }}>
          {CONTEXT_LABELS[CONTEXT_SIZES.indexOf(contextSize)]}
        </span>
+        {totalGpuMemory > 0 && (
+          <label style={{ marginLeft: 'auto', display: 'inline-flex', alignItems: 'center', gap: 'var(--spacing-xs)', color: 'var(--color-text-secondary)', cursor: 'pointer' }}>
+            <input
+              type="checkbox"
+              checked={fitsFilter}
+              onChange={(e) => setFitsFilter(e.target.checked)}
+            />
+            <span>{t('filters.fitsGpu')}</span>
+          </label>
+        )}
      </div>

      {/* Table */}
      {loading ? (
        <GalleryLoader />
-      ) : models.length === 0 ? (
+      ) : visibleModels.length === 0 ? (
        <div className="empty-state">
          <div className="empty-state-icon"><i className="fas fa-search" /></div>
          <h2 className="empty-state-title">{t('empty.title')}</h2>
          <p className="empty-state-text">
-            {search || filters.length > 0 || backendFilter ? t('empty.withFilters') : t('empty.noFilters')}
+            {search || filters.length > 0 || backendFilter || fitsFilter ? t('empty.withFilters') : t('empty.noFilters')}
          </p>
-          {(search || filters.length > 0 || backendFilter) && (
+          {(search || filters.length > 0 || backendFilter || fitsFilter) && (
            <button
              className="btn btn-secondary btn-sm"
-              onClick={() => { handleSearch(''); setFilters([]); setBackendFilter(''); setPage(1) }}
+              onClick={() => { handleSearch(''); setFilters([]); setBackendFilter(''); setFitsFilter(false); setPage(1) }}
            >
              <i className="fas fa-times" /> {t('search.clearFilters')}
            </button>
@@ -372,7 +407,7 @@ export default function Models() {
                </tr>
              </thead>
              <tbody>
-                {models.map((model, idx) => {
+                {visibleModels.map((model, idx) => {
                  const name = model.name || model.id
                  const estData = estimates[name]
                  const sizeDisplay = estData?.sizeDisplay