diff --git a/core/http/react-ui/e2e/models-gallery.spec.js b/core/http/react-ui/e2e/models-gallery.spec.js
index b365c409c..5b3f4242f 100644
--- a/core/http/react-ui/e2e/models-gallery.spec.js
+++ b/core/http/react-ui/e2e/models-gallery.spec.js
@@ -15,6 +15,62 @@ const MOCK_MODELS_RESPONSE = {
   currentPage: 1,
 }
 
+const MOCK_GPU_RESOURCES_RESPONSE = { // Payload served for the mocked `/api/resources` route: one GPU with 12 GiB.
+  type: 'gpu',
+  available: true,
+  gpus: [
+    {
+      index: 0,
+      name: 'Mock GPU',
+      vendor: 'nvidia',
+      total_vram: 12 * 1024 * 1024 * 1024, // 12 GiB
+      used_vram: 2 * 1024 * 1024 * 1024, // 2 GiB
+      free_vram: 10 * 1024 * 1024 * 1024, // 10 GiB
+      usage_percent: 16.7,
+    },
+  ],
+  aggregate: {
+    total_memory: 12 * 1024 * 1024 * 1024, // The Models page reads this as its total GPU memory for the fits check.
+    used_memory: 2 * 1024 * 1024 * 1024,
+    free_memory: 10 * 1024 * 1024 * 1024,
+    usage_percent: 16.7, // 2 GiB used of 12 GiB, rounded.
+    gpu_count: 1,
+  },
+}
+
+const MOCK_ESTIMATES = { // Payloads for the mocked `/api/models/estimate/<id>` route, keyed by model id.
+  'llama-model': { // 8 GiB estimate: below the page's limit of 95% of the 12 GiB mock GPU.
+    sizeBytes: 4 * 1024 * 1024 * 1024,
+    sizeDisplay: '4.00 GB',
+    estimates: {
+      '8192': { // Keyed by context size as a string; 8192 is the page's initial context size.
+        vramBytes: 8 * 1024 * 1024 * 1024,
+        vramDisplay: '8.00 GB',
+      },
+    },
+  },
+  'whisper-model': { // 2 GiB estimate: also below the limit.
+    sizeBytes: 1 * 1024 * 1024 * 1024,
+    sizeDisplay: '1.00 GB',
+    estimates: {
+      '8192': {
+        vramBytes: 2 * 1024 * 1024 * 1024,
+        vramDisplay: '2.00 GB',
+      },
+    },
+  },
+  'stablediffusion-model': { // 16 GiB estimate: above the limit, so the fits filter is expected to hide it.
+    sizeBytes: 8 * 1024 * 1024 * 1024,
+    sizeDisplay: '8.00 GB',
+    estimates: {
+      '8192': {
+        vramBytes: 16 * 1024 * 1024 * 1024,
+        vramDisplay: '16.00 GB',
+      },
+    },
+  },
+}
+
 test.describe('Models Gallery - Backend Features', () => {
   test.beforeEach(async ({ page }) => {
     await page.route('**/api/models*', (route) => {
@@ -196,3 +252,54 @@ test.describe('Models Gallery - Multi-select Filters', () => {
     await expect(ttsBtn).not.toHaveClass(/active/)
   })
 })
+
+test.describe('Models Gallery - Fits In GPU Filter', () => {
+  test.beforeEach(async ({ page }) => { // Mock the endpoints these tests rely on, then open the gallery.
+    await page.route('**/api/models*', (route) => {
+      route.fulfill({
+        contentType: 'application/json',
+        body: JSON.stringify(MOCK_MODELS_RESPONSE),
+      })
+    })
+    // Serve a single mock GPU so the page receives a non-zero aggregate total_memory.
+    await page.route('**/api/resources', (route) => {
+      route.fulfill({
+        contentType: 'application/json',
+        body: JSON.stringify(MOCK_GPU_RESOURCES_RESPONSE),
+      })
+    })
+    // Answer per-model estimate requests from MOCK_ESTIMATES, looked up by the last URL path segment.
+    await page.route('**/api/models/estimate/*', (route) => {
+      const url = new URL(route.request().url())
+      const id = decodeURIComponent(url.pathname.split('/').pop() || '')
+      route.fulfill({
+        contentType: 'application/json',
+        body: JSON.stringify(MOCK_ESTIMATES[id] || {}), // Ids without a mock entry get an empty object.
+      })
+    })
+    // Open the gallery and wait until the table's "Backend" column header is rendered.
+    await page.goto('/app/models')
+    await expect(page.locator('th', { hasText: 'Backend' })).toBeVisible({ timeout: 10_000 })
+  })
+
+  test('fits checkbox is visible when GPU resources are available', async ({ page }) => {
+    await expect(page.getByText('Fits in my GPU')).toBeVisible()
+  })
+
+  test('enabling fits filter hides models that exceed available VRAM', async ({ page }) => {
+    await expect(page.locator('tr', { hasText: 'stablediffusion-model' })).toBeVisible()
+    // Turn the filter on; of the mocked estimates only the 16 GiB one exceeds 95% of the 12 GiB GPU.
+    await page.getByLabel('Fits in my GPU').check()
+
+    await expect(page.locator('tr', { hasText: 'stablediffusion-model' })).toHaveCount(0)
+    await expect(page.locator('tr', { hasText: 'llama-model' })).toBeVisible()
+    // Unknown estimate stays visible until an explicit non-fit verdict exists.
+    await expect(page.locator('tr', { hasText: 'unknown-model' })).toBeVisible()
+  })
+
+  test('fits filter state persists after reload', async ({ page }) => {
+    await page.getByLabel('Fits in my GPU').check()
+    await page.reload() // The page is expected to restore the checkbox state from localStorage.
+    await expect(page.getByLabel('Fits in my GPU')).toBeChecked()
+  })
+})
diff --git a/core/http/react-ui/public/locales/de/models.json b/core/http/react-ui/public/locales/de/models.json
index e125a8108..affe4ae1d 100644
--- a/core/http/react-ui/public/locales/de/models.json
+++ b/core/http/react-ui/public/locales/de/models.json
@@ -23,6 +23,7 @@
     "diarization": "Diarisierung",
     "embedding": "Embedding",
     "rerank": "Rerank",
+    "fitsGpu": "Passt in meine GPU",
     "allBackends": "Alle Backends",
     "searchBackends": "Backends suchen..."
   },
diff --git a/core/http/react-ui/public/locales/en/models.json b/core/http/react-ui/public/locales/en/models.json
index b503dd187..0e3832ce8 100644
--- a/core/http/react-ui/public/locales/en/models.json
+++ b/core/http/react-ui/public/locales/en/models.json
@@ -29,6 +29,7 @@
     "rerank": "Rerank",
     "detection": "Detection",
     "vad": "VAD",
+    "fitsGpu": "Fits in my GPU",
     "allBackends": "All Backends",
     "searchBackends": "Search backends..."
   },
diff --git a/core/http/react-ui/public/locales/es/models.json b/core/http/react-ui/public/locales/es/models.json
index 21c521670..8a3f9399f 100644
--- a/core/http/react-ui/public/locales/es/models.json
+++ b/core/http/react-ui/public/locales/es/models.json
@@ -23,6 +23,7 @@
     "diarization": "Diarización",
     "embedding": "Embedding",
     "rerank": "Rerank",
+    "fitsGpu": "Cabe en mi GPU",
     "allBackends": "Todos los backends",
     "searchBackends": "Buscar backends..."
   },
diff --git a/core/http/react-ui/public/locales/it/models.json b/core/http/react-ui/public/locales/it/models.json
index 17e81695d..273e4cb62 100644
--- a/core/http/react-ui/public/locales/it/models.json
+++ b/core/http/react-ui/public/locales/it/models.json
@@ -23,6 +23,7 @@
     "diarization": "Diarizzazione",
     "embedding": "Embedding",
     "rerank": "Rerank",
+    "fitsGpu": "Entra nella mia GPU",
     "allBackends": "Tutti i backend",
     "searchBackends": "Cerca backend..."
   },
diff --git a/core/http/react-ui/public/locales/zh-CN/models.json b/core/http/react-ui/public/locales/zh-CN/models.json
index 3d8628dda..19025fd37 100644
--- a/core/http/react-ui/public/locales/zh-CN/models.json
+++ b/core/http/react-ui/public/locales/zh-CN/models.json
@@ -23,6 +23,7 @@
     "diarization": "说话人分离",
     "embedding": "嵌入",
     "rerank": "重排",
+    "fitsGpu": "适合我的 GPU",
     "allBackends": "所有后端",
     "searchBackends": "搜索后端..."
   },
diff --git a/core/http/react-ui/src/pages/Models.jsx b/core/http/react-ui/src/pages/Models.jsx
index 20c64b3ac..6ce87f804 100644
--- a/core/http/react-ui/src/pages/Models.jsx
+++ b/core/http/react-ui/src/pages/Models.jsx
@@ -14,6 +14,7 @@ import React from 'react'
 
 const CONTEXT_SIZES = [8192, 16384, 32768, 65536, 131072, 262144]
 const CONTEXT_LABELS = ['8K', '16K', '32K', '64K', '128K', '256K']
+const FITS_FILTER_STORAGE_KEY = 'localai-models-fits-filter' // localStorage key for the fits-in-GPU filter ('1' = on, '0' = off).
 
 
 const FILTERS = [
@@ -59,6 +60,13 @@ export default function Models() {
   const [estimates, setEstimates] = useState({})
   const [contextSize, setContextSize] = useState(CONTEXT_SIZES[0])
   const [confirmDialog, setConfirmDialog] = useState(null)
+  const [fitsFilter, setFitsFilter] = useState(() => { // Initializer: restore the persisted filter state from localStorage.
+    try {
+      return localStorage.getItem(FITS_FILTER_STORAGE_KEY) === '1' // Anything other than '1' (including a missing key) means off.
+    } catch {
+      return false // localStorage access threw: start with the filter off.
+    }
+  })
 
   // Total GPU memory for "fits" check
   const totalGpuMemory = resources?.aggregate?.total_memory || 0
@@ -240,6 +248,23 @@ export default function Models() {
     return vramBytes <= totalGpuMemory * 0.95
   }
 
+  useEffect(() => { // Persist the fits filter to localStorage; the effect depends only on fitsFilter.
+    try {
+      localStorage.setItem(FITS_FILTER_STORAGE_KEY, fitsFilter ? '1' : '0')
+    } catch {
+      // Ignore storage errors (e.g., private browsing restrictions).
+    }
+  }, [fitsFilter])
+
+  const visibleModels = models.filter((model) => { // Rows to render: every model, or only those without an explicit non-fit verdict.
+    if (!fitsFilter) return true
+    const name = model.name || model.id // Same key used to look up estimates for the table rows.
+    const vramBytes = estimates[name]?.estimates?.[String(contextSize)]?.vramBytes // undefined when no estimate is stored for this model and context size.
+    const fit = fitsGpu(vramBytes)
+    // Keep models visible while estimate is still loading; hide only explicit non-fits.
+    return fit !== false
+  })
+
   return (
     <div className="page page--wide">
       <div className="page-header" style={{ display: 'flex', justifyContent: 'space-between', alignItems: 'flex-start' }}>
@@ -330,22 +355,32 @@ export default function Models() {
         <span style={{ fontWeight: 600, minWidth: '3em' }}>
           {CONTEXT_LABELS[CONTEXT_SIZES.indexOf(contextSize)]}
         </span>
+        {totalGpuMemory > 0 && (
+          <label style={{ marginLeft: 'auto', display: 'inline-flex', alignItems: 'center', gap: 'var(--spacing-xs)', color: 'var(--color-text-secondary)', cursor: 'pointer' }}>
+            <input
+              type="checkbox"
+              checked={fitsFilter}
+              onChange={(e) => setFitsFilter(e.target.checked)}
+            />
+            <span>{t('filters.fitsGpu')}</span>
+          </label>
+        )}
       </div>
 
       {/* Table */}
       {loading ? (
         <GalleryLoader />
-      ) : models.length === 0 ? (
+      ) : visibleModels.length === 0 ? (
         <div className="empty-state">
           <div className="empty-state-icon"><i className="fas fa-search" /></div>
           <h2 className="empty-state-title">{t('empty.title')}</h2>
           <p className="empty-state-text">
-            {search || filters.length > 0 || backendFilter ? t('empty.withFilters') : t('empty.noFilters')}
+            {search || filters.length > 0 || backendFilter || fitsFilter ? t('empty.withFilters') : t('empty.noFilters')}
           </p>
-          {(search || filters.length > 0 || backendFilter) && (
+          {(search || filters.length > 0 || backendFilter || fitsFilter) && (
             <button
               className="btn btn-secondary btn-sm"
-              onClick={() => { handleSearch(''); setFilters([]); setBackendFilter(''); setPage(1) }}
+              onClick={() => { handleSearch(''); setFilters([]); setBackendFilter(''); setFitsFilter(false); setPage(1) }}
             >
               <i className="fas fa-times" /> {t('search.clearFilters')}
             </button>
@@ -372,7 +407,7 @@ export default function Models() {
                 </tr>
               </thead>
               <tbody>
-                {models.map((model, idx) => {
+                {visibleModels.map((model, idx) => {
                   const name = model.name || model.id
                   const estData = estimates[name]
                   const sizeDisplay = estData?.sizeDisplay