feat(ui): data-driven hardware model recommendations + gallery surfacing (#10500)

* feat(ui): make hardware starter models data-driven

  The empty-state starter widget recommended from a hardcoded list, which drifts as the gallery evolves. Add useRecommendedModels: it queries the live gallery for chat-capable models (their natural curated order, since the gallery exposes no popularity signal), estimates size/VRAM for the top candidates via the existing estimate endpoint, and ranks by hardware fit - smallest on CPU-only boxes, largest-that-fits on GPUs. StarterModels now renders those live picks and keeps the curated static list only as an offline/trimmed-gallery fallback.

  Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
  Assisted-by: Claude:claude-opus-4-8 [Claude Code]

* feat(ui): recommend models for your hardware in the gallery

  Hardware-aware recommendations were only shown on the first-run empty state. Surface them on the main Models gallery too: a dismissible "Recommended for your hardware" strip at the top, sharing the useRecommendedModels fit-ranking with the starter widget. CPU-only boxes get small models; GPUs get the largest picks that fit VRAM, with size and VRAM shown per card. One-click install; dismissal persists per browser.

  Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
  Assisted-by: Claude:claude-opus-4-8 [Claude Code]

* feat(ui): gpu-mid tier + NVIDIA NVFP4 model recommendations

  Refine the hardware recommendation tiers and curated picks:

  - Add a gpu-mid tier (8-24GB VRAM) between gpu-small and gpu-large, so ~27B-class models are suggested separately from the 30B+ large tier.
  - Detect NVIDIA GPUs (resources.gpus[].vendor) and, on NVIDIA only, prefer NVFP4 + MTP variants (Blackwell-optimised); NVFP4 models are filtered out of recommendations on non-NVIDIA hardware where they can't run. This applies to both the live ranking and the static fallback, with an NVFP4 badge shown on those picks.
  - Refresh the curated fallback to current models: Gemma-4 QAT Q4 builds at every tier, low qwen3.5 (4B distilled / 9B) on CPU/small, qwen3.6-27b and MTP variants at mid, qwen3.6/qwen3.5 35B-A3B apex/distilled at large. All names verified against gallery/index.yaml.

  Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
  Assisted-by: Claude:claude-opus-4-8 [Claude Code]

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Co-authored-by: Ettore Di Giacinto <mudler@localai.io>
2026-06-25 00:59:28 -04:00 · 2026-06-25 00:22:45 +02:00
parent 0d6de15ae9
commit f88981cdce
7 changed files with 344 additions and 65 deletions
--- a/core/http/react-ui/public/locales/en/home.json
+++ b/core/http/react-ui/public/locales/en/home.json
@@ -82,6 +82,7 @@
    "tier": {
      "cpu": "CPU-only",
      "gpu-small": "GPU",
+      "gpu-mid": "GPU",
      "gpu-large": "GPU"
    },
    "cpuNote": "No GPU detected — these small models stay responsive on CPU.",
--- a/core/http/react-ui/public/locales/en/models.json
+++ b/core/http/react-ui/public/locales/en/models.json
@@ -2,6 +2,16 @@
  "title": "Install Models",
  "subtitle": "Browse and install AI models from the gallery",
  "models": "Models",
+  "recommended": {
+    "title": "Recommended for your hardware",
+    "cpuNote": "No GPU detected - small models that stay responsive on CPU.",
+    "gpuNote": "Sized to fit your available VRAM with room for context.",
+    "install": "Install",
+    "installing": "Installing",
+    "installStarted": "Installing {{model}}…",
+    "installFailed": "Install failed: {{message}}",
+    "dismiss": "Dismiss recommendations"
+  },
  "stats": {
    "available": "Available",
    "installed": "Installed"
--- a/core/http/react-ui/src/App.css
+++ b/core/http/react-ui/src/App.css
@@ -6409,6 +6409,9 @@ select.input {
  font-size: 0.875rem;
  word-break: break-all;
 }
+.home-starters-badge {
+  font-size: 0.625rem;
+}
 .home-starters-size {
  margin-left: auto;
  font-size: 0.75rem;
@@ -6416,6 +6419,74 @@ select.input {
  white-space: nowrap;
 }

+/* ──────────────────── Models gallery: recommended-for-your-hardware strip ──────────────────── */
+
+.rec-models {
+  margin-bottom: var(--spacing-md);
+  padding: var(--spacing-md) var(--spacing-lg);
+}
+.rec-models-head {
+  display: flex;
+  align-items: flex-start;
+  justify-content: space-between;
+  gap: var(--spacing-md);
+}
+.rec-models-title {
+  display: flex;
+  align-items: center;
+  gap: var(--spacing-sm);
+  flex-wrap: wrap;
+}
+.rec-models-title i {
+  color: var(--color-primary);
+}
+.rec-models-note {
+  font-size: 0.8125rem;
+  color: var(--color-text-secondary);
+}
+.rec-models-dismiss {
+  background: none;
+  border: none;
+  color: var(--color-text-muted);
+  cursor: pointer;
+  padding: 4px;
+  flex-shrink: 0;
+}
+.rec-models-dismiss:hover {
+  color: var(--color-text-primary);
+}
+.rec-models-grid {
+  display: grid;
+  grid-template-columns: repeat(auto-fill, minmax(220px, 1fr));
+  gap: var(--spacing-sm);
+  margin-top: var(--spacing-md);
+}
+.rec-models-item {
+  display: flex;
+  flex-direction: column;
+  gap: var(--spacing-xs);
+  padding: var(--spacing-sm) var(--spacing-md);
+  border: 1px solid var(--color-border-subtle);
+  border-radius: var(--radius-md);
+  background: var(--color-bg-primary);
+}
+.rec-models-item-name {
+  font-weight: 500;
+  font-size: 0.8125rem;
+  word-break: break-all;
+}
+.rec-models-item-meta {
+  display: flex;
+  gap: var(--spacing-sm);
+  font-size: 0.75rem;
+  color: var(--color-text-muted);
+}
+.rec-models-item-fit {
+  display: inline-flex;
+  align-items: center;
+  gap: 4px;
+}
+
 /* ──────────────────── Home: drop-in endpoint / API compatibility ──────────────────── */

 .home-connect {
--- a/core/http/react-ui/src/components/RecommendedModels.jsx
+++ b/core/http/react-ui/src/components/RecommendedModels.jsx
@@ -0,0 +1,86 @@
+import { useState } from 'react'
+import { useTranslation } from 'react-i18next'
+import { modelsApi } from '../utils/api'
+import { useRecommendedModels, isNvfp4Name } from '../hooks/useRecommendedModels'
+
+const DISMISS_KEY = 'localai_rec_models_dismissed'
+
+// "Recommended for your hardware" strip shown at the top of the Models gallery.
+// Picks come from useRecommendedModels (the same hardware-fit ranking the
+// empty-state starter widget uses). Renders nothing while loading, when there
+// are no picks, or once dismissed; the dismissal is persisted in localStorage
+// because the gallery is a repeat-visit surface and shouldn't nag.
+//
+// Props: addToast(message, level) - optional toast callback for install results.
+export default function RecommendedModels({ addToast }) {
+  const { t } = useTranslation('models')
+  const { recommended, tier, loading } = useRecommendedModels({ count: 4 })
+  const [installing, setInstalling] = useState(() => new Set())
+  const [dismissed, setDismissed] = useState(() => {
+    // Storage access may throw; in that case behave as "not dismissed".
+    try { return localStorage.getItem(DISMISS_KEY) === '1' } catch { return false }
+  })
+
+  if (loading || dismissed) return null
+  if (!recommended || recommended.length === 0) return null
+
+  const dismiss = () => {
+    // Best effort: still hide the strip for this mount if persisting fails.
+    try { localStorage.setItem(DISMISS_KEY, '1') } catch { /* ignore */ }
+    setDismissed(true)
+  }
+
+  const install = async (name) => {
+    setInstalling(prev => new Set(prev).add(name))
+    try {
+      await modelsApi.install(name)
+      // On success the name stays in `installing`, so the button remains disabled.
+      addToast?.(t('recommended.installStarted', { model: name }), 'success')
+    } catch (err) {
+      // A rejection is not guaranteed to be an Error: reading `.message` off
+      // null/undefined would throw here (skipping the reset below), and a
+      // thrown string would otherwise be toasted as "undefined".
+      const message = err?.message ?? String(err)
+      addToast?.(t('recommended.installFailed', { message }), 'error')
+      setInstalling(prev => {
+        const next = new Set(prev)
+        next.delete(name)
+        return next
+      })
+    }
+  }
+
+  const isGpu = tier.id !== 'cpu'
+
+  return (
+    <div className="rec-models card">
+      <div className="rec-models-head">
+        <div className="rec-models-title">
+          <i className={`fas ${isGpu ? 'fa-microchip' : 'fa-memory'}`} aria-hidden="true" />
+          <strong>{t('recommended.title')}</strong>
+          <span className="rec-models-note">{isGpu ? t('recommended.gpuNote') : t('recommended.cpuNote')}</span>
+        </div>
+        <button type="button" className="rec-models-dismiss" onClick={dismiss} aria-label={t('recommended.dismiss')} title={t('recommended.dismiss')}>
+          <i className="fas fa-times" aria-hidden="true" />
+        </button>
+      </div>
+      <div className="rec-models-grid">
+        {recommended.map(m => {
+          const busy = installing.has(m.name)
+          return (
+            <div key={m.name} className="rec-models-item">
+              <div className="rec-models-item-name">{m.name}</div>
+              <div className="rec-models-item-meta">
+                {isNvfp4Name(m.name) && <span className="badge badge-info">NVFP4</span>}
+                {m.sizeDisplay && <span>{m.sizeDisplay}</span>}
+                {isGpu && m.vramDisplay && (
+                  <span className="rec-models-item-fit"><i className="fas fa-microchip" aria-hidden="true" /> {m.vramDisplay}</span>
+                )}
+              </div>
+              <button
+                type="button"
+                className="btn btn-primary btn-sm"
+                disabled={busy}
+                onClick={() => install(m.name)}
+              >
+                {busy
+                  ? (<><i className="fas fa-spinner fa-spin" aria-hidden="true" /> {t('recommended.installing')}</>)
+                  : (<><i className="fas fa-download" aria-hidden="true" /> {t('recommended.install')}</>)}
+              </button>
+            </div>
+          )
+        })}
+      </div>
+    </div>
+  )
+}
--- a/core/http/react-ui/src/components/StarterModels.jsx
+++ b/core/http/react-ui/src/components/StarterModels.jsx
@@ -1,79 +1,78 @@
-import { useState, useEffect, useMemo } from 'react'
+import { useState } from 'react'
 import { useTranslation } from 'react-i18next'
 import { modelsApi } from '../utils/api'
-import { useResources } from '../hooks/useResources'
+import { useRecommendedModels, isNvfp4Name } from '../hooks/useRecommendedModels'

-// Curated, hardware-tiered starter models for the empty-state onboarding. Names
-// are real gallery entries (gallery/index.yaml); we intersect them against the
-// live gallery at render time so a custom/trimmed gallery degrades gracefully
-// (unmatched entries simply don't render).
-//
-// The guiding rule the maintainer asked for: CPU-only machines should be
-// steered to genuinely small models (1-4B, Q4) that stay responsive without a
-// GPU. GPU tiers scale the suggestion up with available VRAM.
-const SMALL = [
-  { name: 'llama-3.2-1b-instruct:q4_k_m', size: '~0.8 GB' },
-  { name: 'llama-3.2-3b-instruct:q4_k_m', size: '~2 GB' },
-  { name: 'qwen3-1.7b', size: '~1.4 GB' },
-  { name: 'gemma-3-1b-it', size: '~0.8 GB' },
-]
-const MID = [
-  { name: 'qwen3-4b', size: '~2.5 GB' },
-  { name: 'gemma-3-4b-it', size: '~3 GB' },
-  { name: 'llama-3.2-3b-instruct:q4_k_m', size: '~2 GB' },
-]
-const LARGE = [
-  { name: 'meta-llama-3.1-8b-instruct', size: '~5 GB' },
-  { name: 'qwen3-4b', size: '~2.5 GB' },
-  { name: 'mistral-7b-instruct-v0.3', size: '~4 GB' },
-]
+// Static fallback, keyed by hardware tier id, used only when the live picks
+// from useRecommendedModels come back empty (e.g. offline or trimmed gallery),
+// so onboarding never shows nothing. Names are meant to be real gallery
+// entries; sizes are approximate display strings. Gemma picks use the QAT
+// (quantization-aware-trained) Q4 builds; NVIDIA overrides are listed below.
+const BASE = {
+  cpu: [
+    { name: 'gemma-4-e2b-it-qat-q4_0', size: '~1.5 GB' },
+    { name: 'qwen3.5-4b-claude-4.6-opus-reasoning-distilled', size: '~2.5 GB' },
+    { name: 'gemma-4-e4b-it-qat-q4_0', size: '~3 GB' },
+    { name: 'lfm2.5-1.2b-instruct', size: '~0.8 GB' },
+  ],
+  'gpu-small': [
+    { name: 'gemma-4-e4b-it-qat-q4_0', size: '~3 GB' },
+    { name: 'lfm2.5-8b-a1b', size: '~5 GB' },
+    { name: 'qwen3.5-9b', size: '~5.5 GB' },
+    { name: 'gemma-4-12b-it-qat-q4_0', size: '~7 GB' },
+  ],
+  'gpu-mid': [
+    { name: 'qwen3.6-27b', size: '~16 GB' },
+    { name: 'qwen3.6-27b-mtp-pi-tune', size: '~16 GB' },
+    { name: 'gemma-4-26b-a4b-it-qat-q4_0', size: '~16 GB' },
+    { name: 'qwen3.5-27b', size: '~16 GB' },
+  ],
+  'gpu-large': [
+    { name: 'qwen3.6-35b-a3b-apex', size: '~20 GB' },
+    { name: 'qwen3.6-35b-a3b-claude-4.6-opus-reasoning-distilled', size: '~20 GB' },
+    { name: 'gemma-4-31b-it-qat-q4_0', size: '~18 GB' },
+    { name: 'qwen3.5-35b-a3b-apex', size: '~20 GB' },
+  ],
+}

-const GB = 1024 * 1024 * 1024
+// NVIDIA-only overrides, keyed by tier id like BASE; only gpu-mid and gpu-large
+// are overridden. NVFP4 is a Blackwell-optimised 4-bit format paired with MTP (multi-token prediction) for speed.
+const NVIDIA = {
+  'gpu-mid': [
+    { name: 'qwen3.6-27b-nvfp4-mtp', size: '~14 GB' },
+    { name: 'qwen3.6-27b-mtp-pi-tune', size: '~16 GB' },
+    { name: 'gemma-4-26b-a4b-it-qat-q4_0', size: '~16 GB' },
+    { name: 'qwen3.6-27b', size: '~16 GB' },
+  ],
+  'gpu-large': [
+    { name: 'qwen3.6-35b-a3b-nvfp4-mtp', size: '~18 GB' },
+    { name: 'qwen3.6-27b-nvfp4-mtp', size: '~14 GB' },
+    { name: 'qwen3.6-35b-a3b-apex', size: '~20 GB' },
+    { name: 'gemma-4-31b-it-qat-q4_0', size: '~18 GB' },
+  ],
+}

-// Pick a tier from detected hardware. total_memory is GPU VRAM in bytes (0 when
-// CPU-only). Thresholds are deliberately conservative so a suggestion that
-// "fits" really does.
-function pickTier(resources) {
-  const isGpu = resources?.type === 'gpu'
-  const vram = resources?.aggregate?.total_memory || 0
-  if (!isGpu || vram <= 0) return { id: 'cpu', list: SMALL }
-  if (vram < 8 * GB) return { id: 'gpu-small', list: MID }
-  return { id: 'gpu-large', list: LARGE }
+// Static picks for a tier: the NVIDIA list when the GPU is NVIDIA and that
+// tier has one, otherwise the base list for the tier, otherwise the CPU list.
+function fallbackFor(tierId, isNvidia) {
+  const nvidiaPicks = isNvidia ? NVIDIA[tierId] : undefined
+  const picks = nvidiaPicks || BASE[tierId]
+  return picks ? picks : BASE.cpu
+}

 export default function StarterModels({ addToast, onInstallStarted }) {
  const { t } = useTranslation('home')
-  const { resources } = useResources()
-  const [available, setAvailable] = useState(null) // Set of gallery names, or null while loading
+  const { recommended, tier, isNvidia, loading } = useRecommendedModels({ count: 4 })
  const [installing, setInstalling] = useState(() => new Set())

-  const tier = useMemo(() => pickTier(resources), [resources])
-  const candidates = tier.list
+  // While the hardware probe + gallery query are in flight, render nothing
+  // rather than flashing fallback content that may be replaced a moment later.
+  if (loading) return null

-  // Verify candidates exist in the live gallery. One search per name (the tier
-  // has at most a handful) keeps this resilient to gallery customization.
-  useEffect(() => {
-    let cancelled = false
-    const names = [...new Set(candidates.map(c => c.name))]
-    Promise.all(names.map(name =>
-      modelsApi.list({ search: name, page: 1 })
-        .then(data => (data?.models || []).some(m => (m.name || m.id) === name) ? name : null)
-        .catch(() => null)
-    )).then(found => {
-      if (cancelled) return
-      const hits = found.filter(Boolean)
-      // If verification yielded nothing (e.g. gallery unreachable), fall back to
-      // showing the curated list rather than an empty widget.
-      setAvailable(hits.length > 0 ? new Set(hits) : null)
-    })
-    return () => { cancelled = true }
-  }, [candidates])
+  // Prefer live recommendations; fall back to the static list only when the
+  // gallery yielded nothing.
+  const items = (recommended && recommended.length > 0)
+    ? recommended.map(r => ({ name: r.name, size: r.sizeDisplay }))
+    : fallbackFor(tier.id, isNvidia)

-  const visible = available === null
-    ? candidates
-    : candidates.filter(c => available.has(c.name))
-
-  if (visible.length === 0) return null
+  if (items.length === 0) return null

  const install = async (name) => {
    setInstalling(prev => new Set(prev).add(name))
@@ -104,12 +103,13 @@ export default function StarterModels({ addToast, onInstallStarted }) {
        {tier.id === 'cpu' ? t('starters.cpuNote') : t('starters.gpuNote')}
      </p>
      <ul className="home-starters-list">
-        {visible.map(c => {
+        {items.map(c => {
          const busy = installing.has(c.name)
          return (
            <li key={c.name} className="home-starters-item">
              <span className="home-starters-name">{c.name}</span>
-              <span className="home-starters-size">{c.size}</span>
+              {isNvfp4Name(c.name) && <span className="badge badge-info home-starters-badge">NVFP4</span>}
+              {c.size && <span className="home-starters-size">{c.size}</span>}
              <button
                type="button"
                className="btn btn-primary btn-sm"
--- a/core/http/react-ui/src/hooks/useRecommendedModels.js
+++ b/core/http/react-ui/src/hooks/useRecommendedModels.js
@@ -0,0 +1,108 @@
+import { useState, useEffect } from 'react'
+import { modelsApi } from '../utils/api'
+import { useResources } from './useResources'
+
+// Data-driven "recommended for your hardware" model picks. The gallery exposes
+// no popularity/download signal and the list response carries no size, so we:
+//   1. ask the server for chat-capable models in their natural (curated) order,
+//   2. estimate size/VRAM for the top candidates (same endpoint the Models page
+//      uses), and
+//   3. rank by hardware fit — smallest on CPU-only boxes, largest-that-fits on
+//      GPUs (bigger == better quality while still fitting VRAM).
+//
+// Returns `recommended === null` while loading, `[]` when nothing could be
+// resolved (gallery/estimates unavailable) so callers can fall back.
+
+const GB = 1024 * 1024 * 1024
+const DEFAULT_CTX = 4096
+
+// Name-based NVFP4 detection (case-insensitive; a missing name never matches).
+// NVFP4 is a Blackwell/NVIDIA-specific 4-bit format, so it is only worth
+// suggesting on NVIDIA hardware and is filtered out elsewhere.
+export const isNvfp4Name = (name) => /nvfp4/i.test(name ? name : '')
+
+// True when any entry of resources.gpus reports vendor "nvidia" (any casing).
+// A missing or non-array `gpus` yields false.
+export function hasNvidiaGpu(resources) {
+  const gpus = resources?.gpus
+  if (!Array.isArray(gpus)) return false
+  return gpus.some(gpu => (gpu?.vendor || '').toLowerCase() === 'nvidia')
+}
+
+// Maps detected hardware to a recommendation tier { id, vram }. Anything that
+// is not a GPU with positive aggregate.total_memory is 'cpu' (vram 0); GPUs
+// split at 8 GB and 24 GB into 'gpu-small', 'gpu-mid' and 'gpu-large'.
+export function recommendTier(resources) {
+  const vram = resources?.aggregate?.total_memory || 0
+  const hasUsableGpu = resources?.type === 'gpu' && !(vram <= 0)
+  if (!hasUsableGpu) return { id: 'cpu', vram: 0 }
+  let id = 'gpu-large'
+  if (vram < 8 * GB) id = 'gpu-small'
+  else if (vram < 24 * GB) id = 'gpu-mid'
+  return { id, vram }
+}
+
+// Orders estimated candidates for the given tier and returns at most `count`.
+// Candidates without a size estimate are dropped, as are NVFP4 models unless
+// the machine is NVIDIA (NVFP4 only runs on NVIDIA/Blackwell).
+function rank(candidates, tier, count, isNvidia) {
+  const usable = candidates.filter(c => {
+    if (c.sizeBytes == null) return false
+    return isNvidia || !isNvfp4Name(c.name)
+  })
+  const smallestFirst = (a, b) => a.sizeBytes - b.sizeBytes
+  const largestFirst = (a, b) => b.sizeBytes - a.sizeBytes
+
+  // No GPU: smallest models stay responsive on CPU.
+  if (tier.id === 'cpu') return [...usable].sort(smallestFirst).slice(0, count)
+
+  // Keep 5% of VRAM as headroom when deciding what fits.
+  const budget = tier.vram * 0.95
+  const fitting = usable.filter(c => c.vramBytes != null && c.vramBytes <= budget)
+  const anyFit = fitting.length > 0
+  // Largest-that-fits when something fits; on a tiny GPU where nothing does,
+  // fall back to the whole pool ordered smallest first.
+  const bySize = anyFit ? largestFirst : smallestFirst
+  const ordered = [...(anyFit ? fitting : usable)].sort((a, b) => {
+    if (isNvidia) {
+      // On NVIDIA, NVFP4 picks are surfaced ahead of everything else.
+      const aFp4 = isNvfp4Name(a.name), bFp4 = isNvfp4Name(b.name)
+      if (aFp4 !== bFp4) return aFp4 ? -1 : 1
+    }
+    return bySize(a, b)
+  })
+  return ordered.slice(0, count)
+}
+
+// Hook returning hardware-fit model recommendations.
+//
+// Options: count - max picks returned (default 4); candidatePool - how many
+// chat-tagged gallery models are requested and size-estimated (default 10).
+//
+// Returns { recommended, tier, isNvidia, error, loading }. `recommended` is
+// null until a lookup settles (loading === true), then the ranked picks, or []
+// when the gallery request failed. Nothing is fetched until useResources has
+// produced a non-null `resources`.
+export function useRecommendedModels({ count = 4, candidatePool = 10 } = {}) {
+  const { resources } = useResources()
+  const [recommended, setRecommended] = useState(null)
+  const [error, setError] = useState(null)
+
+  const resReady = resources !== null
+  const tier = recommendTier(resources)
+  const isNvidia = hasNvidiaGpu(resources)
+
+  useEffect(() => {
+    if (!resReady) return
+    // Guards against a superseded run (deps changed / unmount) writing state.
+    let cancelled = false
+    setRecommended(null)
+    setError(null)
+    ;(async () => {
+      try {
+        const data = await modelsApi.list({ tag: 'chat', items: candidatePool, page: 1 })
+        // Recommend models the user hasn't installed yet.
+        const models = (data?.models || []).filter(m => !m.installed)
+        const estimated = await Promise.all(models.map(async (m) => {
+          const name = m.name || m.id
+          try {
+            const e = await modelsApi.estimate(name, [DEFAULT_CTX])
+            const ctx = e?.estimates?.[String(DEFAULT_CTX)]
+            return {
+              name,
+              description: m.description,
+              sizeBytes: e?.sizeBytes ?? null,
+              sizeDisplay: e?.sizeDisplay ?? null,
+              vramBytes: ctx?.vramBytes ?? null,
+              vramDisplay: ctx?.vramDisplay ?? null,
+            }
+          } catch {
+            // One failed estimate must not sink the batch; a null size makes
+            // rank() drop this candidate.
+            return { name, sizeBytes: null }
+          }
+        }))
+        if (cancelled) return
+        setRecommended(rank(estimated, tier, count, isNvidia))
+      } catch (e) {
+        if (cancelled) return
+        // A rejection is not guaranteed to be an Error. Reading `.message` off
+        // null/undefined would throw here, so setRecommended([]) would never
+        // run and callers would stay in `loading` instead of falling back.
+        setError(e?.message ?? String(e))
+        setRecommended([])
+      }
+    })()
+    return () => { cancelled = true }
+    // tier.id / tier.vram / isNvidia are primitives, so resource polling doesn't re-run this.
+  }, [resReady, tier.id, tier.vram, isNvidia, count, candidatePool])
+
+  return { recommended, tier, isNvidia, error, loading: recommended === null }
+}
--- a/core/http/react-ui/src/pages/Models.jsx
+++ b/core/http/react-ui/src/pages/Models.jsx
@@ -13,6 +13,7 @@ import ConfirmDialog from '../components/ConfirmDialog'
 import GalleryLoader from '../components/GalleryLoader'
 import Toggle from '../components/Toggle'
 import ResponsiveTable from '../components/ResponsiveTable'
+import RecommendedModels from '../components/RecommendedModels'
 import React from 'react'


@@ -301,6 +302,8 @@ export default function Models() {
        }
      />

+      <RecommendedModels addToast={addToast} />
+
      {/* Search */}
      <div className="search-bar" style={{ marginBottom: 'var(--spacing-md)' }}>
        <i className="fas fa-search search-icon" />