From f88981cdce96cd0056119e97500a4b8f31679d67 Mon Sep 17 00:00:00 2001
From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com>
Date: Thu, 25 Jun 2026 00:22:45 +0200
Subject: [PATCH] feat(ui): data-driven hardware model recommendations +
gallery surfacing (#10500)
* feat(ui): make hardware starter models data-driven
The empty-state starter widget recommended models from a hardcoded list,
which drifts as the gallery evolves. Add useRecommendedModels: it queries
the live gallery for chat-capable models (kept in their natural curated
order, since the gallery exposes no popularity signal), estimates size/VRAM
for the top candidates via the existing estimate endpoint, and ranks them by
hardware fit: smallest on CPU-only boxes, largest-that-fits on GPUs.
StarterModels now renders those live picks and keeps the curated static
list only as an offline/trimmed-gallery fallback.
Signed-off-by: Ettore Di Giacinto
Assisted-by: Claude:claude-opus-4-8 [Claude Code]
* feat(ui): recommend models for your hardware in the gallery
Hardware-aware recommendations were only shown on the first-run empty
state. Surface them on the main Models gallery too: a dismissible
"Recommended for your hardware" strip at the top, sharing the
useRecommendedModels fit-ranking with the starter widget. CPU-only boxes
get small models; GPUs get the largest picks that fit VRAM, with size and
VRAM shown per card. One-click install; dismissal persists per browser.
Signed-off-by: Ettore Di Giacinto
Assisted-by: Claude:claude-opus-4-8 [Claude Code]
* feat(ui): gpu-mid tier + NVIDIA NVFP4 model recommendations
Refine the hardware recommendation tiers and curated picks:
- Add a gpu-mid tier (8-24GB VRAM) between gpu-small and gpu-large, so
~27B-class models are suggested separately from the 30B+ large tier.
- Detect NVIDIA GPUs (resources.gpus[].vendor) and, on NVIDIA only, prefer
NVFP4 + MTP variants (Blackwell-optimised); NVFP4 models are filtered out
of recommendations on non-NVIDIA hardware where they can't run. This
applies to both the live ranking and the static fallback, with an NVFP4
badge shown on those picks.
- Refresh the curated fallback to current models: Gemma-4 QAT Q4 builds at
every tier, small qwen3.5 models (4B distilled on cpu, 9B on gpu-small),
qwen3.6-27b and its MTP variants at gpu-mid, and qwen3.6/qwen3.5 35B-A3B
apex/distilled builds at gpu-large.
All names verified against gallery/index.yaml.
Signed-off-by: Ettore Di Giacinto
Assisted-by: Claude:claude-opus-4-8 [Claude Code]
---------
Signed-off-by: Ettore Di Giacinto
Co-authored-by: Ettore Di Giacinto
---
.../http/react-ui/public/locales/en/home.json | 1 +
.../react-ui/public/locales/en/models.json | 10 ++
core/http/react-ui/src/App.css | 71 ++++++++++
.../src/components/RecommendedModels.jsx | 86 ++++++++++++
.../react-ui/src/components/StarterModels.jsx | 130 +++++++++---------
.../src/hooks/useRecommendedModels.js | 108 +++++++++++++++
core/http/react-ui/src/pages/Models.jsx | 3 +
7 files changed, 344 insertions(+), 65 deletions(-)
create mode 100644 core/http/react-ui/src/components/RecommendedModels.jsx
create mode 100644 core/http/react-ui/src/hooks/useRecommendedModels.js
diff --git a/core/http/react-ui/public/locales/en/home.json b/core/http/react-ui/public/locales/en/home.json
index 142767999..35533a5a8 100644
--- a/core/http/react-ui/public/locales/en/home.json
+++ b/core/http/react-ui/public/locales/en/home.json
@@ -82,6 +82,7 @@
"tier": {
"cpu": "CPU-only",
"gpu-small": "GPU",
+ "gpu-mid": "GPU",
"gpu-large": "GPU"
},
"cpuNote": "No GPU detected — these small models stay responsive on CPU.",
diff --git a/core/http/react-ui/public/locales/en/models.json b/core/http/react-ui/public/locales/en/models.json
index 2bf7b018d..bd23d389e 100644
--- a/core/http/react-ui/public/locales/en/models.json
+++ b/core/http/react-ui/public/locales/en/models.json
@@ -2,6 +2,16 @@
"title": "Install Models",
"subtitle": "Browse and install AI models from the gallery",
"models": "Models",
+ "recommended": {
+ "title": "Recommended for your hardware",
+ "cpuNote": "No GPU detected - small models that stay responsive on CPU.",
+ "gpuNote": "Sized to fit your available VRAM with room for context.",
+ "install": "Install",
+ "installing": "Installing",
+ "installStarted": "Installing {{model}}…",
+ "installFailed": "Install failed: {{message}}",
+ "dismiss": "Dismiss recommendations"
+ },
"stats": {
"available": "Available",
"installed": "Installed"
diff --git a/core/http/react-ui/src/App.css b/core/http/react-ui/src/App.css
index 40eddc2e9..4578a3dd8 100644
--- a/core/http/react-ui/src/App.css
+++ b/core/http/react-ui/src/App.css
@@ -6409,6 +6409,9 @@ select.input {
font-size: 0.875rem;
word-break: break-all;
}
+.home-starters-badge {
+ font-size: 0.625rem;
+}
.home-starters-size {
margin-left: auto;
font-size: 0.75rem;
@@ -6416,6 +6419,74 @@ select.input {
white-space: nowrap;
}
+/* ──────────────────── Models gallery: recommended-for-your-hardware strip ──────────────────── */
+
+.rec-models {
+ margin-bottom: var(--spacing-md);
+ padding: var(--spacing-md) var(--spacing-lg);
+}
+.rec-models-head {
+ display: flex;
+ align-items: flex-start;
+ justify-content: space-between;
+ gap: var(--spacing-md);
+}
+.rec-models-title {
+ display: flex;
+ align-items: center;
+ gap: var(--spacing-sm);
+ flex-wrap: wrap;
+}
+.rec-models-title i {
+ color: var(--color-primary);
+}
+.rec-models-note {
+ font-size: 0.8125rem;
+ color: var(--color-text-secondary);
+}
+.rec-models-dismiss {
+ background: none;
+ border: none;
+ color: var(--color-text-muted);
+ cursor: pointer;
+ padding: 4px;
+ flex-shrink: 0;
+}
+.rec-models-dismiss:hover {
+ color: var(--color-text-primary);
+}
+.rec-models-grid {
+ display: grid;
+ grid-template-columns: repeat(auto-fill, minmax(220px, 1fr));
+ gap: var(--spacing-sm);
+ margin-top: var(--spacing-md);
+}
+.rec-models-item {
+ display: flex;
+ flex-direction: column;
+ gap: var(--spacing-xs);
+ padding: var(--spacing-sm) var(--spacing-md);
+ border: 1px solid var(--color-border-subtle);
+ border-radius: var(--radius-md);
+ background: var(--color-bg-primary);
+}
+.rec-models-item-name {
+ font-weight: 500;
+ font-size: 0.8125rem;
+ word-break: break-all;
+}
+.rec-models-item-meta {
+ display: flex;
+ gap: var(--spacing-sm);
+ font-size: 0.75rem;
+ color: var(--color-text-muted);
+}
+.rec-models-item-fit {
+ display: inline-flex;
+ align-items: center;
+ gap: 4px;
+}
+
/* ──────────────────── Home: drop-in endpoint / API compatibility ──────────────────── */
.home-connect {
diff --git a/core/http/react-ui/src/components/RecommendedModels.jsx b/core/http/react-ui/src/components/RecommendedModels.jsx
new file mode 100644
index 000000000..7620406c8
--- /dev/null
+++ b/core/http/react-ui/src/components/RecommendedModels.jsx
@@ -0,0 +1,86 @@
+import { useState } from 'react'
+import { useTranslation } from 'react-i18next'
+import { modelsApi } from '../utils/api'
+import { useRecommendedModels, isNvfp4Name } from '../hooks/useRecommendedModels'
+
+const DISMISS_KEY = 'localai_rec_models_dismissed'
+
+// "Recommended for your hardware" strip at the top of the Models gallery. Shares
+// the hardware-fit ranking with the empty-state starter widget via
+// useRecommendedModels, but styled for the gallery page and dismissible (the
+// gallery is a repeat-visit surface, so it shouldn't nag).
+export default function RecommendedModels({ addToast }) {
+ const { t } = useTranslation('models')
+ const { recommended, tier, loading } = useRecommendedModels({ count: 4 })
+ const [installing, setInstalling] = useState(() => new Set())
+ const [dismissed, setDismissed] = useState(() => {
+ try { return localStorage.getItem(DISMISS_KEY) === '1' } catch { return false }
+ })
+
+ if (loading || dismissed) return null
+ if (!recommended || recommended.length === 0) return null
+
+ const dismiss = () => {
+ try { localStorage.setItem(DISMISS_KEY, '1') } catch { /* ignore */ }
+ setDismissed(true)
+ }
+
+ const install = async (name) => {
+ setInstalling(prev => new Set(prev).add(name))
+ try {
+ await modelsApi.install(name)
+ addToast?.(t('recommended.installStarted', { model: name }), 'success')
+ } catch (err) {
+ addToast?.(t('recommended.installFailed', { message: err.message }), 'error')
+ setInstalling(prev => {
+ const next = new Set(prev)
+ next.delete(name)
+ return next
+ })
+ }
+ }
+
+ const isGpu = tier.id !== 'cpu'
+
+ return (
+
+
+
+
+ {t('recommended.title')}
+ {isGpu ? t('recommended.gpuNote') : t('recommended.cpuNote')}
+
+
+
+
+ {recommended.map(m => {
+ const busy = installing.has(m.name)
+ return (
+
+
{m.name}
+
+ {isNvfp4Name(m.name) && NVFP4}
+ {m.sizeDisplay && {m.sizeDisplay}}
+ {isGpu && m.vramDisplay && (
+ {m.vramDisplay}
+ )}
+
+
+
+ )
+ })}
+
+
+ )
+}
diff --git a/core/http/react-ui/src/components/StarterModels.jsx b/core/http/react-ui/src/components/StarterModels.jsx
index 9273ae147..d5f8122b6 100644
--- a/core/http/react-ui/src/components/StarterModels.jsx
+++ b/core/http/react-ui/src/components/StarterModels.jsx
@@ -1,79 +1,78 @@
-import { useState, useEffect, useMemo } from 'react'
+import { useState } from 'react'
import { useTranslation } from 'react-i18next'
import { modelsApi } from '../utils/api'
-import { useResources } from '../hooks/useResources'
+import { useRecommendedModels, isNvfp4Name } from '../hooks/useRecommendedModels'
-// Curated, hardware-tiered starter models for the empty-state onboarding. Names
-// are real gallery entries (gallery/index.yaml); we intersect them against the
-// live gallery at render time so a custom/trimmed gallery degrades gracefully
-// (unmatched entries simply don't render).
-//
-// The guiding rule the maintainer asked for: CPU-only machines should be
-// steered to genuinely small models (1-4B, Q4) that stay responsive without a
-// GPU. GPU tiers scale the suggestion up with available VRAM.
-const SMALL = [
- { name: 'llama-3.2-1b-instruct:q4_k_m', size: '~0.8 GB' },
- { name: 'llama-3.2-3b-instruct:q4_k_m', size: '~2 GB' },
- { name: 'qwen3-1.7b', size: '~1.4 GB' },
- { name: 'gemma-3-1b-it', size: '~0.8 GB' },
-]
-const MID = [
- { name: 'qwen3-4b', size: '~2.5 GB' },
- { name: 'gemma-3-4b-it', size: '~3 GB' },
- { name: 'llama-3.2-3b-instruct:q4_k_m', size: '~2 GB' },
-]
-const LARGE = [
- { name: 'meta-llama-3.1-8b-instruct', size: '~5 GB' },
- { name: 'qwen3-4b', size: '~2.5 GB' },
- { name: 'mistral-7b-instruct-v0.3', size: '~4 GB' },
-]
+// Static fallback used only when the live gallery / estimates can't be reached
+// (offline, trimmed gallery). The hook is the primary, data-driven path; these
+// are real gallery names kept as a safety net so onboarding never shows nothing.
+// Gemma picks use the QAT (quantization-aware-trained) Q4 builds. NVIDIA boxes
+// get NVFP4 + MTP variants at the mid/large tiers (see NVIDIA below).
+const BASE = {
+ cpu: [
+ { name: 'gemma-4-e2b-it-qat-q4_0', size: '~1.5 GB' },
+ { name: 'qwen3.5-4b-claude-4.6-opus-reasoning-distilled', size: '~2.5 GB' },
+ { name: 'gemma-4-e4b-it-qat-q4_0', size: '~3 GB' },
+ { name: 'lfm2.5-1.2b-instruct', size: '~0.8 GB' },
+ ],
+ 'gpu-small': [
+ { name: 'gemma-4-e4b-it-qat-q4_0', size: '~3 GB' },
+ { name: 'lfm2.5-8b-a1b', size: '~5 GB' },
+ { name: 'qwen3.5-9b', size: '~5.5 GB' },
+ { name: 'gemma-4-12b-it-qat-q4_0', size: '~7 GB' },
+ ],
+ 'gpu-mid': [
+ { name: 'qwen3.6-27b', size: '~16 GB' },
+ { name: 'qwen3.6-27b-mtp-pi-tune', size: '~16 GB' },
+ { name: 'gemma-4-26b-a4b-it-qat-q4_0', size: '~16 GB' },
+ { name: 'qwen3.5-27b', size: '~16 GB' },
+ ],
+ 'gpu-large': [
+ { name: 'qwen3.6-35b-a3b-apex', size: '~20 GB' },
+ { name: 'qwen3.6-35b-a3b-claude-4.6-opus-reasoning-distilled', size: '~20 GB' },
+ { name: 'gemma-4-31b-it-qat-q4_0', size: '~18 GB' },
+ { name: 'qwen3.5-35b-a3b-apex', size: '~20 GB' },
+ ],
+}
-const GB = 1024 * 1024 * 1024
+// NVIDIA-only overrides: NVFP4 is a Blackwell-optimised 4-bit format paired with
+// MTP (multi-token prediction) for speed. Only the mid/large tiers have these.
+const NVIDIA = {
+ 'gpu-mid': [
+ { name: 'qwen3.6-27b-nvfp4-mtp', size: '~14 GB' },
+ { name: 'qwen3.6-27b-mtp-pi-tune', size: '~16 GB' },
+ { name: 'gemma-4-26b-a4b-it-qat-q4_0', size: '~16 GB' },
+ { name: 'qwen3.6-27b', size: '~16 GB' },
+ ],
+ 'gpu-large': [
+ { name: 'qwen3.6-35b-a3b-nvfp4-mtp', size: '~18 GB' },
+ { name: 'qwen3.6-27b-nvfp4-mtp', size: '~14 GB' },
+ { name: 'qwen3.6-35b-a3b-apex', size: '~20 GB' },
+ { name: 'gemma-4-31b-it-qat-q4_0', size: '~18 GB' },
+ ],
+}
-// Pick a tier from detected hardware. total_memory is GPU VRAM in bytes (0 when
-// CPU-only). Thresholds are deliberately conservative so a suggestion that
-// "fits" really does.
-function pickTier(resources) {
- const isGpu = resources?.type === 'gpu'
- const vram = resources?.aggregate?.total_memory || 0
- if (!isGpu || vram <= 0) return { id: 'cpu', list: SMALL }
- if (vram < 8 * GB) return { id: 'gpu-small', list: MID }
- return { id: 'gpu-large', list: LARGE }
+function fallbackFor(tierId, isNvidia) {
+ if (isNvidia && NVIDIA[tierId]) return NVIDIA[tierId]
+ return BASE[tierId] || BASE.cpu
}
export default function StarterModels({ addToast, onInstallStarted }) {
const { t } = useTranslation('home')
- const { resources } = useResources()
- const [available, setAvailable] = useState(null) // Set of gallery names, or null while loading
+ const { recommended, tier, isNvidia, loading } = useRecommendedModels({ count: 4 })
const [installing, setInstalling] = useState(() => new Set())
- const tier = useMemo(() => pickTier(resources), [resources])
- const candidates = tier.list
+ // While the hardware probe + gallery query are in flight, render nothing
+ // rather than flashing fallback content that may be replaced a moment later.
+ if (loading) return null
- // Verify candidates exist in the live gallery. One search per name (the tier
- // has at most a handful) keeps this resilient to gallery customization.
- useEffect(() => {
- let cancelled = false
- const names = [...new Set(candidates.map(c => c.name))]
- Promise.all(names.map(name =>
- modelsApi.list({ search: name, page: 1 })
- .then(data => (data?.models || []).some(m => (m.name || m.id) === name) ? name : null)
- .catch(() => null)
- )).then(found => {
- if (cancelled) return
- const hits = found.filter(Boolean)
- // If verification yielded nothing (e.g. gallery unreachable), fall back to
- // showing the curated list rather than an empty widget.
- setAvailable(hits.length > 0 ? new Set(hits) : null)
- })
- return () => { cancelled = true }
- }, [candidates])
+ // Prefer live recommendations; fall back to the static list only when the
+ // gallery yielded nothing.
+ const items = (recommended && recommended.length > 0)
+ ? recommended.map(r => ({ name: r.name, size: r.sizeDisplay }))
+ : fallbackFor(tier.id, isNvidia)
- const visible = available === null
- ? candidates
- : candidates.filter(c => available.has(c.name))
-
- if (visible.length === 0) return null
+ if (items.length === 0) return null
const install = async (name) => {
setInstalling(prev => new Set(prev).add(name))
@@ -104,12 +103,13 @@ export default function StarterModels({ addToast, onInstallStarted }) {
{tier.id === 'cpu' ? t('starters.cpuNote') : t('starters.gpuNote')}
- {visible.map(c => {
+ {items.map(c => {
const busy = installing.has(c.name)
return (
-
{c.name}
- {c.size}
+ {isNvfp4Name(c.name) && NVFP4}
+ {c.size && {c.size}}