From f88981cdce96cd0056119e97500a4b8f31679d67 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Thu, 25 Jun 2026 00:22:45 +0200 Subject: [PATCH 01/11] feat(ui): data-driven hardware model recommendations + gallery surfacing (#10500) * feat(ui): make hardware starter models data-driven The empty-state starter widget recommended from a hardcoded list, which drifts as the gallery evolves. Add useRecommendedModels: it queries the live gallery for chat-capable models (their natural curated order, since the gallery exposes no popularity signal), estimates size/VRAM for the top candidates via the existing estimate endpoint, and ranks by hardware fit - smallest on CPU-only boxes, largest-that-fits on GPUs. StarterModels now renders those live picks and keeps the curated static list only as an offline/trimmed-gallery fallback. Signed-off-by: Ettore Di Giacinto Assisted-by: Claude:claude-opus-4-8 [Claude Code] * feat(ui): recommend models for your hardware in the gallery Hardware-aware recommendations were only shown on the first-run empty state. Surface them on the main Models gallery too: a dismissible "Recommended for your hardware" strip at the top, sharing the useRecommendedModels fit-ranking with the starter widget. CPU-only boxes get small models; GPUs get the largest picks that fit VRAM, with size and VRAM shown per card. One-click install; dismissal persists per browser. Signed-off-by: Ettore Di Giacinto Assisted-by: Claude:claude-opus-4-8 [Claude Code] * feat(ui): gpu-mid tier + NVIDIA NVFP4 model recommendations Refine the hardware recommendation tiers and curated picks: - Add a gpu-mid tier (8-24GB VRAM) between gpu-small and gpu-large, so ~27B-class models are suggested separately from the 30B+ large tier. - Detect NVIDIA GPUs (resources.gpus[].vendor) and, on NVIDIA only, prefer NVFP4 + MTP variants (Blackwell-optimised); NVFP4 models are filtered out of recommendations on non-NVIDIA hardware where they can't run. This applies to both the live ranking and the static fallback, with an NVFP4 badge shown on those picks. - Refresh the curated fallback to current models: Gemma-4 QAT Q4 builds at every tier, low qwen3.5 (4B distilled / 9B) on CPU/small, qwen3.6-27b and MTP variants at mid, qwen3.6/qwen3.5 35B-A3B apex/distilled at large. All names verified against gallery/index.yaml. Signed-off-by: Ettore Di Giacinto Assisted-by: Claude:claude-opus-4-8 [Claude Code] --------- Signed-off-by: Ettore Di Giacinto Co-authored-by: Ettore Di Giacinto --- .../http/react-ui/public/locales/en/home.json | 1 + .../react-ui/public/locales/en/models.json | 10 ++ core/http/react-ui/src/App.css | 71 ++++++++++ .../src/components/RecommendedModels.jsx | 86 ++++++++++++ .../react-ui/src/components/StarterModels.jsx | 130 +++++++++--------- .../src/hooks/useRecommendedModels.js | 108 +++++++++++++++ core/http/react-ui/src/pages/Models.jsx | 3 + 7 files changed, 344 insertions(+), 65 deletions(-) create mode 100644 core/http/react-ui/src/components/RecommendedModels.jsx create mode 100644 core/http/react-ui/src/hooks/useRecommendedModels.js diff --git a/core/http/react-ui/public/locales/en/home.json b/core/http/react-ui/public/locales/en/home.json index 142767999..35533a5a8 100644 --- a/core/http/react-ui/public/locales/en/home.json +++ b/core/http/react-ui/public/locales/en/home.json @@ -82,6 +82,7 @@ "tier": { "cpu": "CPU-only", "gpu-small": "GPU", + "gpu-mid": "GPU", "gpu-large": "GPU" }, "cpuNote": "No GPU detected — these small models stay responsive on CPU.", diff --git a/core/http/react-ui/public/locales/en/models.json b/core/http/react-ui/public/locales/en/models.json index 2bf7b018d..bd23d389e 100644 --- a/core/http/react-ui/public/locales/en/models.json +++ b/core/http/react-ui/public/locales/en/models.json @@ -2,6 +2,16 @@ "title": "Install Models", "subtitle": "Browse and install AI models from the gallery", "models": "Models", + "recommended": { + "title": "Recommended for your hardware", + "cpuNote": "No GPU detected - small models that stay responsive on CPU.", + "gpuNote": "Sized to fit your available VRAM with room for context.", + "install": "Install", + "installing": "Installing", + "installStarted": "Installing {{model}}…", + "installFailed": "Install failed: {{message}}", + "dismiss": "Dismiss recommendations" + }, "stats": { "available": "Available", "installed": "Installed" diff --git a/core/http/react-ui/src/App.css b/core/http/react-ui/src/App.css index 40eddc2e9..4578a3dd8 100644 --- a/core/http/react-ui/src/App.css +++ b/core/http/react-ui/src/App.css @@ -6409,6 +6409,9 @@ select.input { font-size: 0.875rem; word-break: break-all; } +.home-starters-badge { + font-size: 0.625rem; +} .home-starters-size { margin-left: auto; font-size: 0.75rem; @@ -6416,6 +6419,74 @@ select.input { white-space: nowrap; } +/* ──────────────────── Models gallery: recommended-for-your-hardware strip ──────────────────── */ + +.rec-models { + margin-bottom: var(--spacing-md); + padding: var(--spacing-md) var(--spacing-lg); +} +.rec-models-head { + display: flex; + align-items: flex-start; + justify-content: space-between; + gap: var(--spacing-md); +} +.rec-models-title { + display: flex; + align-items: center; + gap: var(--spacing-sm); + flex-wrap: wrap; +} +.rec-models-title i { + color: var(--color-primary); +} +.rec-models-note { + font-size: 0.8125rem; + color: var(--color-text-secondary); +} +.rec-models-dismiss { + background: none; + border: none; + color: var(--color-text-muted); + cursor: pointer; + padding: 4px; + flex-shrink: 0; +} +.rec-models-dismiss:hover { + color: var(--color-text-primary); +} +.rec-models-grid { + display: grid; + grid-template-columns: repeat(auto-fill, minmax(220px, 1fr)); + gap: var(--spacing-sm); + margin-top: var(--spacing-md); +} +.rec-models-item { + display: flex; + flex-direction: column; + gap: var(--spacing-xs); + padding: var(--spacing-sm) var(--spacing-md); + border: 1px solid var(--color-border-subtle); + border-radius: var(--radius-md); + background: var(--color-bg-primary); +} +.rec-models-item-name { + font-weight: 500; + font-size: 0.8125rem; + word-break: break-all; +} +.rec-models-item-meta { + display: flex; + gap: var(--spacing-sm); + font-size: 0.75rem; + color: var(--color-text-muted); +} +.rec-models-item-fit { + display: inline-flex; + align-items: center; + gap: 4px; +} + /* ──────────────────── Home: drop-in endpoint / API compatibility ──────────────────── */ .home-connect { diff --git a/core/http/react-ui/src/components/RecommendedModels.jsx b/core/http/react-ui/src/components/RecommendedModels.jsx new file mode 100644 index 000000000..7620406c8 --- /dev/null +++ b/core/http/react-ui/src/components/RecommendedModels.jsx @@ -0,0 +1,86 @@ +import { useState } from 'react' +import { useTranslation } from 'react-i18next' +import { modelsApi } from '../utils/api' +import { useRecommendedModels, isNvfp4Name } from '../hooks/useRecommendedModels' + +const DISMISS_KEY = 'localai_rec_models_dismissed' + +// "Recommended for your hardware" strip at the top of the Models gallery. Shares +// the hardware-fit ranking with the empty-state starter widget via +// useRecommendedModels, but styled for the gallery page and dismissible (the +// gallery is a repeat-visit surface, so it shouldn't nag). +export default function RecommendedModels({ addToast }) { + const { t } = useTranslation('models') + const { recommended, tier, loading } = useRecommendedModels({ count: 4 }) + const [installing, setInstalling] = useState(() => new Set()) + const [dismissed, setDismissed] = useState(() => { + try { return localStorage.getItem(DISMISS_KEY) === '1' } catch { return false } + }) + + if (loading || dismissed) return null + if (!recommended || recommended.length === 0) return null + + const dismiss = () => { + try { localStorage.setItem(DISMISS_KEY, '1') } catch { /* ignore */ } + setDismissed(true) + } + + const install = async (name) => { + setInstalling(prev => new Set(prev).add(name)) + try { + await modelsApi.install(name) + addToast?.(t('recommended.installStarted', { model: name }), 'success') + } catch (err) { + addToast?.(t('recommended.installFailed', { message: err.message }), 'error') + setInstalling(prev => { + const next = new Set(prev) + next.delete(name) + return next + }) + } + } + + const isGpu = tier.id !== 'cpu' + + return ( + + + + + {t('recommended.title')} + {isGpu ? t('recommended.gpuNote') : t('recommended.cpuNote')} + + + + + + + {recommended.map(m => { + const busy = installing.has(m.name) + return ( + + {m.name} + + {isNvfp4Name(m.name) && NVFP4} + {m.sizeDisplay && {m.sizeDisplay}} + {isGpu && m.vramDisplay && ( + {m.vramDisplay} + )} + + install(m.name)} + > + {busy + ? (<> {t('recommended.installing')}>) + : (<> {t('recommended.install')}>)} + + + ) + })} + + + ) +} diff --git a/core/http/react-ui/src/components/StarterModels.jsx b/core/http/react-ui/src/components/StarterModels.jsx index 9273ae147..d5f8122b6 100644 --- a/core/http/react-ui/src/components/StarterModels.jsx +++ b/core/http/react-ui/src/components/StarterModels.jsx @@ -1,79 +1,78 @@ -import { useState, useEffect, useMemo } from 'react' +import { useState } from 'react' import { useTranslation } from 'react-i18next' import { modelsApi } from '../utils/api' -import { useResources } from '../hooks/useResources' +import { useRecommendedModels, isNvfp4Name } from '../hooks/useRecommendedModels' -// Curated, hardware-tiered starter models for the empty-state onboarding. Names -// are real gallery entries (gallery/index.yaml); we intersect them against the -// live gallery at render time so a custom/trimmed gallery degrades gracefully -// (unmatched entries simply don't render). -// -// The guiding rule the maintainer asked for: CPU-only machines should be -// steered to genuinely small models (1-4B, Q4) that stay responsive without a -// GPU. GPU tiers scale the suggestion up with available VRAM. -const SMALL = [ - { name: 'llama-3.2-1b-instruct:q4_k_m', size: '~0.8 GB' }, - { name: 'llama-3.2-3b-instruct:q4_k_m', size: '~2 GB' }, - { name: 'qwen3-1.7b', size: '~1.4 GB' }, - { name: 'gemma-3-1b-it', size: '~0.8 GB' }, -] -const MID = [ - { name: 'qwen3-4b', size: '~2.5 GB' }, - { name: 'gemma-3-4b-it', size: '~3 GB' }, - { name: 'llama-3.2-3b-instruct:q4_k_m', size: '~2 GB' }, -] -const LARGE = [ - { name: 'meta-llama-3.1-8b-instruct', size: '~5 GB' }, - { name: 'qwen3-4b', size: '~2.5 GB' }, - { name: 'mistral-7b-instruct-v0.3', size: '~4 GB' }, -] +// Static fallback used only when the live gallery / estimates can't be reached +// (offline, trimmed gallery). The hook is the primary, data-driven path; these +// are real gallery names kept as a safety net so onboarding never shows nothing. +// Gemma picks use the QAT (quantization-aware-trained) Q4 builds. NVIDIA boxes +// get NVFP4 + MTP variants at the mid/large tiers (see NVIDIA below). +const BASE = { + cpu: [ + { name: 'gemma-4-e2b-it-qat-q4_0', size: '~1.5 GB' }, + { name: 'qwen3.5-4b-claude-4.6-opus-reasoning-distilled', size: '~2.5 GB' }, + { name: 'gemma-4-e4b-it-qat-q4_0', size: '~3 GB' }, + { name: 'lfm2.5-1.2b-instruct', size: '~0.8 GB' }, + ], + 'gpu-small': [ + { name: 'gemma-4-e4b-it-qat-q4_0', size: '~3 GB' }, + { name: 'lfm2.5-8b-a1b', size: '~5 GB' }, + { name: 'qwen3.5-9b', size: '~5.5 GB' }, + { name: 'gemma-4-12b-it-qat-q4_0', size: '~7 GB' }, + ], + 'gpu-mid': [ + { name: 'qwen3.6-27b', size: '~16 GB' }, + { name: 'qwen3.6-27b-mtp-pi-tune', size: '~16 GB' }, + { name: 'gemma-4-26b-a4b-it-qat-q4_0', size: '~16 GB' }, + { name: 'qwen3.5-27b', size: '~16 GB' }, + ], + 'gpu-large': [ + { name: 'qwen3.6-35b-a3b-apex', size: '~20 GB' }, + { name: 'qwen3.6-35b-a3b-claude-4.6-opus-reasoning-distilled', size: '~20 GB' }, + { name: 'gemma-4-31b-it-qat-q4_0', size: '~18 GB' }, + { name: 'qwen3.5-35b-a3b-apex', size: '~20 GB' }, + ], +} -const GB = 1024 * 1024 * 1024 +// NVIDIA-only overrides: NVFP4 is a Blackwell-optimised 4-bit format paired with +// MTP (multi-token prediction) for speed. Only the mid/large tiers have these. +const NVIDIA = { + 'gpu-mid': [ + { name: 'qwen3.6-27b-nvfp4-mtp', size: '~14 GB' }, + { name: 'qwen3.6-27b-mtp-pi-tune', size: '~16 GB' }, + { name: 'gemma-4-26b-a4b-it-qat-q4_0', size: '~16 GB' }, + { name: 'qwen3.6-27b', size: '~16 GB' }, + ], + 'gpu-large': [ + { name: 'qwen3.6-35b-a3b-nvfp4-mtp', size: '~18 GB' }, + { name: 'qwen3.6-27b-nvfp4-mtp', size: '~14 GB' }, + { name: 'qwen3.6-35b-a3b-apex', size: '~20 GB' }, + { name: 'gemma-4-31b-it-qat-q4_0', size: '~18 GB' }, + ], +} -// Pick a tier from detected hardware. total_memory is GPU VRAM in bytes (0 when -// CPU-only). Thresholds are deliberately conservative so a suggestion that -// "fits" really does. -function pickTier(resources) { - const isGpu = resources?.type === 'gpu' - const vram = resources?.aggregate?.total_memory || 0 - if (!isGpu || vram <= 0) return { id: 'cpu', list: SMALL } - if (vram < 8 * GB) return { id: 'gpu-small', list: MID } - return { id: 'gpu-large', list: LARGE } +function fallbackFor(tierId, isNvidia) { + if (isNvidia && NVIDIA[tierId]) return NVIDIA[tierId] + return BASE[tierId] || BASE.cpu } export default function StarterModels({ addToast, onInstallStarted }) { const { t } = useTranslation('home') - const { resources } = useResources() - const [available, setAvailable] = useState(null) // Set of gallery names, or null while loading + const { recommended, tier, isNvidia, loading } = useRecommendedModels({ count: 4 }) const [installing, setInstalling] = useState(() => new Set()) - const tier = useMemo(() => pickTier(resources), [resources]) - const candidates = tier.list + // While the hardware probe + gallery query are in flight, render nothing + // rather than flashing fallback content that may be replaced a moment later. + if (loading) return null - // Verify candidates exist in the live gallery. One search per name (the tier - // has at most a handful) keeps this resilient to gallery customization. - useEffect(() => { - let cancelled = false - const names = [...new Set(candidates.map(c => c.name))] - Promise.all(names.map(name => - modelsApi.list({ search: name, page: 1 }) - .then(data => (data?.models || []).some(m => (m.name || m.id) === name) ? name : null) - .catch(() => null) - )).then(found => { - if (cancelled) return - const hits = found.filter(Boolean) - // If verification yielded nothing (e.g. gallery unreachable), fall back to - // showing the curated list rather than an empty widget. - setAvailable(hits.length > 0 ? new Set(hits) : null) - }) - return () => { cancelled = true } - }, [candidates]) + // Prefer live recommendations; fall back to the static list only when the + // gallery yielded nothing. + const items = (recommended && recommended.length > 0) + ? recommended.map(r => ({ name: r.name, size: r.sizeDisplay })) + : fallbackFor(tier.id, isNvidia) - const visible = available === null - ? candidates - : candidates.filter(c => available.has(c.name)) - - if (visible.length === 0) return null + if (items.length === 0) return null const install = async (name) => { setInstalling(prev => new Set(prev).add(name)) @@ -104,12 +103,13 @@ export default function StarterModels({ addToast, onInstallStarted }) { {tier.id === 'cpu' ? t('starters.cpuNote') : t('starters.gpuNote')}