From f88981cdce96cd0056119e97500a4b8f31679d67 Mon Sep 17 00:00:00 2001
From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com>
Date: Thu, 25 Jun 2026 00:22:45 +0200
Subject: [PATCH 01/11] feat(ui): data-driven hardware model recommendations +
 gallery surfacing (#10500)

* feat(ui): make hardware starter models data-driven

The empty-state starter widget recommended from a hardcoded list, which
drifts as the gallery evolves. Add useRecommendedModels: it queries the
live gallery for chat-capable models (their natural curated order, since
the gallery exposes no popularity signal), estimates size/VRAM for the top
candidates via the existing estimate endpoint, and ranks by hardware fit -
smallest on CPU-only boxes, largest-that-fits on GPUs.

StarterModels now renders those live picks and keeps the curated static
list only as an offline/trimmed-gallery fallback.

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Assisted-by: Claude:claude-opus-4-8 [Claude Code]

* feat(ui): recommend models for your hardware in the gallery

Hardware-aware recommendations were only shown on the first-run empty
state. Surface them on the main Models gallery too: a dismissible
"Recommended for your hardware" strip at the top, sharing the
useRecommendedModels fit-ranking with the starter widget. CPU-only boxes
get small models; GPUs get the largest picks that fit VRAM, with size and
VRAM shown per card. One-click install; dismissal persists per browser.

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Assisted-by: Claude:claude-opus-4-8 [Claude Code]

* feat(ui): gpu-mid tier + NVIDIA NVFP4 model recommendations

Refine the hardware recommendation tiers and curated picks:

- Add a gpu-mid tier (8-24GB VRAM) between gpu-small and gpu-large, so
  ~27B-class models are suggested separately from the 30B+ large tier.
- Detect NVIDIA GPUs (resources.gpus[].vendor) and, on NVIDIA only, prefer
  NVFP4 + MTP variants (Blackwell-optimised); NVFP4 models are filtered out
  of recommendations on non-NVIDIA hardware where they can't run. This
  applies to both the live ranking and the static fallback, with an NVFP4
  badge shown on those picks.
- Refresh the curated fallback to current models: Gemma-4 QAT Q4 builds at
  every tier, low qwen3.5 (4B distilled / 9B) on CPU/small, qwen3.6-27b
  and MTP variants at mid, qwen3.6/qwen3.5 35B-A3B apex/distilled at large.
  All names verified against gallery/index.yaml.

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Assisted-by: Claude:claude-opus-4-8 [Claude Code]

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Co-authored-by: Ettore Di Giacinto <mudler@localai.io>
---
 .../http/react-ui/public/locales/en/home.json |   1 +
 .../react-ui/public/locales/en/models.json    |  10 ++
 core/http/react-ui/src/App.css                |  71 ++++++++++
 .../src/components/RecommendedModels.jsx      |  86 ++++++++++++
 .../react-ui/src/components/StarterModels.jsx | 130 +++++++++---------
 .../src/hooks/useRecommendedModels.js         | 108 +++++++++++++++
 core/http/react-ui/src/pages/Models.jsx       |   3 +
 7 files changed, 344 insertions(+), 65 deletions(-)
 create mode 100644 core/http/react-ui/src/components/RecommendedModels.jsx
 create mode 100644 core/http/react-ui/src/hooks/useRecommendedModels.js

diff --git a/core/http/react-ui/public/locales/en/home.json b/core/http/react-ui/public/locales/en/home.json
index 142767999..35533a5a8 100644
--- a/core/http/react-ui/public/locales/en/home.json
+++ b/core/http/react-ui/public/locales/en/home.json
@@ -82,6 +82,7 @@
     "tier": {
       "cpu": "CPU-only",
       "gpu-small": "GPU",
+      "gpu-mid": "GPU",
       "gpu-large": "GPU"
     },
     "cpuNote": "No GPU detected — these small models stay responsive on CPU.",
diff --git a/core/http/react-ui/public/locales/en/models.json b/core/http/react-ui/public/locales/en/models.json
index 2bf7b018d..bd23d389e 100644
--- a/core/http/react-ui/public/locales/en/models.json
+++ b/core/http/react-ui/public/locales/en/models.json
@@ -2,6 +2,16 @@
   "title": "Install Models",
   "subtitle": "Browse and install AI models from the gallery",
   "models": "Models",
+  "recommended": {
+    "title": "Recommended for your hardware",
+    "cpuNote": "No GPU detected - small models that stay responsive on CPU.",
+    "gpuNote": "Sized to fit your available VRAM with room for context.",
+    "install": "Install",
+    "installing": "Installing",
+    "installStarted": "Installing {{model}}…",
+    "installFailed": "Install failed: {{message}}",
+    "dismiss": "Dismiss recommendations"
+  },
   "stats": {
     "available": "Available",
     "installed": "Installed"
diff --git a/core/http/react-ui/src/App.css b/core/http/react-ui/src/App.css
index 40eddc2e9..4578a3dd8 100644
--- a/core/http/react-ui/src/App.css
+++ b/core/http/react-ui/src/App.css
@@ -6409,6 +6409,9 @@ select.input {
   font-size: 0.875rem;
   word-break: break-all;
 }
+.home-starters-badge {
+  font-size: 0.625rem;
+}
 .home-starters-size {
   margin-left: auto;
   font-size: 0.75rem;
@@ -6416,6 +6419,74 @@ select.input {
   white-space: nowrap;
 }
 
+/* ──────────────────── Models gallery: recommended-for-your-hardware strip ──────────────────── */
+
+.rec-models {
+  margin-bottom: var(--spacing-md);
+  padding: var(--spacing-md) var(--spacing-lg);
+}
+.rec-models-head {
+  display: flex;
+  align-items: flex-start;
+  justify-content: space-between;
+  gap: var(--spacing-md);
+}
+.rec-models-title {
+  display: flex;
+  align-items: center;
+  gap: var(--spacing-sm);
+  flex-wrap: wrap;
+}
+.rec-models-title i {
+  color: var(--color-primary);
+}
+.rec-models-note {
+  font-size: 0.8125rem;
+  color: var(--color-text-secondary);
+}
+.rec-models-dismiss {
+  background: none;
+  border: none;
+  color: var(--color-text-muted);
+  cursor: pointer;
+  padding: 4px;
+  flex-shrink: 0;
+}
+.rec-models-dismiss:hover {
+  color: var(--color-text-primary);
+}
+.rec-models-grid {
+  display: grid;
+  grid-template-columns: repeat(auto-fill, minmax(220px, 1fr));
+  gap: var(--spacing-sm);
+  margin-top: var(--spacing-md);
+}
+.rec-models-item {
+  display: flex;
+  flex-direction: column;
+  gap: var(--spacing-xs);
+  padding: var(--spacing-sm) var(--spacing-md);
+  border: 1px solid var(--color-border-subtle);
+  border-radius: var(--radius-md);
+  background: var(--color-bg-primary);
+}
+.rec-models-item-name {
+  font-weight: 500;
+  font-size: 0.8125rem;
+  word-break: break-all;
+}
+.rec-models-item-meta {
+  display: flex;
+  gap: var(--spacing-sm);
+  font-size: 0.75rem;
+  color: var(--color-text-muted);
+}
+.rec-models-item-fit {
+  display: inline-flex;
+  align-items: center;
+  gap: 4px;
+}
+
 /* ──────────────────── Home: drop-in endpoint / API compatibility ──────────────────── */
 
 .home-connect {
diff --git a/core/http/react-ui/src/components/RecommendedModels.jsx b/core/http/react-ui/src/components/RecommendedModels.jsx
new file mode 100644
index 000000000..7620406c8
--- /dev/null
+++ b/core/http/react-ui/src/components/RecommendedModels.jsx
@@ -0,0 +1,86 @@
+import { useState } from 'react'
+import { useTranslation } from 'react-i18next'
+import { modelsApi } from '../utils/api'
+import { useRecommendedModels, isNvfp4Name } from '../hooks/useRecommendedModels'
+
+const DISMISS_KEY = 'localai_rec_models_dismissed'
+
+// "Recommended for your hardware" strip at the top of the Models gallery. Shares
+// the hardware-fit ranking with the empty-state starter widget via
+// useRecommendedModels, but styled for the gallery page and dismissible (the
+// gallery is a repeat-visit surface, so it shouldn't nag).
+export default function RecommendedModels({ addToast }) {
+  const { t } = useTranslation('models')
+  const { recommended, tier, loading } = useRecommendedModels({ count: 4 })
+  const [installing, setInstalling] = useState(() => new Set())
+  const [dismissed, setDismissed] = useState(() => {
+    try { return localStorage.getItem(DISMISS_KEY) === '1' } catch { return false }
+  })
+
+  if (loading || dismissed) return null
+  if (!recommended || recommended.length === 0) return null
+
+  const dismiss = () => {
+    try { localStorage.setItem(DISMISS_KEY, '1') } catch { /* ignore */ }
+    setDismissed(true)
+  }
+
+  const install = async (name) => {
+    setInstalling(prev => new Set(prev).add(name))
+    try {
+      await modelsApi.install(name)
+      addToast?.(t('recommended.installStarted', { model: name }), 'success')
+    } catch (err) {
+      addToast?.(t('recommended.installFailed', { message: err.message }), 'error')
+      setInstalling(prev => {
+        const next = new Set(prev)
+        next.delete(name)
+        return next
+      })
+    }
+  }
+
+  const isGpu = tier.id !== 'cpu'
+
+  return (
+    <div className="rec-models card">
+      <div className="rec-models-head">
+        <div className="rec-models-title">
+          <i className={`fas ${isGpu ? 'fa-microchip' : 'fa-memory'}`} aria-hidden="true" />
+          <strong>{t('recommended.title')}</strong>
+          <span className="rec-models-note">{isGpu ? t('recommended.gpuNote') : t('recommended.cpuNote')}</span>
+        </div>
+        <button type="button" className="rec-models-dismiss" onClick={dismiss} aria-label={t('recommended.dismiss')} title={t('recommended.dismiss')}>
+          <i className="fas fa-times" aria-hidden="true" />
+        </button>
+      </div>
+      <div className="rec-models-grid">
+        {recommended.map(m => {
+          const busy = installing.has(m.name)
+          return (
+            <div key={m.name} className="rec-models-item">
+              <div className="rec-models-item-name">{m.name}</div>
+              <div className="rec-models-item-meta">
+                {isNvfp4Name(m.name) && <span className="badge badge-info">NVFP4</span>}
+                {m.sizeDisplay && <span>{m.sizeDisplay}</span>}
+                {isGpu && m.vramDisplay && (
+                  <span className="rec-models-item-fit"><i className="fas fa-microchip" aria-hidden="true" /> {m.vramDisplay}</span>
+                )}
+              </div>
+              <button
+                type="button"
+                className="btn btn-primary btn-sm"
+                disabled={busy}
+                onClick={() => install(m.name)}
+              >
+                {busy
+                  ? (<><i className="fas fa-spinner fa-spin" aria-hidden="true" /> {t('recommended.installing')}</>)
+                  : (<><i className="fas fa-download" aria-hidden="true" /> {t('recommended.install')}</>)}
+              </button>
+            </div>
+          )
+        })}
+      </div>
+    </div>
+  )
+}
diff --git a/core/http/react-ui/src/components/StarterModels.jsx b/core/http/react-ui/src/components/StarterModels.jsx
index 9273ae147..d5f8122b6 100644
--- a/core/http/react-ui/src/components/StarterModels.jsx
+++ b/core/http/react-ui/src/components/StarterModels.jsx
@@ -1,79 +1,78 @@
-import { useState, useEffect, useMemo } from 'react'
+import { useState } from 'react'
 import { useTranslation } from 'react-i18next'
 import { modelsApi } from '../utils/api'
-import { useResources } from '../hooks/useResources'
+import { useRecommendedModels, isNvfp4Name } from '../hooks/useRecommendedModels'
 
-// Curated, hardware-tiered starter models for the empty-state onboarding. Names
-// are real gallery entries (gallery/index.yaml); we intersect them against the
-// live gallery at render time so a custom/trimmed gallery degrades gracefully
-// (unmatched entries simply don't render).
-//
-// The guiding rule the maintainer asked for: CPU-only machines should be
-// steered to genuinely small models (1-4B, Q4) that stay responsive without a
-// GPU. GPU tiers scale the suggestion up with available VRAM.
-const SMALL = [
-  { name: 'llama-3.2-1b-instruct:q4_k_m', size: '~0.8 GB' },
-  { name: 'llama-3.2-3b-instruct:q4_k_m', size: '~2 GB' },
-  { name: 'qwen3-1.7b', size: '~1.4 GB' },
-  { name: 'gemma-3-1b-it', size: '~0.8 GB' },
-]
-const MID = [
-  { name: 'qwen3-4b', size: '~2.5 GB' },
-  { name: 'gemma-3-4b-it', size: '~3 GB' },
-  { name: 'llama-3.2-3b-instruct:q4_k_m', size: '~2 GB' },
-]
-const LARGE = [
-  { name: 'meta-llama-3.1-8b-instruct', size: '~5 GB' },
-  { name: 'qwen3-4b', size: '~2.5 GB' },
-  { name: 'mistral-7b-instruct-v0.3', size: '~4 GB' },
-]
+// Static fallback used only when the live gallery / estimates can't be reached
+// (offline, trimmed gallery). The hook is the primary, data-driven path; these
+// are real gallery names kept as a safety net so onboarding never shows nothing.
+// Gemma picks use the QAT (quantization-aware-trained) Q4 builds. NVIDIA boxes
+// get NVFP4 + MTP variants at the mid/large tiers (see NVIDIA below).
+const BASE = {
+  cpu: [
+    { name: 'gemma-4-e2b-it-qat-q4_0', size: '~1.5 GB' },
+    { name: 'qwen3.5-4b-claude-4.6-opus-reasoning-distilled', size: '~2.5 GB' },
+    { name: 'gemma-4-e4b-it-qat-q4_0', size: '~3 GB' },
+    { name: 'lfm2.5-1.2b-instruct', size: '~0.8 GB' },
+  ],
+  'gpu-small': [
+    { name: 'gemma-4-e4b-it-qat-q4_0', size: '~3 GB' },
+    { name: 'lfm2.5-8b-a1b', size: '~5 GB' },
+    { name: 'qwen3.5-9b', size: '~5.5 GB' },
+    { name: 'gemma-4-12b-it-qat-q4_0', size: '~7 GB' },
+  ],
+  'gpu-mid': [
+    { name: 'qwen3.6-27b', size: '~16 GB' },
+    { name: 'qwen3.6-27b-mtp-pi-tune', size: '~16 GB' },
+    { name: 'gemma-4-26b-a4b-it-qat-q4_0', size: '~16 GB' },
+    { name: 'qwen3.5-27b', size: '~16 GB' },
+  ],
+  'gpu-large': [
+    { name: 'qwen3.6-35b-a3b-apex', size: '~20 GB' },
+    { name: 'qwen3.6-35b-a3b-claude-4.6-opus-reasoning-distilled', size: '~20 GB' },
+    { name: 'gemma-4-31b-it-qat-q4_0', size: '~18 GB' },
+    { name: 'qwen3.5-35b-a3b-apex', size: '~20 GB' },
+  ],
+}
 
-const GB = 1024 * 1024 * 1024
+// NVIDIA-only overrides: NVFP4 is a Blackwell-optimised 4-bit format paired with
+// MTP (multi-token prediction) for speed. Only the mid/large tiers have these.
+const NVIDIA = {
+  'gpu-mid': [
+    { name: 'qwen3.6-27b-nvfp4-mtp', size: '~14 GB' },
+    { name: 'qwen3.6-27b-mtp-pi-tune', size: '~16 GB' },
+    { name: 'gemma-4-26b-a4b-it-qat-q4_0', size: '~16 GB' },
+    { name: 'qwen3.6-27b', size: '~16 GB' },
+  ],
+  'gpu-large': [
+    { name: 'qwen3.6-35b-a3b-nvfp4-mtp', size: '~18 GB' },
+    { name: 'qwen3.6-27b-nvfp4-mtp', size: '~14 GB' },
+    { name: 'qwen3.6-35b-a3b-apex', size: '~20 GB' },
+    { name: 'gemma-4-31b-it-qat-q4_0', size: '~18 GB' },
+  ],
+}
 
-// Pick a tier from detected hardware. total_memory is GPU VRAM in bytes (0 when
-// CPU-only). Thresholds are deliberately conservative so a suggestion that
-// "fits" really does.
-function pickTier(resources) {
-  const isGpu = resources?.type === 'gpu'
-  const vram = resources?.aggregate?.total_memory || 0
-  if (!isGpu || vram <= 0) return { id: 'cpu', list: SMALL }
-  if (vram < 8 * GB) return { id: 'gpu-small', list: MID }
-  return { id: 'gpu-large', list: LARGE }
+function fallbackFor(tierId, isNvidia) { // static picks for a tier id; returns one of the BASE / NVIDIA arrays
+  if (isNvidia && NVIDIA[tierId]) return NVIDIA[tierId] // NVIDIA override wins when that tier has one
+  return BASE[tierId] || BASE.cpu // tier id missing from BASE → CPU list
 }
 
 export default function StarterModels({ addToast, onInstallStarted }) {
   const { t } = useTranslation('home')
-  const { resources } = useResources()
-  const [available, setAvailable] = useState(null) // Set of gallery names, or null while loading
+  const { recommended, tier, isNvidia, loading } = useRecommendedModels({ count: 4 })
   const [installing, setInstalling] = useState(() => new Set())
 
-  const tier = useMemo(() => pickTier(resources), [resources])
-  const candidates = tier.list
+  // While the hardware probe + gallery query are in flight, render nothing
+  // rather than flashing fallback content that may be replaced a moment later.
+  if (loading) return null
 
-  // Verify candidates exist in the live gallery. One search per name (the tier
-  // has at most a handful) keeps this resilient to gallery customization.
-  useEffect(() => {
-    let cancelled = false
-    const names = [...new Set(candidates.map(c => c.name))]
-    Promise.all(names.map(name =>
-      modelsApi.list({ search: name, page: 1 })
-        .then(data => (data?.models || []).some(m => (m.name || m.id) === name) ? name : null)
-        .catch(() => null)
-    )).then(found => {
-      if (cancelled) return
-      const hits = found.filter(Boolean)
-      // If verification yielded nothing (e.g. gallery unreachable), fall back to
-      // showing the curated list rather than an empty widget.
-      setAvailable(hits.length > 0 ? new Set(hits) : null)
-    })
-    return () => { cancelled = true }
-  }, [candidates])
+  // Prefer live recommendations; fall back to the static list only when the
+  // gallery yielded nothing.
+  const items = (recommended && recommended.length > 0)
+    ? recommended.map(r => ({ name: r.name, size: r.sizeDisplay }))
+    : fallbackFor(tier.id, isNvidia)
 
-  const visible = available === null
-    ? candidates
-    : candidates.filter(c => available.has(c.name))
-
-  if (visible.length === 0) return null
+  if (items.length === 0) return null
 
   const install = async (name) => {
     setInstalling(prev => new Set(prev).add(name))
@@ -104,12 +103,13 @@ export default function StarterModels({ addToast, onInstallStarted }) {
         {tier.id === 'cpu' ? t('starters.cpuNote') : t('starters.gpuNote')}
       </p>
       <ul className="home-starters-list">
-        {visible.map(c => {
+        {items.map(c => {
           const busy = installing.has(c.name)
           return (
             <li key={c.name} className="home-starters-item">
               <span className="home-starters-name">{c.name}</span>
-              <span className="home-starters-size">{c.size}</span>
+              {isNvfp4Name(c.name) && <span className="badge badge-info home-starters-badge">NVFP4</span>}
+              {c.size && <span className="home-starters-size">{c.size}</span>}
               <button
                 type="button"
                 className="btn btn-primary btn-sm"
diff --git a/core/http/react-ui/src/hooks/useRecommendedModels.js b/core/http/react-ui/src/hooks/useRecommendedModels.js
new file mode 100644
index 000000000..ca6090177
--- /dev/null
+++ b/core/http/react-ui/src/hooks/useRecommendedModels.js
@@ -0,0 +1,108 @@
+import { useState, useEffect } from 'react'
+import { modelsApi } from '../utils/api'
+import { useResources } from './useResources'
+
+// Data-driven "recommended for your hardware" model picks. The gallery exposes
+// no popularity/download signal and the list response carries no size, so we:
+//   1. ask the server for chat-capable models in their natural (curated) order,
+//   2. estimate size/VRAM for the top candidates (same endpoint the Models page
+//      uses), and
+//   3. rank by hardware fit — smallest on CPU-only boxes, largest-that-fits on
+//      GPUs (bigger == better quality while still fitting VRAM).
+//
+// Returns `recommended === null` while loading, `[]` when nothing could be
+// resolved (gallery/estimates unavailable) so callers can fall back.
+
+const GB = 1024 * 1024 * 1024
+const DEFAULT_CTX = 4096
+
+// NVFP4 is a Blackwell/NVIDIA-specific 4-bit format — only worth suggesting on
+// NVIDIA hardware, and to be filtered out elsewhere.
+export const isNvfp4Name = (name) => /nvfp4/i.test(name || '')
+
+export function hasNvidiaGpu(resources) { // true when any entry of resources.gpus has vendor "nvidia" (case-insensitive, exact)
+  return Array.isArray(resources?.gpus) && // null resources or a non-array gpus field → false
+    resources.gpus.some(g => (g?.vendor || '').toLowerCase() === 'nvidia')
+}
+
+export function recommendTier(resources) { // maps detected hardware to { id, vram }; id is cpu | gpu-small | gpu-mid | gpu-large
+  const isGpu = resources?.type === 'gpu'
+  const vram = resources?.aggregate?.total_memory || 0 // compared against GB multiples below, so presumably bytes of GPU memory — confirm against useResources
+  if (!isGpu || vram <= 0) return { id: 'cpu', vram: 0 } // null resources also land here
+  if (vram < 8 * GB) return { id: 'gpu-small', vram }
+  if (vram < 24 * GB) return { id: 'gpu-mid', vram } // 8 * GB <= vram < 24 * GB
+  return { id: 'gpu-large', vram }
+}
+
+function rank(candidates, tier, count, isNvidia) { // returns at most `count` candidates, ordered best-first for `tier`
+  // NVFP4 only runs on NVIDIA (Blackwell) — drop it everywhere else, and prefer
+  // it on NVIDIA boxes where it's the fastest path.
+  const pool = candidates.filter(c => c.sizeBytes != null && (isNvidia || !isNvfp4Name(c.name))) // entries without a size estimate are never ranked
+  if (tier.id === 'cpu') {
+    // No GPU: smallest models stay responsive on CPU.
+    return [...pool].sort((a, b) => a.sizeBytes - b.sizeBytes).slice(0, count)
+  }
+  const limit = tier.vram * 0.95 // a candidate "fits" when its vramBytes is at most 95% of tier.vram
+  const fits = pool.filter(c => c.vramBytes != null && c.vramBytes <= limit)
+  const base = fits.length > 0 ? fits : pool // tiny GPU where nothing fits → fall through to smallest
+  const byPreference = (a, b) => {
+    // On NVIDIA, surface NVFP4 first; then largest-that-fits (best quality).
+    if (isNvidia) {
+      const an = isNvfp4Name(a.name), bn = isNvfp4Name(b.name)
+      if (an !== bn) return an ? -1 : 1
+    }
+    return fits.length > 0 ? b.sizeBytes - a.sizeBytes : a.sizeBytes - b.sizeBytes // descending size when something fits, ascending otherwise
+  }
+  return [...base].sort(byPreference).slice(0, count)
+}
+
+export function useRecommendedModels({ count = 4, candidatePool = 10 } = {}) { // count: max picks returned; candidatePool: sent as `items` to modelsApi.list
+  const { resources } = useResources()
+  const [recommended, setRecommended] = useState(null)
+  const [error, setError] = useState(null)
+
+  const resReady = resources !== null // the effect below does nothing until resources is non-null
+  const tier = recommendTier(resources)
+  const isNvidia = hasNvidiaGpu(resources)
+
+  useEffect(() => {
+    if (!resReady) return
+    let cancelled = false
+    setRecommended(null) // back to the "loading" state whenever a dependency changes
+    setError(null)
+    ;(async () => {
+      try {
+        const data = await modelsApi.list({ tag: 'chat', items: candidatePool, page: 1 })
+        // Recommend models the user hasn't installed yet.
+        const models = (data?.models || []).filter(m => !m.installed)
+        const estimated = await Promise.all(models.map(async (m) => {
+          const name = m.name || m.id
+          try {
+            const e = await modelsApi.estimate(name, [DEFAULT_CTX])
+            const ctx = e?.estimates?.[String(DEFAULT_CTX)] // per-context estimate, keyed by the context size as a string
+            return {
+              name,
+              description: m.description,
+              sizeBytes: e?.sizeBytes ?? null,
+              sizeDisplay: e?.sizeDisplay ?? null,
+              vramBytes: ctx?.vramBytes ?? null,
+              vramDisplay: ctx?.vramDisplay ?? null,
+            }
+          } catch {
+            return { name, sizeBytes: null } // estimate failed: a null sizeBytes makes rank() discard this entry
+          }
+        }))
+        if (cancelled) return // effect was cleaned up while awaiting; drop the stale result
+        setRecommended(rank(estimated, tier, count, isNvidia))
+      } catch (e) {
+        if (cancelled) return
+        setError(e.message)
+        setRecommended([]) // list request failed: empty array lets callers fall back
+      }
+    })()
+    return () => { cancelled = true }
+    // tier.id / tier.vram / isNvidia are primitives, so resource polling doesn't re-run this.
+  }, [resReady, tier.id, tier.vram, isNvidia, count, candidatePool])
+
+  return { recommended, tier, isNvidia, error, loading: recommended === null } // NOTE(review): loading never clears if useResources never yields non-null resources — confirm that hook's failure behaviour
+}
diff --git a/core/http/react-ui/src/pages/Models.jsx b/core/http/react-ui/src/pages/Models.jsx
index 5f3a3908d..cf10fdfb5 100644
--- a/core/http/react-ui/src/pages/Models.jsx
+++ b/core/http/react-ui/src/pages/Models.jsx
@@ -13,6 +13,7 @@ import ConfirmDialog from '../components/ConfirmDialog'
 import GalleryLoader from '../components/GalleryLoader'
 import Toggle from '../components/Toggle'
 import ResponsiveTable from '../components/ResponsiveTable'
+import RecommendedModels from '../components/RecommendedModels'
 import React from 'react'
 
 
@@ -301,6 +302,8 @@ export default function Models() {
         }
       />
 
+      <RecommendedModels addToast={addToast} />
+
       {/* Search */}
       <div className="search-bar" style={{ marginBottom: 'var(--spacing-md)' }}>
         <i className="fas fa-search search-icon" />

From 3f647a2764749dff7a240019fae1f89f7a2580bb Mon Sep 17 00:00:00 2001
From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com>
Date: Thu, 25 Jun 2026 00:57:42 +0200
Subject: [PATCH 02/11] chore: :arrow_up: Update ikawrakow/ik_llama.cpp to
 `d5507e33ae7ee2b7b41475f08044d3bde3b839ee` (#10498)

:arrow_up: Update ikawrakow/ik_llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
---
 backend/cpp/ik-llama-cpp/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/backend/cpp/ik-llama-cpp/Makefile b/backend/cpp/ik-llama-cpp/Makefile
index 0fbcf0bdb..860606253 100644
--- a/backend/cpp/ik-llama-cpp/Makefile
+++ b/backend/cpp/ik-llama-cpp/Makefile
@@ -1,5 +1,5 @@
 
-IK_LLAMA_VERSION?=7ccf1d209588962b96eacca325b37e9b3e8faf5e
+IK_LLAMA_VERSION?=d5507e33ae7ee2b7b41475f08044d3bde3b839ee
 LLAMA_REPO?=https://github.com/ikawrakow/ik_llama.cpp
 
 CMAKE_ARGS?=

From 3c63431e467f99d7e544215fe59597cf4a69c23b Mon Sep 17 00:00:00 2001
From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com>
Date: Thu, 25 Jun 2026 00:57:58 +0200
Subject: [PATCH 03/11] chore: :arrow_up: Update ServeurpersoCom/omnivoice.cpp
 to `0f37401bebe9b20c0160a888e592108fc1d17607` (#10492)

:arrow_up: Update ServeurpersoCom/omnivoice.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
---
 backend/go/omnivoice-cpp/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/backend/go/omnivoice-cpp/Makefile b/backend/go/omnivoice-cpp/Makefile
index b42610aac..c245acf58 100644
--- a/backend/go/omnivoice-cpp/Makefile
+++ b/backend/go/omnivoice-cpp/Makefile
@@ -8,7 +8,7 @@ JOBS?=$(shell nproc --ignore=1)
 
 # omnivoice.cpp version
 OMNIVOICE_REPO?=https://github.com/ServeurpersoCom/omnivoice.cpp
-OMNIVOICE_VERSION?=96d30169afd5e6bb3fd6a0e9be0eb505bfe81fcd
+OMNIVOICE_VERSION?=0f37401bebe9b20c0160a888e592108fc1d17607
 SO_TARGET?=libgomnivoicecpp.so
 
 CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF

From c678530cf0e3ff273678841cd24dbc73a072ab2b Mon Sep 17 00:00:00 2001
From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com>
Date: Thu, 25 Jun 2026 08:09:18 +0200
Subject: [PATCH 04/11] fix(backends): darwin/metal support across purego Go
 backends (#10481)

* fix(parakeet-cpp): darwin/metal support (libparakeet.dylib + DYLD path)

The parakeet-cpp backend had no macOS support and panicked at startup on
Apple/Metal nodes when purego.Dlopen could not find "libparakeet.so".
Fix it across the same four layers the sibling voxtral backend already
handles correctly:

- main.go: default the dlopen target to libparakeet.dylib on darwin
  (runtime.GOOS), libparakeet.so elsewhere; PARAKEET_LIBRARY still wins.
- Makefile: also stage the built libparakeet.dylib next to the Go sources.
- package.sh: accept either the Linux .so[.X.Y] or the macOS .dylib when
  bundling instead of hard-failing when no .so is present (the macOS case);
  note that on Darwin only system frameworks are linked.
- run.sh: on Darwin set DYLD_LIBRARY_PATH and PARAKEET_LIBRARY to the
  packaged .dylib; keep LD_LIBRARY_PATH + .so on Linux.

Mirrors backend/go/voxtral.

Assisted-by: Claude:claude-opus-4-8
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* fix(backends): darwin/metal support across purego Go backends

The parakeet-cpp fix in the previous commit was an instance of a bug
shared by nearly every purego/dlopen Go backend: the dlopen target was
hardcoded to a .so name and run.sh exported only LD_LIBRARY_PATH, so the
backend panicked at startup on macOS/Apple-Metal nodes (dyld needs the
.dylib name and DYLD_LIBRARY_PATH). voxtral was the only backend handling
this correctly.

Apply the same four-layer fix (mirroring backend/go/voxtral) to the
remaining affected backends:

  whisper, sherpa-onnx, ced, stablediffusion-ggml, vibevoice-cpp,
  qwen3-tts-cpp, omnivoice-cpp, crispasr, acestep-cpp, locate-anything-cpp,
  depth-anything-cpp, rfdetr-cpp, sam3-cpp, localvqe

Per backend:
- main.go (sherpa-onnx: backend.go, two libraries): default the dlopen
  target to the .dylib on darwin (runtime.GOOS), .so elsewhere; the
  existing <BACKEND>_LIBRARY env override still wins.
- run.sh: on Darwin set DYLD_LIBRARY_PATH and point <BACKEND>_LIBRARY at
  the packaged .dylib; keep LD_LIBRARY_PATH + the Linux CPU-variant
  (avx/avx2/avx512) selection unchanged in the else branch.
- package.sh: also bundle the .dylib and stop hard-failing when no .so is
  present (the macOS case).
- Makefile: also stage the built .dylib.

Notes:
- stablediffusion-ggml and acestep-cpp build their lib as a CMake MODULE,
  which emits .so (not .dylib) on macOS; run.sh prefers .dylib and falls
  back to .so so both layouts work.
- sherpa-onnx was already partly darwin-aware (Makefile/package.sh); only
  run.sh and the two dlopen defaults needed fixing.

Linux behavior is unchanged. Verified gofmt-clean and
`CGO_ENABLED=0 go build` for every backend.

Assisted-by: Claude:claude-opus-4-8
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Co-authored-by: Ettore Di Giacinto <mudler@localai.io>
---
 backend/go/acestep-cpp/Makefile            |  3 ++-
 backend/go/acestep-cpp/main.go             |  7 ++++++-
 backend/go/acestep-cpp/package.sh          |  1 +
 backend/go/acestep-cpp/run.sh              | 17 ++++++++++++++---
 backend/go/ced/Makefile                    |  1 +
 backend/go/ced/main.go                     |  7 ++++++-
 backend/go/ced/package.sh                  |  8 +++++---
 backend/go/ced/run.sh                      |  7 ++++++-
 backend/go/crispasr/Makefile               | 15 ++++++++++++---
 backend/go/crispasr/main.go                |  7 ++++++-
 backend/go/crispasr/package.sh             |  3 ++-
 backend/go/crispasr/run.sh                 | 11 ++++++++---
 backend/go/depth-anything-cpp/Makefile     | 15 ++++++++++++---
 backend/go/depth-anything-cpp/main.go      |  7 ++++++-
 backend/go/depth-anything-cpp/package.sh   |  3 ++-
 backend/go/depth-anything-cpp/run.sh       | 11 ++++++++---
 backend/go/localvqe/Makefile               |  3 ++-
 backend/go/localvqe/main.go                |  7 ++++++-
 backend/go/localvqe/package.sh             |  2 ++
 backend/go/localvqe/run.sh                 | 15 +++++++++++++--
 backend/go/locate-anything-cpp/Makefile    | 15 ++++++++++++---
 backend/go/locate-anything-cpp/main.go     |  7 ++++++-
 backend/go/locate-anything-cpp/package.sh  |  3 ++-
 backend/go/locate-anything-cpp/run.sh      | 11 ++++++++---
 backend/go/omnivoice-cpp/Makefile          | 14 +++++++++++---
 backend/go/omnivoice-cpp/main.go           |  7 ++++++-
 backend/go/omnivoice-cpp/package.sh        |  3 ++-
 backend/go/omnivoice-cpp/run.sh            | 11 ++++++++---
 backend/go/parakeet-cpp/Makefile           |  1 +
 backend/go/parakeet-cpp/main.go            | 18 ++++++++++++------
 backend/go/parakeet-cpp/package.sh         | 15 +++++++++------
 backend/go/parakeet-cpp/run.sh             | 10 ++++++++--
 backend/go/qwen3-tts-cpp/Makefile          | 15 +++++++++++----
 backend/go/qwen3-tts-cpp/main.go           |  7 ++++++-
 backend/go/qwen3-tts-cpp/package.sh        |  3 ++-
 backend/go/qwen3-tts-cpp/run.sh            | 11 ++++++++---
 backend/go/rfdetr-cpp/Makefile             | 15 ++++++++++++---
 backend/go/rfdetr-cpp/main.go              |  7 ++++++-
 backend/go/rfdetr-cpp/package.sh           |  3 ++-
 backend/go/rfdetr-cpp/run.sh               | 11 ++++++++---
 backend/go/sam3-cpp/Makefile               | 15 ++++++++++++---
 backend/go/sam3-cpp/main.go                |  7 ++++++-
 backend/go/sam3-cpp/package.sh             |  3 ++-
 backend/go/sam3-cpp/run.sh                 | 11 ++++++++---
 backend/go/sherpa-onnx/backend.go          | 13 +++++++++++--
 backend/go/sherpa-onnx/run.sh              |  8 +++++++-
 backend/go/stablediffusion-ggml/Makefile   |  3 ++-
 backend/go/stablediffusion-ggml/main.go    |  7 ++++++-
 backend/go/stablediffusion-ggml/package.sh |  1 +
 backend/go/stablediffusion-ggml/run.sh     | 16 +++++++++++++---
 backend/go/vibevoice-cpp/Makefile          | 16 ++++++++++++----
 backend/go/vibevoice-cpp/main.go           |  7 ++++++-
 backend/go/vibevoice-cpp/package.sh        |  3 ++-
 backend/go/vibevoice-cpp/run.sh            | 11 ++++++++---
 backend/go/whisper/Makefile                |  3 ++-
 backend/go/whisper/main.go                 |  7 ++++++-
 backend/go/whisper/package.sh              |  3 ++-
 backend/go/whisper/run.sh                  | 11 ++++++++---
 58 files changed, 374 insertions(+), 108 deletions(-)

diff --git a/backend/go/acestep-cpp/Makefile b/backend/go/acestep-cpp/Makefile
index 0b1929b94..3332ce1b6 100644
--- a/backend/go/acestep-cpp/Makefile
+++ b/backend/go/acestep-cpp/Makefile
@@ -117,7 +117,8 @@ libgoacestepcpp-custom: CMakeLists.txt cpp/goacestepcpp.cpp cpp/goacestepcpp.h
 	cmake .. $(CMAKE_ARGS) && \
 	cmake --build . --config Release -j$(JOBS) --target goacestepcpp && \
 	cd .. && \
-	mv build-$(SO_TARGET)/libgoacestepcpp.so ./$(SO_TARGET)
+	(mv build-$(SO_TARGET)/libgoacestepcpp.so ./$(SO_TARGET) 2>/dev/null || \
+	 mv build-$(SO_TARGET)/libgoacestepcpp.dylib ./$(SO_TARGET))
 
 test: acestep-cpp
 	@echo "Running acestep-cpp tests..."
diff --git a/backend/go/acestep-cpp/main.go b/backend/go/acestep-cpp/main.go
index c65afb335..e4c1378b8 100644
--- a/backend/go/acestep-cpp/main.go
+++ b/backend/go/acestep-cpp/main.go
@@ -4,6 +4,7 @@ package main
 import (
 	"flag"
 	"os"
+	"runtime"
 
 	"github.com/ebitengine/purego"
 	grpc "github.com/mudler/LocalAI/pkg/grpc"
@@ -22,7 +23,11 @@ func main() {
 	// Get library name from environment variable, default to fallback
 	libName := os.Getenv("ACESTEP_LIBRARY")
 	if libName == "" {
-		libName = "./libgoacestepcpp-fallback.so"
+		if runtime.GOOS == "darwin" {
+			libName = "./libgoacestepcpp-fallback.dylib"
+		} else {
+			libName = "./libgoacestepcpp-fallback.so"
+		}
 	}
 
 	gosd, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL)
diff --git a/backend/go/acestep-cpp/package.sh b/backend/go/acestep-cpp/package.sh
index d922c5b86..5fecf3455 100755
--- a/backend/go/acestep-cpp/package.sh
+++ b/backend/go/acestep-cpp/package.sh
@@ -13,6 +13,7 @@ mkdir -p $CURDIR/package/lib
 
 cp -avf $CURDIR/acestep-cpp $CURDIR/package/
 cp -fv $CURDIR/libgoacestepcpp-*.so $CURDIR/package/
+cp -fv $CURDIR/libgoacestepcpp-*.dylib $CURDIR/package/ 2>/dev/null || true
 cp -fv $CURDIR/run.sh $CURDIR/package/
 
 # Detect architecture and copy appropriate libraries
diff --git a/backend/go/acestep-cpp/run.sh b/backend/go/acestep-cpp/run.sh
index d901e2c85..bcdfbc09e 100755
--- a/backend/go/acestep-cpp/run.sh
+++ b/backend/go/acestep-cpp/run.sh
@@ -12,9 +12,19 @@ if [ "$(uname)" != "Darwin" ]; then
 	grep -e "flags" /proc/cpuinfo | head -1
 fi
 
-LIBRARY="$CURDIR/libgoacestepcpp-fallback.so"
+if [ "$(uname)" = "Darwin" ]; then
+	# macOS: single library variant (Metal or Accelerate). On Apple, CMake
+	# emits a .dylib for a SHARED library but a .so for a MODULE library;
+	# goacestepcpp is built as a MODULE, so look for the .dylib first and
+	# fall back to the .so.
+	LIBRARY="$CURDIR/libgoacestepcpp-fallback.dylib"
+	if [ ! -e "$LIBRARY" ]; then
+		LIBRARY="$CURDIR/libgoacestepcpp-fallback.so"
+	fi
+	export DYLD_LIBRARY_PATH=$CURDIR/lib:$DYLD_LIBRARY_PATH
+else
+	LIBRARY="$CURDIR/libgoacestepcpp-fallback.so"
 
-if [ "$(uname)" != "Darwin" ]; then
 	if grep -q -e "\savx\s" /proc/cpuinfo ; then
 		echo "CPU:    AVX    found OK"
 		if [ -e $CURDIR/libgoacestepcpp-avx.so ]; then
@@ -36,9 +46,10 @@ if [ "$(uname)" != "Darwin" ]; then
 			LIBRARY="$CURDIR/libgoacestepcpp-avx512.so"
 		fi
 	fi
+
+	export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
 fi
 
-export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
 export ACESTEP_LIBRARY=$LIBRARY
 
 # If there is a lib/ld.so, use it
diff --git a/backend/go/ced/Makefile b/backend/go/ced/Makefile
index 632c0e255..2b15990ec 100644
--- a/backend/go/ced/Makefile
+++ b/backend/go/ced/Makefile
@@ -57,6 +57,7 @@ libced.so: sources/ced.cpp
 	cmake -B sources/ced.cpp/build-shared -S sources/ced.cpp $(CMAKE_ARGS)
 	cmake --build sources/ced.cpp/build-shared --config Release -j$(JOBS)
 	cp -fv sources/ced.cpp/build-shared/libced.so* ./ 2>/dev/null || true
+	cp -fv sources/ced.cpp/build-shared/libced.dylib ./ 2>/dev/null || true
 	cp -fv sources/ced.cpp/include/ced_capi.h ./
 
 ced-grpc: libced.so main.go goced.go
diff --git a/backend/go/ced/main.go b/backend/go/ced/main.go
index ea8aa8549..b6c93a9f9 100644
--- a/backend/go/ced/main.go
+++ b/backend/go/ced/main.go
@@ -12,6 +12,7 @@ import (
 	"flag"
 	"fmt"
 	"os"
+	"runtime"
 
 	"github.com/ebitengine/purego"
 	grpc "github.com/mudler/LocalAI/pkg/grpc"
@@ -27,7 +28,11 @@ type libFunc struct {
 func main() {
 	libName := os.Getenv("CED_LIBRARY")
 	if libName == "" {
-		libName = "libced.so"
+		if runtime.GOOS == "darwin" {
+			libName = "libced.dylib"
+		} else {
+			libName = "libced.so"
+		}
 	}
 	lib, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL)
 	if err != nil {
diff --git a/backend/go/ced/package.sh b/backend/go/ced/package.sh
index bde0adad6..ff20d727f 100755
--- a/backend/go/ced/package.sh
+++ b/backend/go/ced/package.sh
@@ -15,10 +15,12 @@ mkdir -p "$CURDIR/package/lib"
 cp -avf "$CURDIR/ced-grpc" "$CURDIR/package/"
 cp -avf "$CURDIR/run.sh" "$CURDIR/package/"
 
-cp -avf "$CURDIR"/libced.so* "$CURDIR/package/lib/" 2>/dev/null || {
-	echo "ERROR: libced.so not found in $CURDIR, run 'make' first" >&2
+cp -avf "$CURDIR"/libced.so* "$CURDIR/package/lib/" 2>/dev/null || true
+cp -avf "$CURDIR"/libced.dylib "$CURDIR/package/lib/" 2>/dev/null || true
+if ! ls "$CURDIR"/package/lib/libced.* >/dev/null 2>&1; then
+	echo "ERROR: libced shared library not found in $CURDIR, run 'make' first" >&2
 	exit 1
-}
+fi
 
 if [ -f "/lib64/ld-linux-x86-64.so.2" ]; then
     echo "Detected x86_64 architecture, copying x86_64 libraries..."
diff --git a/backend/go/ced/run.sh b/backend/go/ced/run.sh
index bce6fec8e..1f95f748f 100755
--- a/backend/go/ced/run.sh
+++ b/backend/go/ced/run.sh
@@ -3,7 +3,12 @@ set -e
 
 CURDIR=$(dirname "$(realpath "$0")")
 
-export LD_LIBRARY_PATH="$CURDIR/lib:$CURDIR:${LD_LIBRARY_PATH:-}"
+if [ "$(uname)" = "Darwin" ]; then
+	export DYLD_LIBRARY_PATH="$CURDIR/lib:$CURDIR:${DYLD_LIBRARY_PATH:-}"
+	export CED_LIBRARY="$CURDIR/lib/libced.dylib"
+else
+	export LD_LIBRARY_PATH="$CURDIR/lib:$CURDIR:${LD_LIBRARY_PATH:-}"
+fi
 
 # If a self-contained ld.so was packaged, route through it so the packaged
 # libc / libstdc++ are used instead of the host's (matches the sibling backends).
diff --git a/backend/go/crispasr/Makefile b/backend/go/crispasr/Makefile
index ba55b485e..1b32240e3 100644
--- a/backend/go/crispasr/Makefile
+++ b/backend/go/crispasr/Makefile
@@ -75,7 +75,8 @@ UNAME_S := $(shell uname -s)
 ifeq ($(UNAME_S),Linux)
 	VARIANT_TARGETS = libgocrispasr-avx.so libgocrispasr-avx2.so libgocrispasr-avx512.so libgocrispasr-fallback.so
 else
-	VARIANT_TARGETS = libgocrispasr-fallback.so
+	# On non-Linux, build only the fallback variant (a dylib on Darwin, a .so elsewhere)
+	VARIANT_TARGETS = libgocrispasr-fallback.$(if $(filter Darwin,$(UNAME_S)),dylib,so)
 endif
 
 crispasr: main.go gocrispasr.go $(VARIANT_TARGETS)
@@ -87,7 +88,7 @@ package: crispasr
 build: package
 
 clean: purge
-	rm -rf libgocrispasr*.so package sources/CrispASR crispasr
+	rm -rf libgocrispasr*.so libgocrispasr*.dylib package sources/CrispASR crispasr
 
 purge:
 	rm -rf build*
@@ -118,13 +119,21 @@ libgocrispasr-fallback.so: sources/CrispASR
 	SO_TARGET=libgocrispasr-fallback.so CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) libgocrispasr-custom
 	rm -rfv build*
 
+# Build fallback variant as a dylib (Darwin)
+libgocrispasr-fallback.dylib: sources/CrispASR
+	$(MAKE) purge
+	$(info ${GREEN}I crispasr build info:fallback (dylib)${RESET})
+	SO_TARGET=libgocrispasr-fallback.dylib CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) libgocrispasr-custom
+	rm -rfv build*
+
 libgocrispasr-custom: CMakeLists.txt cpp/crispasr_shim.cpp cpp/crispasr_shim.h
 	mkdir -p build-$(SO_TARGET) && \
 	cd build-$(SO_TARGET) && \
 	cmake .. $(CMAKE_ARGS) && \
 	cmake --build . --config Release -j$(JOBS) && \
 	cd .. && \
-	mv build-$(SO_TARGET)/libgocrispasr.so ./$(SO_TARGET)
+	(mv build-$(SO_TARGET)/libgocrispasr.so ./$(SO_TARGET) 2>/dev/null || \
+	 mv build-$(SO_TARGET)/libgocrispasr.dylib ./$(SO_TARGET))
 
 test: crispasr
 	CGO_ENABLED=0 $(GOCMD) test -v ./...
diff --git a/backend/go/crispasr/main.go b/backend/go/crispasr/main.go
index 9f3ef14d0..a1f132cc5 100644
--- a/backend/go/crispasr/main.go
+++ b/backend/go/crispasr/main.go
@@ -4,6 +4,7 @@ package main
 import (
 	"flag"
 	"os"
+	"runtime"
 
 	"github.com/ebitengine/purego"
 	grpc "github.com/mudler/LocalAI/pkg/grpc"
@@ -21,7 +22,11 @@ type LibFuncs struct {
 func main() {
 	libName := os.Getenv("CRISPASR_LIBRARY")
 	if libName == "" {
-		libName = "./libgocrispasr-fallback.so"
+		if runtime.GOOS == "darwin" {
+			libName = "./libgocrispasr-fallback.dylib"
+		} else {
+			libName = "./libgocrispasr-fallback.so"
+		}
 	}
 
 	lib, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL)
diff --git a/backend/go/crispasr/package.sh b/backend/go/crispasr/package.sh
index baee12944..9b89dad1b 100755
--- a/backend/go/crispasr/package.sh
+++ b/backend/go/crispasr/package.sh
@@ -12,7 +12,8 @@ REPO_ROOT="${CURDIR}/../../.."
 mkdir -p $CURDIR/package/lib
 
 cp -avf $CURDIR/crispasr $CURDIR/package/
-cp -fv $CURDIR/libgocrispasr-*.so $CURDIR/package/
+cp -fv $CURDIR/libgocrispasr-*.so $CURDIR/package/ 2>/dev/null || true
+cp -fv $CURDIR/libgocrispasr-*.dylib $CURDIR/package/ 2>/dev/null || true
 cp -fv $CURDIR/run.sh $CURDIR/package/
 
 # Detect architecture and copy appropriate libraries
diff --git a/backend/go/crispasr/run.sh b/backend/go/crispasr/run.sh
index ccb264833..6d3c4b216 100755
--- a/backend/go/crispasr/run.sh
+++ b/backend/go/crispasr/run.sh
@@ -12,9 +12,13 @@ if [ "$(uname)" != "Darwin" ]; then
 	grep -e "flags" /proc/cpuinfo | head -1
 fi
 
-LIBRARY="$CURDIR/libgocrispasr-fallback.so"
+if [ "$(uname)" = "Darwin" ]; then
+	# macOS: single dylib variant (Metal or Accelerate)
+	LIBRARY="$CURDIR/libgocrispasr-fallback.dylib"
+	export DYLD_LIBRARY_PATH=$CURDIR/lib:$DYLD_LIBRARY_PATH
+else
+	LIBRARY="$CURDIR/libgocrispasr-fallback.so"
 
-if [ "$(uname)" != "Darwin" ]; then
 	if grep -q -e "\savx\s" /proc/cpuinfo ; then
 		echo "CPU:    AVX    found OK"
 		if [ -e $CURDIR/libgocrispasr-avx.so ]; then
@@ -36,9 +40,10 @@ if [ "$(uname)" != "Darwin" ]; then
 			LIBRARY="$CURDIR/libgocrispasr-avx512.so"
 		fi
 	fi
+
+	export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
 fi
 
-export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
 export CRISPASR_LIBRARY=$LIBRARY
 
 # Point piper's espeak-ng phonemizer at the bundled voice data. The variable
diff --git a/backend/go/depth-anything-cpp/Makefile b/backend/go/depth-anything-cpp/Makefile
index f1a0b9f97..efe99a626 100644
--- a/backend/go/depth-anything-cpp/Makefile
+++ b/backend/go/depth-anything-cpp/Makefile
@@ -77,7 +77,7 @@ ifeq ($(UNAME_S),Linux)
 	VARIANT_TARGETS = libdepthanythingcpp-avx.so libdepthanythingcpp-avx2.so libdepthanythingcpp-avx512.so libdepthanythingcpp-fallback.so
 else
 	# On non-Linux (e.g., Darwin), build only fallback variant
-	VARIANT_TARGETS = libdepthanythingcpp-fallback.so
+	VARIANT_TARGETS = libdepthanythingcpp-fallback.$(if $(filter Darwin,$(UNAME_S)),dylib,so)
 endif
 
 depth-anything-cpp: main.go godepthanythingcpp.go $(VARIANT_TARGETS)
@@ -89,7 +89,7 @@ package: depth-anything-cpp
 build: package
 
 clean: purge
-	rm -rf libdepthanythingcpp*.so depth-anything-cpp package sources
+	rm -rf libdepthanythingcpp*.so libdepthanythingcpp*.dylib depth-anything-cpp package sources
 
 purge:
 	rm -rf build*
@@ -116,11 +116,19 @@ libdepthanythingcpp-avx512.so: sources/depth-anything.cpp
 endif
 
 # Build fallback variant (all platforms)
+ifeq ($(UNAME_S),Darwin)
+libdepthanythingcpp-fallback.dylib: sources/depth-anything.cpp
+	rm -rfv build-$@
+	$(info ${GREEN}I depth-anything-cpp build info:fallback${RESET})
+	SO_TARGET=$@ CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) libdepthanythingcpp-custom
+	rm -rfv build-$@
+else
 libdepthanythingcpp-fallback.so: sources/depth-anything.cpp
 	rm -rfv build-$@
 	$(info ${GREEN}I depth-anything-cpp build info:fallback${RESET})
 	SO_TARGET=$@ CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) libdepthanythingcpp-custom
 	rm -rfv build-$@
+endif
 
 libdepthanythingcpp-custom: CMakeLists.txt
 	mkdir -p build-$(SO_TARGET) && \
@@ -128,7 +136,8 @@ libdepthanythingcpp-custom: CMakeLists.txt
 	cmake .. $(CMAKE_ARGS) && \
 	cmake --build . --config Release -j$(JOBS) && \
 	cd .. && \
-	mv build-$(SO_TARGET)/libdepthanything.so ./$(SO_TARGET)
+	(mv build-$(SO_TARGET)/libdepthanything.so ./$(SO_TARGET) 2>/dev/null || \
+	 mv build-$(SO_TARGET)/libdepthanything.dylib ./$(SO_TARGET))
 
 all: depth-anything-cpp package
 
diff --git a/backend/go/depth-anything-cpp/main.go b/backend/go/depth-anything-cpp/main.go
index 4c4546797..cfad88b23 100644
--- a/backend/go/depth-anything-cpp/main.go
+++ b/backend/go/depth-anything-cpp/main.go
@@ -9,6 +9,7 @@ package main
 import (
 	"flag"
 	"os"
+	"runtime"
 
 	"github.com/ebitengine/purego"
 	grpc "github.com/mudler/LocalAI/pkg/grpc"
@@ -27,7 +28,11 @@ func main() {
 	// Get library name from environment variable, default to fallback
 	libName := os.Getenv("DEPTHANYTHING_LIBRARY")
 	if libName == "" {
-		libName = "./libdepthanythingcpp-fallback.so"
+		if runtime.GOOS == "darwin" {
+			libName = "./libdepthanythingcpp-fallback.dylib"
+		} else {
+			libName = "./libdepthanythingcpp-fallback.so"
+		}
 	}
 
 	lib, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL)
diff --git a/backend/go/depth-anything-cpp/package.sh b/backend/go/depth-anything-cpp/package.sh
index 4690555ea..5bbd5559b 100755
--- a/backend/go/depth-anything-cpp/package.sh
+++ b/backend/go/depth-anything-cpp/package.sh
@@ -10,7 +10,8 @@ REPO_ROOT="${CURDIR}/../../.."
 # Create lib directory
 mkdir -p $CURDIR/package/lib
 
-cp -avf $CURDIR/libdepthanythingcpp-*.so $CURDIR/package/
+cp -fv $CURDIR/libdepthanythingcpp-*.so $CURDIR/package/ 2>/dev/null || true
+cp -fv $CURDIR/libdepthanythingcpp-*.dylib $CURDIR/package/ 2>/dev/null || true
 cp -avf $CURDIR/depth-anything-cpp $CURDIR/package/
 cp -fv $CURDIR/run.sh $CURDIR/package/
 
diff --git a/backend/go/depth-anything-cpp/run.sh b/backend/go/depth-anything-cpp/run.sh
index 984aa5849..cbff6b0b5 100755
--- a/backend/go/depth-anything-cpp/run.sh
+++ b/backend/go/depth-anything-cpp/run.sh
@@ -12,9 +12,13 @@ if [ "$(uname)" != "Darwin" ]; then
 	grep -e "flags" /proc/cpuinfo | head -1
 fi
 
-LIBRARY="$CURDIR/libdepthanythingcpp-fallback.so"
+if [ "$(uname)" = "Darwin" ]; then
+	# macOS: single dylib variant (Metal or Accelerate)
+	LIBRARY="$CURDIR/libdepthanythingcpp-fallback.dylib"
+	export DYLD_LIBRARY_PATH=$CURDIR/lib:$DYLD_LIBRARY_PATH
+else
+	LIBRARY="$CURDIR/libdepthanythingcpp-fallback.so"
 
-if [ "$(uname)" != "Darwin" ]; then
 	if grep -q -e "\savx\s" /proc/cpuinfo ; then
 		echo "CPU:    AVX    found OK"
 		if [ -e $CURDIR/libdepthanythingcpp-avx.so ]; then
@@ -36,9 +40,10 @@ if [ "$(uname)" != "Darwin" ]; then
 			LIBRARY="$CURDIR/libdepthanythingcpp-avx512.so"
 		fi
 	fi
+
+	export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
 fi
 
-export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
 export DEPTHANYTHING_LIBRARY=$LIBRARY
 
 # If there is a lib/ld.so, use it
diff --git a/backend/go/localvqe/Makefile b/backend/go/localvqe/Makefile
index 7b66e9371..049da0cdd 100644
--- a/backend/go/localvqe/Makefile
+++ b/backend/go/localvqe/Makefile
@@ -67,8 +67,9 @@ $(LIB_SENTINEL): sources/LocalVQE
 	# that the loader picks at runtime. We must build every target — the
 	# default `--target localvqe_shared` drops these. CMAKE_LIBRARY_OUTPUT_DIRECTORY
 	# routes all of them into build/bin; copy them out next to the binary.
-	cp -P build/bin/liblocalvqe.so* . 2>/dev/null || cp -P build/liblocalvqe.so* .
+	cp -P build/bin/liblocalvqe.so* . 2>/dev/null || cp -P build/bin/liblocalvqe*.dylib . 2>/dev/null || cp -P build/liblocalvqe.so* . 2>/dev/null || cp -P build/liblocalvqe*.dylib .
 	cp -P build/bin/libggml*.so* . 2>/dev/null || true
+	cp -P build/bin/libggml*.dylib . 2>/dev/null || true
 	touch $(LIB_SENTINEL)
 
 liblocalvqe.so: $(LIB_SENTINEL)
diff --git a/backend/go/localvqe/main.go b/backend/go/localvqe/main.go
index 56ed2de2f..cbaa2a134 100644
--- a/backend/go/localvqe/main.go
+++ b/backend/go/localvqe/main.go
@@ -4,6 +4,7 @@ package main
 import (
 	"flag"
 	"os"
+	"runtime"
 
 	"github.com/ebitengine/purego"
 	grpc "github.com/mudler/LocalAI/pkg/grpc"
@@ -21,7 +22,11 @@ type LibFuncs struct {
 func main() {
 	libName := os.Getenv("LOCALVQE_LIBRARY")
 	if libName == "" {
-		libName = "./liblocalvqe.so"
+		if runtime.GOOS == "darwin" {
+			libName = "./liblocalvqe.dylib"
+		} else {
+			libName = "./liblocalvqe.so"
+		}
 	}
 
 	lib, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL)
diff --git a/backend/go/localvqe/package.sh b/backend/go/localvqe/package.sh
index ca8dfd3ab..9f9f2533d 100755
--- a/backend/go/localvqe/package.sh
+++ b/backend/go/localvqe/package.sh
@@ -15,7 +15,9 @@ cp -avf $CURDIR/localvqe $CURDIR/package/
 # liblocalvqe.so* (with SOVERSION symlinks) and the libggml-*.so runtime
 # variants — LocalVQE picks the matching CPU variant at load time.
 cp -P $CURDIR/liblocalvqe.so* $CURDIR/package/ 2>/dev/null || true
+cp -P $CURDIR/liblocalvqe*.dylib $CURDIR/package/ 2>/dev/null || true
 cp -P $CURDIR/libggml*.so* $CURDIR/package/ 2>/dev/null || true
+cp -P $CURDIR/libggml*.dylib $CURDIR/package/ 2>/dev/null || true
 cp -fv $CURDIR/run.sh $CURDIR/package/
 
 # Detect architecture and copy appropriate libraries
diff --git a/backend/go/localvqe/run.sh b/backend/go/localvqe/run.sh
index 0f3192e31..d14d427c4 100755
--- a/backend/go/localvqe/run.sh
+++ b/backend/go/localvqe/run.sh
@@ -10,8 +10,19 @@ CURDIR=$(dirname "$(realpath $0)")
 # exec'ing the binary.
 cd "$CURDIR"
 
-export LD_LIBRARY_PATH=$CURDIR:$CURDIR/lib:$LD_LIBRARY_PATH
-export LOCALVQE_LIBRARY=$CURDIR/liblocalvqe.so
+if [ "$(uname)" = "Darwin" ]; then
+	# macOS: LocalVQE is built as a SHARED library, so dyld needs the .dylib +
+	# DYLD_LIBRARY_PATH. Prefer .dylib and fall back to .so just in case.
+	export DYLD_LIBRARY_PATH=$CURDIR:$CURDIR/lib:$DYLD_LIBRARY_PATH
+	LOCALVQE_LIBRARY=$CURDIR/liblocalvqe.dylib
+	if [ ! -e "$LOCALVQE_LIBRARY" ]; then
+		LOCALVQE_LIBRARY=$CURDIR/liblocalvqe.so
+	fi
+	export LOCALVQE_LIBRARY
+else
+	export LD_LIBRARY_PATH=$CURDIR:$CURDIR/lib:$LD_LIBRARY_PATH
+	export LOCALVQE_LIBRARY=$CURDIR/liblocalvqe.so
+fi
 
 if [ -f $CURDIR/lib/ld.so ]; then
 	echo "Using lib/ld.so"
diff --git a/backend/go/locate-anything-cpp/Makefile b/backend/go/locate-anything-cpp/Makefile
index 91dbc41c2..ba12c7195 100644
--- a/backend/go/locate-anything-cpp/Makefile
+++ b/backend/go/locate-anything-cpp/Makefile
@@ -70,7 +70,7 @@ ifeq ($(UNAME_S),Linux)
 	VARIANT_TARGETS = liblocateanythingcpp-avx.so liblocateanythingcpp-avx2.so liblocateanythingcpp-avx512.so liblocateanythingcpp-fallback.so
 else
 	# On non-Linux (e.g., Darwin), build only fallback variant
-	VARIANT_TARGETS = liblocateanythingcpp-fallback.so
+	VARIANT_TARGETS = liblocateanythingcpp-fallback.$(if $(filter Darwin,$(UNAME_S)),dylib,so)
 endif
 
 locate-anything-cpp: main.go golocateanythingcpp.go $(VARIANT_TARGETS)
@@ -82,7 +82,7 @@ package: locate-anything-cpp
 build: package
 
 clean: purge
-	rm -rf liblocateanythingcpp*.so locate-anything-cpp package sources
+	rm -rf liblocateanythingcpp*.so liblocateanythingcpp*.dylib locate-anything-cpp package sources
 
 purge:
 	rm -rf build*
@@ -109,11 +109,19 @@ liblocateanythingcpp-avx512.so: sources/locate-anything.cpp
 endif
 
 # Build fallback variant (all platforms)
+ifeq ($(UNAME_S),Darwin)
+liblocateanythingcpp-fallback.dylib: sources/locate-anything.cpp
+	rm -rfv build-$@
+	$(info ${GREEN}I locate-anything-cpp build info:fallback${RESET})
+	SO_TARGET=$@ CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) liblocateanythingcpp-custom
+	rm -rfv build-$@
+else
 liblocateanythingcpp-fallback.so: sources/locate-anything.cpp
 	rm -rfv build-$@
 	$(info ${GREEN}I locate-anything-cpp build info:fallback${RESET})
 	SO_TARGET=$@ CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) liblocateanythingcpp-custom
 	rm -rfv build-$@
+endif
 
 liblocateanythingcpp-custom: CMakeLists.txt
 	mkdir -p build-$(SO_TARGET) && \
@@ -121,7 +129,8 @@ liblocateanythingcpp-custom: CMakeLists.txt
 	cmake .. $(CMAKE_ARGS) && \
 	cmake --build . --config Release -j$(JOBS) && \
 	cd .. && \
-	mv build-$(SO_TARGET)/liblocateanythingcpp.so ./$(SO_TARGET)
+	(mv build-$(SO_TARGET)/liblocateanythingcpp.so ./$(SO_TARGET) 2>/dev/null || \
+	 mv build-$(SO_TARGET)/liblocateanythingcpp.dylib ./$(SO_TARGET))
 
 all: locate-anything-cpp package
 
diff --git a/backend/go/locate-anything-cpp/main.go b/backend/go/locate-anything-cpp/main.go
index 91ccaf38e..77e53bb95 100644
--- a/backend/go/locate-anything-cpp/main.go
+++ b/backend/go/locate-anything-cpp/main.go
@@ -9,6 +9,7 @@ package main
 import (
 	"flag"
 	"os"
+	"runtime"
 
 	"github.com/ebitengine/purego"
 	grpc "github.com/mudler/LocalAI/pkg/grpc"
@@ -27,7 +28,11 @@ func main() {
 	// Get library name from environment variable, default to fallback
 	libName := os.Getenv("LOCATEANYTHING_LIBRARY")
 	if libName == "" {
-		libName = "./liblocateanythingcpp-fallback.so"
+		if runtime.GOOS == "darwin" {
+			libName = "./liblocateanythingcpp-fallback.dylib"
+		} else {
+			libName = "./liblocateanythingcpp-fallback.so"
+		}
 	}
 
 	lib, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL)
diff --git a/backend/go/locate-anything-cpp/package.sh b/backend/go/locate-anything-cpp/package.sh
index 3b1f13428..1e6cbee80 100755
--- a/backend/go/locate-anything-cpp/package.sh
+++ b/backend/go/locate-anything-cpp/package.sh
@@ -10,7 +10,8 @@ REPO_ROOT="${CURDIR}/../../.."
 # Create lib directory
 mkdir -p $CURDIR/package/lib
 
-cp -avf $CURDIR/liblocateanythingcpp-*.so $CURDIR/package/
+cp -fv $CURDIR/liblocateanythingcpp-*.so $CURDIR/package/ 2>/dev/null || true
+cp -fv $CURDIR/liblocateanythingcpp-*.dylib $CURDIR/package/ 2>/dev/null || true
 cp -avf $CURDIR/locate-anything-cpp $CURDIR/package/
 cp -fv $CURDIR/run.sh $CURDIR/package/
 
diff --git a/backend/go/locate-anything-cpp/run.sh b/backend/go/locate-anything-cpp/run.sh
index cefbff629..4eebb3c63 100755
--- a/backend/go/locate-anything-cpp/run.sh
+++ b/backend/go/locate-anything-cpp/run.sh
@@ -12,9 +12,13 @@ if [ "$(uname)" != "Darwin" ]; then
 	grep -e "flags" /proc/cpuinfo | head -1
 fi
 
-LIBRARY="$CURDIR/liblocateanythingcpp-fallback.so"
+if [ "$(uname)" = "Darwin" ]; then
+	# macOS: single dylib variant (Metal or Accelerate)
+	LIBRARY="$CURDIR/liblocateanythingcpp-fallback.dylib"
+	export DYLD_LIBRARY_PATH=$CURDIR/lib:$DYLD_LIBRARY_PATH
+else
+	LIBRARY="$CURDIR/liblocateanythingcpp-fallback.so"
 
-if [ "$(uname)" != "Darwin" ]; then
 	if grep -q -e "\savx\s" /proc/cpuinfo ; then
 		echo "CPU:    AVX    found OK"
 		if [ -e $CURDIR/liblocateanythingcpp-avx.so ]; then
@@ -36,9 +40,10 @@ if [ "$(uname)" != "Darwin" ]; then
 			LIBRARY="$CURDIR/liblocateanythingcpp-avx512.so"
 		fi
 	fi
+
+	export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
 fi
 
-export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
 export LOCATEANYTHING_LIBRARY=$LIBRARY
 
 # If there is a lib/ld.so, use it
diff --git a/backend/go/omnivoice-cpp/Makefile b/backend/go/omnivoice-cpp/Makefile
index c245acf58..36b447b13 100644
--- a/backend/go/omnivoice-cpp/Makefile
+++ b/backend/go/omnivoice-cpp/Makefile
@@ -65,7 +65,8 @@ UNAME_S := $(shell uname -s)
 ifeq ($(UNAME_S),Linux)
 	VARIANT_TARGETS = libgomnivoicecpp-avx.so libgomnivoicecpp-avx2.so libgomnivoicecpp-avx512.so libgomnivoicecpp-fallback.so
 else
-	VARIANT_TARGETS = libgomnivoicecpp-fallback.so
+	# On non-Linux, build only the fallback variant (a dylib on Darwin, a .so elsewhere)
+	VARIANT_TARGETS = libgomnivoicecpp-fallback.$(if $(filter Darwin,$(UNAME_S)),dylib,so)
 endif
 
 omnivoice-cpp: main.go gomnivoicecpp.go $(VARIANT_TARGETS)
@@ -77,7 +78,7 @@ package: omnivoice-cpp
 build: package
 
 clean: purge
-	rm -rf libgomnivoicecpp*.so package sources/omnivoice.cpp omnivoice-cpp
+	rm -rf libgomnivoicecpp*.so libgomnivoicecpp*.dylib package sources/omnivoice.cpp omnivoice-cpp
 
 purge:
 	rm -rf build*
@@ -106,13 +107,20 @@ libgomnivoicecpp-fallback.so: sources/omnivoice.cpp
 	SO_TARGET=libgomnivoicecpp-fallback.so CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) libgomnivoicecpp-custom
 	rm -rf build-libgomnivoicecpp-fallback.so
 
+# Build fallback variant as a dylib (Darwin)
+libgomnivoicecpp-fallback.dylib: sources/omnivoice.cpp
+	$(info ${GREEN}I omnivoice-cpp build info:fallback (dylib)${RESET})
+	SO_TARGET=libgomnivoicecpp-fallback.dylib CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) libgomnivoicecpp-custom
+	rm -rf build-libgomnivoicecpp-fallback.dylib
+
 libgomnivoicecpp-custom: CMakeLists.txt cpp/gomnivoicecpp.cpp cpp/gomnivoicecpp.h
 	mkdir -p build-$(SO_TARGET) && \
 	cd build-$(SO_TARGET) && \
 	cmake .. $(CMAKE_ARGS) && \
 	cmake --build . --config Release -j$(JOBS) --target gomnivoicecpp && \
 	cd .. && \
-	mv build-$(SO_TARGET)/libgomnivoicecpp.so ./$(SO_TARGET)
+	(mv build-$(SO_TARGET)/libgomnivoicecpp.so ./$(SO_TARGET) 2>/dev/null || \
+	 mv build-$(SO_TARGET)/libgomnivoicecpp.dylib ./$(SO_TARGET))
 
 test: omnivoice-cpp
 	@echo "Running omnivoice-cpp tests..."
diff --git a/backend/go/omnivoice-cpp/main.go b/backend/go/omnivoice-cpp/main.go
index 891201f49..f44eb31a7 100644
--- a/backend/go/omnivoice-cpp/main.go
+++ b/backend/go/omnivoice-cpp/main.go
@@ -4,6 +4,7 @@ package main
 import (
 	"flag"
 	"os"
+	"runtime"
 
 	"github.com/ebitengine/purego"
 	grpc "github.com/mudler/LocalAI/pkg/grpc"
@@ -21,7 +22,11 @@ type LibFuncs struct {
 func main() {
 	libName := os.Getenv("OMNIVOICE_LIBRARY")
 	if libName == "" {
-		libName = "./libgomnivoicecpp-fallback.so"
+		if runtime.GOOS == "darwin" {
+			libName = "./libgomnivoicecpp-fallback.dylib"
+		} else {
+			libName = "./libgomnivoicecpp-fallback.so"
+		}
 	}
 
 	lib, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL)
diff --git a/backend/go/omnivoice-cpp/package.sh b/backend/go/omnivoice-cpp/package.sh
index b8313d9d7..97a8d7809 100755
--- a/backend/go/omnivoice-cpp/package.sh
+++ b/backend/go/omnivoice-cpp/package.sh
@@ -12,7 +12,8 @@ REPO_ROOT="${CURDIR}/../../.."
 mkdir -p $CURDIR/package/lib
 
 cp -avf $CURDIR/omnivoice-cpp $CURDIR/package/
-cp -fv $CURDIR/libgomnivoicecpp-*.so $CURDIR/package/
+cp -fv $CURDIR/libgomnivoicecpp-*.so $CURDIR/package/ 2>/dev/null || true
+cp -fv $CURDIR/libgomnivoicecpp-*.dylib $CURDIR/package/ 2>/dev/null || true
 cp -fv $CURDIR/run.sh $CURDIR/package/
 
 # Detect architecture and copy appropriate libraries
diff --git a/backend/go/omnivoice-cpp/run.sh b/backend/go/omnivoice-cpp/run.sh
index f677ca21c..81ea2b719 100755
--- a/backend/go/omnivoice-cpp/run.sh
+++ b/backend/go/omnivoice-cpp/run.sh
@@ -12,9 +12,13 @@ if [ "$(uname)" != "Darwin" ]; then
 	grep -e "flags" /proc/cpuinfo | head -1
 fi
 
-LIBRARY="$CURDIR/libgomnivoicecpp-fallback.so"
+if [ "$(uname)" = "Darwin" ]; then
+	# macOS: single dylib variant (Metal or Accelerate)
+	LIBRARY="$CURDIR/libgomnivoicecpp-fallback.dylib"
+	export DYLD_LIBRARY_PATH=$CURDIR/lib:$DYLD_LIBRARY_PATH
+else
+	LIBRARY="$CURDIR/libgomnivoicecpp-fallback.so"
 
-if [ "$(uname)" != "Darwin" ]; then
 	if grep -q -e "\savx\s" /proc/cpuinfo ; then
 		echo "CPU:    AVX    found OK"
 		if [ -e $CURDIR/libgomnivoicecpp-avx.so ]; then
@@ -36,9 +40,10 @@ if [ "$(uname)" != "Darwin" ]; then
 			LIBRARY="$CURDIR/libgomnivoicecpp-avx512.so"
 		fi
 	fi
+
+	export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
 fi
 
-export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
 export OMNIVOICE_LIBRARY=$LIBRARY
 
 # If there is a lib/ld.so, use it
diff --git a/backend/go/parakeet-cpp/Makefile b/backend/go/parakeet-cpp/Makefile
index f9848dc34..7fc46f8e2 100644
--- a/backend/go/parakeet-cpp/Makefile
+++ b/backend/go/parakeet-cpp/Makefile
@@ -74,6 +74,7 @@ libparakeet.so: sources/parakeet.cpp
 	cmake -B sources/parakeet.cpp/build-shared -S sources/parakeet.cpp $(CMAKE_ARGS)
 	cmake --build sources/parakeet.cpp/build-shared --config Release -j$(JOBS)
 	cp -fv sources/parakeet.cpp/build-shared/libparakeet.so* ./ 2>/dev/null || true
+	cp -fv sources/parakeet.cpp/build-shared/libparakeet.dylib ./ 2>/dev/null || true
 	cp -fv sources/parakeet.cpp/include/parakeet_capi.h ./
 
 parakeet-cpp-grpc: libparakeet.so main.go goparakeetcpp.go
diff --git a/backend/go/parakeet-cpp/main.go b/backend/go/parakeet-cpp/main.go
index 963056e23..9c6466b13 100644
--- a/backend/go/parakeet-cpp/main.go
+++ b/backend/go/parakeet-cpp/main.go
@@ -2,15 +2,17 @@ package main
 
 // Started internally by LocalAI - one gRPC server per loaded model.
 //
-// Loads libparakeet.so via purego and registers the flat C-API entry
-// points declared in parakeet_capi.h. The library name can be overridden
-// with PARAKEET_LIBRARY (mirrors the WHISPER_LIBRARY / VIBEVOICECPP_LIBRARY
-// convention in the sibling backends); the default looks for the .so next
-// to this binary.
+// Loads the parakeet shared library via purego and registers the flat
+// C-API entry points declared in parakeet_capi.h. The library name can be
+// overridden with PARAKEET_LIBRARY (mirrors the WHISPER_LIBRARY /
+// VIBEVOICECPP_LIBRARY convention in the sibling backends); the default
+// looks next to this binary for libparakeet.so on Linux and
+// libparakeet.dylib on macOS.
 import (
 	"flag"
 	"fmt"
 	"os"
+	"runtime"
 
 	"github.com/ebitengine/purego"
 	grpc "github.com/mudler/LocalAI/pkg/grpc"
@@ -28,7 +30,11 @@ type LibFuncs struct {
 func main() {
 	libName := os.Getenv("PARAKEET_LIBRARY")
 	if libName == "" {
-		libName = "libparakeet.so"
+		if runtime.GOOS == "darwin" {
+			libName = "libparakeet.dylib"
+		} else {
+			libName = "libparakeet.so"
+		}
 	}
 
 	lib, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL)
diff --git a/backend/go/parakeet-cpp/package.sh b/backend/go/parakeet-cpp/package.sh
index 0b580324c..af8e6b9e1 100755
--- a/backend/go/parakeet-cpp/package.sh
+++ b/backend/go/parakeet-cpp/package.sh
@@ -16,12 +16,15 @@ mkdir -p "$CURDIR/package/lib"
 cp -avf "$CURDIR/parakeet-cpp-grpc" "$CURDIR/package/"
 cp -avf "$CURDIR/run.sh" "$CURDIR/package/"
 
-# libparakeet.so + any soname symlinks (libparakeet.so.X[.Y]). purego.Dlopen
-# resolves it via LD_LIBRARY_PATH, which run.sh points at lib/.
-cp -avf "$CURDIR"/libparakeet.so* "$CURDIR/package/lib/" 2>/dev/null || {
-	echo "ERROR: libparakeet.so not found in $CURDIR, run 'make' first" >&2
+# libparakeet shared lib + any soname symlinks. On Linux this is
+# libparakeet.so[.X.Y]; on macOS it is libparakeet.dylib. run.sh hands
+# purego.Dlopen its path under lib/ and adds lib/ to the loader search path.
+cp -avf "$CURDIR"/libparakeet.so* "$CURDIR/package/lib/" 2>/dev/null || true
+cp -avf "$CURDIR"/libparakeet.dylib "$CURDIR/package/lib/" 2>/dev/null || true
+if ! ls "$CURDIR"/package/lib/libparakeet.* >/dev/null 2>&1; then
+	echo "ERROR: libparakeet shared library not found in $CURDIR, run 'make' first" >&2
 	exit 1
-}
+fi
 
 # Detect architecture and copy the core runtime libs libparakeet.so links
 # against, plus the matching dynamic loader as lib/ld.so.
@@ -48,7 +51,7 @@ elif [ -f "/lib/ld-linux-aarch64.so.1" ]; then
     cp -arfLv /lib/aarch64-linux-gnu/librt.so.1 "$CURDIR/package/lib/librt.so.1"
     cp -arfLv /lib/aarch64-linux-gnu/libpthread.so.0 "$CURDIR/package/lib/libpthread.so.0"
 elif [ "$(uname -s)" = "Darwin" ]; then
-    echo "Detected Darwin"
+    echo "Detected Darwin — system frameworks linked dynamically, no bundled libs needed"
 else
     echo "Error: Could not detect architecture"
     exit 1
diff --git a/backend/go/parakeet-cpp/run.sh b/backend/go/parakeet-cpp/run.sh
index 6f371d4f0..be859f381 100755
--- a/backend/go/parakeet-cpp/run.sh
+++ b/backend/go/parakeet-cpp/run.sh
@@ -3,11 +3,17 @@ set -e
 
 CURDIR=$(dirname "$(realpath "$0")")
 
-export LD_LIBRARY_PATH="$CURDIR/lib:$CURDIR:${LD_LIBRARY_PATH:-}"
+if [ "$(uname)" = "Darwin" ]; then
+	export DYLD_LIBRARY_PATH="$CURDIR/lib:$CURDIR:${DYLD_LIBRARY_PATH:-}"
+	export PARAKEET_LIBRARY="$CURDIR/lib/libparakeet.dylib"
+else
+	export LD_LIBRARY_PATH="$CURDIR/lib:$CURDIR:${LD_LIBRARY_PATH:-}"
+	export PARAKEET_LIBRARY="$CURDIR/lib/libparakeet.so"
+fi
 
 # If a self-contained ld.so was packaged, route through it so the
 # packaged libc / libstdc++ are used instead of the host's (matches the
-# whisper backend's runtime layout).
+# whisper backend's runtime layout). Linux only.
 if [ -f "$CURDIR/lib/ld.so" ]; then
 	echo "Using lib/ld.so"
 	exec "$CURDIR/lib/ld.so" "$CURDIR/parakeet-cpp-grpc" "$@"
diff --git a/backend/go/qwen3-tts-cpp/Makefile b/backend/go/qwen3-tts-cpp/Makefile
index 4015f790e..3311f93c3 100644
--- a/backend/go/qwen3-tts-cpp/Makefile
+++ b/backend/go/qwen3-tts-cpp/Makefile
@@ -65,8 +65,8 @@ UNAME_S := $(shell uname -s)
 ifeq ($(UNAME_S),Linux)
 	VARIANT_TARGETS = libgoqwen3ttscpp-avx.so libgoqwen3ttscpp-avx2.so libgoqwen3ttscpp-avx512.so libgoqwen3ttscpp-fallback.so
 else
-	# On non-Linux (e.g., Darwin), build only fallback variant
-	VARIANT_TARGETS = libgoqwen3ttscpp-fallback.so
+	# On non-Linux (e.g., Darwin), build only fallback variant (as a dylib)
+	VARIANT_TARGETS = libgoqwen3ttscpp-fallback.dylib
 endif
 
 qwen3-tts-cpp: main.go goqwen3ttscpp.go $(VARIANT_TARGETS)
@@ -78,7 +78,7 @@ package: qwen3-tts-cpp
 build: package
 
 clean: purge
-	rm -rf libgoqwen3ttscpp*.so package sources/qwentts.cpp qwen3-tts-cpp
+	rm -rf libgoqwen3ttscpp*.so libgoqwen3ttscpp*.dylib package sources/qwentts.cpp qwen3-tts-cpp
 
 purge:
 	rm -rf build*
@@ -110,13 +110,20 @@ libgoqwen3ttscpp-fallback.so: sources/qwentts.cpp
 	SO_TARGET=libgoqwen3ttscpp-fallback.so CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) libgoqwen3ttscpp-custom
 	rm -rf build-libgoqwen3ttscpp-fallback.so
 
+# Build fallback variant as a dylib (Darwin)
+libgoqwen3ttscpp-fallback.dylib: sources/qwentts.cpp
+	$(info ${GREEN}I qwen3-tts-cpp build info:fallback (dylib)${RESET})
+	SO_TARGET=libgoqwen3ttscpp-fallback.dylib CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) libgoqwen3ttscpp-custom
+	rm -rf build-libgoqwen3ttscpp-fallback.dylib
+
 libgoqwen3ttscpp-custom: CMakeLists.txt cpp/goqwen3ttscpp.cpp cpp/goqwen3ttscpp.h
 	mkdir -p build-$(SO_TARGET) && \
 	cd build-$(SO_TARGET) && \
 	cmake .. $(CMAKE_ARGS) && \
 	cmake --build . --config Release -j$(JOBS) --target goqwen3ttscpp && \
 	cd .. && \
-	mv build-$(SO_TARGET)/libgoqwen3ttscpp.so ./$(SO_TARGET)
+	(mv build-$(SO_TARGET)/libgoqwen3ttscpp.so ./$(SO_TARGET) 2>/dev/null || \
+	 mv build-$(SO_TARGET)/libgoqwen3ttscpp.dylib ./$(SO_TARGET))
 
 test: qwen3-tts-cpp
 	@echo "Running qwen3-tts-cpp tests..."
diff --git a/backend/go/qwen3-tts-cpp/main.go b/backend/go/qwen3-tts-cpp/main.go
index b788229cd..041a23ad0 100644
--- a/backend/go/qwen3-tts-cpp/main.go
+++ b/backend/go/qwen3-tts-cpp/main.go
@@ -4,6 +4,7 @@ package main
 import (
 	"flag"
 	"os"
+	"runtime"
 
 	"github.com/ebitengine/purego"
 	grpc "github.com/mudler/LocalAI/pkg/grpc"
@@ -21,7 +22,11 @@ type LibFuncs struct {
 func main() {
 	libName := os.Getenv("QWEN3TTS_LIBRARY")
 	if libName == "" {
-		libName = "./libgoqwen3ttscpp-fallback.so"
+		if runtime.GOOS == "darwin" {
+			libName = "./libgoqwen3ttscpp-fallback.dylib"
+		} else {
+			libName = "./libgoqwen3ttscpp-fallback.so"
+		}
 	}
 
 	lib, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL)
diff --git a/backend/go/qwen3-tts-cpp/package.sh b/backend/go/qwen3-tts-cpp/package.sh
index bb73df968..11d4c57c3 100755
--- a/backend/go/qwen3-tts-cpp/package.sh
+++ b/backend/go/qwen3-tts-cpp/package.sh
@@ -12,7 +12,8 @@ REPO_ROOT="${CURDIR}/../../.."
 mkdir -p $CURDIR/package/lib
 
 cp -avf $CURDIR/qwen3-tts-cpp $CURDIR/package/
-cp -fv $CURDIR/libgoqwen3ttscpp-*.so $CURDIR/package/
+cp -fv $CURDIR/libgoqwen3ttscpp-*.so $CURDIR/package/ 2>/dev/null || \
+	cp -fv $CURDIR/libgoqwen3ttscpp-*.dylib $CURDIR/package/
 cp -fv $CURDIR/run.sh $CURDIR/package/
 
 # Detect architecture and copy appropriate libraries
diff --git a/backend/go/qwen3-tts-cpp/run.sh b/backend/go/qwen3-tts-cpp/run.sh
index 6416779fa..638cf9661 100755
--- a/backend/go/qwen3-tts-cpp/run.sh
+++ b/backend/go/qwen3-tts-cpp/run.sh
@@ -12,9 +12,13 @@ if [ "$(uname)" != "Darwin" ]; then
 	grep -e "flags" /proc/cpuinfo | head -1
 fi
 
-LIBRARY="$CURDIR/libgoqwen3ttscpp-fallback.so"
+if [ "$(uname)" = "Darwin" ]; then
+	# macOS: single dylib variant (Metal or Accelerate)
+	LIBRARY="$CURDIR/libgoqwen3ttscpp-fallback.dylib"
+	export DYLD_LIBRARY_PATH=$CURDIR/lib:$DYLD_LIBRARY_PATH
+else
+	LIBRARY="$CURDIR/libgoqwen3ttscpp-fallback.so"
 
-if [ "$(uname)" != "Darwin" ]; then
 	if grep -q -e "\savx\s" /proc/cpuinfo ; then
 		echo "CPU:    AVX    found OK"
 		if [ -e $CURDIR/libgoqwen3ttscpp-avx.so ]; then
@@ -36,9 +40,10 @@ if [ "$(uname)" != "Darwin" ]; then
 			LIBRARY="$CURDIR/libgoqwen3ttscpp-avx512.so"
 		fi
 	fi
+
+	export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
 fi
 
-export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
 export QWEN3TTS_LIBRARY=$LIBRARY
 
 # If there is a lib/ld.so, use it
diff --git a/backend/go/rfdetr-cpp/Makefile b/backend/go/rfdetr-cpp/Makefile
index 7c598f732..3282720ff 100644
--- a/backend/go/rfdetr-cpp/Makefile
+++ b/backend/go/rfdetr-cpp/Makefile
@@ -71,7 +71,7 @@ ifeq ($(UNAME_S),Linux)
 	VARIANT_TARGETS = librfdetrcpp-avx.so librfdetrcpp-avx2.so librfdetrcpp-avx512.so librfdetrcpp-fallback.so
 else
 	# On non-Linux (e.g., Darwin), build only fallback variant
-	VARIANT_TARGETS = librfdetrcpp-fallback.so
+	VARIANT_TARGETS = librfdetrcpp-fallback.dylib
 endif
 
 rfdetr-cpp: main.go gorfdetrcpp.go $(VARIANT_TARGETS)
@@ -83,7 +83,7 @@ package: rfdetr-cpp
 build: package
 
 clean: purge
-	rm -rf librfdetrcpp*.so rfdetr-cpp package sources
+	rm -rf librfdetrcpp*.so librfdetrcpp*.dylib rfdetr-cpp package sources
 
 purge:
 	rm -rf build*
@@ -110,11 +110,19 @@ librfdetrcpp-avx512.so: sources/rt-detr.cpp
 endif
 
 # Build fallback variant (all platforms)
+ifeq ($(UNAME_S),Darwin)
+librfdetrcpp-fallback.dylib: sources/rt-detr.cpp
+	rm -rfv build-$@
+	$(info ${GREEN}I rfdetr-cpp build info:fallback${RESET})
+	SO_TARGET=$@ CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) librfdetrcpp-custom
+	rm -rfv build-$@
+else
 librfdetrcpp-fallback.so: sources/rt-detr.cpp
 	rm -rfv build-$@
 	$(info ${GREEN}I rfdetr-cpp build info:fallback${RESET})
 	SO_TARGET=$@ CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) librfdetrcpp-custom
 	rm -rfv build-$@
+endif
 
 librfdetrcpp-custom: CMakeLists.txt
 	mkdir -p build-$(SO_TARGET) && \
@@ -122,7 +130,8 @@ librfdetrcpp-custom: CMakeLists.txt
 	cmake .. $(CMAKE_ARGS) && \
 	cmake --build . --config Release -j$(JOBS) && \
 	cd .. && \
-	mv build-$(SO_TARGET)/librfdetrcpp.so ./$(SO_TARGET)
+	(mv build-$(SO_TARGET)/librfdetrcpp.so ./$(SO_TARGET) 2>/dev/null || \
+	 mv build-$(SO_TARGET)/librfdetrcpp.dylib ./$(SO_TARGET))
 
 all: rfdetr-cpp package
 
diff --git a/backend/go/rfdetr-cpp/main.go b/backend/go/rfdetr-cpp/main.go
index 3c95df1c2..58637122a 100644
--- a/backend/go/rfdetr-cpp/main.go
+++ b/backend/go/rfdetr-cpp/main.go
@@ -9,6 +9,7 @@ package main
 import (
 	"flag"
 	"os"
+	"runtime"
 
 	"github.com/ebitengine/purego"
 	grpc "github.com/mudler/LocalAI/pkg/grpc"
@@ -27,7 +28,11 @@ func main() {
 	// Get library name from environment variable, default to fallback
 	libName := os.Getenv("RFDETR_LIBRARY")
 	if libName == "" {
-		libName = "./librfdetrcpp-fallback.so"
+		if runtime.GOOS == "darwin" {
+			libName = "./librfdetrcpp-fallback.dylib"
+		} else {
+			libName = "./librfdetrcpp-fallback.so"
+		}
 	}
 
 	rfdetrLib, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL)
diff --git a/backend/go/rfdetr-cpp/package.sh b/backend/go/rfdetr-cpp/package.sh
index 9591b79dc..17319bf27 100755
--- a/backend/go/rfdetr-cpp/package.sh
+++ b/backend/go/rfdetr-cpp/package.sh
@@ -10,7 +10,8 @@ REPO_ROOT="${CURDIR}/../../.."
 # Create lib directory
 mkdir -p $CURDIR/package/lib
 
-cp -avf $CURDIR/librfdetrcpp-*.so $CURDIR/package/
+cp -fv $CURDIR/librfdetrcpp-*.so $CURDIR/package/ 2>/dev/null || \
+	cp -fv $CURDIR/librfdetrcpp-*.dylib $CURDIR/package/
 cp -avf $CURDIR/rfdetr-cpp $CURDIR/package/
 cp -fv $CURDIR/run.sh $CURDIR/package/
 
diff --git a/backend/go/rfdetr-cpp/run.sh b/backend/go/rfdetr-cpp/run.sh
index 042904e45..ffbd604dd 100755
--- a/backend/go/rfdetr-cpp/run.sh
+++ b/backend/go/rfdetr-cpp/run.sh
@@ -12,9 +12,13 @@ if [ "$(uname)" != "Darwin" ]; then
 	grep -e "flags" /proc/cpuinfo | head -1
 fi
 
-LIBRARY="$CURDIR/librfdetrcpp-fallback.so"
+if [ "$(uname)" = "Darwin" ]; then
+	# macOS: single dylib variant (Metal or Accelerate)
+	LIBRARY="$CURDIR/librfdetrcpp-fallback.dylib"
+	export DYLD_LIBRARY_PATH=$CURDIR/lib:$DYLD_LIBRARY_PATH
+else
+	LIBRARY="$CURDIR/librfdetrcpp-fallback.so"
 
-if [ "$(uname)" != "Darwin" ]; then
 	if grep -q -e "\savx\s" /proc/cpuinfo ; then
 		echo "CPU:    AVX    found OK"
 		if [ -e $CURDIR/librfdetrcpp-avx.so ]; then
@@ -36,9 +40,10 @@ if [ "$(uname)" != "Darwin" ]; then
 			LIBRARY="$CURDIR/librfdetrcpp-avx512.so"
 		fi
 	fi
+
+	export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
 fi
 
-export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
 export RFDETR_LIBRARY=$LIBRARY
 
 # If there is a lib/ld.so, use it
diff --git a/backend/go/sam3-cpp/Makefile b/backend/go/sam3-cpp/Makefile
index 53b0dfb5e..27b6cedf7 100644
--- a/backend/go/sam3-cpp/Makefile
+++ b/backend/go/sam3-cpp/Makefile
@@ -66,7 +66,7 @@ ifeq ($(UNAME_S),Linux)
 	VARIANT_TARGETS = libgosam3-avx.so libgosam3-avx2.so libgosam3-avx512.so libgosam3-fallback.so
 else
 	# On non-Linux (e.g., Darwin), build only fallback variant
-	VARIANT_TARGETS = libgosam3-fallback.so
+	VARIANT_TARGETS = libgosam3-fallback.dylib
 endif
 
 sam3-cpp: main.go gosam3.go $(VARIANT_TARGETS)
@@ -78,7 +78,7 @@ package: sam3-cpp
 build: package
 
 clean: purge
-	rm -rf libgosam3*.so sam3-cpp package sources
+	rm -rf libgosam3*.so libgosam3*.dylib sam3-cpp package sources
 
 purge:
 	rm -rf build*
@@ -105,11 +105,19 @@ libgosam3-avx512.so: sources/sam3.cpp
 endif
 
 # Build fallback variant (all platforms)
+ifeq ($(UNAME_S),Darwin)
+libgosam3-fallback.dylib: sources/sam3.cpp
+	$(MAKE) purge
+	$(info ${GREEN}I sam3-cpp build info:fallback${RESET})
+	SO_TARGET=libgosam3-fallback.dylib CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) libgosam3-custom
+	rm -rfv build*
+else
 libgosam3-fallback.so: sources/sam3.cpp
 	$(MAKE) purge
 	$(info ${GREEN}I sam3-cpp build info:fallback${RESET})
 	SO_TARGET=libgosam3-fallback.so CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) libgosam3-custom
 	rm -rfv build*
+endif
 
 libgosam3-custom: CMakeLists.txt cpp/gosam3.cpp cpp/gosam3.h
 	mkdir -p build-$(SO_TARGET) && \
@@ -117,6 +125,7 @@ libgosam3-custom: CMakeLists.txt cpp/gosam3.cpp cpp/gosam3.h
 	cmake .. $(CMAKE_ARGS) && \
 	cmake --build . --config Release -j$(JOBS) && \
 	cd .. && \
-	mv build-$(SO_TARGET)/libgosam3.so ./$(SO_TARGET)
+	(mv build-$(SO_TARGET)/libgosam3.so ./$(SO_TARGET) 2>/dev/null || \
+	 mv build-$(SO_TARGET)/libgosam3.dylib ./$(SO_TARGET))
 
 all: sam3-cpp package
diff --git a/backend/go/sam3-cpp/main.go b/backend/go/sam3-cpp/main.go
index c83a59285..e36849f69 100644
--- a/backend/go/sam3-cpp/main.go
+++ b/backend/go/sam3-cpp/main.go
@@ -3,6 +3,7 @@ package main
 import (
 	"flag"
 	"os"
+	"runtime"
 
 	"github.com/ebitengine/purego"
 	grpc "github.com/mudler/LocalAI/pkg/grpc"
@@ -21,7 +22,11 @@ func main() {
 	// Get library name from environment variable, default to fallback
 	libName := os.Getenv("SAM3_LIBRARY")
 	if libName == "" {
-		libName = "./libgosam3-fallback.so"
+		if runtime.GOOS == "darwin" {
+			libName = "./libgosam3-fallback.dylib"
+		} else {
+			libName = "./libgosam3-fallback.so"
+		}
 	}
 
 	gosamLib, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL)
diff --git a/backend/go/sam3-cpp/package.sh b/backend/go/sam3-cpp/package.sh
index 254aef286..a648ee93c 100755
--- a/backend/go/sam3-cpp/package.sh
+++ b/backend/go/sam3-cpp/package.sh
@@ -10,7 +10,8 @@ REPO_ROOT="${CURDIR}/../../.."
 # Create lib directory
 mkdir -p $CURDIR/package/lib
 
-cp -avf $CURDIR/libgosam3-*.so $CURDIR/package/
+cp -fv $CURDIR/libgosam3-*.so $CURDIR/package/ 2>/dev/null || \
+	cp -fv $CURDIR/libgosam3-*.dylib $CURDIR/package/
 cp -avf $CURDIR/sam3-cpp $CURDIR/package/
 cp -fv $CURDIR/run.sh $CURDIR/package/
 
diff --git a/backend/go/sam3-cpp/run.sh b/backend/go/sam3-cpp/run.sh
index 423ed9199..7bff52df6 100755
--- a/backend/go/sam3-cpp/run.sh
+++ b/backend/go/sam3-cpp/run.sh
@@ -12,9 +12,13 @@ if [ "$(uname)" != "Darwin" ]; then
 	grep -e "flags" /proc/cpuinfo | head -1
 fi
 
-LIBRARY="$CURDIR/libgosam3-fallback.so"
+if [ "$(uname)" = "Darwin" ]; then
+	# macOS: single dylib variant (Metal or Accelerate)
+	LIBRARY="$CURDIR/libgosam3-fallback.dylib"
+	export DYLD_LIBRARY_PATH=$CURDIR/lib:$DYLD_LIBRARY_PATH
+else
+	LIBRARY="$CURDIR/libgosam3-fallback.so"
 
-if [ "$(uname)" != "Darwin" ]; then
 	if grep -q -e "\savx\s" /proc/cpuinfo ; then
 		echo "CPU:    AVX    found OK"
 		if [ -e $CURDIR/libgosam3-avx.so ]; then
@@ -36,9 +40,10 @@ if [ "$(uname)" != "Darwin" ]; then
 			LIBRARY="$CURDIR/libgosam3-avx512.so"
 		fi
 	fi
+
+	export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
 fi
 
-export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
 export SAM3_LIBRARY=$LIBRARY
 
 # If there is a lib/ld.so, use it
diff --git a/backend/go/sherpa-onnx/backend.go b/backend/go/sherpa-onnx/backend.go
index 0a092acf7..8bfe5e75c 100644
--- a/backend/go/sherpa-onnx/backend.go
+++ b/backend/go/sherpa-onnx/backend.go
@@ -7,6 +7,7 @@ import (
 	"fmt"
 	"os"
 	"path/filepath"
+	"runtime"
 	"strconv"
 	"strings"
 	"sync"
@@ -238,11 +239,19 @@ func loadSherpaLibs() error {
 func loadSherpaLibsOnce() error {
 	shimLib := os.Getenv("SHERPA_SHIM_LIBRARY")
 	if shimLib == "" {
-		shimLib = "libsherpa-shim.so"
+		if runtime.GOOS == "darwin" {
+			shimLib = "libsherpa-shim.dylib"
+		} else {
+			shimLib = "libsherpa-shim.so"
+		}
 	}
 	capiLib := os.Getenv("SHERPA_ONNX_LIBRARY")
 	if capiLib == "" {
-		capiLib = "libsherpa-onnx-c-api.so"
+		if runtime.GOOS == "darwin" {
+			capiLib = "libsherpa-onnx-c-api.dylib"
+		} else {
+			capiLib = "libsherpa-onnx-c-api.so"
+		}
 	}
 
 	shim, err := purego.Dlopen(shimLib, purego.RTLD_NOW|purego.RTLD_GLOBAL)
diff --git a/backend/go/sherpa-onnx/run.sh b/backend/go/sherpa-onnx/run.sh
index b703e5155..771324326 100755
--- a/backend/go/sherpa-onnx/run.sh
+++ b/backend/go/sherpa-onnx/run.sh
@@ -3,7 +3,13 @@ set -ex
 
 CURDIR=$(dirname "$(realpath $0)")
 
-export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
+if [ "$(uname)" = "Darwin" ]; then
+	export DYLD_LIBRARY_PATH=$CURDIR/lib:$DYLD_LIBRARY_PATH
+	export SHERPA_SHIM_LIBRARY=$CURDIR/lib/libsherpa-shim.dylib
+	export SHERPA_ONNX_LIBRARY=$CURDIR/lib/libsherpa-onnx-c-api.dylib
+else
+	export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
+fi
 
 if [ -f $CURDIR/lib/ld.so ]; then
 	echo "Using lib/ld.so"
diff --git a/backend/go/stablediffusion-ggml/Makefile b/backend/go/stablediffusion-ggml/Makefile
index 05b57b254..d161a5b47 100644
--- a/backend/go/stablediffusion-ggml/Makefile
+++ b/backend/go/stablediffusion-ggml/Makefile
@@ -131,6 +131,7 @@ libgosd-custom: CMakeLists.txt cpp/gosd.cpp cpp/gosd.h
 	cmake .. $(CMAKE_ARGS) && \
 	cmake --build . --config Release -j$(JOBS) && \
 	cd .. && \
-	mv build-$(SO_TARGET)/libgosd.so ./$(SO_TARGET)
+	(mv build-$(SO_TARGET)/libgosd.so ./$(SO_TARGET) 2>/dev/null || \
+	 mv build-$(SO_TARGET)/libgosd.dylib ./$(SO_TARGET))
 
 all: stablediffusion-ggml package
\ No newline at end of file
diff --git a/backend/go/stablediffusion-ggml/main.go b/backend/go/stablediffusion-ggml/main.go
index 998f2a5ab..b509c6a2b 100644
--- a/backend/go/stablediffusion-ggml/main.go
+++ b/backend/go/stablediffusion-ggml/main.go
@@ -3,6 +3,7 @@ package main
 import (
 	"flag"
 	"os"
+	"runtime"
 
 	"github.com/ebitengine/purego"
 	grpc "github.com/mudler/LocalAI/pkg/grpc"
@@ -21,7 +22,11 @@ func main() {
 	// Get library name from environment variable, default to fallback
 	libName := os.Getenv("SD_LIBRARY")
 	if libName == "" {
-		libName = "./libgosd-fallback.so"
+		if runtime.GOOS == "darwin" {
+			libName = "./libgosd-fallback.dylib"
+		} else {
+			libName = "./libgosd-fallback.so"
+		}
 	}
 
 	gosd, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL)
diff --git a/backend/go/stablediffusion-ggml/package.sh b/backend/go/stablediffusion-ggml/package.sh
index 8006e081f..922fb71ea 100755
--- a/backend/go/stablediffusion-ggml/package.sh
+++ b/backend/go/stablediffusion-ggml/package.sh
@@ -12,6 +12,7 @@ REPO_ROOT="${CURDIR}/../../.."
 mkdir -p $CURDIR/package/lib
 
 cp -avf $CURDIR/libgosd-*.so $CURDIR/package/
+cp -fv $CURDIR/libgosd-*.dylib $CURDIR/package/ 2>/dev/null || true
 cp -avf $CURDIR/stablediffusion-ggml $CURDIR/package/
 cp -fv $CURDIR/run.sh $CURDIR/package/
 
diff --git a/backend/go/stablediffusion-ggml/run.sh b/backend/go/stablediffusion-ggml/run.sh
index 71342e43b..e026b4b28 100755
--- a/backend/go/stablediffusion-ggml/run.sh
+++ b/backend/go/stablediffusion-ggml/run.sh
@@ -12,9 +12,18 @@ if [ "$(uname)" != "Darwin" ]; then
 	grep -e "flags" /proc/cpuinfo | head -1
 fi
 
-LIBRARY="$CURDIR/libgosd-fallback.so"
+if [ "$(uname)" = "Darwin" ]; then
+	# macOS: single library variant (Metal or Accelerate). On Apple, CMake
+	# emits a .dylib for a SHARED library but a .so for a MODULE library, and
+	# gosd is built as a MODULE, so prefer .dylib and fall back to .so.
+	LIBRARY="$CURDIR/libgosd-fallback.dylib"
+	if [ ! -e "$LIBRARY" ]; then
+		LIBRARY="$CURDIR/libgosd-fallback.so"
+	fi
+	export DYLD_LIBRARY_PATH=$CURDIR/lib:$DYLD_LIBRARY_PATH
+else
+	LIBRARY="$CURDIR/libgosd-fallback.so"
 
-if [ "$(uname)" != "Darwin" ]; then
 	if grep -q -e "\savx\s" /proc/cpuinfo ; then
 		echo "CPU:    AVX    found OK"
 		if [ -e $CURDIR/libgosd-avx.so ]; then
@@ -36,9 +45,10 @@ if [ "$(uname)" != "Darwin" ]; then
 			LIBRARY="$CURDIR/libgosd-avx512.so"
 		fi
 	fi
+
+	export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
 fi
 
-export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
 export SD_LIBRARY=$LIBRARY
 
 # If there is a lib/ld.so, use it
diff --git a/backend/go/vibevoice-cpp/Makefile b/backend/go/vibevoice-cpp/Makefile
index 199df9cc4..dc71eaa5d 100644
--- a/backend/go/vibevoice-cpp/Makefile
+++ b/backend/go/vibevoice-cpp/Makefile
@@ -70,8 +70,8 @@ UNAME_S := $(shell uname -s)
 ifeq ($(UNAME_S),Linux)
 	VARIANT_TARGETS = libgovibevoicecpp-avx.so libgovibevoicecpp-avx2.so libgovibevoicecpp-avx512.so libgovibevoicecpp-fallback.so
 else
-	# On non-Linux (e.g., Darwin), build only fallback variant
-	VARIANT_TARGETS = libgovibevoicecpp-fallback.so
+	# On non-Linux (e.g., Darwin), build only fallback variant (as a dylib)
+	VARIANT_TARGETS = libgovibevoicecpp-fallback.dylib
 endif
 
 vibevoice-cpp: main.go govibevoicecpp.go $(VARIANT_TARGETS)
@@ -83,7 +83,7 @@ package: vibevoice-cpp
 build: package
 
 clean: purge
-	rm -rf libgovibevoicecpp*.so package sources/vibevoice.cpp vibevoice-cpp
+	rm -rf libgovibevoicecpp*.so libgovibevoicecpp*.dylib package sources/vibevoice.cpp vibevoice-cpp
 
 purge:
 	rm -rf build*
@@ -119,13 +119,21 @@ libgovibevoicecpp-fallback.so: sources/vibevoice.cpp
 	SO_TARGET=libgovibevoicecpp-fallback.so CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) libgovibevoicecpp-custom
 	rm -rfv build*
 
+# Build fallback variant as a dylib (Darwin)
+libgovibevoicecpp-fallback.dylib: sources/vibevoice.cpp
+	$(MAKE) purge
+	$(info ${GREEN}I vibevoice-cpp build info:fallback (dylib)${RESET})
+	SO_TARGET=libgovibevoicecpp-fallback.dylib CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) libgovibevoicecpp-custom
+	rm -rfv build*
+
 libgovibevoicecpp-custom: CMakeLists.txt cpp/govibevoicecpp.cpp cpp/govibevoicecpp.h
 	mkdir -p build-$(SO_TARGET) && \
 	cd build-$(SO_TARGET) && \
 	cmake .. $(CMAKE_ARGS) && \
 	cmake --build . --config Release -j$(JOBS) --target govibevoicecpp && \
 	cd .. && \
-	mv build-$(SO_TARGET)/libgovibevoicecpp.so ./$(SO_TARGET)
+	(mv build-$(SO_TARGET)/libgovibevoicecpp.so ./$(SO_TARGET) 2>/dev/null || \
+	 mv build-$(SO_TARGET)/libgovibevoicecpp.dylib ./$(SO_TARGET))
 
 test: vibevoice-cpp
 	@echo "Running vibevoice-cpp tests..."
diff --git a/backend/go/vibevoice-cpp/main.go b/backend/go/vibevoice-cpp/main.go
index dd1f1ba43..b9a696d82 100644
--- a/backend/go/vibevoice-cpp/main.go
+++ b/backend/go/vibevoice-cpp/main.go
@@ -4,6 +4,7 @@ package main
 import (
 	"flag"
 	"os"
+	"runtime"
 
 	"github.com/ebitengine/purego"
 	grpc "github.com/mudler/LocalAI/pkg/grpc"
@@ -21,7 +22,11 @@ type LibFuncs struct {
 func main() {
 	libName := os.Getenv("VIBEVOICECPP_LIBRARY")
 	if libName == "" {
-		libName = "./libgovibevoicecpp-fallback.so"
+		if runtime.GOOS == "darwin" {
+			libName = "./libgovibevoicecpp-fallback.dylib"
+		} else {
+			libName = "./libgovibevoicecpp-fallback.so"
+		}
 	}
 
 	lib, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL)
diff --git a/backend/go/vibevoice-cpp/package.sh b/backend/go/vibevoice-cpp/package.sh
index 88010846f..62860b8d6 100755
--- a/backend/go/vibevoice-cpp/package.sh
+++ b/backend/go/vibevoice-cpp/package.sh
@@ -12,7 +12,8 @@ REPO_ROOT="${CURDIR}/../../.."
 mkdir -p $CURDIR/package/lib
 
 cp -avf $CURDIR/vibevoice-cpp $CURDIR/package/
-cp -fv $CURDIR/libgovibevoicecpp-*.so $CURDIR/package/
+cp -fv $CURDIR/libgovibevoicecpp-*.so $CURDIR/package/ 2>/dev/null || \
+	cp -fv $CURDIR/libgovibevoicecpp-*.dylib $CURDIR/package/
 cp -fv $CURDIR/run.sh $CURDIR/package/
 
 # Detect architecture and copy appropriate libraries
diff --git a/backend/go/vibevoice-cpp/run.sh b/backend/go/vibevoice-cpp/run.sh
index 93e92d5b8..ec5a39c14 100755
--- a/backend/go/vibevoice-cpp/run.sh
+++ b/backend/go/vibevoice-cpp/run.sh
@@ -11,9 +11,13 @@ if [ "$(uname)" != "Darwin" ]; then
 	grep -e "flags" /proc/cpuinfo | head -1
 fi
 
-LIBRARY="$CURDIR/libgovibevoicecpp-fallback.so"
+if [ "$(uname)" = "Darwin" ]; then
+	# macOS: single dylib variant (Metal or Accelerate)
+	LIBRARY="$CURDIR/libgovibevoicecpp-fallback.dylib"
+	export DYLD_LIBRARY_PATH=$CURDIR/lib:$DYLD_LIBRARY_PATH
+else
+	LIBRARY="$CURDIR/libgovibevoicecpp-fallback.so"
 
-if [ "$(uname)" != "Darwin" ]; then
 	if grep -q -e "\savx\s" /proc/cpuinfo ; then
 		echo "CPU:    AVX    found OK"
 		if [ -e $CURDIR/libgovibevoicecpp-avx.so ]; then
@@ -34,9 +38,10 @@ if [ "$(uname)" != "Darwin" ]; then
 			LIBRARY="$CURDIR/libgovibevoicecpp-avx512.so"
 		fi
 	fi
+
+	export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
 fi
 
-export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
 export VIBEVOICECPP_LIBRARY=$LIBRARY
 
 if [ -f $CURDIR/lib/ld.so ]; then
diff --git a/backend/go/whisper/Makefile b/backend/go/whisper/Makefile
index e8ad8545f..6dd13dd2c 100644
--- a/backend/go/whisper/Makefile
+++ b/backend/go/whisper/Makefile
@@ -117,6 +117,7 @@ libgowhisper-custom: CMakeLists.txt cpp/gowhisper.cpp cpp/gowhisper.h
 	cmake .. $(CMAKE_ARGS) && \
 	cmake --build . --config Release -j$(JOBS) && \
 	cd .. && \
-	mv build-$(SO_TARGET)/libgowhisper.so ./$(SO_TARGET)
+	(mv build-$(SO_TARGET)/libgowhisper.so ./$(SO_TARGET) 2>/dev/null || \
+	 mv build-$(SO_TARGET)/libgowhisper.dylib ./$(SO_TARGET:.so=.dylib))
 
 all: whisper package
diff --git a/backend/go/whisper/main.go b/backend/go/whisper/main.go
index e48b24519..ab102f4c4 100644
--- a/backend/go/whisper/main.go
+++ b/backend/go/whisper/main.go
@@ -4,6 +4,7 @@ package main
 import (
 	"flag"
 	"os"
+	"runtime"
 
 	"github.com/ebitengine/purego"
 	grpc "github.com/mudler/LocalAI/pkg/grpc"
@@ -22,7 +23,11 @@ func main() {
 	// Get library name from environment variable, default to fallback
 	libName := os.Getenv("WHISPER_LIBRARY")
 	if libName == "" {
-		libName = "./libgowhisper-fallback.so"
+		if runtime.GOOS == "darwin" {
+			libName = "./libgowhisper-fallback.dylib"
+		} else {
+			libName = "./libgowhisper-fallback.so"
+		}
 	}
 
 	gosd, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL)
diff --git a/backend/go/whisper/package.sh b/backend/go/whisper/package.sh
index dfecdf5c6..efeaa7009 100755
--- a/backend/go/whisper/package.sh
+++ b/backend/go/whisper/package.sh
@@ -12,7 +12,8 @@ REPO_ROOT="${CURDIR}/../../.."
 mkdir -p $CURDIR/package/lib
 
 cp -avf $CURDIR/whisper $CURDIR/package/
-cp -fv $CURDIR/libgowhisper-*.so $CURDIR/package/
+cp -fv $CURDIR/libgowhisper-*.so $CURDIR/package/ 2>/dev/null || \
+	cp -fv $CURDIR/libgowhisper-*.dylib $CURDIR/package/
 cp -fv $CURDIR/run.sh $CURDIR/package/
 
 # Detect architecture and copy appropriate libraries
diff --git a/backend/go/whisper/run.sh b/backend/go/whisper/run.sh
index 1af2c0535..0e2bd7eb0 100755
--- a/backend/go/whisper/run.sh
+++ b/backend/go/whisper/run.sh
@@ -12,9 +12,13 @@ if [ "$(uname)" != "Darwin" ]; then
 	grep -e "flags" /proc/cpuinfo | head -1
 fi
 
-LIBRARY="$CURDIR/libgowhisper-fallback.so"
+if [ "$(uname)" = "Darwin" ]; then
+	# macOS: single dylib variant (Metal or Accelerate)
+	LIBRARY="$CURDIR/libgowhisper-fallback.dylib"
+	export DYLD_LIBRARY_PATH=$CURDIR/lib:$DYLD_LIBRARY_PATH
+else
+	LIBRARY="$CURDIR/libgowhisper-fallback.so"
 
-if [ "$(uname)" != "Darwin" ]; then
 	if grep -q -e "\savx\s" /proc/cpuinfo ; then
 		echo "CPU:    AVX    found OK"
 		if [ -e $CURDIR/libgowhisper-avx.so ]; then
@@ -36,9 +40,10 @@ if [ "$(uname)" != "Darwin" ]; then
 			LIBRARY="$CURDIR/libgowhisper-avx512.so"
 		fi
 	fi
+
+	export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
 fi
 
-export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
 export WHISPER_LIBRARY=$LIBRARY
 
 # If there is a lib/ld.so, use it

From a7fec9a49db3f33dc6c879c0f41b993c4afd4635 Mon Sep 17 00:00:00 2001
From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com>
Date: Thu, 25 Jun 2026 08:09:36 +0200
Subject: [PATCH 05/11] feat(backends): add darwin/metal (MPS) build for trl
 (#10487)

* feat(backends): add darwin/metal (MPS) build for trl

Authors backend/python/trl/requirements-mps.txt and wires trl into the
darwin CI matrix and gallery so the MPS training path can be built and
validated on Apple Silicon. The MPS variant installs plain PyPI torch
wheels (MPS-capable on macOS arm64) and the trl training stack; bitsandbytes
is omitted as it is a CUDA-only dependency with poor Apple Silicon support.

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Assisted-by: Claude:opus-4.8 [Claude Code]

* fix(trl): guard uv-only --index-strategy for the pip/darwin path

The darwin/MPS build installs with pip (USE_PIP=true), which rejects the
uv-only --index-strategy flag and failed the darwin backend build. Add it
only on the uv path; Linux/CUDA resolution is unchanged.

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Assisted-by: Claude:opus-4.8 [Claude Code]

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Co-authored-by: Ettore Di Giacinto <mudler@localai.io>
---
 .github/backend-matrix.yml              |  3 +++
 backend/index.yaml                      | 11 +++++++++++
 backend/python/trl/install.sh           |  8 +++++++-
 backend/python/trl/requirements-mps.txt | 12 ++++++++++++
 4 files changed, 33 insertions(+), 1 deletion(-)
 create mode 100644 backend/python/trl/requirements-mps.txt

diff --git a/.github/backend-matrix.yml b/.github/backend-matrix.yml
index 17d436cc1..f34921db9 100644
--- a/.github/backend-matrix.yml
+++ b/.github/backend-matrix.yml
@@ -4974,6 +4974,9 @@ includeDarwin:
   - backend: "kitten-tts"
     tag-suffix: "-metal-darwin-arm64-kitten-tts"
     build-type: "mps"
+  - backend: "trl"
+    tag-suffix: "-metal-darwin-arm64-trl"
+    build-type: "mps"
   - backend: "liquid-audio"
     tag-suffix: "-metal-darwin-arm64-liquid-audio"
     build-type: "mps"
diff --git a/backend/index.yaml b/backend/index.yaml
index f3a2b892d..381aa073b 100644
--- a/backend/index.yaml
+++ b/backend/index.yaml
@@ -5295,6 +5295,7 @@
     nvidia: "cuda12-trl"
     nvidia-cuda-12: "cuda12-trl"
     nvidia-cuda-13: "cuda13-trl"
+    metal: "metal-trl"
 ## TRL backend images
 - !!merge <<: *trl
   name: "cpu-trl"
@@ -5326,6 +5327,16 @@
   uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-13-trl"
   mirrors:
     - localai/localai-backends:master-gpu-nvidia-cuda-13-trl
+- !!merge <<: *trl
+  name: "metal-trl"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-trl"
+  mirrors:
+    - localai/localai-backends:latest-metal-darwin-arm64-trl
+- !!merge <<: *trl
+  name: "metal-trl-development"
+  uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-trl"
+  mirrors:
+    - localai/localai-backends:master-metal-darwin-arm64-trl
 ## llama.cpp quantization backend
 - &llama-cpp-quantization
   name: "llama-cpp-quantization"
diff --git a/backend/python/trl/install.sh b/backend/python/trl/install.sh
index 6963e60ed..ce0552f87 100644
--- a/backend/python/trl/install.sh
+++ b/backend/python/trl/install.sh
@@ -8,7 +8,13 @@ else
     source $backend_dir/../common/libbackend.sh
 fi
 
-EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match"
+EXTRA_PIP_INSTALL_FLAGS+=" --upgrade"
+# --index-strategy is a uv-only flag. The darwin/MPS build installs with pip
+# (USE_PIP=true in scripts/build/python-darwin.sh), which rejects it. Only add
+# it when uv is the installer, keeping the Linux/CUDA resolution unchanged.
+if [ "x${USE_PIP:-}" != "xtrue" ]; then
+    EXTRA_PIP_INSTALL_FLAGS+=" --index-strategy=unsafe-first-match"
+fi
 installRequirements
 
 # Fetch convert_hf_to_gguf.py and gguf package from the same llama.cpp version
diff --git a/backend/python/trl/requirements-mps.txt b/backend/python/trl/requirements-mps.txt
new file mode 100644
index 000000000..fbdfb6536
--- /dev/null
+++ b/backend/python/trl/requirements-mps.txt
@@ -0,0 +1,12 @@
+torch==2.10.0
+trl
+peft
+datasets>=3.0.0
+transformers>=4.56.2
+accelerate>=1.4.0
+huggingface-hub>=1.3.0
+sentencepiece
+# Note: bitsandbytes is intentionally omitted on MPS. It is only used by the
+# CUDA (cublas) variants for 8-bit/4-bit quantization and has poor support on
+# Apple Silicon. torch here uses the plain PyPI wheels, which ship MPS support
+# on macOS arm64.

From 066abf82c08d966dc1ca254c32e64d64e676e4f0 Mon Sep 17 00:00:00 2001
From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com>
Date: Thu, 25 Jun 2026 08:10:08 +0200
Subject: [PATCH 06/11] feat(llama-cpp): cpu_moe/n_cpu_moe options + generic
 upstream-flag passthrough (#10490)

* feat(llama-cpp): add main-model cpu_moe/n_cpu_moe options

Mirror the existing draft_cpu_moe/draft_n_cpu_moe siblings for the main
model, matching upstream --cpu-moe / --n-cpu-moe (common/arg.cpp). Lets
users keep MoE expert weights on CPU to manage VRAM on large MoE models.

Closes part of #10483

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* feat(llama-cpp): forward unknown '-' options to upstream arg parser

Any options: entry starting with '-' is collected and passed verbatim to
llama.cpp's own common_params_parse (LLAMA_EXAMPLE_SERVER) at the end of
params_parse, so every upstream llama-server flag works without a new
hand-wired branch. Passthrough runs last and wins on overlap; n_parallel is
snapshotted to survive parser_init's SERVER reset, and help/usage/completion
flags are skipped to avoid exiting the backend.

Closes #10483

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* docs(llama-cpp): document cpu_moe/n_cpu_moe and option passthrough

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* fix(llama-cpp): terminate tensor/kv override vectors after passthrough

The tensor_buft_overrides padding and the kv/draft override terminators
ran before the generic option passthrough, so a passthrough flag
(--cpu-moe, --override-tensor, --override-kv, ...) appended a real entry
after the null sentinel - tripping the model loader's
back().pattern == nullptr assertion (crash) or being silently dropped.
Move all three termination/padding blocks to the end of params_parse,
after both the named-option loop and common_params_parse have pushed
their real entries. Also widen the exit()-flag skip list so --version,
--license, --list-devices and --cache-list cannot terminate the backend.

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Co-authored-by: Ettore Di Giacinto <mudler@localai.io>
---
 backend/cpp/llama-cpp/grpc-server.cpp        | 138 ++++++++++++++++---
 docs/content/advanced/model-configuration.md |  33 +++++
 2 files changed, 150 insertions(+), 21 deletions(-)

diff --git a/backend/cpp/llama-cpp/grpc-server.cpp b/backend/cpp/llama-cpp/grpc-server.cpp
index c2e7f22e4..6907b9122 100644
--- a/backend/cpp/llama-cpp/grpc-server.cpp
+++ b/backend/cpp/llama-cpp/grpc-server.cpp
@@ -37,6 +37,7 @@
 #include "backend.pb.h"
 #include "backend.grpc.pb.h"
 #include "common.h"
+#include "arg.h"
 #include "chat-auto-parser.h"
 #include <getopt.h>
 #include <grpcpp/ext/proto_server_reflection_plugin.h>
@@ -592,6 +593,10 @@ static void params_parse(server_context& /*ctx_server*/, const backend::ModelOpt
     params.checkpoint_min_step = 256;
 #endif
 
+    // Raw upstream llama-server flags collected from any option entry that
+    // starts with '-'. Applied once after the loop via common_params_parse.
+    std::vector<std::string> extra_argv;
+
      // decode options. Options are in form optname:optvale, or if booleans only optname.
     for (int i = 0; i < request->options_size(); i++) {
         std::string opt = request->options(i);
@@ -1080,6 +1085,31 @@ static void params_parse(server_context& /*ctx_server*/, const backend::ModelOpt
                 } catch (...) {}
             }
 
+        // --- main model MoE on CPU (upstream --cpu-moe / --n-cpu-moe) ---
+        } else if (!strcmp(optname, "cpu_moe")) {
+            // Bool-style flag: keep all MoE expert weights on CPU.
+            const bool enable = (optval == NULL) ||
+                optval_str == "true" || optval_str == "1" || optval_str == "yes" ||
+                optval_str == "on" || optval_str == "enabled";
+            if (enable) {
+                params.tensor_buft_overrides.push_back(llm_ffn_exps_cpu_override());
+            }
+        } else if (!strcmp(optname, "n_cpu_moe")) {
+            if (optval != NULL) {
+                try {
+                    int n = std::stoi(optval_str);
+                    if (n < 0) n = 0;
+                    // Keep override-name storage alive for the lifetime of the
+                    // params struct (mirrors upstream arg.cpp's function-local static).
+                    static std::list<std::string> buft_overrides_main;
+                    for (int i = 0; i < n; ++i) {
+                        buft_overrides_main.push_back(llm_ffn_exps_block_regex(i));
+                        params.tensor_buft_overrides.push_back(
+                            {buft_overrides_main.back().c_str(), ggml_backend_cpu_buffer_type()});
+                    }
+                } catch (...) {}
+            }
+
         // --- draft model tensor buffer overrides (upstream --spec-draft-override-tensor) ---
         } else if (!strcmp(optname, "draft_override_tensor") || !strcmp(optname, "spec_draft_override_tensor")) {
             // Format: <tensor regex>=<buffer type>,<tensor regex>=<buffer type>,...
@@ -1111,6 +1141,30 @@ static void params_parse(server_context& /*ctx_server*/, const backend::ModelOpt
                 else { cur.push_back(c); }
             }
             if (!cur.empty()) flush(cur);
+
+        // --- generic passthrough: any entry starting with '-' is a raw
+        //     upstream llama-server flag, forwarded verbatim to the parser. ---
+        } else if (optname[0] == '-') {
+            std::string flag = optname;
+            // These flags make upstream's parser exit() (printing usage /
+            // completion), which would kill the backend process. Skip them.
+            if (flag == "-h" || flag == "--help" || flag == "--usage" ||
+                flag == "--version" || flag == "--license" ||
+                flag == "--list-devices" || flag == "-cl" ||
+                flag == "--cache-list" ||
+                flag.rfind("--completion", 0) == 0) {
+                fprintf(stderr,
+                    "[llama-cpp] ignoring passthrough flag that would exit: %s\n",
+                    flag.c_str());
+            } else {
+                extra_argv.push_back(flag);
+                // Preserve the whole value after the first ':' so embedded
+                // colons (e.g. host:port) survive strtok's truncation of optval.
+                auto colon = opt.find(':');
+                if (colon != std::string::npos) {
+                    extra_argv.push_back(opt.substr(colon + 1));
+                }
+            }
         }
     }
 
@@ -1146,27 +1200,6 @@ static void params_parse(server_context& /*ctx_server*/, const backend::ModelOpt
         }
     }
 
-    if (!params.kv_overrides.empty()) {
-        params.kv_overrides.emplace_back();
-        params.kv_overrides.back().key[0] = 0;
-    }
-
-    // tensor_buft_overrides sentinel termination (mirrors upstream common/arg.cpp).
-    // Real entries are pushed during option parsing; here we pad/terminate so the
-    // model loader sees back().pattern == nullptr (GGML_ASSERT at common.cpp:1543)
-    // and so llama_params_fit has the placeholder slots it requires.
-    {
-        const size_t ntbo = llama_max_tensor_buft_overrides();
-        while (params.tensor_buft_overrides.size() < ntbo) {
-            params.tensor_buft_overrides.push_back({nullptr, nullptr});
-        }
-    }
-    // Terminate the draft tensor_buft_overrides list with a sentinel, mirroring
-    // the main-model handling above.
-    if (!params.speculative.draft.tensor_buft_overrides.empty()) {
-        params.speculative.draft.tensor_buft_overrides.push_back({nullptr, nullptr});
-    }
-
     // TODO: Add yarn
 
     if (!request->tensorsplit().empty()) {
@@ -1259,6 +1292,69 @@ static void params_parse(server_context& /*ctx_server*/, const backend::ModelOpt
             params.sampling.grammar_triggers.push_back(std::move(trigger));
         }
     }
+
+    // Apply any raw upstream flags last so an explicit passthrough flag wins
+    // over the LocalAI-resolved field it maps to (e.g. --ctx-size beats
+    // context_size). This is the same parser llama-server itself uses.
+    if (!extra_argv.empty()) {
+        // common_params_parser_init resets a few fields for the SERVER example
+        // (n_parallel -> -1, use_color). Snapshot n_parallel so an unrelated
+        // passthrough flag can't silently clobber LocalAI's resolved value.
+        const int saved_n_parallel = params.n_parallel;
+
+        std::vector<char *> argv;
+        std::string prog = "llama-server";
+        argv.push_back(prog.data());
+        for (auto & a : extra_argv) {
+            argv.push_back(a.data());
+        }
+
+        // ctx_arg.params is a reference, so this overlays the given flags onto
+        // `params` in place. Returns false on a recoverable parse error (and
+        // self-restores params); may exit() on a hard error, exactly as
+        // passing the same bad flag to llama-server would.
+        if (!common_params_parse((int)argv.size(), argv.data(), params,
+                                 LLAMA_EXAMPLE_SERVER)) {
+            fprintf(stderr,
+                "[llama-cpp] failed to parse passthrough options; ignoring them\n");
+        }
+
+        // Restore n_parallel unless a passthrough flag explicitly set it
+        // (parser_init's reset sentinel for SERVER is -1).
+        if (params.n_parallel == -1) {
+            params.n_parallel = saved_n_parallel;
+        }
+    }
+
+    // Terminate/pad the override vectors only after BOTH the named-option loop
+    // and the generic passthrough (common_params_parse above) have pushed their
+    // real entries, so back() is the null sentinel the model loader asserts on.
+    // Running these before the passthrough let a passthrough flag (--cpu-moe,
+    // --override-tensor, --override-kv, ...) append a real entry after the
+    // sentinel: a GGML_ASSERT crash for tensor_buft_overrides, a silent drop for
+    // kv_overrides. Double-termination is harmless (the while is a no-op if the
+    // passthrough parse already padded; an extra trailing null is ignored).
+
+    if (!params.kv_overrides.empty()) {
+        params.kv_overrides.emplace_back();
+        params.kv_overrides.back().key[0] = 0;
+    }
+
+    // tensor_buft_overrides sentinel termination (mirrors upstream common/arg.cpp).
+    // Real entries are pushed during option parsing; here we pad/terminate so the
+    // model loader sees back().pattern == nullptr (GGML_ASSERT at common.cpp:1543)
+    // and so llama_params_fit has the placeholder slots it requires.
+    {
+        const size_t ntbo = llama_max_tensor_buft_overrides();
+        while (params.tensor_buft_overrides.size() < ntbo) {
+            params.tensor_buft_overrides.push_back({nullptr, nullptr});
+        }
+    }
+    // Terminate the draft tensor_buft_overrides list with a sentinel, mirroring
+    // the main-model handling above.
+    if (!params.speculative.draft.tensor_buft_overrides.empty()) {
+        params.speculative.draft.tensor_buft_overrides.push_back({nullptr, nullptr});
+    }
 }
 
 
diff --git a/docs/content/advanced/model-configuration.md b/docs/content/advanced/model-configuration.md
index 55e435b12..8092c162a 100644
--- a/docs/content/advanced/model-configuration.md
+++ b/docs/content/advanced/model-configuration.md
@@ -494,6 +494,39 @@ These llama.cpp options are passed through the `options:` array.
 | `direct_io` / `use_direct_io` | bool | `false` | Open the model with `O_DIRECT` (faster cold loads on NVMe; ignored if not supported). |
 | `verbosity` | int | `3` | llama.cpp internal log verbosity threshold. Higher = more verbose. |
 | `override_tensor` / `tensor_buft_overrides` | string | "" | Per-tensor buffer-type overrides for the main model. Format: `<tensor regex>=<buffer type>,<tensor regex>=<buffer type>,...`. Mirrors the existing `draft_override_tensor` syntax for the draft model. |
+| `cpu_moe` | bool | `false` | Keep all MoE expert weights of the main model on CPU (upstream `--cpu-moe`). Frees VRAM on large MoE models (DeepSeek, Qwen3 `*-A3B`). |
+| `n_cpu_moe` | int | `0` | Keep MoE expert weights of the first N main-model layers on CPU (upstream `--n-cpu-moe`). |
+
+#### Generic option passthrough
+
+Any `options:` entry whose name starts with `-` is forwarded **verbatim** to
+upstream llama.cpp's own `llama-server` argument parser. This means any flag the
+bundled llama.cpp supports works without LocalAI needing a dedicated option,
+even ones added after your LocalAI version was built. See the upstream
+[server flags reference](https://github.com/ggml-org/llama.cpp/blob/master/tools/server/README.md).
+
+Format mirrors the rest of the array - `--flag` for a boolean, or `--flag:value`
+for a flag that takes a value. Everything after the first `:` is the value, so
+embedded colons (e.g. `host:port`) are preserved:
+
+```yaml
+options:
+  - "--cpu-moe"                 # boolean flag
+  - "--n-cpu-moe:4"             # flag with a value
+  - "--override-tensor:exps=CPU"
+```
+
+Notes:
+
+- **Precedence:** passthrough flags are applied last, so an explicit flag
+  overrides the LocalAI option it maps to (e.g. `--ctx-size:8192` overrides
+  `context_size`).
+- **Power-user territory:** if the upstream parser rejects a flag or value, all
+  passthrough flags are ignored (an error is logged); a hard parser error exits
+  the backend, failing the model load. Prefer the named options when one exists.
+- Flags that would terminate the process (such as `--help`, `--usage`,
+  `--version`, `--license`, `--list-devices`, `--cache-list`, and
+  `--completion*`) are ignored.
 
 ### Prompt Caching
 

From fae9f6356f1e447438d25a399c0345d32a51b002 Mon Sep 17 00:00:00 2001
From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com>
Date: Thu, 25 Jun 2026 08:10:41 +0200
Subject: [PATCH 07/11] chore: :arrow_up: Update ServeurpersoCom/qwentts.cpp to
 `9dbe7ea26a01b30fccb117ae5e86807c1dc23d42` (#10499)

:arrow_up: Update ServeurpersoCom/qwentts.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
---
 backend/go/qwen3-tts-cpp/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/backend/go/qwen3-tts-cpp/Makefile b/backend/go/qwen3-tts-cpp/Makefile
index 3311f93c3..c2bc6de34 100644
--- a/backend/go/qwen3-tts-cpp/Makefile
+++ b/backend/go/qwen3-tts-cpp/Makefile
@@ -8,7 +8,7 @@ JOBS?=$(shell nproc --ignore=1)
 
 # qwentts.cpp version
 QWEN3TTS_REPO?=https://github.com/ServeurpersoCom/qwentts.cpp
-QWEN3TTS_CPP_VERSION?=4536dcdce27c3764a93a06d6bf64026b124962f5
+QWEN3TTS_CPP_VERSION?=9dbe7ea26a01b30fccb117ae5e86807c1dc23d42
 SO_TARGET?=libgoqwen3ttscpp.so
 
 CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF

From fe4f425fb5818e3e1a315fa056033dee764f7fb2 Mon Sep 17 00:00:00 2001
From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com>
Date: Thu, 25 Jun 2026 08:10:59 +0200
Subject: [PATCH 08/11] fix: correct scheme/host on self-referential URLs
 behind an HTTPS reverse proxy (#10482) (#10504)

* fix(http): harden BaseURL proxy scheme/host detection

Split comma-separated X-Forwarded-Proto and honor the RFC 7239 Forwarded
header so generated links use https behind common reverse-proxy setups.

Refs #10482

Assisted-by: Claude:claude-opus-4-8
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* feat(http): honor explicit external base URL in BaseURL

When _external_base_url is set in the request context it dictates the
origin (scheme+host+port); the proxy path prefix is still appended.

Refs #10482

Assisted-by: Claude:claude-opus-4-8
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* feat(config): generalize LOCALAI_BASE_URL to ExternalBaseURL

LOCALAI_BASE_URL now sets a single instance-wide external base URL used
for OAuth callbacks and all self-referential links. A Pre middleware
stamps it into the request context for middleware.BaseURL.

Refs #10482

Assisted-by: Claude:claude-opus-4-8
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* docs: document LOCALAI_BASE_URL and reverse-proxy headers

Refs #10482

Assisted-by: Claude:claude-opus-4-8
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* test(http): cover parseForwarded edge cases; clarify base-url flag group

Adds direct unit coverage for quoted/malformed/multi-element Forwarded
headers and regroups the external base URL flag away from auth-only.

Refs #10482

Assisted-by: Claude:claude-opus-4-8
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Co-authored-by: Ettore Di Giacinto <mudler@localai.io>
---
 core/cli/run.go                            |  11 +-
 core/config/application_config.go          |  12 +-
 core/http/app.go                           |  12 ++
 core/http/middleware/baseurl.go            |  57 ++++++++-
 core/http/middleware/baseurl_test.go       | 134 +++++++++++++++++++++
 core/http/routes/auth.go                   |   2 +-
 docs/content/advanced/reverse-proxy-tls.md |  20 +++
 7 files changed, 238 insertions(+), 10 deletions(-)

diff --git a/core/cli/run.go b/core/cli/run.go
index abb0cdbf1..fd7ba8cd9 100644
--- a/core/cli/run.go
+++ b/core/cli/run.go
@@ -140,7 +140,7 @@ type RunCMD struct {
 	OIDCIssuer           string `env:"LOCALAI_OIDC_ISSUER" help:"OIDC issuer URL for auto-discovery" group:"auth"`
 	OIDCClientID         string `env:"LOCALAI_OIDC_CLIENT_ID" help:"OIDC Client ID (auto-enables auth)" group:"auth"`
 	OIDCClientSecret     string `env:"LOCALAI_OIDC_CLIENT_SECRET" help:"OIDC Client Secret" group:"auth"`
-	AuthBaseURL          string `env:"LOCALAI_BASE_URL" help:"Base URL for OAuth callbacks (e.g. http://localhost:8080)" group:"auth"`
+	ExternalBaseURL      string `env:"LOCALAI_BASE_URL" help:"External base URL of this instance (e.g. https://localhost:8080). Used for OAuth callbacks and self-referential links (generated images/videos, job status). When unset, derived from X-Forwarded-Proto/Host or Forwarded headers." group:"api"`
 	AuthAdminEmail       string `env:"LOCALAI_ADMIN_EMAIL" help:"Email address to auto-promote to admin role" group:"auth"`
 	AuthRegistrationMode string `env:"LOCALAI_REGISTRATION_MODE" default:"open" help:"Registration mode: 'open' (default), 'approval', or 'invite' (invite code required)" group:"auth"`
 	DisableLocalAuth     bool   `env:"LOCALAI_DISABLE_LOCAL_AUTH" default:"false" help:"Disable local email/password registration and login (use with OAuth/OIDC-only setups)" group:"auth"`
@@ -503,9 +503,6 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error {
 			opts = append(opts, config.WithAuthOIDCClientID(r.OIDCClientID))
 			opts = append(opts, config.WithAuthOIDCClientSecret(r.OIDCClientSecret))
 		}
-		if r.AuthBaseURL != "" {
-			opts = append(opts, config.WithAuthBaseURL(r.AuthBaseURL))
-		}
 		if r.AuthAdminEmail != "" {
 			opts = append(opts, config.WithAuthAdminEmail(r.AuthAdminEmail))
 		}
@@ -523,6 +520,12 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error {
 		}
 	}
 
+	// Applied unconditionally: the external base URL governs all self-referential
+	// links (not just OAuth callbacks), so it must take effect even when auth is off.
+	if r.ExternalBaseURL != "" {
+		opts = append(opts, config.WithExternalBaseURL(r.ExternalBaseURL))
+	}
+
 	if idleWatchDog || busyWatchDog {
 		opts = append(opts, config.EnableWatchDog)
 		if idleWatchDog {
diff --git a/core/config/application_config.go b/core/config/application_config.go
index 87acd6bd5..1821a8441 100644
--- a/core/config/application_config.go
+++ b/core/config/application_config.go
@@ -49,6 +49,13 @@ type ApplicationConfig struct {
 	P2PNetworkID                  string
 	Federated                     bool
 
+	// ExternalBaseURL is the externally visible base URL of this instance
+	// (scheme+host[:port]), set via LOCALAI_BASE_URL. When non-empty it is
+	// authoritative for every self-referential URL LocalAI emits (OAuth
+	// callbacks, generated image/video links, async job StatusURLs),
+	// overriding proxy-header detection. Empty = derive from request headers.
+	ExternalBaseURL string
+
 	// DisableStats turns off per-request token tracking. By default the
 	// routing module's billing recorder runs in every mode (including
 	// no-auth single-user) so dashboards and `/api/usage` are immediately
@@ -196,7 +203,6 @@ type AuthConfig struct {
 	OIDCIssuer          string // OIDC issuer URL for auto-discovery (e.g. https://accounts.google.com)
 	OIDCClientID        string
 	OIDCClientSecret    string
-	BaseURL             string // for OAuth callback URLs (e.g. "http://localhost:8080")
 	AdminEmail          string // auto-promote to admin on login
 	RegistrationMode    string // "open", "approval" (default when empty), "invite"
 	DisableLocalAuth    bool   // disable local email/password registration and login
@@ -950,9 +956,9 @@ func WithAuthGitHubClientSecret(clientSecret string) AppOption {
 	}
 }
 
-func WithAuthBaseURL(baseURL string) AppOption {
+func WithExternalBaseURL(url string) AppOption {
 	return func(o *ApplicationConfig) {
-		o.Auth.BaseURL = baseURL
+		o.ExternalBaseURL = url
 	}
 }
 
diff --git a/core/http/app.go b/core/http/app.go
index 9ec0711fb..ee5cd99eb 100644
--- a/core/http/app.go
+++ b/core/http/app.go
@@ -149,6 +149,18 @@ func API(application *application.Application) (*echo.Echo, error) {
 	// Middleware - StripPathPrefix must be registered early as it uses Rewrite which runs before routing
 	e.Pre(httpMiddleware.StripPathPrefix())
 
+	// Stamp the configured external base URL into each request context so
+	// middleware.BaseURL can treat it as authoritative for self-referential
+	// links. Registered as Pre so it runs before routing and handlers.
+	if extBaseURL := application.ApplicationConfig().ExternalBaseURL; extBaseURL != "" {
+		e.Pre(func(next echo.HandlerFunc) echo.HandlerFunc {
+			return func(c echo.Context) error {
+				c.Set("_external_base_url", extBaseURL)
+				return next(c)
+			}
+		})
+	}
+
 	e.Pre(middleware.RemoveTrailingSlash())
 
 	if application.ApplicationConfig().MachineTag != "" {
diff --git a/core/http/middleware/baseurl.go b/core/http/middleware/baseurl.go
index a1e1844ae..84f72cf69 100644
--- a/core/http/middleware/baseurl.go
+++ b/core/http/middleware/baseurl.go
@@ -55,17 +55,72 @@ func BasePathPrefix(c echo.Context) string {
 // The returned URL is guaranteed to end with `/`.
 // The method should be used in conjunction with the StripPathPrefix middleware.
 func BaseURL(c echo.Context) string {
+	// An explicit external base URL (LOCALAI_BASE_URL) is authoritative for
+	// the origin. The proxy-derived path prefix is still appended so a
+	// reverse-proxy mount point keeps working. Trailing slashes are
+	// normalized via BasePathPrefix, which always starts and ends with "/".
+	if ext, ok := c.Get("_external_base_url").(string); ok && ext != "" {
+		return strings.TrimRight(ext, "/") + BasePathPrefix(c)
+	}
+
+	fwdProto, fwdHost := parseForwarded(c.Request().Header.Get("Forwarded"))
+
 	scheme := "http"
-	if c.Request().Header.Get("X-Forwarded-Proto") == "https" {
+	switch {
+	case c.Request().TLS != nil:
 		scheme = "https"
-	} else if c.Request().TLS != nil {
+	case strings.EqualFold(firstToken(c.Request().Header.Get("X-Forwarded-Proto")), "https"):
+		scheme = "https"
+	case strings.EqualFold(fwdProto, "https"):
 		scheme = "https"
 	}
 
 	host := c.Request().Host
-	if forwardedHost := c.Request().Header.Get("X-Forwarded-Host"); forwardedHost != "" {
+	// Like X-Forwarded-Proto, X-Forwarded-Host can arrive as a comma-separated
+	// list when several proxies are chained; keep only the first entry.
+	if forwardedHost := firstToken(c.Request().Header.Get("X-Forwarded-Host")); forwardedHost != "" {
 		host = forwardedHost
+	} else if fwdHost != "" {
+		host = fwdHost
 	}
 
 	return scheme + "://" + host + BasePathPrefix(c)
 }
+
+// firstToken returns the text of v up to its first comma (all of v when there
+// is none) with surrounding whitespace trimmed. Proxy chains can emit
+// X-Forwarded-Proto as "https,http"; only the first (client-side) hop matters.
+func firstToken(v string) string {
+	if i := strings.IndexByte(v, ','); i >= 0 {
+		v = v[:i]
+	}
+	return strings.TrimSpace(v)
+}
+
+// parseForwarded extracts the proto and host directives from the first element
+// of an RFC 7239 Forwarded header (e.g. `for=x;proto=https;host=h, for=y`).
+// Directive names match case-insensitively; surrounding double quotes are
+// stripped and values are returned as-is. A missing directive yields "".
+func parseForwarded(header string) (proto, host string) {
+	if header == "" {
+		return "", ""
+	}
+	// Only the first element (closest proxy to the client) matters here.
+	if i := strings.IndexByte(header, ','); i >= 0 {
+		header = header[:i]
+	}
+	for _, directive := range strings.Split(header, ";") {
+		key, value, ok := strings.Cut(strings.TrimSpace(directive), "=")
+		if !ok {
+			continue
+		}
+		value = strings.Trim(strings.TrimSpace(value), `"`)
+		switch strings.ToLower(strings.TrimSpace(key)) {
+		case "proto":
+			proto = value
+		case "host":
+			host = value
+		}
+	}
+	return proto, host
+}
diff --git a/core/http/middleware/baseurl_test.go b/core/http/middleware/baseurl_test.go
index 4f6dbb1d1..6a132514b 100644
--- a/core/http/middleware/baseurl_test.go
+++ b/core/http/middleware/baseurl_test.go
@@ -135,4 +135,138 @@ var _ = Describe("BaseURL", func() {
 			Entry("missing leading slash", "evil"),
 		)
 	})
+
+	Context("scheme detection hardening", func() {
+		It("treats comma-separated X-Forwarded-Proto as https when first token is https", func() {
+			app := echo.New()
+			actualURL := ""
+			app.GET("/x", func(c echo.Context) error {
+				actualURL = BaseURL(c)
+				return nil
+			})
+			req := httptest.NewRequest("GET", "/x", nil)
+			req.Header.Set("X-Forwarded-Proto", "https,http")
+			rec := httptest.NewRecorder()
+			app.ServeHTTP(rec, req)
+			Expect(actualURL).To(Equal("https://example.com/"))
+		})
+
+		It("derives https from the RFC 7239 Forwarded proto directive", func() {
+			app := echo.New()
+			actualURL := ""
+			app.GET("/x", func(c echo.Context) error {
+				actualURL = BaseURL(c)
+				return nil
+			})
+			req := httptest.NewRequest("GET", "/x", nil)
+			req.Header.Set("Forwarded", "for=192.0.2.1;proto=https;host=proxy.example")
+			rec := httptest.NewRecorder()
+			app.ServeHTTP(rec, req)
+			Expect(actualURL).To(Equal("https://proxy.example/"))
+		})
+
+		It("prefers X-Forwarded-Host over the Forwarded host directive", func() {
+			app := echo.New()
+			actualURL := ""
+			app.GET("/x", func(c echo.Context) error {
+				actualURL = BaseURL(c)
+				return nil
+			})
+			req := httptest.NewRequest("GET", "/x", nil)
+			req.Header.Set("X-Forwarded-Host", "xfh.example")
+			req.Header.Set("Forwarded", "host=fwd.example;proto=https")
+			rec := httptest.NewRecorder()
+			app.ServeHTTP(rec, req)
+			Expect(actualURL).To(Equal("https://xfh.example/"))
+		})
+	})
+
+	Context("explicit external base URL override", func() {
+		It("uses the configured origin over conflicting forwarded headers", func() {
+			app := echo.New()
+			actualURL := ""
+			app.GET("/x", func(c echo.Context) error {
+				c.Set("_external_base_url", "https://192.168.0.13:34567")
+				actualURL = BaseURL(c)
+				return nil
+			})
+			req := httptest.NewRequest("GET", "/x", nil)
+			req.Header.Set("X-Forwarded-Proto", "http")
+			req.Header.Set("X-Forwarded-Host", "internal:8080")
+			rec := httptest.NewRecorder()
+			app.ServeHTTP(rec, req)
+			Expect(actualURL).To(Equal("https://192.168.0.13:34567/"))
+		})
+
+		It("combines the configured origin with a detected path prefix", func() {
+			app := echo.New()
+			actualURL := ""
+			app.GET("/hello", func(c echo.Context) error {
+				c.Set("_original_path", "/localai/hello")
+				c.Set("_external_base_url", "https://ext.example")
+				actualURL = BaseURL(c)
+				return nil
+			})
+			req := httptest.NewRequest("GET", "/hello", nil)
+			rec := httptest.NewRecorder()
+			app.ServeHTTP(rec, req)
+			Expect(actualURL).To(Equal("https://ext.example/localai/"))
+		})
+
+		It("ignores an empty override", func() {
+			app := echo.New()
+			actualURL := ""
+			app.GET("/x", func(c echo.Context) error {
+				c.Set("_external_base_url", "")
+				actualURL = BaseURL(c)
+				return nil
+			})
+			req := httptest.NewRequest("GET", "/x", nil)
+			rec := httptest.NewRecorder()
+			app.ServeHTTP(rec, req)
+			Expect(actualURL).To(Equal("http://example.com/"))
+		})
+	})
+
+	Context("parseForwarded helper", func() {
+		It("parses unquoted proto and host", func() {
+			proto, host := parseForwarded("for=192.0.2.1;proto=https;host=h.example")
+			Expect(proto).To(Equal("https"))
+			Expect(host).To(Equal("h.example"))
+		})
+
+		It("strips quotes around values", func() {
+			proto, host := parseForwarded(`proto="https";host="h.example"`)
+			Expect(proto).To(Equal("https"))
+			Expect(host).To(Equal("h.example"))
+		})
+
+		It("uses only the first element of a multi-element header", func() {
+			proto, host := parseForwarded("proto=https;host=first.example, proto=http;host=second.example")
+			Expect(proto).To(Equal("https"))
+			Expect(host).To(Equal("first.example"))
+		})
+
+		It("returns empty strings for an empty header", func() {
+			proto, host := parseForwarded("")
+			Expect(proto).To(BeEmpty())
+			Expect(host).To(BeEmpty())
+		})
+
+		It("skips directives without a value", func() {
+			proto, host := parseForwarded("proto;host=h.example")
+			Expect(proto).To(BeEmpty())
+			Expect(host).To(Equal("h.example"))
+		})
+	})
+
+	Context("firstToken helper", func() {
+		It("returns the whole trimmed string when there is no comma", func() {
+			Expect(firstToken("  https  ")).To(Equal("https"))
+		})
+
+		It("returns the first trimmed token when there is a comma", func() {
+			Expect(firstToken("https , http")).To(Equal("https"))
+		})
+	})
 })
diff --git a/core/http/routes/auth.go b/core/http/routes/auth.go
index ef8372fff..b4144e0a1 100644
--- a/core/http/routes/auth.go
+++ b/core/http/routes/auth.go
@@ -268,7 +268,7 @@ func RegisterAuthRoutes(e *echo.Echo, app *application.Application) {
 	// Set up OAuth manager when any OAuth/OIDC provider is configured
 	if appConfig.Auth.GitHubClientID != "" || appConfig.Auth.OIDCClientID != "" {
 		oauthMgr, err := auth.NewOAuthManager(
-			appConfig.Auth.BaseURL,
+			appConfig.ExternalBaseURL,
 			auth.OAuthParams{
 				GitHubClientID:     appConfig.Auth.GitHubClientID,
 				GitHubClientSecret: appConfig.Auth.GitHubClientSecret,
diff --git a/docs/content/advanced/reverse-proxy-tls.md b/docs/content/advanced/reverse-proxy-tls.md
index 24af55c62..d36a64ae4 100644
--- a/docs/content/advanced/reverse-proxy-tls.md
+++ b/docs/content/advanced/reverse-proxy-tls.md
@@ -14,6 +14,26 @@ When running LocalAI behind a TLS termination reverse proxy, the Web UI may fail
 
 LocalAI uses the `X-Forwarded-Proto` HTTP header to determine the protocol used by clients. When this header is set to `https`, LocalAI will generate HTTPS URLs for static assets in the Web UI.
 
+## Running behind a reverse proxy (HTTPS / subpath)
+
+LocalAI does not terminate TLS itself, so HTTPS is provided by a reverse
+proxy in front of it. Self-referential links (generated image and video
+URLs, async job status URLs, OAuth callbacks) need the externally visible
+scheme, host and port.
+
+LocalAI determines these in this order:
+
+1. `LOCALAI_BASE_URL` - if set, it is authoritative for the origin. Set it to
+   the externally visible base URL, e.g. `LOCALAI_BASE_URL=https://localai.example.com`
+   or `https://192.168.0.13:34567`. Recommended whenever links come back with
+   the wrong scheme or host.
+2. Otherwise, the `X-Forwarded-Proto` and `X-Forwarded-Host` headers (or the
+   RFC 7239 `Forwarded` header) sent by the proxy. Ensure your proxy forwards
+   `X-Forwarded-Proto: https`.
+
+A reverse-proxy subpath mount is supported via `X-Forwarded-Prefix`; it is
+appended to `LOCALAI_BASE_URL` when both are present.
+
 ## Required Headers
 
 Your reverse proxy must forward these headers to LocalAI:

From 93d6255de393f10bcec662623f6b4636f4973b71 Mon Sep 17 00:00:00 2001
From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com>
Date: Thu, 25 Jun 2026 08:11:17 +0200
Subject: [PATCH 09/11] chore: :arrow_up: Update ggml-org/llama.cpp to
 `8be759e6f70d629638a7eb70db3824cbdcea370b` (#10501)

:arrow_up: Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
---
 backend/cpp/llama-cpp/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/backend/cpp/llama-cpp/Makefile b/backend/cpp/llama-cpp/Makefile
index 24f1f215d..f00fad518 100644
--- a/backend/cpp/llama-cpp/Makefile
+++ b/backend/cpp/llama-cpp/Makefile
@@ -1,5 +1,5 @@
 
-LLAMA_VERSION?=be4a6a63eb2b848e19c277bdcf2bd399e8af76d9
+LLAMA_VERSION?=8be759e6f70d629638a7eb70db3824cbdcea370b
 LLAMA_REPO?=https://github.com/ggerganov/llama.cpp
 
 CMAKE_ARGS?=

From f1e50713216c4badb532a3e65061196838138ba8 Mon Sep 17 00:00:00 2001
From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com>
Date: Thu, 25 Jun 2026 08:11:31 +0200
Subject: [PATCH 10/11] chore: :arrow_up: Update leejet/stable-diffusion.cpp to
 `8caa3f908ae6d4a4bef531e73b9a969f266a3d1f` (#10493)

:arrow_up: Update leejet/stable-diffusion.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
---
 backend/go/stablediffusion-ggml/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/backend/go/stablediffusion-ggml/Makefile b/backend/go/stablediffusion-ggml/Makefile
index d161a5b47..7a9917ea8 100644
--- a/backend/go/stablediffusion-ggml/Makefile
+++ b/backend/go/stablediffusion-ggml/Makefile
@@ -8,7 +8,7 @@ JOBS?=$(shell nproc --ignore=1)
 
 # stablediffusion.cpp (ggml)
 STABLEDIFFUSION_GGML_REPO?=https://github.com/leejet/stable-diffusion.cpp
-STABLEDIFFUSION_GGML_VERSION?=f440ad9c29dd8bc34e5d1f4b863832b96d6ea05f
+STABLEDIFFUSION_GGML_VERSION?=8caa3f908ae6d4a4bef531e73b9a969f266a3d1f
 
 CMAKE_ARGS+=-DGGML_MAX_NAME=128
 

From 693e3eec050cd507f4369800a1843ba0bb41448b Mon Sep 17 00:00:00 2001
From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com>
Date: Thu, 25 Jun 2026 08:11:52 +0200
Subject: [PATCH 11/11] chore(model gallery): :robot: add 1 new models via
 gallery agent (#10505)

chore(model gallery): :robot: add new models via gallery agent

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
---
 gallery/index.yaml | 73 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 73 insertions(+)

diff --git a/gallery/index.yaml b/gallery/index.yaml
index 52f23a771..25a6e781d 100644
--- a/gallery/index.yaml
+++ b/gallery/index.yaml
@@ -1,4 +1,77 @@
 ---
+- name: "gemmable-4-12b-mtp"
+  url: "github:mudler/LocalAI/gallery/virtual.yaml@master"
+  urls:
+    - https://huggingface.co/Mia-AiLab/Gemmable-4-12B-MTP-GGUF
+  description: |
+    ## Gemmable 4 12B
+
+    Gemmable 4 12B is a GGUF export of Gemma 4 12B fine-tuned on Fable-5 style
+    reasoning and assistant traces.
+
+    ## Highlights
+
+      - Base model: `google/gemma-4-12B`
+      - Format: GGUF
+      - Training style: Fable-5 style reasoning and assistant traces
+      - Distribution: fp16 GGUF plus matching assistant GGUFs for each quant
+      - Intended use: local inference, coding, reasoning, and assistant workflows
+
+    ## How to use
+
+    ### llama.cpp
+
+    Standard load:
+
+    ```bash
+    llama-server -m "gemmable-4-12b-fp16.gguf"
+    ```
+
+    Speculative / draft-MTP load:
+
+    ```bash
+    llama-server -m "gemmable-4-12b-Q4_K_M.gguf" \
+      --spec-draft-model "gemmable-4-12b-Q4_K_M-mtp.gguf" \
+      --spec-type draft-mtp \
+      --spec-draft-n-max 4
+    ```
+
+    Use the matching fp16 or quantized main file with its `-mtp` companion.
+
+    ### LM Studio
+
+    1.  Search this repo, download target + mtp file.
+    2.  Load target.
+    3.  Load settings → Speculative Decoding → select mtp file.
+
+    (Requires LM Studio with am17an's PR merged or custom llama.cpp runtime. As of 2026-05, mainline LM Studio runtime doesn't yet have `draft-mtp` for Gemma-4 — track upstream merge.)
+
+    ## GGUF / local inference notes
+
+    ...
+  tags:
+    - llm
+    - gguf
+    - reasoning
+  icon: https://storage.ko-fi.com/cdn/kofi6.png
+  overrides:
+    backend: llama-cpp
+    function:
+      automatic_tool_parsing_fallback: true
+      grammar:
+        disable: true
+    known_usecases:
+      - chat
+    options:
+      - use_jinja:true  # single plain string "use_jinja:true" (no space after ':'), not a key/value pair
+    parameters:
+      model: llama-cpp/models/Gemmable-4-12B-MTP-GGUF/gemmable-4-12b-Q4_K_M-mtp.gguf  # NOTE(review): the description passes this same "-mtp" file as --spec-draft-model (draft companion) - confirm it is meant to be the main model
+    template:
+      use_tokenizer_template: true
+  files:
+    - filename: llama-cpp/models/Gemmable-4-12B-MTP-GGUF/gemmable-4-12b-Q4_K_M-mtp.gguf
+      sha256: 217dc0ed177ecc733f801a851c3e3854cf1b17a1f86cd5430c0a7f82d93027bc
+      uri: https://huggingface.co/Mia-AiLab/Gemmable-4-12B-MTP-GGUF/resolve/main/gemmable-4-12b-Q4_K_M-mtp.gguf
 - name: "lfm2.5-1.2b-instruct"
   url: "github:mudler/LocalAI/gallery/virtual.yaml@master"
   urls: