mirror of
https://github.com/mudler/LocalAI.git
synced 2026-02-04 03:32:40 -05:00
Compare commits
28 Commits
feat/stats
...
v3.7.0
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
9ecfdc5938 | ||
|
|
c332ef5cce | ||
|
|
6e7a8c6041 | ||
|
|
43e707ec4f | ||
|
|
fed3663a74 | ||
|
|
5b72798db3 | ||
|
|
d24d6d4e93 | ||
|
|
50ee1fbe06 | ||
|
|
19f3425ce0 | ||
|
|
a6ef245534 | ||
|
|
88cb379c2d | ||
|
|
0ddb2e8dcf | ||
|
|
91b9301bec | ||
|
|
fad5868f7b | ||
|
|
1e5b9135df | ||
|
|
36d19e23e0 | ||
|
|
cba9d1aac0 | ||
|
|
dd21a0d2f9 | ||
|
|
302a43b3ae | ||
|
|
2955061b42 | ||
|
|
84644ab693 | ||
|
|
b8f40dde1e | ||
|
|
a6c9789a54 | ||
|
|
a48d9ce27c | ||
|
|
fb825a2708 | ||
|
|
5558dce449 | ||
|
|
cf74a11e65 | ||
|
|
86b5deec81 |
4
.github/workflows/bump_deps.yaml
vendored
4
.github/workflows/bump_deps.yaml
vendored
@@ -1,10 +1,10 @@
|
||||
name: Bump dependencies
|
||||
name: Bump Backend dependencies
|
||||
on:
|
||||
schedule:
|
||||
- cron: 0 20 * * *
|
||||
workflow_dispatch:
|
||||
jobs:
|
||||
bump:
|
||||
bump-backends:
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
|
||||
4
.github/workflows/bump_docs.yaml
vendored
4
.github/workflows/bump_docs.yaml
vendored
@@ -1,10 +1,10 @@
|
||||
name: Bump dependencies
|
||||
name: Bump Documentation
|
||||
on:
|
||||
schedule:
|
||||
- cron: 0 20 * * *
|
||||
workflow_dispatch:
|
||||
jobs:
|
||||
bump:
|
||||
bump-docs:
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
|
||||
LLAMA_VERSION?=5a4ff43e7dd049e35942bc3d12361dab2f155544
|
||||
LLAMA_VERSION?=31c511a968348281e11d590446bb815048a1e912
|
||||
LLAMA_REPO?=https://github.com/ggerganov/llama.cpp
|
||||
|
||||
CMAKE_ARGS?=
|
||||
|
||||
@@ -8,7 +8,7 @@ JOBS?=$(shell nproc --ignore=1)
|
||||
|
||||
# whisper.cpp version
|
||||
WHISPER_REPO?=https://github.com/ggml-org/whisper.cpp
|
||||
WHISPER_CPP_VERSION?=f16c12f3f55f5bd3d6ac8cf2f31ab90a42c884d5
|
||||
WHISPER_CPP_VERSION?=c62adfbd1ecdaea9e295c72d672992514a2d887c
|
||||
SO_TARGET?=libgowhisper.so
|
||||
|
||||
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF
|
||||
|
||||
@@ -61,12 +61,15 @@ func (gm GalleryElements[T]) Search(term string) GalleryElements[T] {
|
||||
term = strings.ToLower(term)
|
||||
for _, m := range gm {
|
||||
if fuzzy.Match(term, strings.ToLower(m.GetName())) ||
|
||||
fuzzy.Match(term, strings.ToLower(m.GetDescription())) ||
|
||||
fuzzy.Match(term, strings.ToLower(m.GetGallery().Name)) ||
|
||||
strings.Contains(strings.ToLower(m.GetName()), term) ||
|
||||
strings.Contains(strings.ToLower(m.GetDescription()), term) ||
|
||||
strings.Contains(strings.ToLower(m.GetGallery().Name), term) ||
|
||||
strings.Contains(strings.ToLower(strings.Join(m.GetTags(), ",")), term) {
|
||||
filteredModels = append(filteredModels, m)
|
||||
}
|
||||
}
|
||||
|
||||
return filteredModels
|
||||
}
|
||||
|
||||
|
||||
@@ -128,7 +128,6 @@ func API(application *application.Application) (*fiber.App, error) {
|
||||
router.Use(recover.New())
|
||||
}
|
||||
|
||||
// OpenTelemetry metrics for Prometheus export
|
||||
if !application.ApplicationConfig().DisableMetrics {
|
||||
metricsService, err := services.NewLocalAIMetricsService()
|
||||
if err != nil {
|
||||
@@ -142,7 +141,6 @@ func API(application *application.Application) (*fiber.App, error) {
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// Health Checks should always be exempt from auth, so register these first
|
||||
routes.HealthRoutes(router)
|
||||
|
||||
@@ -204,28 +202,12 @@ func API(application *application.Application) (*fiber.App, error) {
|
||||
routes.RegisterElevenLabsRoutes(router, requestExtractor, application.ModelConfigLoader(), application.ModelLoader(), application.ApplicationConfig())
|
||||
routes.RegisterLocalAIRoutes(router, requestExtractor, application.ModelConfigLoader(), application.ModelLoader(), application.ApplicationConfig(), application.GalleryService())
|
||||
routes.RegisterOpenAIRoutes(router, requestExtractor, application)
|
||||
|
||||
if !application.ApplicationConfig().DisableWebUI {
|
||||
|
||||
// Create metrics store for tracking usage (before API routes registration)
|
||||
metricsStore := services.NewInMemoryMetricsStore()
|
||||
|
||||
// Add metrics middleware BEFORE API routes so it can intercept them
|
||||
router.Use(middleware.MetricsMiddleware(metricsStore))
|
||||
|
||||
// Register cleanup on shutdown
|
||||
router.Hooks().OnShutdown(func() error {
|
||||
metricsStore.Stop()
|
||||
log.Info().Msg("Metrics store stopped")
|
||||
return nil
|
||||
})
|
||||
|
||||
// Create opcache for tracking UI operations
|
||||
opcache := services.NewOpCache(application.GalleryService())
|
||||
routes.RegisterUIAPIRoutes(router, application.ModelConfigLoader(), application.ApplicationConfig(), application.GalleryService(), opcache, metricsStore)
|
||||
routes.RegisterUIAPIRoutes(router, application.ModelConfigLoader(), application.ApplicationConfig(), application.GalleryService(), opcache)
|
||||
routes.RegisterUIRoutes(router, application.ModelConfigLoader(), application.ModelLoader(), application.ApplicationConfig(), application.GalleryService())
|
||||
}
|
||||
|
||||
routes.RegisterJINARoutes(router, requestExtractor, application.ModelConfigLoader(), application.ModelLoader(), application.ApplicationConfig())
|
||||
|
||||
// Define a custom 404 handler
|
||||
|
||||
@@ -1,61 +0,0 @@
|
||||
package localai
|
||||
|
||||
import (
|
||||
"github.com/gofiber/fiber/v2"
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
"github.com/mudler/LocalAI/core/gallery"
|
||||
"github.com/mudler/LocalAI/core/http/utils"
|
||||
"github.com/mudler/LocalAI/core/services"
|
||||
"github.com/mudler/LocalAI/internal"
|
||||
"github.com/mudler/LocalAI/pkg/model"
|
||||
)
|
||||
|
||||
// SettingsEndpoint handles the settings page which shows detailed model/backend management
|
||||
func SettingsEndpoint(appConfig *config.ApplicationConfig,
|
||||
cl *config.ModelConfigLoader, ml *model.ModelLoader, opcache *services.OpCache) func(*fiber.Ctx) error {
|
||||
return func(c *fiber.Ctx) error {
|
||||
modelConfigs := cl.GetAllModelsConfigs()
|
||||
galleryConfigs := map[string]*gallery.ModelConfig{}
|
||||
|
||||
installedBackends, err := gallery.ListSystemBackends(appConfig.SystemState)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for _, m := range modelConfigs {
|
||||
cfg, err := gallery.GetLocalModelConfiguration(ml.ModelPath, m.Name)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
galleryConfigs[m.Name] = cfg
|
||||
}
|
||||
|
||||
loadedModels := ml.ListLoadedModels()
|
||||
loadedModelsMap := map[string]bool{}
|
||||
for _, m := range loadedModels {
|
||||
loadedModelsMap[m.ID] = true
|
||||
}
|
||||
|
||||
modelsWithoutConfig, _ := services.ListModels(cl, ml, config.NoFilterFn, services.LOOSE_ONLY)
|
||||
|
||||
// Get model statuses to display in the UI the operation in progress
|
||||
processingModels, taskTypes := opcache.GetStatus()
|
||||
|
||||
summary := fiber.Map{
|
||||
"Title": "LocalAI - Settings & Management",
|
||||
"Version": internal.PrintableVersion(),
|
||||
"BaseURL": utils.BaseURL(c),
|
||||
"Models": modelsWithoutConfig,
|
||||
"ModelsConfig": modelConfigs,
|
||||
"GalleryConfig": galleryConfigs,
|
||||
"ApplicationConfig": appConfig,
|
||||
"ProcessingModels": processingModels,
|
||||
"TaskTypes": taskTypes,
|
||||
"LoadedModels": loadedModelsMap,
|
||||
"InstalledBackends": installedBackends,
|
||||
}
|
||||
|
||||
// Render settings page
|
||||
return c.Render("views/settings", summary)
|
||||
}
|
||||
}
|
||||
@@ -1,174 +0,0 @@
|
||||
package middleware
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/gofiber/fiber/v2"
|
||||
"github.com/mudler/LocalAI/core/services"
|
||||
"github.com/rs/zerolog/log"
|
||||
)
|
||||
|
||||
// MetricsMiddleware creates a middleware that tracks API usage metrics
|
||||
// Note: Uses CONTEXT_LOCALS_KEY_MODEL_NAME constant defined in request.go
|
||||
func MetricsMiddleware(metricsStore services.MetricsStore) fiber.Handler {
|
||||
return func(c *fiber.Ctx) error {
|
||||
path := c.Path()
|
||||
|
||||
// Skip tracking for UI routes, static files, and non-API endpoints
|
||||
if shouldSkipMetrics(path) {
|
||||
return c.Next()
|
||||
}
|
||||
|
||||
// Record start time
|
||||
start := time.Now()
|
||||
|
||||
// Get endpoint category
|
||||
endpoint := categorizeEndpoint(path)
|
||||
|
||||
// Continue with the request
|
||||
err := c.Next()
|
||||
|
||||
// Record metrics after request completes
|
||||
duration := time.Since(start)
|
||||
success := err == nil && c.Response().StatusCode() < 400
|
||||
|
||||
// Extract model name from context (set by RequestExtractor middleware)
|
||||
// Use the same constant as RequestExtractor
|
||||
model := "unknown"
|
||||
if modelVal, ok := c.Locals(CONTEXT_LOCALS_KEY_MODEL_NAME).(string); ok && modelVal != "" {
|
||||
model = modelVal
|
||||
log.Debug().Str("model", model).Str("endpoint", endpoint).Msg("Recording metrics for request")
|
||||
} else {
|
||||
// Fallback: try to extract from path params or query
|
||||
model = extractModelFromRequest(c)
|
||||
log.Debug().Str("model", model).Str("endpoint", endpoint).Msg("Recording metrics for request (fallback)")
|
||||
}
|
||||
|
||||
// Extract backend from response headers if available
|
||||
backend := string(c.Response().Header.Peek("X-LocalAI-Backend"))
|
||||
|
||||
// Record the request
|
||||
metricsStore.RecordRequest(endpoint, model, backend, success, duration)
|
||||
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
// shouldSkipMetrics determines if a request should be excluded from metrics
|
||||
func shouldSkipMetrics(path string) bool {
|
||||
// Skip UI routes
|
||||
skipPrefixes := []string{
|
||||
"/views/",
|
||||
"/static/",
|
||||
"/browse/",
|
||||
"/chat/",
|
||||
"/text2image/",
|
||||
"/tts/",
|
||||
"/talk/",
|
||||
"/models/edit/",
|
||||
"/import-model",
|
||||
"/settings",
|
||||
"/api/models", // UI API endpoints
|
||||
"/api/backends", // UI API endpoints
|
||||
"/api/operations", // UI API endpoints
|
||||
"/api/p2p", // UI API endpoints
|
||||
"/api/metrics", // Metrics API itself
|
||||
}
|
||||
|
||||
for _, prefix := range skipPrefixes {
|
||||
if strings.HasPrefix(path, prefix) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
// Also skip root path and other UI pages
|
||||
if path == "/" || path == "/index" {
|
||||
return true
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
// categorizeEndpoint maps request paths to friendly endpoint categories
|
||||
func categorizeEndpoint(path string) string {
|
||||
// OpenAI-compatible endpoints
|
||||
if strings.HasPrefix(path, "/v1/chat/completions") || strings.HasPrefix(path, "/chat/completions") {
|
||||
return "chat"
|
||||
}
|
||||
if strings.HasPrefix(path, "/v1/completions") || strings.HasPrefix(path, "/completions") {
|
||||
return "completions"
|
||||
}
|
||||
if strings.HasPrefix(path, "/v1/embeddings") || strings.HasPrefix(path, "/embeddings") {
|
||||
return "embeddings"
|
||||
}
|
||||
if strings.HasPrefix(path, "/v1/images/generations") || strings.HasPrefix(path, "/images/generations") {
|
||||
return "image-generation"
|
||||
}
|
||||
if strings.HasPrefix(path, "/v1/audio/transcriptions") || strings.HasPrefix(path, "/audio/transcriptions") {
|
||||
return "transcriptions"
|
||||
}
|
||||
if strings.HasPrefix(path, "/v1/audio/speech") || strings.HasPrefix(path, "/audio/speech") {
|
||||
return "text-to-speech"
|
||||
}
|
||||
if strings.HasPrefix(path, "/v1/models") || strings.HasPrefix(path, "/models") {
|
||||
return "models"
|
||||
}
|
||||
|
||||
// LocalAI-specific endpoints
|
||||
if strings.HasPrefix(path, "/v1/internal") {
|
||||
return "internal"
|
||||
}
|
||||
if strings.Contains(path, "/tts") {
|
||||
return "text-to-speech"
|
||||
}
|
||||
if strings.Contains(path, "/stt") || strings.Contains(path, "/whisper") {
|
||||
return "speech-to-text"
|
||||
}
|
||||
if strings.Contains(path, "/sound-generation") {
|
||||
return "sound-generation"
|
||||
}
|
||||
|
||||
// Default to the first path segment
|
||||
parts := strings.Split(strings.Trim(path, "/"), "/")
|
||||
if len(parts) > 0 {
|
||||
return parts[0]
|
||||
}
|
||||
|
||||
return "unknown"
|
||||
}
|
||||
|
||||
// extractModelFromRequest attempts to extract the model name from the request
|
||||
func extractModelFromRequest(c *fiber.Ctx) string {
|
||||
// Try query parameter first
|
||||
model := c.Query("model")
|
||||
if model != "" {
|
||||
return model
|
||||
}
|
||||
|
||||
// Try to extract from JSON body for POST requests
|
||||
if c.Method() == fiber.MethodPost {
|
||||
// Read body
|
||||
bodyBytes := c.Body()
|
||||
if len(bodyBytes) > 0 {
|
||||
// Parse JSON
|
||||
var reqBody map[string]interface{}
|
||||
if err := json.Unmarshal(bodyBytes, &reqBody); err == nil {
|
||||
if modelVal, ok := reqBody["model"]; ok {
|
||||
if modelStr, ok := modelVal.(string); ok {
|
||||
return modelStr
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Try path parameter for endpoints like /models/:model
|
||||
model = c.Params("model")
|
||||
if model != "" {
|
||||
return model
|
||||
}
|
||||
|
||||
return "unknown"
|
||||
}
|
||||
@@ -127,10 +127,6 @@ func (re *RequestExtractor) SetModelAndConfig(initializer func() schema.LocalAIR
|
||||
log.Debug().Str("context localModelName", localModelName).Msg("overriding empty model name in request body with value found earlier in middleware chain")
|
||||
input.ModelName(&localModelName)
|
||||
}
|
||||
} else {
|
||||
// Update context locals with the model name from the request body
|
||||
// This ensures downstream middleware (like metrics) can access it
|
||||
ctx.Locals(CONTEXT_LOCALS_KEY_MODEL_NAME, input.ModelName(nil))
|
||||
}
|
||||
|
||||
cfg, err := re.modelConfigLoader.LoadModelConfigFileByNameDefaultOptions(input.ModelName(nil), re.applicationConfig)
|
||||
|
||||
@@ -23,9 +23,6 @@ func RegisterUIRoutes(app *fiber.App,
|
||||
|
||||
app.Get("/", localai.WelcomeEndpoint(appConfig, cl, ml, processingOps))
|
||||
|
||||
// Settings page - detailed model/backend management
|
||||
app.Get("/settings", localai.SettingsEndpoint(appConfig, cl, ml, processingOps))
|
||||
|
||||
// P2P
|
||||
app.Get("/p2p", func(c *fiber.Ctx) error {
|
||||
summary := fiber.Map{
|
||||
|
||||
@@ -18,7 +18,7 @@ import (
|
||||
)
|
||||
|
||||
// RegisterUIAPIRoutes registers JSON API routes for the web UI
|
||||
func RegisterUIAPIRoutes(app *fiber.App, cl *config.ModelConfigLoader, appConfig *config.ApplicationConfig, galleryService *services.GalleryService, opcache *services.OpCache, metricsStore services.MetricsStore) {
|
||||
func RegisterUIAPIRoutes(app *fiber.App, cl *config.ModelConfigLoader, appConfig *config.ApplicationConfig, galleryService *services.GalleryService, opcache *services.OpCache) {
|
||||
|
||||
// Operations API - Get all current operations (models + backends)
|
||||
app.Get("/api/operations", func(c *fiber.Ctx) error {
|
||||
@@ -716,104 +716,4 @@ func RegisterUIAPIRoutes(app *fiber.App, cl *config.ModelConfigLoader, appConfig
|
||||
},
|
||||
})
|
||||
})
|
||||
|
||||
// Metrics API endpoints
|
||||
if metricsStore != nil {
|
||||
// Get metrics summary
|
||||
app.Get("/api/metrics/summary", func(c *fiber.Ctx) error {
|
||||
endpointStats := metricsStore.GetEndpointStats()
|
||||
modelStats := metricsStore.GetModelStats()
|
||||
backendStats := metricsStore.GetBackendStats()
|
||||
|
||||
// Get top 5 models
|
||||
type modelStat struct {
|
||||
Name string `json:"name"`
|
||||
Count int64 `json:"count"`
|
||||
}
|
||||
topModels := make([]modelStat, 0)
|
||||
for model, count := range modelStats {
|
||||
topModels = append(topModels, modelStat{Name: model, Count: count})
|
||||
}
|
||||
sort.Slice(topModels, func(i, j int) bool {
|
||||
return topModels[i].Count > topModels[j].Count
|
||||
})
|
||||
if len(topModels) > 5 {
|
||||
topModels = topModels[:5]
|
||||
}
|
||||
|
||||
// Get top 5 endpoints
|
||||
type endpointStat struct {
|
||||
Name string `json:"name"`
|
||||
Count int64 `json:"count"`
|
||||
}
|
||||
topEndpoints := make([]endpointStat, 0)
|
||||
for endpoint, count := range endpointStats {
|
||||
topEndpoints = append(topEndpoints, endpointStat{Name: endpoint, Count: count})
|
||||
}
|
||||
sort.Slice(topEndpoints, func(i, j int) bool {
|
||||
return topEndpoints[i].Count > topEndpoints[j].Count
|
||||
})
|
||||
if len(topEndpoints) > 5 {
|
||||
topEndpoints = topEndpoints[:5]
|
||||
}
|
||||
|
||||
return c.JSON(fiber.Map{
|
||||
"totalRequests": metricsStore.GetTotalRequests(),
|
||||
"successRate": metricsStore.GetSuccessRate(),
|
||||
"topModels": topModels,
|
||||
"topEndpoints": topEndpoints,
|
||||
"topBackends": backendStats,
|
||||
})
|
||||
})
|
||||
|
||||
// Get endpoint statistics
|
||||
app.Get("/api/metrics/endpoints", func(c *fiber.Ctx) error {
|
||||
stats := metricsStore.GetEndpointStats()
|
||||
return c.JSON(fiber.Map{
|
||||
"endpoints": stats,
|
||||
})
|
||||
})
|
||||
|
||||
// Get model statistics
|
||||
app.Get("/api/metrics/models", func(c *fiber.Ctx) error {
|
||||
stats := metricsStore.GetModelStats()
|
||||
return c.JSON(fiber.Map{
|
||||
"models": stats,
|
||||
})
|
||||
})
|
||||
|
||||
// Get backend statistics
|
||||
app.Get("/api/metrics/backends", func(c *fiber.Ctx) error {
|
||||
stats := metricsStore.GetBackendStats()
|
||||
return c.JSON(fiber.Map{
|
||||
"backends": stats,
|
||||
})
|
||||
})
|
||||
|
||||
// Get time series data
|
||||
app.Get("/api/metrics/timeseries", func(c *fiber.Ctx) error {
|
||||
// Default to last 24 hours
|
||||
hours := 24
|
||||
if hoursParam := c.Query("hours"); hoursParam != "" {
|
||||
if h, err := strconv.Atoi(hoursParam); err == nil && h > 0 {
|
||||
hours = h
|
||||
}
|
||||
}
|
||||
|
||||
timeSeries := metricsStore.GetRequestsOverTime(hours)
|
||||
return c.JSON(fiber.Map{
|
||||
"timeseries": timeSeries,
|
||||
"hours": hours,
|
||||
})
|
||||
})
|
||||
|
||||
// Reset metrics (optional - for testing/admin purposes)
|
||||
app.Post("/api/metrics/reset", func(c *fiber.Ctx) error {
|
||||
metricsStore.Reset()
|
||||
return c.JSON(fiber.Map{
|
||||
"success": true,
|
||||
"message": "Metrics reset successfully",
|
||||
})
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -22,9 +22,6 @@
|
||||
<a href="./" class="text-[#94A3B8] hover:text-[#E5E7EB] px-3 py-2 rounded-lg transition duration-300 ease-in-out hover:bg-[#1E293B] hover:shadow-[0_0_12px_rgba(56,189,248,0.15)] flex items-center group">
|
||||
<i class="fas fa-home text-[#38BDF8] mr-2 group-hover:scale-110 transition-transform"></i>Home
|
||||
</a>
|
||||
<a href="settings" class="text-[#94A3B8] hover:text-[#E5E7EB] px-3 py-2 rounded-lg transition duration-300 ease-in-out hover:bg-[#1E293B] hover:shadow-[0_0_12px_rgba(56,189,248,0.15)] flex items-center group">
|
||||
<i class="fas fa-cog text-[#8B5CF6] mr-2 group-hover:scale-110 transition-transform"></i>Settings
|
||||
</a>
|
||||
<a href="browse/" class="text-[#94A3B8] hover:text-[#E5E7EB] px-3 py-2 rounded-lg transition duration-300 ease-in-out hover:bg-[#1E293B] hover:shadow-[0_0_12px_rgba(56,189,248,0.15)] flex items-center group">
|
||||
<i class="fas fa-brain text-[#38BDF8] mr-2 group-hover:scale-110 transition-transform"></i>Models
|
||||
</a>
|
||||
@@ -58,9 +55,6 @@
|
||||
<a href="./" class="block text-[#94A3B8] hover:text-[#E5E7EB] hover:bg-[#1E293B] px-3 py-2 rounded-lg transition duration-300 ease-in-out flex items-center">
|
||||
<i class="fas fa-home text-[#38BDF8] mr-3 w-5 text-center"></i>Home
|
||||
</a>
|
||||
<a href="settings" class="block text-[#94A3B8] hover:text-[#E5E7EB] hover:bg-[#1E293B] px-3 py-2 rounded-lg transition duration-300 ease-in-out flex items-center">
|
||||
<i class="fas fa-cog text-[#8B5CF6] mr-3 w-5 text-center"></i>Settings
|
||||
</a>
|
||||
<a href="browse/" class="block text-[#94A3B8] hover:text-[#E5E7EB] hover:bg-[#1E293B] px-3 py-2 rounded-lg transition duration-300 ease-in-out flex items-center">
|
||||
<i class="fas fa-brain text-[#38BDF8] mr-3 w-5 text-center"></i>Models
|
||||
</a>
|
||||
|
||||
@@ -1,609 +0,0 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
{{template "views/partials/head" .}}
|
||||
|
||||
<body class="bg-[#101827] text-[#E5E7EB]">
|
||||
<div class="flex flex-col min-h-screen" x-data="indexDashboard()">
|
||||
|
||||
{{template "views/partials/navbar" .}}
|
||||
|
||||
<!-- Notifications -->
|
||||
<div class="fixed top-20 right-4 z-50 space-y-2" style="max-width: 400px;">
|
||||
<template x-for="notification in notifications" :key="notification.id">
|
||||
<div x-show="true"
|
||||
x-transition:enter="transform ease-out duration-300 transition"
|
||||
x-transition:enter-start="translate-x-full opacity-0"
|
||||
x-transition:enter-end="translate-x-0 opacity-100"
|
||||
x-transition:leave="transform ease-in duration-200 transition"
|
||||
x-transition:leave-start="translate-x-0 opacity-100"
|
||||
x-transition:leave-end="translate-x-full opacity-0"
|
||||
:class="notification.type === 'error' ? 'bg-red-500' : 'bg-green-500'"
|
||||
class="rounded-lg shadow-xl p-4 text-white flex items-start space-x-3">
|
||||
<div class="flex-shrink-0">
|
||||
<i :class="notification.type === 'error' ? 'fas fa-exclamation-circle' : 'fas fa-check-circle'" class="text-xl"></i>
|
||||
</div>
|
||||
<div class="flex-1 min-w-0">
|
||||
<p class="text-sm font-medium break-words" x-text="notification.message"></p>
|
||||
</div>
|
||||
<button @click="dismissNotification(notification.id)" class="flex-shrink-0 text-white hover:text-gray-200">
|
||||
<i class="fas fa-times"></i>
|
||||
</button>
|
||||
</div>
|
||||
</template>
|
||||
</div>
|
||||
|
||||
<div class="container mx-auto px-4 py-8 flex-grow">
|
||||
<!-- Hero Section -->
|
||||
<div class="relative bg-[#1E293B] border border-[#38BDF8]/20 rounded-3xl shadow-2xl shadow-[#38BDF8]/10 p-8 mb-12 overflow-hidden">
|
||||
<!-- Background Pattern -->
|
||||
<div class="absolute inset-0 opacity-10">
|
||||
<div class="absolute inset-0 bg-gradient-to-r from-[#38BDF8]/20 to-[#8B5CF6]/20"></div>
|
||||
<div class="absolute top-0 left-0 w-full h-full" style="background-image: radial-gradient(circle at 1px 1px, rgba(56,189,248,0.15) 1px, transparent 0); background-size: 20px 20px;"></div>
|
||||
</div>
|
||||
|
||||
<div class="relative max-w-5xl mx-auto text-center">
|
||||
<h1 class="text-5xl md:text-6xl font-bold text-[#E5E7EB] mb-6">
|
||||
<span class="bg-clip-text text-transparent bg-gradient-to-r from-[#38BDF8] via-[#8B5CF6] to-[#38BDF8]">
|
||||
Settings & Management
|
||||
</span>
|
||||
</h1>
|
||||
<p class="text-xl md:text-2xl text-[#94A3B8] mb-8 font-light">Manage your models, backends, and system configuration</p>
|
||||
|
||||
<div class="flex flex-wrap justify-center gap-4">
|
||||
<a href="/"
|
||||
class="group relative inline-flex items-center bg-gray-600 hover:bg-gray-700 text-white py-3 px-8 rounded-xl font-semibold transition-all duration-300 ease-in-out transform hover:scale-105 hover:shadow-[0_0_20px_rgba(75,85,99,0.4)]">
|
||||
<i class="fas fa-home mr-3 text-lg"></i>
|
||||
<span>Back to Dashboard</span>
|
||||
<i class="fas fa-arrow-left ml-3 opacity-70 group-hover:opacity-100 transition-opacity"></i>
|
||||
</a>
|
||||
|
||||
<a href="https://localai.io" target="_blank"
|
||||
class="group relative inline-flex items-center bg-[#38BDF8] hover:bg-[#38BDF8]/90 text-[#101827] py-3 px-8 rounded-xl font-semibold transition-all duration-300 ease-in-out transform hover:scale-105 hover:shadow-[0_0_20px_rgba(56,189,248,0.4)]">
|
||||
<i class="fas fa-book-reader mr-3 text-lg"></i>
|
||||
<span>Documentation</span>
|
||||
<i class="fas fa-external-link-alt ml-3 text-sm opacity-70 group-hover:opacity-100 transition-opacity"></i>
|
||||
</a>
|
||||
|
||||
<a href="browse"
|
||||
class="group relative inline-flex items-center bg-[#8B5CF6] hover:bg-[#8B5CF6]/90 text-white py-3 px-8 rounded-xl font-semibold transition-all duration-300 ease-in-out transform hover:scale-105 hover:shadow-[0_0_20px_rgba(139,92,246,0.4)]">
|
||||
<i class="fas fa-images mr-3 text-lg"></i>
|
||||
<span>Model Gallery</span>
|
||||
<i class="fas fa-arrow-right ml-3 opacity-0 group-hover:opacity-100 group-hover:translate-x-1 transition-all duration-300"></i>
|
||||
</a>
|
||||
|
||||
<a href="/import-model"
|
||||
class="group relative inline-flex items-center bg-green-600 hover:bg-green-700 text-white py-3 px-8 rounded-xl font-semibold transition-all duration-300 ease-in-out transform hover:scale-105 hover:shadow-[0_0_20px_rgba(34,197,94,0.4)]">
|
||||
<i class="fas fa-plus mr-3 text-lg"></i>
|
||||
<span>Import Model</span>
|
||||
<i class="fas fa-upload ml-3 opacity-70 group-hover:opacity-100 transition-opacity"></i>
|
||||
</a>
|
||||
|
||||
<button id="reload-models-btn"
|
||||
class="group relative inline-flex items-center bg-orange-600 hover:bg-orange-700 text-white py-3 px-8 rounded-xl font-semibold transition-all duration-300 ease-in-out transform hover:scale-105 hover:shadow-[0_0_20px_rgba(234,88,12,0.4)]">
|
||||
<i class="fas fa-sync-alt mr-3 text-lg"></i>
|
||||
<span>Update Models</span>
|
||||
<i class="fas fa-refresh ml-3 opacity-70 group-hover:opacity-100 transition-opacity"></i>
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Models Section -->
|
||||
<div class="models mt-8">
|
||||
{{template "views/partials/inprogress" .}}
|
||||
|
||||
{{ if eq (len .ModelsConfig) 0 }}
|
||||
<!-- No Models State -->
|
||||
<div class="relative bg-[#1E293B]/80 border border-[#38BDF8]/20 rounded-2xl p-12 shadow-xl backdrop-blur-sm">
|
||||
<div class="absolute inset-0 rounded-2xl bg-gradient-to-br from-yellow-500/5 to-orange-500/5"></div>
|
||||
<div class="relative text-center max-w-4xl mx-auto">
|
||||
<div class="inline-flex items-center justify-center w-20 h-20 rounded-full bg-yellow-500/10 border border-yellow-500/20 mb-6">
|
||||
<i class="text-yellow-400 text-3xl fas fa-robot"></i>
|
||||
</div>
|
||||
<h2 class="text-3xl md:text-4xl font-bold text-[#E5E7EB] mb-6">No models installed yet</h2>
|
||||
<p class="text-xl text-[#94A3B8] mb-8 leading-relaxed">Get started by installing models from the gallery or check our documentation for guidance</p>
|
||||
|
||||
<div class="flex flex-wrap justify-center gap-4 mb-8">
|
||||
<a href="browse" class="inline-flex items-center bg-[#38BDF8] hover:bg-[#38BDF8]/90 text-[#101827] py-3 px-6 rounded-xl font-semibold transition-all duration-300 transform hover:scale-105 hover:shadow-[0_0_20px_rgba(56,189,248,0.4)]">
|
||||
<i class="fas fa-images mr-2"></i>
|
||||
Browse Gallery
|
||||
</a>
|
||||
<a href="https://localai.io/basics/getting_started/" class="inline-flex items-center bg-[#1E293B] hover:bg-[#1E293B]/80 border border-[#38BDF8]/20 text-[#E5E7EB] py-3 px-6 rounded-xl font-semibold transition-all duration-300 transform hover:scale-105">
|
||||
<i class="fas fa-book mr-2"></i>
|
||||
Documentation
|
||||
</a>
|
||||
</div>
|
||||
|
||||
{{ if ne (len .Models) 0 }}
|
||||
<div class="mt-12 pt-8 border-t border-[#38BDF8]/20">
|
||||
<h3 class="text-2xl font-bold text-[#E5E7EB] mb-6">Detected Model Files</h3>
|
||||
<p class="text-[#94A3B8] mb-6">These models were found but don't have configuration files yet</p>
|
||||
<div class="grid grid-cols-1 md:grid-cols-2 lg:grid-cols-3 gap-4">
|
||||
{{ range .Models }}
|
||||
<div class="bg-[#101827] border border-[#38BDF8]/20 rounded-xl p-4 flex items-center hover:border-[#38BDF8]/50 transition-all duration-300 hover:shadow-[0_0_12px_rgba(56,189,248,0.15)]">
|
||||
<div class="w-10 h-10 rounded-lg bg-[#1E293B] flex items-center justify-center mr-3">
|
||||
<i class="fas fa-brain text-[#38BDF8]"></i>
|
||||
</div>
|
||||
<div class="flex-1">
|
||||
<p class="font-semibold text-[#E5E7EB] truncate">{{.}}</p>
|
||||
<p class="text-xs text-[#94A3B8]">No configuration</p>
|
||||
</div>
|
||||
</div>
|
||||
{{end}}
|
||||
</div>
|
||||
</div>
|
||||
{{end}}
|
||||
</div>
|
||||
</div>
|
||||
{{ else }}
|
||||
<!-- Models Grid -->
|
||||
{{ $modelsN := len .ModelsConfig}}
|
||||
{{ $modelsN = add $modelsN (len .Models)}}
|
||||
<div class="mb-8 flex flex-col md:flex-row md:items-center md:justify-between">
|
||||
<div class="mb-4 md:mb-0">
|
||||
<h2 class="text-3xl md:text-4xl font-bold text-[#E5E7EB] mb-2">
|
||||
Installed Models
|
||||
</h2>
|
||||
<p class="text-[#94A3B8]">
|
||||
<span class="text-[#38BDF8] font-semibold">{{$modelsN}}</span> model{{if gt $modelsN 1}}s{{end}} ready to use
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="grid grid-cols-1 md:grid-cols-2 xl:grid-cols-3 gap-6">
|
||||
{{$galleryConfig:=.GalleryConfig}}
|
||||
{{ $loadedModels := .LoadedModels }}
|
||||
{{$noicon:="https://upload.wikimedia.org/wikipedia/commons/6/65/No-Image-Placeholder.svg"}}
|
||||
|
||||
{{ range .ModelsConfig }}
|
||||
{{ $backendCfg := . }}
|
||||
{{ $cfg:= index $galleryConfig .Name}}
|
||||
<div class="group relative bg-[#1E293B] border border-[#38BDF8]/20 rounded-2xl overflow-hidden transition-all duration-500 hover:shadow-[0_0_20px_rgba(56,189,248,0.2)] hover:-translate-y-2 hover:border-[#38BDF8]/50">
|
||||
<!-- Card Header -->
|
||||
<div class="relative p-6 border-b border-[#101827]">
|
||||
<div class="flex items-start space-x-4">
|
||||
<div class="relative w-16 h-16 rounded-xl overflow-hidden flex-shrink-0 bg-[#101827] flex items-center justify-center group-hover:scale-110 transition-transform duration-300">
|
||||
<img {{ if and $cfg $cfg.Icon }}
|
||||
src="{{$cfg.Icon}}"
|
||||
{{ else }}
|
||||
src="{{$noicon}}"
|
||||
{{ end }}
|
||||
class="w-full h-full object-contain"
|
||||
alt="{{.Name}} icon"
|
||||
>
|
||||
{{ if index $loadedModels .Name }}
|
||||
<div class="absolute -top-1 -right-1 w-4 h-4 bg-green-500 rounded-full border-2 border-[#1E293B] animate-pulse"></div>
|
||||
{{ end }}
|
||||
</div>
|
||||
|
||||
<div class="flex-1 min-w-0">
|
||||
<div class="flex items-center justify-between">
|
||||
<h3 class="font-bold text-xl text-[#E5E7EB] truncate group-hover:text-[#38BDF8] transition-colors">{{.Name}}</h3>
|
||||
</div>
|
||||
|
||||
<div class="mt-2 flex flex-wrap gap-2">
|
||||
{{ if .Backend }}
|
||||
<span class="inline-flex items-center px-3 py-1 rounded-full text-xs font-semibold bg-[#38BDF8]/20 text-[#38BDF8] border border-[#38BDF8]/30">
|
||||
<i class="fas fa-cog mr-1"></i>{{.Backend}}
|
||||
</span>
|
||||
{{ else }}
|
||||
<span class="inline-flex items-center px-3 py-1 rounded-full text-xs font-semibold bg-yellow-500/10 text-yellow-300 border border-yellow-500/30">
|
||||
<i class="fas fa-magic mr-1"></i>Auto
|
||||
</span>
|
||||
{{ end }}
|
||||
|
||||
{{ if and $backendCfg (or (ne $backendCfg.MCP.Servers "") (ne $backendCfg.MCP.Stdio "")) }}
|
||||
<span class="inline-flex items-center px-3 py-1 rounded-full text-xs font-semibold bg-[#8B5CF6]/20 text-[#8B5CF6] border border-[#8B5CF6]/30">
|
||||
<i class="fas fa-plug mr-1"></i>MCP
|
||||
</span>
|
||||
{{ end }}
|
||||
|
||||
{{ if index $loadedModels .Name }}
|
||||
<span class="inline-flex items-center px-3 py-1 rounded-full text-xs font-semibold bg-green-500/10 text-green-300 border border-green-500/30">
|
||||
<i class="fas fa-play mr-1"></i>Running
|
||||
</span>
|
||||
{{ end }}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Usage Buttons -->
|
||||
<div class="p-6">
|
||||
<div class="flex flex-wrap gap-2 mb-4">
|
||||
{{ range .KnownUsecaseStrings }}
|
||||
{{ if eq . "FLAG_CHAT" }}
|
||||
<a href="chat/{{$backendCfg.Name}}" class="flex-1 min-w-0 group/chat inline-flex items-center justify-center rounded-xl px-4 py-3 text-sm font-semibold bg-[#38BDF8] hover:bg-[#38BDF8]/90 text-[#101827] transition-all duration-300 transform hover:scale-105 hover:shadow-[0_0_15px_rgba(56,189,248,0.4)]">
|
||||
<i class="fas fa-comment-alt mr-2 group-hover/chat:animate-bounce"></i>
|
||||
Chat
|
||||
</a>
|
||||
{{ end }}
|
||||
{{ if eq . "FLAG_IMAGE" }}
|
||||
<a href="text2image/{{$backendCfg.Name}}" class="flex-1 min-w-0 group/image inline-flex items-center justify-center rounded-xl px-4 py-3 text-sm font-semibold bg-green-600 hover:bg-green-700 text-white transition-all duration-300 transform hover:scale-105 hover:shadow-[0_0_15px_rgba(34,197,94,0.4)]">
|
||||
<i class="fas fa-image mr-2 group-hover/image:animate-pulse"></i>
|
||||
Image
|
||||
</a>
|
||||
{{ end }}
|
||||
{{ if eq . "FLAG_TTS" }}
|
||||
<a href="tts/{{$backendCfg.Name}}" class="flex-1 min-w-0 group/tts inline-flex items-center justify-center rounded-xl px-4 py-3 text-sm font-semibold bg-[#8B5CF6] hover:bg-[#8B5CF6]/90 text-white transition-all duration-300 transform hover:scale-105 hover:shadow-[0_0_15px_rgba(139,92,246,0.4)]">
|
||||
<i class="fas fa-microphone mr-2 group-hover/tts:animate-pulse"></i>
|
||||
TTS
|
||||
</a>
|
||||
{{ end }}
|
||||
{{ end }}
|
||||
</div>
|
||||
|
||||
<!-- Action Buttons -->
|
||||
<div class="flex justify-between items-center pt-4 border-t border-[#101827]">
|
||||
<div class="flex gap-2">
|
||||
{{ if index $loadedModels .Name }}
|
||||
<button class="group/stop inline-flex items-center text-sm font-semibold text-red-400 hover:text-red-300 hover:bg-red-500/10 rounded-lg px-3 py-2 transition-all duration-200"
|
||||
data-twe-ripple-init=""
|
||||
onclick="handleStopModel('{{.Name}}')">
|
||||
<i class="fas fa-stop mr-2 group-hover/stop:animate-pulse"></i>Stop
|
||||
</button>
|
||||
{{ end }}
|
||||
</div>
|
||||
|
||||
<div class="flex gap-2">
|
||||
<a href="/models/edit/{{.Name}}"
|
||||
class="group/edit inline-flex items-center text-sm font-semibold text-[#38BDF8] hover:text-[#8B5CF6] hover:bg-[#38BDF8]/10 rounded-lg px-3 py-2 transition-all duration-200">
|
||||
<i class="fas fa-edit mr-2 group-hover/edit:animate-pulse"></i>Edit
|
||||
</a>
|
||||
<button
|
||||
class="group/delete inline-flex items-center text-sm font-semibold text-red-400 hover:text-red-300 hover:bg-red-500/10 rounded-lg px-3 py-2 transition-all duration-200"
|
||||
data-twe-ripple-init=""
|
||||
onclick="handleDeleteModel('{{.Name}}')">
|
||||
<i class="fas fa-trash-alt mr-2 group-hover/delete:animate-bounce"></i>Delete
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
{{ end }}
|
||||
|
||||
<!-- Models without config -->
|
||||
{{ range .Models }}
|
||||
<div class="group relative bg-[#1E293B]/80 border border-[#38BDF8]/20 rounded-2xl overflow-hidden transition-all duration-500 hover:shadow-[0_0_15px_rgba(234,179,8,0.15)] hover:-translate-y-1 hover:border-yellow-500/30">
|
||||
<div class="p-6">
|
||||
<div class="flex items-start space-x-4">
|
||||
<div class="w-16 h-16 rounded-xl overflow-hidden flex-shrink-0 bg-[#101827] flex items-center justify-center">
|
||||
<i class="fas fa-brain text-2xl text-[#94A3B8]"></i>
|
||||
</div>
|
||||
<div class="flex-1 min-w-0">
|
||||
<h3 class="font-bold text-xl text-[#E5E7EB] truncate mb-2">{{.}}</h3>
|
||||
|
||||
<div class="flex flex-wrap gap-2 mb-4">
|
||||
<span class="inline-flex items-center px-3 py-1 rounded-full text-xs font-semibold bg-yellow-500/10 text-yellow-300 border border-yellow-500/30">
|
||||
<i class="fas fa-magic mr-1"></i>Auto Backend
|
||||
</span>
|
||||
<span class="inline-flex items-center px-3 py-1 rounded-full text-xs font-semibold bg-orange-500/10 text-orange-300 border border-orange-500/30">
|
||||
<i class="fas fa-exclamation-triangle mr-1"></i>No Config
|
||||
</span>
|
||||
</div>
|
||||
|
||||
<div class="flex justify-center pt-4">
|
||||
<span class="inline-flex items-center text-sm font-medium text-[#94A3B8] px-4 py-2 bg-[#101827]/50 rounded-lg">
|
||||
<i class="fas fa-info-circle mr-2"></i>
|
||||
Configuration required for full functionality
|
||||
</span>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
{{end}}
|
||||
</div>
|
||||
{{ end }}
|
||||
</div>
|
||||
|
||||
<!-- Backends Section -->
|
||||
<div class="mt-12">
|
||||
<div class="mb-8">
|
||||
<h2 class="text-3xl md:text-4xl font-bold text-[#E5E7EB] mb-2">
|
||||
Installed Backends
|
||||
</h2>
|
||||
<p class="text-[#94A3B8]">
|
||||
<span class="text-[#8B5CF6] font-semibold">{{len .InstalledBackends}}</span> backend{{if gt (len .InstalledBackends) 1}}s{{end}} ready to use
|
||||
</p>
|
||||
</div>
|
||||
|
||||
{{ if eq (len .InstalledBackends) 0 }}
|
||||
<!-- No backends state -->
|
||||
<div class="relative bg-[#1E293B]/80 border border-[#8B5CF6]/20 rounded-2xl p-12 shadow-xl backdrop-blur-sm">
|
||||
<div class="absolute inset-0 rounded-2xl bg-gradient-to-br from-purple-500/5 to-cyan-500/5"></div>
|
||||
<div class="relative text-center max-w-4xl mx-auto">
|
||||
<div class="inline-flex items-center justify-center w-20 h-20 rounded-full bg-[#8B5CF6]/10 border border-[#8B5CF6]/20 mb-6">
|
||||
<i class="text-[#8B5CF6] text-3xl fas fa-cogs"></i>
|
||||
</div>
|
||||
<h2 class="text-3xl md:text-4xl font-bold text-[#E5E7EB] mb-6">No backends installed yet</h2>
|
||||
<p class="text-xl text-[#94A3B8] mb-8 leading-relaxed">Backends power your AI models. Install them from the backend gallery to get started</p>
|
||||
|
||||
<div class="flex flex-wrap justify-center gap-4">
|
||||
<a href="/browse/backends" class="inline-flex items-center bg-[#8B5CF6] hover:bg-[#8B5CF6]/90 text-white py-3 px-6 rounded-xl font-semibold transition-all duration-300 transform hover:scale-105 hover:shadow-[0_0_20px_rgba(139,92,246,0.4)]">
|
||||
<i class="fas fa-cogs mr-2"></i>
|
||||
Browse Backend Gallery
|
||||
</a>
|
||||
<a href="https://localai.io/backends/" target="_blank" class="inline-flex items-center bg-[#1E293B] hover:bg-[#1E293B]/80 border border-[#8B5CF6]/20 text-[#E5E7EB] py-3 px-6 rounded-xl font-semibold transition-all duration-300 transform hover:scale-105">
|
||||
<i class="fas fa-book mr-2"></i>
|
||||
Documentation
|
||||
</a>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
{{ else }}
|
||||
<!-- Backends Grid -->
|
||||
<div class="grid grid-cols-1 md:grid-cols-2 xl:grid-cols-3 gap-6">
|
||||
{{ range .InstalledBackends }}
|
||||
<div class="group relative bg-[#1E293B] border border-[#8B5CF6]/20 rounded-2xl overflow-hidden transition-all duration-500 hover:shadow-[0_0_20px_rgba(139,92,246,0.2)] hover:-translate-y-2 hover:border-[#8B5CF6]/50">
|
||||
<!-- Card Header -->
|
||||
<div class="relative p-6 border-b border-[#101827]">
|
||||
<div class="flex items-start space-x-4">
|
||||
<div class="w-16 h-16 rounded-xl overflow-hidden flex-shrink-0 bg-[#101827] flex items-center justify-center group-hover:scale-110 transition-transform duration-300">
|
||||
<i class="fas fa-cog text-2xl text-[#8B5CF6]"></i>
|
||||
</div>
|
||||
<div class="flex-1 min-w-0">
|
||||
<h3 class="font-bold text-xl text-[#E5E7EB] truncate mb-2 group-hover:text-[#8B5CF6] transition-colors">{{.Name}}</h3>
|
||||
|
||||
<div class="flex flex-wrap gap-2">
|
||||
{{ if .IsSystem }}
|
||||
<span class="inline-flex items-center px-3 py-1 rounded-full text-xs font-semibold bg-blue-500/10 text-blue-300 border border-blue-500/30">
|
||||
<i class="fas fa-shield-alt mr-1"></i>System
|
||||
</span>
|
||||
{{ else }}
|
||||
<span class="inline-flex items-center px-3 py-1 rounded-full text-xs font-semibold bg-green-500/10 text-green-300 border border-green-500/30">
|
||||
<i class="fas fa-download mr-1"></i>User Installed
|
||||
</span>
|
||||
{{ end }}
|
||||
|
||||
{{ if .IsMeta }}
|
||||
<span class="inline-flex items-center px-3 py-1 rounded-full text-xs font-semibold bg-[#8B5CF6]/20 text-[#8B5CF6] border border-[#8B5CF6]/30">
|
||||
<i class="fas fa-layer-group mr-1"></i>Meta
|
||||
</span>
|
||||
{{ end }}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Backend Details -->
|
||||
<div class="p-6">
|
||||
<div class="space-y-3 text-sm">
|
||||
{{ if and .Metadata .Metadata.Alias }}
|
||||
<div class="flex items-start">
|
||||
<i class="fas fa-tag text-[#94A3B8] mr-2 mt-0.5"></i>
|
||||
<div class="flex-1">
|
||||
<span class="text-[#94A3B8]">Alias:</span>
|
||||
<span class="text-[#E5E7EB] ml-1">{{.Metadata.Alias}}</span>
|
||||
</div>
|
||||
</div>
|
||||
{{ end }}
|
||||
|
||||
{{ if and .Metadata .Metadata.InstalledAt }}
|
||||
<div class="flex items-start">
|
||||
<i class="fas fa-calendar text-[#94A3B8] mr-2 mt-0.5"></i>
|
||||
<div class="flex-1">
|
||||
<span class="text-[#94A3B8]">Installed:</span>
|
||||
<span class="text-[#E5E7EB] ml-1">{{.Metadata.InstalledAt}}</span>
|
||||
</div>
|
||||
</div>
|
||||
{{ end }}
|
||||
|
||||
{{ if and .Metadata .Metadata.MetaBackendFor }}
|
||||
<div class="flex items-start">
|
||||
<i class="fas fa-link text-[#94A3B8] mr-2 mt-0.5"></i>
|
||||
<div class="flex-1">
|
||||
<span class="text-[#94A3B8]">Meta backend for:</span>
|
||||
<span class="text-[#8B5CF6] ml-1 font-semibold">{{.Metadata.MetaBackendFor}}</span>
|
||||
</div>
|
||||
</div>
|
||||
{{ end }}
|
||||
|
||||
{{ if and .Metadata .Metadata.GalleryURL }}
|
||||
<div class="flex items-start">
|
||||
<i class="fas fa-globe text-[#94A3B8] mr-2 mt-0.5"></i>
|
||||
<div class="flex-1">
|
||||
<span class="text-[#94A3B8]">Gallery:</span>
|
||||
<a href="{{.Metadata.GalleryURL}}" target="_blank" class="text-[#38BDF8] hover:text-[#38BDF8]/80 ml-1 truncate inline-block max-w-[200px] align-bottom">
|
||||
{{.Metadata.GalleryURL}}
|
||||
<i class="fas fa-external-link-alt text-xs ml-1"></i>
|
||||
</a>
|
||||
</div>
|
||||
</div>
|
||||
{{ end }}
|
||||
|
||||
<div class="flex items-start">
|
||||
<i class="fas fa-folder text-[#94A3B8] mr-2 mt-0.5"></i>
|
||||
<div class="flex-1">
|
||||
<span class="text-[#94A3B8]">Path:</span>
|
||||
<span class="text-[#E5E7EB] ml-1 text-xs font-mono truncate block">{{.RunFile}}</span>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Action Buttons -->
|
||||
{{ if not .IsSystem }}
|
||||
<div class="flex justify-end items-center pt-4 mt-4 border-t border-[#101827]">
|
||||
<button
|
||||
@click="deleteBackend('{{.Name}}')"
|
||||
class="group/delete inline-flex items-center text-sm font-semibold text-red-400 hover:text-red-300 hover:bg-red-500/10 rounded-lg px-3 py-2 transition-all duration-200">
|
||||
<i class="fas fa-trash-alt mr-2 group-hover/delete:animate-bounce"></i>Delete
|
||||
</button>
|
||||
</div>
|
||||
{{ end }}
|
||||
</div>
|
||||
</div>
|
||||
{{end}}
|
||||
</div>
|
||||
{{ end }}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{{template "views/partials/footer" .}}
|
||||
</div>
|
||||
|
||||
<script>
|
||||
// Alpine.js component for index dashboard.
// Provides toast-style notifications and the backend-deletion workflow.
function indexDashboard() {
    return {
        // Active toast notifications: array of { id, message, type }.
        notifications: [],

        init() {
            // Initialize component
        },

        // Queue a toast notification; it is auto-dismissed after 5 seconds.
        // `type` is 'success' or 'error' (used for styling by the template).
        addNotification(message, type = 'success') {
            const id = Date.now();
            this.notifications.push({ id, message, type });
            // Auto-dismiss after 5 seconds
            setTimeout(() => this.dismissNotification(id), 5000);
        },

        // Remove a single toast by its id (no-op if already gone).
        dismissNotification(id) {
            this.notifications = this.notifications.filter(n => n.id !== id);
        },

        // Ask the user for confirmation, then request server-side deletion of a
        // user-installed backend. On success the page reloads after a short
        // delay so the toast is visible first.
        async deleteBackend(backendName) {
            if (!confirm(`Are you sure you want to delete the backend "${backendName}"?`)) {
                return;
            }

            try {
                const response = await fetch(`/api/backends/system/delete/${encodeURIComponent(backendName)}`, {
                    method: 'POST'
                });

                const data = await response.json();

                if (response.ok && data.success) {
                    this.addNotification(`Backend "${backendName}" deleted successfully!`, 'success');
                    // Reload page after short delay
                    setTimeout(() => {
                        window.location.reload();
                    }, 1500);
                } else {
                    this.addNotification(`Failed to delete backend: ${data.error || 'Unknown error'}`, 'error');
                }
            } catch (error) {
                console.error('Error deleting backend:', error);
                this.addNotification(`Failed to delete backend: ${error.message}`, 'error');
            }
        }
    }
}
|
||||
|
||||
// Ask the user for confirmation, then request the server to shut the model
// down via POST /backend/shutdown. Reloads the page on success; shows an
// alert on any failure.
async function handleStopModel(modelName) {
    if (!confirm('Are you sure you wish to stop this model?')) {
        return;
    }

    try {
        const response = await fetch('/backend/shutdown', {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({ model: modelName })
        });

        if (!response.ok) {
            alert('Failed to stop model');
            return;
        }

        window.location.reload();
    } catch (error) {
        console.error('Error stopping model:', error);
        alert('Failed to stop model');
    }
}
|
||||
|
||||
// Confirm with the user, then delete the model server-side and refresh the
// page so the model list reflects the removal. Alerts on any failure.
async function handleDeleteModel(modelName) {
    if (!confirm('Are you sure you wish to delete this model?')) {
        return;
    }

    try {
        const response = await fetch(`/api/models/delete/${encodeURIComponent(modelName)}`, {
            method: 'POST'
        });

        if (!response.ok) {
            alert('Failed to delete model');
            return;
        }

        window.location.reload();
    } catch (error) {
        console.error('Error deleting model:', error);
        alert('Failed to delete model');
    }
}
|
||||
|
||||
// Handle reload models button: drives a loading/success/error state machine
// on the button while POSTing /models/reload, then refreshes the page on
// success so the new model list is shown.
document.addEventListener('DOMContentLoaded', function() {
    const reloadBtn = document.getElementById('reload-models-btn');
    if (!reloadBtn) {
        return;
    }

    reloadBtn.addEventListener('click', function() {
        const button = this;
        const label = button.querySelector('span');
        const icon = button.querySelector('i');
        const originalText = label.textContent;

        // Restore the idle appearance a few seconds after an error.
        const resetButton = () => {
            setTimeout(() => {
                button.disabled = false;
                label.textContent = originalText;
                icon.classList.remove('fa-check');
                icon.classList.add('fa-sync-alt');
            }, 3000);
        };

        // Show loading state.
        button.disabled = true;
        label.textContent = 'Updating...';
        icon.classList.add('fa-spin');

        // Make the API call.
        fetch('/models/reload', {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' }
        })
            .then((response) => response.json())
            .then((data) => {
                if (data.success) {
                    // Show success state briefly, then reload the page.
                    label.textContent = 'Updated!';
                    icon.classList.remove('fa-spin', 'fa-sync-alt');
                    icon.classList.add('fa-check');
                    setTimeout(() => {
                        window.location.reload();
                    }, 1000);
                } else {
                    // Show error state, then reset the button.
                    label.textContent = 'Error!';
                    icon.classList.remove('fa-spin');
                    console.error('Failed to reload models:', data.error);
                    resetButton();
                }
            })
            .catch((error) => {
                // Show error state, then reset the button.
                label.textContent = 'Error!';
                icon.classList.remove('fa-spin');
                console.error('Error reloading models:', error);
                resetButton();
            });
    });
});
|
||||
</script>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
@@ -2,8 +2,6 @@ package services
|
||||
|
||||
import (
|
||||
"context"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/rs/zerolog/log"
|
||||
"go.opentelemetry.io/otel/attribute"
|
||||
@@ -12,315 +10,6 @@ import (
|
||||
metricApi "go.opentelemetry.io/otel/sdk/metric"
|
||||
)
|
||||
|
||||
// MetricsStore is the interface for storing and retrieving metrics
// This allows for future implementations with persistence (JSON files, databases, etc.)
type MetricsStore interface {
	// RecordRequest records one API call outcome for the given
	// endpoint/model/backend labels (empty labels may be skipped by
	// implementations).
	RecordRequest(endpoint, model, backend string, success bool, duration time.Duration)
	// GetEndpointStats returns request counts keyed by endpoint.
	GetEndpointStats() map[string]int64
	// GetModelStats returns request counts keyed by model.
	GetModelStats() map[string]int64
	// GetBackendStats returns request counts keyed by backend.
	GetBackendStats() map[string]int64
	// GetRequestsOverTime returns bucketed request counts for the last
	// `hours` hours.
	GetRequestsOverTime(hours int) []TimeSeriesPoint
	// GetTotalRequests returns the total number of recorded requests.
	GetTotalRequests() int64
	// GetSuccessRate returns the percentage of successful requests.
	GetSuccessRate() float64
	// Reset clears all recorded metrics.
	Reset()
}
|
||||
|
||||
// TimeSeriesPoint represents a single point in the time series
type TimeSeriesPoint struct {
	Timestamp time.Time `json:"timestamp"` // bucket start time (hourly buckets in GetRequestsOverTime)
	Count     int64     `json:"count"`     // number of requests in the bucket
}
|
||||
|
||||
// RequestRecord stores individual request information
type RequestRecord struct {
	Timestamp time.Time     // when the request was recorded
	Endpoint  string        // API endpoint that served the request (may be empty)
	Model     string        // model the request targeted (may be empty)
	Backend   string        // backend that handled the request (may be empty)
	Success   bool          // whether the request succeeded
	Duration  time.Duration // how long the request took
}
|
||||
|
||||
// InMemoryMetricsStore implements MetricsStore with in-memory storage
type InMemoryMetricsStore struct {
	endpoints    map[string]int64 // request counts keyed by endpoint
	models       map[string]int64 // request counts keyed by model
	backends     map[string]int64 // request counts keyed by backend
	timeSeries   []RequestRecord  // one entry per request; pruned periodically by pruneLoop
	successCount int64            // total successful requests
	failureCount int64            // total failed requests
	mu           sync.RWMutex     // guards all fields above
	stopChan     chan struct{}    // closed by Stop to terminate pruneLoop
	maxRecords   int              // Maximum number of time series records to keep
	maxMapKeys   int              // Maximum number of unique keys per map
	pruneEvery   time.Duration    // How often to prune old data
}
|
||||
|
||||
// NewInMemoryMetricsStore creates a new in-memory metrics store
|
||||
func NewInMemoryMetricsStore() *InMemoryMetricsStore {
|
||||
store := &InMemoryMetricsStore{
|
||||
endpoints: make(map[string]int64),
|
||||
models: make(map[string]int64),
|
||||
backends: make(map[string]int64),
|
||||
timeSeries: make([]RequestRecord, 0),
|
||||
stopChan: make(chan struct{}),
|
||||
maxRecords: 10000, // Limit to 10k records (~1-2MB of memory)
|
||||
maxMapKeys: 1000, // Limit to 1000 unique keys per map (~50KB per map)
|
||||
pruneEvery: 5 * time.Minute, // Prune every 5 minutes instead of every request
|
||||
}
|
||||
|
||||
// Start background pruning goroutine
|
||||
go store.pruneLoop()
|
||||
|
||||
return store
|
||||
}
|
||||
|
||||
// pruneLoop runs periodically to clean up old data until Stop is called.
func (m *InMemoryMetricsStore) pruneLoop() {
	// Fires once per pruneEvery interval; stopped on return to release the
	// ticker's resources.
	ticker := time.NewTicker(m.pruneEvery)
	defer ticker.Stop()

	for {
		select {
		case <-ticker.C:
			m.pruneOldData()
		case <-m.stopChan:
			// Stop() closed the channel; terminate the goroutine.
			return
		}
	}
}
|
||||
|
||||
// pruneOldData removes data older than 24 hours and enforces max record limit
|
||||
func (m *InMemoryMetricsStore) pruneOldData() {
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
|
||||
cutoff := time.Now().Add(-24 * time.Hour)
|
||||
newTimeSeries := make([]RequestRecord, 0, len(m.timeSeries))
|
||||
|
||||
for _, r := range m.timeSeries {
|
||||
if r.Timestamp.After(cutoff) {
|
||||
newTimeSeries = append(newTimeSeries, r)
|
||||
}
|
||||
}
|
||||
|
||||
// If still over the limit, keep only the most recent records
|
||||
if len(newTimeSeries) > m.maxRecords {
|
||||
// Keep the most recent maxRecords entries
|
||||
newTimeSeries = newTimeSeries[len(newTimeSeries)-m.maxRecords:]
|
||||
log.Warn().
|
||||
Int("dropped", len(m.timeSeries)-len(newTimeSeries)).
|
||||
Int("kept", len(newTimeSeries)).
|
||||
Msg("Metrics store exceeded maximum records, dropping oldest entries")
|
||||
}
|
||||
|
||||
m.timeSeries = newTimeSeries
|
||||
|
||||
// Also check if maps have grown too large
|
||||
m.pruneMapIfNeeded("endpoints", m.endpoints, m.maxMapKeys)
|
||||
m.pruneMapIfNeeded("models", m.models, m.maxMapKeys)
|
||||
m.pruneMapIfNeeded("backends", m.backends, m.maxMapKeys)
|
||||
}
|
||||
|
||||
// pruneMapIfNeeded keeps only the top N entries in a map by count
|
||||
func (m *InMemoryMetricsStore) pruneMapIfNeeded(name string, mapData map[string]int64, maxKeys int) {
|
||||
if len(mapData) <= maxKeys {
|
||||
return
|
||||
}
|
||||
|
||||
// Convert to slice for sorting
|
||||
type kv struct {
|
||||
key string
|
||||
value int64
|
||||
}
|
||||
|
||||
entries := make([]kv, 0, len(mapData))
|
||||
for k, v := range mapData {
|
||||
entries = append(entries, kv{k, v})
|
||||
}
|
||||
|
||||
// Sort by value descending (keep highest counts)
|
||||
for i := 0; i < len(entries); i++ {
|
||||
for j := i + 1; j < len(entries); j++ {
|
||||
if entries[i].value < entries[j].value {
|
||||
entries[i], entries[j] = entries[j], entries[i]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Keep only top maxKeys entries
|
||||
for k := range mapData {
|
||||
delete(mapData, k)
|
||||
}
|
||||
|
||||
for i := 0; i < maxKeys && i < len(entries); i++ {
|
||||
mapData[entries[i].key] = entries[i].value
|
||||
}
|
||||
|
||||
log.Warn().
|
||||
Str("map", name).
|
||||
Int("dropped", len(entries)-maxKeys).
|
||||
Int("kept", maxKeys).
|
||||
Msg("Metrics map exceeded maximum keys, keeping only top entries")
|
||||
}
|
||||
|
||||
// Stop gracefully shuts down the metrics store by terminating the background
// pruning goroutine. It must be called at most once: closing an
// already-closed channel panics.
func (m *InMemoryMetricsStore) Stop() {
	close(m.stopChan)
}
|
||||
|
||||
// RecordRequest records a new API request
|
||||
func (m *InMemoryMetricsStore) RecordRequest(endpoint, model, backend string, success bool, duration time.Duration) {
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
|
||||
// Record endpoint
|
||||
if endpoint != "" {
|
||||
m.endpoints[endpoint]++
|
||||
}
|
||||
|
||||
// Record model
|
||||
if model != "" {
|
||||
m.models[model]++
|
||||
}
|
||||
|
||||
// Record backend
|
||||
if backend != "" {
|
||||
m.backends[backend]++
|
||||
}
|
||||
|
||||
// Record success/failure
|
||||
if success {
|
||||
m.successCount++
|
||||
} else {
|
||||
m.failureCount++
|
||||
}
|
||||
|
||||
// Add to time series
|
||||
record := RequestRecord{
|
||||
Timestamp: time.Now(),
|
||||
Endpoint: endpoint,
|
||||
Model: model,
|
||||
Backend: backend,
|
||||
Success: success,
|
||||
Duration: duration,
|
||||
}
|
||||
m.timeSeries = append(m.timeSeries, record)
|
||||
|
||||
// Note: Pruning is done periodically by pruneLoop() to avoid overhead on every request
|
||||
}
|
||||
|
||||
// GetEndpointStats returns request counts per endpoint
|
||||
func (m *InMemoryMetricsStore) GetEndpointStats() map[string]int64 {
|
||||
m.mu.RLock()
|
||||
defer m.mu.RUnlock()
|
||||
|
||||
result := make(map[string]int64)
|
||||
for k, v := range m.endpoints {
|
||||
result[k] = v
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
// GetModelStats returns request counts per model
|
||||
func (m *InMemoryMetricsStore) GetModelStats() map[string]int64 {
|
||||
m.mu.RLock()
|
||||
defer m.mu.RUnlock()
|
||||
|
||||
result := make(map[string]int64)
|
||||
for k, v := range m.models {
|
||||
result[k] = v
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
// GetBackendStats returns request counts per backend
|
||||
func (m *InMemoryMetricsStore) GetBackendStats() map[string]int64 {
|
||||
m.mu.RLock()
|
||||
defer m.mu.RUnlock()
|
||||
|
||||
result := make(map[string]int64)
|
||||
for k, v := range m.backends {
|
||||
result[k] = v
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
// GetRequestsOverTime returns time series data for the specified number of hours
|
||||
func (m *InMemoryMetricsStore) GetRequestsOverTime(hours int) []TimeSeriesPoint {
|
||||
m.mu.RLock()
|
||||
defer m.mu.RUnlock()
|
||||
|
||||
cutoff := time.Now().Add(-time.Duration(hours) * time.Hour)
|
||||
|
||||
// Group by hour
|
||||
hourlyBuckets := make(map[int64]int64)
|
||||
for _, record := range m.timeSeries {
|
||||
if record.Timestamp.After(cutoff) {
|
||||
// Round down to the hour
|
||||
hourTimestamp := record.Timestamp.Truncate(time.Hour).Unix()
|
||||
hourlyBuckets[hourTimestamp]++
|
||||
}
|
||||
}
|
||||
|
||||
// Convert to sorted time series
|
||||
result := make([]TimeSeriesPoint, 0)
|
||||
for ts, count := range hourlyBuckets {
|
||||
result = append(result, TimeSeriesPoint{
|
||||
Timestamp: time.Unix(ts, 0),
|
||||
Count: count,
|
||||
})
|
||||
}
|
||||
|
||||
// Sort by timestamp
|
||||
for i := 0; i < len(result); i++ {
|
||||
for j := i + 1; j < len(result); j++ {
|
||||
if result[i].Timestamp.After(result[j].Timestamp) {
|
||||
result[i], result[j] = result[j], result[i]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return result
|
||||
}
|
||||
|
||||
// GetTotalRequests returns the total number of requests recorded
|
||||
func (m *InMemoryMetricsStore) GetTotalRequests() int64 {
|
||||
m.mu.RLock()
|
||||
defer m.mu.RUnlock()
|
||||
|
||||
return m.successCount + m.failureCount
|
||||
}
|
||||
|
||||
// GetSuccessRate returns the percentage of successful requests
|
||||
func (m *InMemoryMetricsStore) GetSuccessRate() float64 {
|
||||
m.mu.RLock()
|
||||
defer m.mu.RUnlock()
|
||||
|
||||
total := m.successCount + m.failureCount
|
||||
if total == 0 {
|
||||
return 0.0
|
||||
}
|
||||
return float64(m.successCount) / float64(total) * 100.0
|
||||
}
|
||||
|
||||
// Reset clears all metrics
|
||||
func (m *InMemoryMetricsStore) Reset() {
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
|
||||
m.endpoints = make(map[string]int64)
|
||||
m.models = make(map[string]int64)
|
||||
m.backends = make(map[string]int64)
|
||||
m.timeSeries = make([]RequestRecord, 0)
|
||||
m.successCount = 0
|
||||
m.failureCount = 0
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// OpenTelemetry Metrics Service (for Prometheus export)
|
||||
// ============================================================================
|
||||
|
||||
type LocalAIMetricsService struct {
|
||||
Meter metric.Meter
|
||||
ApiTimeMetric metric.Float64Histogram
|
||||
@@ -334,7 +23,7 @@ func (m *LocalAIMetricsService) ObserveAPICall(method string, path string, durat
|
||||
m.ApiTimeMetric.Record(context.Background(), duration, opts)
|
||||
}
|
||||
|
||||
// NewLocalAIMetricsService bootstraps the OpenTelemetry pipeline for Prometheus export.
|
||||
// setupOTelSDK bootstraps the OpenTelemetry pipeline.
|
||||
// If it does not return an error, make sure to call shutdown for proper cleanup.
|
||||
func NewLocalAIMetricsService() (*LocalAIMetricsService, error) {
|
||||
exporter, err := prometheus.New()
|
||||
|
||||
@@ -1,4 +1,186 @@
|
||||
---
|
||||
- &qwen3vl
|
||||
url: "github:mudler/LocalAI/gallery/qwen3.yaml@master"
|
||||
icon: https://cdn-avatars.huggingface.co/v1/production/uploads/620760a26e3b7210c2ff1943/-s1gyJfvbE1RgO5iBeNOi.png
|
||||
license: apache-2.0
|
||||
tags:
|
||||
- llm
|
||||
- gguf
|
||||
- gpu
|
||||
- image-to-text
|
||||
- multimodal
|
||||
- cpu
|
||||
- qwen
|
||||
- qwen3
|
||||
- thinking
|
||||
- reasoning
|
||||
name: "qwen3-vl-30b-a3b-instruct"
|
||||
urls:
|
||||
- https://huggingface.co/unsloth/Qwen3-VL-30B-A3B-Instruct-GGUF
|
||||
description: |
|
||||
Meet Qwen3-VL — the most powerful vision-language model in the Qwen series to date.
|
||||
|
||||
This generation delivers comprehensive upgrades across the board: superior text understanding & generation, deeper visual perception & reasoning, extended context length, enhanced spatial and video dynamics comprehension, and stronger agent interaction capabilities.
|
||||
|
||||
Available in Dense and MoE architectures that scale from edge to cloud, with Instruct and reasoning‑enhanced Thinking editions for flexible, on-demand deployment.
|
||||
|
||||
#### Key Enhancements:
|
||||
|
||||
* **Visual Agent**: Operates PC/mobile GUIs—recognizes elements, understands functions, invokes tools, completes tasks.
|
||||
|
||||
* **Visual Coding Boost**: Generates Draw.io/HTML/CSS/JS from images/videos.
|
||||
|
||||
* **Advanced Spatial Perception**: Judges object positions, viewpoints, and occlusions; provides stronger 2D grounding and enables 3D grounding for spatial reasoning and embodied AI.
|
||||
|
||||
* **Long Context & Video Understanding**: Native 256K context, expandable to 1M; handles books and hours-long video with full recall and second-level indexing.
|
||||
|
||||
* **Enhanced Multimodal Reasoning**: Excels in STEM/Math—causal analysis and logical, evidence-based answers.
|
||||
|
||||
* **Upgraded Visual Recognition**: Broader, higher-quality pretraining is able to “recognize everything”—celebrities, anime, products, landmarks, flora/fauna, etc.
|
||||
|
||||
* **Expanded OCR**: Supports 32 languages (up from 19); robust in low light, blur, and tilt; better with rare/ancient characters and jargon; improved long-document structure parsing.
|
||||
|
||||
* **Text Understanding on par with pure LLMs**: Seamless text–vision fusion for lossless, unified comprehension.
|
||||
|
||||
#### Model Architecture Updates:
|
||||
|
||||
1. **Interleaved-MRoPE**: Full‑frequency allocation over time, width, and height via robust positional embeddings, enhancing long‑horizon video reasoning.
|
||||
|
||||
2. **DeepStack**: Fuses multi‑level ViT features to capture fine-grained details and sharpen image–text alignment.
|
||||
|
||||
3. **Text–Timestamp Alignment:** Moves beyond T‑RoPE to precise, timestamp‑grounded event localization for stronger video temporal modeling.
|
||||
|
||||
This is the weight repository for Qwen3-VL-30B-A3B-Instruct.
|
||||
overrides:
|
||||
mmproj: mmproj/mmproj-F16.gguf
|
||||
parameters:
|
||||
model: Qwen3-VL-30B-A3B-Instruct-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: Qwen3-VL-30B-A3B-Instruct-Q4_K_M.gguf
|
||||
sha256: 75d8f4904016d90b71509c8576ebd047a0606cc5aa788eada29d4bedf9b761a6
|
||||
uri: huggingface://unsloth/Qwen3-VL-30B-A3B-Instruct-GGUF/Qwen3-VL-30B-A3B-Instruct-Q4_K_M.gguf
|
||||
- filename: mmproj/mmproj-F16.gguf
|
||||
sha256: 7e7cec67a3a887bddbf38099738d08570e85f08dd126578fa00a7acf4dacef01
|
||||
uri: huggingface://unsloth/Qwen3-VL-30B-A3B-Instruct-GGUF/mmproj-F16.gguf
|
||||
- !!merge <<: *qwen3vl
|
||||
name: "qwen3-vl-30b-a3b-thinking"
|
||||
urls:
|
||||
- https://huggingface.co/unsloth/Qwen3-VL-30B-A3B-Thinking-GGUF
|
||||
description: |
|
||||
Qwen3-VL-30B-A3B-Thinking is the 30B-parameter reasoning ("thinking") variant of the Qwen3-VL series.
|
||||
overrides:
|
||||
mmproj: mmproj/mmproj-F16.gguf
|
||||
parameters:
|
||||
model: Qwen3-VL-30B-A3B-Thinking-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: Qwen3-VL-30B-A3B-Thinking-Q4_K_M.gguf
|
||||
sha256: d3e12c6b15f59cc1c6db685d33eb510184d006ebbff0e038e7685e57ce628b3b
|
||||
uri: huggingface://unsloth/Qwen3-VL-30B-A3B-Thinking-GGUF/Qwen3-VL-30B-A3B-Thinking-Q4_K_M.gguf
|
||||
- filename: mmproj/mmproj-F16.gguf
|
||||
sha256: 7e7cec67a3a887bddbf38099738d08570e85f08dd126578fa00a7acf4dacef01
|
||||
uri: huggingface://unsloth/Qwen3-VL-30B-A3B-Thinking-GGUF/mmproj-F16.gguf
|
||||
- !!merge <<: *qwen3vl
|
||||
name: "qwen3-vl-4b-instruct"
|
||||
urls:
|
||||
- https://huggingface.co/unsloth/Qwen3-VL-4B-Instruct-GGUF
|
||||
description: |
|
||||
Qwen3-VL-4B-Instruct is the 4B parameter model of the Qwen3-VL series.
|
||||
overrides:
|
||||
mmproj: mmproj/mmproj-Qwen3-VL-4B-Instruct-F16.gguf
|
||||
parameters:
|
||||
model: Qwen3-VL-4B-Instruct-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: Qwen3-VL-4B-Instruct-Q4_K_M.gguf
|
||||
sha256: d4dcd426bfba75752a312b266b80fec8136fbaca13c62d93b7ac41fa67f0492b
|
||||
uri: huggingface://unsloth/Qwen3-VL-4B-Instruct-GGUF/Qwen3-VL-4B-Instruct-Q4_K_M.gguf
|
||||
- filename: mmproj/mmproj-Qwen3-VL-4B-Instruct-F16.gguf
|
||||
sha256: 1b9f4e92f0fbda14d7d7b58baed86039b8a980fe503d9d6a9393f25c0028f1fc
|
||||
uri: huggingface://unsloth/Qwen3-VL-4B-Instruct-GGUF/mmproj-F16.gguf
|
||||
- !!merge <<: *qwen3vl
|
||||
name: "qwen3-vl-32b-instruct"
|
||||
urls:
|
||||
- https://huggingface.co/unsloth/Qwen3-VL-32B-Instruct-GGUF
|
||||
description: |
|
||||
Qwen3-VL-32B-Instruct is the 32B parameter model of the Qwen3-VL series.
|
||||
overrides:
|
||||
mmproj: mmproj/mmproj-Qwen3-VL-32B-Instruct-F16.gguf
|
||||
parameters:
|
||||
model: Qwen3-VL-32B-Instruct-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: Qwen3-VL-32B-Instruct-Q4_K_M.gguf
|
||||
sha256: 17885d28e964b22b2faa981a7eaeeeb78da0972ee5f826ad5965f7583a610d9f
|
||||
uri: huggingface://unsloth/Qwen3-VL-32B-Instruct-GGUF/Qwen3-VL-32B-Instruct-Q4_K_M.gguf
|
||||
- filename: mmproj/mmproj-Qwen3-VL-32B-Instruct-F16.gguf
|
||||
sha256: 14b1d68befa75a5e646dd990c5bb429c912b7aa9b49b9ab18231ca5f750421c9
|
||||
uri: huggingface://unsloth/Qwen3-VL-32B-Instruct-GGUF/mmproj-F16.gguf
|
||||
- !!merge <<: *qwen3vl
|
||||
name: "qwen3-vl-4b-thinking"
|
||||
urls:
|
||||
- https://huggingface.co/unsloth/Qwen3-VL-4B-Thinking-GGUF
|
||||
description: |
|
||||
Qwen3-VL-4B-Thinking is the 4B parameter model of the Qwen3-VL series that is thinking.
|
||||
overrides:
|
||||
mmproj: mmproj/mmproj-Qwen3-VL-4B-Thinking-F16.gguf
|
||||
parameters:
|
||||
model: Qwen3-VL-4B-Thinking-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: Qwen3-VL-4B-Thinking-Q4_K_M.gguf
|
||||
sha256: bd73237f16265a1014979b7ed34ff9265e7e200ae6745bb1da383a1bbe0f9211
|
||||
uri: huggingface://unsloth/Qwen3-VL-4B-Thinking-GGUF/Qwen3-VL-4B-Thinking-Q4_K_M.gguf
|
||||
- filename: mmproj/mmproj-Qwen3-VL-4B-Thinking-F16.gguf
|
||||
sha256: 72354fcd3fc75935b84e745ca492d6e78dd003bb5a020d71b296e7650926ac87
|
||||
uri: huggingface://unsloth/Qwen3-VL-4B-Thinking-GGUF/mmproj-F16.gguf
|
||||
- !!merge <<: *qwen3vl
|
||||
name: "qwen3-vl-2b-thinking"
|
||||
urls:
|
||||
- https://huggingface.co/unsloth/Qwen3-VL-2B-Thinking-GGUF
|
||||
description: |
|
||||
Qwen3-VL-2B-Thinking is the 2B parameter model of the Qwen3-VL series that is thinking.
|
||||
overrides:
|
||||
mmproj: mmproj/mmproj-Qwen3-VL-2B-Thinking-F16.gguf
|
||||
parameters:
|
||||
model: Qwen3-VL-2B-Thinking-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: Qwen3-VL-2B-Thinking-Q4_K_M.gguf
|
||||
sha256: 5f282086042d96b78b138839610f5148493b354524090fadc5c97c981b70a26e
|
||||
uri: huggingface://unsloth/Qwen3-VL-2B-Thinking-GGUF/Qwen3-VL-2B-Thinking-Q4_K_M.gguf
|
||||
- filename: mmproj/mmproj-Qwen3-VL-2B-Thinking-F16.gguf
|
||||
sha256: 4eabc90a52fe890d6ca1dad92548782eab6edc91f012a365fff95cf027ba529d
|
||||
uri: huggingface://unsloth/Qwen3-VL-2B-Thinking-GGUF/mmproj-F16.gguf
|
||||
- !!merge <<: *qwen3vl
|
||||
name: "qwen3-vl-2b-instruct"
|
||||
urls:
|
||||
- https://huggingface.co/unsloth/Qwen3-VL-2B-Instruct-GGUF
|
||||
description: |
|
||||
Qwen3-VL-2B-Instruct is the 2B parameter model of the Qwen3-VL series.
|
||||
overrides:
|
||||
mmproj: mmproj/mmproj-Qwen3-VL-2B-Instruct-F16.gguf
|
||||
parameters:
|
||||
model: Qwen3-VL-2B-Instruct-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: Qwen3-VL-2B-Instruct-Q4_K_M.gguf
|
||||
sha256: 858fcf2a39dc73b26dd86592cb0a5f949b59d1edb365d1dea98e46b02e955e56
|
||||
uri: huggingface://unsloth/Qwen3-VL-2B-Instruct-GGUF/Qwen3-VL-2B-Instruct-Q4_K_M.gguf
|
||||
- filename: mmproj/mmproj-Qwen3-VL-2B-Instruct-F16.gguf
|
||||
sha256: cd5a851d3928697fa1bd76d459d2cc409b6cf40c9d9682b2f5c8e7c6a9f9630f
|
||||
uri: huggingface://unsloth/Qwen3-VL-2B-Instruct-GGUF/mmproj-F16.gguf
|
||||
- !!merge <<: *qwen3vl
|
||||
name: "huihui-qwen3-vl-30b-a3b-instruct-abliterated"
|
||||
urls:
|
||||
- https://huggingface.co/noctrex/Huihui-Qwen3-VL-30B-A3B-Instruct-abliterated-GGUF
|
||||
description: |
|
||||
These are quantizations of the model Huihui-Qwen3-VL-30B-A3B-Instruct-abliterated-GGUF
|
||||
overrides:
|
||||
mmproj: mmproj/mmproj-Huihui-Qwen3-VL-30B-A3B-F16.gguf
|
||||
parameters:
|
||||
model: Huihui-Qwen3-VL-30B-A3B-Instruct-abliterated-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: Huihui-Qwen3-VL-30B-A3B-Instruct-abliterated-Q4_K_M.gguf
|
||||
sha256: 1e94a65167a39d2ff4427393746d4dbc838f3d163c639d932e9ce983f575eabf
|
||||
uri: huggingface://noctrex/Huihui-Qwen3-VL-30B-A3B-Instruct-abliterated-GGUF/Huihui-Qwen3-VL-30B-A3B-Instruct-abliterated-Q4_K_M.gguf
|
||||
- filename: mmproj/mmproj-Huihui-Qwen3-VL-30B-A3B-F16.gguf
|
||||
sha256: 4bfd655851a5609b29201154e0bd4fe5f9274073766b8ab35b3a8acba0dd77a7
|
||||
uri: huggingface://noctrex/Huihui-Qwen3-VL-30B-A3B-Instruct-abliterated-GGUF/mmproj-F16.gguf
|
||||
- &jamba
|
||||
icon: https://cdn-avatars.huggingface.co/v1/production/uploads/65e60c0ed5313c06372446ff/QwehUHgP2HtVAMW5MzJ2j.png
|
||||
name: "ai21labs_ai21-jamba-reasoning-3b"
|
||||
@@ -22795,3 +22977,389 @@
|
||||
- filename: wraith-8b.i1-Q4_K_M.gguf
|
||||
sha256: 180469f9de3e1b5a77b7cf316899dbe4782bd5e6d4f161fb18ea95aa612e6926
|
||||
uri: huggingface://mradermacher/wraith-8b-i1-GGUF/wraith-8b.i1-Q4_K_M.gguf
|
||||
- !!merge <<: *qwen25
|
||||
name: "pokee_research_7b"
|
||||
urls:
|
||||
- https://huggingface.co/Mungert/pokee_research_7b-GGUF
|
||||
description: |
|
||||
**Model Name:** Qwen2.5-7B-Instruct
|
||||
**Base Model:** Qwen/Qwen2.5-7B
|
||||
**Model Type:** Instruction-tuned large language model (7.61B parameters)
|
||||
**License:** Apache 2.0
|
||||
|
||||
**Description:**
|
||||
Qwen2.5-7B-Instruct is a powerful, instruction-following language model designed for advanced reasoning, coding, and multi-turn dialogue. Built on the Qwen2.5 architecture, it delivers state-of-the-art performance in understanding complex prompts, generating long-form text (up to 8K tokens), and handling structured outputs like JSON. It supports multilingual communication (29+ languages), including English, Chinese, and European languages, and excels in long-context tasks with support for up to 131,072 tokens.
|
||||
|
||||
Ideal for research, creative writing, coding assistance, and agent-based workflows, this model is optimized for real-world applications requiring robustness, accuracy, and scalability.
|
||||
|
||||
**Key Features:**
|
||||
- 7.61 billion parameters
|
||||
- Context length: 131K tokens (supports long-context via YaRN)
|
||||
- Strong performance in math, coding, and factual reasoning
|
||||
- Fine-tuned for instruction following and chat interactions
|
||||
- Deployable with Hugging Face Transformers, vLLM, and llama.cpp
|
||||
|
||||
**Use Case:**
|
||||
Perfect for developers, researchers, and enterprises building intelligent assistants, autonomous agents, or content generation systems.
|
||||
|
||||
**Citation:**
|
||||
```bibtex
|
||||
@misc{qwen2.5,
|
||||
title = {Qwen2.5: A Party of Foundation Models},
|
||||
url = {https://qwenlm.github.io/blog/qwen2.5/},
|
||||
author = {Qwen Team},
|
||||
month = {September},
|
||||
year = {2024}
|
||||
}
|
||||
```
|
||||
overrides:
|
||||
parameters:
|
||||
model: pokee_research_7b-q4_k_m.gguf
|
||||
files:
|
||||
- filename: pokee_research_7b-q4_k_m.gguf
|
||||
sha256: 670706711d82fcdbae951fda084f77c9c479edf3eb5d8458d1cfddd46cf4b767
|
||||
uri: huggingface://Mungert/pokee_research_7b-GGUF/pokee_research_7b-q4_k_m.gguf
|
||||
- !!merge <<: *qwen3
|
||||
name: "deepkat-32b-i1"
|
||||
urls:
|
||||
- https://huggingface.co/mradermacher/DeepKAT-32B-i1-GGUF
|
||||
description: |
|
||||
**DeepKAT-32B** is a high-performance, open-source coding agent built by merging two leading RL-tuned models—**DeepSWE-Preview** and **KAT-Dev**—on the **Qwen3-32B** base architecture using Arcee MergeKit’s TIES method. This 32B parameter model excels in complex software engineering tasks, including code generation, bug fixing, refactoring, and autonomous agent workflows with tool use.
|
||||
|
||||
Key strengths:
|
||||
- Achieves ~62% SWE-Bench Verified score (on par with top open-source models).
|
||||
- Strong performance in multi-file reasoning, multi-turn planning, and sparse reward environments.
|
||||
- Optimized for agentic behavior with step-by-step reasoning and tool chaining.
|
||||
|
||||
Ideal for developers, AI researchers, and teams building intelligent code assistants or autonomous software agents.
|
||||
|
||||
> 🔗 **Base Model**: Qwen/Qwen3-32B
|
||||
> 🛠️ **Built With**: MergeKit (TIES), RL-finetuned components
|
||||
> 📊 **Benchmarks**: SWE-Bench Verified: ~62%, HumanEval Pass@1: ~85%
|
||||
|
||||
*Note: The model is a merge of two RL-tuned models and not a direct training from scratch.*
|
||||
overrides:
|
||||
parameters:
|
||||
model: mradermacher/DeepKAT-32B-i1-GGUF
|
||||
- !!merge <<: *granite4
|
||||
name: "ibm-granite.granite-4.0-1b"
|
||||
urls:
|
||||
- https://huggingface.co/DevQuasar/ibm-granite.granite-4.0-1b-GGUF
|
||||
description: |
|
||||
### **Granite-4.0-1B**
|
||||
*By IBM | Apache 2.0 License*
|
||||
|
||||
**Overview:**
|
||||
Granite-4.0-1B is a lightweight, instruction-tuned language model designed for efficient on-device and research use. Built on a decoder-only dense transformer architecture, it delivers strong performance in instruction following, code generation, tool calling, and multilingual tasks—making it ideal for applications requiring low latency and minimal resource usage.
|
||||
|
||||
**Key Features:**
|
||||
- **Size:** 1.6 billion parameters (1B Dense), optimized for efficiency.
|
||||
- **Capabilities:**
|
||||
- Text generation, summarization, question answering
|
||||
- Code completion and function calling (e.g., API integration)
|
||||
- Multilingual support (English, Spanish, French, German, Japanese, Chinese, Arabic, Korean, Portuguese, Italian, Dutch, Czech)
|
||||
- Robust safety and alignment via instruction tuning and reinforcement learning
|
||||
- **Architecture:** Uses GQA (Grouped Query Attention), SwiGLU activation, RMSNorm, shared input/output embeddings, and RoPE position embeddings.
|
||||
- **Context Length:** Up to 128K tokens — suitable for long-form content and complex reasoning.
|
||||
- **Training:** Finetuned from *Granite-4.0-1B-Base* using open-source datasets, synthetic data, and human-curated instruction pairs.
|
||||
|
||||
**Performance Highlights (1B Dense):**
|
||||
- **MMLU (5-shot):** 59.39
|
||||
- **HumanEval (pass@1):** 74
|
||||
- **IFEval (Alignment):** 80.82
|
||||
- **GSM8K (8-shot):** 76.35
|
||||
- **SALAD-Bench (Safety):** 93.44
|
||||
|
||||
**Use Cases:**
|
||||
- On-device AI applications
|
||||
- Research and prototyping
|
||||
- Fine-tuning for domain-specific tasks
|
||||
- Low-resource environments with high performance expectations
|
||||
|
||||
**Resources:**
|
||||
- [Hugging Face Model](https://huggingface.co/ibm-granite/granite-4.0-1b)
|
||||
- [Granite Docs](https://www.ibm.com/granite/docs/)
|
||||
- [GitHub Repository](https://github.com/ibm-granite/granite-4.0-nano-language-models)
|
||||
|
||||
> *“Make knowledge free for everyone.” – IBM Granite Team*
|
||||
overrides:
|
||||
parameters:
|
||||
model: ibm-granite.granite-4.0-1b.Q4_K_M.gguf
|
||||
files:
|
||||
- filename: ibm-granite.granite-4.0-1b.Q4_K_M.gguf
|
||||
sha256: 0e0ef42486b7f1f95dfe33af2e696df1149253e500c48f3fb8db0125afa2922c
|
||||
uri: huggingface://DevQuasar/ibm-granite.granite-4.0-1b-GGUF/ibm-granite.granite-4.0-1b.Q4_K_M.gguf
|
||||
- !!merge <<: *qwen3
|
||||
name: "apollo-astralis-4b-i1"
|
||||
urls:
|
||||
- https://huggingface.co/mradermacher/apollo-astralis-4b-i1-GGUF
|
||||
description: |
|
||||
**Apollo-Astralis V1 4B**
|
||||
*A warm, enthusiastic, and empathetic reasoning model built on Qwen3-4B-Thinking*
|
||||
|
||||
**Overview**
|
||||
Apollo-Astralis V1 4B is a 4-billion-parameter conversational AI designed for collaborative, emotionally intelligent problem-solving. Developed by VANTA Research, it combines rigorous logical reasoning with a vibrant, supportive communication style—making it ideal for creative brainstorming, educational support, and personal development.
|
||||
|
||||
**Key Features**
|
||||
- 🤔 **Explicit Reasoning**: Uses `<think>` tags to break down thought processes step by step
|
||||
- 💬 **Warm & Enthusiastic Tone**: Celebrates achievements with energy and empathy
|
||||
- 🤝 **Collaborative Style**: Engages users with "we" language and clarifying questions
|
||||
- 🔍 **High Accuracy**: Achieves 100% in enthusiasm detection and 90% in empathy recognition
|
||||
- 🎯 **Fine-Tuned for Real-World Use**: Trained with LoRA on a dataset emphasizing emotional intelligence and consistency
|
||||
|
||||
**Base Model**
|
||||
Built on **Qwen3-4B-Thinking** and enhanced with lightweight LoRA fine-tuning (33M trainable parameters).
|
||||
Available in both full and quantized (GGUF) formats via Hugging Face and Ollama.
|
||||
|
||||
**Use Cases**
|
||||
- Personal coaching & motivation
|
||||
- Creative ideation & project planning
|
||||
- Educational tutoring with emotional support
|
||||
- Mental wellness conversations (complementary, not a replacement for professional care)
|
||||
|
||||
**License**
|
||||
Apache 2.0 — open for research, commercial, and personal use.
|
||||
|
||||
**Try It**
|
||||
👉 [Hugging Face Page](https://huggingface.co/VANTA-Research/apollo-astralis-v1-4b)
|
||||
👉 [Ollama](https://ollama.com/vanta-research/apollo-astralis-v1-4b)
|
||||
|
||||
*Developed by VANTA Research — where reasoning meets warmth.*
|
||||
overrides:
|
||||
parameters:
|
||||
model: apollo-astralis-4b.i1-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: apollo-astralis-4b.i1-Q4_K_M.gguf
|
||||
sha256: 94e1d371420b03710fc7de030c1c06e75a356d9388210a134ee2adb4792a2626
|
||||
uri: huggingface://mradermacher/apollo-astralis-4b-i1-GGUF/apollo-astralis-4b.i1-Q4_K_M.gguf
|
||||
- !!merge <<: *qwen3
|
||||
name: "qwen3-vlto-32b-instruct-i1"
|
||||
urls:
|
||||
- https://huggingface.co/mradermacher/Qwen3-VLTO-32B-Instruct-i1-GGUF
|
||||
description: |
|
||||
**Model Name:** Qwen3-VL-32B-Instruct (Text-Only Variant: Qwen3-VLTO-32B-Instruct)
|
||||
**Base Model:** Qwen/Qwen3-VL-32B-Instruct
|
||||
**Repository:** [mradermacher/Qwen3-VLTO-32B-Instruct-i1-GGUF](https://huggingface.co/mradermacher/Qwen3-VLTO-32B-Instruct-i1-GGUF)
|
||||
**Type:** Large Language Model (LLM) – Text-Only (Vision-Language model stripped of vision components)
|
||||
**Architecture:** Qwen3-VL, adapted for pure text generation
|
||||
**Size:** 32 billion parameters
|
||||
**License:** Apache 2.0
|
||||
**Framework:** Hugging Face Transformers
|
||||
|
||||
---
|
||||
|
||||
### 🔍 **Description**
|
||||
|
||||
This is a **text-only variant** of the powerful **Qwen3-VL-32B-Instruct** multimodal model, stripped of its vision components to function as a high-performance pure language model. The model retains the full text understanding and generation capabilities of its parent — including strong reasoning, long-context handling (up to 32K+ tokens), and advanced multimodal training-derived coherence — while being optimized for text-only tasks.
|
||||
|
||||
It was created by loading the weights from the full Qwen3-VL-32B-Instruct model into a text-only Qwen3 architecture, preserving all linguistic and reasoning strengths without the need for image input.
|
||||
|
||||
Perfect for applications requiring deep reasoning, long-form content generation, code synthesis, and dialogue — with all the benefits of the Qwen3 series, now in a lightweight, text-focused form.
|
||||
|
||||
---
|
||||
|
||||
### 📌 Key Features
|
||||
|
||||
- ✅ **High-Performance Text Generation** – Built on top of the state-of-the-art Qwen3-VL architecture
|
||||
- ✅ **Extended Context Length** – Supports up to 32,768 tokens (ideal for long documents and complex tasks)
|
||||
- ✅ **Strong Reasoning & Planning** – Excels at logic, math, coding, and multi-step reasoning
|
||||
- ✅ **Optimized for GGUF Format** – Available in multiple quantized versions (IQ3_M, Q2_K, etc.) for efficient inference on consumer hardware
|
||||
- ✅ **Free to Use & Modify** – Apache 2.0 license
|
||||
|
||||
---
|
||||
|
||||
### 📦 Use Case Suggestions
|
||||
|
||||
- Long-form writing, summarization, and editing
|
||||
- Code generation and debugging
|
||||
- AI agents and task automation
|
||||
- High-quality chat and dialogue systems
|
||||
- Research and experimentation with large-scale LLMs on local devices
|
||||
|
||||
---
|
||||
|
||||
### 📚 References
|
||||
|
||||
- Original Model: [Qwen/Qwen3-VL-32B-Instruct](https://huggingface.co/Qwen/Qwen3-VL-32B-Instruct)
|
||||
- Technical Report: [Qwen3 Technical Report (arXiv)](https://arxiv.org/abs/2505.09388)
|
||||
- Quantization by: [mradermacher](https://huggingface.co/mradermacher)
|
||||
|
||||
> ✅ **Note**: The model shown here is **not the original vision-language model** — it's a **text-only conversion** of the Qwen3-VL-32B-Instruct model, ideal for pure language tasks.
|
||||
overrides:
|
||||
parameters:
|
||||
model: Qwen3-VLTO-32B-Instruct.i1-Q4_K_S.gguf
|
||||
files:
|
||||
- filename: Qwen3-VLTO-32B-Instruct.i1-Q4_K_S.gguf
|
||||
sha256: 789d55249614cd1acee1a23278133cd56ca898472259fa2261f77d65ed7f8367
|
||||
uri: huggingface://mradermacher/Qwen3-VLTO-32B-Instruct-i1-GGUF/Qwen3-VLTO-32B-Instruct.i1-Q4_K_S.gguf
|
||||
- !!merge <<: *qwen3
|
||||
name: "qwen3-vlto-32b-thinking"
|
||||
urls:
|
||||
- https://huggingface.co/mradermacher/Qwen3-VLTO-32B-Thinking-GGUF
|
||||
description: |
|
||||
**Model Name:** Qwen3-VLTO-32B-Thinking
|
||||
**Model Type:** Large Language Model (Text-Only)
|
||||
**Base Model:** Qwen/Qwen3-VL-32B-Thinking (vanilla Qwen3-VL-32B with vision components removed)
|
||||
**Architecture:** Transformer-based, 32-billion parameter model optimized for reasoning and complex text generation.
|
||||
|
||||
### Description:
|
||||
Qwen3-VLTO-32B-Thinking is a pure text-only variant of the Qwen3-VL-32B-Thinking model, stripped of its vision capabilities while preserving the full reasoning and language understanding power. It is derived by transferring the weights from the vision-language model into a text-only transformer architecture, maintaining the same high-quality behavior for tasks such as logical reasoning, code generation, and dialogue.
|
||||
|
||||
This model is ideal for applications requiring deep linguistic reasoning and long-context understanding without image input. It supports advanced multimodal reasoning capabilities *in text form*—perfect for research, chatbots, and content generation.
|
||||
|
||||
### Key Features:
|
||||
- ✅ 32B parameters, high reasoning capability
|
||||
- ✅ No vision components — fully text-only
|
||||
- ✅ Trained for complex thinking and step-by-step reasoning
|
||||
- ✅ Compatible with Hugging Face Transformers and GGUF inference tools
|
||||
- ✅ Available in multiple quantization levels (Q2_K to Q8_0) for efficient deployment
|
||||
|
||||
### Use Case:
|
||||
Ideal for advanced text generation, logical inference, coding, and conversational AI where vision is not needed.
|
||||
|
||||
> 🔗 **Base Model**: [Qwen/Qwen3-VL-32B-Thinking](https://huggingface.co/Qwen/Qwen3-VL-32B-Thinking)
|
||||
> 📦 **Quantized Versions**: Available via [mradermacher/Qwen3-VLTO-32B-Thinking-GGUF](https://huggingface.co/mradermacher/Qwen3-VLTO-32B-Thinking-GGUF)
|
||||
|
||||
---
|
||||
*Note: The original model was created by Alibaba’s Qwen team. This variant was adapted by qingy2024 and quantized by mradermacher.*
|
||||
overrides:
|
||||
parameters:
|
||||
model: Qwen3-VLTO-32B-Thinking.Q4_K_M.gguf
|
||||
files:
|
||||
- filename: Qwen3-VLTO-32B-Thinking.Q4_K_M.gguf
|
||||
sha256: d88b75df7c40455dfa21ded23c8b25463a8d58418bb6296304052b7e70e96954
|
||||
uri: huggingface://mradermacher/Qwen3-VLTO-32B-Thinking-GGUF/Qwen3-VLTO-32B-Thinking.Q4_K_M.gguf
|
||||
- !!merge <<: *gemma3
|
||||
name: "gemma-3-the-grand-horror-27b"
|
||||
urls:
|
||||
- https://huggingface.co/DavidAU/Gemma-3-The-Grand-Horror-27B-GGUF
|
||||
description: |
|
||||
The **Gemma-3-The-Grand-Horror-27B-GGUF** model is a **fine-tuned version** of Google's **Gemma 3 27B** language model, specifically optimized for **extreme horror-themed text generation**. It was trained using the **Unsloth framework** on a custom in-house dataset of horror content, resulting in a model that produces vivid, graphic, and psychologically intense narratives—featuring gore, madness, and disturbing imagery—often even when prompts don't explicitly request horror.
|
||||
|
||||
Key characteristics:
|
||||
- **Base Model**: Gemma 3 27B (original by Google, not the quantized version)
|
||||
- **Fine-tuned For**: High-intensity horror storytelling, long-form narrative generation, and immersive scene creation
|
||||
- **Use Case**: Creative writing, horror RP, dark fiction, and experimental storytelling
|
||||
- **Not Suitable For**: General use, children, sensitive audiences, or content requiring neutral/positive tone
|
||||
- **Quantization**: Available in GGUF format (e.g., q3k, q4, etc.), making it accessible for local inference on consumer hardware
|
||||
|
||||
> ✅ **Note**: The model card you see is for a **quantized, fine-tuned derivative**, not the original. The true base model is **Gemma 3 27B**, available at: https://huggingface.co/google/gemma-3-27b
|
||||
|
||||
This model is not for all audiences — it generates content with a consistently dark, unsettling tone. Use responsibly.
|
||||
overrides:
|
||||
parameters:
|
||||
model: Gemma-3-The-Grand-Horror-27B-Q4_k_m.gguf
|
||||
files:
|
||||
- filename: Gemma-3-The-Grand-Horror-27B-Q4_k_m.gguf
|
||||
sha256: 46f0b06b785d19804a1a796bec89a8eeba8a4e2ef21e2ab8dbb8fa2ff0d675b1
|
||||
uri: huggingface://DavidAU/Gemma-3-The-Grand-Horror-27B-GGUF/Gemma-3-The-Grand-Horror-27B-Q4_k_m.gguf
|
||||
- !!merge <<: *qwen3
|
||||
name: "qwen3-nemotron-32b-rlbff-i1"
|
||||
urls:
|
||||
- https://huggingface.co/mradermacher/Qwen3-Nemotron-32B-RLBFF-i1-GGUF
|
||||
description: |
|
||||
**Model Name:** Qwen3-Nemotron-32B-RLBFF
|
||||
**Base Model:** Qwen/Qwen3-32B
|
||||
**Developer:** NVIDIA
|
||||
**License:** NVIDIA Open Model License
|
||||
|
||||
**Description:**
|
||||
Qwen3-Nemotron-32B-RLBFF is a high-performance, fine-tuned large language model built on the Qwen3-32B foundation. It is specifically optimized to generate high-quality, helpful responses in a default thinking mode through advanced reinforcement learning with binary flexible feedback (RLBFF). Trained on the HelpSteer3 dataset, this model excels in reasoning, planning, coding, and information-seeking tasks while maintaining strong safety and alignment with human preferences.
|
||||
|
||||
**Key Performance (as of Sep 2025):**
|
||||
- **MT-Bench:** 9.50 (near GPT-4-Turbo level)
|
||||
- **Arena Hard V2:** 55.6%
|
||||
- **WildBench:** 70.33%
|
||||
|
||||
**Architecture & Efficiency:**
|
||||
- 32 billion parameters, based on the Qwen3 Transformer architecture
|
||||
- Designed for deployment on NVIDIA GPUs (Ampere, Hopper, Turing)
|
||||
- Achieves performance comparable to DeepSeek R1 and O3-mini at less than 5% of the inference cost
|
||||
|
||||
**Use Case:**
|
||||
Ideal for applications requiring reliable, thoughtful, and safe responses—such as advanced chatbots, research assistants, and enterprise AI systems.
|
||||
|
||||
**Access & Usage:**
|
||||
Available on Hugging Face with support for Hugging Face Transformers and vLLM.
|
||||
**Cite:** [Wang et al., 2025 — RLBFF: Binary Flexible Feedback](https://arxiv.org/abs/2509.21319)
|
||||
|
||||
👉 *Note: The GGUF version (mradermacher/Qwen3-Nemotron-32B-RLBFF-i1-GGUF) is a user-quantized variant. The original model is available at nvidia/Qwen3-Nemotron-32B-RLBFF.*
|
||||
overrides:
|
||||
parameters:
|
||||
model: Qwen3-Nemotron-32B-RLBFF.i1-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: Qwen3-Nemotron-32B-RLBFF.i1-Q4_K_M.gguf
|
||||
sha256: 000e8c65299fc232d1a832f1cae831ceaa16425eccfb7d01702d73e8bd3eafee
|
||||
uri: huggingface://mradermacher/Qwen3-Nemotron-32B-RLBFF-i1-GGUF/Qwen3-Nemotron-32B-RLBFF.i1-Q4_K_M.gguf
|
||||
- !!merge <<: *gptoss
|
||||
name: "financial-gpt-oss-20b-q8-i1"
|
||||
urls:
|
||||
- https://huggingface.co/mradermacher/financial-gpt-oss-20b-q8-i1-GGUF
|
||||
description: |
|
||||
### **Financial GPT-OSS 20B (Base Model)**
|
||||
|
||||
**Model Type:** Causal Language Model (Fine-tuned for Financial Analysis)
|
||||
**Architecture:** Mixture of Experts (MoE) – 20B parameters, 32 experts (4 active per token)
|
||||
**Base Model:** `unsloth/gpt-oss-20b-unsloth-bnb-4bit`
|
||||
**Fine-tuned With:** LoRA (Low-Rank Adaptation) on financial conversation data
|
||||
**Training Data:** 22,250 financial dialogue pairs covering stocks (AAPL, NVDA, TSLA, etc.), technical analysis, risk assessment, and trading signals
|
||||
**Context Length:** 131,072 tokens
|
||||
**Quantization:** Q8_0 GGUF (for efficient inference)
|
||||
**License:** Apache 2.0
|
||||
|
||||
**Key Features:**
|
||||
- Specialized in financial market analysis: technical indicators (RSI, MACD), risk assessments, trading signals, and price forecasts
|
||||
- Handles complex financial queries with structured, actionable insights
|
||||
- Designed for real-time use with low-latency inference (GGUF format)
|
||||
- Supports S&P 500 stocks and major asset classes across tech, healthcare, energy, and finance sectors
|
||||
|
||||
**Use Case:** Ideal for traders, analysts, and developers building financial AI tools. Use with caution—**not financial advice**.
|
||||
|
||||
**Citation:**
|
||||
```bibtex
|
||||
@misc{financial-gpt-oss-20b-q8,
|
||||
title={Financial GPT-OSS 20B Q8: Fine-tuned Financial Analysis Model},
|
||||
author={beenyb},
|
||||
year={2025},
|
||||
publisher={Hugging Face Hub},
|
||||
url={https://huggingface.co/beenyb/financial-gpt-oss-20b-q8}
|
||||
}
|
||||
```
|
||||
overrides:
|
||||
parameters:
|
||||
model: financial-gpt-oss-20b-q8.i1-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: financial-gpt-oss-20b-q8.i1-Q4_K_M.gguf
|
||||
sha256: 14586673de2a769f88bd51f88464b9b1f73d3ad986fa878b2e0c1473f1c1fc59
|
||||
uri: huggingface://mradermacher/financial-gpt-oss-20b-q8-i1-GGUF/financial-gpt-oss-20b-q8.i1-Q4_K_M.gguf
|
||||
- !!merge <<: *llama3
|
||||
name: "qwen3-grand-horror-light-1.7b"
|
||||
urls:
|
||||
- https://huggingface.co/mradermacher/Qwen3-Grand-Horror-Light-1.7B-GGUF
|
||||
description: |
|
||||
**Model Name:** Qwen3-Grand-Horror-Light-1.7B
|
||||
**Base Model:** qingy2024/Qwen3-VLTO-1.7B-Instruct
|
||||
**Model Type:** Fine-tuned Language Model (Text Generation)
|
||||
**Size:** 1.7B parameters
|
||||
**License:** Apache 2.0
|
||||
**Language:** English
|
||||
**Use Case:** Horror storytelling, creative writing, roleplay, scene generation
|
||||
**Fine-Tuned On:** Custom horror dataset (`DavidAU/horror-nightmare1`)
|
||||
**Training Method:** Fine-tuned via Unsloth
|
||||
**Key Features:**
|
||||
- Specialized in generating atmospheric, intense horror content with elements of madness, gore, and suspense
|
||||
- Optimized for roleplay and narrative generation with low to medium horror intensity
|
||||
- Supports high-quality output across multiple quantization levels (Q2_K to Q8_0, f16)
|
||||
- Designed for use with tools like KoboldCpp, oobabooga/text-generation-webui, and Silly Tavern
|
||||
- Recommended settings: Temperature 0.4–1.2, Repetition penalty 1.1, Smoothing factor 1.5 for smoother output
|
||||
|
||||
**Note:** This model is a fine-tuned variant of the Qwen3 series, not a quantized version. The original base model is available at [qingy2024/Qwen3-VLTO-1.7B-Instruct](https://huggingface.co/qingy2024/Qwen3-VLTO-1.7B-Instruct) and was further adapted for horror-themed creative writing.
|
||||
|
||||
**Ideal For:** Creators, writers, and roleplayers seeking a compact, expressive model for immersive horror storytelling.
|
||||
overrides:
|
||||
parameters:
|
||||
model: Qwen3-Grand-Horror-Light-1.7B.Q4_K_M.gguf
|
||||
files:
|
||||
- filename: Qwen3-Grand-Horror-Light-1.7B.Q4_K_M.gguf
|
||||
sha256: cbbb0c5f6874130a8ae253377fdc7ad25fa2c1e9bb45f1aaad88db853ef985dc
|
||||
uri: huggingface://mradermacher/Qwen3-Grand-Horror-Light-1.7B-GGUF/Qwen3-Grand-Horror-Light-1.7B.Q4_K_M.gguf
|
||||
|
||||
@@ -6,15 +6,20 @@ config_file: |
|
||||
backend: "llama-cpp"
|
||||
template:
|
||||
chat_message: |
|
||||
<|im_start|>{{ .RoleName }}
|
||||
{{ if .FunctionCall -}}
|
||||
{{ else if eq .RoleName "tool" -}}
|
||||
<|im_start|>{{if eq .RoleName "tool" }}user{{else}}{{ .RoleName }}{{end}}
|
||||
{{ if eq .RoleName "tool" -}}
|
||||
<tool_response>
|
||||
{{ end -}}
|
||||
{{ if .Content -}}
|
||||
{{.Content }}
|
||||
{{ end -}}
|
||||
{{ if eq .RoleName "tool" -}}
|
||||
</tool_response>
|
||||
{{ end -}}
|
||||
{{ if .FunctionCall -}}
|
||||
<tool_call>
|
||||
{{toJson .FunctionCall}}
|
||||
</tool_call>
|
||||
{{ end -}}<|im_end|>
|
||||
function: |
|
||||
<|im_start|>system
|
||||
|
||||
Reference in New Issue
Block a user