mirror of
https://github.com/mudler/LocalAI.git
synced 2026-02-04 03:32:40 -05:00
Compare commits
28 Commits
feat/stats
...
v3.7.0
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
9ecfdc5938 | ||
|
|
c332ef5cce | ||
|
|
6e7a8c6041 | ||
|
|
43e707ec4f | ||
|
|
fed3663a74 | ||
|
|
5b72798db3 | ||
|
|
d24d6d4e93 | ||
|
|
50ee1fbe06 | ||
|
|
19f3425ce0 | ||
|
|
a6ef245534 | ||
|
|
88cb379c2d | ||
|
|
0ddb2e8dcf | ||
|
|
91b9301bec | ||
|
|
fad5868f7b | ||
|
|
1e5b9135df | ||
|
|
36d19e23e0 | ||
|
|
cba9d1aac0 | ||
|
|
dd21a0d2f9 | ||
|
|
302a43b3ae | ||
|
|
2955061b42 | ||
|
|
84644ab693 | ||
|
|
b8f40dde1e | ||
|
|
a6c9789a54 | ||
|
|
a48d9ce27c | ||
|
|
fb825a2708 | ||
|
|
5558dce449 | ||
|
|
cf74a11e65 | ||
|
|
86b5deec81 |
4
.github/workflows/bump_deps.yaml
vendored
4
.github/workflows/bump_deps.yaml
vendored
@@ -1,10 +1,10 @@
|
||||
name: Bump dependencies
|
||||
name: Bump Backend dependencies
|
||||
on:
|
||||
schedule:
|
||||
- cron: 0 20 * * *
|
||||
workflow_dispatch:
|
||||
jobs:
|
||||
bump:
|
||||
bump-backends:
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
|
||||
4
.github/workflows/bump_docs.yaml
vendored
4
.github/workflows/bump_docs.yaml
vendored
@@ -1,10 +1,10 @@
|
||||
name: Bump dependencies
|
||||
name: Bump Documentation
|
||||
on:
|
||||
schedule:
|
||||
- cron: 0 20 * * *
|
||||
workflow_dispatch:
|
||||
jobs:
|
||||
bump:
|
||||
bump-docs:
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
|
||||
LLAMA_VERSION?=5a4ff43e7dd049e35942bc3d12361dab2f155544
|
||||
LLAMA_VERSION?=31c511a968348281e11d590446bb815048a1e912
|
||||
LLAMA_REPO?=https://github.com/ggerganov/llama.cpp
|
||||
|
||||
CMAKE_ARGS?=
|
||||
|
||||
@@ -8,7 +8,7 @@ JOBS?=$(shell nproc --ignore=1)
|
||||
|
||||
# whisper.cpp version
|
||||
WHISPER_REPO?=https://github.com/ggml-org/whisper.cpp
|
||||
WHISPER_CPP_VERSION?=f16c12f3f55f5bd3d6ac8cf2f31ab90a42c884d5
|
||||
WHISPER_CPP_VERSION?=c62adfbd1ecdaea9e295c72d672992514a2d887c
|
||||
SO_TARGET?=libgowhisper.so
|
||||
|
||||
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF
|
||||
|
||||
@@ -61,12 +61,15 @@ func (gm GalleryElements[T]) Search(term string) GalleryElements[T] {
|
||||
term = strings.ToLower(term)
|
||||
for _, m := range gm {
|
||||
if fuzzy.Match(term, strings.ToLower(m.GetName())) ||
|
||||
fuzzy.Match(term, strings.ToLower(m.GetDescription())) ||
|
||||
fuzzy.Match(term, strings.ToLower(m.GetGallery().Name)) ||
|
||||
strings.Contains(strings.ToLower(m.GetName()), term) ||
|
||||
strings.Contains(strings.ToLower(m.GetDescription()), term) ||
|
||||
strings.Contains(strings.ToLower(m.GetGallery().Name), term) ||
|
||||
strings.Contains(strings.ToLower(strings.Join(m.GetTags(), ",")), term) {
|
||||
filteredModels = append(filteredModels, m)
|
||||
}
|
||||
}
|
||||
|
||||
return filteredModels
|
||||
}
|
||||
|
||||
|
||||
@@ -128,7 +128,6 @@ func API(application *application.Application) (*fiber.App, error) {
|
||||
router.Use(recover.New())
|
||||
}
|
||||
|
||||
// OpenTelemetry metrics for Prometheus export
|
||||
if !application.ApplicationConfig().DisableMetrics {
|
||||
metricsService, err := services.NewLocalAIMetricsService()
|
||||
if err != nil {
|
||||
@@ -142,7 +141,6 @@ func API(application *application.Application) (*fiber.App, error) {
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// Health Checks should always be exempt from auth, so register these first
|
||||
routes.HealthRoutes(router)
|
||||
|
||||
@@ -204,28 +202,12 @@ func API(application *application.Application) (*fiber.App, error) {
|
||||
routes.RegisterElevenLabsRoutes(router, requestExtractor, application.ModelConfigLoader(), application.ModelLoader(), application.ApplicationConfig())
|
||||
routes.RegisterLocalAIRoutes(router, requestExtractor, application.ModelConfigLoader(), application.ModelLoader(), application.ApplicationConfig(), application.GalleryService())
|
||||
routes.RegisterOpenAIRoutes(router, requestExtractor, application)
|
||||
|
||||
if !application.ApplicationConfig().DisableWebUI {
|
||||
|
||||
// Create metrics store for tracking usage (before API routes registration)
|
||||
metricsStore := services.NewInMemoryMetricsStore()
|
||||
|
||||
// Add metrics middleware BEFORE API routes so it can intercept them
|
||||
router.Use(middleware.MetricsMiddleware(metricsStore))
|
||||
|
||||
// Register cleanup on shutdown
|
||||
router.Hooks().OnShutdown(func() error {
|
||||
metricsStore.Stop()
|
||||
log.Info().Msg("Metrics store stopped")
|
||||
return nil
|
||||
})
|
||||
|
||||
// Create opcache for tracking UI operations
|
||||
opcache := services.NewOpCache(application.GalleryService())
|
||||
routes.RegisterUIAPIRoutes(router, application.ModelConfigLoader(), application.ApplicationConfig(), application.GalleryService(), opcache, metricsStore)
|
||||
routes.RegisterUIAPIRoutes(router, application.ModelConfigLoader(), application.ApplicationConfig(), application.GalleryService(), opcache)
|
||||
routes.RegisterUIRoutes(router, application.ModelConfigLoader(), application.ModelLoader(), application.ApplicationConfig(), application.GalleryService())
|
||||
}
|
||||
|
||||
routes.RegisterJINARoutes(router, requestExtractor, application.ModelConfigLoader(), application.ModelLoader(), application.ApplicationConfig())
|
||||
|
||||
// Define a custom 404 handler
|
||||
|
||||
@@ -1,61 +0,0 @@
|
||||
package localai
|
||||
|
||||
import (
|
||||
"github.com/gofiber/fiber/v2"
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
"github.com/mudler/LocalAI/core/gallery"
|
||||
"github.com/mudler/LocalAI/core/http/utils"
|
||||
"github.com/mudler/LocalAI/core/services"
|
||||
"github.com/mudler/LocalAI/internal"
|
||||
"github.com/mudler/LocalAI/pkg/model"
|
||||
)
|
||||
|
||||
// SettingsEndpoint handles the settings page which shows detailed model/backend management
|
||||
func SettingsEndpoint(appConfig *config.ApplicationConfig,
|
||||
cl *config.ModelConfigLoader, ml *model.ModelLoader, opcache *services.OpCache) func(*fiber.Ctx) error {
|
||||
return func(c *fiber.Ctx) error {
|
||||
modelConfigs := cl.GetAllModelsConfigs()
|
||||
galleryConfigs := map[string]*gallery.ModelConfig{}
|
||||
|
||||
installedBackends, err := gallery.ListSystemBackends(appConfig.SystemState)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for _, m := range modelConfigs {
|
||||
cfg, err := gallery.GetLocalModelConfiguration(ml.ModelPath, m.Name)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
galleryConfigs[m.Name] = cfg
|
||||
}
|
||||
|
||||
loadedModels := ml.ListLoadedModels()
|
||||
loadedModelsMap := map[string]bool{}
|
||||
for _, m := range loadedModels {
|
||||
loadedModelsMap[m.ID] = true
|
||||
}
|
||||
|
||||
modelsWithoutConfig, _ := services.ListModels(cl, ml, config.NoFilterFn, services.LOOSE_ONLY)
|
||||
|
||||
// Get model statuses to display in the UI the operation in progress
|
||||
processingModels, taskTypes := opcache.GetStatus()
|
||||
|
||||
summary := fiber.Map{
|
||||
"Title": "LocalAI - Settings & Management",
|
||||
"Version": internal.PrintableVersion(),
|
||||
"BaseURL": utils.BaseURL(c),
|
||||
"Models": modelsWithoutConfig,
|
||||
"ModelsConfig": modelConfigs,
|
||||
"GalleryConfig": galleryConfigs,
|
||||
"ApplicationConfig": appConfig,
|
||||
"ProcessingModels": processingModels,
|
||||
"TaskTypes": taskTypes,
|
||||
"LoadedModels": loadedModelsMap,
|
||||
"InstalledBackends": installedBackends,
|
||||
}
|
||||
|
||||
// Render settings page
|
||||
return c.Render("views/settings", summary)
|
||||
}
|
||||
}
|
||||
@@ -1,174 +0,0 @@
|
||||
package middleware
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/gofiber/fiber/v2"
|
||||
"github.com/mudler/LocalAI/core/services"
|
||||
"github.com/rs/zerolog/log"
|
||||
)
|
||||
|
||||
// MetricsMiddleware creates a middleware that tracks API usage metrics
|
||||
// Note: Uses CONTEXT_LOCALS_KEY_MODEL_NAME constant defined in request.go
|
||||
func MetricsMiddleware(metricsStore services.MetricsStore) fiber.Handler {
|
||||
return func(c *fiber.Ctx) error {
|
||||
path := c.Path()
|
||||
|
||||
// Skip tracking for UI routes, static files, and non-API endpoints
|
||||
if shouldSkipMetrics(path) {
|
||||
return c.Next()
|
||||
}
|
||||
|
||||
// Record start time
|
||||
start := time.Now()
|
||||
|
||||
// Get endpoint category
|
||||
endpoint := categorizeEndpoint(path)
|
||||
|
||||
// Continue with the request
|
||||
err := c.Next()
|
||||
|
||||
// Record metrics after request completes
|
||||
duration := time.Since(start)
|
||||
success := err == nil && c.Response().StatusCode() < 400
|
||||
|
||||
// Extract model name from context (set by RequestExtractor middleware)
|
||||
// Use the same constant as RequestExtractor
|
||||
model := "unknown"
|
||||
if modelVal, ok := c.Locals(CONTEXT_LOCALS_KEY_MODEL_NAME).(string); ok && modelVal != "" {
|
||||
model = modelVal
|
||||
log.Debug().Str("model", model).Str("endpoint", endpoint).Msg("Recording metrics for request")
|
||||
} else {
|
||||
// Fallback: try to extract from path params or query
|
||||
model = extractModelFromRequest(c)
|
||||
log.Debug().Str("model", model).Str("endpoint", endpoint).Msg("Recording metrics for request (fallback)")
|
||||
}
|
||||
|
||||
// Extract backend from response headers if available
|
||||
backend := string(c.Response().Header.Peek("X-LocalAI-Backend"))
|
||||
|
||||
// Record the request
|
||||
metricsStore.RecordRequest(endpoint, model, backend, success, duration)
|
||||
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
// shouldSkipMetrics determines if a request should be excluded from metrics
|
||||
func shouldSkipMetrics(path string) bool {
|
||||
// Skip UI routes
|
||||
skipPrefixes := []string{
|
||||
"/views/",
|
||||
"/static/",
|
||||
"/browse/",
|
||||
"/chat/",
|
||||
"/text2image/",
|
||||
"/tts/",
|
||||
"/talk/",
|
||||
"/models/edit/",
|
||||
"/import-model",
|
||||
"/settings",
|
||||
"/api/models", // UI API endpoints
|
||||
"/api/backends", // UI API endpoints
|
||||
"/api/operations", // UI API endpoints
|
||||
"/api/p2p", // UI API endpoints
|
||||
"/api/metrics", // Metrics API itself
|
||||
}
|
||||
|
||||
for _, prefix := range skipPrefixes {
|
||||
if strings.HasPrefix(path, prefix) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
// Also skip root path and other UI pages
|
||||
if path == "/" || path == "/index" {
|
||||
return true
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
// categorizeEndpoint maps request paths to friendly endpoint categories
|
||||
func categorizeEndpoint(path string) string {
|
||||
// OpenAI-compatible endpoints
|
||||
if strings.HasPrefix(path, "/v1/chat/completions") || strings.HasPrefix(path, "/chat/completions") {
|
||||
return "chat"
|
||||
}
|
||||
if strings.HasPrefix(path, "/v1/completions") || strings.HasPrefix(path, "/completions") {
|
||||
return "completions"
|
||||
}
|
||||
if strings.HasPrefix(path, "/v1/embeddings") || strings.HasPrefix(path, "/embeddings") {
|
||||
return "embeddings"
|
||||
}
|
||||
if strings.HasPrefix(path, "/v1/images/generations") || strings.HasPrefix(path, "/images/generations") {
|
||||
return "image-generation"
|
||||
}
|
||||
if strings.HasPrefix(path, "/v1/audio/transcriptions") || strings.HasPrefix(path, "/audio/transcriptions") {
|
||||
return "transcriptions"
|
||||
}
|
||||
if strings.HasPrefix(path, "/v1/audio/speech") || strings.HasPrefix(path, "/audio/speech") {
|
||||
return "text-to-speech"
|
||||
}
|
||||
if strings.HasPrefix(path, "/v1/models") || strings.HasPrefix(path, "/models") {
|
||||
return "models"
|
||||
}
|
||||
|
||||
// LocalAI-specific endpoints
|
||||
if strings.HasPrefix(path, "/v1/internal") {
|
||||
return "internal"
|
||||
}
|
||||
if strings.Contains(path, "/tts") {
|
||||
return "text-to-speech"
|
||||
}
|
||||
if strings.Contains(path, "/stt") || strings.Contains(path, "/whisper") {
|
||||
return "speech-to-text"
|
||||
}
|
||||
if strings.Contains(path, "/sound-generation") {
|
||||
return "sound-generation"
|
||||
}
|
||||
|
||||
// Default to the first path segment
|
||||
parts := strings.Split(strings.Trim(path, "/"), "/")
|
||||
if len(parts) > 0 {
|
||||
return parts[0]
|
||||
}
|
||||
|
||||
return "unknown"
|
||||
}
|
||||
|
||||
// extractModelFromRequest attempts to extract the model name from the request
|
||||
func extractModelFromRequest(c *fiber.Ctx) string {
|
||||
// Try query parameter first
|
||||
model := c.Query("model")
|
||||
if model != "" {
|
||||
return model
|
||||
}
|
||||
|
||||
// Try to extract from JSON body for POST requests
|
||||
if c.Method() == fiber.MethodPost {
|
||||
// Read body
|
||||
bodyBytes := c.Body()
|
||||
if len(bodyBytes) > 0 {
|
||||
// Parse JSON
|
||||
var reqBody map[string]interface{}
|
||||
if err := json.Unmarshal(bodyBytes, &reqBody); err == nil {
|
||||
if modelVal, ok := reqBody["model"]; ok {
|
||||
if modelStr, ok := modelVal.(string); ok {
|
||||
return modelStr
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Try path parameter for endpoints like /models/:model
|
||||
model = c.Params("model")
|
||||
if model != "" {
|
||||
return model
|
||||
}
|
||||
|
||||
return "unknown"
|
||||
}
|
||||
@@ -127,10 +127,6 @@ func (re *RequestExtractor) SetModelAndConfig(initializer func() schema.LocalAIR
|
||||
log.Debug().Str("context localModelName", localModelName).Msg("overriding empty model name in request body with value found earlier in middleware chain")
|
||||
input.ModelName(&localModelName)
|
||||
}
|
||||
} else {
|
||||
// Update context locals with the model name from the request body
|
||||
// This ensures downstream middleware (like metrics) can access it
|
||||
ctx.Locals(CONTEXT_LOCALS_KEY_MODEL_NAME, input.ModelName(nil))
|
||||
}
|
||||
|
||||
cfg, err := re.modelConfigLoader.LoadModelConfigFileByNameDefaultOptions(input.ModelName(nil), re.applicationConfig)
|
||||
|
||||
@@ -23,9 +23,6 @@ func RegisterUIRoutes(app *fiber.App,
|
||||
|
||||
app.Get("/", localai.WelcomeEndpoint(appConfig, cl, ml, processingOps))
|
||||
|
||||
// Settings page - detailed model/backend management
|
||||
app.Get("/settings", localai.SettingsEndpoint(appConfig, cl, ml, processingOps))
|
||||
|
||||
// P2P
|
||||
app.Get("/p2p", func(c *fiber.Ctx) error {
|
||||
summary := fiber.Map{
|
||||
|
||||
@@ -18,7 +18,7 @@ import (
|
||||
)
|
||||
|
||||
// RegisterUIAPIRoutes registers JSON API routes for the web UI
|
||||
func RegisterUIAPIRoutes(app *fiber.App, cl *config.ModelConfigLoader, appConfig *config.ApplicationConfig, galleryService *services.GalleryService, opcache *services.OpCache, metricsStore services.MetricsStore) {
|
||||
func RegisterUIAPIRoutes(app *fiber.App, cl *config.ModelConfigLoader, appConfig *config.ApplicationConfig, galleryService *services.GalleryService, opcache *services.OpCache) {
|
||||
|
||||
// Operations API - Get all current operations (models + backends)
|
||||
app.Get("/api/operations", func(c *fiber.Ctx) error {
|
||||
@@ -716,104 +716,4 @@ func RegisterUIAPIRoutes(app *fiber.App, cl *config.ModelConfigLoader, appConfig
|
||||
},
|
||||
})
|
||||
})
|
||||
|
||||
// Metrics API endpoints
|
||||
if metricsStore != nil {
|
||||
// Get metrics summary
|
||||
app.Get("/api/metrics/summary", func(c *fiber.Ctx) error {
|
||||
endpointStats := metricsStore.GetEndpointStats()
|
||||
modelStats := metricsStore.GetModelStats()
|
||||
backendStats := metricsStore.GetBackendStats()
|
||||
|
||||
// Get top 5 models
|
||||
type modelStat struct {
|
||||
Name string `json:"name"`
|
||||
Count int64 `json:"count"`
|
||||
}
|
||||
topModels := make([]modelStat, 0)
|
||||
for model, count := range modelStats {
|
||||
topModels = append(topModels, modelStat{Name: model, Count: count})
|
||||
}
|
||||
sort.Slice(topModels, func(i, j int) bool {
|
||||
return topModels[i].Count > topModels[j].Count
|
||||
})
|
||||
if len(topModels) > 5 {
|
||||
topModels = topModels[:5]
|
||||
}
|
||||
|
||||
// Get top 5 endpoints
|
||||
type endpointStat struct {
|
||||
Name string `json:"name"`
|
||||
Count int64 `json:"count"`
|
||||
}
|
||||
topEndpoints := make([]endpointStat, 0)
|
||||
for endpoint, count := range endpointStats {
|
||||
topEndpoints = append(topEndpoints, endpointStat{Name: endpoint, Count: count})
|
||||
}
|
||||
sort.Slice(topEndpoints, func(i, j int) bool {
|
||||
return topEndpoints[i].Count > topEndpoints[j].Count
|
||||
})
|
||||
if len(topEndpoints) > 5 {
|
||||
topEndpoints = topEndpoints[:5]
|
||||
}
|
||||
|
||||
return c.JSON(fiber.Map{
|
||||
"totalRequests": metricsStore.GetTotalRequests(),
|
||||
"successRate": metricsStore.GetSuccessRate(),
|
||||
"topModels": topModels,
|
||||
"topEndpoints": topEndpoints,
|
||||
"topBackends": backendStats,
|
||||
})
|
||||
})
|
||||
|
||||
// Get endpoint statistics
|
||||
app.Get("/api/metrics/endpoints", func(c *fiber.Ctx) error {
|
||||
stats := metricsStore.GetEndpointStats()
|
||||
return c.JSON(fiber.Map{
|
||||
"endpoints": stats,
|
||||
})
|
||||
})
|
||||
|
||||
// Get model statistics
|
||||
app.Get("/api/metrics/models", func(c *fiber.Ctx) error {
|
||||
stats := metricsStore.GetModelStats()
|
||||
return c.JSON(fiber.Map{
|
||||
"models": stats,
|
||||
})
|
||||
})
|
||||
|
||||
// Get backend statistics
|
||||
app.Get("/api/metrics/backends", func(c *fiber.Ctx) error {
|
||||
stats := metricsStore.GetBackendStats()
|
||||
return c.JSON(fiber.Map{
|
||||
"backends": stats,
|
||||
})
|
||||
})
|
||||
|
||||
// Get time series data
|
||||
app.Get("/api/metrics/timeseries", func(c *fiber.Ctx) error {
|
||||
// Default to last 24 hours
|
||||
hours := 24
|
||||
if hoursParam := c.Query("hours"); hoursParam != "" {
|
||||
if h, err := strconv.Atoi(hoursParam); err == nil && h > 0 {
|
||||
hours = h
|
||||
}
|
||||
}
|
||||
|
||||
timeSeries := metricsStore.GetRequestsOverTime(hours)
|
||||
return c.JSON(fiber.Map{
|
||||
"timeseries": timeSeries,
|
||||
"hours": hours,
|
||||
})
|
||||
})
|
||||
|
||||
// Reset metrics (optional - for testing/admin purposes)
|
||||
app.Post("/api/metrics/reset", func(c *fiber.Ctx) error {
|
||||
metricsStore.Reset()
|
||||
return c.JSON(fiber.Map{
|
||||
"success": true,
|
||||
"message": "Metrics reset successfully",
|
||||
})
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -22,9 +22,6 @@
|
||||
<a href="./" class="text-[#94A3B8] hover:text-[#E5E7EB] px-3 py-2 rounded-lg transition duration-300 ease-in-out hover:bg-[#1E293B] hover:shadow-[0_0_12px_rgba(56,189,248,0.15)] flex items-center group">
|
||||
<i class="fas fa-home text-[#38BDF8] mr-2 group-hover:scale-110 transition-transform"></i>Home
|
||||
</a>
|
||||
<a href="settings" class="text-[#94A3B8] hover:text-[#E5E7EB] px-3 py-2 rounded-lg transition duration-300 ease-in-out hover:bg-[#1E293B] hover:shadow-[0_0_12px_rgba(56,189,248,0.15)] flex items-center group">
|
||||
<i class="fas fa-cog text-[#8B5CF6] mr-2 group-hover:scale-110 transition-transform"></i>Settings
|
||||
</a>
|
||||
<a href="browse/" class="text-[#94A3B8] hover:text-[#E5E7EB] px-3 py-2 rounded-lg transition duration-300 ease-in-out hover:bg-[#1E293B] hover:shadow-[0_0_12px_rgba(56,189,248,0.15)] flex items-center group">
|
||||
<i class="fas fa-brain text-[#38BDF8] mr-2 group-hover:scale-110 transition-transform"></i>Models
|
||||
</a>
|
||||
@@ -58,9 +55,6 @@
|
||||
<a href="./" class="block text-[#94A3B8] hover:text-[#E5E7EB] hover:bg-[#1E293B] px-3 py-2 rounded-lg transition duration-300 ease-in-out flex items-center">
|
||||
<i class="fas fa-home text-[#38BDF8] mr-3 w-5 text-center"></i>Home
|
||||
</a>
|
||||
<a href="settings" class="block text-[#94A3B8] hover:text-[#E5E7EB] hover:bg-[#1E293B] px-3 py-2 rounded-lg transition duration-300 ease-in-out flex items-center">
|
||||
<i class="fas fa-cog text-[#8B5CF6] mr-3 w-5 text-center"></i>Settings
|
||||
</a>
|
||||
<a href="browse/" class="block text-[#94A3B8] hover:text-[#E5E7EB] hover:bg-[#1E293B] px-3 py-2 rounded-lg transition duration-300 ease-in-out flex items-center">
|
||||
<i class="fas fa-brain text-[#38BDF8] mr-3 w-5 text-center"></i>Models
|
||||
</a>
|
||||
|
||||
@@ -1,609 +0,0 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
{{template "views/partials/head" .}}
|
||||
|
||||
<body class="bg-[#101827] text-[#E5E7EB]">
|
||||
<div class="flex flex-col min-h-screen" x-data="indexDashboard()">
|
||||
|
||||
{{template "views/partials/navbar" .}}
|
||||
|
||||
<!-- Notifications -->
|
||||
<div class="fixed top-20 right-4 z-50 space-y-2" style="max-width: 400px;">
|
||||
<template x-for="notification in notifications" :key="notification.id">
|
||||
<div x-show="true"
|
||||
x-transition:enter="transform ease-out duration-300 transition"
|
||||
x-transition:enter-start="translate-x-full opacity-0"
|
||||
x-transition:enter-end="translate-x-0 opacity-100"
|
||||
x-transition:leave="transform ease-in duration-200 transition"
|
||||
x-transition:leave-start="translate-x-0 opacity-100"
|
||||
x-transition:leave-end="translate-x-full opacity-0"
|
||||
:class="notification.type === 'error' ? 'bg-red-500' : 'bg-green-500'"
|
||||
class="rounded-lg shadow-xl p-4 text-white flex items-start space-x-3">
|
||||
<div class="flex-shrink-0">
|
||||
<i :class="notification.type === 'error' ? 'fas fa-exclamation-circle' : 'fas fa-check-circle'" class="text-xl"></i>
|
||||
</div>
|
||||
<div class="flex-1 min-w-0">
|
||||
<p class="text-sm font-medium break-words" x-text="notification.message"></p>
|
||||
</div>
|
||||
<button @click="dismissNotification(notification.id)" class="flex-shrink-0 text-white hover:text-gray-200">
|
||||
<i class="fas fa-times"></i>
|
||||
</button>
|
||||
</div>
|
||||
</template>
|
||||
</div>
|
||||
|
||||
<div class="container mx-auto px-4 py-8 flex-grow">
|
||||
<!-- Hero Section -->
|
||||
<div class="relative bg-[#1E293B] border border-[#38BDF8]/20 rounded-3xl shadow-2xl shadow-[#38BDF8]/10 p-8 mb-12 overflow-hidden">
|
||||
<!-- Background Pattern -->
|
||||
<div class="absolute inset-0 opacity-10">
|
||||
<div class="absolute inset-0 bg-gradient-to-r from-[#38BDF8]/20 to-[#8B5CF6]/20"></div>
|
||||
<div class="absolute top-0 left-0 w-full h-full" style="background-image: radial-gradient(circle at 1px 1px, rgba(56,189,248,0.15) 1px, transparent 0); background-size: 20px 20px;"></div>
|
||||
</div>
|
||||
|
||||
<div class="relative max-w-5xl mx-auto text-center">
|
||||
<h1 class="text-5xl md:text-6xl font-bold text-[#E5E7EB] mb-6">
|
||||
<span class="bg-clip-text text-transparent bg-gradient-to-r from-[#38BDF8] via-[#8B5CF6] to-[#38BDF8]">
|
||||
Settings & Management
|
||||
</span>
|
||||
</h1>
|
||||
<p class="text-xl md:text-2xl text-[#94A3B8] mb-8 font-light">Manage your models, backends, and system configuration</p>
|
||||
|
||||
<div class="flex flex-wrap justify-center gap-4">
|
||||
<a href="/"
|
||||
class="group relative inline-flex items-center bg-gray-600 hover:bg-gray-700 text-white py-3 px-8 rounded-xl font-semibold transition-all duration-300 ease-in-out transform hover:scale-105 hover:shadow-[0_0_20px_rgba(75,85,99,0.4)]">
|
||||
<i class="fas fa-home mr-3 text-lg"></i>
|
||||
<span>Back to Dashboard</span>
|
||||
<i class="fas fa-arrow-left ml-3 opacity-70 group-hover:opacity-100 transition-opacity"></i>
|
||||
</a>
|
||||
|
||||
<a href="https://localai.io" target="_blank"
|
||||
class="group relative inline-flex items-center bg-[#38BDF8] hover:bg-[#38BDF8]/90 text-[#101827] py-3 px-8 rounded-xl font-semibold transition-all duration-300 ease-in-out transform hover:scale-105 hover:shadow-[0_0_20px_rgba(56,189,248,0.4)]">
|
||||
<i class="fas fa-book-reader mr-3 text-lg"></i>
|
||||
<span>Documentation</span>
|
||||
<i class="fas fa-external-link-alt ml-3 text-sm opacity-70 group-hover:opacity-100 transition-opacity"></i>
|
||||
</a>
|
||||
|
||||
<a href="browse"
|
||||
class="group relative inline-flex items-center bg-[#8B5CF6] hover:bg-[#8B5CF6]/90 text-white py-3 px-8 rounded-xl font-semibold transition-all duration-300 ease-in-out transform hover:scale-105 hover:shadow-[0_0_20px_rgba(139,92,246,0.4)]">
|
||||
<i class="fas fa-images mr-3 text-lg"></i>
|
||||
<span>Model Gallery</span>
|
||||
<i class="fas fa-arrow-right ml-3 opacity-0 group-hover:opacity-100 group-hover:translate-x-1 transition-all duration-300"></i>
|
||||
</a>
|
||||
|
||||
<a href="/import-model"
|
||||
class="group relative inline-flex items-center bg-green-600 hover:bg-green-700 text-white py-3 px-8 rounded-xl font-semibold transition-all duration-300 ease-in-out transform hover:scale-105 hover:shadow-[0_0_20px_rgba(34,197,94,0.4)]">
|
||||
<i class="fas fa-plus mr-3 text-lg"></i>
|
||||
<span>Import Model</span>
|
||||
<i class="fas fa-upload ml-3 opacity-70 group-hover:opacity-100 transition-opacity"></i>
|
||||
</a>
|
||||
|
||||
<button id="reload-models-btn"
|
||||
class="group relative inline-flex items-center bg-orange-600 hover:bg-orange-700 text-white py-3 px-8 rounded-xl font-semibold transition-all duration-300 ease-in-out transform hover:scale-105 hover:shadow-[0_0_20px_rgba(234,88,12,0.4)]">
|
||||
<i class="fas fa-sync-alt mr-3 text-lg"></i>
|
||||
<span>Update Models</span>
|
||||
<i class="fas fa-refresh ml-3 opacity-70 group-hover:opacity-100 transition-opacity"></i>
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Models Section -->
|
||||
<div class="models mt-8">
|
||||
{{template "views/partials/inprogress" .}}
|
||||
|
||||
{{ if eq (len .ModelsConfig) 0 }}
|
||||
<!-- No Models State -->
|
||||
<div class="relative bg-[#1E293B]/80 border border-[#38BDF8]/20 rounded-2xl p-12 shadow-xl backdrop-blur-sm">
|
||||
<div class="absolute inset-0 rounded-2xl bg-gradient-to-br from-yellow-500/5 to-orange-500/5"></div>
|
||||
<div class="relative text-center max-w-4xl mx-auto">
|
||||
<div class="inline-flex items-center justify-center w-20 h-20 rounded-full bg-yellow-500/10 border border-yellow-500/20 mb-6">
|
||||
<i class="text-yellow-400 text-3xl fas fa-robot"></i>
|
||||
</div>
|
||||
<h2 class="text-3xl md:text-4xl font-bold text-[#E5E7EB] mb-6">No models installed yet</h2>
|
||||
<p class="text-xl text-[#94A3B8] mb-8 leading-relaxed">Get started by installing models from the gallery or check our documentation for guidance</p>
|
||||
|
||||
<div class="flex flex-wrap justify-center gap-4 mb-8">
|
||||
<a href="browse" class="inline-flex items-center bg-[#38BDF8] hover:bg-[#38BDF8]/90 text-[#101827] py-3 px-6 rounded-xl font-semibold transition-all duration-300 transform hover:scale-105 hover:shadow-[0_0_20px_rgba(56,189,248,0.4)]">
|
||||
<i class="fas fa-images mr-2"></i>
|
||||
Browse Gallery
|
||||
</a>
|
||||
<a href="https://localai.io/basics/getting_started/" class="inline-flex items-center bg-[#1E293B] hover:bg-[#1E293B]/80 border border-[#38BDF8]/20 text-[#E5E7EB] py-3 px-6 rounded-xl font-semibold transition-all duration-300 transform hover:scale-105">
|
||||
<i class="fas fa-book mr-2"></i>
|
||||
Documentation
|
||||
</a>
|
||||
</div>
|
||||
|
||||
{{ if ne (len .Models) 0 }}
|
||||
<div class="mt-12 pt-8 border-t border-[#38BDF8]/20">
|
||||
<h3 class="text-2xl font-bold text-[#E5E7EB] mb-6">Detected Model Files</h3>
|
||||
<p class="text-[#94A3B8] mb-6">These models were found but don't have configuration files yet</p>
|
||||
<div class="grid grid-cols-1 md:grid-cols-2 lg:grid-cols-3 gap-4">
|
||||
{{ range .Models }}
|
||||
<div class="bg-[#101827] border border-[#38BDF8]/20 rounded-xl p-4 flex items-center hover:border-[#38BDF8]/50 transition-all duration-300 hover:shadow-[0_0_12px_rgba(56,189,248,0.15)]">
|
||||
<div class="w-10 h-10 rounded-lg bg-[#1E293B] flex items-center justify-center mr-3">
|
||||
<i class="fas fa-brain text-[#38BDF8]"></i>
|
||||
</div>
|
||||
<div class="flex-1">
|
||||
<p class="font-semibold text-[#E5E7EB] truncate">{{.}}</p>
|
||||
<p class="text-xs text-[#94A3B8]">No configuration</p>
|
||||
</div>
|
||||
</div>
|
||||
{{end}}
|
||||
</div>
|
||||
</div>
|
||||
{{end}}
|
||||
</div>
|
||||
</div>
|
||||
{{ else }}
|
||||
<!-- Models Grid -->
|
||||
{{ $modelsN := len .ModelsConfig}}
|
||||
{{ $modelsN = add $modelsN (len .Models)}}
|
||||
<div class="mb-8 flex flex-col md:flex-row md:items-center md:justify-between">
|
||||
<div class="mb-4 md:mb-0">
|
||||
<h2 class="text-3xl md:text-4xl font-bold text-[#E5E7EB] mb-2">
|
||||
Installed Models
|
||||
</h2>
|
||||
<p class="text-[#94A3B8]">
|
||||
<span class="text-[#38BDF8] font-semibold">{{$modelsN}}</span> model{{if gt $modelsN 1}}s{{end}} ready to use
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="grid grid-cols-1 md:grid-cols-2 xl:grid-cols-3 gap-6">
|
||||
{{$galleryConfig:=.GalleryConfig}}
|
||||
{{ $loadedModels := .LoadedModels }}
|
||||
{{$noicon:="https://upload.wikimedia.org/wikipedia/commons/6/65/No-Image-Placeholder.svg"}}
|
||||
|
||||
{{ range .ModelsConfig }}
|
||||
{{ $backendCfg := . }}
|
||||
{{ $cfg:= index $galleryConfig .Name}}
|
||||
<div class="group relative bg-[#1E293B] border border-[#38BDF8]/20 rounded-2xl overflow-hidden transition-all duration-500 hover:shadow-[0_0_20px_rgba(56,189,248,0.2)] hover:-translate-y-2 hover:border-[#38BDF8]/50">
|
||||
<!-- Card Header -->
|
||||
<div class="relative p-6 border-b border-[#101827]">
|
||||
<div class="flex items-start space-x-4">
|
||||
<div class="relative w-16 h-16 rounded-xl overflow-hidden flex-shrink-0 bg-[#101827] flex items-center justify-center group-hover:scale-110 transition-transform duration-300">
|
||||
<img {{ if and $cfg $cfg.Icon }}
|
||||
src="{{$cfg.Icon}}"
|
||||
{{ else }}
|
||||
src="{{$noicon}}"
|
||||
{{ end }}
|
||||
class="w-full h-full object-contain"
|
||||
alt="{{.Name}} icon"
|
||||
>
|
||||
{{ if index $loadedModels .Name }}
|
||||
<div class="absolute -top-1 -right-1 w-4 h-4 bg-green-500 rounded-full border-2 border-[#1E293B] animate-pulse"></div>
|
||||
{{ end }}
|
||||
</div>
|
||||
|
||||
<div class="flex-1 min-w-0">
|
||||
<div class="flex items-center justify-between">
|
||||
<h3 class="font-bold text-xl text-[#E5E7EB] truncate group-hover:text-[#38BDF8] transition-colors">{{.Name}}</h3>
|
||||
</div>
|
||||
|
||||
<div class="mt-2 flex flex-wrap gap-2">
|
||||
{{ if .Backend }}
|
||||
<span class="inline-flex items-center px-3 py-1 rounded-full text-xs font-semibold bg-[#38BDF8]/20 text-[#38BDF8] border border-[#38BDF8]/30">
|
||||
<i class="fas fa-cog mr-1"></i>{{.Backend}}
|
||||
</span>
|
||||
{{ else }}
|
||||
<span class="inline-flex items-center px-3 py-1 rounded-full text-xs font-semibold bg-yellow-500/10 text-yellow-300 border border-yellow-500/30">
|
||||
<i class="fas fa-magic mr-1"></i>Auto
|
||||
</span>
|
||||
{{ end }}
|
||||
|
||||
{{ if and $backendCfg (or (ne $backendCfg.MCP.Servers "") (ne $backendCfg.MCP.Stdio "")) }}
|
||||
<span class="inline-flex items-center px-3 py-1 rounded-full text-xs font-semibold bg-[#8B5CF6]/20 text-[#8B5CF6] border border-[#8B5CF6]/30">
|
||||
<i class="fas fa-plug mr-1"></i>MCP
|
||||
</span>
|
||||
{{ end }}
|
||||
|
||||
{{ if index $loadedModels .Name }}
|
||||
<span class="inline-flex items-center px-3 py-1 rounded-full text-xs font-semibold bg-green-500/10 text-green-300 border border-green-500/30">
|
||||
<i class="fas fa-play mr-1"></i>Running
|
||||
</span>
|
||||
{{ end }}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Usage Buttons -->
|
||||
<div class="p-6">
|
||||
<div class="flex flex-wrap gap-2 mb-4">
|
||||
{{ range .KnownUsecaseStrings }}
|
||||
{{ if eq . "FLAG_CHAT" }}
|
||||
<a href="chat/{{$backendCfg.Name}}" class="flex-1 min-w-0 group/chat inline-flex items-center justify-center rounded-xl px-4 py-3 text-sm font-semibold bg-[#38BDF8] hover:bg-[#38BDF8]/90 text-[#101827] transition-all duration-300 transform hover:scale-105 hover:shadow-[0_0_15px_rgba(56,189,248,0.4)]">
|
||||
<i class="fas fa-comment-alt mr-2 group-hover/chat:animate-bounce"></i>
|
||||
Chat
|
||||
</a>
|
||||
{{ end }}
|
||||
{{ if eq . "FLAG_IMAGE" }}
|
||||
<a href="text2image/{{$backendCfg.Name}}" class="flex-1 min-w-0 group/image inline-flex items-center justify-center rounded-xl px-4 py-3 text-sm font-semibold bg-green-600 hover:bg-green-700 text-white transition-all duration-300 transform hover:scale-105 hover:shadow-[0_0_15px_rgba(34,197,94,0.4)]">
|
||||
<i class="fas fa-image mr-2 group-hover/image:animate-pulse"></i>
|
||||
Image
|
||||
</a>
|
||||
{{ end }}
|
||||
{{ if eq . "FLAG_TTS" }}
|
||||
<a href="tts/{{$backendCfg.Name}}" class="flex-1 min-w-0 group/tts inline-flex items-center justify-center rounded-xl px-4 py-3 text-sm font-semibold bg-[#8B5CF6] hover:bg-[#8B5CF6]/90 text-white transition-all duration-300 transform hover:scale-105 hover:shadow-[0_0_15px_rgba(139,92,246,0.4)]">
|
||||
<i class="fas fa-microphone mr-2 group-hover/tts:animate-pulse"></i>
|
||||
TTS
|
||||
</a>
|
||||
{{ end }}
|
||||
{{ end }}
|
||||
</div>
|
||||
|
||||
<!-- Action Buttons -->
|
||||
<div class="flex justify-between items-center pt-4 border-t border-[#101827]">
|
||||
<div class="flex gap-2">
|
||||
{{ if index $loadedModels .Name }}
|
||||
<button class="group/stop inline-flex items-center text-sm font-semibold text-red-400 hover:text-red-300 hover:bg-red-500/10 rounded-lg px-3 py-2 transition-all duration-200"
|
||||
data-twe-ripple-init=""
|
||||
onclick="handleStopModel('{{.Name}}')">
|
||||
<i class="fas fa-stop mr-2 group-hover/stop:animate-pulse"></i>Stop
|
||||
</button>
|
||||
{{ end }}
|
||||
</div>
|
||||
|
||||
<div class="flex gap-2">
|
||||
<a href="/models/edit/{{.Name}}"
|
||||
class="group/edit inline-flex items-center text-sm font-semibold text-[#38BDF8] hover:text-[#8B5CF6] hover:bg-[#38BDF8]/10 rounded-lg px-3 py-2 transition-all duration-200">
|
||||
<i class="fas fa-edit mr-2 group-hover/edit:animate-pulse"></i>Edit
|
||||
</a>
|
||||
<button
|
||||
class="group/delete inline-flex items-center text-sm font-semibold text-red-400 hover:text-red-300 hover:bg-red-500/10 rounded-lg px-3 py-2 transition-all duration-200"
|
||||
data-twe-ripple-init=""
|
||||
onclick="handleDeleteModel('{{.Name}}')">
|
||||
<i class="fas fa-trash-alt mr-2 group-hover/delete:animate-bounce"></i>Delete
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
{{ end }}
|
||||
|
||||
<!-- Models without config -->
|
||||
{{ range .Models }}
|
||||
<div class="group relative bg-[#1E293B]/80 border border-[#38BDF8]/20 rounded-2xl overflow-hidden transition-all duration-500 hover:shadow-[0_0_15px_rgba(234,179,8,0.15)] hover:-translate-y-1 hover:border-yellow-500/30">
|
||||
<div class="p-6">
|
||||
<div class="flex items-start space-x-4">
|
||||
<div class="w-16 h-16 rounded-xl overflow-hidden flex-shrink-0 bg-[#101827] flex items-center justify-center">
|
||||
<i class="fas fa-brain text-2xl text-[#94A3B8]"></i>
|
||||
</div>
|
||||
<div class="flex-1 min-w-0">
|
||||
<h3 class="font-bold text-xl text-[#E5E7EB] truncate mb-2">{{.}}</h3>
|
||||
|
||||
<div class="flex flex-wrap gap-2 mb-4">
|
||||
<span class="inline-flex items-center px-3 py-1 rounded-full text-xs font-semibold bg-yellow-500/10 text-yellow-300 border border-yellow-500/30">
|
||||
<i class="fas fa-magic mr-1"></i>Auto Backend
|
||||
</span>
|
||||
<span class="inline-flex items-center px-3 py-1 rounded-full text-xs font-semibold bg-orange-500/10 text-orange-300 border border-orange-500/30">
|
||||
<i class="fas fa-exclamation-triangle mr-1"></i>No Config
|
||||
</span>
|
||||
</div>
|
||||
|
||||
<div class="flex justify-center pt-4">
|
||||
<span class="inline-flex items-center text-sm font-medium text-[#94A3B8] px-4 py-2 bg-[#101827]/50 rounded-lg">
|
||||
<i class="fas fa-info-circle mr-2"></i>
|
||||
Configuration required for full functionality
|
||||
</span>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
{{end}}
|
||||
</div>
|
||||
{{ end }}
|
||||
</div>
|
||||
|
||||
<!-- Backends Section -->
|
||||
<div class="mt-12">
|
||||
<div class="mb-8">
|
||||
<h2 class="text-3xl md:text-4xl font-bold text-[#E5E7EB] mb-2">
|
||||
Installed Backends
|
||||
</h2>
|
||||
<p class="text-[#94A3B8]">
|
||||
<span class="text-[#8B5CF6] font-semibold">{{len .InstalledBackends}}</span> backend{{if gt (len .InstalledBackends) 1}}s{{end}} ready to use
|
||||
</p>
|
||||
</div>
|
||||
|
||||
{{ if eq (len .InstalledBackends) 0 }}
|
||||
<!-- No backends state -->
|
||||
<div class="relative bg-[#1E293B]/80 border border-[#8B5CF6]/20 rounded-2xl p-12 shadow-xl backdrop-blur-sm">
|
||||
<div class="absolute inset-0 rounded-2xl bg-gradient-to-br from-purple-500/5 to-cyan-500/5"></div>
|
||||
<div class="relative text-center max-w-4xl mx-auto">
|
||||
<div class="inline-flex items-center justify-center w-20 h-20 rounded-full bg-[#8B5CF6]/10 border border-[#8B5CF6]/20 mb-6">
|
||||
<i class="text-[#8B5CF6] text-3xl fas fa-cogs"></i>
|
||||
</div>
|
||||
<h2 class="text-3xl md:text-4xl font-bold text-[#E5E7EB] mb-6">No backends installed yet</h2>
|
||||
<p class="text-xl text-[#94A3B8] mb-8 leading-relaxed">Backends power your AI models. Install them from the backend gallery to get started</p>
|
||||
|
||||
<div class="flex flex-wrap justify-center gap-4">
|
||||
<a href="/browse/backends" class="inline-flex items-center bg-[#8B5CF6] hover:bg-[#8B5CF6]/90 text-white py-3 px-6 rounded-xl font-semibold transition-all duration-300 transform hover:scale-105 hover:shadow-[0_0_20px_rgba(139,92,246,0.4)]">
|
||||
<i class="fas fa-cogs mr-2"></i>
|
||||
Browse Backend Gallery
|
||||
</a>
|
||||
<a href="https://localai.io/backends/" target="_blank" class="inline-flex items-center bg-[#1E293B] hover:bg-[#1E293B]/80 border border-[#8B5CF6]/20 text-[#E5E7EB] py-3 px-6 rounded-xl font-semibold transition-all duration-300 transform hover:scale-105">
|
||||
<i class="fas fa-book mr-2"></i>
|
||||
Documentation
|
||||
</a>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
{{ else }}
|
||||
<!-- Backends Grid -->
|
||||
<div class="grid grid-cols-1 md:grid-cols-2 xl:grid-cols-3 gap-6">
|
||||
{{ range .InstalledBackends }}
|
||||
<div class="group relative bg-[#1E293B] border border-[#8B5CF6]/20 rounded-2xl overflow-hidden transition-all duration-500 hover:shadow-[0_0_20px_rgba(139,92,246,0.2)] hover:-translate-y-2 hover:border-[#8B5CF6]/50">
|
||||
<!-- Card Header -->
|
||||
<div class="relative p-6 border-b border-[#101827]">
|
||||
<div class="flex items-start space-x-4">
|
||||
<div class="w-16 h-16 rounded-xl overflow-hidden flex-shrink-0 bg-[#101827] flex items-center justify-center group-hover:scale-110 transition-transform duration-300">
|
||||
<i class="fas fa-cog text-2xl text-[#8B5CF6]"></i>
|
||||
</div>
|
||||
<div class="flex-1 min-w-0">
|
||||
<h3 class="font-bold text-xl text-[#E5E7EB] truncate mb-2 group-hover:text-[#8B5CF6] transition-colors">{{.Name}}</h3>
|
||||
|
||||
<div class="flex flex-wrap gap-2">
|
||||
{{ if .IsSystem }}
|
||||
<span class="inline-flex items-center px-3 py-1 rounded-full text-xs font-semibold bg-blue-500/10 text-blue-300 border border-blue-500/30">
|
||||
<i class="fas fa-shield-alt mr-1"></i>System
|
||||
</span>
|
||||
{{ else }}
|
||||
<span class="inline-flex items-center px-3 py-1 rounded-full text-xs font-semibold bg-green-500/10 text-green-300 border border-green-500/30">
|
||||
<i class="fas fa-download mr-1"></i>User Installed
|
||||
</span>
|
||||
{{ end }}
|
||||
|
||||
{{ if .IsMeta }}
|
||||
<span class="inline-flex items-center px-3 py-1 rounded-full text-xs font-semibold bg-[#8B5CF6]/20 text-[#8B5CF6] border border-[#8B5CF6]/30">
|
||||
<i class="fas fa-layer-group mr-1"></i>Meta
|
||||
</span>
|
||||
{{ end }}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Backend Details -->
|
||||
<div class="p-6">
|
||||
<div class="space-y-3 text-sm">
|
||||
{{ if and .Metadata .Metadata.Alias }}
|
||||
<div class="flex items-start">
|
||||
<i class="fas fa-tag text-[#94A3B8] mr-2 mt-0.5"></i>
|
||||
<div class="flex-1">
|
||||
<span class="text-[#94A3B8]">Alias:</span>
|
||||
<span class="text-[#E5E7EB] ml-1">{{.Metadata.Alias}}</span>
|
||||
</div>
|
||||
</div>
|
||||
{{ end }}
|
||||
|
||||
{{ if and .Metadata .Metadata.InstalledAt }}
|
||||
<div class="flex items-start">
|
||||
<i class="fas fa-calendar text-[#94A3B8] mr-2 mt-0.5"></i>
|
||||
<div class="flex-1">
|
||||
<span class="text-[#94A3B8]">Installed:</span>
|
||||
<span class="text-[#E5E7EB] ml-1">{{.Metadata.InstalledAt}}</span>
|
||||
</div>
|
||||
</div>
|
||||
{{ end }}
|
||||
|
||||
{{ if and .Metadata .Metadata.MetaBackendFor }}
|
||||
<div class="flex items-start">
|
||||
<i class="fas fa-link text-[#94A3B8] mr-2 mt-0.5"></i>
|
||||
<div class="flex-1">
|
||||
<span class="text-[#94A3B8]">Meta backend for:</span>
|
||||
<span class="text-[#8B5CF6] ml-1 font-semibold">{{.Metadata.MetaBackendFor}}</span>
|
||||
</div>
|
||||
</div>
|
||||
{{ end }}
|
||||
|
||||
{{ if and .Metadata .Metadata.GalleryURL }}
|
||||
<div class="flex items-start">
|
||||
<i class="fas fa-globe text-[#94A3B8] mr-2 mt-0.5"></i>
|
||||
<div class="flex-1">
|
||||
<span class="text-[#94A3B8]">Gallery:</span>
|
||||
<a href="{{.Metadata.GalleryURL}}" target="_blank" class="text-[#38BDF8] hover:text-[#38BDF8]/80 ml-1 truncate inline-block max-w-[200px] align-bottom">
|
||||
{{.Metadata.GalleryURL}}
|
||||
<i class="fas fa-external-link-alt text-xs ml-1"></i>
|
||||
</a>
|
||||
</div>
|
||||
</div>
|
||||
{{ end }}
|
||||
|
||||
<div class="flex items-start">
|
||||
<i class="fas fa-folder text-[#94A3B8] mr-2 mt-0.5"></i>
|
||||
<div class="flex-1">
|
||||
<span class="text-[#94A3B8]">Path:</span>
|
||||
<span class="text-[#E5E7EB] ml-1 text-xs font-mono truncate block">{{.RunFile}}</span>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Action Buttons -->
|
||||
{{ if not .IsSystem }}
|
||||
<div class="flex justify-end items-center pt-4 mt-4 border-t border-[#101827]">
|
||||
<button
|
||||
@click="deleteBackend('{{.Name}}')"
|
||||
class="group/delete inline-flex items-center text-sm font-semibold text-red-400 hover:text-red-300 hover:bg-red-500/10 rounded-lg px-3 py-2 transition-all duration-200">
|
||||
<i class="fas fa-trash-alt mr-2 group-hover/delete:animate-bounce"></i>Delete
|
||||
</button>
|
||||
</div>
|
||||
{{ end }}
|
||||
</div>
|
||||
</div>
|
||||
{{end}}
|
||||
</div>
|
||||
{{ end }}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{{template "views/partials/footer" .}}
|
||||
</div>
|
||||
|
||||
<script>
|
||||
// Alpine.js component for index dashboard.
// Provides toast-style notifications and the backend-deletion workflow.
function indexDashboard() {
    return {
        // Active toast notifications: array of { id, message, type }.
        notifications: [],

        init() {
            // Initialize component
        },

        // Queue a toast notification; it is auto-dismissed after 5 seconds.
        // `type` is 'success' or 'error' (used for styling by the template).
        addNotification(message, type = 'success') {
            const id = Date.now();
            this.notifications.push({ id, message, type });
            // Auto-dismiss after 5 seconds
            setTimeout(() => this.dismissNotification(id), 5000);
        },

        // Remove a single toast by its id (no-op if already gone).
        dismissNotification(id) {
            this.notifications = this.notifications.filter(n => n.id !== id);
        },

        // Ask the user for confirmation, then request server-side deletion of a
        // user-installed backend. On success the page reloads after a short
        // delay so the toast is visible first.
        async deleteBackend(backendName) {
            if (!confirm(`Are you sure you want to delete the backend "${backendName}"?`)) {
                return;
            }

            try {
                const response = await fetch(`/api/backends/system/delete/${encodeURIComponent(backendName)}`, {
                    method: 'POST'
                });

                const data = await response.json();

                if (response.ok && data.success) {
                    this.addNotification(`Backend "${backendName}" deleted successfully!`, 'success');
                    // Reload page after short delay
                    setTimeout(() => {
                        window.location.reload();
                    }, 1500);
                } else {
                    this.addNotification(`Failed to delete backend: ${data.error || 'Unknown error'}`, 'error');
                }
            } catch (error) {
                console.error('Error deleting backend:', error);
                this.addNotification(`Failed to delete backend: ${error.message}`, 'error');
            }
        }
    }
}
|
||||
|
||||
// Ask the user for confirmation, then request the server to shut the model
// down via POST /backend/shutdown. Reloads the page on success; shows an
// alert on any failure.
async function handleStopModel(modelName) {
    if (!confirm('Are you sure you wish to stop this model?')) {
        return;
    }

    try {
        const response = await fetch('/backend/shutdown', {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({ model: modelName })
        });

        if (!response.ok) {
            alert('Failed to stop model');
            return;
        }

        window.location.reload();
    } catch (error) {
        console.error('Error stopping model:', error);
        alert('Failed to stop model');
    }
}
|
||||
|
||||
// Confirm with the user, then delete the model server-side and refresh the
// page so the model list reflects the removal. Alerts on any failure.
async function handleDeleteModel(modelName) {
    if (!confirm('Are you sure you wish to delete this model?')) {
        return;
    }

    try {
        const response = await fetch(`/api/models/delete/${encodeURIComponent(modelName)}`, {
            method: 'POST'
        });

        if (!response.ok) {
            alert('Failed to delete model');
            return;
        }

        window.location.reload();
    } catch (error) {
        console.error('Error deleting model:', error);
        alert('Failed to delete model');
    }
}
|
||||
|
||||
// Handle reload models button: drives a loading/success/error state machine
// on the button while POSTing /models/reload, then refreshes the page on
// success so the new model list is shown.
document.addEventListener('DOMContentLoaded', function() {
    const reloadBtn = document.getElementById('reload-models-btn');
    if (!reloadBtn) {
        return;
    }

    reloadBtn.addEventListener('click', function() {
        const button = this;
        const label = button.querySelector('span');
        const icon = button.querySelector('i');
        const originalText = label.textContent;

        // Restore the idle appearance a few seconds after an error.
        const resetButton = () => {
            setTimeout(() => {
                button.disabled = false;
                label.textContent = originalText;
                icon.classList.remove('fa-check');
                icon.classList.add('fa-sync-alt');
            }, 3000);
        };

        // Show loading state.
        button.disabled = true;
        label.textContent = 'Updating...';
        icon.classList.add('fa-spin');

        // Make the API call.
        fetch('/models/reload', {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' }
        })
            .then((response) => response.json())
            .then((data) => {
                if (data.success) {
                    // Show success state briefly, then reload the page.
                    label.textContent = 'Updated!';
                    icon.classList.remove('fa-spin', 'fa-sync-alt');
                    icon.classList.add('fa-check');
                    setTimeout(() => {
                        window.location.reload();
                    }, 1000);
                } else {
                    // Show error state, then reset the button.
                    label.textContent = 'Error!';
                    icon.classList.remove('fa-spin');
                    console.error('Failed to reload models:', data.error);
                    resetButton();
                }
            })
            .catch((error) => {
                // Show error state, then reset the button.
                label.textContent = 'Error!';
                icon.classList.remove('fa-spin');
                console.error('Error reloading models:', error);
                resetButton();
            });
    });
});
|
||||
</script>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
@@ -2,8 +2,6 @@ package services
|
||||
|
||||
import (
|
||||
"context"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/rs/zerolog/log"
|
||||
"go.opentelemetry.io/otel/attribute"
|
||||
@@ -12,315 +10,6 @@ import (
|
||||
metricApi "go.opentelemetry.io/otel/sdk/metric"
|
||||
)
|
||||
|
||||
// MetricsStore is the interface for storing and retrieving metrics
// This allows for future implementations with persistence (JSON files, databases, etc.)
type MetricsStore interface {
	// RecordRequest records one API call outcome for the given
	// endpoint/model/backend labels (empty labels may be skipped by
	// implementations).
	RecordRequest(endpoint, model, backend string, success bool, duration time.Duration)
	// GetEndpointStats returns request counts keyed by endpoint.
	GetEndpointStats() map[string]int64
	// GetModelStats returns request counts keyed by model.
	GetModelStats() map[string]int64
	// GetBackendStats returns request counts keyed by backend.
	GetBackendStats() map[string]int64
	// GetRequestsOverTime returns bucketed request counts for the last
	// `hours` hours.
	GetRequestsOverTime(hours int) []TimeSeriesPoint
	// GetTotalRequests returns the total number of recorded requests.
	GetTotalRequests() int64
	// GetSuccessRate returns the percentage of successful requests.
	GetSuccessRate() float64
	// Reset clears all recorded metrics.
	Reset()
}
|
||||
|
||||
// TimeSeriesPoint represents a single point in the time series
type TimeSeriesPoint struct {
	Timestamp time.Time `json:"timestamp"` // bucket start time (hourly buckets in GetRequestsOverTime)
	Count     int64     `json:"count"`     // number of requests in the bucket
}
|
||||
|
||||
// RequestRecord stores individual request information
type RequestRecord struct {
	Timestamp time.Time     // when the request was recorded
	Endpoint  string        // API endpoint that served the request (may be empty)
	Model     string        // model the request targeted (may be empty)
	Backend   string        // backend that handled the request (may be empty)
	Success   bool          // whether the request succeeded
	Duration  time.Duration // how long the request took
}
|
||||
|
||||
// InMemoryMetricsStore implements MetricsStore with in-memory storage
type InMemoryMetricsStore struct {
	endpoints    map[string]int64 // request counts keyed by endpoint
	models       map[string]int64 // request counts keyed by model
	backends     map[string]int64 // request counts keyed by backend
	timeSeries   []RequestRecord  // one entry per request; pruned periodically by pruneLoop
	successCount int64            // total successful requests
	failureCount int64            // total failed requests
	mu           sync.RWMutex     // guards all fields above
	stopChan     chan struct{}    // closed by Stop to terminate pruneLoop
	maxRecords   int              // Maximum number of time series records to keep
	maxMapKeys   int              // Maximum number of unique keys per map
	pruneEvery   time.Duration    // How often to prune old data
}
|
||||
|
||||
// NewInMemoryMetricsStore creates a new in-memory metrics store
|
||||
func NewInMemoryMetricsStore() *InMemoryMetricsStore {
|
||||
store := &InMemoryMetricsStore{
|
||||
endpoints: make(map[string]int64),
|
||||
models: make(map[string]int64),
|
||||
backends: make(map[string]int64),
|
||||
timeSeries: make([]RequestRecord, 0),
|
||||
stopChan: make(chan struct{}),
|
||||
maxRecords: 10000, // Limit to 10k records (~1-2MB of memory)
|
||||
maxMapKeys: 1000, // Limit to 1000 unique keys per map (~50KB per map)
|
||||
pruneEvery: 5 * time.Minute, // Prune every 5 minutes instead of every request
|
||||
}
|
||||
|
||||
// Start background pruning goroutine
|
||||
go store.pruneLoop()
|
||||
|
||||
return store
|
||||
}
|
||||
|
||||
// pruneLoop runs periodically to clean up old data until Stop is called.
func (m *InMemoryMetricsStore) pruneLoop() {
	// Fires once per pruneEvery interval; stopped on return to release the
	// ticker's resources.
	ticker := time.NewTicker(m.pruneEvery)
	defer ticker.Stop()

	for {
		select {
		case <-ticker.C:
			m.pruneOldData()
		case <-m.stopChan:
			// Stop() closed the channel; terminate the goroutine.
			return
		}
	}
}
|
||||
|
||||
// pruneOldData removes data older than 24 hours and enforces max record limit
|
||||
func (m *InMemoryMetricsStore) pruneOldData() {
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
|
||||
cutoff := time.Now().Add(-24 * time.Hour)
|
||||
newTimeSeries := make([]RequestRecord, 0, len(m.timeSeries))
|
||||
|
||||
for _, r := range m.timeSeries {
|
||||
if r.Timestamp.After(cutoff) {
|
||||
newTimeSeries = append(newTimeSeries, r)
|
||||
}
|
||||
}
|
||||
|
||||
// If still over the limit, keep only the most recent records
|
||||
if len(newTimeSeries) > m.maxRecords {
|
||||
// Keep the most recent maxRecords entries
|
||||
newTimeSeries = newTimeSeries[len(newTimeSeries)-m.maxRecords:]
|
||||
log.Warn().
|
||||
Int("dropped", len(m.timeSeries)-len(newTimeSeries)).
|
||||
Int("kept", len(newTimeSeries)).
|
||||
Msg("Metrics store exceeded maximum records, dropping oldest entries")
|
||||
}
|
||||
|
||||
m.timeSeries = newTimeSeries
|
||||
|
||||
// Also check if maps have grown too large
|
||||
m.pruneMapIfNeeded("endpoints", m.endpoints, m.maxMapKeys)
|
||||
m.pruneMapIfNeeded("models", m.models, m.maxMapKeys)
|
||||
m.pruneMapIfNeeded("backends", m.backends, m.maxMapKeys)
|
||||
}
|
||||
|
||||
// pruneMapIfNeeded keeps only the top N entries in a map by count
|
||||
func (m *InMemoryMetricsStore) pruneMapIfNeeded(name string, mapData map[string]int64, maxKeys int) {
|
||||
if len(mapData) <= maxKeys {
|
||||
return
|
||||
}
|
||||
|
||||
// Convert to slice for sorting
|
||||
type kv struct {
|
||||
key string
|
||||
value int64
|
||||
}
|
||||
|
||||
entries := make([]kv, 0, len(mapData))
|
||||
for k, v := range mapData {
|
||||
entries = append(entries, kv{k, v})
|
||||
}
|
||||
|
||||
// Sort by value descending (keep highest counts)
|
||||
for i := 0; i < len(entries); i++ {
|
||||
for j := i + 1; j < len(entries); j++ {
|
||||
if entries[i].value < entries[j].value {
|
||||
entries[i], entries[j] = entries[j], entries[i]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Keep only top maxKeys entries
|
||||
for k := range mapData {
|
||||
delete(mapData, k)
|
||||
}
|
||||
|
||||
for i := 0; i < maxKeys && i < len(entries); i++ {
|
||||
mapData[entries[i].key] = entries[i].value
|
||||
}
|
||||
|
||||
log.Warn().
|
||||
Str("map", name).
|
||||
Int("dropped", len(entries)-maxKeys).
|
||||
Int("kept", maxKeys).
|
||||
Msg("Metrics map exceeded maximum keys, keeping only top entries")
|
||||
}
|
||||
|
||||
// Stop gracefully shuts down the metrics store by terminating the background
// pruning goroutine. It must be called at most once: closing an
// already-closed channel panics.
func (m *InMemoryMetricsStore) Stop() {
	close(m.stopChan)
}
|
||||
|
||||
// RecordRequest records a new API request
|
||||
func (m *InMemoryMetricsStore) RecordRequest(endpoint, model, backend string, success bool, duration time.Duration) {
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
|
||||
// Record endpoint
|
||||
if endpoint != "" {
|
||||
m.endpoints[endpoint]++
|
||||
}
|
||||
|
||||
// Record model
|
||||
if model != "" {
|
||||
m.models[model]++
|
||||
}
|
||||
|
||||
// Record backend
|
||||
if backend != "" {
|
||||
m.backends[backend]++
|
||||
}
|
||||
|
||||
// Record success/failure
|
||||
if success {
|
||||
m.successCount++
|
||||
} else {
|
||||
m.failureCount++
|
||||
}
|
||||
|
||||
// Add to time series
|
||||
record := RequestRecord{
|
||||
Timestamp: time.Now(),
|
||||
Endpoint: endpoint,
|
||||
Model: model,
|
||||
Backend: backend,
|
||||
Success: success,
|
||||
Duration: duration,
|
||||
}
|
||||
m.timeSeries = append(m.timeSeries, record)
|
||||
|
||||
// Note: Pruning is done periodically by pruneLoop() to avoid overhead on every request
|
||||
}
|
||||
|
||||
// GetEndpointStats returns request counts per endpoint
|
||||
func (m *InMemoryMetricsStore) GetEndpointStats() map[string]int64 {
|
||||
m.mu.RLock()
|
||||
defer m.mu.RUnlock()
|
||||
|
||||
result := make(map[string]int64)
|
||||
for k, v := range m.endpoints {
|
||||
result[k] = v
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
// GetModelStats returns request counts per model
|
||||
func (m *InMemoryMetricsStore) GetModelStats() map[string]int64 {
|
||||
m.mu.RLock()
|
||||
defer m.mu.RUnlock()
|
||||
|
||||
result := make(map[string]int64)
|
||||
for k, v := range m.models {
|
||||
result[k] = v
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
// GetBackendStats returns request counts per backend
|
||||
func (m *InMemoryMetricsStore) GetBackendStats() map[string]int64 {
|
||||
m.mu.RLock()
|
||||
defer m.mu.RUnlock()
|
||||
|
||||
result := make(map[string]int64)
|
||||
for k, v := range m.backends {
|
||||
result[k] = v
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
// GetRequestsOverTime returns time series data for the specified number of hours
|
||||
func (m *InMemoryMetricsStore) GetRequestsOverTime(hours int) []TimeSeriesPoint {
|
||||
m.mu.RLock()
|
||||
defer m.mu.RUnlock()
|
||||
|
||||
cutoff := time.Now().Add(-time.Duration(hours) * time.Hour)
|
||||
|
||||
// Group by hour
|
||||
hourlyBuckets := make(map[int64]int64)
|
||||
for _, record := range m.timeSeries {
|
||||
if record.Timestamp.After(cutoff) {
|
||||
// Round down to the hour
|
||||
hourTimestamp := record.Timestamp.Truncate(time.Hour).Unix()
|
||||
hourlyBuckets[hourTimestamp]++
|
||||
}
|
||||
}
|
||||
|
||||
// Convert to sorted time series
|
||||
result := make([]TimeSeriesPoint, 0)
|
||||
for ts, count := range hourlyBuckets {
|
||||
result = append(result, TimeSeriesPoint{
|
||||
Timestamp: time.Unix(ts, 0),
|
||||
Count: count,
|
||||
})
|
||||
}
|
||||
|
||||
// Sort by timestamp
|
||||
for i := 0; i < len(result); i++ {
|
||||
for j := i + 1; j < len(result); j++ {
|
||||
if result[i].Timestamp.After(result[j].Timestamp) {
|
||||
result[i], result[j] = result[j], result[i]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return result
|
||||
}
|
||||
|
||||
// GetTotalRequests returns the total number of requests recorded
|
||||
func (m *InMemoryMetricsStore) GetTotalRequests() int64 {
|
||||
m.mu.RLock()
|
||||
defer m.mu.RUnlock()
|
||||
|
||||
return m.successCount + m.failureCount
|
||||
}
|
||||
|
||||
// GetSuccessRate returns the percentage of successful requests
|
||||
func (m *InMemoryMetricsStore) GetSuccessRate() float64 {
|
||||
m.mu.RLock()
|
||||
defer m.mu.RUnlock()
|
||||
|
||||
total := m.successCount + m.failureCount
|
||||
if total == 0 {
|
||||
return 0.0
|
||||
}
|
||||
return float64(m.successCount) / float64(total) * 100.0
|
||||
}
|
||||
|
||||
// Reset clears all metrics
|
||||
func (m *InMemoryMetricsStore) Reset() {
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
|
||||
m.endpoints = make(map[string]int64)
|
||||
m.models = make(map[string]int64)
|
||||
m.backends = make(map[string]int64)
|
||||
m.timeSeries = make([]RequestRecord, 0)
|
||||
m.successCount = 0
|
||||
m.failureCount = 0
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// OpenTelemetry Metrics Service (for Prometheus export)
|
||||
// ============================================================================
|
||||
|
||||
type LocalAIMetricsService struct {
|
||||
Meter metric.Meter
|
||||
ApiTimeMetric metric.Float64Histogram
|
||||
@@ -334,7 +23,7 @@ func (m *LocalAIMetricsService) ObserveAPICall(method string, path string, durat
|
||||
m.ApiTimeMetric.Record(context.Background(), duration, opts)
|
||||
}
|
||||
|
||||
// NewLocalAIMetricsService bootstraps the OpenTelemetry pipeline for Prometheus export.
|
||||
// setupOTelSDK bootstraps the OpenTelemetry pipeline.
|
||||
// If it does not return an error, make sure to call shutdown for proper cleanup.
|
||||
func NewLocalAIMetricsService() (*LocalAIMetricsService, error) {
|
||||
exporter, err := prometheus.New()
|
||||
|
||||
@@ -1,4 +1,186 @@
|
||||
---
|
||||
- &qwen3vl
|
||||
url: "github:mudler/LocalAI/gallery/qwen3.yaml@master"
|
||||
icon: https://cdn-avatars.huggingface.co/v1/production/uploads/620760a26e3b7210c2ff1943/-s1gyJfvbE1RgO5iBeNOi.png
|
||||
license: apache-2.0
|
||||
tags:
|
||||
- llm
|
||||
- gguf
|
||||
- gpu
|
||||
- image-to-text
|
||||
- multimodal
|
||||
- cpu
|
||||
- qwen
|
||||
- qwen3
|
||||
- thinking
|
||||
- reasoning
|
||||
name: "qwen3-vl-30b-a3b-instruct"
|
||||
urls:
|
||||
- https://huggingface.co/unsloth/Qwen3-VL-30B-A3B-Instruct-GGUF
|
||||
description: |
|
||||
Meet Qwen3-VL — the most powerful vision-language model in the Qwen series to date.
|
||||
|
||||
This generation delivers comprehensive upgrades across the board: superior text understanding & generation, deeper visual perception & reasoning, extended context length, enhanced spatial and video dynamics comprehension, and stronger agent interaction capabilities.
|
||||
|
||||
Available in Dense and MoE architectures that scale from edge to cloud, with Instruct and reasoning‑enhanced Thinking editions for flexible, on-demand deployment.
|
||||
|
||||
#### Key Enhancements:
|
||||
|
||||
* **Visual Agent**: Operates PC/mobile GUIs—recognizes elements, understands functions, invokes tools, completes tasks.
|
||||
|
||||
* **Visual Coding Boost**: Generates Draw.io/HTML/CSS/JS from images/videos.
|
||||
|
||||
* **Advanced Spatial Perception**: Judges object positions, viewpoints, and occlusions; provides stronger 2D grounding and enables 3D grounding for spatial reasoning and embodied AI.
|
||||
|
||||
* **Long Context & Video Understanding**: Native 256K context, expandable to 1M; handles books and hours-long video with full recall and second-level indexing.
|
||||
|
||||
* **Enhanced Multimodal Reasoning**: Excels in STEM/Math—causal analysis and logical, evidence-based answers.
|
||||
|
||||
* **Upgraded Visual Recognition**: Broader, higher-quality pretraining is able to “recognize everything”—celebrities, anime, products, landmarks, flora/fauna, etc.
|
||||
|
||||
* **Expanded OCR**: Supports 32 languages (up from 19); robust in low light, blur, and tilt; better with rare/ancient characters and jargon; improved long-document structure parsing.
|
||||
|
||||
* **Text Understanding on par with pure LLMs**: Seamless text–vision fusion for lossless, unified comprehension.
|
||||
|
||||
#### Model Architecture Updates:
|
||||
|
||||
1. **Interleaved-MRoPE**: Full‑frequency allocation over time, width, and height via robust positional embeddings, enhancing long‑horizon video reasoning.
|
||||
|
||||
2. **DeepStack**: Fuses multi‑level ViT features to capture fine-grained details and sharpen image–text alignment.
|
||||
|
||||
3. **Text–Timestamp Alignment:** Moves beyond T‑RoPE to precise, timestamp‑grounded event localization for stronger video temporal modeling.
|
||||
|
||||
This is the weight repository for Qwen3-VL-30B-A3B-Instruct.
|
||||
overrides:
|
||||
mmproj: mmproj/mmproj-F16.gguf
|
||||
parameters:
|
||||
model: Qwen3-VL-30B-A3B-Instruct-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: Qwen3-VL-30B-A3B-Instruct-Q4_K_M.gguf
|
||||
sha256: 75d8f4904016d90b71509c8576ebd047a0606cc5aa788eada29d4bedf9b761a6
|
||||
uri: huggingface://unsloth/Qwen3-VL-30B-A3B-Instruct-GGUF/Qwen3-VL-30B-A3B-Instruct-Q4_K_M.gguf
|
||||
- filename: mmproj/mmproj-F16.gguf
|
||||
sha256: 7e7cec67a3a887bddbf38099738d08570e85f08dd126578fa00a7acf4dacef01
|
||||
uri: huggingface://unsloth/Qwen3-VL-30B-A3B-Instruct-GGUF/mmproj-F16.gguf
|
||||
- !!merge <<: *qwen3vl
|
||||
name: "qwen3-vl-30b-a3b-thinking"
|
||||
urls:
|
||||
- https://huggingface.co/unsloth/Qwen3-VL-30B-A3B-Thinking-GGUF
|
||||
description: |
|
||||
Qwen3-VL-30B-A3B-Thinking is the 30B-parameter reasoning ("thinking") variant of the Qwen3-VL series.
|
||||
overrides:
|
||||
mmproj: mmproj/mmproj-F16.gguf
|
||||
parameters:
|
||||
model: Qwen3-VL-30B-A3B-Thinking-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: Qwen3-VL-30B-A3B-Thinking-Q4_K_M.gguf
|
||||
sha256: d3e12c6b15f59cc1c6db685d33eb510184d006ebbff0e038e7685e57ce628b3b
|
||||
uri: huggingface://unsloth/Qwen3-VL-30B-A3B-Thinking-GGUF/Qwen3-VL-30B-A3B-Thinking-Q4_K_M.gguf
|
||||
- filename: mmproj/mmproj-F16.gguf
|
||||
sha256: 7e7cec67a3a887bddbf38099738d08570e85f08dd126578fa00a7acf4dacef01
|
||||
uri: huggingface://unsloth/Qwen3-VL-30B-A3B-Thinking-GGUF/mmproj-F16.gguf
|
||||
- !!merge <<: *qwen3vl
|
||||
name: "qwen3-vl-4b-instruct"
|
||||
urls:
|
||||
- https://huggingface.co/unsloth/Qwen3-VL-4B-Instruct-GGUF
|
||||
description: |
|
||||
Qwen3-VL-4B-Instruct is the 4B parameter model of the Qwen3-VL series.
|
||||
overrides:
|
||||
mmproj: mmproj/mmproj-Qwen3-VL-4B-Instruct-F16.gguf
|
||||
parameters:
|
||||
model: Qwen3-VL-4B-Instruct-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: Qwen3-VL-4B-Instruct-Q4_K_M.gguf
|
||||
sha256: d4dcd426bfba75752a312b266b80fec8136fbaca13c62d93b7ac41fa67f0492b
|
||||
uri: huggingface://unsloth/Qwen3-VL-4B-Instruct-GGUF/Qwen3-VL-4B-Instruct-Q4_K_M.gguf
|
||||
- filename: mmproj/mmproj-Qwen3-VL-4B-Instruct-F16.gguf
|
||||
sha256: 1b9f4e92f0fbda14d7d7b58baed86039b8a980fe503d9d6a9393f25c0028f1fc
|
||||
uri: huggingface://unsloth/Qwen3-VL-4B-Instruct-GGUF/mmproj-F16.gguf
|
||||
- !!merge <<: *qwen3vl
|
||||
name: "qwen3-vl-32b-instruct"
|
||||
urls:
|
||||
- https://huggingface.co/unsloth/Qwen3-VL-32B-Instruct-GGUF
|
||||
description: |
|
||||
Qwen3-VL-32B-Instruct is the 32B parameter model of the Qwen3-VL series.
|
||||
overrides:
|
||||
mmproj: mmproj/mmproj-Qwen3-VL-32B-Instruct-F16.gguf
|
||||
parameters:
|
||||
model: Qwen3-VL-32B-Instruct-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: Qwen3-VL-32B-Instruct-Q4_K_M.gguf
|
||||
sha256: 17885d28e964b22b2faa981a7eaeeeb78da0972ee5f826ad5965f7583a610d9f
|
||||
uri: huggingface://unsloth/Qwen3-VL-32B-Instruct-GGUF/Qwen3-VL-32B-Instruct-Q4_K_M.gguf
|
||||
- filename: mmproj/mmproj-Qwen3-VL-32B-Instruct-F16.gguf
|
||||
sha256: 14b1d68befa75a5e646dd990c5bb429c912b7aa9b49b9ab18231ca5f750421c9
|
||||
uri: huggingface://unsloth/Qwen3-VL-32B-Instruct-GGUF/mmproj-F16.gguf
|
||||
- !!merge <<: *qwen3vl
|
||||
name: "qwen3-vl-4b-thinking"
|
||||
urls:
|
||||
- https://huggingface.co/unsloth/Qwen3-VL-4B-Thinking-GGUF
|
||||
description: |
|
||||
Qwen3-VL-4B-Thinking is the 4B parameter model of the Qwen3-VL series that is thinking.
|
||||
overrides:
|
||||
mmproj: mmproj/mmproj-Qwen3-VL-4B-Thinking-F16.gguf
|
||||
parameters:
|
||||
model: Qwen3-VL-4B-Thinking-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: Qwen3-VL-4B-Thinking-Q4_K_M.gguf
|
||||
sha256: bd73237f16265a1014979b7ed34ff9265e7e200ae6745bb1da383a1bbe0f9211
|
||||
uri: huggingface://unsloth/Qwen3-VL-4B-Thinking-GGUF/Qwen3-VL-4B-Thinking-Q4_K_M.gguf
|
||||
- filename: mmproj/mmproj-Qwen3-VL-4B-Thinking-F16.gguf
|
||||
sha256: 72354fcd3fc75935b84e745ca492d6e78dd003bb5a020d71b296e7650926ac87
|
||||
uri: huggingface://unsloth/Qwen3-VL-4B-Thinking-GGUF/mmproj-F16.gguf
|
||||
- !!merge <<: *qwen3vl
|
||||
name: "qwen3-vl-2b-thinking"
|
||||
urls:
|
||||
- https://huggingface.co/unsloth/Qwen3-VL-2B-Thinking-GGUF
|
||||
description: |
|
||||
Qwen3-VL-2B-Thinking is the 2B parameter model of the Qwen3-VL series that is thinking.
|
||||
overrides:
|
||||
mmproj: mmproj/mmproj-Qwen3-VL-2B-Thinking-F16.gguf
|
||||
parameters:
|
||||
model: Qwen3-VL-2B-Thinking-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: Qwen3-VL-2B-Thinking-Q4_K_M.gguf
|
||||
sha256: 5f282086042d96b78b138839610f5148493b354524090fadc5c97c981b70a26e
|
||||
uri: huggingface://unsloth/Qwen3-VL-2B-Thinking-GGUF/Qwen3-VL-2B-Thinking-Q4_K_M.gguf
|
||||
- filename: mmproj/mmproj-Qwen3-VL-2B-Thinking-F16.gguf
|
||||
sha256: 4eabc90a52fe890d6ca1dad92548782eab6edc91f012a365fff95cf027ba529d
|
||||
uri: huggingface://unsloth/Qwen3-VL-2B-Thinking-GGUF/mmproj-F16.gguf
|
||||
- !!merge <<: *qwen3vl
|
||||
name: "qwen3-vl-2b-instruct"
|
||||
urls:
|
||||
- https://huggingface.co/unsloth/Qwen3-VL-2B-Instruct-GGUF
|
||||
description: |
|
||||
Qwen3-VL-2B-Instruct is the 2B parameter model of the Qwen3-VL series.
|
||||
overrides:
|
||||
mmproj: mmproj/mmproj-Qwen3-VL-2B-Instruct-F16.gguf
|
||||
parameters:
|
||||
model: Qwen3-VL-2B-Instruct-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: Qwen3-VL-2B-Instruct-Q4_K_M.gguf
|
||||
sha256: 858fcf2a39dc73b26dd86592cb0a5f949b59d1edb365d1dea98e46b02e955e56
|
||||
uri: huggingface://unsloth/Qwen3-VL-2B-Instruct-GGUF/Qwen3-VL-2B-Instruct-Q4_K_M.gguf
|
||||
- filename: mmproj/mmproj-Qwen3-VL-2B-Instruct-F16.gguf
|
||||
sha256: cd5a851d3928697fa1bd76d459d2cc409b6cf40c9d9682b2f5c8e7c6a9f9630f
|
||||
uri: huggingface://unsloth/Qwen3-VL-2B-Instruct-GGUF/mmproj-F16.gguf
|
||||
- !!merge <<: *qwen3vl
|
||||
name: "huihui-qwen3-vl-30b-a3b-instruct-abliterated"
|
||||
urls:
|
||||
- https://huggingface.co/noctrex/Huihui-Qwen3-VL-30B-A3B-Instruct-abliterated-GGUF
|
||||
description: |
|
||||
These are quantizations of the model Huihui-Qwen3-VL-30B-A3B-Instruct-abliterated-GGUF
|
||||
overrides:
|
||||
mmproj: mmproj/mmproj-Huihui-Qwen3-VL-30B-A3B-F16.gguf
|
||||
parameters:
|
||||
model: Huihui-Qwen3-VL-30B-A3B-Instruct-abliterated-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: Huihui-Qwen3-VL-30B-A3B-Instruct-abliterated-Q4_K_M.gguf
|
||||
sha256: 1e94a65167a39d2ff4427393746d4dbc838f3d163c639d932e9ce983f575eabf
|
||||
uri: huggingface://noctrex/Huihui-Qwen3-VL-30B-A3B-Instruct-abliterated-GGUF/Huihui-Qwen3-VL-30B-A3B-Instruct-abliterated-Q4_K_M.gguf
|
||||
- filename: mmproj/mmproj-Huihui-Qwen3-VL-30B-A3B-F16.gguf
|
||||
sha256: 4bfd655851a5609b29201154e0bd4fe5f9274073766b8ab35b3a8acba0dd77a7
|
||||
uri: huggingface://noctrex/Huihui-Qwen3-VL-30B-A3B-Instruct-abliterated-GGUF/mmproj-F16.gguf
|
||||
- &jamba
|
||||
icon: https://cdn-avatars.huggingface.co/v1/production/uploads/65e60c0ed5313c06372446ff/QwehUHgP2HtVAMW5MzJ2j.png
|
||||
name: "ai21labs_ai21-jamba-reasoning-3b"
|
||||
@@ -22795,3 +22977,389 @@
|
||||
- filename: wraith-8b.i1-Q4_K_M.gguf
|
||||
sha256: 180469f9de3e1b5a77b7cf316899dbe4782bd5e6d4f161fb18ea95aa612e6926
|
||||
uri: huggingface://mradermacher/wraith-8b-i1-GGUF/wraith-8b.i1-Q4_K_M.gguf
|
||||
- !!merge <<: *qwen25
|
||||
name: "pokee_research_7b"
|
||||
urls:
|
||||
- https://huggingface.co/Mungert/pokee_research_7b-GGUF
|
||||
description: |
|
||||
**Model Name:** Qwen2.5-7B-Instruct
|
||||
**Base Model:** Qwen/Qwen2.5-7B
|
||||
**Model Type:** Instruction-tuned large language model (7.61B parameters)
|
||||
**License:** Apache 2.0
|
||||
|
||||
**Description:**
|
||||
Qwen2.5-7B-Instruct is a powerful, instruction-following language model designed for advanced reasoning, coding, and multi-turn dialogue. Built on the Qwen2.5 architecture, it delivers state-of-the-art performance in understanding complex prompts, generating long-form text (up to 8K tokens), and handling structured outputs like JSON. It supports multilingual communication (29+ languages), including English, Chinese, and European languages, and excels in long-context tasks with support for up to 131,072 tokens.
|
||||
|
||||
Ideal for research, creative writing, coding assistance, and agent-based workflows, this model is optimized for real-world applications requiring robustness, accuracy, and scalability.
|
||||
|
||||
**Key Features:**
|
||||
- 7.61 billion parameters
|
||||
- Context length: 131K tokens (supports long-context via YaRN)
|
||||
- Strong performance in math, coding, and factual reasoning
|
||||
- Fine-tuned for instruction following and chat interactions
|
||||
- Deployable with Hugging Face Transformers, vLLM, and llama.cpp
|
||||
|
||||
**Use Case:**
|
||||
Perfect for developers, researchers, and enterprises building intelligent assistants, autonomous agents, or content generation systems.
|
||||
|
||||
**Citation:**
|
||||
```bibtex
|
||||
@misc{qwen2.5,
|
||||
title = {Qwen2.5: A Party of Foundation Models},
|
||||
url = {https://qwenlm.github.io/blog/qwen2.5/},
|
||||
author = {Qwen Team},
|
||||
month = {September},
|
||||
year = {2024}
|
||||
}
|
||||
```
|
||||
overrides:
|
||||
parameters:
|
||||
model: pokee_research_7b-q4_k_m.gguf
|
||||
files:
|
||||
- filename: pokee_research_7b-q4_k_m.gguf
|
||||
sha256: 670706711d82fcdbae951fda084f77c9c479edf3eb5d8458d1cfddd46cf4b767
|
||||
uri: huggingface://Mungert/pokee_research_7b-GGUF/pokee_research_7b-q4_k_m.gguf
|
||||
- !!merge <<: *qwen3
|
||||
name: "deepkat-32b-i1"
|
||||
urls:
|
||||
- https://huggingface.co/mradermacher/DeepKAT-32B-i1-GGUF
|
||||
description: |
|
||||
**DeepKAT-32B** is a high-performance, open-source coding agent built by merging two leading RL-tuned models—**DeepSWE-Preview** and **KAT-Dev**—on the **Qwen3-32B** base architecture using Arcee MergeKit’s TIES method. This 32B parameter model excels in complex software engineering tasks, including code generation, bug fixing, refactoring, and autonomous agent workflows with tool use.
|
||||
|
||||
Key strengths:
|
||||
- Achieves ~62% SWE-Bench Verified score (on par with top open-source models).
|
||||
- Strong performance in multi-file reasoning, multi-turn planning, and sparse reward environments.
|
||||
- Optimized for agentic behavior with step-by-step reasoning and tool chaining.
|
||||
|
||||
Ideal for developers, AI researchers, and teams building intelligent code assistants or autonomous software agents.
|
||||
|
||||
> 🔗 **Base Model**: Qwen/Qwen3-32B
|
||||
> 🛠️ **Built With**: MergeKit (TIES), RL-finetuned components
|
||||
> 📊 **Benchmarks**: SWE-Bench Verified: ~62%, HumanEval Pass@1: ~85%
|
||||
|
||||
*Note: The model is a merge of two RL-tuned models and not a direct training from scratch.*
|
||||
overrides:
|
||||
parameters:
|
||||
model: mradermacher/DeepKAT-32B-i1-GGUF
|
||||
- !!merge <<: *granite4
|
||||
name: "ibm-granite.granite-4.0-1b"
|
||||
urls:
|
||||
- https://huggingface.co/DevQuasar/ibm-granite.granite-4.0-1b-GGUF
|
||||
description: |
|
||||
### **Granite-4.0-1B**
|
||||
*By IBM | Apache 2.0 License*
|
||||
|
||||
**Overview:**
|
||||
Granite-4.0-1B is a lightweight, instruction-tuned language model designed for efficient on-device and research use. Built on a decoder-only dense transformer architecture, it delivers strong performance in instruction following, code generation, tool calling, and multilingual tasks—making it ideal for applications requiring low latency and minimal resource usage.
|
||||
|
||||
**Key Features:**
|
||||
- **Size:** 1.6 billion parameters (1B Dense), optimized for efficiency.
|
||||
- **Capabilities:**
|
||||
- Text generation, summarization, question answering
|
||||
- Code completion and function calling (e.g., API integration)
|
||||
- Multilingual support (English, Spanish, French, German, Japanese, Chinese, Arabic, Korean, Portuguese, Italian, Dutch, Czech)
|
||||
- Robust safety and alignment via instruction tuning and reinforcement learning
|
||||
- **Architecture:** Uses GQA (Grouped Query Attention), SwiGLU activation, RMSNorm, shared input/output embeddings, and RoPE position embeddings.
|
||||
- **Context Length:** Up to 128K tokens — suitable for long-form content and complex reasoning.
|
||||
- **Training:** Finetuned from *Granite-4.0-1B-Base* using open-source datasets, synthetic data, and human-curated instruction pairs.
|
||||
|
||||
**Performance Highlights (1B Dense):**
|
||||
- **MMLU (5-shot):** 59.39
|
||||
- **HumanEval (pass@1):** 74
|
||||
- **IFEval (Alignment):** 80.82
|
||||
- **GSM8K (8-shot):** 76.35
|
||||
- **SALAD-Bench (Safety):** 93.44
|
||||
|
||||
**Use Cases:**
|
||||
- On-device AI applications
|
||||
- Research and prototyping
|
||||
- Fine-tuning for domain-specific tasks
|
||||
- Low-resource environments with high performance expectations
|
||||
|
||||
**Resources:**
|
||||
- [Hugging Face Model](https://huggingface.co/ibm-granite/granite-4.0-1b)
|
||||
- [Granite Docs](https://www.ibm.com/granite/docs/)
|
||||
- [GitHub Repository](https://github.com/ibm-granite/granite-4.0-nano-language-models)
|
||||
|
||||
> *“Make knowledge free for everyone.” – IBM Granite Team*
|
||||
overrides:
|
||||
parameters:
|
||||
model: ibm-granite.granite-4.0-1b.Q4_K_M.gguf
|
||||
files:
|
||||
- filename: ibm-granite.granite-4.0-1b.Q4_K_M.gguf
|
||||
sha256: 0e0ef42486b7f1f95dfe33af2e696df1149253e500c48f3fb8db0125afa2922c
|
||||
uri: huggingface://DevQuasar/ibm-granite.granite-4.0-1b-GGUF/ibm-granite.granite-4.0-1b.Q4_K_M.gguf
|
||||
- !!merge <<: *qwen3
|
||||
name: "apollo-astralis-4b-i1"
|
||||
urls:
|
||||
- https://huggingface.co/mradermacher/apollo-astralis-4b-i1-GGUF
|
||||
description: |
|
||||
**Apollo-Astralis V1 4B**
|
||||
*A warm, enthusiastic, and empathetic reasoning model built on Qwen3-4B-Thinking*
|
||||
|
||||
**Overview**
|
||||
Apollo-Astralis V1 4B is a 4-billion-parameter conversational AI designed for collaborative, emotionally intelligent problem-solving. Developed by VANTA Research, it combines rigorous logical reasoning with a vibrant, supportive communication style—making it ideal for creative brainstorming, educational support, and personal development.
|
||||
|
||||
**Key Features**
|
||||
- 🤔 **Explicit Reasoning**: Uses `<think>` tags to break down thought processes step by step
|
||||
- 💬 **Warm & Enthusiastic Tone**: Celebrates achievements with energy and empathy
|
||||
- 🤝 **Collaborative Style**: Engages users with "we" language and clarifying questions
|
||||
- 🔍 **High Accuracy**: Achieves 100% in enthusiasm detection and 90% in empathy recognition
|
||||
- 🎯 **Fine-Tuned for Real-World Use**: Trained with LoRA on a dataset emphasizing emotional intelligence and consistency
|
||||
|
||||
**Base Model**
|
||||
Built on **Qwen3-4B-Thinking** and enhanced with lightweight LoRA fine-tuning (33M trainable parameters).
|
||||
Available in both full and quantized (GGUF) formats via Hugging Face and Ollama.
|
||||
|
||||
**Use Cases**
|
||||
- Personal coaching & motivation
|
||||
- Creative ideation & project planning
|
||||
- Educational tutoring with emotional support
|
||||
- Mental wellness conversations (complementary, not a replacement for professional care)
|
||||
|
||||
**License**
|
||||
Apache 2.0 — open for research, commercial, and personal use.
|
||||
|
||||
**Try It**
|
||||
👉 [Hugging Face Page](https://huggingface.co/VANTA-Research/apollo-astralis-v1-4b)
|
||||
👉 [Ollama](https://ollama.com/vanta-research/apollo-astralis-v1-4b)
|
||||
|
||||
*Developed by VANTA Research — where reasoning meets warmth.*
|
||||
overrides:
|
||||
parameters:
|
||||
model: apollo-astralis-4b.i1-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: apollo-astralis-4b.i1-Q4_K_M.gguf
|
||||
sha256: 94e1d371420b03710fc7de030c1c06e75a356d9388210a134ee2adb4792a2626
|
||||
uri: huggingface://mradermacher/apollo-astralis-4b-i1-GGUF/apollo-astralis-4b.i1-Q4_K_M.gguf
|
||||
- !!merge <<: *qwen3
|
||||
name: "qwen3-vlto-32b-instruct-i1"
|
||||
urls:
|
||||
- https://huggingface.co/mradermacher/Qwen3-VLTO-32B-Instruct-i1-GGUF
|
||||
description: |
|
||||
**Model Name:** Qwen3-VL-32B-Instruct (Text-Only Variant: Qwen3-VLTO-32B-Instruct)
|
||||
**Base Model:** Qwen/Qwen3-VL-32B-Instruct
|
||||
**Repository:** [mradermacher/Qwen3-VLTO-32B-Instruct-i1-GGUF](https://huggingface.co/mradermacher/Qwen3-VLTO-32B-Instruct-i1-GGUF)
|
||||
**Type:** Large Language Model (LLM) – Text-Only (Vision-Language model stripped of vision components)
|
||||
**Architecture:** Qwen3-VL, adapted for pure text generation
|
||||
**Size:** 32 billion parameters
|
||||
**License:** Apache 2.0
|
||||
**Framework:** Hugging Face Transformers
|
||||
|
||||
---
|
||||
|
||||
### 🔍 **Description**
|
||||
|
||||
This is a **text-only variant** of the powerful **Qwen3-VL-32B-Instruct** multimodal model, stripped of its vision components to function as a high-performance pure language model. The model retains the full text understanding and generation capabilities of its parent — including strong reasoning, long-context handling (up to 32K+ tokens), and advanced multimodal training-derived coherence — while being optimized for text-only tasks.
|
||||
|
||||
It was created by loading the weights from the full Qwen3-VL-32B-Instruct model into a text-only Qwen3 architecture, preserving all linguistic and reasoning strengths without the need for image input.
|
||||
|
||||
Perfect for applications requiring deep reasoning, long-form content generation, code synthesis, and dialogue — with all the benefits of the Qwen3 series, now in a lightweight, text-focused form.
|
||||
|
||||
---
|
||||
|
||||
### 📌 Key Features
|
||||
|
||||
- ✅ **High-Performance Text Generation** – Built on top of the state-of-the-art Qwen3-VL architecture
|
||||
- ✅ **Extended Context Length** – Supports up to 32,768 tokens (ideal for long documents and complex tasks)
|
||||
- ✅ **Strong Reasoning & Planning** – Excels at logic, math, coding, and multi-step reasoning
|
||||
- ✅ **Optimized for GGUF Format** – Available in multiple quantized versions (IQ3_M, Q2_K, etc.) for efficient inference on consumer hardware
|
||||
- ✅ **Free to Use & Modify** – Apache 2.0 license
|
||||
|
||||
---
|
||||
|
||||
### 📦 Use Case Suggestions
|
||||
|
||||
- Long-form writing, summarization, and editing
|
||||
- Code generation and debugging
|
||||
- AI agents and task automation
|
||||
- High-quality chat and dialogue systems
|
||||
- Research and experimentation with large-scale LLMs on local devices
|
||||
|
||||
---
|
||||
|
||||
### 📚 References
|
||||
|
||||
- Original Model: [Qwen/Qwen3-VL-32B-Instruct](https://huggingface.co/Qwen/Qwen3-VL-32B-Instruct)
|
||||
- Technical Report: [Qwen3 Technical Report (arXiv)](https://arxiv.org/abs/2505.09388)
|
||||
- Quantization by: [mradermacher](https://huggingface.co/mradermacher)
|
||||
|
||||
> ✅ **Note**: The model shown here is **not the original vision-language model** — it's a **text-only conversion** of the Qwen3-VL-32B-Instruct model, ideal for pure language tasks.
|
||||
overrides:
|
||||
parameters:
|
||||
model: Qwen3-VLTO-32B-Instruct.i1-Q4_K_S.gguf
|
||||
files:
|
||||
- filename: Qwen3-VLTO-32B-Instruct.i1-Q4_K_S.gguf
|
||||
sha256: 789d55249614cd1acee1a23278133cd56ca898472259fa2261f77d65ed7f8367
|
||||
uri: huggingface://mradermacher/Qwen3-VLTO-32B-Instruct-i1-GGUF/Qwen3-VLTO-32B-Instruct.i1-Q4_K_S.gguf
|
||||
- !!merge <<: *qwen3
|
||||
name: "qwen3-vlto-32b-thinking"
|
||||
urls:
|
||||
- https://huggingface.co/mradermacher/Qwen3-VLTO-32B-Thinking-GGUF
|
||||
description: |
|
||||
**Model Name:** Qwen3-VLTO-32B-Thinking
|
||||
**Model Type:** Large Language Model (Text-Only)
|
||||
**Base Model:** Qwen/Qwen3-VL-32B-Thinking (vanilla Qwen3-VL-32B with vision components removed)
|
||||
**Architecture:** Transformer-based, 32-billion parameter model optimized for reasoning and complex text generation.
|
||||
|
||||
### Description:
|
||||
Qwen3-VLTO-32B-Thinking is a pure text-only variant of the Qwen3-VL-32B-Thinking model, stripped of its vision capabilities while preserving the full reasoning and language understanding power. It is derived by transferring the weights from the vision-language model into a text-only transformer architecture, maintaining the same high-quality behavior for tasks such as logical reasoning, code generation, and dialogue.
|
||||
|
||||
This model is ideal for applications requiring deep linguistic reasoning and long-context understanding without image input. It supports advanced multimodal reasoning capabilities *in text form*—perfect for research, chatbots, and content generation.
|
||||
|
||||
### Key Features:
|
||||
- ✅ 32B parameters, high reasoning capability
|
||||
- ✅ No vision components — fully text-only
|
||||
- ✅ Trained for complex thinking and step-by-step reasoning
|
||||
- ✅ Compatible with Hugging Face Transformers and GGUF inference tools
|
||||
- ✅ Available in multiple quantization levels (Q2_K to Q8_0) for efficient deployment
|
||||
|
||||
### Use Case:
|
||||
Ideal for advanced text generation, logical inference, coding, and conversational AI where vision is not needed.
|
||||
|
||||
> 🔗 **Base Model**: [Qwen/Qwen3-VL-32B-Thinking](https://huggingface.co/Qwen/Qwen3-VL-32B-Thinking)
|
||||
> 📦 **Quantized Versions**: Available via [mradermacher/Qwen3-VLTO-32B-Thinking-GGUF](https://huggingface.co/mradermacher/Qwen3-VLTO-32B-Thinking-GGUF)
|
||||
|
||||
---
|
||||
*Note: The original model was created by Alibaba’s Qwen team. This variant was adapted by qingy2024 and quantized by mradermacher.*
|
||||
overrides:
|
||||
parameters:
|
||||
model: Qwen3-VLTO-32B-Thinking.Q4_K_M.gguf
|
||||
files:
|
||||
- filename: Qwen3-VLTO-32B-Thinking.Q4_K_M.gguf
|
||||
sha256: d88b75df7c40455dfa21ded23c8b25463a8d58418bb6296304052b7e70e96954
|
||||
uri: huggingface://mradermacher/Qwen3-VLTO-32B-Thinking-GGUF/Qwen3-VLTO-32B-Thinking.Q4_K_M.gguf
|
||||
- !!merge <<: *gemma3
|
||||
name: "gemma-3-the-grand-horror-27b"
|
||||
urls:
|
||||
- https://huggingface.co/DavidAU/Gemma-3-The-Grand-Horror-27B-GGUF
|
||||
description: |
|
||||
The **Gemma-3-The-Grand-Horror-27B-GGUF** model is a **fine-tuned version** of Google's **Gemma 3 27B** language model, specifically optimized for **extreme horror-themed text generation**. It was trained using the **Unsloth framework** on a custom in-house dataset of horror content, resulting in a model that produces vivid, graphic, and psychologically intense narratives—featuring gore, madness, and disturbing imagery—often even when prompts don't explicitly request horror.
|
||||
|
||||
Key characteristics:
|
||||
- **Base Model**: Gemma 3 27B (original by Google, not the quantized version)
|
||||
- **Fine-tuned For**: High-intensity horror storytelling, long-form narrative generation, and immersive scene creation
|
||||
- **Use Case**: Creative writing, horror RP, dark fiction, and experimental storytelling
|
||||
- **Not Suitable For**: General use, children, sensitive audiences, or content requiring neutral/positive tone
|
||||
- **Quantization**: Available in GGUF format (e.g., q3k, q4, etc.), making it accessible for local inference on consumer hardware
|
||||
|
||||
> ✅ **Note**: The model card you see is for a **quantized, fine-tuned derivative**, not the original. The true base model is **Gemma 3 27B**, available at: https://huggingface.co/google/gemma-3-27b
|
||||
|
||||
This model is not for all audiences — it generates content with a consistently dark, unsettling tone. Use responsibly.
|
||||
overrides:
|
||||
parameters:
|
||||
model: Gemma-3-The-Grand-Horror-27B-Q4_k_m.gguf
|
||||
files:
|
||||
- filename: Gemma-3-The-Grand-Horror-27B-Q4_k_m.gguf
|
||||
sha256: 46f0b06b785d19804a1a796bec89a8eeba8a4e2ef21e2ab8dbb8fa2ff0d675b1
|
||||
uri: huggingface://DavidAU/Gemma-3-The-Grand-Horror-27B-GGUF/Gemma-3-The-Grand-Horror-27B-Q4_k_m.gguf
|
||||
- !!merge <<: *qwen3
|
||||
name: "qwen3-nemotron-32b-rlbff-i1"
|
||||
urls:
|
||||
- https://huggingface.co/mradermacher/Qwen3-Nemotron-32B-RLBFF-i1-GGUF
|
||||
description: |
|
||||
**Model Name:** Qwen3-Nemotron-32B-RLBFF
|
||||
**Base Model:** Qwen/Qwen3-32B
|
||||
**Developer:** NVIDIA
|
||||
**License:** NVIDIA Open Model License
|
||||
|
||||
**Description:**
|
||||
Qwen3-Nemotron-32B-RLBFF is a high-performance, fine-tuned large language model built on the Qwen3-32B foundation. It is specifically optimized to generate high-quality, helpful responses in a default thinking mode through advanced reinforcement learning with binary flexible feedback (RLBFF). Trained on the HelpSteer3 dataset, this model excels in reasoning, planning, coding, and information-seeking tasks while maintaining strong safety and alignment with human preferences.
|
||||
|
||||
**Key Performance (as of Sep 2025):**
|
||||
- **MT-Bench:** 9.50 (near GPT-4-Turbo level)
|
||||
- **Arena Hard V2:** 55.6%
|
||||
- **WildBench:** 70.33%
|
||||
|
||||
**Architecture & Efficiency:**
|
||||
- 32 billion parameters, based on the Qwen3 Transformer architecture
|
||||
- Designed for deployment on NVIDIA GPUs (Ampere, Hopper, Turing)
|
||||
- Achieves performance comparable to DeepSeek R1 and O3-mini at less than 5% of the inference cost
|
||||
|
||||
**Use Case:**
|
||||
Ideal for applications requiring reliable, thoughtful, and safe responses—such as advanced chatbots, research assistants, and enterprise AI systems.
|
||||
|
||||
**Access & Usage:**
|
||||
Available on Hugging Face with support for Hugging Face Transformers and vLLM.
|
||||
**Cite:** [Wang et al., 2025 — RLBFF: Binary Flexible Feedback](https://arxiv.org/abs/2509.21319)
|
||||
|
||||
👉 *Note: The GGUF version (mradermacher/Qwen3-Nemotron-32B-RLBFF-i1-GGUF) is a user-quantized variant. The original model is available at nvidia/Qwen3-Nemotron-32B-RLBFF.*
|
||||
overrides:
|
||||
parameters:
|
||||
model: Qwen3-Nemotron-32B-RLBFF.i1-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: Qwen3-Nemotron-32B-RLBFF.i1-Q4_K_M.gguf
|
||||
sha256: 000e8c65299fc232d1a832f1cae831ceaa16425eccfb7d01702d73e8bd3eafee
|
||||
uri: huggingface://mradermacher/Qwen3-Nemotron-32B-RLBFF-i1-GGUF/Qwen3-Nemotron-32B-RLBFF.i1-Q4_K_M.gguf
|
||||
- !!merge <<: *gptoss
|
||||
name: "financial-gpt-oss-20b-q8-i1"
|
||||
urls:
|
||||
- https://huggingface.co/mradermacher/financial-gpt-oss-20b-q8-i1-GGUF
|
||||
description: |
|
||||
### **Financial GPT-OSS 20B (Base Model)**
|
||||
|
||||
**Model Type:** Causal Language Model (Fine-tuned for Financial Analysis)
|
||||
**Architecture:** Mixture of Experts (MoE) – 20B parameters, 32 experts (4 active per token)
|
||||
**Base Model:** `unsloth/gpt-oss-20b-unsloth-bnb-4bit`
|
||||
**Fine-tuned With:** LoRA (Low-Rank Adaptation) on financial conversation data
|
||||
**Training Data:** 22,250 financial dialogue pairs covering stocks (AAPL, NVDA, TSLA, etc.), technical analysis, risk assessment, and trading signals
|
||||
**Context Length:** 131,072 tokens
|
||||
**Quantization:** Q8_0 GGUF (for efficient inference)
|
||||
**License:** Apache 2.0
|
||||
|
||||
**Key Features:**
|
||||
- Specialized in financial market analysis: technical indicators (RSI, MACD), risk assessments, trading signals, and price forecasts
|
||||
- Handles complex financial queries with structured, actionable insights
|
||||
- Designed for real-time use with low-latency inference (GGUF format)
|
||||
- Supports S&P 500 stocks and major asset classes across tech, healthcare, energy, and finance sectors
|
||||
|
||||
**Use Case:** Ideal for traders, analysts, and developers building financial AI tools. Use with caution—**not financial advice**.
|
||||
|
||||
**Citation:**
|
||||
```bibtex
|
||||
@misc{financial-gpt-oss-20b-q8,
|
||||
title={Financial GPT-OSS 20B Q8: Fine-tuned Financial Analysis Model},
|
||||
author={beenyb},
|
||||
year={2025},
|
||||
publisher={Hugging Face Hub},
|
||||
url={https://huggingface.co/beenyb/financial-gpt-oss-20b-q8}
|
||||
}
|
||||
```
|
||||
overrides:
|
||||
parameters:
|
||||
model: financial-gpt-oss-20b-q8.i1-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: financial-gpt-oss-20b-q8.i1-Q4_K_M.gguf
|
||||
sha256: 14586673de2a769f88bd51f88464b9b1f73d3ad986fa878b2e0c1473f1c1fc59
|
||||
uri: huggingface://mradermacher/financial-gpt-oss-20b-q8-i1-GGUF/financial-gpt-oss-20b-q8.i1-Q4_K_M.gguf
|
||||
- !!merge <<: *llama3
|
||||
name: "qwen3-grand-horror-light-1.7b"
|
||||
urls:
|
||||
- https://huggingface.co/mradermacher/Qwen3-Grand-Horror-Light-1.7B-GGUF
|
||||
description: |
|
||||
**Model Name:** Qwen3-Grand-Horror-Light-1.7B
|
||||
**Base Model:** qingy2024/Qwen3-VLTO-1.7B-Instruct
|
||||
**Model Type:** Fine-tuned Language Model (Text Generation)
|
||||
**Size:** 1.7B parameters
|
||||
**License:** Apache 2.0
|
||||
**Language:** English
|
||||
**Use Case:** Horror storytelling, creative writing, roleplay, scene generation
|
||||
**Fine-Tuned On:** Custom horror dataset (`DavidAU/horror-nightmare1`)
|
||||
**Training Method:** Fine-tuned via Unsloth
|
||||
**Key Features:**
|
||||
- Specialized in generating atmospheric, intense horror content with elements of madness, gore, and suspense
|
||||
- Optimized for roleplay and narrative generation with low to medium horror intensity
|
||||
- Supports high-quality output across multiple quantization levels (Q2_K to Q8_0, f16)
|
||||
- Designed for use with tools like KoboldCpp, oobabooga/text-generation-webui, and Silly Tavern
|
||||
- Recommended settings: Temperature 0.4–1.2, Repetition penalty 1.1, Smoothing factor 1.5 for smoother output
|
||||
|
||||
**Note:** This model is a fine-tuned variant of the Qwen3 series, not a quantized version. The original base model is available at [qingy2024/Qwen3-VLTO-1.7B-Instruct](https://huggingface.co/qingy2024/Qwen3-VLTO-1.7B-Instruct) and was further adapted for horror-themed creative writing.
|
||||
|
||||
**Ideal For:** Creators, writers, and roleplayers seeking a compact, expressive model for immersive horror storytelling.
|
||||
overrides:
|
||||
parameters:
|
||||
model: Qwen3-Grand-Horror-Light-1.7B.Q4_K_M.gguf
|
||||
files:
|
||||
- filename: Qwen3-Grand-Horror-Light-1.7B.Q4_K_M.gguf
|
||||
sha256: cbbb0c5f6874130a8ae253377fdc7ad25fa2c1e9bb45f1aaad88db853ef985dc
|
||||
uri: huggingface://mradermacher/Qwen3-Grand-Horror-Light-1.7B-GGUF/Qwen3-Grand-Horror-Light-1.7B.Q4_K_M.gguf
|
||||
|
||||
@@ -6,15 +6,20 @@ config_file: |
|
||||
backend: "llama-cpp"
|
||||
template:
|
||||
chat_message: |
|
||||
<|im_start|>{{ .RoleName }}
|
||||
{{ if .FunctionCall -}}
|
||||
{{ else if eq .RoleName "tool" -}}
|
||||
<|im_start|>{{if eq .RoleName "tool" }}user{{else}}{{ .RoleName }}{{end}}
|
||||
{{ if eq .RoleName "tool" -}}
|
||||
<tool_response>
|
||||
{{ end -}}
|
||||
{{ if .Content -}}
|
||||
{{.Content }}
|
||||
{{ end -}}
|
||||
{{ if eq .RoleName "tool" -}}
|
||||
</tool_response>
|
||||
{{ end -}}
|
||||
{{ if .FunctionCall -}}
|
||||
<tool_call>
|
||||
{{toJson .FunctionCall}}
|
||||
</tool_call>
|
||||
{{ end -}}<|im_end|>
|
||||
function: |
|
||||
<|im_start|>system
|
||||
|
||||
Reference in New Issue
Block a user