add libx11

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
add libxcb
2026-02-05 12:12:39 -05:00 · 2025-09-28 20:30:31 +02:00 · 2025-09-28 18:59:12 +02:00 · 2025-09-28 18:34:35 +02:00 · 2025-09-28 18:24:56 +02:00 · 2025-09-28 18:20:11 +02:00
350 changed files with 11186 additions and 31440 deletions
--- a/.air.toml
+++ b/.air.toml
@@ -1,8 +0,0 @@
-# .air.toml
-[build]
-cmd = "make build"
-bin = "./local-ai"
-args_bin = [ "--debug" ]
-include_ext = ["go", "html", "yaml", "toml", "json", "txt", "md"]
-exclude_dir = ["pkg/grpc/proto"]
-delay = 1000
--- a/.github/gallery-agent/agent.go
+++ b/.github/gallery-agent/agent.go
@@ -1,288 +0,0 @@
-package main
-
-import (
-	"context"
-	"fmt"
-	"os"
-	"slices"
-	"strings"
-
-	hfapi "github.com/mudler/LocalAI/pkg/huggingface-api"
-	cogito "github.com/mudler/cogito"
-
-	"github.com/mudler/cogito/structures"
-	"github.com/sashabaranov/go-openai/jsonschema"
-)
-
-var (
-	openAIModel      = os.Getenv("OPENAI_MODEL")
-	openAIKey        = os.Getenv("OPENAI_KEY")
-	openAIBaseURL    = os.Getenv("OPENAI_BASE_URL")
-	galleryIndexPath = os.Getenv("GALLERY_INDEX_PATH")
-	//defaultclient
-	llm = cogito.NewOpenAILLM(openAIModel, openAIKey, openAIBaseURL)
-)
-
-// cleanTextContent removes trailing spaces, tabs, and normalizes line endings
-// to prevent YAML linting issues like trailing spaces and multiple empty lines
-func cleanTextContent(text string) string {
-	lines := strings.Split(text, "\n")
-	var cleanedLines []string
-	var prevEmpty bool
-	for _, line := range lines {
-		// Remove all trailing whitespace (spaces, tabs, etc.)
-		trimmed := strings.TrimRight(line, " \t\r")
-		// Avoid multiple consecutive empty lines
-		if trimmed == "" {
-			if !prevEmpty {
-				cleanedLines = append(cleanedLines, "")
-			}
-			prevEmpty = true
-		} else {
-			cleanedLines = append(cleanedLines, trimmed)
-			prevEmpty = false
-		}
-	}
-	// Remove trailing empty lines from the result
-	result := strings.Join(cleanedLines, "\n")
-	return strings.TrimRight(result, "\n")
-}
-
-// isModelExisting checks if a specific model ID exists in the gallery using text search
-func isModelExisting(modelID string) (bool, error) {
-	indexPath := getGalleryIndexPath()
-	content, err := os.ReadFile(indexPath)
-	if err != nil {
-		return false, fmt.Errorf("failed to read %s: %w", indexPath, err)
-	}
-
-	contentStr := string(content)
-	// Simple text search - if the model ID appears anywhere in the file, it exists
-	return strings.Contains(contentStr, modelID), nil
-}
-
-// filterExistingModels removes models that already exist in the gallery
-func filterExistingModels(models []ProcessedModel) ([]ProcessedModel, error) {
-	var filteredModels []ProcessedModel
-	for _, model := range models {
-		exists, err := isModelExisting(model.ModelID)
-		if err != nil {
-			fmt.Printf("Error checking if model %s exists: %v, skipping\n", model.ModelID, err)
-			continue
-		}
-
-		if !exists {
-			filteredModels = append(filteredModels, model)
-		} else {
-			fmt.Printf("Skipping existing model: %s\n", model.ModelID)
-		}
-	}
-
-	fmt.Printf("Filtered out %d existing models, %d new models remaining\n",
-		len(models)-len(filteredModels), len(filteredModels))
-
-	return filteredModels, nil
-}
-
-// getGalleryIndexPath returns the gallery index file path, with a default fallback
-func getGalleryIndexPath() string {
-	if galleryIndexPath != "" {
-		return galleryIndexPath
-	}
-	return "gallery/index.yaml"
-}
-
-func getRealReadme(ctx context.Context, repository string) (string, error) {
-	// Create a conversation fragment
-	fragment := cogito.NewEmptyFragment().
-		AddMessage("user",
-			`Your task is to get a clear description of a large language model from huggingface by using the provided tool. I will share with you a repository that might be quantized, and as such probably not by the original model author. We need to get the real  description of the model, and not the one that might be quantized. You will have to call the tool to get the readme more than once by figuring out from the quantized readme which is the base model readme. This is the repository: `+repository)
-
-	// Execute with tools
-	result, err := cogito.ExecuteTools(llm, fragment,
-		cogito.WithIterations(3),
-		cogito.WithMaxAttempts(3),
-		cogito.WithTools(&HFReadmeTool{client: hfapi.NewClient()}))
-	if err != nil {
-		return "", err
-	}
-
-	result = result.AddMessage("user", "Describe the model in a clear and concise way that can be shared in a model gallery.")
-
-	// Get a response
-	newFragment, err := llm.Ask(ctx, result)
-	if err != nil {
-		return "", err
-	}
-
-	content := newFragment.LastMessage().Content
-	return cleanTextContent(content), nil
-}
-
-func selectMostInterestingModels(ctx context.Context, searchResult *SearchResult) ([]ProcessedModel, error) {
-	// Create a conversation fragment
-	fragment := cogito.NewEmptyFragment().
-		AddMessage("user",
-			`Your task is to analyze a list of AI models and select the most interesting ones for a model gallery. You will be given detailed information about multiple models including their metadata, file information, and README content.
-
-Consider the following criteria when selecting models:
-1. Model popularity (download count)
-2. Model recency (last modified date)
-3. Model completeness (has preferred model file, README, etc.)
-4. Model uniqueness (not duplicates or very similar models)
-5. Model quality (based on README content and description)
-6. Model utility (practical applications)
-
-You should select models that would be most valuable for users browsing a model gallery. Prioritize models that are:
- Well-documented with clear READMEs
- Recently updated
- Popular (high download count)
- Have the preferred quantization format available
- Offer unique capabilities or are from reputable authors
-
-Return your analysis and selection reasoning.`)
-
-	// Add the search results as context
-	modelsInfo := fmt.Sprintf("Found %d models matching '%s' with quantization preference '%s':\n\n",
-		searchResult.TotalModelsFound, searchResult.SearchTerm, searchResult.Quantization)
-
-	for i, model := range searchResult.Models {
-		modelsInfo += fmt.Sprintf("Model %d:\n", i+1)
-		modelsInfo += fmt.Sprintf("  ID: %s\n", model.ModelID)
-		modelsInfo += fmt.Sprintf("  Author: %s\n", model.Author)
-		modelsInfo += fmt.Sprintf("  Downloads: %d\n", model.Downloads)
-		modelsInfo += fmt.Sprintf("  Last Modified: %s\n", model.LastModified)
-		modelsInfo += fmt.Sprintf("  Files: %d files\n", len(model.Files))
-
-		if model.PreferredModelFile != nil {
-			modelsInfo += fmt.Sprintf("  Preferred Model File: %s (%d bytes)\n",
-				model.PreferredModelFile.Path, model.PreferredModelFile.Size)
-		} else {
-			modelsInfo += "  No preferred model file found\n"
-		}
-
-		if model.ReadmeContent != "" {
-			modelsInfo += fmt.Sprintf("  README: %s\n", model.ReadmeContent)
-		}
-
-		if model.ProcessingError != "" {
-			modelsInfo += fmt.Sprintf("  Processing Error: %s\n", model.ProcessingError)
-		}
-
-		modelsInfo += "\n"
-	}
-
-	fragment = fragment.AddMessage("user", modelsInfo)
-
-	fragment = fragment.AddMessage("user", "Based on your analysis, select the top 5 most interesting models and provide a brief explanation for each selection. Also, create a filtered SearchResult with only the selected models. Return just a list of repositories IDs, you will later be asked to output it as a JSON array with the json tool.")
-
-	// Get a response
-	newFragment, err := llm.Ask(ctx, fragment)
-	if err != nil {
-		return nil, err
-	}
-
-	fmt.Println(newFragment.LastMessage().Content)
-	repositories := struct {
-		Repositories []string `json:"repositories"`
-	}{}
-
-	s := structures.Structure{
-		Schema: jsonschema.Definition{
-			Type:                 jsonschema.Object,
-			AdditionalProperties: false,
-			Properties: map[string]jsonschema.Definition{
-				"repositories": {
-					Type:        jsonschema.Array,
-					Items:       &jsonschema.Definition{Type: jsonschema.String},
-					Description: "The trending repositories IDs",
-				},
-			},
-			Required: []string{"repositories"},
-		},
-		Object: &repositories,
-	}
-
-	err = newFragment.ExtractStructure(ctx, llm, s)
-	if err != nil {
-		return nil, err
-	}
-
-	filteredModels := []ProcessedModel{}
-	for _, m := range searchResult.Models {
-		if slices.Contains(repositories.Repositories, m.ModelID) {
-			filteredModels = append(filteredModels, m)
-		}
-	}
-
-	return filteredModels, nil
-}
-
-// ModelFamily represents a YAML anchor/family
-type ModelFamily struct {
-	Anchor string `json:"anchor"`
-	Name   string `json:"name"`
-}
-
-// selectModelFamily selects the appropriate model family/anchor for a given model
-func selectModelFamily(ctx context.Context, model ProcessedModel, availableFamilies []ModelFamily) (string, error) {
-	// Create a conversation fragment
-	fragment := cogito.NewEmptyFragment().
-		AddMessage("user",
-			`Your task is to select the most appropriate model family/anchor for a given AI model. You will be provided with:
-1. Information about the model (name, description, etc.)
-2. A list of available model families/anchors
-
-You need to select the family that best matches the model's architecture, capabilities, or characteristics. Consider:
- Model architecture (e.g., Llama, Qwen, Mistral, etc.)
- Model capabilities (e.g., vision, coding, chat, etc.)
- Model size/type (e.g., small, medium, large)
- Model purpose (e.g., general purpose, specialized, etc.)
-
-Return the anchor name that best fits the model.`)
-
-	// Add model information
-	modelInfo := "Model Information:\n"
-	modelInfo += fmt.Sprintf("  ID: %s\n", model.ModelID)
-	modelInfo += fmt.Sprintf("  Author: %s\n", model.Author)
-	modelInfo += fmt.Sprintf("  Downloads: %d\n", model.Downloads)
-	modelInfo += fmt.Sprintf("  Description: %s\n", model.ReadmeContentPreview)
-
-	fragment = fragment.AddMessage("user", modelInfo)
-
-	// Add available families
-	familiesInfo := "Available Model Families:\n"
-	for _, family := range availableFamilies {
-		familiesInfo += fmt.Sprintf("  - %s (%s)\n", family.Anchor, family.Name)
-	}
-
-	fragment = fragment.AddMessage("user", familiesInfo)
-	fragment = fragment.AddMessage("user", "Select the most appropriate family anchor for this model. Return just the anchor name.")
-
-	// Get a response
-	newFragment, err := llm.Ask(ctx, fragment)
-	if err != nil {
-		return "", err
-	}
-
-	// Extract the selected family
-	selectedFamily := strings.TrimSpace(newFragment.LastMessage().Content)
-
-	// Validate that the selected family exists in our list
-	for _, family := range availableFamilies {
-		if family.Anchor == selectedFamily {
-			return selectedFamily, nil
-		}
-	}
-
-	// If no exact match, try to find a close match
-	for _, family := range availableFamilies {
-		if strings.Contains(strings.ToLower(family.Anchor), strings.ToLower(selectedFamily)) ||
-			strings.Contains(strings.ToLower(selectedFamily), strings.ToLower(family.Anchor)) {
-			return family.Anchor, nil
-		}
-	}
-
-	// Default fallback
-	return "llama3", nil
-}
--- a/.github/gallery-agent/gallery.go
+++ b/.github/gallery-agent/gallery.go
@@ -1,203 +0,0 @@
-package main
-
-import (
-	"context"
-	"fmt"
-	"os"
-	"strings"
-)
-
-// generateYAMLEntry generates a YAML entry for a model using the specified anchor
-func generateYAMLEntry(model ProcessedModel, familyAnchor string) string {
-	// Extract model name from ModelID
-	parts := strings.Split(model.ModelID, "/")
-	modelName := model.ModelID
-	if len(parts) > 0 {
-		modelName = strings.ToLower(parts[len(parts)-1])
-	}
-	// Remove common suffixes
-	modelName = strings.ReplaceAll(modelName, "-gguf", "")
-	modelName = strings.ReplaceAll(modelName, "-q4_k_m", "")
-	modelName = strings.ReplaceAll(modelName, "-q4_k_s", "")
-	modelName = strings.ReplaceAll(modelName, "-q3_k_m", "")
-	modelName = strings.ReplaceAll(modelName, "-q2_k", "")
-
-	fileName := ""
-	checksum := ""
-	if model.PreferredModelFile != nil {
-		fileParts := strings.Split(model.PreferredModelFile.Path, "/")
-		if len(fileParts) > 0 {
-			fileName = fileParts[len(fileParts)-1]
-		}
-		checksum = model.PreferredModelFile.SHA256
-	} else {
-		fileName = model.ModelID
-	}
-
-	description := model.ReadmeContent
-	if description == "" {
-		description = fmt.Sprintf("AI model: %s", modelName)
-	}
-
-	// Clean up description to prevent YAML linting issues
-	description = cleanTextContent(description)
-
-	// Format description for YAML (indent each line and ensure no trailing spaces)
-	lines := strings.Split(description, "\n")
-	var formattedLines []string
-	for _, line := range lines {
-		if strings.TrimSpace(line) == "" {
-			// Keep empty lines as empty (no indentation)
-			formattedLines = append(formattedLines, "")
-		} else {
-			// Add indentation to non-empty lines
-			formattedLines = append(formattedLines, "    "+line)
-		}
-	}
-	formattedDescription := strings.Join(formattedLines, "\n")
-	// Remove any trailing spaces from the formatted description
-	formattedDescription = strings.TrimRight(formattedDescription, " \t")
-	yamlTemplate := ""
-	if checksum != "" {
-		yamlTemplate = `- !!merge <<: *%s
-  name: "%s"
-  urls:
-    - https://huggingface.co/%s
-  description: |
-%s
-  overrides:
-    parameters:
-      model: %s
-  files:
-    - filename: %s
-      sha256: %s
-      uri: huggingface://%s/%s`
-		return fmt.Sprintf(yamlTemplate,
-			familyAnchor,
-			modelName,
-			model.ModelID,
-			formattedDescription,
-			fileName,
-			fileName,
-			checksum,
-			model.ModelID,
-			fileName,
-		)
-	} else {
-		yamlTemplate = `- !!merge <<: *%s
-  name: "%s"
-  urls:
-    - https://huggingface.co/%s
-  description: |
-%s
-  overrides:
-    parameters:
-      model: %s`
-		return fmt.Sprintf(yamlTemplate,
-			familyAnchor,
-			modelName,
-			model.ModelID,
-			formattedDescription,
-			fileName,
-		)
-	}
-}
-
-// extractModelFamilies extracts all YAML anchors from the gallery index.yaml file
-func extractModelFamilies() ([]ModelFamily, error) {
-	// Read the index.yaml file
-	indexPath := getGalleryIndexPath()
-	content, err := os.ReadFile(indexPath)
-	if err != nil {
-		return nil, fmt.Errorf("failed to read %s: %w", indexPath, err)
-	}
-
-	lines := strings.Split(string(content), "\n")
-	var families []ModelFamily
-
-	for _, line := range lines {
-		line = strings.TrimSpace(line)
-		// Look for YAML anchors (lines starting with "- &")
-		if strings.HasPrefix(line, "- &") {
-			// Extract the anchor name (everything after "- &")
-			anchor := strings.TrimPrefix(line, "- &")
-			// Remove any trailing colon or other characters
-			anchor = strings.Split(anchor, ":")[0]
-			anchor = strings.Split(anchor, " ")[0]
-
-			if anchor != "" {
-				families = append(families, ModelFamily{
-					Anchor: anchor,
-					Name:   anchor, // Use anchor as name for now
-				})
-			}
-		}
-	}
-
-	return families, nil
-}
-
-// generateYAMLForModels generates YAML entries for selected models and appends to index.yaml
-func generateYAMLForModels(ctx context.Context, models []ProcessedModel) error {
-	// Extract available model families
-	families, err := extractModelFamilies()
-	if err != nil {
-		return fmt.Errorf("failed to extract model families: %w", err)
-	}
-
-	fmt.Printf("Found %d model families: %v\n", len(families),
-		func() []string {
-			var names []string
-			for _, f := range families {
-				names = append(names, f.Anchor)
-			}
-			return names
-		}())
-
-	// Generate YAML entries for each model
-	var yamlEntries []string
-	for _, model := range models {
-		fmt.Printf("Selecting family for model: %s\n", model.ModelID)
-
-		// Select appropriate family for this model
-		familyAnchor, err := selectModelFamily(ctx, model, families)
-		if err != nil {
-			fmt.Printf("Error selecting family for %s: %v, using default\n", model.ModelID, err)
-			familyAnchor = "llama3" // Default fallback
-		}
-
-		fmt.Printf("Selected family '%s' for model %s\n", familyAnchor, model.ModelID)
-
-		// Generate YAML entry
-		yamlEntry := generateYAMLEntry(model, familyAnchor)
-		yamlEntries = append(yamlEntries, yamlEntry)
-	}
-
-	// Append to index.yaml
-	if len(yamlEntries) > 0 {
-		indexPath := getGalleryIndexPath()
-		fmt.Printf("Appending YAML entries to %s...\n", indexPath)
-
-		// Read current content
-		content, err := os.ReadFile(indexPath)
-		if err != nil {
-			return fmt.Errorf("failed to read %s: %w", indexPath, err)
-		}
-
-		// Append new entries
-		// Remove trailing whitespace from existing content and join entries without extra newlines
-		existingContent := strings.TrimRight(string(content), " \t\n\r")
-		yamlBlock := strings.Join(yamlEntries, "\n")
-		newContent := existingContent + "\n" + yamlBlock + "\n"
-
-		// Write back to file
-		err = os.WriteFile(indexPath, []byte(newContent), 0644)
-		if err != nil {
-			return fmt.Errorf("failed to write %s: %w", indexPath, err)
-		}
-
-		fmt.Printf("Successfully added %d models to %s\n", len(yamlEntries), indexPath)
-	}
-
-	return nil
-}
--- a/.github/gallery-agent/main.go
+++ b/.github/gallery-agent/main.go
@@ -1,351 +0,0 @@
-package main
-
-import (
-	"context"
-	"encoding/json"
-	"fmt"
-	"os"
-	"strconv"
-	"strings"
-	"time"
-
-	hfapi "github.com/mudler/LocalAI/pkg/huggingface-api"
-)
-
-// ProcessedModelFile represents a processed model file with additional metadata
-type ProcessedModelFile struct {
-	Path     string `json:"path"`
-	Size     int64  `json:"size"`
-	SHA256   string `json:"sha256"`
-	IsReadme bool   `json:"is_readme"`
-	FileType string `json:"file_type"` // "model", "readme", "other"
-}
-
-// ProcessedModel represents a processed model with all gathered metadata
-type ProcessedModel struct {
-	ModelID                 string               `json:"model_id"`
-	Author                  string               `json:"author"`
-	Downloads               int                  `json:"downloads"`
-	LastModified            string               `json:"last_modified"`
-	Files                   []ProcessedModelFile `json:"files"`
-	PreferredModelFile      *ProcessedModelFile  `json:"preferred_model_file,omitempty"`
-	ReadmeFile              *ProcessedModelFile  `json:"readme_file,omitempty"`
-	ReadmeContent           string               `json:"readme_content,omitempty"`
-	ReadmeContentPreview    string               `json:"readme_content_preview,omitempty"`
-	QuantizationPreferences []string             `json:"quantization_preferences"`
-	ProcessingError         string               `json:"processing_error,omitempty"`
-}
-
-// SearchResult represents the complete result of searching and processing models
-type SearchResult struct {
-	SearchTerm       string           `json:"search_term"`
-	Limit            int              `json:"limit"`
-	Quantization     string           `json:"quantization"`
-	TotalModelsFound int              `json:"total_models_found"`
-	Models           []ProcessedModel `json:"models"`
-	FormattedOutput  string           `json:"formatted_output"`
-}
-
-// AddedModelSummary represents a summary of models added to the gallery
-type AddedModelSummary struct {
-	SearchTerm     string   `json:"search_term"`
-	TotalFound     int      `json:"total_found"`
-	ModelsAdded    int      `json:"models_added"`
-	AddedModelIDs  []string `json:"added_model_ids"`
-	AddedModelURLs []string `json:"added_model_urls"`
-	Quantization   string   `json:"quantization"`
-	ProcessingTime string   `json:"processing_time"`
-}
-
-func main() {
-	startTime := time.Now()
-
-	// Check for synthetic mode
-	syntheticMode := os.Getenv("SYNTHETIC_MODE")
-	if syntheticMode == "true" || syntheticMode == "1" {
-		fmt.Println("Running in SYNTHETIC MODE - generating random test data")
-		err := runSyntheticMode()
-		if err != nil {
-			fmt.Fprintf(os.Stderr, "Error in synthetic mode: %v\n", err)
-			os.Exit(1)
-		}
-		return
-	}
-
-	// Get configuration from environment variables
-	searchTerm := os.Getenv("SEARCH_TERM")
-	if searchTerm == "" {
-		searchTerm = "GGUF"
-	}
-
-	limitStr := os.Getenv("LIMIT")
-	if limitStr == "" {
-		limitStr = "5"
-	}
-	limit, err := strconv.Atoi(limitStr)
-	if err != nil {
-		fmt.Fprintf(os.Stderr, "Error parsing LIMIT: %v\n", err)
-		os.Exit(1)
-	}
-
-	quantization := os.Getenv("QUANTIZATION")
-
-	maxModels := os.Getenv("MAX_MODELS")
-	if maxModels == "" {
-		maxModels = "1"
-	}
-	maxModelsInt, err := strconv.Atoi(maxModels)
-	if err != nil {
-		fmt.Fprintf(os.Stderr, "Error parsing MAX_MODELS: %v\n", err)
-		os.Exit(1)
-	}
-
-	// Print configuration
-	fmt.Printf("Gallery Agent Configuration:\n")
-	fmt.Printf("  Search Term: %s\n", searchTerm)
-	fmt.Printf("  Limit: %d\n", limit)
-	fmt.Printf("  Quantization: %s\n", quantization)
-	fmt.Printf("  Max Models to Add: %d\n", maxModelsInt)
-	fmt.Printf("  Gallery Index Path: %s\n", os.Getenv("GALLERY_INDEX_PATH"))
-	fmt.Println()
-
-	result, err := searchAndProcessModels(searchTerm, limit, quantization)
-	if err != nil {
-		fmt.Fprintf(os.Stderr, "Error: %v\n", err)
-		os.Exit(1)
-	}
-
-	fmt.Println(result.FormattedOutput)
-
-	// Use AI agent to select the most interesting models
-	fmt.Println("Using AI agent to select the most interesting models...")
-	models, err := selectMostInterestingModels(context.Background(), result)
-	if err != nil {
-		fmt.Fprintf(os.Stderr, "Error in model selection: %v\n", err)
-		// Continue with original result if selection fails
-		models = result.Models
-	}
-
-	fmt.Print(models)
-
-	// Filter out models that already exist in the gallery
-	fmt.Println("Filtering out existing models...")
-	models, err = filterExistingModels(models)
-	if err != nil {
-		fmt.Fprintf(os.Stderr, "Error filtering existing models: %v\n", err)
-		os.Exit(1)
-	}
-
-	// Limit to maxModelsInt after filtering
-	if len(models) > maxModelsInt {
-		models = models[:maxModelsInt]
-	}
-
-	// Track added models for summary
-	var addedModelIDs []string
-	var addedModelURLs []string
-
-	// Generate YAML entries and append to gallery/index.yaml
-	if len(models) > 0 {
-		for _, model := range models {
-			addedModelIDs = append(addedModelIDs, model.ModelID)
-			// Generate Hugging Face URL for the model
-			modelURL := fmt.Sprintf("https://huggingface.co/%s", model.ModelID)
-			addedModelURLs = append(addedModelURLs, modelURL)
-		}
-		fmt.Println("Generating YAML entries for selected models...")
-		err = generateYAMLForModels(context.Background(), models)
-		if err != nil {
-			fmt.Fprintf(os.Stderr, "Error generating YAML entries: %v\n", err)
-			os.Exit(1)
-		}
-	} else {
-		fmt.Println("No new models to add to the gallery.")
-	}
-
-	// Create and write summary
-	processingTime := time.Since(startTime).String()
-	summary := AddedModelSummary{
-		SearchTerm:     searchTerm,
-		TotalFound:     result.TotalModelsFound,
-		ModelsAdded:    len(addedModelIDs),
-		AddedModelIDs:  addedModelIDs,
-		AddedModelURLs: addedModelURLs,
-		Quantization:   quantization,
-		ProcessingTime: processingTime,
-	}
-
-	// Write summary to file
-	summaryData, err := json.MarshalIndent(summary, "", "  ")
-	if err != nil {
-		fmt.Fprintf(os.Stderr, "Error marshaling summary: %v\n", err)
-	} else {
-		err = os.WriteFile("gallery-agent-summary.json", summaryData, 0644)
-		if err != nil {
-			fmt.Fprintf(os.Stderr, "Error writing summary file: %v\n", err)
-		} else {
-			fmt.Printf("Summary written to gallery-agent-summary.json\n")
-		}
-	}
-}
-
-func searchAndProcessModels(searchTerm string, limit int, quantization string) (*SearchResult, error) {
-	client := hfapi.NewClient()
-	var outputBuilder strings.Builder
-
-	fmt.Println("Searching for models...")
-	// Initialize the result struct
-	result := &SearchResult{
-		SearchTerm:   searchTerm,
-		Limit:        limit,
-		Quantization: quantization,
-		Models:       []ProcessedModel{},
-	}
-
-	models, err := client.GetLatest(searchTerm, limit)
-	if err != nil {
-		return nil, fmt.Errorf("failed to fetch models: %w", err)
-	}
-
-	fmt.Println("Models found:", len(models))
-	result.TotalModelsFound = len(models)
-
-	if len(models) == 0 {
-		outputBuilder.WriteString("No models found.\n")
-		result.FormattedOutput = outputBuilder.String()
-		return result, nil
-	}
-
-	outputBuilder.WriteString(fmt.Sprintf("Found %d models matching '%s':\n\n", len(models), searchTerm))
-
-	// Process each model
-	for i, model := range models {
-		outputBuilder.WriteString(fmt.Sprintf("%d. Processing Model: %s\n", i+1, model.ModelID))
-		outputBuilder.WriteString(fmt.Sprintf("   Author: %s\n", model.Author))
-		outputBuilder.WriteString(fmt.Sprintf("   Downloads: %d\n", model.Downloads))
-		outputBuilder.WriteString(fmt.Sprintf("   Last Modified: %s\n", model.LastModified))
-
-		// Initialize processed model struct
-		processedModel := ProcessedModel{
-			ModelID:                 model.ModelID,
-			Author:                  model.Author,
-			Downloads:               model.Downloads,
-			LastModified:            model.LastModified,
-			QuantizationPreferences: []string{quantization, "Q4_K_M", "Q4_K_S", "Q3_K_M", "Q2_K"},
-		}
-
-		// Get detailed model information
-		details, err := client.GetModelDetails(model.ModelID)
-		if err != nil {
-			errorMsg := fmt.Sprintf("   Error getting model details: %v\n", err)
-			outputBuilder.WriteString(errorMsg)
-			processedModel.ProcessingError = err.Error()
-			result.Models = append(result.Models, processedModel)
-			continue
-		}
-
-		// Define quantization preferences (in order of preference)
-		quantizationPreferences := []string{quantization, "Q4_K_M", "Q4_K_S", "Q3_K_M", "Q2_K"}
-
-		// Find preferred model file
-		preferredModelFile := hfapi.FindPreferredModelFile(details.Files, quantizationPreferences)
-
-		// Process files
-		processedFiles := make([]ProcessedModelFile, len(details.Files))
-		for j, file := range details.Files {
-			fileType := "other"
-			if file.IsReadme {
-				fileType = "readme"
-			} else if preferredModelFile != nil && file.Path == preferredModelFile.Path {
-				fileType = "model"
-			}
-
-			processedFiles[j] = ProcessedModelFile{
-				Path:     file.Path,
-				Size:     file.Size,
-				SHA256:   file.SHA256,
-				IsReadme: file.IsReadme,
-				FileType: fileType,
-			}
-		}
-
-		processedModel.Files = processedFiles
-
-		// Set preferred model file
-		if preferredModelFile != nil {
-			for _, file := range processedFiles {
-				if file.Path == preferredModelFile.Path {
-					processedModel.PreferredModelFile = &file
-					break
-				}
-			}
-		}
-
-		// Print file information
-		outputBuilder.WriteString(fmt.Sprintf("   Files found: %d\n", len(details.Files)))
-
-		if preferredModelFile != nil {
-			outputBuilder.WriteString(fmt.Sprintf("   Preferred Model File: %s (SHA256: %s)\n",
-				preferredModelFile.Path,
-				preferredModelFile.SHA256))
-		} else {
-			outputBuilder.WriteString(fmt.Sprintf("   No model file found with quantization preferences: %v\n", quantizationPreferences))
-		}
-
-		if details.ReadmeFile != nil {
-			outputBuilder.WriteString(fmt.Sprintf("   README File: %s\n", details.ReadmeFile.Path))
-
-			// Find and set readme file
-			for _, file := range processedFiles {
-				if file.IsReadme {
-					processedModel.ReadmeFile = &file
-					break
-				}
-			}
-
-			fmt.Println("Getting real readme for", model.ModelID, "waiting...")
-			// Use agent to get the real readme and prepare the model description
-			readmeContent, err := getRealReadme(context.Background(), model.ModelID)
-			if err == nil {
-				processedModel.ReadmeContent = readmeContent
-				processedModel.ReadmeContentPreview = truncateString(readmeContent, 200)
-				outputBuilder.WriteString(fmt.Sprintf("   README Content Preview: %s\n",
-					processedModel.ReadmeContentPreview))
-			} else {
-				continue
-			}
-			fmt.Println("Real readme got", readmeContent)
-			// Get README content
-			// readmeContent, err := client.GetReadmeContent(model.ModelID, details.ReadmeFile.Path)
-			// if err == nil {
-			// 	processedModel.ReadmeContent = readmeContent
-			// 	processedModel.ReadmeContentPreview = truncateString(readmeContent, 200)
-			// 	outputBuilder.WriteString(fmt.Sprintf("   README Content Preview: %s\n",
-			// 		processedModel.ReadmeContentPreview))
-			// }
-		}
-
-		// Print all files with their checksums
-		outputBuilder.WriteString("   All Files:\n")
-		for _, file := range processedFiles {
-			outputBuilder.WriteString(fmt.Sprintf("     - %s (%s, %d bytes", file.Path, file.FileType, file.Size))
-			if file.SHA256 != "" {
-				outputBuilder.WriteString(fmt.Sprintf(", SHA256: %s", file.SHA256))
-			}
-			outputBuilder.WriteString(")\n")
-		}
-
-		outputBuilder.WriteString("\n")
-		result.Models = append(result.Models, processedModel)
-	}
-
-	result.FormattedOutput = outputBuilder.String()
-	return result, nil
-}
-
-func truncateString(s string, maxLen int) string {
-	if len(s) <= maxLen {
-		return s
-	}
-	return s[:maxLen] + "..."
-}
--- a/.github/gallery-agent/testing.go
+++ b/.github/gallery-agent/testing.go
@@ -1,190 +0,0 @@
-package main
-
-import (
-	"context"
-	"fmt"
-	"math/rand"
-	"strings"
-	"time"
-)
-
-// runSyntheticMode generates synthetic test data and appends it to the gallery
-func runSyntheticMode() error {
-	generator := NewSyntheticDataGenerator()
-
-	// Generate a random number of synthetic models (1-3)
-	numModels := generator.rand.Intn(3) + 1
-	fmt.Printf("Generating %d synthetic models for testing...\n", numModels)
-
-	var models []ProcessedModel
-	for i := 0; i < numModels; i++ {
-		model := generator.GenerateProcessedModel()
-		models = append(models, model)
-		fmt.Printf("Generated synthetic model: %s\n", model.ModelID)
-	}
-
-	// Generate YAML entries and append to gallery/index.yaml
-	fmt.Println("Generating YAML entries for synthetic models...")
-	err := generateYAMLForModels(context.Background(), models)
-	if err != nil {
-		return fmt.Errorf("error generating YAML entries: %w", err)
-	}
-
-	fmt.Printf("Successfully added %d synthetic models to the gallery for testing!\n", len(models))
-	return nil
-}
-
-// SyntheticDataGenerator provides methods to generate synthetic test data
-type SyntheticDataGenerator struct {
-	rand *rand.Rand
-}
-
-// NewSyntheticDataGenerator creates a new synthetic data generator
-func NewSyntheticDataGenerator() *SyntheticDataGenerator {
-	return &SyntheticDataGenerator{
-		rand: rand.New(rand.NewSource(time.Now().UnixNano())),
-	}
-}
-
-// GenerateProcessedModelFile creates a synthetic ProcessedModelFile
-func (g *SyntheticDataGenerator) GenerateProcessedModelFile() ProcessedModelFile {
-	fileTypes := []string{"model", "readme", "other"}
-	fileType := fileTypes[g.rand.Intn(len(fileTypes))]
-
-	var path string
-	var isReadme bool
-
-	switch fileType {
-	case "model":
-		path = fmt.Sprintf("model-%s.gguf", g.randomString(8))
-		isReadme = false
-	case "readme":
-		path = "README.md"
-		isReadme = true
-	default:
-		path = fmt.Sprintf("file-%s.txt", g.randomString(6))
-		isReadme = false
-	}
-
-	return ProcessedModelFile{
-		Path:     path,
-		Size:     int64(g.rand.Intn(1000000000) + 1000000), // 1MB to 1GB
-		SHA256:   g.randomSHA256(),
-		IsReadme: isReadme,
-		FileType: fileType,
-	}
-}
-
-// GenerateProcessedModel creates a synthetic ProcessedModel
-func (g *SyntheticDataGenerator) GenerateProcessedModel() ProcessedModel {
-	authors := []string{"microsoft", "meta", "google", "openai", "anthropic", "mistralai", "huggingface"}
-	modelNames := []string{"llama", "gpt", "claude", "mistral", "gemma", "phi", "qwen", "codellama"}
-
-	author := authors[g.rand.Intn(len(authors))]
-	modelName := modelNames[g.rand.Intn(len(modelNames))]
-	modelID := fmt.Sprintf("%s/%s-%s", author, modelName, g.randomString(6))
-
-	// Generate files
-	numFiles := g.rand.Intn(5) + 2 // 2-6 files
-	files := make([]ProcessedModelFile, numFiles)
-
-	// Ensure at least one model file and one readme
-	hasModelFile := false
-	hasReadme := false
-
-	for i := 0; i < numFiles; i++ {
-		files[i] = g.GenerateProcessedModelFile()
-		if files[i].FileType == "model" {
-			hasModelFile = true
-		}
-		if files[i].FileType == "readme" {
-			hasReadme = true
-		}
-	}
-
-	// Add required files if missing
-	if !hasModelFile {
-		modelFile := g.GenerateProcessedModelFile()
-		modelFile.FileType = "model"
-		modelFile.Path = fmt.Sprintf("%s-Q4_K_M.gguf", modelName)
-		files = append(files, modelFile)
-	}
-
-	if !hasReadme {
-		readmeFile := g.GenerateProcessedModelFile()
-		readmeFile.FileType = "readme"
-		readmeFile.Path = "README.md"
-		readmeFile.IsReadme = true
-		files = append(files, readmeFile)
-	}
-
-	// Find preferred model file
-	var preferredModelFile *ProcessedModelFile
-	for i := range files {
-		if files[i].FileType == "model" {
-			preferredModelFile = &files[i]
-			break
-		}
-	}
-
-	// Find readme file
-	var readmeFile *ProcessedModelFile
-	for i := range files {
-		if files[i].FileType == "readme" {
-			readmeFile = &files[i]
-			break
-		}
-	}
-
-	readmeContent := g.generateReadmeContent(modelName, author)
-
-	return ProcessedModel{
-		ModelID:                 modelID,
-		Author:                  author,
-		Downloads:               g.rand.Intn(1000000) + 1000,
-		LastModified:            g.randomDate(),
-		Files:                   files,
-		PreferredModelFile:      preferredModelFile,
-		ReadmeFile:              readmeFile,
-		ReadmeContent:           readmeContent,
-		ReadmeContentPreview:    truncateString(readmeContent, 200),
-		QuantizationPreferences: []string{"Q4_K_M", "Q4_K_S", "Q3_K_M", "Q2_K"},
-		ProcessingError:         "",
-	}
-}
-
-// Helper methods for synthetic data generation
-func (g *SyntheticDataGenerator) randomString(length int) string {
-	const charset = "abcdefghijklmnopqrstuvwxyz0123456789"
-	b := make([]byte, length)
-	for i := range b {
-		b[i] = charset[g.rand.Intn(len(charset))]
-	}
-	return string(b)
-}
-
-func (g *SyntheticDataGenerator) randomSHA256() string {
-	const charset = "0123456789abcdef"
-	b := make([]byte, 64)
-	for i := range b {
-		b[i] = charset[g.rand.Intn(len(charset))]
-	}
-	return string(b)
-}
-
-func (g *SyntheticDataGenerator) randomDate() string {
-	now := time.Now()
-	daysAgo := g.rand.Intn(365) // Random date within last year
-	pastDate := now.AddDate(0, 0, -daysAgo)
-	return pastDate.Format("2006-01-02T15:04:05.000Z")
-}
-
-func (g *SyntheticDataGenerator) generateReadmeContent(modelName, author string) string {
-	templates := []string{
-		fmt.Sprintf("# %s Model\n\nThis is a %s model developed by %s. It's designed for various natural language processing tasks including text generation, question answering, and conversation.\n\n## Features\n\n- High-quality text generation\n- Efficient inference\n- Multiple quantization options\n- Easy to use with LocalAI\n\n## Usage\n\nUse this model with LocalAI for various AI tasks.", strings.Title(modelName), modelName, author),
-		fmt.Sprintf("# %s\n\nA powerful language model from %s. This model excels at understanding and generating human-like text across multiple domains.\n\n## Capabilities\n\n- Text completion\n- Code generation\n- Creative writing\n- Technical documentation\n\n## Model Details\n\n- Architecture: Transformer-based\n- Training: Large-scale supervised learning\n- Quantization: Available in multiple formats", strings.Title(modelName), author),
-		fmt.Sprintf("# %s Language Model\n\nDeveloped by %s, this model represents state-of-the-art performance in natural language understanding and generation.\n\n## Key Features\n\n- Multilingual support\n- Context-aware responses\n- Efficient memory usage\n- Fast inference speed\n\n## Applications\n\n- Chatbots and virtual assistants\n- Content generation\n- Code completion\n- Educational tools", strings.Title(modelName), author),
-	}
-
-	return templates[g.rand.Intn(len(templates))]
-}
--- a/.github/gallery-agent/tools.go
+++ b/.github/gallery-agent/tools.go
@@ -1,46 +0,0 @@
-package main
-
-import (
-	"fmt"
-
-	hfapi "github.com/mudler/LocalAI/pkg/huggingface-api"
-	openai "github.com/sashabaranov/go-openai"
-	jsonschema "github.com/sashabaranov/go-openai/jsonschema"
-)
-
-// Get repository README from HF
-type HFReadmeTool struct {
-	client *hfapi.Client
-}
-
-func (s *HFReadmeTool) Execute(args map[string]any) (string, error) {
-	q, ok := args["repository"].(string)
-	if !ok {
-		return "", fmt.Errorf("no query")
-	}
-	readme, err := s.client.GetReadmeContent(q, "README.md")
-	if err != nil {
-		return "", err
-	}
-	return readme, nil
-}
-
-func (s *HFReadmeTool) Tool() openai.Tool {
-	return openai.Tool{
-		Type: openai.ToolTypeFunction,
-		Function: &openai.FunctionDefinition{
-			Name:        "hf_readme",
-			Description: "A tool to get the README content of a huggingface repository",
-			Parameters: jsonschema.Definition{
-				Type: jsonschema.Object,
-				Properties: map[string]jsonschema.Definition{
-					"repository": {
-						Type:        jsonschema.String,
-						Description: "The huggingface repository to get the README content of",
-					},
-				},
-				Required: []string{"repository"},
-			},
-		},
-	}
-}
--- a/.github/workflows/backend.yml
+++ b/.github/workflows/backend.yml
--- a/.github/workflows/backend_build.yml
+++ b/.github/workflows/backend_build.yml
@@ -97,7 +97,7 @@ jobs:
          && sudo apt-get install -y git

      - name: Checkout
-        uses: actions/checkout@v6
+        uses: actions/checkout@v5

      - name: Release space from worker
        if: inputs.runs-on == 'ubuntu-latest'
--- a/.github/workflows/backend_build_darwin.yml
+++ b/.github/workflows/backend_build_darwin.yml
@@ -50,7 +50,7 @@ jobs:
        go-version: ['${{ inputs.go-version }}']
    steps:
      - name: Clone
-        uses: actions/checkout@v6
+        uses: actions/checkout@v5
        with:
          submodules: true

@@ -74,7 +74,7 @@ jobs:
          BACKEND=${{ inputs.backend }} BUILD_TYPE=${{ inputs.build-type }} USE_PIP=${{ inputs.use-pip }} make build-darwin-${{ inputs.lang }}-backend

      - name: Upload ${{ inputs.backend }}.tar
-        uses: actions/upload-artifact@v5
+        uses: actions/upload-artifact@v4
        with:
          name: ${{ inputs.backend }}-tar
          path: backend-images/${{ inputs.backend }}.tar
@@ -85,7 +85,7 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Download ${{ inputs.backend }}.tar
-        uses: actions/download-artifact@v6
+        uses: actions/download-artifact@v5
        with:
          name: ${{ inputs.backend }}-tar
          path: .
--- a/.github/workflows/backend_pr.yml
+++ b/.github/workflows/backend_pr.yml
@@ -17,7 +17,7 @@ jobs:
      has-backends-darwin: ${{ steps.set-matrix.outputs.has-backends-darwin }}
    steps:
      - name: Checkout repository
-        uses: actions/checkout@v6
+        uses: actions/checkout@v5

      - name: Setup Bun
        uses: oven-sh/setup-bun@v2
--- a/.github/workflows/build-test.yaml
+++ b/.github/workflows/build-test.yaml
@@ -11,13 +11,13 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
-        uses: actions/checkout@v6
+        uses: actions/checkout@v5
        with:
          fetch-depth: 0
      - name: Set up Go
        uses: actions/setup-go@v5
        with:
-          go-version: 1.25
+          go-version: 1.23
      - name: Run GoReleaser
        run: |
          make dev-dist
@@ -25,19 +25,19 @@ jobs:
    runs-on: macos-latest
    steps:
      - name: Checkout
-        uses: actions/checkout@v6
+        uses: actions/checkout@v5
        with:
          fetch-depth: 0
      - name: Set up Go
        uses: actions/setup-go@v5
        with:
-          go-version: 1.25
+          go-version: 1.23
      - name: Build launcher for macOS ARM64
        run: |
          make build-launcher-darwin
          ls -liah dist
      - name: Upload macOS launcher artifacts
-        uses: actions/upload-artifact@v5
+        uses: actions/upload-artifact@v4
        with:
          name: launcher-macos
          path: dist/
@@ -47,20 +47,20 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
-        uses: actions/checkout@v6
+        uses: actions/checkout@v5
        with:
          fetch-depth: 0
      - name: Set up Go
        uses: actions/setup-go@v5
        with:
-          go-version: 1.25
+          go-version: 1.23
      - name: Build launcher for Linux
        run: |
          sudo apt-get update
          sudo apt-get install golang gcc libgl1-mesa-dev xorg-dev libxkbcommon-dev
          make build-launcher-linux
      - name: Upload Linux launcher artifacts
-        uses: actions/upload-artifact@v5
+        uses: actions/upload-artifact@v4
        with:
          name: launcher-linux
          path: local-ai-launcher-linux.tar.xz
--- a/.github/workflows/bump_deps.yaml
+++ b/.github/workflows/bump_deps.yaml
@@ -1,10 +1,10 @@
-name: Bump Backend dependencies
+name: Bump dependencies
 on:
  schedule:
    - cron: 0 20 * * *
  workflow_dispatch:
 jobs:
-  bump-backends:
+  bump:
    strategy:
      fail-fast: false
      matrix:
@@ -31,7 +31,7 @@ jobs:
            file: "backend/go/piper/Makefile"
    runs-on: ubuntu-latest
    steps:
-      - uses: actions/checkout@v6
+      - uses: actions/checkout@v5
      - name: Bump dependencies 🔧
        id: bump
        run: |
--- a/.github/workflows/bump_docs.yaml
+++ b/.github/workflows/bump_docs.yaml
@@ -1,10 +1,10 @@
-name: Bump Documentation
+name: Bump dependencies
 on:
  schedule:
    - cron: 0 20 * * *
  workflow_dispatch:
 jobs:
-  bump-docs:
+  bump:
    strategy:
      fail-fast: false
      matrix:
@@ -12,7 +12,7 @@ jobs:
          - repository: "mudler/LocalAI"
    runs-on: ubuntu-latest
    steps:
-      - uses: actions/checkout@v6
+      - uses: actions/checkout@v5
      - name: Bump dependencies 🔧
        run: |
          bash .github/bump_docs.sh ${{ matrix.repository }}
--- a/.github/workflows/checksum_checker.yaml
+++ b/.github/workflows/checksum_checker.yaml
@@ -15,7 +15,7 @@ jobs:
          && sudo add-apt-repository -y ppa:git-core/ppa \
          && sudo apt-get update \
          && sudo apt-get install -y git
-      - uses: actions/checkout@v6
+      - uses: actions/checkout@v5
      - name: Install dependencies
        run: |
          sudo apt-get update
--- a/.github/workflows/dependabot_auto.yml
+++ b/.github/workflows/dependabot_auto.yml
@@ -20,7 +20,7 @@ jobs:
          skip-commit-verification: true

      - name: Checkout repository
-        uses: actions/checkout@v6
+        uses: actions/checkout@v5

      - name: Approve a PR if not already approved
        run: |
--- a/.github/workflows/deploy-explorer.yaml
+++ b/.github/workflows/deploy-explorer.yaml
@@ -15,7 +15,7 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Clone
-        uses: actions/checkout@v6
+        uses: actions/checkout@v5
        with:
          submodules: true
      - uses: actions/setup-go@v5
@@ -33,7 +33,7 @@ jobs:
        run: |
          CGO_ENABLED=0 make build
      - name: rm
-        uses: appleboy/ssh-action@v1.2.3
+        uses: appleboy/ssh-action@v1.2.2
        with:
            host: ${{ secrets.EXPLORER_SSH_HOST }}
            username: ${{ secrets.EXPLORER_SSH_USERNAME }}
@@ -53,7 +53,7 @@ jobs:
            rm: true
            target: ./local-ai
      - name: restarting
-        uses: appleboy/ssh-action@v1.2.3
+        uses: appleboy/ssh-action@v1.2.2
        with:
            host: ${{ secrets.EXPLORER_SSH_HOST }}
            username: ${{ secrets.EXPLORER_SSH_USERNAME }}
--- a/.github/workflows/gallery-agent.yaml
+++ b/.github/workflows/gallery-agent.yaml
@@ -1,119 +0,0 @@
-name: Gallery Agent
-on:
-
-  schedule:
-    - cron: '0 */3 * * *'  # Run every 3 hours
-  workflow_dispatch:
-    inputs:
-      search_term:
-        description: 'Search term for models'
-        required: false
-        default: 'GGUF'
-        type: string
-      limit:
-        description: 'Maximum number of models to process'
-        required: false
-        default: '15'
-        type: string
-      quantization:
-        description: 'Preferred quantization format'
-        required: false
-        default: 'Q4_K_M'
-        type: string
-      max_models:
-        description: 'Maximum number of models to add to the gallery'
-        required: false
-        default: '1'
-        type: string
-jobs:
-  gallery-agent:
-    runs-on: ubuntu-latest
-    steps:
-      - name: Checkout repository
-        uses: actions/checkout@v6
-        with:
-          token: ${{ secrets.GITHUB_TOKEN }}
-
-      - name: Set up Go
-        uses: actions/setup-go@v5
-        with:
-          go-version: '1.21'
-
-
-      - name: Run gallery agent
-        env:
-          OPENAI_MODEL: ${{ secrets.OPENAI_MODEL }}
-          OPENAI_KEY: ${{ secrets.OPENAI_KEY }}
-          OPENAI_BASE_URL: ${{ secrets.OPENAI_BASE_URL }}
-          SEARCH_TERM: ${{ github.event.inputs.search_term || 'GGUF' }}
-          LIMIT: ${{ github.event.inputs.limit || '15' }}
-          QUANTIZATION: ${{ github.event.inputs.quantization || 'Q4_K_M' }}
-          MAX_MODELS: ${{ github.event.inputs.max_models || '1' }}
-        run: |
-          export GALLERY_INDEX_PATH=$PWD/gallery/index.yaml
-          go run .github/gallery-agent
-
-      - name: Check for changes
-        id: check_changes
-        run: |
-          if git diff --quiet gallery/index.yaml; then
-            echo "changes=false" >> $GITHUB_OUTPUT
-            echo "No changes detected in gallery/index.yaml"
-          else
-            echo "changes=true" >> $GITHUB_OUTPUT
-            echo "Changes detected in gallery/index.yaml"
-            git diff gallery/index.yaml
-          fi
-
-      - name: Read gallery agent summary
-        id: read_summary
-        if: steps.check_changes.outputs.changes == 'true'
-        run: |
-          if [ -f ".github/gallery-agent/gallery-agent-summary.json" ]; then
-            echo "summary_exists=true" >> $GITHUB_OUTPUT
-            # Extract summary data using jq
-            echo "search_term=$(jq -r '.search_term' .github/gallery-agent/gallery-agent-summary.json)" >> $GITHUB_OUTPUT
-            echo "total_found=$(jq -r '.total_found' .github/gallery-agent/gallery-agent-summary.json)" >> $GITHUB_OUTPUT
-            echo "models_added=$(jq -r '.models_added' .github/gallery-agent/gallery-agent-summary.json)" >> $GITHUB_OUTPUT
-            echo "quantization=$(jq -r '.quantization' .github/gallery-agent/gallery-agent-summary.json)" >> $GITHUB_OUTPUT
-            echo "processing_time=$(jq -r '.processing_time' .github/gallery-agent/gallery-agent-summary.json)" >> $GITHUB_OUTPUT
-            
-            # Create a formatted list of added models with URLs
-            added_models=$(jq -r 'range(0; .added_model_ids | length) as $i | "- [\(.added_model_ids[$i])](\(.added_model_urls[$i]))"' .github/gallery-agent/gallery-agent-summary.json | tr '\n' '\n')
-            echo "added_models<<EOF" >> $GITHUB_OUTPUT
-            echo "$added_models" >> $GITHUB_OUTPUT
-            echo "EOF" >> $GITHUB_OUTPUT
-            rm -f .github/gallery-agent/gallery-agent-summary.json
-          else
-            echo "summary_exists=false" >> $GITHUB_OUTPUT
-          fi
-
-      - name: Create Pull Request
-        if: steps.check_changes.outputs.changes == 'true'
-        uses: peter-evans/create-pull-request@v7
-        with:
-          token: ${{ secrets.UPDATE_BOT_TOKEN }}
-          push-to-fork: ci-forks/LocalAI
-          commit-message: 'chore(model gallery): :robot: add new models via gallery agent'
-          title: 'chore(model gallery): :robot: add ${{ steps.read_summary.outputs.models_added || 0 }} new models via gallery agent'
-          # Branch has to be unique so PRs are not overriding each other
-          branch-suffix: timestamp
-          body: |
-            This PR was automatically created by the gallery agent workflow.
-            
-            **Summary:**
-            - **Search Term:** ${{ steps.read_summary.outputs.search_term || github.event.inputs.search_term || 'GGUF' }}
-            - **Models Found:** ${{ steps.read_summary.outputs.total_found || 'N/A' }}
-            - **Models Added:** ${{ steps.read_summary.outputs.models_added || '0' }}
-            - **Quantization:** ${{ steps.read_summary.outputs.quantization || github.event.inputs.quantization || 'Q4_K_M' }}
-            - **Processing Time:** ${{ steps.read_summary.outputs.processing_time || 'N/A' }}
-            
-            **Added Models:**
-            ${{ steps.read_summary.outputs.added_models || '- No models added' }}
-            
-            **Workflow Details:**
-            - Triggered by: `${{ github.event_name }}`
-            - Run ID: `${{ github.run_id }}`
-            - Commit: `${{ github.sha }}`
-          signoff: true
-          delete-branch: true
--- a/.github/workflows/generate_grpc_cache.yaml
+++ b/.github/workflows/generate_grpc_cache.yaml
@@ -73,7 +73,7 @@ jobs:
        uses: docker/setup-buildx-action@master

      - name: Checkout
-        uses: actions/checkout@v6
+        uses: actions/checkout@v5

      - name: Cache GRPC
        uses: docker/build-push-action@v6
--- a/.github/workflows/generate_intel_image.yaml
+++ b/.github/workflows/generate_intel_image.yaml
@@ -16,7 +16,7 @@ jobs:
      matrix:
        include:
          - base-image: intel/oneapi-basekit:2025.2.0-0-devel-ubuntu22.04
-            runs-on: 'arc-runner-set'
+            runs-on: 'ubuntu-latest'
            platforms: 'linux/amd64'
    runs-on: ${{matrix.runs-on}}
    steps:
@@ -43,7 +43,7 @@ jobs:
        uses: docker/setup-buildx-action@master

      - name: Checkout
-        uses: actions/checkout@v6
+        uses: actions/checkout@v5

      - name: Cache Intel images
        uses: docker/build-push-action@v6
--- a/.github/workflows/image-pr.yml
+++ b/.github/workflows/image-pr.yml
@@ -60,7 +60,7 @@ jobs:
            runs-on: 'ubuntu-latest'
            makeflags: "--jobs=3 --output-sync=target"
          - build-type: 'vulkan'
-            platforms: 'linux/amd64'
+            platforms: 'linux/amd64,linux/arm64'
            tag-latest: 'false'
            tag-suffix: '-vulkan-core'
            runs-on: 'ubuntu-latest'
--- a/.github/workflows/image.yml
+++ b/.github/workflows/image.yml
@@ -101,7 +101,7 @@ jobs:
            makeflags: "--jobs=4 --output-sync=target"
            aio: "-aio-gpu-nvidia-cuda-12"
          - build-type: 'vulkan'
-            platforms: 'linux/amd64'
+            platforms: 'linux/amd64,linux/arm64'
            tag-latest: 'auto'
            tag-suffix: '-gpu-vulkan'
            runs-on: 'ubuntu-latest'
--- a/.github/workflows/image_build.yml
+++ b/.github/workflows/image_build.yml
@@ -94,7 +94,7 @@ jobs:
          && sudo apt-get update \
          && sudo apt-get install -y git
      - name: Checkout
-        uses: actions/checkout@v6
+        uses: actions/checkout@v5

      - name: Release space from worker
        if: inputs.runs-on == 'ubuntu-latest'
--- a/.github/workflows/localaibot_automerge.yml
+++ b/.github/workflows/localaibot_automerge.yml
@@ -11,10 +11,10 @@ permissions:
 jobs:
  dependabot:
    runs-on: ubuntu-latest
-    if: ${{ github.actor == 'localai-bot' && !contains(github.event.pull_request.title, 'chore(model gallery):') }}
+    if: ${{ github.actor == 'localai-bot' }}
    steps:
      - name: Checkout repository
-        uses: actions/checkout@v6
+        uses: actions/checkout@v5

      - name: Approve a PR if not already approved
        run: |
--- a/.github/workflows/notify-models.yaml
+++ b/.github/workflows/notify-models.yaml
@@ -1,27 +1,22 @@
 name: Notifications for new models
 on:
-  pull_request_target:
+  pull_request:
     types:
       - closed

-permissions:
-  contents: read
-  pull-requests: read
-
 jobs:
  notify-discord:
    if: ${{ (github.event.pull_request.merged == true) && (contains(github.event.pull_request.labels.*.name, 'area/ai-model')) }}
    env:
-        MODEL_NAME: gemma-3-12b-it-qat
+        MODEL_NAME: gemma-3-12b-it
    runs-on: ubuntu-latest
    steps:
-    - uses: actions/checkout@v6
+    - uses: actions/checkout@v5
      with:
        fetch-depth: 0 # needed to checkout all branches for this Action to work
-        ref: ${{ github.event.pull_request.head.sha }} # Checkout the PR head to get the actual changes
    - uses: mudler/localai-github-action@v1
      with:
-        model: 'gemma-3-12b-it-qat' # Any from models.localai.io, or from huggingface.com with: "huggingface://<repository>/file"
+        model: 'gemma-3-12b-it' # Any from models.localai.io, or from huggingface.com with: "huggingface://<repository>/file"
        # Check the PR diff using the current branch and the base branch of the PR
    - uses: GrantBirki/git-diff-action@v2.8.1
      id: git-diff-action
@@ -84,7 +79,7 @@ jobs:
        args: ${{ steps.summarize.outputs.message }}
    - name: Setup tmate session if fails
      if: ${{ failure() }}
-      uses: mxschmitt/action-tmate@v3.23
+      uses: mxschmitt/action-tmate@v3.22
      with:
        detached: true
        connect-timeout-seconds: 180
@@ -92,13 +87,12 @@ jobs:
  notify-twitter:
    if: ${{ (github.event.pull_request.merged == true) && (contains(github.event.pull_request.labels.*.name, 'area/ai-model')) }}
    env:
-        MODEL_NAME: gemma-3-12b-it-qat
+        MODEL_NAME: gemma-3-12b-it
    runs-on: ubuntu-latest
    steps:
-    - uses: actions/checkout@v6
+    - uses: actions/checkout@v5
      with:
        fetch-depth: 0 # needed to checkout all branches for this Action to work
-        ref: ${{ github.event.pull_request.head.sha }} # Checkout the PR head to get the actual changes
    - name: Start LocalAI
      run: |
        echo "Starting LocalAI..."
@@ -167,7 +161,7 @@ jobs:
        TWITTER_ACCESS_TOKEN_SECRET: ${{ secrets.TWITTER_ACCESS_TOKEN_SECRET }}
    - name: Setup tmate session if fails
      if: ${{ failure() }}
-      uses: mxschmitt/action-tmate@v3.23
+      uses: mxschmitt/action-tmate@v3.22
      with:
        detached: true
        connect-timeout-seconds: 180
--- a/.github/workflows/notify-releases.yaml
+++ b/.github/workflows/notify-releases.yaml
@@ -11,11 +11,10 @@ jobs:
        RELEASE_BODY: ${{ github.event.release.body }}
        RELEASE_TITLE: ${{ github.event.release.name }}
        RELEASE_TAG_NAME: ${{ github.event.release.tag_name }}
-        MODEL_NAME: gemma-3-12b-it-qat
    steps:
    - uses: mudler/localai-github-action@v1
      with:
-        model: 'gemma-3-12b-it-qat' # Any from models.localai.io, or from huggingface.com with: "huggingface://<repository>/file"
+        model: 'gemma-3-12b-it' # Any from models.localai.io, or from huggingface.com with: "huggingface://<repository>/file"
    - name: Summarize
      id: summarize
      run: |
--- a/.github/workflows/release.yaml
+++ b/.github/workflows/release.yaml
@@ -10,7 +10,7 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
-        uses: actions/checkout@v6
+        uses: actions/checkout@v5
        with:
          fetch-depth: 0
      - name: Set up Go
@@ -28,7 +28,7 @@ jobs:
    runs-on: macos-latest
    steps:
      - name: Checkout
-        uses: actions/checkout@v6
+        uses: actions/checkout@v5
        with:
          fetch-depth: 0
      - name: Set up Go
@@ -46,7 +46,7 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
-        uses: actions/checkout@v6
+        uses: actions/checkout@v5
        with:
          fetch-depth: 0
      - name: Set up Go
--- a/.github/workflows/secscan.yaml
+++ b/.github/workflows/secscan.yaml
@@ -14,7 +14,7 @@ jobs:
      GO111MODULE: on
    steps:
      - name: Checkout Source
-        uses: actions/checkout@v6
+        uses: actions/checkout@v5
        if: ${{ github.actor != 'dependabot[bot]' }}
      - name: Run Gosec Security Scanner
        if: ${{ github.actor != 'dependabot[bot]' }}
@@ -24,7 +24,7 @@ jobs:
          args: '-no-fail -fmt sarif -out results.sarif ./...'
      - name: Upload SARIF file
        if: ${{ github.actor != 'dependabot[bot]' }}
-        uses: github/codeql-action/upload-sarif@v4
+        uses: github/codeql-action/upload-sarif@v3
        with:
          # Path to SARIF file relative to the root of the repository
          sarif_file: results.sarif
--- a/.github/workflows/stalebot.yml
+++ b/.github/workflows/stalebot.yml
@@ -10,7 +10,7 @@ jobs:
  stale:
    runs-on: ubuntu-latest
    steps:
-      - uses: actions/stale@5f858e3efba33a5ca4407a664cc011ad407f2008 # v9
+      - uses: actions/stale@3a9db7e6a41a89f618792c92c0e97cc736e1b13f # v9
        with:
          stale-issue-message: 'This issue is stale because it has been open 90 days with no activity. Remove stale label or comment or this will be closed in 5 days.'
          stale-pr-message: 'This PR is stale because it has been open 90 days with no activity. Remove stale label or comment or this will be closed in 10 days.'
--- a/.github/workflows/test-extra.yml
+++ b/.github/workflows/test-extra.yml
@@ -19,7 +19,7 @@ jobs:
  #   runs-on: ubuntu-latest
  #   steps:
  #     - name: Clone
-  #       uses: actions/checkout@v6
+  #       uses: actions/checkout@v5
  #       with:
  #         submodules: true
  #     - name: Dependencies
@@ -40,7 +40,7 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Clone
-        uses: actions/checkout@v6
+        uses: actions/checkout@v5
        with:
          submodules: true
      - name: Dependencies
@@ -61,7 +61,7 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Clone
-        uses: actions/checkout@v6
+        uses: actions/checkout@v5
        with:
          submodules: true
      - name: Dependencies
@@ -83,7 +83,7 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Clone
-        uses: actions/checkout@v6
+        uses: actions/checkout@v5
        with:
          submodules: true
      - name: Dependencies
@@ -104,7 +104,7 @@ jobs:
  #  runs-on: ubuntu-latest
  #  steps:
  #    - name: Clone
-  #      uses: actions/checkout@v6
+  #      uses: actions/checkout@v5
  #      with:
  #        submodules: true
  #    - name: Dependencies
@@ -124,7 +124,7 @@ jobs:
  #   runs-on: ubuntu-latest
  #   steps:
  #     - name: Clone
-  #       uses: actions/checkout@v6
+  #       uses: actions/checkout@v5
  #       with:
  #         submodules: true
  #     - name: Dependencies
@@ -186,7 +186,7 @@ jobs:
  #           sudo rm -rf "$AGENT_TOOLSDIRECTORY" || true
  #           df -h
  #     - name: Clone
-  #       uses: actions/checkout@v6
+  #       uses: actions/checkout@v5
  #       with:
  #         submodules: true
  #     - name: Dependencies
@@ -211,7 +211,7 @@ jobs:
  #   runs-on: ubuntu-latest
  #   steps:
  #     - name: Clone
-  #       uses: actions/checkout@v6
+  #       uses: actions/checkout@v5
  #       with:
  #         submodules: true
  #     - name: Dependencies
@@ -232,7 +232,7 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Clone
-        uses: actions/checkout@v6
+        uses: actions/checkout@v5
        with:
          submodules: true
      - name: Dependencies
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -21,7 +21,7 @@ jobs:
    runs-on: ubuntu-latest
    strategy:
      matrix:
-        go-version: ['1.25.x']
+        go-version: ['1.21.x']
    steps:
      - name: Free Disk Space (Ubuntu)
        uses: jlumbroso/free-disk-space@main
@@ -70,7 +70,7 @@ jobs:
          sudo rm -rfv build || true
          df -h
      - name: Clone
-        uses: actions/checkout@v6
+        uses: actions/checkout@v5
        with:
          submodules: true
      - name: Setup Go ${{ matrix.go-version }}
@@ -124,7 +124,7 @@ jobs:
          PATH="$PATH:/root/go/bin" GO_TAGS="tts" make --jobs 5 --output-sync=target test
      - name: Setup tmate session if tests fail
        if: ${{ failure() }}
-        uses: mxschmitt/action-tmate@v3.23
+        uses: mxschmitt/action-tmate@v3.22
        with:
          detached: true
          connect-timeout-seconds: 180
@@ -166,7 +166,7 @@ jobs:
          sudo rm -rfv build || true
          df -h
      - name: Clone
-        uses: actions/checkout@v6
+        uses: actions/checkout@v5
        with:
          submodules: true
      - name: Dependencies
@@ -183,7 +183,7 @@ jobs:
            PATH="$PATH:$HOME/go/bin" make backends/local-store backends/silero-vad backends/llama-cpp backends/whisper backends/piper backends/stablediffusion-ggml docker-build-aio e2e-aio
      - name: Setup tmate session if tests fail
        if: ${{ failure() }}
-        uses: mxschmitt/action-tmate@v3.23
+        uses: mxschmitt/action-tmate@v3.22
        with:
          detached: true
          connect-timeout-seconds: 180
@@ -193,10 +193,10 @@ jobs:
    runs-on: macOS-14
    strategy:
      matrix:
-        go-version: ['1.25.x']
+        go-version: ['1.21.x']
    steps:
      - name: Clone
-        uses: actions/checkout@v6
+        uses: actions/checkout@v5
        with:
          submodules: true
      - name: Setup Go ${{ matrix.go-version }}
@@ -226,7 +226,7 @@ jobs:
          PATH="$PATH:$HOME/go/bin" BUILD_TYPE="GITHUB_CI_HAS_BROKEN_METAL" CMAKE_ARGS="-DGGML_F16C=OFF -DGGML_AVX512=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF" make --jobs 4 --output-sync=target test
      - name: Setup tmate session if tests fail
        if: ${{ failure() }}
-        uses: mxschmitt/action-tmate@v3.23
+        uses: mxschmitt/action-tmate@v3.22
        with:
          detached: true
          connect-timeout-seconds: 180
--- a/.github/workflows/update_swagger.yaml
+++ b/.github/workflows/update_swagger.yaml
@@ -9,7 +9,7 @@ jobs:
      fail-fast: false
    runs-on: ubuntu-latest
    steps:
-      - uses: actions/checkout@v6
+      - uses: actions/checkout@v5
      - uses: actions/setup-go@v5
        with:
          go-version: 'stable'
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,3 +1,6 @@
 [submodule "docs/themes/hugo-theme-relearn"]
 	path = docs/themes/hugo-theme-relearn
 	url = https://github.com/McShelby/hugo-theme-relearn.git
+[submodule "docs/themes/lotusdocs"]
+	path = docs/themes/lotusdocs
+	url = https://github.com/colinwilson/lotusdocs
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -30,7 +30,6 @@ Thank you for your interest in contributing to LocalAI! We appreciate your time
 3. Install the required dependencies ( see https://localai.io/basics/build/#build-localai-locally )
 4. Build LocalAI: `make build`
 5. Run LocalAI: `./local-ai`
-6. To Build and live reload: `make build-dev`

 ## Contributing

@@ -77,7 +76,7 @@ LOCALAI_IMAGE_TAG=test LOCALAI_IMAGE=local-ai-aio make run-e2e-aio
 ## Documentation

 We welcome contributions to the documentation; please open a new PR or create a new issue. The documentation is available under `docs/` https://github.com/mudler/LocalAI/tree/master/docs
-
+ 
 ## Community and Communication

 - You can reach out via the Github issue tracker.
--- a/32
+++ b/32
@@ -32,15 +32,27 @@ RUN <<EOT bash
    if [ "${BUILD_TYPE}" = "vulkan" ] && [ "${SKIP_DRIVERS}" = "false" ]; then
        apt-get update && \
        apt-get install -y  --no-install-recommends \
-            software-properties-common pciutils wget gpg-agent && \
-        wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add - && \
-        wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list && \
-        apt-get update && \
-        apt-get install -y \
-            vulkan-sdk && \
-        apt-get clean && \
-        rm -rf /var/lib/apt/lists/* && \
-        echo "vulkan" > /run/localai/capability
+            software-properties-common pciutils sudo wget gpg-agent curl xz-utils && \
+            echo "vulkan" > /run/localai/capability && \
+        if [ "amd64" = "$TARGETARCH" ]; then
+            wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add - && \
+            wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list && \
+            apt-get update && \
+            apt-get install -y \
+                vulkan-sdk && \
+            apt-get clean && \
+            rm -rf /var/lib/apt/lists/*
+        fi
+        if [ "arm64" = "$TARGETARCH" ]; then
+            # For ARM64, there are no apt packages available, so we download a prebuilt Vulkan SDK tarball and install it manually
+            mkdir vulkan && cd vulkan && curl -L -o vulkan-sdk.tar.xz https://github.com/mudler/vulkan-sdk-arm/releases/download/1.4.321.1/vulkansdk-ubuntu-22.04-arm-1.4.321.1.tar.xz && \
+            tar -xvf vulkan-sdk.tar.xz && \
+            rm vulkan-sdk.tar.xz && \
+            cd * && \
+            cp -rfv aarch64/* /usr/ && \
+            cd ../.. && \
+            rm -rf vulkan
+        fi
    fi
 EOT

@@ -332,6 +344,6 @@ RUN mkdir -p /models /backends
 HEALTHCHECK --interval=1m --timeout=10m --retries=10 \
  CMD curl -f ${HEALTHCHECK_ENDPOINT} || exit 1

-VOLUME /models /backends /configuration
+VOLUME /models /backends
 EXPOSE 8080
 ENTRYPOINT [ "/entrypoint.sh" ]
--- a/13
+++ b/13
@@ -103,10 +103,6 @@ build-launcher: ## Build the launcher application

 build-all: build build-launcher ## Build both server and launcher

-build-dev: ## Run LocalAI in dev mode with live reload
-	@command -v air >/dev/null 2>&1 || go install github.com/air-verse/air@latest
-	air -c .air.toml
-
 dev-dist:
 	$(GORELEASER) build --snapshot --clean

@@ -380,9 +376,6 @@ backends/llama-cpp-darwin: build
 	bash ./scripts/build/llama-cpp-darwin.sh
 	./local-ai backends install "ocifile://$(abspath ./backend-images/llama-cpp.tar)"

-backends/neutts: docker-build-neutts docker-save-neutts build
-	./local-ai backends install "ocifile://$(abspath ./backend-images/neutts.tar)"
-
 build-darwin-python-backend: build
 	bash ./scripts/build/python-darwin.sh

@@ -439,12 +432,6 @@ docker-save-kitten-tts: backend-images
 docker-save-chatterbox: backend-images
 	docker save local-ai-backend:chatterbox -o backend-images/chatterbox.tar

-docker-build-neutts:
-	docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:neutts -f backend/Dockerfile.python --build-arg BACKEND=neutts ./backend
-
-docker-save-neutts: backend-images
-	docker save local-ai-backend:neutts -o backend-images/neutts.tar
-
 docker-build-kokoro:
 	docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:kokoro -f backend/Dockerfile.python --build-arg BACKEND=kokoro ./backend

--- a/README.md
+++ b/README.md
@@ -43,7 +43,7 @@

 > :bulb: Get help - [❓FAQ](https://localai.io/faq/) [💭Discussions](https://github.com/go-skynet/LocalAI/discussions) [:speech_balloon: Discord](https://discord.gg/uJAeKSAGDy) [:book: Documentation website](https://localai.io/)
 >
-> [💻 Quickstart](https://localai.io/basics/getting_started/) [🖼️ Models](https://models.localai.io/) [🚀 Roadmap](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap) [🛫 Examples](https://github.com/mudler/LocalAI-examples) Try on 
+> [💻 Quickstart](https://localai.io/basics/getting_started/) [🖼️ Models](https://models.localai.io/) [🚀 Roadmap](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap) [🌍 Explorer](https://explorer.localai.io) [🛫 Examples](https://github.com/mudler/LocalAI-examples) Try on 
 [![Telegram](https://img.shields.io/badge/Telegram-2CA5E0?style=for-the-badge&logo=telegram&logoColor=white)](https://t.me/localaiofficial_bot)

 [![tests](https://github.com/go-skynet/LocalAI/actions/workflows/test.yml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/test.yml)[![Build and Release](https://github.com/go-skynet/LocalAI/actions/workflows/release.yaml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/release.yaml)[![build container images](https://github.com/go-skynet/LocalAI/actions/workflows/image.yml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/image.yml)[![Bump dependencies](https://github.com/go-skynet/LocalAI/actions/workflows/bump_deps.yaml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/bump_deps.yaml)[![Artifact Hub](https://img.shields.io/endpoint?url=https://artifacthub.io/badge/repository/localai)](https://artifacthub.io/packages/search?repo=localai)
@@ -108,7 +108,7 @@ Run the installer script:
 curl https://localai.io/install.sh | sh
 ```

-For more installation options, see [Installer Options](https://localai.io/installation/).
+For more installation options, see [Installer Options](https://localai.io/docs/advanced/installer/).

 ### macOS Download:

@@ -116,17 +116,8 @@ For more installation options, see [Installer Options](https://localai.io/instal
  <img src="https://img.shields.io/badge/Download-macOS-blue?style=for-the-badge&logo=apple&logoColor=white" alt="Download LocalAI for macOS"/>
 </a>

-> Note: the DMGs are not signed by Apple as quarantined. See https://github.com/mudler/LocalAI/issues/6268 for a workaround, fix is tracked here: https://github.com/mudler/LocalAI/issues/6244
-
 Or run with docker:

-> **💡 Docker Run vs Docker Start**
-> 
-> - `docker run` creates and starts a new container. If a container with the same name already exists, this command will fail.
-> - `docker start` starts an existing container that was previously created with `docker run`.
-> 
-> If you've already run LocalAI before and want to start it again, use: `docker start -i local-ai`
-
 ### CPU only image:

 ```bash
@@ -202,13 +193,10 @@ local-ai run oci://localai/phi-2:latest

 > ⚡ **Automatic Backend Detection**: When you install models from the gallery or YAML files, LocalAI automatically detects your system's GPU capabilities (NVIDIA, AMD, Intel) and downloads the appropriate backend. For advanced configuration options, see [GPU Acceleration](https://localai.io/features/gpu-acceleration/#automatic-backend-detection).

-For more information, see [💻 Getting started](https://localai.io/basics/getting_started/index.html), if you are interested in our roadmap items and future enhancements, you can see the [Issues labeled as Roadmap here](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap)
+For more information, see [💻 Getting started](https://localai.io/basics/getting_started/index.html)

 ## 📰 Latest project news

- November 2025: Major improvements to the UX. Among these: [Import models via URL](https://github.com/mudler/LocalAI/pull/7245) and [Multiple chats and history](https://github.com/mudler/LocalAI/pull/7325)
- October 2025: 🔌 [Model Context Protocol (MCP)](https://localai.io/docs/features/mcp/) support added for agentic capabilities with external tools
- September 2025: New Launcher application for MacOS and Linux, extended support to many backends for Mac and Nvidia L4T devices. Models: Added MLX-Audio, WAN 2.2. WebUI improvements and Python-based backends now ships portable python environments.
 - August 2025: MLX, MLX-VLM, Diffusers and llama.cpp are now supported on Mac M1/M2/M3+ chips ( with `development` suffix in the gallery ): https://github.com/mudler/LocalAI/pull/6049 https://github.com/mudler/LocalAI/pull/6119 https://github.com/mudler/LocalAI/pull/6121 https://github.com/mudler/LocalAI/pull/6060
 - July/August 2025: 🔍 [Object Detection](https://localai.io/features/object-detection/) added to the API featuring [rf-detr](https://github.com/roboflow/rf-detr)
 - July 2025: All backends migrated outside of the main binary. LocalAI is now more lightweight, small, and automatically downloads the required backend to run the model. [Read the release notes](https://github.com/mudler/LocalAI/releases/tag/v3.2.0)
@@ -247,7 +235,7 @@ Roadmap items: [List of issues](https://github.com/mudler/LocalAI/issues?q=is%3A
 - 🔍 [Object Detection](https://localai.io/features/object-detection/)
 - 📈 [Reranker API](https://localai.io/features/reranker/)
 - 🆕🖧 [P2P Inferencing](https://localai.io/features/distribute/)
- 🆕🔌 [Model Context Protocol (MCP)](https://localai.io/docs/features/mcp/) - Agentic capabilities with external tools and [LocalAGI's Agentic capabilities](https://github.com/mudler/LocalAGI)
+- [Agentic capabilities](https://github.com/mudler/LocalAGI)
 - 🔊 Voice activity detection (Silero-VAD support)
 - 🌍 Integrated WebUI!

@@ -278,7 +266,6 @@ LocalAI supports a comprehensive range of AI backends with multiple acceleration
 | **piper** | Fast neural TTS system | CPU |
 | **kitten-tts** | Kitten TTS models | CPU |
 | **silero-vad** | Voice Activity Detection | CPU |
-| **neutts** | Text-to-speech with voice cloning | CUDA 12, ROCm, CPU |

 ### Image & Video Generation
 | Backend | Description | Acceleration Support |
@@ -300,7 +287,7 @@ LocalAI supports a comprehensive range of AI backends with multiple acceleration
 |-------------------|-------------------|------------------|
 | **NVIDIA CUDA 11** | llama.cpp, whisper, stablediffusion, diffusers, rerankers, bark, chatterbox | Nvidia hardware |
 | **NVIDIA CUDA 12** | All CUDA-compatible backends | Nvidia hardware |
-| **AMD ROCm** | llama.cpp, whisper, vllm, transformers, diffusers, rerankers, coqui, kokoro, bark, neutts | AMD Graphics |
+| **AMD ROCm** | llama.cpp, whisper, vllm, transformers, diffusers, rerankers, coqui, kokoro, bark | AMD Graphics |
 | **Intel oneAPI** | llama.cpp, whisper, stablediffusion, vllm, transformers, diffusers, rfdetr, rerankers, exllama2, coqui, kokoro, bark | Intel Arc, Intel iGPUs |
 | **Apple Metal** | llama.cpp, whisper, diffusers, MLX, MLX-VLM, bark-cpp | Apple M1/M2/M3+ |
 | **Vulkan** | llama.cpp, whisper, stablediffusion | Cross-platform GPUs |
@@ -317,12 +304,6 @@ WebUIs:
 - https://github.com/go-skynet/LocalAI-frontend
 - QA-Pilot(An interactive chat project that leverages LocalAI LLMs for rapid understanding and navigation of GitHub code repository) https://github.com/reid41/QA-Pilot

-Agentic Libraries:
- https://github.com/mudler/cogito
-
-MCPs:
- https://github.com/mudler/MCPs
-
 Model galleries
 - https://github.com/go-skynet/model-gallery

--- a/backend/Dockerfile.golang
+++ b/backend/Dockerfile.golang
@@ -37,14 +37,27 @@ RUN <<EOT bash
    if [ "${BUILD_TYPE}" = "vulkan" ] && [ "${SKIP_DRIVERS}" = "false" ]; then
        apt-get update && \
        apt-get install -y  --no-install-recommends \
-            software-properties-common pciutils wget gpg-agent && \
-        wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add - && \
-        wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list && \
-        apt-get update && \
-        apt-get install -y \
-            vulkan-sdk && \
-        apt-get clean && \
-        rm -rf /var/lib/apt/lists/*
+            software-properties-common pciutils sudo wget gpg-agent curl xz-utils && \
+            echo "vulkan" > /run/localai/capability && \
+        if [ "amd64" = "$TARGETARCH" ]; then
+            wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add - && \
+            wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list && \
+            apt-get update && \
+            apt-get install -y \
+                vulkan-sdk && \
+            apt-get clean && \
+            rm -rf /var/lib/apt/lists/*
+        fi
+        if [ "arm64" = "$TARGETARCH" ]; then
+            # For ARM64, we need to build the Vulkan SDK manually as there are no packages available
+            mkdir vulkan && cd vulkan && curl -L -o vulkan-sdk.tar.xz https://github.com/mudler/vulkan-sdk-arm/releases/download/1.4.321.1/vulkansdk-ubuntu-22.04-arm-1.4.321.1.tar.xz && \
+            tar -xvf vulkan-sdk.tar.xz && \
+            rm vulkan-sdk.tar.xz && \
+            cd * && \
+            cp -rfv aarch64/* /usr/ && \
+            cd ../.. && \
+            rm -rf vulkan
+        fi
    fi
 EOT

--- a/backend/Dockerfile.llama-cpp
+++ b/backend/Dockerfile.llama-cpp
@@ -85,14 +85,27 @@ RUN <<EOT bash
    if [ "${BUILD_TYPE}" = "vulkan" ] && [ "${SKIP_DRIVERS}" = "false" ]; then
        apt-get update && \
        apt-get install -y  --no-install-recommends \
-            software-properties-common pciutils wget gpg-agent && \
-        wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add - && \
-        wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list && \
-        apt-get update && \
-        apt-get install -y \
-            vulkan-sdk && \
-        apt-get clean && \
-        rm -rf /var/lib/apt/lists/*
+            software-properties-common pciutils sudo wget gpg-agent curl xz-utils libxcb1 libx11-6 && \
+            echo "vulkan" > /run/localai/capability && \
+        if [ "amd64" = "$TARGETARCH" ]; then
+            wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add - && \
+            wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list && \
+            apt-get update && \
+            apt-get install -y \
+                vulkan-sdk && \
+            apt-get clean && \
+            rm -rf /var/lib/apt/lists/*
+        fi
+        if [ "arm64" = "$TARGETARCH" ]; then
+            # For ARM64 there are no apt packages available: install the Vulkan SDK from a release tarball into /usr, then sanity-check it with vulkaninfo
+            mkdir vulkan && cd vulkan && curl -L -o vulkan-sdk.tar.xz https://github.com/mudler/vulkan-sdk-arm/releases/download/1.4.321.1/vulkansdk-ubuntu-22.04-arm-1.4.321.1.tar.xz && \
+            tar -xvf vulkan-sdk.tar.xz && \
+            rm vulkan-sdk.tar.xz && \
+            cd * && \
+            cp -rfv aarch64/* /usr/ && vulkaninfo && \
+            cd ../.. && \
+            rm -rf vulkan
+        fi
    fi
 EOT

@@ -197,7 +210,7 @@ EOT


 # Copy libraries using a script to handle architecture differences
-RUN make -BC /LocalAI/backend/cpp/llama-cpp package
+RUN make -C /LocalAI/backend/cpp/llama-cpp package


 FROM scratch
--- a/backend/Dockerfile.python
+++ b/backend/Dockerfile.python
@@ -28,7 +28,7 @@ RUN apt-get update && \
        curl python3-pip \
        python-is-python3 \
        python3-dev llvm \
-        python3-venv make cmake && \
+        python3-venv make && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/* && \
    pip install --upgrade pip
@@ -45,14 +45,27 @@ RUN <<EOT bash
    if [ "${BUILD_TYPE}" = "vulkan" ] && [ "${SKIP_DRIVERS}" = "false" ]; then
        apt-get update && \
        apt-get install -y  --no-install-recommends \
-            software-properties-common pciutils wget gpg-agent && \
-        wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add - && \
-        wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list && \
-        apt-get update && \
-        apt-get install -y \
-            vulkan-sdk && \
-        apt-get clean && \
-        rm -rf /var/lib/apt/lists/*
+            software-properties-common pciutils sudo wget gpg-agent curl xz-utils && \
+            echo "vulkan" > /run/localai/capability && \
+        if [ "amd64" = "$TARGETARCH" ]; then
+            wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add - && \
+            wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list && \
+            apt-get update && \
+            apt-get install -y \
+                vulkan-sdk && \
+            apt-get clean && \
+            rm -rf /var/lib/apt/lists/*
+        fi
+        if [ "arm64" = "$TARGETARCH" ]; then
+            # For ARM64, we need to build the Vulkan SDK manually as there are no packages available
+            mkdir vulkan && cd vulkan && curl -L -o vulkan-sdk.tar.xz https://github.com/mudler/vulkan-sdk-arm/releases/download/1.4.321.1/vulkansdk-ubuntu-22.04-arm-1.4.321.1.tar.xz && \
+            tar -xvf vulkan-sdk.tar.xz && \
+            rm vulkan-sdk.tar.xz && \
+            cd * && \
+            cp -rfv aarch64/* /usr/ && \
+            cd ../.. && \
+            rm -rf vulkan
+        fi
    fi
 EOT

--- a/backend/backend.proto
+++ b/backend/backend.proto
@@ -154,10 +154,6 @@ message PredictOptions {
  repeated string Videos = 45;
  repeated string Audios = 46;
  string CorrelationId = 47;
-  string Tools = 48;  // JSON array of available tools/functions for tool calling
-  string ToolChoice = 49;  // JSON string or object specifying tool choice behavior
-  int32 Logprobs = 50;  // Number of top logprobs to return (maps to OpenAI logprobs parameter)
-  int32 TopLogprobs = 51;  // Number of top logprobs to return per token (maps to OpenAI top_logprobs parameter)
 }

 // The response message containing the result
@@ -168,7 +164,6 @@ message Reply {
  double timing_prompt_processing = 4;
  double timing_token_generation = 5;
  bytes audio = 6;
-  bytes logprobs = 7;  // JSON-encoded logprobs data matching OpenAI format
 }

 message GrammarTrigger {
@@ -387,11 +382,6 @@ message StatusResponse {
 message Message {
  string role = 1;
  string content = 2;
-  // Optional fields for OpenAI-compatible message format
-  string name = 3;                    // Tool name (for tool messages)
-  string tool_call_id = 4;            // Tool call ID (for tool messages)
-  string reasoning_content = 5;       // Reasoning content (for thinking models)
-  string tool_calls = 6;              // Tool calls as JSON string (for assistant messages with tool calls)
 }

 message DetectOptions {
--- a/backend/cpp/llama-cpp/CMakeLists.txt
+++ b/backend/cpp/llama-cpp/CMakeLists.txt
@@ -57,7 +57,7 @@ add_library(hw_grpc_proto
  ${hw_proto_srcs}
  ${hw_proto_hdrs} )

-add_executable(${TARGET} grpc-server.cpp json.hpp httplib.h)
+add_executable(${TARGET} grpc-server.cpp utils.hpp json.hpp httplib.h)

 target_include_directories(${TARGET} PRIVATE ../llava)
 target_include_directories(${TARGET} PRIVATE ${CMAKE_SOURCE_DIR})
--- a/backend/cpp/llama-cpp/Makefile
+++ b/backend/cpp/llama-cpp/Makefile
@@ -1,5 +1,5 @@

-LLAMA_VERSION?=583cb83416467e8abf9b37349dcf1f6a0083745a
+LLAMA_VERSION?=4807e8f96a61b2adccebd5e57444c94d18de7264
 LLAMA_REPO?=https://github.com/ggerganov/llama.cpp

 CMAKE_ARGS?=
--- a/backend/cpp/llama-cpp/grpc-server.cpp
+++ b/backend/cpp/llama-cpp/grpc-server.cpp
--- a/backend/cpp/llama-cpp/prepare.sh
+++ b/backend/cpp/llama-cpp/prepare.sh
@@ -9,13 +9,10 @@ done

 set -e

-for file in $(ls llama.cpp/tools/server/); do
-    cp -rfv llama.cpp/tools/server/$file llama.cpp/tools/grpc-server/
-done
-
 cp -r CMakeLists.txt llama.cpp/tools/grpc-server/
 cp -r grpc-server.cpp llama.cpp/tools/grpc-server/
 cp -rfv llama.cpp/vendor/nlohmann/json.hpp llama.cpp/tools/grpc-server/
+cp -rfv llama.cpp/tools/server/utils.hpp llama.cpp/tools/grpc-server/
 cp -rfv llama.cpp/vendor/cpp-httplib/httplib.h llama.cpp/tools/grpc-server/

 set +e
--- a/backend/go/whisper/Makefile
+++ b/backend/go/whisper/Makefile
@@ -8,8 +8,7 @@ JOBS?=$(shell nproc --ignore=1)

 # whisper.cpp version
 WHISPER_REPO?=https://github.com/ggml-org/whisper.cpp
-WHISPER_CPP_VERSION?=19ceec8eac980403b714d603e5ca31653cd42a3f
-SO_TARGET?=libgowhisper.so
+WHISPER_CPP_VERSION?=44fa2f647cf2a6953493b21ab83b50d5f5dbc483

 CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF

@@ -58,18 +57,15 @@ sources/whisper.cpp:
 	git checkout $(WHISPER_CPP_VERSION) && \
 	git submodule update --init --recursive --depth 1 --single-branch

-# Detect OS
-UNAME_S := $(shell uname -s)
+libgowhisper.so: sources/whisper.cpp CMakeLists.txt gowhisper.cpp gowhisper.h
+	mkdir -p build && \
+	cd build && \
+	cmake .. $(CMAKE_ARGS) && \
+	cmake --build . --config Release -j$(JOBS) && \
+	cd .. && \
+	mv build/libgowhisper.so ./

-# Only build CPU variants on Linux
-ifeq ($(UNAME_S),Linux)
-	VARIANT_TARGETS = libgowhisper-avx.so libgowhisper-avx2.so libgowhisper-avx512.so libgowhisper-fallback.so
-else
-	# On non-Linux (e.g., Darwin), build only fallback variant
-	VARIANT_TARGETS = libgowhisper-fallback.so
-endif
-
-whisper: main.go gowhisper.go $(VARIANT_TARGETS)
+whisper: main.go gowhisper.go libgowhisper.so
 	CGO_ENABLED=0 $(GOCMD) build -tags "$(GO_TAGS)" -o whisper ./

 package: whisper
@@ -77,46 +73,5 @@ package: whisper

 build: package

-clean: purge
-	rm -rf libgowhisper*.so sources/whisper.cpp whisper
-
-purge:
-	rm -rf build*
-
-# Build all variants (Linux only)
-ifeq ($(UNAME_S),Linux)
-libgowhisper-avx.so: sources/whisper.cpp
-	$(MAKE) purge
-	$(info ${GREEN}I whisper build info:avx${RESET})
-	SO_TARGET=libgowhisper-avx.so CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" $(MAKE) libgowhisper-custom
-	rm -rfv build*
-
-libgowhisper-avx2.so: sources/whisper.cpp
-	$(MAKE) purge
-	$(info ${GREEN}I whisper build info:avx2${RESET})
-	SO_TARGET=libgowhisper-avx2.so CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=on -DGGML_AVX512=off -DGGML_FMA=on -DGGML_F16C=on" $(MAKE) libgowhisper-custom
-	rm -rfv build*
-
-libgowhisper-avx512.so: sources/whisper.cpp
-	$(MAKE) purge
-	$(info ${GREEN}I whisper build info:avx512${RESET})
-	SO_TARGET=libgowhisper-avx512.so CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=on -DGGML_FMA=on -DGGML_F16C=on" $(MAKE) libgowhisper-custom
-	rm -rfv build*
-endif
-
-# Build fallback variant (all platforms)
-libgowhisper-fallback.so: sources/whisper.cpp
-	$(MAKE) purge
-	$(info ${GREEN}I whisper build info:fallback${RESET})
-	SO_TARGET=libgowhisper-fallback.so CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" $(MAKE) libgowhisper-custom
-	rm -rfv build*
-
-libgowhisper-custom: CMakeLists.txt gowhisper.cpp gowhisper.h
-	mkdir -p build-$(SO_TARGET) && \
-	cd build-$(SO_TARGET) && \
-	cmake .. $(CMAKE_ARGS) && \
-	cmake --build . --config Release -j$(JOBS) && \
-	cd .. && \
-	mv build-$(SO_TARGET)/libgowhisper.so ./$(SO_TARGET)
-
-all: whisper package
+clean:
+	rm -rf libgowhisper.so build whisper
--- a/backend/go/whisper/main.go
+++ b/backend/go/whisper/main.go
@@ -3,7 +3,6 @@ package main
 // Note: this is started internally by LocalAI and a server is allocated for each model
 import (
 	"flag"
-	"os"

 	"github.com/ebitengine/purego"
 	grpc "github.com/mudler/LocalAI/pkg/grpc"
@@ -19,13 +18,7 @@ type LibFuncs struct {
 }

 func main() {
-	// Get library name from environment variable, default to fallback
-	libName := os.Getenv("WHISPER_LIBRARY")
-	if libName == "" {
-		libName = "./libgowhisper-fallback.so"
-	}
-
-	gosd, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL)
+	gosd, err := purego.Dlopen("./libgowhisper.so", purego.RTLD_NOW|purego.RTLD_GLOBAL)
 	if err != nil {
 		panic(err)
 	}
--- a/backend/go/whisper/package.sh
+++ b/backend/go/whisper/package.sh
@@ -10,8 +10,7 @@ CURDIR=$(dirname "$(realpath $0)")
 # Create lib directory
 mkdir -p $CURDIR/package/lib

-cp -avf $CURDIR/whisper $CURDIR/package/
-cp -fv $CURDIR/libgowhisper-*.so $CURDIR/package/
+cp -avf $CURDIR/whisper $CURDIR/libgowhisper.so $CURDIR/package/
 cp -fv $CURDIR/run.sh $CURDIR/package/

 # Detect architecture and copy appropriate libraries
--- a/backend/go/whisper/run.sh
+++ b/backend/go/whisper/run.sh
@@ -1,52 +1,14 @@
 #!/bin/bash
 set -ex

-# Get the absolute current dir where the script is located
 CURDIR=$(dirname "$(realpath $0)")

-cd /
-
-echo "CPU info:"
-if [ "$(uname)" != "Darwin" ]; then
-	grep -e "model\sname" /proc/cpuinfo | head -1
-	grep -e "flags" /proc/cpuinfo | head -1
-fi
-
-LIBRARY="$CURDIR/libgowhisper-fallback.so"
-
-if [ "$(uname)" != "Darwin" ]; then
-	if grep -q -e "\savx\s" /proc/cpuinfo ; then
-		echo "CPU:    AVX    found OK"
-		if [ -e $CURDIR/libgowhisper-avx.so ]; then
-			LIBRARY="$CURDIR/libgowhisper-avx.so"
-		fi
-	fi
-
-	if grep -q -e "\savx2\s" /proc/cpuinfo ; then
-		echo "CPU:    AVX2   found OK"
-		if [ -e $CURDIR/libgowhisper-avx2.so ]; then
-			LIBRARY="$CURDIR/libgowhisper-avx2.so"
-		fi
-	fi
-
-	# Check avx 512
-	if grep -q -e "\savx512f\s" /proc/cpuinfo ; then
-		echo "CPU:    AVX512F found OK"
-		if [ -e $CURDIR/libgowhisper-avx512.so ]; then
-			LIBRARY="$CURDIR/libgowhisper-avx512.so"
-		fi
-	fi
-fi
-
 export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
-export WHISPER_LIBRARY=$LIBRARY

 # If there is a lib/ld.so, use it
 if [ -f $CURDIR/lib/ld.so ]; then
 	echo "Using lib/ld.so"
-	echo "Using library: $LIBRARY"
 	exec $CURDIR/lib/ld.so $CURDIR/whisper "$@"
 fi

-echo "Using library: $LIBRARY"
 exec $CURDIR/whisper "$@"
--- a/backend/index.yaml
+++ b/backend/index.yaml
@@ -427,68 +427,6 @@
    - text-to-speech
    - TTS
  license: apache-2.0
- &neutts
-  name: "neutts"
-  urls:
-    - https://github.com/neuphonic/neutts-air
-  description: |
-    NeuTTS Air is the world’s first super-realistic, on-device, TTS speech language model with instant voice cloning. Built off a 0.5B LLM backbone, NeuTTS Air brings natural-sounding speech, real-time performance, built-in security and speaker cloning to your local device - unlocking a new category of embedded voice agents, assistants, toys, and compliance-safe apps.
-  tags:
-    - text-to-speech
-    - TTS
-  license: apache-2.0
-  capabilities:
-    default: "cpu-neutts"
-    nvidia: "cuda12-neutts"
-    amd: "rocm-neutts"
-    nvidia-l4t: "nvidia-l4t-neutts"
- !!merge <<: *neutts
-  name: "neutts-development"
-  capabilities:
-    default: "cpu-neutts-development"
-    nvidia: "cuda12-neutts-development"
-    amd: "rocm-neutts-development"
-    nvidia-l4t: "nvidia-l4t-neutts-development"
- !!merge <<: *neutts
-  name: "cpu-neutts"
-  uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-neutts"
-  mirrors:
-    - localai/localai-backends:latest-cpu-neutts
- !!merge <<: *neutts
-  name: "cuda12-neutts"
-  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-neutts"
-  mirrors:
-    - localai/localai-backends:latest-gpu-nvidia-cuda-12-neutts
- !!merge <<: *neutts
-  name: "rocm-neutts"
-  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-rocm-hipblas-neutts"
-  mirrors:
-    - localai/localai-backends:latest-gpu-rocm-hipblas-neutts
- !!merge <<: *neutts
-  name: "nvidia-l4t-neutts"
-  uri: "quay.io/go-skynet/local-ai-backends:latest-nvidia-l4t-arm64-neutts"
-  mirrors:
-    - localai/localai-backends:latest-nvidia-l4t-arm64-neutts
- !!merge <<: *neutts
-  name: "cpu-neutts-development"
-  uri: "quay.io/go-skynet/local-ai-backends:master-cpu-neutts"
-  mirrors:
-    - localai/localai-backends:master-cpu-neutts
- !!merge <<: *neutts
-  name: "cuda12-neutts-development"
-  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-neutts"
-  mirrors:
-    - localai/localai-backends:master-gpu-nvidia-cuda-12-neutts
- !!merge <<: *neutts
-  name: "rocm-neutts-development"
-  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-rocm-hipblas-neutts"
-  mirrors:
-    - localai/localai-backends:master-gpu-rocm-hipblas-neutts
- !!merge <<: *neutts
-  name: "nvidia-l4t-neutts-development"
-  uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-arm64-neutts"
-  mirrors:
-    - localai/localai-backends:master-nvidia-l4t-arm64-neutts
 - !!merge <<: *mlx
  name: "mlx-development"
  uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-mlx"
--- a/backend/python/bark/requirements.txt
+++ b/backend/python/bark/requirements.txt
@@ -1,4 +1,4 @@
 bark==0.1.5
-grpcio==1.76.0
+grpcio==1.74.0
 protobuf
 certifi
--- a/backend/python/chatterbox/backend.py
+++ b/backend/python/chatterbox/backend.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 """
-This is an extra gRPC server of LocalAI for Chatterbox TTS
+This is an extra gRPC server of LocalAI for Chatterbox TTS
 """
 from concurrent import futures
 import time
@@ -16,7 +16,6 @@ import torchaudio as ta
 from chatterbox.tts import ChatterboxTTS
 from chatterbox.mtl_tts import ChatterboxMultilingualTTS
 import grpc
-import tempfile

 def is_float(s):
    """Check if a string can be converted to float."""
@@ -33,79 +32,11 @@ def is_int(s):
    except ValueError:
        return False

-def split_text_at_word_boundary(text, max_length=250):
-    """
-    Split text at word boundaries without truncating words.
-    Returns a list of text chunks.
-    """
-    if not text or len(text) <= max_length:
-        return [text]
-    
-    chunks = []
-    words = text.split()
-    current_chunk = ""
-    
-    for word in words:
-        # Check if adding this word would exceed the limit
-        if len(current_chunk) + len(word) + 1 <= max_length:
-            if current_chunk:
-                current_chunk += " " + word
-            else:
-                current_chunk = word
-        else:
-            # If current chunk is not empty, add it to chunks
-            if current_chunk:
-                chunks.append(current_chunk)
-                current_chunk = word
-            else:
-                # If a single word is longer than max_length, we have to include it anyway
-                chunks.append(word)
-                current_chunk = ""
-    
-    # Add the last chunk if it's not empty
-    if current_chunk:
-        chunks.append(current_chunk)
-    
-    return chunks
-
-def merge_audio_files(audio_files, output_path, sample_rate):
-    """
-    Merge multiple audio files into a single audio file.
-    """
-    if not audio_files:
-        return
-    
-    if len(audio_files) == 1:
-        # If only one file, just copy it
-        import shutil
-        shutil.copy2(audio_files[0], output_path)
-        return
-    
-    # Load all audio files
-    waveforms = []
-    for audio_file in audio_files:
-        waveform, sr = ta.load(audio_file)
-        if sr != sample_rate:
-            # Resample if necessary
-            resampler = ta.transforms.Resample(sr, sample_rate)
-            waveform = resampler(waveform)
-        waveforms.append(waveform)
-    
-    # Concatenate all waveforms
-    merged_waveform = torch.cat(waveforms, dim=1)
-    
-    # Save the merged audio
-    ta.save(output_path, merged_waveform, sample_rate)
-    
-    # Clean up temporary files
-    for audio_file in audio_files:
-        if os.path.exists(audio_file):
-            os.remove(audio_file)
-
 _ONE_DAY_IN_SECONDS = 60 * 60 * 24

 # If MAX_WORKERS are specified in the environment use it, otherwise default to 1
 MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1'))
+COQUI_LANGUAGE = os.environ.get('COQUI_LANGUAGE', None)

 # Implement the BackendServicer class with the service methods
 class BackendServicer(backend_pb2_grpc.BackendServicer):
@@ -187,33 +118,10 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
            # add options to kwargs
            kwargs.update(self.options)

-            # Check if text exceeds 250 characters
-            # (chatterbox does not support long text)
-            # https://github.com/resemble-ai/chatterbox/issues/60
-            # https://github.com/resemble-ai/chatterbox/issues/110
-            if len(request.text) > 250:
-                # Split text at word boundaries
-                text_chunks = split_text_at_word_boundary(request.text, max_length=250)
-                print(f"Splitting text into chunks of 250 characters: {len(text_chunks)}", file=sys.stderr)
-                # Generate audio for each chunk
-                temp_audio_files = []
-                for i, chunk in enumerate(text_chunks):
-                    # Generate audio for this chunk
-                    wav = self.model.generate(chunk, **kwargs)
-                    
-                    # Create temporary file for this chunk
-                    temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.wav')
-                    temp_file.close()
-                    ta.save(temp_file.name, wav, self.model.sr)
-                    temp_audio_files.append(temp_file.name)
-                
-                # Merge all audio files
-                merge_audio_files(temp_audio_files, request.dst, self.model.sr)
-            else:
-                # Generate audio using ChatterboxTTS for short text
-                wav = self.model.generate(request.text, **kwargs)
-                # Save the generated audio
-                ta.save(request.dst, wav, self.model.sr)
+            # Generate audio using ChatterboxTTS
+            wav = self.model.generate(request.text, **kwargs)
+            # Save the generated audio
+            ta.save(request.dst, wav, self.model.sr)
            
        except Exception as err:
            return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
--- a/backend/python/chatterbox/requirements-cpu.txt
+++ b/backend/python/chatterbox/requirements-cpu.txt
@@ -2,7 +2,6 @@
 accelerate
 torch
 torchaudio
-numpy>=1.24.0,<1.26.0
 transformers
 # https://github.com/mudler/LocalAI/pull/6240#issuecomment-3329518289
 chatterbox-tts@git+https://git@github.com/mudler/chatterbox.git@faster
--- a/backend/python/chatterbox/requirements-cublas11.txt
+++ b/backend/python/chatterbox/requirements-cublas11.txt
@@ -2,7 +2,6 @@
 torch==2.6.0+cu118
 torchaudio==2.6.0+cu118
 transformers==4.46.3
-numpy>=1.24.0,<1.26.0
 # https://github.com/mudler/LocalAI/pull/6240#issuecomment-3329518289
 chatterbox-tts@git+https://git@github.com/mudler/chatterbox.git@faster
 accelerate
--- a/backend/python/chatterbox/requirements-cublas12.txt
+++ b/backend/python/chatterbox/requirements-cublas12.txt
@@ -1,7 +1,6 @@
 torch
 torchaudio
 transformers
-numpy>=1.24.0,<1.26.0
 # https://github.com/mudler/LocalAI/pull/6240#issuecomment-3329518289
 chatterbox-tts@git+https://git@github.com/mudler/chatterbox.git@faster
 accelerate
--- a/backend/python/chatterbox/requirements-hipblas.txt
+++ b/backend/python/chatterbox/requirements-hipblas.txt
@@ -2,7 +2,6 @@
 torch==2.6.0+rocm6.1
 torchaudio==2.6.0+rocm6.1
 transformers
-numpy>=1.24.0,<1.26.0
 # https://github.com/mudler/LocalAI/pull/6240#issuecomment-3329518289
 chatterbox-tts@git+https://git@github.com/mudler/chatterbox.git@faster
 accelerate
--- a/backend/python/chatterbox/requirements-intel.txt
+++ b/backend/python/chatterbox/requirements-intel.txt
@@ -3,7 +3,6 @@ intel-extension-for-pytorch==2.3.110+xpu
 torch==2.3.1+cxx11.abi
 torchaudio==2.3.1+cxx11.abi
 transformers
-numpy>=1.24.0,<1.26.0
 # https://github.com/mudler/LocalAI/pull/6240#issuecomment-3329518289
 chatterbox-tts@git+https://git@github.com/mudler/chatterbox.git@faster
 accelerate
--- a/backend/python/chatterbox/requirements-l4t.txt
+++ b/backend/python/chatterbox/requirements-l4t.txt
@@ -2,6 +2,5 @@
 torch
 torchaudio
 transformers
-numpy>=1.24.0,<1.26.0
 chatterbox-tts@git+https://git@github.com/mudler/chatterbox.git@faster
 accelerate
--- a/backend/python/chatterbox/requirements.txt
+++ b/backend/python/chatterbox/requirements.txt
@@ -2,5 +2,4 @@ grpcio==1.71.0
 protobuf
 certifi
 packaging
-setuptools
-poetry
+setuptools
--- a/backend/python/common/template/requirements.txt
+++ b/backend/python/common/template/requirements.txt
@@ -1,3 +1,3 @@
-grpcio==1.76.0
+grpcio==1.74.0
 protobuf
 grpcio-tools
--- a/backend/python/coqui/requirements.txt
+++ b/backend/python/coqui/requirements.txt
@@ -1,4 +1,4 @@
-grpcio==1.76.0
+grpcio==1.74.0
 protobuf
 certifi
 packaging==24.1
--- a/backend/python/diffusers/requirements.txt
+++ b/backend/python/diffusers/requirements.txt
@@ -1,5 +1,5 @@
 setuptools
-grpcio==1.76.0
+grpcio==1.74.0
 pillow
 protobuf
 certifi
--- a/backend/python/diffusers/test.py
+++ b/backend/python/diffusers/test.py
@@ -31,7 +31,7 @@ class TestBackendServicer(unittest.TestCase):
        """
        This method tests if the server starts up successfully
        """
-        time.sleep(20)
+        time.sleep(10)
        try:
            self.setUp()
            with grpc.insecure_channel("localhost:50051") as channel:
@@ -48,7 +48,7 @@ class TestBackendServicer(unittest.TestCase):
        """
        This method tests if the model is loaded successfully
        """
-        time.sleep(20)
+        time.sleep(10)
        try:
            self.setUp()
            with grpc.insecure_channel("localhost:50051") as channel:
@@ -66,7 +66,7 @@ class TestBackendServicer(unittest.TestCase):
        """
        This method tests if the backend can generate images
        """
-        time.sleep(20)
+        time.sleep(10)
        try:
            self.setUp()
            with grpc.insecure_channel("localhost:50051") as channel:
--- a/backend/python/exllama2/requirements.txt
+++ b/backend/python/exllama2/requirements.txt
@@ -1,4 +1,4 @@
-grpcio==1.76.0
+grpcio==1.74.0
 protobuf
 certifi
 wheel
--- a/backend/python/kokoro/backend.py
+++ b/backend/python/kokoro/backend.py
@@ -64,15 +64,15 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
            # Generate audio using Kokoro pipeline
            generator = self.pipeline(request.text, voice=voice)
            
-            speechs = []
-            # Get all the audio segment
+            # Get the first (and typically only) audio segment
            for i, (gs, ps, audio) in enumerate(generator):
-                speechs.append(audio)
+                # Save audio to the destination file
+                sf.write(request.dst, audio, 24000)
                print(f"Generated audio segment {i}: gs={gs}, ps={ps}", file=sys.stderr)
-            # Merges the audio segments and writes them to the destination
-            speech = torch.cat(speechs, dim=0)
-            sf.write(request.dst, speech, 24000)
-
+                # For now, we only process the first segment
+                # If you need to handle multiple segments, you might want to modify this
+                break
+                
        except Exception as err:
            return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
        
--- a/backend/python/neutts/Makefile
+++ b/backend/python/neutts/Makefile
@@ -1,23 +0,0 @@
-.PHONY: neutts
-neutts:
-	bash install.sh
-
-.PHONY: run
-run: neutts
-	@echo "Running neutts..."
-	bash run.sh
-	@echo "neutts run."
-
-.PHONY: test
-test: neutts
-	@echo "Testing neutts..."
-	bash test.sh
-	@echo "neutts tested."
-
-.PHONY: protogen-clean
-protogen-clean:
-	$(RM) backend_pb2_grpc.py backend_pb2.py
-
-.PHONY: clean
-clean: protogen-clean
-	rm -rf venv __pycache__
--- a/backend/python/neutts/backend.py
+++ b/backend/python/neutts/backend.py
@@ -1,162 +0,0 @@
-#!/usr/bin/env python3
-"""
-This is an extra gRPC server of LocalAI for NeuTTSAir
-"""
-from concurrent import futures
-import time
-import argparse
-import signal
-import sys
-import os
-import backend_pb2
-import backend_pb2_grpc
-import torch
-from neuttsair.neutts import NeuTTSAir
-import soundfile as sf
-
-import grpc
-
-def is_float(s):
-    """Check if a string can be converted to float."""
-    try:
-        float(s)
-        return True
-    except ValueError:
-        return False
-def is_int(s):
-    """Check if a string can be converted to int."""
-    try:
-        int(s)
-        return True
-    except ValueError:
-        return False
-
-_ONE_DAY_IN_SECONDS = 60 * 60 * 24
-
-# If MAX_WORKERS are specified in the environment use it, otherwise default to 1
-MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1'))
-
-# Implement the BackendServicer class with the service methods
-class BackendServicer(backend_pb2_grpc.BackendServicer):
-    """
-    BackendServicer is the class that implements the gRPC service
-    """
-    def Health(self, request, context):
-        return backend_pb2.Reply(message=bytes("OK", 'utf-8'))
-    def LoadModel(self, request, context):
-
-        # Get device
-        # device = "cuda" if request.CUDA else "cpu"
-        if torch.cuda.is_available():
-            print("CUDA is available", file=sys.stderr)
-            device = "cuda"
-        else:
-            print("CUDA is not available", file=sys.stderr)
-            device = "cpu"
-        mps_available = hasattr(torch.backends, "mps") and torch.backends.mps.is_available()
-        if mps_available:
-            device = "mps"
-        if not torch.cuda.is_available() and request.CUDA:
-            return backend_pb2.Result(success=False, message="CUDA is not available")
-
-
-        options = request.Options
-
-        # empty dict
-        self.options = {}
-        self.ref_text = None
-
-        # The options are a list of strings in this form optname:optvalue
-        # We are storing all the options in a dict so we can use it later when
-        # generating the images
-        for opt in options:
-            if ":" not in opt:
-                continue
-            key, value = opt.split(":")
-            # if value is a number, convert it to the appropriate type
-            if is_float(value):
-                value = float(value)
-            elif is_int(value):
-                value = int(value)
-            elif value.lower() in ["true", "false"]:
-                value = value.lower() == "true"
-            self.options[key] = value
-
-        codec_repo = "neuphonic/neucodec"
-        if "codec_repo" in self.options:
-            codec_repo = self.options["codec_repo"]
-            del self.options["codec_repo"]
-        if "ref_text" in self.options:
-            self.ref_text = self.options["ref_text"]
-            del self.options["ref_text"]
-
-        self.AudioPath = None
-
-        if os.path.isabs(request.AudioPath):
-            self.AudioPath = request.AudioPath
-        elif request.AudioPath and request.ModelFile != "" and not os.path.isabs(request.AudioPath):
-            # get base path of modelFile
-            modelFileBase = os.path.dirname(request.ModelFile)
-            # modify LoraAdapter to be relative to modelFileBase
-            self.AudioPath = os.path.join(modelFileBase, request.AudioPath)
-        try:
-            print("Preparing models, please wait", file=sys.stderr)
-            self.model = NeuTTSAir(backbone_repo=request.Model, backbone_device=device, codec_repo=codec_repo, codec_device=device)
-        except Exception as err:
-            return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
-        # Implement your logic here for the LoadModel service
-        # Replace this with your desired response
-        return backend_pb2.Result(message="Model loaded successfully", success=True)
-
-    def TTS(self, request, context):
-        try:
-            kwargs = {}
-
-            # add options to kwargs
-            kwargs.update(self.options)
-
-            ref_codes = self.model.encode_reference(self.AudioPath)
-
-            wav = self.model.infer(request.text, ref_codes, self.ref_text)
-
-            sf.write(request.dst, wav, 24000)            
-        except Exception as err:
-            return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
-        return backend_pb2.Result(success=True)
-
-def serve(address):
-    server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS),
-        options=[
-            ('grpc.max_message_length', 50 * 1024 * 1024),  # 50MB
-            ('grpc.max_send_message_length', 50 * 1024 * 1024),  # 50MB
-            ('grpc.max_receive_message_length', 50 * 1024 * 1024),  # 50MB
-        ])
-    backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
-    server.add_insecure_port(address)
-    server.start()
-    print("Server started. Listening on: " + address, file=sys.stderr)
-
-    # Define the signal handler function
-    def signal_handler(sig, frame):
-        print("Received termination signal. Shutting down...")
-        server.stop(0)
-        sys.exit(0)
-
-    # Set the signal handlers for SIGINT and SIGTERM
-    signal.signal(signal.SIGINT, signal_handler)
-    signal.signal(signal.SIGTERM, signal_handler)
-
-    try:
-        while True:
-            time.sleep(_ONE_DAY_IN_SECONDS)
-    except KeyboardInterrupt:
-        server.stop(0)
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser(description="Run the gRPC server.")
-    parser.add_argument(
-        "--addr", default="localhost:50051", help="The address to bind the server to."
-    )
-    args = parser.parse_args()
-
-    serve(args.addr)
--- a/backend/python/neutts/install.sh
+++ b/backend/python/neutts/install.sh
@@ -1,33 +0,0 @@
-#!/bin/bash
-set -e
-
-backend_dir=$(dirname $0)
-if [ -d $backend_dir/common ]; then
-    source $backend_dir/common/libbackend.sh
-else
-    source $backend_dir/../common/libbackend.sh
-fi
-
-# This is here because the Intel pip index is broken and returns 200 status codes for every package name, it just doesn't return any package links.
-# This makes uv think that the package exists in the Intel pip index, and by default it stops looking at other pip indexes once it finds a match.
-# We need uv to continue falling through to the pypi default index to find optimum[openvino] in the pypi index
-# the --upgrade actually allows us to *downgrade* torch to the version provided in the Intel pip index
-if [ "x${BUILD_PROFILE}" == "xintel" ]; then
-    EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match"
-fi
-
-if [ "x${BUILD_TYPE}" == "xcublas" ] || [ "x${BUILD_TYPE}" == "xl4t" ]; then
-    export CMAKE_ARGS="-DGGML_CUDA=on"
-fi
-
-if [ "x${BUILD_TYPE}" == "xhipblas" ]; then
-    export CMAKE_ARGS="-DGGML_HIPBLAS=on"
-fi
-
-EXTRA_PIP_INSTALL_FLAGS+=" --no-build-isolation"
-
-git clone https://github.com/neuphonic/neutts-air neutts-air
-
-cp -rfv neutts-air/neuttsair ./
-
-installRequirements
--- a/backend/python/neutts/requirements-after.txt
+++ b/backend/python/neutts/requirements-after.txt
@@ -1,2 +0,0 @@
-datasets==4.1.1
-torchtune==0.6.1
--- a/backend/python/neutts/requirements-cpu.txt
+++ b/backend/python/neutts/requirements-cpu.txt
@@ -1,10 +0,0 @@
--extra-index-url https://download.pytorch.org/whl/cpu
-accelerate
-torch==2.8.0
-transformers==4.56.1
-librosa==0.11.0
-neucodec>=0.0.4
-phonemizer==3.3.0
-soundfile==0.13.1
-resemble-perth==1.0.1
-llama-cpp-python
--- a/backend/python/neutts/requirements-cublas12.txt
+++ b/backend/python/neutts/requirements-cublas12.txt
@@ -1,8 +0,0 @@
-librosa==0.11.0
-neucodec>=0.0.4
-phonemizer==3.3.0
-soundfile==0.13.1
-torch==2.8.0
-transformers==4.56.1
-resemble-perth==1.0.1
-accelerate
--- a/backend/python/neutts/requirements-hipblas.txt
+++ b/backend/python/neutts/requirements-hipblas.txt
@@ -1,10 +0,0 @@
--extra-index-url https://download.pytorch.org/whl/rocm6.3
-torch==2.8.0+rocm6.3
-transformers==4.56.1
-accelerate
-librosa==0.11.0
-neucodec>=0.0.4
-phonemizer==3.3.0
-soundfile==0.13.1
-resemble-perth==1.0.1
-llama-cpp-python
--- a/backend/python/neutts/requirements-l4t.txt
+++ b/backend/python/neutts/requirements-l4t.txt
@@ -1,10 +0,0 @@
--extra-index-url https://pypi.jetson-ai-lab.io/jp6/cu126/
-torch
-transformers
-accelerate
-librosa==0.11.0
-neucodec>=0.0.4
-phonemizer==3.3.0
-soundfile==0.13.1
-resemble-perth==1.0.1
-llama-cpp-python
--- a/backend/python/neutts/requirements.txt
+++ b/backend/python/neutts/requirements.txt
@@ -1,7 +0,0 @@
-grpcio==1.71.0
-protobuf
-certifi
-packaging
-setuptools
-numpy==2.2.6
-scikit_build_core
--- a/backend/python/neutts/run.sh
+++ b/backend/python/neutts/run.sh
@@ -1,10 +0,0 @@
-#!/bin/bash
-backend_dir=$(dirname $0)
-if [ -d $backend_dir/common ]; then
-    source $backend_dir/common/libbackend.sh
-else
-    source $backend_dir/../common/libbackend.sh
-fi
-
-
-startBackend $@
--- a/backend/python/neutts/test.py
+++ b/backend/python/neutts/test.py
@@ -1,82 +0,0 @@
-"""
-A test script to test the gRPC service
-"""
-import unittest
-import subprocess
-import time
-import backend_pb2
-import backend_pb2_grpc
-
-import grpc
-
-
-class TestBackendServicer(unittest.TestCase):
-    """
-    TestBackendServicer is the class that tests the gRPC service
-    """
-    def setUp(self):
-        """
-        This method sets up the gRPC service by starting the server
-        """
-        self.service = subprocess.Popen(["python3", "backend.py", "--addr", "localhost:50051"])
-        time.sleep(30)
-
-    def tearDown(self) -> None:
-        """
-        This method tears down the gRPC service by terminating the server
-        """
-        self.service.terminate()
-        self.service.wait()
-
-    def test_server_startup(self):
-        """
-        This method tests if the server starts up successfully
-        """
-        try:
-            self.setUp()
-            with grpc.insecure_channel("localhost:50051") as channel:
-                stub = backend_pb2_grpc.BackendStub(channel)
-                response = stub.Health(backend_pb2.HealthMessage())
-                self.assertEqual(response.message, b'OK')
-        except Exception as err:
-            print(err)
-            self.fail("Server failed to start")
-        finally:
-            self.tearDown()
-
-    def test_load_model(self):
-        """
-        This method tests if the model is loaded successfully
-        """
-        try:
-            self.setUp()
-            with grpc.insecure_channel("localhost:50051") as channel:
-                stub = backend_pb2_grpc.BackendStub(channel)
-                response = stub.LoadModel(backend_pb2.ModelOptions())
-                print(response)
-                self.assertTrue(response.success)
-                self.assertEqual(response.message, "Model loaded successfully")
-        except Exception as err:
-            print(err)
-            self.fail("LoadModel service failed")
-        finally:
-            self.tearDown()
-
-    def test_tts(self):
-        """
-        This method tests if the embeddings are generated successfully
-        """
-        try:
-            self.setUp()
-            with grpc.insecure_channel("localhost:50051") as channel:
-                stub = backend_pb2_grpc.BackendStub(channel)
-                response = stub.LoadModel(backend_pb2.ModelOptions())
-                self.assertTrue(response.success)
-                tts_request = backend_pb2.TTSRequest(text="80s TV news production music hit for tonight's biggest story")
-                tts_response = stub.TTS(tts_request)
-                self.assertIsNotNone(tts_response)
-        except Exception as err:
-            print(err)
-            self.fail("TTS service failed")
-        finally:
-            self.tearDown()
--- a/backend/python/neutts/test.sh
+++ b/backend/python/neutts/test.sh
@@ -1,11 +0,0 @@
-#!/bin/bash
-set -e
-
-backend_dir=$(dirname $0)
-if [ -d $backend_dir/common ]; then
-    source $backend_dir/common/libbackend.sh
-else
-    source $backend_dir/../common/libbackend.sh
-fi
-
-runUnittests
--- a/backend/python/rerankers/backend.py
+++ b/backend/python/rerankers/backend.py
@@ -61,7 +61,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
            if request.PipelineType != "": # Reuse the PipelineType field for language
                kwargs['lang'] = request.PipelineType
            self.model_name = model_name
-            self.model = Reranker(model_name, **kwargs)
+            self.model = Reranker(model_name, **kwargs)  
        except Exception as err:
            return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")

@@ -75,13 +75,12 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
            documents.append(doc)
        ranked_results=self.model.rank(query=request.query, docs=documents, doc_ids=list(range(len(request.documents))))
        # Prepare results to return
-        cropped_results = ranked_results.top_k(request.top_n) if request.top_n > 0 else ranked_results
        results = [
            backend_pb2.DocumentResult(
                index=res.doc_id,
                text=res.text,
                relevance_score=res.score
-            ) for res in (cropped_results)
+            ) for res in ranked_results.results
        ]

        # Calculate the usage and total tokens
--- a/backend/python/rerankers/requirements.txt
+++ b/backend/python/rerankers/requirements.txt
@@ -1,3 +1,3 @@
-grpcio==1.76.0
+grpcio==1.74.0
 protobuf
 certifi
--- a/backend/python/rerankers/test.py
+++ b/backend/python/rerankers/test.py
@@ -76,7 +76,7 @@ class TestBackendServicer(unittest.TestCase):
                )
                response = stub.LoadModel(backend_pb2.ModelOptions(Model="cross-encoder"))
                self.assertTrue(response.success)
-
+               
                rerank_response = stub.Rerank(request)
                print(rerank_response.results[0])
                self.assertIsNotNone(rerank_response.results)
@@ -87,60 +87,4 @@ class TestBackendServicer(unittest.TestCase):
            print(err)
            self.fail("Reranker service failed")
        finally:
-            self.tearDown()
-
-    def test_rerank_omit_top_n(self):
-        """
-        This method tests if the embeddings are generated successfully even top_n is omitted
-        """
-        try:
-            self.setUp()
-            with grpc.insecure_channel("localhost:50051") as channel:
-                stub = backend_pb2_grpc.BackendStub(channel)
-                request = backend_pb2.RerankRequest(
-                    query="I love you",
-                    documents=["I hate you", "I really like you"],
-                    top_n=0 # 
-                )
-                response = stub.LoadModel(backend_pb2.ModelOptions(Model="cross-encoder"))
-                self.assertTrue(response.success)
-
-                rerank_response = stub.Rerank(request)
-                print(rerank_response.results[0])
-                self.assertIsNotNone(rerank_response.results)
-                self.assertEqual(len(rerank_response.results), 2)
-                self.assertEqual(rerank_response.results[0].text, "I really like you")
-                self.assertEqual(rerank_response.results[1].text, "I hate you")
-        except Exception as err:
-            print(err)
-            self.fail("Reranker service failed")
-        finally:
-            self.tearDown()
-
-    def test_rerank_crop(self):
-        """
-        This method tests top_n cropping
-        """
-        try:
-            self.setUp()
-            with grpc.insecure_channel("localhost:50051") as channel:
-                stub = backend_pb2_grpc.BackendStub(channel)
-                request = backend_pb2.RerankRequest(
-                    query="I love you",
-                    documents=["I hate you", "I really like you", "I hate ignoring top_n"],
-                    top_n=2
-                )
-                response = stub.LoadModel(backend_pb2.ModelOptions(Model="cross-encoder"))
-                self.assertTrue(response.success)
-
-                rerank_response = stub.Rerank(request)
-                print(rerank_response.results[0])
-                self.assertIsNotNone(rerank_response.results)
-                self.assertEqual(len(rerank_response.results), 2)
-                self.assertEqual(rerank_response.results[0].text, "I really like you")
-                self.assertEqual(rerank_response.results[1].text, "I hate you")
-        except Exception as err:
-            print(err)
-            self.fail("Reranker service failed")
-        finally:
-            self.tearDown()
+            self.tearDown()
--- a/backend/python/transformers/requirements-cpu.txt
+++ b/backend/python/transformers/requirements-cpu.txt
@@ -6,4 +6,4 @@ transformers
 bitsandbytes
 outetts
 sentence-transformers==5.1.0
-protobuf==6.33.1
+protobuf==6.32.0
--- a/backend/python/transformers/requirements-cublas11.txt
+++ b/backend/python/transformers/requirements-cublas11.txt
@@ -7,4 +7,4 @@ transformers
 bitsandbytes
 outetts
 sentence-transformers==5.1.0
-protobuf==6.33.1
+protobuf==6.32.0
--- a/backend/python/transformers/requirements-cublas12.txt
+++ b/backend/python/transformers/requirements-cublas12.txt
@@ -6,4 +6,4 @@ transformers
 bitsandbytes
 outetts
 sentence-transformers==5.1.0
-protobuf==6.33.1
+protobuf==6.32.0
--- a/backend/python/transformers/requirements-hipblas.txt
+++ b/backend/python/transformers/requirements-hipblas.txt
@@ -8,4 +8,4 @@ bitsandbytes
 outetts
 bitsandbytes
 sentence-transformers==5.1.0
-protobuf==6.33.1
+protobuf==6.32.0
--- a/backend/python/transformers/requirements-intel.txt
+++ b/backend/python/transformers/requirements-intel.txt
@@ -10,4 +10,4 @@ intel-extension-for-transformers
 bitsandbytes
 outetts
 sentence-transformers==5.1.0
-protobuf==6.33.1
+protobuf==6.32.0
--- a/backend/python/transformers/requirements.txt
+++ b/backend/python/transformers/requirements.txt
@@ -1,5 +1,5 @@
-grpcio==1.76.0
-protobuf==6.33.1
+grpcio==1.75.0
+protobuf==6.32.0
 certifi
 setuptools
 scipy==1.15.1
--- a/backend/python/vllm/install.sh
+++ b/backend/python/vllm/install.sh
@@ -3,13 +3,6 @@ set -e

 EXTRA_PIP_INSTALL_FLAGS="--no-build-isolation"

-# Avoid to overcommit the CPU during build
-# https://github.com/vllm-project/vllm/issues/20079
-# https://docs.vllm.ai/en/v0.8.3/serving/env_vars.html
-# https://docs.redhat.com/it/documentation/red_hat_ai_inference_server/3.0/html/vllm_server_arguments/environment_variables-server-arguments
-export NVCC_THREADS=2
-export MAX_JOBS=1
-
 backend_dir=$(dirname $0)

 if [ -d $backend_dir/common ]; then
--- a/backend/python/vllm/requirements-cublas12-after.txt
+++ b/backend/python/vllm/requirements-cublas12-after.txt
@@ -1 +1 @@
-https://github.com/Dao-AILab/flash-attention/releases/download/v2.8.3/flash_attn-2.8.3+cu12torch2.7cxx11abiTRUE-cp310-cp310-linux_x86_64.whl
+flash-attn
--- a/backend/python/vllm/requirements.txt
+++ b/backend/python/vllm/requirements.txt
@@ -1,4 +1,4 @@
-grpcio==1.76.0
+grpcio==1.74.0
 protobuf
 certifi
 setuptools
--- a/cmd/launcher/main.go
+++ b/cmd/launcher/main.go
@@ -2,12 +2,14 @@ package main

 import (
 	"log"
+	"os"
+	"os/signal"
+	"syscall"

 	"fyne.io/fyne/v2"
 	"fyne.io/fyne/v2/app"
 	"fyne.io/fyne/v2/driver/desktop"
 	coreLauncher "github.com/mudler/LocalAI/cmd/launcher/internal"
-	"github.com/mudler/LocalAI/pkg/signals"
 )

 func main() {
@@ -40,12 +42,7 @@ func main() {
 	}

 	// Setup signal handling for graceful shutdown
-	signals.RegisterGracefulTerminationHandler(func() {
-		// Perform cleanup
-		if err := launcher.Shutdown(); err != nil {
-			log.Printf("Error during shutdown: %v", err)
-		}
-	})
+	setupSignalHandling(launcher)

 	// Initialize the launcher state
 	go func() {
@@ -70,3 +67,26 @@ func main() {
 	// Run the application in background (window only shown when "Settings" is clicked)
 	myApp.Run()
 }
+
+// setupSignalHandling sets up signal handlers for graceful shutdown
+func setupSignalHandling(launcher *coreLauncher.Launcher) {
+	// Create a channel to receive OS signals
+	sigChan := make(chan os.Signal, 1)
+
+	// Register for interrupt and terminate signals
+	signal.Notify(sigChan, syscall.SIGINT, syscall.SIGTERM)
+
+	// Handle signals in a separate goroutine
+	go func() {
+		sig := <-sigChan
+		log.Printf("Received signal %v, shutting down gracefully...", sig)
+
+		// Perform cleanup
+		if err := launcher.Shutdown(); err != nil {
+			log.Printf("Error during shutdown: %v", err)
+		}
+
+		// Exit the application
+		os.Exit(0)
+	}()
+}
--- a/core/application/application.go
+++ b/core/application/application.go
@@ -1,9 +1,6 @@
 package application

 import (
-	"context"
-	"sync"
-
 	"github.com/mudler/LocalAI/core/config"
 	"github.com/mudler/LocalAI/core/services"
 	"github.com/mudler/LocalAI/core/templates"
@@ -14,14 +11,8 @@ type Application struct {
 	backendLoader      *config.ModelConfigLoader
 	modelLoader        *model.ModelLoader
 	applicationConfig  *config.ApplicationConfig
-	startupConfig      *config.ApplicationConfig // Stores original config from env vars (before file loading)
 	templatesEvaluator *templates.Evaluator
 	galleryService     *services.GalleryService
-	watchdogMutex      sync.Mutex
-	watchdogStop       chan bool
-	p2pMutex           sync.Mutex
-	p2pCtx             context.Context
-	p2pCancel          context.CancelFunc
 }

 func newApplication(appConfig *config.ApplicationConfig) *Application {
@@ -53,11 +44,6 @@ func (a *Application) GalleryService() *services.GalleryService {
 	return a.galleryService
 }

-// StartupConfig returns the original startup configuration (from env vars, before file loading)
-func (a *Application) StartupConfig() *config.ApplicationConfig {
-	return a.startupConfig
-}
-
 func (a *Application) start() error {
 	galleryService := services.NewGalleryService(a.ApplicationConfig(), a.ModelLoader())
 	err := galleryService.Start(a.ApplicationConfig().Context, a.ModelConfigLoader(), a.ApplicationConfig().SystemState)
--- a/core/application/config_file_watcher.go
+++ b/core/application/config_file_watcher.go
@@ -1,343 +1,180 @@
-package application
-
-import (
-	"encoding/json"
-	"fmt"
-	"os"
-	"path"
-	"path/filepath"
-	"time"
-
-	"dario.cat/mergo"
-	"github.com/fsnotify/fsnotify"
-	"github.com/mudler/LocalAI/core/config"
-	"github.com/rs/zerolog/log"
-)
-
-type fileHandler func(fileContent []byte, appConfig *config.ApplicationConfig) error
-
-type configFileHandler struct {
-	handlers map[string]fileHandler
-
-	watcher *fsnotify.Watcher
-
-	appConfig *config.ApplicationConfig
-}
-
-// TODO: This should be a singleton eventually so other parts of the code can register config file handlers,
-// then we can export it to other packages
-func newConfigFileHandler(appConfig *config.ApplicationConfig) configFileHandler {
-	c := configFileHandler{
-		handlers:  make(map[string]fileHandler),
-		appConfig: appConfig,
-	}
-	err := c.Register("api_keys.json", readApiKeysJson(*appConfig), true)
-	if err != nil {
-		log.Error().Err(err).Str("file", "api_keys.json").Msg("unable to register config file handler")
-	}
-	err = c.Register("external_backends.json", readExternalBackendsJson(*appConfig), true)
-	if err != nil {
-		log.Error().Err(err).Str("file", "external_backends.json").Msg("unable to register config file handler")
-	}
-	err = c.Register("runtime_settings.json", readRuntimeSettingsJson(*appConfig), true)
-	if err != nil {
-		log.Error().Err(err).Str("file", "runtime_settings.json").Msg("unable to register config file handler")
-	}
-	return c
-}
-
-func (c *configFileHandler) Register(filename string, handler fileHandler, runNow bool) error {
-	_, ok := c.handlers[filename]
-	if ok {
-		return fmt.Errorf("handler already registered for file %s", filename)
-	}
-	c.handlers[filename] = handler
-	if runNow {
-		c.callHandler(filename, handler)
-	}
-	return nil
-}
-
-func (c *configFileHandler) callHandler(filename string, handler fileHandler) {
-	rootedFilePath := filepath.Join(c.appConfig.DynamicConfigsDir, filepath.Clean(filename))
-	log.Trace().Str("filename", rootedFilePath).Msg("reading file for dynamic config update")
-	fileContent, err := os.ReadFile(rootedFilePath)
-	if err != nil && !os.IsNotExist(err) {
-		log.Error().Err(err).Str("filename", rootedFilePath).Msg("could not read file")
-	}
-
-	if err = handler(fileContent, c.appConfig); err != nil {
-		log.Error().Err(err).Msg("WatchConfigDirectory goroutine failed to update options")
-	}
-}
-
-func (c *configFileHandler) Watch() error {
-	configWatcher, err := fsnotify.NewWatcher()
-	c.watcher = configWatcher
-	if err != nil {
-		return err
-	}
-
-	if c.appConfig.DynamicConfigsDirPollInterval > 0 {
-		log.Debug().Msg("Poll interval set, falling back to polling for configuration changes")
-		ticker := time.NewTicker(c.appConfig.DynamicConfigsDirPollInterval)
-		go func() {
-			for {
-				<-ticker.C
-				for file, handler := range c.handlers {
-					log.Debug().Str("file", file).Msg("polling config file")
-					c.callHandler(file, handler)
-				}
-			}
-		}()
-	}
-
-	// Start listening for events.
-	go func() {
-		for {
-			select {
-			case event, ok := <-c.watcher.Events:
-				if !ok {
-					return
-				}
-				if event.Has(fsnotify.Write | fsnotify.Create | fsnotify.Remove) {
-					handler, ok := c.handlers[path.Base(event.Name)]
-					if !ok {
-						continue
-					}
-
-					c.callHandler(filepath.Base(event.Name), handler)
-				}
-			case err, ok := <-c.watcher.Errors:
-				log.Error().Err(err).Msg("config watcher error received")
-				if !ok {
-					return
-				}
-			}
-		}
-	}()
-
-	// Add a path.
-	err = c.watcher.Add(c.appConfig.DynamicConfigsDir)
-	if err != nil {
-		return fmt.Errorf("unable to create a watcher on the configuration directory: %+v", err)
-	}
-
-	return nil
-}
-
-// TODO: When we institute graceful shutdown, this should be called
-func (c *configFileHandler) Stop() error {
-	return c.watcher.Close()
-}
-
-func readApiKeysJson(startupAppConfig config.ApplicationConfig) fileHandler {
-	handler := func(fileContent []byte, appConfig *config.ApplicationConfig) error {
-		log.Debug().Msg("processing api keys runtime update")
-		log.Trace().Int("numKeys", len(startupAppConfig.ApiKeys)).Msg("api keys provided at startup")
-
-		if len(fileContent) > 0 {
-			// Parse JSON content from the file
-			var fileKeys []string
-			err := json.Unmarshal(fileContent, &fileKeys)
-			if err != nil {
-				return err
-			}
-
-			log.Trace().Int("numKeys", len(fileKeys)).Msg("discovered API keys from api keys dynamic config dile")
-
-			appConfig.ApiKeys = append(startupAppConfig.ApiKeys, fileKeys...)
-		} else {
-			log.Trace().Msg("no API keys discovered from dynamic config file")
-			appConfig.ApiKeys = startupAppConfig.ApiKeys
-		}
-		log.Trace().Int("numKeys", len(appConfig.ApiKeys)).Msg("total api keys after processing")
-		return nil
-	}
-
-	return handler
-}
-
-func readExternalBackendsJson(startupAppConfig config.ApplicationConfig) fileHandler {
-	handler := func(fileContent []byte, appConfig *config.ApplicationConfig) error {
-		log.Debug().Msg("processing external_backends.json")
-
-		if len(fileContent) > 0 {
-			// Parse JSON content from the file
-			var fileBackends map[string]string
-			err := json.Unmarshal(fileContent, &fileBackends)
-			if err != nil {
-				return err
-			}
-			appConfig.ExternalGRPCBackends = startupAppConfig.ExternalGRPCBackends
-			err = mergo.Merge(&appConfig.ExternalGRPCBackends, &fileBackends)
-			if err != nil {
-				return err
-			}
-		} else {
-			appConfig.ExternalGRPCBackends = startupAppConfig.ExternalGRPCBackends
-		}
-		log.Debug().Msg("external backends loaded from external_backends.json")
-		return nil
-	}
-	return handler
-}
-
-type runtimeSettings struct {
-	WatchdogEnabled          *bool             `json:"watchdog_enabled,omitempty"`
-	WatchdogIdleEnabled      *bool             `json:"watchdog_idle_enabled,omitempty"`
-	WatchdogBusyEnabled      *bool             `json:"watchdog_busy_enabled,omitempty"`
-	WatchdogIdleTimeout      *string           `json:"watchdog_idle_timeout,omitempty"`
-	WatchdogBusyTimeout      *string           `json:"watchdog_busy_timeout,omitempty"`
-	SingleBackend            *bool             `json:"single_backend,omitempty"`
-	ParallelBackendRequests  *bool             `json:"parallel_backend_requests,omitempty"`
-	Threads                  *int              `json:"threads,omitempty"`
-	ContextSize              *int              `json:"context_size,omitempty"`
-	F16                      *bool             `json:"f16,omitempty"`
-	Debug                    *bool             `json:"debug,omitempty"`
-	CORS                     *bool             `json:"cors,omitempty"`
-	CSRF                     *bool             `json:"csrf,omitempty"`
-	CORSAllowOrigins         *string           `json:"cors_allow_origins,omitempty"`
-	P2PToken                 *string           `json:"p2p_token,omitempty"`
-	P2PNetworkID             *string           `json:"p2p_network_id,omitempty"`
-	Federated                *bool             `json:"federated,omitempty"`
-	Galleries                *[]config.Gallery `json:"galleries,omitempty"`
-	BackendGalleries         *[]config.Gallery `json:"backend_galleries,omitempty"`
-	AutoloadGalleries        *bool             `json:"autoload_galleries,omitempty"`
-	AutoloadBackendGalleries *bool             `json:"autoload_backend_galleries,omitempty"`
-	ApiKeys                  *[]string         `json:"api_keys,omitempty"`
-}
-
-func readRuntimeSettingsJson(startupAppConfig config.ApplicationConfig) fileHandler {
-	handler := func(fileContent []byte, appConfig *config.ApplicationConfig) error {
-		log.Debug().Msg("processing runtime_settings.json")
-
-		// Determine if settings came from env vars by comparing with startup config
-		// startupAppConfig contains the original values set from env vars at startup.
-		// If current values match startup values, they came from env vars (or defaults).
-		// We apply file settings only if current values match startup values (meaning not from env vars).
-		envWatchdogIdle := appConfig.WatchDogIdle == startupAppConfig.WatchDogIdle
-		envWatchdogBusy := appConfig.WatchDogBusy == startupAppConfig.WatchDogBusy
-		envWatchdogIdleTimeout := appConfig.WatchDogIdleTimeout == startupAppConfig.WatchDogIdleTimeout
-		envWatchdogBusyTimeout := appConfig.WatchDogBusyTimeout == startupAppConfig.WatchDogBusyTimeout
-		envSingleBackend := appConfig.SingleBackend == startupAppConfig.SingleBackend
-		envParallelRequests := appConfig.ParallelBackendRequests == startupAppConfig.ParallelBackendRequests
-		envThreads := appConfig.Threads == startupAppConfig.Threads
-		envContextSize := appConfig.ContextSize == startupAppConfig.ContextSize
-		envF16 := appConfig.F16 == startupAppConfig.F16
-		envDebug := appConfig.Debug == startupAppConfig.Debug
-		envCORS := appConfig.CORS == startupAppConfig.CORS
-		envCSRF := appConfig.CSRF == startupAppConfig.CSRF
-		envCORSAllowOrigins := appConfig.CORSAllowOrigins == startupAppConfig.CORSAllowOrigins
-		envP2PToken := appConfig.P2PToken == startupAppConfig.P2PToken
-		envP2PNetworkID := appConfig.P2PNetworkID == startupAppConfig.P2PNetworkID
-		envFederated := appConfig.Federated == startupAppConfig.Federated
-		envAutoloadGalleries := appConfig.AutoloadGalleries == startupAppConfig.AutoloadGalleries
-		envAutoloadBackendGalleries := appConfig.AutoloadBackendGalleries == startupAppConfig.AutoloadBackendGalleries
-
-		if len(fileContent) > 0 {
-			var settings runtimeSettings
-			err := json.Unmarshal(fileContent, &settings)
-			if err != nil {
-				return err
-			}
-
-			// Apply file settings only if they don't match startup values (i.e., not from env vars)
-			if settings.WatchdogIdleEnabled != nil && !envWatchdogIdle {
-				appConfig.WatchDogIdle = *settings.WatchdogIdleEnabled
-				if appConfig.WatchDogIdle {
-					appConfig.WatchDog = true
-				}
-			}
-			if settings.WatchdogBusyEnabled != nil && !envWatchdogBusy {
-				appConfig.WatchDogBusy = *settings.WatchdogBusyEnabled
-				if appConfig.WatchDogBusy {
-					appConfig.WatchDog = true
-				}
-			}
-			if settings.WatchdogIdleTimeout != nil && !envWatchdogIdleTimeout {
-				dur, err := time.ParseDuration(*settings.WatchdogIdleTimeout)
-				if err == nil {
-					appConfig.WatchDogIdleTimeout = dur
-				} else {
-					log.Warn().Err(err).Str("timeout", *settings.WatchdogIdleTimeout).Msg("invalid watchdog idle timeout in runtime_settings.json")
-				}
-			}
-			if settings.WatchdogBusyTimeout != nil && !envWatchdogBusyTimeout {
-				dur, err := time.ParseDuration(*settings.WatchdogBusyTimeout)
-				if err == nil {
-					appConfig.WatchDogBusyTimeout = dur
-				} else {
-					log.Warn().Err(err).Str("timeout", *settings.WatchdogBusyTimeout).Msg("invalid watchdog busy timeout in runtime_settings.json")
-				}
-			}
-			if settings.SingleBackend != nil && !envSingleBackend {
-				appConfig.SingleBackend = *settings.SingleBackend
-			}
-			if settings.ParallelBackendRequests != nil && !envParallelRequests {
-				appConfig.ParallelBackendRequests = *settings.ParallelBackendRequests
-			}
-			if settings.Threads != nil && !envThreads {
-				appConfig.Threads = *settings.Threads
-			}
-			if settings.ContextSize != nil && !envContextSize {
-				appConfig.ContextSize = *settings.ContextSize
-			}
-			if settings.F16 != nil && !envF16 {
-				appConfig.F16 = *settings.F16
-			}
-			if settings.Debug != nil && !envDebug {
-				appConfig.Debug = *settings.Debug
-			}
-			if settings.CORS != nil && !envCORS {
-				appConfig.CORS = *settings.CORS
-			}
-			if settings.CSRF != nil && !envCSRF {
-				appConfig.CSRF = *settings.CSRF
-			}
-			if settings.CORSAllowOrigins != nil && !envCORSAllowOrigins {
-				appConfig.CORSAllowOrigins = *settings.CORSAllowOrigins
-			}
-			if settings.P2PToken != nil && !envP2PToken {
-				appConfig.P2PToken = *settings.P2PToken
-			}
-			if settings.P2PNetworkID != nil && !envP2PNetworkID {
-				appConfig.P2PNetworkID = *settings.P2PNetworkID
-			}
-			if settings.Federated != nil && !envFederated {
-				appConfig.Federated = *settings.Federated
-			}
-			if settings.Galleries != nil {
-				appConfig.Galleries = *settings.Galleries
-			}
-			if settings.BackendGalleries != nil {
-				appConfig.BackendGalleries = *settings.BackendGalleries
-			}
-			if settings.AutoloadGalleries != nil && !envAutoloadGalleries {
-				appConfig.AutoloadGalleries = *settings.AutoloadGalleries
-			}
-			if settings.AutoloadBackendGalleries != nil && !envAutoloadBackendGalleries {
-				appConfig.AutoloadBackendGalleries = *settings.AutoloadBackendGalleries
-			}
-			if settings.ApiKeys != nil {
-				// API keys from env vars (startup) should be kept, runtime settings keys replace all runtime keys
-				// If runtime_settings.json specifies ApiKeys (even if empty), it replaces all runtime keys
-				// Start with env keys, then add runtime_settings.json keys (which may be empty to clear them)
-				envKeys := startupAppConfig.ApiKeys
-				runtimeKeys := *settings.ApiKeys
-				// Replace all runtime keys with what's in runtime_settings.json
-				appConfig.ApiKeys = append(envKeys, runtimeKeys...)
-			}
-
-			// If watchdog is enabled via file but not via env, ensure WatchDog flag is set
-			if !envWatchdogIdle && !envWatchdogBusy {
-				if settings.WatchdogEnabled != nil && *settings.WatchdogEnabled {
-					appConfig.WatchDog = true
-				}
-			}
-		}
-		log.Debug().Msg("runtime settings loaded from runtime_settings.json")
-		return nil
-	}
-	return handler
-}
+package application
+
+import (
+	"encoding/json"
+	"fmt"
+	"os"
+	"path"
+	"path/filepath"
+	"time"
+
+	"dario.cat/mergo"
+	"github.com/fsnotify/fsnotify"
+	"github.com/mudler/LocalAI/core/config"
+	"github.com/rs/zerolog/log"
+)
+
+// fileHandler processes the raw contents of one dynamic config file and applies
+// the result to appConfig. fileContent may be empty (callHandler passes the
+// result of a failed/missing read through); the handlers defined in this file
+// treat empty content as "no file-provided values".
+type fileHandler func(fileContent []byte, appConfig *config.ApplicationConfig) error
+
+// configFileHandler maps dynamic config file names to their handlers and runs
+// those handlers against a single shared ApplicationConfig.
+type configFileHandler struct {
+	// handlers is keyed by the config file's name as passed to Register.
+	handlers map[string]fileHandler
+
+	// watcher is the fsnotify watcher created by Watch; newConfigFileHandler leaves it nil.
+	watcher *fsnotify.Watcher
+
+	// appConfig is handed to every handler and supplies DynamicConfigsDir.
+	appConfig *config.ApplicationConfig
+}
+
+// newConfigFileHandler builds a configFileHandler bound to appConfig and
+// registers the built-in handlers for api_keys.json and external_backends.json,
+// running each one immediately. A failed registration is logged, not returned.
+//
+// TODO: This should be a singleton eventually so other parts of the code can register config file handlers,
+// then we can export it to other packages
+func newConfigFileHandler(appConfig *config.ApplicationConfig) configFileHandler {
+	cfh := configFileHandler{
+		appConfig: appConfig,
+		handlers:  make(map[string]fileHandler),
+	}
+
+	// Factories are invoked inside the loop so each handler receives a by-value
+	// snapshot of the config taken right before its own registration.
+	builtins := []struct {
+		file    string
+		factory func(config.ApplicationConfig) fileHandler
+	}{
+		{file: "api_keys.json", factory: readApiKeysJson},
+		{file: "external_backends.json", factory: readExternalBackendsJson},
+	}
+	for _, b := range builtins {
+		if err := cfh.Register(b.file, b.factory(*appConfig), true); err != nil {
+			log.Error().Err(err).Str("file", b.file).Msg("unable to register config file handler")
+		}
+	}
+	return cfh
+}
+
+// Register binds handler to filename. It returns an error, leaving the existing
+// binding untouched, when filename already has a handler. When runNow is true
+// the handler is invoked once right after being stored.
+func (c *configFileHandler) Register(filename string, handler fileHandler, runNow bool) error {
+	if _, exists := c.handlers[filename]; exists {
+		return fmt.Errorf("handler already registered for file %s", filename)
+	}
+
+	c.handlers[filename] = handler
+	if !runNow {
+		return nil
+	}
+	c.callHandler(filename, handler)
+	return nil
+}
+
+// callHandler reads filename from the dynamic configs directory and passes its
+// contents to handler together with the shared application config.
+//
+// A missing file is not an error: the handler is still invoked, with empty
+// content, so it can fall back to its baseline values. Any other read failure
+// is logged and the handler is NOT invoked, so that a transient I/O or
+// permission error does not wipe the values loaded from a previous read.
+// Handler errors are logged and otherwise ignored.
+func (c *configFileHandler) callHandler(filename string, handler fileHandler) {
+	rootedFilePath := filepath.Join(c.appConfig.DynamicConfigsDir, filepath.Clean(filename))
+	log.Trace().Str("filename", rootedFilePath).Msg("reading file for dynamic config update")
+	fileContent, err := os.ReadFile(rootedFilePath)
+	if err != nil && !os.IsNotExist(err) {
+		log.Error().Err(err).Str("filename", rootedFilePath).Msg("could not read file")
+		return
+	}
+
+	if err := handler(fileContent, c.appConfig); err != nil {
+		log.Error().Err(err).Str("filename", rootedFilePath).Msg("WatchConfigDirectory goroutine failed to update options")
+	}
+}
+
+// Watch creates an fsnotify watcher on the dynamic configs directory and
+// starts a goroutine that re-runs the registered handler whenever a watched
+// file is written, created or removed. If DynamicConfigsDirPollInterval is
+// positive, a second goroutine additionally re-runs every handler on that
+// interval.
+//
+// It returns an error if the watcher cannot be created or the directory cannot
+// be added to it. The event goroutine exits when the watcher's channels are
+// closed; the polling goroutine has no stop condition.
+func (c *configFileHandler) Watch() error {
+	configWatcher, err := fsnotify.NewWatcher()
+	if err != nil {
+		return err
+	}
+	// Only publish the watcher once we know it is usable.
+	c.watcher = configWatcher
+
+	if c.appConfig.DynamicConfigsDirPollInterval > 0 {
+		log.Debug().Msg("Poll interval set, falling back to polling for configuration changes")
+		ticker := time.NewTicker(c.appConfig.DynamicConfigsDirPollInterval)
+		go func() {
+			for range ticker.C {
+				for file, handler := range c.handlers {
+					log.Debug().Str("file", file).Msg("polling config file")
+					c.callHandler(file, handler)
+				}
+			}
+		}()
+	}
+
+	// Start listening for events.
+	go func() {
+		for {
+			select {
+			case event, ok := <-configWatcher.Events:
+				if !ok {
+					return
+				}
+				if !event.Has(fsnotify.Write | fsnotify.Create | fsnotify.Remove) {
+					continue
+				}
+				// event.Name is an OS path. Normalise separators first so the
+				// slash-based path.Base yields the file name on every platform,
+				// and use the same name for lookup and dispatch.
+				name := path.Base(filepath.ToSlash(event.Name))
+				handler, registered := c.handlers[name]
+				if !registered {
+					continue
+				}
+				c.callHandler(name, handler)
+			case err, ok := <-configWatcher.Errors:
+				// A closed channel yields a nil error; check before logging.
+				if !ok {
+					return
+				}
+				log.Error().Err(err).Msg("config watcher error received")
+			}
+		}
+	}()
+
+	// Add a path.
+	if err := configWatcher.Add(c.appConfig.DynamicConfigsDir); err != nil {
+		return fmt.Errorf("unable to create a watcher on the configuration directory: %w", err)
+	}
+
+	return nil
+}
+
+// Stop closes the underlying fsnotify watcher. It is a no-op returning nil
+// when no watcher has been created (Watch was never called, or it failed
+// before a watcher existed).
+//
+// TODO: When we institute graceful shutdown, this should be called
+func (c *configFileHandler) Stop() error {
+	if c.watcher == nil {
+		return nil
+	}
+	return c.watcher.Close()
+}
+
+// readApiKeysJson returns a fileHandler for the API keys dynamic config file.
+//
+// The file is expected to contain a JSON array of strings. On every invocation
+// the handler sets appConfig.ApiKeys to the keys present in startupAppConfig
+// followed by the keys from the file; with empty content only the startup keys
+// remain. A JSON parse error is returned and leaves appConfig.ApiKeys as it was.
+func readApiKeysJson(startupAppConfig config.ApplicationConfig) fileHandler {
+	// Cap the startup slice at its own length. Without this, append could write
+	// the file keys into spare capacity of the startup backing array, and every
+	// later reload would overwrite that same memory while appConfig.ApiKeys
+	// still points at it.
+	startupKeys := startupAppConfig.ApiKeys[:len(startupAppConfig.ApiKeys):len(startupAppConfig.ApiKeys)]
+
+	handler := func(fileContent []byte, appConfig *config.ApplicationConfig) error {
+		log.Debug().Msg("processing api keys runtime update")
+		log.Trace().Int("numKeys", len(startupKeys)).Msg("api keys provided at startup")
+
+		if len(fileContent) > 0 {
+			// Parse JSON content from the file
+			var fileKeys []string
+			if err := json.Unmarshal(fileContent, &fileKeys); err != nil {
+				return err
+			}
+
+			log.Trace().Int("numKeys", len(fileKeys)).Msg("discovered API keys from api keys dynamic config file")
+
+			// The capped slice forces append to allocate a fresh backing array.
+			appConfig.ApiKeys = append(startupKeys, fileKeys...)
+		} else {
+			log.Trace().Msg("no API keys discovered from dynamic config file")
+			appConfig.ApiKeys = startupKeys
+		}
+		log.Trace().Int("numKeys", len(appConfig.ApiKeys)).Msg("total api keys after processing")
+		return nil
+	}
+
+	return handler
+}
+
+// readExternalBackendsJson returns a fileHandler for external_backends.json.
+//
+// The file is expected to contain a JSON object mapping strings to strings.
+// On every invocation the handler rebuilds appConfig.ExternalGRPCBackends from
+// the startup entries plus the entries found in the file (merged with
+// mergo.Merge's default, non-overriding behaviour). With empty content the
+// startup map is restored. Parse or merge errors are returned and leave
+// appConfig.ExternalGRPCBackends as it was.
+func readExternalBackendsJson(startupAppConfig config.ApplicationConfig) fileHandler {
+	handler := func(fileContent []byte, appConfig *config.ApplicationConfig) error {
+		log.Debug().Msg("processing external_backends.json")
+
+		if len(fileContent) > 0 {
+			// Parse JSON content from the file
+			var fileBackends map[string]string
+			if err := json.Unmarshal(fileContent, &fileBackends); err != nil {
+				return err
+			}
+
+			// Merge into a fresh copy: maps are reference types, so merging
+			// straight into the startup map would permanently add file entries
+			// to the baseline and they would survive being removed from the file.
+			merged := make(map[string]string, len(startupAppConfig.ExternalGRPCBackends)+len(fileBackends))
+			for name, uri := range startupAppConfig.ExternalGRPCBackends {
+				merged[name] = uri
+			}
+			if err := mergo.Merge(&merged, &fileBackends); err != nil {
+				return err
+			}
+			appConfig.ExternalGRPCBackends = merged
+		} else {
+			appConfig.ExternalGRPCBackends = startupAppConfig.ExternalGRPCBackends
+		}
+		log.Debug().Msg("external backends loaded from external_backends.json")
+		return nil
+	}
+	return handler
+}
--- a/core/application/p2p.go
+++ b/core/application/p2p.go
@@ -1,240 +0,0 @@
-package application
-
-import (
-	"context"
-	"fmt"
-	"net"
-	"slices"
-	"time"
-
-	"github.com/google/uuid"
-	"github.com/mudler/LocalAI/core/gallery"
-	"github.com/mudler/LocalAI/core/p2p"
-	"github.com/mudler/LocalAI/core/schema"
-	"github.com/mudler/LocalAI/core/services"
-
-	"github.com/mudler/edgevpn/pkg/node"
-	"github.com/rs/zerolog/log"
-	zlog "github.com/rs/zerolog/log"
-)
-
-func (a *Application) StopP2P() error {
-	if a.p2pCancel != nil {
-		a.p2pCancel()
-		a.p2pCancel = nil
-		a.p2pCtx = nil
-		// Wait a bit for shutdown to complete
-		time.Sleep(200 * time.Millisecond)
-	}
-	return nil
-}
-
-func (a *Application) StartP2P() error {
-	// we need a p2p token
-	if a.applicationConfig.P2PToken == "" {
-		return fmt.Errorf("P2P token is not set")
-	}
-
-	networkID := a.applicationConfig.P2PNetworkID
-
-	ctx, cancel := context.WithCancel(a.ApplicationConfig().Context)
-	a.p2pCtx = ctx
-	a.p2pCancel = cancel
-
-	var n *node.Node
-	// Here we are avoiding creating multiple nodes:
-	// - if the federated mode is enabled, we create a federated node and expose a service
-	// - exposing a service creates a node with specific options, and we don't want to create another node
-
-	// If the federated mode is enabled, we expose a service to the local instance running
-	// at r.Address
-	if a.applicationConfig.Federated {
-		_, port, err := net.SplitHostPort(a.applicationConfig.APIAddress)
-		if err != nil {
-			return err
-		}
-
-		// Here a new node is created and started
-		// and a service is exposed by the node
-		node, err := p2p.ExposeService(ctx, "localhost", port, a.applicationConfig.P2PToken, p2p.NetworkID(networkID, p2p.FederatedID))
-		if err != nil {
-			return err
-		}
-
-		if err := p2p.ServiceDiscoverer(ctx, node, a.applicationConfig.P2PToken, p2p.NetworkID(networkID, p2p.FederatedID), nil, false); err != nil {
-			return err
-		}
-
-		n = node
-		// start node sync in the background
-		if err := a.p2pSync(ctx, node); err != nil {
-			return err
-		}
-	}
-
-	// If a node wasn't created previously, create it
-	if n == nil {
-		node, err := p2p.NewNode(a.applicationConfig.P2PToken)
-		if err != nil {
-			return err
-		}
-		err = node.Start(ctx)
-		if err != nil {
-			return fmt.Errorf("starting new node: %w", err)
-		}
-		n = node
-	}
-
-	// Attach a ServiceDiscoverer to the p2p node
-	log.Info().Msg("Starting P2P server discovery...")
-	if err := p2p.ServiceDiscoverer(ctx, n, a.applicationConfig.P2PToken, p2p.NetworkID(networkID, p2p.WorkerID), func(serviceID string, node schema.NodeData) {
-		var tunnelAddresses []string
-		for _, v := range p2p.GetAvailableNodes(p2p.NetworkID(networkID, p2p.WorkerID)) {
-			if v.IsOnline() {
-				tunnelAddresses = append(tunnelAddresses, v.TunnelAddress)
-			} else {
-				log.Info().Msgf("Node %s is offline", v.ID)
-			}
-		}
-		if a.applicationConfig.TunnelCallback != nil {
-			a.applicationConfig.TunnelCallback(tunnelAddresses)
-		}
-	}, true); err != nil {
-		return err
-	}
-
-	return nil
-}
-
-// RestartP2P restarts the P2P stack with current ApplicationConfig settings
-// Note: This method signals that P2P should be restarted, but the actual restart
-// is handled by the caller to avoid import cycles
-func (a *Application) RestartP2P() error {
-	a.p2pMutex.Lock()
-	defer a.p2pMutex.Unlock()
-
-	// Stop existing P2P if running
-	if a.p2pCancel != nil {
-		a.p2pCancel()
-		a.p2pCancel = nil
-		a.p2pCtx = nil
-		// Wait a bit for shutdown to complete
-		time.Sleep(200 * time.Millisecond)
-	}
-
-	appConfig := a.ApplicationConfig()
-
-	// Start P2P if token is set
-	if appConfig.P2PToken == "" {
-		return fmt.Errorf("P2P token is not set")
-	}
-
-	// Create new context for P2P
-	ctx, cancel := context.WithCancel(appConfig.Context)
-	a.p2pCtx = ctx
-	a.p2pCancel = cancel
-
-	// Get API address from config
-	address := appConfig.APIAddress
-	if address == "" {
-		address = "127.0.0.1:8080" // default
-	}
-
-	// Start P2P stack in a goroutine
-	go func() {
-		if err := a.StartP2P(); err != nil {
-			log.Error().Err(err).Msg("Failed to start P2P stack")
-			cancel() // Cancel context on error
-		}
-	}()
-	log.Info().Msg("P2P stack restarted with new settings")
-
-	return nil
-}
-
-func syncState(ctx context.Context, n *node.Node, app *Application) error {
-	zlog.Debug().Msg("[p2p-sync] Syncing state")
-
-	whatWeHave := []string{}
-	for _, model := range app.ModelConfigLoader().GetAllModelsConfigs() {
-		whatWeHave = append(whatWeHave, model.Name)
-	}
-
-	ledger, _ := n.Ledger()
-	currentData := ledger.CurrentData()
-	zlog.Debug().Msgf("[p2p-sync] Current data: %v", currentData)
-	data, exists := ledger.GetKey("shared_state", "models")
-	if !exists {
-		ledger.AnnounceUpdate(ctx, time.Minute, "shared_state", "models", whatWeHave)
-		zlog.Debug().Msgf("No models found in the ledger, announced our models: %v", whatWeHave)
-	}
-
-	models := []string{}
-	if err := data.Unmarshal(&models); err != nil {
-		zlog.Warn().Err(err).Msg("error unmarshalling models")
-		return nil
-	}
-
-	zlog.Debug().Msgf("[p2p-sync] Models that are present in this instance: %v\nModels that are in the ledger: %v", whatWeHave, models)
-
-	// Sync with our state
-	whatIsNotThere := []string{}
-	for _, model := range whatWeHave {
-		if !slices.Contains(models, model) {
-			whatIsNotThere = append(whatIsNotThere, model)
-		}
-	}
-	if len(whatIsNotThere) > 0 {
-		zlog.Debug().Msgf("[p2p-sync] Announcing our models: %v", append(models, whatIsNotThere...))
-		ledger.AnnounceUpdate(
-			ctx,
-			1*time.Minute,
-			"shared_state",
-			"models",
-			append(models, whatIsNotThere...),
-		)
-	}
-
-	// Check if we have a model that is not in our state, otherwise install it
-	for _, model := range models {
-		if slices.Contains(whatWeHave, model) {
-			zlog.Debug().Msgf("[p2p-sync] Model %s is already present in this instance", model)
-			continue
-		}
-
-		// we install model
-		zlog.Info().Msgf("[p2p-sync] Installing model which is not present in this instance: %s", model)
-
-		uuid, err := uuid.NewUUID()
-		if err != nil {
-			zlog.Error().Err(err).Msg("error generating UUID")
-			continue
-		}
-
-		app.GalleryService().ModelGalleryChannel <- services.GalleryOp[gallery.GalleryModel, gallery.ModelConfig]{
-			ID:                 uuid.String(),
-			GalleryElementName: model,
-			Galleries:          app.ApplicationConfig().Galleries,
-			BackendGalleries:   app.ApplicationConfig().BackendGalleries,
-		}
-	}
-
-	return nil
-}
-
-func (a *Application) p2pSync(ctx context.Context, n *node.Node) error {
-	go func() {
-		for {
-			select {
-			case <-ctx.Done():
-				return
-			case <-time.After(1 * time.Minute):
-				if err := syncState(ctx, n, a); err != nil {
-					zlog.Error().Err(err).Msg("error syncing state")
-				}
-			}
-
-		}
-	}()
-	return nil
-}
--- a/core/application/startup.go
+++ b/core/application/startup.go
@@ -1,11 +1,8 @@
 package application

 import (
-	"encoding/json"
 	"fmt"
 	"os"
-	"path/filepath"
-	"time"

 	"github.com/mudler/LocalAI/core/backend"
 	"github.com/mudler/LocalAI/core/config"
@@ -21,24 +18,13 @@ import (

 func New(opts ...config.AppOption) (*Application, error) {
 	options := config.NewApplicationConfig(opts...)
-
-	// Store a copy of the startup config (from env vars, before file loading)
-	// This is used to determine if settings came from env vars vs file
-	startupConfigCopy := *options
 	application := newApplication(options)
-	application.startupConfig = &startupConfigCopy

 	log.Info().Msgf("Starting LocalAI using %d threads, with models path: %s", options.Threads, options.SystemState.Model.ModelsPath)
 	log.Info().Msgf("LocalAI version: %s", internal.PrintableVersion())
-
-	if err := application.start(); err != nil {
-		return nil, err
-	}
-
 	caps, err := xsysinfo.CPUCapabilities()
 	if err == nil {
 		log.Debug().Msgf("CPU capabilities: %v", caps)
-
 	}
 	gpus, err := xsysinfo.GPUs()
 	if err == nil {
@@ -70,12 +56,12 @@ func New(opts ...config.AppOption) (*Application, error) {
 		}
 	}

-	if err := coreStartup.InstallModels(options.Context, application.GalleryService(), options.Galleries, options.BackendGalleries, options.SystemState, application.ModelLoader(), options.EnforcePredownloadScans, options.AutoloadBackendGalleries, nil, options.ModelsURL...); err != nil {
+	if err := coreStartup.InstallModels(options.Galleries, options.BackendGalleries, options.SystemState, application.ModelLoader(), options.EnforcePredownloadScans, options.AutoloadBackendGalleries, nil, options.ModelsURL...); err != nil {
 		log.Error().Err(err).Msg("error installing models")
 	}

 	for _, backend := range options.ExternalBackends {
-		if err := coreStartup.InstallExternalBackends(options.Context, options.BackendGalleries, options.SystemState, application.ModelLoader(), nil, backend, "", ""); err != nil {
+		if err := coreStartup.InstallExternalBackends(options.BackendGalleries, options.SystemState, application.ModelLoader(), nil, backend, "", ""); err != nil {
 			log.Error().Err(err).Msg("error installing external backend")
 		}
 	}
@@ -118,13 +104,6 @@ func New(opts ...config.AppOption) (*Application, error) {
 		}
 	}

-	// Load runtime settings from file if DynamicConfigsDir is set
-	// This applies file settings with env var precedence (env vars take priority)
-	// Note: startupConfigCopy was already created above, so it has the original env var values
-	if options.DynamicConfigsDir != "" {
-		loadRuntimeSettingsFromFile(options)
-	}
-
 	// turn off any process that was started by GRPC if the context is canceled
 	go func() {
 		<-options.Context.Done()
@@ -135,8 +114,21 @@ func New(opts ...config.AppOption) (*Application, error) {
 		}
 	}()

-	// Initialize watchdog with current settings (after loading from file)
-	initializeWatchdog(application, options)
+	if options.WatchDog {
+		wd := model.NewWatchDog(
+			application.ModelLoader(),
+			options.WatchDogBusyTimeout,
+			options.WatchDogIdleTimeout,
+			options.WatchDogBusy,
+			options.WatchDogIdle)
+		application.ModelLoader().SetWatchDog(wd)
+		go wd.Run()
+		go func() {
+			<-options.Context.Done()
+			log.Debug().Msgf("Context canceled, shutting down")
+			wd.Shutdown()
+		}()
+	}

 	if options.LoadToMemory != nil && !options.SingleBackend {
 		for _, m := range options.LoadToMemory {
@@ -160,6 +152,10 @@ func New(opts ...config.AppOption) (*Application, error) {
 	// Watch the configuration directory
 	startWatcher(options)

+	if err := application.start(); err != nil {
+		return nil, err
+	}
+
 	log.Info().Msg("core/startup process completed!")
 	return application, nil
 }
@@ -188,131 +184,3 @@ func startWatcher(options *config.ApplicationConfig) {
 		log.Error().Err(err).Msg("failed creating watcher")
 	}
 }
-
-// loadRuntimeSettingsFromFile loads settings from runtime_settings.json with env var precedence
-// This function is called at startup, before env vars are applied via AppOptions.
-// Since env vars are applied via AppOptions in run.go, we need to check if they're set.
-// We do this by checking if the current options values differ from defaults, which would
-// indicate they were set from env vars. However, a simpler approach is to just apply
-// file settings here, and let the AppOptions (which are applied after this) override them.
-// But actually, this is called AFTER AppOptions are applied in New(), so we need to check env vars.
-// The cleanest solution: Store original values before applying file, or check if values match
-// what would be set from env vars. For now, we'll apply file settings and they'll be
-// overridden by AppOptions if env vars were set (but AppOptions are already applied).
-// Actually, this function is called in New() before AppOptions are fully processed for watchdog.
-// Let's check the call order: New() -> loadRuntimeSettingsFromFile() -> initializeWatchdog()
-// But AppOptions are applied in NewApplicationConfig() which is called first.
-// So at this point, options already has values from env vars. We should compare against
-// defaults to see if env vars were set. But we don't have defaults stored.
-// Simplest: Just apply file settings. If env vars were set, they're already in options.
-// The file watcher handler will handle runtime changes properly by comparing with startupAppConfig.
-func loadRuntimeSettingsFromFile(options *config.ApplicationConfig) {
-	settingsFile := filepath.Join(options.DynamicConfigsDir, "runtime_settings.json")
-	fileContent, err := os.ReadFile(settingsFile)
-	if err != nil {
-		if os.IsNotExist(err) {
-			log.Debug().Msg("runtime_settings.json not found, using defaults")
-			return
-		}
-		log.Warn().Err(err).Msg("failed to read runtime_settings.json")
-		return
-	}
-
-	var settings struct {
-		WatchdogEnabled         *bool   `json:"watchdog_enabled,omitempty"`
-		WatchdogIdleEnabled     *bool   `json:"watchdog_idle_enabled,omitempty"`
-		WatchdogBusyEnabled     *bool   `json:"watchdog_busy_enabled,omitempty"`
-		WatchdogIdleTimeout     *string `json:"watchdog_idle_timeout,omitempty"`
-		WatchdogBusyTimeout     *string `json:"watchdog_busy_timeout,omitempty"`
-		SingleBackend           *bool   `json:"single_backend,omitempty"`
-		ParallelBackendRequests *bool   `json:"parallel_backend_requests,omitempty"`
-	}
-
-	if err := json.Unmarshal(fileContent, &settings); err != nil {
-		log.Warn().Err(err).Msg("failed to parse runtime_settings.json")
-		return
-	}
-
-	// At this point, options already has values from env vars (via AppOptions in run.go).
-	// To avoid env var duplication, we determine if env vars were set by checking if
-	// current values differ from defaults. Defaults are: false for bools, 0 for durations.
-	// If current value is at default, it likely wasn't set from env var, so we can apply file.
-	// If current value is non-default, it was likely set from env var, so we preserve it.
-	// Note: This means env vars explicitly setting to false/0 won't be distinguishable from defaults,
-	// but that's an acceptable limitation to avoid env var duplication.
-
-	if settings.WatchdogIdleEnabled != nil {
-		// Only apply if current value is default (false), suggesting it wasn't set from env var
-		if !options.WatchDogIdle {
-			options.WatchDogIdle = *settings.WatchdogIdleEnabled
-			if options.WatchDogIdle {
-				options.WatchDog = true
-			}
-		}
-	}
-	if settings.WatchdogBusyEnabled != nil {
-		if !options.WatchDogBusy {
-			options.WatchDogBusy = *settings.WatchdogBusyEnabled
-			if options.WatchDogBusy {
-				options.WatchDog = true
-			}
-		}
-	}
-	if settings.WatchdogIdleTimeout != nil {
-		// Only apply if current value is default (0), suggesting it wasn't set from env var
-		if options.WatchDogIdleTimeout == 0 {
-			dur, err := time.ParseDuration(*settings.WatchdogIdleTimeout)
-			if err == nil {
-				options.WatchDogIdleTimeout = dur
-			} else {
-				log.Warn().Err(err).Str("timeout", *settings.WatchdogIdleTimeout).Msg("invalid watchdog idle timeout in runtime_settings.json")
-			}
-		}
-	}
-	if settings.WatchdogBusyTimeout != nil {
-		if options.WatchDogBusyTimeout == 0 {
-			dur, err := time.ParseDuration(*settings.WatchdogBusyTimeout)
-			if err == nil {
-				options.WatchDogBusyTimeout = dur
-			} else {
-				log.Warn().Err(err).Str("timeout", *settings.WatchdogBusyTimeout).Msg("invalid watchdog busy timeout in runtime_settings.json")
-			}
-		}
-	}
-	if settings.SingleBackend != nil {
-		if !options.SingleBackend {
-			options.SingleBackend = *settings.SingleBackend
-		}
-	}
-	if settings.ParallelBackendRequests != nil {
-		if !options.ParallelBackendRequests {
-			options.ParallelBackendRequests = *settings.ParallelBackendRequests
-		}
-	}
-	if !options.WatchDogIdle && !options.WatchDogBusy {
-		if settings.WatchdogEnabled != nil && *settings.WatchdogEnabled {
-			options.WatchDog = true
-		}
-	}
-
-	log.Debug().Msg("Runtime settings loaded from runtime_settings.json")
-}
-
-// initializeWatchdog initializes the watchdog with current ApplicationConfig settings
-func initializeWatchdog(application *Application, options *config.ApplicationConfig) {
-	if options.WatchDog {
-		wd := model.NewWatchDog(
-			application.ModelLoader(),
-			options.WatchDogBusyTimeout,
-			options.WatchDogIdleTimeout,
-			options.WatchDogBusy,
-			options.WatchDogIdle)
-		application.ModelLoader().SetWatchDog(wd)
-		go wd.Run()
-		go func() {
-			<-options.Context.Done()
-			log.Debug().Msgf("Context canceled, shutting down")
-			wd.Shutdown()
-		}()
-	}
-}
--- a/core/application/watchdog.go
+++ b/core/application/watchdog.go
@@ -1,88 +0,0 @@
-package application
-
-import (
-	"time"
-
-	"github.com/mudler/LocalAI/pkg/model"
-	"github.com/rs/zerolog/log"
-)
-
-func (a *Application) StopWatchdog() error {
-	if a.watchdogStop != nil {
-		close(a.watchdogStop)
-		a.watchdogStop = nil
-	}
-	return nil
-}
-
-// startWatchdog starts the watchdog with current ApplicationConfig settings
-// This is an internal method that assumes the caller holds the watchdogMutex
-func (a *Application) startWatchdog() error {
-	appConfig := a.ApplicationConfig()
-
-	// Create new watchdog if enabled
-	if appConfig.WatchDog {
-		wd := model.NewWatchDog(
-			a.modelLoader,
-			appConfig.WatchDogBusyTimeout,
-			appConfig.WatchDogIdleTimeout,
-			appConfig.WatchDogBusy,
-			appConfig.WatchDogIdle)
-		a.modelLoader.SetWatchDog(wd)
-
-		// Create new stop channel
-		a.watchdogStop = make(chan bool, 1)
-
-		// Start watchdog goroutine
-		go wd.Run()
-
-		// Setup shutdown handler
-		go func() {
-			select {
-			case <-a.watchdogStop:
-				log.Debug().Msg("Watchdog stop signal received")
-				wd.Shutdown()
-			case <-appConfig.Context.Done():
-				log.Debug().Msg("Context canceled, shutting down watchdog")
-				wd.Shutdown()
-			}
-		}()
-
-		log.Info().Msg("Watchdog started with new settings")
-	} else {
-		log.Info().Msg("Watchdog disabled")
-	}
-
-	return nil
-}
-
-// StartWatchdog starts the watchdog with current ApplicationConfig settings
-func (a *Application) StartWatchdog() error {
-	a.watchdogMutex.Lock()
-	defer a.watchdogMutex.Unlock()
-
-	return a.startWatchdog()
-}
-
-// RestartWatchdog restarts the watchdog with current ApplicationConfig settings
-func (a *Application) RestartWatchdog() error {
-	a.watchdogMutex.Lock()
-	defer a.watchdogMutex.Unlock()
-
-	// Shutdown existing watchdog if running
-	if a.watchdogStop != nil {
-		close(a.watchdogStop)
-		a.watchdogStop = nil
-	}
-
-	// Shutdown existing watchdog if running
-	currentWD := a.modelLoader.GetWatchDog()
-	if currentWD != nil {
-		currentWD.Shutdown()
-		// Wait a bit for shutdown to complete
-		time.Sleep(100 * time.Millisecond)
-	}
-
-	// Start watchdog with new settings
-	return a.startWatchdog()
-}
--- a/core/backend/image.go
+++ b/core/backend/image.go
@@ -40,7 +40,3 @@ func ImageGeneration(height, width, mode, step, seed int, positive_prompt, negat

 	return fn, nil
 }
-
-// ImageGenerationFunc is a test-friendly indirection to call image generation logic.
-// Tests can override this variable to provide a stub implementation.
-var ImageGenerationFunc = ImageGeneration
--- a/core/backend/llm.go
+++ b/core/backend/llm.go
@@ -3,6 +3,7 @@ package backend
 import (
 	"context"
 	"encoding/json"
+	"fmt"
 	"regexp"
 	"slices"
 	"strings"
@@ -25,7 +26,6 @@ type LLMResponse struct {
 	Response    string // should this be []byte?
 	Usage       TokenUsage
 	AudioOutput string
-	Logprobs    *schema.Logprobs // Logprobs from the backend response
 }

 type TokenUsage struct {
@@ -35,7 +35,7 @@ type TokenUsage struct {
 	TimingTokenGeneration  float64
 }

-func ModelInference(ctx context.Context, s string, messages schema.Messages, images, videos, audios []string, loader *model.ModelLoader, c *config.ModelConfig, cl *config.ModelConfigLoader, o *config.ApplicationConfig, tokenCallback func(string, TokenUsage) bool, tools string, toolChoice string, logprobs *int, topLogprobs *int, logitBias map[string]float64) (func() (LLMResponse, error), error) {
+func ModelInference(ctx context.Context, s string, messages []schema.Message, images, videos, audios []string, loader *model.ModelLoader, c *config.ModelConfig, cl *config.ModelConfigLoader, o *config.ApplicationConfig, tokenCallback func(string, TokenUsage) bool) (func() (LLMResponse, error), error) {
 	modelFile := c.Model

 	// Check if the modelFile exists, if it doesn't try to load it from the gallery
@@ -47,7 +47,7 @@ func ModelInference(ctx context.Context, s string, messages schema.Messages, ima
 		if !slices.Contains(modelNames, c.Name) {
 			utils.ResetDownloadTimers()
 			// if we failed to load the model, we try to download it
-			err := gallery.InstallModelFromGallery(ctx, o.Galleries, o.BackendGalleries, o.SystemState, loader, c.Name, gallery.GalleryModel{}, utils.DisplayDownloadFunction, o.EnforcePredownloadScans, o.AutoloadBackendGalleries)
+			err := gallery.InstallModelFromGallery(o.Galleries, o.BackendGalleries, o.SystemState, loader, c.Name, gallery.GalleryModel{}, utils.DisplayDownloadFunction, o.EnforcePredownloadScans, o.AutoloadBackendGalleries)
 			if err != nil {
 				log.Error().Err(err).Msgf("failed to install model %q from gallery", modelFile)
 				//return nil, err
@@ -65,8 +65,29 @@ func ModelInference(ctx context.Context, s string, messages schema.Messages, ima
 	var protoMessages []*proto.Message
 	// if we are using the tokenizer template, we need to convert the messages to proto messages
 	// unless the prompt has already been tokenized (non-chat endpoints + functions)
-	if c.TemplateConfig.UseTokenizerTemplate && len(messages) > 0 {
-		protoMessages = messages.ToProto()
+	if c.TemplateConfig.UseTokenizerTemplate && s == "" {
+		protoMessages = make([]*proto.Message, len(messages), len(messages))
+		for i, message := range messages {
+			protoMessages[i] = &proto.Message{
+				Role: message.Role,
+			}
+			switch ct := message.Content.(type) {
+			case string:
+				protoMessages[i].Content = ct
+			case []interface{}:
+				// If using the tokenizer template, in case of multimodal we want to keep the multimodal content as and return only strings here
+				data, _ := json.Marshal(ct)
+				resultData := []struct {
+					Text string `json:"text"`
+				}{}
+				json.Unmarshal(data, &resultData)
+				for _, r := range resultData {
+					protoMessages[i].Content += r.Text
+				}
+			default:
+				return nil, fmt.Errorf("unsupported type for schema.Message.Content for inference: %T", ct)
+			}
+		}
 	}

 	// in GRPC, the backend is supposed to answer to 1 single token if stream is not supported
@@ -78,21 +99,6 @@ func ModelInference(ctx context.Context, s string, messages schema.Messages, ima
 		opts.Images = images
 		opts.Videos = videos
 		opts.Audios = audios
-		opts.Tools = tools
-		opts.ToolChoice = toolChoice
-		if logprobs != nil {
-			opts.Logprobs = int32(*logprobs)
-		}
-		if topLogprobs != nil {
-			opts.TopLogprobs = int32(*topLogprobs)
-		}
-		if len(logitBias) > 0 {
-			// Serialize logit_bias map to JSON string for proto
-			logitBiasJSON, err := json.Marshal(logitBias)
-			if err == nil {
-				opts.LogitBias = string(logitBiasJSON)
-			}
-		}

 		tokenUsage := TokenUsage{}

@@ -124,7 +130,6 @@ func ModelInference(ctx context.Context, s string, messages schema.Messages, ima
 			}

 			ss := ""
-			var logprobs *schema.Logprobs

 			var partialRune []byte
 			err := inferenceModel.PredictStream(ctx, opts, func(reply *proto.Reply) {
@@ -136,14 +141,6 @@ func ModelInference(ctx context.Context, s string, messages schema.Messages, ima
 				tokenUsage.TimingTokenGeneration = reply.TimingTokenGeneration
 				tokenUsage.TimingPromptProcessing = reply.TimingPromptProcessing

-				// Parse logprobs from reply if present (collect from last chunk that has them)
-				if len(reply.Logprobs) > 0 {
-					var parsedLogprobs schema.Logprobs
-					if err := json.Unmarshal(reply.Logprobs, &parsedLogprobs); err == nil {
-						logprobs = &parsedLogprobs
-					}
-				}
-
 				// Process complete runes and accumulate them
 				var completeRunes []byte
 				for len(partialRune) > 0 {
@@ -169,7 +166,6 @@ func ModelInference(ctx context.Context, s string, messages schema.Messages, ima
 			return LLMResponse{
 				Response: ss,
 				Usage:    tokenUsage,
-				Logprobs: logprobs,
 			}, err
 		} else {
 			// TODO: Is the chicken bit the only way to get here? is that acceptable?
@@ -192,19 +188,9 @@ func ModelInference(ctx context.Context, s string, messages schema.Messages, ima
 				response = c.TemplateConfig.ReplyPrefix + response
 			}

-			// Parse logprobs from reply if present
-			var logprobs *schema.Logprobs
-			if len(reply.Logprobs) > 0 {
-				var parsedLogprobs schema.Logprobs
-				if err := json.Unmarshal(reply.Logprobs, &parsedLogprobs); err == nil {
-					logprobs = &parsedLogprobs
-				}
-			}
-
 			return LLMResponse{
 				Response: response,
 				Usage:    tokenUsage,
-				Logprobs: logprobs,
 			}, err
 		}
 	}
--- a/core/backend/options.go
+++ b/core/backend/options.go
@@ -129,6 +129,7 @@ func grpcModelOpts(c config.ModelConfig) *pb.ModelOptions {
 		triggers = append(triggers, &pb.GrammarTrigger{
 			Word: t.Word,
 		})
+
 	}

 	return &pb.ModelOptions{
@@ -212,7 +213,7 @@ func gRPCPredictOpts(c config.ModelConfig, modelPath string) *pb.PredictOptions
 		}
 	}

-	pbOpts := &pb.PredictOptions{
+	return &pb.PredictOptions{
 		Temperature:         float32(*c.Temperature),
 		TopP:                float32(*c.TopP),
 		NDraft:              c.NDraft,
@@ -249,6 +250,4 @@ func gRPCPredictOpts(c config.ModelConfig, modelPath string) *pb.PredictOptions
 		TailFreeSamplingZ:   float32(*c.TFZ),
 		TypicalP:            float32(*c.TypicalP),
 	}
-	// Logprobs and TopLogprobs are set by the caller if provided
-	return pbOpts
 }
--- a/core/backend/soundgeneration.go
+++ b/core/backend/soundgeneration.go
@@ -60,7 +60,7 @@ func SoundGeneration(

 	// return RPC error if any
 	if !res.Success {
-		return "", nil, fmt.Errorf("error during sound generation: %s", res.Message)
+		return "", nil, fmt.Errorf(res.Message)
 	}

 	return filePath, res, err
--- a/core/backend/tts.go
+++ b/core/backend/tts.go
@@ -70,7 +70,7 @@ func ModelTTS(

 	// return RPC error if any
 	if !res.Success {
-		return "", nil, fmt.Errorf("error during TTS: %s", res.Message)
+		return "", nil, fmt.Errorf(res.Message)
 	}

 	return filePath, res, err
--- a/core/cli/api/p2p.go
+++ b/core/cli/api/p2p.go
@@ -0,0 +1,87 @@
+package cli_api
+
+import (
+	"context"
+	"fmt"
+	"net"
+	"os"
+	"strings"
+
+	"github.com/mudler/LocalAI/core/application"
+	"github.com/mudler/LocalAI/core/p2p"
+	"github.com/mudler/LocalAI/core/schema"
+	"github.com/mudler/edgevpn/pkg/node"
+
+	"github.com/rs/zerolog/log"
+)
+
+// StartP2PStack brings up the p2p networking for this instance. When federated
+// is set it exposes the port taken from address on the federated network and
+// starts node sync; when token is non-empty it also runs worker discovery on a
+// shared node. It returns the first error hit, or nil (also if neither applies).
+func StartP2PStack(ctx context.Context, address, token, networkID string, federated bool, app *application.Application) error {
+	var n *node.Node
+
+	// Only one node is wanted: ExposeService already creates one, so it is kept in n and reused below.
+	if federated {
+		_, port, err := net.SplitHostPort(address)
+		if err != nil {
+			return err
+		}
+
+		// Here a new node is created and started, and localhost:port is exposed
+		// as a service by it. Note: this local shadows the imported node package.
+		node, err := p2p.ExposeService(ctx, "localhost", port, token, p2p.NetworkID(networkID, p2p.FederatedID))
+		if err != nil {
+			return err
+		}
+
+		if err := p2p.ServiceDiscoverer(ctx, node, token, p2p.NetworkID(networkID, p2p.FederatedID), nil, false); err != nil {
+			return err
+		}
+
+		n = node
+
+		// Start node sync (the original note says it runs in the background; confirm in p2p.Sync)
+		if err := p2p.Sync(ctx, node, app); err != nil {
+			return err
+		}
+	}
+
+	// If a token is set (p2p mode), start service discovery on the worker network
+	if token != "" {
+		// No node was created by the federated branch above, so create and start one
+		if n == nil {
+			node, err := p2p.NewNode(token)
+			if err != nil {
+				return err
+			}
+			err = node.Start(ctx)
+			if err != nil {
+				return fmt.Errorf("starting new node: %w", err)
+			}
+			n = node
+		}
+
+		// Attach a ServiceDiscoverer; its callback ignores both arguments and rebuilds the whole list
+		log.Info().Msg("Starting P2P server discovery...")
+		if err := p2p.ServiceDiscoverer(ctx, n, token, p2p.NetworkID(networkID, p2p.WorkerID), func(serviceID string, node schema.NodeData) {
+			var tunnelAddresses []string
+			for _, v := range p2p.GetAvailableNodes(p2p.NetworkID(networkID, p2p.WorkerID)) {
+				if v.IsOnline() {
+					tunnelAddresses = append(tunnelAddresses, v.TunnelAddress)
+				} else {
+					log.Info().Msgf("Node %s is offline", v.ID)
+				}
+			}
+			tunnelEnvVar := strings.Join(tunnelAddresses, ",")
+			// NOTE(review): the error returned by os.Setenv is discarded here.
+			os.Setenv("LLAMACPP_GRPC_SERVERS", tunnelEnvVar)
+			log.Debug().Msgf("setting LLAMACPP_GRPC_SERVERS to %s", tunnelEnvVar)
+		}, true); err != nil {
+			return err
+		}
+	}
+
+	return nil
+}
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
Ettore Di Giacinto	2aed98d14b	add libx11 Signed-off-by: Ettore Di Giacinto <mudler@localai.io>	2025-09-28 20:30:31 +02:00
Ettore Di Giacinto	b3a1b3d63f	add libxcb Signed-off-by: Ettore Di Giacinto <mudler@localai.io>	2025-09-28 18:59:12 +02:00
Ettore Di Giacinto	e25dd2fe26	debug Signed-off-by: Ettore Di Giacinto <mudler@localai.io>	2025-09-28 18:34:35 +02:00
Ettore Di Giacinto	002f75ac79	ci(tests): drop me. Test vulkan build Signed-off-by: Ettore Di Giacinto <mudler@localai.io>	2025-09-28 18:24:56 +02:00
Ettore Di Giacinto	1ce8f35834	Consume custom vulkan build for arm64 Signed-off-by: Ettore Di Giacinto <mudler@localai.io>	2025-09-28 18:20:11 +02:00
Ettore Di Giacinto	be8a314496	Vulkansdk needs sudo Signed-off-by: Ettore Di Giacinto <mudler@localai.io>	2025-09-28 10:01:30 +02:00
Ettore Di Giacinto	406d62d6aa	build vulkan manually on arm64 Signed-off-by: Ettore Di Giacinto <mudler@localai.io>	2025-09-28 10:01:30 +02:00
Ettore Di Giacinto	9e9f953eec	chore(vulkan): enable arm64 image builds Signed-off-by: Ettore Di Giacinto <mudler@localai.io>	2025-09-28 10:01:30 +02:00