deterministic builds

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
refactor, macOS fixes
2026-07-05 22:09:02 -04:00 · 2026-04-01 19:45:31 +00:00 · 2026-04-01 19:42:16 +00:00 · 2026-04-01 17:57:03 +00:00
11 changed files with 183 additions and 68 deletions
--- a/.github/gallery-agent/agent.go
+++ b/.github/gallery-agent/agent.go
@@ -133,7 +133,6 @@ func getRealReadme(ctx context.Context, repository string) (string, error) {
 	result, err := cogito.ExecuteTools(llm, fragment,
 		cogito.WithIterations(3),
 		cogito.WithMaxAttempts(3),
-		cogito.DisableSinkState,
 		cogito.WithTools(&HFReadmeTool{client: hfapi.NewClient()}))
 	if err != nil {
 		return "", err
--- a/.github/gallery-agent/gallery.go
+++ b/.github/gallery-agent/gallery.go
@@ -79,20 +79,7 @@ func generateYAMLEntry(model ProcessedModel, quantization string) string {
 	description = cleanTextContent(description)
 	formattedDescription := formatTextContent(description)

-	// Strip name and description from config file since they are
-	// already present at the gallery entry level and should not
-	// appear under overrides.
-	configFileContent := modelConfig.ConfigFile
-	var cfgMap map[string]any
-	if err := yaml.Unmarshal([]byte(configFileContent), &cfgMap); err == nil {
-		delete(cfgMap, "name")
-		delete(cfgMap, "description")
-		if cleaned, err := yaml.Marshal(cfgMap); err == nil {
-			configFileContent = string(cleaned)
-		}
-	}
-
-	configFile := formatTextContent(configFileContent)
+	configFile := formatTextContent(modelConfig.ConfigFile)

 	filesYAML, _ := yaml.Marshal(modelConfig.Files)

--- a/.github/gallery-agent/testing.go
+++ b/.github/gallery-agent/testing.go
@@ -17,7 +17,7 @@ func runSyntheticMode() error {
 	fmt.Printf("Generating %d synthetic models for testing...\n", numModels)

 	var models []ProcessedModel
-	for range numModels {
+	for range numModels {
 		model := generator.GenerateProcessedModel()
 		models = append(models, model)
 		fmt.Printf("Generated synthetic model: %s\n", model.ModelID)
--- a/.github/workflows/bump_deps.yaml
+++ b/.github/workflows/bump_deps.yaml
@@ -14,6 +14,10 @@ jobs:
            variable: "LLAMA_VERSION"
            branch: "master"
            file: "backend/cpp/llama-cpp/Makefile"
+          - repository: "TheTom/llama-cpp-turboquant"
+            variable: "TURBOQUANT_VERSION"
+            branch: "feature/turboquant-kv-cache"
+            file: "backend/cpp/llama-cpp/Makefile"
          - repository: "ggml-org/whisper.cpp"
            variable: "WHISPER_CPP_VERSION"
            branch: "master"
@@ -63,6 +67,3 @@ jobs:
          branch: "update/${{ matrix.variable }}"
          body: ${{ steps.bump.outputs.message }}
          signoff: true
-
-
-
--- a/.github/workflows/gallery-agent.yaml
+++ b/.github/workflows/gallery-agent.yaml
@@ -55,7 +55,7 @@ jobs:
      - name: Run gallery agent
        env:
          #OPENAI_MODEL: ${{ secrets.OPENAI_MODEL }}
-          OPENAI_MODEL: Qwen3.5-2B-GGUF
+          OPENAI_MODEL: Qwen3.5-2B-GGUF
          OPENAI_BASE_URL: "http://localhost:8080"
          OPENAI_KEY: ${{ secrets.OPENAI_KEY }}
          #OPENAI_BASE_URL: ${{ secrets.OPENAI_BASE_URL }}
--- a/backend/cpp/llama-cpp/Makefile
+++ b/backend/cpp/llama-cpp/Makefile
@@ -1,7 +1,9 @@

-LLAMA_VERSION?=95a6ebabb277c4cc18247e7bc2a5502133caca63
+LLAMA_VERSION?=0fcb3760b2b9a3a496ef14621a7e4dad7a8df90f
 LLAMA_REPO?=https://github.com/ggerganov/llama.cpp

+TURBOQUANT_VERSION?=8ad0f00e9a38df6c29fc10363341dde300f92ae4
+
 CMAKE_ARGS?=
 BUILD_TYPE?=
 NATIVE?=false
--- a/backend/cpp/llama-cpp/patches/sources.yaml
+++ b/backend/cpp/llama-cpp/patches/sources.yaml
@@ -0,0 +1,14 @@
+# Patch sources for the llama-cpp backend.
+# Each source declares a fork whose commits are extracted as patches
+# and applied on top of upstream llama.cpp during the build.
+# See scripts/patch_utils/apply_patches.sh for the generic patch engine.
+#
+# version_var: Makefile variable with the pinned fork commit SHA
+# base_var:    Makefile variable with the upstream base commit SHA
+# Both are read from version_file (relative to backend dir) to compute the diff.
+sources:
+  - name: turboquant
+    repo: https://github.com/TheTom/llama-cpp-turboquant.git
+    version_var: TURBOQUANT_VERSION
+    base_var: LLAMA_VERSION
+    version_file: Makefile
--- a/backend/cpp/llama-cpp/prepare.sh
+++ b/backend/cpp/llama-cpp/prepare.sh
@@ -1,17 +1,13 @@
 #!/bin/bash
-
-## Patches
-
-## Apply patches from the `patches` directory
-if [ -d "patches" ]; then
-    for patch in $(ls patches); do
-        echo "Applying patch $patch"
-        patch -d llama.cpp/ -p1 < patches/$patch
-    done 
-fi
-
 set -e

+SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
+REPO_ROOT="$SCRIPT_DIR/../../.."
+
+## Apply patches from sources and/or local .patch files
+"$REPO_ROOT/scripts/patch_utils/apply_patches.sh" "$SCRIPT_DIR" llama.cpp
+
+## Copy server files into grpc-server build directory
 for file in $(ls llama.cpp/tools/server/); do
    cp -rfv llama.cpp/tools/server/$file llama.cpp/tools/grpc-server/
 done
@@ -28,4 +24,3 @@ else
    echo "add_subdirectory(grpc-server)" >> llama.cpp/tools/CMakeLists.txt
 fi
 set -e
-
--- a/backend/go/stablediffusion-ggml/Makefile
+++ b/backend/go/stablediffusion-ggml/Makefile
@@ -8,7 +8,7 @@ JOBS?=$(shell nproc --ignore=1)

 # stablediffusion.cpp (ggml)
 STABLEDIFFUSION_GGML_REPO?=https://github.com/leejet/stable-diffusion.cpp
-STABLEDIFFUSION_GGML_VERSION?=87ecb95cbc65dc8e58e3d88f4f4a59a0939796f5
+STABLEDIFFUSION_GGML_VERSION?=09b12d5f6d51d862749e8e0ee8baac8f012089e2

 CMAKE_ARGS+=-DGGML_MAX_NAME=128

--- a/gallery/index.yaml
+++ b/gallery/index.yaml
@@ -1,38 +1,4 @@
 ---
- name: "qwen3.5-35b-a3b-apex"
-  url: "github:mudler/LocalAI/gallery/virtual.yaml@master"
-  urls:
-    - https://huggingface.co/mudler/Qwen3.5-35B-A3B-APEX-GGUF
-  description: |
-    Describe the model in a clear and concise way that can be shared in a model gallery.
-  overrides:
-    backend: llama-cpp
-    function:
-      automatic_tool_parsing_fallback: true
-      grammar:
-        disable: true
-    known_usecases:
-      - chat
-    mmproj: llama-cpp/mmproj/Qwen3.5-35B-A3B-APEX-GGUF/mmproj-F16.gguf
-    options:
-      - use_jinja:true
-    parameters:
-      min_p: 0
-      model: llama-cpp/models/Qwen3.5-35B-A3B-APEX-GGUF/Qwen3.5-35B-A3B-APEX-Quality.gguf
-      presence_penalty: 1.5
-      repeat_penalty: 1
-      temperature: 0.7
-      top_k: 20
-      top_p: 0.8
-    template:
-      use_tokenizer_template: true
-  files:
-    - filename: llama-cpp/mmproj/Qwen3.5-35B-A3B-APEX-GGUF/mmproj-F16.gguf
-      sha256: a516ab92e8240da4734d68352bdfba84c16e830ee40010b8fac80d69c77272ff
-      uri: https://huggingface.co/mudler/Qwen3.5-35B-A3B-APEX-GGUF/resolve/main/mmproj-F16.gguf
-    - filename: llama-cpp/models/Qwen3.5-35B-A3B-APEX-GGUF/Qwen3.5-35B-A3B-APEX-Quality.gguf
-      sha256: 50887b60c77ee5c95bc3657814ae993abcab7b2d71868b9af1e84d6badd09a57
-      uri: https://huggingface.co/mudler/Qwen3.5-35B-A3B-APEX-GGUF/resolve/main/Qwen3.5-35B-A3B-APEX-Quality.gguf
 - name: "qwen_qwen3.5-35b-a3b"
  url: "github:mudler/LocalAI/gallery/virtual.yaml@master"
  urls:
--- a/scripts/patch_utils/apply_patches.sh
+++ b/scripts/patch_utils/apply_patches.sh
@@ -0,0 +1,151 @@
+#!/bin/bash
+# apply_patches.sh — Generic patch fetcher and applier for any backend.
+#
+# Usage: ./apply_patches.sh <source-dir> <target-dir>
+#
+#   <source-dir>  Directory containing a patches/ folder (with optional sources.yaml)
+#   <target-dir>  The cloned upstream repo to patch (e.g., llama.cpp/)
+#
+# Behavior (idempotent):
+#   1. If patches/sources.yaml exists and yq is available, for each source:
+#      - If patches/<name>/ already has .patch files: skip fetching (vendored)
+#      - Otherwise: clone the fork at a pinned SHA, diff against the pinned
+#        upstream SHA, and generate patches
+#   2. Apply all patches (skips already-applied ones)
+#   3. Fails fast on any patch application error
+#
+# sources.yaml fields:
+#   name         — subdirectory name for this source's patches
+#   repo         — fork git URL (an optional upstream_repo URL is fetched when the fork clone lacks the base commit)
+#   version_var  — Makefile variable holding the pinned fork commit SHA
+#   base_var     — Makefile variable holding the pinned upstream commit SHA
+#   version_file — Makefile path (relative to backend dir)
+
+set -e
+
+# Use /tmp for patch temp files to avoid macOS long-path issues; TMPDIR_OVERRIDE selects another dir (an inherited TMPDIR is replaced)
+export TMPDIR="${TMPDIR_OVERRIDE:-/tmp}"
+
+read_makefile_var() {  # usage: read_makefile_var VAR FILE; prints the value of FILE's first "VAR?=" line (nothing if absent)
+    grep -m1 "^${1}?=" "$2" | cut -d'=' -f2-  # -f2- keeps values that themselves contain '='
+}
+
+apply_one_patch() {  # usage: apply_one_patch <target-dir> <patch-file> <label>
+    local dest="$1" file="$2" tag="$3"
+    # A patch that reverse-applies cleanly in a dry run is already in the tree.
+    if patch -d "$dest" -p1 --reverse --dry-run < "$file" >/dev/null 2>&1; then
+        echo "  Already applied, skipping: $tag"
+    else
+        echo "  Applying: $tag"
+        if ! patch -d "$dest" -p1 --forward < "$file"; then
+            echo "FAILED: $file"
+            exit 1
+        fi
+    fi
+}
+
+apply_patches() {  # usage: apply_patches <source-dir> <target-dir>; a relative <target-dir> is resolved against the caller's cwd
+    local SOURCE_DIR TARGET_DIR="$2"
+    SOURCE_DIR="$(cd "$1" && pwd)"  # assigned apart from 'local' so a bad <source-dir> aborts under set -e
+    local PATCHES_DIR="$SOURCE_DIR/patches"
+
+    if [ ! -d "$PATCHES_DIR" ]; then
+        return 0
+    fi
+
+    # Phase 1: Generate missing patches from fork sources
+    if [ -f "$PATCHES_DIR/sources.yaml" ] && command -v yq &>/dev/null; then
+        local SOURCE_COUNT i
+        SOURCE_COUNT=$(yq '.sources | length' "$PATCHES_DIR/sources.yaml")
+
+        # Count in bash instead of `seq 0 N-1`: with N=0, BSD/macOS seq counts down and yields 0 and -1
+        for ((i = 0; i < SOURCE_COUNT; i++)); do
+            local NAME REPO VERSION_VAR BASE_VAR VERSION_FILE
+            NAME=$(yq ".sources[$i].name" "$PATCHES_DIR/sources.yaml")
+            REPO=$(yq ".sources[$i].repo" "$PATCHES_DIR/sources.yaml")
+            VERSION_VAR=$(yq ".sources[$i].version_var" "$PATCHES_DIR/sources.yaml")
+            BASE_VAR=$(yq ".sources[$i].base_var" "$PATCHES_DIR/sources.yaml")
+            VERSION_FILE=$(yq ".sources[$i].version_file" "$PATCHES_DIR/sources.yaml")
+
+            local MAKEFILE="$SOURCE_DIR/$VERSION_FILE"
+            local FORK_SHA BASE_SHA
+            FORK_SHA=$(read_makefile_var "$VERSION_VAR" "$MAKEFILE")
+            BASE_SHA=$(read_makefile_var "$BASE_VAR" "$MAKEFILE")
+
+            if [ -z "$FORK_SHA" ] || [ -z "$BASE_SHA" ]; then
+                echo "WARNING: Could not read $VERSION_VAR or $BASE_VAR from $MAKEFILE — skipping '$NAME'"
+                continue
+            fi
+
+            local SOURCE_PATCH_DIR="$PATCHES_DIR/$NAME"
+            local EXISTING
+            EXISTING=$(ls "$SOURCE_PATCH_DIR"/*.patch 2>/dev/null | wc -l)
+
+            if [ "$EXISTING" -gt 0 ]; then
+                echo "Patches [$NAME]: $EXISTING patches already present — skipping fetch."
+            else
+                echo "Patches [$NAME]: generating from $REPO"
+                echo "  base (upstream): ${BASE_SHA:0:12}"
+                echo "  head (fork):     ${FORK_SHA:0:12}"
+
+                local TMPDIR_CLONE FORK_DIR
+                TMPDIR_CLONE=$(mktemp -d)
+                FORK_DIR="$TMPDIR_CLONE/fork"  # always addressed via `git -C`, so this script's cwd never changes
+
+                if git clone "$REPO" "$FORK_DIR" 2>&1; then
+                    # Best-effort fetch of the pinned fork commit in case no cloned ref reaches it
+                    git -C "$FORK_DIR" fetch origin "$FORK_SHA" 2>&1 || true
+                    git -C "$FORK_DIR" checkout "$FORK_SHA" 2>&1
+
+                    # The base commit must be present in the clone to compute the diff.
+                    # If it is missing, it is fetched from the source's optional
+                    # upstream_repo field; there is no built-in default URL.
+                    if ! git -C "$FORK_DIR" cat-file -e "$BASE_SHA" 2>/dev/null; then
+                        echo "  Base commit not in fork history — fetching from upstream"
+                        local UPSTREAM_URL
+                        UPSTREAM_URL=$(yq ".sources[$i].upstream_repo // \"\"" "$PATCHES_DIR/sources.yaml")
+                        if [ -n "$UPSTREAM_URL" ] && [ "$UPSTREAM_URL" != "null" ]; then
+                            git -C "$FORK_DIR" remote add upstream "$UPSTREAM_URL" 2>/dev/null || true
+                            git -C "$FORK_DIR" fetch upstream 2>&1
+                        fi
+                    fi
+
+                    local PATCH_COUNT
+                    PATCH_COUNT=$(git -C "$FORK_DIR" rev-list --count "$BASE_SHA".."$FORK_SHA" 2>/dev/null || echo "0")
+                    echo "  $PATCH_COUNT commits in diff"
+
+                    if [ "$PATCH_COUNT" -gt 0 ]; then
+                        mkdir -p "$SOURCE_PATCH_DIR"
+                        git -C "$FORK_DIR" format-patch "$BASE_SHA".."$FORK_SHA" -o "$SOURCE_PATCH_DIR/" >/dev/null 2>&1
+                        echo "  Generated $PATCH_COUNT patches in patches/$NAME/"
+                    fi
+                else
+                    echo "WARNING: Failed to clone $REPO — skipping source '$NAME'"
+                fi
+
+                rm -rf "$TMPDIR_CLONE"
+            fi
+        done
+    elif [ -f "$PATCHES_DIR/sources.yaml" ]; then
+        echo "WARNING: yq not found — skipping source-based patch generation."
+    fi
+
+    # Phase 2: Apply patches (subdirectories first, then top-level); globs expand sorted and keep paths with spaces intact
+    for source_dir in "$PATCHES_DIR"/*/; do
+        for p in "$source_dir"*.patch; do
+            [ -f "$p" ] || continue  # an unmatched glob is left as a literal pattern
+            apply_one_patch "$TARGET_DIR" "$p" "$(basename "$source_dir")/$(basename "$p")"
+        done
+    done
+    for p in "$PATCHES_DIR"/*.patch; do
+        [ -f "$p" ] || continue
+        apply_one_patch "$TARGET_DIR" "$p" "$(basename "$p")"
+    done
+}
+
+# Entry point: require both positional arguments, then hand them to apply_patches.
+[ $# -ge 2 ] || {
+    echo "Usage: $0 <source-dir> <target-dir>"
+    exit 1
+}
+apply_patches "$1" "$2"