mirror of
https://github.com/mudler/LocalAI.git
synced 2026-05-20 06:35:41 -04:00
Compare commits
3 Commits
v4.1.0
...
feat/turbo
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
659636195c | ||
|
|
a7a142b651 | ||
|
|
e502e51d78 |
1
.github/gallery-agent/agent.go
vendored
1
.github/gallery-agent/agent.go
vendored
@@ -133,7 +133,6 @@ func getRealReadme(ctx context.Context, repository string) (string, error) {
|
||||
result, err := cogito.ExecuteTools(llm, fragment,
|
||||
cogito.WithIterations(3),
|
||||
cogito.WithMaxAttempts(3),
|
||||
cogito.DisableSinkState,
|
||||
cogito.WithTools(&HFReadmeTool{client: hfapi.NewClient()}))
|
||||
if err != nil {
|
||||
return "", err
|
||||
|
||||
15
.github/gallery-agent/gallery.go
vendored
15
.github/gallery-agent/gallery.go
vendored
@@ -79,20 +79,7 @@ func generateYAMLEntry(model ProcessedModel, quantization string) string {
|
||||
description = cleanTextContent(description)
|
||||
formattedDescription := formatTextContent(description)
|
||||
|
||||
// Strip name and description from config file since they are
|
||||
// already present at the gallery entry level and should not
|
||||
// appear under overrides.
|
||||
configFileContent := modelConfig.ConfigFile
|
||||
var cfgMap map[string]any
|
||||
if err := yaml.Unmarshal([]byte(configFileContent), &cfgMap); err == nil {
|
||||
delete(cfgMap, "name")
|
||||
delete(cfgMap, "description")
|
||||
if cleaned, err := yaml.Marshal(cfgMap); err == nil {
|
||||
configFileContent = string(cleaned)
|
||||
}
|
||||
}
|
||||
|
||||
configFile := formatTextContent(configFileContent)
|
||||
configFile := formatTextContent(modelConfig.ConfigFile)
|
||||
|
||||
filesYAML, _ := yaml.Marshal(modelConfig.Files)
|
||||
|
||||
|
||||
2
.github/gallery-agent/testing.go
vendored
2
.github/gallery-agent/testing.go
vendored
@@ -17,7 +17,7 @@ func runSyntheticMode() error {
|
||||
fmt.Printf("Generating %d synthetic models for testing...\n", numModels)
|
||||
|
||||
var models []ProcessedModel
|
||||
for range numModels {
|
||||
for i := range numModels {
|
||||
model := generator.GenerateProcessedModel()
|
||||
models = append(models, model)
|
||||
fmt.Printf("Generated synthetic model: %s\n", model.ModelID)
|
||||
|
||||
7
.github/workflows/bump_deps.yaml
vendored
7
.github/workflows/bump_deps.yaml
vendored
@@ -14,6 +14,10 @@ jobs:
|
||||
variable: "LLAMA_VERSION"
|
||||
branch: "master"
|
||||
file: "backend/cpp/llama-cpp/Makefile"
|
||||
- repository: "TheTom/llama-cpp-turboquant"
|
||||
variable: "TURBOQUANT_VERSION"
|
||||
branch: "feature/turboquant-kv-cache"
|
||||
file: "backend/cpp/llama-cpp/Makefile"
|
||||
- repository: "ggml-org/whisper.cpp"
|
||||
variable: "WHISPER_CPP_VERSION"
|
||||
branch: "master"
|
||||
@@ -63,6 +67,3 @@ jobs:
|
||||
branch: "update/${{ matrix.variable }}"
|
||||
body: ${{ steps.bump.outputs.message }}
|
||||
signoff: true
|
||||
|
||||
|
||||
|
||||
|
||||
2
.github/workflows/gallery-agent.yaml
vendored
2
.github/workflows/gallery-agent.yaml
vendored
@@ -55,7 +55,7 @@ jobs:
|
||||
- name: Run gallery agent
|
||||
env:
|
||||
#OPENAI_MODEL: ${{ secrets.OPENAI_MODEL }}
|
||||
OPENAI_MODEL: Qwen3.5-2B-GGUF
|
||||
OPENAI_MODE: Qwen3.5-2B-GGUF
|
||||
OPENAI_BASE_URL: "http://localhost:8080"
|
||||
OPENAI_KEY: ${{ secrets.OPENAI_KEY }}
|
||||
#OPENAI_BASE_URL: ${{ secrets.OPENAI_BASE_URL }}
|
||||
|
||||
@@ -1,7 +1,9 @@
|
||||
|
||||
LLAMA_VERSION?=95a6ebabb277c4cc18247e7bc2a5502133caca63
|
||||
LLAMA_VERSION?=0fcb3760b2b9a3a496ef14621a7e4dad7a8df90f
|
||||
LLAMA_REPO?=https://github.com/ggerganov/llama.cpp
|
||||
|
||||
TURBOQUANT_VERSION?=8ad0f00e9a38df6c29fc10363341dde300f92ae4
|
||||
|
||||
CMAKE_ARGS?=
|
||||
BUILD_TYPE?=
|
||||
NATIVE?=false
|
||||
|
||||
14
backend/cpp/llama-cpp/patches/sources.yaml
Normal file
14
backend/cpp/llama-cpp/patches/sources.yaml
Normal file
@@ -0,0 +1,14 @@
|
||||
# Patch sources for the llama-cpp backend.
|
||||
# Each source declares a fork whose commits are extracted as patches
|
||||
# and applied on top of upstream llama.cpp during the build.
|
||||
# See scripts/patch_utils/apply_patches.sh for the generic patch engine.
|
||||
#
|
||||
# version_var: Makefile variable with the pinned fork commit SHA
|
||||
# base_var: Makefile variable with the upstream base commit SHA
|
||||
# Both are read from version_file (relative to backend dir) to compute the diff.
|
||||
sources:
|
||||
- name: turboquant
|
||||
repo: https://github.com/TheTom/llama-cpp-turboquant.git
|
||||
version_var: TURBOQUANT_VERSION
|
||||
base_var: LLAMA_VERSION
|
||||
version_file: Makefile
|
||||
@@ -1,17 +1,13 @@
|
||||
#!/bin/bash
|
||||
|
||||
## Patches
|
||||
|
||||
## Apply patches from the `patches` directory
|
||||
if [ -d "patches" ]; then
|
||||
for patch in $(ls patches); do
|
||||
echo "Applying patch $patch"
|
||||
patch -d llama.cpp/ -p1 < patches/$patch
|
||||
done
|
||||
fi
|
||||
|
||||
set -e
|
||||
|
||||
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
|
||||
REPO_ROOT="$SCRIPT_DIR/../../.."
|
||||
|
||||
## Apply patches from sources and/or local .patch files
|
||||
"$REPO_ROOT/scripts/patch_utils/apply_patches.sh" "$SCRIPT_DIR" llama.cpp
|
||||
|
||||
## Copy server files into grpc-server build directory
|
||||
for file in $(ls llama.cpp/tools/server/); do
|
||||
cp -rfv llama.cpp/tools/server/$file llama.cpp/tools/grpc-server/
|
||||
done
|
||||
@@ -28,4 +24,3 @@ else
|
||||
echo "add_subdirectory(grpc-server)" >> llama.cpp/tools/CMakeLists.txt
|
||||
fi
|
||||
set -e
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@ JOBS?=$(shell nproc --ignore=1)
|
||||
|
||||
# stablediffusion.cpp (ggml)
|
||||
STABLEDIFFUSION_GGML_REPO?=https://github.com/leejet/stable-diffusion.cpp
|
||||
STABLEDIFFUSION_GGML_VERSION?=87ecb95cbc65dc8e58e3d88f4f4a59a0939796f5
|
||||
STABLEDIFFUSION_GGML_VERSION?=09b12d5f6d51d862749e8e0ee8baac8f012089e2
|
||||
|
||||
CMAKE_ARGS+=-DGGML_MAX_NAME=128
|
||||
|
||||
|
||||
@@ -1,38 +1,4 @@
|
||||
---
|
||||
- name: "qwen3.5-35b-a3b-apex"
|
||||
url: "github:mudler/LocalAI/gallery/virtual.yaml@master"
|
||||
urls:
|
||||
- https://huggingface.co/mudler/Qwen3.5-35B-A3B-APEX-GGUF
|
||||
description: |
|
||||
Describe the model in a clear and concise way that can be shared in a model gallery.
|
||||
overrides:
|
||||
backend: llama-cpp
|
||||
function:
|
||||
automatic_tool_parsing_fallback: true
|
||||
grammar:
|
||||
disable: true
|
||||
known_usecases:
|
||||
- chat
|
||||
mmproj: llama-cpp/mmproj/Qwen3.5-35B-A3B-APEX-GGUF/mmproj-F16.gguf
|
||||
options:
|
||||
- use_jinja:true
|
||||
parameters:
|
||||
min_p: 0
|
||||
model: llama-cpp/models/Qwen3.5-35B-A3B-APEX-GGUF/Qwen3.5-35B-A3B-APEX-Quality.gguf
|
||||
presence_penalty: 1.5
|
||||
repeat_penalty: 1
|
||||
temperature: 0.7
|
||||
top_k: 20
|
||||
top_p: 0.8
|
||||
template:
|
||||
use_tokenizer_template: true
|
||||
files:
|
||||
- filename: llama-cpp/mmproj/Qwen3.5-35B-A3B-APEX-GGUF/mmproj-F16.gguf
|
||||
sha256: a516ab92e8240da4734d68352bdfba84c16e830ee40010b8fac80d69c77272ff
|
||||
uri: https://huggingface.co/mudler/Qwen3.5-35B-A3B-APEX-GGUF/resolve/main/mmproj-F16.gguf
|
||||
- filename: llama-cpp/models/Qwen3.5-35B-A3B-APEX-GGUF/Qwen3.5-35B-A3B-APEX-Quality.gguf
|
||||
sha256: 50887b60c77ee5c95bc3657814ae993abcab7b2d71868b9af1e84d6badd09a57
|
||||
uri: https://huggingface.co/mudler/Qwen3.5-35B-A3B-APEX-GGUF/resolve/main/Qwen3.5-35B-A3B-APEX-Quality.gguf
|
||||
- name: "qwen_qwen3.5-35b-a3b"
|
||||
url: "github:mudler/LocalAI/gallery/virtual.yaml@master"
|
||||
urls:
|
||||
|
||||
151
scripts/patch_utils/apply_patches.sh
Executable file
151
scripts/patch_utils/apply_patches.sh
Executable file
@@ -0,0 +1,151 @@
|
||||
#!/bin/bash
|
||||
# apply_patches.sh — Generic patch fetcher and applier for any backend.
|
||||
#
|
||||
# Usage: ./apply_patches.sh <source-dir> <target-dir>
|
||||
#
|
||||
# <source-dir> Directory containing a patches/ folder (with optional sources.yaml)
|
||||
# <target-dir> The cloned upstream repo to patch (e.g., llama.cpp/)
|
||||
#
|
||||
# Behavior (idempotent):
|
||||
# 1. If patches/sources.yaml exists and yq is available, for each source:
|
||||
# - If patches/<name>/ already has .patch files: skip fetching (vendored)
|
||||
# - Otherwise: clone the fork at a pinned SHA, diff against the pinned
|
||||
# upstream SHA, and generate patches
|
||||
# 2. Apply all patches (skips already-applied ones)
|
||||
# 3. Fails fast on any patch application error
|
||||
#
|
||||
# sources.yaml fields:
|
||||
# name — subdirectory name for this source's patches
|
||||
# repo — fork git URL
|
||||
# version_var — Makefile variable holding the pinned fork commit SHA
|
||||
# base_var — Makefile variable holding the pinned upstream commit SHA
|
||||
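# version_file — Makefile path (relative to backend dir)
# upstream_repo — (optional) upstream git URL, fetched when the base commit is not present in the fork clone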
|
||||
|
||||
# Abort the script as soon as any command exits with a non-zero status.
set -e

# Use /tmp for patch temp files to avoid macOS long-path issues.
# Note: this replaces any TMPDIR inherited from the caller; set
# TMPDIR_OVERRIDE to pick a different location.
export TMPDIR="${TMPDIR_OVERRIDE:-/tmp}"
|
||||
|
||||
# read_makefile_var <variable> <makefile>
#
# Prints the value of the first `<variable>?=<value>` line found in
# <makefile>. Only the text between the first and second '=' is printed.
# Prints nothing when no such line exists.
read_makefile_var() {
    local var_name="$1"
    local makefile_path="$2"

    grep -m1 "^${var_name}?=" "$makefile_path" \
        | cut -d'=' -f2
}
|
||||
|
||||
# apply_one_patch <target-dir> <patch-file> <label>
#
# Applies <patch-file> to <target-dir> with -p1 unless it is already applied.
# "Already applied" is detected by checking that the patch reverses cleanly
# in a dry run. <label> is only used in the progress messages.
# Terminates the whole script with status 1 if the patch fails to apply.
apply_one_patch() {
    local dest="$1" patch_path="$2" display_name="$3"

    if ! patch -d "$dest" -p1 --reverse --dry-run < "$patch_path" >/dev/null 2>&1; then
        echo " Applying: $display_name"
        if ! patch -d "$dest" -p1 --forward < "$patch_path"; then
            echo "FAILED: $patch_path"
            exit 1
        fi
        return 0
    fi

    # The reverse dry run succeeded, so the change is already in the tree.
    echo " Already applied, skipping: $display_name"
    return 0
}
|
||||
|
||||
# _generate_fork_patches <name> <repo> <fork-sha> <base-sha> <upstream-url> <out-dir>
#
# Clones <repo> into a temporary directory, checks out <fork-sha>, and writes
# one patch per commit in <base-sha>..<fork-sha> into <out-dir> (which must be
# an absolute path). <upstream-url> may be empty; when set it is fetched if
# <base-sha> is not present in the clone.
#
# All git commands run with `git -C`, so the caller's working directory is
# never changed. The temporary clone is removed on every exit path.
#
# Returns 0 on success and when the clone fails (best-effort: a warning is
# printed and the source is skipped). Returns 1 when the pinned fork commit
# cannot be checked out, the upstream fetch fails, or format-patch fails.
_generate_fork_patches() {
    local name="$1" repo="$2" fork_sha="$3" base_sha="$4" upstream_url="$5" out_dir="$6"
    local clone_root clone_dir patch_count

    clone_root=$(mktemp -d)
    clone_dir="$clone_root/fork"

    if ! git clone "$repo" "$clone_dir" 2>&1; then
        echo "WARNING: Failed to clone $repo — skipping source '$name'"
        rm -rf "$clone_root"
        return 0
    fi

    # The pinned fork commit may not be reachable from the refs a plain clone
    # brings in, so request it explicitly. A failure here is tolerated because
    # the commit is normally already present; checkout below is the real check.
    git -C "$clone_dir" fetch origin "$fork_sha" 2>&1 || true
    if ! git -C "$clone_dir" checkout "$fork_sha" 2>&1; then
        echo "FAILED: could not check out $fork_sha from $repo"
        rm -rf "$clone_root"
        return 1
    fi

    # We need the base commit in the object store to compute the diff.
    # If the fork is a real GitHub fork, it shares history with upstream.
    # Otherwise, fetch it from the configured upstream_repo.
    if ! git -C "$clone_dir" cat-file -e "$base_sha" 2>/dev/null; then
        echo " Base commit not in fork history — fetching from upstream"
        if [ -n "$upstream_url" ] && [ "$upstream_url" != "null" ]; then
            git -C "$clone_dir" remote add upstream "$upstream_url" 2>/dev/null || true
            if ! git -C "$clone_dir" fetch upstream 2>&1; then
                echo "FAILED: could not fetch upstream $upstream_url"
                rm -rf "$clone_root"
                return 1
            fi
        else
            echo "WARNING: no upstream_repo configured for '$name' — base commit ${base_sha:0:12} is unavailable"
        fi
    fi

    patch_count=$(git -C "$clone_dir" rev-list --count "$base_sha".."$fork_sha" 2>/dev/null || echo "0")
    echo " $patch_count commits in diff"

    if [ "$patch_count" -gt 0 ]; then
        mkdir -p "$out_dir"
        # stdout (the list of written files) is discarded; stderr stays
        # visible so a failure is diagnosable.
        if ! git -C "$clone_dir" format-patch "$base_sha".."$fork_sha" -o "$out_dir/" >/dev/null; then
            echo "FAILED: git format-patch for source '$name'"
            rm -rf "$clone_root"
            return 1
        fi
        echo " Generated $patch_count patches in patches/$name/"
    else
        echo "WARNING: no patches generated for source '$name'"
    fi

    rm -rf "$clone_root"
    return 0
}

# apply_patches <source-dir> <target-dir>
#
# <source-dir> Directory that may contain a patches/ folder.
# <target-dir> Tree to patch; may be relative to the caller's working
#              directory, which this function never changes.
#
# Phase 1: if patches/sources.yaml exists and yq is installed, each listed
#          source that has no .patch files yet under patches/<name>/ gets
#          them generated from its fork (see _generate_fork_patches).
# Phase 2: every patches/<subdir>/*.patch is applied (subdirectories in
#          sorted order), followed by patches/*.patch.
#
# Returns 0 immediately when <source-dir>/patches does not exist. With
# `set -e` active, a non-existent <source-dir> aborts the script.
apply_patches() {
    # Assign separately from `local` so a failing `cd` is not masked by
    # the exit status of `local` itself.
    local SOURCE_DIR
    SOURCE_DIR="$(cd "$1" && pwd)"
    local TARGET_DIR="$2"
    local PATCHES_DIR="$SOURCE_DIR/patches"
    local SOURCES_FILE="$PATCHES_DIR/sources.yaml"

    if [ ! -d "$PATCHES_DIR" ]; then
        return 0
    fi

    # Phase 1: Generate missing patches from fork sources
    if [ -f "$SOURCES_FILE" ] && command -v yq >/dev/null 2>&1; then
        local SOURCE_COUNT i
        SOURCE_COUNT=$(yq '.sources | length' "$SOURCES_FILE")

        # A C-style loop runs zero times for an empty list; `seq 0 -1`
        # counts downward on BSD/macOS and would yield bogus indices.
        for ((i = 0; i < SOURCE_COUNT; i++)); do
            local NAME REPO VERSION_VAR BASE_VAR VERSION_FILE
            NAME=$(yq ".sources[$i].name" "$SOURCES_FILE")
            REPO=$(yq ".sources[$i].repo" "$SOURCES_FILE")
            VERSION_VAR=$(yq ".sources[$i].version_var" "$SOURCES_FILE")
            BASE_VAR=$(yq ".sources[$i].base_var" "$SOURCES_FILE")
            VERSION_FILE=$(yq ".sources[$i].version_file" "$SOURCES_FILE")

            # Without a name the per-source directory would collapse onto
            # patches/ itself (or patches/null).
            if [ -z "$NAME" ] || [ "$NAME" = "null" ]; then
                echo "WARNING: source #$i in $SOURCES_FILE has no name — skipping"
                continue
            fi

            local MAKEFILE="$SOURCE_DIR/$VERSION_FILE"
            local FORK_SHA BASE_SHA
            FORK_SHA=$(read_makefile_var "$VERSION_VAR" "$MAKEFILE")
            BASE_SHA=$(read_makefile_var "$BASE_VAR" "$MAKEFILE")

            if [ -z "$FORK_SHA" ] || [ -z "$BASE_SHA" ]; then
                echo "WARNING: Could not read $VERSION_VAR or $BASE_VAR from $MAKEFILE — skipping '$NAME'"
                continue
            fi

            local SOURCE_PATCH_DIR="$PATCHES_DIR/$NAME"
            local EXISTING
            EXISTING=$(ls "$SOURCE_PATCH_DIR"/*.patch 2>/dev/null | wc -l)

            if [ "$EXISTING" -gt 0 ]; then
                echo "Patches [$NAME]: $EXISTING patches already present — skipping fetch."
            else
                echo "Patches [$NAME]: generating from $REPO"
                echo " base (upstream): ${BASE_SHA:0:12}"
                echo " head (fork): ${FORK_SHA:0:12}"

                local UPSTREAM_URL
                UPSTREAM_URL=$(yq ".sources[$i].upstream_repo // \"\"" "$SOURCES_FILE")

                _generate_fork_patches "$NAME" "$REPO" "$FORK_SHA" "$BASE_SHA" "$UPSTREAM_URL" "$SOURCE_PATCH_DIR"
            fi
        done
    elif [ -f "$SOURCES_FILE" ]; then
        echo "WARNING: yq not found — skipping source-based patch generation."
    fi

    # Phase 2: Apply patches (subdirectories first, then top-level)
    # Directories are read line by line on fd 3 and patch files come from
    # globs, so paths containing spaces are not word-split.
    local source_dir p
    while IFS= read -r source_dir <&3; do
        for p in "$source_dir"/*.patch; do
            # An unmatched glob stays literal; skip it.
            [ -e "$p" ] || continue
            apply_one_patch "$TARGET_DIR" "$p" "$(basename "$source_dir")/$(basename "$p")"
        done
    done 3< <(find "$PATCHES_DIR" -mindepth 1 -maxdepth 1 -type d | sort)

    for p in "$PATCHES_DIR"/*.patch; do
        [ -e "$p" ] || continue
        apply_one_patch "$TARGET_DIR" "$p" "$(basename "$p")"
    done
}
|
||||
|
||||
# Entry point: both <source-dir> and <target-dir> are required.
if [ $# -ge 2 ]; then
    apply_patches "$1" "$2"
else
    echo "Usage: $0 <source-dir> <target-dir>"
    exit 1
fi
|
||||
Reference in New Issue
Block a user