chore: ⬆️ Update ikawrakow/ik_llama.cpp to bbc7de475178dd0535c16ad85f204a2529806c9d (#10669)

⬆️ Update ikawrakow/ik_llama.cpp Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
fix(gallery): block SSRF in gallery config URL fetch (#10665) (#10673)
2026-07-03 21:07:33 -04:00 · 2026-07-03 23:35:41 +02:00 · 2026-07-03 21:32:42 +00:00 · 2026-07-03 22:46:44 +02:00 · 2026-07-03 20:36:15 +00:00
13 changed files with 144 additions and 310 deletions
--- a/backend/cpp/ik-llama-cpp/Makefile
+++ b/backend/cpp/ik-llama-cpp/Makefile
@@ -1,5 +1,5 @@

-IK_LLAMA_VERSION?=87fc8701ff4da81a7d2a91ec0695f95eb3066a47
+IK_LLAMA_VERSION?=bbc7de475178dd0535c16ad85f204a2529806c9d
 LLAMA_REPO?=https://github.com/ikawrakow/ik_llama.cpp

 CMAKE_ARGS?=
--- a/backend/cpp/llama-cpp/run.sh
+++ b/backend/cpp/llama-cpp/run.sh
@@ -36,12 +36,6 @@ else
 	if [ -d "$CURDIR/lib/rocblas/library" ]; then
 		export ROCBLAS_TENSILE_LIBPATH="$CURDIR"/lib/rocblas/library
 	fi
-	# Same for hipBLASLt (rocblaslt): the bundled libhipblaslt.so resolves its
-	# TensileLibrary_lazy_gfx*.dat kernel data relative to itself, so point it at
-	# the bundled data or it falls back to slow generic kernels (issue #10660).
-	if [ -d "$CURDIR/lib/hipblaslt/library" ]; then
-		export HIPBLASLT_TENSILE_LIBPATH="$CURDIR"/lib/hipblaslt/library
-	fi
 fi

 # If there is a lib/ld.so, use it
--- a/backend/cpp/turboquant/run.sh
+++ b/backend/cpp/turboquant/run.sh
@@ -34,12 +34,6 @@ else
 	if [ -d "$CURDIR/lib/rocblas/library" ]; then
 		export ROCBLAS_TENSILE_LIBPATH="$CURDIR"/lib/rocblas/library
 	fi
-	# Same for hipBLASLt (rocblaslt): the bundled libhipblaslt.so resolves its
-	# TensileLibrary_lazy_gfx*.dat kernel data relative to itself, so point it at
-	# the bundled data or it falls back to slow generic kernels (issue #10660).
-	if [ -d "$CURDIR/lib/hipblaslt/library" ]; then
-		export HIPBLASLT_TENSILE_LIBPATH="$CURDIR"/lib/hipblaslt/library
-	fi
 fi

 # If there is a lib/ld.so, use it
--- a/core/config/gguf.go
+++ b/core/config/gguf.go
@@ -67,16 +67,6 @@ func guessGGUFFromFile(cfg *ModelConfig, f *gguf.GGUFFile, defaultCtx int) {
 		ApplyMTPDefaults(cfg, n)
 	}

-	// Sliding-window-attention models (Gemma 2/3, Cohere2, Llama 4, ...) ship
-	// with a reduced SWA KV cache by default, which cannot reuse a prompt
-	// prefix across requests and so defeats the cross-request prefix cache
-	// (cache_reuse) we enable in serving_defaults.go. Enable the full SWA cache
-	// for these models so the prefix survives; skipped for dense models and
-	// when the user already pinned an SWA cache option.
-	if w, ok := HasSlidingWindowAttention(f); ok {
-		ApplySWAFullDefault(cfg, w)
-	}
-
 	// Thinking support detection is done after model load via DetectThinkingSupportFromBackend

 	// template estimations
--- a/core/config/swa.go
+++ b/core/config/swa.go
@@ -1,56 +0,0 @@
-package config
-
-import (
-	gguf "github.com/gpustack/gguf-parser-go"
-	"github.com/mudler/xlog"
-)
-
-// swaCacheOptionNames lists the backend option keys that control the
-// sliding-window-attention KV cache. If the user pinned any of these we leave
-// the SWA cache alone instead of forcing swa_full.
-var swaCacheOptionNames = []string{"swa_full", "n_swa"}
-
-// HasSlidingWindowAttention reports whether the parsed GGUF describes a
-// sliding-window-attention (SWA) model — Gemma 2/3, Cohere2, Llama 4 and the
-// like. The gguf-parser library normalizes the per-architecture
-// `<arch>.attention.sliding_window` metadata key into
-// GGUFArchitecture.AttentionSlidingWindow, applying the same family-specific
-// rules llama.cpp uses (e.g. Phi-3 carries the key but does not actually run
-// SWA, and is normalized to 0). A non-zero window means the model interleaves
-// SWA layers, so the returned size is also the diagnostic value we log.
-func HasSlidingWindowAttention(f *gguf.GGUFFile) (uint64, bool) {
-	if f == nil {
-		return 0, false
-	}
-	w := f.Architecture().AttentionSlidingWindow
-	return w, w > 0
-}
-
-// ApplySWAFullDefault enables the full-size SWA KV cache (swa_full:true) for a
-// sliding-window model, unless the user already pinned an SWA cache option.
-//
-// Why: llama.cpp defaults to a reduced SWA KV cache sized to the sliding window
-// (memory-light), but that reduced cache cannot preserve a prompt prefix across
-// requests. So for SWA models the cross-request prefix cache we enable in
-// serving_defaults.go (cache_reuse) is silently defeated — every turn
-// reprocesses the entire prompt. Setting swa_full:true makes llama.cpp keep the
-// full KV cache so the shared prefix is actually reused.
-//
-// The tradeoff is memory: the full SWA cache scales with context_size, so this
-// is gated to models that are genuinely SWA (never applied to dense models,
-// where it would only waste memory) and never overrides an explicit user
-// choice. `slidingWindow` is the value read from the GGUF and is used only for
-// the diagnostic log line.
-func ApplySWAFullDefault(cfg *ModelConfig, slidingWindow uint64) {
-	if cfg == nil || slidingWindow == 0 {
-		return
-	}
-	if backendOptionSet(cfg.Options, swaCacheOptionNames...) {
-		xlog.Debug("[swa] sliding-window model but an SWA cache option is already set; leaving user choice intact",
-			"name", cfg.Name, "sliding_window", slidingWindow)
-		return
-	}
-	cfg.Options = append(cfg.Options, "swa_full:true")
-	xlog.Debug("[swa] enabling swa_full for sliding-window model so the cross-request prompt-prefix cache survives (reduced SWA cache cannot reuse a prefix across requests)",
-		"name", cfg.Name, "sliding_window", slidingWindow)
-}
--- a/core/config/swa_test.go
+++ b/core/config/swa_test.go
@@ -1,120 +0,0 @@
-package config_test
-
-import (
-	. "github.com/mudler/LocalAI/core/config"
-
-	gguf "github.com/gpustack/gguf-parser-go"
-	. "github.com/onsi/ginkgo/v2"
-	. "github.com/onsi/gomega"
-)
-
-// ggufWithSlidingWindow fabricates a minimal in-memory GGUF carrying the given
-// `general.architecture` and `<arch>.attention.sliding_window` so the SWA
-// detection can be exercised without a real model file. A window of 0 omits the
-// key, modelling a dense (non-SWA) model.
-func ggufWithSlidingWindow(arch string, window uint32) *gguf.GGUFFile {
-	kvs := gguf.GGUFMetadataKVs{
-		{
-			Key:       "general.architecture",
-			ValueType: gguf.GGUFMetadataValueTypeString,
-			Value:     arch,
-		},
-	}
-	if window > 0 {
-		kvs = append(kvs, gguf.GGUFMetadataKV{
-			Key:       arch + ".attention.sliding_window",
-			ValueType: gguf.GGUFMetadataValueTypeUint32,
-			Value:     window,
-		})
-	}
-	return &gguf.GGUFFile{
-		Header: gguf.GGUFHeader{MetadataKV: kvs},
-	}
-}
-
-var _ = Describe("SWA full-cache auto-default", func() {
-	Context("HasSlidingWindowAttention", func() {
-		It("returns false on a nil GGUF file", func() {
-			w, ok := HasSlidingWindowAttention(nil)
-			Expect(ok).To(BeFalse())
-			Expect(w).To(BeZero())
-		})
-
-		It("detects a sliding-window model (Gemma 3 style)", func() {
-			w, ok := HasSlidingWindowAttention(ggufWithSlidingWindow("gemma3", 1024))
-			Expect(ok).To(BeTrue())
-			Expect(w).To(Equal(uint64(1024)))
-		})
-
-		It("detects Gemma 2 even without an explicit key (family default window)", func() {
-			// gguf-parser applies llama.cpp's family rules: gemma2 defaults the
-			// sliding window to 4096 when the metadata key is absent.
-			w, ok := HasSlidingWindowAttention(ggufWithSlidingWindow("gemma2", 0))
-			Expect(ok).To(BeTrue())
-			Expect(w).To(Equal(uint64(4096)))
-		})
-
-		It("reports a dense model as non-SWA", func() {
-			w, ok := HasSlidingWindowAttention(ggufWithSlidingWindow("llama", 0))
-			Expect(ok).To(BeFalse())
-			Expect(w).To(BeZero())
-		})
-
-		It("treats Phi-3 as non-SWA even when the key is present", func() {
-			// Phi-3 carries attention.sliding_window but does not actually run
-			// SWA; gguf-parser normalizes it to 0 to match llama.cpp.
-			w, ok := HasSlidingWindowAttention(ggufWithSlidingWindow("phi3", 2048))
-			Expect(ok).To(BeFalse())
-			Expect(w).To(BeZero())
-		})
-	})
-
-	Context("ApplySWAFullDefault", func() {
-		It("enables swa_full for a sliding-window model when unset", func() {
-			cfg := &ModelConfig{Name: "gemma3"}
-			ApplySWAFullDefault(cfg, 1024)
-			Expect(cfg.Options).To(ContainElement("swa_full:true"))
-		})
-
-		It("is a no-op for a dense model (window 0)", func() {
-			cfg := &ModelConfig{Name: "llama"}
-			ApplySWAFullDefault(cfg, 0)
-			Expect(cfg.Options).To(BeEmpty())
-		})
-
-		It("preserves an explicit swa_full:false", func() {
-			cfg := &ModelConfig{Name: "gemma3", Options: []string{"swa_full:false"}}
-			ApplySWAFullDefault(cfg, 1024)
-			Expect(cfg.Options).To(Equal([]string{"swa_full:false"}))
-		})
-
-		It("preserves an explicit swa_full:true without duplicating it", func() {
-			cfg := &ModelConfig{Name: "gemma3", Options: []string{"swa_full:true"}}
-			ApplySWAFullDefault(cfg, 1024)
-			Expect(cfg.Options).To(Equal([]string{"swa_full:true"}))
-		})
-
-		It("respects the n_swa alias", func() {
-			cfg := &ModelConfig{Name: "gemma3", Options: []string{"n_swa:512"}}
-			ApplySWAFullDefault(cfg, 1024)
-			Expect(cfg.Options).To(Equal([]string{"n_swa:512"}))
-		})
-
-		It("preserves unrelated options already on the config", func() {
-			cfg := &ModelConfig{
-				Name:    "gemma3",
-				Options: []string{"use_jinja:true", "cache_reuse:256"},
-			}
-			ApplySWAFullDefault(cfg, 1024)
-			Expect(cfg.Options).To(Equal([]string{
-				"use_jinja:true",
-				"cache_reuse:256",
-				"swa_full:true",
-			}))
-		})
-
-		It("tolerates a nil config", func() {
-			Expect(func() { ApplySWAFullDefault(nil, 1024) }).ToNot(Panic())
-		})
-	})
-})
--- a/core/gallery/gallery.go
+++ b/core/gallery/gallery.go
@@ -15,14 +15,35 @@ import (
 	"github.com/mudler/LocalAI/core/config"
 	"github.com/mudler/LocalAI/pkg/downloader"
 	"github.com/mudler/LocalAI/pkg/system"
+	"github.com/mudler/LocalAI/pkg/utils"
 	"github.com/mudler/LocalAI/pkg/xsync"
 	"github.com/mudler/xlog"

 	"gopkg.in/yaml.v3"
 )

+// validateGalleryConfigURL guards the gallery config fetch against SSRF. A
+// gallery config URL can be attacker-controlled (e.g. POST /models/apply with
+// an empty id fetches it directly), so a plain http(s) URL must not be allowed
+// to reach private, loopback, link-local or cloud-metadata addresses. Other
+// schemes (huggingface://, github:, oci://, ollama://, file://) resolve to
+// fixed public services or local files and are not a network-SSRF vector, so
+// they are left untouched.
+// See https://github.com/mudler/LocalAI/issues/10665
+func validateGalleryConfigURL(rawURL string) error {
+	lower := strings.ToLower(strings.TrimSpace(rawURL))
+	if strings.HasPrefix(lower, "http://") || strings.HasPrefix(lower, "https://") {
+		return utils.ValidateExternalURL(rawURL)
+	}
+	return nil
+}
+
 func GetGalleryConfigFromURL[T any](url string, basePath string) (T, error) {
 	var config T
+	if err := validateGalleryConfigURL(url); err != nil {
+		xlog.Error("refusing to fetch gallery config", "error", err, "url", url)
+		return config, err
+	}
 	uri := downloader.URI(url)
 	err := uri.ReadWithCallback(basePath, func(url string, d []byte) error {
 		return yaml.Unmarshal(d, &config)
@@ -36,6 +57,10 @@ func GetGalleryConfigFromURL[T any](url string, basePath string) (T, error) {

 func GetGalleryConfigFromURLWithContext[T any](ctx context.Context, url string, basePath string) (T, error) {
 	var config T
+	if err := validateGalleryConfigURL(url); err != nil {
+		xlog.Error("refusing to fetch gallery config", "error", err, "url", url)
+		return config, err
+	}
 	uri := downloader.URI(url)
 	err := uri.ReadWithAuthorizationAndCallback(ctx, basePath, "", func(url string, d []byte) error {
 		return yaml.Unmarshal(d, &config)
--- a/core/gallery/request_test.go
+++ b/core/gallery/request_test.go
@@ -1,6 +1,10 @@
 package gallery_test

 import (
+	"context"
+	"net/http"
+	"net/http/httptest"
+
 	. "github.com/mudler/LocalAI/core/gallery"
 	. "github.com/onsi/ginkgo/v2"
 	. "github.com/onsi/gomega"
@@ -19,4 +23,49 @@ var _ = Describe("Gallery API tests", func() {
 			Expect(e.Name).To(Equal("gpt4all-j"))
 		})
 	})
+
+	// SSRF guard: a user-supplied gallery config URL (e.g. POST /models/apply
+	// with an empty id) must not be able to reach internal network addresses.
+	// See https://github.com/mudler/LocalAI/issues/10665
+	Context("SSRF protection on config URLs", func() {
+		var server *httptest.Server
+
+		BeforeEach(func() {
+			// A reachable internal server that would happily serve a valid
+			// gallery config. Without the SSRF guard the fetch succeeds; the
+			// guard must block it before the request ever leaves the process.
+			server = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+				w.WriteHeader(http.StatusOK)
+				_, _ = w.Write([]byte("name: internal-ssrf\nfiles: []\n"))
+			}))
+		})
+
+		AfterEach(func() {
+			server.Close()
+		})
+
+		It("blocks fetching a config from a loopback address", func() {
+			_, err := GetGalleryConfigFromURL[ModelConfig](server.URL, "")
+			Expect(err).To(HaveOccurred())
+			Expect(err.Error()).To(ContainSubstring("not allowed"))
+		})
+
+		It("blocks fetching a config from a loopback address (context variant)", func() {
+			_, err := GetGalleryConfigFromURLWithContext[ModelConfig](context.Background(), server.URL, "")
+			Expect(err).To(HaveOccurred())
+			Expect(err.Error()).To(ContainSubstring("not allowed"))
+		})
+
+		It("blocks well-known internal hostnames and metadata endpoints", func() {
+			for _, u := range []string{
+				"http://localhost/secret",
+				"http://10.0.0.1/config.yaml",
+				"http://192.168.1.1/config.yaml",
+				"http://169.254.169.254/latest/meta-data/",
+			} {
+				_, err := GetGalleryConfigFromURL[ModelConfig](u, "")
+				Expect(err).To(HaveOccurred(), "expected %s to be rejected", u)
+			}
+		})
+	})
 })
--- a/core/services/routing/pii/metrics.go
+++ b/core/services/routing/pii/metrics.go
@@ -0,0 +1,48 @@
+package pii
+
+import (
+	"context"
+	"sync"
+
+	"go.opentelemetry.io/otel"
+	"go.opentelemetry.io/otel/attribute"
+	"go.opentelemetry.io/otel/metric"
+)
+
+// Prometheus counter for PII events. The EventStore ring buffer is
+// capacity-bound and meant for recent-audit browsing; operators also want
+// a monotonic, scrape-friendly signal ("how many detections/blocks per
+// hour, did the filter stop firing after a deploy"). Record() is the
+// single choke point every producer already goes through (request
+// middleware, response scrubbing, MITM proxy connects/intercepts), so one
+// counter here covers all paths without touching the producers.
+//
+// Initialised lazily on first Record so the package works no matter when
+// (or whether) the Prometheus-backed global MeterProvider is installed —
+// same pattern as core/services/routing/billing.
+var (
+	metricsOnce   sync.Once
+	eventsCounter metric.Int64Counter
+)
+
+func recordEventMetric(e PIIEvent) {
+	metricsOnce.Do(func() {
+		meter := otel.Meter("github.com/mudler/LocalAI")
+		c, err := meter.Int64Counter(
+			"localai_pii_events_total",
+			metric.WithDescription("PII/audit events recorded, labeled by kind, origin, action and direction"),
+		)
+		if err == nil {
+			eventsCounter = c
+		}
+	})
+	if eventsCounter == nil {
+		return
+	}
+	eventsCounter.Add(context.Background(), 1, metric.WithAttributes(
+		attribute.String("kind", string(e.Kind)),
+		attribute.String("origin", string(e.Origin)),
+		attribute.String("action", string(e.Action)),
+		attribute.String("direction", string(e.Direction)),
+	))
+}
--- a/core/services/routing/pii/store.go
+++ b/core/services/routing/pii/store.go
@@ -58,6 +58,7 @@ type memoryEventStore struct {
 }

 func (s *memoryEventStore) Record(_ context.Context, e PIIEvent) error {
+	recordEventMetric(e)
 	s.mu.Lock()
 	defer s.mu.Unlock()
 	s.ring[s.cursor] = e
--- a/docs/content/features/text-generation.md
+++ b/docs/content/features/text-generation.md
@@ -507,7 +507,7 @@ The `llama.cpp` backend supports additional configuration options that can be sp
 | `fit_params_min_ctx` or `fit_ctx` | integer | Minimum context size that can be set by fit_params. Default: `4096`. | `fit_ctx:2048` |
 | `n_cache_reuse` or `cache_reuse` | integer | Minimum chunk size to attempt reusing from the cache via KV shifting. Default: `0` (disabled). | `cache_reuse:256` |
 | `slot_prompt_similarity` or `sps` | float | How much the prompt of a request must match the prompt of a slot to use that slot. Default: `0.1`. Set to `0` to disable. | `sps:0.5` |
-| `swa_full` | boolean | Use full-size SWA (Sliding Window Attention) cache. Upstream default is `false` (a memory-light reduced cache), but that reduced cache cannot reuse a prompt prefix across requests, which defeats `cache_reuse` for SWA models (Gemma 2/3, Cohere2, Llama 4, ...). LocalAI therefore **auto-enables `swa_full:true` for GGUF models detected as SWA** so the cross-request prefix cache works; it is left off for dense models. The tradeoff is memory: the full SWA cache scales with `context_size`. Set `swa_full:false` explicitly to opt back out (e.g. to save memory at a large context). | `swa_full:true` |
+| `swa_full` | boolean | Use full-size SWA (Sliding Window Attention) cache. Default: `false`. | `swa_full:true` |
 | `cont_batching` or `continuous_batching` | boolean | Enable continuous batching for handling multiple sequences. Default: `true`. | `cont_batching:true` |
 | `check_tensors` | boolean | Validate tensor data for invalid values during model loading. Default: `false`. | `check_tensors:true` |
 | `warmup` | boolean | Enable warmup run after model loading. Default: `true`. | `warmup:false` |
--- a/scripts/build/package-gpu-libs-rocm-data_test.sh
+++ b/scripts/build/package-gpu-libs-rocm-data_test.sh
@@ -1,57 +0,0 @@
-#!/bin/bash
-# Regression test for scripts/build/package-gpu-libs.sh ROCm data bundling.
-#
-# Guards issue #10660: hipBLASLt (rocblaslt) resolves its TensileLibrary_lazy_gfx*.dat
-# kernel data relative to the bundled libhipblaslt.so. The packager copied the
-# rocblas/ data dir but not the hipblaslt/ data dir, so the bundled backend
-# fell back to slow generic kernels and logged
-#   rocblaslt error: Cannot read "TensileLibrary_lazy_gfx1201.dat": No such file or directory
-#
-# This test fabricates a fake ROCm tree containing both rocblas/ and hipblaslt/
-# tensile data, points the packager at it via ROCM_BASE_DIRS, and asserts BOTH
-# data directories are bundled into the target lib dir.
-set -euo pipefail
-
-CURDIR=$(dirname "$(realpath "$0")")
-SCRIPT="$CURDIR/package-gpu-libs.sh"
-
-WORK=$(mktemp -d)
-trap 'rm -rf "$WORK"' EXIT
-
-# Fabricate a fake ROCm install with both rocblas and hipblaslt tensile data.
-FAKE_ROCM="$WORK/opt/rocm"
-mkdir -p "$FAKE_ROCM/lib/rocblas/library"
-mkdir -p "$FAKE_ROCM/lib/hipblaslt/library"
-echo "fake rocblas tensile" > "$FAKE_ROCM/lib/rocblas/library/TensileLibrary_lazy_gfx1201.dat"
-echo "fake hipblaslt tensile" > "$FAKE_ROCM/lib/hipblaslt/library/TensileLibrary_lazy_gfx1201.dat"
-
-TARGET="$WORK/target"
-mkdir -p "$TARGET"
-
-# shellcheck source=/dev/null
-source "$SCRIPT" "$TARGET"
-
-# Point the data-dir copy at the fabricated tree instead of the real /opt/rocm,
-# then run the actual ROCm packager. This asserts package_rocm_libs itself
-# bundles BOTH data dirs, not just that the helper works in isolation.
-export BUILD_TYPE=hipblas
-export ROCM_BASE_DIRS="$FAKE_ROCM"
-package_rocm_libs
-
-fail=false
-if [ ! -e "$TARGET/rocblas/library/TensileLibrary_lazy_gfx1201.dat" ]; then
-    echo "FAIL: rocblas tensile data was NOT bundled"
-    fail=true
-fi
-if [ ! -e "$TARGET/hipblaslt/library/TensileLibrary_lazy_gfx1201.dat" ]; then
-    echo "FAIL: hipblaslt tensile data was NOT bundled (regression of #10660)"
-    fail=true
-fi
-
-if [ "$fail" = true ]; then
-    ls -R "$TARGET" || true
-    exit 1
-fi
-
-echo "PASS: rocblas and hipblaslt tensile data were both bundled"
-exit 0
--- a/scripts/build/package-gpu-libs.sh
+++ b/scripts/build/package-gpu-libs.sh
@@ -224,50 +224,6 @@ package_cuda_libs() {
    echo "CUDA libraries packaged successfully"
 }

-# Copy a ROCm library data subdirectory (e.g. rocblas, hipblaslt) into the
-# bundled lib/ dir. These directories hold the TensileLibrary_*.dat GPU kernel
-# tuning files, which rocBLAS/hipBLASLt load at runtime *relative to their own
-# .so*. Since backends ship their own copies of libhipblaslt.so/librocblas.so
-# under lib/, the matching data dir must travel with them or the libs fall back
-# to slow generic kernels (rocblaslt error: Cannot read TensileLibrary_lazy_gfx*.dat;
-# see issue #10660).
-#
-# The ROCm search roots default to /opt/rocm{,-*} but can be overridden via the
-# ROCM_BASE_DIRS env var (space-separated), which keeps the copy unit-testable
-# without a real ROCm install.
-# Args: $1 = data subdir name found under <rocm-root>/lib{,64}/
-copy_rocm_data_dir() {
-    local data_name="$1"
-    # Single-line `local x=$(...)` on purpose: `local` masks the command
-    # substitution's exit status, which is 1 when nullglob is unset and would
-    # otherwise trip the script's `set -e`.
-    local old_nullglob=$(shopt -p nullglob)
-    shopt -s nullglob
-    local rocm_dirs
-    if [ -n "${ROCM_BASE_DIRS:-}" ]; then
-        # shellcheck disable=SC2206  # intentional word-split of the override
-        rocm_dirs=(${ROCM_BASE_DIRS})
-    else
-        rocm_dirs=(/opt/rocm /opt/rocm-*)
-    fi
-    eval "$old_nullglob"
-    local found=false
-    local rocm_base lib_subdir
-    for rocm_base in "${rocm_dirs[@]}"; do
-        for lib_subdir in lib lib64; do
-            if [ -d "$rocm_base/$lib_subdir/$data_name" ]; then
-                echo "Found $data_name data at $rocm_base/$lib_subdir/$data_name"
-                mkdir -p "$TARGET_LIB_DIR/$data_name"
-                cp -arfL "$rocm_base/$lib_subdir/$data_name/"* "$TARGET_LIB_DIR/$data_name/" || echo "WARNING: Failed to copy $data_name data from $rocm_base/$lib_subdir/$data_name"
-                found=true
-            fi
-        done
-    done
-    if [ "$found" = false ]; then
-        echo "WARNING: No $data_name library data found in ${ROCM_BASE_DIRS:-/opt/rocm*}/lib{,64}/$data_name"
-    fi
-}
-
 # Package AMD ROCm/HIPBlas libraries
 package_rocm_libs() {
    echo "Packaging ROCm/HIPBlas libraries for BUILD_TYPE=${BUILD_TYPE}..."
@@ -311,16 +267,27 @@ package_rocm_libs() {
        fi
    done

-    # Copy rocBLAS and hipBLASLt kernel data (TensileLibrary_*.dat tuning files)
-    # so the bundled libs find their per-arch kernels at runtime instead of
-    # falling back to slow generic code (see copy_rocm_data_dir / issue #10660).
-    copy_rocm_data_dir rocblas
-    copy_rocm_data_dir hipblaslt
+    # Copy rocblas library data (tuning files, TensileLibrary, etc.)
+    local old_nullglob=$(shopt -p nullglob)
+    shopt -s nullglob
+    local rocm_dirs=(/opt/rocm /opt/rocm-*)
+    eval "$old_nullglob"
+    local rocblas_found=false
+    for rocm_base in "${rocm_dirs[@]}"; do
+        for lib_subdir in lib lib64; do
+            if [ -d "$rocm_base/$lib_subdir/rocblas" ]; then
+                echo "Found rocblas data at $rocm_base/$lib_subdir/rocblas"
+                mkdir -p "$TARGET_LIB_DIR/rocblas"
+                cp -arfL "$rocm_base/$lib_subdir/rocblas/"* "$TARGET_LIB_DIR/rocblas/" || echo "WARNING: Failed to copy rocblas data from $rocm_base/$lib_subdir/rocblas"
+                rocblas_found=true
+            fi
+        done
+    done
+    if [ "$rocblas_found" = false ]; then
+        echo "WARNING: No rocblas library data found in /opt/rocm*/lib{,64}/rocblas"
+    fi

    # Copy libomp from LLVM (required for ROCm)
-    # Single-line `local x=$(...)` on purpose: masks shopt -p's nonzero exit
-    # (nullglob unset) so it doesn't trip `set -e`.
-    local old_nullglob=$(shopt -p nullglob)
    shopt -s nullglob
    local omp_libs=(/opt/rocm*/lib/llvm/lib/libomp.so*)
    eval "$old_nullglob"
@@ -510,7 +477,6 @@ export -f copy_libs_glob
 export -f is_core_lib
 export -f copy_elf_deps
 export -f sweep_transitive_deps
-export -f copy_rocm_data_dir
 export -f package_cuda_libs
 export -f package_rocm_libs
 export -f package_intel_libs
Author	SHA1	Message	Date
LocalAI [bot]	13310905a3	chore: ⬆️ Update ikawrakow/ik_llama.cpp to `bbc7de475178dd0535c16ad85f204a2529806c9d` (#10669) ⬆️ Update ikawrakow/ik_llama.cpp Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>	2026-07-03 23:35:41 +02:00
LocalAI [bot]	2cbb3c96b3	fix(gallery): block SSRF in gallery config URL fetch (#10665 ) (#10673 ) POST /models/apply with an empty "id" fetches the attacker-supplied "url" gallery config directly via http.Client, with no check that the URL resolves to a public IP. In the default Docker deployment no API key is configured, so any network-reachable client can coerce LocalAI into issuing requests to internal services or cloud-metadata endpoints (and exfiltrate a small slice of the response through the job error message). Guard the config fetch chokepoints (GetGalleryConfigFromURL and GetGalleryConfigFromURLWithContext, which back both the /models/apply worker and gallery installs) with utils.ValidateExternalURL, matching the protection already applied to the CORS proxy and image/video/audio download paths. Only plain http(s) URLs are validated; non-network schemes (huggingface://, github:, oci://, ollama://, file://) resolve to fixed public services or local files and are left untouched. Assisted-by: Claude:claude-opus-4-8 [Claude Code] Signed-off-by: Ettore Di Giacinto <mudler@localai.io> Co-authored-by: Ettore Di Giacinto <mudler@localai.io>	2026-07-03 21:32:42 +00:00
Ettore Di Giacinto	1152acc167	Revert "feat(config): default swa_full:true for sliding-window-attention models" (#10674) Revert "feat(config): default swa_full:true for sliding-window-attention mode…" This reverts commit `02b007a31e`.	2026-07-03 22:46:44 +02:00
walcz-de	cc8ee62db0	feat(pii): export PII/audit events as a Prometheus counter (#10641 ) The PII EventStore ring buffer is capacity-bound and meant for recent-audit browsing via /api/pii/events; operators also want a monotonic, scrape-friendly signal on /metrics — how many detections/masks/blocks per hour, per origin, and whether the filter stopped firing after a deploy (silent-failure class). EventStore.Record is the single choke point every producer already goes through (request middleware, response scrubbing, MITM proxy connects/intercepts), so one lazily-initialised counter there covers all paths without touching any producer: localai_pii_events_total{kind, origin, action, direction} Same lazy otel.Meter pattern as core/services/routing/billing, so the counter lands on the Prometheus-backed global MeterProvider installed by the monitoring service. No behaviour change; label cardinality is bounded (enum-like fields only, no pattern IDs or user IDs). Assisted-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com> Signed-off-by: stefanwalcz <stefan.walcz@walcz.de>	2026-07-03 20:36:15 +00:00