mirror of
https://github.com/mudler/LocalAI.git
synced 2026-06-25 09:09:07 -04:00
Compare commits
42 Commits
feat/recon
...
feat/llama
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
c23fc5fb42 | ||
|
|
292c1cab94 | ||
|
|
4e9bb4f879 | ||
|
|
3b47122e54 | ||
|
|
379fa3e525 | ||
|
|
e47c58656f | ||
|
|
482314c623 | ||
|
|
e8ae88a2a0 | ||
|
|
e1994579f8 | ||
|
|
e5620989dd | ||
|
|
fc618dcee6 | ||
|
|
e6042080c0 | ||
|
|
0f3b24436d | ||
|
|
4b6f911835 | ||
|
|
a5e28942a6 | ||
|
|
dba9cd7ca4 | ||
|
|
c93190de50 | ||
|
|
4dbf69f889 | ||
|
|
deb430f3ec | ||
|
|
dd8c8778e2 | ||
|
|
06a7b6cadb | ||
|
|
67c8889866 | ||
|
|
1d49041c85 | ||
|
|
2edc4e25b3 | ||
|
|
7888067914 | ||
|
|
9eedbf537a | ||
|
|
69c16481c8 | ||
|
|
56f8a6623f | ||
|
|
4755d676a3 | ||
|
|
10184b5e28 | ||
|
|
fdf475ec5f | ||
|
|
9d54a599b0 | ||
|
|
63bcbf6c12 | ||
|
|
95b058e1c5 | ||
|
|
f2abcc7503 | ||
|
|
62c99c10b3 | ||
|
|
7226bb9f30 | ||
|
|
569d9bbd9e | ||
|
|
682fb2718c | ||
|
|
20c643e1f6 | ||
|
|
64a4351f3a | ||
|
|
b7d67f5779 |
@@ -17,19 +17,29 @@ if [[ -n "${CUDA_DOCKER_ARCH:-}" ]]; then
|
||||
rm -rf /LocalAI/backend/cpp/llama-cpp-*-build
|
||||
fi
|
||||
|
||||
if [ "${TARGETARCH}" = "arm64" ] || [ "${BUILD_TYPE}" = "hipblas" ]; then
|
||||
cd /LocalAI/backend/cpp/llama-cpp
|
||||
make llama-cpp-fallback
|
||||
make llama-cpp-grpc
|
||||
make llama-cpp-rpc-server
|
||||
cd /LocalAI/backend/cpp/llama-cpp
|
||||
if [ -z "${BUILD_TYPE:-}" ]; then
|
||||
# Pure CPU image (BUILD_TYPE empty): one build with ggml CPU_ALL_VARIANTS replaces the
|
||||
# per-microarch binaries (x86: avx/avx2/avx512/fallback; arm64: armv8.x/armv9.x). ggml
|
||||
# dlopens the best libggml-cpu-*.so at runtime by probing host CPU features.
|
||||
#
|
||||
# arm64: the CPU_ALL_VARIANTS table includes armv9.2 SME variants whose -march=...+sme is
|
||||
# rejected by the Ubuntu 24.04 default gcc-13. gcc-14 accepts it, so build the arm64
|
||||
# variants with it (the host never *selects* SME unless it has it, but every variant must
|
||||
# still compile).
|
||||
if [ "${TARGETARCH}" = "arm64" ]; then
|
||||
apt-get update -qq && apt-get install -y -qq gcc-14 g++-14
|
||||
export CC=gcc-14 CXX=g++-14
|
||||
fi
|
||||
make llama-cpp-cpu-all
|
||||
else
|
||||
cd /LocalAI/backend/cpp/llama-cpp
|
||||
make llama-cpp-avx
|
||||
make llama-cpp-avx2
|
||||
make llama-cpp-avx512
|
||||
# GPU build (cublas/hipblas/sycl/vulkan/...): the accelerator does the compute, so a
|
||||
# single fallback CPU build is enough - no per-microarch CPU variants needed. (This also
|
||||
# keeps the heavy GPU backend compile from also building the whole CPU variant matrix,
|
||||
# and avoids the gcc-14 apt step on GPU base images such as nvidia l4t.)
|
||||
make llama-cpp-fallback
|
||||
make llama-cpp-grpc
|
||||
make llama-cpp-rpc-server
|
||||
fi
|
||||
make llama-cpp-grpc
|
||||
make llama-cpp-rpc-server
|
||||
|
||||
ccache -s || true
|
||||
|
||||
@@ -19,17 +19,21 @@ fi
|
||||
|
||||
cd /LocalAI/backend/cpp/turboquant
|
||||
|
||||
if [ "${TARGETARCH}" = "arm64" ] || [ "${BUILD_TYPE}" = "hipblas" ]; then
|
||||
make turboquant-fallback
|
||||
make turboquant-grpc
|
||||
make turboquant-rpc-server
|
||||
if [ -z "${BUILD_TYPE:-}" ]; then
|
||||
# Pure CPU image: one ggml CPU_ALL_VARIANTS build replaces the per-microarch binaries.
|
||||
# arm64: the armv9.2 SME variants need gcc-14 (gcc-13 rejects +sme).
|
||||
if [ "${TARGETARCH}" = "arm64" ]; then
|
||||
apt-get update -qq && apt-get install -y -qq gcc-14 g++-14
|
||||
export CC=gcc-14 CXX=g++-14
|
||||
fi
|
||||
make turboquant-cpu-all
|
||||
else
|
||||
make turboquant-avx
|
||||
make turboquant-avx2
|
||||
make turboquant-avx512
|
||||
# GPU build (cublas/hipblas/sycl/vulkan/...): single fallback CPU build, the accelerator
|
||||
# does the compute. Keeps the GPU compile from also building the CPU variant matrix and
|
||||
# avoids the gcc-14 apt step on GPU base images such as nvidia l4t.
|
||||
make turboquant-fallback
|
||||
make turboquant-grpc
|
||||
make turboquant-rpc-server
|
||||
fi
|
||||
make turboquant-grpc
|
||||
make turboquant-rpc-server
|
||||
|
||||
ccache -s || true
|
||||
|
||||
2
.github/workflows/backend.yml
vendored
2
.github/workflows/backend.yml
vendored
@@ -44,7 +44,7 @@ jobs:
|
||||
has-merges-singlearch: ${{ steps.set-matrix.outputs['has-merges-singlearch'] }}
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v6
|
||||
uses: actions/checkout@v7
|
||||
|
||||
- name: Setup Bun
|
||||
uses: oven-sh/setup-bun@v2
|
||||
|
||||
2
.github/workflows/backend_build.yml
vendored
2
.github/workflows/backend_build.yml
vendored
@@ -101,7 +101,7 @@ jobs:
|
||||
steps:
|
||||
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v6
|
||||
uses: actions/checkout@v7
|
||||
with:
|
||||
submodules: true
|
||||
|
||||
|
||||
2
.github/workflows/backend_build_darwin.yml
vendored
2
.github/workflows/backend_build_darwin.yml
vendored
@@ -57,7 +57,7 @@ jobs:
|
||||
HOMEBREW_NO_ANALYTICS: '1'
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v6
|
||||
uses: actions/checkout@v7
|
||||
with:
|
||||
submodules: true
|
||||
|
||||
|
||||
2
.github/workflows/backend_merge.yml
vendored
2
.github/workflows/backend_merge.yml
vendored
@@ -49,7 +49,7 @@ jobs:
|
||||
# Sparse checkout: the merge job needs `.github/scripts/` (for the
|
||||
# keepalive cleanup script) but none of the source tree.
|
||||
- name: Checkout (.github/scripts only)
|
||||
uses: actions/checkout@v6
|
||||
uses: actions/checkout@v7
|
||||
with:
|
||||
sparse-checkout: |
|
||||
.github/scripts
|
||||
|
||||
2
.github/workflows/backend_pr.yml
vendored
2
.github/workflows/backend_pr.yml
vendored
@@ -23,7 +23,7 @@ jobs:
|
||||
has-merges-singlearch: ${{ steps.set-matrix.outputs['has-merges-singlearch'] }}
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v6
|
||||
uses: actions/checkout@v7
|
||||
|
||||
- name: Setup Bun
|
||||
uses: oven-sh/setup-bun@v2
|
||||
|
||||
2
.github/workflows/base-images.yml
vendored
2
.github/workflows/base-images.yml
vendored
@@ -127,7 +127,7 @@ jobs:
|
||||
# the original l4t matrix entry which set skip-drivers: 'true'.
|
||||
skip-drivers: 'true'
|
||||
steps:
|
||||
- uses: actions/checkout@v6
|
||||
- uses: actions/checkout@v7
|
||||
with:
|
||||
submodules: false
|
||||
- name: Free disk space
|
||||
|
||||
6
.github/workflows/build-test.yaml
vendored
6
.github/workflows/build-test.yaml
vendored
@@ -11,7 +11,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v6
|
||||
uses: actions/checkout@v7
|
||||
with:
|
||||
fetch-depth: 0
|
||||
- name: Set up Go
|
||||
@@ -25,7 +25,7 @@ jobs:
|
||||
runs-on: macos-latest
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v6
|
||||
uses: actions/checkout@v7
|
||||
with:
|
||||
fetch-depth: 0
|
||||
- name: Set up Go
|
||||
@@ -47,7 +47,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v6
|
||||
uses: actions/checkout@v7
|
||||
with:
|
||||
fetch-depth: 0
|
||||
- name: Configure apt mirror on runner
|
||||
|
||||
@@ -14,7 +14,7 @@ jobs:
|
||||
bump:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v6
|
||||
- uses: actions/checkout@v7
|
||||
|
||||
- uses: actions/setup-go@v5
|
||||
with:
|
||||
|
||||
4
.github/workflows/bump_deps.yaml
vendored
4
.github/workflows/bump_deps.yaml
vendored
@@ -92,7 +92,7 @@ jobs:
|
||||
file: "backend/go/vibevoice-cpp/Makefile"
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v6
|
||||
- uses: actions/checkout@v7
|
||||
- name: Bump dependencies 🔧
|
||||
id: bump
|
||||
run: |
|
||||
@@ -128,7 +128,7 @@ jobs:
|
||||
if: github.repository == 'mudler/LocalAI'
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v6
|
||||
- uses: actions/checkout@v7
|
||||
- name: Bump vLLM cu130 wheel pin 🔧
|
||||
id: bump
|
||||
run: |
|
||||
|
||||
2
.github/workflows/bump_docs.yaml
vendored
2
.github/workflows/bump_docs.yaml
vendored
@@ -13,7 +13,7 @@ jobs:
|
||||
- repository: "mudler/LocalAI"
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v6
|
||||
- uses: actions/checkout@v7
|
||||
- name: Bump dependencies 🔧
|
||||
run: |
|
||||
bash .github/bump_docs.sh ${{ matrix.repository }}
|
||||
|
||||
2
.github/workflows/checksum_checker.yaml
vendored
2
.github/workflows/checksum_checker.yaml
vendored
@@ -8,7 +8,7 @@ jobs:
|
||||
if: github.repository == 'mudler/LocalAI'
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v6
|
||||
- uses: actions/checkout@v7
|
||||
- name: Configure apt mirror on runner
|
||||
uses: ./.github/actions/configure-apt-mirror
|
||||
- name: Install dependencies
|
||||
|
||||
2
.github/workflows/deploy-explorer.yaml
vendored
2
.github/workflows/deploy-explorer.yaml
vendored
@@ -16,7 +16,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v6
|
||||
uses: actions/checkout@v7
|
||||
with:
|
||||
submodules: true
|
||||
- uses: actions/setup-go@v5
|
||||
|
||||
2
.github/workflows/gallery-agent.yaml
vendored
2
.github/workflows/gallery-agent.yaml
vendored
@@ -31,7 +31,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v6
|
||||
uses: actions/checkout@v7
|
||||
with:
|
||||
token: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
|
||||
2
.github/workflows/generate_intel_image.yaml
vendored
2
.github/workflows/generate_intel_image.yaml
vendored
@@ -44,7 +44,7 @@ jobs:
|
||||
uses: docker/setup-buildx-action@master
|
||||
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v6
|
||||
uses: actions/checkout@v7
|
||||
|
||||
- name: Cache Intel images
|
||||
uses: docker/build-push-action@v7
|
||||
|
||||
2
.github/workflows/gh-pages.yml
vendored
2
.github/workflows/gh-pages.yml
vendored
@@ -28,7 +28,7 @@ jobs:
|
||||
HUGO_VERSION: "0.146.3"
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v6
|
||||
uses: actions/checkout@v7
|
||||
with:
|
||||
fetch-depth: 0 # needed for enableGitInfo
|
||||
submodules: true
|
||||
|
||||
2
.github/workflows/image_build.yml
vendored
2
.github/workflows/image_build.yml
vendored
@@ -80,7 +80,7 @@ jobs:
|
||||
steps:
|
||||
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v6
|
||||
uses: actions/checkout@v7
|
||||
|
||||
- name: Configure apt mirror on runner
|
||||
id: apt_mirror
|
||||
|
||||
2
.github/workflows/image_merge.yml
vendored
2
.github/workflows/image_merge.yml
vendored
@@ -36,7 +36,7 @@ jobs:
|
||||
# Sparse checkout: needed for .github/scripts/ (the keepalive cleanup
|
||||
# script). Skips the rest of the source tree.
|
||||
- name: Checkout (.github/scripts only)
|
||||
uses: actions/checkout@v6
|
||||
uses: actions/checkout@v7
|
||||
with:
|
||||
sparse-checkout: |
|
||||
.github/scripts
|
||||
|
||||
2
.github/workflows/lint.yml
vendored
2
.github/workflows/lint.yml
vendored
@@ -20,7 +20,7 @@ jobs:
|
||||
golangci-lint:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v6
|
||||
- uses: actions/checkout@v7
|
||||
with:
|
||||
# Full history so golangci-lint's new-from-merge-base can reach
|
||||
# origin/master and compute the diff against it.
|
||||
|
||||
6
.github/workflows/release.yaml
vendored
6
.github/workflows/release.yaml
vendored
@@ -10,7 +10,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v6
|
||||
uses: actions/checkout@v7
|
||||
with:
|
||||
fetch-depth: 0
|
||||
- name: Set up Go
|
||||
@@ -28,7 +28,7 @@ jobs:
|
||||
runs-on: macos-latest
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v6
|
||||
uses: actions/checkout@v7
|
||||
with:
|
||||
fetch-depth: 0
|
||||
- name: Set up Go
|
||||
@@ -46,7 +46,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v6
|
||||
uses: actions/checkout@v7
|
||||
with:
|
||||
fetch-depth: 0
|
||||
- name: Configure apt mirror on runner
|
||||
|
||||
2
.github/workflows/secscan.yaml
vendored
2
.github/workflows/secscan.yaml
vendored
@@ -14,7 +14,7 @@ jobs:
|
||||
GO111MODULE: on
|
||||
steps:
|
||||
- name: Checkout Source
|
||||
uses: actions/checkout@v6
|
||||
uses: actions/checkout@v7
|
||||
if: ${{ github.actor != 'dependabot[bot]' }}
|
||||
- name: Run Gosec Security Scanner
|
||||
if: ${{ github.actor != 'dependabot[bot]' }}
|
||||
|
||||
86
.github/workflows/test-extra.yml
vendored
86
.github/workflows/test-extra.yml
vendored
@@ -50,7 +50,7 @@ jobs:
|
||||
parakeet-cpp: ${{ steps.detect.outputs.parakeet-cpp }}
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v6
|
||||
uses: actions/checkout@v7
|
||||
- name: Setup Bun
|
||||
uses: oven-sh/setup-bun@v2
|
||||
- name: Install dependencies
|
||||
@@ -67,7 +67,7 @@ jobs:
|
||||
# runs-on: ubuntu-latest
|
||||
# steps:
|
||||
# - name: Clone
|
||||
# uses: actions/checkout@v6
|
||||
# uses: actions/checkout@v7
|
||||
# with:
|
||||
# submodules: true
|
||||
# - name: Dependencies
|
||||
@@ -90,7 +90,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v6
|
||||
uses: actions/checkout@v7
|
||||
with:
|
||||
submodules: true
|
||||
- name: Dependencies
|
||||
@@ -113,7 +113,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v6
|
||||
uses: actions/checkout@v7
|
||||
with:
|
||||
submodules: true
|
||||
- name: Dependencies
|
||||
@@ -137,7 +137,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v6
|
||||
uses: actions/checkout@v7
|
||||
with:
|
||||
submodules: true
|
||||
- name: Dependencies
|
||||
@@ -158,7 +158,7 @@ jobs:
|
||||
# runs-on: ubuntu-latest
|
||||
# steps:
|
||||
# - name: Clone
|
||||
# uses: actions/checkout@v6
|
||||
# uses: actions/checkout@v7
|
||||
# with:
|
||||
# submodules: true
|
||||
# - name: Dependencies
|
||||
@@ -178,7 +178,7 @@ jobs:
|
||||
# runs-on: ubuntu-latest
|
||||
# steps:
|
||||
# - name: Clone
|
||||
# uses: actions/checkout@v6
|
||||
# uses: actions/checkout@v7
|
||||
# with:
|
||||
# submodules: true
|
||||
# - name: Dependencies
|
||||
@@ -240,7 +240,7 @@ jobs:
|
||||
# sudo rm -rf "$AGENT_TOOLSDIRECTORY" || true
|
||||
# df -h
|
||||
# - name: Clone
|
||||
# uses: actions/checkout@v6
|
||||
# uses: actions/checkout@v7
|
||||
# with:
|
||||
# submodules: true
|
||||
# - name: Dependencies
|
||||
@@ -265,7 +265,7 @@ jobs:
|
||||
# runs-on: ubuntu-latest
|
||||
# steps:
|
||||
# - name: Clone
|
||||
# uses: actions/checkout@v6
|
||||
# uses: actions/checkout@v7
|
||||
# with:
|
||||
# submodules: true
|
||||
# - name: Dependencies
|
||||
@@ -288,7 +288,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v6
|
||||
uses: actions/checkout@v7
|
||||
with:
|
||||
submodules: true
|
||||
- name: Dependencies
|
||||
@@ -309,7 +309,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v6
|
||||
uses: actions/checkout@v7
|
||||
with:
|
||||
submodules: true
|
||||
- name: Dependencies
|
||||
@@ -330,7 +330,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v6
|
||||
uses: actions/checkout@v7
|
||||
with:
|
||||
submodules: true
|
||||
- name: Dependencies
|
||||
@@ -351,7 +351,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v6
|
||||
uses: actions/checkout@v7
|
||||
with:
|
||||
submodules: true
|
||||
- name: Dependencies
|
||||
@@ -373,7 +373,7 @@ jobs:
|
||||
# timeout-minutes: 45
|
||||
# steps:
|
||||
# - name: Clone
|
||||
# uses: actions/checkout@v6
|
||||
# uses: actions/checkout@v7
|
||||
# with:
|
||||
# submodules: true
|
||||
# - name: Dependencies
|
||||
@@ -394,7 +394,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v6
|
||||
uses: actions/checkout@v7
|
||||
with:
|
||||
submodules: true
|
||||
- name: Dependencies
|
||||
@@ -415,7 +415,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v6
|
||||
uses: actions/checkout@v7
|
||||
with:
|
||||
submodules: true
|
||||
- name: Dependencies
|
||||
@@ -436,7 +436,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v6
|
||||
uses: actions/checkout@v7
|
||||
with:
|
||||
submodules: true
|
||||
- name: Dependencies
|
||||
@@ -462,7 +462,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v6
|
||||
uses: actions/checkout@v7
|
||||
with:
|
||||
submodules: true
|
||||
- name: Dependencies
|
||||
@@ -484,7 +484,7 @@ jobs:
|
||||
timeout-minutes: 30
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v6
|
||||
uses: actions/checkout@v7
|
||||
with:
|
||||
submodules: true
|
||||
- name: Dependencies
|
||||
@@ -513,7 +513,7 @@ jobs:
|
||||
timeout-minutes: 90
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v6
|
||||
uses: actions/checkout@v7
|
||||
with:
|
||||
submodules: true
|
||||
- name: Setup Go
|
||||
@@ -530,7 +530,7 @@ jobs:
|
||||
timeout-minutes: 90
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v6
|
||||
uses: actions/checkout@v7
|
||||
with:
|
||||
submodules: true
|
||||
- name: Setup Go
|
||||
@@ -552,7 +552,7 @@ jobs:
|
||||
timeout-minutes: 20
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v6
|
||||
uses: actions/checkout@v7
|
||||
with:
|
||||
submodules: true
|
||||
- name: Setup Go
|
||||
@@ -579,7 +579,7 @@ jobs:
|
||||
timeout-minutes: 90
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v6
|
||||
uses: actions/checkout@v7
|
||||
with:
|
||||
submodules: true
|
||||
- name: Setup Go
|
||||
@@ -604,7 +604,7 @@ jobs:
|
||||
timeout-minutes: 90
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v6
|
||||
uses: actions/checkout@v7
|
||||
with:
|
||||
submodules: true
|
||||
- name: Setup Go
|
||||
@@ -625,7 +625,7 @@ jobs:
|
||||
timeout-minutes: 90
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v6
|
||||
uses: actions/checkout@v7
|
||||
with:
|
||||
submodules: true
|
||||
- name: Setup Go
|
||||
@@ -645,7 +645,7 @@ jobs:
|
||||
timeout-minutes: 90
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v6
|
||||
uses: actions/checkout@v7
|
||||
with:
|
||||
submodules: true
|
||||
- name: Setup Go
|
||||
@@ -664,7 +664,7 @@ jobs:
|
||||
timeout-minutes: 90
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v6
|
||||
uses: actions/checkout@v7
|
||||
with:
|
||||
submodules: true
|
||||
- name: Setup Go
|
||||
@@ -681,7 +681,7 @@ jobs:
|
||||
timeout-minutes: 90
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v6
|
||||
uses: actions/checkout@v7
|
||||
with:
|
||||
submodules: true
|
||||
- name: Setup Go
|
||||
@@ -698,7 +698,7 @@ jobs:
|
||||
timeout-minutes: 90
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v6
|
||||
uses: actions/checkout@v7
|
||||
with:
|
||||
submodules: true
|
||||
- name: Setup Go
|
||||
@@ -741,7 +741,7 @@ jobs:
|
||||
# timeout-minutes: 90
|
||||
# steps:
|
||||
# - name: Clone
|
||||
# uses: actions/checkout@v6
|
||||
# uses: actions/checkout@v7
|
||||
# with:
|
||||
# submodules: true
|
||||
# - name: Dependencies
|
||||
@@ -783,7 +783,7 @@ jobs:
|
||||
# timeout-minutes: 90
|
||||
# steps:
|
||||
# - name: Clone
|
||||
# uses: actions/checkout@v6
|
||||
# uses: actions/checkout@v7
|
||||
# with:
|
||||
# submodules: true
|
||||
# - name: Dependencies
|
||||
@@ -808,7 +808,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v6
|
||||
uses: actions/checkout@v7
|
||||
with:
|
||||
submodules: true
|
||||
- name: Dependencies
|
||||
@@ -840,7 +840,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v6
|
||||
uses: actions/checkout@v7
|
||||
with:
|
||||
submodules: true
|
||||
- name: Dependencies
|
||||
@@ -876,7 +876,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v6
|
||||
uses: actions/checkout@v7
|
||||
with:
|
||||
submodules: true
|
||||
- name: Dependencies
|
||||
@@ -915,7 +915,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v6
|
||||
uses: actions/checkout@v7
|
||||
with:
|
||||
submodules: true
|
||||
- name: Dependencies
|
||||
@@ -952,7 +952,7 @@ jobs:
|
||||
timeout-minutes: 90
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v6
|
||||
uses: actions/checkout@v7
|
||||
with:
|
||||
submodules: true
|
||||
- name: Dependencies
|
||||
@@ -987,7 +987,7 @@ jobs:
|
||||
timeout-minutes: 90
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v6
|
||||
uses: actions/checkout@v7
|
||||
with:
|
||||
submodules: true
|
||||
- name: Setup Go
|
||||
@@ -1013,7 +1013,7 @@ jobs:
|
||||
timeout-minutes: 150
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v6
|
||||
uses: actions/checkout@v7
|
||||
with:
|
||||
submodules: true
|
||||
- name: Dependencies
|
||||
@@ -1042,7 +1042,7 @@ jobs:
|
||||
timeout-minutes: 60
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v6
|
||||
uses: actions/checkout@v7
|
||||
with:
|
||||
submodules: true
|
||||
- name: Setup Go
|
||||
@@ -1058,7 +1058,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v6
|
||||
uses: actions/checkout@v7
|
||||
with:
|
||||
submodules: true
|
||||
- name: Dependencies
|
||||
@@ -1091,7 +1091,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v6
|
||||
uses: actions/checkout@v7
|
||||
with:
|
||||
submodules: true
|
||||
- name: Dependencies
|
||||
@@ -1114,7 +1114,7 @@ jobs:
|
||||
timeout-minutes: 90
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v6
|
||||
uses: actions/checkout@v7
|
||||
with:
|
||||
submodules: true
|
||||
- name: Dependencies
|
||||
@@ -1140,7 +1140,7 @@ jobs:
|
||||
timeout-minutes: 90
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v6
|
||||
uses: actions/checkout@v7
|
||||
with:
|
||||
submodules: true
|
||||
- name: Dependencies
|
||||
|
||||
4
.github/workflows/test.yml
vendored
4
.github/workflows/test.yml
vendored
@@ -21,7 +21,7 @@ jobs:
|
||||
go-version: ['1.26.x']
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v6
|
||||
uses: actions/checkout@v7
|
||||
with:
|
||||
submodules: true
|
||||
- name: Free disk space
|
||||
@@ -84,7 +84,7 @@ jobs:
|
||||
go-version: ['1.26.x']
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v6
|
||||
uses: actions/checkout@v7
|
||||
with:
|
||||
submodules: true
|
||||
- name: Setup Go ${{ matrix.go-version }}
|
||||
|
||||
2
.github/workflows/tests-aio.yml
vendored
2
.github/workflows/tests-aio.yml
vendored
@@ -62,7 +62,7 @@ jobs:
|
||||
sudo rm -rfv build || true
|
||||
df -h
|
||||
- name: Clone
|
||||
uses: actions/checkout@v6
|
||||
uses: actions/checkout@v7
|
||||
with:
|
||||
submodules: true
|
||||
- name: Dependencies
|
||||
|
||||
2
.github/workflows/tests-e2e.yml
vendored
2
.github/workflows/tests-e2e.yml
vendored
@@ -21,7 +21,7 @@ jobs:
|
||||
go-version: ['1.25.x']
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v6
|
||||
uses: actions/checkout@v7
|
||||
with:
|
||||
submodules: true
|
||||
- name: Configure apt mirror on runner
|
||||
|
||||
97
.github/workflows/tests-pii-ner-e2e.yml
vendored
Normal file
97
.github/workflows/tests-pii-ner-e2e.yml
vendored
Normal file
@@ -0,0 +1,97 @@
|
||||
---
|
||||
name: 'PII NER tier E2E (live GGUF, CPU)'
|
||||
|
||||
# Runs the real privacy-filter GGUF NER tier end-to-end on CPU — the gap the
|
||||
# hermetic tests/e2e suite cannot cover (it only exercises the in-process
|
||||
# pattern tier). Heavy (builds the C++ backend image + downloads a ~2.7 GB
|
||||
# GGUF), so it is path-filtered on PRs and otherwise runs nightly / on demand.
|
||||
#
|
||||
# This drives the container-level harness (tests/e2e-backends) via
|
||||
# `make test-extra-backend-privacy-filter`: it builds the privacy-filter image,
|
||||
# downloads the model, loads it on CPU, and asserts byte-correct, UTF-8-aligned
|
||||
# TokenClassify spans. The complementary HTTP-path specs in tests/e2e
|
||||
# (e2e_pii_ner_test.go) Skip unless PII_NER_MODEL_GGUF is wired.
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
schedule:
|
||||
- cron: '0 3 * * *'
|
||||
push:
|
||||
branches:
|
||||
- master
|
||||
paths:
|
||||
- 'backend/cpp/privacy-filter/**'
|
||||
- 'backend/Dockerfile.privacy-filter'
|
||||
- 'core/services/routing/pii/**'
|
||||
- 'core/services/routing/piidetector/**'
|
||||
- 'core/backend/token_classify.go'
|
||||
- 'core/http/endpoints/localai/pii.go'
|
||||
- 'core/schema/pii.go'
|
||||
- 'tests/e2e-backends/**'
|
||||
- 'tests/e2e/e2e_pii_ner_test.go'
|
||||
- 'tests/e2e/e2e_suite_test.go'
|
||||
- '.github/workflows/tests-pii-ner-e2e.yml'
|
||||
pull_request:
|
||||
paths:
|
||||
- 'backend/cpp/privacy-filter/**'
|
||||
- 'backend/Dockerfile.privacy-filter'
|
||||
- 'core/services/routing/pii/**'
|
||||
- 'core/services/routing/piidetector/**'
|
||||
- 'core/backend/token_classify.go'
|
||||
- 'core/http/endpoints/localai/pii.go'
|
||||
- 'core/schema/pii.go'
|
||||
- 'tests/e2e-backends/**'
|
||||
- 'tests/e2e/e2e_pii_ner_test.go'
|
||||
- 'tests/e2e/e2e_suite_test.go'
|
||||
- '.github/workflows/tests-pii-ner-e2e.yml'
|
||||
|
||||
concurrency:
|
||||
group: ci-tests-pii-ner-e2e-${{ github.event.pull_request.number || github.sha }}-${{ github.repository }}
|
||||
cancel-in-progress: ${{ github.event_name == 'pull_request' }}
|
||||
|
||||
jobs:
|
||||
tests-pii-ner-e2e:
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
matrix:
|
||||
go-version: ['1.25.x']
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v7
|
||||
with:
|
||||
submodules: true
|
||||
- name: Free disk space
|
||||
run: |
|
||||
sudo rm -rf /usr/share/dotnet /usr/local/lib/android /opt/ghc /opt/hostedtoolcache/CodeQL || true
|
||||
sudo docker image prune --all --force || true
|
||||
df -h
|
||||
- name: Configure apt mirror on runner
|
||||
uses: ./.github/actions/configure-apt-mirror
|
||||
- name: Setup Go ${{ matrix.go-version }}
|
||||
uses: actions/setup-go@v5
|
||||
with:
|
||||
go-version: ${{ matrix.go-version }}
|
||||
cache: false
|
||||
- name: Proto Dependencies
|
||||
run: |
|
||||
curl -L -s https://github.com/protocolbuffers/protobuf/releases/download/v26.1/protoc-26.1-linux-x86_64.zip -o protoc.zip && \
|
||||
unzip -j -d /usr/local/bin protoc.zip bin/protoc && \
|
||||
rm protoc.zip
|
||||
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
|
||||
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
|
||||
PATH="$PATH:$HOME/go/bin" make protogen-go
|
||||
- name: Dependencies
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y build-essential
|
||||
# Builds local-ai-backend:privacy-filter, downloads the GGUF, loads it on
|
||||
# CPU and runs the token_classify capability spec (byte-offset contract).
|
||||
- name: Run live PII NER backend E2E
|
||||
run: PATH="$PATH:$HOME/go/bin" make test-extra-backend-privacy-filter
|
||||
- name: Setup tmate session if tests fail
|
||||
if: ${{ failure() }}
|
||||
uses: mxschmitt/action-tmate@v3.23
|
||||
with:
|
||||
detached: true
|
||||
connect-timeout-seconds: 180
|
||||
limit-access-to-actor: true
|
||||
2
.github/workflows/tests-ui-e2e.yml
vendored
2
.github/workflows/tests-ui-e2e.yml
vendored
@@ -23,7 +23,7 @@ jobs:
|
||||
go-version: ['1.26.x']
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v6
|
||||
uses: actions/checkout@v7
|
||||
with:
|
||||
submodules: true
|
||||
- name: Configure apt mirror on runner
|
||||
|
||||
2
.github/workflows/update_swagger.yaml
vendored
2
.github/workflows/update_swagger.yaml
vendored
@@ -10,7 +10,7 @@ jobs:
|
||||
fail-fast: false
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v6
|
||||
- uses: actions/checkout@v7
|
||||
- name: Configure apt mirror on runner
|
||||
uses: ./.github/actions/configure-apt-mirror
|
||||
- uses: actions/setup-go@v5
|
||||
|
||||
3
.gitignore
vendored
3
.gitignore
vendored
@@ -91,3 +91,6 @@ core/http/react-ui/test-results/
|
||||
|
||||
# Local worktrees
|
||||
.worktrees/
|
||||
|
||||
# SDD / brainstorm scratch (agent-driven development)
|
||||
.superpowers/
|
||||
|
||||
10
Makefile
10
Makefile
@@ -690,6 +690,16 @@ test-extra-backend-llama-cpp-transcription: docker-build-llama-cpp
|
||||
BACKEND_TEST_CTX_SIZE=2048 \
|
||||
$(MAKE) test-extra-backend
|
||||
|
||||
## privacy-filter: the PII/NER token-classification backend. Exercises the
|
||||
## TokenClassify RPC and asserts byte-correct, UTF-8-aligned span offsets
|
||||
## against the openai-privacy-filter multilingual GGUF (CPU-runnable, ~50M
|
||||
## active params). This is the live-backend coverage for the PII NER tier.
|
||||
test-extra-backend-privacy-filter: docker-build-privacy-filter
|
||||
BACKEND_IMAGE=local-ai-backend:privacy-filter \
|
||||
BACKEND_TEST_MODEL_URL=https://huggingface.co/LocalAI-io/privacy-filter-multilingual-GGUF/resolve/main/privacy-filter-multilingual-f16.gguf \
|
||||
BACKEND_TEST_CAPS=health,load,token_classify \
|
||||
$(MAKE) test-extra-backend
|
||||
|
||||
## vllm is resolved from a HuggingFace model id (no file download) and
|
||||
## exercises Predict + streaming + tool-call extraction via the hermes parser.
|
||||
## Requires a host CPU with the SIMD instructions the prebuilt vllm CPU
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
|
||||
IK_LLAMA_VERSION?=6c00e87ac84404af588ad2e65935bd6f079c696f
|
||||
IK_LLAMA_VERSION?=7ccf1d209588962b96eacca325b37e9b3e8faf5e
|
||||
LLAMA_REPO?=https://github.com/ikawrakow/ik_llama.cpp
|
||||
|
||||
CMAKE_ARGS?=
|
||||
|
||||
@@ -50,8 +50,13 @@ add_custom_command(
|
||||
"${hw_proto}"
|
||||
DEPENDS "${hw_proto}")
|
||||
|
||||
# hw_grpc_proto
|
||||
add_library(hw_grpc_proto
|
||||
# hw_grpc_proto: force STATIC. Under the CPU_ALL_VARIANTS build BUILD_SHARED_LIBS=ON
|
||||
# (ggml/llama become shared), which would otherwise make this glue library a DSO. As a
|
||||
# DSO it references the hidden-visibility symbols in the static libprotobuf.a, which the
|
||||
# linker cannot satisfy ("hidden symbol ... in libprotobuf.a is referenced by DSO").
|
||||
# Keeping it STATIC links protobuf/gRPC directly into the grpc-server executable while
|
||||
# only ggml/llama stay shared. No effect on the static variants (already BUILD_SHARED_LIBS=OFF).
|
||||
add_library(hw_grpc_proto STATIC
|
||||
${hw_grpc_srcs}
|
||||
${hw_grpc_hdrs}
|
||||
${hw_proto_srcs}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
|
||||
LLAMA_VERSION?=e475fa2b5f9fb50c3d6fc3e7c6fdf1e004465b62
|
||||
LLAMA_VERSION?=be4a6a63eb2b848e19c277bdcf2bd399e8af76d9
|
||||
LLAMA_REPO?=https://github.com/ggerganov/llama.cpp
|
||||
|
||||
CMAKE_ARGS?=
|
||||
@@ -10,8 +10,16 @@ TARGET?=--target grpc-server
|
||||
JOBS?=$(shell nproc 2>/dev/null || sysctl -n hw.ncpu 2>/dev/null || echo 1)
|
||||
ARCH?=$(shell uname -m)
|
||||
|
||||
# Disable shared libs: we link against static gRPC and we can't mix shared and static
|
||||
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF -DLLAMA_CURL=OFF
|
||||
# Shared libs default to OFF: we link static gRPC and the avx/avx2/avx512/fallback
|
||||
# variants are fully static. The CPU_ALL_VARIANTS build flips SHARED_LIBS=ON (ggml/llama
|
||||
# become shared so the dynamic CPU backends work; gRPC stays static via its imported
|
||||
# targets). SHARED_LIBS is a make variable, not an appended -D, so it survives the
|
||||
# recursive sub-make into the VARIANT build dir (which re-parses this Makefile) instead
|
||||
# of being re-clobbered by a second -DBUILD_SHARED_LIBS=OFF. EXTRA_CMAKE_ARGS is the hook
|
||||
# the CPU_ALL_VARIANTS target uses to inject -DGGML_BACKEND_DL/-DGGML_CPU_ALL_VARIANTS.
|
||||
SHARED_LIBS?=OFF
|
||||
EXTRA_CMAKE_ARGS?=
|
||||
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=$(SHARED_LIBS) -DLLAMA_CURL=OFF $(EXTRA_CMAKE_ARGS)
|
||||
|
||||
CURRENT_MAKEFILE_DIR := $(dir $(abspath $(lastword $(MAKEFILE_LIST))))
|
||||
ifeq ($(NATIVE),false)
|
||||
@@ -120,6 +128,30 @@ llama-cpp-fallback: llama.cpp
|
||||
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) VARIANT="llama-cpp-fallback-build" build-llama-cpp-grpc-server
|
||||
cp -rfv $(CURRENT_MAKEFILE_DIR)/../llama-cpp-fallback-build/grpc-server llama-cpp-fallback
|
||||
|
||||
# Single-build CPU backend using ggml's CPU_ALL_VARIANTS. Produces ONE grpc-server
|
||||
# plus a set of dlopen-able libggml-cpu-*.so (sandybridge/haswell/skylakex/...) that
|
||||
# ggml's backend registry selects from at runtime by probing host CPU features.
|
||||
# Replaces the avx/avx2/avx512/fallback multi-binary build on x86.
|
||||
#
|
||||
# CPU_ALL_VARIANTS requires GGML_BACKEND_DL, which requires BUILD_SHARED_LIBS=ON, so we
|
||||
# pass SHARED_LIBS=ON and the DL flags as make variables (NOT pre-expanded into the
|
||||
# CMAKE_ARGS env string): command-line make variables propagate through every recursive
|
||||
# sub-make, so the deepest VARIANT-dir build computes BUILD_SHARED_LIBS=ON consistently.
|
||||
# Only ggml/llama go shared - gRPC is found via its static imported targets, so the
|
||||
# grpc-server binary keeps static gRPC and only dynamically links ggml.
|
||||
#
|
||||
# TARGET adds "ggml": the per-microarch backends are runtime-dlopened, not link deps of
|
||||
# grpc-server, so they only build because each is an add_dependencies() of the ggml target.
|
||||
llama-cpp-cpu-all: llama.cpp
|
||||
cp -rf $(CURRENT_MAKEFILE_DIR)/../llama-cpp $(CURRENT_MAKEFILE_DIR)/../llama-cpp-cpu-all-build
|
||||
$(MAKE) -C $(CURRENT_MAKEFILE_DIR)/../llama-cpp-cpu-all-build purge
|
||||
$(info ${GREEN}I llama-cpp build info:cpu-all-variants${RESET})
|
||||
$(MAKE) SHARED_LIBS=ON EXTRA_CMAKE_ARGS="-DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON" TARGET="--target grpc-server --target ggml" VARIANT="llama-cpp-cpu-all-build" build-llama-cpp-grpc-server
|
||||
cp -rfv $(CURRENT_MAKEFILE_DIR)/../llama-cpp-cpu-all-build/grpc-server llama-cpp-cpu-all
|
||||
rm -rf ggml-shared-libs && mkdir -p ggml-shared-libs
|
||||
find $(CURRENT_MAKEFILE_DIR)/../llama-cpp-cpu-all-build/llama.cpp/build \( -name '*.so*' -o -name '*.dylib' \) -exec cp -av {} ggml-shared-libs/ \;
|
||||
@echo "Collected ggml shared backends:" && ls -la ggml-shared-libs/
|
||||
|
||||
llama-cpp-grpc: llama.cpp
|
||||
cp -rf $(CURRENT_MAKEFILE_DIR)/../llama-cpp $(CURRENT_MAKEFILE_DIR)/../llama-cpp-grpc-build
|
||||
$(MAKE) -C $(CURRENT_MAKEFILE_DIR)/../llama-cpp-grpc-build purge
|
||||
|
||||
@@ -14,6 +14,22 @@ mkdir -p $CURDIR/package/lib
|
||||
cp -avrf $CURDIR/llama-cpp-* $CURDIR/package/
|
||||
cp -rfv $CURDIR/run.sh $CURDIR/package/
|
||||
|
||||
# Bundle the ggml shared backends produced by the CPU_ALL_VARIANTS build (libggml-base.so,
|
||||
# libggml.so, libllama.so and the per-microarch libggml-cpu-*.so), all into package/lib.
|
||||
#
|
||||
# Two distinct resolution mechanisms both land here:
|
||||
# - NEEDED deps (libggml-base/libggml/libllama): resolved by the dynamic linker via the
|
||||
# LD_LIBRARY_PATH=$CURDIR/lib that run.sh exports.
|
||||
# - The per-microarch libggml-cpu-*.so are NOT linked; ggml *discovers* them at runtime by
|
||||
# scanning the executable's own directory (readlink /proc/self/exe). run.sh launches via
|
||||
# the bundled $CURDIR/lib/ld.so, so /proc/self/exe -> .../lib/ld.so and ggml scans lib/.
|
||||
# That is why the variants must sit in lib/ (next to ld.so), not just on the link path.
|
||||
# No-op on builds that don't produce the all-variants set (e.g. GPU images, which ship only llama-cpp-fallback).
|
||||
if [ -d "$CURDIR/ggml-shared-libs" ]; then
|
||||
echo "Bundling ggml shared backends (CPU_ALL_VARIANTS)..."
|
||||
cp -avf $CURDIR/ggml-shared-libs/*.so* $CURDIR/package/lib/
|
||||
fi
|
||||
|
||||
# Detect architecture and copy appropriate libraries
|
||||
if [ -f "/lib64/ld-linux-x86-64.so.2" ]; then
|
||||
# x86_64 architecture
|
||||
|
||||
@@ -12,26 +12,12 @@ grep -e "flags" /proc/cpuinfo | head -1
|
||||
|
||||
BINARY=llama-cpp-fallback
|
||||
|
||||
if grep -q -e "\savx\s" /proc/cpuinfo ; then
|
||||
echo "CPU: AVX found OK"
|
||||
if [ -e $CURDIR/llama-cpp-avx ]; then
|
||||
BINARY=llama-cpp-avx
|
||||
fi
|
||||
fi
|
||||
|
||||
if grep -q -e "\savx2\s" /proc/cpuinfo ; then
|
||||
echo "CPU: AVX2 found OK"
|
||||
if [ -e $CURDIR/llama-cpp-avx2 ]; then
|
||||
BINARY=llama-cpp-avx2
|
||||
fi
|
||||
fi
|
||||
|
||||
# Check avx 512
|
||||
if grep -q -e "\savx512f\s" /proc/cpuinfo ; then
|
||||
echo "CPU: AVX512F found OK"
|
||||
if [ -e $CURDIR/llama-cpp-avx512 ]; then
|
||||
BINARY=llama-cpp-avx512
|
||||
fi
|
||||
# CPU images (x86, arm64, darwin) ship a single llama-cpp-cpu-all built with ggml
|
||||
# CPU_ALL_VARIANTS: ggml's backend registry dlopens the best libggml-cpu-*.so for this
|
||||
# host, so no shell-side AVX probing. GPU images (cublas/sycl/vulkan/hipblas) ship only
|
||||
# llama-cpp-fallback (the accelerator does the compute), so fall back to it when absent.
|
||||
if [ -e $CURDIR/llama-cpp-cpu-all ]; then
|
||||
BINARY=llama-cpp-cpu-all
|
||||
fi
|
||||
|
||||
if [ -n "$LLAMACPP_GRPC_SERVERS" ]; then
|
||||
|
||||
@@ -65,6 +65,29 @@ turboquant-avx:
|
||||
turboquant-fallback:
|
||||
$(call turboquant-build,fallback,-DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off,--target grpc-server)
|
||||
|
||||
# Single-build CPU backend via ggml CPU_ALL_VARIANTS (mirrors llama-cpp-cpu-all).
|
||||
# turboquant reuses backend/cpp/llama-cpp's CMakeLists.txt (hw_grpc_proto STATIC) and
|
||||
# Makefile (SHARED_LIBS make-var + EXTRA_CMAKE_ARGS), so this passes the same overrides
|
||||
# through to the copied build: SHARED_LIBS=ON, the DL flags, and --target ggml (which
|
||||
# pulls in the per-microarch libggml-cpu-*.so via ggml's add_dependencies). The .so set
|
||||
# is collected for package.sh to bundle into package/lib.
|
||||
turboquant-cpu-all:
|
||||
rm -rf $(CURRENT_MAKEFILE_DIR)/../turboquant-cpu-all-build
|
||||
cp -rf $(LLAMA_CPP_DIR) $(CURRENT_MAKEFILE_DIR)/../turboquant-cpu-all-build
|
||||
$(MAKE) -C $(CURRENT_MAKEFILE_DIR)/../turboquant-cpu-all-build purge
|
||||
bash $(CURRENT_MAKEFILE_DIR)/patch-grpc-server.sh $(CURRENT_MAKEFILE_DIR)/../turboquant-cpu-all-build/grpc-server.cpp
|
||||
$(info $(GREEN)I turboquant build info:cpu-all-variants$(RESET))
|
||||
LLAMA_REPO=$(LLAMA_REPO) LLAMA_VERSION=$(TURBOQUANT_VERSION) \
|
||||
$(MAKE) -C $(CURRENT_MAKEFILE_DIR)/../turboquant-cpu-all-build llama.cpp
|
||||
bash $(CURRENT_MAKEFILE_DIR)/apply-patches.sh $(CURRENT_MAKEFILE_DIR)/../turboquant-cpu-all-build/llama.cpp $(PATCHES_DIR)
|
||||
SHARED_LIBS=ON EXTRA_CMAKE_ARGS="-DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON" TARGET="--target grpc-server --target ggml" \
|
||||
LLAMA_REPO=$(LLAMA_REPO) LLAMA_VERSION=$(TURBOQUANT_VERSION) \
|
||||
$(MAKE) -C $(CURRENT_MAKEFILE_DIR)/../turboquant-cpu-all-build grpc-server
|
||||
cp -rfv $(CURRENT_MAKEFILE_DIR)/../turboquant-cpu-all-build/grpc-server turboquant-cpu-all
|
||||
rm -rf ggml-shared-libs && mkdir -p ggml-shared-libs
|
||||
find $(CURRENT_MAKEFILE_DIR)/../turboquant-cpu-all-build/llama.cpp/build \( -name '*.so*' -o -name '*.dylib' \) -exec cp -av {} ggml-shared-libs/ \;
|
||||
@echo "Collected ggml shared backends:" && ls -la ggml-shared-libs/
|
||||
|
||||
turboquant-grpc:
|
||||
$(call turboquant-build,grpc,-DGGML_RPC=ON -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off,--target grpc-server --target rpc-server)
|
||||
|
||||
|
||||
@@ -14,6 +14,15 @@ mkdir -p $CURDIR/package/lib
|
||||
cp -avrf $CURDIR/turboquant-* $CURDIR/package/
|
||||
cp -rfv $CURDIR/run.sh $CURDIR/package/
|
||||
|
||||
# Bundle the ggml shared backends from the CPU_ALL_VARIANTS build into package/lib. ggml
|
||||
# discovers the per-microarch libggml-cpu-*.so by scanning the executable directory, which
|
||||
# (via the bundled lib/ld.so that run.sh launches through) resolves to lib/. See the
|
||||
# matching comment in backend/cpp/llama-cpp/package.sh. No-op on the fallback/ROCm builds.
|
||||
if [ -d "$CURDIR/ggml-shared-libs" ]; then
|
||||
echo "Bundling ggml shared backends (CPU_ALL_VARIANTS)..."
|
||||
cp -avf $CURDIR/ggml-shared-libs/*.so* $CURDIR/package/lib/
|
||||
fi
|
||||
|
||||
# Detect architecture and copy appropriate libraries
|
||||
if [ -f "/lib64/ld-linux-x86-64.so.2" ]; then
|
||||
# x86_64 architecture
|
||||
|
||||
@@ -12,26 +12,11 @@ grep -e "flags" /proc/cpuinfo | head -1
|
||||
|
||||
BINARY=turboquant-fallback
|
||||
|
||||
if grep -q -e "\savx\s" /proc/cpuinfo ; then
|
||||
echo "CPU: AVX found OK"
|
||||
if [ -e $CURDIR/turboquant-avx ]; then
|
||||
BINARY=turboquant-avx
|
||||
fi
|
||||
fi
|
||||
|
||||
if grep -q -e "\savx2\s" /proc/cpuinfo ; then
|
||||
echo "CPU: AVX2 found OK"
|
||||
if [ -e $CURDIR/turboquant-avx2 ]; then
|
||||
BINARY=turboquant-avx2
|
||||
fi
|
||||
fi
|
||||
|
||||
# Check avx 512
|
||||
if grep -q -e "\savx512f\s" /proc/cpuinfo ; then
|
||||
echo "CPU: AVX512F found OK"
|
||||
if [ -e $CURDIR/turboquant-avx512 ]; then
|
||||
BINARY=turboquant-avx512
|
||||
fi
|
||||
# x86/arm64 ship a single turboquant-cpu-all built with ggml CPU_ALL_VARIANTS: ggml's
|
||||
# backend registry dlopens the best libggml-cpu-*.so for this host, so no shell-side
|
||||
# probing. ROCm ships only turboquant-fallback, so fall back to it when cpu-all is absent.
|
||||
if [ -e $CURDIR/turboquant-cpu-all ]; then
|
||||
BINARY=turboquant-cpu-all
|
||||
fi
|
||||
|
||||
if [ -n "$LLAMACPP_GRPC_SERVERS" ]; then
|
||||
|
||||
@@ -8,7 +8,7 @@ JOBS?=$(shell nproc --ignore=1)
|
||||
|
||||
# CrispASR version (release tag)
|
||||
CRISPASR_REPO?=https://github.com/CrispStrobe/CrispASR
|
||||
CRISPASR_VERSION?=d745bda4386ae0f9d1d2f23fff8ec95d76428221
|
||||
CRISPASR_VERSION?=96b2a6ee31d30389fed8a7ef1a54239b75231ddc
|
||||
SO_TARGET?=libgocrispasr.so
|
||||
|
||||
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# parakeet-cpp backend Makefile.
|
||||
#
|
||||
# Upstream pin lives below as PARAKEET_VERSION?=db755a78d39f789bb7d4e3935158a9e8105dbe36
|
||||
# Upstream pin lives below as PARAKEET_VERSION?=89f5e2977b4d8bccd45e7bcc6f2ef7c4ed49e89a so the bump script
|
||||
# (.github/bump_deps.sh) can find and update it - matches the
|
||||
# whisper.cpp / ds4 / vibevoice-cpp convention.
|
||||
#
|
||||
@@ -15,7 +15,7 @@
|
||||
# That's what the L0 smoke test uses. The default target below does the
|
||||
# proper clone-at-pin + cmake build so CI doesn't need a side-checkout.
|
||||
|
||||
PARAKEET_VERSION?=db755a78d39f789bb7d4e3935158a9e8105dbe36
|
||||
PARAKEET_VERSION?=89f5e2977b4d8bccd45e7bcc6f2ef7c4ed49e89a
|
||||
PARAKEET_REPO?=https://github.com/mudler/parakeet.cpp
|
||||
|
||||
GOCMD?=go
|
||||
|
||||
@@ -8,7 +8,7 @@ JOBS?=$(shell nproc --ignore=1)
|
||||
|
||||
# stablediffusion.cpp (ggml)
|
||||
STABLEDIFFUSION_GGML_REPO?=https://github.com/leejet/stable-diffusion.cpp
|
||||
STABLEDIFFUSION_GGML_VERSION?=b12098f5d09fc83da36e65c784f7bdb16a5a5ebf
|
||||
STABLEDIFFUSION_GGML_VERSION?=f440ad9c29dd8bc34e5d1f4b863832b96d6ea05f
|
||||
|
||||
CMAKE_ARGS+=-DGGML_MAX_NAME=128
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@ JOBS?=$(shell nproc --ignore=1)
|
||||
|
||||
# whisper.cpp version
|
||||
WHISPER_REPO?=https://github.com/ggml-org/whisper.cpp
|
||||
WHISPER_CPP_VERSION?=5ed76e9a079962f1c85cfce44edd325c27ef1f97
|
||||
WHISPER_CPP_VERSION?=43d78af5be58f41d6ffbc227d608f104577741ea
|
||||
SO_TARGET?=libgowhisper.so
|
||||
|
||||
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
--extra-index-url https://download.pytorch.org/whl/cpu
|
||||
git+https://github.com/huggingface/diffusers
|
||||
diffusers==0.38.0
|
||||
opencv-python
|
||||
transformers
|
||||
transformers==4.57.6
|
||||
torchvision==0.22.1
|
||||
accelerate
|
||||
git+https://github.com/xhinker/sd_embed
|
||||
@@ -10,9 +10,15 @@ sentencepiece
|
||||
torch==2.7.1
|
||||
optimum-quanto
|
||||
ftfy
|
||||
# TODO: re-add compel once it supports transformers >= 5.
|
||||
# Tracking: https://github.com/damian0815/compel/pull/129
|
||||
# https://github.com/damian0815/compel/issues/128
|
||||
# compel currently pins transformers~=4.25, which forced pip into multi-hour
|
||||
# resolver backtracking storms in CI. backend.py imports it lazily and gates
|
||||
# the COMPEL=1 env var on the import succeeding, so dropping it here is safe.
|
||||
# diffusers and transformers are pinned together on purpose. transformers v5
|
||||
# restructured CLIPTextModel and dropped the `.text_model` attribute, which
|
||||
# breaks single-file Stable Diffusion loading on every released diffusers
|
||||
# (<=0.38.0); only unreleased diffusers main supports transformers v5. Tracking
|
||||
# main via git froze whichever broken pair existed at image-build time. Pin the
|
||||
# last known-good released pair so builds are reproducible and can't drift into
|
||||
# the broken window. See https://github.com/mudler/LocalAI/issues/9979
|
||||
#
|
||||
# compel is intentionally omitted: it pins transformers~=4.25, which conflicts
|
||||
# with this pin and previously forced pip into multi-hour resolver backtracking
|
||||
# storms in CI. backend.py imports it lazily and gates the COMPEL=1 env var on
|
||||
# the import succeeding, so dropping it here is safe.
|
||||
@@ -1,7 +1,7 @@
|
||||
--extra-index-url https://download.pytorch.org/whl/cu121
|
||||
git+https://github.com/huggingface/diffusers
|
||||
diffusers==0.38.0
|
||||
opencv-python
|
||||
transformers
|
||||
transformers==4.57.6
|
||||
torchvision
|
||||
accelerate
|
||||
git+https://github.com/xhinker/sd_embed
|
||||
@@ -10,9 +10,15 @@ sentencepiece
|
||||
torch
|
||||
ftfy
|
||||
optimum-quanto
|
||||
# TODO: re-add compel once it supports transformers >= 5.
|
||||
# Tracking: https://github.com/damian0815/compel/pull/129
|
||||
# https://github.com/damian0815/compel/issues/128
|
||||
# compel currently pins transformers~=4.25, which forced pip into multi-hour
|
||||
# resolver backtracking storms in CI. backend.py imports it lazily and gates
|
||||
# the COMPEL=1 env var on the import succeeding, so dropping it here is safe.
|
||||
# diffusers and transformers are pinned together on purpose. transformers v5
|
||||
# restructured CLIPTextModel and dropped the `.text_model` attribute, which
|
||||
# breaks single-file Stable Diffusion loading on every released diffusers
|
||||
# (<=0.38.0); only unreleased diffusers main supports transformers v5. Tracking
|
||||
# main via git froze whichever broken pair existed at image-build time. Pin the
|
||||
# last known-good released pair so builds are reproducible and can't drift into
|
||||
# the broken window. See https://github.com/mudler/LocalAI/issues/9979
|
||||
#
|
||||
# compel is intentionally omitted: it pins transformers~=4.25, which conflicts
|
||||
# with this pin and previously forced pip into multi-hour resolver backtracking
|
||||
# storms in CI. backend.py imports it lazily and gates the COMPEL=1 env var on
|
||||
# the import succeeding, so dropping it here is safe.
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
--extra-index-url https://download.pytorch.org/whl/cu130
|
||||
git+https://github.com/huggingface/diffusers
|
||||
diffusers==0.38.0
|
||||
opencv-python
|
||||
transformers
|
||||
transformers==4.57.6
|
||||
torchvision
|
||||
accelerate
|
||||
git+https://github.com/xhinker/sd_embed
|
||||
@@ -10,9 +10,15 @@ sentencepiece
|
||||
torch
|
||||
ftfy
|
||||
optimum-quanto
|
||||
# TODO: re-add compel once it supports transformers >= 5.
|
||||
# Tracking: https://github.com/damian0815/compel/pull/129
|
||||
# https://github.com/damian0815/compel/issues/128
|
||||
# compel currently pins transformers~=4.25, which forced pip into multi-hour
|
||||
# resolver backtracking storms in CI. backend.py imports it lazily and gates
|
||||
# the COMPEL=1 env var on the import succeeding, so dropping it here is safe.
|
||||
# diffusers and transformers are pinned together on purpose. transformers v5
|
||||
# restructured CLIPTextModel and dropped the `.text_model` attribute, which
|
||||
# breaks single-file Stable Diffusion loading on every released diffusers
|
||||
# (<=0.38.0); only unreleased diffusers main supports transformers v5. Tracking
|
||||
# main via git froze whichever broken pair existed at image-build time. Pin the
|
||||
# last known-good released pair so builds are reproducible and can't drift into
|
||||
# the broken window. See https://github.com/mudler/LocalAI/issues/9979
|
||||
#
|
||||
# compel is intentionally omitted: it pins transformers~=4.25, which conflicts
|
||||
# with this pin and previously forced pip into multi-hour resolver backtracking
|
||||
# storms in CI. backend.py imports it lazily and gates the COMPEL=1 env var on
|
||||
# the import succeeding, so dropping it here is safe.
|
||||
|
||||
@@ -1,17 +1,23 @@
|
||||
--extra-index-url https://download.pytorch.org/whl/rocm7.0
|
||||
torch==2.10.0+rocm7.0
|
||||
torchvision==0.25.0+rocm7.0
|
||||
git+https://github.com/huggingface/diffusers
|
||||
diffusers==0.38.0
|
||||
opencv-python
|
||||
transformers
|
||||
transformers==4.57.6
|
||||
accelerate
|
||||
peft
|
||||
sentencepiece
|
||||
optimum-quanto
|
||||
ftfy
|
||||
# TODO: re-add compel once it supports transformers >= 5.
|
||||
# Tracking: https://github.com/damian0815/compel/pull/129
|
||||
# https://github.com/damian0815/compel/issues/128
|
||||
# compel currently pins transformers~=4.25, which forced pip into multi-hour
|
||||
# resolver backtracking storms in CI. backend.py imports it lazily and gates
|
||||
# the COMPEL=1 env var on the import succeeding, so dropping it here is safe.
|
||||
# diffusers and transformers are pinned together on purpose. transformers v5
|
||||
# restructured CLIPTextModel and dropped the `.text_model` attribute, which
|
||||
# breaks single-file Stable Diffusion loading on every released diffusers
|
||||
# (<=0.38.0); only unreleased diffusers main supports transformers v5. Tracking
|
||||
# main via git froze whichever broken pair existed at image-build time. Pin the
|
||||
# last known-good released pair so builds are reproducible and can't drift into
|
||||
# the broken window. See https://github.com/mudler/LocalAI/issues/9979
|
||||
#
|
||||
# compel is intentionally omitted: it pins transformers~=4.25, which conflicts
|
||||
# with this pin and previously forced pip into multi-hour resolver backtracking
|
||||
# storms in CI. backend.py imports it lazily and gates the COMPEL=1 env var on
|
||||
# the import succeeding, so dropping it here is safe.
|
||||
@@ -3,18 +3,24 @@ torch
|
||||
torchvision
|
||||
optimum[openvino]
|
||||
setuptools
|
||||
git+https://github.com/huggingface/diffusers
|
||||
diffusers==0.38.0
|
||||
opencv-python
|
||||
transformers
|
||||
transformers==4.57.6
|
||||
accelerate
|
||||
git+https://github.com/xhinker/sd_embed
|
||||
peft
|
||||
sentencepiece
|
||||
optimum-quanto
|
||||
ftfy
|
||||
# TODO: re-add compel once it supports transformers >= 5.
|
||||
# Tracking: https://github.com/damian0815/compel/pull/129
|
||||
# https://github.com/damian0815/compel/issues/128
|
||||
# compel currently pins transformers~=4.25, which forced pip into multi-hour
|
||||
# resolver backtracking storms in CI. backend.py imports it lazily and gates
|
||||
# the COMPEL=1 env var on the import succeeding, so dropping it here is safe.
|
||||
# diffusers and transformers are pinned together on purpose. transformers v5
|
||||
# restructured CLIPTextModel and dropped the `.text_model` attribute, which
|
||||
# breaks single-file Stable Diffusion loading on every released diffusers
|
||||
# (<=0.38.0); only unreleased diffusers main supports transformers v5. Tracking
|
||||
# main via git froze whichever broken pair existed at image-build time. Pin the
|
||||
# last known-good released pair so builds are reproducible and can't drift into
|
||||
# the broken window. See https://github.com/mudler/LocalAI/issues/9979
|
||||
#
|
||||
# compel is intentionally omitted: it pins transformers~=4.25, which conflicts
|
||||
# with this pin and previously forced pip into multi-hour resolver backtracking
|
||||
# storms in CI. backend.py imports it lazily and gates the COMPEL=1 env var on
|
||||
# the import succeeding, so dropping it here is safe.
|
||||
@@ -1,7 +1,7 @@
|
||||
--extra-index-url https://pypi.jetson-ai-lab.io/jp6/cu129/
|
||||
torch
|
||||
git+https://github.com/huggingface/diffusers
|
||||
transformers
|
||||
diffusers==0.38.0
|
||||
transformers==4.57.6
|
||||
accelerate
|
||||
peft
|
||||
optimum-quanto
|
||||
@@ -9,9 +9,15 @@ numpy<2
|
||||
sentencepiece
|
||||
torchvision
|
||||
ftfy
|
||||
# TODO: re-add compel once it supports transformers >= 5.
|
||||
# Tracking: https://github.com/damian0815/compel/pull/129
|
||||
# https://github.com/damian0815/compel/issues/128
|
||||
# compel currently pins transformers~=4.25, which forced pip into multi-hour
|
||||
# resolver backtracking storms in CI. backend.py imports it lazily and gates
|
||||
# the COMPEL=1 env var on the import succeeding, so dropping it here is safe.
|
||||
# diffusers and transformers are pinned together on purpose. transformers v5
|
||||
# restructured CLIPTextModel and dropped the `.text_model` attribute, which
|
||||
# breaks single-file Stable Diffusion loading on every released diffusers
|
||||
# (<=0.38.0); only unreleased diffusers main supports transformers v5. Tracking
|
||||
# main via git froze whichever broken pair existed at image-build time. Pin the
|
||||
# last known-good released pair so builds are reproducible and can't drift into
|
||||
# the broken window. See https://github.com/mudler/LocalAI/issues/9979
|
||||
#
|
||||
# compel is intentionally omitted: it pins transformers~=4.25, which conflicts
|
||||
# with this pin and previously forced pip into multi-hour resolver backtracking
|
||||
# storms in CI. backend.py imports it lazily and gates the COMPEL=1 env var on
|
||||
# the import succeeding, so dropping it here is safe.
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
--extra-index-url https://download.pytorch.org/whl/cu130
|
||||
torch
|
||||
git+https://github.com/huggingface/diffusers
|
||||
transformers
|
||||
diffusers==0.38.0
|
||||
transformers==4.57.6
|
||||
accelerate
|
||||
peft
|
||||
optimum-quanto
|
||||
@@ -10,9 +10,15 @@ sentencepiece
|
||||
torchvision
|
||||
ftfy
|
||||
chardet
|
||||
# TODO: re-add compel once it supports transformers >= 5.
|
||||
# Tracking: https://github.com/damian0815/compel/pull/129
|
||||
# https://github.com/damian0815/compel/issues/128
|
||||
# compel currently pins transformers~=4.25, which forced pip into multi-hour
|
||||
# resolver backtracking storms in CI. backend.py imports it lazily and gates
|
||||
# the COMPEL=1 env var on the import succeeding, so dropping it here is safe.
|
||||
# diffusers and transformers are pinned together on purpose. transformers v5
|
||||
# restructured CLIPTextModel and dropped the `.text_model` attribute, which
|
||||
# breaks single-file Stable Diffusion loading on every released diffusers
|
||||
# (<=0.38.0); only unreleased diffusers main supports transformers v5. Tracking
|
||||
# main via git froze whichever broken pair existed at image-build time. Pin the
|
||||
# last known-good released pair so builds are reproducible and can't drift into
|
||||
# the broken window. See https://github.com/mudler/LocalAI/issues/9979
|
||||
#
|
||||
# compel is intentionally omitted: it pins transformers~=4.25, which conflicts
|
||||
# with this pin and previously forced pip into multi-hour resolver backtracking
|
||||
# storms in CI. backend.py imports it lazily and gates the COMPEL=1 env var on
|
||||
# the import succeeding, so dropping it here is safe.
|
||||
|
||||
@@ -1,16 +1,22 @@
|
||||
torch==2.7.1
|
||||
torchvision==0.22.1
|
||||
git+https://github.com/huggingface/diffusers
|
||||
diffusers==0.38.0
|
||||
opencv-python
|
||||
transformers
|
||||
transformers==4.57.6
|
||||
accelerate
|
||||
peft
|
||||
sentencepiece
|
||||
optimum-quanto
|
||||
ftfy
|
||||
# TODO: re-add compel once it supports transformers >= 5.
|
||||
# Tracking: https://github.com/damian0815/compel/pull/129
|
||||
# https://github.com/damian0815/compel/issues/128
|
||||
# compel currently pins transformers~=4.25, which forced pip into multi-hour
|
||||
# resolver backtracking storms in CI. backend.py imports it lazily and gates
|
||||
# the COMPEL=1 env var on the import succeeding, so dropping it here is safe.
|
||||
# diffusers and transformers are pinned together on purpose. transformers v5
|
||||
# restructured CLIPTextModel and dropped the `.text_model` attribute, which
|
||||
# breaks single-file Stable Diffusion loading on every released diffusers
|
||||
# (<=0.38.0); only unreleased diffusers main supports transformers v5. Tracking
|
||||
# main via git froze whichever broken pair existed at image-build time. Pin the
|
||||
# last known-good released pair so builds are reproducible and can't drift into
|
||||
# the broken window. See https://github.com/mudler/LocalAI/issues/9979
|
||||
#
|
||||
# compel is intentionally omitted: it pins transformers~=4.25, which conflicts
|
||||
# with this pin and previously forced pip into multi-hour resolver backtracking
|
||||
# storms in CI. backend.py imports it lazily and gates the COMPEL=1 env var on
|
||||
# the import succeeding, so dropping it here is safe.
|
||||
@@ -341,11 +341,9 @@ func (a *Application) ResolvePIIPolicy(cfg *config.ModelConfig) (enabled bool, d
|
||||
}
|
||||
appCfg := a.ApplicationConfig()
|
||||
|
||||
if cfg.PII.Enabled != nil {
|
||||
enabled = *cfg.PII.Enabled
|
||||
} else {
|
||||
enabled = cfg.PIIIsEnabled() // backend default (cloud-proxy)
|
||||
}
|
||||
// PIIIsEnabled already encodes "explicit pii.enabled wins, else backend
|
||||
// default (cloud-proxy)" — the single source of that rule.
|
||||
enabled = cfg.PIIIsEnabled()
|
||||
if !enabled {
|
||||
return false, nil
|
||||
}
|
||||
@@ -354,7 +352,7 @@ func (a *Application) ResolvePIIPolicy(cfg *config.ModelConfig) (enabled bool, d
|
||||
if len(detectors) == 0 {
|
||||
detectors = append([]string(nil), appCfg.PIIDefaultDetectors...)
|
||||
}
|
||||
return enabled, detectors
|
||||
return true, detectors // enabled is necessarily true past the !enabled guard
|
||||
}
|
||||
|
||||
// PIIPolicyResolver adapts ResolvePIIPolicy to pii.PolicyResolver for
|
||||
|
||||
@@ -215,6 +215,7 @@ func readRuntimeSettingsJson(startupAppConfig config.ApplicationConfig) fileHand
|
||||
envBackendGalleries := slices.Equal(appConfig.BackendGalleries, startupAppConfig.BackendGalleries)
|
||||
envAutoloadGalleries := appConfig.AutoloadGalleries == startupAppConfig.AutoloadGalleries
|
||||
envAutoloadBackendGalleries := appConfig.AutoloadBackendGalleries == startupAppConfig.AutoloadBackendGalleries
|
||||
envPIIDefaultDetectors := slices.Equal(appConfig.PIIDefaultDetectors, startupAppConfig.PIIDefaultDetectors)
|
||||
envAgentJobRetentionDays := appConfig.AgentJobRetentionDays == startupAppConfig.AgentJobRetentionDays
|
||||
envForceEvictionWhenBusy := appConfig.ForceEvictionWhenBusy == startupAppConfig.ForceEvictionWhenBusy
|
||||
envLRUEvictionMaxRetries := appConfig.LRUEvictionMaxRetries == startupAppConfig.LRUEvictionMaxRetries
|
||||
@@ -335,6 +336,15 @@ func readRuntimeSettingsJson(startupAppConfig config.ApplicationConfig) fileHand
|
||||
if settings.AutoloadBackendGalleries != nil && !envAutoloadBackendGalleries {
|
||||
appConfig.AutoloadBackendGalleries = *settings.AutoloadBackendGalleries
|
||||
}
|
||||
if settings.PIIDefaultDetectors != nil && !envPIIDefaultDetectors {
|
||||
// Request-side default redaction reads this live via
|
||||
// ResolvePIIPolicy, so a file edit takes effect on the next chat
|
||||
// request. The MITM listener resolves its per-host detector map
|
||||
// once at start, so a raw file edit reaches cloud-proxy traffic
|
||||
// only after a restart or a POST /api/settings (which rebuilds
|
||||
// the listener) — the admin UI uses the latter.
|
||||
appConfig.PIIDefaultDetectors = append([]string(nil), (*settings.PIIDefaultDetectors)...)
|
||||
}
|
||||
if settings.AutoUpgradeBackends != nil {
|
||||
appConfig.AutoUpgradeBackends = *settings.AutoUpgradeBackends
|
||||
}
|
||||
|
||||
@@ -357,6 +357,15 @@ func initDistributed(cfg *config.ApplicationConfig, authDB *gorm.DB, configLoade
|
||||
Pressure: pressure,
|
||||
})
|
||||
|
||||
// Wire staging-progress broadcasting so file-staging shows up on every
|
||||
// replica, not just the one performing the transfer. Without this, a
|
||||
// /api/operations poll that round-robins onto a peer sees no staging row and
|
||||
// the progress flickers. The origin publishes; peers mirror via the wildcard.
|
||||
router.StagingTracker().SetPublisher(natsClient)
|
||||
if _, err := router.StagingTracker().SubscribeBroadcasts(natsClient); err != nil {
|
||||
xlog.Warn("Failed to subscribe to staging progress broadcasts", "error", err)
|
||||
}
|
||||
|
||||
// Create ReplicaReconciler for auto-scaling model replicas. Adapter +
|
||||
// RegistrationToken feed the state-reconciliation passes: pending op
|
||||
// drain uses the adapter, and model health probes use the token to auth
|
||||
|
||||
@@ -109,6 +109,52 @@ var _ = Describe("loadRuntimeSettingsFromFile", func() {
|
||||
})
|
||||
})
|
||||
|
||||
// Instance-wide default PII detectors. The persisted file is the source unless the
|
||||
// LOCALAI_PII_DEFAULT_DETECTORS env var is set; the loader runs immediately before startMITMIfConfigured,
|
||||
// so a regression here means the cloud-proxy MITM listener resolves an
|
||||
// empty detector set at boot and forwards intercepted traffic unredacted —
|
||||
// even though pii_default_detectors is on disk and the MITM model has PII
|
||||
// enabled. It also breaks request-side default redaction the same way.
|
||||
Describe("PII default detectors", func() {
|
||||
It("loads pii_default_detectors from the file", func() {
|
||||
cfg := &config.ApplicationConfig{DynamicConfigsDir: seedSettings(`{"pii_default_detectors": ["privacy-filter-nemotron", "secret-filter"]}`)}
|
||||
loadRuntimeSettingsFromFile(cfg)
|
||||
Expect(cfg.PIIDefaultDetectors).To(Equal([]string{"privacy-filter-nemotron", "secret-filter"}))
|
||||
})
|
||||
|
||||
It("does not override an env/CLI-set value (LOCALAI_PII_DEFAULT_DETECTORS)", func() {
|
||||
cfg := &config.ApplicationConfig{
|
||||
DynamicConfigsDir: seedSettings(`{"pii_default_detectors": ["from-file"]}`),
|
||||
PIIDefaultDetectors: []string{"from-env"}, // simulate WithPIIDefaultDetectors(env)
|
||||
}
|
||||
loadRuntimeSettingsFromFile(cfg)
|
||||
Expect(cfg.PIIDefaultDetectors).To(Equal([]string{"from-env"}), "env var must win over the persisted file value")
|
||||
})
|
||||
})
|
||||
|
||||
// The live file watcher applies pii_default_detectors on a runtime change
|
||||
// the same way it handles galleries/threads/etc.: env-set values (current
|
||||
// == startup snapshot) are left alone, otherwise the file value is applied
|
||||
// to the live config so request-side default redaction picks it up without
|
||||
// a restart.
|
||||
Describe("file watcher: pii_default_detectors", func() {
|
||||
It("applies a changed file value to the live config", func() {
|
||||
startup := config.ApplicationConfig{} // no env baseline
|
||||
live := &config.ApplicationConfig{PIIDefaultDetectors: []string{"old"}}
|
||||
handler := readRuntimeSettingsJson(startup)
|
||||
Expect(handler([]byte(`{"pii_default_detectors":["new-a","new-b"]}`), live)).To(Succeed())
|
||||
Expect(live.PIIDefaultDetectors).To(Equal([]string{"new-a", "new-b"}))
|
||||
})
|
||||
|
||||
It("leaves an env-controlled value untouched", func() {
|
||||
startup := config.ApplicationConfig{PIIDefaultDetectors: []string{"from-env"}}
|
||||
live := &config.ApplicationConfig{PIIDefaultDetectors: []string{"from-env"}}
|
||||
handler := readRuntimeSettingsJson(startup)
|
||||
Expect(handler([]byte(`{"pii_default_detectors":["from-file"]}`), live)).To(Succeed())
|
||||
Expect(live.PIIDefaultDetectors).To(Equal([]string{"from-env"}), "env-controlled detectors must not be overwritten by the file")
|
||||
})
|
||||
})
|
||||
|
||||
// The Agent Pool block has a mix of zero and non-zero defaults
|
||||
// (Enabled=true, EmbeddingModel="granite-...", MaxChunkingSize=400,
|
||||
// VectorEngine="chromem", AgentHubURL="https://agenthub.localai.io").
|
||||
|
||||
@@ -750,6 +750,20 @@ func loadRuntimeSettingsFromFile(options *config.ApplicationConfig) {
|
||||
options.MITMListen = *settings.MITMListen
|
||||
}
|
||||
|
||||
// Instance-wide default PII detectors. LOCALAI_PII_DEFAULT_DETECTORS (via
|
||||
// WithPIIDefaultDetectors) wins when set; otherwise the file is the source
|
||||
// — apply it only when the env/CLI left the value empty, mirroring the
|
||||
// "env > file" precedence used for the other fields. This must land before
|
||||
// startMITMIfConfigured (called right after this loader): the cloud-proxy
|
||||
// listener resolves each intercept host's detectors once at start via
|
||||
// ResolvePIIPolicy, and a MITM model that names no detectors of its own
|
||||
// falls back to these defaults. Without it the listener (and request-side
|
||||
// default redaction) starts with an empty detector set and forwards
|
||||
// traffic unredacted even though pii_default_detectors is on disk.
|
||||
if settings.PIIDefaultDetectors != nil && len(options.PIIDefaultDetectors) == 0 {
|
||||
options.PIIDefaultDetectors = append([]string(nil), (*settings.PIIDefaultDetectors)...)
|
||||
}
|
||||
|
||||
// Backend upgrade flags
|
||||
if settings.AutoUpgradeBackends != nil {
|
||||
if !options.AutoUpgradeBackends {
|
||||
|
||||
@@ -181,6 +181,8 @@ type RunCMD struct {
|
||||
// Cloud-proxy MITM listener (off by default).
|
||||
MITMListen string `env:"LOCALAI_MITM_LISTEN" help:"Address (host:port) for the cloudproxy MITM listener. Empty = disabled. Clients set HTTPS_PROXY=http://<this>:<port>. Intercept hosts are declared per-model via the model YAML mitm.hosts: block; create one from the Add Model UI." group:"middleware"`
|
||||
MITMCADir string `env:"LOCALAI_MITM_CA_DIR" type:"path" help:"Directory holding the MITM proxy CA cert + key. Defaults to <data-path>/mitm-ca." group:"middleware"`
|
||||
|
||||
PIIDefaultDetectors []string `env:"LOCALAI_PII_DEFAULT_DETECTORS" help:"Instance-wide default PII/secret detector model names applied to any PII-enabled model (chiefly cloud-proxy / MITM models) that names no pii.detectors of its own. Comma-separated, e.g. privacy-filter-nemotron,secret-filter. Takes precedence over the value persisted via the Middleware UI." group:"middleware"`
|
||||
}
|
||||
|
||||
func (r *RunCMD) Run(ctx *cliContext.Context) error {
|
||||
@@ -243,6 +245,7 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error {
|
||||
config.WithAPIAddress(r.Address),
|
||||
config.WithMITMListen(r.MITMListen),
|
||||
config.WithMITMCADir(r.MITMCADir),
|
||||
config.WithPIIDefaultDetectors(r.PIIDefaultDetectors),
|
||||
config.WithAgentJobRetentionDays(r.AgentJobRetentionDays),
|
||||
config.WithLlamaCPPTunnelCallback(func(tunnels []string) {
|
||||
tunnelEnvVar := strings.Join(tunnels, ",")
|
||||
|
||||
@@ -712,6 +712,18 @@ func WithMITMCADir(dir string) AppOption {
|
||||
}
|
||||
}
|
||||
|
||||
// WithPIIDefaultDetectors sets the instance-wide default PII/secret detector
|
||||
// model names applied to any PII-enabled model (chiefly cloud-proxy / MITM
|
||||
// models) that names no pii.detectors of its own. CLI/env:
|
||||
// LOCALAI_PII_DEFAULT_DETECTORS. Empty leaves the value to
|
||||
// runtime_settings.json / the Middleware UI; a non-empty value takes
|
||||
// precedence over the file (env > file).
|
||||
func WithPIIDefaultDetectors(detectors []string) AppOption {
|
||||
return func(o *ApplicationConfig) {
|
||||
o.PIIDefaultDetectors = detectors
|
||||
}
|
||||
}
|
||||
|
||||
func WithDynamicConfigDir(dynamicConfigsDir string) AppOption {
|
||||
return func(o *ApplicationConfig) {
|
||||
o.DynamicConfigsDir = dynamicConfigsDir
|
||||
|
||||
@@ -537,6 +537,36 @@ func DefaultRegistry() map[string]FieldMetaOverride {
|
||||
Component: "number",
|
||||
Order: 79,
|
||||
},
|
||||
"pipeline.compaction.enabled": {
|
||||
Section: "pipeline",
|
||||
Label: "Compaction Enabled",
|
||||
Description: "Fold conversation items that age out of the live window (Max History Items) into a rolling summary instead of dropping them, so long realtime sessions stay cheap without losing earlier context. Off by default.",
|
||||
Component: "toggle",
|
||||
Order: 80,
|
||||
},
|
||||
"pipeline.compaction.trigger_items": {
|
||||
Section: "pipeline",
|
||||
Label: "Compaction Trigger Items",
|
||||
Description: "High-water mark: once the live conversation exceeds this many items, the overflow above Max History Items is summarized and evicted. Must be greater than Max History Items; defaults to twice it. The gap controls how often summarization runs.",
|
||||
Component: "number",
|
||||
Order: 81,
|
||||
},
|
||||
"pipeline.compaction.summary_model": {
|
||||
Section: "pipeline",
|
||||
Label: "Compaction Summary Model",
|
||||
Description: "Optional smaller/cheaper model used to produce the rolling summary. Empty reuses the pipeline's own LLM. On CPU, a tiny model here keeps compaction from competing with the conversation LLM.",
|
||||
Component: "input",
|
||||
Advanced: true,
|
||||
Order: 82,
|
||||
},
|
||||
"pipeline.compaction.max_summary_tokens": {
|
||||
Section: "pipeline",
|
||||
Label: "Compaction Max Summary Tokens",
|
||||
Description: "Advisory cap on the rolling summary length (fed to the summarizer prompt). Defaults to 512.",
|
||||
Component: "number",
|
||||
Advanced: true,
|
||||
Order: 83,
|
||||
},
|
||||
|
||||
// --- Functions ---
|
||||
"function.grammar.parallel_calls": {
|
||||
|
||||
@@ -641,11 +641,32 @@ type Pipeline struct {
|
||||
// context fills.
|
||||
MaxHistoryItems *int `yaml:"max_history_items,omitempty" json:"max_history_items,omitempty"`
|
||||
|
||||
// Compaction folds conversation items that age out of the live window
|
||||
// (max_history_items) into a rolling summary instead of dropping them, so
|
||||
// long realtime sessions stay cheap without losing earlier context. Nil
|
||||
// (block absent) means disabled, preserving existing behavior.
|
||||
Compaction *PipelineCompaction `yaml:"compaction,omitempty" json:"compaction,omitempty"`
|
||||
|
||||
// VoiceRecognition gates the pipeline behind speaker verification. Nil
|
||||
// (block absent) means no gate, preserving existing behavior.
|
||||
VoiceRecognition *PipelineVoiceRecognition `yaml:"voice_recognition,omitempty" json:"voice_recognition,omitempty"`
|
||||
}
|
||||
|
||||
// PipelineCompaction holds the summarize-then-drop settings of a realtime
// pipeline.
type PipelineCompaction struct {
	// Enabled switches summarize-then-drop on. The default is false.
	Enabled bool `yaml:"enabled,omitempty" json:"enabled,omitempty"`

	// TriggerItems is the high-water mark: when the number of live items goes
	// above it, the overflow beyond max_history_items is summarized and
	// evicted. It must be larger than max_history_items and is clamped up
	// otherwise. Default: 2x max_history_items.
	TriggerItems int `yaml:"trigger_items,omitempty" json:"trigger_items,omitempty"`

	// SummaryModel optionally selects a smaller/cheaper model for the summary
	// call. When empty, the pipeline's own LLM is used.
	SummaryModel string `yaml:"summary_model,omitempty" json:"summary_model,omitempty"`

	// MaxSummaryTokens is an advisory summary length that is fed to the
	// prompt. Default 512.
	MaxSummaryTokens int `yaml:"max_summary_tokens,omitempty" json:"max_summary_tokens,omitempty"`
}
|
||||
|
||||
// ApplyReasoningEffort resolves the effective reasoning effort — a per-request
|
||||
// value (requestEffort) overrides the config's own ReasoningEffort default —
|
||||
// stores it on the config so gRPCPredictOpts forwards it to the backend as the
|
||||
|
||||
@@ -5,6 +5,7 @@ import (
|
||||
"errors"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"reflect"
|
||||
)
|
||||
|
||||
// runtimeSettingsFile is the on-disk filename inside DynamicConfigsDir.
|
||||
@@ -33,6 +34,35 @@ func (o *ApplicationConfig) ReadPersistedSettings() (RuntimeSettings, error) {
|
||||
return settings, nil
|
||||
}
|
||||
|
||||
// MergeNonNil overlays every set (non-nil) field of overlay onto the
|
||||
// receiver, leaving the receiver's value untouched wherever overlay left a
|
||||
// field unset. Every RuntimeSettings field is a pointer precisely so "set"
|
||||
// can be told apart from "absent" (see the type doc), which makes this a
|
||||
// faithful partial update: a caller that submits only the field it owns
|
||||
// changes exactly that field and never clobbers unrelated settings.
|
||||
//
|
||||
// This is the read-modify-write contract the persistence helpers exist for.
|
||||
// UpdateSettingsEndpoint reads the on-disk settings, merges the request body
|
||||
// on top, and writes the result — so a focused admin page that POSTs only its
|
||||
// own field (the Middleware page sends only mitm_listen; the detector table
|
||||
// only pii_default_detectors) no longer nulls every other setting.
|
||||
//
|
||||
// Reflection keeps the merge total over the struct: a field added to
|
||||
// RuntimeSettings later is merged automatically, so the persistence path can
|
||||
// never silently drop a new setting the way a hand-maintained field list
|
||||
// would. Non-pointer fields (none today) are skipped — they cannot express
|
||||
// "absent", so the receiver wins.
|
||||
func (s *RuntimeSettings) MergeNonNil(overlay RuntimeSettings) {
|
||||
dst := reflect.ValueOf(s).Elem()
|
||||
src := reflect.ValueOf(overlay)
|
||||
for i := 0; i < src.NumField(); i++ {
|
||||
f := src.Field(i)
|
||||
if f.Kind() == reflect.Pointer && !f.IsNil() {
|
||||
dst.Field(i).Set(f)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// WritePersistedSettings serialises the given RuntimeSettings to
|
||||
// runtime_settings.json with restricted permissions (it may carry API
|
||||
// keys and P2P tokens).
|
||||
|
||||
@@ -12,6 +12,7 @@ import (
|
||||
)
|
||||
|
||||
// strPtr returns a pointer to a fresh string holding s, for populating the
// pointer-typed settings fields in these specs.
func strPtr(s string) *string {
	p := new(string)
	*p = s
	return p
}

// boolPtr returns a pointer to a fresh bool holding b.
func boolPtr(b bool) *bool {
	p := new(bool)
	*p = b
	return p
}
|
||||
|
||||
var _ = Describe("RuntimeSettings persistence helpers", func() {
|
||||
var (
|
||||
@@ -51,6 +52,47 @@ var _ = Describe("RuntimeSettings persistence helpers", func() {
|
||||
})
|
||||
})
|
||||
|
||||
// MergeNonNil is the partial-update primitive UpdateSettingsEndpoint
|
||||
// relies on: a focused admin page POSTs only the field it owns, and the
|
||||
// handler reads the on-disk settings and overlays the request on top.
|
||||
// Without it, the body would be written verbatim and every field the
|
||||
// caller omitted would be nulled (the reported regression: changing
|
||||
// mitm_listen wiped the galleries, api keys, watchdog config, etc.).
|
||||
Describe("MergeNonNil partial update", func() {
|
||||
It("overlays set fields and preserves unset ones", func() {
|
||||
base := config.RuntimeSettings{
|
||||
MITMListen: strPtr(":9000"),
|
||||
Galleries: &[]config.Gallery{{Name: "g1", URL: "http://example/g1"}},
|
||||
WatchdogIdleEnabled: boolPtr(true),
|
||||
ApiKeys: &[]string{"persisted-key"},
|
||||
PIIDefaultDetectors: &[]string{"det-a"},
|
||||
}
|
||||
|
||||
// Simulate the Middleware proxy tab: only mitm_listen is sent.
|
||||
overlay := config.RuntimeSettings{MITMListen: strPtr(":8443")}
|
||||
base.MergeNonNil(overlay)
|
||||
|
||||
Expect(base.MITMListen).ToNot(BeNil())
|
||||
Expect(*base.MITMListen).To(Equal(":8443"), "set field should be overlaid")
|
||||
// Everything the overlay left unset must survive untouched.
|
||||
Expect(base.Galleries).ToNot(BeNil(), "galleries were clobbered")
|
||||
Expect(*base.Galleries).To(HaveLen(1))
|
||||
Expect(base.WatchdogIdleEnabled).ToNot(BeNil())
|
||||
Expect(*base.WatchdogIdleEnabled).To(BeTrue())
|
||||
Expect(base.ApiKeys).ToNot(BeNil(), "api_keys were clobbered")
|
||||
Expect(*base.ApiKeys).To(Equal([]string{"persisted-key"}))
|
||||
Expect(base.PIIDefaultDetectors).ToNot(BeNil(), "pii_default_detectors were clobbered")
|
||||
Expect(*base.PIIDefaultDetectors).To(Equal([]string{"det-a"}))
|
||||
})
|
||||
|
||||
It("lets an explicit empty slice clear a field", func() {
|
||||
base := config.RuntimeSettings{PIIDefaultDetectors: &[]string{"det-a"}}
|
||||
base.MergeNonNil(config.RuntimeSettings{PIIDefaultDetectors: &[]string{}})
|
||||
Expect(base.PIIDefaultDetectors).ToNot(BeNil())
|
||||
Expect(*base.PIIDefaultDetectors).To(BeEmpty(), "an explicit empty slice should clear, not preserve")
|
||||
})
|
||||
})
|
||||
|
||||
// MITM round trip pins the contract that loadRuntimeSettingsFromFile
// depends on: the MITM listener address must survive a write/read round trip so the
|
||||
// next process restart can bring the listener back up. (Intercept
|
||||
|
||||
@@ -70,7 +70,7 @@ func UploadToCollectionEndpoint(app *application.Application) echo.HandlerFunc {
|
||||
return func(c echo.Context) error {
|
||||
svc := app.AgentPoolService()
|
||||
userID := effectiveUserID(c)
|
||||
name := c.Param("name")
|
||||
name := decodedParam(c, "name")
|
||||
file, err := c.FormFile("file")
|
||||
if err != nil {
|
||||
return c.JSON(http.StatusBadRequest, map[string]string{"error": "file required"})
|
||||
@@ -116,7 +116,7 @@ func ListCollectionEntriesEndpoint(app *application.Application) echo.HandlerFun
|
||||
return func(c echo.Context) error {
|
||||
svc := app.AgentPoolService()
|
||||
userID := effectiveUserID(c)
|
||||
entries, err := svc.ListCollectionEntriesForUser(userID, c.Param("name"))
|
||||
entries, err := svc.ListCollectionEntriesForUser(userID, decodedParam(c, "name"))
|
||||
if err != nil {
|
||||
if strings.Contains(err.Error(), "not found") {
|
||||
return c.JSON(http.StatusNotFound, map[string]string{"error": err.Error()})
|
||||
@@ -139,7 +139,7 @@ func GetCollectionEntryContentEndpoint(app *application.Application) echo.Handle
|
||||
if err != nil {
|
||||
entry = entryParam
|
||||
}
|
||||
content, chunkCount, err := svc.GetCollectionEntryContentForUser(userID, c.Param("name"), entry)
|
||||
content, chunkCount, err := svc.GetCollectionEntryContentForUser(userID, decodedParam(c, "name"), entry)
|
||||
if err != nil {
|
||||
if strings.Contains(err.Error(), "not found") {
|
||||
return c.JSON(http.StatusNotFound, map[string]string{"error": err.Error()})
|
||||
@@ -164,7 +164,7 @@ func SearchCollectionEndpoint(app *application.Application) echo.HandlerFunc {
|
||||
if err := c.Bind(&payload); err != nil {
|
||||
return c.JSON(http.StatusBadRequest, map[string]string{"error": err.Error()})
|
||||
}
|
||||
results, err := svc.SearchCollectionForUser(userID, c.Param("name"), payload.Query, payload.MaxResults)
|
||||
results, err := svc.SearchCollectionForUser(userID, decodedParam(c, "name"), payload.Query, payload.MaxResults)
|
||||
if err != nil {
|
||||
if strings.Contains(err.Error(), "not found") {
|
||||
return c.JSON(http.StatusNotFound, map[string]string{"error": err.Error()})
|
||||
@@ -182,7 +182,7 @@ func ResetCollectionEndpoint(app *application.Application) echo.HandlerFunc {
|
||||
return func(c echo.Context) error {
|
||||
svc := app.AgentPoolService()
|
||||
userID := effectiveUserID(c)
|
||||
if err := svc.ResetCollectionForUser(userID, c.Param("name")); err != nil {
|
||||
if err := svc.ResetCollectionForUser(userID, decodedParam(c, "name")); err != nil {
|
||||
if strings.Contains(err.Error(), "not found") {
|
||||
return c.JSON(http.StatusNotFound, map[string]string{"error": err.Error()})
|
||||
}
|
||||
@@ -202,7 +202,7 @@ func DeleteCollectionEntryEndpoint(app *application.Application) echo.HandlerFun
|
||||
if err := c.Bind(&payload); err != nil {
|
||||
return c.JSON(http.StatusBadRequest, map[string]string{"error": err.Error()})
|
||||
}
|
||||
remaining, err := svc.DeleteCollectionEntryForUser(userID, c.Param("name"), payload.Entry)
|
||||
remaining, err := svc.DeleteCollectionEntryForUser(userID, decodedParam(c, "name"), payload.Entry)
|
||||
if err != nil {
|
||||
if strings.Contains(err.Error(), "not found") {
|
||||
return c.JSON(http.StatusNotFound, map[string]string{"error": err.Error()})
|
||||
@@ -230,7 +230,7 @@ func AddCollectionSourceEndpoint(app *application.Application) echo.HandlerFunc
|
||||
if payload.UpdateInterval < 1 {
|
||||
payload.UpdateInterval = 60
|
||||
}
|
||||
if err := svc.AddCollectionSourceForUser(userID, c.Param("name"), payload.URL, payload.UpdateInterval); err != nil {
|
||||
if err := svc.AddCollectionSourceForUser(userID, decodedParam(c, "name"), payload.URL, payload.UpdateInterval); err != nil {
|
||||
if strings.Contains(err.Error(), "not found") {
|
||||
return c.JSON(http.StatusNotFound, map[string]string{"error": err.Error()})
|
||||
}
|
||||
@@ -250,7 +250,7 @@ func RemoveCollectionSourceEndpoint(app *application.Application) echo.HandlerFu
|
||||
if err := c.Bind(&payload); err != nil {
|
||||
return c.JSON(http.StatusBadRequest, map[string]string{"error": err.Error()})
|
||||
}
|
||||
if err := svc.RemoveCollectionSourceForUser(userID, c.Param("name"), payload.URL); err != nil {
|
||||
if err := svc.RemoveCollectionSourceForUser(userID, decodedParam(c, "name"), payload.URL); err != nil {
|
||||
return c.JSON(http.StatusInternalServerError, map[string]string{"error": err.Error()})
|
||||
}
|
||||
return c.JSON(http.StatusOK, map[string]string{"status": "ok"})
|
||||
@@ -267,7 +267,7 @@ func GetCollectionEntryRawFileEndpoint(app *application.Application) echo.Handle
|
||||
if err != nil {
|
||||
entry = entryParam
|
||||
}
|
||||
fpath, err := svc.GetCollectionEntryFilePathForUser(userID, c.Param("name"), entry)
|
||||
fpath, err := svc.GetCollectionEntryFilePathForUser(userID, decodedParam(c, "name"), entry)
|
||||
if err != nil {
|
||||
if strings.Contains(err.Error(), "not found") {
|
||||
return c.JSON(http.StatusNotFound, map[string]string{"error": err.Error()})
|
||||
@@ -282,7 +282,7 @@ func ListCollectionSourcesEndpoint(app *application.Application) echo.HandlerFun
|
||||
return func(c echo.Context) error {
|
||||
svc := app.AgentPoolService()
|
||||
userID := effectiveUserID(c)
|
||||
sources, err := svc.ListCollectionSourcesForUser(userID, c.Param("name"))
|
||||
sources, err := svc.ListCollectionSourcesForUser(userID, decodedParam(c, "name"))
|
||||
if err != nil {
|
||||
if strings.Contains(err.Error(), "not found") {
|
||||
return c.JSON(http.StatusNotFound, map[string]string{"error": err.Error()})
|
||||
|
||||
49
core/http/endpoints/localai/agent_collections_param_test.go
Normal file
49
core/http/endpoints/localai/agent_collections_param_test.go
Normal file
@@ -0,0 +1,49 @@
|
||||
package localai
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
|
||||
"github.com/labstack/echo/v4"
|
||||
. "github.com/onsi/ginkgo/v2"
|
||||
. "github.com/onsi/gomega"
|
||||
)
|
||||
|
||||
// Regression for #10443: agent/collection names carry a "legacy-api-key:"
|
||||
// prefix, so the ':' is percent-encoded as %3A in the request path. Echo routes
|
||||
// such paths via URL.RawPath and stores the path-param value still escaped, so
|
||||
// handlers must URL-decode it before looking the collection up in the store -
|
||||
// otherwise the lookup sees "legacy-api-key%3ALiteraryResearch" and 404s.
|
||||
var _ = Describe("decodedParam", func() {
|
||||
var e *echo.Echo
|
||||
|
||||
BeforeEach(func() {
|
||||
e = echo.New()
|
||||
})
|
||||
|
||||
// route runs a request through Echo's real router so the path param is
|
||||
// populated exactly as it would be in production, then returns the decoded
|
||||
// value the handler would observe.
|
||||
route := func(rawPath string) string {
|
||||
var got string
|
||||
e.GET("/api/agents/collections/:name/upload", func(c echo.Context) error {
|
||||
got = decodedParam(c, "name")
|
||||
return c.NoContent(http.StatusOK)
|
||||
})
|
||||
req := httptest.NewRequest(http.MethodGet, rawPath, nil)
|
||||
rec := httptest.NewRecorder()
|
||||
e.ServeHTTP(rec, req)
|
||||
Expect(rec.Code).To(Equal(http.StatusOK))
|
||||
return got
|
||||
}
|
||||
|
||||
It("decodes a percent-encoded colon in the collection name", func() {
|
||||
got := route("/api/agents/collections/legacy-api-key%3ALiteraryResearch/upload")
|
||||
Expect(got).To(Equal("legacy-api-key:LiteraryResearch"))
|
||||
})
|
||||
|
||||
It("leaves an unencoded name untouched", func() {
|
||||
got := route("/api/agents/collections/PlainCollection/upload")
|
||||
Expect(got).To(Equal("PlainCollection"))
|
||||
})
|
||||
})
|
||||
@@ -6,6 +6,7 @@ import (
|
||||
"io"
|
||||
"maps"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"slices"
|
||||
@@ -33,6 +34,22 @@ func getUserID(c echo.Context) string {
|
||||
return user.ID
|
||||
}
|
||||
|
||||
// decodedParam returns the named path parameter, URL-decoding it.
|
||||
//
|
||||
// Echo routes a request via URL.RawPath whenever the path contains
|
||||
// percent-encoded characters (e.g. %3A for ':'), and in that case stores the
|
||||
// matched path-param value raw/escaped. Agent and collection names carry a
|
||||
// "legacy-api-key:" prefix, so the ':' arrives as %3A and the raw param no
|
||||
// longer matches the stored name. Callers must unescape before lookups.
|
||||
// Falls back to the raw value if it isn't valid percent-encoding.
|
||||
func decodedParam(c echo.Context, name string) string {
|
||||
raw := c.Param(name)
|
||||
if decoded, err := url.PathUnescape(raw); err == nil {
|
||||
return decoded
|
||||
}
|
||||
return raw
|
||||
}
|
||||
|
||||
// isAdminUser returns true if the authenticated user has admin role.
|
||||
func isAdminUser(c echo.Context) bool {
|
||||
user := auth.GetUser(c)
|
||||
@@ -127,7 +144,7 @@ func GetAgentEndpoint(app *application.Application) echo.HandlerFunc {
|
||||
return func(c echo.Context) error {
|
||||
svc := app.AgentPoolService()
|
||||
userID := effectiveUserID(c)
|
||||
name := c.Param("name")
|
||||
name := decodedParam(c, "name")
|
||||
|
||||
statuses := svc.ListAgentsForUser(userID)
|
||||
active, exists := statuses[name]
|
||||
@@ -142,7 +159,7 @@ func UpdateAgentEndpoint(app *application.Application) echo.HandlerFunc {
|
||||
return func(c echo.Context) error {
|
||||
svc := app.AgentPoolService()
|
||||
userID := effectiveUserID(c)
|
||||
name := c.Param("name")
|
||||
name := decodedParam(c, "name")
|
||||
var cfg state.AgentConfig
|
||||
if err := c.Bind(&cfg); err != nil {
|
||||
return c.JSON(http.StatusBadRequest, map[string]string{"error": err.Error()})
|
||||
@@ -161,7 +178,7 @@ func DeleteAgentEndpoint(app *application.Application) echo.HandlerFunc {
|
||||
return func(c echo.Context) error {
|
||||
svc := app.AgentPoolService()
|
||||
userID := effectiveUserID(c)
|
||||
name := c.Param("name")
|
||||
name := decodedParam(c, "name")
|
||||
if err := svc.DeleteAgentForUser(userID, name); err != nil {
|
||||
return c.JSON(http.StatusInternalServerError, map[string]string{"error": err.Error()})
|
||||
}
|
||||
@@ -173,7 +190,7 @@ func GetAgentConfigEndpoint(app *application.Application) echo.HandlerFunc {
|
||||
return func(c echo.Context) error {
|
||||
svc := app.AgentPoolService()
|
||||
userID := effectiveUserID(c)
|
||||
name := c.Param("name")
|
||||
name := decodedParam(c, "name")
|
||||
cfg := svc.GetAgentConfigForUser(userID, name)
|
||||
if cfg == nil {
|
||||
return c.JSON(http.StatusNotFound, map[string]string{"error": "Agent not found"})
|
||||
@@ -186,7 +203,7 @@ func PauseAgentEndpoint(app *application.Application) echo.HandlerFunc {
|
||||
return func(c echo.Context) error {
|
||||
svc := app.AgentPoolService()
|
||||
userID := effectiveUserID(c)
|
||||
if err := svc.PauseAgentForUser(userID, c.Param("name")); err != nil {
|
||||
if err := svc.PauseAgentForUser(userID, decodedParam(c, "name")); err != nil {
|
||||
return c.JSON(http.StatusNotFound, map[string]string{"error": err.Error()})
|
||||
}
|
||||
return c.JSON(http.StatusOK, map[string]string{"status": "ok"})
|
||||
@@ -197,7 +214,7 @@ func ResumeAgentEndpoint(app *application.Application) echo.HandlerFunc {
|
||||
return func(c echo.Context) error {
|
||||
svc := app.AgentPoolService()
|
||||
userID := effectiveUserID(c)
|
||||
if err := svc.ResumeAgentForUser(userID, c.Param("name")); err != nil {
|
||||
if err := svc.ResumeAgentForUser(userID, decodedParam(c, "name")); err != nil {
|
||||
return c.JSON(http.StatusNotFound, map[string]string{"error": err.Error()})
|
||||
}
|
||||
return c.JSON(http.StatusOK, map[string]string{"status": "ok"})
|
||||
@@ -208,7 +225,7 @@ func GetAgentStatusEndpoint(app *application.Application) echo.HandlerFunc {
|
||||
return func(c echo.Context) error {
|
||||
svc := app.AgentPoolService()
|
||||
userID := effectiveUserID(c)
|
||||
name := c.Param("name")
|
||||
name := decodedParam(c, "name")
|
||||
|
||||
history := svc.GetAgentStatusForUser(userID, name)
|
||||
if history == nil {
|
||||
@@ -241,7 +258,7 @@ func GetAgentObservablesEndpoint(app *application.Application) echo.HandlerFunc
|
||||
return func(c echo.Context) error {
|
||||
svc := app.AgentPoolService()
|
||||
userID := effectiveUserID(c)
|
||||
name := c.Param("name")
|
||||
name := decodedParam(c, "name")
|
||||
|
||||
history, err := svc.GetAgentObservablesForUser(userID, name)
|
||||
if err != nil {
|
||||
@@ -261,7 +278,7 @@ func ClearAgentObservablesEndpoint(app *application.Application) echo.HandlerFun
|
||||
return func(c echo.Context) error {
|
||||
svc := app.AgentPoolService()
|
||||
userID := effectiveUserID(c)
|
||||
name := c.Param("name")
|
||||
name := decodedParam(c, "name")
|
||||
if err := svc.ClearAgentObservablesForUser(userID, name); err != nil {
|
||||
return c.JSON(http.StatusNotFound, map[string]string{"error": err.Error()})
|
||||
}
|
||||
@@ -273,7 +290,7 @@ func ChatWithAgentEndpoint(app *application.Application) echo.HandlerFunc {
|
||||
return func(c echo.Context) error {
|
||||
svc := app.AgentPoolService()
|
||||
userID := effectiveUserID(c)
|
||||
name := c.Param("name")
|
||||
name := decodedParam(c, "name")
|
||||
var payload struct {
|
||||
Message string `json:"message"`
|
||||
}
|
||||
@@ -302,7 +319,7 @@ func AgentSSEEndpoint(app *application.Application) echo.HandlerFunc {
|
||||
return func(c echo.Context) error {
|
||||
svc := app.AgentPoolService()
|
||||
userID := effectiveUserID(c)
|
||||
name := c.Param("name")
|
||||
name := decodedParam(c, "name")
|
||||
|
||||
// Try local SSE manager first
|
||||
manager := svc.GetSSEManagerForUser(userID, name)
|
||||
@@ -334,7 +351,7 @@ func ExportAgentEndpoint(app *application.Application) echo.HandlerFunc {
|
||||
return func(c echo.Context) error {
|
||||
svc := app.AgentPoolService()
|
||||
userID := effectiveUserID(c)
|
||||
name := c.Param("name")
|
||||
name := decodedParam(c, "name")
|
||||
data, err := svc.ExportAgentForUser(userID, name)
|
||||
if err != nil {
|
||||
return c.JSON(http.StatusNotFound, map[string]string{"error": err.Error()})
|
||||
|
||||
@@ -385,6 +385,23 @@ func GetNodeModelsEndpoint(registry *nodes.NodeRegistry) echo.HandlerFunc {
|
||||
}
|
||||
}
|
||||
|
||||
// ListAllNodeModelsEndpoint returns all loaded models across all healthy nodes.
|
||||
// @Summary List all loaded models cluster-wide
|
||||
// @Tags Nodes
|
||||
// @Success 200 {array} nodes.NodeModel
|
||||
// @Router /api/nodes/models [get]
|
||||
func ListAllNodeModelsEndpoint(registry *nodes.NodeRegistry) echo.HandlerFunc {
|
||||
return func(c echo.Context) error {
|
||||
ctx := c.Request().Context()
|
||||
models, err := registry.ListAllLoadedModels(ctx)
|
||||
if err != nil {
|
||||
xlog.Error("Failed to list all node models", "error", err)
|
||||
return c.JSON(http.StatusInternalServerError, nodeError(http.StatusInternalServerError, "failed to list node models"))
|
||||
}
|
||||
return c.JSON(http.StatusOK, models)
|
||||
}
|
||||
}
|
||||
|
||||
// DrainNodeEndpoint sets a node to draining status (no new requests).
|
||||
func DrainNodeEndpoint(registry *nodes.NodeRegistry) echo.HandlerFunc {
|
||||
return func(c echo.Context) error {
|
||||
|
||||
@@ -407,4 +407,44 @@ var _ = Describe("Node HTTP handlers", func() {
|
||||
Expect(names).To(ConsistOf("alpha", "beta"))
|
||||
})
|
||||
})
|
||||
|
||||
Describe("ListAllNodeModelsEndpoint", func() {
|
||||
It("returns an empty list when no models are loaded", func() {
|
||||
e := echo.New()
|
||||
req := httptest.NewRequest(http.MethodGet, "/", nil)
|
||||
rec := httptest.NewRecorder()
|
||||
c := e.NewContext(req, rec)
|
||||
|
||||
handler := ListAllNodeModelsEndpoint(registry)
|
||||
Expect(handler(c)).To(Succeed())
|
||||
Expect(rec.Code).To(Equal(http.StatusOK))
|
||||
|
||||
var list []nodes.NodeModel
|
||||
Expect(json.Unmarshal(rec.Body.Bytes(), &list)).To(Succeed())
|
||||
Expect(list).To(BeEmpty())
|
||||
})
|
||||
|
||||
It("returns loaded models across healthy nodes", func() {
|
||||
ctx := context.Background()
|
||||
Expect(registry.Register(ctx, &nodes.BackendNode{
|
||||
ID: "n1", Name: "alpha", Address: "10.0.0.1:50051", Status: nodes.StatusHealthy,
|
||||
}, true)).To(Succeed())
|
||||
Expect(registry.SetNodeModel(ctx, "n1", "llama-3.3", 0, "loaded", "10.0.0.1:50051", 0)).To(Succeed())
|
||||
|
||||
e := echo.New()
|
||||
req := httptest.NewRequest(http.MethodGet, "/", nil)
|
||||
rec := httptest.NewRecorder()
|
||||
c := e.NewContext(req, rec)
|
||||
|
||||
handler := ListAllNodeModelsEndpoint(registry)
|
||||
Expect(handler(c)).To(Succeed())
|
||||
Expect(rec.Code).To(Equal(http.StatusOK))
|
||||
|
||||
var list []nodes.NodeModel
|
||||
Expect(json.Unmarshal(rec.Body.Bytes(), &list)).To(Succeed())
|
||||
Expect(list).To(HaveLen(1))
|
||||
Expect(list[0].ModelName).To(Equal("llama-3.3"))
|
||||
Expect(list[0].NodeID).To(Equal("n1"))
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
@@ -4,8 +4,6 @@ import (
|
||||
"encoding/json"
|
||||
"io"
|
||||
"net/http"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"time"
|
||||
|
||||
"github.com/labstack/echo/v4"
|
||||
@@ -110,6 +108,18 @@ func UpdateSettingsEndpoint(app *application.Application) echo.HandlerFunc {
|
||||
})
|
||||
}
|
||||
|
||||
// Read whatever is already persisted: it is both the source of truth
|
||||
// for branding asset filenames (below) and the base we merge this
|
||||
// request onto before writing. A read failure must not let a Save
|
||||
// silently discard the existing settings — surface it instead.
|
||||
persisted, err := appConfig.ReadPersistedSettings()
|
||||
if err != nil {
|
||||
return c.JSON(http.StatusInternalServerError, schema.SettingsResponse{
|
||||
Success: false,
|
||||
Error: "Failed to read existing settings: " + err.Error(),
|
||||
})
|
||||
}
|
||||
|
||||
// Branding asset filenames are owned exclusively by
|
||||
// /api/branding/asset/{kind} (upload/delete). The Settings page also
|
||||
// round-trips them via GET /api/settings, but its local state is stale
|
||||
@@ -118,11 +128,9 @@ func UpdateSettingsEndpoint(app *application.Application) echo.HandlerFunc {
|
||||
// at page open. Replace whatever the body sent for these three fields
|
||||
// with the values currently on disk so /api/settings can never
|
||||
// regress them.
|
||||
if existing, err := appConfig.ReadPersistedSettings(); err == nil {
|
||||
settings.LogoFile = existing.LogoFile
|
||||
settings.LogoHorizontalFile = existing.LogoHorizontalFile
|
||||
settings.FaviconFile = existing.FaviconFile
|
||||
}
|
||||
settings.LogoFile = persisted.LogoFile
|
||||
settings.LogoHorizontalFile = persisted.LogoHorizontalFile
|
||||
settings.FaviconFile = persisted.FaviconFile
|
||||
|
||||
// The UI reads ApiKeys from GET /api/settings, which already returns the
|
||||
// merged env+runtime list. When the user clicks Save, the same merged
|
||||
@@ -145,16 +153,17 @@ func UpdateSettingsEndpoint(app *application.Application) echo.HandlerFunc {
|
||||
settings.ApiKeys = &runtimeOnly
|
||||
}
|
||||
|
||||
settingsFile := filepath.Join(appConfig.DynamicConfigsDir, "runtime_settings.json")
|
||||
settingsJSON, err := json.MarshalIndent(settings, "", " ")
|
||||
if err != nil {
|
||||
return c.JSON(http.StatusInternalServerError, schema.SettingsResponse{
|
||||
Success: false,
|
||||
Error: "Failed to marshal settings: " + err.Error(),
|
||||
})
|
||||
}
|
||||
|
||||
if err := os.WriteFile(settingsFile, settingsJSON, 0600); err != nil {
|
||||
// Persist as a partial update: overlay only the fields this request set
|
||||
// onto the settings already on disk. Focused admin pages POST just the
|
||||
// keys they own (the Middleware proxy tab sends only mitm_listen; the
|
||||
// detector table only pii_default_detectors), so writing the request
|
||||
// body verbatim would null every unrelated setting (the no-omitempty
|
||||
// api_keys / pii_default_detectors fields even round-trip as JSON
|
||||
// null). The full Settings page still round-trips every field, so its
|
||||
// Save is unchanged.
|
||||
toPersist := persisted
|
||||
toPersist.MergeNonNil(settings)
|
||||
if err := appConfig.WritePersistedSettings(toPersist); err != nil {
|
||||
return c.JSON(http.StatusInternalServerError, schema.SettingsResponse{
|
||||
Success: false,
|
||||
Error: "Failed to write settings file: " + err.Error(),
|
||||
@@ -262,7 +271,14 @@ func UpdateSettingsEndpoint(app *application.Application) echo.HandlerFunc {
|
||||
}
|
||||
}
|
||||
|
||||
if settings.MITMListen != nil {
|
||||
// Rebuild the MITM listener when its address OR the instance-wide
|
||||
// default detectors change. The per-host detector map is resolved once
|
||||
// at listener start (startMITMLocked → ResolvePIIPolicy), so a
|
||||
// default-detector change is otherwise invisible to cloud-proxy traffic
|
||||
// until the next restart — an admin toggling a default detector would
|
||||
// see no redaction. RestartMITM is a no-op when the listener is
|
||||
// disabled (empty address).
|
||||
if settings.MITMListen != nil || settings.PIIDefaultDetectors != nil {
|
||||
if err := app.RestartMITM(); err != nil {
|
||||
xlog.Error("Failed to restart MITM proxy", "error", err)
|
||||
return c.JSON(http.StatusInternalServerError, schema.SettingsResponse{
|
||||
|
||||
@@ -52,6 +52,10 @@ var _ = Describe("Settings endpoints", func() {
|
||||
// Settings are persisted here; set after construction since there's no
|
||||
// dedicated AppOption for it.
|
||||
app.ApplicationConfig().DynamicConfigsDir = tmp
|
||||
// Contain the MITM CA inside tmp too. The partial-save spec flips
|
||||
// mitm_listen, which starts the listener and writes a CA; without this
|
||||
// it defaults to ./mitm-ca and litters the package source tree.
|
||||
app.ApplicationConfig().MITMCADir = filepath.Join(tmp, "mitm-ca")
|
||||
|
||||
e = echo.New()
|
||||
e.GET("/api/settings", GetSettingsEndpoint(app))
|
||||
@@ -109,6 +113,57 @@ var _ = Describe("Settings endpoints", func() {
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
})
|
||||
|
||||
// Regression: a focused admin page (the Middleware proxy tab) POSTs only
|
||||
// the one field it owns — mitm_listen. The old handler wrote the request
|
||||
// body verbatim, so every other persisted setting was dropped (and
|
||||
// api_keys / pii_default_detectors, which lack omitempty, were written as
|
||||
// null). A partial POST must now merge onto what is already on disk.
|
||||
It("preserves unrelated persisted settings when a partial POST sets only mitm_listen", func() {
|
||||
// First save establishes a fuller settings file (as the full Settings
|
||||
// page would): galleries, an API key, and the MITM listener. The
|
||||
// listener restart binds a real socket, so use 127.0.0.1:0 for an
|
||||
// ephemeral free port rather than a fixed one that may be in use.
|
||||
rec := post(`{"mitm_listen":"127.0.0.1:0","galleries":[{"name":"g1","url":"http://example/g1"}],"api_keys":["k1"],"pii_default_detectors":["det-a"]}`)
|
||||
Expect(rec.Code).To(Equal(http.StatusOK), rec.Body.String())
|
||||
|
||||
// The Middleware proxy tab then changes only the listen address — the
|
||||
// exact partial body that nulled everything else before the fix.
|
||||
rec = post(`{"mitm_listen":"127.0.0.1:0"}`)
|
||||
Expect(rec.Code).To(Equal(http.StatusOK), rec.Body.String())
|
||||
|
||||
raw, err := os.ReadFile(filepath.Join(tmp, "runtime_settings.json"))
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
var ondisk config.RuntimeSettings
|
||||
Expect(json.Unmarshal(raw, &ondisk)).To(Succeed())
|
||||
|
||||
Expect(ondisk.MITMListen).ToNot(BeNil())
|
||||
Expect(*ondisk.MITMListen).To(Equal("127.0.0.1:0"), "the changed field should be saved")
|
||||
Expect(ondisk.Galleries).ToNot(BeNil(), "galleries were clobbered by the partial save")
|
||||
Expect(*ondisk.Galleries).To(HaveLen(1))
|
||||
Expect(ondisk.ApiKeys).ToNot(BeNil(), "api_keys were nulled by the partial save")
|
||||
Expect(*ondisk.ApiKeys).To(Equal([]string{"k1"}))
|
||||
Expect(ondisk.PIIDefaultDetectors).ToNot(BeNil(), "pii_default_detectors were nulled by the partial save")
|
||||
Expect(*ondisk.PIIDefaultDetectors).To(Equal([]string{"det-a"}))
|
||||
})
|
||||
|
||||
// The MITM listener resolves its per-host PII detectors once at start
|
||||
// (startMITMLocked → ResolvePIIPolicy), and the handler used to restart it
|
||||
// only when mitm_listen changed. So an admin toggling a default detector
|
||||
// (the Middleware detector table POSTs only pii_default_detectors) left
|
||||
// cloud-proxy traffic unredacted until the next reboot. A
|
||||
// pii_default_detectors change must now rebuild the listener.
|
||||
It("rebuilds the MITM listener when only pii_default_detectors changes", func() {
|
||||
rec := post(`{"mitm_listen":"127.0.0.1:0"}`)
|
||||
Expect(rec.Code).To(Equal(http.StatusOK), rec.Body.String())
|
||||
srv1 := app.MITMServer()
|
||||
Expect(srv1).ToNot(BeNil(), "listener should be running after mitm_listen is set")
|
||||
|
||||
rec = post(`{"pii_default_detectors":["det-a"]}`)
|
||||
Expect(rec.Code).To(Equal(http.StatusOK), rec.Body.String())
|
||||
Expect(app.MITMServer()).ToNot(BeIdenticalTo(srv1),
|
||||
"a default-detector change must restart the listener so it picks up the new detectors")
|
||||
})
|
||||
|
||||
// Residual #9125: enabling the watchdog from a cold (off) state via the
|
||||
// React master toggle must start the live watchdog immediately, without a
|
||||
// restart. The toggle posts watchdog_idle_enabled/busy_enabled=true while
|
||||
|
||||
@@ -12,6 +12,7 @@ import (
|
||||
"os"
|
||||
"strconv"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"net/http"
|
||||
@@ -134,6 +135,18 @@ type Session struct {
|
||||
// pairs are kept together so we never feed an orphaned tool result.
|
||||
MaxHistoryItems int
|
||||
|
||||
// Compaction settings resolved from pipeline.compaction (see resolveCompaction).
|
||||
CompactionEnabled bool
|
||||
CompactionTrigger int
|
||||
SummaryModel string
|
||||
MaxSummaryTokens int
|
||||
|
||||
// summarizerFactory lazily builds the model used for compaction summaries
|
||||
// when summary_model is configured; nil means reuse the pipeline LLM.
|
||||
summarizerFactory func() (Model, error)
|
||||
summarizerOnce sync.Once
|
||||
summarizerCached Model
|
||||
|
||||
// AssistantExecutor is non-nil when the session opted into the in-process
|
||||
// LocalAI Assistant tool surface. Tool calls whose name matches this
|
||||
// executor's catalog are run inproc and their output is fed back to the
|
||||
@@ -241,6 +254,12 @@ type Conversation struct {
|
||||
ID string
|
||||
Items []*types.MessageItemUnion
|
||||
Lock sync.Mutex
|
||||
// Memory is the rolling summary of items already evicted by compaction. It
|
||||
// is kept out of Items (so trimRealtimeItems never drops it) and rendered
|
||||
// as a system message right after the session instructions.
|
||||
Memory string
|
||||
// compacting ensures at most one background compaction runs per conversation.
|
||||
compacting atomic.Bool
|
||||
}
|
||||
|
||||
func (c *Conversation) ToServer() types.Conversation {
|
||||
@@ -540,13 +559,12 @@ func runRealtimeSession(application *application.Application, t Transport, model
|
||||
SoundDetectionWindowMs: cfg.Pipeline.SoundDetectionWindowMs,
|
||||
SoundDetectionHopMs: cfg.Pipeline.SoundDetectionHopMs,
|
||||
}
|
||||
session.CompactionEnabled, session.CompactionTrigger, session.MaxSummaryTokens, session.SummaryModel = resolveCompaction(cfg, session.MaxHistoryItems)
|
||||
|
||||
// Create a default conversation
|
||||
conversationID := generateConversationID()
|
||||
conversation := &Conversation{
|
||||
ID: conversationID,
|
||||
// TODO: We need to truncate the conversation items when a new item is added and we have run out of space. There are multiple places where items
|
||||
// can be added so we could use a datastructure here that enforces truncation upon addition
|
||||
ID: conversationID,
|
||||
Items: []*types.MessageItemUnion{},
|
||||
}
|
||||
session.Conversations[conversationID] = conversation
|
||||
@@ -577,6 +595,18 @@ func runRealtimeSession(application *application.Application, t Transport, model
|
||||
}
|
||||
session.ModelInterface = m
|
||||
|
||||
if session.SummaryModel != "" {
|
||||
summaryModelName := session.SummaryModel
|
||||
sid := sessionID
|
||||
session.summarizerFactory = func() (Model, error) {
|
||||
summaryCfg, lerr := application.ModelConfigLoader().LoadModelConfigFileByNameDefaultOptions(summaryModelName, application.ApplicationConfig())
|
||||
if lerr != nil {
|
||||
return nil, fmt.Errorf("load summary model config %q: %w", summaryModelName, lerr)
|
||||
}
|
||||
return newModel(&summaryCfg.Pipeline, application.ModelConfigLoader(), application.ModelLoader(), application.ApplicationConfig(), evaluator, buildRealtimeRoutingContext(application, sid))
|
||||
}
|
||||
}
|
||||
|
||||
if cfg.Pipeline.VoiceGateEnabled() {
|
||||
gate, gerr := newVoiceGate(
|
||||
*cfg.Pipeline.VoiceRecognition,
|
||||
@@ -807,6 +837,15 @@ func runRealtimeSession(application *application.Application, t Transport, model
|
||||
commitUtterance(respCtx, allAudio, session, conversation, t)
|
||||
}()
|
||||
|
||||
case types.InputAudioBufferClearEvent:
|
||||
xlog.Debug("recv", "message", string(msg))
|
||||
// Discard a partially-captured utterance so the client can restart
|
||||
// input cleanly without the stale buffer leaking into the next commit.
|
||||
clearInputAudio(session)
|
||||
sendEvent(t, types.InputAudioBufferClearedEvent{
|
||||
ServerEventBase: types.ServerEventBase{EventID: e.EventID},
|
||||
})
|
||||
|
||||
case types.ConversationItemCreateEvent:
|
||||
xlog.Debug("recv", "message", string(msg))
|
||||
// Add the item to the conversation
|
||||
@@ -841,7 +880,39 @@ func runRealtimeSession(application *application.Application, t Transport, model
|
||||
})
|
||||
|
||||
case types.ConversationItemDeleteEvent:
|
||||
sendError(t, "not_implemented", "Deleting items not implemented", "", "event_TODO")
|
||||
xlog.Debug("recv", "message", string(msg))
|
||||
if e.ItemID == "" {
|
||||
sendError(t, "invalid_item_id", "Need item_id, but none specified", "", "event_TODO")
|
||||
continue
|
||||
}
|
||||
conversation.Lock.Lock()
|
||||
updated, ok := deleteItem(conversation.Items, e.ItemID)
|
||||
conversation.Items = updated
|
||||
conversation.Lock.Unlock()
|
||||
if !ok {
|
||||
sendError(t, "invalid_item_id", "Item to delete not found", "", "event_TODO")
|
||||
continue
|
||||
}
|
||||
sendEvent(t, types.ConversationItemDeletedEvent{
|
||||
ServerEventBase: types.ServerEventBase{EventID: e.EventID},
|
||||
ItemID: e.ItemID,
|
||||
})
|
||||
|
||||
case types.ConversationItemTruncateEvent:
|
||||
xlog.Debug("recv", "message", string(msg))
|
||||
conversation.Lock.Lock()
|
||||
ok := truncateAssistantText(conversation.Items, e.ItemID, e.ContentIndex)
|
||||
conversation.Lock.Unlock()
|
||||
if !ok {
|
||||
sendError(t, "invalid_item_id", "Item to truncate not found", "", "event_TODO")
|
||||
continue
|
||||
}
|
||||
sendEvent(t, types.ConversationItemTruncatedEvent{
|
||||
ServerEventBase: types.ServerEventBase{EventID: e.EventID},
|
||||
ItemID: e.ItemID,
|
||||
ContentIndex: e.ContentIndex,
|
||||
AudioEndMs: e.AudioEndMs,
|
||||
})
|
||||
|
||||
case types.ConversationItemRetrieveEvent:
|
||||
xlog.Debug("recv", "message", string(msg))
|
||||
@@ -854,21 +925,7 @@ func runRealtimeSession(application *application.Application, t Transport, model
|
||||
conversation.Lock.Lock()
|
||||
var retrievedItem types.MessageItemUnion
|
||||
for _, item := range conversation.Items {
|
||||
// We need to check ID in the union
|
||||
var id string
|
||||
if item.System != nil {
|
||||
id = item.System.ID
|
||||
} else if item.User != nil {
|
||||
id = item.User.ID
|
||||
} else if item.Assistant != nil {
|
||||
id = item.Assistant.ID
|
||||
} else if item.FunctionCall != nil {
|
||||
id = item.FunctionCall.ID
|
||||
} else if item.FunctionCallOutput != nil {
|
||||
id = item.FunctionCallOutput.ID
|
||||
}
|
||||
|
||||
if id == e.ItemID {
|
||||
if itemID(item) == e.ItemID {
|
||||
retrievedItem = *item
|
||||
break
|
||||
}
|
||||
@@ -1666,6 +1723,9 @@ const maxAssistantToolTurns = 10
|
||||
|
||||
func triggerResponse(ctx context.Context, session *Session, conv *Conversation, t Transport, overrides *types.ResponseCreateParams) {
|
||||
triggerResponseAtTurn(ctx, session, conv, t, overrides, 0)
|
||||
// Fold aged-out turns into the rolling memory off the critical path; the
|
||||
// next turn reaps the smaller buffer.
|
||||
session.maybeCompact(conv)
|
||||
}
|
||||
|
||||
func triggerResponseAtTurn(ctx context.Context, session *Session, conv *Conversation, t Transport, overrides *types.ResponseCreateParams, toolTurn int) {
|
||||
@@ -1721,6 +1781,7 @@ func triggerResponseAtTurn(ctx context.Context, session *Session, conv *Conversa
|
||||
var lastUserSpeaker *types.Speaker
|
||||
personalize := session.voiceGate != nil && session.voiceGate.cfg.PersonalizeEnabled()
|
||||
conv.Lock.Lock()
|
||||
conversationHistory = withMemory(conversationHistory, conv.Memory)
|
||||
items := trimRealtimeItems(conv.Items, session.MaxHistoryItems)
|
||||
for _, item := range items {
|
||||
if item.User != nil {
|
||||
|
||||
326
core/http/endpoints/openai/realtime_compaction.go
Normal file
326
core/http/endpoints/openai/realtime_compaction.go
Normal file
@@ -0,0 +1,326 @@
|
||||
package openai
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
"github.com/mudler/LocalAI/core/http/endpoints/openai/types"
|
||||
"github.com/mudler/LocalAI/core/schema"
|
||||
"github.com/mudler/LocalAI/pkg/reasoning"
|
||||
"github.com/mudler/xlog"
|
||||
)
|
||||
|
||||
// Tunables for realtime conversation compaction.
const (
	// defaultMaxSummaryTokens is the summary budget resolveCompaction applies
	// when the compaction config leaves MaxSummaryTokens unset or non-positive.
	defaultMaxSummaryTokens = 512

	// memoryPrefix introduces the rolling summary inside the system message
	// built by withMemory.
	memoryPrefix = "Summary of earlier conversation:\n"

	// compactionTimeout caps a single summarizer call, so a hung model cannot
	// hold the compacting flag (and thereby stall every later compaction)
	// indefinitely.
	compactionTimeout = 60 * time.Second
)
|
||||
|
||||
// withMemory inserts the rolling summary as a system message after the existing
|
||||
// (instructions) history. No-op when memory is empty.
|
||||
func withMemory(history schema.Messages, memory string) schema.Messages {
|
||||
if memory == "" {
|
||||
return history
|
||||
}
|
||||
content := memoryPrefix + memory
|
||||
return append(history, schema.Message{
|
||||
Role: string(types.MessageRoleSystem),
|
||||
StringContent: content,
|
||||
Content: content,
|
||||
})
|
||||
}
|
||||
|
||||
// renderItemsTranscript renders conversation items as a plain "role: text"
|
||||
// transcript for summarization. Non-text items (bare tool calls) are labelled
|
||||
// so the summarizer keeps track of actions taken.
|
||||
func renderItemsTranscript(items []*types.MessageItemUnion) string {
|
||||
var b strings.Builder
|
||||
for _, item := range items {
|
||||
switch {
|
||||
case item.User != nil:
|
||||
b.WriteString("user: ")
|
||||
for _, c := range item.User.Content {
|
||||
if c.Text != "" {
|
||||
b.WriteString(c.Text)
|
||||
}
|
||||
if c.Transcript != "" {
|
||||
b.WriteString(c.Transcript)
|
||||
}
|
||||
}
|
||||
b.WriteString("\n")
|
||||
case item.Assistant != nil:
|
||||
b.WriteString("assistant: ")
|
||||
// Realtime assistant *audio* turns store the spoken words in
|
||||
// .Transcript (not .Text), so emit both or spoken turns are dropped.
|
||||
for _, c := range item.Assistant.Content {
|
||||
if c.Text != "" {
|
||||
b.WriteString(c.Text)
|
||||
}
|
||||
if c.Transcript != "" {
|
||||
b.WriteString(c.Transcript)
|
||||
}
|
||||
}
|
||||
b.WriteString("\n")
|
||||
case item.FunctionCall != nil:
|
||||
b.WriteString(fmt.Sprintf("assistant called tool %s(%s)\n", item.FunctionCall.Name, item.FunctionCall.Arguments))
|
||||
case item.FunctionCallOutput != nil:
|
||||
b.WriteString(fmt.Sprintf("tool result: %s\n", item.FunctionCallOutput.Output))
|
||||
}
|
||||
}
|
||||
return strings.TrimSpace(b.String())
|
||||
}
|
||||
|
||||
// buildSummaryMessages builds the chat messages for the summarizer LLM: a system
|
||||
// instruction plus prior memory and the new transcript to fold in. maxTokens is
|
||||
// advisory (fed to the prompt; not hard-enforced in v1).
|
||||
func buildSummaryMessages(priorMemory, transcript string, maxTokens int) schema.Messages {
|
||||
system := fmt.Sprintf("You maintain a running memory of a live voice conversation. "+
|
||||
"Merge the prior memory with the new exchanges into an updated memory. "+
|
||||
"Keep names, decisions, facts, preferences, and open threads. Be concise "+
|
||||
"(under ~%d tokens). Output only the updated memory, with no reasoning or tags.", maxTokens)
|
||||
var user strings.Builder
|
||||
if priorMemory != "" {
|
||||
user.WriteString("Prior memory:\n")
|
||||
user.WriteString(priorMemory)
|
||||
user.WriteString("\n\n")
|
||||
}
|
||||
user.WriteString("New exchanges to fold in:\n")
|
||||
user.WriteString(transcript)
|
||||
return schema.Messages{
|
||||
{Role: string(types.MessageRoleSystem), StringContent: system, Content: system},
|
||||
{Role: string(types.MessageRoleUser), StringContent: user.String(), Content: user.String()},
|
||||
}
|
||||
}
|
||||
|
||||
// clearInputAudio resets the session's pending input audio buffer (the raw
|
||||
// PCM and any buffered Opus frames). Used by the input_audio_buffer.clear
|
||||
// realtime event so a client can discard a partially-captured utterance.
|
||||
func clearInputAudio(s *Session) {
|
||||
s.AudioBufferLock.Lock()
|
||||
s.InputAudioBuffer = nil
|
||||
s.AudioBufferLock.Unlock()
|
||||
s.OpusFramesLock.Lock()
|
||||
s.OpusFrames = nil
|
||||
s.OpusFramesLock.Unlock()
|
||||
}
|
||||
|
||||
// itemID extracts the id from any MessageItemUnion variant ("" if none).
|
||||
func itemID(item *types.MessageItemUnion) string {
|
||||
switch {
|
||||
case item == nil:
|
||||
return ""
|
||||
case item.System != nil:
|
||||
return item.System.ID
|
||||
case item.User != nil:
|
||||
return item.User.ID
|
||||
case item.Assistant != nil:
|
||||
return item.Assistant.ID
|
||||
case item.FunctionCall != nil:
|
||||
return item.FunctionCall.ID
|
||||
case item.FunctionCallOutput != nil:
|
||||
return item.FunctionCallOutput.ID
|
||||
default:
|
||||
return ""
|
||||
}
|
||||
}
|
||||
|
||||
// deleteItem removes the item with id from items, returning the new slice and
|
||||
// whether it was found.
|
||||
func deleteItem(items []*types.MessageItemUnion, id string) ([]*types.MessageItemUnion, bool) {
|
||||
for i, item := range items {
|
||||
if itemID(item) == id {
|
||||
return append(items[:i:i], items[i+1:]...), true
|
||||
}
|
||||
}
|
||||
return items, false
|
||||
}
|
||||
|
||||
// truncateAssistantText clears the text of the assistant item's content part at
|
||||
// contentIndex. Minimal truncate: used to discard an interrupted/barge-in
|
||||
// response tail. Both .Text and .Transcript are cleared because realtime audio
|
||||
// turns store the spoken words in .Transcript (clearing only .Text would no-op).
|
||||
func truncateAssistantText(items []*types.MessageItemUnion, id string, contentIndex int) bool {
|
||||
for _, item := range items {
|
||||
if itemID(item) != id || item.Assistant == nil {
|
||||
continue
|
||||
}
|
||||
if contentIndex >= 0 && contentIndex < len(item.Assistant.Content) {
|
||||
item.Assistant.Content[contentIndex].Text = ""
|
||||
item.Assistant.Content[contentIndex].Transcript = ""
|
||||
}
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// compactionCut returns the index splitting items into overflow (items[:cut],
|
||||
// to be summarized+evicted) and the kept live tail (items[cut:]), keeping the
|
||||
// last `keep` items. It mirrors trimRealtimeItems' pair-safety: the cut is
|
||||
// pulled left so a function_call and its function_call_output are never split
|
||||
// across the boundary (the whole pair lands in the kept tail). Returns 0 when
|
||||
// there is nothing to cut.
|
||||
func compactionCut(items []*types.MessageItemUnion, keep int) int {
|
||||
// keep <= 0 means no live-window cap (the "unlimited history" sentinel, as
|
||||
// in trimRealtimeItems): there is nothing to evict, so cut nothing. This
|
||||
// also avoids indexing items[len(items)] in the pair-safety loop below.
|
||||
if keep <= 0 {
|
||||
return 0
|
||||
}
|
||||
cut := len(items) - keep
|
||||
if cut <= 0 {
|
||||
return 0
|
||||
}
|
||||
for cut > 0 && items[cut] != nil && items[cut].FunctionCallOutput != nil {
|
||||
cut--
|
||||
}
|
||||
return cut
|
||||
}
|
||||
|
||||
// resolveCompaction reads the pipeline.compaction block, applying defaults and
|
||||
// the trigger>max_history invariant. maxHistory is the already-resolved live
|
||||
// window size. Returns enabled=false (and zero values) when compaction is off.
|
||||
func resolveCompaction(cfg *config.ModelConfig, maxHistory int) (enabled bool, trigger, maxSummaryTokens int, summaryModel string) {
|
||||
if cfg == nil || cfg.Pipeline.Compaction == nil || !cfg.Pipeline.Compaction.Enabled {
|
||||
return false, 0, 0, ""
|
||||
}
|
||||
c := cfg.Pipeline.Compaction
|
||||
trigger = c.TriggerItems
|
||||
if trigger <= 0 {
|
||||
trigger = maxHistory * 2
|
||||
}
|
||||
if trigger <= maxHistory {
|
||||
trigger = maxHistory + 1
|
||||
}
|
||||
maxSummaryTokens = c.MaxSummaryTokens
|
||||
if maxSummaryTokens <= 0 {
|
||||
maxSummaryTokens = defaultMaxSummaryTokens
|
||||
}
|
||||
return true, trigger, maxSummaryTokens, c.SummaryModel
|
||||
}
|
||||
|
||||
// prefixMatches reports whether items begins with the same ids, in order, as
|
||||
// snapshot — i.e. the overflow we summarized is still at the head (no concurrent
|
||||
// client delete reshuffled it).
|
||||
func prefixMatches(items, snapshot []*types.MessageItemUnion) bool {
|
||||
if len(items) < len(snapshot) {
|
||||
return false
|
||||
}
|
||||
for i := range snapshot {
|
||||
if itemID(items[i]) != itemID(snapshot[i]) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// compact folds overflow items into conv.Memory and evicts them. It never holds
|
||||
// conv.Lock across the summarizer call: snapshot under lock, summarize unlocked,
|
||||
// commit under lock (re-validating the head is unchanged). On any error it
|
||||
// leaves the conversation untouched — items are never dropped without a summary.
|
||||
func (s *Session) compact(conv *Conversation, model Model) {
|
||||
if model == nil {
|
||||
return
|
||||
}
|
||||
// Snapshot.
|
||||
conv.Lock.Lock()
|
||||
if len(conv.Items) <= s.CompactionTrigger {
|
||||
conv.Lock.Unlock()
|
||||
return
|
||||
}
|
||||
cut := compactionCut(conv.Items, s.MaxHistoryItems)
|
||||
if cut <= 0 {
|
||||
conv.Lock.Unlock()
|
||||
return
|
||||
}
|
||||
overflow := append([]*types.MessageItemUnion(nil), conv.Items[:cut]...)
|
||||
prior := conv.Memory
|
||||
conv.Lock.Unlock()
|
||||
|
||||
// Summarize (unlocked).
|
||||
msgs := buildSummaryMessages(prior, renderItemsTranscript(overflow), s.MaxSummaryTokens)
|
||||
ctx, cancel := context.WithTimeout(context.Background(), compactionTimeout)
|
||||
defer cancel()
|
||||
predFunc, err := model.Predict(ctx, msgs, nil, nil, nil, nil, nil, nil, nil, nil, nil)
|
||||
if err != nil {
|
||||
xlog.Warn("realtime compaction: summarizer predict failed", "error", err)
|
||||
return
|
||||
}
|
||||
pred, err := predFunc()
|
||||
if err != nil {
|
||||
xlog.Warn("realtime compaction: summarizer inference failed", "error", err)
|
||||
return
|
||||
}
|
||||
// Strip any leaked reasoning/thinking spans using the same extractor the
|
||||
// rest of the realtime path uses, rather than a bespoke regex.
|
||||
rcfg := reasoning.Config{}
|
||||
if mc := model.PredictConfig(); mc != nil {
|
||||
rcfg = spokenReasoningConfig(mc.ReasoningConfig)
|
||||
}
|
||||
_, summary := reasoning.ExtractReasoningComplete(pred.Response, "", rcfg)
|
||||
summary = strings.TrimSpace(summary)
|
||||
if summary == "" {
|
||||
xlog.Warn("realtime compaction: empty summary, skipping eviction")
|
||||
return
|
||||
}
|
||||
|
||||
// Commit.
|
||||
conv.Lock.Lock()
|
||||
defer conv.Lock.Unlock()
|
||||
if !prefixMatches(conv.Items, overflow) {
|
||||
xlog.Debug("realtime compaction: head changed during summary, skipping")
|
||||
return
|
||||
}
|
||||
conv.Memory = summary
|
||||
conv.Items = conv.Items[len(overflow):]
|
||||
xlog.Debug("realtime compaction: evicted items into memory", "evicted", len(overflow), "remaining", len(conv.Items))
|
||||
}
|
||||
|
||||
// summarizerModel resolves the model used to produce compaction summaries.
|
||||
// Without a configured summary_model (or factory) it reuses the pipeline LLM.
|
||||
func (s *Session) summarizerModel() Model {
|
||||
if s.SummaryModel == "" || s.summarizerFactory == nil {
|
||||
return s.ModelInterface
|
||||
}
|
||||
s.summarizerOnce.Do(func() {
|
||||
m, err := s.summarizerFactory()
|
||||
if err != nil {
|
||||
xlog.Warn("realtime compaction: summary_model load failed, falling back to pipeline LLM", "model", s.SummaryModel, "error", err)
|
||||
m = s.ModelInterface
|
||||
}
|
||||
s.summarizerCached = m
|
||||
})
|
||||
return s.summarizerCached
|
||||
}
|
||||
|
||||
// maybeCompact schedules a background compaction when the live buffer has grown
|
||||
// past the trigger and none is already running. Returns immediately.
|
||||
func (s *Session) maybeCompact(conv *Conversation) {
|
||||
if !s.CompactionEnabled {
|
||||
return
|
||||
}
|
||||
conv.Lock.Lock()
|
||||
over := len(conv.Items) > s.CompactionTrigger
|
||||
conv.Lock.Unlock()
|
||||
if !over {
|
||||
return
|
||||
}
|
||||
if !conv.compacting.CompareAndSwap(false, true) {
|
||||
return
|
||||
}
|
||||
go func() {
|
||||
defer conv.compacting.Store(false)
|
||||
// Resolve (and, for a configured summary_model, lazily load) the
|
||||
// summarizer only when a compaction actually runs, off the response
|
||||
// path — so the model load never blocks a user turn.
|
||||
model := s.summarizerModel()
|
||||
if model == nil {
|
||||
return
|
||||
}
|
||||
s.compact(conv, model)
|
||||
}()
|
||||
}
|
||||
308
core/http/endpoints/openai/realtime_compaction_test.go
Normal file
308
core/http/endpoints/openai/realtime_compaction_test.go
Normal file
@@ -0,0 +1,308 @@
|
||||
package openai
|
||||
|
||||
import (
|
||||
"errors"
|
||||
|
||||
. "github.com/onsi/ginkgo/v2"
|
||||
. "github.com/onsi/gomega"
|
||||
|
||||
"github.com/mudler/LocalAI/core/backend"
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
"github.com/mudler/LocalAI/core/http/endpoints/openai/types"
|
||||
"github.com/mudler/LocalAI/core/schema"
|
||||
)
|
||||
|
||||
var _ = Describe("resolveCompaction", func() {
|
||||
It("disables when the block is absent", func() {
|
||||
enabled, _, _, _ := resolveCompaction(&config.ModelConfig{}, 6)
|
||||
Expect(enabled).To(BeFalse())
|
||||
})
|
||||
|
||||
It("defaults trigger to 2x max history and tokens to 512", func() {
|
||||
cfg := &config.ModelConfig{Pipeline: config.Pipeline{Compaction: &config.PipelineCompaction{Enabled: true}}}
|
||||
enabled, trigger, maxTok, _ := resolveCompaction(cfg, 6)
|
||||
Expect(enabled).To(BeTrue())
|
||||
Expect(trigger).To(Equal(12))
|
||||
Expect(maxTok).To(Equal(512))
|
||||
})
|
||||
|
||||
It("clamps trigger to max history + 1 when misconfigured", func() {
|
||||
cfg := &config.ModelConfig{Pipeline: config.Pipeline{Compaction: &config.PipelineCompaction{Enabled: true, TriggerItems: 4}}}
|
||||
_, trigger, _, _ := resolveCompaction(cfg, 6)
|
||||
Expect(trigger).To(Equal(7))
|
||||
})
|
||||
|
||||
It("honors explicit values", func() {
|
||||
cfg := &config.ModelConfig{Pipeline: config.Pipeline{Compaction: &config.PipelineCompaction{
|
||||
Enabled: true, TriggerItems: 20, MaxSummaryTokens: 256, SummaryModel: "tiny"}}}
|
||||
enabled, trigger, maxTok, model := resolveCompaction(cfg, 6)
|
||||
Expect(enabled).To(BeTrue())
|
||||
Expect(trigger).To(Equal(20))
|
||||
Expect(maxTok).To(Equal(256))
|
||||
Expect(model).To(Equal("tiny"))
|
||||
})
|
||||
})
|
||||
|
||||
var _ = Describe("deleteItem", func() {
|
||||
mk := func(ids ...string) []*types.MessageItemUnion {
|
||||
out := make([]*types.MessageItemUnion, len(ids))
|
||||
for i, id := range ids {
|
||||
out[i] = &types.MessageItemUnion{User: &types.MessageItemUser{ID: id}}
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
It("removes the item with the given id", func() {
|
||||
items, ok := deleteItem(mk("a", "b", "c"), "b")
|
||||
Expect(ok).To(BeTrue())
|
||||
Expect(len(items)).To(Equal(2))
|
||||
Expect(itemID(items[0])).To(Equal("a"))
|
||||
Expect(itemID(items[1])).To(Equal("c"))
|
||||
})
|
||||
|
||||
It("reports not found for an unknown id", func() {
|
||||
_, ok := deleteItem(mk("a"), "zzz")
|
||||
Expect(ok).To(BeFalse())
|
||||
})
|
||||
})
|
||||
|
||||
var _ = Describe("clearInputAudio", func() {
|
||||
It("resets the pending PCM and buffered Opus frames", func() {
|
||||
s := &Session{InputAudioBuffer: []byte{1, 2, 3}, OpusFrames: [][]byte{{9}}}
|
||||
clearInputAudio(s)
|
||||
Expect(s.InputAudioBuffer).To(BeNil())
|
||||
Expect(s.OpusFrames).To(BeNil())
|
||||
})
|
||||
})
|
||||
|
||||
var _ = Describe("truncateAssistantText", func() {
|
||||
It("clears the text of the assistant content part at the index", func() {
|
||||
items := []*types.MessageItemUnion{{Assistant: &types.MessageItemAssistant{
|
||||
ID: "a1",
|
||||
Content: []types.MessageContentOutput{{Type: types.MessageContentTypeText, Text: "hello world"}},
|
||||
}}}
|
||||
ok := truncateAssistantText(items, "a1", 0)
|
||||
Expect(ok).To(BeTrue())
|
||||
Expect(items[0].Assistant.Content[0].Text).To(Equal(""))
|
||||
})
|
||||
|
||||
// Realtime assistant *audio* turns store the spoken words in .Transcript, not
|
||||
// .Text, so a barge-in truncate must clear .Transcript too or it would no-op.
|
||||
It("clears the transcript of an assistant audio content part", func() {
|
||||
items := []*types.MessageItemUnion{{Assistant: &types.MessageItemAssistant{
|
||||
ID: "a1",
|
||||
Content: []types.MessageContentOutput{{Type: types.MessageContentTypeAudio, Transcript: "hello world"}},
|
||||
}}}
|
||||
ok := truncateAssistantText(items, "a1", 0)
|
||||
Expect(ok).To(BeTrue())
|
||||
Expect(items[0].Assistant.Content[0].Transcript).To(Equal(""))
|
||||
})
|
||||
|
||||
It("returns false for an unknown id", func() {
|
||||
Expect(truncateAssistantText(nil, "nope", 0)).To(BeFalse())
|
||||
})
|
||||
})
|
||||
|
||||
var _ = Describe("compactionCut", func() {
|
||||
user := func(id string) *types.MessageItemUnion {
|
||||
return &types.MessageItemUnion{User: &types.MessageItemUser{ID: id}}
|
||||
}
|
||||
call := func(id string) *types.MessageItemUnion {
|
||||
return &types.MessageItemUnion{FunctionCall: &types.MessageItemFunctionCall{ID: id}}
|
||||
}
|
||||
out := func(id string) *types.MessageItemUnion {
|
||||
return &types.MessageItemUnion{FunctionCallOutput: &types.MessageItemFunctionCallOutput{ID: id}}
|
||||
}
|
||||
|
||||
It("cuts exactly len-keep when no pairs straddle the boundary", func() {
|
||||
items := []*types.MessageItemUnion{user("1"), user("2"), user("3"), user("4")}
|
||||
Expect(compactionCut(items, 2)).To(Equal(2))
|
||||
})
|
||||
|
||||
It("returns 0 when nothing to cut", func() {
|
||||
Expect(compactionCut([]*types.MessageItemUnion{user("1")}, 2)).To(Equal(0))
|
||||
})
|
||||
|
||||
It("returns 0 (cuts nothing) when keep is 0 — the unlimited-window sentinel", func() {
|
||||
items := []*types.MessageItemUnion{user("1"), user("2"), user("3")}
|
||||
Expect(compactionCut(items, 0)).To(Equal(0))
|
||||
})
|
||||
|
||||
It("moves the boundary so a call/output pair is not split", func() {
|
||||
// keep=2 -> naive cut=2, but items[2] is the output of items[1]'s call;
|
||||
// pull the cut right so the whole pair stays in the kept tail.
|
||||
items := []*types.MessageItemUnion{user("1"), call("c"), out("c"), user("4")}
|
||||
Expect(compactionCut(items, 2)).To(Equal(1))
|
||||
})
|
||||
})
|
||||
|
||||
var _ = Describe("withMemory", func() {
|
||||
It("inserts a memory system message when memory is non-empty", func() {
|
||||
base := schema.Messages{{Role: "system", StringContent: "instructions"}}
|
||||
out := withMemory(base, "user is Bob; wants pizza")
|
||||
Expect(len(out)).To(Equal(2))
|
||||
Expect(out[1].Role).To(Equal("system"))
|
||||
Expect(out[1].StringContent).To(ContainSubstring("user is Bob"))
|
||||
Expect(out[1].StringContent).To(ContainSubstring("Summary of earlier conversation"))
|
||||
})
|
||||
|
||||
It("is a no-op when memory is empty", func() {
|
||||
base := schema.Messages{{Role: "system", StringContent: "instructions"}}
|
||||
Expect(withMemory(base, "")).To(HaveLen(1))
|
||||
})
|
||||
})
|
||||
|
||||
var _ = Describe("renderItemsTranscript", func() {
|
||||
It("renders user and assistant text turns", func() {
|
||||
items := []*types.MessageItemUnion{
|
||||
{User: &types.MessageItemUser{Content: []types.MessageContentInput{{Type: types.MessageContentTypeInputText, Text: "hi"}}}},
|
||||
{Assistant: &types.MessageItemAssistant{Content: []types.MessageContentOutput{{Type: types.MessageContentTypeText, Text: "hello"}}}},
|
||||
}
|
||||
out := renderItemsTranscript(items)
|
||||
Expect(out).To(ContainSubstring("user: hi"))
|
||||
Expect(out).To(ContainSubstring("assistant: hello"))
|
||||
})
|
||||
|
||||
// Realtime assistant *audio* turns store the spoken words in .Transcript, not
|
||||
// .Text, so the transcript builder must emit .Transcript too or spoken turns
|
||||
// would be dropped from the summary.
|
||||
It("renders an assistant audio turn from its transcript", func() {
|
||||
items := []*types.MessageItemUnion{
|
||||
{Assistant: &types.MessageItemAssistant{Content: []types.MessageContentOutput{{Type: types.MessageContentTypeAudio, Transcript: "spoken words"}}}},
|
||||
}
|
||||
Expect(renderItemsTranscript(items)).To(ContainSubstring("assistant: spoken words"))
|
||||
})
|
||||
})
|
||||
|
||||
var _ = Describe("buildSummaryMessages", func() {
|
||||
It("includes prior memory and the new transcript", func() {
|
||||
msgs := buildSummaryMessages("prior facts", "user: hi", 512)
|
||||
Expect(len(msgs)).To(Equal(2))
|
||||
Expect(msgs[0].Role).To(Equal("system"))
|
||||
Expect(msgs[1].StringContent).To(ContainSubstring("prior facts"))
|
||||
Expect(msgs[1].StringContent).To(ContainSubstring("user: hi"))
|
||||
})
|
||||
})
|
||||
|
||||
var _ = Describe("compact", func() {
|
||||
user := func(id, text string) *types.MessageItemUnion {
|
||||
return &types.MessageItemUnion{User: &types.MessageItemUser{ID: id,
|
||||
Content: []types.MessageContentInput{{Type: types.MessageContentTypeInputText, Text: text}}}}
|
||||
}
|
||||
|
||||
It("summarizes overflow into Memory and evicts it, keeping the live tail", func() {
|
||||
conv := &Conversation{Items: []*types.MessageItemUnion{
|
||||
user("1", "a"), user("2", "b"), user("3", "c"), user("4", "d"),
|
||||
user("5", "e"), user("6", "f"), user("7", "g"), user("8", "h"),
|
||||
}}
|
||||
s := &Session{CompactionEnabled: true, CompactionTrigger: 7, MaxHistoryItems: 4, MaxSummaryTokens: 512}
|
||||
m := &fakeModel{predictResp: backend.LLMResponse{Response: "ROLLED UP"}}
|
||||
|
||||
s.compact(conv, m)
|
||||
|
||||
Expect(conv.Memory).To(Equal("ROLLED UP"))
|
||||
Expect(len(conv.Items)).To(Equal(4))
|
||||
Expect(itemID(conv.Items[0])).To(Equal("5"))
|
||||
// The summarizer saw the evicted turns.
|
||||
Expect(m.lastMessages[1].StringContent).To(ContainSubstring("a"))
|
||||
})
|
||||
|
||||
It("leaves Items and Memory untouched when the summarizer errors", func() {
|
||||
items := []*types.MessageItemUnion{user("1", "a"), user("2", "b"), user("3", "c")}
|
||||
conv := &Conversation{Items: items}
|
||||
s := &Session{CompactionEnabled: true, CompactionTrigger: 2, MaxHistoryItems: 1, MaxSummaryTokens: 512}
|
||||
m := &fakeModel{predictErr: errors.New("boom")}
|
||||
|
||||
s.compact(conv, m)
|
||||
|
||||
Expect(conv.Memory).To(Equal(""))
|
||||
Expect(len(conv.Items)).To(Equal(3))
|
||||
})
|
||||
|
||||
It("strips leaked reasoning tags from the summary via the shared extractor", func() {
|
||||
conv := &Conversation{Items: []*types.MessageItemUnion{
|
||||
user("1", "a"), user("2", "b"), user("3", "c"), user("4", "d"),
|
||||
user("5", "e"), user("6", "f"), user("7", "g"), user("8", "h"),
|
||||
}}
|
||||
s := &Session{CompactionEnabled: true, CompactionTrigger: 7, MaxHistoryItems: 4, MaxSummaryTokens: 512}
|
||||
m := &fakeModel{predictResp: backend.LLMResponse{Response: "<think>planning the summary</think>CLEAN SUMMARY"}}
|
||||
|
||||
s.compact(conv, m)
|
||||
|
||||
Expect(conv.Memory).To(Equal("CLEAN SUMMARY"))
|
||||
Expect(conv.Memory).ToNot(ContainSubstring("planning"))
|
||||
})
|
||||
|
||||
It("does nothing when items are at or below the trigger", func() {
|
||||
conv := &Conversation{Items: []*types.MessageItemUnion{user("1", "a")}}
|
||||
s := &Session{CompactionEnabled: true, CompactionTrigger: 7, MaxHistoryItems: 4}
|
||||
s.compact(conv, &fakeModel{predictResp: backend.LLMResponse{Response: "x"}})
|
||||
Expect(conv.Memory).To(Equal(""))
|
||||
Expect(len(conv.Items)).To(Equal(1))
|
||||
})
|
||||
})
|
||||
|
||||
var _ = Describe("prefixMatches", func() {
|
||||
user := func(id string) *types.MessageItemUnion {
|
||||
return &types.MessageItemUnion{User: &types.MessageItemUser{ID: id}}
|
||||
}
|
||||
|
||||
It("matches when items begins with the snapshot ids in order", func() {
|
||||
items := []*types.MessageItemUnion{user("1"), user("2"), user("3")}
|
||||
snap := []*types.MessageItemUnion{user("1"), user("2")}
|
||||
Expect(prefixMatches(items, snap)).To(BeTrue())
|
||||
})
|
||||
|
||||
It("matches an empty snapshot", func() {
|
||||
Expect(prefixMatches([]*types.MessageItemUnion{user("1")}, nil)).To(BeTrue())
|
||||
})
|
||||
|
||||
It("fails when items is shorter than the snapshot (a concurrent delete shrank the head)", func() {
|
||||
items := []*types.MessageItemUnion{user("1")}
|
||||
snap := []*types.MessageItemUnion{user("1"), user("2")}
|
||||
Expect(prefixMatches(items, snap)).To(BeFalse())
|
||||
})
|
||||
|
||||
It("fails when the head ids differ (a concurrent delete reordered the head)", func() {
|
||||
items := []*types.MessageItemUnion{user("2"), user("3")}
|
||||
snap := []*types.MessageItemUnion{user("1"), user("2")}
|
||||
Expect(prefixMatches(items, snap)).To(BeFalse())
|
||||
})
|
||||
})
|
||||
|
||||
var _ = Describe("summarizerModel", func() {
|
||||
It("returns the pipeline model when no summary_model is set", func() {
|
||||
m := &fakeModel{}
|
||||
s := &Session{ModelInterface: m}
|
||||
Expect(s.summarizerModel()).To(Equal(m))
|
||||
})
|
||||
|
||||
It("uses the factory (once) when summary_model is set", func() {
|
||||
pipeline := &fakeModel{}
|
||||
small := &fakeModel{}
|
||||
calls := 0
|
||||
s := &Session{ModelInterface: pipeline, SummaryModel: "tiny",
|
||||
summarizerFactory: func() (Model, error) { calls++; return small, nil }}
|
||||
Expect(s.summarizerModel()).To(Equal(small))
|
||||
Expect(s.summarizerModel()).To(Equal(small))
|
||||
Expect(calls).To(Equal(1))
|
||||
})
|
||||
|
||||
It("falls back to the pipeline model when the factory errors", func() {
|
||||
pipeline := &fakeModel{}
|
||||
s := &Session{ModelInterface: pipeline, SummaryModel: "tiny",
|
||||
summarizerFactory: func() (Model, error) { return nil, errors.New("nope") }}
|
||||
Expect(s.summarizerModel()).To(Equal(pipeline))
|
||||
})
|
||||
})
|
||||
|
||||
var _ = Describe("itemID", func() {
|
||||
It("returns the id for each variant and empty for nil", func() {
|
||||
Expect(itemID(nil)).To(Equal(""))
|
||||
Expect(itemID(&types.MessageItemUnion{User: &types.MessageItemUser{ID: "u1"}})).To(Equal("u1"))
|
||||
Expect(itemID(&types.MessageItemUnion{Assistant: &types.MessageItemAssistant{ID: "a1"}})).To(Equal("a1"))
|
||||
Expect(itemID(&types.MessageItemUnion{System: &types.MessageItemSystem{ID: "s1"}})).To(Equal("s1"))
|
||||
Expect(itemID(&types.MessageItemUnion{FunctionCall: &types.MessageItemFunctionCall{ID: "f1"}})).To(Equal("f1"))
|
||||
Expect(itemID(&types.MessageItemUnion{FunctionCallOutput: &types.MessageItemFunctionCallOutput{ID: "o1"}})).To(Equal("o1"))
|
||||
})
|
||||
})
|
||||
@@ -432,7 +432,7 @@ func loadSoundDetectionConfig(pipeline *config.Pipeline, cl *config.ModelConfigL
|
||||
if pipeline.SoundDetection == "" {
|
||||
return nil, nil
|
||||
}
|
||||
cfg, err := cl.LoadModelConfigFileByName(pipeline.SoundDetection, ml.ModelPath)
|
||||
cfg, err := loadPipelineSubModel(cl, pipeline.SoundDetection, ml.ModelPath)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to load sound detection config: %w", err)
|
||||
}
|
||||
@@ -443,7 +443,7 @@ func loadSoundDetectionConfig(pipeline *config.Pipeline, cl *config.ModelConfigL
|
||||
}
|
||||
|
||||
func newTranscriptionOnlyModel(pipeline *config.Pipeline, cl *config.ModelConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) (Model, *config.ModelConfig, error) {
|
||||
cfgVAD, err := cl.LoadModelConfigFileByName(pipeline.VAD, ml.ModelPath)
|
||||
cfgVAD, err := loadPipelineSubModel(cl, pipeline.VAD, ml.ModelPath)
|
||||
if err != nil {
|
||||
|
||||
return nil, nil, fmt.Errorf("failed to load backend config: %w", err)
|
||||
@@ -453,7 +453,7 @@ func newTranscriptionOnlyModel(pipeline *config.Pipeline, cl *config.ModelConfig
|
||||
return nil, nil, fmt.Errorf("failed to validate config: %w", err)
|
||||
}
|
||||
|
||||
cfgSST, err := cl.LoadModelConfigFileByName(pipeline.Transcription, ml.ModelPath)
|
||||
cfgSST, err := loadPipelineSubModel(cl, pipeline.Transcription, ml.ModelPath)
|
||||
if err != nil {
|
||||
|
||||
return nil, nil, fmt.Errorf("failed to load backend config: %w", err)
|
||||
@@ -542,11 +542,30 @@ func buildRealtimeRoutingContext(a *application.Application, sessionID string) *
|
||||
}
|
||||
}
|
||||
|
||||
// loadPipelineSubModel loads a pipeline sub-model config by name and follows a
|
||||
// single alias hop, so a pipeline that references an alias (e.g. `llm: default`)
|
||||
// gets the alias target's full config (Backend, Model, ...) rather than the
|
||||
// alias stub with an empty Backend. Without this the alias survives unresolved
|
||||
// into model loading and fails downstream — notably in distributed mode with
|
||||
// "backend name is empty". Mirrors the top-level alias resolution in
|
||||
// core/http/middleware/request.go.
|
||||
func loadPipelineSubModel(cl *config.ModelConfigLoader, name, modelPath string) (*config.ModelConfig, error) {
|
||||
cfg, err := cl.LoadModelConfigFileByName(name, modelPath)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
resolved, _, err := cl.ResolveAlias(cfg)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return resolved, nil
|
||||
}
|
||||
|
||||
// returns and loads either a wrapped model or a model that support audio-to-audio
|
||||
func newModel(pipeline *config.Pipeline, cl *config.ModelConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig, evaluator *templates.Evaluator, routing *RealtimeRoutingContext) (Model, error) {
|
||||
xlog.Debug("Creating new model pipeline model", "pipeline", pipeline)
|
||||
|
||||
cfgVAD, err := cl.LoadModelConfigFileByName(pipeline.VAD, ml.ModelPath)
|
||||
cfgVAD, err := loadPipelineSubModel(cl, pipeline.VAD, ml.ModelPath)
|
||||
if err != nil {
|
||||
|
||||
return nil, fmt.Errorf("failed to load backend config: %w", err)
|
||||
@@ -557,7 +576,7 @@ func newModel(pipeline *config.Pipeline, cl *config.ModelConfigLoader, ml *model
|
||||
}
|
||||
|
||||
// TODO: Do we always need a transcription model? It can be disabled. Note that any-to-any instruction following models don't transcribe as such, so if transcription is required it is a separate process
|
||||
cfgSST, err := cl.LoadModelConfigFileByName(pipeline.Transcription, ml.ModelPath)
|
||||
cfgSST, err := loadPipelineSubModel(cl, pipeline.Transcription, ml.ModelPath)
|
||||
if err != nil {
|
||||
|
||||
return nil, fmt.Errorf("failed to load backend config: %w", err)
|
||||
@@ -589,7 +608,7 @@ func newModel(pipeline *config.Pipeline, cl *config.ModelConfigLoader, ml *model
|
||||
xlog.Debug("Loading a wrapped model")
|
||||
|
||||
// Otherwise we want to return a wrapped model, which is a "virtual" model that re-uses other models to perform operations
|
||||
cfgLLM, err := cl.LoadModelConfigFileByName(pipeline.LLM, ml.ModelPath)
|
||||
cfgLLM, err := loadPipelineSubModel(cl, pipeline.LLM, ml.ModelPath)
|
||||
if err != nil {
|
||||
|
||||
return nil, fmt.Errorf("failed to load backend config: %w", err)
|
||||
@@ -604,7 +623,7 @@ func newModel(pipeline *config.Pipeline, cl *config.ModelConfigLoader, ml *model
|
||||
applyPipelineReasoning(cfgLLM, *pipeline)
|
||||
applyPipelineThinking(cfgLLM, *pipeline)
|
||||
|
||||
cfgTTS, err := cl.LoadModelConfigFileByName(pipeline.TTS, ml.ModelPath)
|
||||
cfgTTS, err := loadPipelineSubModel(cl, pipeline.TTS, ml.ModelPath)
|
||||
if err != nil {
|
||||
|
||||
return nil, fmt.Errorf("failed to load backend config: %w", err)
|
||||
|
||||
52
core/http/endpoints/openai/realtime_model_alias_test.go
Normal file
52
core/http/endpoints/openai/realtime_model_alias_test.go
Normal file
@@ -0,0 +1,52 @@
|
||||
package openai
|
||||
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
|
||||
. "github.com/onsi/ginkgo/v2"
|
||||
. "github.com/onsi/gomega"
|
||||
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
)
|
||||
|
||||
// loadPipelineSubModel must resolve a pipeline sub-model that references an
|
||||
// alias (e.g. `llm: default`) one hop to the alias target's full config — so
|
||||
// the effective backend is the target's backend, not the empty backend of the
|
||||
// alias stub. This mirrors the top-level alias resolution done in
|
||||
// core/http/middleware/request.go, which the realtime pipeline previously
|
||||
// skipped (failing in distributed mode with "backend name is empty").
|
||||
var _ = Describe("loadPipelineSubModel", func() {
|
||||
It("resolves a sub-model alias one hop to the target's config", func() {
|
||||
tmpDir := GinkgoT().TempDir()
|
||||
|
||||
// A real model config with a concrete backend.
|
||||
realLLM := `name: real-llm
|
||||
backend: llama-cpp
|
||||
parameters:
|
||||
model: real-llm.gguf
|
||||
`
|
||||
Expect(os.WriteFile(filepath.Join(tmpDir, "real-llm.yaml"), []byte(realLLM), 0644)).To(Succeed())
|
||||
|
||||
// An alias pointing at the real model.
|
||||
aliasCfg := `name: default
|
||||
alias: real-llm
|
||||
`
|
||||
Expect(os.WriteFile(filepath.Join(tmpDir, "default.yaml"), []byte(aliasCfg), 0644)).To(Succeed())
|
||||
|
||||
cl := config.NewModelConfigLoader(tmpDir)
|
||||
Expect(cl.LoadModelConfigsFromPath(tmpDir)).To(Succeed())
|
||||
|
||||
// Resolving the alias must follow the hop to the target's full config.
|
||||
resolved, err := loadPipelineSubModel(cl, "default", tmpDir)
|
||||
Expect(err).NotTo(HaveOccurred())
|
||||
Expect(resolved.IsAlias()).To(BeFalse())
|
||||
Expect(resolved.Backend).To(Equal("llama-cpp"))
|
||||
|
||||
// A non-alias name must load unchanged.
|
||||
direct, err := loadPipelineSubModel(cl, "real-llm", tmpDir)
|
||||
Expect(err).NotTo(HaveOccurred())
|
||||
Expect(direct.Backend).To(Equal("llama-cpp"))
|
||||
Expect(direct.Name).To(Equal("real-llm"))
|
||||
})
|
||||
})
|
||||
@@ -288,6 +288,21 @@ test.describe('Model Editor - Interactive Tab', () => {
|
||||
await expect(page.locator('input[placeholder^="match,"]')).toBeVisible()
|
||||
})
|
||||
|
||||
test('pattern min_len clamps a directly-typed negative to 0', async ({ page }) => {
  const fieldLabel = 'Custom Secret Patterns'

  // Add the field through the search box; the matching option is looked up
  // inside the search input's grandparent element.
  const fieldSearch = page.locator('input[placeholder="Search fields to add..."]')
  await fieldSearch.fill(fieldLabel)
  const optionHost = fieldSearch.locator('..').locator('..')
  const firstOption = optionHost.locator('div', { hasText: fieldLabel }).first()
  await firstOption.click()

  const addPattern = page.locator('button', { hasText: 'Add pattern' })
  await addPattern.click()

  // min={0} on a number input only constrains the spinner arrows; typed input
  // bypasses it. The editor therefore has to sanitise a typed negative so a
  // meaningless negative length floor never reaches the saved config.
  const minLengthInput = page.locator('input[aria-label="Minimum length"]')
  await minLengthInput.fill('-5')
  await expect(minLengthInput).toHaveValue('0')
})
|
||||
|
||||
// Regression: a map-typed field (entity_actions) present in the loaded YAML
|
||||
// must render WITH its values. flattenConfig used to recurse into the map,
|
||||
// scattering it across pii_detection.entity_actions.<GROUP> paths that match
|
||||
@@ -329,4 +344,37 @@ test.describe('Model Editor - Interactive Tab', () => {
|
||||
await expect(page.getByText(/block —/i).first()).toBeVisible()
|
||||
})
|
||||
|
||||
// A map cannot hold two values for one key, so renaming a row to an existing
// group must collapse to a single row (Object.fromEntries, last write wins)
// rather than rendering two conflicting rows that silently lose one on save.
test('entity_actions collapses a duplicate group to a single row', async ({ page }) => {
  // YAML nesting is indentation-based: entity_actions must sit under
  // pii_detection, and SSN / EMAIL must sit one level deeper, under
  // entity_actions. With a flat indent the map would load empty and the two
  // rows asserted below could never render.
  const modelYaml = [
    'name: ner-model',
    'backend: llama-cpp',
    'pii_detection:',
    '  entity_actions:',
    '    SSN: block',
    '    EMAIL: mask',
    '',
  ].join('\n')

  await page.route('**/api/models/edit/ner-model', (route) => {
    route.fulfill({
      contentType: 'application/json',
      body: JSON.stringify({
        name: 'ner-model',
        config: modelYaml,
      }),
    })
  })

  await page.goto('/app/model-editor/ner-model')

  const groupInputs = page.locator('input[aria-label="Entity group"]')
  await expect(groupInputs).toHaveCount(2)

  // Rename the EMAIL row to duplicate SSN; the editor collapses to one SSN row.
  await groupInputs.nth(1).fill('SSN')
  await expect(groupInputs).toHaveCount(1)
  await expect(groupInputs.nth(0)).toHaveValue('SSN')
})
|
||||
|
||||
})
|
||||
|
||||
34
core/http/react-ui/e2e/nodes-detail.spec.js
Normal file
34
core/http/react-ui/e2e/nodes-detail.spec.js
Normal file
@@ -0,0 +1,34 @@
|
||||
import { test, expect } from './coverage-fixtures.js'
|
||||
|
||||
const ID = 'n1'
|
||||
/**
 * Stubs the endpoints for node `ID` with fixed JSON payloads: the node record
 * itself, its loaded models and its installed backends.
 *
 * @param {object} page - page object whose `route(pattern, handler)` is used
 *   to register the three stubs.
 * @returns {Promise<void>}
 */
async function mockNode(page) {
  // Builds a handler that answers 200 with the payload serialised as JSON.
  const respondJson = (payload) => (route) =>
    route.fulfill({ status: 200, contentType: 'application/json', body: JSON.stringify(payload) })

  const nodeRecord = {
    id: ID,
    name: 'alpha',
    node_type: 'backend',
    address: '10.0.0.1:50051',
    status: 'healthy',
    total_vram: 24e9,
    available_vram: 12e9,
    max_replicas_per_model: 1,
    labels: { env: 'prod' },
  }
  const loadedModels = [{ node_id: ID, model_name: 'llama-3.3', state: 'loaded', in_flight: 0, replica_index: 0 }]
  const installedBackends = [{ name: 'llama-cpp', is_system: true, installed_at: '2026-06-01T00:00:00Z' }]

  await page.route(`**/api/nodes/${ID}`, respondJson(nodeRecord))
  await page.route(`**/api/nodes/${ID}/models`, respondJson(loadedModels))
  await page.route(`**/api/nodes/${ID}/backends`, respondJson(installedBackends))
}
|
||||
|
||||
test.describe('Node detail page', () => {
  test('renders sections for a node', async ({ page }) => {
    await mockNode(page)
    await page.goto(`/app/nodes/${ID}`)

    const pageTitle = page.locator('.page-title').first()
    await expect(pageTitle).toBeVisible({ timeout: 15_000 })

    // Node name, model, backend and label text all come from the mocked
    // endpoints registered by mockNode.
    for (const text of ['alpha', 'llama-3.3', 'llama-cpp', 'env=prod']) {
      await expect(page.getByText(text)).toBeVisible()
    }
  })

  test('is reachable by clicking a roster panel', async ({ page }) => {
    // Builds a handler that answers 200 with the payload serialised as JSON.
    const respondJson = (payload) => (route) =>
      route.fulfill({ status: 200, contentType: 'application/json', body: JSON.stringify(payload) })
    const roster = [{ id: ID, name: 'alpha', node_type: 'backend', address: '10.0.0.1:50051', status: 'healthy' }]

    // The roster page needs the node list plus (empty) models and scheduling.
    await page.route('**/api/nodes', respondJson(roster))
    await page.route('**/api/nodes/models', respondJson([]))
    await page.route('**/api/nodes/scheduling', respondJson([]))
    await mockNode(page)

    await page.goto('/app/nodes')
    const alphaPanel = page.locator('.node-panel').filter({ hasText: 'alpha' })
    await alphaPanel.getByText('alpha').click()

    const detailUrl = new RegExp(`/app/nodes/${ID}$`)
    await expect(page).toHaveURL(detailUrl)
  })
})
|
||||
@@ -12,28 +12,37 @@ const NODE_NAME = 'worker-test'
|
||||
const BACKEND_NAME = 'cuda12-vllm-development'
|
||||
|
||||
async function mockDistributedNodes(page, { onDelete } = {}) {
|
||||
const nodeRecord = {
|
||||
id: NODE_ID,
|
||||
name: NODE_NAME,
|
||||
node_type: 'backend',
|
||||
address: '10.0.0.1:50051',
|
||||
http_address: '10.0.0.1:8090',
|
||||
status: 'healthy',
|
||||
total_vram: 0,
|
||||
available_vram: 0,
|
||||
total_ram: 8_000_000_000,
|
||||
available_ram: 4_000_000_000,
|
||||
gpu_vendor: '',
|
||||
last_heartbeat: new Date().toISOString(),
|
||||
created_at: new Date().toISOString(),
|
||||
updated_at: new Date().toISOString(),
|
||||
}
|
||||
|
||||
await page.route('**/api/nodes', (route) => {
|
||||
route.fulfill({
|
||||
status: 200,
|
||||
contentType: 'application/json',
|
||||
body: JSON.stringify([
|
||||
{
|
||||
id: NODE_ID,
|
||||
name: NODE_NAME,
|
||||
node_type: 'backend',
|
||||
address: '10.0.0.1:50051',
|
||||
http_address: '10.0.0.1:8090',
|
||||
status: 'healthy',
|
||||
total_vram: 0,
|
||||
available_vram: 0,
|
||||
total_ram: 8_000_000_000,
|
||||
available_ram: 4_000_000_000,
|
||||
gpu_vendor: '',
|
||||
last_heartbeat: new Date().toISOString(),
|
||||
created_at: new Date().toISOString(),
|
||||
updated_at: new Date().toISOString(),
|
||||
},
|
||||
]),
|
||||
body: JSON.stringify([nodeRecord]),
|
||||
})
|
||||
})
|
||||
|
||||
// The detail page fetches the single node via nodesApi.get(id).
|
||||
await page.route(`**/api/nodes/${NODE_ID}`, (route) => {
|
||||
route.fulfill({
|
||||
status: 200,
|
||||
contentType: 'application/json',
|
||||
body: JSON.stringify(nodeRecord),
|
||||
})
|
||||
})
|
||||
|
||||
@@ -80,24 +89,18 @@ async function mockDistributedNodes(page, { onDelete } = {}) {
|
||||
})
|
||||
}
|
||||
|
||||
async function expandNodeAndWaitForBackends(page) {
|
||||
await page.goto('/app/nodes')
|
||||
// Click the row to expand it. The chevron toggle and the row both work,
|
||||
// but clicking the name cell is the most user-like.
|
||||
await page.getByText(NODE_NAME).first().click()
|
||||
// Backends, Capacity and Labels live behind a "Manage" <details>
|
||||
// disclosure (the drawer was distilled to keep at-a-glance content
|
||||
// lean — see distill refactor in the multi-replica branch). Open it
|
||||
// by clicking the summary inside the .node-manage scope so the
|
||||
// per-node backend table is in the DOM before assertions run.
|
||||
await page.locator('.node-manage > summary').first().click()
|
||||
async function openNodeDetail(page) {
|
||||
// The per-node backend table now lives on the deep-linkable detail page
|
||||
// at /app/nodes/:id (the old expand-row + "Manage" disclosure was removed
|
||||
// when the roster was restructured). Navigate straight there.
|
||||
await page.goto(`/app/nodes/${NODE_ID}`)
|
||||
await expect(page.getByRole('cell', { name: BACKEND_NAME, exact: true })).toBeVisible({ timeout: 10_000 })
|
||||
}
|
||||
|
||||
test.describe('Nodes page — per-node backend actions', () => {
|
||||
test('upgrade affordance is self-explanatory (not "Reinstall backend" with a sync icon)', async ({ page }) => {
|
||||
await mockDistributedNodes(page)
|
||||
await expandNodeAndWaitForBackends(page)
|
||||
await openNodeDetail(page)
|
||||
|
||||
// Negative: the old, ambiguous wording must not be used.
|
||||
await expect(page.locator('button[title="Reinstall backend"]')).toHaveCount(0)
|
||||
@@ -114,7 +117,7 @@ test.describe('Nodes page — per-node backend actions', () => {
|
||||
|
||||
test('per-node backend row shows a delete (trash) button next to upgrade', async ({ page }) => {
|
||||
await mockDistributedNodes(page)
|
||||
await expandNodeAndWaitForBackends(page)
|
||||
await openNodeDetail(page)
|
||||
|
||||
const deleteBtn = page.locator('button[title="Delete backend from this node"]')
|
||||
await expect(deleteBtn).toBeVisible()
|
||||
@@ -128,7 +131,7 @@ test.describe('Nodes page — per-node backend actions', () => {
|
||||
postedBody = route.request().postDataJSON()
|
||||
},
|
||||
})
|
||||
await expandNodeAndWaitForBackends(page)
|
||||
await openNodeDetail(page)
|
||||
|
||||
await page.locator('button[title="Delete backend from this node"]').click()
|
||||
|
||||
@@ -150,7 +153,7 @@ test.describe('Nodes page — per-node backend actions', () => {
|
||||
deleteCalls += 1
|
||||
},
|
||||
})
|
||||
await expandNodeAndWaitForBackends(page)
|
||||
await openNodeDetail(page)
|
||||
|
||||
await page.locator('button[title="Delete backend from this node"]').click()
|
||||
|
||||
|
||||
47
core/http/react-ui/e2e/nodes-roster.spec.js
Normal file
47
core/http/react-ui/e2e/nodes-roster.spec.js
Normal file
@@ -0,0 +1,47 @@
|
||||
import { test, expect } from './coverage-fixtures.js'
|
||||
|
||||
/**
 * Stubs the three endpoints the roster page reads: the node list is answered
 * with `nodes`, while models and scheduling are answered with empty arrays.
 *
 * @param {object} page - page object whose `route(pattern, handler)` is used
 *   to register the stubs.
 * @param {Array<object>} nodes - node records returned from `/api/nodes`.
 * @returns {Promise<void>}
 */
async function mockCluster(page, nodes) {
  // Builds a handler that answers 200/JSON with a body produced on demand.
  const respondWith = (makeBody) => (route) =>
    route.fulfill({ status: 200, contentType: 'application/json', body: makeBody() })

  await page.route('**/api/nodes', respondWith(() => JSON.stringify(nodes)))
  await page.route('**/api/nodes/models', respondWith(() => '[]'))
  await page.route('**/api/nodes/scheduling', respondWith(() => '[]'))
}
|
||||
|
||||
test.describe('Nodes roster header', () => {
  test('shows a cluster pulse line and no stat-card grid', async ({ page }) => {
    const twoNodes = [
      { id: 'n1', name: 'alpha', node_type: 'backend', address: '10.0.0.1:50051', status: 'healthy' },
      { id: 'n2', name: 'beta', node_type: 'backend', address: '10.0.0.2:50051', status: 'draining' },
    ]
    await mockCluster(page, twoNodes)
    await page.goto('/app/nodes')

    // The pulse line summarises the cluster; the stat-card grid must be absent.
    await expect(page.locator('.cluster-pulse')).toBeVisible({ timeout: 15_000 })
    await expect(page.locator('.cluster-pulse')).toContainText('2 nodes')
    await expect(page.locator('.stat-grid')).toHaveCount(0)
  })

  test('shows an approval callout for pending nodes', async ({ page }) => {
    const pendingNode = { id: 'n3', name: 'gamma', node_type: 'backend', address: '10.0.0.3:50051', status: 'pending' }
    await mockCluster(page, [pendingNode])
    await page.goto('/app/nodes')

    const callout = page.locator('.attention-callout')
    await expect(callout).toContainText('approval', { timeout: 15_000 })
  })
})
|
||||
|
||||
test.describe('Nodes roster panels', () => {
  test('shows model chips without clicking and filters by type', async ({ page }) => {
    // Builds a handler that answers 200 with the payload serialised as JSON.
    const respondJson = (payload) => (route) =>
      route.fulfill({ status: 200, contentType: 'application/json', body: JSON.stringify(payload) })

    // One backend node (with a loaded model) and one agent node.
    const roster = [
      { id: 'n1', name: 'alpha', node_type: 'backend', address: '10.0.0.1:50051', status: 'healthy' },
      { id: 'a1', name: 'agent-1', node_type: 'agent', address: '10.0.0.9:50051', status: 'healthy' },
    ]
    const loadedModels = [
      { node_id: 'n1', model_name: 'llama-3.3', state: 'loaded', in_flight: 2, replica_index: 0 },
    ]
    await page.route('**/api/nodes', respondJson(roster))
    await page.route('**/api/nodes/models', respondJson(loadedModels))
    await page.route('**/api/nodes/scheduling', respondJson([]))

    await page.goto('/app/nodes')

    // model chip visible without any expand click
    const alphaPanel = page.locator('.node-panel').filter({ hasText: 'alpha' })
    await expect(alphaPanel.getByText('llama-3.3')).toBeVisible({ timeout: 15_000 })

    // segmented filter: Agent shows the agent node, hides the backend node
    const agentFilter = page.getByRole('radio', { name: /Agent/ })
    await agentFilter.click()
    await expect(page.getByText('agent-1')).toBeVisible()
    await expect(page.getByText('alpha')).toHaveCount(0)
  })
})
|
||||
@@ -21,6 +21,7 @@ const PAGES = [
|
||||
['/app/backends', 'Backends'],
|
||||
['/app/settings', 'Settings'],
|
||||
['/app/nodes', 'Nodes'],
|
||||
['/app/scheduling', 'Scheduling'],
|
||||
['/app/face', 'Face recognition'],
|
||||
['/app/voice', 'Voice recognition'],
|
||||
['/app/fine-tune', 'Fine-tuning'],
|
||||
|
||||
16
core/http/react-ui/e2e/scheduling.spec.js
Normal file
16
core/http/react-ui/e2e/scheduling.spec.js
Normal file
@@ -0,0 +1,16 @@
|
||||
import { test, expect } from './coverage-fixtures.js'
|
||||
|
||||
test.describe('Scheduling page', () => {
  test('renders at /app/scheduling with rules from the API', async ({ page }) => {
    // A single rule is enough to prove the page lists what the API returns.
    const rules = [{ model_name: 'llama-3.3', spread_all: true, min_replicas: 0, max_replicas: 0 }]
    await page.route('**/api/nodes/scheduling', (route) => {
      route.fulfill({
        status: 200,
        contentType: 'application/json',
        body: JSON.stringify(rules),
      })
    })

    await page.goto('/app/scheduling')

    const pageTitle = page.locator('.page-title').first()
    await expect(pageTitle).toBeVisible({ timeout: 15_000 })
    await expect(page).toHaveURL(/\/app\/scheduling$/)
    await expect(page.getByText('llama-3.3')).toBeVisible()
  })
})
|
||||
@@ -43,6 +43,10 @@
|
||||
"title": "Verteilte Knoten",
|
||||
"subtitle": "Backend- und Agenten-Worker-Knoten verwalten"
|
||||
},
|
||||
"scheduling": {
|
||||
"title": "Planung",
|
||||
"subtitle": "Modellplatzierung und Replikat-Regeln im gesamten Cluster"
|
||||
},
|
||||
"p2p": {
|
||||
"title": "Verteilte KI-Berechnung",
|
||||
"subtitle": "Skalieren Sie Ihre KI-Workloads über mehrere Geräte mit Peer-to-Peer-Verteilung"
|
||||
|
||||
@@ -50,6 +50,7 @@
|
||||
"backends": "Backends",
|
||||
"traces": "Traces",
|
||||
"nodes": "Knoten",
|
||||
"scheduling": "Planung",
|
||||
"swarm": "Swarm",
|
||||
"system": "System",
|
||||
"settings": "Einstellungen",
|
||||
|
||||
@@ -43,6 +43,10 @@
|
||||
"title": "Distributed Nodes",
|
||||
"subtitle": "Manage backend and agent worker nodes"
|
||||
},
|
||||
"scheduling": {
|
||||
"title": "Scheduling",
|
||||
"subtitle": "Model placement and replica rules across the cluster"
|
||||
},
|
||||
"p2p": {
|
||||
"title": "Distributed AI Computing",
|
||||
"subtitle": "Scale your AI workloads across multiple devices with peer-to-peer distribution"
|
||||
|
||||
@@ -51,6 +51,7 @@
|
||||
"backends": "Backends",
|
||||
"traces": "Traces",
|
||||
"nodes": "Nodes",
|
||||
"scheduling": "Scheduling",
|
||||
"swarm": "Swarm",
|
||||
"system": "System",
|
||||
"settings": "Settings",
|
||||
|
||||
@@ -43,6 +43,10 @@
|
||||
"title": "Nodos distribuidos",
|
||||
"subtitle": "Administra nodos worker de backends y agentes"
|
||||
},
|
||||
"scheduling": {
|
||||
"title": "Planificación",
|
||||
"subtitle": "Reglas de ubicación de modelos y réplicas en el clúster"
|
||||
},
|
||||
"p2p": {
|
||||
"title": "Computación de IA distribuida",
|
||||
"subtitle": "Escala tus cargas de trabajo de IA en múltiples dispositivos con distribución peer-to-peer"
|
||||
|
||||
@@ -50,6 +50,7 @@
|
||||
"backends": "Backends",
|
||||
"traces": "Trazas",
|
||||
"nodes": "Nodos",
|
||||
"scheduling": "Planificación",
|
||||
"swarm": "Swarm",
|
||||
"system": "Sistema",
|
||||
"settings": "Configuración",
|
||||
|
||||
@@ -43,6 +43,10 @@
|
||||
"title": "Node Terdistribusi",
|
||||
"subtitle": "Kelola node backend dan node worker"
|
||||
},
|
||||
"scheduling": {
|
||||
"title": "Penjadwalan",
|
||||
"subtitle": "Aturan penempatan model dan replika di seluruh kluster"
|
||||
},
|
||||
"p2p": {
|
||||
"title": "Komputasi AI Terdistribusi",
|
||||
"subtitle": "Skalakan beban kerja AI Anda ke beberapa perangkat dengan distribusi peer-to-peer"
|
||||
@@ -82,4 +86,4 @@
|
||||
"title": "Penjelajah",
|
||||
"subtitle": "Jelajahi file dan konfigurasi"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -72,7 +72,7 @@
|
||||
"actions": {
|
||||
"copy": "Salin",
|
||||
"regenerate": "Hasilkan ulang",
|
||||
"jumpToLatest": "Jump to latest"
|
||||
"jumpToLatest": "Lompat ke terbaru"
|
||||
},
|
||||
"streaming": {
|
||||
"transferring": "Mentransfer model...",
|
||||
@@ -115,4 +115,4 @@
|
||||
"clearAll": "Hapus semua",
|
||||
"deleteAllTitle": "Hapus semua percakapan"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
{
|
||||
"unsaved": {
|
||||
"title": "Discard unsaved changes?",
|
||||
"message": "You have unsaved changes that will be lost if you leave this page.",
|
||||
"leave": "Leave"
|
||||
"title": "Buang perubahan yang belum disimpan?",
|
||||
"message": "Anda memiliki perubahan yang belum disimpan. Perubahan tersebut akan hilang jika Anda meninggalkan halaman ini.",
|
||||
"leave": "Tinggalkan Halaman"
|
||||
},
|
||||
"actions": {
|
||||
"save": "Simpan",
|
||||
|
||||
@@ -7,15 +7,15 @@
|
||||
"resourceGpu": "GPU",
|
||||
"resourceRam": "RAM",
|
||||
"greeting": {
|
||||
"morning": "Good morning",
|
||||
"afternoon": "Good afternoon",
|
||||
"evening": "Good evening",
|
||||
"night": "Working late"
|
||||
"morning": "Selamat pagi",
|
||||
"afternoon": "Selamat siang",
|
||||
"evening": "Selamat malam",
|
||||
"night": "Selamat lembur"
|
||||
},
|
||||
"statusLine": {
|
||||
"modelsLoaded_one": "{{count}} model loaded",
|
||||
"modelsLoaded_other": "{{count}} models loaded",
|
||||
"noModelsLoaded": "No models loaded",
|
||||
"modelsLoaded_one": "{{count}} model dimuat",
|
||||
"modelsLoaded_other": "{{count}} model dimuat",
|
||||
"noModelsLoaded": "Tidak ada model yang dimuat",
|
||||
"nodes_one": "{{count}} node",
|
||||
"nodes_other": "{{count}} nodes"
|
||||
},
|
||||
@@ -79,14 +79,14 @@
|
||||
},
|
||||
"connect": {
|
||||
"title": "Satu endpoint, semua API",
|
||||
"subtitle": "LocalAI menyediakan API miliknya sendiri yang lengkap — pembuatan gambar & video, depth, deteksi objek, reranking, audio, pengenalan wajah & suara, serta suara realtime melalui WebRTC dan WebSocket. Di atas itu, lapisan kompatibilitas drop-in membuat aplikasi apa pun yang dibuat untuk OpenAI, Anthropic, Ollama, atau OpenAI Responses bekerja tanpa perubahan.",
|
||||
"subtitle": "LocalAI menyediakan API miliknya sendiri yang lengkap — pembuatan gambar & video, depth, deteksi objek, reranking, audio, pengenalan wajah & suara, serta suara realtime melalui WebRTC dan WebSocket. Selain itu, lapisan kompatibilitas drop-in membuat aplikasi apa pun yang dibuat untuk OpenAI, Anthropic, Ollama, atau OpenAI Responses bekerja tanpa perubahan.",
|
||||
"nativeTitle": "API native",
|
||||
"compatTitle": "Kompatibilitas drop-in",
|
||||
"apiReference": "Referensi API lengkap",
|
||||
"copy": "Salin",
|
||||
"copied": "Disalin",
|
||||
"browse": "Browse the API",
|
||||
"hide": "Hide endpoints",
|
||||
"dismiss": "Dismiss"
|
||||
"browse": "Jelajahi API",
|
||||
"hide": "Sembunyikan endpoint",
|
||||
"dismiss": "Abaikan"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
"video": "Video",
|
||||
"tts": "TTS",
|
||||
"sound": "Suara",
|
||||
"transform": "Transform"
|
||||
"transform": "Transformasi"
|
||||
}
|
||||
},
|
||||
"image": {
|
||||
@@ -30,7 +30,7 @@
|
||||
"refImagesAdded_other": "{{count}} gambar ditambahkan"
|
||||
},
|
||||
"actions": {
|
||||
"view": "View",
|
||||
"view": "Lihat",
|
||||
"generate": "Hasilkan",
|
||||
"generating": "Menghasilkan..."
|
||||
},
|
||||
@@ -153,4 +153,4 @@
|
||||
"clearConfirm": "Hapus",
|
||||
"cleared": "Riwayat dihapus"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -19,11 +19,11 @@
|
||||
"operate": "Operasikan"
|
||||
},
|
||||
"operate": {
|
||||
"inference": "Inference",
|
||||
"cluster": "Cluster",
|
||||
"observability": "Observability",
|
||||
"access": "Access",
|
||||
"system": "System"
|
||||
"inference": "Inferensi",
|
||||
"cluster": "Kluster",
|
||||
"observability": "Observabilitas",
|
||||
"access": "Akses",
|
||||
"system": "Sistem"
|
||||
},
|
||||
"items": {
|
||||
"home": "Beranda",
|
||||
@@ -51,6 +51,7 @@
|
||||
"backends": "Backend",
|
||||
"traces": "Trace",
|
||||
"nodes": "Node",
|
||||
"scheduling": "Penjadwalan",
|
||||
"swarm": "Swarm",
|
||||
"system": "Sistem",
|
||||
"settings": "Pengaturan",
|
||||
@@ -63,7 +64,7 @@
|
||||
"copyright": "© 2023-{{year}} {{author}}"
|
||||
},
|
||||
"console": {
|
||||
"automation": "Otomasi",
|
||||
"automation": "Automasi",
|
||||
"training": "Pelatihan"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -43,6 +43,10 @@
|
||||
"title": "Nodi distribuiti",
|
||||
"subtitle": "Gestisci i nodi worker dei backend e degli agenti"
|
||||
},
|
||||
"scheduling": {
|
||||
"title": "Pianificazione",
|
||||
"subtitle": "Regole di posizionamento dei modelli e delle repliche nel cluster"
|
||||
},
|
||||
"p2p": {
|
||||
"title": "Calcolo AI distribuito",
|
||||
"subtitle": "Scala i tuoi carichi di lavoro AI su più dispositivi con la distribuzione peer-to-peer"
|
||||
|
||||
@@ -50,6 +50,7 @@
|
||||
"backends": "Backend",
|
||||
"traces": "Tracce",
|
||||
"nodes": "Nodi",
|
||||
"scheduling": "Pianificazione",
|
||||
"swarm": "Swarm",
|
||||
"system": "Sistema",
|
||||
"settings": "Impostazioni",
|
||||
|
||||
@@ -43,6 +43,10 @@
|
||||
"title": "분산 노드",
|
||||
"subtitle": "백엔드 및 에이전트 워커 노드를 관리합니다"
|
||||
},
|
||||
"scheduling": {
|
||||
"title": "스케줄링",
|
||||
"subtitle": "클러스터 전반의 모델 배치 및 복제본 규칙"
|
||||
},
|
||||
"p2p": {
|
||||
"title": "분산 AI 컴퓨팅",
|
||||
"subtitle": "피어 투 피어 분산으로 여러 기기에 걸쳐 AI 워크로드를 확장합니다"
|
||||
|
||||
@@ -51,6 +51,7 @@
|
||||
"backends": "백엔드",
|
||||
"traces": "트레이스",
|
||||
"nodes": "노드",
|
||||
"scheduling": "스케줄링",
|
||||
"swarm": "Swarm",
|
||||
"system": "시스템",
|
||||
"settings": "설정",
|
||||
|
||||
@@ -43,6 +43,10 @@
|
||||
"title": "分布式节点",
|
||||
"subtitle": "管理后端和智能体工作节点"
|
||||
},
|
||||
"scheduling": {
|
||||
"title": "调度",
|
||||
"subtitle": "集群中的模型放置和副本规则"
|
||||
},
|
||||
"p2p": {
|
||||
"title": "分布式 AI 计算",
|
||||
"subtitle": "通过点对点分发将您的 AI 工作负载扩展到多个设备"
|
||||
|
||||
@@ -50,6 +50,7 @@
|
||||
"backends": "后端",
|
||||
"traces": "追踪",
|
||||
"nodes": "节点",
|
||||
"scheduling": "调度",
|
||||
"swarm": "Swarm",
|
||||
"system": "系统",
|
||||
"settings": "设置",
|
||||
|
||||
@@ -8471,3 +8471,56 @@ select.input {
|
||||
.status-pill--error .status-pill__dot { background: var(--color-error); }
|
||||
.status-pill--info .status-pill__dot { background: var(--color-info); }
|
||||
.status-pill--muted .status-pill__dot { background: var(--color-text-muted); }
|
||||
|
||||
/* Nodes: cluster pulse + attention callout (replaces the stat-card strip) */
|
||||
.cluster-pulse {
|
||||
font-size: var(--text-sm);
|
||||
color: var(--color-text-muted);
|
||||
margin: 0 0 var(--spacing-lg);
|
||||
}
|
||||
.cluster-pulse__strong { color: var(--color-text-primary); font-weight: 600; }
|
||||
|
||||
.attention-callout {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
justify-content: space-between;
|
||||
gap: var(--spacing-md);
|
||||
padding: var(--spacing-sm) var(--spacing-md);
|
||||
border-radius: var(--radius-md);
|
||||
margin-bottom: var(--spacing-lg);
|
||||
font-size: var(--text-sm);
|
||||
}
|
||||
.attention-callout--warn {
|
||||
background: var(--color-warning-light);
|
||||
border: 1px solid var(--color-warning-border);
|
||||
color: var(--color-text-primary);
|
||||
}
|
||||
.attention-callout--error {
|
||||
background: var(--color-error-light);
|
||||
border: 1px solid var(--color-error-border);
|
||||
color: var(--color-text-primary);
|
||||
}
|
||||
|
||||
/* Node roster panels (Nodes page) */
|
||||
.node-roster { display: flex; flex-direction: column; gap: var(--spacing-sm); }
|
||||
.node-panel {
|
||||
background: var(--color-bg-secondary);
|
||||
border: 1px solid var(--color-border-subtle);
|
||||
border-radius: var(--radius-lg);
|
||||
}
|
||||
.node-panel__main { padding: var(--spacing-md) var(--spacing-lg); cursor: pointer; }
|
||||
.node-panel:hover { border-color: var(--color-border); }
|
||||
.node-panel__head { display: flex; align-items: flex-start; justify-content: space-between; gap: var(--spacing-md); }
|
||||
.node-panel__id { display: flex; align-items: center; gap: var(--spacing-sm); flex-wrap: wrap; }
|
||||
.node-panel__name { font-weight: 600; }
|
||||
.node-panel__meta { display: flex; gap: var(--spacing-lg); margin-top: var(--spacing-sm); color: var(--color-text-muted); font-size: var(--text-xs); }
|
||||
.node-panel__models { display: flex; flex-wrap: wrap; gap: 6px; margin-top: var(--spacing-sm); }
|
||||
.model-chip {
|
||||
display: inline-flex; align-items: center; gap: 5px;
|
||||
font-family: var(--font-mono); font-size: 0.6875rem;
|
||||
padding: 2px 8px; border-radius: var(--radius-sm); border: 1px solid;
|
||||
}
|
||||
.model-chip__dot { width: 6px; height: 6px; border-radius: 50%; }
|
||||
.model-chip__state { opacity: 0.85; font-style: normal; }
|
||||
.node-filter { margin-bottom: var(--spacing-lg); }
|
||||
.node-detail__metrics { display: flex; gap: var(--spacing-xl); margin: var(--spacing-md) 0 var(--spacing-lg); flex-wrap: wrap; }
|
||||
|
||||
@@ -74,7 +74,18 @@ export default function PatternListEditor({ value, onChange }) {
|
||||
min={0}
|
||||
value={r.min_len || 0}
|
||||
title="Minimum match length (0 = no floor)"
|
||||
onChange={e => update(i, { min_len: parseInt(e.target.value, 10) || 0 })}
|
||||
// min={0} only constrains the spinner, not keyboard entry. Clamp a
|
||||
// typed negative to 0 (a negative floor is meaningless and would
|
||||
// disable the length filter). When we clamp, force the DOM value
|
||||
// too: the resulting 0->0 state change is a no-op, so React's
|
||||
// controlled input would otherwise keep displaying the rejected
|
||||
// "-5" even though the saved value is 0.
|
||||
onChange={e => {
|
||||
const parsed = parseInt(e.target.value, 10)
|
||||
const n = Math.max(0, parsed || 0)
|
||||
if (parsed < 0) e.target.value = String(n)
|
||||
update(i, { min_len: n })
|
||||
}}
|
||||
style={{ width: 80, fontSize: '0.8125rem' }}
|
||||
aria-label="Minimum length"
|
||||
/>
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user