mirror of
https://github.com/mudler/LocalAI.git
synced 2026-06-25 00:59:28 -04:00
Compare commits
1 Commits
fix/https-
...
dependabot
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
7d36e980be |
7
.github/backend-matrix.yml
vendored
7
.github/backend-matrix.yml
vendored
@@ -4974,9 +4974,6 @@ includeDarwin:
|
||||
- backend: "kitten-tts"
|
||||
tag-suffix: "-metal-darwin-arm64-kitten-tts"
|
||||
build-type: "mps"
|
||||
- backend: "liquid-audio"
|
||||
tag-suffix: "-metal-darwin-arm64-liquid-audio"
|
||||
build-type: "mps"
|
||||
- backend: "piper"
|
||||
tag-suffix: "-metal-darwin-arm64-piper"
|
||||
build-type: "metal"
|
||||
@@ -4993,10 +4990,6 @@ includeDarwin:
|
||||
tag-suffix: "-metal-darwin-arm64-sherpa-onnx"
|
||||
build-type: "metal"
|
||||
lang: "go"
|
||||
- backend: "supertonic"
|
||||
tag-suffix: "-metal-darwin-arm64-supertonic"
|
||||
build-type: "metal"
|
||||
lang: "go"
|
||||
- backend: "local-store"
|
||||
tag-suffix: "-metal-darwin-arm64-local-store"
|
||||
build-type: "metal"
|
||||
|
||||
2
.github/workflows/backend.yml
vendored
2
.github/workflows/backend.yml
vendored
@@ -44,7 +44,7 @@ jobs:
|
||||
has-merges-singlearch: ${{ steps.set-matrix.outputs['has-merges-singlearch'] }}
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v7
|
||||
uses: actions/checkout@v6
|
||||
|
||||
- name: Setup Bun
|
||||
uses: oven-sh/setup-bun@v2
|
||||
|
||||
2
.github/workflows/backend_build.yml
vendored
2
.github/workflows/backend_build.yml
vendored
@@ -101,7 +101,7 @@ jobs:
|
||||
steps:
|
||||
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v7
|
||||
uses: actions/checkout@v6
|
||||
with:
|
||||
submodules: true
|
||||
|
||||
|
||||
2
.github/workflows/backend_build_darwin.yml
vendored
2
.github/workflows/backend_build_darwin.yml
vendored
@@ -57,7 +57,7 @@ jobs:
|
||||
HOMEBREW_NO_ANALYTICS: '1'
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v7
|
||||
uses: actions/checkout@v6
|
||||
with:
|
||||
submodules: true
|
||||
|
||||
|
||||
2
.github/workflows/backend_merge.yml
vendored
2
.github/workflows/backend_merge.yml
vendored
@@ -49,7 +49,7 @@ jobs:
|
||||
# Sparse checkout: the merge job needs `.github/scripts/` (for the
|
||||
# keepalive cleanup script) but none of the source tree.
|
||||
- name: Checkout (.github/scripts only)
|
||||
uses: actions/checkout@v7
|
||||
uses: actions/checkout@v6
|
||||
with:
|
||||
sparse-checkout: |
|
||||
.github/scripts
|
||||
|
||||
2
.github/workflows/backend_pr.yml
vendored
2
.github/workflows/backend_pr.yml
vendored
@@ -23,7 +23,7 @@ jobs:
|
||||
has-merges-singlearch: ${{ steps.set-matrix.outputs['has-merges-singlearch'] }}
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v7
|
||||
uses: actions/checkout@v6
|
||||
|
||||
- name: Setup Bun
|
||||
uses: oven-sh/setup-bun@v2
|
||||
|
||||
2
.github/workflows/base-images.yml
vendored
2
.github/workflows/base-images.yml
vendored
@@ -127,7 +127,7 @@ jobs:
|
||||
# the original l4t matrix entry which set skip-drivers: 'true'.
|
||||
skip-drivers: 'true'
|
||||
steps:
|
||||
- uses: actions/checkout@v7
|
||||
- uses: actions/checkout@v6
|
||||
with:
|
||||
submodules: false
|
||||
- name: Free disk space
|
||||
|
||||
6
.github/workflows/build-test.yaml
vendored
6
.github/workflows/build-test.yaml
vendored
@@ -11,7 +11,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v7
|
||||
uses: actions/checkout@v6
|
||||
with:
|
||||
fetch-depth: 0
|
||||
- name: Set up Go
|
||||
@@ -25,7 +25,7 @@ jobs:
|
||||
runs-on: macos-latest
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v7
|
||||
uses: actions/checkout@v6
|
||||
with:
|
||||
fetch-depth: 0
|
||||
- name: Set up Go
|
||||
@@ -47,7 +47,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v7
|
||||
uses: actions/checkout@v6
|
||||
with:
|
||||
fetch-depth: 0
|
||||
- name: Configure apt mirror on runner
|
||||
|
||||
@@ -14,7 +14,7 @@ jobs:
|
||||
bump:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v7
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
- uses: actions/setup-go@v5
|
||||
with:
|
||||
|
||||
4
.github/workflows/bump_deps.yaml
vendored
4
.github/workflows/bump_deps.yaml
vendored
@@ -92,7 +92,7 @@ jobs:
|
||||
file: "backend/go/vibevoice-cpp/Makefile"
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v7
|
||||
- uses: actions/checkout@v6
|
||||
- name: Bump dependencies 🔧
|
||||
id: bump
|
||||
run: |
|
||||
@@ -128,7 +128,7 @@ jobs:
|
||||
if: github.repository == 'mudler/LocalAI'
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v7
|
||||
- uses: actions/checkout@v6
|
||||
- name: Bump vLLM cu130 wheel pin 🔧
|
||||
id: bump
|
||||
run: |
|
||||
|
||||
2
.github/workflows/bump_docs.yaml
vendored
2
.github/workflows/bump_docs.yaml
vendored
@@ -13,7 +13,7 @@ jobs:
|
||||
- repository: "mudler/LocalAI"
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v7
|
||||
- uses: actions/checkout@v6
|
||||
- name: Bump dependencies 🔧
|
||||
run: |
|
||||
bash .github/bump_docs.sh ${{ matrix.repository }}
|
||||
|
||||
2
.github/workflows/checksum_checker.yaml
vendored
2
.github/workflows/checksum_checker.yaml
vendored
@@ -8,7 +8,7 @@ jobs:
|
||||
if: github.repository == 'mudler/LocalAI'
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v7
|
||||
- uses: actions/checkout@v6
|
||||
- name: Configure apt mirror on runner
|
||||
uses: ./.github/actions/configure-apt-mirror
|
||||
- name: Install dependencies
|
||||
|
||||
2
.github/workflows/deploy-explorer.yaml
vendored
2
.github/workflows/deploy-explorer.yaml
vendored
@@ -16,7 +16,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v7
|
||||
uses: actions/checkout@v6
|
||||
with:
|
||||
submodules: true
|
||||
- uses: actions/setup-go@v5
|
||||
|
||||
2
.github/workflows/gallery-agent.yaml
vendored
2
.github/workflows/gallery-agent.yaml
vendored
@@ -31,7 +31,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v7
|
||||
uses: actions/checkout@v6
|
||||
with:
|
||||
token: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
|
||||
2
.github/workflows/generate_intel_image.yaml
vendored
2
.github/workflows/generate_intel_image.yaml
vendored
@@ -44,7 +44,7 @@ jobs:
|
||||
uses: docker/setup-buildx-action@master
|
||||
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v7
|
||||
uses: actions/checkout@v6
|
||||
|
||||
- name: Cache Intel images
|
||||
uses: docker/build-push-action@v7
|
||||
|
||||
2
.github/workflows/gh-pages.yml
vendored
2
.github/workflows/gh-pages.yml
vendored
@@ -28,7 +28,7 @@ jobs:
|
||||
HUGO_VERSION: "0.146.3"
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v7
|
||||
uses: actions/checkout@v6
|
||||
with:
|
||||
fetch-depth: 0 # needed for enableGitInfo
|
||||
submodules: true
|
||||
|
||||
2
.github/workflows/image_build.yml
vendored
2
.github/workflows/image_build.yml
vendored
@@ -80,7 +80,7 @@ jobs:
|
||||
steps:
|
||||
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v7
|
||||
uses: actions/checkout@v6
|
||||
|
||||
- name: Configure apt mirror on runner
|
||||
id: apt_mirror
|
||||
|
||||
2
.github/workflows/image_merge.yml
vendored
2
.github/workflows/image_merge.yml
vendored
@@ -36,7 +36,7 @@ jobs:
|
||||
# Sparse checkout: needed for .github/scripts/ (the keepalive cleanup
|
||||
# script). Skips the rest of the source tree.
|
||||
- name: Checkout (.github/scripts only)
|
||||
uses: actions/checkout@v7
|
||||
uses: actions/checkout@v6
|
||||
with:
|
||||
sparse-checkout: |
|
||||
.github/scripts
|
||||
|
||||
2
.github/workflows/lint.yml
vendored
2
.github/workflows/lint.yml
vendored
@@ -20,7 +20,7 @@ jobs:
|
||||
golangci-lint:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v7
|
||||
- uses: actions/checkout@v6
|
||||
with:
|
||||
# Full history so golangci-lint's new-from-merge-base can reach
|
||||
# origin/master and compute the diff against it.
|
||||
|
||||
6
.github/workflows/release.yaml
vendored
6
.github/workflows/release.yaml
vendored
@@ -10,7 +10,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v7
|
||||
uses: actions/checkout@v6
|
||||
with:
|
||||
fetch-depth: 0
|
||||
- name: Set up Go
|
||||
@@ -28,7 +28,7 @@ jobs:
|
||||
runs-on: macos-latest
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v7
|
||||
uses: actions/checkout@v6
|
||||
with:
|
||||
fetch-depth: 0
|
||||
- name: Set up Go
|
||||
@@ -46,7 +46,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v7
|
||||
uses: actions/checkout@v6
|
||||
with:
|
||||
fetch-depth: 0
|
||||
- name: Configure apt mirror on runner
|
||||
|
||||
2
.github/workflows/secscan.yaml
vendored
2
.github/workflows/secscan.yaml
vendored
@@ -14,7 +14,7 @@ jobs:
|
||||
GO111MODULE: on
|
||||
steps:
|
||||
- name: Checkout Source
|
||||
uses: actions/checkout@v7
|
||||
uses: actions/checkout@v6
|
||||
if: ${{ github.actor != 'dependabot[bot]' }}
|
||||
- name: Run Gosec Security Scanner
|
||||
if: ${{ github.actor != 'dependabot[bot]' }}
|
||||
|
||||
86
.github/workflows/test-extra.yml
vendored
86
.github/workflows/test-extra.yml
vendored
@@ -50,7 +50,7 @@ jobs:
|
||||
parakeet-cpp: ${{ steps.detect.outputs.parakeet-cpp }}
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v7
|
||||
uses: actions/checkout@v6
|
||||
- name: Setup Bun
|
||||
uses: oven-sh/setup-bun@v2
|
||||
- name: Install dependencies
|
||||
@@ -67,7 +67,7 @@ jobs:
|
||||
# runs-on: ubuntu-latest
|
||||
# steps:
|
||||
# - name: Clone
|
||||
# uses: actions/checkout@v7
|
||||
# uses: actions/checkout@v6
|
||||
# with:
|
||||
# submodules: true
|
||||
# - name: Dependencies
|
||||
@@ -90,7 +90,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v7
|
||||
uses: actions/checkout@v6
|
||||
with:
|
||||
submodules: true
|
||||
- name: Dependencies
|
||||
@@ -113,7 +113,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v7
|
||||
uses: actions/checkout@v6
|
||||
with:
|
||||
submodules: true
|
||||
- name: Dependencies
|
||||
@@ -137,7 +137,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v7
|
||||
uses: actions/checkout@v6
|
||||
with:
|
||||
submodules: true
|
||||
- name: Dependencies
|
||||
@@ -158,7 +158,7 @@ jobs:
|
||||
# runs-on: ubuntu-latest
|
||||
# steps:
|
||||
# - name: Clone
|
||||
# uses: actions/checkout@v7
|
||||
# uses: actions/checkout@v6
|
||||
# with:
|
||||
# submodules: true
|
||||
# - name: Dependencies
|
||||
@@ -178,7 +178,7 @@ jobs:
|
||||
# runs-on: ubuntu-latest
|
||||
# steps:
|
||||
# - name: Clone
|
||||
# uses: actions/checkout@v7
|
||||
# uses: actions/checkout@v6
|
||||
# with:
|
||||
# submodules: true
|
||||
# - name: Dependencies
|
||||
@@ -240,7 +240,7 @@ jobs:
|
||||
# sudo rm -rf "$AGENT_TOOLSDIRECTORY" || true
|
||||
# df -h
|
||||
# - name: Clone
|
||||
# uses: actions/checkout@v7
|
||||
# uses: actions/checkout@v6
|
||||
# with:
|
||||
# submodules: true
|
||||
# - name: Dependencies
|
||||
@@ -265,7 +265,7 @@ jobs:
|
||||
# runs-on: ubuntu-latest
|
||||
# steps:
|
||||
# - name: Clone
|
||||
# uses: actions/checkout@v7
|
||||
# uses: actions/checkout@v6
|
||||
# with:
|
||||
# submodules: true
|
||||
# - name: Dependencies
|
||||
@@ -288,7 +288,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v7
|
||||
uses: actions/checkout@v6
|
||||
with:
|
||||
submodules: true
|
||||
- name: Dependencies
|
||||
@@ -309,7 +309,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v7
|
||||
uses: actions/checkout@v6
|
||||
with:
|
||||
submodules: true
|
||||
- name: Dependencies
|
||||
@@ -330,7 +330,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v7
|
||||
uses: actions/checkout@v6
|
||||
with:
|
||||
submodules: true
|
||||
- name: Dependencies
|
||||
@@ -351,7 +351,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v7
|
||||
uses: actions/checkout@v6
|
||||
with:
|
||||
submodules: true
|
||||
- name: Dependencies
|
||||
@@ -373,7 +373,7 @@ jobs:
|
||||
# timeout-minutes: 45
|
||||
# steps:
|
||||
# - name: Clone
|
||||
# uses: actions/checkout@v7
|
||||
# uses: actions/checkout@v6
|
||||
# with:
|
||||
# submodules: true
|
||||
# - name: Dependencies
|
||||
@@ -394,7 +394,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v7
|
||||
uses: actions/checkout@v6
|
||||
with:
|
||||
submodules: true
|
||||
- name: Dependencies
|
||||
@@ -415,7 +415,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v7
|
||||
uses: actions/checkout@v6
|
||||
with:
|
||||
submodules: true
|
||||
- name: Dependencies
|
||||
@@ -436,7 +436,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v7
|
||||
uses: actions/checkout@v6
|
||||
with:
|
||||
submodules: true
|
||||
- name: Dependencies
|
||||
@@ -462,7 +462,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v7
|
||||
uses: actions/checkout@v6
|
||||
with:
|
||||
submodules: true
|
||||
- name: Dependencies
|
||||
@@ -484,7 +484,7 @@ jobs:
|
||||
timeout-minutes: 30
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v7
|
||||
uses: actions/checkout@v6
|
||||
with:
|
||||
submodules: true
|
||||
- name: Dependencies
|
||||
@@ -513,7 +513,7 @@ jobs:
|
||||
timeout-minutes: 90
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v7
|
||||
uses: actions/checkout@v6
|
||||
with:
|
||||
submodules: true
|
||||
- name: Setup Go
|
||||
@@ -530,7 +530,7 @@ jobs:
|
||||
timeout-minutes: 90
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v7
|
||||
uses: actions/checkout@v6
|
||||
with:
|
||||
submodules: true
|
||||
- name: Setup Go
|
||||
@@ -552,7 +552,7 @@ jobs:
|
||||
timeout-minutes: 20
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v7
|
||||
uses: actions/checkout@v6
|
||||
with:
|
||||
submodules: true
|
||||
- name: Setup Go
|
||||
@@ -579,7 +579,7 @@ jobs:
|
||||
timeout-minutes: 90
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v7
|
||||
uses: actions/checkout@v6
|
||||
with:
|
||||
submodules: true
|
||||
- name: Setup Go
|
||||
@@ -604,7 +604,7 @@ jobs:
|
||||
timeout-minutes: 90
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v7
|
||||
uses: actions/checkout@v6
|
||||
with:
|
||||
submodules: true
|
||||
- name: Setup Go
|
||||
@@ -625,7 +625,7 @@ jobs:
|
||||
timeout-minutes: 90
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v7
|
||||
uses: actions/checkout@v6
|
||||
with:
|
||||
submodules: true
|
||||
- name: Setup Go
|
||||
@@ -645,7 +645,7 @@ jobs:
|
||||
timeout-minutes: 90
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v7
|
||||
uses: actions/checkout@v6
|
||||
with:
|
||||
submodules: true
|
||||
- name: Setup Go
|
||||
@@ -664,7 +664,7 @@ jobs:
|
||||
timeout-minutes: 90
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v7
|
||||
uses: actions/checkout@v6
|
||||
with:
|
||||
submodules: true
|
||||
- name: Setup Go
|
||||
@@ -681,7 +681,7 @@ jobs:
|
||||
timeout-minutes: 90
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v7
|
||||
uses: actions/checkout@v6
|
||||
with:
|
||||
submodules: true
|
||||
- name: Setup Go
|
||||
@@ -698,7 +698,7 @@ jobs:
|
||||
timeout-minutes: 90
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v7
|
||||
uses: actions/checkout@v6
|
||||
with:
|
||||
submodules: true
|
||||
- name: Setup Go
|
||||
@@ -741,7 +741,7 @@ jobs:
|
||||
# timeout-minutes: 90
|
||||
# steps:
|
||||
# - name: Clone
|
||||
# uses: actions/checkout@v7
|
||||
# uses: actions/checkout@v6
|
||||
# with:
|
||||
# submodules: true
|
||||
# - name: Dependencies
|
||||
@@ -783,7 +783,7 @@ jobs:
|
||||
# timeout-minutes: 90
|
||||
# steps:
|
||||
# - name: Clone
|
||||
# uses: actions/checkout@v7
|
||||
# uses: actions/checkout@v6
|
||||
# with:
|
||||
# submodules: true
|
||||
# - name: Dependencies
|
||||
@@ -808,7 +808,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v7
|
||||
uses: actions/checkout@v6
|
||||
with:
|
||||
submodules: true
|
||||
- name: Dependencies
|
||||
@@ -840,7 +840,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v7
|
||||
uses: actions/checkout@v6
|
||||
with:
|
||||
submodules: true
|
||||
- name: Dependencies
|
||||
@@ -876,7 +876,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v7
|
||||
uses: actions/checkout@v6
|
||||
with:
|
||||
submodules: true
|
||||
- name: Dependencies
|
||||
@@ -915,7 +915,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v7
|
||||
uses: actions/checkout@v6
|
||||
with:
|
||||
submodules: true
|
||||
- name: Dependencies
|
||||
@@ -952,7 +952,7 @@ jobs:
|
||||
timeout-minutes: 90
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v7
|
||||
uses: actions/checkout@v6
|
||||
with:
|
||||
submodules: true
|
||||
- name: Dependencies
|
||||
@@ -987,7 +987,7 @@ jobs:
|
||||
timeout-minutes: 90
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v7
|
||||
uses: actions/checkout@v6
|
||||
with:
|
||||
submodules: true
|
||||
- name: Setup Go
|
||||
@@ -1013,7 +1013,7 @@ jobs:
|
||||
timeout-minutes: 150
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v7
|
||||
uses: actions/checkout@v6
|
||||
with:
|
||||
submodules: true
|
||||
- name: Dependencies
|
||||
@@ -1042,7 +1042,7 @@ jobs:
|
||||
timeout-minutes: 60
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v7
|
||||
uses: actions/checkout@v6
|
||||
with:
|
||||
submodules: true
|
||||
- name: Setup Go
|
||||
@@ -1058,7 +1058,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v7
|
||||
uses: actions/checkout@v6
|
||||
with:
|
||||
submodules: true
|
||||
- name: Dependencies
|
||||
@@ -1091,7 +1091,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v7
|
||||
uses: actions/checkout@v6
|
||||
with:
|
||||
submodules: true
|
||||
- name: Dependencies
|
||||
@@ -1114,7 +1114,7 @@ jobs:
|
||||
timeout-minutes: 90
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v7
|
||||
uses: actions/checkout@v6
|
||||
with:
|
||||
submodules: true
|
||||
- name: Dependencies
|
||||
@@ -1140,7 +1140,7 @@ jobs:
|
||||
timeout-minutes: 90
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v7
|
||||
uses: actions/checkout@v6
|
||||
with:
|
||||
submodules: true
|
||||
- name: Dependencies
|
||||
|
||||
4
.github/workflows/test.yml
vendored
4
.github/workflows/test.yml
vendored
@@ -21,7 +21,7 @@ jobs:
|
||||
go-version: ['1.26.x']
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v7
|
||||
uses: actions/checkout@v6
|
||||
with:
|
||||
submodules: true
|
||||
- name: Free disk space
|
||||
@@ -84,7 +84,7 @@ jobs:
|
||||
go-version: ['1.26.x']
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v7
|
||||
uses: actions/checkout@v6
|
||||
with:
|
||||
submodules: true
|
||||
- name: Setup Go ${{ matrix.go-version }}
|
||||
|
||||
2
.github/workflows/tests-aio.yml
vendored
2
.github/workflows/tests-aio.yml
vendored
@@ -62,7 +62,7 @@ jobs:
|
||||
sudo rm -rfv build || true
|
||||
df -h
|
||||
- name: Clone
|
||||
uses: actions/checkout@v7
|
||||
uses: actions/checkout@v6
|
||||
with:
|
||||
submodules: true
|
||||
- name: Dependencies
|
||||
|
||||
2
.github/workflows/tests-e2e.yml
vendored
2
.github/workflows/tests-e2e.yml
vendored
@@ -21,7 +21,7 @@ jobs:
|
||||
go-version: ['1.25.x']
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v7
|
||||
uses: actions/checkout@v6
|
||||
with:
|
||||
submodules: true
|
||||
- name: Configure apt mirror on runner
|
||||
|
||||
2
.github/workflows/tests-pii-ner-e2e.yml
vendored
2
.github/workflows/tests-pii-ner-e2e.yml
vendored
@@ -57,7 +57,7 @@ jobs:
|
||||
go-version: ['1.25.x']
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v7
|
||||
uses: actions/checkout@v6
|
||||
with:
|
||||
submodules: true
|
||||
- name: Free disk space
|
||||
|
||||
2
.github/workflows/tests-ui-e2e.yml
vendored
2
.github/workflows/tests-ui-e2e.yml
vendored
@@ -23,7 +23,7 @@ jobs:
|
||||
go-version: ['1.26.x']
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v7
|
||||
uses: actions/checkout@v6
|
||||
with:
|
||||
submodules: true
|
||||
- name: Configure apt mirror on runner
|
||||
|
||||
2
.github/workflows/update_swagger.yaml
vendored
2
.github/workflows/update_swagger.yaml
vendored
@@ -10,7 +10,7 @@ jobs:
|
||||
fail-fast: false
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v7
|
||||
- uses: actions/checkout@v6
|
||||
- name: Configure apt mirror on runner
|
||||
uses: ./.github/actions/configure-apt-mirror
|
||||
- uses: actions/setup-go@v5
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
|
||||
IK_LLAMA_VERSION?=7ccf1d209588962b96eacca325b37e9b3e8faf5e
|
||||
IK_LLAMA_VERSION?=6c00e87ac84404af588ad2e65935bd6f079c696f
|
||||
LLAMA_REPO?=https://github.com/ikawrakow/ik_llama.cpp
|
||||
|
||||
CMAKE_ARGS?=
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
|
||||
LLAMA_VERSION?=be4a6a63eb2b848e19c277bdcf2bd399e8af76d9
|
||||
LLAMA_VERSION?=7c082bc417bbe53210a83df4ba5b49e18ce6193c
|
||||
LLAMA_REPO?=https://github.com/ggerganov/llama.cpp
|
||||
|
||||
CMAKE_ARGS?=
|
||||
|
||||
@@ -8,7 +8,7 @@ JOBS?=$(shell nproc --ignore=1)
|
||||
|
||||
# CrispASR version (release tag)
|
||||
CRISPASR_REPO?=https://github.com/CrispStrobe/CrispASR
|
||||
CRISPASR_VERSION?=96b2a6ee31d30389fed8a7ef1a54239b75231ddc
|
||||
CRISPASR_VERSION?=7a8cb80907341c0204bd0488c1244764f4163883
|
||||
SO_TARGET?=libgocrispasr.so
|
||||
|
||||
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# parakeet-cpp backend Makefile.
|
||||
#
|
||||
# Upstream pin lives below as PARAKEET_VERSION?=89f5e2977b4d8bccd45e7bcc6f2ef7c4ed49e89a
|
||||
# Upstream pin lives below as PARAKEET_VERSION?=db755a78d39f789bb7d4e3935158a9e8105dbe36
|
||||
# (.github/bump_deps.sh) can find and update it - matches the
|
||||
# whisper.cpp / ds4 / vibevoice-cpp convention.
|
||||
#
|
||||
@@ -15,7 +15,7 @@
|
||||
# That's what the L0 smoke test uses. The default target below does the
|
||||
# proper clone-at-pin + cmake build so CI doesn't need a side-checkout.
|
||||
|
||||
PARAKEET_VERSION?=89f5e2977b4d8bccd45e7bcc6f2ef7c4ed49e89a
|
||||
PARAKEET_VERSION?=db755a78d39f789bb7d4e3935158a9e8105dbe36
|
||||
PARAKEET_REPO?=https://github.com/mudler/parakeet.cpp
|
||||
|
||||
GOCMD?=go
|
||||
|
||||
@@ -8,7 +8,7 @@ JOBS?=$(shell nproc --ignore=1)
|
||||
|
||||
# stablediffusion.cpp (ggml)
|
||||
STABLEDIFFUSION_GGML_REPO?=https://github.com/leejet/stable-diffusion.cpp
|
||||
STABLEDIFFUSION_GGML_VERSION?=f440ad9c29dd8bc34e5d1f4b863832b96d6ea05f
|
||||
STABLEDIFFUSION_GGML_VERSION?=b12098f5d09fc83da36e65c784f7bdb16a5a5ebf
|
||||
|
||||
CMAKE_ARGS+=-DGGML_MAX_NAME=128
|
||||
|
||||
|
||||
@@ -16,7 +16,6 @@ import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"regexp"
|
||||
"runtime"
|
||||
"strings"
|
||||
"time"
|
||||
"unicode"
|
||||
@@ -944,13 +943,7 @@ func InitializeONNXRuntime() error {
|
||||
}
|
||||
}
|
||||
if libPath == "" {
|
||||
// LocalAI: default to the platform-native shared library
|
||||
// extension when nothing else is found (dyld vs ld.so).
|
||||
if runtime.GOOS == "darwin" {
|
||||
libPath = "/usr/local/lib/libonnxruntime.dylib"
|
||||
} else {
|
||||
libPath = "/usr/local/lib/libonnxruntime.so"
|
||||
}
|
||||
libPath = "/usr/local/lib/libonnxruntime.so"
|
||||
}
|
||||
}
|
||||
ort.SetSharedLibraryPath(libPath)
|
||||
|
||||
@@ -32,10 +32,6 @@ elif [ -f "/lib/ld-linux-aarch64.so.1" ]; then
|
||||
cp -arfLv /lib/aarch64-linux-gnu/libdl.so.2 $CURDIR/package/lib/libdl.so.2
|
||||
cp -arfLv /lib/aarch64-linux-gnu/librt.so.1 $CURDIR/package/lib/librt.so.1
|
||||
cp -arfLv /lib/aarch64-linux-gnu/libpthread.so.0 $CURDIR/package/lib/libpthread.so.0
|
||||
elif [ $(uname -s) = "Darwin" ]; then
|
||||
# macOS: dyld resolves the bundled .dylib via DYLD_LIBRARY_PATH (set in
|
||||
# run.sh); there is no ld.so loader nor glibc to bundle.
|
||||
echo "Detected Darwin"
|
||||
else
|
||||
echo "Error: Could not detect architecture"
|
||||
exit 1
|
||||
|
||||
@@ -3,19 +3,12 @@ set -ex
|
||||
|
||||
CURDIR=$(dirname "$(realpath $0)")
|
||||
|
||||
if [ "$(uname)" = "Darwin" ]; then
|
||||
# macOS uses dyld: there is no ld.so loader, and the search path env
|
||||
# var is DYLD_LIBRARY_PATH. ONNX Runtime ships as a .dylib here.
|
||||
export DYLD_LIBRARY_PATH=$CURDIR/lib:$DYLD_LIBRARY_PATH
|
||||
export ONNXRUNTIME_LIB_PATH=$CURDIR/lib/libonnxruntime.dylib
|
||||
else
|
||||
export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
|
||||
export ONNXRUNTIME_LIB_PATH=$CURDIR/lib/libonnxruntime.so
|
||||
export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
|
||||
export ONNXRUNTIME_LIB_PATH=$CURDIR/lib/libonnxruntime.so
|
||||
|
||||
if [ -f $CURDIR/lib/ld.so ]; then
|
||||
echo "Using lib/ld.so"
|
||||
exec $CURDIR/lib/ld.so $CURDIR/supertonic "$@"
|
||||
fi
|
||||
if [ -f $CURDIR/lib/ld.so ]; then
|
||||
echo "Using lib/ld.so"
|
||||
exec $CURDIR/lib/ld.so $CURDIR/supertonic "$@"
|
||||
fi
|
||||
|
||||
exec $CURDIR/supertonic "$@"
|
||||
|
||||
@@ -8,7 +8,7 @@ JOBS?=$(shell nproc --ignore=1)
|
||||
|
||||
# whisper.cpp version
|
||||
WHISPER_REPO?=https://github.com/ggml-org/whisper.cpp
|
||||
WHISPER_CPP_VERSION?=43d78af5be58f41d6ffbc227d608f104577741ea
|
||||
WHISPER_CPP_VERSION?=5ed76e9a079962f1c85cfce44edd325c27ef1f97
|
||||
SO_TARGET?=libgowhisper.so
|
||||
|
||||
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF
|
||||
|
||||
@@ -1284,7 +1284,6 @@
|
||||
nvidia-cuda-13: "cuda13-liquid-audio"
|
||||
nvidia-cuda-12: "cuda12-liquid-audio"
|
||||
nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-liquid-audio"
|
||||
metal: "metal-liquid-audio"
|
||||
icon: https://cdn-avatars.huggingface.co/v1/production/uploads/61b8e2ba285851687028d395/7_6D7rWrLxp2hb6OHSV1p.png
|
||||
- &qwen-tts
|
||||
urls:
|
||||
@@ -1570,7 +1569,6 @@
|
||||
- TTS
|
||||
capabilities:
|
||||
default: "cpu-supertonic"
|
||||
metal: "metal-supertonic"
|
||||
- !!merge <<: *neutts
|
||||
name: "neutts-development"
|
||||
capabilities:
|
||||
@@ -4614,7 +4612,6 @@
|
||||
nvidia-cuda-13: "cuda13-liquid-audio-development"
|
||||
nvidia-cuda-12: "cuda12-liquid-audio-development"
|
||||
nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-liquid-audio-development"
|
||||
metal: "metal-liquid-audio-development"
|
||||
- !!merge <<: *liquid-audio
|
||||
name: "cpu-liquid-audio"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-liquid-audio"
|
||||
@@ -4625,16 +4622,6 @@
|
||||
uri: "quay.io/go-skynet/local-ai-backends:master-cpu-liquid-audio"
|
||||
mirrors:
|
||||
- localai/localai-backends:master-cpu-liquid-audio
|
||||
- !!merge <<: *liquid-audio
|
||||
name: "metal-liquid-audio"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-liquid-audio"
|
||||
mirrors:
|
||||
- localai/localai-backends:latest-metal-darwin-arm64-liquid-audio
|
||||
- !!merge <<: *liquid-audio
|
||||
name: "metal-liquid-audio-development"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-liquid-audio"
|
||||
mirrors:
|
||||
- localai/localai-backends:master-metal-darwin-arm64-liquid-audio
|
||||
- !!merge <<: *liquid-audio
|
||||
name: "cuda12-liquid-audio"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-liquid-audio"
|
||||
@@ -5497,7 +5484,6 @@
|
||||
name: "supertonic-development"
|
||||
capabilities:
|
||||
default: "cpu-supertonic-development"
|
||||
metal: "metal-supertonic-development"
|
||||
- !!merge <<: *supertonic
|
||||
name: "cpu-supertonic"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-supertonic"
|
||||
@@ -5508,13 +5494,3 @@
|
||||
uri: "quay.io/go-skynet/local-ai-backends:master-cpu-supertonic"
|
||||
mirrors:
|
||||
- localai/localai-backends:master-cpu-supertonic
|
||||
- !!merge <<: *supertonic
|
||||
name: "metal-supertonic"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-supertonic"
|
||||
mirrors:
|
||||
- localai/localai-backends:latest-metal-darwin-arm64-supertonic
|
||||
- !!merge <<: *supertonic
|
||||
name: "metal-supertonic-development"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-supertonic"
|
||||
mirrors:
|
||||
- localai/localai-backends:master-metal-darwin-arm64-supertonic
|
||||
|
||||
@@ -14,11 +14,5 @@ else
|
||||
fi
|
||||
|
||||
# liquid-audio's torch wheels are large; allow upgrades to satisfy transitive pins
|
||||
EXTRA_PIP_INSTALL_FLAGS+=" --upgrade"
|
||||
# --index-strategy is a uv-only flag. The darwin/MPS build installs with pip
|
||||
# (USE_PIP=true in scripts/build/python-darwin.sh), which rejects it. Only add
|
||||
# it on the uv path; Linux/CUDA resolution is unchanged.
|
||||
if [ "x${USE_PIP:-}" != "xtrue" ]; then
|
||||
EXTRA_PIP_INSTALL_FLAGS+=" --index-strategy=unsafe-first-match"
|
||||
fi
|
||||
EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match"
|
||||
installRequirements
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
# MPS (Apple Silicon / Metal) build profile - installed by the darwin CI job.
|
||||
torch>=2.8.0
|
||||
torchaudio>=2.8.0
|
||||
torchcodec>=0.9.1
|
||||
|
||||
Submodule backend/rust/kokoros/sources/Kokoros updated: 7089168f0c...b54354b860
@@ -215,7 +215,6 @@ func readRuntimeSettingsJson(startupAppConfig config.ApplicationConfig) fileHand
|
||||
envBackendGalleries := slices.Equal(appConfig.BackendGalleries, startupAppConfig.BackendGalleries)
|
||||
envAutoloadGalleries := appConfig.AutoloadGalleries == startupAppConfig.AutoloadGalleries
|
||||
envAutoloadBackendGalleries := appConfig.AutoloadBackendGalleries == startupAppConfig.AutoloadBackendGalleries
|
||||
envPIIDefaultDetectors := slices.Equal(appConfig.PIIDefaultDetectors, startupAppConfig.PIIDefaultDetectors)
|
||||
envAgentJobRetentionDays := appConfig.AgentJobRetentionDays == startupAppConfig.AgentJobRetentionDays
|
||||
envForceEvictionWhenBusy := appConfig.ForceEvictionWhenBusy == startupAppConfig.ForceEvictionWhenBusy
|
||||
envLRUEvictionMaxRetries := appConfig.LRUEvictionMaxRetries == startupAppConfig.LRUEvictionMaxRetries
|
||||
@@ -336,15 +335,6 @@ func readRuntimeSettingsJson(startupAppConfig config.ApplicationConfig) fileHand
|
||||
if settings.AutoloadBackendGalleries != nil && !envAutoloadBackendGalleries {
|
||||
appConfig.AutoloadBackendGalleries = *settings.AutoloadBackendGalleries
|
||||
}
|
||||
if settings.PIIDefaultDetectors != nil && !envPIIDefaultDetectors {
|
||||
// Request-side default redaction reads this live via
|
||||
// ResolvePIIPolicy, so a file edit takes effect on the next chat
|
||||
// request. The MITM listener resolves its per-host detector map
|
||||
// once at start, so a raw file edit reaches cloud-proxy traffic
|
||||
// only after a restart or a POST /api/settings (which rebuilds
|
||||
// the listener) — the admin UI uses the latter.
|
||||
appConfig.PIIDefaultDetectors = append([]string(nil), (*settings.PIIDefaultDetectors)...)
|
||||
}
|
||||
if settings.AutoUpgradeBackends != nil {
|
||||
appConfig.AutoUpgradeBackends = *settings.AutoUpgradeBackends
|
||||
}
|
||||
|
||||
@@ -109,52 +109,6 @@ var _ = Describe("loadRuntimeSettingsFromFile", func() {
|
||||
})
|
||||
})
|
||||
|
||||
// Instance-wide default PII detectors. The file is the only source (no
|
||||
// env var), and the loader runs immediately before startMITMIfConfigured,
|
||||
// so a regression here means the cloud-proxy MITM listener resolves an
|
||||
// empty detector set at boot and forwards intercepted traffic unredacted —
|
||||
// even though pii_default_detectors is on disk and the MITM model has PII
|
||||
// enabled. It also breaks request-side default redaction the same way.
|
||||
Describe("PII default detectors", func() {
|
||||
It("loads pii_default_detectors from the file", func() {
|
||||
cfg := &config.ApplicationConfig{DynamicConfigsDir: seedSettings(`{"pii_default_detectors": ["privacy-filter-nemotron", "secret-filter"]}`)}
|
||||
loadRuntimeSettingsFromFile(cfg)
|
||||
Expect(cfg.PIIDefaultDetectors).To(Equal([]string{"privacy-filter-nemotron", "secret-filter"}))
|
||||
})
|
||||
|
||||
It("does not override an env/CLI-set value (LOCALAI_PII_DEFAULT_DETECTORS)", func() {
|
||||
cfg := &config.ApplicationConfig{
|
||||
DynamicConfigsDir: seedSettings(`{"pii_default_detectors": ["from-file"]}`),
|
||||
PIIDefaultDetectors: []string{"from-env"}, // simulate WithPIIDefaultDetectors(env)
|
||||
}
|
||||
loadRuntimeSettingsFromFile(cfg)
|
||||
Expect(cfg.PIIDefaultDetectors).To(Equal([]string{"from-env"}), "env var must win over the persisted file value")
|
||||
})
|
||||
})
|
||||
|
||||
// The live file watcher applies pii_default_detectors on a runtime change
|
||||
// the same way it handles galleries/threads/etc.: env-set values (current
|
||||
// == startup snapshot) are left alone, otherwise the file value is applied
|
||||
// to the live config so request-side default redaction picks it up without
|
||||
// a restart.
|
||||
Describe("file watcher: pii_default_detectors", func() {
|
||||
It("applies a changed file value to the live config", func() {
|
||||
startup := config.ApplicationConfig{} // no env baseline
|
||||
live := &config.ApplicationConfig{PIIDefaultDetectors: []string{"old"}}
|
||||
handler := readRuntimeSettingsJson(startup)
|
||||
Expect(handler([]byte(`{"pii_default_detectors":["new-a","new-b"]}`), live)).To(Succeed())
|
||||
Expect(live.PIIDefaultDetectors).To(Equal([]string{"new-a", "new-b"}))
|
||||
})
|
||||
|
||||
It("leaves an env-controlled value untouched", func() {
|
||||
startup := config.ApplicationConfig{PIIDefaultDetectors: []string{"from-env"}}
|
||||
live := &config.ApplicationConfig{PIIDefaultDetectors: []string{"from-env"}}
|
||||
handler := readRuntimeSettingsJson(startup)
|
||||
Expect(handler([]byte(`{"pii_default_detectors":["from-file"]}`), live)).To(Succeed())
|
||||
Expect(live.PIIDefaultDetectors).To(Equal([]string{"from-env"}), "env-controlled detectors must not be overwritten by the file")
|
||||
})
|
||||
})
|
||||
|
||||
// The Agent Pool block has a mix of zero and non-zero defaults
|
||||
// (Enabled=true, EmbeddingModel="granite-...", MaxChunkingSize=400,
|
||||
// VectorEngine="chromem", AgentHubURL="https://agenthub.localai.io").
|
||||
|
||||
@@ -750,20 +750,6 @@ func loadRuntimeSettingsFromFile(options *config.ApplicationConfig) {
|
||||
options.MITMListen = *settings.MITMListen
|
||||
}
|
||||
|
||||
// Instance-wide default PII detectors. LOCALAI_PII_DEFAULT_DETECTORS (via
|
||||
// WithPIIDefaultDetectors) wins when set; otherwise the file is the source
|
||||
// — apply it only when the env/CLI left the value empty, mirroring the
|
||||
// "env > file" precedence used for the other fields. This must land before
|
||||
// startMITMIfConfigured (called right after this loader): the cloud-proxy
|
||||
// listener resolves each intercept host's detectors once at start via
|
||||
// ResolvePIIPolicy, and a MITM model that names no detectors of its own
|
||||
// falls back to these defaults. Without it the listener (and request-side
|
||||
// default redaction) starts with an empty detector set and forwards
|
||||
// traffic unredacted even though pii_default_detectors is on disk.
|
||||
if settings.PIIDefaultDetectors != nil && len(options.PIIDefaultDetectors) == 0 {
|
||||
options.PIIDefaultDetectors = append([]string(nil), (*settings.PIIDefaultDetectors)...)
|
||||
}
|
||||
|
||||
// Backend upgrade flags
|
||||
if settings.AutoUpgradeBackends != nil {
|
||||
if !options.AutoUpgradeBackends {
|
||||
|
||||
@@ -140,7 +140,7 @@ type RunCMD struct {
|
||||
OIDCIssuer string `env:"LOCALAI_OIDC_ISSUER" help:"OIDC issuer URL for auto-discovery" group:"auth"`
|
||||
OIDCClientID string `env:"LOCALAI_OIDC_CLIENT_ID" help:"OIDC Client ID (auto-enables auth)" group:"auth"`
|
||||
OIDCClientSecret string `env:"LOCALAI_OIDC_CLIENT_SECRET" help:"OIDC Client Secret" group:"auth"`
|
||||
ExternalBaseURL string `env:"LOCALAI_BASE_URL" help:"External base URL of this instance (e.g. https://localhost:8080). Used for OAuth callbacks and self-referential links (generated images/videos, job status). When unset, derived from X-Forwarded-Proto/Host or Forwarded headers." group:"api"`
|
||||
AuthBaseURL string `env:"LOCALAI_BASE_URL" help:"Base URL for OAuth callbacks (e.g. http://localhost:8080)" group:"auth"`
|
||||
AuthAdminEmail string `env:"LOCALAI_ADMIN_EMAIL" help:"Email address to auto-promote to admin role" group:"auth"`
|
||||
AuthRegistrationMode string `env:"LOCALAI_REGISTRATION_MODE" default:"open" help:"Registration mode: 'open' (default), 'approval', or 'invite' (invite code required)" group:"auth"`
|
||||
DisableLocalAuth bool `env:"LOCALAI_DISABLE_LOCAL_AUTH" default:"false" help:"Disable local email/password registration and login (use with OAuth/OIDC-only setups)" group:"auth"`
|
||||
@@ -181,8 +181,6 @@ type RunCMD struct {
|
||||
// Cloud-proxy MITM listener (off by default).
|
||||
MITMListen string `env:"LOCALAI_MITM_LISTEN" help:"Address (host:port) for the cloudproxy MITM listener. Empty = disabled. Clients set HTTPS_PROXY=http://<this>:<port>. Intercept hosts are declared per-model via the model YAML mitm.hosts: block; create one from the Add Model UI." group:"middleware"`
|
||||
MITMCADir string `env:"LOCALAI_MITM_CA_DIR" type:"path" help:"Directory holding the MITM proxy CA cert + key. Defaults to <data-path>/mitm-ca." group:"middleware"`
|
||||
|
||||
PIIDefaultDetectors []string `env:"LOCALAI_PII_DEFAULT_DETECTORS" help:"Instance-wide default PII/secret detector model names applied to any PII-enabled model (chiefly cloud-proxy / MITM models) that names no pii.detectors of its own. Comma-separated, e.g. privacy-filter-nemotron,secret-filter. Takes precedence over the value persisted via the Middleware UI." group:"middleware"`
|
||||
}
|
||||
|
||||
func (r *RunCMD) Run(ctx *cliContext.Context) error {
|
||||
@@ -245,7 +243,6 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error {
|
||||
config.WithAPIAddress(r.Address),
|
||||
config.WithMITMListen(r.MITMListen),
|
||||
config.WithMITMCADir(r.MITMCADir),
|
||||
config.WithPIIDefaultDetectors(r.PIIDefaultDetectors),
|
||||
config.WithAgentJobRetentionDays(r.AgentJobRetentionDays),
|
||||
config.WithLlamaCPPTunnelCallback(func(tunnels []string) {
|
||||
tunnelEnvVar := strings.Join(tunnels, ",")
|
||||
@@ -503,6 +500,9 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error {
|
||||
opts = append(opts, config.WithAuthOIDCClientID(r.OIDCClientID))
|
||||
opts = append(opts, config.WithAuthOIDCClientSecret(r.OIDCClientSecret))
|
||||
}
|
||||
if r.AuthBaseURL != "" {
|
||||
opts = append(opts, config.WithAuthBaseURL(r.AuthBaseURL))
|
||||
}
|
||||
if r.AuthAdminEmail != "" {
|
||||
opts = append(opts, config.WithAuthAdminEmail(r.AuthAdminEmail))
|
||||
}
|
||||
@@ -520,12 +520,6 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error {
|
||||
}
|
||||
}
|
||||
|
||||
// Applied unconditionally: the external base URL governs all self-referential
|
||||
// links (not just OAuth callbacks), so it must take effect even when auth is off.
|
||||
if r.ExternalBaseURL != "" {
|
||||
opts = append(opts, config.WithExternalBaseURL(r.ExternalBaseURL))
|
||||
}
|
||||
|
||||
if idleWatchDog || busyWatchDog {
|
||||
opts = append(opts, config.EnableWatchDog)
|
||||
if idleWatchDog {
|
||||
|
||||
@@ -49,13 +49,6 @@ type ApplicationConfig struct {
|
||||
P2PNetworkID string
|
||||
Federated bool
|
||||
|
||||
// ExternalBaseURL is the externally visible base URL of this instance
|
||||
// (scheme+host[:port]), set via LOCALAI_BASE_URL. When non-empty it is
|
||||
// authoritative for every self-referential URL LocalAI emits (OAuth
|
||||
// callbacks, generated image/video links, async job StatusURLs),
|
||||
// overriding proxy-header detection. Empty = derive from request headers.
|
||||
ExternalBaseURL string
|
||||
|
||||
// DisableStats turns off per-request token tracking. By default the
|
||||
// routing module's billing recorder runs in every mode (including
|
||||
// no-auth single-user) so dashboards and `/api/usage` are immediately
|
||||
@@ -203,6 +196,7 @@ type AuthConfig struct {
|
||||
OIDCIssuer string // OIDC issuer URL for auto-discovery (e.g. https://accounts.google.com)
|
||||
OIDCClientID string
|
||||
OIDCClientSecret string
|
||||
BaseURL string // for OAuth callback URLs (e.g. "http://localhost:8080")
|
||||
AdminEmail string // auto-promote to admin on login
|
||||
RegistrationMode string // "open", "approval" (default when empty), "invite"
|
||||
DisableLocalAuth bool // disable local email/password registration and login
|
||||
@@ -718,18 +712,6 @@ func WithMITMCADir(dir string) AppOption {
|
||||
}
|
||||
}
|
||||
|
||||
// WithPIIDefaultDetectors sets the instance-wide default PII/secret detector
|
||||
// model names applied to any PII-enabled model (chiefly cloud-proxy / MITM
|
||||
// models) that names no pii.detectors of its own. CLI/env:
|
||||
// LOCALAI_PII_DEFAULT_DETECTORS. Empty leaves the value to
|
||||
// runtime_settings.json / the Middleware UI; a non-empty value takes
|
||||
// precedence over the file (env > file).
|
||||
func WithPIIDefaultDetectors(detectors []string) AppOption {
|
||||
return func(o *ApplicationConfig) {
|
||||
o.PIIDefaultDetectors = detectors
|
||||
}
|
||||
}
|
||||
|
||||
func WithDynamicConfigDir(dynamicConfigsDir string) AppOption {
|
||||
return func(o *ApplicationConfig) {
|
||||
o.DynamicConfigsDir = dynamicConfigsDir
|
||||
@@ -956,9 +938,9 @@ func WithAuthGitHubClientSecret(clientSecret string) AppOption {
|
||||
}
|
||||
}
|
||||
|
||||
func WithExternalBaseURL(url string) AppOption {
|
||||
func WithAuthBaseURL(baseURL string) AppOption {
|
||||
return func(o *ApplicationConfig) {
|
||||
o.ExternalBaseURL = url
|
||||
o.Auth.BaseURL = baseURL
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -537,36 +537,6 @@ func DefaultRegistry() map[string]FieldMetaOverride {
|
||||
Component: "number",
|
||||
Order: 79,
|
||||
},
|
||||
"pipeline.compaction.enabled": {
|
||||
Section: "pipeline",
|
||||
Label: "Compaction Enabled",
|
||||
Description: "Fold conversation items that age out of the live window (Max History Items) into a rolling summary instead of dropping them, so long realtime sessions stay cheap without losing earlier context. Off by default.",
|
||||
Component: "toggle",
|
||||
Order: 80,
|
||||
},
|
||||
"pipeline.compaction.trigger_items": {
|
||||
Section: "pipeline",
|
||||
Label: "Compaction Trigger Items",
|
||||
Description: "High-water mark: once the live conversation exceeds this many items, the overflow above Max History Items is summarized and evicted. Must be greater than Max History Items; defaults to twice it. The gap controls how often summarization runs.",
|
||||
Component: "number",
|
||||
Order: 81,
|
||||
},
|
||||
"pipeline.compaction.summary_model": {
|
||||
Section: "pipeline",
|
||||
Label: "Compaction Summary Model",
|
||||
Description: "Optional smaller/cheaper model used to produce the rolling summary. Empty reuses the pipeline's own LLM. On CPU, a tiny model here keeps compaction from competing with the conversation LLM.",
|
||||
Component: "input",
|
||||
Advanced: true,
|
||||
Order: 82,
|
||||
},
|
||||
"pipeline.compaction.max_summary_tokens": {
|
||||
Section: "pipeline",
|
||||
Label: "Compaction Max Summary Tokens",
|
||||
Description: "Advisory cap on the rolling summary length (fed to the summarizer prompt). Defaults to 512.",
|
||||
Component: "number",
|
||||
Advanced: true,
|
||||
Order: 83,
|
||||
},
|
||||
|
||||
// --- Functions ---
|
||||
"function.grammar.parallel_calls": {
|
||||
|
||||
@@ -641,32 +641,11 @@ type Pipeline struct {
|
||||
// context fills.
|
||||
MaxHistoryItems *int `yaml:"max_history_items,omitempty" json:"max_history_items,omitempty"`
|
||||
|
||||
// Compaction folds conversation items that age out of the live window
|
||||
// (max_history_items) into a rolling summary instead of dropping them, so
|
||||
// long realtime sessions stay cheap without losing earlier context. Nil
|
||||
// (block absent) means disabled, preserving existing behavior.
|
||||
Compaction *PipelineCompaction `yaml:"compaction,omitempty" json:"compaction,omitempty"`
|
||||
|
||||
// VoiceRecognition gates the pipeline behind speaker verification. Nil
|
||||
// (block absent) means no gate, preserving existing behavior.
|
||||
VoiceRecognition *PipelineVoiceRecognition `yaml:"voice_recognition,omitempty" json:"voice_recognition,omitempty"`
|
||||
}
|
||||
|
||||
// PipelineCompaction configures summarize-then-drop for a realtime pipeline.
|
||||
type PipelineCompaction struct {
|
||||
// Enabled turns summarize-then-drop on. Default false.
|
||||
Enabled bool `yaml:"enabled,omitempty" json:"enabled,omitempty"`
|
||||
// TriggerItems is the high-water mark: once live items exceed it, overflow
|
||||
// above max_history_items is summarized and evicted. Must exceed
|
||||
// max_history_items; clamped up if not. Default: 2x max_history_items.
|
||||
TriggerItems int `yaml:"trigger_items,omitempty" json:"trigger_items,omitempty"`
|
||||
// SummaryModel optionally names a smaller/cheaper model for the summary
|
||||
// call. Empty uses the pipeline's own LLM.
|
||||
SummaryModel string `yaml:"summary_model,omitempty" json:"summary_model,omitempty"`
|
||||
// MaxSummaryTokens advises the summary length (fed to the prompt). Default 512.
|
||||
MaxSummaryTokens int `yaml:"max_summary_tokens,omitempty" json:"max_summary_tokens,omitempty"`
|
||||
}
|
||||
|
||||
// ApplyReasoningEffort resolves the effective reasoning effort — a per-request
|
||||
// value (requestEffort) overrides the config's own ReasoningEffort default —
|
||||
// stores it on the config so gRPCPredictOpts forwards it to the backend as the
|
||||
|
||||
@@ -5,7 +5,6 @@ import (
|
||||
"errors"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"reflect"
|
||||
)
|
||||
|
||||
// runtimeSettingsFile is the on-disk filename inside DynamicConfigsDir.
|
||||
@@ -34,35 +33,6 @@ func (o *ApplicationConfig) ReadPersistedSettings() (RuntimeSettings, error) {
|
||||
return settings, nil
|
||||
}
|
||||
|
||||
// MergeNonNil overlays every set (non-nil) field of overlay onto the
|
||||
// receiver, leaving the receiver's value untouched wherever overlay left a
|
||||
// field unset. Every RuntimeSettings field is a pointer precisely so "set"
|
||||
// can be told apart from "absent" (see the type doc), which makes this a
|
||||
// faithful partial update: a caller that submits only the field it owns
|
||||
// changes exactly that field and never clobbers unrelated settings.
|
||||
//
|
||||
// This is the read-modify-write contract the persistence helpers exist for.
|
||||
// UpdateSettingsEndpoint reads the on-disk settings, merges the request body
|
||||
// on top, and writes the result — so a focused admin page that POSTs only its
|
||||
// own field (the Middleware page sends only mitm_listen; the detector table
|
||||
// only pii_default_detectors) no longer nulls every other setting.
|
||||
//
|
||||
// Reflection keeps the merge total over the struct: a field added to
|
||||
// RuntimeSettings later is merged automatically, so the persistence path can
|
||||
// never silently drop a new setting the way a hand-maintained field list
|
||||
// would. Non-pointer fields (none today) are skipped — they cannot express
|
||||
// "absent", so the receiver wins.
|
||||
func (s *RuntimeSettings) MergeNonNil(overlay RuntimeSettings) {
|
||||
dst := reflect.ValueOf(s).Elem()
|
||||
src := reflect.ValueOf(overlay)
|
||||
for i := 0; i < src.NumField(); i++ {
|
||||
f := src.Field(i)
|
||||
if f.Kind() == reflect.Pointer && !f.IsNil() {
|
||||
dst.Field(i).Set(f)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// WritePersistedSettings serialises the given RuntimeSettings to
|
||||
// runtime_settings.json with restricted permissions (it may carry API
|
||||
// keys and P2P tokens).
|
||||
|
||||
@@ -12,7 +12,6 @@ import (
|
||||
)
|
||||
|
||||
func strPtr(s string) *string { return &s }
|
||||
func boolPtr(b bool) *bool { return &b }
|
||||
|
||||
var _ = Describe("RuntimeSettings persistence helpers", func() {
|
||||
var (
|
||||
@@ -52,47 +51,6 @@ var _ = Describe("RuntimeSettings persistence helpers", func() {
|
||||
})
|
||||
})
|
||||
|
||||
// MergeNonNil is the partial-update primitive UpdateSettingsEndpoint
|
||||
// relies on: a focused admin page POSTs only the field it owns, and the
|
||||
// handler reads the on-disk settings and overlays the request on top.
|
||||
// Without it, the body would be written verbatim and every field the
|
||||
// caller omitted would be nulled (the reported regression: changing
|
||||
// mitm_listen wiped the galleries, api keys, watchdog config, etc.).
|
||||
Describe("MergeNonNil partial update", func() {
|
||||
It("overlays set fields and preserves unset ones", func() {
|
||||
base := config.RuntimeSettings{
|
||||
MITMListen: strPtr(":9000"),
|
||||
Galleries: &[]config.Gallery{{Name: "g1", URL: "http://example/g1"}},
|
||||
WatchdogIdleEnabled: boolPtr(true),
|
||||
ApiKeys: &[]string{"persisted-key"},
|
||||
PIIDefaultDetectors: &[]string{"det-a"},
|
||||
}
|
||||
|
||||
// Simulate the Middleware proxy tab: only mitm_listen is sent.
|
||||
overlay := config.RuntimeSettings{MITMListen: strPtr(":8443")}
|
||||
base.MergeNonNil(overlay)
|
||||
|
||||
Expect(base.MITMListen).ToNot(BeNil())
|
||||
Expect(*base.MITMListen).To(Equal(":8443"), "set field should be overlaid")
|
||||
// Everything the overlay left unset must survive untouched.
|
||||
Expect(base.Galleries).ToNot(BeNil(), "galleries were clobbered")
|
||||
Expect(*base.Galleries).To(HaveLen(1))
|
||||
Expect(base.WatchdogIdleEnabled).ToNot(BeNil())
|
||||
Expect(*base.WatchdogIdleEnabled).To(BeTrue())
|
||||
Expect(base.ApiKeys).ToNot(BeNil(), "api_keys were clobbered")
|
||||
Expect(*base.ApiKeys).To(Equal([]string{"persisted-key"}))
|
||||
Expect(base.PIIDefaultDetectors).ToNot(BeNil(), "pii_default_detectors were clobbered")
|
||||
Expect(*base.PIIDefaultDetectors).To(Equal([]string{"det-a"}))
|
||||
})
|
||||
|
||||
It("lets an explicit empty slice clear a field", func() {
|
||||
base := config.RuntimeSettings{PIIDefaultDetectors: &[]string{"det-a"}}
|
||||
base.MergeNonNil(config.RuntimeSettings{PIIDefaultDetectors: &[]string{}})
|
||||
Expect(base.PIIDefaultDetectors).ToNot(BeNil())
|
||||
Expect(*base.PIIDefaultDetectors).To(BeEmpty(), "an explicit empty slice should clear, not preserve")
|
||||
})
|
||||
})
|
||||
|
||||
// MITM round trip pins the contract that loadRuntimeSettingsFromFile
|
||||
// MITM listener address must survive a write/read round trip so the
|
||||
// next process restart can bring the listener back up. (Intercept
|
||||
|
||||
@@ -149,18 +149,6 @@ func API(application *application.Application) (*echo.Echo, error) {
|
||||
// Middleware - StripPathPrefix must be registered early as it uses Rewrite which runs before routing
|
||||
e.Pre(httpMiddleware.StripPathPrefix())
|
||||
|
||||
// Stamp the configured external base URL into each request context so
|
||||
// middleware.BaseURL can treat it as authoritative for self-referential
|
||||
// links. Registered as Pre so it runs before routing and handlers.
|
||||
if extBaseURL := application.ApplicationConfig().ExternalBaseURL; extBaseURL != "" {
|
||||
e.Pre(func(next echo.HandlerFunc) echo.HandlerFunc {
|
||||
return func(c echo.Context) error {
|
||||
c.Set("_external_base_url", extBaseURL)
|
||||
return next(c)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
e.Pre(middleware.RemoveTrailingSlash())
|
||||
|
||||
if application.ApplicationConfig().MachineTag != "" {
|
||||
|
||||
@@ -70,7 +70,7 @@ func UploadToCollectionEndpoint(app *application.Application) echo.HandlerFunc {
|
||||
return func(c echo.Context) error {
|
||||
svc := app.AgentPoolService()
|
||||
userID := effectiveUserID(c)
|
||||
name := decodedParam(c, "name")
|
||||
name := c.Param("name")
|
||||
file, err := c.FormFile("file")
|
||||
if err != nil {
|
||||
return c.JSON(http.StatusBadRequest, map[string]string{"error": "file required"})
|
||||
@@ -116,7 +116,7 @@ func ListCollectionEntriesEndpoint(app *application.Application) echo.HandlerFun
|
||||
return func(c echo.Context) error {
|
||||
svc := app.AgentPoolService()
|
||||
userID := effectiveUserID(c)
|
||||
entries, err := svc.ListCollectionEntriesForUser(userID, decodedParam(c, "name"))
|
||||
entries, err := svc.ListCollectionEntriesForUser(userID, c.Param("name"))
|
||||
if err != nil {
|
||||
if strings.Contains(err.Error(), "not found") {
|
||||
return c.JSON(http.StatusNotFound, map[string]string{"error": err.Error()})
|
||||
@@ -139,7 +139,7 @@ func GetCollectionEntryContentEndpoint(app *application.Application) echo.Handle
|
||||
if err != nil {
|
||||
entry = entryParam
|
||||
}
|
||||
content, chunkCount, err := svc.GetCollectionEntryContentForUser(userID, decodedParam(c, "name"), entry)
|
||||
content, chunkCount, err := svc.GetCollectionEntryContentForUser(userID, c.Param("name"), entry)
|
||||
if err != nil {
|
||||
if strings.Contains(err.Error(), "not found") {
|
||||
return c.JSON(http.StatusNotFound, map[string]string{"error": err.Error()})
|
||||
@@ -164,7 +164,7 @@ func SearchCollectionEndpoint(app *application.Application) echo.HandlerFunc {
|
||||
if err := c.Bind(&payload); err != nil {
|
||||
return c.JSON(http.StatusBadRequest, map[string]string{"error": err.Error()})
|
||||
}
|
||||
results, err := svc.SearchCollectionForUser(userID, decodedParam(c, "name"), payload.Query, payload.MaxResults)
|
||||
results, err := svc.SearchCollectionForUser(userID, c.Param("name"), payload.Query, payload.MaxResults)
|
||||
if err != nil {
|
||||
if strings.Contains(err.Error(), "not found") {
|
||||
return c.JSON(http.StatusNotFound, map[string]string{"error": err.Error()})
|
||||
@@ -182,7 +182,7 @@ func ResetCollectionEndpoint(app *application.Application) echo.HandlerFunc {
|
||||
return func(c echo.Context) error {
|
||||
svc := app.AgentPoolService()
|
||||
userID := effectiveUserID(c)
|
||||
if err := svc.ResetCollectionForUser(userID, decodedParam(c, "name")); err != nil {
|
||||
if err := svc.ResetCollectionForUser(userID, c.Param("name")); err != nil {
|
||||
if strings.Contains(err.Error(), "not found") {
|
||||
return c.JSON(http.StatusNotFound, map[string]string{"error": err.Error()})
|
||||
}
|
||||
@@ -202,7 +202,7 @@ func DeleteCollectionEntryEndpoint(app *application.Application) echo.HandlerFun
|
||||
if err := c.Bind(&payload); err != nil {
|
||||
return c.JSON(http.StatusBadRequest, map[string]string{"error": err.Error()})
|
||||
}
|
||||
remaining, err := svc.DeleteCollectionEntryForUser(userID, decodedParam(c, "name"), payload.Entry)
|
||||
remaining, err := svc.DeleteCollectionEntryForUser(userID, c.Param("name"), payload.Entry)
|
||||
if err != nil {
|
||||
if strings.Contains(err.Error(), "not found") {
|
||||
return c.JSON(http.StatusNotFound, map[string]string{"error": err.Error()})
|
||||
@@ -230,7 +230,7 @@ func AddCollectionSourceEndpoint(app *application.Application) echo.HandlerFunc
|
||||
if payload.UpdateInterval < 1 {
|
||||
payload.UpdateInterval = 60
|
||||
}
|
||||
if err := svc.AddCollectionSourceForUser(userID, decodedParam(c, "name"), payload.URL, payload.UpdateInterval); err != nil {
|
||||
if err := svc.AddCollectionSourceForUser(userID, c.Param("name"), payload.URL, payload.UpdateInterval); err != nil {
|
||||
if strings.Contains(err.Error(), "not found") {
|
||||
return c.JSON(http.StatusNotFound, map[string]string{"error": err.Error()})
|
||||
}
|
||||
@@ -250,7 +250,7 @@ func RemoveCollectionSourceEndpoint(app *application.Application) echo.HandlerFu
|
||||
if err := c.Bind(&payload); err != nil {
|
||||
return c.JSON(http.StatusBadRequest, map[string]string{"error": err.Error()})
|
||||
}
|
||||
if err := svc.RemoveCollectionSourceForUser(userID, decodedParam(c, "name"), payload.URL); err != nil {
|
||||
if err := svc.RemoveCollectionSourceForUser(userID, c.Param("name"), payload.URL); err != nil {
|
||||
return c.JSON(http.StatusInternalServerError, map[string]string{"error": err.Error()})
|
||||
}
|
||||
return c.JSON(http.StatusOK, map[string]string{"status": "ok"})
|
||||
@@ -267,7 +267,7 @@ func GetCollectionEntryRawFileEndpoint(app *application.Application) echo.Handle
|
||||
if err != nil {
|
||||
entry = entryParam
|
||||
}
|
||||
fpath, err := svc.GetCollectionEntryFilePathForUser(userID, decodedParam(c, "name"), entry)
|
||||
fpath, err := svc.GetCollectionEntryFilePathForUser(userID, c.Param("name"), entry)
|
||||
if err != nil {
|
||||
if strings.Contains(err.Error(), "not found") {
|
||||
return c.JSON(http.StatusNotFound, map[string]string{"error": err.Error()})
|
||||
@@ -282,7 +282,7 @@ func ListCollectionSourcesEndpoint(app *application.Application) echo.HandlerFun
|
||||
return func(c echo.Context) error {
|
||||
svc := app.AgentPoolService()
|
||||
userID := effectiveUserID(c)
|
||||
sources, err := svc.ListCollectionSourcesForUser(userID, decodedParam(c, "name"))
|
||||
sources, err := svc.ListCollectionSourcesForUser(userID, c.Param("name"))
|
||||
if err != nil {
|
||||
if strings.Contains(err.Error(), "not found") {
|
||||
return c.JSON(http.StatusNotFound, map[string]string{"error": err.Error()})
|
||||
|
||||
@@ -1,49 +0,0 @@
|
||||
package localai
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
|
||||
"github.com/labstack/echo/v4"
|
||||
. "github.com/onsi/ginkgo/v2"
|
||||
. "github.com/onsi/gomega"
|
||||
)
|
||||
|
||||
// Regression for #10443: agent/collection names carry a "legacy-api-key:"
|
||||
// prefix, so the ':' is percent-encoded as %3A in the request path. Echo routes
|
||||
// such paths via URL.RawPath and stores the path-param value still escaped, so
|
||||
// handlers must URL-decode it before looking the collection up in the store -
|
||||
// otherwise the lookup sees "legacy-api-key%3ALiteraryResearch" and 404s.
|
||||
var _ = Describe("decodedParam", func() {
|
||||
var e *echo.Echo
|
||||
|
||||
BeforeEach(func() {
|
||||
e = echo.New()
|
||||
})
|
||||
|
||||
// route runs a request through Echo's real router so the path param is
|
||||
// populated exactly as it would be in production, then returns the decoded
|
||||
// value the handler would observe.
|
||||
route := func(rawPath string) string {
|
||||
var got string
|
||||
e.GET("/api/agents/collections/:name/upload", func(c echo.Context) error {
|
||||
got = decodedParam(c, "name")
|
||||
return c.NoContent(http.StatusOK)
|
||||
})
|
||||
req := httptest.NewRequest(http.MethodGet, rawPath, nil)
|
||||
rec := httptest.NewRecorder()
|
||||
e.ServeHTTP(rec, req)
|
||||
Expect(rec.Code).To(Equal(http.StatusOK))
|
||||
return got
|
||||
}
|
||||
|
||||
It("decodes a percent-encoded colon in the collection name", func() {
|
||||
got := route("/api/agents/collections/legacy-api-key%3ALiteraryResearch/upload")
|
||||
Expect(got).To(Equal("legacy-api-key:LiteraryResearch"))
|
||||
})
|
||||
|
||||
It("leaves an unencoded name untouched", func() {
|
||||
got := route("/api/agents/collections/PlainCollection/upload")
|
||||
Expect(got).To(Equal("PlainCollection"))
|
||||
})
|
||||
})
|
||||
@@ -6,7 +6,6 @@ import (
|
||||
"io"
|
||||
"maps"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"slices"
|
||||
@@ -34,22 +33,6 @@ func getUserID(c echo.Context) string {
|
||||
return user.ID
|
||||
}
|
||||
|
||||
// decodedParam returns the named path parameter, URL-decoding it.
|
||||
//
|
||||
// Echo routes a request via URL.RawPath whenever the path contains
|
||||
// percent-encoded characters (e.g. %3A for ':'), and in that case stores the
|
||||
// matched path-param value raw/escaped. Agent and collection names carry a
|
||||
// "legacy-api-key:" prefix, so the ':' arrives as %3A and the raw param no
|
||||
// longer matches the stored name. Callers must unescape before lookups.
|
||||
// Falls back to the raw value if it isn't valid percent-encoding.
|
||||
func decodedParam(c echo.Context, name string) string {
|
||||
raw := c.Param(name)
|
||||
if decoded, err := url.PathUnescape(raw); err == nil {
|
||||
return decoded
|
||||
}
|
||||
return raw
|
||||
}
|
||||
|
||||
// isAdminUser returns true if the authenticated user has admin role.
|
||||
func isAdminUser(c echo.Context) bool {
|
||||
user := auth.GetUser(c)
|
||||
@@ -144,7 +127,7 @@ func GetAgentEndpoint(app *application.Application) echo.HandlerFunc {
|
||||
return func(c echo.Context) error {
|
||||
svc := app.AgentPoolService()
|
||||
userID := effectiveUserID(c)
|
||||
name := decodedParam(c, "name")
|
||||
name := c.Param("name")
|
||||
|
||||
statuses := svc.ListAgentsForUser(userID)
|
||||
active, exists := statuses[name]
|
||||
@@ -159,7 +142,7 @@ func UpdateAgentEndpoint(app *application.Application) echo.HandlerFunc {
|
||||
return func(c echo.Context) error {
|
||||
svc := app.AgentPoolService()
|
||||
userID := effectiveUserID(c)
|
||||
name := decodedParam(c, "name")
|
||||
name := c.Param("name")
|
||||
var cfg state.AgentConfig
|
||||
if err := c.Bind(&cfg); err != nil {
|
||||
return c.JSON(http.StatusBadRequest, map[string]string{"error": err.Error()})
|
||||
@@ -178,7 +161,7 @@ func DeleteAgentEndpoint(app *application.Application) echo.HandlerFunc {
|
||||
return func(c echo.Context) error {
|
||||
svc := app.AgentPoolService()
|
||||
userID := effectiveUserID(c)
|
||||
name := decodedParam(c, "name")
|
||||
name := c.Param("name")
|
||||
if err := svc.DeleteAgentForUser(userID, name); err != nil {
|
||||
return c.JSON(http.StatusInternalServerError, map[string]string{"error": err.Error()})
|
||||
}
|
||||
@@ -190,7 +173,7 @@ func GetAgentConfigEndpoint(app *application.Application) echo.HandlerFunc {
|
||||
return func(c echo.Context) error {
|
||||
svc := app.AgentPoolService()
|
||||
userID := effectiveUserID(c)
|
||||
name := decodedParam(c, "name")
|
||||
name := c.Param("name")
|
||||
cfg := svc.GetAgentConfigForUser(userID, name)
|
||||
if cfg == nil {
|
||||
return c.JSON(http.StatusNotFound, map[string]string{"error": "Agent not found"})
|
||||
@@ -203,7 +186,7 @@ func PauseAgentEndpoint(app *application.Application) echo.HandlerFunc {
|
||||
return func(c echo.Context) error {
|
||||
svc := app.AgentPoolService()
|
||||
userID := effectiveUserID(c)
|
||||
if err := svc.PauseAgentForUser(userID, decodedParam(c, "name")); err != nil {
|
||||
if err := svc.PauseAgentForUser(userID, c.Param("name")); err != nil {
|
||||
return c.JSON(http.StatusNotFound, map[string]string{"error": err.Error()})
|
||||
}
|
||||
return c.JSON(http.StatusOK, map[string]string{"status": "ok"})
|
||||
@@ -214,7 +197,7 @@ func ResumeAgentEndpoint(app *application.Application) echo.HandlerFunc {
|
||||
return func(c echo.Context) error {
|
||||
svc := app.AgentPoolService()
|
||||
userID := effectiveUserID(c)
|
||||
if err := svc.ResumeAgentForUser(userID, decodedParam(c, "name")); err != nil {
|
||||
if err := svc.ResumeAgentForUser(userID, c.Param("name")); err != nil {
|
||||
return c.JSON(http.StatusNotFound, map[string]string{"error": err.Error()})
|
||||
}
|
||||
return c.JSON(http.StatusOK, map[string]string{"status": "ok"})
|
||||
@@ -225,7 +208,7 @@ func GetAgentStatusEndpoint(app *application.Application) echo.HandlerFunc {
|
||||
return func(c echo.Context) error {
|
||||
svc := app.AgentPoolService()
|
||||
userID := effectiveUserID(c)
|
||||
name := decodedParam(c, "name")
|
||||
name := c.Param("name")
|
||||
|
||||
history := svc.GetAgentStatusForUser(userID, name)
|
||||
if history == nil {
|
||||
@@ -258,7 +241,7 @@ func GetAgentObservablesEndpoint(app *application.Application) echo.HandlerFunc
|
||||
return func(c echo.Context) error {
|
||||
svc := app.AgentPoolService()
|
||||
userID := effectiveUserID(c)
|
||||
name := decodedParam(c, "name")
|
||||
name := c.Param("name")
|
||||
|
||||
history, err := svc.GetAgentObservablesForUser(userID, name)
|
||||
if err != nil {
|
||||
@@ -278,7 +261,7 @@ func ClearAgentObservablesEndpoint(app *application.Application) echo.HandlerFun
|
||||
return func(c echo.Context) error {
|
||||
svc := app.AgentPoolService()
|
||||
userID := effectiveUserID(c)
|
||||
name := decodedParam(c, "name")
|
||||
name := c.Param("name")
|
||||
if err := svc.ClearAgentObservablesForUser(userID, name); err != nil {
|
||||
return c.JSON(http.StatusNotFound, map[string]string{"error": err.Error()})
|
||||
}
|
||||
@@ -290,7 +273,7 @@ func ChatWithAgentEndpoint(app *application.Application) echo.HandlerFunc {
|
||||
return func(c echo.Context) error {
|
||||
svc := app.AgentPoolService()
|
||||
userID := effectiveUserID(c)
|
||||
name := decodedParam(c, "name")
|
||||
name := c.Param("name")
|
||||
var payload struct {
|
||||
Message string `json:"message"`
|
||||
}
|
||||
@@ -319,7 +302,7 @@ func AgentSSEEndpoint(app *application.Application) echo.HandlerFunc {
|
||||
return func(c echo.Context) error {
|
||||
svc := app.AgentPoolService()
|
||||
userID := effectiveUserID(c)
|
||||
name := decodedParam(c, "name")
|
||||
name := c.Param("name")
|
||||
|
||||
// Try local SSE manager first
|
||||
manager := svc.GetSSEManagerForUser(userID, name)
|
||||
@@ -351,7 +334,7 @@ func ExportAgentEndpoint(app *application.Application) echo.HandlerFunc {
|
||||
return func(c echo.Context) error {
|
||||
svc := app.AgentPoolService()
|
||||
userID := effectiveUserID(c)
|
||||
name := decodedParam(c, "name")
|
||||
name := c.Param("name")
|
||||
data, err := svc.ExportAgentForUser(userID, name)
|
||||
if err != nil {
|
||||
return c.JSON(http.StatusNotFound, map[string]string{"error": err.Error()})
|
||||
|
||||
@@ -4,6 +4,8 @@ import (
|
||||
"encoding/json"
|
||||
"io"
|
||||
"net/http"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"time"
|
||||
|
||||
"github.com/labstack/echo/v4"
|
||||
@@ -108,18 +110,6 @@ func UpdateSettingsEndpoint(app *application.Application) echo.HandlerFunc {
|
||||
})
|
||||
}
|
||||
|
||||
// Read whatever is already persisted: it is both the source of truth
|
||||
// for branding asset filenames (below) and the base we merge this
|
||||
// request onto before writing. A read failure must not let a Save
|
||||
// silently discard the existing settings — surface it instead.
|
||||
persisted, err := appConfig.ReadPersistedSettings()
|
||||
if err != nil {
|
||||
return c.JSON(http.StatusInternalServerError, schema.SettingsResponse{
|
||||
Success: false,
|
||||
Error: "Failed to read existing settings: " + err.Error(),
|
||||
})
|
||||
}
|
||||
|
||||
// Branding asset filenames are owned exclusively by
|
||||
// /api/branding/asset/{kind} (upload/delete). The Settings page also
|
||||
// round-trips them via GET /api/settings, but its local state is stale
|
||||
@@ -128,9 +118,11 @@ func UpdateSettingsEndpoint(app *application.Application) echo.HandlerFunc {
|
||||
// at page open. Replace whatever the body sent for these three fields
|
||||
// with the values currently on disk so /api/settings can never
|
||||
// regress them.
|
||||
settings.LogoFile = persisted.LogoFile
|
||||
settings.LogoHorizontalFile = persisted.LogoHorizontalFile
|
||||
settings.FaviconFile = persisted.FaviconFile
|
||||
if existing, err := appConfig.ReadPersistedSettings(); err == nil {
|
||||
settings.LogoFile = existing.LogoFile
|
||||
settings.LogoHorizontalFile = existing.LogoHorizontalFile
|
||||
settings.FaviconFile = existing.FaviconFile
|
||||
}
|
||||
|
||||
// The UI reads ApiKeys from GET /api/settings, which already returns the
|
||||
// merged env+runtime list. When the user clicks Save, the same merged
|
||||
@@ -153,17 +145,16 @@ func UpdateSettingsEndpoint(app *application.Application) echo.HandlerFunc {
|
||||
settings.ApiKeys = &runtimeOnly
|
||||
}
|
||||
|
||||
// Persist as a partial update: overlay only the fields this request set
|
||||
// onto the settings already on disk. Focused admin pages POST just the
|
||||
// keys they own (the Middleware proxy tab sends only mitm_listen; the
|
||||
// detector table only pii_default_detectors), so writing the request
|
||||
// body verbatim would null every unrelated setting (the no-omitempty
|
||||
// api_keys / pii_default_detectors fields even round-trip as JSON
|
||||
// null). The full Settings page still round-trips every field, so its
|
||||
// Save is unchanged.
|
||||
toPersist := persisted
|
||||
toPersist.MergeNonNil(settings)
|
||||
if err := appConfig.WritePersistedSettings(toPersist); err != nil {
|
||||
settingsFile := filepath.Join(appConfig.DynamicConfigsDir, "runtime_settings.json")
|
||||
settingsJSON, err := json.MarshalIndent(settings, "", " ")
|
||||
if err != nil {
|
||||
return c.JSON(http.StatusInternalServerError, schema.SettingsResponse{
|
||||
Success: false,
|
||||
Error: "Failed to marshal settings: " + err.Error(),
|
||||
})
|
||||
}
|
||||
|
||||
if err := os.WriteFile(settingsFile, settingsJSON, 0600); err != nil {
|
||||
return c.JSON(http.StatusInternalServerError, schema.SettingsResponse{
|
||||
Success: false,
|
||||
Error: "Failed to write settings file: " + err.Error(),
|
||||
@@ -271,14 +262,7 @@ func UpdateSettingsEndpoint(app *application.Application) echo.HandlerFunc {
|
||||
}
|
||||
}
|
||||
|
||||
// Rebuild the MITM listener when its address OR the instance-wide
|
||||
// default detectors change. The per-host detector map is resolved once
|
||||
// at listener start (startMITMLocked → ResolvePIIPolicy), so a
|
||||
// default-detector change is otherwise invisible to cloud-proxy traffic
|
||||
// until the next restart — an admin toggling a default detector would
|
||||
// see no redaction. RestartMITM is a no-op when the listener is
|
||||
// disabled (empty address).
|
||||
if settings.MITMListen != nil || settings.PIIDefaultDetectors != nil {
|
||||
if settings.MITMListen != nil {
|
||||
if err := app.RestartMITM(); err != nil {
|
||||
xlog.Error("Failed to restart MITM proxy", "error", err)
|
||||
return c.JSON(http.StatusInternalServerError, schema.SettingsResponse{
|
||||
|
||||
@@ -52,10 +52,6 @@ var _ = Describe("Settings endpoints", func() {
|
||||
// Settings are persisted here; set after construction since there's no
|
||||
// dedicated AppOption for it.
|
||||
app.ApplicationConfig().DynamicConfigsDir = tmp
|
||||
// Contain the MITM CA inside tmp too. The partial-save spec flips
|
||||
// mitm_listen, which starts the listener and writes a CA; without this
|
||||
// it defaults to ./mitm-ca and litters the package source tree.
|
||||
app.ApplicationConfig().MITMCADir = filepath.Join(tmp, "mitm-ca")
|
||||
|
||||
e = echo.New()
|
||||
e.GET("/api/settings", GetSettingsEndpoint(app))
|
||||
@@ -113,57 +109,6 @@ var _ = Describe("Settings endpoints", func() {
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
})
|
||||
|
||||
// Regression: a focused admin page (the Middleware proxy tab) POSTs only
|
||||
// the one field it owns — mitm_listen. The old handler wrote the request
|
||||
// body verbatim, so every other persisted setting was dropped (and
|
||||
// api_keys / pii_default_detectors, which lack omitempty, were written as
|
||||
// null). A partial POST must now merge onto what is already on disk.
|
||||
It("preserves unrelated persisted settings when a partial POST sets only mitm_listen", func() {
|
||||
// First save establishes a fuller settings file (as the full Settings
|
||||
// page would): galleries, an API key, and the MITM listener. The
|
||||
// listener restart binds a real socket, so use 127.0.0.1:0 for an
|
||||
// ephemeral free port rather than a fixed one that may be in use.
|
||||
rec := post(`{"mitm_listen":"127.0.0.1:0","galleries":[{"name":"g1","url":"http://example/g1"}],"api_keys":["k1"],"pii_default_detectors":["det-a"]}`)
|
||||
Expect(rec.Code).To(Equal(http.StatusOK), rec.Body.String())
|
||||
|
||||
// The Middleware proxy tab then changes only the listen address — the
|
||||
// exact partial body that nulled everything else before the fix.
|
||||
rec = post(`{"mitm_listen":"127.0.0.1:0"}`)
|
||||
Expect(rec.Code).To(Equal(http.StatusOK), rec.Body.String())
|
||||
|
||||
raw, err := os.ReadFile(filepath.Join(tmp, "runtime_settings.json"))
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
var ondisk config.RuntimeSettings
|
||||
Expect(json.Unmarshal(raw, &ondisk)).To(Succeed())
|
||||
|
||||
Expect(ondisk.MITMListen).ToNot(BeNil())
|
||||
Expect(*ondisk.MITMListen).To(Equal("127.0.0.1:0"), "the changed field should be saved")
|
||||
Expect(ondisk.Galleries).ToNot(BeNil(), "galleries were clobbered by the partial save")
|
||||
Expect(*ondisk.Galleries).To(HaveLen(1))
|
||||
Expect(ondisk.ApiKeys).ToNot(BeNil(), "api_keys were nulled by the partial save")
|
||||
Expect(*ondisk.ApiKeys).To(Equal([]string{"k1"}))
|
||||
Expect(ondisk.PIIDefaultDetectors).ToNot(BeNil(), "pii_default_detectors were nulled by the partial save")
|
||||
Expect(*ondisk.PIIDefaultDetectors).To(Equal([]string{"det-a"}))
|
||||
})
|
||||
|
||||
// The MITM listener resolves its per-host PII detectors once at start
|
||||
// (startMITMLocked → ResolvePIIPolicy), and the handler used to restart it
|
||||
// only when mitm_listen changed. So an admin toggling a default detector
|
||||
// (the Middleware detector table POSTs only pii_default_detectors) left
|
||||
// cloud-proxy traffic unredacted until the next reboot. A
|
||||
// pii_default_detectors change must now rebuild the listener.
|
||||
It("rebuilds the MITM listener when only pii_default_detectors changes", func() {
|
||||
rec := post(`{"mitm_listen":"127.0.0.1:0"}`)
|
||||
Expect(rec.Code).To(Equal(http.StatusOK), rec.Body.String())
|
||||
srv1 := app.MITMServer()
|
||||
Expect(srv1).ToNot(BeNil(), "listener should be running after mitm_listen is set")
|
||||
|
||||
rec = post(`{"pii_default_detectors":["det-a"]}`)
|
||||
Expect(rec.Code).To(Equal(http.StatusOK), rec.Body.String())
|
||||
Expect(app.MITMServer()).ToNot(BeIdenticalTo(srv1),
|
||||
"a default-detector change must restart the listener so it picks up the new detectors")
|
||||
})
|
||||
|
||||
// Residual #9125: enabling the watchdog from a cold (off) state via the
|
||||
// React master toggle must start the live watchdog immediately, without a
|
||||
// restart. The toggle posts watchdog_idle_enabled/busy_enabled=true while
|
||||
|
||||
@@ -12,7 +12,6 @@ import (
|
||||
"os"
|
||||
"strconv"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"net/http"
|
||||
@@ -135,18 +134,6 @@ type Session struct {
|
||||
// pairs are kept together so we never feed an orphaned tool result.
|
||||
MaxHistoryItems int
|
||||
|
||||
// Compaction settings resolved from pipeline.compaction (see resolveCompaction).
|
||||
CompactionEnabled bool
|
||||
CompactionTrigger int
|
||||
SummaryModel string
|
||||
MaxSummaryTokens int
|
||||
|
||||
// summarizerFactory lazily builds the model used for compaction summaries
|
||||
// when summary_model is configured; nil means reuse the pipeline LLM.
|
||||
summarizerFactory func() (Model, error)
|
||||
summarizerOnce sync.Once
|
||||
summarizerCached Model
|
||||
|
||||
// AssistantExecutor is non-nil when the session opted into the in-process
|
||||
// LocalAI Assistant tool surface. Tool calls whose name matches this
|
||||
// executor's catalog are run inproc and their output is fed back to the
|
||||
@@ -254,12 +241,6 @@ type Conversation struct {
|
||||
ID string
|
||||
Items []*types.MessageItemUnion
|
||||
Lock sync.Mutex
|
||||
// Memory is the rolling summary of items already evicted by compaction. It
|
||||
// is kept out of Items (so trimRealtimeItems never drops it) and rendered
|
||||
// as a system message right after the session instructions.
|
||||
Memory string
|
||||
// compacting ensures at most one background compaction runs per conversation.
|
||||
compacting atomic.Bool
|
||||
}
|
||||
|
||||
func (c *Conversation) ToServer() types.Conversation {
|
||||
@@ -559,12 +540,13 @@ func runRealtimeSession(application *application.Application, t Transport, model
|
||||
SoundDetectionWindowMs: cfg.Pipeline.SoundDetectionWindowMs,
|
||||
SoundDetectionHopMs: cfg.Pipeline.SoundDetectionHopMs,
|
||||
}
|
||||
session.CompactionEnabled, session.CompactionTrigger, session.MaxSummaryTokens, session.SummaryModel = resolveCompaction(cfg, session.MaxHistoryItems)
|
||||
|
||||
// Create a default conversation
|
||||
conversationID := generateConversationID()
|
||||
conversation := &Conversation{
|
||||
ID: conversationID,
|
||||
ID: conversationID,
|
||||
// TODO: We need to truncate the conversation items when a new item is added and we have run out of space. There are multiple places where items
|
||||
// can be added so we could use a datastructure here that enforces truncation upon addition
|
||||
Items: []*types.MessageItemUnion{},
|
||||
}
|
||||
session.Conversations[conversationID] = conversation
|
||||
@@ -595,18 +577,6 @@ func runRealtimeSession(application *application.Application, t Transport, model
|
||||
}
|
||||
session.ModelInterface = m
|
||||
|
||||
if session.SummaryModel != "" {
|
||||
summaryModelName := session.SummaryModel
|
||||
sid := sessionID
|
||||
session.summarizerFactory = func() (Model, error) {
|
||||
summaryCfg, lerr := application.ModelConfigLoader().LoadModelConfigFileByNameDefaultOptions(summaryModelName, application.ApplicationConfig())
|
||||
if lerr != nil {
|
||||
return nil, fmt.Errorf("load summary model config %q: %w", summaryModelName, lerr)
|
||||
}
|
||||
return newModel(&summaryCfg.Pipeline, application.ModelConfigLoader(), application.ModelLoader(), application.ApplicationConfig(), evaluator, buildRealtimeRoutingContext(application, sid))
|
||||
}
|
||||
}
|
||||
|
||||
if cfg.Pipeline.VoiceGateEnabled() {
|
||||
gate, gerr := newVoiceGate(
|
||||
*cfg.Pipeline.VoiceRecognition,
|
||||
@@ -837,15 +807,6 @@ func runRealtimeSession(application *application.Application, t Transport, model
|
||||
commitUtterance(respCtx, allAudio, session, conversation, t)
|
||||
}()
|
||||
|
||||
case types.InputAudioBufferClearEvent:
|
||||
xlog.Debug("recv", "message", string(msg))
|
||||
// Discard a partially-captured utterance so the client can restart
|
||||
// input cleanly without the stale buffer leaking into the next commit.
|
||||
clearInputAudio(session)
|
||||
sendEvent(t, types.InputAudioBufferClearedEvent{
|
||||
ServerEventBase: types.ServerEventBase{EventID: e.EventID},
|
||||
})
|
||||
|
||||
case types.ConversationItemCreateEvent:
|
||||
xlog.Debug("recv", "message", string(msg))
|
||||
// Add the item to the conversation
|
||||
@@ -880,39 +841,7 @@ func runRealtimeSession(application *application.Application, t Transport, model
|
||||
})
|
||||
|
||||
case types.ConversationItemDeleteEvent:
|
||||
xlog.Debug("recv", "message", string(msg))
|
||||
if e.ItemID == "" {
|
||||
sendError(t, "invalid_item_id", "Need item_id, but none specified", "", "event_TODO")
|
||||
continue
|
||||
}
|
||||
conversation.Lock.Lock()
|
||||
updated, ok := deleteItem(conversation.Items, e.ItemID)
|
||||
conversation.Items = updated
|
||||
conversation.Lock.Unlock()
|
||||
if !ok {
|
||||
sendError(t, "invalid_item_id", "Item to delete not found", "", "event_TODO")
|
||||
continue
|
||||
}
|
||||
sendEvent(t, types.ConversationItemDeletedEvent{
|
||||
ServerEventBase: types.ServerEventBase{EventID: e.EventID},
|
||||
ItemID: e.ItemID,
|
||||
})
|
||||
|
||||
case types.ConversationItemTruncateEvent:
|
||||
xlog.Debug("recv", "message", string(msg))
|
||||
conversation.Lock.Lock()
|
||||
ok := truncateAssistantText(conversation.Items, e.ItemID, e.ContentIndex)
|
||||
conversation.Lock.Unlock()
|
||||
if !ok {
|
||||
sendError(t, "invalid_item_id", "Item to truncate not found", "", "event_TODO")
|
||||
continue
|
||||
}
|
||||
sendEvent(t, types.ConversationItemTruncatedEvent{
|
||||
ServerEventBase: types.ServerEventBase{EventID: e.EventID},
|
||||
ItemID: e.ItemID,
|
||||
ContentIndex: e.ContentIndex,
|
||||
AudioEndMs: e.AudioEndMs,
|
||||
})
|
||||
sendError(t, "not_implemented", "Deleting items not implemented", "", "event_TODO")
|
||||
|
||||
case types.ConversationItemRetrieveEvent:
|
||||
xlog.Debug("recv", "message", string(msg))
|
||||
@@ -925,7 +854,21 @@ func runRealtimeSession(application *application.Application, t Transport, model
|
||||
conversation.Lock.Lock()
|
||||
var retrievedItem types.MessageItemUnion
|
||||
for _, item := range conversation.Items {
|
||||
if itemID(item) == e.ItemID {
|
||||
// We need to check ID in the union
|
||||
var id string
|
||||
if item.System != nil {
|
||||
id = item.System.ID
|
||||
} else if item.User != nil {
|
||||
id = item.User.ID
|
||||
} else if item.Assistant != nil {
|
||||
id = item.Assistant.ID
|
||||
} else if item.FunctionCall != nil {
|
||||
id = item.FunctionCall.ID
|
||||
} else if item.FunctionCallOutput != nil {
|
||||
id = item.FunctionCallOutput.ID
|
||||
}
|
||||
|
||||
if id == e.ItemID {
|
||||
retrievedItem = *item
|
||||
break
|
||||
}
|
||||
@@ -1723,9 +1666,6 @@ const maxAssistantToolTurns = 10
|
||||
|
||||
func triggerResponse(ctx context.Context, session *Session, conv *Conversation, t Transport, overrides *types.ResponseCreateParams) {
|
||||
triggerResponseAtTurn(ctx, session, conv, t, overrides, 0)
|
||||
// Fold aged-out turns into the rolling memory off the critical path; the
|
||||
// next turn reaps the smaller buffer.
|
||||
session.maybeCompact(conv)
|
||||
}
|
||||
|
||||
func triggerResponseAtTurn(ctx context.Context, session *Session, conv *Conversation, t Transport, overrides *types.ResponseCreateParams, toolTurn int) {
|
||||
@@ -1781,7 +1721,6 @@ func triggerResponseAtTurn(ctx context.Context, session *Session, conv *Conversa
|
||||
var lastUserSpeaker *types.Speaker
|
||||
personalize := session.voiceGate != nil && session.voiceGate.cfg.PersonalizeEnabled()
|
||||
conv.Lock.Lock()
|
||||
conversationHistory = withMemory(conversationHistory, conv.Memory)
|
||||
items := trimRealtimeItems(conv.Items, session.MaxHistoryItems)
|
||||
for _, item := range items {
|
||||
if item.User != nil {
|
||||
|
||||
@@ -1,326 +0,0 @@
|
||||
package openai
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
"github.com/mudler/LocalAI/core/http/endpoints/openai/types"
|
||||
"github.com/mudler/LocalAI/core/schema"
|
||||
"github.com/mudler/LocalAI/pkg/reasoning"
|
||||
"github.com/mudler/xlog"
|
||||
)
|
||||
|
||||
const (
|
||||
defaultMaxSummaryTokens = 512
|
||||
memoryPrefix = "Summary of earlier conversation:\n"
|
||||
// compactionTimeout bounds the summarizer call so a stuck model can't pin the
|
||||
// compacting flag (and thus block all further compaction) forever.
|
||||
compactionTimeout = 60 * time.Second
|
||||
)
|
||||
|
||||
// withMemory inserts the rolling summary as a system message after the existing
|
||||
// (instructions) history. No-op when memory is empty.
|
||||
func withMemory(history schema.Messages, memory string) schema.Messages {
|
||||
if memory == "" {
|
||||
return history
|
||||
}
|
||||
content := memoryPrefix + memory
|
||||
return append(history, schema.Message{
|
||||
Role: string(types.MessageRoleSystem),
|
||||
StringContent: content,
|
||||
Content: content,
|
||||
})
|
||||
}
|
||||
|
||||
// renderItemsTranscript renders conversation items as a plain "role: text"
|
||||
// transcript for summarization. Non-text items (bare tool calls) are labelled
|
||||
// so the summarizer keeps track of actions taken.
|
||||
func renderItemsTranscript(items []*types.MessageItemUnion) string {
|
||||
var b strings.Builder
|
||||
for _, item := range items {
|
||||
switch {
|
||||
case item.User != nil:
|
||||
b.WriteString("user: ")
|
||||
for _, c := range item.User.Content {
|
||||
if c.Text != "" {
|
||||
b.WriteString(c.Text)
|
||||
}
|
||||
if c.Transcript != "" {
|
||||
b.WriteString(c.Transcript)
|
||||
}
|
||||
}
|
||||
b.WriteString("\n")
|
||||
case item.Assistant != nil:
|
||||
b.WriteString("assistant: ")
|
||||
// Realtime assistant *audio* turns store the spoken words in
|
||||
// .Transcript (not .Text), so emit both or spoken turns are dropped.
|
||||
for _, c := range item.Assistant.Content {
|
||||
if c.Text != "" {
|
||||
b.WriteString(c.Text)
|
||||
}
|
||||
if c.Transcript != "" {
|
||||
b.WriteString(c.Transcript)
|
||||
}
|
||||
}
|
||||
b.WriteString("\n")
|
||||
case item.FunctionCall != nil:
|
||||
b.WriteString(fmt.Sprintf("assistant called tool %s(%s)\n", item.FunctionCall.Name, item.FunctionCall.Arguments))
|
||||
case item.FunctionCallOutput != nil:
|
||||
b.WriteString(fmt.Sprintf("tool result: %s\n", item.FunctionCallOutput.Output))
|
||||
}
|
||||
}
|
||||
return strings.TrimSpace(b.String())
|
||||
}
|
||||
|
||||
// buildSummaryMessages builds the chat messages for the summarizer LLM: a system
|
||||
// instruction plus prior memory and the new transcript to fold in. maxTokens is
|
||||
// advisory (fed to the prompt; not hard-enforced in v1).
|
||||
func buildSummaryMessages(priorMemory, transcript string, maxTokens int) schema.Messages {
|
||||
system := fmt.Sprintf("You maintain a running memory of a live voice conversation. "+
|
||||
"Merge the prior memory with the new exchanges into an updated memory. "+
|
||||
"Keep names, decisions, facts, preferences, and open threads. Be concise "+
|
||||
"(under ~%d tokens). Output only the updated memory, with no reasoning or tags.", maxTokens)
|
||||
var user strings.Builder
|
||||
if priorMemory != "" {
|
||||
user.WriteString("Prior memory:\n")
|
||||
user.WriteString(priorMemory)
|
||||
user.WriteString("\n\n")
|
||||
}
|
||||
user.WriteString("New exchanges to fold in:\n")
|
||||
user.WriteString(transcript)
|
||||
return schema.Messages{
|
||||
{Role: string(types.MessageRoleSystem), StringContent: system, Content: system},
|
||||
{Role: string(types.MessageRoleUser), StringContent: user.String(), Content: user.String()},
|
||||
}
|
||||
}
|
||||
|
||||
// clearInputAudio resets the session's pending input audio buffer (the raw
|
||||
// PCM and any buffered Opus frames). Used by the input_audio_buffer.clear
|
||||
// realtime event so a client can discard a partially-captured utterance.
|
||||
func clearInputAudio(s *Session) {
|
||||
s.AudioBufferLock.Lock()
|
||||
s.InputAudioBuffer = nil
|
||||
s.AudioBufferLock.Unlock()
|
||||
s.OpusFramesLock.Lock()
|
||||
s.OpusFrames = nil
|
||||
s.OpusFramesLock.Unlock()
|
||||
}
|
||||
|
||||
// itemID extracts the id from any MessageItemUnion variant ("" if none).
|
||||
func itemID(item *types.MessageItemUnion) string {
|
||||
switch {
|
||||
case item == nil:
|
||||
return ""
|
||||
case item.System != nil:
|
||||
return item.System.ID
|
||||
case item.User != nil:
|
||||
return item.User.ID
|
||||
case item.Assistant != nil:
|
||||
return item.Assistant.ID
|
||||
case item.FunctionCall != nil:
|
||||
return item.FunctionCall.ID
|
||||
case item.FunctionCallOutput != nil:
|
||||
return item.FunctionCallOutput.ID
|
||||
default:
|
||||
return ""
|
||||
}
|
||||
}
|
||||
|
||||
// deleteItem removes the item with id from items, returning the new slice and
|
||||
// whether it was found.
|
||||
func deleteItem(items []*types.MessageItemUnion, id string) ([]*types.MessageItemUnion, bool) {
|
||||
for i, item := range items {
|
||||
if itemID(item) == id {
|
||||
return append(items[:i:i], items[i+1:]...), true
|
||||
}
|
||||
}
|
||||
return items, false
|
||||
}
|
||||
|
||||
// truncateAssistantText clears the text of the assistant item's content part at
|
||||
// contentIndex. Minimal truncate: used to discard an interrupted/barge-in
|
||||
// response tail. Both .Text and .Transcript are cleared because realtime audio
|
||||
// turns store the spoken words in .Transcript (clearing only .Text would no-op).
|
||||
func truncateAssistantText(items []*types.MessageItemUnion, id string, contentIndex int) bool {
|
||||
for _, item := range items {
|
||||
if itemID(item) != id || item.Assistant == nil {
|
||||
continue
|
||||
}
|
||||
if contentIndex >= 0 && contentIndex < len(item.Assistant.Content) {
|
||||
item.Assistant.Content[contentIndex].Text = ""
|
||||
item.Assistant.Content[contentIndex].Transcript = ""
|
||||
}
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// compactionCut returns the index splitting items into overflow (items[:cut],
|
||||
// to be summarized+evicted) and the kept live tail (items[cut:]), keeping the
|
||||
// last `keep` items. It mirrors trimRealtimeItems' pair-safety: the cut is
|
||||
// pulled left so a function_call and its function_call_output are never split
|
||||
// across the boundary (the whole pair lands in the kept tail). Returns 0 when
|
||||
// there is nothing to cut.
|
||||
func compactionCut(items []*types.MessageItemUnion, keep int) int {
|
||||
// keep <= 0 means no live-window cap (the "unlimited history" sentinel, as
|
||||
// in trimRealtimeItems): there is nothing to evict, so cut nothing. This
|
||||
// also avoids indexing items[len(items)] in the pair-safety loop below.
|
||||
if keep <= 0 {
|
||||
return 0
|
||||
}
|
||||
cut := len(items) - keep
|
||||
if cut <= 0 {
|
||||
return 0
|
||||
}
|
||||
for cut > 0 && items[cut] != nil && items[cut].FunctionCallOutput != nil {
|
||||
cut--
|
||||
}
|
||||
return cut
|
||||
}
|
||||
|
||||
// resolveCompaction reads the pipeline.compaction block, applying defaults and
|
||||
// the trigger>max_history invariant. maxHistory is the already-resolved live
|
||||
// window size. Returns enabled=false (and zero values) when compaction is off.
|
||||
func resolveCompaction(cfg *config.ModelConfig, maxHistory int) (enabled bool, trigger, maxSummaryTokens int, summaryModel string) {
|
||||
if cfg == nil || cfg.Pipeline.Compaction == nil || !cfg.Pipeline.Compaction.Enabled {
|
||||
return false, 0, 0, ""
|
||||
}
|
||||
c := cfg.Pipeline.Compaction
|
||||
trigger = c.TriggerItems
|
||||
if trigger <= 0 {
|
||||
trigger = maxHistory * 2
|
||||
}
|
||||
if trigger <= maxHistory {
|
||||
trigger = maxHistory + 1
|
||||
}
|
||||
maxSummaryTokens = c.MaxSummaryTokens
|
||||
if maxSummaryTokens <= 0 {
|
||||
maxSummaryTokens = defaultMaxSummaryTokens
|
||||
}
|
||||
return true, trigger, maxSummaryTokens, c.SummaryModel
|
||||
}
|
||||
|
||||
// prefixMatches reports whether items begins with the same ids, in order, as
|
||||
// snapshot — i.e. the overflow we summarized is still at the head (no concurrent
|
||||
// client delete reshuffled it).
|
||||
func prefixMatches(items, snapshot []*types.MessageItemUnion) bool {
|
||||
if len(items) < len(snapshot) {
|
||||
return false
|
||||
}
|
||||
for i := range snapshot {
|
||||
if itemID(items[i]) != itemID(snapshot[i]) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// compact folds overflow items into conv.Memory and evicts them. It never holds
|
||||
// conv.Lock across the summarizer call: snapshot under lock, summarize unlocked,
|
||||
// commit under lock (re-validating the head is unchanged). On any error it
|
||||
// leaves the conversation untouched — items are never dropped without a summary.
|
||||
func (s *Session) compact(conv *Conversation, model Model) {
|
||||
if model == nil {
|
||||
return
|
||||
}
|
||||
// Snapshot.
|
||||
conv.Lock.Lock()
|
||||
if len(conv.Items) <= s.CompactionTrigger {
|
||||
conv.Lock.Unlock()
|
||||
return
|
||||
}
|
||||
cut := compactionCut(conv.Items, s.MaxHistoryItems)
|
||||
if cut <= 0 {
|
||||
conv.Lock.Unlock()
|
||||
return
|
||||
}
|
||||
overflow := append([]*types.MessageItemUnion(nil), conv.Items[:cut]...)
|
||||
prior := conv.Memory
|
||||
conv.Lock.Unlock()
|
||||
|
||||
// Summarize (unlocked).
|
||||
msgs := buildSummaryMessages(prior, renderItemsTranscript(overflow), s.MaxSummaryTokens)
|
||||
ctx, cancel := context.WithTimeout(context.Background(), compactionTimeout)
|
||||
defer cancel()
|
||||
predFunc, err := model.Predict(ctx, msgs, nil, nil, nil, nil, nil, nil, nil, nil, nil)
|
||||
if err != nil {
|
||||
xlog.Warn("realtime compaction: summarizer predict failed", "error", err)
|
||||
return
|
||||
}
|
||||
pred, err := predFunc()
|
||||
if err != nil {
|
||||
xlog.Warn("realtime compaction: summarizer inference failed", "error", err)
|
||||
return
|
||||
}
|
||||
// Strip any leaked reasoning/thinking spans using the same extractor the
|
||||
// rest of the realtime path uses, rather than a bespoke regex.
|
||||
rcfg := reasoning.Config{}
|
||||
if mc := model.PredictConfig(); mc != nil {
|
||||
rcfg = spokenReasoningConfig(mc.ReasoningConfig)
|
||||
}
|
||||
_, summary := reasoning.ExtractReasoningComplete(pred.Response, "", rcfg)
|
||||
summary = strings.TrimSpace(summary)
|
||||
if summary == "" {
|
||||
xlog.Warn("realtime compaction: empty summary, skipping eviction")
|
||||
return
|
||||
}
|
||||
|
||||
// Commit.
|
||||
conv.Lock.Lock()
|
||||
defer conv.Lock.Unlock()
|
||||
if !prefixMatches(conv.Items, overflow) {
|
||||
xlog.Debug("realtime compaction: head changed during summary, skipping")
|
||||
return
|
||||
}
|
||||
conv.Memory = summary
|
||||
conv.Items = conv.Items[len(overflow):]
|
||||
xlog.Debug("realtime compaction: evicted items into memory", "evicted", len(overflow), "remaining", len(conv.Items))
|
||||
}
|
||||
|
||||
// summarizerModel resolves the model used to produce compaction summaries.
|
||||
// Without a configured summary_model (or factory) it reuses the pipeline LLM.
|
||||
func (s *Session) summarizerModel() Model {
|
||||
if s.SummaryModel == "" || s.summarizerFactory == nil {
|
||||
return s.ModelInterface
|
||||
}
|
||||
s.summarizerOnce.Do(func() {
|
||||
m, err := s.summarizerFactory()
|
||||
if err != nil {
|
||||
xlog.Warn("realtime compaction: summary_model load failed, falling back to pipeline LLM", "model", s.SummaryModel, "error", err)
|
||||
m = s.ModelInterface
|
||||
}
|
||||
s.summarizerCached = m
|
||||
})
|
||||
return s.summarizerCached
|
||||
}
|
||||
|
||||
// maybeCompact schedules a background compaction when the live buffer has grown
|
||||
// past the trigger and none is already running. Returns immediately.
|
||||
func (s *Session) maybeCompact(conv *Conversation) {
|
||||
if !s.CompactionEnabled {
|
||||
return
|
||||
}
|
||||
conv.Lock.Lock()
|
||||
over := len(conv.Items) > s.CompactionTrigger
|
||||
conv.Lock.Unlock()
|
||||
if !over {
|
||||
return
|
||||
}
|
||||
if !conv.compacting.CompareAndSwap(false, true) {
|
||||
return
|
||||
}
|
||||
go func() {
|
||||
defer conv.compacting.Store(false)
|
||||
// Resolve (and, for a configured summary_model, lazily load) the
|
||||
// summarizer only when a compaction actually runs, off the response
|
||||
// path — so the model load never blocks a user turn.
|
||||
model := s.summarizerModel()
|
||||
if model == nil {
|
||||
return
|
||||
}
|
||||
s.compact(conv, model)
|
||||
}()
|
||||
}
|
||||
@@ -1,308 +0,0 @@
|
||||
package openai
|
||||
|
||||
import (
|
||||
"errors"
|
||||
|
||||
. "github.com/onsi/ginkgo/v2"
|
||||
. "github.com/onsi/gomega"
|
||||
|
||||
"github.com/mudler/LocalAI/core/backend"
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
"github.com/mudler/LocalAI/core/http/endpoints/openai/types"
|
||||
"github.com/mudler/LocalAI/core/schema"
|
||||
)
|
||||
|
||||
var _ = Describe("resolveCompaction", func() {
|
||||
It("disables when the block is absent", func() {
|
||||
enabled, _, _, _ := resolveCompaction(&config.ModelConfig{}, 6)
|
||||
Expect(enabled).To(BeFalse())
|
||||
})
|
||||
|
||||
It("defaults trigger to 2x max history and tokens to 512", func() {
|
||||
cfg := &config.ModelConfig{Pipeline: config.Pipeline{Compaction: &config.PipelineCompaction{Enabled: true}}}
|
||||
enabled, trigger, maxTok, _ := resolveCompaction(cfg, 6)
|
||||
Expect(enabled).To(BeTrue())
|
||||
Expect(trigger).To(Equal(12))
|
||||
Expect(maxTok).To(Equal(512))
|
||||
})
|
||||
|
||||
It("clamps trigger to max history + 1 when misconfigured", func() {
|
||||
cfg := &config.ModelConfig{Pipeline: config.Pipeline{Compaction: &config.PipelineCompaction{Enabled: true, TriggerItems: 4}}}
|
||||
_, trigger, _, _ := resolveCompaction(cfg, 6)
|
||||
Expect(trigger).To(Equal(7))
|
||||
})
|
||||
|
||||
It("honors explicit values", func() {
|
||||
cfg := &config.ModelConfig{Pipeline: config.Pipeline{Compaction: &config.PipelineCompaction{
|
||||
Enabled: true, TriggerItems: 20, MaxSummaryTokens: 256, SummaryModel: "tiny"}}}
|
||||
enabled, trigger, maxTok, model := resolveCompaction(cfg, 6)
|
||||
Expect(enabled).To(BeTrue())
|
||||
Expect(trigger).To(Equal(20))
|
||||
Expect(maxTok).To(Equal(256))
|
||||
Expect(model).To(Equal("tiny"))
|
||||
})
|
||||
})
|
||||
|
||||
var _ = Describe("deleteItem", func() {
|
||||
mk := func(ids ...string) []*types.MessageItemUnion {
|
||||
out := make([]*types.MessageItemUnion, len(ids))
|
||||
for i, id := range ids {
|
||||
out[i] = &types.MessageItemUnion{User: &types.MessageItemUser{ID: id}}
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
It("removes the item with the given id", func() {
|
||||
items, ok := deleteItem(mk("a", "b", "c"), "b")
|
||||
Expect(ok).To(BeTrue())
|
||||
Expect(len(items)).To(Equal(2))
|
||||
Expect(itemID(items[0])).To(Equal("a"))
|
||||
Expect(itemID(items[1])).To(Equal("c"))
|
||||
})
|
||||
|
||||
It("reports not found for an unknown id", func() {
|
||||
_, ok := deleteItem(mk("a"), "zzz")
|
||||
Expect(ok).To(BeFalse())
|
||||
})
|
||||
})
|
||||
|
||||
var _ = Describe("clearInputAudio", func() {
|
||||
It("resets the pending PCM and buffered Opus frames", func() {
|
||||
s := &Session{InputAudioBuffer: []byte{1, 2, 3}, OpusFrames: [][]byte{{9}}}
|
||||
clearInputAudio(s)
|
||||
Expect(s.InputAudioBuffer).To(BeNil())
|
||||
Expect(s.OpusFrames).To(BeNil())
|
||||
})
|
||||
})
|
||||
|
||||
var _ = Describe("truncateAssistantText", func() {
|
||||
It("clears the text of the assistant content part at the index", func() {
|
||||
items := []*types.MessageItemUnion{{Assistant: &types.MessageItemAssistant{
|
||||
ID: "a1",
|
||||
Content: []types.MessageContentOutput{{Type: types.MessageContentTypeText, Text: "hello world"}},
|
||||
}}}
|
||||
ok := truncateAssistantText(items, "a1", 0)
|
||||
Expect(ok).To(BeTrue())
|
||||
Expect(items[0].Assistant.Content[0].Text).To(Equal(""))
|
||||
})
|
||||
|
||||
// Realtime assistant *audio* turns store the spoken words in .Transcript, not
|
||||
// .Text, so a barge-in truncate must clear .Transcript too or it would no-op.
|
||||
It("clears the transcript of an assistant audio content part", func() {
|
||||
items := []*types.MessageItemUnion{{Assistant: &types.MessageItemAssistant{
|
||||
ID: "a1",
|
||||
Content: []types.MessageContentOutput{{Type: types.MessageContentTypeAudio, Transcript: "hello world"}},
|
||||
}}}
|
||||
ok := truncateAssistantText(items, "a1", 0)
|
||||
Expect(ok).To(BeTrue())
|
||||
Expect(items[0].Assistant.Content[0].Transcript).To(Equal(""))
|
||||
})
|
||||
|
||||
It("returns false for an unknown id", func() {
|
||||
Expect(truncateAssistantText(nil, "nope", 0)).To(BeFalse())
|
||||
})
|
||||
})
|
||||
|
||||
var _ = Describe("compactionCut", func() {
|
||||
user := func(id string) *types.MessageItemUnion {
|
||||
return &types.MessageItemUnion{User: &types.MessageItemUser{ID: id}}
|
||||
}
|
||||
call := func(id string) *types.MessageItemUnion {
|
||||
return &types.MessageItemUnion{FunctionCall: &types.MessageItemFunctionCall{ID: id}}
|
||||
}
|
||||
out := func(id string) *types.MessageItemUnion {
|
||||
return &types.MessageItemUnion{FunctionCallOutput: &types.MessageItemFunctionCallOutput{ID: id}}
|
||||
}
|
||||
|
||||
It("cuts exactly len-keep when no pairs straddle the boundary", func() {
|
||||
items := []*types.MessageItemUnion{user("1"), user("2"), user("3"), user("4")}
|
||||
Expect(compactionCut(items, 2)).To(Equal(2))
|
||||
})
|
||||
|
||||
It("returns 0 when nothing to cut", func() {
|
||||
Expect(compactionCut([]*types.MessageItemUnion{user("1")}, 2)).To(Equal(0))
|
||||
})
|
||||
|
||||
It("returns 0 (cuts nothing) when keep is 0 — the unlimited-window sentinel", func() {
|
||||
items := []*types.MessageItemUnion{user("1"), user("2"), user("3")}
|
||||
Expect(compactionCut(items, 0)).To(Equal(0))
|
||||
})
|
||||
|
||||
It("moves the boundary so a call/output pair is not split", func() {
|
||||
// keep=2 -> naive cut=2, but items[2] is the output of items[1]'s call;
|
||||
// pull the cut right so the whole pair stays in the kept tail.
|
||||
items := []*types.MessageItemUnion{user("1"), call("c"), out("c"), user("4")}
|
||||
Expect(compactionCut(items, 2)).To(Equal(1))
|
||||
})
|
||||
})
|
||||
|
||||
var _ = Describe("withMemory", func() {
|
||||
It("inserts a memory system message when memory is non-empty", func() {
|
||||
base := schema.Messages{{Role: "system", StringContent: "instructions"}}
|
||||
out := withMemory(base, "user is Bob; wants pizza")
|
||||
Expect(len(out)).To(Equal(2))
|
||||
Expect(out[1].Role).To(Equal("system"))
|
||||
Expect(out[1].StringContent).To(ContainSubstring("user is Bob"))
|
||||
Expect(out[1].StringContent).To(ContainSubstring("Summary of earlier conversation"))
|
||||
})
|
||||
|
||||
It("is a no-op when memory is empty", func() {
|
||||
base := schema.Messages{{Role: "system", StringContent: "instructions"}}
|
||||
Expect(withMemory(base, "")).To(HaveLen(1))
|
||||
})
|
||||
})
|
||||
|
||||
var _ = Describe("renderItemsTranscript", func() {
|
||||
It("renders user and assistant text turns", func() {
|
||||
items := []*types.MessageItemUnion{
|
||||
{User: &types.MessageItemUser{Content: []types.MessageContentInput{{Type: types.MessageContentTypeInputText, Text: "hi"}}}},
|
||||
{Assistant: &types.MessageItemAssistant{Content: []types.MessageContentOutput{{Type: types.MessageContentTypeText, Text: "hello"}}}},
|
||||
}
|
||||
out := renderItemsTranscript(items)
|
||||
Expect(out).To(ContainSubstring("user: hi"))
|
||||
Expect(out).To(ContainSubstring("assistant: hello"))
|
||||
})
|
||||
|
||||
// Realtime assistant *audio* turns store the spoken words in .Transcript, not
|
||||
// .Text, so the transcript builder must emit .Transcript too or spoken turns
|
||||
// would be dropped from the summary.
|
||||
It("renders an assistant audio turn from its transcript", func() {
|
||||
items := []*types.MessageItemUnion{
|
||||
{Assistant: &types.MessageItemAssistant{Content: []types.MessageContentOutput{{Type: types.MessageContentTypeAudio, Transcript: "spoken words"}}}},
|
||||
}
|
||||
Expect(renderItemsTranscript(items)).To(ContainSubstring("assistant: spoken words"))
|
||||
})
|
||||
})
|
||||
|
||||
var _ = Describe("buildSummaryMessages", func() {
|
||||
It("includes prior memory and the new transcript", func() {
|
||||
msgs := buildSummaryMessages("prior facts", "user: hi", 512)
|
||||
Expect(len(msgs)).To(Equal(2))
|
||||
Expect(msgs[0].Role).To(Equal("system"))
|
||||
Expect(msgs[1].StringContent).To(ContainSubstring("prior facts"))
|
||||
Expect(msgs[1].StringContent).To(ContainSubstring("user: hi"))
|
||||
})
|
||||
})
|
||||
|
||||
var _ = Describe("compact", func() {
|
||||
user := func(id, text string) *types.MessageItemUnion {
|
||||
return &types.MessageItemUnion{User: &types.MessageItemUser{ID: id,
|
||||
Content: []types.MessageContentInput{{Type: types.MessageContentTypeInputText, Text: text}}}}
|
||||
}
|
||||
|
||||
It("summarizes overflow into Memory and evicts it, keeping the live tail", func() {
|
||||
conv := &Conversation{Items: []*types.MessageItemUnion{
|
||||
user("1", "a"), user("2", "b"), user("3", "c"), user("4", "d"),
|
||||
user("5", "e"), user("6", "f"), user("7", "g"), user("8", "h"),
|
||||
}}
|
||||
s := &Session{CompactionEnabled: true, CompactionTrigger: 7, MaxHistoryItems: 4, MaxSummaryTokens: 512}
|
||||
m := &fakeModel{predictResp: backend.LLMResponse{Response: "ROLLED UP"}}
|
||||
|
||||
s.compact(conv, m)
|
||||
|
||||
Expect(conv.Memory).To(Equal("ROLLED UP"))
|
||||
Expect(len(conv.Items)).To(Equal(4))
|
||||
Expect(itemID(conv.Items[0])).To(Equal("5"))
|
||||
// The summarizer saw the evicted turns.
|
||||
Expect(m.lastMessages[1].StringContent).To(ContainSubstring("a"))
|
||||
})
|
||||
|
||||
It("leaves Items and Memory untouched when the summarizer errors", func() {
|
||||
items := []*types.MessageItemUnion{user("1", "a"), user("2", "b"), user("3", "c")}
|
||||
conv := &Conversation{Items: items}
|
||||
s := &Session{CompactionEnabled: true, CompactionTrigger: 2, MaxHistoryItems: 1, MaxSummaryTokens: 512}
|
||||
m := &fakeModel{predictErr: errors.New("boom")}
|
||||
|
||||
s.compact(conv, m)
|
||||
|
||||
Expect(conv.Memory).To(Equal(""))
|
||||
Expect(len(conv.Items)).To(Equal(3))
|
||||
})
|
||||
|
||||
It("strips leaked reasoning tags from the summary via the shared extractor", func() {
|
||||
conv := &Conversation{Items: []*types.MessageItemUnion{
|
||||
user("1", "a"), user("2", "b"), user("3", "c"), user("4", "d"),
|
||||
user("5", "e"), user("6", "f"), user("7", "g"), user("8", "h"),
|
||||
}}
|
||||
s := &Session{CompactionEnabled: true, CompactionTrigger: 7, MaxHistoryItems: 4, MaxSummaryTokens: 512}
|
||||
m := &fakeModel{predictResp: backend.LLMResponse{Response: "<think>planning the summary</think>CLEAN SUMMARY"}}
|
||||
|
||||
s.compact(conv, m)
|
||||
|
||||
Expect(conv.Memory).To(Equal("CLEAN SUMMARY"))
|
||||
Expect(conv.Memory).ToNot(ContainSubstring("planning"))
|
||||
})
|
||||
|
||||
It("does nothing when items are at or below the trigger", func() {
|
||||
conv := &Conversation{Items: []*types.MessageItemUnion{user("1", "a")}}
|
||||
s := &Session{CompactionEnabled: true, CompactionTrigger: 7, MaxHistoryItems: 4}
|
||||
s.compact(conv, &fakeModel{predictResp: backend.LLMResponse{Response: "x"}})
|
||||
Expect(conv.Memory).To(Equal(""))
|
||||
Expect(len(conv.Items)).To(Equal(1))
|
||||
})
|
||||
})
|
||||
|
||||
var _ = Describe("prefixMatches", func() {
|
||||
user := func(id string) *types.MessageItemUnion {
|
||||
return &types.MessageItemUnion{User: &types.MessageItemUser{ID: id}}
|
||||
}
|
||||
|
||||
It("matches when items begins with the snapshot ids in order", func() {
|
||||
items := []*types.MessageItemUnion{user("1"), user("2"), user("3")}
|
||||
snap := []*types.MessageItemUnion{user("1"), user("2")}
|
||||
Expect(prefixMatches(items, snap)).To(BeTrue())
|
||||
})
|
||||
|
||||
It("matches an empty snapshot", func() {
|
||||
Expect(prefixMatches([]*types.MessageItemUnion{user("1")}, nil)).To(BeTrue())
|
||||
})
|
||||
|
||||
It("fails when items is shorter than the snapshot (a concurrent delete shrank the head)", func() {
|
||||
items := []*types.MessageItemUnion{user("1")}
|
||||
snap := []*types.MessageItemUnion{user("1"), user("2")}
|
||||
Expect(prefixMatches(items, snap)).To(BeFalse())
|
||||
})
|
||||
|
||||
It("fails when the head ids differ (a concurrent delete reordered the head)", func() {
|
||||
items := []*types.MessageItemUnion{user("2"), user("3")}
|
||||
snap := []*types.MessageItemUnion{user("1"), user("2")}
|
||||
Expect(prefixMatches(items, snap)).To(BeFalse())
|
||||
})
|
||||
})
|
||||
|
||||
var _ = Describe("summarizerModel", func() {
|
||||
It("returns the pipeline model when no summary_model is set", func() {
|
||||
m := &fakeModel{}
|
||||
s := &Session{ModelInterface: m}
|
||||
Expect(s.summarizerModel()).To(Equal(m))
|
||||
})
|
||||
|
||||
It("uses the factory (once) when summary_model is set", func() {
|
||||
pipeline := &fakeModel{}
|
||||
small := &fakeModel{}
|
||||
calls := 0
|
||||
s := &Session{ModelInterface: pipeline, SummaryModel: "tiny",
|
||||
summarizerFactory: func() (Model, error) { calls++; return small, nil }}
|
||||
Expect(s.summarizerModel()).To(Equal(small))
|
||||
Expect(s.summarizerModel()).To(Equal(small))
|
||||
Expect(calls).To(Equal(1))
|
||||
})
|
||||
|
||||
It("falls back to the pipeline model when the factory errors", func() {
|
||||
pipeline := &fakeModel{}
|
||||
s := &Session{ModelInterface: pipeline, SummaryModel: "tiny",
|
||||
summarizerFactory: func() (Model, error) { return nil, errors.New("nope") }}
|
||||
Expect(s.summarizerModel()).To(Equal(pipeline))
|
||||
})
|
||||
})
|
||||
|
||||
var _ = Describe("itemID", func() {
|
||||
It("returns the id for each variant and empty for nil", func() {
|
||||
Expect(itemID(nil)).To(Equal(""))
|
||||
Expect(itemID(&types.MessageItemUnion{User: &types.MessageItemUser{ID: "u1"}})).To(Equal("u1"))
|
||||
Expect(itemID(&types.MessageItemUnion{Assistant: &types.MessageItemAssistant{ID: "a1"}})).To(Equal("a1"))
|
||||
Expect(itemID(&types.MessageItemUnion{System: &types.MessageItemSystem{ID: "s1"}})).To(Equal("s1"))
|
||||
Expect(itemID(&types.MessageItemUnion{FunctionCall: &types.MessageItemFunctionCall{ID: "f1"}})).To(Equal("f1"))
|
||||
Expect(itemID(&types.MessageItemUnion{FunctionCallOutput: &types.MessageItemFunctionCallOutput{ID: "o1"}})).To(Equal("o1"))
|
||||
})
|
||||
})
|
||||
@@ -432,7 +432,7 @@ func loadSoundDetectionConfig(pipeline *config.Pipeline, cl *config.ModelConfigL
|
||||
if pipeline.SoundDetection == "" {
|
||||
return nil, nil
|
||||
}
|
||||
cfg, err := loadPipelineSubModel(cl, pipeline.SoundDetection, ml.ModelPath)
|
||||
cfg, err := cl.LoadModelConfigFileByName(pipeline.SoundDetection, ml.ModelPath)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to load sound detection config: %w", err)
|
||||
}
|
||||
@@ -443,7 +443,7 @@ func loadSoundDetectionConfig(pipeline *config.Pipeline, cl *config.ModelConfigL
|
||||
}
|
||||
|
||||
func newTranscriptionOnlyModel(pipeline *config.Pipeline, cl *config.ModelConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) (Model, *config.ModelConfig, error) {
|
||||
cfgVAD, err := loadPipelineSubModel(cl, pipeline.VAD, ml.ModelPath)
|
||||
cfgVAD, err := cl.LoadModelConfigFileByName(pipeline.VAD, ml.ModelPath)
|
||||
if err != nil {
|
||||
|
||||
return nil, nil, fmt.Errorf("failed to load backend config: %w", err)
|
||||
@@ -453,7 +453,7 @@ func newTranscriptionOnlyModel(pipeline *config.Pipeline, cl *config.ModelConfig
|
||||
return nil, nil, fmt.Errorf("failed to validate config: %w", err)
|
||||
}
|
||||
|
||||
cfgSST, err := loadPipelineSubModel(cl, pipeline.Transcription, ml.ModelPath)
|
||||
cfgSST, err := cl.LoadModelConfigFileByName(pipeline.Transcription, ml.ModelPath)
|
||||
if err != nil {
|
||||
|
||||
return nil, nil, fmt.Errorf("failed to load backend config: %w", err)
|
||||
@@ -542,30 +542,11 @@ func buildRealtimeRoutingContext(a *application.Application, sessionID string) *
|
||||
}
|
||||
}
|
||||
|
||||
// loadPipelineSubModel loads a pipeline sub-model config by name and follows a
|
||||
// single alias hop, so a pipeline that references an alias (e.g. `llm: default`)
|
||||
// gets the alias target's full config (Backend, Model, ...) rather than the
|
||||
// alias stub with an empty Backend. Without this the alias survives unresolved
|
||||
// into model loading and fails downstream — notably in distributed mode with
|
||||
// "backend name is empty". Mirrors the top-level alias resolution in
|
||||
// core/http/middleware/request.go.
|
||||
func loadPipelineSubModel(cl *config.ModelConfigLoader, name, modelPath string) (*config.ModelConfig, error) {
|
||||
cfg, err := cl.LoadModelConfigFileByName(name, modelPath)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
resolved, _, err := cl.ResolveAlias(cfg)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return resolved, nil
|
||||
}
|
||||
|
||||
// returns and loads either a wrapped model or a model that support audio-to-audio
|
||||
func newModel(pipeline *config.Pipeline, cl *config.ModelConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig, evaluator *templates.Evaluator, routing *RealtimeRoutingContext) (Model, error) {
|
||||
xlog.Debug("Creating new model pipeline model", "pipeline", pipeline)
|
||||
|
||||
cfgVAD, err := loadPipelineSubModel(cl, pipeline.VAD, ml.ModelPath)
|
||||
cfgVAD, err := cl.LoadModelConfigFileByName(pipeline.VAD, ml.ModelPath)
|
||||
if err != nil {
|
||||
|
||||
return nil, fmt.Errorf("failed to load backend config: %w", err)
|
||||
@@ -576,7 +557,7 @@ func newModel(pipeline *config.Pipeline, cl *config.ModelConfigLoader, ml *model
|
||||
}
|
||||
|
||||
// TODO: Do we always need a transcription model? It can be disabled. Note that any-to-any instruction following models don't transcribe as such, so if transcription is required it is a separate process
|
||||
cfgSST, err := loadPipelineSubModel(cl, pipeline.Transcription, ml.ModelPath)
|
||||
cfgSST, err := cl.LoadModelConfigFileByName(pipeline.Transcription, ml.ModelPath)
|
||||
if err != nil {
|
||||
|
||||
return nil, fmt.Errorf("failed to load backend config: %w", err)
|
||||
@@ -608,7 +589,7 @@ func newModel(pipeline *config.Pipeline, cl *config.ModelConfigLoader, ml *model
|
||||
xlog.Debug("Loading a wrapped model")
|
||||
|
||||
// Otherwise we want to return a wrapped model, which is a "virtual" model that re-uses other models to perform operations
|
||||
cfgLLM, err := loadPipelineSubModel(cl, pipeline.LLM, ml.ModelPath)
|
||||
cfgLLM, err := cl.LoadModelConfigFileByName(pipeline.LLM, ml.ModelPath)
|
||||
if err != nil {
|
||||
|
||||
return nil, fmt.Errorf("failed to load backend config: %w", err)
|
||||
@@ -623,7 +604,7 @@ func newModel(pipeline *config.Pipeline, cl *config.ModelConfigLoader, ml *model
|
||||
applyPipelineReasoning(cfgLLM, *pipeline)
|
||||
applyPipelineThinking(cfgLLM, *pipeline)
|
||||
|
||||
cfgTTS, err := loadPipelineSubModel(cl, pipeline.TTS, ml.ModelPath)
|
||||
cfgTTS, err := cl.LoadModelConfigFileByName(pipeline.TTS, ml.ModelPath)
|
||||
if err != nil {
|
||||
|
||||
return nil, fmt.Errorf("failed to load backend config: %w", err)
|
||||
|
||||
@@ -1,52 +0,0 @@
|
||||
package openai
|
||||
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
|
||||
. "github.com/onsi/ginkgo/v2"
|
||||
. "github.com/onsi/gomega"
|
||||
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
)
|
||||
|
||||
// loadPipelineSubModel must resolve a pipeline sub-model that references an
|
||||
// alias (e.g. `llm: default`) one hop to the alias target's full config — so
|
||||
// the effective backend is the target's backend, not the empty backend of the
|
||||
// alias stub. This mirrors the top-level alias resolution done in
|
||||
// core/http/middleware/request.go, which the realtime pipeline previously
|
||||
// skipped (failing in distributed mode with "backend name is empty").
|
||||
var _ = Describe("loadPipelineSubModel", func() {
|
||||
It("resolves a sub-model alias one hop to the target's config", func() {
|
||||
tmpDir := GinkgoT().TempDir()
|
||||
|
||||
// A real model config with a concrete backend.
|
||||
realLLM := `name: real-llm
|
||||
backend: llama-cpp
|
||||
parameters:
|
||||
model: real-llm.gguf
|
||||
`
|
||||
Expect(os.WriteFile(filepath.Join(tmpDir, "real-llm.yaml"), []byte(realLLM), 0644)).To(Succeed())
|
||||
|
||||
// An alias pointing at the real model.
|
||||
aliasCfg := `name: default
|
||||
alias: real-llm
|
||||
`
|
||||
Expect(os.WriteFile(filepath.Join(tmpDir, "default.yaml"), []byte(aliasCfg), 0644)).To(Succeed())
|
||||
|
||||
cl := config.NewModelConfigLoader(tmpDir)
|
||||
Expect(cl.LoadModelConfigsFromPath(tmpDir)).To(Succeed())
|
||||
|
||||
// Resolving the alias must follow the hop to the target's full config.
|
||||
resolved, err := loadPipelineSubModel(cl, "default", tmpDir)
|
||||
Expect(err).NotTo(HaveOccurred())
|
||||
Expect(resolved.IsAlias()).To(BeFalse())
|
||||
Expect(resolved.Backend).To(Equal("llama-cpp"))
|
||||
|
||||
// A non-alias name must load unchanged.
|
||||
direct, err := loadPipelineSubModel(cl, "real-llm", tmpDir)
|
||||
Expect(err).NotTo(HaveOccurred())
|
||||
Expect(direct.Backend).To(Equal("llama-cpp"))
|
||||
Expect(direct.Name).To(Equal("real-llm"))
|
||||
})
|
||||
})
|
||||
@@ -55,70 +55,17 @@ func BasePathPrefix(c echo.Context) string {
|
||||
// The returned URL is guaranteed to end with `/`.
|
||||
// The method should be used in conjunction with the StripPathPrefix middleware.
|
||||
func BaseURL(c echo.Context) string {
|
||||
// An explicit external base URL (LOCALAI_BASE_URL) is authoritative for
|
||||
// the origin. The proxy-derived path prefix is still appended so a
|
||||
// reverse-proxy mount point keeps working. Trailing slashes are
|
||||
// normalized via BasePathPrefix, which always starts and ends with "/".
|
||||
if ext, ok := c.Get("_external_base_url").(string); ok && ext != "" {
|
||||
return strings.TrimRight(ext, "/") + BasePathPrefix(c)
|
||||
}
|
||||
|
||||
fwdProto, fwdHost := parseForwarded(c.Request().Header.Get("Forwarded"))
|
||||
|
||||
scheme := "http"
|
||||
switch {
|
||||
case c.Request().TLS != nil:
|
||||
if c.Request().Header.Get("X-Forwarded-Proto") == "https" {
|
||||
scheme = "https"
|
||||
case strings.EqualFold(firstToken(c.Request().Header.Get("X-Forwarded-Proto")), "https"):
|
||||
scheme = "https"
|
||||
case strings.EqualFold(fwdProto, "https"):
|
||||
} else if c.Request().TLS != nil {
|
||||
scheme = "https"
|
||||
}
|
||||
|
||||
host := c.Request().Host
|
||||
if forwardedHost := c.Request().Header.Get("X-Forwarded-Host"); forwardedHost != "" {
|
||||
host = forwardedHost
|
||||
} else if fwdHost != "" {
|
||||
host = fwdHost
|
||||
}
|
||||
|
||||
return scheme + "://" + host + BasePathPrefix(c)
|
||||
}
|
||||
|
||||
// firstToken returns the first comma-separated token of v, trimmed of spaces.
|
||||
// Reverse-proxy chains can emit X-Forwarded-Proto as "https,http"; only the
|
||||
// first hop (closest to the client) is meaningful for scheme detection.
|
||||
func firstToken(v string) string {
|
||||
if i := strings.IndexByte(v, ','); i >= 0 {
|
||||
v = v[:i]
|
||||
}
|
||||
return strings.TrimSpace(v)
|
||||
}
|
||||
|
||||
// parseForwarded extracts the proto and host directives from the first element
|
||||
// of an RFC 7239 Forwarded header (e.g. `for=x;proto=https;host=h, for=y`).
|
||||
// Values may be quoted. Returns empty strings when absent or malformed so the
|
||||
// caller can fall through to other signals.
|
||||
func parseForwarded(header string) (proto, host string) {
|
||||
if header == "" {
|
||||
return "", ""
|
||||
}
|
||||
// Only the first element (closest proxy to the client) matters here.
|
||||
if i := strings.IndexByte(header, ','); i >= 0 {
|
||||
header = header[:i]
|
||||
}
|
||||
for _, directive := range strings.Split(header, ";") {
|
||||
key, value, ok := strings.Cut(strings.TrimSpace(directive), "=")
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
value = strings.Trim(strings.TrimSpace(value), `"`)
|
||||
switch strings.ToLower(strings.TrimSpace(key)) {
|
||||
case "proto":
|
||||
proto = value
|
||||
case "host":
|
||||
host = value
|
||||
}
|
||||
}
|
||||
return proto, host
|
||||
}
|
||||
|
||||
@@ -135,138 +135,4 @@ var _ = Describe("BaseURL", func() {
|
||||
Entry("missing leading slash", "evil"),
|
||||
)
|
||||
})
|
||||
|
||||
Context("scheme detection hardening", func() {
|
||||
It("treats comma-separated X-Forwarded-Proto as https when first token is https", func() {
|
||||
app := echo.New()
|
||||
actualURL := ""
|
||||
app.GET("/x", func(c echo.Context) error {
|
||||
actualURL = BaseURL(c)
|
||||
return nil
|
||||
})
|
||||
req := httptest.NewRequest("GET", "/x", nil)
|
||||
req.Header.Set("X-Forwarded-Proto", "https,http")
|
||||
rec := httptest.NewRecorder()
|
||||
app.ServeHTTP(rec, req)
|
||||
Expect(actualURL).To(Equal("https://example.com/"))
|
||||
})
|
||||
|
||||
It("derives https from the RFC 7239 Forwarded proto directive", func() {
|
||||
app := echo.New()
|
||||
actualURL := ""
|
||||
app.GET("/x", func(c echo.Context) error {
|
||||
actualURL = BaseURL(c)
|
||||
return nil
|
||||
})
|
||||
req := httptest.NewRequest("GET", "/x", nil)
|
||||
req.Header.Set("Forwarded", "for=192.0.2.1;proto=https;host=proxy.example")
|
||||
rec := httptest.NewRecorder()
|
||||
app.ServeHTTP(rec, req)
|
||||
Expect(actualURL).To(Equal("https://proxy.example/"))
|
||||
})
|
||||
|
||||
It("prefers X-Forwarded-Host over the Forwarded host directive", func() {
|
||||
app := echo.New()
|
||||
actualURL := ""
|
||||
app.GET("/x", func(c echo.Context) error {
|
||||
actualURL = BaseURL(c)
|
||||
return nil
|
||||
})
|
||||
req := httptest.NewRequest("GET", "/x", nil)
|
||||
req.Header.Set("X-Forwarded-Host", "xfh.example")
|
||||
req.Header.Set("Forwarded", "host=fwd.example;proto=https")
|
||||
rec := httptest.NewRecorder()
|
||||
app.ServeHTTP(rec, req)
|
||||
Expect(actualURL).To(Equal("https://xfh.example/"))
|
||||
})
|
||||
})
|
||||
|
||||
Context("explicit external base URL override", func() {
|
||||
It("uses the configured origin over conflicting forwarded headers", func() {
|
||||
app := echo.New()
|
||||
actualURL := ""
|
||||
app.GET("/x", func(c echo.Context) error {
|
||||
c.Set("_external_base_url", "https://192.168.0.13:34567")
|
||||
actualURL = BaseURL(c)
|
||||
return nil
|
||||
})
|
||||
req := httptest.NewRequest("GET", "/x", nil)
|
||||
req.Header.Set("X-Forwarded-Proto", "http")
|
||||
req.Header.Set("X-Forwarded-Host", "internal:8080")
|
||||
rec := httptest.NewRecorder()
|
||||
app.ServeHTTP(rec, req)
|
||||
Expect(actualURL).To(Equal("https://192.168.0.13:34567/"))
|
||||
})
|
||||
|
||||
It("combines the configured origin with a detected path prefix", func() {
|
||||
app := echo.New()
|
||||
actualURL := ""
|
||||
app.GET("/hello", func(c echo.Context) error {
|
||||
c.Set("_original_path", "/localai/hello")
|
||||
c.Set("_external_base_url", "https://ext.example")
|
||||
actualURL = BaseURL(c)
|
||||
return nil
|
||||
})
|
||||
req := httptest.NewRequest("GET", "/hello", nil)
|
||||
rec := httptest.NewRecorder()
|
||||
app.ServeHTTP(rec, req)
|
||||
Expect(actualURL).To(Equal("https://ext.example/localai/"))
|
||||
})
|
||||
|
||||
It("ignores an empty override", func() {
|
||||
app := echo.New()
|
||||
actualURL := ""
|
||||
app.GET("/x", func(c echo.Context) error {
|
||||
c.Set("_external_base_url", "")
|
||||
actualURL = BaseURL(c)
|
||||
return nil
|
||||
})
|
||||
req := httptest.NewRequest("GET", "/x", nil)
|
||||
rec := httptest.NewRecorder()
|
||||
app.ServeHTTP(rec, req)
|
||||
Expect(actualURL).To(Equal("http://example.com/"))
|
||||
})
|
||||
})
|
||||
|
||||
Context("parseForwarded helper", func() {
|
||||
It("parses unquoted proto and host", func() {
|
||||
proto, host := parseForwarded("for=192.0.2.1;proto=https;host=h.example")
|
||||
Expect(proto).To(Equal("https"))
|
||||
Expect(host).To(Equal("h.example"))
|
||||
})
|
||||
|
||||
It("strips quotes around values", func() {
|
||||
proto, host := parseForwarded(`proto="https";host="h.example"`)
|
||||
Expect(proto).To(Equal("https"))
|
||||
Expect(host).To(Equal("h.example"))
|
||||
})
|
||||
|
||||
It("uses only the first element of a multi-element header", func() {
|
||||
proto, host := parseForwarded("proto=https;host=first.example, proto=http;host=second.example")
|
||||
Expect(proto).To(Equal("https"))
|
||||
Expect(host).To(Equal("first.example"))
|
||||
})
|
||||
|
||||
It("returns empty strings for an empty header", func() {
|
||||
proto, host := parseForwarded("")
|
||||
Expect(proto).To(BeEmpty())
|
||||
Expect(host).To(BeEmpty())
|
||||
})
|
||||
|
||||
It("skips directives without a value", func() {
|
||||
proto, host := parseForwarded("proto;host=h.example")
|
||||
Expect(proto).To(BeEmpty())
|
||||
Expect(host).To(Equal("h.example"))
|
||||
})
|
||||
})
|
||||
|
||||
Context("firstToken helper", func() {
|
||||
It("returns the whole trimmed string when there is no comma", func() {
|
||||
Expect(firstToken(" https ")).To(Equal("https"))
|
||||
})
|
||||
|
||||
It("returns the first trimmed token when there is a comma", func() {
|
||||
Expect(firstToken("https , http")).To(Equal("https"))
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
@@ -86,7 +86,6 @@
|
||||
"input": {
|
||||
"placeholder": "Message...",
|
||||
"attachFile": "Attach file",
|
||||
"send": "Send message",
|
||||
"stopGenerating": "Stop generating",
|
||||
"canvasTitle": "Canvas — extract code blocks and media into a side panel for preview, copy, and download",
|
||||
"canvasLabel": "Canvas",
|
||||
|
||||
@@ -77,20 +77,6 @@
|
||||
"noModelsTitle": "No Models Available",
|
||||
"noModelsBody": "There are no models installed yet. Ask your administrator to set up models so you can start chatting."
|
||||
},
|
||||
"starters": {
|
||||
"title": "Recommended for your hardware",
|
||||
"tier": {
|
||||
"cpu": "CPU-only",
|
||||
"gpu-small": "GPU",
|
||||
"gpu-large": "GPU"
|
||||
},
|
||||
"cpuNote": "No GPU detected — these small models stay responsive on CPU.",
|
||||
"gpuNote": "Picked to fit your available VRAM with room for context.",
|
||||
"install": "Install",
|
||||
"installing": "Installing",
|
||||
"installStarted": "Installing {{model}}…",
|
||||
"installFailed": "Install failed: {{message}}"
|
||||
},
|
||||
"connect": {
|
||||
"title": "One endpoint, every API",
|
||||
"subtitle": "LocalAI serves its own full API — image & video generation, depth, object detection, reranking, audio, face & voice recognition, and realtime voice over WebRTC and WebSocket. On top of that, a drop-in compatibility layer lets any app built for OpenAI, Anthropic, Ollama or OpenAI Responses talk to it unchanged.",
|
||||
|
||||
@@ -45,7 +45,7 @@
|
||||
},
|
||||
"scheduling": {
|
||||
"title": "Penjadwalan",
|
||||
"subtitle": "Aturan penempatan model dan replika di seluruh kluster"
|
||||
"subtitle": "Aturan penempatan model dan replika di seluruh klaster"
|
||||
},
|
||||
"p2p": {
|
||||
"title": "Komputasi AI Terdistribusi",
|
||||
@@ -86,4 +86,4 @@
|
||||
"title": "Penjelajah",
|
||||
"subtitle": "Jelajahi file dan konfigurasi"
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -72,7 +72,7 @@
|
||||
"actions": {
|
||||
"copy": "Salin",
|
||||
"regenerate": "Hasilkan ulang",
|
||||
"jumpToLatest": "Lompat ke terbaru"
|
||||
"jumpToLatest": "Jump to latest"
|
||||
},
|
||||
"streaming": {
|
||||
"transferring": "Mentransfer model...",
|
||||
@@ -115,4 +115,4 @@
|
||||
"clearAll": "Hapus semua",
|
||||
"deleteAllTitle": "Hapus semua percakapan"
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,8 +1,8 @@
|
||||
{
|
||||
"unsaved": {
|
||||
"title": "Buang perubahan yang belum disimpan?",
|
||||
"message": "Anda memiliki perubahan yang belum disimpan. Perubahan tersebut akan hilang jika Anda meninggalkan halaman ini.",
|
||||
"leave": "Tinggalkan Halaman"
|
||||
"title": "Discard unsaved changes?",
|
||||
"message": "You have unsaved changes that will be lost if you leave this page.",
|
||||
"leave": "Leave"
|
||||
},
|
||||
"actions": {
|
||||
"save": "Simpan",
|
||||
|
||||
@@ -7,15 +7,15 @@
|
||||
"resourceGpu": "GPU",
|
||||
"resourceRam": "RAM",
|
||||
"greeting": {
|
||||
"morning": "Selamat pagi",
|
||||
"afternoon": "Selamat siang",
|
||||
"evening": "Selamat malam",
|
||||
"night": "Selamat lembur"
|
||||
"morning": "Good morning",
|
||||
"afternoon": "Good afternoon",
|
||||
"evening": "Good evening",
|
||||
"night": "Working late"
|
||||
},
|
||||
"statusLine": {
|
||||
"modelsLoaded_one": "{{count}} model dimuat",
|
||||
"modelsLoaded_other": "{{count}} model dimuat",
|
||||
"noModelsLoaded": "Tidak ada model yang dimuat",
|
||||
"modelsLoaded_one": "{{count}} model loaded",
|
||||
"modelsLoaded_other": "{{count}} models loaded",
|
||||
"noModelsLoaded": "No models loaded",
|
||||
"nodes_one": "{{count}} node",
|
||||
"nodes_other": "{{count}} nodes"
|
||||
},
|
||||
@@ -79,14 +79,14 @@
|
||||
},
|
||||
"connect": {
|
||||
"title": "Satu endpoint, semua API",
|
||||
"subtitle": "LocalAI menyediakan API miliknya sendiri yang lengkap — pembuatan gambar & video, depth, deteksi objek, reranking, audio, pengenalan wajah & suara, serta suara realtime melalui WebRTC dan WebSocket. Selain itu, lapisan kompatibilitas drop-in membuat aplikasi apa pun yang dibuat untuk OpenAI, Anthropic, Ollama, atau OpenAI Responses bekerja tanpa perubahan.",
|
||||
"subtitle": "LocalAI menyediakan API miliknya sendiri yang lengkap — pembuatan gambar & video, depth, deteksi objek, reranking, audio, pengenalan wajah & suara, serta suara realtime melalui WebRTC dan WebSocket. Di atas itu, lapisan kompatibilitas drop-in membuat aplikasi apa pun yang dibuat untuk OpenAI, Anthropic, Ollama, atau OpenAI Responses bekerja tanpa perubahan.",
|
||||
"nativeTitle": "API native",
|
||||
"compatTitle": "Kompatibilitas drop-in",
|
||||
"apiReference": "Referensi API lengkap",
|
||||
"copy": "Salin",
|
||||
"copied": "Disalin",
|
||||
"browse": "Jelajahi API",
|
||||
"hide": "Sembunyikan endpoint",
|
||||
"dismiss": "Abaikan"
|
||||
"browse": "Browse the API",
|
||||
"hide": "Hide endpoints",
|
||||
"dismiss": "Dismiss"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
"video": "Video",
|
||||
"tts": "TTS",
|
||||
"sound": "Suara",
|
||||
"transform": "Transformasi"
|
||||
"transform": "Transform"
|
||||
}
|
||||
},
|
||||
"image": {
|
||||
@@ -30,7 +30,7 @@
|
||||
"refImagesAdded_other": "{{count}} gambar ditambahkan"
|
||||
},
|
||||
"actions": {
|
||||
"view": "Lihat",
|
||||
"view": "View",
|
||||
"generate": "Hasilkan",
|
||||
"generating": "Menghasilkan..."
|
||||
},
|
||||
@@ -153,4 +153,4 @@
|
||||
"clearConfirm": "Hapus",
|
||||
"cleared": "Riwayat dihapus"
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -19,11 +19,11 @@
|
||||
"operate": "Operasikan"
|
||||
},
|
||||
"operate": {
|
||||
"inference": "Inferensi",
|
||||
"cluster": "Kluster",
|
||||
"observability": "Observabilitas",
|
||||
"access": "Akses",
|
||||
"system": "Sistem"
|
||||
"inference": "Inference",
|
||||
"cluster": "Cluster",
|
||||
"observability": "Observability",
|
||||
"access": "Access",
|
||||
"system": "System"
|
||||
},
|
||||
"items": {
|
||||
"home": "Beranda",
|
||||
@@ -64,7 +64,7 @@
|
||||
"copyright": "© 2023-{{year}} {{author}}"
|
||||
},
|
||||
"console": {
|
||||
"automation": "Automasi",
|
||||
"automation": "Otomasi",
|
||||
"training": "Pelatihan"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -6363,59 +6363,6 @@ select.input {
|
||||
justify-content: center;
|
||||
}
|
||||
|
||||
/* ──────────────────── Home: hardware-aware starter models ──────────────────── */
|
||||
|
||||
.home-starters {
|
||||
margin: var(--spacing-lg) 0;
|
||||
padding: var(--spacing-lg);
|
||||
}
|
||||
.home-starters-head {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
justify-content: space-between;
|
||||
gap: var(--spacing-md);
|
||||
}
|
||||
.home-starters-head strong {
|
||||
font-size: 0.9375rem;
|
||||
}
|
||||
.home-starters-tier {
|
||||
display: inline-flex;
|
||||
align-items: center;
|
||||
gap: var(--spacing-xs);
|
||||
font-size: 0.75rem;
|
||||
color: var(--color-text-muted);
|
||||
}
|
||||
.home-starters-sub {
|
||||
margin: var(--spacing-xs) 0 var(--spacing-md);
|
||||
font-size: 0.8125rem;
|
||||
color: var(--color-text-secondary);
|
||||
}
|
||||
.home-starters-list {
|
||||
list-style: none;
|
||||
margin: 0;
|
||||
padding: 0;
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
gap: var(--spacing-xs);
|
||||
}
|
||||
.home-starters-item {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: var(--spacing-md);
|
||||
padding: var(--spacing-xs) 0;
|
||||
}
|
||||
.home-starters-name {
|
||||
font-weight: 500;
|
||||
font-size: 0.875rem;
|
||||
word-break: break-all;
|
||||
}
|
||||
.home-starters-size {
|
||||
margin-left: auto;
|
||||
font-size: 0.75rem;
|
||||
color: var(--color-text-muted);
|
||||
white-space: nowrap;
|
||||
}
|
||||
|
||||
/* ──────────────────── Home: drop-in endpoint / API compatibility ──────────────────── */
|
||||
|
||||
.home-connect {
|
||||
|
||||
@@ -1,25 +1,8 @@
|
||||
import { useEffect, useMemo, useCallback } from 'react'
|
||||
import { useEffect, useMemo } from 'react'
|
||||
import { useModels } from '../hooks/useModels'
|
||||
import SearchableSelect from './SearchableSelect'
|
||||
import { useTranslation } from 'react-i18next'
|
||||
|
||||
// Remember the last model the user picked, keyed by capability, so returning to
|
||||
// a page (Home chat box, Image, TTS, Talk...) defaults to that model instead of
|
||||
// whatever happens to sort first. Only persisted when a capability key exists —
|
||||
// `externalOptions` callers pass no capability and get the old first-item
|
||||
// behaviour. localStorage access is wrapped because private-browsing modes throw.
|
||||
const LAST_MODEL_PREFIX = 'localai_last_model:'
|
||||
|
||||
/**
 * Return the model name previously stored for `capability`, or null.
 *
 * A falsy capability yields null without touching storage. Any error raised
 * while reading localStorage is swallowed and reported as null.
 */
function readLastModel(capability) {
  if (capability) {
    try {
      const store = localStorage
      return store.getItem(LAST_MODEL_PREFIX + capability)
    } catch {
      // Storage is unavailable (e.g. private browsing): behave as "nothing remembered".
    }
  }
  return null
}
|
||||
|
||||
/**
 * Store `model` as the last selection for `capability`.
 *
 * Does nothing when either argument is falsy. Storage errors are ignored, so
 * this is strictly best-effort and never throws.
 */
function writeLastModel(capability, model) {
  const hasBoth = Boolean(capability) && Boolean(model)
  if (hasBoth) {
    try {
      const store = localStorage
      store.setItem(LAST_MODEL_PREFIX + capability, model)
    } catch {
      // Ignored on purpose: failing to remember a choice must not break selection.
    }
  }
}
|
||||
|
||||
export default function ModelSelector({
|
||||
value, onChange, capability, className = '',
|
||||
options: externalOptions, loading: externalLoading,
|
||||
@@ -36,27 +19,16 @@ export default function ModelSelector({
|
||||
const isLoading = externalOptions ? (externalLoading || false) : hookLoading
|
||||
const isDisabled = isLoading || (externalDisabled || false)
|
||||
|
||||
// Persist genuine selections so the next visit can restore them.
|
||||
const handleChange = useCallback((next) => {
|
||||
writeLastModel(capability, next)
|
||||
onChange(next)
|
||||
}, [capability, onChange])
|
||||
|
||||
useEffect(() => {
|
||||
if (modelNames.length > 0 && (!value || !modelNames.includes(value))) {
|
||||
// Prefer the remembered model when it's still available; otherwise fall
|
||||
// back to the first option. Don't re-persist here — auto-select is not a
|
||||
// user choice, and writing back the stored value would be a harmless but
|
||||
// pointless round-trip.
|
||||
const remembered = readLastModel(capability)
|
||||
onChange(remembered && modelNames.includes(remembered) ? remembered : modelNames[0])
|
||||
onChange(modelNames[0])
|
||||
}
|
||||
}, [modelNames, value, onChange, capability])
|
||||
}, [modelNames, value, onChange])
|
||||
|
||||
return (
|
||||
<SearchableSelect
|
||||
value={value || ''}
|
||||
onChange={handleChange}
|
||||
onChange={onChange}
|
||||
options={modelNames}
|
||||
placeholder={isLoading ? t('selector.loading') : (modelNames.length === 0 ? t('selector.noModels') : t('selector.selectModel'))}
|
||||
searchPlaceholder={searchPlaceholder || t('selector.searchPlaceholder')}
|
||||
|
||||
@@ -1,129 +0,0 @@
|
||||
import { useState, useEffect, useMemo } from 'react'
|
||||
import { useTranslation } from 'react-i18next'
|
||||
import { modelsApi } from '../utils/api'
|
||||
import { useResources } from '../hooks/useResources'
|
||||
|
||||
// Curated, hardware-tiered starter models for the empty-state onboarding. Names
|
||||
// are real gallery entries (gallery/index.yaml); we intersect them against the
|
||||
// live gallery at render time so a custom/trimmed gallery degrades gracefully
|
||||
// (unmatched entries simply don't render).
|
||||
//
|
||||
// The guiding rule the maintainer asked for: CPU-only machines should be
|
||||
// steered to genuinely small models (1-4B, Q4) that stay responsive without a
|
||||
// GPU. GPU tiers scale the suggestion up with available VRAM.
|
||||
// Turn compact [name, size] pairs into the { name, size } records the UI renders.
const toStarters = (pairs) => pairs.map(([name, size]) => ({ name, size }))

// Suggestions for machines without a usable GPU.
const SMALL = toStarters([
  ['llama-3.2-1b-instruct:q4_k_m', '~0.8 GB'],
  ['llama-3.2-3b-instruct:q4_k_m', '~2 GB'],
  ['qwen3-1.7b', '~1.4 GB'],
  ['gemma-3-1b-it', '~0.8 GB'],
])

// Suggestions for GPUs reporting less than 8 GiB of memory.
const MID = toStarters([
  ['qwen3-4b', '~2.5 GB'],
  ['gemma-3-4b-it', '~3 GB'],
  ['llama-3.2-3b-instruct:q4_k_m', '~2 GB'],
])

// Suggestions for GPUs reporting 8 GiB of memory or more.
const LARGE = toStarters([
  ['meta-llama-3.1-8b-instruct', '~5 GB'],
  ['qwen3-4b', '~2.5 GB'],
  ['mistral-7b-instruct-v0.3', '~4 GB'],
])

// One gibibyte, in bytes.
const GB = 1024 ** 3

/**
 * Map the detected hardware onto a starter tier.
 *
 * Reads `resources.type` and `resources.aggregate.total_memory` (GPU memory in
 * bytes, falsy values counted as 0). Returns `{ id, list }` where `id` is
 * 'cpu', 'gpu-small' or 'gpu-large' and `list` is the matching table above.
 * A missing `resources` object is treated as CPU-only.
 */
function pickTier(resources) {
  const vram = resources?.aggregate?.total_memory || 0
  const cpuOnly = resources?.type !== 'gpu' || vram <= 0

  if (cpuOnly) {
    return { id: 'cpu', list: SMALL }
  }
  return vram < 8 * GB
    ? { id: 'gpu-small', list: MID }
    : { id: 'gpu-large', list: LARGE }
}
|
||||
|
||||
export default function StarterModels({ addToast, onInstallStarted }) {
|
||||
const { t } = useTranslation('home')
|
||||
const { resources } = useResources()
|
||||
const [available, setAvailable] = useState(null) // Set of gallery names, or null while loading
|
||||
const [installing, setInstalling] = useState(() => new Set())
|
||||
|
||||
const tier = useMemo(() => pickTier(resources), [resources])
|
||||
const candidates = tier.list
|
||||
|
||||
// Verify candidates exist in the live gallery. One search per name (the tier
|
||||
// has at most a handful) keeps this resilient to gallery customization.
|
||||
useEffect(() => {
|
||||
let cancelled = false
|
||||
const names = [...new Set(candidates.map(c => c.name))]
|
||||
Promise.all(names.map(name =>
|
||||
modelsApi.list({ search: name, page: 1 })
|
||||
.then(data => (data?.models || []).some(m => (m.name || m.id) === name) ? name : null)
|
||||
.catch(() => null)
|
||||
)).then(found => {
|
||||
if (cancelled) return
|
||||
const hits = found.filter(Boolean)
|
||||
// If verification yielded nothing (e.g. gallery unreachable), fall back to
|
||||
// showing the curated list rather than an empty widget.
|
||||
setAvailable(hits.length > 0 ? new Set(hits) : null)
|
||||
})
|
||||
return () => { cancelled = true }
|
||||
}, [candidates])
|
||||
|
||||
const visible = available === null
|
||||
? candidates
|
||||
: candidates.filter(c => available.has(c.name))
|
||||
|
||||
if (visible.length === 0) return null
|
||||
|
||||
const install = async (name) => {
|
||||
setInstalling(prev => new Set(prev).add(name))
|
||||
try {
|
||||
await modelsApi.install(name)
|
||||
addToast?.(t('starters.installStarted', { model: name }), 'success')
|
||||
onInstallStarted?.(name)
|
||||
} catch (err) {
|
||||
addToast?.(t('starters.installFailed', { message: err.message }), 'error')
|
||||
setInstalling(prev => {
|
||||
const next = new Set(prev)
|
||||
next.delete(name)
|
||||
return next
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
return (
|
||||
<section className="home-starters card">
|
||||
<div className="home-starters-head">
|
||||
<strong>{t('starters.title')}</strong>
|
||||
<span className="home-starters-tier">
|
||||
<i className={`fas ${tier.id === 'cpu' ? 'fa-memory' : 'fa-microchip'}`} aria-hidden="true" />
|
||||
{t(`starters.tier.${tier.id}`)}
|
||||
</span>
|
||||
</div>
|
||||
<p className="home-starters-sub">
|
||||
{tier.id === 'cpu' ? t('starters.cpuNote') : t('starters.gpuNote')}
|
||||
</p>
|
||||
<ul className="home-starters-list">
|
||||
{visible.map(c => {
|
||||
const busy = installing.has(c.name)
|
||||
return (
|
||||
<li key={c.name} className="home-starters-item">
|
||||
<span className="home-starters-name">{c.name}</span>
|
||||
<span className="home-starters-size">{c.size}</span>
|
||||
<button
|
||||
type="button"
|
||||
className="btn btn-primary btn-sm"
|
||||
disabled={busy}
|
||||
onClick={() => install(c.name)}
|
||||
>
|
||||
{busy
|
||||
? (<><i className="fas fa-spinner fa-spin" aria-hidden="true" /> {t('starters.installing')}</>)
|
||||
: (<><i className="fas fa-download" aria-hidden="true" /> {t('starters.install')}</>)}
|
||||
</button>
|
||||
</li>
|
||||
)
|
||||
})}
|
||||
</ul>
|
||||
</section>
|
||||
)
|
||||
}
|
||||
66
core/http/react-ui/src/hooks/usePolling.js
vendored
66
core/http/react-ui/src/hooks/usePolling.js
vendored
@@ -1,66 +0,0 @@
|
||||
import { useEffect, useRef, useCallback } from 'react'
|
||||
|
||||
// usePolling runs `fn` immediately and then on a fixed interval, with two
|
||||
// behaviours every hand-rolled setInterval in this app was missing:
|
||||
//
|
||||
// 1. Visibility-aware: the timer pauses while the tab is hidden
|
||||
// (document.hidden) and fires an immediate catch-up poll when the tab
|
||||
// becomes visible again. A backgrounded dashboard no longer hammers the
|
||||
// server every few seconds for data nobody is looking at.
|
||||
// 2. Non-overlapping: if `fn` returns a promise that takes longer than the
|
||||
// interval, the next tick waits for it instead of stacking requests.
|
||||
//
|
||||
// `enabled: false` stops polling entirely (one-shot or gated polls). The
|
||||
// returned `refetch` runs `fn` on demand and is stable across renders.
|
||||
/**
 * Poll `fn` every `intervalMs` milliseconds while `enabled` is true.
 *
 * Options: `enabled` gates the whole effect; `immediate` triggers one call as
 * soon as the effect runs. Returns `{ refetch }`, a stable function that runs
 * `fn` on demand and resolves to undefined when a call is already in flight.
 */
export function usePolling(fn, intervalMs = 5000, { enabled = true, immediate = true } = {}) {
  // Keep the newest callback reachable without re-running the effect below.
  const latestFn = useRef(fn)
  latestFn.current = fn

  const inFlight = useRef(false)
  const refetch = useCallback(async () => {
    // Skip instead of stacking when the previous call has not settled yet.
    if (!inFlight.current) {
      inFlight.current = true
      try {
        return await latestFn.current()
      } finally {
        inFlight.current = false
      }
    }
  }, [])

  useEffect(() => {
    if (!enabled) return

    let handle = null
    const poll = () => { refetch() }

    const pause = () => {
      if (handle == null) return
      clearInterval(handle)
      handle = null
    }

    const resume = () => {
      if (handle == null) handle = setInterval(poll, intervalMs)
    }

    const handleVisibilityChange = () => {
      if (!document.hidden) {
        // Tab is visible again: poll once right away, then restart the interval.
        poll()
        resume()
        return
      }
      pause()
    }

    if (immediate) poll()
    if (!document.hidden) resume()
    document.addEventListener('visibilitychange', handleVisibilityChange)

    return () => {
      pause()
      document.removeEventListener('visibilitychange', handleVisibilityChange)
    }
  }, [enabled, intervalMs, immediate, refetch])

  return { refetch }
}
|
||||
17
core/http/react-ui/src/hooks/useResources.js
vendored
17
core/http/react-ui/src/hooks/useResources.js
vendored
@@ -1,11 +1,11 @@
|
||||
import { useState, useCallback } from 'react'
|
||||
import { useState, useEffect, useCallback, useRef } from 'react'
|
||||
import { resourcesApi } from '../utils/api'
|
||||
import { usePolling } from './usePolling'
|
||||
|
||||
export function useResources(pollInterval = 5000) {
|
||||
const [resources, setResources] = useState(null)
|
||||
const [loading, setLoading] = useState(true)
|
||||
const [error, setError] = useState(null)
|
||||
const intervalRef = useRef(null)
|
||||
|
||||
const fetchResources = useCallback(async () => {
|
||||
try {
|
||||
@@ -19,10 +19,13 @@ export function useResources(pollInterval = 5000) {
|
||||
}
|
||||
}, [])
|
||||
|
||||
// Visibility-aware polling: pauses while the tab is hidden and catches up on
|
||||
// return (see usePolling). Resource stats are pure dashboard data, so there's
|
||||
// no reason to keep fetching them for a backgrounded tab.
|
||||
const { refetch } = usePolling(fetchResources, pollInterval)
|
||||
useEffect(() => {
|
||||
fetchResources()
|
||||
intervalRef.current = setInterval(fetchResources, pollInterval)
|
||||
return () => {
|
||||
if (intervalRef.current) clearInterval(intervalRef.current)
|
||||
}
|
||||
}, [fetchResources, pollInterval])
|
||||
|
||||
return { resources, loading, error, refetch }
|
||||
return { resources, loading, error, refetch: fetchResources }
|
||||
}
|
||||
|
||||
@@ -765,10 +765,8 @@ export default function AgentChat() {
|
||||
className="chat-send-btn"
|
||||
onClick={handleSend}
|
||||
disabled={processing || !input.trim()}
|
||||
aria-label="Send message"
|
||||
title="Send message"
|
||||
>
|
||||
<i className="fas fa-paper-plane" aria-hidden="true" />
|
||||
<i className="fas fa-paper-plane" />
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
@@ -1427,10 +1427,8 @@ export default function Chat() {
|
||||
className="chat-send-btn"
|
||||
onClick={handleSend}
|
||||
disabled={!input.trim() && files.length === 0}
|
||||
aria-label={t('input.send')}
|
||||
title={t('input.send')}
|
||||
>
|
||||
<i className="fas fa-paper-plane" aria-hidden="true" />
|
||||
<i className="fas fa-paper-plane" />
|
||||
</button>
|
||||
)}
|
||||
</div>
|
||||
|
||||
@@ -10,7 +10,6 @@ import UnifiedMCPDropdown from '../components/UnifiedMCPDropdown'
|
||||
import ConfirmDialog from '../components/ConfirmDialog'
|
||||
import HomeConnect from '../components/HomeConnect'
|
||||
import { useResources } from '../hooks/useResources'
|
||||
import { usePolling } from '../hooks/usePolling'
|
||||
import { fileToBase64, backendControlApi, systemApi, modelsApi, mcpApi, nodesApi } from '../utils/api'
|
||||
import { API_CONFIG } from '../utils/config'
|
||||
import { greetingKey } from '../utils/greeting'
|
||||
@@ -18,7 +17,6 @@ import StatusPill from '../components/StatusPill'
|
||||
import Skeleton from '../components/Skeleton'
|
||||
import SectionHeading from '../components/SectionHeading'
|
||||
import EmptyState from '../components/EmptyState'
|
||||
import StarterModels from '../components/StarterModels'
|
||||
import { staggerStyle } from '../hooks/useStagger'
|
||||
|
||||
export default function Home() {
|
||||
@@ -70,36 +68,40 @@ export default function Home() {
|
||||
.catch(() => {})
|
||||
}, [])
|
||||
|
||||
// Poll cluster node data in distributed mode. Visibility-aware + gated on
|
||||
// distributedMode so a non-distributed or backgrounded tab makes no calls.
|
||||
const fetchCluster = useCallback(async () => {
|
||||
try {
|
||||
const data = await nodesApi.list()
|
||||
const nodes = Array.isArray(data) ? data : []
|
||||
const backendNodes = nodes.filter(n => !n.node_type || n.node_type === 'backend')
|
||||
const totalVRAM = backendNodes.reduce((sum, n) => sum + (n.total_vram || 0), 0)
|
||||
const usedVRAM = backendNodes.reduce((sum, n) => {
|
||||
if (n.total_vram && n.available_vram != null) return sum + (n.total_vram - n.available_vram)
|
||||
return sum
|
||||
}, 0)
|
||||
const totalRAM = backendNodes.reduce((sum, n) => sum + (n.total_ram || 0), 0)
|
||||
const usedRAM = backendNodes.reduce((sum, n) => {
|
||||
if (n.total_ram && n.available_ram != null) return sum + (n.total_ram - n.available_ram)
|
||||
return sum
|
||||
}, 0)
|
||||
const isGPU = totalVRAM > 0
|
||||
const healthyCount = backendNodes.filter(n => n.status === 'healthy').length
|
||||
const totalCount = backendNodes.length
|
||||
setClusterData({
|
||||
totalMem: isGPU ? totalVRAM : totalRAM,
|
||||
usedMem: isGPU ? usedVRAM : usedRAM,
|
||||
isGPU,
|
||||
healthyCount,
|
||||
totalCount,
|
||||
})
|
||||
} catch { setClusterData(null) }
|
||||
}, [])
|
||||
usePolling(fetchCluster, 5000, { enabled: distributedMode })
|
||||
// Poll cluster node data in distributed mode
|
||||
useEffect(() => {
|
||||
if (!distributedMode) return
|
||||
const fetchCluster = async () => {
|
||||
try {
|
||||
const data = await nodesApi.list()
|
||||
const nodes = Array.isArray(data) ? data : []
|
||||
const backendNodes = nodes.filter(n => !n.node_type || n.node_type === 'backend')
|
||||
const totalVRAM = backendNodes.reduce((sum, n) => sum + (n.total_vram || 0), 0)
|
||||
const usedVRAM = backendNodes.reduce((sum, n) => {
|
||||
if (n.total_vram && n.available_vram != null) return sum + (n.total_vram - n.available_vram)
|
||||
return sum
|
||||
}, 0)
|
||||
const totalRAM = backendNodes.reduce((sum, n) => sum + (n.total_ram || 0), 0)
|
||||
const usedRAM = backendNodes.reduce((sum, n) => {
|
||||
if (n.total_ram && n.available_ram != null) return sum + (n.total_ram - n.available_ram)
|
||||
return sum
|
||||
}, 0)
|
||||
const isGPU = totalVRAM > 0
|
||||
const healthyCount = backendNodes.filter(n => n.status === 'healthy').length
|
||||
const totalCount = backendNodes.length
|
||||
setClusterData({
|
||||
totalMem: isGPU ? totalVRAM : totalRAM,
|
||||
usedMem: isGPU ? usedVRAM : usedRAM,
|
||||
isGPU,
|
||||
healthyCount,
|
||||
totalCount,
|
||||
})
|
||||
} catch { setClusterData(null) }
|
||||
}
|
||||
fetchCluster()
|
||||
const interval = setInterval(fetchCluster, 5000)
|
||||
return () => clearInterval(interval)
|
||||
}, [distributedMode])
|
||||
|
||||
// Fetch configured models (to know if any exist) and loaded models (currently running)
|
||||
const fetchSystemInfo = useCallback(async () => {
|
||||
@@ -121,7 +123,11 @@ export default function Home() {
|
||||
}
|
||||
}, [])
|
||||
|
||||
usePolling(fetchSystemInfo, 5000)
|
||||
useEffect(() => {
|
||||
fetchSystemInfo()
|
||||
const interval = setInterval(fetchSystemInfo, 5000)
|
||||
return () => clearInterval(interval)
|
||||
}, [fetchSystemInfo])
|
||||
|
||||
// Check MCP availability when selected model changes
|
||||
useEffect(() => {
|
||||
@@ -517,8 +523,6 @@ export default function Home() {
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<StarterModels addToast={addToast} onInstallStarted={fetchSystemInfo} />
|
||||
|
||||
<div className="home-wizard-actions">
|
||||
<button className="btn btn-primary" onClick={() => navigate('/app/models')}>
|
||||
<i className="fas fa-store" /> {t('wizard.browseGallery')}
|
||||
|
||||
@@ -24,37 +24,7 @@ function formatNumber(n) {
|
||||
return String(n)
|
||||
}
|
||||
|
||||
// Opt-in token pricing. LocalAI is self-hosted and has no inherent monetary
|
||||
// cost, but multi-user deployments use estimated cost for chargeback/budgeting.
|
||||
// Prices are admin-supplied $ per 1M tokens, stored locally (per-browser), and
|
||||
// the whole cost surface stays hidden until a non-zero price is set.
|
||||
const TOKEN_PRICING_KEY = 'localai_token_pricing'
|
||||
|
||||
/**
 * Read the stored token pricing from localStorage.
 *
 * Returns `{ prompt, completion }` as numbers. Missing, non-numeric or
 * unparsable values, as well as any storage error, yield 0 for both fields.
 */
function loadPricing() {
  const pricing = { prompt: 0, completion: 0 }
  try {
    const stored = JSON.parse(localStorage.getItem(TOKEN_PRICING_KEY) || '{}')
    const prompt = Number(stored.prompt) || 0
    const completion = Number(stored.completion) || 0
    // Assign only after both values were derived, so a failure leaves zeros.
    pricing.prompt = prompt
    pricing.completion = completion
  } catch {
    // Fall through with the zeroed defaults.
  }
  return pricing
}
|
||||
|
||||
/**
 * Persist the pricing object `p` to localStorage as JSON.
 * Best-effort: any error while serializing or storing is ignored.
 */
function savePricing(p) {
  try {
    const store = localStorage
    store.setItem(TOKEN_PRICING_KEY, JSON.stringify(p))
  } catch {
    // Ignored on purpose: pricing is optional and must never break the page.
  }
}
|
||||
|
||||
/**
 * True when at least one of `p.prompt` / `p.completion` is greater than zero.
 * A null or undefined `p` counts as "no pricing".
 */
function pricingEnabled(p) {
  const rates = [p?.prompt, p?.completion]
  return rates.some(rate => (rate || 0) > 0)
}
|
||||
|
||||
/**
 * Estimated cost of a usage row.
 *
 * @param row  object carrying `prompt_tokens` and `completion_tokens`.
 * @param p    pricing object with `prompt` / `completion` rates per 1M tokens.
 * @returns    prompt cost plus completion cost, as a number.
 *
 * Missing or non-numeric token counts contribute 0 instead of turning the whole
 * sum into NaN, and a missing row or pricing object yields 0 rather than throwing.
 */
function costOf(row, p) {
  const promptTokens = Number(row?.prompt_tokens) || 0
  const completionTokens = Number(row?.completion_tokens) || 0
  return (promptTokens / 1_000_000) * (p?.prompt || 0)
    + (completionTokens / 1_000_000) * (p?.completion || 0)
}
|
||||
|
||||
/**
 * Render a cost as a dollar string.
 * Falsy input gives '$0.00', anything below one cent gives '<$0.01', and
 * everything else is shown with two decimals.
 */
function formatCost(n) {
  if (n) {
    return n < 0.01 ? '<$0.01' : `$${n.toFixed(2)}`
  }
  return '$0.00'
}
|
||||
|
||||
function StatCard({ icon, label, value, muted, text }) {
|
||||
function StatCard({ icon, label, value, muted }) {
|
||||
return (
|
||||
<div className="card" style={{ padding: 'var(--spacing-sm) var(--spacing-md)', flex: '1 1 0', minWidth: 120, opacity: muted ? 0.7 : 1 }}>
|
||||
<div style={{ display: 'flex', alignItems: 'center', gap: 6, marginBottom: 2 }}>
|
||||
@@ -62,7 +32,7 @@ function StatCard({ icon, label, value, muted, text }) {
|
||||
<span style={{ fontSize: '0.6875rem', color: 'var(--color-text-muted)', fontWeight: 500, textTransform: 'uppercase', letterSpacing: '0.03em' }}>{label}</span>
|
||||
</div>
|
||||
<div style={{ fontSize: '1.375rem', fontWeight: 700, fontFamily: 'var(--font-mono)', color: muted ? 'var(--color-text-secondary)' : 'var(--color-text-primary)' }}>
|
||||
{text != null ? text : `${muted ? '~' : ''}${formatNumber(value)}`}
|
||||
{muted ? '~' : ''}{formatNumber(value)}
|
||||
</div>
|
||||
</div>
|
||||
)
|
||||
@@ -672,10 +642,6 @@ export default function Usage() {
|
||||
const [activeTab, setActiveTab] = useState('models')
|
||||
const [quotas, setQuotas] = useState([])
|
||||
const [selectedUserId, setSelectedUserId] = useState(null)
|
||||
const [pricing, setPricingState] = useState(loadPricing)
|
||||
const [showPricing, setShowPricing] = useState(false)
|
||||
const setPricing = (p) => { setPricingState(p); savePricing(p) }
|
||||
const costEnabled = pricingEnabled(pricing)
|
||||
|
||||
const fetchUsage = useCallback(async () => {
|
||||
setLoading(true)
|
||||
@@ -777,50 +743,11 @@ export default function Usage() {
|
||||
<i className="fas fa-key" style={{ fontSize: '0.7rem' }} /> {t('usage.sources.tab')}
|
||||
</button>
|
||||
<div style={{ flex: 1 }} />
|
||||
<button
|
||||
className={`btn btn-sm ${costEnabled ? 'btn-primary' : 'btn-secondary'}`}
|
||||
onClick={() => setShowPricing(v => !v)}
|
||||
style={{ gap: 4 }}
|
||||
title="Set token pricing to estimate cost"
|
||||
>
|
||||
<i className="fas fa-dollar-sign" /> {costEnabled ? 'Pricing' : 'Set pricing'}
|
||||
</button>
|
||||
<button className="btn btn-secondary btn-sm" onClick={fetchUsage} disabled={loading} style={{ gap: 4 }}>
|
||||
<i className={`fas fa-rotate${loading ? ' fa-spin' : ''}`} /> Refresh
|
||||
</button>
|
||||
</div>
|
||||
|
||||
{showPricing && (
|
||||
<div className="card" style={{ display: 'flex', alignItems: 'flex-end', gap: 'var(--spacing-md)', flexWrap: 'wrap', padding: 'var(--spacing-md)', marginBottom: 'var(--spacing-md)' }}>
|
||||
<div style={{ display: 'flex', flexDirection: 'column', gap: 2 }}>
|
||||
<label style={{ fontSize: '0.6875rem', color: 'var(--color-text-muted)', textTransform: 'uppercase', letterSpacing: '0.03em' }}>Prompt $/1M tokens</label>
|
||||
<input
|
||||
className="input" type="number" min="0" step="0.01" style={{ width: 140 }}
|
||||
value={pricing.prompt || ''}
|
||||
placeholder="0.00"
|
||||
onChange={e => setPricing({ ...pricing, prompt: Number(e.target.value) || 0 })}
|
||||
/>
|
||||
</div>
|
||||
<div style={{ display: 'flex', flexDirection: 'column', gap: 2 }}>
|
||||
<label style={{ fontSize: '0.6875rem', color: 'var(--color-text-muted)', textTransform: 'uppercase', letterSpacing: '0.03em' }}>Completion $/1M tokens</label>
|
||||
<input
|
||||
className="input" type="number" min="0" step="0.01" style={{ width: 140 }}
|
||||
value={pricing.completion || ''}
|
||||
placeholder="0.00"
|
||||
onChange={e => setPricing({ ...pricing, completion: Number(e.target.value) || 0 })}
|
||||
/>
|
||||
</div>
|
||||
{costEnabled && (
|
||||
<button className="btn btn-secondary btn-sm" onClick={() => setPricing({ prompt: 0, completion: 0 })} style={{ gap: 4 }}>
|
||||
<i className="fas fa-times" /> Clear
|
||||
</button>
|
||||
)}
|
||||
<span style={{ fontSize: '0.75rem', color: 'var(--color-text-muted)', flex: '1 1 200px' }}>
|
||||
Estimated cost only. Prices are stored in this browser and applied to recorded token counts.
|
||||
</span>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{loading ? (
|
||||
<div style={{ display: 'flex', justifyContent: 'center', padding: 'var(--spacing-xl)' }}>
|
||||
<LoadingSpinner size="lg" />
|
||||
@@ -833,9 +760,6 @@ export default function Usage() {
|
||||
<StatCard icon="fas fa-arrow-up" label="Prompt" value={displayTotals.prompt_tokens} />
|
||||
<StatCard icon="fas fa-arrow-down" label="Completion" value={displayTotals.completion_tokens} />
|
||||
<StatCard icon="fas fa-coins" label="Total" value={displayTotals.total_tokens} />
|
||||
{costEnabled && (
|
||||
<StatCard icon="fas fa-dollar-sign" label="Est. Cost" text={formatCost(costOf(displayTotals, pricing))} />
|
||||
)}
|
||||
</div>
|
||||
|
||||
{/* Predictions */}
|
||||
@@ -865,7 +789,6 @@ export default function Usage() {
|
||||
<th style={{ width: 110 }}>Prompt</th>
|
||||
<th style={{ width: 110 }}>Completion</th>
|
||||
<th style={{ width: 110 }}>Total</th>
|
||||
{costEnabled && <th style={{ width: 100 }}>Est. Cost</th>}
|
||||
<th style={{ width: 140 }}></th>
|
||||
</tr>
|
||||
</thead>
|
||||
@@ -877,7 +800,6 @@ export default function Usage() {
|
||||
<td style={monoCell}>{formatNumber(row.prompt_tokens)}</td>
|
||||
<td style={monoCell}>{formatNumber(row.completion_tokens)}</td>
|
||||
<td style={{ ...monoCell, fontWeight: 600 }}>{formatNumber(row.total_tokens)}</td>
|
||||
{costEnabled && <td style={monoCell}>{formatCost(costOf(row, pricing))}</td>}
|
||||
<td><UsageBar value={row.total_tokens} max={maxTokens} /></td>
|
||||
</tr>
|
||||
))}
|
||||
@@ -905,7 +827,6 @@ export default function Usage() {
|
||||
<th style={{ width: 110 }}>Prompt</th>
|
||||
<th style={{ width: 110 }}>Completion</th>
|
||||
<th style={{ width: 110 }}>Total</th>
|
||||
{costEnabled && <th style={{ width: 100 }}>Est. Cost</th>}
|
||||
<th style={{ width: 110 }}>Proj. Total</th>
|
||||
<th style={{ width: 140 }}></th>
|
||||
</tr>
|
||||
@@ -928,7 +849,6 @@ export default function Usage() {
|
||||
<td style={monoCell}>{formatNumber(row.prompt_tokens)}</td>
|
||||
<td style={monoCell}>{formatNumber(row.completion_tokens)}</td>
|
||||
<td style={{ ...monoCell, fontWeight: 600 }}>{formatNumber(row.total_tokens)}</td>
|
||||
{costEnabled && <td style={monoCell}>{formatCost(costOf(row, pricing))}</td>}
|
||||
<td style={{ ...monoCell, color: 'var(--color-text-muted)', fontStyle: 'italic' }}>
|
||||
{up?.predictions ? `~${formatNumber(up.predictions.projectedTotals.total_tokens)}` : '-'}
|
||||
</td>
|
||||
@@ -936,7 +856,7 @@ export default function Usage() {
|
||||
</tr>
|
||||
{isExpanded && up && (
|
||||
<tr>
|
||||
<td colSpan={costEnabled ? 9 : 8} style={{ padding: 0, background: 'var(--color-bg-secondary)' }}>
|
||||
<td colSpan={8} style={{ padding: 0, background: 'var(--color-bg-secondary)' }}>
|
||||
<div style={{ padding: 'var(--spacing-md)' }}>
|
||||
{up.predictions && (
|
||||
<div style={{ display: 'grid', gridTemplateColumns: 'repeat(auto-fit, minmax(100px, 1fr))', gap: 'var(--spacing-xs)', marginBottom: 'var(--spacing-sm)' }}>
|
||||
|
||||
@@ -268,7 +268,7 @@ func RegisterAuthRoutes(e *echo.Echo, app *application.Application) {
|
||||
// Set up OAuth manager when any OAuth/OIDC provider is configured
|
||||
if appConfig.Auth.GitHubClientID != "" || appConfig.Auth.OIDCClientID != "" {
|
||||
oauthMgr, err := auth.NewOAuthManager(
|
||||
appConfig.ExternalBaseURL,
|
||||
appConfig.Auth.BaseURL,
|
||||
auth.OAuthParams{
|
||||
GitHubClientID: appConfig.Auth.GitHubClientID,
|
||||
GitHubClientSecret: appConfig.Auth.GitHubClientSecret,
|
||||
|
||||
@@ -79,29 +79,21 @@ func (s *GalleryStore) Create(op *GalleryOperationRecord) error {
|
||||
}).Create(op).Error
|
||||
}
|
||||
|
||||
// UpdateProgress updates progress for an operation. The cancellable flag is
|
||||
// persisted on every tick so a replica that restarts mid-install rehydrates the
|
||||
// op as still cancellable — otherwise the column keeps its Create-time zero
|
||||
// value (false), the UI hides the cancel button, and the orphaned op can only
|
||||
// be dismissed by waiting for the 30-minute stale reaper.
|
||||
func (s *GalleryStore) UpdateProgress(id string, progress float64, message, downloadedSize string, cancellable bool) error {
|
||||
// UpdateProgress updates progress for an operation.
|
||||
func (s *GalleryStore) UpdateProgress(id string, progress float64, message, downloadedSize string) error {
|
||||
return s.db.Model(&GalleryOperationRecord{}).Where("id = ?", id).Updates(map[string]any{
|
||||
"progress": progress,
|
||||
"message": message,
|
||||
"downloaded_file_size": downloadedSize,
|
||||
"cancellable": cancellable,
|
||||
"updated_at": time.Now(),
|
||||
}).Error
|
||||
}
|
||||
|
||||
// UpdateStatus updates the status of an operation. A terminal status is never
|
||||
// cancellable, so the flag is cleared here to keep the persisted row consistent
|
||||
// with what the UI should offer.
|
||||
// UpdateStatus updates the status of an operation.
|
||||
func (s *GalleryStore) UpdateStatus(id, status, errMsg string) error {
|
||||
updates := map[string]any{
|
||||
"status": status,
|
||||
"cancellable": false,
|
||||
"updated_at": time.Now(),
|
||||
"status": status,
|
||||
"updated_at": time.Now(),
|
||||
}
|
||||
if errMsg != "" {
|
||||
updates["error"] = errMsg
|
||||
|
||||
@@ -1,56 +0,0 @@
|
||||
package galleryop_test
|
||||
|
||||
import (
|
||||
. "github.com/onsi/ginkgo/v2"
|
||||
. "github.com/onsi/gomega"
|
||||
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
"github.com/mudler/LocalAI/core/services/distributed"
|
||||
"github.com/mudler/LocalAI/core/services/galleryop"
|
||||
"github.com/mudler/LocalAI/core/services/testutil"
|
||||
)
|
||||
|
||||
// Reproduces "an in-flight install can't be cancelled after a restart". The
|
||||
// live install path marks OpStatus.Cancellable=true on every progress tick, but
|
||||
// UpdateStatus persisted progress/status to the gallery store WITHOUT the
|
||||
// cancellable flag, and Create defaulted it to false. So after a replica
|
||||
// restart Hydrate rebuilt the op with Cancellable=false, /api/operations
|
||||
// reported cancellable:false, and the UI hid the cancel button — the orphaned
|
||||
// op lingered until the 30-minute stale reaper expired it. The cancellable
|
||||
// state must be persisted so a rehydrated in-flight op stays cancellable.
|
||||
var _ = Describe("GalleryService cancellable persistence across restart", func() {
|
||||
It("rehydrates an in-flight op as still cancellable", func() {
|
||||
db := testutil.SetupTestDB()
|
||||
store, err := distributed.NewGalleryStore(db)
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
|
||||
svc := galleryop.NewGalleryService(&config.ApplicationConfig{}, nil)
|
||||
svc.SetGalleryStore(store)
|
||||
|
||||
// Seed the in-flight op row as the worker goroutine does on admission.
|
||||
Expect(store.Create(&distributed.GalleryOperationRecord{
|
||||
ID: "op-inflight",
|
||||
GalleryElementName: "llama-cpp-development",
|
||||
OpType: "backend_install",
|
||||
Status: "pending",
|
||||
})).To(Succeed())
|
||||
|
||||
// Simulate a progress tick: the live path always marks installs
|
||||
// cancellable while they are downloading/processing.
|
||||
svc.UpdateStatus("op-inflight", &galleryop.OpStatus{
|
||||
Message: "downloading",
|
||||
Progress: 25,
|
||||
Cancellable: true,
|
||||
})
|
||||
|
||||
// A fresh replica boots and hydrates from the store.
|
||||
fresh := galleryop.NewGalleryService(&config.ApplicationConfig{}, nil)
|
||||
fresh.SetGalleryStore(store)
|
||||
Expect(fresh.Hydrate()).To(Succeed())
|
||||
|
||||
st := fresh.GetStatus("op-inflight")
|
||||
Expect(st).ToNot(BeNil(), "the in-flight op must hydrate after a restart")
|
||||
Expect(st.Cancellable).To(BeTrue(),
|
||||
"a still-active install must rehydrate as cancellable so the admin can dismiss it")
|
||||
})
|
||||
})
|
||||
@@ -167,7 +167,7 @@ func (g *GalleryService) UpdateStatus(s string, op *OpStatus) {
|
||||
xlog.Warn("Failed to persist gallery operation status", "op_id", s, "error", err)
|
||||
}
|
||||
} else {
|
||||
if err := store.UpdateProgress(s, op.Progress, op.Message, op.DownloadedFileSize, op.Cancellable); err != nil {
|
||||
if err := store.UpdateProgress(s, op.Progress, op.Message, op.DownloadedFileSize); err != nil {
|
||||
xlog.Warn("Failed to persist gallery operation progress", "op_id", s, "error", err)
|
||||
}
|
||||
}
|
||||
@@ -467,7 +467,6 @@ func (g *GalleryService) Start(c context.Context, cl *config.ModelConfigLoader,
|
||||
GalleryElementName: op.GalleryElementName,
|
||||
OpType: "backend_install",
|
||||
Status: "pending",
|
||||
Cancellable: true,
|
||||
})
|
||||
}
|
||||
err := g.backendHandler(&op, systemState)
|
||||
@@ -500,8 +499,6 @@ func (g *GalleryService) Start(c context.Context, cl *config.ModelConfigLoader,
|
||||
GalleryElementName: op.GalleryElementName,
|
||||
OpType: opType,
|
||||
Status: "pending",
|
||||
// A delete is not cancellable; an install is.
|
||||
Cancellable: !op.Delete,
|
||||
})
|
||||
}
|
||||
err := g.modelHandler(&op, cl, systemState)
|
||||
|
||||
@@ -19,40 +19,25 @@ import (
|
||||
// Per-replica: a single tracker instance is bound to (nodeID, modelName, replicaIndex).
|
||||
// The router constructs one tracker per Route() result, so each in-flight tick lands
|
||||
// on the correct row even when multiple replicas of the same model live on the same node.
|
||||
//
|
||||
// Embedding only grpc.ControlBackend (not the whole grpc.Backend) is what makes
|
||||
// the in-flight accounting safe by construction: the control-plane methods pass
|
||||
// through untracked, while every grpc.InferenceBackend method must be declared
|
||||
// explicitly below to satisfy grpc.Backend. Adding an inference method to the
|
||||
// interface therefore breaks this file's build (see the var assertion below)
|
||||
// until it is wrapped with track() - so a new inference path can't be added
|
||||
// without an in-flight accounting decision.
|
||||
type InFlightTrackingClient struct {
|
||||
grpc.ControlBackend // passthrough for control-plane / streaming-constructor methods
|
||||
inner grpc.InferenceBackend // tracked inference methods delegate here
|
||||
registry InFlightTracker
|
||||
nodeID string
|
||||
modelName string
|
||||
replicaIndex int
|
||||
grpc.Backend // embed for passthrough of untracked methods
|
||||
registry InFlightTracker
|
||||
nodeID string
|
||||
modelName string
|
||||
replicaIndex int
|
||||
|
||||
firstOnce sync.Once // guards onFirstComplete
|
||||
onFirstComplete func() // called once after the first tracked inference call completes
|
||||
}
|
||||
|
||||
// Compile-time contract: *InFlightTrackingClient must implement the FULL backend
|
||||
// surface. Because it embeds only ControlBackend, this fails to compile if any
|
||||
// InferenceBackend method is left unwrapped.
|
||||
var _ grpc.Backend = (*InFlightTrackingClient)(nil)
|
||||
|
||||
// NewInFlightTrackingClient wraps a gRPC backend client with in-flight tracking.
|
||||
func NewInFlightTrackingClient(inner grpc.Backend, registry InFlightTracker, nodeID, modelName string, replicaIndex int) *InFlightTrackingClient {
|
||||
return &InFlightTrackingClient{
|
||||
ControlBackend: inner,
|
||||
inner: inner,
|
||||
registry: registry,
|
||||
nodeID: nodeID,
|
||||
modelName: modelName,
|
||||
replicaIndex: replicaIndex,
|
||||
Backend: inner,
|
||||
registry: registry,
|
||||
nodeID: nodeID,
|
||||
modelName: modelName,
|
||||
replicaIndex: replicaIndex,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -106,162 +91,154 @@ func (c *InFlightTrackingClient) reconcile(err error) error {
|
||||
|
||||
func (c *InFlightTrackingClient) Predict(ctx context.Context, in *pb.PredictOptions, opts ...ggrpc.CallOption) (*pb.Reply, error) {
|
||||
defer c.track(ctx)()
|
||||
reply, err := c.inner.Predict(ctx, in, opts...)
|
||||
reply, err := c.Backend.Predict(ctx, in, opts...)
|
||||
return reply, c.reconcile(err)
|
||||
}
|
||||
|
||||
func (c *InFlightTrackingClient) PredictStream(ctx context.Context, in *pb.PredictOptions, f func(reply *pb.Reply), opts ...ggrpc.CallOption) error {
|
||||
defer c.track(ctx)()
|
||||
return c.reconcile(c.inner.PredictStream(ctx, in, f, opts...))
|
||||
return c.reconcile(c.Backend.PredictStream(ctx, in, f, opts...))
|
||||
}
|
||||
|
||||
func (c *InFlightTrackingClient) Embeddings(ctx context.Context, in *pb.PredictOptions, opts ...ggrpc.CallOption) (*pb.EmbeddingResult, error) {
|
||||
defer c.track(ctx)()
|
||||
res, err := c.inner.Embeddings(ctx, in, opts...)
|
||||
res, err := c.Backend.Embeddings(ctx, in, opts...)
|
||||
return res, c.reconcile(err)
|
||||
}
|
||||
|
||||
func (c *InFlightTrackingClient) GenerateImage(ctx context.Context, in *pb.GenerateImageRequest, opts ...ggrpc.CallOption) (*pb.Result, error) {
|
||||
defer c.track(ctx)()
|
||||
res, err := c.inner.GenerateImage(ctx, in, opts...)
|
||||
res, err := c.Backend.GenerateImage(ctx, in, opts...)
|
||||
return res, c.reconcile(err)
|
||||
}
|
||||
|
||||
func (c *InFlightTrackingClient) GenerateVideo(ctx context.Context, in *pb.GenerateVideoRequest, opts ...ggrpc.CallOption) (*pb.Result, error) {
|
||||
defer c.track(ctx)()
|
||||
res, err := c.inner.GenerateVideo(ctx, in, opts...)
|
||||
res, err := c.Backend.GenerateVideo(ctx, in, opts...)
|
||||
return res, c.reconcile(err)
|
||||
}
|
||||
|
||||
func (c *InFlightTrackingClient) TTS(ctx context.Context, in *pb.TTSRequest, opts ...ggrpc.CallOption) (*pb.Result, error) {
|
||||
defer c.track(ctx)()
|
||||
res, err := c.inner.TTS(ctx, in, opts...)
|
||||
res, err := c.Backend.TTS(ctx, in, opts...)
|
||||
return res, c.reconcile(err)
|
||||
}
|
||||
|
||||
func (c *InFlightTrackingClient) TTSStream(ctx context.Context, in *pb.TTSRequest, f func(reply *pb.Reply), opts ...ggrpc.CallOption) error {
|
||||
defer c.track(ctx)()
|
||||
return c.reconcile(c.inner.TTSStream(ctx, in, f, opts...))
|
||||
return c.reconcile(c.Backend.TTSStream(ctx, in, f, opts...))
|
||||
}
|
||||
|
||||
func (c *InFlightTrackingClient) SoundGeneration(ctx context.Context, in *pb.SoundGenerationRequest, opts ...ggrpc.CallOption) (*pb.Result, error) {
|
||||
defer c.track(ctx)()
|
||||
res, err := c.inner.SoundGeneration(ctx, in, opts...)
|
||||
res, err := c.Backend.SoundGeneration(ctx, in, opts...)
|
||||
return res, c.reconcile(err)
|
||||
}
|
||||
|
||||
func (c *InFlightTrackingClient) AudioTranscription(ctx context.Context, in *pb.TranscriptRequest, opts ...ggrpc.CallOption) (*pb.TranscriptResult, error) {
|
||||
defer c.track(ctx)()
|
||||
res, err := c.inner.AudioTranscription(ctx, in, opts...)
|
||||
res, err := c.Backend.AudioTranscription(ctx, in, opts...)
|
||||
return res, c.reconcile(err)
|
||||
}
|
||||
|
||||
func (c *InFlightTrackingClient) AudioTranscriptionStream(ctx context.Context, in *pb.TranscriptRequest, f func(chunk *pb.TranscriptStreamResponse), opts ...ggrpc.CallOption) error {
|
||||
defer c.track(ctx)()
|
||||
return c.reconcile(c.inner.AudioTranscriptionStream(ctx, in, f, opts...))
|
||||
return c.reconcile(c.Backend.AudioTranscriptionStream(ctx, in, f, opts...))
|
||||
}
|
||||
|
||||
func (c *InFlightTrackingClient) Detect(ctx context.Context, in *pb.DetectOptions, opts ...ggrpc.CallOption) (*pb.DetectResponse, error) {
|
||||
defer c.track(ctx)()
|
||||
res, err := c.inner.Detect(ctx, in, opts...)
|
||||
res, err := c.Backend.Detect(ctx, in, opts...)
|
||||
return res, c.reconcile(err)
|
||||
}
|
||||
|
||||
func (c *InFlightTrackingClient) Depth(ctx context.Context, in *pb.DepthRequest, opts ...ggrpc.CallOption) (*pb.DepthResponse, error) {
|
||||
defer c.track(ctx)()
|
||||
res, err := c.inner.Depth(ctx, in, opts...)
|
||||
res, err := c.Backend.Depth(ctx, in, opts...)
|
||||
return res, c.reconcile(err)
|
||||
}
|
||||
|
||||
func (c *InFlightTrackingClient) Rerank(ctx context.Context, in *pb.RerankRequest, opts ...ggrpc.CallOption) (*pb.RerankResult, error) {
|
||||
defer c.track(ctx)()
|
||||
res, err := c.inner.Rerank(ctx, in, opts...)
|
||||
res, err := c.Backend.Rerank(ctx, in, opts...)
|
||||
return res, c.reconcile(err)
|
||||
}
|
||||
|
||||
func (c *InFlightTrackingClient) VAD(ctx context.Context, in *pb.VADRequest, opts ...ggrpc.CallOption) (*pb.VADResponse, error) {
|
||||
defer c.track(ctx)()
|
||||
res, err := c.inner.VAD(ctx, in, opts...)
|
||||
res, err := c.Backend.VAD(ctx, in, opts...)
|
||||
return res, c.reconcile(err)
|
||||
}
|
||||
|
||||
func (c *InFlightTrackingClient) Diarize(ctx context.Context, in *pb.DiarizeRequest, opts ...ggrpc.CallOption) (*pb.DiarizeResponse, error) {
|
||||
defer c.track(ctx)()
|
||||
res, err := c.inner.Diarize(ctx, in, opts...)
|
||||
res, err := c.Backend.Diarize(ctx, in, opts...)
|
||||
return res, c.reconcile(err)
|
||||
}
|
||||
|
||||
func (c *InFlightTrackingClient) FaceVerify(ctx context.Context, in *pb.FaceVerifyRequest, opts ...ggrpc.CallOption) (*pb.FaceVerifyResponse, error) {
|
||||
defer c.track(ctx)()
|
||||
res, err := c.inner.FaceVerify(ctx, in, opts...)
|
||||
res, err := c.Backend.FaceVerify(ctx, in, opts...)
|
||||
return res, c.reconcile(err)
|
||||
}
|
||||
|
||||
func (c *InFlightTrackingClient) FaceAnalyze(ctx context.Context, in *pb.FaceAnalyzeRequest, opts ...ggrpc.CallOption) (*pb.FaceAnalyzeResponse, error) {
|
||||
defer c.track(ctx)()
|
||||
res, err := c.inner.FaceAnalyze(ctx, in, opts...)
|
||||
res, err := c.Backend.FaceAnalyze(ctx, in, opts...)
|
||||
return res, c.reconcile(err)
|
||||
}
|
||||
|
||||
func (c *InFlightTrackingClient) VoiceVerify(ctx context.Context, in *pb.VoiceVerifyRequest, opts ...ggrpc.CallOption) (*pb.VoiceVerifyResponse, error) {
|
||||
defer c.track(ctx)()
|
||||
res, err := c.inner.VoiceVerify(ctx, in, opts...)
|
||||
res, err := c.Backend.VoiceVerify(ctx, in, opts...)
|
||||
return res, c.reconcile(err)
|
||||
}
|
||||
|
||||
func (c *InFlightTrackingClient) VoiceAnalyze(ctx context.Context, in *pb.VoiceAnalyzeRequest, opts ...ggrpc.CallOption) (*pb.VoiceAnalyzeResponse, error) {
|
||||
defer c.track(ctx)()
|
||||
res, err := c.inner.VoiceAnalyze(ctx, in, opts...)
|
||||
res, err := c.Backend.VoiceAnalyze(ctx, in, opts...)
|
||||
return res, c.reconcile(err)
|
||||
}
|
||||
|
||||
func (c *InFlightTrackingClient) VoiceEmbed(ctx context.Context, in *pb.VoiceEmbedRequest, opts ...ggrpc.CallOption) (*pb.VoiceEmbedResponse, error) {
|
||||
defer c.track(ctx)()
|
||||
res, err := c.inner.VoiceEmbed(ctx, in, opts...)
|
||||
res, err := c.Backend.VoiceEmbed(ctx, in, opts...)
|
||||
return res, c.reconcile(err)
|
||||
}
|
||||
|
||||
func (c *InFlightTrackingClient) TokenClassify(ctx context.Context, in *pb.TokenClassifyRequest, opts ...ggrpc.CallOption) (*pb.TokenClassifyResponse, error) {
|
||||
defer c.track(ctx)()
|
||||
res, err := c.inner.TokenClassify(ctx, in, opts...)
|
||||
res, err := c.Backend.TokenClassify(ctx, in, opts...)
|
||||
return res, c.reconcile(err)
|
||||
}
|
||||
|
||||
func (c *InFlightTrackingClient) Score(ctx context.Context, in *pb.ScoreRequest, opts ...ggrpc.CallOption) (*pb.ScoreResponse, error) {
|
||||
defer c.track(ctx)()
|
||||
res, err := c.inner.Score(ctx, in, opts...)
|
||||
return res, c.reconcile(err)
|
||||
}
|
||||
|
||||
func (c *InFlightTrackingClient) SoundDetection(ctx context.Context, in *pb.SoundDetectionRequest, opts ...ggrpc.CallOption) (*pb.SoundDetectionResponse, error) {
|
||||
defer c.track(ctx)()
|
||||
res, err := c.inner.SoundDetection(ctx, in, opts...)
|
||||
res, err := c.Backend.Score(ctx, in, opts...)
|
||||
return res, c.reconcile(err)
|
||||
}
|
||||
|
||||
func (c *InFlightTrackingClient) AudioEncode(ctx context.Context, in *pb.AudioEncodeRequest, opts ...ggrpc.CallOption) (*pb.AudioEncodeResult, error) {
|
||||
defer c.track(ctx)()
|
||||
res, err := c.inner.AudioEncode(ctx, in, opts...)
|
||||
res, err := c.Backend.AudioEncode(ctx, in, opts...)
|
||||
return res, c.reconcile(err)
|
||||
}
|
||||
|
||||
func (c *InFlightTrackingClient) AudioDecode(ctx context.Context, in *pb.AudioDecodeRequest, opts ...ggrpc.CallOption) (*pb.AudioDecodeResult, error) {
|
||||
defer c.track(ctx)()
|
||||
res, err := c.inner.AudioDecode(ctx, in, opts...)
|
||||
res, err := c.Backend.AudioDecode(ctx, in, opts...)
|
||||
return res, c.reconcile(err)
|
||||
}
|
||||
|
||||
func (c *InFlightTrackingClient) AudioTransform(ctx context.Context, in *pb.AudioTransformRequest, opts ...ggrpc.CallOption) (*pb.AudioTransformResult, error) {
|
||||
defer c.track(ctx)()
|
||||
res, err := c.inner.AudioTransform(ctx, in, opts...)
|
||||
res, err := c.Backend.AudioTransform(ctx, in, opts...)
|
||||
return res, c.reconcile(err)
|
||||
}
|
||||
|
||||
// AudioTransformStream, AudioToAudioStream and Forward live in grpc.ControlBackend
|
||||
// and are passed through via the embedded field, NOT tracked: they return a stream
|
||||
// client and the inference spans the stream's lifetime, not the constructor call.
|
||||
// Wrapping the constructor with track() would increment and immediately decrement
|
||||
// (and fire onFirstComplete) before any audio flows. Tracking those correctly needs
|
||||
// the done() func tied to stream close, which the Backend interface doesn't surface
|
||||
// here. If they ever need tracking, move them to grpc.InferenceBackend - the build
|
||||
// will then force an explicit wrapper here.
|
||||
// AudioTransformStream, AudioToAudioStream and Forward are deliberately left as
|
||||
// embedded passthrough: they return a stream client and the inference spans the
|
||||
// stream's lifetime, not the constructor call. Wrapping the constructor with
|
||||
// track() would increment and immediately decrement (and fire onFirstComplete)
|
||||
// before any audio flows. Tracking those correctly needs the done() func tied to
|
||||
// stream close, which the current Backend interface doesn't surface here.
|
||||
|
||||
@@ -408,13 +408,6 @@ var _ = Describe("InFlightTrackingClient", func() {
|
||||
return err
|
||||
})
|
||||
})
|
||||
|
||||
It("SoundDetection", func() {
|
||||
assertTracked(func() error {
|
||||
_, err := client.SoundDetection(context.Background(), &pb.SoundDetectionRequest{})
|
||||
return err
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
Describe("stale model reload (self-heal)", func() {
|
||||
|
||||
@@ -14,26 +14,6 @@ When running LocalAI behind a TLS termination reverse proxy, the Web UI may fail
|
||||
|
||||
LocalAI uses the `X-Forwarded-Proto` HTTP header to determine the protocol used by clients. When this header is set to `https`, LocalAI will generate HTTPS URLs for static assets in the Web UI.
|
||||
|
||||
## Running behind a reverse proxy (HTTPS / subpath)
|
||||
|
||||
LocalAI does not terminate TLS itself, so HTTPS is provided by a reverse
|
||||
proxy in front of it. Self-referential links (generated image and video
|
||||
URLs, async job status URLs, OAuth callbacks) need the externally visible
|
||||
scheme, host and port.
|
||||
|
||||
LocalAI determines these in this order:
|
||||
|
||||
1. `LOCALAI_BASE_URL` - if set, it is authoritative for the origin. Set it to
|
||||
the externally visible base URL, e.g. `LOCALAI_BASE_URL=https://localai.example.com`
|
||||
or `https://192.168.0.13:34567`. Recommended whenever links come back with
|
||||
the wrong scheme or host.
|
||||
2. Otherwise, the `X-Forwarded-Proto` and `X-Forwarded-Host` headers (or the
|
||||
RFC 7239 `Forwarded` header) sent by the proxy. Ensure your proxy forwards
|
||||
`X-Forwarded-Proto: https`.
|
||||
|
||||
A reverse-proxy subpath mount is supported via `X-Forwarded-Prefix`; it is
|
||||
appended to `LOCALAI_BASE_URL` when both are present.
|
||||
|
||||
## Required Headers
|
||||
|
||||
Your reverse proxy must forward these headers to LocalAI:
|
||||
|
||||
@@ -185,13 +185,6 @@ It is persisted through `POST /api/settings` and read live, so a change takes
|
||||
effect on the next request without a restart. A default that names a model no
|
||||
longer loaded still appears (marked *not loaded*) so it can be toggled off.
|
||||
|
||||
The default set can also be supplied out-of-band with the
|
||||
`LOCALAI_PII_DEFAULT_DETECTORS` environment variable (comma-separated model
|
||||
names, e.g. `privacy-filter-nemotron,secret-filter`). When set it takes
|
||||
precedence over the value persisted via the UI (env > file), which is the
|
||||
right behaviour for immutable container deployments that pin filtering policy
|
||||
at boot rather than via the admin UI.
|
||||
|
||||
This is what makes `cloud-proxy` / MITM redaction work out of the box: those
|
||||
backends default to PII-enabled but ship no detector list, so without a
|
||||
default detector the filter runs with nothing to scan. Set one here and
|
||||
|
||||
@@ -68,33 +68,6 @@ pipeline:
|
||||
|
||||
This is applied only to the realtime session's copy of the LLM config, so it does not affect other users of the same model. Leave it unset to use the LLM model config's own reasoning settings.
|
||||
|
||||
### Conversation compaction (long sessions on CPU)
|
||||
|
||||
By default a realtime session feeds only the last `max_history_items` turns to the LLM; older turns are dropped and forgotten. On CPU, long calls also grow expensive as the prompt fills with verbatim history. Enable `compaction` to instead fold older turns into a rolling summary, so long calls stay cheap without losing earlier context.
|
||||
|
||||
Compaction works with two numbers:
|
||||
|
||||
- **`max_history_items`** is the *live window* — the recent turns kept verbatim in the prompt.
|
||||
- **`compaction.trigger_items`** is the *high-water mark* — let the buffer grow to here, then summarize the overflow (everything above `max_history_items`) into a rolling memory and evict it. It must be greater than `max_history_items`; if it is not, it is clamped up.
|
||||
|
||||
The gap between the two controls how often summarization runs: a summary call fires roughly every `(trigger_items - max_history_items)` turns (here, about every 6 turns).
|
||||
|
||||
```yaml
|
||||
pipeline:
|
||||
max_history_items: 6 # live window — recent turns kept verbatim
|
||||
compaction:
|
||||
enabled: true
|
||||
trigger_items: 12 # summarize overflow back down to max_history_items
|
||||
summary_model: "" # optional: a small model for the summary (CPU); default = pipeline LLM
|
||||
max_summary_tokens: 512
|
||||
```
|
||||
|
||||
{{% notice tip %}}
|
||||
On CPU, set `summary_model` to a small, fast model so compaction never competes with the conversation LLM for compute. Left empty, the pipeline's own LLM produces the summary.
|
||||
{{% /notice %}}
|
||||
|
||||
Clients can also manage history directly via the now-supported `conversation.item.delete`, `conversation.item.truncate`, and `input_audio_buffer.clear` realtime events.
|
||||
|
||||
## Transports
|
||||
|
||||
The Realtime API supports two transports: **WebSocket** and **WebRTC**.
|
||||
|
||||
@@ -1,3 +1,3 @@
|
||||
{
|
||||
"version": "v4.5.0"
|
||||
"version": "v4.4.3"
|
||||
}
|
||||
|
||||
@@ -1,208 +1,4 @@
|
||||
---
|
||||
- name: "lfm2.5-1.2b-instruct"
|
||||
url: "github:mudler/LocalAI/gallery/virtual.yaml@master"
|
||||
urls:
|
||||
- https://huggingface.co/LiquidAI/LFM2.5-1.2B-Instruct-GGUF
|
||||
description: "Try LFM • Docs • LEAP • Discord\n\n# LFM2.5-1.2B-Instruct\n\nLFM2.5 is a new family of hybrid models designed for **on-device deployment**. It builds on the LFM2 architecture with extended pre-training and reinforcement learning.\n\n - **Best-in-class performance**: A 1.2B model rivaling much larger models, bringing high-quality AI to your pocket.\n - **Fast edge inference**: 239 tok/s decode on AMD CPU, 82 tok/s on mobile NPU. Runs under 1GB of memory with day-one support for llama.cpp, MLX, and vLLM.\n - **Scaled training**: Extended pre-training from 10T to 28T tokens and large-scale multi-stage reinforcement learning.\n\nFind more information about LFM2.5 in our blog post.\n\n## \U0001F5D2️ Model Details\n\nLFM2.5-1.2B-Instruct is a general-purpose text-only model with the following features:\n\n...\n"
|
||||
license: "other"
|
||||
tags:
|
||||
- llm
|
||||
- gguf
|
||||
icon: https://cdn-uploads.huggingface.co/production/uploads/61b8e2ba285851687028d395/dxnYF2fuLpulismtFSGFi.png
|
||||
overrides:
|
||||
backend: llama-cpp
|
||||
function:
|
||||
automatic_tool_parsing_fallback: true
|
||||
grammar:
|
||||
disable: true
|
||||
known_usecases:
|
||||
- chat
|
||||
options:
|
||||
- use_jinja:true
|
||||
parameters:
|
||||
min_p: 0.15
|
||||
model: llama-cpp/models/LFM2.5-1.2B-Instruct-GGUF/LFM2.5-1.2B-Instruct-Q4_K_M.gguf
|
||||
repeat_penalty: 1.05
|
||||
temperature: 0.1
|
||||
top_k: 50
|
||||
top_p: 0.1
|
||||
template:
|
||||
use_tokenizer_template: true
|
||||
files:
|
||||
- filename: llama-cpp/models/LFM2.5-1.2B-Instruct-GGUF/LFM2.5-1.2B-Instruct-Q4_K_M.gguf
|
||||
sha256: b1b3de114215d9507409a662a501a631095a479a419584e8a2ded6304b19b4f5
|
||||
uri: https://huggingface.co/LiquidAI/LFM2.5-1.2B-Instruct-GGUF/resolve/main/LFM2.5-1.2B-Instruct-Q4_K_M.gguf
|
||||
- name: "qwopus3.6-27b-coder-compat-mtp"
|
||||
url: "github:mudler/LocalAI/gallery/virtual.yaml@master"
|
||||
urls:
|
||||
- https://huggingface.co/Jackrong/Qwopus3.6-27B-Coder-Compat-MTP-GGUF
|
||||
description: "\U0001FA90 Qwopus-3.6-27B-Coder\nCoder SFT Release\n\nAgentic Coding & Tool-Use Reasoning Model Fine-Tuned on Qwopus3.6-27B-v2\n\n\U0001F9EC Trace Inversion & Negentropy\n\U0001F9E0 27B Dense Model\n⚡ Agentic Coding\n\U0001F6E0️ Tool Calling & Agent\n\U0001F3C6 SWE-bench Verified: 67.0% (off-thinking)\n\n\U0001F4A1 What is Qwopus-3.6-27B-Coder?\n\U0001FA90 Qwopus-3.6-27B-Coder is a reasoning-enhanced agentic coding model built on top of Qwopus3.6-27B-v2. It inherits the powerful reasoning foundation of the v2 base — which achieved 87.43% MMLU-Pro and 75.25% SWE-bench Verified — and further specializes it for agentic code generation, structured tool calling, debugging, and instruction-following in developer workflows. The model is designed to excel at repository-level coding tasks, multi-turn tool orchestration, and complex logical reasoning under realistic agent environments.\n\n\U0001F9E9 Agentic Coding\nOptimized for repository-level coding, debugging, patch generation, and structured multi-step development workflows.\n\n\U0001F6E0️ Tool Calling\nLearns from real agent trajectories with tool definitions, tool calls, and environment feedback for robust multi-turn execution.\n\n...\n"
|
||||
license: "apache-2.0"
|
||||
tags:
|
||||
- llm
|
||||
- gguf
|
||||
- vision
|
||||
- multimodal
|
||||
- reasoning
|
||||
icon: https://cdn-uploads.huggingface.co/production/uploads/66309bd090589b7c65950665/sGQKmrMc6L6guMoaB5_Y2.png
|
||||
overrides:
|
||||
backend: llama-cpp
|
||||
function:
|
||||
automatic_tool_parsing_fallback: true
|
||||
grammar:
|
||||
disable: true
|
||||
known_usecases:
|
||||
- chat
|
||||
mmproj: llama-cpp/mmproj/Qwopus3.6-27B-Coder-Compat-MTP-GGUF/mmproj-F32.gguf
|
||||
options:
|
||||
- use_jinja:true
|
||||
- spec_type:draft-mtp
|
||||
- spec_n_max:6
|
||||
- spec_p_min:0.75
|
||||
parameters:
|
||||
model: llama-cpp/models/Qwopus3.6-27B-Coder-Compat-MTP-GGUF/Qwopus3.6-27B-Coder-Compat-MTP-Q4_K_M.gguf
|
||||
template:
|
||||
use_tokenizer_template: true
|
||||
files:
|
||||
- filename: llama-cpp/models/Qwopus3.6-27B-Coder-Compat-MTP-GGUF/Qwopus3.6-27B-Coder-Compat-MTP-Q4_K_M.gguf
|
||||
sha256: f893632170124da60e159b7bcc9d91e1cda3014b2c6b8ad9c6cde38a1fcd2f6f
|
||||
uri: https://huggingface.co/Jackrong/Qwopus3.6-27B-Coder-Compat-MTP-GGUF/resolve/main/Qwopus3.6-27B-Coder-Compat-MTP-Q4_K_M.gguf
|
||||
- filename: llama-cpp/mmproj/Qwopus3.6-27B-Coder-Compat-MTP-GGUF/mmproj-F32.gguf
|
||||
sha256: 32f7ea0600c07272547da401d460f8abbd980f3a57b69d6df87be0e2505e0b9c
|
||||
uri: https://huggingface.co/Jackrong/Qwopus3.6-27B-Coder-Compat-MTP-GGUF/resolve/main/mmproj-F32.gguf
|
||||
- name: "kimi-k2.7-code"
|
||||
url: "github:mudler/LocalAI/gallery/virtual.yaml@master"
|
||||
urls:
|
||||
- https://huggingface.co/unsloth/Kimi-K2.7-Code-GGUF
|
||||
description: |
|
||||
## 1. Model Introduction
|
||||
|
||||
Kimi K2.7 Code is a coding-focused agentic model built upon Kimi K2.6. With substantial improvements on real-world long-horizon coding tasks, it strengthens end-to-end task completion across complex software engineering workflows while improving token efficiency, reducing thinking-token usage by approximately 30% compared with Kimi K2.6.
|
||||
|
||||
## 2. Model Summary
|
||||
|
||||
## 3. Evaluation Results
|
||||
|
||||
Benchmark
|
||||
Kimi K2.6
|
||||
Kimi K2.7 Code
|
||||
GPT-5.5
|
||||
Claude Opus 4.8
|
||||
|
||||
Coding
|
||||
|
||||
Kimi Code Bench v2
|
||||
50.9
|
||||
62.0
|
||||
69.0
|
||||
67.4
|
||||
|
||||
Program Bench
|
||||
48.3
|
||||
53.6
|
||||
69.1
|
||||
63.8
|
||||
|
||||
MLS Bench Lite
|
||||
26.7
|
||||
35.1
|
||||
35.5
|
||||
42.8
|
||||
|
||||
Agentic
|
||||
|
||||
Kimi Claw 24/7 Bench
|
||||
42.9
|
||||
46.9
|
||||
52.8
|
||||
50.4
|
||||
|
||||
MCP Atlas
|
||||
69.4
|
||||
76.0
|
||||
79.4
|
||||
81.3
|
||||
|
||||
MCP Mark Verified
|
||||
72.8
|
||||
81.1
|
||||
92.9
|
||||
76.4
|
||||
|
||||
Footnotes
|
||||
|
||||
...
|
||||
license: "other"
|
||||
tags:
|
||||
- llm
|
||||
- gguf
|
||||
icon: https://huggingface.co/moonshotai/Kimi-K2.7-Code/resolve/main/figures/kimi-logo.png
|
||||
overrides:
|
||||
backend: llama-cpp
|
||||
function:
|
||||
automatic_tool_parsing_fallback: true
|
||||
grammar:
|
||||
disable: true
|
||||
known_usecases:
|
||||
- chat
|
||||
mmproj: llama-cpp/mmproj/Kimi-K2.7-Code-GGUF/mmproj-F32.gguf
|
||||
options:
|
||||
- use_jinja:true
|
||||
parameters:
|
||||
min_p: 0.01
|
||||
model: llama-cpp/models/Kimi-K2.7-Code-GGUF/Kimi-K2.7-Code-UD-Q8_K_XL-00001-of-00014.gguf
|
||||
repeat_penalty: 1
|
||||
temperature: 0.6
|
||||
top_k: -1
|
||||
top_p: 0.95
|
||||
template:
|
||||
use_tokenizer_template: true
|
||||
files:
|
||||
- filename: llama-cpp/models/Kimi-K2.7-Code-GGUF/Kimi-K2.7-Code-UD-Q8_K_XL-00001-of-00014.gguf
|
||||
sha256: 65f0aca336f876902323a90e2aff32cac76d071b2cdd818c6a8d78be8fc2c680
|
||||
uri: https://huggingface.co/unsloth/Kimi-K2.7-Code-GGUF/resolve/main/UD-Q8_K_XL/Kimi-K2.7-Code-UD-Q8_K_XL-00001-of-00014.gguf
|
||||
- filename: llama-cpp/models/Kimi-K2.7-Code-GGUF/Kimi-K2.7-Code-UD-Q8_K_XL-00002-of-00014.gguf
|
||||
sha256: 40f4416c130827a11502778891f4ef95b2144db90f51d63aa3548d0952a39683
|
||||
uri: https://huggingface.co/unsloth/Kimi-K2.7-Code-GGUF/resolve/main/UD-Q8_K_XL/Kimi-K2.7-Code-UD-Q8_K_XL-00002-of-00014.gguf
|
||||
- filename: llama-cpp/models/Kimi-K2.7-Code-GGUF/Kimi-K2.7-Code-UD-Q8_K_XL-00003-of-00014.gguf
|
||||
sha256: ba2ba0b5168784ace7c752ecadfc3631279b2bb023824cb0fe9e2dab3dd28f22
|
||||
uri: https://huggingface.co/unsloth/Kimi-K2.7-Code-GGUF/resolve/main/UD-Q8_K_XL/Kimi-K2.7-Code-UD-Q8_K_XL-00003-of-00014.gguf
|
||||
- filename: llama-cpp/models/Kimi-K2.7-Code-GGUF/Kimi-K2.7-Code-UD-Q8_K_XL-00004-of-00014.gguf
|
||||
sha256: 10298a6c98b13ef49be286fefbea8663e16473fb69bbeabe153bc80c60ae116e
|
||||
uri: https://huggingface.co/unsloth/Kimi-K2.7-Code-GGUF/resolve/main/UD-Q8_K_XL/Kimi-K2.7-Code-UD-Q8_K_XL-00004-of-00014.gguf
|
||||
- filename: llama-cpp/models/Kimi-K2.7-Code-GGUF/Kimi-K2.7-Code-UD-Q8_K_XL-00005-of-00014.gguf
|
||||
sha256: 8e9e4c8e35d34fc4fef6bfb65a715ad7defbd196970d833c1df6924d701c88b3
|
||||
uri: https://huggingface.co/unsloth/Kimi-K2.7-Code-GGUF/resolve/main/UD-Q8_K_XL/Kimi-K2.7-Code-UD-Q8_K_XL-00005-of-00014.gguf
|
||||
- filename: llama-cpp/models/Kimi-K2.7-Code-GGUF/Kimi-K2.7-Code-UD-Q8_K_XL-00006-of-00014.gguf
|
||||
sha256: ccff6e7f299742f82cf6f51a871e3eb3167511efaee967477cc8387f54d16442
|
||||
uri: https://huggingface.co/unsloth/Kimi-K2.7-Code-GGUF/resolve/main/UD-Q8_K_XL/Kimi-K2.7-Code-UD-Q8_K_XL-00006-of-00014.gguf
|
||||
- filename: llama-cpp/models/Kimi-K2.7-Code-GGUF/Kimi-K2.7-Code-UD-Q8_K_XL-00007-of-00014.gguf
|
||||
sha256: 1a3b639633a2d22f71156a9f643ded2329cdd969cc21177b644b5741bac1af8e
|
||||
uri: https://huggingface.co/unsloth/Kimi-K2.7-Code-GGUF/resolve/main/UD-Q8_K_XL/Kimi-K2.7-Code-UD-Q8_K_XL-00007-of-00014.gguf
|
||||
- filename: llama-cpp/models/Kimi-K2.7-Code-GGUF/Kimi-K2.7-Code-UD-Q8_K_XL-00008-of-00014.gguf
|
||||
sha256: bde28f682a1eab973538b2102007d952f37a13c1f7d55e2ed99177445ddc4282
|
||||
uri: https://huggingface.co/unsloth/Kimi-K2.7-Code-GGUF/resolve/main/UD-Q8_K_XL/Kimi-K2.7-Code-UD-Q8_K_XL-00008-of-00014.gguf
|
||||
- filename: llama-cpp/models/Kimi-K2.7-Code-GGUF/Kimi-K2.7-Code-UD-Q8_K_XL-00009-of-00014.gguf
|
||||
sha256: b6a23a95b61e100f7593fa75e2363966323fa767b7e4fdf45d963b59e8fdc69f
|
||||
uri: https://huggingface.co/unsloth/Kimi-K2.7-Code-GGUF/resolve/main/UD-Q8_K_XL/Kimi-K2.7-Code-UD-Q8_K_XL-00009-of-00014.gguf
|
||||
- filename: llama-cpp/models/Kimi-K2.7-Code-GGUF/Kimi-K2.7-Code-UD-Q8_K_XL-00010-of-00014.gguf
|
||||
sha256: fb10231c2e6d76921d40f22690f4aa08a8090c708edeaf7e581abafc24d3b25c
|
||||
uri: https://huggingface.co/unsloth/Kimi-K2.7-Code-GGUF/resolve/main/UD-Q8_K_XL/Kimi-K2.7-Code-UD-Q8_K_XL-00010-of-00014.gguf
|
||||
- filename: llama-cpp/models/Kimi-K2.7-Code-GGUF/Kimi-K2.7-Code-UD-Q8_K_XL-00011-of-00014.gguf
|
||||
sha256: d2290be7ed1a22ac1f9f8a4813389689e075ce2ab8abc3aaaa1157a3cb1462d8
|
||||
uri: https://huggingface.co/unsloth/Kimi-K2.7-Code-GGUF/resolve/main/UD-Q8_K_XL/Kimi-K2.7-Code-UD-Q8_K_XL-00011-of-00014.gguf
|
||||
- filename: llama-cpp/models/Kimi-K2.7-Code-GGUF/Kimi-K2.7-Code-UD-Q8_K_XL-00012-of-00014.gguf
|
||||
sha256: ce0d028314aa3fc783082dbca097e1055d69686a17ab8306574e2949568f26a5
|
||||
uri: https://huggingface.co/unsloth/Kimi-K2.7-Code-GGUF/resolve/main/UD-Q8_K_XL/Kimi-K2.7-Code-UD-Q8_K_XL-00012-of-00014.gguf
|
||||
- filename: llama-cpp/models/Kimi-K2.7-Code-GGUF/Kimi-K2.7-Code-UD-Q8_K_XL-00013-of-00014.gguf
|
||||
sha256: 217864ce63a1d130ab39dcb0996b6097e1aa78eb896e38efaefdbbac3a00b7ec
|
||||
uri: https://huggingface.co/unsloth/Kimi-K2.7-Code-GGUF/resolve/main/UD-Q8_K_XL/Kimi-K2.7-Code-UD-Q8_K_XL-00013-of-00014.gguf
|
||||
- filename: llama-cpp/models/Kimi-K2.7-Code-GGUF/Kimi-K2.7-Code-UD-Q8_K_XL-00014-of-00014.gguf
|
||||
sha256: eb7582ad7066c5eaa01bde95acb00b4ad9cd7b07cd50a6cf5c9ee427258bc9dd
|
||||
uri: https://huggingface.co/unsloth/Kimi-K2.7-Code-GGUF/resolve/main/UD-Q8_K_XL/Kimi-K2.7-Code-UD-Q8_K_XL-00014-of-00014.gguf
|
||||
- filename: llama-cpp/mmproj/Kimi-K2.7-Code-GGUF/mmproj-F32.gguf
|
||||
sha256: b2cc50c8c13fe70fc4968a83332f31e9007ea09ebb9ae91d46a4e4cd2a3053cd
|
||||
uri: https://huggingface.co/unsloth/Kimi-K2.7-Code-GGUF/resolve/main/mmproj-F32.gguf
|
||||
- name: "qwythos-9b-claude-mythos-5-1m"
|
||||
url: "github:mudler/LocalAI/gallery/virtual.yaml@master"
|
||||
urls:
|
||||
@@ -253,7 +49,33 @@
|
||||
url: "github:mudler/LocalAI/gallery/virtual.yaml@master"
|
||||
urls:
|
||||
- https://huggingface.co/unsloth/GLM-5.2-GGUF
|
||||
description: "# GLM-5.2\n\n\U0001F44B Join our WeChat or Discord community.\n\n\U0001F4D6 Check out the GLM-5.2 blog and GLM-5 Technical report.\n\n\U0001F4CD Use GLM-5.2 API services on Z.ai API Platform.\n\n\U0001F51C Try GLM-5.2 here.\n\n[Paper]\n[GitHub]\n\n## Introduction\n\nWe're introducing GLM-5.2, our latest flagship model for long-horizon tasks. It marks a substantial leap in long-horizon task capability over its predecessor GLM-5.1 and, for the first time, delivers that capability on a **solid 1M-token context**. GLM-5.2's new capabilities include:\n - **Solid 1M Context:** A solid 1M-token context that stably sustains long-horizon work\n - **Advanced Coding with Flexible Effort**: Stronger coding capabilities with multiple thinking effort levels to balance performance and latency\n - **Improved Architecture**: We propose IndexShare, which reuses the same indexer across every four sparse attention layers, reducing per-token FLOPs by 2.9× at a 1M context length. We also improve GLM-5.2’s MTP layer for speculative decoding, increasing the acceptance length by up to 20%\n - **Pure Open**: An MIT open-source license — no regional limits, technical access without borders\n\n## Benchmark\n\n## Serve GLM-5.2 Locally\n\n...\n"
|
||||
description: |
|
||||
# GLM-5.2
|
||||
|
||||
👋 Join our WeChat or Discord community.
|
||||
|
||||
📖 Check out the GLM-5.2 blog and GLM-5 Technical report.
|
||||
|
||||
📍 Use GLM-5.2 API services on Z.ai API Platform.
|
||||
|
||||
🔜 Try GLM-5.2 here.
|
||||
|
||||
[Paper]
|
||||
[GitHub]
|
||||
|
||||
## Introduction
|
||||
|
||||
We're introducing GLM-5.2, our latest flagship model for long-horizon tasks. It marks a substantial leap in long-horizon task capability over its predecessor GLM-5.1 and, for the first time, delivers that capability on a **solid 1M-token context**. GLM-5.2's new capabilities include:
|
||||
- **Solid 1M Context:** A solid 1M-token context that stably sustains long-horizon work
|
||||
- **Advanced Coding with Flexible Effort**: Stronger coding capabilities with multiple thinking effort levels to balance performance and latency
|
||||
- **Improved Architecture**: We propose IndexShare, which reuses the same indexer across every four sparse attention layers, reducing per-token FLOPs by 2.9× at a 1M context length. We also improve GLM-5.2’s MTP layer for speculative decoding, increasing the acceptance length by up to 20%
|
||||
- **Pure Open**: An MIT open-source license — no regional limits, technical access without borders
|
||||
|
||||
## Benchmark
|
||||
|
||||
## Serve GLM-5.2 Locally
|
||||
|
||||
...
|
||||
license: "mit"
|
||||
tags:
|
||||
- llm
|
||||
@@ -376,7 +198,26 @@
|
||||
url: "github:mudler/LocalAI/gallery/virtual.yaml@master"
|
||||
urls:
|
||||
- https://huggingface.co/michaelw9999/Qwopus3.6-27B-v2-MTP-NVFP4-GGUF
|
||||
description: "\U0001FA90 Qwopus3.6-27B-v2-MTP\nMTP Release\n\nMulti-Token Prediction reasoning model fine-tuned from Qwen3.6-27B\n\n\U0001F9EC Trace Inversion & Negentropy\n\U0001F9E0 27B Parameters\n⚡ Speculative Decoding\n\U0001F6E0️ Coding / DevOps / Math\n\n\U0001F4A1 What is Qwopus3.6-27B-v2-MTP?\n\U0001FA90 Qwopus3.6-27B-v2-MTP is a speed-oriented reasoning release built on top of Qwen3.6-27B. It keeps the Qwopus line's focus on reconstructed reasoning traces, coding discipline, DevOps procedures, and mathematical derivations, while adding Multi-Token Prediction for faster generation. The goal is simple: preserve the depth and structure of a 27B reasoning model while making real interactive use noticeably faster.\n\n⚡ MTP DecodingAuxiliary future-token prediction improves throughput on long reasoning, code, math, and strict-format prompts.\n\U0001F9E9 Structured ReasoningInherits the Qwopus training recipe built around reconstructed step-by-step reasoning trajectories.\n\U0001F9EA GB10 TestedValidated on a 30-question local benchmark across Logic, Coding, DevOps, Math, and Edge tasks.\n\U0001F680 Practical SpeedDesigned for workflows where strong answers matter, but waiting several extra minutes per task does not.\n\n...\n"
|
||||
description: |
|
||||
🪐 Qwopus3.6-27B-v2-MTP
|
||||
MTP Release
|
||||
|
||||
Multi-Token Prediction reasoning model fine-tuned from Qwen3.6-27B
|
||||
|
||||
🧬 Trace Inversion & Negentropy
|
||||
🧠 27B Parameters
|
||||
⚡ Speculative Decoding
|
||||
🛠️ Coding / DevOps / Math
|
||||
|
||||
💡 What is Qwopus3.6-27B-v2-MTP?
|
||||
🪐 Qwopus3.6-27B-v2-MTP is a speed-oriented reasoning release built on top of Qwen3.6-27B. It keeps the Qwopus line's focus on reconstructed reasoning traces, coding discipline, DevOps procedures, and mathematical derivations, while adding Multi-Token Prediction for faster generation. The goal is simple: preserve the depth and structure of a 27B reasoning model while making real interactive use noticeably faster.
|
||||
|
||||
⚡ MTP Decoding: Auxiliary future-token prediction improves throughput on long reasoning, code, math, and strict-format prompts.
|
||||
🧩 Structured Reasoning: Inherits the Qwopus training recipe built around reconstructed step-by-step reasoning trajectories.
|
||||
🧪 GB10 Tested: Validated on a 30-question local benchmark across Logic, Coding, DevOps, Math, and Edge tasks.
|
||||
🚀 Practical Speed: Designed for workflows where strong answers matter, but waiting several extra minutes per task does not.
|
||||
|
||||
...
|
||||
tags:
|
||||
- llm
|
||||
- gguf
|
||||
@@ -402,7 +243,28 @@
|
||||
url: "github:mudler/LocalAI/gallery/virtual.yaml@master"
|
||||
urls:
|
||||
- https://huggingface.co/michaelw9999/Qwopus3.6-27B-Coder-MTP-NVFP4-GGUF
|
||||
description: "\U0001FA90 Qwopus-3.6-27B-Coder\nCoder SFT Release\n\nAgentic Coding & Tool-Use Reasoning Model Fine-Tuned on Qwopus3.6-27B-v2\n\n\U0001F9EC Trace Inversion & Negentropy\n\U0001F9E0 27B Dense Model\n⚡ Agentic Coding\n\U0001F6E0️ Tool Calling & Agent\n\U0001F3C6 SWE-bench Verified: 67.0% (off-thinking)\n\n\U0001F4A1 What is Qwopus-3.6-27B-Coder?\n\U0001FA90 Qwopus-3.6-27B-Coder is a reasoning-enhanced agentic coding model built on top of Qwopus3.6-27B-v2. It inherits the powerful reasoning foundation of the v2 base — which achieved 87.43% MMLU-Pro (300ex) and 75.25% SWE-bench Verified — and further specializes it for agentic code generation, structured tool calling, debugging, and instruction-following in developer workflows. The model is designed to excel at repository-level coding tasks, multi-turn tool orchestration, and complex logical reasoning under realistic agent environments.\n\n\U0001F9E9 Agentic Coding\nOptimized for repository-level coding, debugging, patch generation, and structured multi-step development workflows.\n\n\U0001F6E0️ Tool Calling\nLearns from real agent trajectories with tool definitions, tool calls, and environment feedback for robust multi-turn execution.\n\n...\n"
|
||||
description: |
|
||||
🪐 Qwopus-3.6-27B-Coder
|
||||
Coder SFT Release
|
||||
|
||||
Agentic Coding & Tool-Use Reasoning Model Fine-Tuned on Qwopus3.6-27B-v2
|
||||
|
||||
🧬 Trace Inversion & Negentropy
|
||||
🧠 27B Dense Model
|
||||
⚡ Agentic Coding
|
||||
🛠️ Tool Calling & Agent
|
||||
🏆 SWE-bench Verified: 67.0% (off-thinking)
|
||||
|
||||
💡 What is Qwopus-3.6-27B-Coder?
|
||||
🪐 Qwopus-3.6-27B-Coder is a reasoning-enhanced agentic coding model built on top of Qwopus3.6-27B-v2. It inherits the powerful reasoning foundation of the v2 base — which achieved 87.43% MMLU-Pro (300ex) and 75.25% SWE-bench Verified — and further specializes it for agentic code generation, structured tool calling, debugging, and instruction-following in developer workflows. The model is designed to excel at repository-level coding tasks, multi-turn tool orchestration, and complex logical reasoning under realistic agent environments.
|
||||
|
||||
🧩 Agentic Coding
|
||||
Optimized for repository-level coding, debugging, patch generation, and structured multi-step development workflows.
|
||||
|
||||
🛠️ Tool Calling
|
||||
Learns from real agent trajectories with tool definitions, tool calls, and environment feedback for robust multi-turn execution.
|
||||
|
||||
...
|
||||
tags:
|
||||
- llm
|
||||
- gguf
|
||||
@@ -825,8 +687,8 @@
|
||||
use_tokenizer_template: true
|
||||
files:
|
||||
- filename: llama-cpp/models/Qwopus3.6-27B-Coder-MTP-GGUF/Qwopus3.6-27B-Coder-MTP-Q4_K_M.gguf
|
||||
sha256: b2898667ed7b2388f0ab7691393833ae777f247492bbe62fdb4b2bd3e3cf3f79
|
||||
uri: https://huggingface.co/Jackrong/Qwopus3.6-27B-Coder-MTP-GGUF/resolve/main/Qwopus3.6-27B-Coder-MTP-Q4_K_M.gguf
|
||||
sha256: b2b9180093496da2e00439e3fa23227c591355901bfa579bc6897bbc01b755ef
|
||||
- filename: llama-cpp/mmproj/Qwopus3.6-27B-Coder-MTP-GGUF/mmproj-F32.gguf
|
||||
sha256: 32f7ea0600c07272547da401d460f8abbd980f3a57b69d6df87be0e2505e0b9c
|
||||
uri: https://huggingface.co/Jackrong/Qwopus3.6-27B-Coder-MTP-GGUF/resolve/main/mmproj-F32.gguf
|
||||
@@ -1622,8 +1484,8 @@
|
||||
use_tokenizer_template: true
|
||||
files:
|
||||
- filename: llama-cpp/models/Qwopus3.6-27B-v2-MTP-GGUF/Qwopus3.6-27B-v2-MTP-Q4_K_M.gguf
|
||||
sha256: 818d68223be4d8518dac0b3b5604dde633cbbcbae1f491d842a3e26711c6606d
|
||||
uri: https://huggingface.co/Jackrong/Qwopus3.6-27B-v2-MTP-GGUF/resolve/main/Qwopus3.6-27B-v2-MTP-Q4_K_M.gguf
|
||||
sha256: 31cf5fc2406a0c7aaebcc26d440bf0df94e215d0589d5205bf319649c052b50a
|
||||
- name: "qwen3.6-40b-claude-4.6-opus-deckard-heretic-uncensored-thinking-neo-code-di-imatrix-max"
|
||||
url: "github:mudler/LocalAI/gallery/virtual.yaml@master"
|
||||
urls:
|
||||
|
||||
@@ -41,34 +41,11 @@ func buildClient(address string, parallel bool, wd WatchDog, enableWatchDog bool
|
||||
}
|
||||
}
|
||||
|
||||
// Backend is the full client surface of a model backend. It is deliberately
|
||||
// composed of two sub-interfaces so that wrappers can get a COMPILE-TIME
|
||||
// guarantee about which methods they must account for:
|
||||
//
|
||||
// - InferenceBackend - methods that each perform one discrete inference call
|
||||
// (the call begins on entry and ends on return). A wrapper that does
|
||||
// per-call accounting - e.g. the distributed router's in-flight tracker,
|
||||
// core/services/nodes.InFlightTrackingClient - embeds only ControlBackend
|
||||
// and implements every InferenceBackend method explicitly. Adding a method
|
||||
// to InferenceBackend therefore breaks that wrapper's build until it is
|
||||
// implemented: inference can't be added without an accounting decision.
|
||||
// - ControlBackend - everything that is NOT a discrete inference call:
|
||||
// lifecycle/control-plane operations and the streaming constructors whose
|
||||
// work spans the returned stream rather than the constructor call. These
|
||||
// are safe to pass through untracked.
|
||||
//
|
||||
// Keep the two sets disjoint; every backend method belongs to exactly one.
|
||||
type Backend interface {
|
||||
InferenceBackend
|
||||
ControlBackend
|
||||
}
|
||||
|
||||
// InferenceBackend is the subset of Backend whose methods each map to a single
|
||||
// inference call. Wrappers that account for in-flight work must implement these
|
||||
// explicitly (see Backend). Do NOT add methods that return a stream client or
|
||||
// that are control-plane only - those belong in ControlBackend.
|
||||
type InferenceBackend interface {
|
||||
IsBusy() bool
|
||||
HealthCheck(ctx context.Context) (bool, error)
|
||||
Embeddings(ctx context.Context, in *pb.PredictOptions, opts ...grpc.CallOption) (*pb.EmbeddingResult, error)
|
||||
LoadModel(ctx context.Context, in *pb.ModelOptions, opts ...grpc.CallOption) (*pb.Result, error)
|
||||
PredictStream(ctx context.Context, in *pb.PredictOptions, f func(reply *pb.Reply), opts ...grpc.CallOption) error
|
||||
Predict(ctx context.Context, in *pb.PredictOptions, opts ...grpc.CallOption) (*pb.Reply, error)
|
||||
GenerateImage(ctx context.Context, in *pb.GenerateImageRequest, opts ...grpc.CallOption) (*pb.Result, error)
|
||||
@@ -76,8 +53,6 @@ type InferenceBackend interface {
|
||||
TTS(ctx context.Context, in *pb.TTSRequest, opts ...grpc.CallOption) (*pb.Result, error)
|
||||
TTSStream(ctx context.Context, in *pb.TTSRequest, f func(reply *pb.Reply), opts ...grpc.CallOption) error
|
||||
SoundGeneration(ctx context.Context, in *pb.SoundGenerationRequest, opts ...grpc.CallOption) (*pb.Result, error)
|
||||
AudioTranscription(ctx context.Context, in *pb.TranscriptRequest, opts ...grpc.CallOption) (*pb.TranscriptResult, error)
|
||||
AudioTranscriptionStream(ctx context.Context, in *pb.TranscriptRequest, f func(chunk *pb.TranscriptStreamResponse), opts ...grpc.CallOption) error
|
||||
Detect(ctx context.Context, in *pb.DetectOptions, opts ...grpc.CallOption) (*pb.DetectResponse, error)
|
||||
Depth(ctx context.Context, in *pb.DepthRequest, opts ...grpc.CallOption) (*pb.DepthResponse, error)
|
||||
FaceVerify(ctx context.Context, in *pb.FaceVerifyRequest, opts ...grpc.CallOption) (*pb.FaceVerifyResponse, error)
|
||||
@@ -85,25 +60,8 @@ type InferenceBackend interface {
|
||||
VoiceVerify(ctx context.Context, in *pb.VoiceVerifyRequest, opts ...grpc.CallOption) (*pb.VoiceVerifyResponse, error)
|
||||
VoiceAnalyze(ctx context.Context, in *pb.VoiceAnalyzeRequest, opts ...grpc.CallOption) (*pb.VoiceAnalyzeResponse, error)
|
||||
VoiceEmbed(ctx context.Context, in *pb.VoiceEmbedRequest, opts ...grpc.CallOption) (*pb.VoiceEmbedResponse, error)
|
||||
Rerank(ctx context.Context, in *pb.RerankRequest, opts ...grpc.CallOption) (*pb.RerankResult, error)
|
||||
TokenClassify(ctx context.Context, in *pb.TokenClassifyRequest, opts ...grpc.CallOption) (*pb.TokenClassifyResponse, error)
|
||||
Score(ctx context.Context, in *pb.ScoreRequest, opts ...grpc.CallOption) (*pb.ScoreResponse, error)
|
||||
VAD(ctx context.Context, in *pb.VADRequest, opts ...grpc.CallOption) (*pb.VADResponse, error)
|
||||
Diarize(ctx context.Context, in *pb.DiarizeRequest, opts ...grpc.CallOption) (*pb.DiarizeResponse, error)
|
||||
SoundDetection(ctx context.Context, in *pb.SoundDetectionRequest, opts ...grpc.CallOption) (*pb.SoundDetectionResponse, error)
|
||||
AudioEncode(ctx context.Context, in *pb.AudioEncodeRequest, opts ...grpc.CallOption) (*pb.AudioEncodeResult, error)
|
||||
AudioDecode(ctx context.Context, in *pb.AudioDecodeRequest, opts ...grpc.CallOption) (*pb.AudioDecodeResult, error)
|
||||
AudioTransform(ctx context.Context, in *pb.AudioTransformRequest, opts ...grpc.CallOption) (*pb.AudioTransformResult, error)
|
||||
}
|
||||
|
||||
// ControlBackend is the subset of Backend that is NOT per-call inference:
|
||||
// lifecycle/control-plane operations and the streaming constructors whose work
|
||||
// spans the returned stream rather than the constructor call. In-flight-tracking
|
||||
// wrappers embed this directly and pass it through untracked (see Backend).
|
||||
type ControlBackend interface {
|
||||
IsBusy() bool
|
||||
HealthCheck(ctx context.Context) (bool, error)
|
||||
LoadModel(ctx context.Context, in *pb.ModelOptions, opts ...grpc.CallOption) (*pb.Result, error)
|
||||
AudioTranscription(ctx context.Context, in *pb.TranscriptRequest, opts ...grpc.CallOption) (*pb.TranscriptResult, error)
|
||||
AudioTranscriptionStream(ctx context.Context, in *pb.TranscriptRequest, f func(chunk *pb.TranscriptStreamResponse), opts ...grpc.CallOption) error
|
||||
TokenizeString(ctx context.Context, in *pb.PredictOptions, opts ...grpc.CallOption) (*pb.TokenizationResponse, error)
|
||||
Status(ctx context.Context) (*pb.StatusResponse, error)
|
||||
|
||||
@@ -112,11 +70,24 @@ type ControlBackend interface {
|
||||
StoresGet(ctx context.Context, in *pb.StoresGetOptions, opts ...grpc.CallOption) (*pb.StoresGetResult, error)
|
||||
StoresFind(ctx context.Context, in *pb.StoresFindOptions, opts ...grpc.CallOption) (*pb.StoresFindResult, error)
|
||||
|
||||
Rerank(ctx context.Context, in *pb.RerankRequest, opts ...grpc.CallOption) (*pb.RerankResult, error)
|
||||
|
||||
TokenClassify(ctx context.Context, in *pb.TokenClassifyRequest, opts ...grpc.CallOption) (*pb.TokenClassifyResponse, error)
|
||||
|
||||
Score(ctx context.Context, in *pb.ScoreRequest, opts ...grpc.CallOption) (*pb.ScoreResponse, error)
|
||||
|
||||
GetTokenMetrics(ctx context.Context, in *pb.MetricsRequest, opts ...grpc.CallOption) (*pb.MetricsResponse, error)
|
||||
|
||||
// Streaming constructors: these return a stream client immediately; the
|
||||
// actual inference spans the stream's lifetime, not this call, so they are
|
||||
// NOT tracked as a single in-flight unit.
|
||||
VAD(ctx context.Context, in *pb.VADRequest, opts ...grpc.CallOption) (*pb.VADResponse, error)
|
||||
|
||||
Diarize(ctx context.Context, in *pb.DiarizeRequest, opts ...grpc.CallOption) (*pb.DiarizeResponse, error)
|
||||
|
||||
SoundDetection(ctx context.Context, in *pb.SoundDetectionRequest, opts ...grpc.CallOption) (*pb.SoundDetectionResponse, error)
|
||||
|
||||
AudioEncode(ctx context.Context, in *pb.AudioEncodeRequest, opts ...grpc.CallOption) (*pb.AudioEncodeResult, error)
|
||||
AudioDecode(ctx context.Context, in *pb.AudioDecodeRequest, opts ...grpc.CallOption) (*pb.AudioDecodeResult, error)
|
||||
|
||||
AudioTransform(ctx context.Context, in *pb.AudioTransformRequest, opts ...grpc.CallOption) (*pb.AudioTransformResult, error)
|
||||
AudioTransformStream(ctx context.Context, opts ...grpc.CallOption) (AudioTransformStreamClient, error)
|
||||
AudioToAudioStream(ctx context.Context, opts ...grpc.CallOption) (AudioToAudioStreamClient, error)
|
||||
|
||||
|
||||
@@ -53,13 +53,12 @@ var _ = Describe("Gallery Distributed", Label("Distributed"), func() {
|
||||
Expect(retrieved.Status).To(Equal("downloading"))
|
||||
Expect(retrieved.FrontendID).To(Equal("f1"))
|
||||
|
||||
// Update progress (cancellable: a downloading install can be cancelled)
|
||||
Expect(galleryStore.UpdateProgress(op.ID, 0.75, "75% complete", "6GB", true)).To(Succeed())
|
||||
// Update progress
|
||||
Expect(galleryStore.UpdateProgress(op.ID, 0.75, "75% complete", "6GB")).To(Succeed())
|
||||
|
||||
updated, _ := galleryStore.Get(op.ID)
|
||||
Expect(updated.Progress).To(BeNumerically("~", 0.75, 0.01))
|
||||
Expect(updated.Message).To(Equal("75% complete"))
|
||||
Expect(updated.Cancellable).To(BeTrue())
|
||||
|
||||
// Complete
|
||||
Expect(galleryStore.UpdateStatus(op.ID, "completed", "")).To(Succeed())
|
||||
|
||||
@@ -104,12 +104,11 @@ var _ = Describe("Phase 4: MCP, Skills, Gallery, Fine-Tuning", Label("Distribute
|
||||
}
|
||||
stores.Gallery.Create(op)
|
||||
|
||||
Expect(stores.Gallery.UpdateProgress(op.ID, 0.5, "50% complete", "2GB", true)).To(Succeed())
|
||||
Expect(stores.Gallery.UpdateProgress(op.ID, 0.5, "50% complete", "2GB")).To(Succeed())
|
||||
|
||||
updated, _ := stores.Gallery.Get(op.ID)
|
||||
Expect(updated.Progress).To(BeNumerically("~", 0.5, 0.01))
|
||||
Expect(updated.Message).To(Equal("50% complete"))
|
||||
Expect(updated.Cancellable).To(BeTrue())
|
||||
})
|
||||
|
||||
It("should deduplicate concurrent downloads", func() {
|
||||
|
||||
Reference in New Issue
Block a user