mirror of
https://github.com/mudler/LocalAI.git
synced 2026-06-25 09:09:07 -04:00
Compare commits
24 Commits
feat/recon
...
v4.5.0
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
deb430f3ec | ||
|
|
dd8c8778e2 | ||
|
|
06a7b6cadb | ||
|
|
67c8889866 | ||
|
|
1d49041c85 | ||
|
|
2edc4e25b3 | ||
|
|
7888067914 | ||
|
|
9eedbf537a | ||
|
|
69c16481c8 | ||
|
|
56f8a6623f | ||
|
|
4755d676a3 | ||
|
|
10184b5e28 | ||
|
|
fdf475ec5f | ||
|
|
9d54a599b0 | ||
|
|
63bcbf6c12 | ||
|
|
95b058e1c5 | ||
|
|
f2abcc7503 | ||
|
|
62c99c10b3 | ||
|
|
7226bb9f30 | ||
|
|
569d9bbd9e | ||
|
|
682fb2718c | ||
|
|
20c643e1f6 | ||
|
|
64a4351f3a | ||
|
|
b7d67f5779 |
2
.github/workflows/backend.yml
vendored
2
.github/workflows/backend.yml
vendored
@@ -44,7 +44,7 @@ jobs:
|
|||||||
has-merges-singlearch: ${{ steps.set-matrix.outputs['has-merges-singlearch'] }}
|
has-merges-singlearch: ${{ steps.set-matrix.outputs['has-merges-singlearch'] }}
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout repository
|
- name: Checkout repository
|
||||||
uses: actions/checkout@v6
|
uses: actions/checkout@v7
|
||||||
|
|
||||||
- name: Setup Bun
|
- name: Setup Bun
|
||||||
uses: oven-sh/setup-bun@v2
|
uses: oven-sh/setup-bun@v2
|
||||||
|
|||||||
2
.github/workflows/backend_build.yml
vendored
2
.github/workflows/backend_build.yml
vendored
@@ -101,7 +101,7 @@ jobs:
|
|||||||
steps:
|
steps:
|
||||||
|
|
||||||
- name: Checkout
|
- name: Checkout
|
||||||
uses: actions/checkout@v6
|
uses: actions/checkout@v7
|
||||||
with:
|
with:
|
||||||
submodules: true
|
submodules: true
|
||||||
|
|
||||||
|
|||||||
2
.github/workflows/backend_build_darwin.yml
vendored
2
.github/workflows/backend_build_darwin.yml
vendored
@@ -57,7 +57,7 @@ jobs:
|
|||||||
HOMEBREW_NO_ANALYTICS: '1'
|
HOMEBREW_NO_ANALYTICS: '1'
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v6
|
uses: actions/checkout@v7
|
||||||
with:
|
with:
|
||||||
submodules: true
|
submodules: true
|
||||||
|
|
||||||
|
|||||||
2
.github/workflows/backend_merge.yml
vendored
2
.github/workflows/backend_merge.yml
vendored
@@ -49,7 +49,7 @@ jobs:
|
|||||||
# Sparse checkout: the merge job needs `.github/scripts/` (for the
|
# Sparse checkout: the merge job needs `.github/scripts/` (for the
|
||||||
# keepalive cleanup script) but none of the source tree.
|
# keepalive cleanup script) but none of the source tree.
|
||||||
- name: Checkout (.github/scripts only)
|
- name: Checkout (.github/scripts only)
|
||||||
uses: actions/checkout@v6
|
uses: actions/checkout@v7
|
||||||
with:
|
with:
|
||||||
sparse-checkout: |
|
sparse-checkout: |
|
||||||
.github/scripts
|
.github/scripts
|
||||||
|
|||||||
2
.github/workflows/backend_pr.yml
vendored
2
.github/workflows/backend_pr.yml
vendored
@@ -23,7 +23,7 @@ jobs:
|
|||||||
has-merges-singlearch: ${{ steps.set-matrix.outputs['has-merges-singlearch'] }}
|
has-merges-singlearch: ${{ steps.set-matrix.outputs['has-merges-singlearch'] }}
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout repository
|
- name: Checkout repository
|
||||||
uses: actions/checkout@v6
|
uses: actions/checkout@v7
|
||||||
|
|
||||||
- name: Setup Bun
|
- name: Setup Bun
|
||||||
uses: oven-sh/setup-bun@v2
|
uses: oven-sh/setup-bun@v2
|
||||||
|
|||||||
2
.github/workflows/base-images.yml
vendored
2
.github/workflows/base-images.yml
vendored
@@ -127,7 +127,7 @@ jobs:
|
|||||||
# the original l4t matrix entry which set skip-drivers: 'true'.
|
# the original l4t matrix entry which set skip-drivers: 'true'.
|
||||||
skip-drivers: 'true'
|
skip-drivers: 'true'
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v6
|
- uses: actions/checkout@v7
|
||||||
with:
|
with:
|
||||||
submodules: false
|
submodules: false
|
||||||
- name: Free disk space
|
- name: Free disk space
|
||||||
|
|||||||
6
.github/workflows/build-test.yaml
vendored
6
.github/workflows/build-test.yaml
vendored
@@ -11,7 +11,7 @@ jobs:
|
|||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout
|
- name: Checkout
|
||||||
uses: actions/checkout@v6
|
uses: actions/checkout@v7
|
||||||
with:
|
with:
|
||||||
fetch-depth: 0
|
fetch-depth: 0
|
||||||
- name: Set up Go
|
- name: Set up Go
|
||||||
@@ -25,7 +25,7 @@ jobs:
|
|||||||
runs-on: macos-latest
|
runs-on: macos-latest
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout
|
- name: Checkout
|
||||||
uses: actions/checkout@v6
|
uses: actions/checkout@v7
|
||||||
with:
|
with:
|
||||||
fetch-depth: 0
|
fetch-depth: 0
|
||||||
- name: Set up Go
|
- name: Set up Go
|
||||||
@@ -47,7 +47,7 @@ jobs:
|
|||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout
|
- name: Checkout
|
||||||
uses: actions/checkout@v6
|
uses: actions/checkout@v7
|
||||||
with:
|
with:
|
||||||
fetch-depth: 0
|
fetch-depth: 0
|
||||||
- name: Configure apt mirror on runner
|
- name: Configure apt mirror on runner
|
||||||
|
|||||||
@@ -14,7 +14,7 @@ jobs:
|
|||||||
bump:
|
bump:
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v6
|
- uses: actions/checkout@v7
|
||||||
|
|
||||||
- uses: actions/setup-go@v5
|
- uses: actions/setup-go@v5
|
||||||
with:
|
with:
|
||||||
|
|||||||
4
.github/workflows/bump_deps.yaml
vendored
4
.github/workflows/bump_deps.yaml
vendored
@@ -92,7 +92,7 @@ jobs:
|
|||||||
file: "backend/go/vibevoice-cpp/Makefile"
|
file: "backend/go/vibevoice-cpp/Makefile"
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v6
|
- uses: actions/checkout@v7
|
||||||
- name: Bump dependencies 🔧
|
- name: Bump dependencies 🔧
|
||||||
id: bump
|
id: bump
|
||||||
run: |
|
run: |
|
||||||
@@ -128,7 +128,7 @@ jobs:
|
|||||||
if: github.repository == 'mudler/LocalAI'
|
if: github.repository == 'mudler/LocalAI'
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v6
|
- uses: actions/checkout@v7
|
||||||
- name: Bump vLLM cu130 wheel pin 🔧
|
- name: Bump vLLM cu130 wheel pin 🔧
|
||||||
id: bump
|
id: bump
|
||||||
run: |
|
run: |
|
||||||
|
|||||||
2
.github/workflows/bump_docs.yaml
vendored
2
.github/workflows/bump_docs.yaml
vendored
@@ -13,7 +13,7 @@ jobs:
|
|||||||
- repository: "mudler/LocalAI"
|
- repository: "mudler/LocalAI"
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v6
|
- uses: actions/checkout@v7
|
||||||
- name: Bump dependencies 🔧
|
- name: Bump dependencies 🔧
|
||||||
run: |
|
run: |
|
||||||
bash .github/bump_docs.sh ${{ matrix.repository }}
|
bash .github/bump_docs.sh ${{ matrix.repository }}
|
||||||
|
|||||||
2
.github/workflows/checksum_checker.yaml
vendored
2
.github/workflows/checksum_checker.yaml
vendored
@@ -8,7 +8,7 @@ jobs:
|
|||||||
if: github.repository == 'mudler/LocalAI'
|
if: github.repository == 'mudler/LocalAI'
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v6
|
- uses: actions/checkout@v7
|
||||||
- name: Configure apt mirror on runner
|
- name: Configure apt mirror on runner
|
||||||
uses: ./.github/actions/configure-apt-mirror
|
uses: ./.github/actions/configure-apt-mirror
|
||||||
- name: Install dependencies
|
- name: Install dependencies
|
||||||
|
|||||||
2
.github/workflows/deploy-explorer.yaml
vendored
2
.github/workflows/deploy-explorer.yaml
vendored
@@ -16,7 +16,7 @@ jobs:
|
|||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v6
|
uses: actions/checkout@v7
|
||||||
with:
|
with:
|
||||||
submodules: true
|
submodules: true
|
||||||
- uses: actions/setup-go@v5
|
- uses: actions/setup-go@v5
|
||||||
|
|||||||
2
.github/workflows/gallery-agent.yaml
vendored
2
.github/workflows/gallery-agent.yaml
vendored
@@ -31,7 +31,7 @@ jobs:
|
|||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout repository
|
- name: Checkout repository
|
||||||
uses: actions/checkout@v6
|
uses: actions/checkout@v7
|
||||||
with:
|
with:
|
||||||
token: ${{ secrets.GITHUB_TOKEN }}
|
token: ${{ secrets.GITHUB_TOKEN }}
|
||||||
|
|
||||||
|
|||||||
2
.github/workflows/generate_intel_image.yaml
vendored
2
.github/workflows/generate_intel_image.yaml
vendored
@@ -44,7 +44,7 @@ jobs:
|
|||||||
uses: docker/setup-buildx-action@master
|
uses: docker/setup-buildx-action@master
|
||||||
|
|
||||||
- name: Checkout
|
- name: Checkout
|
||||||
uses: actions/checkout@v6
|
uses: actions/checkout@v7
|
||||||
|
|
||||||
- name: Cache Intel images
|
- name: Cache Intel images
|
||||||
uses: docker/build-push-action@v7
|
uses: docker/build-push-action@v7
|
||||||
|
|||||||
2
.github/workflows/gh-pages.yml
vendored
2
.github/workflows/gh-pages.yml
vendored
@@ -28,7 +28,7 @@ jobs:
|
|||||||
HUGO_VERSION: "0.146.3"
|
HUGO_VERSION: "0.146.3"
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout
|
- name: Checkout
|
||||||
uses: actions/checkout@v6
|
uses: actions/checkout@v7
|
||||||
with:
|
with:
|
||||||
fetch-depth: 0 # needed for enableGitInfo
|
fetch-depth: 0 # needed for enableGitInfo
|
||||||
submodules: true
|
submodules: true
|
||||||
|
|||||||
2
.github/workflows/image_build.yml
vendored
2
.github/workflows/image_build.yml
vendored
@@ -80,7 +80,7 @@ jobs:
|
|||||||
steps:
|
steps:
|
||||||
|
|
||||||
- name: Checkout
|
- name: Checkout
|
||||||
uses: actions/checkout@v6
|
uses: actions/checkout@v7
|
||||||
|
|
||||||
- name: Configure apt mirror on runner
|
- name: Configure apt mirror on runner
|
||||||
id: apt_mirror
|
id: apt_mirror
|
||||||
|
|||||||
2
.github/workflows/image_merge.yml
vendored
2
.github/workflows/image_merge.yml
vendored
@@ -36,7 +36,7 @@ jobs:
|
|||||||
# Sparse checkout: needed for .github/scripts/ (the keepalive cleanup
|
# Sparse checkout: needed for .github/scripts/ (the keepalive cleanup
|
||||||
# script). Skips the rest of the source tree.
|
# script). Skips the rest of the source tree.
|
||||||
- name: Checkout (.github/scripts only)
|
- name: Checkout (.github/scripts only)
|
||||||
uses: actions/checkout@v6
|
uses: actions/checkout@v7
|
||||||
with:
|
with:
|
||||||
sparse-checkout: |
|
sparse-checkout: |
|
||||||
.github/scripts
|
.github/scripts
|
||||||
|
|||||||
2
.github/workflows/lint.yml
vendored
2
.github/workflows/lint.yml
vendored
@@ -20,7 +20,7 @@ jobs:
|
|||||||
golangci-lint:
|
golangci-lint:
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v6
|
- uses: actions/checkout@v7
|
||||||
with:
|
with:
|
||||||
# Full history so golangci-lint's new-from-merge-base can reach
|
# Full history so golangci-lint's new-from-merge-base can reach
|
||||||
# origin/master and compute the diff against it.
|
# origin/master and compute the diff against it.
|
||||||
|
|||||||
6
.github/workflows/release.yaml
vendored
6
.github/workflows/release.yaml
vendored
@@ -10,7 +10,7 @@ jobs:
|
|||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout
|
- name: Checkout
|
||||||
uses: actions/checkout@v6
|
uses: actions/checkout@v7
|
||||||
with:
|
with:
|
||||||
fetch-depth: 0
|
fetch-depth: 0
|
||||||
- name: Set up Go
|
- name: Set up Go
|
||||||
@@ -28,7 +28,7 @@ jobs:
|
|||||||
runs-on: macos-latest
|
runs-on: macos-latest
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout
|
- name: Checkout
|
||||||
uses: actions/checkout@v6
|
uses: actions/checkout@v7
|
||||||
with:
|
with:
|
||||||
fetch-depth: 0
|
fetch-depth: 0
|
||||||
- name: Set up Go
|
- name: Set up Go
|
||||||
@@ -46,7 +46,7 @@ jobs:
|
|||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout
|
- name: Checkout
|
||||||
uses: actions/checkout@v6
|
uses: actions/checkout@v7
|
||||||
with:
|
with:
|
||||||
fetch-depth: 0
|
fetch-depth: 0
|
||||||
- name: Configure apt mirror on runner
|
- name: Configure apt mirror on runner
|
||||||
|
|||||||
2
.github/workflows/secscan.yaml
vendored
2
.github/workflows/secscan.yaml
vendored
@@ -14,7 +14,7 @@ jobs:
|
|||||||
GO111MODULE: on
|
GO111MODULE: on
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout Source
|
- name: Checkout Source
|
||||||
uses: actions/checkout@v6
|
uses: actions/checkout@v7
|
||||||
if: ${{ github.actor != 'dependabot[bot]' }}
|
if: ${{ github.actor != 'dependabot[bot]' }}
|
||||||
- name: Run Gosec Security Scanner
|
- name: Run Gosec Security Scanner
|
||||||
if: ${{ github.actor != 'dependabot[bot]' }}
|
if: ${{ github.actor != 'dependabot[bot]' }}
|
||||||
|
|||||||
86
.github/workflows/test-extra.yml
vendored
86
.github/workflows/test-extra.yml
vendored
@@ -50,7 +50,7 @@ jobs:
|
|||||||
parakeet-cpp: ${{ steps.detect.outputs.parakeet-cpp }}
|
parakeet-cpp: ${{ steps.detect.outputs.parakeet-cpp }}
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout repository
|
- name: Checkout repository
|
||||||
uses: actions/checkout@v6
|
uses: actions/checkout@v7
|
||||||
- name: Setup Bun
|
- name: Setup Bun
|
||||||
uses: oven-sh/setup-bun@v2
|
uses: oven-sh/setup-bun@v2
|
||||||
- name: Install dependencies
|
- name: Install dependencies
|
||||||
@@ -67,7 +67,7 @@ jobs:
|
|||||||
# runs-on: ubuntu-latest
|
# runs-on: ubuntu-latest
|
||||||
# steps:
|
# steps:
|
||||||
# - name: Clone
|
# - name: Clone
|
||||||
# uses: actions/checkout@v6
|
# uses: actions/checkout@v7
|
||||||
# with:
|
# with:
|
||||||
# submodules: true
|
# submodules: true
|
||||||
# - name: Dependencies
|
# - name: Dependencies
|
||||||
@@ -90,7 +90,7 @@ jobs:
|
|||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v6
|
uses: actions/checkout@v7
|
||||||
with:
|
with:
|
||||||
submodules: true
|
submodules: true
|
||||||
- name: Dependencies
|
- name: Dependencies
|
||||||
@@ -113,7 +113,7 @@ jobs:
|
|||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v6
|
uses: actions/checkout@v7
|
||||||
with:
|
with:
|
||||||
submodules: true
|
submodules: true
|
||||||
- name: Dependencies
|
- name: Dependencies
|
||||||
@@ -137,7 +137,7 @@ jobs:
|
|||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v6
|
uses: actions/checkout@v7
|
||||||
with:
|
with:
|
||||||
submodules: true
|
submodules: true
|
||||||
- name: Dependencies
|
- name: Dependencies
|
||||||
@@ -158,7 +158,7 @@ jobs:
|
|||||||
# runs-on: ubuntu-latest
|
# runs-on: ubuntu-latest
|
||||||
# steps:
|
# steps:
|
||||||
# - name: Clone
|
# - name: Clone
|
||||||
# uses: actions/checkout@v6
|
# uses: actions/checkout@v7
|
||||||
# with:
|
# with:
|
||||||
# submodules: true
|
# submodules: true
|
||||||
# - name: Dependencies
|
# - name: Dependencies
|
||||||
@@ -178,7 +178,7 @@ jobs:
|
|||||||
# runs-on: ubuntu-latest
|
# runs-on: ubuntu-latest
|
||||||
# steps:
|
# steps:
|
||||||
# - name: Clone
|
# - name: Clone
|
||||||
# uses: actions/checkout@v6
|
# uses: actions/checkout@v7
|
||||||
# with:
|
# with:
|
||||||
# submodules: true
|
# submodules: true
|
||||||
# - name: Dependencies
|
# - name: Dependencies
|
||||||
@@ -240,7 +240,7 @@ jobs:
|
|||||||
# sudo rm -rf "$AGENT_TOOLSDIRECTORY" || true
|
# sudo rm -rf "$AGENT_TOOLSDIRECTORY" || true
|
||||||
# df -h
|
# df -h
|
||||||
# - name: Clone
|
# - name: Clone
|
||||||
# uses: actions/checkout@v6
|
# uses: actions/checkout@v7
|
||||||
# with:
|
# with:
|
||||||
# submodules: true
|
# submodules: true
|
||||||
# - name: Dependencies
|
# - name: Dependencies
|
||||||
@@ -265,7 +265,7 @@ jobs:
|
|||||||
# runs-on: ubuntu-latest
|
# runs-on: ubuntu-latest
|
||||||
# steps:
|
# steps:
|
||||||
# - name: Clone
|
# - name: Clone
|
||||||
# uses: actions/checkout@v6
|
# uses: actions/checkout@v7
|
||||||
# with:
|
# with:
|
||||||
# submodules: true
|
# submodules: true
|
||||||
# - name: Dependencies
|
# - name: Dependencies
|
||||||
@@ -288,7 +288,7 @@ jobs:
|
|||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v6
|
uses: actions/checkout@v7
|
||||||
with:
|
with:
|
||||||
submodules: true
|
submodules: true
|
||||||
- name: Dependencies
|
- name: Dependencies
|
||||||
@@ -309,7 +309,7 @@ jobs:
|
|||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v6
|
uses: actions/checkout@v7
|
||||||
with:
|
with:
|
||||||
submodules: true
|
submodules: true
|
||||||
- name: Dependencies
|
- name: Dependencies
|
||||||
@@ -330,7 +330,7 @@ jobs:
|
|||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v6
|
uses: actions/checkout@v7
|
||||||
with:
|
with:
|
||||||
submodules: true
|
submodules: true
|
||||||
- name: Dependencies
|
- name: Dependencies
|
||||||
@@ -351,7 +351,7 @@ jobs:
|
|||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v6
|
uses: actions/checkout@v7
|
||||||
with:
|
with:
|
||||||
submodules: true
|
submodules: true
|
||||||
- name: Dependencies
|
- name: Dependencies
|
||||||
@@ -373,7 +373,7 @@ jobs:
|
|||||||
# timeout-minutes: 45
|
# timeout-minutes: 45
|
||||||
# steps:
|
# steps:
|
||||||
# - name: Clone
|
# - name: Clone
|
||||||
# uses: actions/checkout@v6
|
# uses: actions/checkout@v7
|
||||||
# with:
|
# with:
|
||||||
# submodules: true
|
# submodules: true
|
||||||
# - name: Dependencies
|
# - name: Dependencies
|
||||||
@@ -394,7 +394,7 @@ jobs:
|
|||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v6
|
uses: actions/checkout@v7
|
||||||
with:
|
with:
|
||||||
submodules: true
|
submodules: true
|
||||||
- name: Dependencies
|
- name: Dependencies
|
||||||
@@ -415,7 +415,7 @@ jobs:
|
|||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v6
|
uses: actions/checkout@v7
|
||||||
with:
|
with:
|
||||||
submodules: true
|
submodules: true
|
||||||
- name: Dependencies
|
- name: Dependencies
|
||||||
@@ -436,7 +436,7 @@ jobs:
|
|||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v6
|
uses: actions/checkout@v7
|
||||||
with:
|
with:
|
||||||
submodules: true
|
submodules: true
|
||||||
- name: Dependencies
|
- name: Dependencies
|
||||||
@@ -462,7 +462,7 @@ jobs:
|
|||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v6
|
uses: actions/checkout@v7
|
||||||
with:
|
with:
|
||||||
submodules: true
|
submodules: true
|
||||||
- name: Dependencies
|
- name: Dependencies
|
||||||
@@ -484,7 +484,7 @@ jobs:
|
|||||||
timeout-minutes: 30
|
timeout-minutes: 30
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v6
|
uses: actions/checkout@v7
|
||||||
with:
|
with:
|
||||||
submodules: true
|
submodules: true
|
||||||
- name: Dependencies
|
- name: Dependencies
|
||||||
@@ -513,7 +513,7 @@ jobs:
|
|||||||
timeout-minutes: 90
|
timeout-minutes: 90
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v6
|
uses: actions/checkout@v7
|
||||||
with:
|
with:
|
||||||
submodules: true
|
submodules: true
|
||||||
- name: Setup Go
|
- name: Setup Go
|
||||||
@@ -530,7 +530,7 @@ jobs:
|
|||||||
timeout-minutes: 90
|
timeout-minutes: 90
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v6
|
uses: actions/checkout@v7
|
||||||
with:
|
with:
|
||||||
submodules: true
|
submodules: true
|
||||||
- name: Setup Go
|
- name: Setup Go
|
||||||
@@ -552,7 +552,7 @@ jobs:
|
|||||||
timeout-minutes: 20
|
timeout-minutes: 20
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v6
|
uses: actions/checkout@v7
|
||||||
with:
|
with:
|
||||||
submodules: true
|
submodules: true
|
||||||
- name: Setup Go
|
- name: Setup Go
|
||||||
@@ -579,7 +579,7 @@ jobs:
|
|||||||
timeout-minutes: 90
|
timeout-minutes: 90
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v6
|
uses: actions/checkout@v7
|
||||||
with:
|
with:
|
||||||
submodules: true
|
submodules: true
|
||||||
- name: Setup Go
|
- name: Setup Go
|
||||||
@@ -604,7 +604,7 @@ jobs:
|
|||||||
timeout-minutes: 90
|
timeout-minutes: 90
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v6
|
uses: actions/checkout@v7
|
||||||
with:
|
with:
|
||||||
submodules: true
|
submodules: true
|
||||||
- name: Setup Go
|
- name: Setup Go
|
||||||
@@ -625,7 +625,7 @@ jobs:
|
|||||||
timeout-minutes: 90
|
timeout-minutes: 90
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v6
|
uses: actions/checkout@v7
|
||||||
with:
|
with:
|
||||||
submodules: true
|
submodules: true
|
||||||
- name: Setup Go
|
- name: Setup Go
|
||||||
@@ -645,7 +645,7 @@ jobs:
|
|||||||
timeout-minutes: 90
|
timeout-minutes: 90
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v6
|
uses: actions/checkout@v7
|
||||||
with:
|
with:
|
||||||
submodules: true
|
submodules: true
|
||||||
- name: Setup Go
|
- name: Setup Go
|
||||||
@@ -664,7 +664,7 @@ jobs:
|
|||||||
timeout-minutes: 90
|
timeout-minutes: 90
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v6
|
uses: actions/checkout@v7
|
||||||
with:
|
with:
|
||||||
submodules: true
|
submodules: true
|
||||||
- name: Setup Go
|
- name: Setup Go
|
||||||
@@ -681,7 +681,7 @@ jobs:
|
|||||||
timeout-minutes: 90
|
timeout-minutes: 90
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v6
|
uses: actions/checkout@v7
|
||||||
with:
|
with:
|
||||||
submodules: true
|
submodules: true
|
||||||
- name: Setup Go
|
- name: Setup Go
|
||||||
@@ -698,7 +698,7 @@ jobs:
|
|||||||
timeout-minutes: 90
|
timeout-minutes: 90
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v6
|
uses: actions/checkout@v7
|
||||||
with:
|
with:
|
||||||
submodules: true
|
submodules: true
|
||||||
- name: Setup Go
|
- name: Setup Go
|
||||||
@@ -741,7 +741,7 @@ jobs:
|
|||||||
# timeout-minutes: 90
|
# timeout-minutes: 90
|
||||||
# steps:
|
# steps:
|
||||||
# - name: Clone
|
# - name: Clone
|
||||||
# uses: actions/checkout@v6
|
# uses: actions/checkout@v7
|
||||||
# with:
|
# with:
|
||||||
# submodules: true
|
# submodules: true
|
||||||
# - name: Dependencies
|
# - name: Dependencies
|
||||||
@@ -783,7 +783,7 @@ jobs:
|
|||||||
# timeout-minutes: 90
|
# timeout-minutes: 90
|
||||||
# steps:
|
# steps:
|
||||||
# - name: Clone
|
# - name: Clone
|
||||||
# uses: actions/checkout@v6
|
# uses: actions/checkout@v7
|
||||||
# with:
|
# with:
|
||||||
# submodules: true
|
# submodules: true
|
||||||
# - name: Dependencies
|
# - name: Dependencies
|
||||||
@@ -808,7 +808,7 @@ jobs:
|
|||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v6
|
uses: actions/checkout@v7
|
||||||
with:
|
with:
|
||||||
submodules: true
|
submodules: true
|
||||||
- name: Dependencies
|
- name: Dependencies
|
||||||
@@ -840,7 +840,7 @@ jobs:
|
|||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v6
|
uses: actions/checkout@v7
|
||||||
with:
|
with:
|
||||||
submodules: true
|
submodules: true
|
||||||
- name: Dependencies
|
- name: Dependencies
|
||||||
@@ -876,7 +876,7 @@ jobs:
|
|||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v6
|
uses: actions/checkout@v7
|
||||||
with:
|
with:
|
||||||
submodules: true
|
submodules: true
|
||||||
- name: Dependencies
|
- name: Dependencies
|
||||||
@@ -915,7 +915,7 @@ jobs:
|
|||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v6
|
uses: actions/checkout@v7
|
||||||
with:
|
with:
|
||||||
submodules: true
|
submodules: true
|
||||||
- name: Dependencies
|
- name: Dependencies
|
||||||
@@ -952,7 +952,7 @@ jobs:
|
|||||||
timeout-minutes: 90
|
timeout-minutes: 90
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v6
|
uses: actions/checkout@v7
|
||||||
with:
|
with:
|
||||||
submodules: true
|
submodules: true
|
||||||
- name: Dependencies
|
- name: Dependencies
|
||||||
@@ -987,7 +987,7 @@ jobs:
|
|||||||
timeout-minutes: 90
|
timeout-minutes: 90
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v6
|
uses: actions/checkout@v7
|
||||||
with:
|
with:
|
||||||
submodules: true
|
submodules: true
|
||||||
- name: Setup Go
|
- name: Setup Go
|
||||||
@@ -1013,7 +1013,7 @@ jobs:
|
|||||||
timeout-minutes: 150
|
timeout-minutes: 150
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v6
|
uses: actions/checkout@v7
|
||||||
with:
|
with:
|
||||||
submodules: true
|
submodules: true
|
||||||
- name: Dependencies
|
- name: Dependencies
|
||||||
@@ -1042,7 +1042,7 @@ jobs:
|
|||||||
timeout-minutes: 60
|
timeout-minutes: 60
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v6
|
uses: actions/checkout@v7
|
||||||
with:
|
with:
|
||||||
submodules: true
|
submodules: true
|
||||||
- name: Setup Go
|
- name: Setup Go
|
||||||
@@ -1058,7 +1058,7 @@ jobs:
|
|||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v6
|
uses: actions/checkout@v7
|
||||||
with:
|
with:
|
||||||
submodules: true
|
submodules: true
|
||||||
- name: Dependencies
|
- name: Dependencies
|
||||||
@@ -1091,7 +1091,7 @@ jobs:
|
|||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v6
|
uses: actions/checkout@v7
|
||||||
with:
|
with:
|
||||||
submodules: true
|
submodules: true
|
||||||
- name: Dependencies
|
- name: Dependencies
|
||||||
@@ -1114,7 +1114,7 @@ jobs:
|
|||||||
timeout-minutes: 90
|
timeout-minutes: 90
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v6
|
uses: actions/checkout@v7
|
||||||
with:
|
with:
|
||||||
submodules: true
|
submodules: true
|
||||||
- name: Dependencies
|
- name: Dependencies
|
||||||
@@ -1140,7 +1140,7 @@ jobs:
|
|||||||
timeout-minutes: 90
|
timeout-minutes: 90
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v6
|
uses: actions/checkout@v7
|
||||||
with:
|
with:
|
||||||
submodules: true
|
submodules: true
|
||||||
- name: Dependencies
|
- name: Dependencies
|
||||||
|
|||||||
4
.github/workflows/test.yml
vendored
4
.github/workflows/test.yml
vendored
@@ -21,7 +21,7 @@ jobs:
|
|||||||
go-version: ['1.26.x']
|
go-version: ['1.26.x']
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v6
|
uses: actions/checkout@v7
|
||||||
with:
|
with:
|
||||||
submodules: true
|
submodules: true
|
||||||
- name: Free disk space
|
- name: Free disk space
|
||||||
@@ -84,7 +84,7 @@ jobs:
|
|||||||
go-version: ['1.26.x']
|
go-version: ['1.26.x']
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v6
|
uses: actions/checkout@v7
|
||||||
with:
|
with:
|
||||||
submodules: true
|
submodules: true
|
||||||
- name: Setup Go ${{ matrix.go-version }}
|
- name: Setup Go ${{ matrix.go-version }}
|
||||||
|
|||||||
2
.github/workflows/tests-aio.yml
vendored
2
.github/workflows/tests-aio.yml
vendored
@@ -62,7 +62,7 @@ jobs:
|
|||||||
sudo rm -rfv build || true
|
sudo rm -rfv build || true
|
||||||
df -h
|
df -h
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v6
|
uses: actions/checkout@v7
|
||||||
with:
|
with:
|
||||||
submodules: true
|
submodules: true
|
||||||
- name: Dependencies
|
- name: Dependencies
|
||||||
|
|||||||
2
.github/workflows/tests-e2e.yml
vendored
2
.github/workflows/tests-e2e.yml
vendored
@@ -21,7 +21,7 @@ jobs:
|
|||||||
go-version: ['1.25.x']
|
go-version: ['1.25.x']
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v6
|
uses: actions/checkout@v7
|
||||||
with:
|
with:
|
||||||
submodules: true
|
submodules: true
|
||||||
- name: Configure apt mirror on runner
|
- name: Configure apt mirror on runner
|
||||||
|
|||||||
97
.github/workflows/tests-pii-ner-e2e.yml
vendored
Normal file
97
.github/workflows/tests-pii-ner-e2e.yml
vendored
Normal file
@@ -0,0 +1,97 @@
|
|||||||
|
---
|
||||||
|
name: 'PII NER tier E2E (live GGUF, CPU)'
|
||||||
|
|
||||||
|
# Runs the real privacy-filter GGUF NER tier end-to-end on CPU — the gap the
|
||||||
|
# hermetic tests/e2e suite cannot cover (it only exercises the in-process
|
||||||
|
# pattern tier). Heavy (builds the C++ backend image + downloads a ~2.7 GB
|
||||||
|
# GGUF), so it is path-filtered on PRs and otherwise runs nightly / on demand.
|
||||||
|
#
|
||||||
|
# This drives the container-level harness (tests/e2e-backends) via
|
||||||
|
# `make test-extra-backend-privacy-filter`: it builds the privacy-filter image,
|
||||||
|
# downloads the model, loads it on CPU, and asserts byte-correct, UTF-8-aligned
|
||||||
|
# TokenClassify spans. The complementary HTTP-path specs in tests/e2e
|
||||||
|
# (e2e_pii_ner_test.go) Skip unless PII_NER_MODEL_GGUF is wired.
|
||||||
|
|
||||||
|
on:
|
||||||
|
workflow_dispatch:
|
||||||
|
schedule:
|
||||||
|
- cron: '0 3 * * *'
|
||||||
|
push:
|
||||||
|
branches:
|
||||||
|
- master
|
||||||
|
paths:
|
||||||
|
- 'backend/cpp/privacy-filter/**'
|
||||||
|
- 'backend/Dockerfile.privacy-filter'
|
||||||
|
- 'core/services/routing/pii/**'
|
||||||
|
- 'core/services/routing/piidetector/**'
|
||||||
|
- 'core/backend/token_classify.go'
|
||||||
|
- 'core/http/endpoints/localai/pii.go'
|
||||||
|
- 'core/schema/pii.go'
|
||||||
|
- 'tests/e2e-backends/**'
|
||||||
|
- 'tests/e2e/e2e_pii_ner_test.go'
|
||||||
|
- 'tests/e2e/e2e_suite_test.go'
|
||||||
|
- '.github/workflows/tests-pii-ner-e2e.yml'
|
||||||
|
pull_request:
|
||||||
|
paths:
|
||||||
|
- 'backend/cpp/privacy-filter/**'
|
||||||
|
- 'backend/Dockerfile.privacy-filter'
|
||||||
|
- 'core/services/routing/pii/**'
|
||||||
|
- 'core/services/routing/piidetector/**'
|
||||||
|
- 'core/backend/token_classify.go'
|
||||||
|
- 'core/http/endpoints/localai/pii.go'
|
||||||
|
- 'core/schema/pii.go'
|
||||||
|
- 'tests/e2e-backends/**'
|
||||||
|
- 'tests/e2e/e2e_pii_ner_test.go'
|
||||||
|
- 'tests/e2e/e2e_suite_test.go'
|
||||||
|
- '.github/workflows/tests-pii-ner-e2e.yml'
|
||||||
|
|
||||||
|
concurrency:
|
||||||
|
group: ci-tests-pii-ner-e2e-${{ github.event.pull_request.number || github.sha }}-${{ github.repository }}
|
||||||
|
cancel-in-progress: ${{ github.event_name == 'pull_request' }}
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
tests-pii-ner-e2e:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
strategy:
|
||||||
|
matrix:
|
||||||
|
go-version: ['1.25.x']
|
||||||
|
steps:
|
||||||
|
- name: Clone
|
||||||
|
uses: actions/checkout@v7
|
||||||
|
with:
|
||||||
|
submodules: true
|
||||||
|
- name: Free disk space
|
||||||
|
run: |
|
||||||
|
sudo rm -rf /usr/share/dotnet /usr/local/lib/android /opt/ghc /opt/hostedtoolcache/CodeQL || true
|
||||||
|
sudo docker image prune --all --force || true
|
||||||
|
df -h
|
||||||
|
- name: Configure apt mirror on runner
|
||||||
|
uses: ./.github/actions/configure-apt-mirror
|
||||||
|
- name: Setup Go ${{ matrix.go-version }}
|
||||||
|
uses: actions/setup-go@v5
|
||||||
|
with:
|
||||||
|
go-version: ${{ matrix.go-version }}
|
||||||
|
cache: false
|
||||||
|
- name: Proto Dependencies
|
||||||
|
run: |
|
||||||
|
curl -L -s https://github.com/protocolbuffers/protobuf/releases/download/v26.1/protoc-26.1-linux-x86_64.zip -o protoc.zip && \
|
||||||
|
unzip -j -d /usr/local/bin protoc.zip bin/protoc && \
|
||||||
|
rm protoc.zip
|
||||||
|
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
|
||||||
|
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
|
||||||
|
PATH="$PATH:$HOME/go/bin" make protogen-go
|
||||||
|
- name: Dependencies
|
||||||
|
run: |
|
||||||
|
sudo apt-get update
|
||||||
|
sudo apt-get install -y build-essential
|
||||||
|
# Builds local-ai-backend:privacy-filter, downloads the GGUF, loads it on
|
||||||
|
# CPU and runs the token_classify capability spec (byte-offset contract).
|
||||||
|
- name: Run live PII NER backend E2E
|
||||||
|
run: PATH="$PATH:$HOME/go/bin" make test-extra-backend-privacy-filter
|
||||||
|
- name: Setup tmate session if tests fail
|
||||||
|
if: ${{ failure() }}
|
||||||
|
uses: mxschmitt/action-tmate@v3.23
|
||||||
|
with:
|
||||||
|
detached: true
|
||||||
|
connect-timeout-seconds: 180
|
||||||
|
limit-access-to-actor: true
|
||||||
2
.github/workflows/tests-ui-e2e.yml
vendored
2
.github/workflows/tests-ui-e2e.yml
vendored
@@ -23,7 +23,7 @@ jobs:
|
|||||||
go-version: ['1.26.x']
|
go-version: ['1.26.x']
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v6
|
uses: actions/checkout@v7
|
||||||
with:
|
with:
|
||||||
submodules: true
|
submodules: true
|
||||||
- name: Configure apt mirror on runner
|
- name: Configure apt mirror on runner
|
||||||
|
|||||||
2
.github/workflows/update_swagger.yaml
vendored
2
.github/workflows/update_swagger.yaml
vendored
@@ -10,7 +10,7 @@ jobs:
|
|||||||
fail-fast: false
|
fail-fast: false
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v6
|
- uses: actions/checkout@v7
|
||||||
- name: Configure apt mirror on runner
|
- name: Configure apt mirror on runner
|
||||||
uses: ./.github/actions/configure-apt-mirror
|
uses: ./.github/actions/configure-apt-mirror
|
||||||
- uses: actions/setup-go@v5
|
- uses: actions/setup-go@v5
|
||||||
|
|||||||
3
.gitignore
vendored
3
.gitignore
vendored
@@ -91,3 +91,6 @@ core/http/react-ui/test-results/
|
|||||||
|
|
||||||
# Local worktrees
|
# Local worktrees
|
||||||
.worktrees/
|
.worktrees/
|
||||||
|
|
||||||
|
# SDD / brainstorm scratch (agent-driven development)
|
||||||
|
.superpowers/
|
||||||
|
|||||||
10
Makefile
10
Makefile
@@ -690,6 +690,16 @@ test-extra-backend-llama-cpp-transcription: docker-build-llama-cpp
|
|||||||
BACKEND_TEST_CTX_SIZE=2048 \
|
BACKEND_TEST_CTX_SIZE=2048 \
|
||||||
$(MAKE) test-extra-backend
|
$(MAKE) test-extra-backend
|
||||||
|
|
||||||
|
## privacy-filter: the PII/NER token-classification backend. Exercises the
|
||||||
|
## TokenClassify RPC and asserts byte-correct, UTF-8-aligned span offsets
|
||||||
|
## against the openai-privacy-filter multilingual GGUF (CPU-runnable, ~50M
|
||||||
|
## active params). This is the live-backend coverage for the PII NER tier.
|
||||||
|
test-extra-backend-privacy-filter: docker-build-privacy-filter
|
||||||
|
BACKEND_IMAGE=local-ai-backend:privacy-filter \
|
||||||
|
BACKEND_TEST_MODEL_URL=https://huggingface.co/LocalAI-io/privacy-filter-multilingual-GGUF/resolve/main/privacy-filter-multilingual-f16.gguf \
|
||||||
|
BACKEND_TEST_CAPS=health,load,token_classify \
|
||||||
|
$(MAKE) test-extra-backend
|
||||||
|
|
||||||
## vllm is resolved from a HuggingFace model id (no file download) and
|
## vllm is resolved from a HuggingFace model id (no file download) and
|
||||||
## exercises Predict + streaming + tool-call extraction via the hermes parser.
|
## exercises Predict + streaming + tool-call extraction via the hermes parser.
|
||||||
## Requires a host CPU with the SIMD instructions the prebuilt vllm CPU
|
## Requires a host CPU with the SIMD instructions the prebuilt vllm CPU
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
|
|
||||||
LLAMA_VERSION?=e475fa2b5f9fb50c3d6fc3e7c6fdf1e004465b62
|
LLAMA_VERSION?=73618f27a801c0b8614ceaf3547d3c2a99baae14
|
||||||
LLAMA_REPO?=https://github.com/ggerganov/llama.cpp
|
LLAMA_REPO?=https://github.com/ggerganov/llama.cpp
|
||||||
|
|
||||||
CMAKE_ARGS?=
|
CMAKE_ARGS?=
|
||||||
|
|||||||
@@ -8,7 +8,7 @@ JOBS?=$(shell nproc --ignore=1)
|
|||||||
|
|
||||||
# CrispASR version (release tag)
|
# CrispASR version (release tag)
|
||||||
CRISPASR_REPO?=https://github.com/CrispStrobe/CrispASR
|
CRISPASR_REPO?=https://github.com/CrispStrobe/CrispASR
|
||||||
CRISPASR_VERSION?=d745bda4386ae0f9d1d2f23fff8ec95d76428221
|
CRISPASR_VERSION?=63b57289255267edf66e43e33bc3911e04a2e92d
|
||||||
SO_TARGET?=libgocrispasr.so
|
SO_TARGET?=libgocrispasr.so
|
||||||
|
|
||||||
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF
|
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF
|
||||||
|
|||||||
@@ -8,7 +8,7 @@ JOBS?=$(shell nproc --ignore=1)
|
|||||||
|
|
||||||
# stablediffusion.cpp (ggml)
|
# stablediffusion.cpp (ggml)
|
||||||
STABLEDIFFUSION_GGML_REPO?=https://github.com/leejet/stable-diffusion.cpp
|
STABLEDIFFUSION_GGML_REPO?=https://github.com/leejet/stable-diffusion.cpp
|
||||||
STABLEDIFFUSION_GGML_VERSION?=b12098f5d09fc83da36e65c784f7bdb16a5a5ebf
|
STABLEDIFFUSION_GGML_VERSION?=f440ad9c29dd8bc34e5d1f4b863832b96d6ea05f
|
||||||
|
|
||||||
CMAKE_ARGS+=-DGGML_MAX_NAME=128
|
CMAKE_ARGS+=-DGGML_MAX_NAME=128
|
||||||
|
|
||||||
|
|||||||
@@ -8,7 +8,7 @@ JOBS?=$(shell nproc --ignore=1)
|
|||||||
|
|
||||||
# whisper.cpp version
|
# whisper.cpp version
|
||||||
WHISPER_REPO?=https://github.com/ggml-org/whisper.cpp
|
WHISPER_REPO?=https://github.com/ggml-org/whisper.cpp
|
||||||
WHISPER_CPP_VERSION?=5ed76e9a079962f1c85cfce44edd325c27ef1f97
|
WHISPER_CPP_VERSION?=bae6bc02b1940bbfb87b6a0299c565e563b916d1
|
||||||
SO_TARGET?=libgowhisper.so
|
SO_TARGET?=libgowhisper.so
|
||||||
|
|
||||||
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF
|
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
--extra-index-url https://download.pytorch.org/whl/cpu
|
--extra-index-url https://download.pytorch.org/whl/cpu
|
||||||
git+https://github.com/huggingface/diffusers
|
diffusers==0.38.0
|
||||||
opencv-python
|
opencv-python
|
||||||
transformers
|
transformers==4.57.6
|
||||||
torchvision==0.22.1
|
torchvision==0.22.1
|
||||||
accelerate
|
accelerate
|
||||||
git+https://github.com/xhinker/sd_embed
|
git+https://github.com/xhinker/sd_embed
|
||||||
@@ -10,9 +10,15 @@ sentencepiece
|
|||||||
torch==2.7.1
|
torch==2.7.1
|
||||||
optimum-quanto
|
optimum-quanto
|
||||||
ftfy
|
ftfy
|
||||||
# TODO: re-add compel once it supports transformers >= 5.
|
# diffusers and transformers are pinned together on purpose. transformers v5
|
||||||
# Tracking: https://github.com/damian0815/compel/pull/129
|
# restructured CLIPTextModel and dropped the `.text_model` attribute, which
|
||||||
# https://github.com/damian0815/compel/issues/128
|
# breaks single-file Stable Diffusion loading on every released diffusers
|
||||||
# compel currently pins transformers~=4.25, which forced pip into multi-hour
|
# (<=0.38.0); only unreleased diffusers main supports transformers v5. Tracking
|
||||||
# resolver backtracking storms in CI. backend.py imports it lazily and gates
|
# main via git froze whichever broken pair existed at image-build time. Pin the
|
||||||
# the COMPEL=1 env var on the import succeeding, so dropping it here is safe.
|
# last known-good released pair so builds are reproducible and can't drift into
|
||||||
|
# the broken window. See https://github.com/mudler/LocalAI/issues/9979
|
||||||
|
#
|
||||||
|
# compel is intentionally omitted: it pins transformers~=4.25, which conflicts
|
||||||
|
# with this pin and previously forced pip into multi-hour resolver backtracking
|
||||||
|
# storms in CI. backend.py imports it lazily and gates the COMPEL=1 env var on
|
||||||
|
# the import succeeding, so dropping it here is safe.
|
||||||
@@ -1,7 +1,7 @@
|
|||||||
--extra-index-url https://download.pytorch.org/whl/cu121
|
--extra-index-url https://download.pytorch.org/whl/cu121
|
||||||
git+https://github.com/huggingface/diffusers
|
diffusers==0.38.0
|
||||||
opencv-python
|
opencv-python
|
||||||
transformers
|
transformers==4.57.6
|
||||||
torchvision
|
torchvision
|
||||||
accelerate
|
accelerate
|
||||||
git+https://github.com/xhinker/sd_embed
|
git+https://github.com/xhinker/sd_embed
|
||||||
@@ -10,9 +10,15 @@ sentencepiece
|
|||||||
torch
|
torch
|
||||||
ftfy
|
ftfy
|
||||||
optimum-quanto
|
optimum-quanto
|
||||||
# TODO: re-add compel once it supports transformers >= 5.
|
# diffusers and transformers are pinned together on purpose. transformers v5
|
||||||
# Tracking: https://github.com/damian0815/compel/pull/129
|
# restructured CLIPTextModel and dropped the `.text_model` attribute, which
|
||||||
# https://github.com/damian0815/compel/issues/128
|
# breaks single-file Stable Diffusion loading on every released diffusers
|
||||||
# compel currently pins transformers~=4.25, which forced pip into multi-hour
|
# (<=0.38.0); only unreleased diffusers main supports transformers v5. Tracking
|
||||||
# resolver backtracking storms in CI. backend.py imports it lazily and gates
|
# main via git froze whichever broken pair existed at image-build time. Pin the
|
||||||
# the COMPEL=1 env var on the import succeeding, so dropping it here is safe.
|
# last known-good released pair so builds are reproducible and can't drift into
|
||||||
|
# the broken window. See https://github.com/mudler/LocalAI/issues/9979
|
||||||
|
#
|
||||||
|
# compel is intentionally omitted: it pins transformers~=4.25, which conflicts
|
||||||
|
# with this pin and previously forced pip into multi-hour resolver backtracking
|
||||||
|
# storms in CI. backend.py imports it lazily and gates the COMPEL=1 env var on
|
||||||
|
# the import succeeding, so dropping it here is safe.
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
--extra-index-url https://download.pytorch.org/whl/cu130
|
--extra-index-url https://download.pytorch.org/whl/cu130
|
||||||
git+https://github.com/huggingface/diffusers
|
diffusers==0.38.0
|
||||||
opencv-python
|
opencv-python
|
||||||
transformers
|
transformers==4.57.6
|
||||||
torchvision
|
torchvision
|
||||||
accelerate
|
accelerate
|
||||||
git+https://github.com/xhinker/sd_embed
|
git+https://github.com/xhinker/sd_embed
|
||||||
@@ -10,9 +10,15 @@ sentencepiece
|
|||||||
torch
|
torch
|
||||||
ftfy
|
ftfy
|
||||||
optimum-quanto
|
optimum-quanto
|
||||||
# TODO: re-add compel once it supports transformers >= 5.
|
# diffusers and transformers are pinned together on purpose. transformers v5
|
||||||
# Tracking: https://github.com/damian0815/compel/pull/129
|
# restructured CLIPTextModel and dropped the `.text_model` attribute, which
|
||||||
# https://github.com/damian0815/compel/issues/128
|
# breaks single-file Stable Diffusion loading on every released diffusers
|
||||||
# compel currently pins transformers~=4.25, which forced pip into multi-hour
|
# (<=0.38.0); only unreleased diffusers main supports transformers v5. Tracking
|
||||||
# resolver backtracking storms in CI. backend.py imports it lazily and gates
|
# main via git froze whichever broken pair existed at image-build time. Pin the
|
||||||
# the COMPEL=1 env var on the import succeeding, so dropping it here is safe.
|
# last known-good released pair so builds are reproducible and can't drift into
|
||||||
|
# the broken window. See https://github.com/mudler/LocalAI/issues/9979
|
||||||
|
#
|
||||||
|
# compel is intentionally omitted: it pins transformers~=4.25, which conflicts
|
||||||
|
# with this pin and previously forced pip into multi-hour resolver backtracking
|
||||||
|
# storms in CI. backend.py imports it lazily and gates the COMPEL=1 env var on
|
||||||
|
# the import succeeding, so dropping it here is safe.
|
||||||
|
|||||||
@@ -1,17 +1,23 @@
|
|||||||
--extra-index-url https://download.pytorch.org/whl/rocm7.0
|
--extra-index-url https://download.pytorch.org/whl/rocm7.0
|
||||||
torch==2.10.0+rocm7.0
|
torch==2.10.0+rocm7.0
|
||||||
torchvision==0.25.0+rocm7.0
|
torchvision==0.25.0+rocm7.0
|
||||||
git+https://github.com/huggingface/diffusers
|
diffusers==0.38.0
|
||||||
opencv-python
|
opencv-python
|
||||||
transformers
|
transformers==4.57.6
|
||||||
accelerate
|
accelerate
|
||||||
peft
|
peft
|
||||||
sentencepiece
|
sentencepiece
|
||||||
optimum-quanto
|
optimum-quanto
|
||||||
ftfy
|
ftfy
|
||||||
# TODO: re-add compel once it supports transformers >= 5.
|
# diffusers and transformers are pinned together on purpose. transformers v5
|
||||||
# Tracking: https://github.com/damian0815/compel/pull/129
|
# restructured CLIPTextModel and dropped the `.text_model` attribute, which
|
||||||
# https://github.com/damian0815/compel/issues/128
|
# breaks single-file Stable Diffusion loading on every released diffusers
|
||||||
# compel currently pins transformers~=4.25, which forced pip into multi-hour
|
# (<=0.38.0); only unreleased diffusers main supports transformers v5. Tracking
|
||||||
# resolver backtracking storms in CI. backend.py imports it lazily and gates
|
# main via git froze whichever broken pair existed at image-build time. Pin the
|
||||||
# the COMPEL=1 env var on the import succeeding, so dropping it here is safe.
|
# last known-good released pair so builds are reproducible and can't drift into
|
||||||
|
# the broken window. See https://github.com/mudler/LocalAI/issues/9979
|
||||||
|
#
|
||||||
|
# compel is intentionally omitted: it pins transformers~=4.25, which conflicts
|
||||||
|
# with this pin and previously forced pip into multi-hour resolver backtracking
|
||||||
|
# storms in CI. backend.py imports it lazily and gates the COMPEL=1 env var on
|
||||||
|
# the import succeeding, so dropping it here is safe.
|
||||||
@@ -3,18 +3,24 @@ torch
|
|||||||
torchvision
|
torchvision
|
||||||
optimum[openvino]
|
optimum[openvino]
|
||||||
setuptools
|
setuptools
|
||||||
git+https://github.com/huggingface/diffusers
|
diffusers==0.38.0
|
||||||
opencv-python
|
opencv-python
|
||||||
transformers
|
transformers==4.57.6
|
||||||
accelerate
|
accelerate
|
||||||
git+https://github.com/xhinker/sd_embed
|
git+https://github.com/xhinker/sd_embed
|
||||||
peft
|
peft
|
||||||
sentencepiece
|
sentencepiece
|
||||||
optimum-quanto
|
optimum-quanto
|
||||||
ftfy
|
ftfy
|
||||||
# TODO: re-add compel once it supports transformers >= 5.
|
# diffusers and transformers are pinned together on purpose. transformers v5
|
||||||
# Tracking: https://github.com/damian0815/compel/pull/129
|
# restructured CLIPTextModel and dropped the `.text_model` attribute, which
|
||||||
# https://github.com/damian0815/compel/issues/128
|
# breaks single-file Stable Diffusion loading on every released diffusers
|
||||||
# compel currently pins transformers~=4.25, which forced pip into multi-hour
|
# (<=0.38.0); only unreleased diffusers main supports transformers v5. Tracking
|
||||||
# resolver backtracking storms in CI. backend.py imports it lazily and gates
|
# main via git froze whichever broken pair existed at image-build time. Pin the
|
||||||
# the COMPEL=1 env var on the import succeeding, so dropping it here is safe.
|
# last known-good released pair so builds are reproducible and can't drift into
|
||||||
|
# the broken window. See https://github.com/mudler/LocalAI/issues/9979
|
||||||
|
#
|
||||||
|
# compel is intentionally omitted: it pins transformers~=4.25, which conflicts
|
||||||
|
# with this pin and previously forced pip into multi-hour resolver backtracking
|
||||||
|
# storms in CI. backend.py imports it lazily and gates the COMPEL=1 env var on
|
||||||
|
# the import succeeding, so dropping it here is safe.
|
||||||
@@ -1,7 +1,7 @@
|
|||||||
--extra-index-url https://pypi.jetson-ai-lab.io/jp6/cu129/
|
--extra-index-url https://pypi.jetson-ai-lab.io/jp6/cu129/
|
||||||
torch
|
torch
|
||||||
git+https://github.com/huggingface/diffusers
|
diffusers==0.38.0
|
||||||
transformers
|
transformers==4.57.6
|
||||||
accelerate
|
accelerate
|
||||||
peft
|
peft
|
||||||
optimum-quanto
|
optimum-quanto
|
||||||
@@ -9,9 +9,15 @@ numpy<2
|
|||||||
sentencepiece
|
sentencepiece
|
||||||
torchvision
|
torchvision
|
||||||
ftfy
|
ftfy
|
||||||
# TODO: re-add compel once it supports transformers >= 5.
|
# diffusers and transformers are pinned together on purpose. transformers v5
|
||||||
# Tracking: https://github.com/damian0815/compel/pull/129
|
# restructured CLIPTextModel and dropped the `.text_model` attribute, which
|
||||||
# https://github.com/damian0815/compel/issues/128
|
# breaks single-file Stable Diffusion loading on every released diffusers
|
||||||
# compel currently pins transformers~=4.25, which forced pip into multi-hour
|
# (<=0.38.0); only unreleased diffusers main supports transformers v5. Tracking
|
||||||
# resolver backtracking storms in CI. backend.py imports it lazily and gates
|
# main via git froze whichever broken pair existed at image-build time. Pin the
|
||||||
# the COMPEL=1 env var on the import succeeding, so dropping it here is safe.
|
# last known-good released pair so builds are reproducible and can't drift into
|
||||||
|
# the broken window. See https://github.com/mudler/LocalAI/issues/9979
|
||||||
|
#
|
||||||
|
# compel is intentionally omitted: it pins transformers~=4.25, which conflicts
|
||||||
|
# with this pin and previously forced pip into multi-hour resolver backtracking
|
||||||
|
# storms in CI. backend.py imports it lazily and gates the COMPEL=1 env var on
|
||||||
|
# the import succeeding, so dropping it here is safe.
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
--extra-index-url https://download.pytorch.org/whl/cu130
|
--extra-index-url https://download.pytorch.org/whl/cu130
|
||||||
torch
|
torch
|
||||||
git+https://github.com/huggingface/diffusers
|
diffusers==0.38.0
|
||||||
transformers
|
transformers==4.57.6
|
||||||
accelerate
|
accelerate
|
||||||
peft
|
peft
|
||||||
optimum-quanto
|
optimum-quanto
|
||||||
@@ -10,9 +10,15 @@ sentencepiece
|
|||||||
torchvision
|
torchvision
|
||||||
ftfy
|
ftfy
|
||||||
chardet
|
chardet
|
||||||
# TODO: re-add compel once it supports transformers >= 5.
|
# diffusers and transformers are pinned together on purpose. transformers v5
|
||||||
# Tracking: https://github.com/damian0815/compel/pull/129
|
# restructured CLIPTextModel and dropped the `.text_model` attribute, which
|
||||||
# https://github.com/damian0815/compel/issues/128
|
# breaks single-file Stable Diffusion loading on every released diffusers
|
||||||
# compel currently pins transformers~=4.25, which forced pip into multi-hour
|
# (<=0.38.0); only unreleased diffusers main supports transformers v5. Tracking
|
||||||
# resolver backtracking storms in CI. backend.py imports it lazily and gates
|
# main via git froze whichever broken pair existed at image-build time. Pin the
|
||||||
# the COMPEL=1 env var on the import succeeding, so dropping it here is safe.
|
# last known-good released pair so builds are reproducible and can't drift into
|
||||||
|
# the broken window. See https://github.com/mudler/LocalAI/issues/9979
|
||||||
|
#
|
||||||
|
# compel is intentionally omitted: it pins transformers~=4.25, which conflicts
|
||||||
|
# with this pin and previously forced pip into multi-hour resolver backtracking
|
||||||
|
# storms in CI. backend.py imports it lazily and gates the COMPEL=1 env var on
|
||||||
|
# the import succeeding, so dropping it here is safe.
|
||||||
|
|||||||
@@ -1,16 +1,22 @@
|
|||||||
torch==2.7.1
|
torch==2.7.1
|
||||||
torchvision==0.22.1
|
torchvision==0.22.1
|
||||||
git+https://github.com/huggingface/diffusers
|
diffusers==0.38.0
|
||||||
opencv-python
|
opencv-python
|
||||||
transformers
|
transformers==4.57.6
|
||||||
accelerate
|
accelerate
|
||||||
peft
|
peft
|
||||||
sentencepiece
|
sentencepiece
|
||||||
optimum-quanto
|
optimum-quanto
|
||||||
ftfy
|
ftfy
|
||||||
# TODO: re-add compel once it supports transformers >= 5.
|
# diffusers and transformers are pinned together on purpose. transformers v5
|
||||||
# Tracking: https://github.com/damian0815/compel/pull/129
|
# restructured CLIPTextModel and dropped the `.text_model` attribute, which
|
||||||
# https://github.com/damian0815/compel/issues/128
|
# breaks single-file Stable Diffusion loading on every released diffusers
|
||||||
# compel currently pins transformers~=4.25, which forced pip into multi-hour
|
# (<=0.38.0); only unreleased diffusers main supports transformers v5. Tracking
|
||||||
# resolver backtracking storms in CI. backend.py imports it lazily and gates
|
# main via git froze whichever broken pair existed at image-build time. Pin the
|
||||||
# the COMPEL=1 env var on the import succeeding, so dropping it here is safe.
|
# last known-good released pair so builds are reproducible and can't drift into
|
||||||
|
# the broken window. See https://github.com/mudler/LocalAI/issues/9979
|
||||||
|
#
|
||||||
|
# compel is intentionally omitted: it pins transformers~=4.25, which conflicts
|
||||||
|
# with this pin and previously forced pip into multi-hour resolver backtracking
|
||||||
|
# storms in CI. backend.py imports it lazily and gates the COMPEL=1 env var on
|
||||||
|
# the import succeeding, so dropping it here is safe.
|
||||||
@@ -341,11 +341,9 @@ func (a *Application) ResolvePIIPolicy(cfg *config.ModelConfig) (enabled bool, d
|
|||||||
}
|
}
|
||||||
appCfg := a.ApplicationConfig()
|
appCfg := a.ApplicationConfig()
|
||||||
|
|
||||||
if cfg.PII.Enabled != nil {
|
// PIIIsEnabled already encodes "explicit pii.enabled wins, else backend
|
||||||
enabled = *cfg.PII.Enabled
|
// default (cloud-proxy)" — the single source of that rule.
|
||||||
} else {
|
enabled = cfg.PIIIsEnabled()
|
||||||
enabled = cfg.PIIIsEnabled() // backend default (cloud-proxy)
|
|
||||||
}
|
|
||||||
if !enabled {
|
if !enabled {
|
||||||
return false, nil
|
return false, nil
|
||||||
}
|
}
|
||||||
@@ -354,7 +352,7 @@ func (a *Application) ResolvePIIPolicy(cfg *config.ModelConfig) (enabled bool, d
|
|||||||
if len(detectors) == 0 {
|
if len(detectors) == 0 {
|
||||||
detectors = append([]string(nil), appCfg.PIIDefaultDetectors...)
|
detectors = append([]string(nil), appCfg.PIIDefaultDetectors...)
|
||||||
}
|
}
|
||||||
return enabled, detectors
|
return true, detectors // enabled is necessarily true past the !enabled guard
|
||||||
}
|
}
|
||||||
|
|
||||||
// PIIPolicyResolver adapts ResolvePIIPolicy to pii.PolicyResolver for
|
// PIIPolicyResolver adapts ResolvePIIPolicy to pii.PolicyResolver for
|
||||||
|
|||||||
@@ -357,6 +357,15 @@ func initDistributed(cfg *config.ApplicationConfig, authDB *gorm.DB, configLoade
|
|||||||
Pressure: pressure,
|
Pressure: pressure,
|
||||||
})
|
})
|
||||||
|
|
||||||
|
// Wire staging-progress broadcasting so file-staging shows up on every
|
||||||
|
// replica, not just the one performing the transfer. Without this, a
|
||||||
|
// /api/operations poll that round-robins onto a peer sees no staging row and
|
||||||
|
// the progress flickers. The origin publishes; peers mirror via the wildcard.
|
||||||
|
router.StagingTracker().SetPublisher(natsClient)
|
||||||
|
if _, err := router.StagingTracker().SubscribeBroadcasts(natsClient); err != nil {
|
||||||
|
xlog.Warn("Failed to subscribe to staging progress broadcasts", "error", err)
|
||||||
|
}
|
||||||
|
|
||||||
// Create ReplicaReconciler for auto-scaling model replicas. Adapter +
|
// Create ReplicaReconciler for auto-scaling model replicas. Adapter +
|
||||||
// RegistrationToken feed the state-reconciliation passes: pending op
|
// RegistrationToken feed the state-reconciliation passes: pending op
|
||||||
// drain uses the adapter, and model health probes use the token to auth
|
// drain uses the adapter, and model health probes use the token to auth
|
||||||
|
|||||||
@@ -537,6 +537,36 @@ func DefaultRegistry() map[string]FieldMetaOverride {
|
|||||||
Component: "number",
|
Component: "number",
|
||||||
Order: 79,
|
Order: 79,
|
||||||
},
|
},
|
||||||
|
"pipeline.compaction.enabled": {
|
||||||
|
Section: "pipeline",
|
||||||
|
Label: "Compaction Enabled",
|
||||||
|
Description: "Fold conversation items that age out of the live window (Max History Items) into a rolling summary instead of dropping them, so long realtime sessions stay cheap without losing earlier context. Off by default.",
|
||||||
|
Component: "toggle",
|
||||||
|
Order: 80,
|
||||||
|
},
|
||||||
|
"pipeline.compaction.trigger_items": {
|
||||||
|
Section: "pipeline",
|
||||||
|
Label: "Compaction Trigger Items",
|
||||||
|
Description: "High-water mark: once the live conversation exceeds this many items, the overflow above Max History Items is summarized and evicted. Must be greater than Max History Items; defaults to twice it. The gap controls how often summarization runs.",
|
||||||
|
Component: "number",
|
||||||
|
Order: 81,
|
||||||
|
},
|
||||||
|
"pipeline.compaction.summary_model": {
|
||||||
|
Section: "pipeline",
|
||||||
|
Label: "Compaction Summary Model",
|
||||||
|
Description: "Optional smaller/cheaper model used to produce the rolling summary. Empty reuses the pipeline's own LLM. On CPU, a tiny model here keeps compaction from competing with the conversation LLM.",
|
||||||
|
Component: "input",
|
||||||
|
Advanced: true,
|
||||||
|
Order: 82,
|
||||||
|
},
|
||||||
|
"pipeline.compaction.max_summary_tokens": {
|
||||||
|
Section: "pipeline",
|
||||||
|
Label: "Compaction Max Summary Tokens",
|
||||||
|
Description: "Advisory cap on the rolling summary length (fed to the summarizer prompt). Defaults to 512.",
|
||||||
|
Component: "number",
|
||||||
|
Advanced: true,
|
||||||
|
Order: 83,
|
||||||
|
},
|
||||||
|
|
||||||
// --- Functions ---
|
// --- Functions ---
|
||||||
"function.grammar.parallel_calls": {
|
"function.grammar.parallel_calls": {
|
||||||
|
|||||||
@@ -641,11 +641,32 @@ type Pipeline struct {
|
|||||||
// context fills.
|
// context fills.
|
||||||
MaxHistoryItems *int `yaml:"max_history_items,omitempty" json:"max_history_items,omitempty"`
|
MaxHistoryItems *int `yaml:"max_history_items,omitempty" json:"max_history_items,omitempty"`
|
||||||
|
|
||||||
|
// Compaction folds conversation items that age out of the live window
|
||||||
|
// (max_history_items) into a rolling summary instead of dropping them, so
|
||||||
|
// long realtime sessions stay cheap without losing earlier context. Nil
|
||||||
|
// (block absent) means disabled, preserving existing behavior.
|
||||||
|
Compaction *PipelineCompaction `yaml:"compaction,omitempty" json:"compaction,omitempty"`
|
||||||
|
|
||||||
// VoiceRecognition gates the pipeline behind speaker verification. Nil
|
// VoiceRecognition gates the pipeline behind speaker verification. Nil
|
||||||
// (block absent) means no gate, preserving existing behavior.
|
// (block absent) means no gate, preserving existing behavior.
|
||||||
VoiceRecognition *PipelineVoiceRecognition `yaml:"voice_recognition,omitempty" json:"voice_recognition,omitempty"`
|
VoiceRecognition *PipelineVoiceRecognition `yaml:"voice_recognition,omitempty" json:"voice_recognition,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// PipelineCompaction configures summarize-then-drop for a realtime pipeline.
|
||||||
|
type PipelineCompaction struct {
|
||||||
|
// Enabled turns summarize-then-drop on. Default false.
|
||||||
|
Enabled bool `yaml:"enabled,omitempty" json:"enabled,omitempty"`
|
||||||
|
// TriggerItems is the high-water mark: once live items exceed it, overflow
|
||||||
|
// above max_history_items is summarized and evicted. Must exceed
|
||||||
|
// max_history_items; clamped up if not. Default: 2x max_history_items.
|
||||||
|
TriggerItems int `yaml:"trigger_items,omitempty" json:"trigger_items,omitempty"`
|
||||||
|
// SummaryModel optionally names a smaller/cheaper model for the summary
|
||||||
|
// call. Empty uses the pipeline's own LLM.
|
||||||
|
SummaryModel string `yaml:"summary_model,omitempty" json:"summary_model,omitempty"`
|
||||||
|
// MaxSummaryTokens advises the summary length (fed to the prompt). Default 512.
|
||||||
|
MaxSummaryTokens int `yaml:"max_summary_tokens,omitempty" json:"max_summary_tokens,omitempty"`
|
||||||
|
}
|
||||||
|
|
||||||
// ApplyReasoningEffort resolves the effective reasoning effort — a per-request
|
// ApplyReasoningEffort resolves the effective reasoning effort — a per-request
|
||||||
// value (requestEffort) overrides the config's own ReasoningEffort default —
|
// value (requestEffort) overrides the config's own ReasoningEffort default —
|
||||||
// stores it on the config so gRPCPredictOpts forwards it to the backend as the
|
// stores it on the config so gRPCPredictOpts forwards it to the backend as the
|
||||||
|
|||||||
@@ -5,6 +5,7 @@ import (
|
|||||||
"errors"
|
"errors"
|
||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
|
"reflect"
|
||||||
)
|
)
|
||||||
|
|
||||||
// runtimeSettingsFile is the on-disk filename inside DynamicConfigsDir.
|
// runtimeSettingsFile is the on-disk filename inside DynamicConfigsDir.
|
||||||
@@ -33,6 +34,35 @@ func (o *ApplicationConfig) ReadPersistedSettings() (RuntimeSettings, error) {
|
|||||||
return settings, nil
|
return settings, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// MergeNonNil overlays every set (non-nil) field of overlay onto the
|
||||||
|
// receiver, leaving the receiver's value untouched wherever overlay left a
|
||||||
|
// field unset. Every RuntimeSettings field is a pointer precisely so "set"
|
||||||
|
// can be told apart from "absent" (see the type doc), which makes this a
|
||||||
|
// faithful partial update: a caller that submits only the field it owns
|
||||||
|
// changes exactly that field and never clobbers unrelated settings.
|
||||||
|
//
|
||||||
|
// This is the read-modify-write contract the persistence helpers exist for.
|
||||||
|
// UpdateSettingsEndpoint reads the on-disk settings, merges the request body
|
||||||
|
// on top, and writes the result — so a focused admin page that POSTs only its
|
||||||
|
// own field (the Middleware page sends only mitm_listen; the detector table
|
||||||
|
// only pii_default_detectors) no longer nulls every other setting.
|
||||||
|
//
|
||||||
|
// Reflection keeps the merge total over the struct: a field added to
|
||||||
|
// RuntimeSettings later is merged automatically, so the persistence path can
|
||||||
|
// never silently drop a new setting the way a hand-maintained field list
|
||||||
|
// would. Non-pointer fields (none today) are skipped — they cannot express
|
||||||
|
// "absent", so the receiver wins.
|
||||||
|
func (s *RuntimeSettings) MergeNonNil(overlay RuntimeSettings) {
|
||||||
|
dst := reflect.ValueOf(s).Elem()
|
||||||
|
src := reflect.ValueOf(overlay)
|
||||||
|
for i := 0; i < src.NumField(); i++ {
|
||||||
|
f := src.Field(i)
|
||||||
|
if f.Kind() == reflect.Pointer && !f.IsNil() {
|
||||||
|
dst.Field(i).Set(f)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// WritePersistedSettings serialises the given RuntimeSettings to
|
// WritePersistedSettings serialises the given RuntimeSettings to
|
||||||
// runtime_settings.json with restricted permissions (it may carry API
|
// runtime_settings.json with restricted permissions (it may carry API
|
||||||
// keys and P2P tokens).
|
// keys and P2P tokens).
|
||||||
|
|||||||
@@ -12,6 +12,7 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
func strPtr(s string) *string { return &s }
|
func strPtr(s string) *string { return &s }
|
||||||
|
func boolPtr(b bool) *bool { return &b }
|
||||||
|
|
||||||
var _ = Describe("RuntimeSettings persistence helpers", func() {
|
var _ = Describe("RuntimeSettings persistence helpers", func() {
|
||||||
var (
|
var (
|
||||||
@@ -51,6 +52,47 @@ var _ = Describe("RuntimeSettings persistence helpers", func() {
|
|||||||
})
|
})
|
||||||
})
|
})
|
||||||
|
|
||||||
|
// MergeNonNil is the partial-update primitive UpdateSettingsEndpoint
|
||||||
|
// relies on: a focused admin page POSTs only the field it owns, and the
|
||||||
|
// handler reads the on-disk settings and overlays the request on top.
|
||||||
|
// Without it, the body would be written verbatim and every field the
|
||||||
|
// caller omitted would be nulled (the reported regression: changing
|
||||||
|
// mitm_listen wiped the galleries, api keys, watchdog config, etc.).
|
||||||
|
Describe("MergeNonNil partial update", func() {
|
||||||
|
It("overlays set fields and preserves unset ones", func() {
|
||||||
|
base := config.RuntimeSettings{
|
||||||
|
MITMListen: strPtr(":9000"),
|
||||||
|
Galleries: &[]config.Gallery{{Name: "g1", URL: "http://example/g1"}},
|
||||||
|
WatchdogIdleEnabled: boolPtr(true),
|
||||||
|
ApiKeys: &[]string{"persisted-key"},
|
||||||
|
PIIDefaultDetectors: &[]string{"det-a"},
|
||||||
|
}
|
||||||
|
|
||||||
|
// Simulate the Middleware proxy tab: only mitm_listen is sent.
|
||||||
|
overlay := config.RuntimeSettings{MITMListen: strPtr(":8443")}
|
||||||
|
base.MergeNonNil(overlay)
|
||||||
|
|
||||||
|
Expect(base.MITMListen).ToNot(BeNil())
|
||||||
|
Expect(*base.MITMListen).To(Equal(":8443"), "set field should be overlaid")
|
||||||
|
// Everything the overlay left unset must survive untouched.
|
||||||
|
Expect(base.Galleries).ToNot(BeNil(), "galleries were clobbered")
|
||||||
|
Expect(*base.Galleries).To(HaveLen(1))
|
||||||
|
Expect(base.WatchdogIdleEnabled).ToNot(BeNil())
|
||||||
|
Expect(*base.WatchdogIdleEnabled).To(BeTrue())
|
||||||
|
Expect(base.ApiKeys).ToNot(BeNil(), "api_keys were clobbered")
|
||||||
|
Expect(*base.ApiKeys).To(Equal([]string{"persisted-key"}))
|
||||||
|
Expect(base.PIIDefaultDetectors).ToNot(BeNil(), "pii_default_detectors were clobbered")
|
||||||
|
Expect(*base.PIIDefaultDetectors).To(Equal([]string{"det-a"}))
|
||||||
|
})
|
||||||
|
|
||||||
|
It("lets an explicit empty slice clear a field", func() {
|
||||||
|
base := config.RuntimeSettings{PIIDefaultDetectors: &[]string{"det-a"}}
|
||||||
|
base.MergeNonNil(config.RuntimeSettings{PIIDefaultDetectors: &[]string{}})
|
||||||
|
Expect(base.PIIDefaultDetectors).ToNot(BeNil())
|
||||||
|
Expect(*base.PIIDefaultDetectors).To(BeEmpty(), "an explicit empty slice should clear, not preserve")
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
// MITM round trip pins the contract that loadRuntimeSettingsFromFile
|
// MITM round trip pins the contract that loadRuntimeSettingsFromFile
|
||||||
// MITM listener address must survive a write/read round trip so the
|
// MITM listener address must survive a write/read round trip so the
|
||||||
// next process restart can bring the listener back up. (Intercept
|
// next process restart can bring the listener back up. (Intercept
|
||||||
|
|||||||
@@ -385,6 +385,23 @@ func GetNodeModelsEndpoint(registry *nodes.NodeRegistry) echo.HandlerFunc {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ListAllNodeModelsEndpoint returns all loaded models across all healthy nodes.
|
||||||
|
// @Summary List all loaded models cluster-wide
|
||||||
|
// @Tags Nodes
|
||||||
|
// @Success 200 {array} nodes.NodeModel
|
||||||
|
// @Router /api/nodes/models [get]
|
||||||
|
func ListAllNodeModelsEndpoint(registry *nodes.NodeRegistry) echo.HandlerFunc {
|
||||||
|
return func(c echo.Context) error {
|
||||||
|
ctx := c.Request().Context()
|
||||||
|
models, err := registry.ListAllLoadedModels(ctx)
|
||||||
|
if err != nil {
|
||||||
|
xlog.Error("Failed to list all node models", "error", err)
|
||||||
|
return c.JSON(http.StatusInternalServerError, nodeError(http.StatusInternalServerError, "failed to list node models"))
|
||||||
|
}
|
||||||
|
return c.JSON(http.StatusOK, models)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// DrainNodeEndpoint sets a node to draining status (no new requests).
|
// DrainNodeEndpoint sets a node to draining status (no new requests).
|
||||||
func DrainNodeEndpoint(registry *nodes.NodeRegistry) echo.HandlerFunc {
|
func DrainNodeEndpoint(registry *nodes.NodeRegistry) echo.HandlerFunc {
|
||||||
return func(c echo.Context) error {
|
return func(c echo.Context) error {
|
||||||
|
|||||||
@@ -407,4 +407,44 @@ var _ = Describe("Node HTTP handlers", func() {
|
|||||||
Expect(names).To(ConsistOf("alpha", "beta"))
|
Expect(names).To(ConsistOf("alpha", "beta"))
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
|
|
||||||
|
Describe("ListAllNodeModelsEndpoint", func() {
|
||||||
|
It("returns an empty list when no models are loaded", func() {
|
||||||
|
e := echo.New()
|
||||||
|
req := httptest.NewRequest(http.MethodGet, "/", nil)
|
||||||
|
rec := httptest.NewRecorder()
|
||||||
|
c := e.NewContext(req, rec)
|
||||||
|
|
||||||
|
handler := ListAllNodeModelsEndpoint(registry)
|
||||||
|
Expect(handler(c)).To(Succeed())
|
||||||
|
Expect(rec.Code).To(Equal(http.StatusOK))
|
||||||
|
|
||||||
|
var list []nodes.NodeModel
|
||||||
|
Expect(json.Unmarshal(rec.Body.Bytes(), &list)).To(Succeed())
|
||||||
|
Expect(list).To(BeEmpty())
|
||||||
|
})
|
||||||
|
|
||||||
|
It("returns loaded models across healthy nodes", func() {
|
||||||
|
ctx := context.Background()
|
||||||
|
Expect(registry.Register(ctx, &nodes.BackendNode{
|
||||||
|
ID: "n1", Name: "alpha", Address: "10.0.0.1:50051", Status: nodes.StatusHealthy,
|
||||||
|
}, true)).To(Succeed())
|
||||||
|
Expect(registry.SetNodeModel(ctx, "n1", "llama-3.3", 0, "loaded", "10.0.0.1:50051", 0)).To(Succeed())
|
||||||
|
|
||||||
|
e := echo.New()
|
||||||
|
req := httptest.NewRequest(http.MethodGet, "/", nil)
|
||||||
|
rec := httptest.NewRecorder()
|
||||||
|
c := e.NewContext(req, rec)
|
||||||
|
|
||||||
|
handler := ListAllNodeModelsEndpoint(registry)
|
||||||
|
Expect(handler(c)).To(Succeed())
|
||||||
|
Expect(rec.Code).To(Equal(http.StatusOK))
|
||||||
|
|
||||||
|
var list []nodes.NodeModel
|
||||||
|
Expect(json.Unmarshal(rec.Body.Bytes(), &list)).To(Succeed())
|
||||||
|
Expect(list).To(HaveLen(1))
|
||||||
|
Expect(list[0].ModelName).To(Equal("llama-3.3"))
|
||||||
|
Expect(list[0].NodeID).To(Equal("n1"))
|
||||||
|
})
|
||||||
|
})
|
||||||
})
|
})
|
||||||
|
|||||||
@@ -4,8 +4,6 @@ import (
|
|||||||
"encoding/json"
|
"encoding/json"
|
||||||
"io"
|
"io"
|
||||||
"net/http"
|
"net/http"
|
||||||
"os"
|
|
||||||
"path/filepath"
|
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/labstack/echo/v4"
|
"github.com/labstack/echo/v4"
|
||||||
@@ -110,6 +108,18 @@ func UpdateSettingsEndpoint(app *application.Application) echo.HandlerFunc {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Read whatever is already persisted: it is both the source of truth
|
||||||
|
// for branding asset filenames (below) and the base we merge this
|
||||||
|
// request onto before writing. A read failure must not let a Save
|
||||||
|
// silently discard the existing settings — surface it instead.
|
||||||
|
persisted, err := appConfig.ReadPersistedSettings()
|
||||||
|
if err != nil {
|
||||||
|
return c.JSON(http.StatusInternalServerError, schema.SettingsResponse{
|
||||||
|
Success: false,
|
||||||
|
Error: "Failed to read existing settings: " + err.Error(),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
// Branding asset filenames are owned exclusively by
|
// Branding asset filenames are owned exclusively by
|
||||||
// /api/branding/asset/{kind} (upload/delete). The Settings page also
|
// /api/branding/asset/{kind} (upload/delete). The Settings page also
|
||||||
// round-trips them via GET /api/settings, but its local state is stale
|
// round-trips them via GET /api/settings, but its local state is stale
|
||||||
@@ -118,11 +128,9 @@ func UpdateSettingsEndpoint(app *application.Application) echo.HandlerFunc {
|
|||||||
// at page open. Replace whatever the body sent for these three fields
|
// at page open. Replace whatever the body sent for these three fields
|
||||||
// with the values currently on disk so /api/settings can never
|
// with the values currently on disk so /api/settings can never
|
||||||
// regress them.
|
// regress them.
|
||||||
if existing, err := appConfig.ReadPersistedSettings(); err == nil {
|
settings.LogoFile = persisted.LogoFile
|
||||||
settings.LogoFile = existing.LogoFile
|
settings.LogoHorizontalFile = persisted.LogoHorizontalFile
|
||||||
settings.LogoHorizontalFile = existing.LogoHorizontalFile
|
settings.FaviconFile = persisted.FaviconFile
|
||||||
settings.FaviconFile = existing.FaviconFile
|
|
||||||
}
|
|
||||||
|
|
||||||
// The UI reads ApiKeys from GET /api/settings, which already returns the
|
// The UI reads ApiKeys from GET /api/settings, which already returns the
|
||||||
// merged env+runtime list. When the user clicks Save, the same merged
|
// merged env+runtime list. When the user clicks Save, the same merged
|
||||||
@@ -145,16 +153,17 @@ func UpdateSettingsEndpoint(app *application.Application) echo.HandlerFunc {
|
|||||||
settings.ApiKeys = &runtimeOnly
|
settings.ApiKeys = &runtimeOnly
|
||||||
}
|
}
|
||||||
|
|
||||||
settingsFile := filepath.Join(appConfig.DynamicConfigsDir, "runtime_settings.json")
|
// Persist as a partial update: overlay only the fields this request set
|
||||||
settingsJSON, err := json.MarshalIndent(settings, "", " ")
|
// onto the settings already on disk. Focused admin pages POST just the
|
||||||
if err != nil {
|
// keys they own (the Middleware proxy tab sends only mitm_listen; the
|
||||||
return c.JSON(http.StatusInternalServerError, schema.SettingsResponse{
|
// detector table only pii_default_detectors), so writing the request
|
||||||
Success: false,
|
// body verbatim would null every unrelated setting (the no-omitempty
|
||||||
Error: "Failed to marshal settings: " + err.Error(),
|
// api_keys / pii_default_detectors fields even round-trip as JSON
|
||||||
})
|
// null). The full Settings page still round-trips every field, so its
|
||||||
}
|
// Save is unchanged.
|
||||||
|
toPersist := persisted
|
||||||
if err := os.WriteFile(settingsFile, settingsJSON, 0600); err != nil {
|
toPersist.MergeNonNil(settings)
|
||||||
|
if err := appConfig.WritePersistedSettings(toPersist); err != nil {
|
||||||
return c.JSON(http.StatusInternalServerError, schema.SettingsResponse{
|
return c.JSON(http.StatusInternalServerError, schema.SettingsResponse{
|
||||||
Success: false,
|
Success: false,
|
||||||
Error: "Failed to write settings file: " + err.Error(),
|
Error: "Failed to write settings file: " + err.Error(),
|
||||||
|
|||||||
@@ -52,6 +52,10 @@ var _ = Describe("Settings endpoints", func() {
|
|||||||
// Settings are persisted here; set after construction since there's no
|
// Settings are persisted here; set after construction since there's no
|
||||||
// dedicated AppOption for it.
|
// dedicated AppOption for it.
|
||||||
app.ApplicationConfig().DynamicConfigsDir = tmp
|
app.ApplicationConfig().DynamicConfigsDir = tmp
|
||||||
|
// Contain the MITM CA inside tmp too. The partial-save spec flips
|
||||||
|
// mitm_listen, which starts the listener and writes a CA; without this
|
||||||
|
// it defaults to ./mitm-ca and litters the package source tree.
|
||||||
|
app.ApplicationConfig().MITMCADir = filepath.Join(tmp, "mitm-ca")
|
||||||
|
|
||||||
e = echo.New()
|
e = echo.New()
|
||||||
e.GET("/api/settings", GetSettingsEndpoint(app))
|
e.GET("/api/settings", GetSettingsEndpoint(app))
|
||||||
@@ -109,6 +113,39 @@ var _ = Describe("Settings endpoints", func() {
|
|||||||
Expect(err).ToNot(HaveOccurred())
|
Expect(err).ToNot(HaveOccurred())
|
||||||
})
|
})
|
||||||
|
|
||||||
|
// Regression: a focused admin page (the Middleware proxy tab) POSTs only
|
||||||
|
// the one field it owns — mitm_listen. The old handler wrote the request
|
||||||
|
// body verbatim, so every other persisted setting was dropped (and
|
||||||
|
// api_keys / pii_default_detectors, which lack omitempty, were written as
|
||||||
|
// null). A partial POST must now merge onto what is already on disk.
|
||||||
|
It("preserves unrelated persisted settings when a partial POST sets only mitm_listen", func() {
|
||||||
|
// First save establishes a fuller settings file (as the full Settings
|
||||||
|
// page would): galleries, an API key, and the MITM listener. The
|
||||||
|
// listener restart binds a real socket, so use 127.0.0.1:0 for an
|
||||||
|
// ephemeral free port rather than a fixed one that may be in use.
|
||||||
|
rec := post(`{"mitm_listen":"127.0.0.1:0","galleries":[{"name":"g1","url":"http://example/g1"}],"api_keys":["k1"],"pii_default_detectors":["det-a"]}`)
|
||||||
|
Expect(rec.Code).To(Equal(http.StatusOK), rec.Body.String())
|
||||||
|
|
||||||
|
// The Middleware proxy tab then changes only the listen address — the
|
||||||
|
// exact partial body that nulled everything else before the fix.
|
||||||
|
rec = post(`{"mitm_listen":"127.0.0.1:0"}`)
|
||||||
|
Expect(rec.Code).To(Equal(http.StatusOK), rec.Body.String())
|
||||||
|
|
||||||
|
raw, err := os.ReadFile(filepath.Join(tmp, "runtime_settings.json"))
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
var ondisk config.RuntimeSettings
|
||||||
|
Expect(json.Unmarshal(raw, &ondisk)).To(Succeed())
|
||||||
|
|
||||||
|
Expect(ondisk.MITMListen).ToNot(BeNil())
|
||||||
|
Expect(*ondisk.MITMListen).To(Equal("127.0.0.1:0"), "the changed field should be saved")
|
||||||
|
Expect(ondisk.Galleries).ToNot(BeNil(), "galleries were clobbered by the partial save")
|
||||||
|
Expect(*ondisk.Galleries).To(HaveLen(1))
|
||||||
|
Expect(ondisk.ApiKeys).ToNot(BeNil(), "api_keys were nulled by the partial save")
|
||||||
|
Expect(*ondisk.ApiKeys).To(Equal([]string{"k1"}))
|
||||||
|
Expect(ondisk.PIIDefaultDetectors).ToNot(BeNil(), "pii_default_detectors were nulled by the partial save")
|
||||||
|
Expect(*ondisk.PIIDefaultDetectors).To(Equal([]string{"det-a"}))
|
||||||
|
})
|
||||||
|
|
||||||
// Residual #9125: enabling the watchdog from a cold (off) state via the
|
// Residual #9125: enabling the watchdog from a cold (off) state via the
|
||||||
// React master toggle must start the live watchdog immediately, without a
|
// React master toggle must start the live watchdog immediately, without a
|
||||||
// restart. The toggle posts watchdog_idle_enabled/busy_enabled=true while
|
// restart. The toggle posts watchdog_idle_enabled/busy_enabled=true while
|
||||||
|
|||||||
@@ -12,6 +12,7 @@ import (
|
|||||||
"os"
|
"os"
|
||||||
"strconv"
|
"strconv"
|
||||||
"sync"
|
"sync"
|
||||||
|
"sync/atomic"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"net/http"
|
"net/http"
|
||||||
@@ -134,6 +135,18 @@ type Session struct {
|
|||||||
// pairs are kept together so we never feed an orphaned tool result.
|
// pairs are kept together so we never feed an orphaned tool result.
|
||||||
MaxHistoryItems int
|
MaxHistoryItems int
|
||||||
|
|
||||||
|
// Compaction settings resolved from pipeline.compaction (see resolveCompaction).
|
||||||
|
CompactionEnabled bool
|
||||||
|
CompactionTrigger int
|
||||||
|
SummaryModel string
|
||||||
|
MaxSummaryTokens int
|
||||||
|
|
||||||
|
// summarizerFactory lazily builds the model used for compaction summaries
|
||||||
|
// when summary_model is configured; nil means reuse the pipeline LLM.
|
||||||
|
summarizerFactory func() (Model, error)
|
||||||
|
summarizerOnce sync.Once
|
||||||
|
summarizerCached Model
|
||||||
|
|
||||||
// AssistantExecutor is non-nil when the session opted into the in-process
|
// AssistantExecutor is non-nil when the session opted into the in-process
|
||||||
// LocalAI Assistant tool surface. Tool calls whose name matches this
|
// LocalAI Assistant tool surface. Tool calls whose name matches this
|
||||||
// executor's catalog are run inproc and their output is fed back to the
|
// executor's catalog are run inproc and their output is fed back to the
|
||||||
@@ -241,6 +254,12 @@ type Conversation struct {
|
|||||||
ID string
|
ID string
|
||||||
Items []*types.MessageItemUnion
|
Items []*types.MessageItemUnion
|
||||||
Lock sync.Mutex
|
Lock sync.Mutex
|
||||||
|
// Memory is the rolling summary of items already evicted by compaction. It
|
||||||
|
// is kept out of Items (so trimRealtimeItems never drops it) and rendered
|
||||||
|
// as a system message right after the session instructions.
|
||||||
|
Memory string
|
||||||
|
// compacting ensures at most one background compaction runs per conversation.
|
||||||
|
compacting atomic.Bool
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c *Conversation) ToServer() types.Conversation {
|
func (c *Conversation) ToServer() types.Conversation {
|
||||||
@@ -540,13 +559,12 @@ func runRealtimeSession(application *application.Application, t Transport, model
|
|||||||
SoundDetectionWindowMs: cfg.Pipeline.SoundDetectionWindowMs,
|
SoundDetectionWindowMs: cfg.Pipeline.SoundDetectionWindowMs,
|
||||||
SoundDetectionHopMs: cfg.Pipeline.SoundDetectionHopMs,
|
SoundDetectionHopMs: cfg.Pipeline.SoundDetectionHopMs,
|
||||||
}
|
}
|
||||||
|
session.CompactionEnabled, session.CompactionTrigger, session.MaxSummaryTokens, session.SummaryModel = resolveCompaction(cfg, session.MaxHistoryItems)
|
||||||
|
|
||||||
// Create a default conversation
|
// Create a default conversation
|
||||||
conversationID := generateConversationID()
|
conversationID := generateConversationID()
|
||||||
conversation := &Conversation{
|
conversation := &Conversation{
|
||||||
ID: conversationID,
|
ID: conversationID,
|
||||||
// TODO: We need to truncate the conversation items when a new item is added and we have run out of space. There are multiple places where items
|
|
||||||
// can be added so we could use a datastructure here that enforces truncation upon addition
|
|
||||||
Items: []*types.MessageItemUnion{},
|
Items: []*types.MessageItemUnion{},
|
||||||
}
|
}
|
||||||
session.Conversations[conversationID] = conversation
|
session.Conversations[conversationID] = conversation
|
||||||
@@ -577,6 +595,18 @@ func runRealtimeSession(application *application.Application, t Transport, model
|
|||||||
}
|
}
|
||||||
session.ModelInterface = m
|
session.ModelInterface = m
|
||||||
|
|
||||||
|
if session.SummaryModel != "" {
|
||||||
|
summaryModelName := session.SummaryModel
|
||||||
|
sid := sessionID
|
||||||
|
session.summarizerFactory = func() (Model, error) {
|
||||||
|
summaryCfg, lerr := application.ModelConfigLoader().LoadModelConfigFileByNameDefaultOptions(summaryModelName, application.ApplicationConfig())
|
||||||
|
if lerr != nil {
|
||||||
|
return nil, fmt.Errorf("load summary model config %q: %w", summaryModelName, lerr)
|
||||||
|
}
|
||||||
|
return newModel(&summaryCfg.Pipeline, application.ModelConfigLoader(), application.ModelLoader(), application.ApplicationConfig(), evaluator, buildRealtimeRoutingContext(application, sid))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if cfg.Pipeline.VoiceGateEnabled() {
|
if cfg.Pipeline.VoiceGateEnabled() {
|
||||||
gate, gerr := newVoiceGate(
|
gate, gerr := newVoiceGate(
|
||||||
*cfg.Pipeline.VoiceRecognition,
|
*cfg.Pipeline.VoiceRecognition,
|
||||||
@@ -807,6 +837,15 @@ func runRealtimeSession(application *application.Application, t Transport, model
|
|||||||
commitUtterance(respCtx, allAudio, session, conversation, t)
|
commitUtterance(respCtx, allAudio, session, conversation, t)
|
||||||
}()
|
}()
|
||||||
|
|
||||||
|
case types.InputAudioBufferClearEvent:
|
||||||
|
xlog.Debug("recv", "message", string(msg))
|
||||||
|
// Discard a partially-captured utterance so the client can restart
|
||||||
|
// input cleanly without the stale buffer leaking into the next commit.
|
||||||
|
clearInputAudio(session)
|
||||||
|
sendEvent(t, types.InputAudioBufferClearedEvent{
|
||||||
|
ServerEventBase: types.ServerEventBase{EventID: e.EventID},
|
||||||
|
})
|
||||||
|
|
||||||
case types.ConversationItemCreateEvent:
|
case types.ConversationItemCreateEvent:
|
||||||
xlog.Debug("recv", "message", string(msg))
|
xlog.Debug("recv", "message", string(msg))
|
||||||
// Add the item to the conversation
|
// Add the item to the conversation
|
||||||
@@ -841,7 +880,39 @@ func runRealtimeSession(application *application.Application, t Transport, model
|
|||||||
})
|
})
|
||||||
|
|
||||||
case types.ConversationItemDeleteEvent:
|
case types.ConversationItemDeleteEvent:
|
||||||
sendError(t, "not_implemented", "Deleting items not implemented", "", "event_TODO")
|
xlog.Debug("recv", "message", string(msg))
|
||||||
|
if e.ItemID == "" {
|
||||||
|
sendError(t, "invalid_item_id", "Need item_id, but none specified", "", "event_TODO")
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
conversation.Lock.Lock()
|
||||||
|
updated, ok := deleteItem(conversation.Items, e.ItemID)
|
||||||
|
conversation.Items = updated
|
||||||
|
conversation.Lock.Unlock()
|
||||||
|
if !ok {
|
||||||
|
sendError(t, "invalid_item_id", "Item to delete not found", "", "event_TODO")
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
sendEvent(t, types.ConversationItemDeletedEvent{
|
||||||
|
ServerEventBase: types.ServerEventBase{EventID: e.EventID},
|
||||||
|
ItemID: e.ItemID,
|
||||||
|
})
|
||||||
|
|
||||||
|
case types.ConversationItemTruncateEvent:
|
||||||
|
xlog.Debug("recv", "message", string(msg))
|
||||||
|
conversation.Lock.Lock()
|
||||||
|
ok := truncateAssistantText(conversation.Items, e.ItemID, e.ContentIndex)
|
||||||
|
conversation.Lock.Unlock()
|
||||||
|
if !ok {
|
||||||
|
sendError(t, "invalid_item_id", "Item to truncate not found", "", "event_TODO")
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
sendEvent(t, types.ConversationItemTruncatedEvent{
|
||||||
|
ServerEventBase: types.ServerEventBase{EventID: e.EventID},
|
||||||
|
ItemID: e.ItemID,
|
||||||
|
ContentIndex: e.ContentIndex,
|
||||||
|
AudioEndMs: e.AudioEndMs,
|
||||||
|
})
|
||||||
|
|
||||||
case types.ConversationItemRetrieveEvent:
|
case types.ConversationItemRetrieveEvent:
|
||||||
xlog.Debug("recv", "message", string(msg))
|
xlog.Debug("recv", "message", string(msg))
|
||||||
@@ -854,21 +925,7 @@ func runRealtimeSession(application *application.Application, t Transport, model
|
|||||||
conversation.Lock.Lock()
|
conversation.Lock.Lock()
|
||||||
var retrievedItem types.MessageItemUnion
|
var retrievedItem types.MessageItemUnion
|
||||||
for _, item := range conversation.Items {
|
for _, item := range conversation.Items {
|
||||||
// We need to check ID in the union
|
if itemID(item) == e.ItemID {
|
||||||
var id string
|
|
||||||
if item.System != nil {
|
|
||||||
id = item.System.ID
|
|
||||||
} else if item.User != nil {
|
|
||||||
id = item.User.ID
|
|
||||||
} else if item.Assistant != nil {
|
|
||||||
id = item.Assistant.ID
|
|
||||||
} else if item.FunctionCall != nil {
|
|
||||||
id = item.FunctionCall.ID
|
|
||||||
} else if item.FunctionCallOutput != nil {
|
|
||||||
id = item.FunctionCallOutput.ID
|
|
||||||
}
|
|
||||||
|
|
||||||
if id == e.ItemID {
|
|
||||||
retrievedItem = *item
|
retrievedItem = *item
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
@@ -1666,6 +1723,9 @@ const maxAssistantToolTurns = 10
|
|||||||
|
|
||||||
func triggerResponse(ctx context.Context, session *Session, conv *Conversation, t Transport, overrides *types.ResponseCreateParams) {
|
func triggerResponse(ctx context.Context, session *Session, conv *Conversation, t Transport, overrides *types.ResponseCreateParams) {
|
||||||
triggerResponseAtTurn(ctx, session, conv, t, overrides, 0)
|
triggerResponseAtTurn(ctx, session, conv, t, overrides, 0)
|
||||||
|
// Fold aged-out turns into the rolling memory off the critical path; the
|
||||||
|
// next turn reaps the smaller buffer.
|
||||||
|
session.maybeCompact(conv)
|
||||||
}
|
}
|
||||||
|
|
||||||
func triggerResponseAtTurn(ctx context.Context, session *Session, conv *Conversation, t Transport, overrides *types.ResponseCreateParams, toolTurn int) {
|
func triggerResponseAtTurn(ctx context.Context, session *Session, conv *Conversation, t Transport, overrides *types.ResponseCreateParams, toolTurn int) {
|
||||||
@@ -1721,6 +1781,7 @@ func triggerResponseAtTurn(ctx context.Context, session *Session, conv *Conversa
|
|||||||
var lastUserSpeaker *types.Speaker
|
var lastUserSpeaker *types.Speaker
|
||||||
personalize := session.voiceGate != nil && session.voiceGate.cfg.PersonalizeEnabled()
|
personalize := session.voiceGate != nil && session.voiceGate.cfg.PersonalizeEnabled()
|
||||||
conv.Lock.Lock()
|
conv.Lock.Lock()
|
||||||
|
conversationHistory = withMemory(conversationHistory, conv.Memory)
|
||||||
items := trimRealtimeItems(conv.Items, session.MaxHistoryItems)
|
items := trimRealtimeItems(conv.Items, session.MaxHistoryItems)
|
||||||
for _, item := range items {
|
for _, item := range items {
|
||||||
if item.User != nil {
|
if item.User != nil {
|
||||||
|
|||||||
326
core/http/endpoints/openai/realtime_compaction.go
Normal file
326
core/http/endpoints/openai/realtime_compaction.go
Normal file
@@ -0,0 +1,326 @@
|
|||||||
|
package openai
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"fmt"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/mudler/LocalAI/core/config"
|
||||||
|
"github.com/mudler/LocalAI/core/http/endpoints/openai/types"
|
||||||
|
"github.com/mudler/LocalAI/core/schema"
|
||||||
|
"github.com/mudler/LocalAI/pkg/reasoning"
|
||||||
|
"github.com/mudler/xlog"
|
||||||
|
)
|
||||||
|
|
||||||
|
const (
|
||||||
|
defaultMaxSummaryTokens = 512
|
||||||
|
memoryPrefix = "Summary of earlier conversation:\n"
|
||||||
|
// compactionTimeout bounds the summarizer call so a stuck model can't pin the
|
||||||
|
// compacting flag (and thus block all further compaction) forever.
|
||||||
|
compactionTimeout = 60 * time.Second
|
||||||
|
)
|
||||||
|
|
||||||
|
// withMemory inserts the rolling summary as a system message after the existing
|
||||||
|
// (instructions) history. No-op when memory is empty.
|
||||||
|
func withMemory(history schema.Messages, memory string) schema.Messages {
|
||||||
|
if memory == "" {
|
||||||
|
return history
|
||||||
|
}
|
||||||
|
content := memoryPrefix + memory
|
||||||
|
return append(history, schema.Message{
|
||||||
|
Role: string(types.MessageRoleSystem),
|
||||||
|
StringContent: content,
|
||||||
|
Content: content,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
// renderItemsTranscript renders conversation items as a plain "role: text"
|
||||||
|
// transcript for summarization. Non-text items (bare tool calls) are labelled
|
||||||
|
// so the summarizer keeps track of actions taken.
|
||||||
|
func renderItemsTranscript(items []*types.MessageItemUnion) string {
|
||||||
|
var b strings.Builder
|
||||||
|
for _, item := range items {
|
||||||
|
switch {
|
||||||
|
case item.User != nil:
|
||||||
|
b.WriteString("user: ")
|
||||||
|
for _, c := range item.User.Content {
|
||||||
|
if c.Text != "" {
|
||||||
|
b.WriteString(c.Text)
|
||||||
|
}
|
||||||
|
if c.Transcript != "" {
|
||||||
|
b.WriteString(c.Transcript)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
b.WriteString("\n")
|
||||||
|
case item.Assistant != nil:
|
||||||
|
b.WriteString("assistant: ")
|
||||||
|
// Realtime assistant *audio* turns store the spoken words in
|
||||||
|
// .Transcript (not .Text), so emit both or spoken turns are dropped.
|
||||||
|
for _, c := range item.Assistant.Content {
|
||||||
|
if c.Text != "" {
|
||||||
|
b.WriteString(c.Text)
|
||||||
|
}
|
||||||
|
if c.Transcript != "" {
|
||||||
|
b.WriteString(c.Transcript)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
b.WriteString("\n")
|
||||||
|
case item.FunctionCall != nil:
|
||||||
|
b.WriteString(fmt.Sprintf("assistant called tool %s(%s)\n", item.FunctionCall.Name, item.FunctionCall.Arguments))
|
||||||
|
case item.FunctionCallOutput != nil:
|
||||||
|
b.WriteString(fmt.Sprintf("tool result: %s\n", item.FunctionCallOutput.Output))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return strings.TrimSpace(b.String())
|
||||||
|
}
|
||||||
|
|
||||||
|
// buildSummaryMessages builds the chat messages for the summarizer LLM: a system
|
||||||
|
// instruction plus prior memory and the new transcript to fold in. maxTokens is
|
||||||
|
// advisory (fed to the prompt; not hard-enforced in v1).
|
||||||
|
func buildSummaryMessages(priorMemory, transcript string, maxTokens int) schema.Messages {
|
||||||
|
system := fmt.Sprintf("You maintain a running memory of a live voice conversation. "+
|
||||||
|
"Merge the prior memory with the new exchanges into an updated memory. "+
|
||||||
|
"Keep names, decisions, facts, preferences, and open threads. Be concise "+
|
||||||
|
"(under ~%d tokens). Output only the updated memory, with no reasoning or tags.", maxTokens)
|
||||||
|
var user strings.Builder
|
||||||
|
if priorMemory != "" {
|
||||||
|
user.WriteString("Prior memory:\n")
|
||||||
|
user.WriteString(priorMemory)
|
||||||
|
user.WriteString("\n\n")
|
||||||
|
}
|
||||||
|
user.WriteString("New exchanges to fold in:\n")
|
||||||
|
user.WriteString(transcript)
|
||||||
|
return schema.Messages{
|
||||||
|
{Role: string(types.MessageRoleSystem), StringContent: system, Content: system},
|
||||||
|
{Role: string(types.MessageRoleUser), StringContent: user.String(), Content: user.String()},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// clearInputAudio resets the session's pending input audio buffer (the raw
|
||||||
|
// PCM and any buffered Opus frames). Used by the input_audio_buffer.clear
|
||||||
|
// realtime event so a client can discard a partially-captured utterance.
|
||||||
|
func clearInputAudio(s *Session) {
|
||||||
|
s.AudioBufferLock.Lock()
|
||||||
|
s.InputAudioBuffer = nil
|
||||||
|
s.AudioBufferLock.Unlock()
|
||||||
|
s.OpusFramesLock.Lock()
|
||||||
|
s.OpusFrames = nil
|
||||||
|
s.OpusFramesLock.Unlock()
|
||||||
|
}
|
||||||
|
|
||||||
|
// itemID extracts the id from any MessageItemUnion variant ("" if none).
|
||||||
|
func itemID(item *types.MessageItemUnion) string {
|
||||||
|
switch {
|
||||||
|
case item == nil:
|
||||||
|
return ""
|
||||||
|
case item.System != nil:
|
||||||
|
return item.System.ID
|
||||||
|
case item.User != nil:
|
||||||
|
return item.User.ID
|
||||||
|
case item.Assistant != nil:
|
||||||
|
return item.Assistant.ID
|
||||||
|
case item.FunctionCall != nil:
|
||||||
|
return item.FunctionCall.ID
|
||||||
|
case item.FunctionCallOutput != nil:
|
||||||
|
return item.FunctionCallOutput.ID
|
||||||
|
default:
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// deleteItem removes the item with id from items, returning the new slice and
|
||||||
|
// whether it was found.
|
||||||
|
func deleteItem(items []*types.MessageItemUnion, id string) ([]*types.MessageItemUnion, bool) {
|
||||||
|
for i, item := range items {
|
||||||
|
if itemID(item) == id {
|
||||||
|
return append(items[:i:i], items[i+1:]...), true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return items, false
|
||||||
|
}
|
||||||
|
|
||||||
|
// truncateAssistantText clears the text of the assistant item's content part at
|
||||||
|
// contentIndex. Minimal truncate: used to discard an interrupted/barge-in
|
||||||
|
// response tail. Both .Text and .Transcript are cleared because realtime audio
|
||||||
|
// turns store the spoken words in .Transcript (clearing only .Text would no-op).
|
||||||
|
func truncateAssistantText(items []*types.MessageItemUnion, id string, contentIndex int) bool {
|
||||||
|
for _, item := range items {
|
||||||
|
if itemID(item) != id || item.Assistant == nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if contentIndex >= 0 && contentIndex < len(item.Assistant.Content) {
|
||||||
|
item.Assistant.Content[contentIndex].Text = ""
|
||||||
|
item.Assistant.Content[contentIndex].Transcript = ""
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
// compactionCut returns the index splitting items into overflow (items[:cut],
|
||||||
|
// to be summarized+evicted) and the kept live tail (items[cut:]), keeping the
|
||||||
|
// last `keep` items. It mirrors trimRealtimeItems' pair-safety: the cut is
|
||||||
|
// pulled left so a function_call and its function_call_output are never split
|
||||||
|
// across the boundary (the whole pair lands in the kept tail). Returns 0 when
|
||||||
|
// there is nothing to cut.
|
||||||
|
func compactionCut(items []*types.MessageItemUnion, keep int) int {
|
||||||
|
// keep <= 0 means no live-window cap (the "unlimited history" sentinel, as
|
||||||
|
// in trimRealtimeItems): there is nothing to evict, so cut nothing. This
|
||||||
|
// also avoids indexing items[len(items)] in the pair-safety loop below.
|
||||||
|
if keep <= 0 {
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
cut := len(items) - keep
|
||||||
|
if cut <= 0 {
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
for cut > 0 && items[cut] != nil && items[cut].FunctionCallOutput != nil {
|
||||||
|
cut--
|
||||||
|
}
|
||||||
|
return cut
|
||||||
|
}
|
||||||
|
|
||||||
|
// resolveCompaction reads the pipeline.compaction block, applying defaults and
|
||||||
|
// the trigger>max_history invariant. maxHistory is the already-resolved live
|
||||||
|
// window size. Returns enabled=false (and zero values) when compaction is off.
|
||||||
|
func resolveCompaction(cfg *config.ModelConfig, maxHistory int) (enabled bool, trigger, maxSummaryTokens int, summaryModel string) {
|
||||||
|
if cfg == nil || cfg.Pipeline.Compaction == nil || !cfg.Pipeline.Compaction.Enabled {
|
||||||
|
return false, 0, 0, ""
|
||||||
|
}
|
||||||
|
c := cfg.Pipeline.Compaction
|
||||||
|
trigger = c.TriggerItems
|
||||||
|
if trigger <= 0 {
|
||||||
|
trigger = maxHistory * 2
|
||||||
|
}
|
||||||
|
if trigger <= maxHistory {
|
||||||
|
trigger = maxHistory + 1
|
||||||
|
}
|
||||||
|
maxSummaryTokens = c.MaxSummaryTokens
|
||||||
|
if maxSummaryTokens <= 0 {
|
||||||
|
maxSummaryTokens = defaultMaxSummaryTokens
|
||||||
|
}
|
||||||
|
return true, trigger, maxSummaryTokens, c.SummaryModel
|
||||||
|
}
|
||||||
|
|
||||||
|
// prefixMatches reports whether items begins with the same ids, in order, as
|
||||||
|
// snapshot — i.e. the overflow we summarized is still at the head (no concurrent
|
||||||
|
// client delete reshuffled it).
|
||||||
|
func prefixMatches(items, snapshot []*types.MessageItemUnion) bool {
|
||||||
|
if len(items) < len(snapshot) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
for i := range snapshot {
|
||||||
|
if itemID(items[i]) != itemID(snapshot[i]) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
// compact folds overflow items into conv.Memory and evicts them. It never holds
|
||||||
|
// conv.Lock across the summarizer call: snapshot under lock, summarize unlocked,
|
||||||
|
// commit under lock (re-validating the head is unchanged). On any error it
|
||||||
|
// leaves the conversation untouched — items are never dropped without a summary.
|
||||||
|
func (s *Session) compact(conv *Conversation, model Model) {
|
||||||
|
if model == nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
// Snapshot.
|
||||||
|
conv.Lock.Lock()
|
||||||
|
if len(conv.Items) <= s.CompactionTrigger {
|
||||||
|
conv.Lock.Unlock()
|
||||||
|
return
|
||||||
|
}
|
||||||
|
cut := compactionCut(conv.Items, s.MaxHistoryItems)
|
||||||
|
if cut <= 0 {
|
||||||
|
conv.Lock.Unlock()
|
||||||
|
return
|
||||||
|
}
|
||||||
|
overflow := append([]*types.MessageItemUnion(nil), conv.Items[:cut]...)
|
||||||
|
prior := conv.Memory
|
||||||
|
conv.Lock.Unlock()
|
||||||
|
|
||||||
|
// Summarize (unlocked).
|
||||||
|
msgs := buildSummaryMessages(prior, renderItemsTranscript(overflow), s.MaxSummaryTokens)
|
||||||
|
ctx, cancel := context.WithTimeout(context.Background(), compactionTimeout)
|
||||||
|
defer cancel()
|
||||||
|
predFunc, err := model.Predict(ctx, msgs, nil, nil, nil, nil, nil, nil, nil, nil, nil)
|
||||||
|
if err != nil {
|
||||||
|
xlog.Warn("realtime compaction: summarizer predict failed", "error", err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
pred, err := predFunc()
|
||||||
|
if err != nil {
|
||||||
|
xlog.Warn("realtime compaction: summarizer inference failed", "error", err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
// Strip any leaked reasoning/thinking spans using the same extractor the
|
||||||
|
// rest of the realtime path uses, rather than a bespoke regex.
|
||||||
|
rcfg := reasoning.Config{}
|
||||||
|
if mc := model.PredictConfig(); mc != nil {
|
||||||
|
rcfg = spokenReasoningConfig(mc.ReasoningConfig)
|
||||||
|
}
|
||||||
|
_, summary := reasoning.ExtractReasoningComplete(pred.Response, "", rcfg)
|
||||||
|
summary = strings.TrimSpace(summary)
|
||||||
|
if summary == "" {
|
||||||
|
xlog.Warn("realtime compaction: empty summary, skipping eviction")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Commit.
|
||||||
|
conv.Lock.Lock()
|
||||||
|
defer conv.Lock.Unlock()
|
||||||
|
if !prefixMatches(conv.Items, overflow) {
|
||||||
|
xlog.Debug("realtime compaction: head changed during summary, skipping")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
conv.Memory = summary
|
||||||
|
conv.Items = conv.Items[len(overflow):]
|
||||||
|
xlog.Debug("realtime compaction: evicted items into memory", "evicted", len(overflow), "remaining", len(conv.Items))
|
||||||
|
}
|
||||||
|
|
||||||
|
// summarizerModel resolves the model used to produce compaction summaries.
|
||||||
|
// Without a configured summary_model (or factory) it reuses the pipeline LLM.
|
||||||
|
func (s *Session) summarizerModel() Model {
|
||||||
|
if s.SummaryModel == "" || s.summarizerFactory == nil {
|
||||||
|
return s.ModelInterface
|
||||||
|
}
|
||||||
|
s.summarizerOnce.Do(func() {
|
||||||
|
m, err := s.summarizerFactory()
|
||||||
|
if err != nil {
|
||||||
|
xlog.Warn("realtime compaction: summary_model load failed, falling back to pipeline LLM", "model", s.SummaryModel, "error", err)
|
||||||
|
m = s.ModelInterface
|
||||||
|
}
|
||||||
|
s.summarizerCached = m
|
||||||
|
})
|
||||||
|
return s.summarizerCached
|
||||||
|
}
|
||||||
|
|
||||||
|
// maybeCompact schedules a background compaction when the live buffer has grown
|
||||||
|
// past the trigger and none is already running. Returns immediately.
|
||||||
|
func (s *Session) maybeCompact(conv *Conversation) {
|
||||||
|
if !s.CompactionEnabled {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
conv.Lock.Lock()
|
||||||
|
over := len(conv.Items) > s.CompactionTrigger
|
||||||
|
conv.Lock.Unlock()
|
||||||
|
if !over {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if !conv.compacting.CompareAndSwap(false, true) {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
go func() {
|
||||||
|
defer conv.compacting.Store(false)
|
||||||
|
// Resolve (and, for a configured summary_model, lazily load) the
|
||||||
|
// summarizer only when a compaction actually runs, off the response
|
||||||
|
// path — so the model load never blocks a user turn.
|
||||||
|
model := s.summarizerModel()
|
||||||
|
if model == nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
s.compact(conv, model)
|
||||||
|
}()
|
||||||
|
}
|
||||||
308
core/http/endpoints/openai/realtime_compaction_test.go
Normal file
308
core/http/endpoints/openai/realtime_compaction_test.go
Normal file
@@ -0,0 +1,308 @@
|
|||||||
|
package openai
|
||||||
|
|
||||||
|
import (
|
||||||
|
"errors"
|
||||||
|
|
||||||
|
. "github.com/onsi/ginkgo/v2"
|
||||||
|
. "github.com/onsi/gomega"
|
||||||
|
|
||||||
|
"github.com/mudler/LocalAI/core/backend"
|
||||||
|
"github.com/mudler/LocalAI/core/config"
|
||||||
|
"github.com/mudler/LocalAI/core/http/endpoints/openai/types"
|
||||||
|
"github.com/mudler/LocalAI/core/schema"
|
||||||
|
)
|
||||||
|
|
||||||
|
var _ = Describe("resolveCompaction", func() {
|
||||||
|
It("disables when the block is absent", func() {
|
||||||
|
enabled, _, _, _ := resolveCompaction(&config.ModelConfig{}, 6)
|
||||||
|
Expect(enabled).To(BeFalse())
|
||||||
|
})
|
||||||
|
|
||||||
|
It("defaults trigger to 2x max history and tokens to 512", func() {
|
||||||
|
cfg := &config.ModelConfig{Pipeline: config.Pipeline{Compaction: &config.PipelineCompaction{Enabled: true}}}
|
||||||
|
enabled, trigger, maxTok, _ := resolveCompaction(cfg, 6)
|
||||||
|
Expect(enabled).To(BeTrue())
|
||||||
|
Expect(trigger).To(Equal(12))
|
||||||
|
Expect(maxTok).To(Equal(512))
|
||||||
|
})
|
||||||
|
|
||||||
|
It("clamps trigger to max history + 1 when misconfigured", func() {
|
||||||
|
cfg := &config.ModelConfig{Pipeline: config.Pipeline{Compaction: &config.PipelineCompaction{Enabled: true, TriggerItems: 4}}}
|
||||||
|
_, trigger, _, _ := resolveCompaction(cfg, 6)
|
||||||
|
Expect(trigger).To(Equal(7))
|
||||||
|
})
|
||||||
|
|
||||||
|
It("honors explicit values", func() {
|
||||||
|
cfg := &config.ModelConfig{Pipeline: config.Pipeline{Compaction: &config.PipelineCompaction{
|
||||||
|
Enabled: true, TriggerItems: 20, MaxSummaryTokens: 256, SummaryModel: "tiny"}}}
|
||||||
|
enabled, trigger, maxTok, model := resolveCompaction(cfg, 6)
|
||||||
|
Expect(enabled).To(BeTrue())
|
||||||
|
Expect(trigger).To(Equal(20))
|
||||||
|
Expect(maxTok).To(Equal(256))
|
||||||
|
Expect(model).To(Equal("tiny"))
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
var _ = Describe("deleteItem", func() {
|
||||||
|
mk := func(ids ...string) []*types.MessageItemUnion {
|
||||||
|
out := make([]*types.MessageItemUnion, len(ids))
|
||||||
|
for i, id := range ids {
|
||||||
|
out[i] = &types.MessageItemUnion{User: &types.MessageItemUser{ID: id}}
|
||||||
|
}
|
||||||
|
return out
|
||||||
|
}
|
||||||
|
|
||||||
|
It("removes the item with the given id", func() {
|
||||||
|
items, ok := deleteItem(mk("a", "b", "c"), "b")
|
||||||
|
Expect(ok).To(BeTrue())
|
||||||
|
Expect(len(items)).To(Equal(2))
|
||||||
|
Expect(itemID(items[0])).To(Equal("a"))
|
||||||
|
Expect(itemID(items[1])).To(Equal("c"))
|
||||||
|
})
|
||||||
|
|
||||||
|
It("reports not found for an unknown id", func() {
|
||||||
|
_, ok := deleteItem(mk("a"), "zzz")
|
||||||
|
Expect(ok).To(BeFalse())
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
var _ = Describe("clearInputAudio", func() {
|
||||||
|
It("resets the pending PCM and buffered Opus frames", func() {
|
||||||
|
s := &Session{InputAudioBuffer: []byte{1, 2, 3}, OpusFrames: [][]byte{{9}}}
|
||||||
|
clearInputAudio(s)
|
||||||
|
Expect(s.InputAudioBuffer).To(BeNil())
|
||||||
|
Expect(s.OpusFrames).To(BeNil())
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
var _ = Describe("truncateAssistantText", func() {
|
||||||
|
It("clears the text of the assistant content part at the index", func() {
|
||||||
|
items := []*types.MessageItemUnion{{Assistant: &types.MessageItemAssistant{
|
||||||
|
ID: "a1",
|
||||||
|
Content: []types.MessageContentOutput{{Type: types.MessageContentTypeText, Text: "hello world"}},
|
||||||
|
}}}
|
||||||
|
ok := truncateAssistantText(items, "a1", 0)
|
||||||
|
Expect(ok).To(BeTrue())
|
||||||
|
Expect(items[0].Assistant.Content[0].Text).To(Equal(""))
|
||||||
|
})
|
||||||
|
|
||||||
|
// Realtime assistant *audio* turns store the spoken words in .Transcript, not
|
||||||
|
// .Text, so a barge-in truncate must clear .Transcript too or it would no-op.
|
||||||
|
It("clears the transcript of an assistant audio content part", func() {
|
||||||
|
items := []*types.MessageItemUnion{{Assistant: &types.MessageItemAssistant{
|
||||||
|
ID: "a1",
|
||||||
|
Content: []types.MessageContentOutput{{Type: types.MessageContentTypeAudio, Transcript: "hello world"}},
|
||||||
|
}}}
|
||||||
|
ok := truncateAssistantText(items, "a1", 0)
|
||||||
|
Expect(ok).To(BeTrue())
|
||||||
|
Expect(items[0].Assistant.Content[0].Transcript).To(Equal(""))
|
||||||
|
})
|
||||||
|
|
||||||
|
It("returns false for an unknown id", func() {
|
||||||
|
Expect(truncateAssistantText(nil, "nope", 0)).To(BeFalse())
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
var _ = Describe("compactionCut", func() {
|
||||||
|
user := func(id string) *types.MessageItemUnion {
|
||||||
|
return &types.MessageItemUnion{User: &types.MessageItemUser{ID: id}}
|
||||||
|
}
|
||||||
|
call := func(id string) *types.MessageItemUnion {
|
||||||
|
return &types.MessageItemUnion{FunctionCall: &types.MessageItemFunctionCall{ID: id}}
|
||||||
|
}
|
||||||
|
out := func(id string) *types.MessageItemUnion {
|
||||||
|
return &types.MessageItemUnion{FunctionCallOutput: &types.MessageItemFunctionCallOutput{ID: id}}
|
||||||
|
}
|
||||||
|
|
||||||
|
It("cuts exactly len-keep when no pairs straddle the boundary", func() {
|
||||||
|
items := []*types.MessageItemUnion{user("1"), user("2"), user("3"), user("4")}
|
||||||
|
Expect(compactionCut(items, 2)).To(Equal(2))
|
||||||
|
})
|
||||||
|
|
||||||
|
It("returns 0 when nothing to cut", func() {
|
||||||
|
Expect(compactionCut([]*types.MessageItemUnion{user("1")}, 2)).To(Equal(0))
|
||||||
|
})
|
||||||
|
|
||||||
|
It("returns 0 (cuts nothing) when keep is 0 — the unlimited-window sentinel", func() {
|
||||||
|
items := []*types.MessageItemUnion{user("1"), user("2"), user("3")}
|
||||||
|
Expect(compactionCut(items, 0)).To(Equal(0))
|
||||||
|
})
|
||||||
|
|
||||||
|
It("moves the boundary so a call/output pair is not split", func() {
|
||||||
|
// keep=2 -> naive cut=2, but items[2] is the output of items[1]'s call;
|
||||||
|
// pull the cut right so the whole pair stays in the kept tail.
|
||||||
|
items := []*types.MessageItemUnion{user("1"), call("c"), out("c"), user("4")}
|
||||||
|
Expect(compactionCut(items, 2)).To(Equal(1))
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
var _ = Describe("withMemory", func() {
|
||||||
|
It("inserts a memory system message when memory is non-empty", func() {
|
||||||
|
base := schema.Messages{{Role: "system", StringContent: "instructions"}}
|
||||||
|
out := withMemory(base, "user is Bob; wants pizza")
|
||||||
|
Expect(len(out)).To(Equal(2))
|
||||||
|
Expect(out[1].Role).To(Equal("system"))
|
||||||
|
Expect(out[1].StringContent).To(ContainSubstring("user is Bob"))
|
||||||
|
Expect(out[1].StringContent).To(ContainSubstring("Summary of earlier conversation"))
|
||||||
|
})
|
||||||
|
|
||||||
|
It("is a no-op when memory is empty", func() {
|
||||||
|
base := schema.Messages{{Role: "system", StringContent: "instructions"}}
|
||||||
|
Expect(withMemory(base, "")).To(HaveLen(1))
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
var _ = Describe("renderItemsTranscript", func() {
|
||||||
|
It("renders user and assistant text turns", func() {
|
||||||
|
items := []*types.MessageItemUnion{
|
||||||
|
{User: &types.MessageItemUser{Content: []types.MessageContentInput{{Type: types.MessageContentTypeInputText, Text: "hi"}}}},
|
||||||
|
{Assistant: &types.MessageItemAssistant{Content: []types.MessageContentOutput{{Type: types.MessageContentTypeText, Text: "hello"}}}},
|
||||||
|
}
|
||||||
|
out := renderItemsTranscript(items)
|
||||||
|
Expect(out).To(ContainSubstring("user: hi"))
|
||||||
|
Expect(out).To(ContainSubstring("assistant: hello"))
|
||||||
|
})
|
||||||
|
|
||||||
|
// Realtime assistant *audio* turns store the spoken words in .Transcript, not
|
||||||
|
// .Text, so the transcript builder must emit .Transcript too or spoken turns
|
||||||
|
// would be dropped from the summary.
|
||||||
|
It("renders an assistant audio turn from its transcript", func() {
|
||||||
|
items := []*types.MessageItemUnion{
|
||||||
|
{Assistant: &types.MessageItemAssistant{Content: []types.MessageContentOutput{{Type: types.MessageContentTypeAudio, Transcript: "spoken words"}}}},
|
||||||
|
}
|
||||||
|
Expect(renderItemsTranscript(items)).To(ContainSubstring("assistant: spoken words"))
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
var _ = Describe("buildSummaryMessages", func() {
|
||||||
|
It("includes prior memory and the new transcript", func() {
|
||||||
|
msgs := buildSummaryMessages("prior facts", "user: hi", 512)
|
||||||
|
Expect(len(msgs)).To(Equal(2))
|
||||||
|
Expect(msgs[0].Role).To(Equal("system"))
|
||||||
|
Expect(msgs[1].StringContent).To(ContainSubstring("prior facts"))
|
||||||
|
Expect(msgs[1].StringContent).To(ContainSubstring("user: hi"))
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
var _ = Describe("compact", func() {
|
||||||
|
user := func(id, text string) *types.MessageItemUnion {
|
||||||
|
return &types.MessageItemUnion{User: &types.MessageItemUser{ID: id,
|
||||||
|
Content: []types.MessageContentInput{{Type: types.MessageContentTypeInputText, Text: text}}}}
|
||||||
|
}
|
||||||
|
|
||||||
|
It("summarizes overflow into Memory and evicts it, keeping the live tail", func() {
|
||||||
|
conv := &Conversation{Items: []*types.MessageItemUnion{
|
||||||
|
user("1", "a"), user("2", "b"), user("3", "c"), user("4", "d"),
|
||||||
|
user("5", "e"), user("6", "f"), user("7", "g"), user("8", "h"),
|
||||||
|
}}
|
||||||
|
s := &Session{CompactionEnabled: true, CompactionTrigger: 7, MaxHistoryItems: 4, MaxSummaryTokens: 512}
|
||||||
|
m := &fakeModel{predictResp: backend.LLMResponse{Response: "ROLLED UP"}}
|
||||||
|
|
||||||
|
s.compact(conv, m)
|
||||||
|
|
||||||
|
Expect(conv.Memory).To(Equal("ROLLED UP"))
|
||||||
|
Expect(len(conv.Items)).To(Equal(4))
|
||||||
|
Expect(itemID(conv.Items[0])).To(Equal("5"))
|
||||||
|
// The summarizer saw the evicted turns.
|
||||||
|
Expect(m.lastMessages[1].StringContent).To(ContainSubstring("a"))
|
||||||
|
})
|
||||||
|
|
||||||
|
It("leaves Items and Memory untouched when the summarizer errors", func() {
|
||||||
|
items := []*types.MessageItemUnion{user("1", "a"), user("2", "b"), user("3", "c")}
|
||||||
|
conv := &Conversation{Items: items}
|
||||||
|
s := &Session{CompactionEnabled: true, CompactionTrigger: 2, MaxHistoryItems: 1, MaxSummaryTokens: 512}
|
||||||
|
m := &fakeModel{predictErr: errors.New("boom")}
|
||||||
|
|
||||||
|
s.compact(conv, m)
|
||||||
|
|
||||||
|
Expect(conv.Memory).To(Equal(""))
|
||||||
|
Expect(len(conv.Items)).To(Equal(3))
|
||||||
|
})
|
||||||
|
|
||||||
|
It("strips leaked reasoning tags from the summary via the shared extractor", func() {
|
||||||
|
conv := &Conversation{Items: []*types.MessageItemUnion{
|
||||||
|
user("1", "a"), user("2", "b"), user("3", "c"), user("4", "d"),
|
||||||
|
user("5", "e"), user("6", "f"), user("7", "g"), user("8", "h"),
|
||||||
|
}}
|
||||||
|
s := &Session{CompactionEnabled: true, CompactionTrigger: 7, MaxHistoryItems: 4, MaxSummaryTokens: 512}
|
||||||
|
m := &fakeModel{predictResp: backend.LLMResponse{Response: "<think>planning the summary</think>CLEAN SUMMARY"}}
|
||||||
|
|
||||||
|
s.compact(conv, m)
|
||||||
|
|
||||||
|
Expect(conv.Memory).To(Equal("CLEAN SUMMARY"))
|
||||||
|
Expect(conv.Memory).ToNot(ContainSubstring("planning"))
|
||||||
|
})
|
||||||
|
|
||||||
|
It("does nothing when items are at or below the trigger", func() {
|
||||||
|
conv := &Conversation{Items: []*types.MessageItemUnion{user("1", "a")}}
|
||||||
|
s := &Session{CompactionEnabled: true, CompactionTrigger: 7, MaxHistoryItems: 4}
|
||||||
|
s.compact(conv, &fakeModel{predictResp: backend.LLMResponse{Response: "x"}})
|
||||||
|
Expect(conv.Memory).To(Equal(""))
|
||||||
|
Expect(len(conv.Items)).To(Equal(1))
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
var _ = Describe("prefixMatches", func() {
|
||||||
|
user := func(id string) *types.MessageItemUnion {
|
||||||
|
return &types.MessageItemUnion{User: &types.MessageItemUser{ID: id}}
|
||||||
|
}
|
||||||
|
|
||||||
|
It("matches when items begins with the snapshot ids in order", func() {
|
||||||
|
items := []*types.MessageItemUnion{user("1"), user("2"), user("3")}
|
||||||
|
snap := []*types.MessageItemUnion{user("1"), user("2")}
|
||||||
|
Expect(prefixMatches(items, snap)).To(BeTrue())
|
||||||
|
})
|
||||||
|
|
||||||
|
It("matches an empty snapshot", func() {
|
||||||
|
Expect(prefixMatches([]*types.MessageItemUnion{user("1")}, nil)).To(BeTrue())
|
||||||
|
})
|
||||||
|
|
||||||
|
It("fails when items is shorter than the snapshot (a concurrent delete shrank the head)", func() {
|
||||||
|
items := []*types.MessageItemUnion{user("1")}
|
||||||
|
snap := []*types.MessageItemUnion{user("1"), user("2")}
|
||||||
|
Expect(prefixMatches(items, snap)).To(BeFalse())
|
||||||
|
})
|
||||||
|
|
||||||
|
It("fails when the head ids differ (a concurrent delete reordered the head)", func() {
|
||||||
|
items := []*types.MessageItemUnion{user("2"), user("3")}
|
||||||
|
snap := []*types.MessageItemUnion{user("1"), user("2")}
|
||||||
|
Expect(prefixMatches(items, snap)).To(BeFalse())
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
var _ = Describe("summarizerModel", func() {
|
||||||
|
It("returns the pipeline model when no summary_model is set", func() {
|
||||||
|
m := &fakeModel{}
|
||||||
|
s := &Session{ModelInterface: m}
|
||||||
|
Expect(s.summarizerModel()).To(Equal(m))
|
||||||
|
})
|
||||||
|
|
||||||
|
It("uses the factory (once) when summary_model is set", func() {
|
||||||
|
pipeline := &fakeModel{}
|
||||||
|
small := &fakeModel{}
|
||||||
|
calls := 0
|
||||||
|
s := &Session{ModelInterface: pipeline, SummaryModel: "tiny",
|
||||||
|
summarizerFactory: func() (Model, error) { calls++; return small, nil }}
|
||||||
|
Expect(s.summarizerModel()).To(Equal(small))
|
||||||
|
Expect(s.summarizerModel()).To(Equal(small))
|
||||||
|
Expect(calls).To(Equal(1))
|
||||||
|
})
|
||||||
|
|
||||||
|
It("falls back to the pipeline model when the factory errors", func() {
|
||||||
|
pipeline := &fakeModel{}
|
||||||
|
s := &Session{ModelInterface: pipeline, SummaryModel: "tiny",
|
||||||
|
summarizerFactory: func() (Model, error) { return nil, errors.New("nope") }}
|
||||||
|
Expect(s.summarizerModel()).To(Equal(pipeline))
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
var _ = Describe("itemID", func() {
|
||||||
|
It("returns the id for each variant and empty for nil", func() {
|
||||||
|
Expect(itemID(nil)).To(Equal(""))
|
||||||
|
Expect(itemID(&types.MessageItemUnion{User: &types.MessageItemUser{ID: "u1"}})).To(Equal("u1"))
|
||||||
|
Expect(itemID(&types.MessageItemUnion{Assistant: &types.MessageItemAssistant{ID: "a1"}})).To(Equal("a1"))
|
||||||
|
Expect(itemID(&types.MessageItemUnion{System: &types.MessageItemSystem{ID: "s1"}})).To(Equal("s1"))
|
||||||
|
Expect(itemID(&types.MessageItemUnion{FunctionCall: &types.MessageItemFunctionCall{ID: "f1"}})).To(Equal("f1"))
|
||||||
|
Expect(itemID(&types.MessageItemUnion{FunctionCallOutput: &types.MessageItemFunctionCallOutput{ID: "o1"}})).To(Equal("o1"))
|
||||||
|
})
|
||||||
|
})
|
||||||
@@ -288,6 +288,21 @@ test.describe('Model Editor - Interactive Tab', () => {
|
|||||||
await expect(page.locator('input[placeholder^="match,"]')).toBeVisible()
|
await expect(page.locator('input[placeholder^="match,"]')).toBeVisible()
|
||||||
})
|
})
|
||||||
|
|
||||||
|
test('pattern min_len clamps a directly-typed negative to 0', async ({ page }) => {
|
||||||
|
const searchInput = page.locator('input[placeholder="Search fields to add..."]')
|
||||||
|
await searchInput.fill('Custom Secret Patterns')
|
||||||
|
const dropdown = searchInput.locator('..').locator('..')
|
||||||
|
await dropdown.locator('div', { hasText: 'Custom Secret Patterns' }).first().click()
|
||||||
|
|
||||||
|
await page.locator('button', { hasText: 'Add pattern' }).click()
|
||||||
|
// The number input's min={0} only limits the spinner arrows, not keyboard
|
||||||
|
// entry; the editor must sanitise a typed negative so a meaningless
|
||||||
|
// negative length floor never reaches the saved config.
|
||||||
|
const minLen = page.locator('input[aria-label="Minimum length"]')
|
||||||
|
await minLen.fill('-5')
|
||||||
|
await expect(minLen).toHaveValue('0')
|
||||||
|
})
|
||||||
|
|
||||||
// Regression: a map-typed field (entity_actions) present in the loaded YAML
|
// Regression: a map-typed field (entity_actions) present in the loaded YAML
|
||||||
// must render WITH its values. flattenConfig used to recurse into the map,
|
// must render WITH its values. flattenConfig used to recurse into the map,
|
||||||
// scattering it across pii_detection.entity_actions.<GROUP> paths that match
|
// scattering it across pii_detection.entity_actions.<GROUP> paths that match
|
||||||
@@ -329,4 +344,37 @@ test.describe('Model Editor - Interactive Tab', () => {
|
|||||||
await expect(page.getByText(/block —/i).first()).toBeVisible()
|
await expect(page.getByText(/block —/i).first()).toBeVisible()
|
||||||
})
|
})
|
||||||
|
|
||||||
|
// A map cannot hold two values for one key, so renaming a row to an existing
|
||||||
|
// group must collapse to a single row (Object.fromEntries, last write wins)
|
||||||
|
// rather than rendering two conflicting rows that silently lose one on save.
|
||||||
|
test('entity_actions collapses a duplicate group to a single row', async ({ page }) => {
|
||||||
|
await page.route('**/api/models/edit/ner-model', (route) => {
|
||||||
|
route.fulfill({
|
||||||
|
contentType: 'application/json',
|
||||||
|
body: JSON.stringify({
|
||||||
|
name: 'ner-model',
|
||||||
|
config: [
|
||||||
|
'name: ner-model',
|
||||||
|
'backend: llama-cpp',
|
||||||
|
'pii_detection:',
|
||||||
|
' entity_actions:',
|
||||||
|
' SSN: block',
|
||||||
|
' EMAIL: mask',
|
||||||
|
'',
|
||||||
|
].join('\n'),
|
||||||
|
}),
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
await page.goto('/app/model-editor/ner-model')
|
||||||
|
|
||||||
|
const groupInputs = page.locator('input[aria-label="Entity group"]')
|
||||||
|
await expect(groupInputs).toHaveCount(2)
|
||||||
|
|
||||||
|
// Rename the EMAIL row to duplicate SSN; the editor collapses to one SSN row.
|
||||||
|
await groupInputs.nth(1).fill('SSN')
|
||||||
|
await expect(groupInputs).toHaveCount(1)
|
||||||
|
await expect(groupInputs.nth(0)).toHaveValue('SSN')
|
||||||
|
})
|
||||||
|
|
||||||
})
|
})
|
||||||
|
|||||||
34
core/http/react-ui/e2e/nodes-detail.spec.js
Normal file
34
core/http/react-ui/e2e/nodes-detail.spec.js
Normal file
@@ -0,0 +1,34 @@
|
|||||||
|
import { test, expect } from './coverage-fixtures.js'
|
||||||
|
|
||||||
|
const ID = 'n1'
|
||||||
|
async function mockNode(page) {
|
||||||
|
await page.route(`**/api/nodes/${ID}`, r => r.fulfill({ status: 200, contentType: 'application/json',
|
||||||
|
body: JSON.stringify({ id: ID, name: 'alpha', node_type: 'backend', address: '10.0.0.1:50051', status: 'healthy', total_vram: 24e9, available_vram: 12e9, max_replicas_per_model: 1, labels: { env: 'prod' } }) }))
|
||||||
|
await page.route(`**/api/nodes/${ID}/models`, r => r.fulfill({ status: 200, contentType: 'application/json',
|
||||||
|
body: JSON.stringify([{ node_id: ID, model_name: 'llama-3.3', state: 'loaded', in_flight: 0, replica_index: 0 }]) }))
|
||||||
|
await page.route(`**/api/nodes/${ID}/backends`, r => r.fulfill({ status: 200, contentType: 'application/json',
|
||||||
|
body: JSON.stringify([{ name: 'llama-cpp', is_system: true, installed_at: '2026-06-01T00:00:00Z' }]) }))
|
||||||
|
}
|
||||||
|
|
||||||
|
test.describe('Node detail page', () => {
|
||||||
|
test('renders sections for a node', async ({ page }) => {
|
||||||
|
await mockNode(page)
|
||||||
|
await page.goto(`/app/nodes/${ID}`)
|
||||||
|
await expect(page.locator('.page-title').first()).toBeVisible({ timeout: 15_000 })
|
||||||
|
await expect(page.getByText('alpha')).toBeVisible()
|
||||||
|
await expect(page.getByText('llama-3.3')).toBeVisible()
|
||||||
|
await expect(page.getByText('llama-cpp')).toBeVisible()
|
||||||
|
await expect(page.getByText('env=prod')).toBeVisible()
|
||||||
|
})
|
||||||
|
|
||||||
|
test('is reachable by clicking a roster panel', async ({ page }) => {
|
||||||
|
await page.route('**/api/nodes', r => r.fulfill({ status: 200, contentType: 'application/json',
|
||||||
|
body: JSON.stringify([{ id: ID, name: 'alpha', node_type: 'backend', address: '10.0.0.1:50051', status: 'healthy' }]) }))
|
||||||
|
await page.route('**/api/nodes/models', r => r.fulfill({ status: 200, contentType: 'application/json', body: '[]' }))
|
||||||
|
await page.route('**/api/nodes/scheduling', r => r.fulfill({ status: 200, contentType: 'application/json', body: '[]' }))
|
||||||
|
await mockNode(page)
|
||||||
|
await page.goto('/app/nodes')
|
||||||
|
await page.locator('.node-panel').filter({ hasText: 'alpha' }).getByText('alpha').click()
|
||||||
|
await expect(page).toHaveURL(new RegExp(`/app/nodes/${ID}$`))
|
||||||
|
})
|
||||||
|
})
|
||||||
@@ -12,28 +12,37 @@ const NODE_NAME = 'worker-test'
|
|||||||
const BACKEND_NAME = 'cuda12-vllm-development'
|
const BACKEND_NAME = 'cuda12-vllm-development'
|
||||||
|
|
||||||
async function mockDistributedNodes(page, { onDelete } = {}) {
|
async function mockDistributedNodes(page, { onDelete } = {}) {
|
||||||
|
const nodeRecord = {
|
||||||
|
id: NODE_ID,
|
||||||
|
name: NODE_NAME,
|
||||||
|
node_type: 'backend',
|
||||||
|
address: '10.0.0.1:50051',
|
||||||
|
http_address: '10.0.0.1:8090',
|
||||||
|
status: 'healthy',
|
||||||
|
total_vram: 0,
|
||||||
|
available_vram: 0,
|
||||||
|
total_ram: 8_000_000_000,
|
||||||
|
available_ram: 4_000_000_000,
|
||||||
|
gpu_vendor: '',
|
||||||
|
last_heartbeat: new Date().toISOString(),
|
||||||
|
created_at: new Date().toISOString(),
|
||||||
|
updated_at: new Date().toISOString(),
|
||||||
|
}
|
||||||
|
|
||||||
await page.route('**/api/nodes', (route) => {
|
await page.route('**/api/nodes', (route) => {
|
||||||
route.fulfill({
|
route.fulfill({
|
||||||
status: 200,
|
status: 200,
|
||||||
contentType: 'application/json',
|
contentType: 'application/json',
|
||||||
body: JSON.stringify([
|
body: JSON.stringify([nodeRecord]),
|
||||||
{
|
})
|
||||||
id: NODE_ID,
|
})
|
||||||
name: NODE_NAME,
|
|
||||||
node_type: 'backend',
|
// The detail page fetches the single node via nodesApi.get(id).
|
||||||
address: '10.0.0.1:50051',
|
await page.route(`**/api/nodes/${NODE_ID}`, (route) => {
|
||||||
http_address: '10.0.0.1:8090',
|
route.fulfill({
|
||||||
status: 'healthy',
|
status: 200,
|
||||||
total_vram: 0,
|
contentType: 'application/json',
|
||||||
available_vram: 0,
|
body: JSON.stringify(nodeRecord),
|
||||||
total_ram: 8_000_000_000,
|
|
||||||
available_ram: 4_000_000_000,
|
|
||||||
gpu_vendor: '',
|
|
||||||
last_heartbeat: new Date().toISOString(),
|
|
||||||
created_at: new Date().toISOString(),
|
|
||||||
updated_at: new Date().toISOString(),
|
|
||||||
},
|
|
||||||
]),
|
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
|
|
||||||
@@ -80,24 +89,18 @@ async function mockDistributedNodes(page, { onDelete } = {}) {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
async function expandNodeAndWaitForBackends(page) {
|
async function openNodeDetail(page) {
|
||||||
await page.goto('/app/nodes')
|
// The per-node backend table now lives on the deep-linkable detail page
|
||||||
// Click the row to expand it. The chevron toggle and the row both work,
|
// at /app/nodes/:id (the old expand-row + "Manage" disclosure was removed
|
||||||
// but clicking the name cell is the most user-like.
|
// when the roster was restructured). Navigate straight there.
|
||||||
await page.getByText(NODE_NAME).first().click()
|
await page.goto(`/app/nodes/${NODE_ID}`)
|
||||||
// Backends, Capacity and Labels live behind a "Manage" <details>
|
|
||||||
// disclosure (the drawer was distilled to keep at-a-glance content
|
|
||||||
// lean — see distill refactor in the multi-replica branch). Open it
|
|
||||||
// by clicking the summary inside the .node-manage scope so the
|
|
||||||
// per-node backend table is in the DOM before assertions run.
|
|
||||||
await page.locator('.node-manage > summary').first().click()
|
|
||||||
await expect(page.getByRole('cell', { name: BACKEND_NAME, exact: true })).toBeVisible({ timeout: 10_000 })
|
await expect(page.getByRole('cell', { name: BACKEND_NAME, exact: true })).toBeVisible({ timeout: 10_000 })
|
||||||
}
|
}
|
||||||
|
|
||||||
test.describe('Nodes page — per-node backend actions', () => {
|
test.describe('Nodes page — per-node backend actions', () => {
|
||||||
test('upgrade affordance is self-explanatory (not "Reinstall backend" with a sync icon)', async ({ page }) => {
|
test('upgrade affordance is self-explanatory (not "Reinstall backend" with a sync icon)', async ({ page }) => {
|
||||||
await mockDistributedNodes(page)
|
await mockDistributedNodes(page)
|
||||||
await expandNodeAndWaitForBackends(page)
|
await openNodeDetail(page)
|
||||||
|
|
||||||
// Negative: the old, ambiguous wording must not be used.
|
// Negative: the old, ambiguous wording must not be used.
|
||||||
await expect(page.locator('button[title="Reinstall backend"]')).toHaveCount(0)
|
await expect(page.locator('button[title="Reinstall backend"]')).toHaveCount(0)
|
||||||
@@ -114,7 +117,7 @@ test.describe('Nodes page — per-node backend actions', () => {
|
|||||||
|
|
||||||
test('per-node backend row shows a delete (trash) button next to upgrade', async ({ page }) => {
|
test('per-node backend row shows a delete (trash) button next to upgrade', async ({ page }) => {
|
||||||
await mockDistributedNodes(page)
|
await mockDistributedNodes(page)
|
||||||
await expandNodeAndWaitForBackends(page)
|
await openNodeDetail(page)
|
||||||
|
|
||||||
const deleteBtn = page.locator('button[title="Delete backend from this node"]')
|
const deleteBtn = page.locator('button[title="Delete backend from this node"]')
|
||||||
await expect(deleteBtn).toBeVisible()
|
await expect(deleteBtn).toBeVisible()
|
||||||
@@ -128,7 +131,7 @@ test.describe('Nodes page — per-node backend actions', () => {
|
|||||||
postedBody = route.request().postDataJSON()
|
postedBody = route.request().postDataJSON()
|
||||||
},
|
},
|
||||||
})
|
})
|
||||||
await expandNodeAndWaitForBackends(page)
|
await openNodeDetail(page)
|
||||||
|
|
||||||
await page.locator('button[title="Delete backend from this node"]').click()
|
await page.locator('button[title="Delete backend from this node"]').click()
|
||||||
|
|
||||||
@@ -150,7 +153,7 @@ test.describe('Nodes page — per-node backend actions', () => {
|
|||||||
deleteCalls += 1
|
deleteCalls += 1
|
||||||
},
|
},
|
||||||
})
|
})
|
||||||
await expandNodeAndWaitForBackends(page)
|
await openNodeDetail(page)
|
||||||
|
|
||||||
await page.locator('button[title="Delete backend from this node"]').click()
|
await page.locator('button[title="Delete backend from this node"]').click()
|
||||||
|
|
||||||
|
|||||||
47
core/http/react-ui/e2e/nodes-roster.spec.js
Normal file
47
core/http/react-ui/e2e/nodes-roster.spec.js
Normal file
@@ -0,0 +1,47 @@
|
|||||||
|
import { test, expect } from './coverage-fixtures.js'
|
||||||
|
|
||||||
|
async function mockCluster(page, nodes) {
|
||||||
|
await page.route('**/api/nodes', r => r.fulfill({ status: 200, contentType: 'application/json', body: JSON.stringify(nodes) }))
|
||||||
|
await page.route('**/api/nodes/models', r => r.fulfill({ status: 200, contentType: 'application/json', body: '[]' }))
|
||||||
|
await page.route('**/api/nodes/scheduling', r => r.fulfill({ status: 200, contentType: 'application/json', body: '[]' }))
|
||||||
|
}
|
||||||
|
|
||||||
|
test.describe('Nodes roster header', () => {
|
||||||
|
test('shows a cluster pulse line and no stat-card grid', async ({ page }) => {
|
||||||
|
await mockCluster(page, [
|
||||||
|
{ id: 'n1', name: 'alpha', node_type: 'backend', address: '10.0.0.1:50051', status: 'healthy' },
|
||||||
|
{ id: 'n2', name: 'beta', node_type: 'backend', address: '10.0.0.2:50051', status: 'draining' },
|
||||||
|
])
|
||||||
|
await page.goto('/app/nodes')
|
||||||
|
await expect(page.locator('.cluster-pulse')).toBeVisible({ timeout: 15_000 })
|
||||||
|
await expect(page.locator('.cluster-pulse')).toContainText('2 nodes')
|
||||||
|
await expect(page.locator('.stat-grid')).toHaveCount(0)
|
||||||
|
})
|
||||||
|
|
||||||
|
test('shows an approval callout for pending nodes', async ({ page }) => {
|
||||||
|
await mockCluster(page, [{ id: 'n3', name: 'gamma', node_type: 'backend', address: '10.0.0.3:50051', status: 'pending' }])
|
||||||
|
await page.goto('/app/nodes')
|
||||||
|
await expect(page.locator('.attention-callout')).toContainText('approval', { timeout: 15_000 })
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
test.describe('Nodes roster panels', () => {
|
||||||
|
test('shows model chips without clicking and filters by type', async ({ page }) => {
|
||||||
|
await page.route('**/api/nodes', r => r.fulfill({ status: 200, contentType: 'application/json', body: JSON.stringify([
|
||||||
|
{ id: 'n1', name: 'alpha', node_type: 'backend', address: '10.0.0.1:50051', status: 'healthy' },
|
||||||
|
{ id: 'a1', name: 'agent-1', node_type: 'agent', address: '10.0.0.9:50051', status: 'healthy' },
|
||||||
|
]) }))
|
||||||
|
await page.route('**/api/nodes/models', r => r.fulfill({ status: 200, contentType: 'application/json', body: JSON.stringify([
|
||||||
|
{ node_id: 'n1', model_name: 'llama-3.3', state: 'loaded', in_flight: 2, replica_index: 0 },
|
||||||
|
]) }))
|
||||||
|
await page.route('**/api/nodes/scheduling', r => r.fulfill({ status: 200, contentType: 'application/json', body: '[]' }))
|
||||||
|
|
||||||
|
await page.goto('/app/nodes')
|
||||||
|
// model chip visible without any expand click
|
||||||
|
await expect(page.locator('.node-panel').filter({ hasText: 'alpha' }).getByText('llama-3.3')).toBeVisible({ timeout: 15_000 })
|
||||||
|
// segmented filter: Agent shows the agent node, hides the backend node
|
||||||
|
await page.getByRole('radio', { name: /Agent/ }).click()
|
||||||
|
await expect(page.getByText('agent-1')).toBeVisible()
|
||||||
|
await expect(page.getByText('alpha')).toHaveCount(0)
|
||||||
|
})
|
||||||
|
})
|
||||||
@@ -21,6 +21,7 @@ const PAGES = [
|
|||||||
['/app/backends', 'Backends'],
|
['/app/backends', 'Backends'],
|
||||||
['/app/settings', 'Settings'],
|
['/app/settings', 'Settings'],
|
||||||
['/app/nodes', 'Nodes'],
|
['/app/nodes', 'Nodes'],
|
||||||
|
['/app/scheduling', 'Scheduling'],
|
||||||
['/app/face', 'Face recognition'],
|
['/app/face', 'Face recognition'],
|
||||||
['/app/voice', 'Voice recognition'],
|
['/app/voice', 'Voice recognition'],
|
||||||
['/app/fine-tune', 'Fine-tuning'],
|
['/app/fine-tune', 'Fine-tuning'],
|
||||||
|
|||||||
16
core/http/react-ui/e2e/scheduling.spec.js
Normal file
16
core/http/react-ui/e2e/scheduling.spec.js
Normal file
@@ -0,0 +1,16 @@
|
|||||||
|
import { test, expect } from './coverage-fixtures.js'
|
||||||
|
|
||||||
|
test.describe('Scheduling page', () => {
|
||||||
|
test('renders at /app/scheduling with rules from the API', async ({ page }) => {
|
||||||
|
await page.route('**/api/nodes/scheduling', (route) => {
|
||||||
|
route.fulfill({
|
||||||
|
status: 200, contentType: 'application/json',
|
||||||
|
body: JSON.stringify([{ model_name: 'llama-3.3', spread_all: true, min_replicas: 0, max_replicas: 0 }]),
|
||||||
|
})
|
||||||
|
})
|
||||||
|
await page.goto('/app/scheduling')
|
||||||
|
await expect(page.locator('.page-title').first()).toBeVisible({ timeout: 15_000 })
|
||||||
|
await expect(page).toHaveURL(/\/app\/scheduling$/)
|
||||||
|
await expect(page.getByText('llama-3.3')).toBeVisible()
|
||||||
|
})
|
||||||
|
})
|
||||||
@@ -43,6 +43,10 @@
|
|||||||
"title": "Verteilte Knoten",
|
"title": "Verteilte Knoten",
|
||||||
"subtitle": "Backend- und Agenten-Worker-Knoten verwalten"
|
"subtitle": "Backend- und Agenten-Worker-Knoten verwalten"
|
||||||
},
|
},
|
||||||
|
"scheduling": {
|
||||||
|
"title": "Planung",
|
||||||
|
"subtitle": "Modellplatzierung und Replikat-Regeln im gesamten Cluster"
|
||||||
|
},
|
||||||
"p2p": {
|
"p2p": {
|
||||||
"title": "Verteilte KI-Berechnung",
|
"title": "Verteilte KI-Berechnung",
|
||||||
"subtitle": "Skalieren Sie Ihre KI-Workloads über mehrere Geräte mit Peer-to-Peer-Verteilung"
|
"subtitle": "Skalieren Sie Ihre KI-Workloads über mehrere Geräte mit Peer-to-Peer-Verteilung"
|
||||||
|
|||||||
@@ -50,6 +50,7 @@
|
|||||||
"backends": "Backends",
|
"backends": "Backends",
|
||||||
"traces": "Traces",
|
"traces": "Traces",
|
||||||
"nodes": "Knoten",
|
"nodes": "Knoten",
|
||||||
|
"scheduling": "Planung",
|
||||||
"swarm": "Swarm",
|
"swarm": "Swarm",
|
||||||
"system": "System",
|
"system": "System",
|
||||||
"settings": "Einstellungen",
|
"settings": "Einstellungen",
|
||||||
|
|||||||
@@ -43,6 +43,10 @@
|
|||||||
"title": "Distributed Nodes",
|
"title": "Distributed Nodes",
|
||||||
"subtitle": "Manage backend and agent worker nodes"
|
"subtitle": "Manage backend and agent worker nodes"
|
||||||
},
|
},
|
||||||
|
"scheduling": {
|
||||||
|
"title": "Scheduling",
|
||||||
|
"subtitle": "Model placement and replica rules across the cluster"
|
||||||
|
},
|
||||||
"p2p": {
|
"p2p": {
|
||||||
"title": "Distributed AI Computing",
|
"title": "Distributed AI Computing",
|
||||||
"subtitle": "Scale your AI workloads across multiple devices with peer-to-peer distribution"
|
"subtitle": "Scale your AI workloads across multiple devices with peer-to-peer distribution"
|
||||||
|
|||||||
@@ -51,6 +51,7 @@
|
|||||||
"backends": "Backends",
|
"backends": "Backends",
|
||||||
"traces": "Traces",
|
"traces": "Traces",
|
||||||
"nodes": "Nodes",
|
"nodes": "Nodes",
|
||||||
|
"scheduling": "Scheduling",
|
||||||
"swarm": "Swarm",
|
"swarm": "Swarm",
|
||||||
"system": "System",
|
"system": "System",
|
||||||
"settings": "Settings",
|
"settings": "Settings",
|
||||||
|
|||||||
@@ -43,6 +43,10 @@
|
|||||||
"title": "Nodos distribuidos",
|
"title": "Nodos distribuidos",
|
||||||
"subtitle": "Administra nodos worker de backends y agentes"
|
"subtitle": "Administra nodos worker de backends y agentes"
|
||||||
},
|
},
|
||||||
|
"scheduling": {
|
||||||
|
"title": "Planificación",
|
||||||
|
"subtitle": "Reglas de ubicación de modelos y réplicas en el clúster"
|
||||||
|
},
|
||||||
"p2p": {
|
"p2p": {
|
||||||
"title": "Computación de IA distribuida",
|
"title": "Computación de IA distribuida",
|
||||||
"subtitle": "Escala tus cargas de trabajo de IA en múltiples dispositivos con distribución peer-to-peer"
|
"subtitle": "Escala tus cargas de trabajo de IA en múltiples dispositivos con distribución peer-to-peer"
|
||||||
|
|||||||
@@ -50,6 +50,7 @@
|
|||||||
"backends": "Backends",
|
"backends": "Backends",
|
||||||
"traces": "Trazas",
|
"traces": "Trazas",
|
||||||
"nodes": "Nodos",
|
"nodes": "Nodos",
|
||||||
|
"scheduling": "Planificación",
|
||||||
"swarm": "Swarm",
|
"swarm": "Swarm",
|
||||||
"system": "Sistema",
|
"system": "Sistema",
|
||||||
"settings": "Configuración",
|
"settings": "Configuración",
|
||||||
|
|||||||
@@ -43,6 +43,10 @@
|
|||||||
"title": "Node Terdistribusi",
|
"title": "Node Terdistribusi",
|
||||||
"subtitle": "Kelola node backend dan node worker"
|
"subtitle": "Kelola node backend dan node worker"
|
||||||
},
|
},
|
||||||
|
"scheduling": {
|
||||||
|
"title": "Penjadwalan",
|
||||||
|
"subtitle": "Aturan penempatan model dan replika di seluruh klaster"
|
||||||
|
},
|
||||||
"p2p": {
|
"p2p": {
|
||||||
"title": "Komputasi AI Terdistribusi",
|
"title": "Komputasi AI Terdistribusi",
|
||||||
"subtitle": "Skalakan beban kerja AI Anda ke beberapa perangkat dengan distribusi peer-to-peer"
|
"subtitle": "Skalakan beban kerja AI Anda ke beberapa perangkat dengan distribusi peer-to-peer"
|
||||||
|
|||||||
@@ -51,6 +51,7 @@
|
|||||||
"backends": "Backend",
|
"backends": "Backend",
|
||||||
"traces": "Trace",
|
"traces": "Trace",
|
||||||
"nodes": "Node",
|
"nodes": "Node",
|
||||||
|
"scheduling": "Penjadwalan",
|
||||||
"swarm": "Swarm",
|
"swarm": "Swarm",
|
||||||
"system": "Sistem",
|
"system": "Sistem",
|
||||||
"settings": "Pengaturan",
|
"settings": "Pengaturan",
|
||||||
|
|||||||
@@ -43,6 +43,10 @@
|
|||||||
"title": "Nodi distribuiti",
|
"title": "Nodi distribuiti",
|
||||||
"subtitle": "Gestisci i nodi worker dei backend e degli agenti"
|
"subtitle": "Gestisci i nodi worker dei backend e degli agenti"
|
||||||
},
|
},
|
||||||
|
"scheduling": {
|
||||||
|
"title": "Pianificazione",
|
||||||
|
"subtitle": "Regole di posizionamento dei modelli e delle repliche nel cluster"
|
||||||
|
},
|
||||||
"p2p": {
|
"p2p": {
|
||||||
"title": "Calcolo AI distribuito",
|
"title": "Calcolo AI distribuito",
|
||||||
"subtitle": "Scala i tuoi carichi di lavoro AI su più dispositivi con la distribuzione peer-to-peer"
|
"subtitle": "Scala i tuoi carichi di lavoro AI su più dispositivi con la distribuzione peer-to-peer"
|
||||||
|
|||||||
@@ -50,6 +50,7 @@
|
|||||||
"backends": "Backend",
|
"backends": "Backend",
|
||||||
"traces": "Tracce",
|
"traces": "Tracce",
|
||||||
"nodes": "Nodi",
|
"nodes": "Nodi",
|
||||||
|
"scheduling": "Pianificazione",
|
||||||
"swarm": "Swarm",
|
"swarm": "Swarm",
|
||||||
"system": "Sistema",
|
"system": "Sistema",
|
||||||
"settings": "Impostazioni",
|
"settings": "Impostazioni",
|
||||||
|
|||||||
@@ -43,6 +43,10 @@
|
|||||||
"title": "분산 노드",
|
"title": "분산 노드",
|
||||||
"subtitle": "백엔드 및 에이전트 워커 노드를 관리합니다"
|
"subtitle": "백엔드 및 에이전트 워커 노드를 관리합니다"
|
||||||
},
|
},
|
||||||
|
"scheduling": {
|
||||||
|
"title": "스케줄링",
|
||||||
|
"subtitle": "클러스터 전반의 모델 배치 및 복제본 규칙"
|
||||||
|
},
|
||||||
"p2p": {
|
"p2p": {
|
||||||
"title": "분산 AI 컴퓨팅",
|
"title": "분산 AI 컴퓨팅",
|
||||||
"subtitle": "피어 투 피어 분산으로 여러 기기에 걸쳐 AI 워크로드를 확장합니다"
|
"subtitle": "피어 투 피어 분산으로 여러 기기에 걸쳐 AI 워크로드를 확장합니다"
|
||||||
|
|||||||
@@ -51,6 +51,7 @@
|
|||||||
"backends": "백엔드",
|
"backends": "백엔드",
|
||||||
"traces": "트레이스",
|
"traces": "트레이스",
|
||||||
"nodes": "노드",
|
"nodes": "노드",
|
||||||
|
"scheduling": "스케줄링",
|
||||||
"swarm": "Swarm",
|
"swarm": "Swarm",
|
||||||
"system": "시스템",
|
"system": "시스템",
|
||||||
"settings": "설정",
|
"settings": "설정",
|
||||||
|
|||||||
@@ -43,6 +43,10 @@
|
|||||||
"title": "分布式节点",
|
"title": "分布式节点",
|
||||||
"subtitle": "管理后端和智能体工作节点"
|
"subtitle": "管理后端和智能体工作节点"
|
||||||
},
|
},
|
||||||
|
"scheduling": {
|
||||||
|
"title": "调度",
|
||||||
|
"subtitle": "集群中的模型放置和副本规则"
|
||||||
|
},
|
||||||
"p2p": {
|
"p2p": {
|
||||||
"title": "分布式 AI 计算",
|
"title": "分布式 AI 计算",
|
||||||
"subtitle": "通过点对点分发将您的 AI 工作负载扩展到多个设备"
|
"subtitle": "通过点对点分发将您的 AI 工作负载扩展到多个设备"
|
||||||
|
|||||||
@@ -50,6 +50,7 @@
|
|||||||
"backends": "后端",
|
"backends": "后端",
|
||||||
"traces": "追踪",
|
"traces": "追踪",
|
||||||
"nodes": "节点",
|
"nodes": "节点",
|
||||||
|
"scheduling": "调度",
|
||||||
"swarm": "Swarm",
|
"swarm": "Swarm",
|
||||||
"system": "系统",
|
"system": "系统",
|
||||||
"settings": "设置",
|
"settings": "设置",
|
||||||
|
|||||||
@@ -8471,3 +8471,56 @@ select.input {
|
|||||||
.status-pill--error .status-pill__dot { background: var(--color-error); }
|
.status-pill--error .status-pill__dot { background: var(--color-error); }
|
||||||
.status-pill--info .status-pill__dot { background: var(--color-info); }
|
.status-pill--info .status-pill__dot { background: var(--color-info); }
|
||||||
.status-pill--muted .status-pill__dot { background: var(--color-text-muted); }
|
.status-pill--muted .status-pill__dot { background: var(--color-text-muted); }
|
||||||
|
|
||||||
|
/* Nodes: cluster pulse + attention callout (replaces the stat-card strip) */
|
||||||
|
.cluster-pulse {
|
||||||
|
font-size: var(--text-sm);
|
||||||
|
color: var(--color-text-muted);
|
||||||
|
margin: 0 0 var(--spacing-lg);
|
||||||
|
}
|
||||||
|
.cluster-pulse__strong { color: var(--color-text-primary); font-weight: 600; }
|
||||||
|
|
||||||
|
.attention-callout {
|
||||||
|
display: flex;
|
||||||
|
align-items: center;
|
||||||
|
justify-content: space-between;
|
||||||
|
gap: var(--spacing-md);
|
||||||
|
padding: var(--spacing-sm) var(--spacing-md);
|
||||||
|
border-radius: var(--radius-md);
|
||||||
|
margin-bottom: var(--spacing-lg);
|
||||||
|
font-size: var(--text-sm);
|
||||||
|
}
|
||||||
|
.attention-callout--warn {
|
||||||
|
background: var(--color-warning-light);
|
||||||
|
border: 1px solid var(--color-warning-border);
|
||||||
|
color: var(--color-text-primary);
|
||||||
|
}
|
||||||
|
.attention-callout--error {
|
||||||
|
background: var(--color-error-light);
|
||||||
|
border: 1px solid var(--color-error-border);
|
||||||
|
color: var(--color-text-primary);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Node roster panels (Nodes page) */
|
||||||
|
.node-roster { display: flex; flex-direction: column; gap: var(--spacing-sm); }
|
||||||
|
.node-panel {
|
||||||
|
background: var(--color-bg-secondary);
|
||||||
|
border: 1px solid var(--color-border-subtle);
|
||||||
|
border-radius: var(--radius-lg);
|
||||||
|
}
|
||||||
|
.node-panel__main { padding: var(--spacing-md) var(--spacing-lg); cursor: pointer; }
|
||||||
|
.node-panel:hover { border-color: var(--color-border); }
|
||||||
|
.node-panel__head { display: flex; align-items: flex-start; justify-content: space-between; gap: var(--spacing-md); }
|
||||||
|
.node-panel__id { display: flex; align-items: center; gap: var(--spacing-sm); flex-wrap: wrap; }
|
||||||
|
.node-panel__name { font-weight: 600; }
|
||||||
|
.node-panel__meta { display: flex; gap: var(--spacing-lg); margin-top: var(--spacing-sm); color: var(--color-text-muted); font-size: var(--text-xs); }
|
||||||
|
.node-panel__models { display: flex; flex-wrap: wrap; gap: 6px; margin-top: var(--spacing-sm); }
|
||||||
|
.model-chip {
|
||||||
|
display: inline-flex; align-items: center; gap: 5px;
|
||||||
|
font-family: var(--font-mono); font-size: 0.6875rem;
|
||||||
|
padding: 2px 8px; border-radius: var(--radius-sm); border: 1px solid;
|
||||||
|
}
|
||||||
|
.model-chip__dot { width: 6px; height: 6px; border-radius: 50%; }
|
||||||
|
.model-chip__state { opacity: 0.85; font-style: normal; }
|
||||||
|
.node-filter { margin-bottom: var(--spacing-lg); }
|
||||||
|
.node-detail__metrics { display: flex; gap: var(--spacing-xl); margin: var(--spacing-md) 0 var(--spacing-lg); flex-wrap: wrap; }
|
||||||
|
|||||||
@@ -74,7 +74,18 @@ export default function PatternListEditor({ value, onChange }) {
|
|||||||
min={0}
|
min={0}
|
||||||
value={r.min_len || 0}
|
value={r.min_len || 0}
|
||||||
title="Minimum match length (0 = no floor)"
|
title="Minimum match length (0 = no floor)"
|
||||||
onChange={e => update(i, { min_len: parseInt(e.target.value, 10) || 0 })}
|
// min={0} only constrains the spinner, not keyboard entry. Clamp a
|
||||||
|
// typed negative to 0 (a negative floor is meaningless and would
|
||||||
|
// disable the length filter). When we clamp, force the DOM value
|
||||||
|
// too: the resulting 0->0 state change is a no-op, so React's
|
||||||
|
// controlled input would otherwise keep displaying the rejected
|
||||||
|
// "-5" even though the saved value is 0.
|
||||||
|
onChange={e => {
|
||||||
|
const parsed = parseInt(e.target.value, 10)
|
||||||
|
const n = Math.max(0, parsed || 0)
|
||||||
|
if (parsed < 0) e.target.value = String(n)
|
||||||
|
update(i, { min_len: n })
|
||||||
|
}}
|
||||||
style={{ width: 80, fontSize: '0.8125rem' }}
|
style={{ width: 80, fontSize: '0.8125rem' }}
|
||||||
aria-label="Minimum length"
|
aria-label="Minimum length"
|
||||||
/>
|
/>
|
||||||
|
|||||||
@@ -59,6 +59,7 @@ export const operateConsole = {
|
|||||||
titleKey: 'operate.cluster',
|
titleKey: 'operate.cluster',
|
||||||
items: [
|
items: [
|
||||||
{ path: '/app/nodes', icon: 'fas fa-network-wired', labelKey: 'items.nodes', adminOnly: true, feature: 'distributed' },
|
{ path: '/app/nodes', icon: 'fas fa-network-wired', labelKey: 'items.nodes', adminOnly: true, feature: 'distributed' },
|
||||||
|
{ path: '/app/scheduling', icon: 'fas fa-calendar-alt', labelKey: 'items.scheduling', adminOnly: true, feature: 'distributed' },
|
||||||
{ path: '/app/p2p', icon: 'fas fa-circle-nodes', labelKey: 'items.swarm', adminOnly: true },
|
{ path: '/app/p2p', icon: 'fas fa-circle-nodes', labelKey: 'items.swarm', adminOnly: true },
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
|
|||||||
31
core/http/react-ui/src/components/nodes/AttentionCallout.jsx
Normal file
31
core/http/react-ui/src/components/nodes/AttentionCallout.jsx
Normal file
@@ -0,0 +1,31 @@
|
|||||||
|
export default function AttentionCallout({ nodes, onApprove }) {
|
||||||
|
const pending = nodes.filter(n => n.status === 'pending')
|
||||||
|
const unhealthy = nodes.filter(n => n.status === 'unhealthy' || n.status === 'offline')
|
||||||
|
if (pending.length === 0 && unhealthy.length === 0) return null
|
||||||
|
|
||||||
|
if (pending.length > 0) {
|
||||||
|
const first = pending[0]
|
||||||
|
const extra = pending.length - 1
|
||||||
|
return (
|
||||||
|
<div className="attention-callout attention-callout--warn">
|
||||||
|
<span>
|
||||||
|
<i className="fas fa-exclamation-circle" />{' '}
|
||||||
|
<strong>{pending.length} node{pending.length > 1 ? 's' : ''} awaiting approval</strong>
|
||||||
|
{' - '}{first.name}{extra > 0 ? ` +${extra} more` : ''}
|
||||||
|
</span>
|
||||||
|
<button className="btn btn-primary btn-sm" onClick={() => onApprove(first.id)}>
|
||||||
|
<i className="fas fa-check" /> Approve {first.name}
|
||||||
|
</button>
|
||||||
|
</div>
|
||||||
|
)
|
||||||
|
}
|
||||||
|
return (
|
||||||
|
<div className="attention-callout attention-callout--error">
|
||||||
|
<span>
|
||||||
|
<i className="fas fa-exclamation-triangle" />{' '}
|
||||||
|
<strong>{unhealthy.length} node{unhealthy.length > 1 ? 's' : ''} unhealthy</strong>
|
||||||
|
{' - '}{unhealthy.map(n => n.name).slice(0, 3).join(', ')}
|
||||||
|
</span>
|
||||||
|
</div>
|
||||||
|
)
|
||||||
|
}
|
||||||
196
core/http/react-ui/src/components/nodes/CapacityEditor.jsx
Normal file
196
core/http/react-ui/src/components/nodes/CapacityEditor.jsx
Normal file
@@ -0,0 +1,196 @@
|
|||||||
|
import { useState, useEffect, useCallback } from 'react'
|
||||||
|
import { nodesApi } from '../../utils/api'
|
||||||
|
import LoadingSpinner from '../LoadingSpinner'
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Inline editor for a node's per-model replica capacity.
|
||||||
|
*
|
||||||
|
* UX intent: discoverable affordance (pencil icon) that opens an inline
|
||||||
|
* input - never a modal for a single field. Source-of-truth note is shown
|
||||||
|
* inline so operators understand a worker re-registration will overwrite
|
||||||
|
* their override; surfacing this in a tooltip would hide too important a
|
||||||
|
* caveat.
|
||||||
|
*
|
||||||
|
* `confirmShrink` is a hook the parent provides so the page can render its
|
||||||
|
* own confirm dialog (it has access to all nodes and can phrase the message
|
||||||
|
* with full context).
|
||||||
|
*/
|
||||||
|
export default function CapacityEditor({ node, loadedModelCounts, onUpdate, confirmShrink, addToast }) {
|
||||||
|
const current = node.max_replicas_per_model || 1
|
||||||
|
const isOverride = !!node.max_replicas_per_model_manually_set
|
||||||
|
const [editing, setEditing] = useState(false)
|
||||||
|
const [draft, setDraft] = useState(String(current))
|
||||||
|
const [saving, setSaving] = useState(false)
|
||||||
|
const [resetting, setResetting] = useState(false)
|
||||||
|
|
||||||
|
// Reset draft when current value changes (server response, etc.)
|
||||||
|
useEffect(() => {
|
||||||
|
if (!editing) setDraft(String(current))
|
||||||
|
}, [current, editing])
|
||||||
|
|
||||||
|
const cancel = useCallback(() => {
|
||||||
|
setEditing(false)
|
||||||
|
setDraft(String(current))
|
||||||
|
}, [current])
|
||||||
|
|
||||||
|
const save = useCallback(async () => {
|
||||||
|
const value = parseInt(draft, 10)
|
||||||
|
if (!Number.isFinite(value) || value < 1) {
|
||||||
|
addToast('Replica capacity must be 1 or higher', 'error')
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if (value === current) {
|
||||||
|
setEditing(false)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
// Reducing the cap below current loaded replicas: confirm so the operator
|
||||||
|
// sees the consequence (running replicas keep going until idle eviction).
|
||||||
|
const maxLoadedAcrossModels = Math.max(0, ...Object.values(loadedModelCounts || {}))
|
||||||
|
if (value < maxLoadedAcrossModels) {
|
||||||
|
const proceed = await confirmShrink({ node, newValue: value, currentLoaded: maxLoadedAcrossModels })
|
||||||
|
if (!proceed) return
|
||||||
|
}
|
||||||
|
setSaving(true)
|
||||||
|
try {
|
||||||
|
await nodesApi.updateMaxReplicasPerModel(node.id, value)
|
||||||
|
addToast(`Replica capacity set to ${value} on ${node.name}`, 'success')
|
||||||
|
setEditing(false)
|
||||||
|
onUpdate?.(value)
|
||||||
|
} catch (err) {
|
||||||
|
addToast(`Could not change replica capacity: ${err.message || err}`, 'error')
|
||||||
|
} finally {
|
||||||
|
setSaving(false)
|
||||||
|
}
|
||||||
|
}, [draft, current, node, loadedModelCounts, confirmShrink, onUpdate, addToast])
|
||||||
|
|
||||||
|
const onKeyDown = (e) => {
|
||||||
|
if (e.key === 'Enter') { e.preventDefault(); save() }
|
||||||
|
else if (e.key === 'Escape') { e.preventDefault(); cancel() }
|
||||||
|
}
|
||||||
|
|
||||||
|
const reset = useCallback(async () => {
|
||||||
|
setResetting(true)
|
||||||
|
try {
|
||||||
|
await nodesApi.resetMaxReplicasPerModel(node.id)
|
||||||
|
addToast(`Override cleared on ${node.name}; worker flag will apply on next re-registration`, 'success')
|
||||||
|
onUpdate?.(null)
|
||||||
|
} catch (err) {
|
||||||
|
addToast(`Could not reset override: ${err.message || err}`, 'error')
|
||||||
|
} finally {
|
||||||
|
setResetting(false)
|
||||||
|
}
|
||||||
|
}, [node, onUpdate, addToast])
|
||||||
|
|
||||||
|
return (
|
||||||
|
<div style={{
|
||||||
|
display: 'flex', alignItems: 'flex-start', gap: 'var(--spacing-md)',
|
||||||
|
}}>
|
||||||
|
<i className="fas fa-layer-group" style={{ color: 'var(--color-text-muted)', marginTop: 3 }} aria-hidden="true" />
|
||||||
|
<div style={{ flex: 1, minWidth: 0 }}>
|
||||||
|
<div style={{ display: 'flex', alignItems: 'center', gap: 'var(--spacing-sm)', flexWrap: 'wrap' }}>
|
||||||
|
<label
|
||||||
|
htmlFor={`capacity-${node.id}`}
|
||||||
|
style={{ fontSize: '0.8125rem', fontWeight: 600, color: 'var(--color-text-primary)' }}
|
||||||
|
>
|
||||||
|
Max replicas per model
|
||||||
|
</label>
|
||||||
|
{editing ? (
|
||||||
|
<>
|
||||||
|
<input
|
||||||
|
id={`capacity-${node.id}`}
|
||||||
|
type="number"
|
||||||
|
min={1}
|
||||||
|
value={draft}
|
||||||
|
disabled={saving}
|
||||||
|
onChange={(e) => setDraft(e.target.value)}
|
||||||
|
onKeyDown={onKeyDown}
|
||||||
|
autoFocus
|
||||||
|
aria-describedby={`capacity-hint-${node.id}`}
|
||||||
|
style={{
|
||||||
|
width: 72, padding: '4px 8px', borderRadius: 'var(--radius-sm)',
|
||||||
|
border: '1px solid var(--color-border)', background: 'var(--color-bg-primary)',
|
||||||
|
fontFamily: 'var(--font-mono)', fontSize: '0.8125rem',
|
||||||
|
color: 'var(--color-text-primary)',
|
||||||
|
}}
|
||||||
|
/>
|
||||||
|
<button
|
||||||
|
className="btn btn-primary btn-sm"
|
||||||
|
onClick={save}
|
||||||
|
disabled={saving}
|
||||||
|
style={{ minHeight: 32 }}
|
||||||
|
aria-label="Save replica capacity"
|
||||||
|
>
|
||||||
|
{saving ? <LoadingSpinner size="xs" /> : <><i className="fas fa-check" /> Save</>}
|
||||||
|
</button>
|
||||||
|
<button
|
||||||
|
className="btn btn-secondary btn-sm"
|
||||||
|
onClick={cancel}
|
||||||
|
disabled={saving}
|
||||||
|
style={{ minHeight: 32 }}
|
||||||
|
aria-label="Cancel"
|
||||||
|
>
|
||||||
|
Cancel
|
||||||
|
</button>
|
||||||
|
</>
|
||||||
|
) : (
|
||||||
|
<>
|
||||||
|
<span
|
||||||
|
className="cell-mono"
|
||||||
|
style={{ fontSize: '0.8125rem', color: 'var(--color-text-secondary)' }}
|
||||||
|
>
|
||||||
|
{current}
|
||||||
|
</span>
|
||||||
|
{isOverride && (
|
||||||
|
<span
|
||||||
|
title="This value was set from the UI. It will persist across worker restarts until you click Reset."
|
||||||
|
style={{
|
||||||
|
display: 'inline-block', fontSize: '0.6875rem', padding: '1px 6px',
|
||||||
|
borderRadius: 'var(--radius-sm)', fontWeight: 500,
|
||||||
|
background: 'var(--color-bg-primary)',
|
||||||
|
border: '1px solid var(--color-warning, #d97706)',
|
||||||
|
color: 'var(--color-warning, #d97706)',
|
||||||
|
}}
|
||||||
|
>
|
||||||
|
override
|
||||||
|
</span>
|
||||||
|
)}
|
||||||
|
<button
|
||||||
|
onClick={() => setEditing(true)}
|
||||||
|
aria-label={`Edit replica capacity (currently ${current})`}
|
||||||
|
title="Change replica capacity for this node"
|
||||||
|
style={{
|
||||||
|
display: 'inline-flex', alignItems: 'center', justifyContent: 'center',
|
||||||
|
minWidth: 32, minHeight: 32, padding: 4, borderRadius: 'var(--radius-sm)',
|
||||||
|
border: '1px solid var(--color-border-subtle)',
|
||||||
|
background: 'transparent', color: 'var(--color-text-muted)', cursor: 'pointer',
|
||||||
|
}}
|
||||||
|
>
|
||||||
|
<i className="fas fa-pencil-alt" />
|
||||||
|
</button>
|
||||||
|
{isOverride && (
|
||||||
|
<button
|
||||||
|
onClick={reset}
|
||||||
|
disabled={resetting}
|
||||||
|
aria-label="Clear admin override and let the worker flag apply"
|
||||||
|
title="Clear override; the worker's --max-replicas-per-model flag will apply on the next re-registration"
|
||||||
|
className="btn btn-secondary btn-sm"
|
||||||
|
style={{ minHeight: 32 }}
|
||||||
|
>
|
||||||
|
{resetting ? <LoadingSpinner size="xs" /> : <><i className="fas fa-undo" /> Reset</>}
|
||||||
|
</button>
|
||||||
|
)}
|
||||||
|
</>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
<div
|
||||||
|
id={`capacity-hint-${node.id}`}
|
||||||
|
style={{ fontSize: '0.75rem', color: 'var(--color-text-muted)', marginTop: 4, lineHeight: 1.4 }}
|
||||||
|
>
|
||||||
|
{isOverride
|
||||||
|
? <>Set from here. <strong>Reset</strong> to use the worker's default.</>
|
||||||
|
: <>Saved values stick across worker restarts.</>}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
)
|
||||||
|
}
|
||||||
18
core/http/react-ui/src/components/nodes/ClusterPulse.jsx
Normal file
18
core/http/react-ui/src/components/nodes/ClusterPulse.jsx
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
import { formatVRAM } from './nodeStatus'
|
||||||
|
|
||||||
|
export default function ClusterPulse({ nodes }) {
|
||||||
|
const total = nodes.length
|
||||||
|
const healthy = nodes.filter(n => n.status === 'healthy').length
|
||||||
|
const draining = nodes.filter(n => n.status === 'draining').length
|
||||||
|
const usedVRAM = nodes.reduce((s, n) =>
|
||||||
|
(n.total_vram && n.available_vram != null) ? s + (n.total_vram - n.available_vram) : s, 0)
|
||||||
|
const vramStr = formatVRAM(usedVRAM)
|
||||||
|
return (
|
||||||
|
<p className="cluster-pulse">
|
||||||
|
<span className="cluster-pulse__strong">{total} {total === 1 ? 'node' : 'nodes'}</span>
|
||||||
|
{' · '}<span style={{ color: 'var(--color-success)' }}>{healthy} healthy</span>
|
||||||
|
{draining > 0 && <>{' · '}<span style={{ color: 'var(--color-warning)' }}>{draining} draining</span></>}
|
||||||
|
{vramStr && <>{' · '}{vramStr} VRAM in use</>}
|
||||||
|
</p>
|
||||||
|
)
|
||||||
|
}
|
||||||
98
core/http/react-ui/src/components/nodes/KeyValueChips.jsx
Normal file
98
core/http/react-ui/src/components/nodes/KeyValueChips.jsx
Normal file
@@ -0,0 +1,98 @@
|
|||||||
|
import { useState } from 'react'
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Controlled chip-builder for { key: value } maps. Replaces the prior
|
||||||
|
* comma-separated-string Node Selector input AND the bespoke Labels editor
|
||||||
|
* in the node drawer - both were rendering the same chip pattern with
|
||||||
|
* subtly different markup.
|
||||||
|
*
|
||||||
|
* Fully controlled: parent owns the map and decides what onAdd/onRemove
|
||||||
|
* does (form state for the scheduling form; API calls for the live
|
||||||
|
* labels editor). The component just renders chips and a key/value input
|
||||||
|
* row.
|
||||||
|
*
|
||||||
|
* Props:
|
||||||
|
* pairs - current map of key -> value
|
||||||
|
* onAdd(k,v) - called when the user adds a pair (parent handles dedup
|
||||||
|
* and persistence side effects)
|
||||||
|
* onRemove(k) - called when a chip's × is clicked
|
||||||
|
* placeholderKey, placeholderValue - input hints
|
||||||
|
* ariaLabel - accessible name for the section
|
||||||
|
*/
|
||||||
|
export default function KeyValueChips({ pairs, onAdd, onRemove, placeholderKey = 'key', placeholderValue = 'value', ariaLabel }) {
|
||||||
|
const [k, setK] = useState('')
|
||||||
|
const [v, setV] = useState('')
|
||||||
|
|
||||||
|
const add = () => {
|
||||||
|
const key = k.trim()
|
||||||
|
if (!key) return
|
||||||
|
onAdd(key, v.trim())
|
||||||
|
setK(''); setV('')
|
||||||
|
}
|
||||||
|
const onKeyDown = (e) => {
|
||||||
|
if (e.key === 'Enter') { e.preventDefault(); add() }
|
||||||
|
}
|
||||||
|
|
||||||
|
const entries = pairs ? Object.entries(pairs) : []
|
||||||
|
return (
|
||||||
|
<div aria-label={ariaLabel}>
|
||||||
|
{entries.length > 0 && (
|
||||||
|
<div style={{ display: 'flex', flexWrap: 'wrap', gap: 4, marginBottom: 'var(--spacing-xs)' }}>
|
||||||
|
{entries.map(([key, val]) => (
|
||||||
|
<span key={key} style={{
|
||||||
|
display: 'inline-flex', alignItems: 'center', gap: 4,
|
||||||
|
fontSize: '0.75rem', padding: '2px 8px',
|
||||||
|
borderRadius: 'var(--radius-sm)',
|
||||||
|
background: 'var(--color-bg-tertiary)',
|
||||||
|
border: '1px solid var(--color-border-subtle)',
|
||||||
|
fontFamily: 'var(--font-mono)',
|
||||||
|
}}>
|
||||||
|
{key}={val}
|
||||||
|
<button
|
||||||
|
type="button"
|
||||||
|
onClick={(e) => { e.stopPropagation(); onRemove(key) }}
|
||||||
|
aria-label={`Remove ${key}`}
|
||||||
|
title="Remove"
|
||||||
|
style={{
|
||||||
|
background: 'none', border: 'none', cursor: 'pointer',
|
||||||
|
color: 'var(--color-text-muted)', fontSize: '0.625rem', padding: 0,
|
||||||
|
}}
|
||||||
|
>
|
||||||
|
<i className="fas fa-times" />
|
||||||
|
</button>
|
||||||
|
</span>
|
||||||
|
))}
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
<div style={{ display: 'flex', gap: 'var(--spacing-xs)', alignItems: 'stretch' }}>
|
||||||
|
<input
|
||||||
|
className="input"
|
||||||
|
type="text"
|
||||||
|
placeholder={placeholderKey}
|
||||||
|
value={k}
|
||||||
|
onChange={e => setK(e.target.value)}
|
||||||
|
onKeyDown={onKeyDown}
|
||||||
|
style={{ flex: 1 }}
|
||||||
|
/>
|
||||||
|
<input
|
||||||
|
className="input"
|
||||||
|
type="text"
|
||||||
|
placeholder={placeholderValue}
|
||||||
|
value={v}
|
||||||
|
onChange={e => setV(e.target.value)}
|
||||||
|
onKeyDown={onKeyDown}
|
||||||
|
style={{ flex: 1 }}
|
||||||
|
/>
|
||||||
|
<button
|
||||||
|
type="button"
|
||||||
|
className="btn btn-secondary btn-sm"
|
||||||
|
onClick={add}
|
||||||
|
disabled={!k.trim()}
|
||||||
|
style={{ minHeight: 36 }}
|
||||||
|
>
|
||||||
|
<i className="fas fa-plus" /> Add
|
||||||
|
</button>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
)
|
||||||
|
}
|
||||||
12
core/http/react-ui/src/components/nodes/ModelChip.jsx
Normal file
12
core/http/react-ui/src/components/nodes/ModelChip.jsx
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
import { modelStateConfig } from './nodeStatus'
|
||||||
|
|
||||||
|
export default function ModelChip({ model }) {
|
||||||
|
const cfg = modelStateConfig[model.state] || modelStateConfig.idle
|
||||||
|
return (
|
||||||
|
<span className="model-chip" style={{ background: cfg.bg, color: cfg.color, borderColor: cfg.border }}>
|
||||||
|
<span className="model-chip__dot" style={{ background: cfg.color }} />
|
||||||
|
{model.model_name}
|
||||||
|
{model.state !== 'loaded' && <span className="model-chip__state"> {model.state}</span>}
|
||||||
|
</span>
|
||||||
|
)
|
||||||
|
}
|
||||||
60
core/http/react-ui/src/components/nodes/NodePanel.jsx
Normal file
60
core/http/react-ui/src/components/nodes/NodePanel.jsx
Normal file
@@ -0,0 +1,60 @@
|
|||||||
|
import { useNavigate } from 'react-router-dom'
|
||||||
|
import StatusPill from './StatusPill'
|
||||||
|
import ModelChip from './ModelChip'
|
||||||
|
import ActionMenu from '../ActionMenu'
|
||||||
|
import { formatVRAM } from './nodeStatus'
|
||||||
|
|
||||||
|
export default function NodePanel({ node, models = [], onApprove, onDrain, onResume, onRemove }) {
|
||||||
|
const navigate = useNavigate()
|
||||||
|
const isAgent = node.node_type === 'agent'
|
||||||
|
const open = () => navigate(`/app/nodes/${node.id}`)
|
||||||
|
const usedVRAM = node.total_vram && node.available_vram != null ? node.total_vram - node.available_vram : null
|
||||||
|
|
||||||
|
return (
|
||||||
|
<div className="node-panel">
|
||||||
|
<div className="node-panel__main" onClick={open} role="button" tabIndex={0}
|
||||||
|
onKeyDown={(e) => { if (e.key === 'Enter') open() }}>
|
||||||
|
<div className="node-panel__head">
|
||||||
|
<div className="node-panel__id">
|
||||||
|
<StatusPill status={node.status} />
|
||||||
|
<span className="node-panel__name">{node.name}</span>
|
||||||
|
<span className="cell-mono cell-muted">{node.address}</span>
|
||||||
|
</div>
|
||||||
|
<div className="node-panel__actions" onClick={(e) => e.stopPropagation()}>
|
||||||
|
{node.status === 'pending' && (
|
||||||
|
<button className="btn btn-primary btn-sm" onClick={() => onApprove(node.id)}>
|
||||||
|
<i className="fas fa-check" /> Approve
|
||||||
|
</button>
|
||||||
|
)}
|
||||||
|
<ActionMenu
|
||||||
|
ariaLabel={`Actions for ${node.name}`}
|
||||||
|
triggerLabel={`Actions for ${node.name}`}
|
||||||
|
items={[
|
||||||
|
{ key: 'resume', icon: 'fa-play', label: 'Resume', hidden: node.status !== 'draining', onClick: () => onResume(node.id) },
|
||||||
|
{ key: 'drain', icon: 'fa-pause', label: 'Drain', hidden: node.status === 'draining' || node.status === 'pending', onClick: () => onDrain(node.id) },
|
||||||
|
{ divider: true, hidden: node.status === 'pending' },
|
||||||
|
{ key: 'remove', icon: 'fa-trash', label: 'Remove from cluster', danger: true, onClick: () => onRemove(node) },
|
||||||
|
]}
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
{!isAgent && (
|
||||||
|
<>
|
||||||
|
<div className="node-panel__meta">
|
||||||
|
{node.total_vram > 0 && (
|
||||||
|
<span className="cell-mono">VRAM {formatVRAM(usedVRAM) || '0'} / {formatVRAM(node.total_vram)}</span>
|
||||||
|
)}
|
||||||
|
<span className="cell-mono">{node.in_flight_count || 0} in-flight</span>
|
||||||
|
</div>
|
||||||
|
<div className="node-panel__models">
|
||||||
|
{models.length === 0
|
||||||
|
? <span className="cell-muted">No models loaded</span>
|
||||||
|
: models.map(m => <ModelChip key={`${m.model_name}-${m.replica_index ?? 0}`} model={m} />)}
|
||||||
|
</div>
|
||||||
|
</>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
)
|
||||||
|
}
|
||||||
11
core/http/react-ui/src/components/nodes/StatusPill.jsx
Normal file
11
core/http/react-ui/src/components/nodes/StatusPill.jsx
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
import { statusConfig } from './nodeStatus'
|
||||||
|
|
||||||
|
export default function StatusPill({ status }) {
|
||||||
|
const cfg = statusConfig[status] || statusConfig.unhealthy
|
||||||
|
return (
|
||||||
|
<span className="node-status" style={{ color: cfg.color }}>
|
||||||
|
<span className="node-status__dot" style={{ background: cfg.color }} />
|
||||||
|
{cfg.label}
|
||||||
|
</span>
|
||||||
|
)
|
||||||
|
}
|
||||||
34
core/http/react-ui/src/components/nodes/nodeStatus.js
vendored
Normal file
34
core/http/react-ui/src/components/nodes/nodeStatus.js
vendored
Normal file
@@ -0,0 +1,34 @@
|
|||||||
|
export const statusConfig = {
|
||||||
|
healthy: { color: 'var(--color-success)', label: 'Healthy' },
|
||||||
|
unhealthy: { color: 'var(--color-error)', label: 'Unhealthy' },
|
||||||
|
offline: { color: 'var(--color-error)', label: 'Offline' },
|
||||||
|
registering: { color: 'var(--color-primary)', label: 'Registering' },
|
||||||
|
draining: { color: 'var(--color-warning)', label: 'Draining' },
|
||||||
|
pending: { color: 'var(--color-warning)', label: 'Pending Approval' },
|
||||||
|
}
|
||||||
|
|
||||||
|
export const modelStateConfig = {
|
||||||
|
loaded: { bg: 'var(--color-success-light)', color: 'var(--color-success)', border: 'var(--color-success-border)' },
|
||||||
|
loading: { bg: 'var(--color-primary-light)', color: 'var(--color-primary)', border: 'var(--color-primary-border)' },
|
||||||
|
unloading: { bg: 'var(--color-warning-light)', color: 'var(--color-warning)', border: 'var(--color-warning-border)' },
|
||||||
|
idle: { bg: 'var(--color-bg-tertiary)', color: 'var(--color-text-muted)', border: 'var(--color-border-subtle)' },
|
||||||
|
}
|
||||||
|
|
||||||
|
export function formatVRAM(bytes) {
|
||||||
|
if (!bytes || bytes === 0) return null
|
||||||
|
const gb = bytes / (1024 * 1024 * 1024)
|
||||||
|
return gb >= 1 ? `${gb.toFixed(1)} GB` : `${(bytes / (1024 * 1024)).toFixed(0)} MB`
|
||||||
|
}
|
||||||
|
|
||||||
|
export function timeAgo(dateString) {
|
||||||
|
if (!dateString) return 'never'
|
||||||
|
const seconds = Math.floor((Date.now() - new Date(dateString).getTime()) / 1000)
|
||||||
|
if (seconds < 0) return 'just now'
|
||||||
|
if (seconds < 60) return `${seconds}s ago`
|
||||||
|
const minutes = Math.floor(seconds / 60)
|
||||||
|
if (minutes < 60) return `${minutes}m ago`
|
||||||
|
const hours = Math.floor(minutes / 60)
|
||||||
|
if (hours < 24) return `${hours}h ago`
|
||||||
|
const days = Math.floor(hours / 24)
|
||||||
|
return `${days}d ago`
|
||||||
|
}
|
||||||
352
core/http/react-ui/src/pages/NodeDetail.jsx
Normal file
352
core/http/react-ui/src/pages/NodeDetail.jsx
Normal file
@@ -0,0 +1,352 @@
|
|||||||
|
import { useState, useEffect, useCallback } from 'react'
|
||||||
|
import { useParams, useNavigate, useOutletContext } from 'react-router-dom'
|
||||||
|
import { nodesApi } from '../utils/api'
|
||||||
|
import PageHeader from '../components/PageHeader'
|
||||||
|
import LoadingSpinner from '../components/LoadingSpinner'
|
||||||
|
import ConfirmDialog from '../components/ConfirmDialog'
|
||||||
|
import StatusPill from '../components/nodes/StatusPill'
|
||||||
|
import CapacityEditor from '../components/nodes/CapacityEditor'
|
||||||
|
import KeyValueChips from '../components/nodes/KeyValueChips'
|
||||||
|
import { formatVRAM, modelStateConfig, timeAgo } from '../components/nodes/nodeStatus'
|
||||||
|
|
||||||
|
// Deep-linkable node management home. Reached by clicking a roster panel on
|
||||||
|
// /app/nodes. Surfaces what's running here plus the management affordances
|
||||||
|
// (capacity, backends, labels, drain/resume/remove) that previously lived in
|
||||||
|
// the expanded-row "Manage" drawer.
|
||||||
|
export default function NodeDetail() {
|
||||||
|
const { id } = useParams()
|
||||||
|
const navigate = useNavigate()
|
||||||
|
const { addToast } = useOutletContext()
|
||||||
|
const [node, setNode] = useState(null)
|
||||||
|
const [models, setModels] = useState([])
|
||||||
|
const [backends, setBackends] = useState([])
|
||||||
|
const [loading, setLoading] = useState(true)
|
||||||
|
const [confirmRemove, setConfirmRemove] = useState(false)
|
||||||
|
const [confirmUnload, setConfirmUnload] = useState(null)
|
||||||
|
const [confirmDeleteBackend, setConfirmDeleteBackend] = useState(null)
|
||||||
|
// Promise-based shrink confirmation: CapacityEditor awaits this hook so the
|
||||||
|
// page owns the dialog (it can phrase the message with full node context).
|
||||||
|
const [confirmShrinkState, setConfirmShrinkState] = useState(null)
|
||||||
|
|
||||||
|
const refresh = useCallback(async () => {
|
||||||
|
try {
|
||||||
|
const n = await nodesApi.get(id)
|
||||||
|
setNode(n)
|
||||||
|
const [m, b] = await Promise.all([nodesApi.getModels(id), nodesApi.getBackends(id)])
|
||||||
|
setModels(Array.isArray(m) ? m : [])
|
||||||
|
setBackends(Array.isArray(b) ? b : [])
|
||||||
|
} catch (err) {
|
||||||
|
addToast(`Failed to load node: ${err.message}`, 'error')
|
||||||
|
} finally {
|
||||||
|
setLoading(false)
|
||||||
|
}
|
||||||
|
}, [id, addToast])
|
||||||
|
|
||||||
|
useEffect(() => { refresh() }, [refresh])
|
||||||
|
|
||||||
|
const confirmShrink = useCallback((ctx) => new Promise((resolve) => {
|
||||||
|
setConfirmShrinkState({ ...ctx, resolve })
|
||||||
|
}), [])
|
||||||
|
|
||||||
|
if (loading) return <div className="page page--wide" style={{ display: 'flex', justifyContent: 'center', padding: 'var(--spacing-xl)' }}><LoadingSpinner size="lg" /></div>
|
||||||
|
if (!node) return <div className="page page--wide"><PageHeader title="Node not found" /></div>
|
||||||
|
|
||||||
|
const drain = async () => { try { await nodesApi.drain(id); addToast('Node set to draining', 'success'); refresh() } catch (e) { addToast(e.message, 'error') } }
|
||||||
|
const resume = async () => { try { await nodesApi.resume(id); addToast('Node resumed', 'success'); refresh() } catch (e) { addToast(e.message, 'error') } }
|
||||||
|
const remove = async () => { try { await nodesApi.delete(id); addToast('Node removed', 'success'); navigate('/app/nodes') } catch (e) { addToast(e.message, 'error') } }
|
||||||
|
const unload = async (name) => { try { await nodesApi.unloadModel(id, name); addToast(`Model "${name}" unloaded`, 'success'); refresh() } catch (e) { addToast(e.message, 'error') } }
|
||||||
|
const upgradeBackend = async (name) => { try { await nodesApi.installBackend(id, name); addToast(`Backend "${name}" upgraded`, 'success'); refresh() } catch (e) { addToast(e.message, 'error') } }
|
||||||
|
const deleteBackend = async (name) => { try { await nodesApi.deleteBackend(id, name); addToast(`Backend "${name}" deleted`, 'success'); refresh() } catch (e) { addToast(e.message, 'error') } }
|
||||||
|
const addLabel = async (k, v) => { try { await nodesApi.mergeLabels(id, { [k]: v }); refresh() } catch (e) { addToast(e.message, 'error') } }
|
||||||
|
const delLabel = async (k) => { try { await nodesApi.deleteLabel(id, k); refresh() } catch (e) { addToast(e.message, 'error') } }
|
||||||
|
|
||||||
|
const usedVRAM = node.total_vram && node.available_vram != null ? node.total_vram - node.available_vram : 0
|
||||||
|
// {modelName: replicaCount} of loaded models so the shrink confirm can warn
|
||||||
|
// if the new cap is below the actual count of any single model on this node.
|
||||||
|
const loadedModelCounts = (() => {
|
||||||
|
const counts = {}
|
||||||
|
models.forEach(m => { if (m.state === 'loaded') counts[m.model_name] = (counts[m.model_name] || 0) + 1 })
|
||||||
|
return counts
|
||||||
|
})()
|
||||||
|
|
||||||
|
return (
|
||||||
|
<div className="page page--wide">
|
||||||
|
<PageHeader
|
||||||
|
eyebrow={<a onClick={() => navigate('/app/nodes')} style={{ cursor: 'pointer', color: 'var(--color-primary)' }}><i className="fas fa-arrow-left" style={{ marginRight: 6 }} aria-hidden="true" />Cluster</a>}
|
||||||
|
title={<><StatusPill status={node.status} /> {node.name}</>}
|
||||||
|
supporting={node.address}
|
||||||
|
actions={
|
||||||
|
<>
|
||||||
|
{node.status === 'draining'
|
||||||
|
? <button className="btn btn-secondary btn-sm" onClick={resume}><i className="fas fa-play" /> Resume</button>
|
||||||
|
: <button className="btn btn-secondary btn-sm" onClick={drain}><i className="fas fa-pause" /> Drain</button>}
|
||||||
|
<button className="btn btn-danger btn-sm" onClick={() => setConfirmRemove(true)}><i className="fas fa-trash" /> Remove</button>
|
||||||
|
</>
|
||||||
|
}
|
||||||
|
/>
|
||||||
|
|
||||||
|
{/* Inline metrics row: VRAM / in-flight - no boxes, just labelled values. */}
|
||||||
|
<div className="node-detail__metrics">
|
||||||
|
{node.total_vram > 0 && (
|
||||||
|
<div>
|
||||||
|
<div className="drawer-eyebrow">VRAM</div>
|
||||||
|
<span className="cell-mono">{formatVRAM(usedVRAM) || '0'} / {formatVRAM(node.total_vram)}</span>
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
<div>
|
||||||
|
<div className="drawer-eyebrow">In-flight</div>
|
||||||
|
<span className="cell-mono">{node.in_flight_count || 0}</span>
|
||||||
|
</div>
|
||||||
|
{node.node_type !== 'agent' && (
|
||||||
|
<div style={{ minWidth: 0 }}>
|
||||||
|
<div className="drawer-eyebrow">Capacity</div>
|
||||||
|
<CapacityEditor
|
||||||
|
node={node}
|
||||||
|
loadedModelCounts={loadedModelCounts}
|
||||||
|
confirmShrink={confirmShrink}
|
||||||
|
addToast={addToast}
|
||||||
|
onUpdate={() => refresh()}
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
|
||||||
|
{/* Running models */}
|
||||||
|
<div style={{ marginTop: 'var(--spacing-lg)' }}>
|
||||||
|
<div className="drawer-eyebrow">Running models</div>
|
||||||
|
{models.length === 0 ? (
|
||||||
|
<p style={{ fontSize: '0.8125rem', color: 'var(--color-text-muted)', margin: '0 0 var(--spacing-md) 0' }}>
|
||||||
|
<i className="fas fa-cube" style={{ marginRight: 6, opacity: 0.6 }} aria-hidden="true" />
|
||||||
|
No models loaded yet - they'll appear here when scheduled to this node.
|
||||||
|
</p>
|
||||||
|
) : (
|
||||||
|
<table className="table" style={{ margin: 0 }}>
|
||||||
|
<thead>
|
||||||
|
<tr>
|
||||||
|
<th>Model</th>
|
||||||
|
<th>State</th>
|
||||||
|
<th>In-Flight</th>
|
||||||
|
<th style={{ width: 40 }}>Logs</th>
|
||||||
|
<th style={{ textAlign: 'right' }}>Actions</th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody>
|
||||||
|
{(() => {
|
||||||
|
// Pre-compute per-model replica counts so the disambiguation
|
||||||
|
// pill only renders when this node actually hosts >1 replica
|
||||||
|
// of the same model. Single-replica deployments stay clean.
|
||||||
|
const replicaCounts = {}
|
||||||
|
models.forEach(m => { replicaCounts[m.model_name] = (replicaCounts[m.model_name] || 0) + 1 })
|
||||||
|
return models.map(m => {
|
||||||
|
const stCfg = modelStateConfig[m.state] || modelStateConfig.idle
|
||||||
|
const showReplica = (replicaCounts[m.model_name] || 0) > 1
|
||||||
|
// Per-replica process key - what the worker stores logs under and what the
|
||||||
|
// store's GetLines/Subscribe match on for replica-scoped filtering.
|
||||||
|
const processKey = `${m.model_name}#${m.replica_index ?? 0}`
|
||||||
|
return (
|
||||||
|
<tr key={m.id || `${m.model_name}#${m.replica_index ?? 0}`}>
|
||||||
|
<td style={{ fontFamily: 'var(--font-mono)', fontSize: '0.8125rem' }}>
|
||||||
|
{m.model_name}
|
||||||
|
{showReplica && (
|
||||||
|
<span
|
||||||
|
className="cell-mono"
|
||||||
|
aria-label={`replica ${m.replica_index ?? 0}`}
|
||||||
|
title={`Replica ${m.replica_index ?? 0} on this node`}
|
||||||
|
style={{
|
||||||
|
marginLeft: 8, padding: '1px 6px', borderRadius: 'var(--radius-sm)',
|
||||||
|
background: 'var(--color-bg-tertiary)',
|
||||||
|
border: '1px solid var(--color-border-subtle)',
|
||||||
|
fontSize: '0.6875rem', fontWeight: 500,
|
||||||
|
color: 'var(--color-text-secondary)',
|
||||||
|
}}
|
||||||
|
>
|
||||||
|
rep {m.replica_index ?? 0}
|
||||||
|
</span>
|
||||||
|
)}
|
||||||
|
</td>
|
||||||
|
<td>
|
||||||
|
<span style={{
|
||||||
|
display: 'inline-block', padding: '2px 8px', borderRadius: 'var(--radius-sm)',
|
||||||
|
fontSize: '0.75rem', fontWeight: 500,
|
||||||
|
background: stCfg.bg, color: stCfg.color, border: `1px solid ${stCfg.border}`,
|
||||||
|
}}>
|
||||||
|
{m.state}
|
||||||
|
</span>
|
||||||
|
</td>
|
||||||
|
<td style={{ fontFamily: 'var(--font-mono)', fontSize: '0.8125rem' }}>
|
||||||
|
{m.in_flight ?? 0}
|
||||||
|
</td>
|
||||||
|
<td>
|
||||||
|
<a
|
||||||
|
href="#"
|
||||||
|
onClick={(e) => {
|
||||||
|
e.preventDefault()
|
||||||
|
// Send the replica-scoped process key (modelName#replicaIndex).
|
||||||
|
navigate(`/app/node-backend-logs/${id}/${encodeURIComponent(processKey)}`)
|
||||||
|
}}
|
||||||
|
style={{ fontSize: '0.75rem', color: 'var(--color-primary)' }}
|
||||||
|
title={showReplica ? `View backend logs for replica ${m.replica_index ?? 0}` : 'View backend logs'}
|
||||||
|
>
|
||||||
|
<i className="fas fa-terminal" />
|
||||||
|
</a>
|
||||||
|
</td>
|
||||||
|
<td style={{ textAlign: 'right' }}>
|
||||||
|
<button
|
||||||
|
className="btn btn-danger btn-sm"
|
||||||
|
title={m.in_flight > 0 ? 'Unload model (has in-flight requests)' : 'Unload model'}
|
||||||
|
onClick={() => setConfirmUnload({ modelName: m.model_name, inFlight: m.in_flight ?? 0 })}
|
||||||
|
>
|
||||||
|
<i className="fas fa-stop" />
|
||||||
|
</button>
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
)
|
||||||
|
})
|
||||||
|
})()}
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
|
||||||
|
{/* Installed backends */}
|
||||||
|
<div style={{ marginTop: 'var(--spacing-lg)' }}>
|
||||||
|
<div style={{
|
||||||
|
display: 'flex', alignItems: 'center', justifyContent: 'space-between',
|
||||||
|
marginBottom: 'var(--spacing-sm)',
|
||||||
|
}}>
|
||||||
|
<div className="drawer-eyebrow" style={{ margin: 0 }}>Installed backends</div>
|
||||||
|
<button
|
||||||
|
type="button"
|
||||||
|
className="btn btn-secondary btn-sm"
|
||||||
|
onClick={() => navigate(`/app/backends?target=${encodeURIComponent(id)}`)}
|
||||||
|
title={`Install a backend on ${node.name}`}
|
||||||
|
>
|
||||||
|
<i className="fas fa-plus" /> Add backend
|
||||||
|
</button>
|
||||||
|
</div>
|
||||||
|
{backends.length === 0 ? (
|
||||||
|
<p style={{ fontSize: '0.8125rem', color: 'var(--color-text-muted)', margin: 0 }}>
|
||||||
|
None installed. <a href="#" style={{ color: 'var(--color-primary)' }} onClick={(e) => { e.preventDefault(); navigate(`/app/backends?target=${encodeURIComponent(id)}`) }}>Install one from the gallery</a> to schedule models here.
|
||||||
|
</p>
|
||||||
|
) : (
|
||||||
|
<table className="table" style={{ margin: 0 }}>
|
||||||
|
<thead>
|
||||||
|
<tr>
|
||||||
|
<th>Name</th>
|
||||||
|
<th>Type</th>
|
||||||
|
<th>Installed At</th>
|
||||||
|
<th style={{ textAlign: 'right' }}>Actions</th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody>
|
||||||
|
{backends.map(b => (
|
||||||
|
<tr key={b.name}>
|
||||||
|
<td style={{ fontFamily: 'var(--font-mono)', fontSize: '0.8125rem' }}>
|
||||||
|
{b.name}
|
||||||
|
</td>
|
||||||
|
<td>
|
||||||
|
<span style={{
|
||||||
|
display: 'inline-block', padding: '2px 8px', borderRadius: 'var(--radius-sm)',
|
||||||
|
fontSize: '0.75rem', fontWeight: 500,
|
||||||
|
background: b.is_system ? 'var(--color-bg-tertiary)' : 'var(--color-primary-light)',
|
||||||
|
color: b.is_system ? 'var(--color-text-muted)' : 'var(--color-primary)',
|
||||||
|
border: `1px solid ${b.is_system ? 'var(--color-border-subtle)' : 'var(--color-primary-border)'}`,
|
||||||
|
}}>
|
||||||
|
{b.is_system ? 'system' : 'gallery'}
|
||||||
|
</span>
|
||||||
|
</td>
|
||||||
|
<td style={{ fontSize: '0.8125rem', color: 'var(--color-text-muted)' }}>
|
||||||
|
{b.installed_at ? timeAgo(b.installed_at) : '-'}
|
||||||
|
</td>
|
||||||
|
<td style={{ textAlign: 'right' }}>
|
||||||
|
{!b.is_system && (
|
||||||
|
<div style={{ display: 'inline-flex', gap: 'var(--spacing-xs)' }}>
|
||||||
|
<button
|
||||||
|
className="btn btn-secondary btn-sm"
|
||||||
|
onClick={() => upgradeBackend(b.name)}
|
||||||
|
title="Upgrade backend on this node"
|
||||||
|
>
|
||||||
|
<i className="fas fa-arrow-up" />
|
||||||
|
</button>
|
||||||
|
<button
|
||||||
|
className="btn btn-danger-ghost btn-sm"
|
||||||
|
onClick={() => setConfirmDeleteBackend({ backend: b.name })}
|
||||||
|
title="Delete backend from this node"
|
||||||
|
>
|
||||||
|
<i className="fas fa-trash" />
|
||||||
|
</button>
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
))}
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
|
||||||
|
{/* Labels - node.replica-slots is filtered out so the Capacity editor
|
||||||
|
stays the single source of truth for that label. */}
|
||||||
|
<div style={{ marginTop: 'var(--spacing-lg)' }}>
|
||||||
|
<div className="drawer-eyebrow">Labels</div>
|
||||||
|
<KeyValueChips
|
||||||
|
pairs={Object.fromEntries(Object.entries(node.labels || {}).filter(([k]) => k !== 'node.replica-slots'))}
|
||||||
|
onAdd={addLabel}
|
||||||
|
onRemove={delLabel}
|
||||||
|
placeholderKey="key"
|
||||||
|
placeholderValue="value"
|
||||||
|
ariaLabel="Node labels"
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<ConfirmDialog
|
||||||
|
open={confirmRemove}
|
||||||
|
title="Remove node"
|
||||||
|
message={`Remove "${node.name}" from the cluster? This will deregister it.`}
|
||||||
|
confirmLabel="Remove"
|
||||||
|
danger
|
||||||
|
onConfirm={() => { remove(); setConfirmRemove(false) }}
|
||||||
|
onCancel={() => setConfirmRemove(false)}
|
||||||
|
/>
|
||||||
|
|
||||||
|
<ConfirmDialog
|
||||||
|
open={!!confirmUnload}
|
||||||
|
title="Unload Model"
|
||||||
|
message={
|
||||||
|
confirmUnload
|
||||||
|
? confirmUnload.inFlight > 0
|
||||||
|
? `"${confirmUnload.modelName}" currently has ${confirmUnload.inFlight} in-flight request(s). Unloading will interrupt them. Continue?`
|
||||||
|
: `Unload "${confirmUnload.modelName}" from ${node.name}?`
|
||||||
|
: ''
|
||||||
|
}
|
||||||
|
confirmLabel="Unload"
|
||||||
|
danger={confirmUnload?.inFlight > 0}
|
||||||
|
onConfirm={() => { if (confirmUnload) unload(confirmUnload.modelName); setConfirmUnload(null) }}
|
||||||
|
onCancel={() => setConfirmUnload(null)}
|
||||||
|
/>
|
||||||
|
|
||||||
|
<ConfirmDialog
|
||||||
|
open={!!confirmDeleteBackend}
|
||||||
|
title="Delete Backend"
|
||||||
|
message={confirmDeleteBackend ? `Delete "${confirmDeleteBackend.backend}" from ${node.name}? This removes the backend files from this node only.` : ''}
|
||||||
|
confirmLabel="Delete"
|
||||||
|
danger
|
||||||
|
onConfirm={() => { if (confirmDeleteBackend) deleteBackend(confirmDeleteBackend.backend); setConfirmDeleteBackend(null) }}
|
||||||
|
onCancel={() => setConfirmDeleteBackend(null)}
|
||||||
|
/>
|
||||||
|
|
||||||
|
<ConfirmDialog
|
||||||
|
open={!!confirmShrinkState}
|
||||||
|
title="Reduce replica capacity"
|
||||||
|
message={
|
||||||
|
confirmShrinkState
|
||||||
|
? `${node.name} currently has ${confirmShrinkState.currentLoaded} replica(s) of at least one model loaded. Reducing the cap to ${confirmShrinkState.newValue} won't evict anything immediately - running replicas keep going, but the reconciler will trim down on the next idle window. Continue?`
|
||||||
|
: ''
|
||||||
|
}
|
||||||
|
confirmLabel="Reduce"
|
||||||
|
onConfirm={() => { confirmShrinkState?.resolve(true); setConfirmShrinkState(null) }}
|
||||||
|
onCancel={() => { confirmShrinkState?.resolve(false); setConfirmShrinkState(null) }}
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
)
|
||||||
|
}
|
||||||
File diff suppressed because it is too large
Load Diff
438
core/http/react-ui/src/pages/Scheduling.jsx
Normal file
438
core/http/react-ui/src/pages/Scheduling.jsx
Normal file
@@ -0,0 +1,438 @@
|
|||||||
|
import { useState, useEffect, useCallback } from 'react'
|
||||||
|
import { useOutletContext } from 'react-router-dom'
|
||||||
|
import { useTranslation } from 'react-i18next'
|
||||||
|
import { nodesApi } from '../utils/api'
|
||||||
|
import PageHeader from '../components/PageHeader'
|
||||||
|
import ConfirmDialog from '../components/ConfirmDialog'
|
||||||
|
import ResponsiveTable from '../components/ResponsiveTable'
|
||||||
|
import SearchableModelSelect from '../components/SearchableModelSelect'
|
||||||
|
import KeyValueChips from '../components/nodes/KeyValueChips'
|
||||||
|
|
||||||
|
// Numeric input with quick-pick preset chips. Picked over a slider because
|
||||||
|
// replica counts are exact specs (operator math), not fuzzy estimates. The
|
||||||
|
// chips give one-click access to common values without the slider's
|
||||||
|
// precision/special-value problems (e.g. MaxReplicas=0 = "no limit").
|
||||||
|
function ReplicaInput({ id, label, value, onChange, presets }) {
|
||||||
|
return (
|
||||||
|
<div style={{ flex: 1 }}>
|
||||||
|
<label className="form-label" htmlFor={id}>{label}</label>
|
||||||
|
<input
|
||||||
|
id={id}
|
||||||
|
className="input"
|
||||||
|
type="number"
|
||||||
|
min={0}
|
||||||
|
value={value}
|
||||||
|
onChange={e => onChange(parseInt(e.target.value) || 0)}
|
||||||
|
/>
|
||||||
|
<div style={{ display: 'flex', gap: 4, flexWrap: 'wrap', marginTop: 6 }}>
|
||||||
|
{presets.map(({ v, l }) => {
|
||||||
|
const active = value === v
|
||||||
|
return (
|
||||||
|
<button
|
||||||
|
key={v}
|
||||||
|
type="button"
|
||||||
|
onClick={() => onChange(v)}
|
||||||
|
aria-pressed={active}
|
||||||
|
className="cell-mono"
|
||||||
|
style={{
|
||||||
|
padding: '2px 8px',
|
||||||
|
borderRadius: 'var(--radius-sm)',
|
||||||
|
fontSize: '0.6875rem',
|
||||||
|
fontWeight: 500,
|
||||||
|
cursor: 'pointer',
|
||||||
|
background: active ? 'var(--color-primary-light)' : 'transparent',
|
||||||
|
border: `1px solid ${active ? 'var(--color-primary-border)' : 'var(--color-border-subtle)'}`,
|
||||||
|
color: active ? 'var(--color-primary)' : 'var(--color-text-muted)',
|
||||||
|
}}
|
||||||
|
>{l || v}</button>
|
||||||
|
)
|
||||||
|
})}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
function SchedulingForm({ onSave, onCancel }) {
|
||||||
|
const [mode, setMode] = useState('placement')
|
||||||
|
const [modelName, setModelName] = useState('')
|
||||||
|
// Selector is now a chip-builder map instead of a comma-separated string.
|
||||||
|
// Operators were copying syntax from docs and missing commas; the chip UI
|
||||||
|
// makes the key=value structure self-documenting.
|
||||||
|
const [selector, setSelector] = useState({})
|
||||||
|
const [minReplicas, setMinReplicas] = useState(1)
|
||||||
|
const [maxReplicas, setMaxReplicas] = useState(0)
|
||||||
|
// Prefix-cache routing controls. Empty routePolicy means "inherit the
|
||||||
|
// cluster default"; the three thresholds at 0 likewise inherit, so they
|
||||||
|
// stay out of the POST body's effective override only when explicitly set.
|
||||||
|
const [routePolicy, setRoutePolicy] = useState('')
|
||||||
|
const [balanceAbsThreshold, setBalanceAbsThreshold] = useState(0)
|
||||||
|
const [balanceRelThreshold, setBalanceRelThreshold] = useState(0)
|
||||||
|
const [minPrefixMatch, setMinPrefixMatch] = useState(0)
|
||||||
|
|
||||||
|
const hasSelector = Object.keys(selector).length > 0
|
||||||
|
|
||||||
|
const isValid = () => {
|
||||||
|
if (!modelName) return false
|
||||||
|
if (mode === 'placement') return hasSelector
|
||||||
|
if (mode === 'spread') return true
|
||||||
|
return minReplicas > 0 || maxReplicas > 0
|
||||||
|
}
|
||||||
|
|
||||||
|
const handleSubmit = () => {
|
||||||
|
onSave({
|
||||||
|
model_name: modelName,
|
||||||
|
node_selector: hasSelector ? selector : undefined,
|
||||||
|
min_replicas: mode === 'autoscaling' ? minReplicas : 0,
|
||||||
|
max_replicas: mode === 'autoscaling' ? maxReplicas : 0,
|
||||||
|
spread_all: mode === 'spread',
|
||||||
|
route_policy: routePolicy,
|
||||||
|
balance_abs_threshold: balanceAbsThreshold,
|
||||||
|
balance_rel_threshold: balanceRelThreshold,
|
||||||
|
min_prefix_match: minPrefixMatch,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
return (
|
||||||
|
<div className="card" style={{ padding: 'var(--spacing-lg)', marginBottom: 'var(--spacing-md)' }}>
|
||||||
|
{/* Mode selector — uses the project's segmented control instead of two
|
||||||
|
50%-width filled buttons that competed visually with the actual
|
||||||
|
primary action (Save). */}
|
||||||
|
<div role="radiogroup" aria-label="Scheduling mode" className="segmented" style={{ marginBottom: 'var(--spacing-xs)' }}>
|
||||||
|
<button
|
||||||
|
type="button" role="radio" aria-checked={mode === 'placement'}
|
||||||
|
className={`segmented__item${mode === 'placement' ? ' is-active' : ''}`}
|
||||||
|
onClick={() => setMode('placement')}
|
||||||
|
>
|
||||||
|
<i className="fas fa-thumbtack" aria-hidden="true" /> Pin to nodes
|
||||||
|
</button>
|
||||||
|
<button
|
||||||
|
type="button" role="radio" aria-checked={mode === 'autoscaling'}
|
||||||
|
className={`segmented__item${mode === 'autoscaling' ? ' is-active' : ''}`}
|
||||||
|
onClick={() => setMode('autoscaling')}
|
||||||
|
>
|
||||||
|
<i className="fas fa-arrows-up-down" aria-hidden="true" /> Auto-scale
|
||||||
|
</button>
|
||||||
|
<button
|
||||||
|
type="button" role="radio" aria-checked={mode === 'spread'}
|
||||||
|
className={`segmented__item${mode === 'spread' ? ' is-active' : ''}`}
|
||||||
|
onClick={() => setMode('spread')}
|
||||||
|
>
|
||||||
|
<i className="fas fa-network-wired" aria-hidden="true" /> Spread to all
|
||||||
|
</button>
|
||||||
|
</div>
|
||||||
|
<p style={{ fontSize: '0.8125rem', color: 'var(--color-text-muted)', margin: '0 0 var(--spacing-lg) 0' }}>
|
||||||
|
{mode === 'placement'
|
||||||
|
? 'Restrict this model to specific nodes. Loaded on demand, evictable when idle.'
|
||||||
|
: mode === 'spread'
|
||||||
|
? 'Run one replica on every node matching the selector (all healthy nodes when empty). Tracks nodes joining and leaving.'
|
||||||
|
: 'Maintain a target replica count across the cluster. Min ≥ 1 protects from eviction.'}
|
||||||
|
</p>
|
||||||
|
|
||||||
|
{/* Linear vertical flow — model picker is the visual focus, then the
|
||||||
|
mode-specific fields below. No 2-column grid (the mismatched widths
|
||||||
|
made the form look raw). */}
|
||||||
|
<div style={{ display: 'flex', flexDirection: 'column', gap: 'var(--spacing-md)' }}>
|
||||||
|
<div>
|
||||||
|
<label className="form-label" htmlFor="sched-model">Model</label>
|
||||||
|
{/* Searchable combobox so a long gallery doesn't force the operator
|
||||||
|
to scroll through hundreds of entries. Free-text is allowed —
|
||||||
|
you can pre-create a rule for a model that hasn't been
|
||||||
|
installed yet, which is a real workflow when standing up a new
|
||||||
|
node and pre-staging its scheduling policy. */}
|
||||||
|
<SearchableModelSelect
|
||||||
|
value={modelName}
|
||||||
|
onChange={setModelName}
|
||||||
|
placeholder="Type to search models, or paste a name..."
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div>
|
||||||
|
<label className="form-label">
|
||||||
|
Node selector{mode === 'placement' ? '' : ' (optional)'}
|
||||||
|
</label>
|
||||||
|
<KeyValueChips
|
||||||
|
pairs={selector}
|
||||||
|
onAdd={(k, v) => setSelector(prev => ({ ...prev, [k]: v }))}
|
||||||
|
onRemove={(k) => setSelector(prev => { const n = { ...prev }; delete n[k]; return n })}
|
||||||
|
placeholderKey="key (e.g. gpu.vendor)"
|
||||||
|
placeholderValue="value (e.g. nvidia)"
|
||||||
|
ariaLabel="Node selector"
|
||||||
|
/>
|
||||||
|
<span style={{ fontSize: '0.75rem', color: 'var(--color-text-muted)', display: 'block', marginTop: 6 }}>
|
||||||
|
{mode === 'placement'
|
||||||
|
? 'Models will load only on nodes that match all listed labels.'
|
||||||
|
: (hasSelector ? 'Replicas land only on matching nodes.' : 'Empty = any healthy node.')}
|
||||||
|
</span>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
{mode === 'autoscaling' && (
|
||||||
|
<div style={{ display: 'flex', gap: 'var(--spacing-md)' }}>
|
||||||
|
<ReplicaInput
|
||||||
|
id="sched-min"
|
||||||
|
label="Min replicas"
|
||||||
|
value={minReplicas}
|
||||||
|
onChange={setMinReplicas}
|
||||||
|
presets={[{ v: 1 }, { v: 2 }, { v: 3 }, { v: 4 }]}
|
||||||
|
/>
|
||||||
|
<ReplicaInput
|
||||||
|
id="sched-max"
|
||||||
|
label="Max replicas"
|
||||||
|
value={maxReplicas}
|
||||||
|
onChange={setMaxReplicas}
|
||||||
|
presets={[{ v: 0, l: 'no limit' }, { v: 2 }, { v: 4 }, { v: 8 }]}
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
|
||||||
|
{/* Per-model routing policy. Left empty/zero these inherit the
|
||||||
|
cluster-wide defaults; set them to override how requests for this
|
||||||
|
model are spread across replicas. */}
|
||||||
|
<div>
|
||||||
|
<label className="form-label" htmlFor="sched-route-policy">Routing policy</label>
|
||||||
|
<select
|
||||||
|
id="sched-route-policy"
|
||||||
|
className="input"
|
||||||
|
value={routePolicy}
|
||||||
|
onChange={e => setRoutePolicy(e.target.value)}
|
||||||
|
>
|
||||||
|
<option value="">Default (cluster setting)</option>
|
||||||
|
<option value="round_robin">Round Robin</option>
|
||||||
|
<option value="prefix_cache">Prefix Cache</option>
|
||||||
|
</select>
|
||||||
|
<span style={{ fontSize: '0.75rem', color: 'var(--color-text-muted)', display: 'block', marginTop: 6 }}>
|
||||||
|
Prefix Cache routes shared-prefix requests to the same replica to reuse its KV cache, falling back to round-robin when replicas are imbalanced.
|
||||||
|
</span>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
{routePolicy === 'prefix_cache' && (
|
||||||
|
<div style={{ display: 'flex', gap: 'var(--spacing-md)' }}>
|
||||||
|
<div style={{ flex: 1 }}>
|
||||||
|
<label className="form-label" htmlFor="sched-min-prefix-match">Min prefix match</label>
|
||||||
|
<input
|
||||||
|
id="sched-min-prefix-match"
|
||||||
|
className="input"
|
||||||
|
type="number"
|
||||||
|
step="0.05"
|
||||||
|
min="0"
|
||||||
|
max="1"
|
||||||
|
value={minPrefixMatch}
|
||||||
|
onChange={e => setMinPrefixMatch(parseFloat(e.target.value) || 0)}
|
||||||
|
/>
|
||||||
|
<span style={{ fontSize: '0.75rem', color: 'var(--color-text-muted)', display: 'block', marginTop: 6 }}>
|
||||||
|
Fraction of the prompt (0..1) that must match a cached prefix before affinity kicks in. 0 inherits the default.
|
||||||
|
</span>
|
||||||
|
</div>
|
||||||
|
<div style={{ flex: 1 }}>
|
||||||
|
<label className="form-label" htmlFor="sched-balance-abs">Balance abs threshold</label>
|
||||||
|
<input
|
||||||
|
id="sched-balance-abs"
|
||||||
|
className="input"
|
||||||
|
type="number"
|
||||||
|
min="0"
|
||||||
|
value={balanceAbsThreshold}
|
||||||
|
onChange={e => setBalanceAbsThreshold(parseInt(e.target.value) || 0)}
|
||||||
|
/>
|
||||||
|
<span style={{ fontSize: '0.75rem', color: 'var(--color-text-muted)', display: 'block', marginTop: 6 }}>
|
||||||
|
Max absolute in-flight gap allowed before falling back to round-robin. 0 inherits the default.
|
||||||
|
</span>
|
||||||
|
</div>
|
||||||
|
<div style={{ flex: 1 }}>
|
||||||
|
<label className="form-label" htmlFor="sched-balance-rel">Balance rel threshold</label>
|
||||||
|
<input
|
||||||
|
id="sched-balance-rel"
|
||||||
|
className="input"
|
||||||
|
type="number"
|
||||||
|
step="0.1"
|
||||||
|
min="0"
|
||||||
|
value={balanceRelThreshold}
|
||||||
|
onChange={e => setBalanceRelThreshold(parseFloat(e.target.value) || 0)}
|
||||||
|
/>
|
||||||
|
<span style={{ fontSize: '0.75rem', color: 'var(--color-text-muted)', display: 'block', marginTop: 6 }}>
|
||||||
|
Max relative in-flight ratio (>= 1) allowed before falling back to round-robin. 0 inherits the default.
|
||||||
|
</span>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
|
||||||
|
{/* Hairline divider above the actions, matching the project's form pattern. */}
|
||||||
|
<div style={{
|
||||||
|
display: 'flex', gap: 'var(--spacing-sm)', justifyContent: 'flex-end',
|
||||||
|
marginTop: 'var(--spacing-lg)', paddingTop: 'var(--spacing-md)',
|
||||||
|
borderTop: '1px solid var(--color-border-subtle)',
|
||||||
|
}}>
|
||||||
|
<button className="btn btn-secondary btn-sm" onClick={onCancel}>Cancel</button>
|
||||||
|
<button className="btn btn-primary btn-sm" onClick={handleSubmit} disabled={!isValid()}>Save rule</button>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
export default function Scheduling() {
|
||||||
|
const { addToast } = useOutletContext()
|
||||||
|
const { t } = useTranslation('admin')
|
||||||
|
const [schedulingConfigs, setSchedulingConfigs] = useState([])
|
||||||
|
const [showForm, setShowForm] = useState(false)
|
||||||
|
const [confirmDelete, setConfirmDelete] = useState(null)
|
||||||
|
|
||||||
|
const fetchScheduling = useCallback(async () => {
|
||||||
|
try {
|
||||||
|
const data = await nodesApi.listScheduling()
|
||||||
|
setSchedulingConfigs(Array.isArray(data) ? data : [])
|
||||||
|
} catch { setSchedulingConfigs([]) }
|
||||||
|
}, [])
|
||||||
|
|
||||||
|
useEffect(() => { fetchScheduling() }, [fetchScheduling])
|
||||||
|
|
||||||
|
const handleSave = async (config) => {
|
||||||
|
try {
|
||||||
|
await nodesApi.setScheduling(config)
|
||||||
|
addToast('Scheduling rule saved', 'success')
|
||||||
|
setShowForm(false)
|
||||||
|
fetchScheduling()
|
||||||
|
} catch (err) { addToast(`Failed to save rule: ${err.message}`, 'error') }
|
||||||
|
}
|
||||||
|
|
||||||
|
const handleDelete = async (model) => {
|
||||||
|
try {
|
||||||
|
await nodesApi.deleteScheduling(model)
|
||||||
|
addToast('Scheduling rule removed', 'success')
|
||||||
|
setConfirmDelete(null)
|
||||||
|
fetchScheduling()
|
||||||
|
} catch (err) { addToast(`Failed to remove rule: ${err.message}`, 'error') }
|
||||||
|
}
|
||||||
|
|
||||||
|
return (
|
||||||
|
<div className="page page--wide">
|
||||||
|
<PageHeader
|
||||||
|
title={<><i className="fas fa-calendar-alt" style={{ marginRight: 'var(--spacing-sm)' }} />{t('scheduling.title')}</>}
|
||||||
|
supporting={t('scheduling.subtitle')}
|
||||||
|
/>
|
||||||
|
<div>
|
||||||
|
<button className="btn btn-primary btn-sm" style={{ marginBottom: 'var(--spacing-md)' }}
|
||||||
|
onClick={() => setShowForm(f => !f)}>
|
||||||
|
<i className="fas fa-plus" style={{ marginRight: 6 }} />
|
||||||
|
Add Scheduling Rule
|
||||||
|
</button>
|
||||||
|
{showForm && <SchedulingForm onSave={handleSave} onCancel={() => setShowForm(false)} />}
|
||||||
|
{schedulingConfigs.length === 0 && !showForm ? (
|
||||||
|
<p style={{ fontSize: '0.875rem', color: 'var(--color-text-muted)', textAlign: 'center', padding: 'var(--spacing-xl) 0' }}>
|
||||||
|
No scheduling rules configured. Add a rule to control how models are placed on nodes.
|
||||||
|
</p>
|
||||||
|
) : schedulingConfigs.length > 0 && (
|
||||||
|
<ResponsiveTable>
|
||||||
|
<thead><tr>
|
||||||
|
<th>Model</th>
|
||||||
|
<th>Mode</th>
|
||||||
|
<th>Node Selector</th>
|
||||||
|
<th>Min Replicas</th>
|
||||||
|
<th>Max Replicas</th>
|
||||||
|
<th>Routing</th>
|
||||||
|
<th>Thresholds</th>
|
||||||
|
<th>Status</th>
|
||||||
|
<th style={{ textAlign: 'right' }}>Actions</th>
|
||||||
|
</tr></thead>
|
||||||
|
<tbody>
|
||||||
|
{schedulingConfigs.map(cfg => {
|
||||||
|
const isSpread = !!cfg.spread_all
|
||||||
|
const isAutoScaling = !isSpread && (cfg.min_replicas > 0 || cfg.max_replicas > 0)
|
||||||
|
const hasSelector = !!cfg.node_selector
|
||||||
|
const modeLabel = isSpread ? 'Spread' : isAutoScaling ? 'Auto-scaling' : hasSelector ? 'Placement' : 'Inactive'
|
||||||
|
const modeColor = isSpread ? 'var(--color-warning)' : isAutoScaling ? 'var(--color-success)' : hasSelector ? 'var(--color-primary)' : 'var(--color-text-muted)'
|
||||||
|
// Cooldown: reconciler tripped the circuit breaker because cluster
|
||||||
|
// capacity is exhausted. Surface so the operator sees it instead
|
||||||
|
// of the model silently failing to scale.
|
||||||
|
const unsatisfiableUntil = cfg.unsatisfiable_until ? new Date(cfg.unsatisfiable_until) : null
|
||||||
|
const isUnsatisfiable = unsatisfiableUntil && unsatisfiableUntil.getTime() > Date.now()
|
||||||
|
return (
|
||||||
|
<tr key={cfg.id || cfg.model_name}>
|
||||||
|
<td style={{ fontWeight: 600, fontSize: '0.875rem' }}>{cfg.model_name}</td>
|
||||||
|
<td>
|
||||||
|
<span style={{
|
||||||
|
display: 'inline-block', fontSize: '0.75rem', padding: '2px 8px', borderRadius: "var(--radius-sm)",
|
||||||
|
background: 'var(--color-bg-tertiary)', border: `1px solid ${modeColor}`,
|
||||||
|
color: modeColor, fontWeight: 600,
|
||||||
|
}}>{modeLabel}</span>
|
||||||
|
</td>
|
||||||
|
<td>
|
||||||
|
{cfg.node_selector ? (() => {
|
||||||
|
try {
|
||||||
|
const sel = typeof cfg.node_selector === 'string' ? JSON.parse(cfg.node_selector) : cfg.node_selector
|
||||||
|
return Object.entries(sel).map(([k,v]) => (
|
||||||
|
<span key={k} style={{
|
||||||
|
display: 'inline-block', fontSize: '0.75rem', padding: '2px 6px', borderRadius: "var(--radius-sm)",
|
||||||
|
background: 'var(--color-bg-tertiary)', border: '1px solid var(--color-border-subtle)',
|
||||||
|
fontFamily: 'var(--font-mono)', marginRight: 4,
|
||||||
|
}}>{k}={v}</span>
|
||||||
|
))
|
||||||
|
} catch { return <span style={{ color: 'var(--color-text-muted)', fontSize: '0.8125rem' }}>{cfg.node_selector}</span> }
|
||||||
|
})() : <span style={{ color: 'var(--color-text-muted)', fontSize: '0.8125rem' }}>Any node</span>}
|
||||||
|
</td>
|
||||||
|
<td style={{ fontFamily: 'var(--font-mono)' }}>
|
||||||
|
{isSpread
|
||||||
|
? <span style={{
|
||||||
|
display: 'inline-block', fontSize: '0.75rem', padding: '2px 8px', borderRadius: "var(--radius-sm)",
|
||||||
|
background: 'var(--color-bg-tertiary)', border: '1px solid var(--color-warning)',
|
||||||
|
color: 'var(--color-warning)', fontWeight: 600, fontFamily: 'var(--font-sans)',
|
||||||
|
}}>Spread: all matching nodes</span>
|
||||||
|
: isAutoScaling ? cfg.min_replicas : '-'}
|
||||||
|
</td>
|
||||||
|
<td style={{ fontFamily: 'var(--font-mono)' }}>
|
||||||
|
{isSpread ? '-' : isAutoScaling ? (cfg.max_replicas || 'no limit') : '-'}
|
||||||
|
</td>
|
||||||
|
<td style={{ fontSize: '0.8125rem' }}>
|
||||||
|
{cfg.route_policy || 'default'}
|
||||||
|
</td>
|
||||||
|
<td style={{ fontFamily: 'var(--font-mono)', fontSize: '0.75rem', color: 'var(--color-text-muted)' }}>
|
||||||
|
{cfg.route_policy === 'prefix_cache' ? (
|
||||||
|
<>
|
||||||
|
<div>match: {cfg.min_prefix_match ? cfg.min_prefix_match : 'inherit'}</div>
|
||||||
|
<div>abs: {cfg.balance_abs_threshold ? cfg.balance_abs_threshold : 'inherit'}</div>
|
||||||
|
<div>rel: {cfg.balance_rel_threshold ? cfg.balance_rel_threshold : 'inherit'}</div>
|
||||||
|
</>
|
||||||
|
) : '-'}
|
||||||
|
</td>
|
||||||
|
<td>
|
||||||
|
{isUnsatisfiable ? (
|
||||||
|
<span
|
||||||
|
title={`Reconciler couldn't satisfy this rule (capacity exhausted). Will retry by ${unsatisfiableUntil.toLocaleString()}, or sooner on a node lifecycle change.`}
|
||||||
|
style={{
|
||||||
|
display: 'inline-block', fontSize: '0.75rem', padding: '2px 8px',
|
||||||
|
borderRadius: 'var(--radius-sm)', fontWeight: 600,
|
||||||
|
background: 'var(--color-bg-tertiary)',
|
||||||
|
border: '1px solid var(--color-warning, #d97706)',
|
||||||
|
color: 'var(--color-warning, #d97706)',
|
||||||
|
}}
|
||||||
|
>
|
||||||
|
<i className="fas fa-exclamation-triangle" style={{ marginRight: 4 }} />
|
||||||
|
Unsatisfiable until {unsatisfiableUntil.toLocaleTimeString([], { hour: '2-digit', minute: '2-digit' })}
|
||||||
|
</span>
|
||||||
|
) : (
|
||||||
|
<span style={{ fontSize: '0.8125rem', color: 'var(--color-text-muted)' }}>OK</span>
|
||||||
|
)}
|
||||||
|
</td>
|
||||||
|
<td style={{ textAlign: 'right' }}>
|
||||||
|
<button className="btn btn-danger btn-sm" onClick={() => setConfirmDelete(cfg.model_name)}>
|
||||||
|
<i className="fas fa-trash" />
|
||||||
|
</button>
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
)
|
||||||
|
})}
|
||||||
|
</tbody>
|
||||||
|
</ResponsiveTable>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<ConfirmDialog
|
||||||
|
open={!!confirmDelete}
|
||||||
|
title="Remove scheduling rule"
|
||||||
|
message={confirmDelete ? `Remove the scheduling rule for "${confirmDelete}"?` : ''}
|
||||||
|
confirmLabel="Remove"
|
||||||
|
danger
|
||||||
|
onConfirm={() => confirmDelete && handleDelete(confirmDelete)}
|
||||||
|
onCancel={() => setConfirmDelete(null)}
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
)
|
||||||
|
}
|
||||||
@@ -69,7 +69,9 @@ const Studio = page('studio', () => import('./pages/Studio'))
|
|||||||
const FaceRecognition = page('face', () => import('./pages/FaceRecognition'))
|
const FaceRecognition = page('face', () => import('./pages/FaceRecognition'))
|
||||||
const VoiceRecognition = page('voice', () => import('./pages/VoiceRecognition'))
|
const VoiceRecognition = page('voice', () => import('./pages/VoiceRecognition'))
|
||||||
const Nodes = page('nodes', () => import('./pages/Nodes'))
|
const Nodes = page('nodes', () => import('./pages/Nodes'))
|
||||||
|
const Scheduling = page('scheduling', () => import('./pages/Scheduling'))
|
||||||
const NodeBackendLogs = page(null, () => import('./pages/NodeBackendLogs'))
|
const NodeBackendLogs = page(null, () => import('./pages/NodeBackendLogs'))
|
||||||
|
const NodeDetail = page(null, () => import('./pages/NodeDetail'))
|
||||||
const NotFound = page(null, () => import('./pages/NotFound'))
|
const NotFound = page(null, () => import('./pages/NotFound'))
|
||||||
const Usage = page('usage', () => import('./pages/Usage'))
|
const Usage = page('usage', () => import('./pages/Usage'))
|
||||||
const Users = page('users', () => import('./pages/Users'))
|
const Users = page('users', () => import('./pages/Users'))
|
||||||
@@ -152,6 +154,8 @@ const appChildren = [
|
|||||||
{ path: 'backend-logs/:modelId', element: <Admin><BackendLogs /></Admin> },
|
{ path: 'backend-logs/:modelId', element: <Admin><BackendLogs /></Admin> },
|
||||||
{ path: 'p2p', element: <Admin><P2P /></Admin> },
|
{ path: 'p2p', element: <Admin><P2P /></Admin> },
|
||||||
{ path: 'nodes', element: <Admin><Nodes /></Admin> },
|
{ path: 'nodes', element: <Admin><Nodes /></Admin> },
|
||||||
|
{ path: 'nodes/:id', element: <Admin><NodeDetail /></Admin> },
|
||||||
|
{ path: 'scheduling', element: <Admin><Scheduling /></Admin> },
|
||||||
{ path: 'node-backend-logs/:nodeId/:modelId', element: <Admin><NodeBackendLogs /></Admin> },
|
{ path: 'node-backend-logs/:nodeId/:modelId', element: <Admin><NodeBackendLogs /></Admin> },
|
||||||
{ path: 'usage', element: <Usage /> },
|
{ path: 'usage', element: <Usage /> },
|
||||||
{ path: 'users', element: <RequireAuthEnabled><Admin><Users /></Admin></RequireAuthEnabled> },
|
{ path: 'users', element: <RequireAuthEnabled><Admin><Users /></Admin></RequireAuthEnabled> },
|
||||||
|
|||||||
1
core/http/react-ui/src/utils/api.js
vendored
1
core/http/react-ui/src/utils/api.js
vendored
@@ -568,6 +568,7 @@ export const nodesApi = {
|
|||||||
method: 'DELETE',
|
method: 'DELETE',
|
||||||
}),
|
}),
|
||||||
listScheduling: () => fetchJSON(API_CONFIG.endpoints.nodesScheduling),
|
listScheduling: () => fetchJSON(API_CONFIG.endpoints.nodesScheduling),
|
||||||
|
allModels: () => fetchJSON(API_CONFIG.endpoints.nodesModels),
|
||||||
setScheduling: (config) => postJSON(API_CONFIG.endpoints.nodesScheduling, config),
|
setScheduling: (config) => postJSON(API_CONFIG.endpoints.nodesScheduling, config),
|
||||||
deleteScheduling: (model) => fetchJSON(API_CONFIG.endpoints.nodesSchedulingModel(model), { method: 'DELETE' }),
|
deleteScheduling: (model) => fetchJSON(API_CONFIG.endpoints.nodesSchedulingModel(model), { method: 'DELETE' }),
|
||||||
}
|
}
|
||||||
|
|||||||
1
core/http/react-ui/src/utils/config.js
vendored
1
core/http/react-ui/src/utils/config.js
vendored
@@ -144,6 +144,7 @@ export const API_CONFIG = {
|
|||||||
nodeLabelKey: (id, key) => `/api/nodes/${id}/labels/${key}`,
|
nodeLabelKey: (id, key) => `/api/nodes/${id}/labels/${key}`,
|
||||||
nodeMaxReplicasPerModel: (id) => `/api/nodes/${id}/max-replicas-per-model`,
|
nodeMaxReplicasPerModel: (id) => `/api/nodes/${id}/max-replicas-per-model`,
|
||||||
nodesScheduling: '/api/nodes/scheduling',
|
nodesScheduling: '/api/nodes/scheduling',
|
||||||
|
nodesModels: '/api/nodes/models',
|
||||||
nodesSchedulingModel: (model) => `/api/nodes/scheduling/${encodeURIComponent(model)}`,
|
nodesSchedulingModel: (model) => `/api/nodes/scheduling/${encodeURIComponent(model)}`,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -71,6 +71,9 @@ func RegisterNodeAdminRoutes(e *echo.Echo, registry *nodes.NodeRegistry, unloade
|
|||||||
admin := e.Group("/api/nodes", readyMw, adminMw)
|
admin := e.Group("/api/nodes", readyMw, adminMw)
|
||||||
admin.GET("", localai.ListNodesEndpoint(registry))
|
admin.GET("", localai.ListNodesEndpoint(registry))
|
||||||
|
|
||||||
|
// Cluster-wide loaded models (registered before /:id to avoid route conflicts)
|
||||||
|
admin.GET("/models", localai.ListAllNodeModelsEndpoint(registry))
|
||||||
|
|
||||||
// Model scheduling (registered before /:id to avoid route conflicts)
|
// Model scheduling (registered before /:id to avoid route conflicts)
|
||||||
admin.GET("/scheduling", localai.ListSchedulingEndpoint(registry))
|
admin.GET("/scheduling", localai.ListSchedulingEndpoint(registry))
|
||||||
admin.GET("/scheduling/:model", localai.GetSchedulingEndpoint(registry))
|
admin.GET("/scheduling/:model", localai.GetSchedulingEndpoint(registry))
|
||||||
|
|||||||
@@ -79,21 +79,29 @@ func (s *GalleryStore) Create(op *GalleryOperationRecord) error {
|
|||||||
}).Create(op).Error
|
}).Create(op).Error
|
||||||
}
|
}
|
||||||
|
|
||||||
// UpdateProgress updates progress for an operation.
|
// UpdateProgress updates progress for an operation. The cancellable flag is
|
||||||
func (s *GalleryStore) UpdateProgress(id string, progress float64, message, downloadedSize string) error {
|
// persisted on every tick so a replica that restarts mid-install rehydrates the
|
||||||
|
// op as still cancellable — otherwise the column keeps its Create-time zero
|
||||||
|
// value (false), the UI hides the cancel button, and the orphaned op can only
|
||||||
|
// be dismissed by waiting for the 30-minute stale reaper.
|
||||||
|
func (s *GalleryStore) UpdateProgress(id string, progress float64, message, downloadedSize string, cancellable bool) error {
|
||||||
return s.db.Model(&GalleryOperationRecord{}).Where("id = ?", id).Updates(map[string]any{
|
return s.db.Model(&GalleryOperationRecord{}).Where("id = ?", id).Updates(map[string]any{
|
||||||
"progress": progress,
|
"progress": progress,
|
||||||
"message": message,
|
"message": message,
|
||||||
"downloaded_file_size": downloadedSize,
|
"downloaded_file_size": downloadedSize,
|
||||||
|
"cancellable": cancellable,
|
||||||
"updated_at": time.Now(),
|
"updated_at": time.Now(),
|
||||||
}).Error
|
}).Error
|
||||||
}
|
}
|
||||||
|
|
||||||
// UpdateStatus updates the status of an operation.
|
// UpdateStatus updates the status of an operation. A terminal status is never
|
||||||
|
// cancellable, so the flag is cleared here to keep the persisted row consistent
|
||||||
|
// with what the UI should offer.
|
||||||
func (s *GalleryStore) UpdateStatus(id, status, errMsg string) error {
|
func (s *GalleryStore) UpdateStatus(id, status, errMsg string) error {
|
||||||
updates := map[string]any{
|
updates := map[string]any{
|
||||||
"status": status,
|
"status": status,
|
||||||
"updated_at": time.Now(),
|
"cancellable": false,
|
||||||
|
"updated_at": time.Now(),
|
||||||
}
|
}
|
||||||
if errMsg != "" {
|
if errMsg != "" {
|
||||||
updates["error"] = errMsg
|
updates["error"] = errMsg
|
||||||
|
|||||||
56
core/services/galleryop/cancellable_persist_test.go
Normal file
56
core/services/galleryop/cancellable_persist_test.go
Normal file
@@ -0,0 +1,56 @@
|
|||||||
|
package galleryop_test
|
||||||
|
|
||||||
|
import (
|
||||||
|
. "github.com/onsi/ginkgo/v2"
|
||||||
|
. "github.com/onsi/gomega"
|
||||||
|
|
||||||
|
"github.com/mudler/LocalAI/core/config"
|
||||||
|
"github.com/mudler/LocalAI/core/services/distributed"
|
||||||
|
"github.com/mudler/LocalAI/core/services/galleryop"
|
||||||
|
"github.com/mudler/LocalAI/core/services/testutil"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Reproduces "an in-flight install can't be cancelled after a restart". The
|
||||||
|
// live install path marks OpStatus.Cancellable=true on every progress tick, but
|
||||||
|
// UpdateStatus persisted progress/status to the gallery store WITHOUT the
|
||||||
|
// cancellable flag, and Create defaulted it to false. So after a replica
|
||||||
|
// restart Hydrate rebuilt the op with Cancellable=false, /api/operations
|
||||||
|
// reported cancellable:false, and the UI hid the cancel button — the orphaned
|
||||||
|
// op lingered until the 30-minute stale reaper expired it. The cancellable
|
||||||
|
// state must be persisted so a rehydrated in-flight op stays cancellable.
|
||||||
|
var _ = Describe("GalleryService cancellable persistence across restart", func() {
|
||||||
|
It("rehydrates an in-flight op as still cancellable", func() {
|
||||||
|
db := testutil.SetupTestDB()
|
||||||
|
store, err := distributed.NewGalleryStore(db)
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
|
||||||
|
svc := galleryop.NewGalleryService(&config.ApplicationConfig{}, nil)
|
||||||
|
svc.SetGalleryStore(store)
|
||||||
|
|
||||||
|
// Seed the in-flight op row as the worker goroutine does on admission.
|
||||||
|
Expect(store.Create(&distributed.GalleryOperationRecord{
|
||||||
|
ID: "op-inflight",
|
||||||
|
GalleryElementName: "llama-cpp-development",
|
||||||
|
OpType: "backend_install",
|
||||||
|
Status: "pending",
|
||||||
|
})).To(Succeed())
|
||||||
|
|
||||||
|
// Simulate a progress tick: the live path always marks installs
|
||||||
|
// cancellable while they are downloading/processing.
|
||||||
|
svc.UpdateStatus("op-inflight", &galleryop.OpStatus{
|
||||||
|
Message: "downloading",
|
||||||
|
Progress: 25,
|
||||||
|
Cancellable: true,
|
||||||
|
})
|
||||||
|
|
||||||
|
// A fresh replica boots and hydrates from the store.
|
||||||
|
fresh := galleryop.NewGalleryService(&config.ApplicationConfig{}, nil)
|
||||||
|
fresh.SetGalleryStore(store)
|
||||||
|
Expect(fresh.Hydrate()).To(Succeed())
|
||||||
|
|
||||||
|
st := fresh.GetStatus("op-inflight")
|
||||||
|
Expect(st).ToNot(BeNil(), "the in-flight op must hydrate after a restart")
|
||||||
|
Expect(st.Cancellable).To(BeTrue(),
|
||||||
|
"a still-active install must rehydrate as cancellable so the admin can dismiss it")
|
||||||
|
})
|
||||||
|
})
|
||||||
@@ -167,7 +167,7 @@ func (g *GalleryService) UpdateStatus(s string, op *OpStatus) {
|
|||||||
xlog.Warn("Failed to persist gallery operation status", "op_id", s, "error", err)
|
xlog.Warn("Failed to persist gallery operation status", "op_id", s, "error", err)
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
if err := store.UpdateProgress(s, op.Progress, op.Message, op.DownloadedFileSize); err != nil {
|
if err := store.UpdateProgress(s, op.Progress, op.Message, op.DownloadedFileSize, op.Cancellable); err != nil {
|
||||||
xlog.Warn("Failed to persist gallery operation progress", "op_id", s, "error", err)
|
xlog.Warn("Failed to persist gallery operation progress", "op_id", s, "error", err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -467,6 +467,7 @@ func (g *GalleryService) Start(c context.Context, cl *config.ModelConfigLoader,
|
|||||||
GalleryElementName: op.GalleryElementName,
|
GalleryElementName: op.GalleryElementName,
|
||||||
OpType: "backend_install",
|
OpType: "backend_install",
|
||||||
Status: "pending",
|
Status: "pending",
|
||||||
|
Cancellable: true,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
err := g.backendHandler(&op, systemState)
|
err := g.backendHandler(&op, systemState)
|
||||||
@@ -499,6 +500,8 @@ func (g *GalleryService) Start(c context.Context, cl *config.ModelConfigLoader,
|
|||||||
GalleryElementName: op.GalleryElementName,
|
GalleryElementName: op.GalleryElementName,
|
||||||
OpType: opType,
|
OpType: opType,
|
||||||
Status: "pending",
|
Status: "pending",
|
||||||
|
// A delete is not cancellable; an install is.
|
||||||
|
Cancellable: !op.Delete,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
err := g.modelHandler(&op, cl, systemState)
|
err := g.modelHandler(&op, cl, systemState)
|
||||||
|
|||||||
@@ -64,6 +64,22 @@ func SubjectGalleryProgress(opID string) string {
|
|||||||
return subjectGalleryPrefix + sanitizeSubjectToken(opID) + ".progress"
|
return subjectGalleryPrefix + sanitizeSubjectToken(opID) + ".progress"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// SubjectStagingProgress returns the NATS subject a frontend replica publishes
|
||||||
|
// file-staging progress on. Staging progress is otherwise per-process state
|
||||||
|
// (the SmartRouter's in-memory StagingTracker), so without this broadcast a
|
||||||
|
// /api/operations poll that round-robins onto a replica that did not originate
|
||||||
|
// the staging op sees nothing - the progress row flickers in multi-replica
|
||||||
|
// deployments. Peers subscribe to the wildcard and merge.
|
||||||
|
func SubjectStagingProgress(modelID string) string {
|
||||||
|
return subjectStagingPrefix + sanitizeSubjectToken(modelID) + ".progress"
|
||||||
|
}
|
||||||
|
|
||||||
|
const subjectStagingPrefix = "staging."
|
||||||
|
|
||||||
|
// SubjectStagingProgressWildcard matches every replica's staging-progress
|
||||||
|
// broadcasts so a peer can mirror staging ops it did not originate.
|
||||||
|
const SubjectStagingProgressWildcard = "staging.*.progress"
|
||||||
|
|
||||||
// SubjectGalleryOpStart and SubjectGalleryOpEnd are broadcast subjects for the
|
// SubjectGalleryOpStart and SubjectGalleryOpEnd are broadcast subjects for the
|
||||||
// in-memory OpCache lifecycle. Frontend replicas publish to these when an
|
// in-memory OpCache lifecycle. Frontend replicas publish to these when an
|
||||||
// admin admits a new install/delete (Start) and when an operation is
|
// admin admits a new install/delete (Start) and when an operation is
|
||||||
|
|||||||
@@ -359,8 +359,21 @@ func (r *SmartRouter) Route(ctx context.Context, modelID, modelName, backendType
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Step 2: Model not loaded — schedule loading with distributed lock to prevent duplicates
|
// Step 2: Model not loaded — schedule loading with distributed lock to prevent duplicates.
|
||||||
loadModel := func() (*RouteResult, error) {
|
//
|
||||||
|
// Detach the cold-load from the caller's context. Staging a model can
|
||||||
|
// transfer multiple GB to a worker, which takes far longer than any client
|
||||||
|
// keeps its HTTP request open — a browser refresh, an ingress/LB idle
|
||||||
|
// timeout, or a round-robined retry landing on another replica all cancel
|
||||||
|
// the request context. If staging were bound to it, the multi-GB upload
|
||||||
|
// aborts with "context canceled" mid-transfer and large models can never
|
||||||
|
// finish staging (the model-load outage). WithoutCancel keeps the request's
|
||||||
|
// values (prefix chain, etc.) but drops its cancellation/deadline. Each
|
||||||
|
// long step still has its own bound (the file stager's resume budget,
|
||||||
|
// LoadModel's 5m timeout), and the per-model advisory lock below de-dupes
|
||||||
|
// concurrent loaders across replicas.
|
||||||
|
loadCtx := context.WithoutCancel(ctx)
|
||||||
|
loadModel := func(ctx context.Context) (*RouteResult, error) {
|
||||||
// Re-check after acquiring lock — another request may have loaded it
|
// Re-check after acquiring lock — another request may have loaded it
|
||||||
node, nm, err := r.registry.FindAndLockNodeWithModel(ctx, trackingKey, candidateNodeIDs, pref)
|
node, nm, err := r.registry.FindAndLockNodeWithModel(ctx, trackingKey, candidateNodeIDs, pref)
|
||||||
if err == nil && node != nil {
|
if err == nil && node != nil {
|
||||||
@@ -433,9 +446,9 @@ func (r *SmartRouter) Route(ctx context.Context, modelID, modelName, backendType
|
|||||||
if r.db != nil {
|
if r.db != nil {
|
||||||
lockKey := advisorylock.KeyFromString("model-load:" + trackingKey)
|
lockKey := advisorylock.KeyFromString("model-load:" + trackingKey)
|
||||||
var result *RouteResult
|
var result *RouteResult
|
||||||
lockErr := advisorylock.WithLockCtx(ctx, r.db, lockKey, func() error {
|
lockErr := advisorylock.WithLockCtx(loadCtx, r.db, lockKey, func() error {
|
||||||
var err error
|
var err error
|
||||||
result, err = loadModel()
|
result, err = loadModel(loadCtx)
|
||||||
return err
|
return err
|
||||||
})
|
})
|
||||||
if lockErr != nil {
|
if lockErr != nil {
|
||||||
@@ -444,7 +457,7 @@ func (r *SmartRouter) Route(ctx context.Context, modelID, modelName, backendType
|
|||||||
return result, nil
|
return result, nil
|
||||||
}
|
}
|
||||||
// No DB (non-distributed) — proceed without lock
|
// No DB (non-distributed) — proceed without lock
|
||||||
return loadModel()
|
return loadModel(loadCtx)
|
||||||
}
|
}
|
||||||
|
|
||||||
// parseSelectorJSON decodes a JSON node selector string into a map.
|
// parseSelectorJSON decodes a JSON node selector string into a map.
|
||||||
|
|||||||
80
core/services/nodes/router_staging_context_test.go
Normal file
80
core/services/nodes/router_staging_context_test.go
Normal file
@@ -0,0 +1,80 @@
|
|||||||
|
package nodes
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"errors"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
|
||||||
|
. "github.com/onsi/ginkgo/v2"
|
||||||
|
. "github.com/onsi/gomega"
|
||||||
|
|
||||||
|
"github.com/mudler/LocalAI/core/services/messaging"
|
||||||
|
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
|
||||||
|
)
|
||||||
|
|
||||||
|
// cancelOnStageStager simulates the triggering HTTP request being abandoned
|
||||||
|
// (client disconnect, ingress idle-timeout) the moment a multi-GB file starts
|
||||||
|
// staging. It cancels the request context and records whether the context the
|
||||||
|
// stager itself received was cancelled as a result.
|
||||||
|
type cancelOnStageStager struct {
|
||||||
|
fakeFileStager
|
||||||
|
cancelRequest context.CancelFunc
|
||||||
|
staged bool
|
||||||
|
ctxErrOnStage error
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *cancelOnStageStager) EnsureRemote(ctx context.Context, _, _, key string) (string, error) {
|
||||||
|
s.staged = true
|
||||||
|
// Mid-transfer: the client gives up on the (minutes-long) request.
|
||||||
|
if s.cancelRequest != nil {
|
||||||
|
s.cancelRequest()
|
||||||
|
}
|
||||||
|
// A multi-GB upload must survive this. If staging were bound to the
|
||||||
|
// request context, ctx is now cancelled and the real HTTP stager would
|
||||||
|
// abort with "context canceled" — exactly the production outage.
|
||||||
|
s.ctxErrOnStage = ctx.Err()
|
||||||
|
return "/remote/" + key, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
var _ = Describe("Route cold-load staging context", func() {
|
||||||
|
It("detaches staging from the request context so a client disconnect cannot abort a multi-GB transfer", func() {
|
||||||
|
// A real model file so stageModelFiles actually calls the stager
|
||||||
|
// (non-existent paths are skipped).
|
||||||
|
tmp := GinkgoT().TempDir()
|
||||||
|
modelFile := filepath.Join(tmp, "big.gguf")
|
||||||
|
Expect(os.WriteFile(modelFile, []byte("weights"), 0o644)).To(Succeed())
|
||||||
|
|
||||||
|
reg := &fakeModelRouter{
|
||||||
|
findAndLockErr: errors.New("not loaded"),
|
||||||
|
findIdleNode: &BackendNode{ID: "n1", Name: "worker-1", Address: "10.0.0.1:50051"},
|
||||||
|
}
|
||||||
|
backend := &stubBackend{loadResult: &pb.Result{Success: true}}
|
||||||
|
factory := &stubClientFactory{client: backend}
|
||||||
|
unloader := &fakeUnloader{installReply: &messaging.BackendInstallReply{
|
||||||
|
Success: true,
|
||||||
|
Address: "10.0.0.1:9001",
|
||||||
|
}}
|
||||||
|
stager := &cancelOnStageStager{}
|
||||||
|
|
||||||
|
router := NewSmartRouter(reg, SmartRouterOptions{
|
||||||
|
Unloader: unloader,
|
||||||
|
ClientFactory: factory,
|
||||||
|
FileStager: stager,
|
||||||
|
// DB nil: no advisory lock, exercises the same detached load ctx.
|
||||||
|
})
|
||||||
|
|
||||||
|
ctx, cancel := context.WithCancel(context.Background())
|
||||||
|
stager.cancelRequest = cancel
|
||||||
|
defer cancel()
|
||||||
|
|
||||||
|
result, err := router.Route(ctx, "big-model", filepath.Join("models", "big.gguf"), "llama-cpp",
|
||||||
|
&pb.ModelOptions{Model: "big.gguf", ModelFile: modelFile}, false)
|
||||||
|
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
Expect(result).ToNot(BeNil())
|
||||||
|
Expect(stager.staged).To(BeTrue(), "staging must have been attempted")
|
||||||
|
Expect(stager.ctxErrOnStage).ToNot(HaveOccurred(),
|
||||||
|
"staging context must survive cancellation of the triggering request")
|
||||||
|
})
|
||||||
|
})
|
||||||
@@ -5,58 +5,138 @@ import (
|
|||||||
"fmt"
|
"fmt"
|
||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"github.com/mudler/LocalAI/core/services/messaging"
|
||||||
)
|
)
|
||||||
|
|
||||||
// StagingStatus represents the current progress of a model staging operation.
|
// StagingStatus represents the current progress of a model staging operation.
|
||||||
type StagingStatus struct {
|
type StagingStatus struct {
|
||||||
ModelID string `json:"model_id"`
|
ModelID string `json:"model_id"`
|
||||||
NodeName string `json:"node_name"`
|
NodeName string `json:"node_name"`
|
||||||
FileName string `json:"file_name"`
|
FileName string `json:"file_name"`
|
||||||
BytesSent int64 `json:"bytes_sent"`
|
BytesSent int64 `json:"bytes_sent"`
|
||||||
TotalBytes int64 `json:"total_bytes"`
|
TotalBytes int64 `json:"total_bytes"`
|
||||||
Progress float64 `json:"progress"` // 0-100 overall progress
|
Progress float64 `json:"progress"` // 0-100 overall progress
|
||||||
Speed string `json:"speed"`
|
Speed string `json:"speed"`
|
||||||
FileIndex int `json:"file_index"`
|
FileIndex int `json:"file_index"`
|
||||||
TotalFiles int `json:"total_files"`
|
TotalFiles int `json:"total_files"`
|
||||||
Message string `json:"message"`
|
Message string `json:"message"`
|
||||||
StartedAt time.Time `json:"started_at"`
|
StartedAt time.Time `json:"started_at"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const (
|
||||||
|
// stagingBroadcastInterval bounds how often byte-level UpdateFile ticks are
|
||||||
|
// re-broadcast to peers (leading-edge debounce). State transitions (Start,
|
||||||
|
// FileComplete, Complete) always publish so peers never miss them.
|
||||||
|
stagingBroadcastInterval = time.Second
|
||||||
|
// stagingRemoteTTL drops a mirrored (remote) op whose last update is older
|
||||||
|
// than this. NATS pub/sub is fire-and-forget, so a missed Done event would
|
||||||
|
// otherwise leave a phantom staging row on a peer forever; a live op
|
||||||
|
// refreshes its mirror at least every stagingBroadcastInterval.
|
||||||
|
stagingRemoteTTL = 60 * time.Second
|
||||||
|
)
|
||||||
|
|
||||||
|
// stagingEntry wraps a StagingStatus with the bookkeeping needed to keep peer
|
||||||
|
// replicas consistent: whether this op is mirrored from a peer (remote) vs.
|
||||||
|
// owned locally, when it was last updated (for remote-mirror expiry), and when
|
||||||
|
// its byte progress was last broadcast (for debounce).
|
||||||
|
type stagingEntry struct {
|
||||||
|
status StagingStatus
|
||||||
|
remote bool
|
||||||
|
updatedAt time.Time
|
||||||
|
lastPub time.Time
|
||||||
|
}
|
||||||
|
|
||||||
// StagingTracker tracks active file staging operations in-memory.
|
// StagingTracker tracks active file staging operations in-memory.
|
||||||
// Used by SmartRouter to publish progress and by /api/operations to surface it.
|
// Used by SmartRouter to publish progress and by /api/operations to surface it.
|
||||||
|
//
|
||||||
|
// In distributed mode each frontend replica runs its own tracker. The replica
|
||||||
|
// performing a transfer owns the op locally and broadcasts progress over NATS
|
||||||
|
// (SetPublisher); peers mirror it via ApplyRemote (SubscribeBroadcasts) so a
|
||||||
|
// /api/operations poll that round-robins onto any replica surfaces the op.
|
||||||
type StagingTracker struct {
|
type StagingTracker struct {
|
||||||
mu sync.RWMutex
|
mu sync.RWMutex
|
||||||
active map[string]*StagingStatus
|
active map[string]*stagingEntry
|
||||||
|
publisher messaging.Publisher
|
||||||
|
}
|
||||||
|
|
||||||
|
// StagingProgressEvent is the wire payload a frontend replica broadcasts on
|
||||||
|
// SubjectStagingProgress so peer replicas can mirror a staging op they did not
|
||||||
|
// originate. Done signals the op finished (peers drop their mirrored copy).
|
||||||
|
type StagingProgressEvent struct {
|
||||||
|
ModelID string `json:"model_id"`
|
||||||
|
Status *StagingStatus `json:"status,omitempty"`
|
||||||
|
Done bool `json:"done"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewStagingTracker creates a new tracker.
|
// NewStagingTracker creates a new tracker.
|
||||||
func NewStagingTracker() *StagingTracker {
|
func NewStagingTracker() *StagingTracker {
|
||||||
return &StagingTracker{
|
return &StagingTracker{
|
||||||
active: make(map[string]*StagingStatus),
|
active: make(map[string]*stagingEntry),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// SetPublisher wires the NATS publisher used to broadcast staging progress to
|
||||||
|
// peer replicas. No-op publisher (nil) keeps the tracker standalone.
|
||||||
|
func (t *StagingTracker) SetPublisher(p messaging.Publisher) {
|
||||||
|
t.mu.Lock()
|
||||||
|
defer t.mu.Unlock()
|
||||||
|
t.publisher = p
|
||||||
|
}
|
||||||
|
|
||||||
|
// SubscribeBroadcasts subscribes to peer replicas' staging-progress broadcasts
|
||||||
|
// and mirrors them into this tracker, so /api/operations on any replica surfaces
|
||||||
|
// staging ops it did not originate. Returns the subscription for cleanup.
|
||||||
|
func (t *StagingTracker) SubscribeBroadcasts(nc messaging.MessagingClient) (messaging.Subscription, error) {
|
||||||
|
return messaging.SubscribeJSON(nc, messaging.SubjectStagingProgressWildcard, func(evt StagingProgressEvent) {
|
||||||
|
if evt.ModelID == "" {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
t.ApplyRemote(evt)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
// publishStaging emits an event to the per-model staging subject. The publisher
|
||||||
|
// is captured by the caller under the lock and passed in, so publishing happens
|
||||||
|
// outside the lock (a slow NATS link must not stall the staging copy loop).
|
||||||
|
func publishStaging(p messaging.Publisher, evt StagingProgressEvent) {
|
||||||
|
if p == nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
_ = p.Publish(messaging.SubjectStagingProgress(evt.ModelID), evt)
|
||||||
|
}
|
||||||
|
|
||||||
// Start registers a new staging operation for the given model.
|
// Start registers a new staging operation for the given model.
|
||||||
func (t *StagingTracker) Start(modelID, nodeName string, totalFiles int) {
|
func (t *StagingTracker) Start(modelID, nodeName string, totalFiles int) {
|
||||||
t.mu.Lock()
|
t.mu.Lock()
|
||||||
defer t.mu.Unlock()
|
e := &stagingEntry{
|
||||||
t.active[modelID] = &StagingStatus{
|
status: StagingStatus{
|
||||||
ModelID: modelID,
|
ModelID: modelID,
|
||||||
NodeName: nodeName,
|
NodeName: nodeName,
|
||||||
TotalFiles: totalFiles,
|
TotalFiles: totalFiles,
|
||||||
StartedAt: time.Now(),
|
StartedAt: time.Now(),
|
||||||
Message: "Preparing to stage model files",
|
Message: "Preparing to stage model files",
|
||||||
|
},
|
||||||
|
updatedAt: time.Now(),
|
||||||
|
// lastPub stays zero so the first UpdateFile tick always broadcasts.
|
||||||
}
|
}
|
||||||
|
t.active[modelID] = e
|
||||||
|
pub := t.publisher
|
||||||
|
snap := e.status
|
||||||
|
t.mu.Unlock()
|
||||||
|
|
||||||
|
publishStaging(pub, StagingProgressEvent{ModelID: modelID, Status: &snap})
|
||||||
}
|
}
|
||||||
|
|
||||||
// UpdateFile updates the tracker with current file transfer progress.
|
// UpdateFile updates the tracker with current file transfer progress.
|
||||||
func (t *StagingTracker) UpdateFile(modelID, fileName string, fileIndex int, bytesSent, totalBytes int64, speed string) {
|
func (t *StagingTracker) UpdateFile(modelID, fileName string, fileIndex int, bytesSent, totalBytes int64, speed string) {
|
||||||
t.mu.Lock()
|
t.mu.Lock()
|
||||||
defer t.mu.Unlock()
|
e, ok := t.active[modelID]
|
||||||
s, ok := t.active[modelID]
|
|
||||||
if !ok {
|
if !ok {
|
||||||
|
t.mu.Unlock()
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
s := &e.status
|
||||||
s.FileName = fileName
|
s.FileName = fileName
|
||||||
s.FileIndex = fileIndex
|
s.FileIndex = fileIndex
|
||||||
s.BytesSent = bytesSent
|
s.BytesSent = bytesSent
|
||||||
@@ -79,52 +159,121 @@ func (t *StagingTracker) UpdateFile(modelID, fileName string, fileIndex int, byt
|
|||||||
} else {
|
} else {
|
||||||
s.Message = fmt.Sprintf("Staging %s", fileName)
|
s.Message = fmt.Sprintf("Staging %s", fileName)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
e.updatedAt = time.Now()
|
||||||
|
// Leading-edge debounce: byte ticks fire many times per second; only
|
||||||
|
// re-broadcast at most once per stagingBroadcastInterval.
|
||||||
|
var pub messaging.Publisher
|
||||||
|
var snap StagingStatus
|
||||||
|
if time.Since(e.lastPub) >= stagingBroadcastInterval {
|
||||||
|
e.lastPub = time.Now()
|
||||||
|
pub = t.publisher
|
||||||
|
snap = e.status
|
||||||
|
}
|
||||||
|
t.mu.Unlock()
|
||||||
|
|
||||||
|
if pub != nil {
|
||||||
|
publishStaging(pub, StagingProgressEvent{ModelID: modelID, Status: &snap})
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// FileComplete marks a single file as done within a staging operation.
|
// FileComplete marks a single file as done within a staging operation.
|
||||||
func (t *StagingTracker) FileComplete(modelID string, fileIndex, totalFiles int) {
|
func (t *StagingTracker) FileComplete(modelID string, fileIndex, totalFiles int) {
|
||||||
t.mu.Lock()
|
t.mu.Lock()
|
||||||
defer t.mu.Unlock()
|
e, ok := t.active[modelID]
|
||||||
s, ok := t.active[modelID]
|
|
||||||
if !ok {
|
if !ok {
|
||||||
|
t.mu.Unlock()
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
s := &e.status
|
||||||
if totalFiles > 0 {
|
if totalFiles > 0 {
|
||||||
s.Progress = float64(fileIndex) / float64(totalFiles) * 100
|
s.Progress = float64(fileIndex) / float64(totalFiles) * 100
|
||||||
}
|
}
|
||||||
s.BytesSent = 0
|
s.BytesSent = 0
|
||||||
s.TotalBytes = 0
|
s.TotalBytes = 0
|
||||||
s.Speed = ""
|
s.Speed = ""
|
||||||
|
e.updatedAt = time.Now()
|
||||||
|
e.lastPub = time.Now()
|
||||||
|
pub := t.publisher
|
||||||
|
snap := e.status
|
||||||
|
t.mu.Unlock()
|
||||||
|
|
||||||
|
// Always broadcast a per-file completion so peers' progress bars advance.
|
||||||
|
publishStaging(pub, StagingProgressEvent{ModelID: modelID, Status: &snap})
|
||||||
}
|
}
|
||||||
|
|
||||||
// Complete removes a staging operation (it's done).
|
// Complete removes a staging operation (it's done).
|
||||||
func (t *StagingTracker) Complete(modelID string) {
|
func (t *StagingTracker) Complete(modelID string) {
|
||||||
t.mu.Lock()
|
t.mu.Lock()
|
||||||
defer t.mu.Unlock()
|
_, ok := t.active[modelID]
|
||||||
delete(t.active, modelID)
|
delete(t.active, modelID)
|
||||||
|
pub := t.publisher
|
||||||
|
t.mu.Unlock()
|
||||||
|
|
||||||
|
if ok {
|
||||||
|
// Tell peers to drop their mirrored copy.
|
||||||
|
publishStaging(pub, StagingProgressEvent{ModelID: modelID, Done: true})
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// GetAll returns a snapshot of all active staging operations.
|
// ApplyRemote merges a peer replica's staging broadcast into this tracker. It
|
||||||
|
// never re-broadcasts (no echo loop). A locally-owned op is authoritative: a
|
||||||
|
// remote event for the same model is ignored, so the origin replica receiving
|
||||||
|
// its own broadcast (and any stray peer event) cannot clobber or delete it.
|
||||||
|
func (t *StagingTracker) ApplyRemote(evt StagingProgressEvent) {
|
||||||
|
t.mu.Lock()
|
||||||
|
defer t.mu.Unlock()
|
||||||
|
|
||||||
|
if existing, ok := t.active[evt.ModelID]; ok && !existing.remote {
|
||||||
|
// We own this op locally — ignore peer chatter about it.
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if evt.Done {
|
||||||
|
delete(t.active, evt.ModelID)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if evt.Status == nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
t.active[evt.ModelID] = &stagingEntry{
|
||||||
|
status: *evt.Status,
|
||||||
|
remote: true,
|
||||||
|
updatedAt: time.Now(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetAll returns a snapshot of all active staging operations. Stale remote
|
||||||
|
// mirrors (a peer op whose Done event was missed) are pruned here so they don't
|
||||||
|
// linger in the UI.
|
||||||
func (t *StagingTracker) GetAll() map[string]StagingStatus {
|
func (t *StagingTracker) GetAll() map[string]StagingStatus {
|
||||||
t.mu.RLock()
|
t.mu.Lock()
|
||||||
defer t.mu.RUnlock()
|
defer t.mu.Unlock()
|
||||||
|
now := time.Now()
|
||||||
result := make(map[string]StagingStatus, len(t.active))
|
result := make(map[string]StagingStatus, len(t.active))
|
||||||
for k, v := range t.active {
|
for k, e := range t.active {
|
||||||
result[k] = *v
|
if e.remote && now.Sub(e.updatedAt) > stagingRemoteTTL {
|
||||||
|
delete(t.active, k)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
result[k] = e.status
|
||||||
}
|
}
|
||||||
return result
|
return result
|
||||||
}
|
}
|
||||||
|
|
||||||
// Get returns the status of a specific staging operation, or nil if not active.
|
// Get returns the status of a specific staging operation, or nil if not active
|
||||||
|
// (or a stale remote mirror).
|
||||||
func (t *StagingTracker) Get(modelID string) *StagingStatus {
|
func (t *StagingTracker) Get(modelID string) *StagingStatus {
|
||||||
t.mu.RLock()
|
t.mu.RLock()
|
||||||
defer t.mu.RUnlock()
|
defer t.mu.RUnlock()
|
||||||
s, ok := t.active[modelID]
|
e, ok := t.active[modelID]
|
||||||
if !ok {
|
if !ok {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
copy := *s
|
if e.remote && time.Since(e.updatedAt) > stagingRemoteTTL {
|
||||||
return ©
|
return nil
|
||||||
|
}
|
||||||
|
s := e.status
|
||||||
|
return &s
|
||||||
}
|
}
|
||||||
|
|
||||||
// StagingProgressCallback is called by file stagers to report byte-level progress.
|
// StagingProgressCallback is called by file stagers to report byte-level progress.
|
||||||
|
|||||||
109
core/services/nodes/staging_progress_broadcast_test.go
Normal file
109
core/services/nodes/staging_progress_broadcast_test.go
Normal file
@@ -0,0 +1,109 @@
|
|||||||
|
package nodes
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/json"
|
||||||
|
|
||||||
|
. "github.com/onsi/ginkgo/v2"
|
||||||
|
. "github.com/onsi/gomega"
|
||||||
|
|
||||||
|
"github.com/mudler/LocalAI/core/services/messaging"
|
||||||
|
)
|
||||||
|
|
||||||
|
// decodeStagingEvents extracts every StagingProgressEvent the fake messaging
|
||||||
|
// client captured, in publish order.
|
||||||
|
func decodeStagingEvents(mc *fakeMessagingClient) []StagingProgressEvent {
|
||||||
|
mc.mu.Lock()
|
||||||
|
defer mc.mu.Unlock()
|
||||||
|
var out []StagingProgressEvent
|
||||||
|
for _, p := range mc.published {
|
||||||
|
var evt StagingProgressEvent
|
||||||
|
if err := json.Unmarshal(p.Data, &evt); err != nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if evt.ModelID == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
out = append(out, evt)
|
||||||
|
}
|
||||||
|
return out
|
||||||
|
}
|
||||||
|
|
||||||
|
var _ = Describe("StagingTracker cross-replica broadcast", func() {
|
||||||
|
Context("when a publisher is wired (distributed mode)", func() {
|
||||||
|
It("broadcasts staging progress so a peer replica surfaces an op it did not originate", func() {
|
||||||
|
mc := &fakeMessagingClient{}
|
||||||
|
origin := NewStagingTracker()
|
||||||
|
origin.SetPublisher(mc)
|
||||||
|
|
||||||
|
origin.Start("model-x", "worker-1", 1)
|
||||||
|
origin.UpdateFile("model-x", "weights.gguf", 1, 5<<30, 10<<30, "100 MiB/s")
|
||||||
|
|
||||||
|
events := decodeStagingEvents(mc)
|
||||||
|
Expect(events).ToNot(BeEmpty(), "writes must be broadcast over NATS")
|
||||||
|
Expect(mc.published[0].Subject).To(Equal(messaging.SubjectStagingProgress("model-x")))
|
||||||
|
|
||||||
|
// A peer replica that never ran the op merges the broadcast.
|
||||||
|
peer := NewStagingTracker()
|
||||||
|
for _, evt := range events {
|
||||||
|
peer.ApplyRemote(evt)
|
||||||
|
}
|
||||||
|
|
||||||
|
all := peer.GetAll()
|
||||||
|
Expect(all).To(HaveKey("model-x"))
|
||||||
|
Expect(all["model-x"].NodeName).To(Equal("worker-1"))
|
||||||
|
Expect(all["model-x"].FileName).To(Equal("weights.gguf"))
|
||||||
|
Expect(all["model-x"].TotalBytes).To(Equal(int64(10 << 30)))
|
||||||
|
})
|
||||||
|
|
||||||
|
It("removes the op from the peer when the origin completes it", func() {
|
||||||
|
mc := &fakeMessagingClient{}
|
||||||
|
origin := NewStagingTracker()
|
||||||
|
origin.SetPublisher(mc)
|
||||||
|
|
||||||
|
origin.Start("model-x", "worker-1", 1)
|
||||||
|
origin.Complete("model-x")
|
||||||
|
|
||||||
|
peer := NewStagingTracker()
|
||||||
|
for _, evt := range decodeStagingEvents(mc) {
|
||||||
|
peer.ApplyRemote(evt)
|
||||||
|
}
|
||||||
|
Expect(peer.GetAll()).ToNot(HaveKey("model-x"))
|
||||||
|
})
|
||||||
|
|
||||||
|
It("does not let a peer broadcast clobber an op this replica is itself running", func() {
|
||||||
|
local := NewStagingTracker()
|
||||||
|
local.Start("model-x", "worker-local", 2)
|
||||||
|
local.UpdateFile("model-x", "weights.gguf", 1, 9<<30, 10<<30, "")
|
||||||
|
|
||||||
|
// A stray/older remote event for the SAME modelID must not overwrite
|
||||||
|
// the authoritative local state, nor delete it.
|
||||||
|
local.ApplyRemote(StagingProgressEvent{
|
||||||
|
ModelID: "model-x",
|
||||||
|
Status: &StagingStatus{ModelID: "model-x", NodeName: "worker-other", FileName: "stale.gguf"},
|
||||||
|
})
|
||||||
|
local.ApplyRemote(StagingProgressEvent{ModelID: "model-x", Done: true})
|
||||||
|
|
||||||
|
all := local.GetAll()
|
||||||
|
Expect(all).To(HaveKey("model-x"))
|
||||||
|
Expect(all["model-x"].NodeName).To(Equal("worker-local"))
|
||||||
|
Expect(all["model-x"].FileName).To(Equal("weights.gguf"))
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
Context("when no publisher is wired (standalone mode)", func() {
|
||||||
|
It("does not broadcast", func() {
|
||||||
|
mc := &fakeMessagingClient{}
|
||||||
|
t := NewStagingTracker()
|
||||||
|
t.Start("model-x", "worker-1", 1)
|
||||||
|
t.UpdateFile("model-x", "weights.gguf", 1, 1<<30, 10<<30, "")
|
||||||
|
Expect(mc.published).To(BeEmpty())
|
||||||
|
})
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
var _ = Describe("SubjectStagingProgress", func() {
|
||||||
|
It("namespaces by model id and matches the wildcard prefix", func() {
|
||||||
|
Expect(messaging.SubjectStagingProgress("model-x")).To(Equal("staging.model-x.progress"))
|
||||||
|
Expect(messaging.SubjectStagingProgressWildcard).To(Equal("staging.*.progress"))
|
||||||
|
})
|
||||||
|
})
|
||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user