mirror of
https://github.com/mudler/LocalAI.git
synced 2026-05-19 22:29:54 -04:00
Compare commits
33 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
42a8db3573 | ||
|
|
0353d3bd77 | ||
|
|
ec49995190 | ||
|
|
67c34bbb96 | ||
|
|
4430fae779 | ||
|
|
ab01ed1a3e | ||
|
|
6bfe7f8c05 | ||
|
|
5a42dbf3ec | ||
|
|
c2fe0a6475 | ||
|
|
ddbbdf45b9 | ||
|
|
b4fdb41dcc | ||
|
|
0245b33eab | ||
|
|
a2940e5d47 | ||
|
|
a645c1f4aa | ||
|
|
957619af53 | ||
|
|
ad0ab37230 | ||
|
|
0b81e36504 | ||
|
|
602866a9d8 | ||
|
|
8521af145f | ||
|
|
bc4cd3dd85 | ||
|
|
86a7f6c9fa | ||
|
|
a57e73691d | ||
|
|
a689100d61 | ||
|
|
03815e3b59 | ||
|
|
37991c8a18 | ||
|
|
61c9b187fa | ||
|
|
c66014312e | ||
|
|
abc2a51641 | ||
|
|
cd7d163178 | ||
|
|
7aac599deb | ||
|
|
d75173dd2a | ||
|
|
9be5310394 | ||
|
|
cdf50fd723 |
@@ -284,7 +284,17 @@ Also bump the expected-length count in `api_instructions_test.go` and add the na
|
||||
|
||||
### 3. `capabilities.js` symbol (for new model-config FLAG_* flags)
|
||||
|
||||
If your feature needs a new `FLAG_*` usecase flag in `core/config/model_config.go` (so users can filter gallery models by it, and so `/v1/models` surfaces it), also declare the matching symbol in `core/http/react-ui/src/utils/capabilities.js`:
|
||||
If your feature needs a new `FLAG_*` usecase flag in `core/config/model_config.go` (so users can filter gallery models by it, and so `/v1/models` surfaces it), you need to update **all** of:
|
||||
|
||||
- `Usecase<Name>` string constant in `core/config/backend_capabilities.go`
|
||||
- `UsecaseInfoMap` entry mapping the string to its flag + gRPC method
|
||||
- `FLAG_<NAME>` bitmask in `core/config/model_config.go`
|
||||
- `GetAllModelConfigUsecases()` map entry (otherwise the YAML loader silently ignores the string)
|
||||
- `ModalityGroups` membership if the flag should affect `IsMultimodal()` (e.g. realtime_audio is in both speech-input and audio-output groups so a lone flag still reads as multimodal)
|
||||
- `GuessUsecases()` branch listing the backends that own this capability
|
||||
- `usecaseFilters` in `core/http/routes/ui_api.go` (drives the gallery filter dropdown)
|
||||
- `Models.jsx` `FILTERS` array + matching `filters.<camelCase>` i18n key in `core/http/react-ui/public/locales/en/models.json`
|
||||
- `core/http/react-ui/src/utils/capabilities.js`:
|
||||
|
||||
```js
|
||||
export const CAP_MY_CAPABILITY = 'FLAG_MY_CAPABILITY'
|
||||
|
||||
97
.github/backend-matrix.yml
vendored
97
.github/backend-matrix.yml
vendored
@@ -278,6 +278,19 @@ include:
|
||||
dockerfile: "./backend/Dockerfile.python"
|
||||
context: "./"
|
||||
ubuntu-version: '2404'
|
||||
- build-type: 'cublas'
|
||||
cuda-major-version: "12"
|
||||
cuda-minor-version: "8"
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: '-gpu-nvidia-cuda-12-liquid-audio'
|
||||
runs-on: 'ubuntu-latest'
|
||||
base-image: "ubuntu:24.04"
|
||||
skip-drivers: 'false'
|
||||
backend: "liquid-audio"
|
||||
dockerfile: "./backend/Dockerfile.python"
|
||||
context: "./"
|
||||
ubuntu-version: '2404'
|
||||
- build-type: 'cublas'
|
||||
cuda-major-version: "12"
|
||||
cuda-minor-version: "8"
|
||||
@@ -389,7 +402,12 @@ include:
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: '-gpu-nvidia-cuda-12-llama-cpp'
|
||||
builder-base-image: 'quay.io/go-skynet/ci-cache:base-grpc-cuda-12-amd64'
|
||||
runs-on: 'ubuntu-latest'
|
||||
# bigger-runner: cold builds for this entry consistently take 5h+ on
|
||||
# ubuntu-latest (observed 5h36m on v4.2.1). Move back to bigger-runner
|
||||
# so the build finishes well within GHA's 6h job timeout. Phase 5.3 of
|
||||
# the free-tier migration (PR #9730) flipped this to ubuntu-latest as
|
||||
# a 'highest-risk batch' with explicit per-entry revert.
|
||||
runs-on: 'bigger-runner'
|
||||
base-image: "ubuntu:24.04"
|
||||
skip-drivers: 'false'
|
||||
backend: "llama-cpp"
|
||||
@@ -403,7 +421,9 @@ include:
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: '-gpu-nvidia-cuda-12-turboquant'
|
||||
builder-base-image: 'quay.io/go-skynet/ci-cache:base-grpc-cuda-12-amd64'
|
||||
runs-on: 'ubuntu-latest'
|
||||
# bigger-runner: same rationale as -gpu-nvidia-cuda-12-llama-cpp above
|
||||
# (observed 6h5m wall-clock on v4.2.1, just past the 6h job timeout).
|
||||
runs-on: 'bigger-runner'
|
||||
base-image: "ubuntu:24.04"
|
||||
skip-drivers: 'false'
|
||||
backend: "turboquant"
|
||||
@@ -801,6 +821,19 @@ include:
|
||||
dockerfile: "./backend/Dockerfile.python"
|
||||
context: "./"
|
||||
ubuntu-version: '2404'
|
||||
- build-type: 'cublas'
|
||||
cuda-major-version: "13"
|
||||
cuda-minor-version: "0"
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: '-gpu-nvidia-cuda-13-liquid-audio'
|
||||
runs-on: 'ubuntu-latest'
|
||||
base-image: "ubuntu:24.04"
|
||||
skip-drivers: 'false'
|
||||
backend: "liquid-audio"
|
||||
dockerfile: "./backend/Dockerfile.python"
|
||||
context: "./"
|
||||
ubuntu-version: '2404'
|
||||
- build-type: 'cublas'
|
||||
cuda-major-version: "13"
|
||||
cuda-minor-version: "0"
|
||||
@@ -899,7 +932,9 @@ include:
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: '-gpu-nvidia-cuda-13-llama-cpp'
|
||||
builder-base-image: 'quay.io/go-skynet/ci-cache:base-grpc-cuda-13-amd64'
|
||||
runs-on: 'ubuntu-latest'
|
||||
# bigger-runner: cold builds for this entry take 5h+ on ubuntu-latest
|
||||
# (observed 5h37m on v4.2.1). Same rationale as the cuda-12 variant.
|
||||
runs-on: 'bigger-runner'
|
||||
base-image: "ubuntu:24.04"
|
||||
skip-drivers: 'false'
|
||||
backend: "llama-cpp"
|
||||
@@ -913,7 +948,8 @@ include:
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: '-gpu-nvidia-cuda-13-turboquant'
|
||||
builder-base-image: 'quay.io/go-skynet/ci-cache:base-grpc-cuda-13-amd64'
|
||||
runs-on: 'ubuntu-latest'
|
||||
# bigger-runner: observed 6h5m wall-clock on v4.2.1 — at the GHA timeout.
|
||||
runs-on: 'bigger-runner'
|
||||
base-image: "ubuntu:24.04"
|
||||
skip-drivers: 'false'
|
||||
backend: "turboquant"
|
||||
@@ -1078,6 +1114,19 @@ include:
|
||||
backend: "vibevoice"
|
||||
dockerfile: "./backend/Dockerfile.python"
|
||||
context: "./"
|
||||
- build-type: 'l4t'
|
||||
cuda-major-version: "13"
|
||||
cuda-minor-version: "0"
|
||||
platforms: 'linux/arm64'
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: '-nvidia-l4t-cuda-13-arm64-liquid-audio'
|
||||
runs-on: 'ubuntu-24.04-arm'
|
||||
base-image: "ubuntu:24.04"
|
||||
skip-drivers: 'false'
|
||||
ubuntu-version: '2404'
|
||||
backend: "liquid-audio"
|
||||
dockerfile: "./backend/Dockerfile.python"
|
||||
context: "./"
|
||||
- build-type: 'l4t'
|
||||
cuda-major-version: "13"
|
||||
cuda-minor-version: "0"
|
||||
@@ -1719,6 +1768,19 @@ include:
|
||||
dockerfile: "./backend/Dockerfile.python"
|
||||
context: "./"
|
||||
ubuntu-version: '2404'
|
||||
- build-type: 'hipblas'
|
||||
cuda-major-version: ""
|
||||
cuda-minor-version: ""
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: '-gpu-rocm-hipblas-liquid-audio'
|
||||
runs-on: 'ubuntu-latest'
|
||||
base-image: "rocm/dev-ubuntu-24.04:7.2.1"
|
||||
skip-drivers: 'false'
|
||||
backend: "liquid-audio"
|
||||
dockerfile: "./backend/Dockerfile.python"
|
||||
context: "./"
|
||||
ubuntu-version: '2404'
|
||||
- build-type: 'hipblas'
|
||||
cuda-major-version: ""
|
||||
cuda-minor-version: ""
|
||||
@@ -2167,6 +2229,19 @@ include:
|
||||
dockerfile: "./backend/Dockerfile.python"
|
||||
context: "./"
|
||||
ubuntu-version: '2404'
|
||||
- build-type: 'intel'
|
||||
cuda-major-version: ""
|
||||
cuda-minor-version: ""
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: '-gpu-intel-liquid-audio'
|
||||
runs-on: 'ubuntu-latest'
|
||||
base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04"
|
||||
skip-drivers: 'false'
|
||||
backend: "liquid-audio"
|
||||
dockerfile: "./backend/Dockerfile.python"
|
||||
context: "./"
|
||||
ubuntu-version: '2404'
|
||||
- build-type: 'intel'
|
||||
cuda-major-version: ""
|
||||
cuda-minor-version: ""
|
||||
@@ -3493,6 +3568,20 @@ include:
|
||||
dockerfile: "./backend/Dockerfile.python"
|
||||
context: "./"
|
||||
ubuntu-version: '2404'
|
||||
- build-type: ''
|
||||
cuda-major-version: ""
|
||||
cuda-minor-version: ""
|
||||
platforms: 'linux/amd64'
|
||||
platform-tag: 'amd64'
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: '-cpu-liquid-audio'
|
||||
runs-on: 'ubuntu-latest'
|
||||
base-image: "ubuntu:24.04"
|
||||
skip-drivers: 'false'
|
||||
backend: "liquid-audio"
|
||||
dockerfile: "./backend/Dockerfile.python"
|
||||
context: "./"
|
||||
ubuntu-version: '2404'
|
||||
- build-type: ''
|
||||
cuda-major-version: ""
|
||||
cuda-minor-version: ""
|
||||
|
||||
46
.github/scripts/anchor-digest-in-cache.sh
vendored
Executable file
46
.github/scripts/anchor-digest-in-cache.sh
vendored
Executable file
@@ -0,0 +1,46 @@
|
||||
#!/usr/bin/env bash
# Pin a freshly built per-arch image digest under a throwaway tag in the
# ci-cache repository so the manifest is still resolvable when the merge
# workflow finally runs.
#
# Background: the build workflow pushes by canonical digest only
# (push-by-digest=true), so the manifest carries no tag. Unreferenced manifests
# on quay can be reaped within ~1-2h, while the merge jobs start only after the
# *entire* per-arch build matrix has drained (max-parallel: 8 x dozens of
# entries -> ~2h+). With nothing holding the manifest, the earliest digests are
# gone by the time `imagetools create` reads them and the merge fails with
# "manifest not found".
#
# Quay does not GC tagged manifests, so the digest is tagged under our internal
# ci-cache image. The digest itself is unchanged, so the user-facing manifest
# list keeps referencing the same content. The anchor tag is deleted again
# after the user-facing manifest is published - see cleanup-keepalive-tags.sh.
#
# Tag layout: keepalive-<GITHUB_RUN_ID><TAG_SUFFIX>-<PLATFORM_TAG>
#
# Required env (the script aborts if any is unset or empty):
#   GITHUB_RUN_ID  - current workflow run id (set automatically by GHA)
#   TAG_SUFFIX     - matrix entry's tag-suffix (e.g. -gpu-nvidia-cuda-12-vllm)
#   PLATFORM_TAG   - amd64 / arm64 / single (single = singleton matrix entry)
#   DIGEST         - canonical content digest from build step (sha256:...)
#
# Optional env:
#   ANCHOR_IMAGE         - target image (default: quay.io/go-skynet/ci-cache)
#   SOURCE_IMAGE         - source image (default: quay.io/go-skynet/local-ai-backends)
#   GITHUB_STEP_SUMMARY  - if set, an anchored-by line is appended to it
set -euo pipefail

# One no-op command validates all required inputs; the first missing one
# stops the script with bash's "parameter null or not set" error.
: "${GITHUB_RUN_ID:?}" "${TAG_SUFFIX:?}" "${PLATFORM_TAG:?}" "${DIGEST:?}"

anchor_image="${ANCHOR_IMAGE:-quay.io/go-skynet/ci-cache}"
source_image="${SOURCE_IMAGE:-quay.io/go-skynet/local-ai-backends}"

tag="keepalive-${GITHUB_RUN_ID}${TAG_SUFFIX}-${PLATFORM_TAG}"
anchor_ref="${anchor_image}:${tag}"
source_ref="${source_image}@${DIGEST}"

# Re-publish the existing manifest under the keepalive tag.
docker buildx imagetools create -t "${anchor_ref}" "${source_ref}"

echo "anchored ${DIGEST} as ${anchor_ref}"

# The step summary is optional: nothing more to do when GHA did not provide it.
if [[ -z "${GITHUB_STEP_SUMMARY:-}" ]]; then
  exit 0
fi
echo "anchored \`${DIGEST}\` as \`${anchor_ref}\`" >> "${GITHUB_STEP_SUMMARY}"
|
||||
49
.github/scripts/cleanup-keepalive-tags.sh
vendored
Executable file
49
.github/scripts/cleanup-keepalive-tags.sh
vendored
Executable file
@@ -0,0 +1,49 @@
|
||||
#!/usr/bin/env bash
# Best-effort cleanup of the keepalive anchor tags written by
# anchor-digest-in-cache.sh. Called from backend_merge.yml and image_merge.yml
# after the user-facing manifest list has been published.
#
# Quay's docker registry v2 doesn't allow tag deletes — only digest deletes.
# The proper delete is the quay REST API, which requires an OAuth-scoped
# token. We try QUAY_TOKEN as a bearer token: if the secret is an OAuth app
# token (typical for service accounts) the delete succeeds; otherwise this
# is a soft no-op and the tag persists until manually pruned.
#
# Cleanup failure MUST NOT fail the merge — the merge has already produced
# the user-facing manifest list at this point and the keepalive tags are
# pure overhead. Once the required variables are present, every per-tag
# outcome (including network errors and timeouts) is only logged and the
# script exits 0. A missing required variable is a wiring error and still
# aborts non-zero via the `:?` guards below.
#
# Required env:
#   GITHUB_RUN_ID  - current workflow run id (set automatically by GHA)
#   TAG_SUFFIX     - matrix entry's tag-suffix (e.g. -gpu-nvidia-cuda-12-vllm)
#   QUAY_TOKEN     - bearer token for quay's REST API
#
# Optional env:
#   QUAY_REPO      - target repo (default: go-skynet/ci-cache)
#   PLATFORM_TAGS  - space-separated list of platform-tag values to try
#                    (default: "amd64 arm64 single")
#                    We don't know which platform-tag(s) exist for this
#                    tag-suffix without an extra API call, so we just try
#                    all three and ignore 404s for the ones that don't.

# Deliberately no `-e`: a failed delete must not abort the loop.
set -uo pipefail

: "${GITHUB_RUN_ID:?}"
: "${TAG_SUFFIX:?}"
: "${QUAY_TOKEN:?}"

quay_repo="${QUAY_REPO:-go-skynet/ci-cache}"
platform_tags="${PLATFORM_TAGS:-amd64 arm64 single}"

# $platform_tags is intentionally unquoted so it word-splits into one
# candidate per platform-tag.
for plat in $platform_tags; do
  tag="keepalive-${GITHUB_RUN_ID}${TAG_SUFFIX}-${plat}"
  url="https://quay.io/api/v1/repository/${quay_repo}/tag/${tag}"

  # curl's --write-out already emits "000" when no HTTP response was received,
  # so a transport failure must not append a second code (the previous
  # `|| echo "000"` produced "000000"). `|| true` only swallows the exit
  # status. The timeouts keep a stalled API from hanging the merge job on
  # what is pure overhead.
  http=$(curl -sS -o /dev/null -w '%{http_code}' \
    --connect-timeout 10 --max-time 30 \
    -X DELETE -H "Authorization: Bearer ${QUAY_TOKEN}" "$url") || true
  # Covers the case where curl produced no output at all (e.g. not installed).
  http="${http:-000}"

  case "$http" in
    204|200) echo "deleted $tag" ;;
    404)     echo "not present: $tag" ;;
    401|403) echo "auth not OAuth-scoped (http $http) for $tag - skipping; orphan tag will persist" ;;
    000)     echo "no http response deleting $tag (network error or timeout) - skipping" ;;
    *)       echo "unexpected http $http deleting $tag - skipping" ;;
  esac
done

exit 0
|
||||
11
.github/workflows/backend.yml
vendored
11
.github/workflows/backend.yml
vendored
@@ -154,7 +154,13 @@ jobs:
|
||||
# digest only — no tags are applied at build time.
|
||||
backend-merge-jobs-multiarch:
|
||||
needs: [generate-matrix, backend-jobs-multiarch]
|
||||
if: needs.generate-matrix.outputs['has-merges-multiarch'] == 'true'
|
||||
# !cancelled() lets the merge run even when a few build legs failed.
|
||||
# Without it, GHA's default `needs:` cascade skips the entire merge
|
||||
# matrix on a single failed/cancelled cell. We still want to publish
|
||||
# the manifest lists for tag-suffixes whose legs all succeeded.
|
||||
# Observed in v4.2.1: 2 singlearch build failures cascade-skipped all
|
||||
# ~199 singlearch merge entries.
|
||||
if: ${{ !cancelled() && needs.generate-matrix.outputs['has-merges-multiarch'] == 'true' }}
|
||||
uses: ./.github/workflows/backend_merge.yml
|
||||
with:
|
||||
tag-latest: ${{ matrix.tag-latest }}
|
||||
@@ -170,7 +176,8 @@ jobs:
|
||||
|
||||
backend-merge-jobs-singlearch:
|
||||
needs: [generate-matrix, backend-jobs-singlearch]
|
||||
if: needs.generate-matrix.outputs['has-merges-singlearch'] == 'true'
|
||||
# See note on backend-merge-jobs-multiarch above for !cancelled().
|
||||
if: ${{ !cancelled() && needs.generate-matrix.outputs['has-merges-singlearch'] == 'true' }}
|
||||
uses: ./.github/workflows/backend_merge.yml
|
||||
with:
|
||||
tag-latest: ${{ matrix.tag-latest }}
|
||||
|
||||
12
.github/workflows/backend_build.yml
vendored
12
.github/workflows/backend_build.yml
vendored
@@ -228,6 +228,16 @@ jobs:
|
||||
digest="${{ steps.build.outputs.digest }}"
|
||||
touch "/tmp/digests/${digest#sha256:}"
|
||||
|
||||
# See .github/scripts/anchor-digest-in-cache.sh for why this is needed
|
||||
# and how it interacts with backend_merge.yml's cleanup step.
|
||||
- name: Anchor digest in ci-cache so quay GC won't reap before merge
|
||||
if: github.event_name != 'pull_request'
|
||||
env:
|
||||
TAG_SUFFIX: ${{ inputs.tag-suffix }}
|
||||
PLATFORM_TAG: ${{ inputs.platform-tag || 'single' }}
|
||||
DIGEST: ${{ steps.build.outputs.digest }}
|
||||
run: .github/scripts/anchor-digest-in-cache.sh
|
||||
|
||||
# Artifact name uses a `--` separator between tag-suffix and platform-tag
|
||||
# to avoid prefix collisions during the merge job's pattern-based download.
|
||||
# Tag-suffixes are not prefix-disjoint (e.g. -gpu-nvidia-cuda-12-vllm is a
|
||||
@@ -237,7 +247,7 @@ jobs:
|
||||
# platform-tag (single-arch entries) keeps the artifact name non-trailing.
|
||||
- name: Upload digest artifact
|
||||
if: github.event_name != 'pull_request'
|
||||
uses: actions/upload-artifact@v4
|
||||
uses: actions/upload-artifact@v7
|
||||
with:
|
||||
name: digests${{ inputs.tag-suffix }}--${{ inputs.platform-tag || 'single' }}
|
||||
path: /tmp/digests/*
|
||||
|
||||
7
.github/workflows/backend_build_darwin.yml
vendored
7
.github/workflows/backend_build_darwin.yml
vendored
@@ -116,6 +116,13 @@ jobs:
|
||||
# already), we don't have to chase missing dylibs one at a time.
|
||||
# The downloads cache makes the reinstall fast (~5s on a hit).
|
||||
brew reinstall ccache
|
||||
# Same pattern for grpc: its CMake config (used by the llama-cpp
|
||||
# `grpc-server` target) does find_package(absl). The cache restores
|
||||
# /opt/homebrew/Cellar/grpc so brew above no-ops the install, but
|
||||
# abseil isn't in our Cellar cache list and never gets installed
|
||||
# alongside, leaving grpc's CMake unable to resolve it. Reinstalling
|
||||
# grpc re-validates and pulls abseil in, mirroring the ccache fix.
|
||||
brew reinstall grpc
|
||||
# The brew cache restores the Cellar dirs but NOT the bin symlinks
|
||||
# at /opt/homebrew/bin/*. brew install above sees the Cellar present
|
||||
# and decides "already installed" without re-linking, so on a cache-
|
||||
|
||||
41
.github/workflows/backend_merge.yml
vendored
41
.github/workflows/backend_merge.yml
vendored
@@ -34,12 +34,21 @@ jobs:
|
||||
env:
|
||||
quay_username: ${{ secrets.quayUsername }}
|
||||
steps:
|
||||
# Sparse checkout: the merge job needs `.github/scripts/` (for the
|
||||
# keepalive cleanup script) but none of the source tree.
|
||||
- name: Checkout (.github/scripts only)
|
||||
uses: actions/checkout@v6
|
||||
with:
|
||||
sparse-checkout: |
|
||||
.github/scripts
|
||||
sparse-checkout-cone-mode: false
|
||||
|
||||
# `--` separator anchors the glob so we don't over-match sibling
|
||||
# backends whose tag-suffix happens to be a prefix of ours
|
||||
# (e.g. -cpu-vllm vs -cpu-vllm-omni). Must stay in sync with the
|
||||
# upload-artifact name in backend_build.yml.
|
||||
- name: Download digests
|
||||
uses: actions/download-artifact@v4
|
||||
uses: actions/download-artifact@v8
|
||||
with:
|
||||
pattern: digests${{ inputs.tag-suffix }}--*
|
||||
merge-multiple: true
|
||||
@@ -79,6 +88,25 @@ jobs:
|
||||
latest=${{ inputs.tag-latest }}
|
||||
suffix=${{ inputs.tag-suffix }},onlatest=true
|
||||
|
||||
# Source from ci-cache, not local-ai-backends.
|
||||
#
|
||||
# The build job pushes per-arch manifests to local-ai-backends with
|
||||
# push-by-digest=true (no tag), then anchors a tagged copy into
|
||||
# ci-cache so the manifest can be retrieved hours later when this
|
||||
# merge runs. Quay's manifest GC, however, is per-repository: the
|
||||
# anchor tag in ci-cache protects the manifest there, but the same
|
||||
# digest in local-ai-backends has no tag in *that* repo and gets
|
||||
# reaped independently. Sourcing local-ai-backends@<digest> here
|
||||
# then fails with "manifest not found" — exactly the regression
|
||||
# we hit on v4.2.2 (19/37 multiarch merges failed).
|
||||
#
|
||||
# ci-cache@<digest> resolves because we anchored it there. buildx
|
||||
# imagetools create copies the manifest into local-ai-backends
|
||||
# (cross-repo within the same registry, blobs already cross-mounted
|
||||
# from the original push so no transfer needed) and publishes the
|
||||
# manifest list with the user-facing tags. The resulting manifest
|
||||
# list is fully self-contained in local-ai-backends — child digests
|
||||
# only, no embedded references to ci-cache.
|
||||
- name: Create manifest list and push (quay)
|
||||
if: github.event_name != 'pull_request'
|
||||
working-directory: /tmp/digests
|
||||
@@ -95,7 +123,7 @@ jobs:
|
||||
else
|
||||
# shellcheck disable=SC2086
|
||||
docker buildx imagetools create $tags \
|
||||
$(printf 'quay.io/go-skynet/local-ai-backends@sha256:%s ' *)
|
||||
$(printf 'quay.io/go-skynet/ci-cache@sha256:%s ' *)
|
||||
fi
|
||||
|
||||
- name: Create manifest list and push (dockerhub)
|
||||
@@ -126,6 +154,15 @@ jobs:
|
||||
docker buildx imagetools inspect "$first_tag"
|
||||
fi
|
||||
|
||||
# See .github/scripts/cleanup-keepalive-tags.sh for why this is
|
||||
# best-effort and what the failure modes are.
|
||||
- name: Cleanup keepalive tags in ci-cache
|
||||
if: github.event_name != 'pull_request' && success()
|
||||
env:
|
||||
TAG_SUFFIX: ${{ inputs.tag-suffix }}
|
||||
QUAY_TOKEN: ${{ secrets.quayPassword }}
|
||||
run: .github/scripts/cleanup-keepalive-tags.sh
|
||||
|
||||
- name: Job summary
|
||||
if: github.event_name != 'pull_request'
|
||||
run: |
|
||||
|
||||
6
.github/workflows/backend_pr.yml
vendored
6
.github/workflows/backend_pr.yml
vendored
@@ -104,7 +104,9 @@ jobs:
|
||||
# backend_merge.yml's push-side steps are all gated on
|
||||
# github.event_name != 'pull_request', so on a PR the merge job would
|
||||
# do nothing. Skip it entirely to avoid spinning up an empty runner.
|
||||
if: github.event_name != 'pull_request' && needs.generate-matrix.outputs['has-merges-multiarch'] == 'true'
|
||||
# !cancelled() lets the merge run even when a few build legs fail —
|
||||
# see the matching note in backend.yml.
|
||||
if: ${{ !cancelled() && github.event_name != 'pull_request' && needs.generate-matrix.outputs['has-merges-multiarch'] == 'true' }}
|
||||
uses: ./.github/workflows/backend_merge.yml
|
||||
with:
|
||||
tag-latest: ${{ matrix.tag-latest }}
|
||||
@@ -118,7 +120,7 @@ jobs:
|
||||
|
||||
backend-merge-jobs-singlearch:
|
||||
needs: [generate-matrix, backend-jobs-singlearch]
|
||||
if: github.event_name != 'pull_request' && needs.generate-matrix.outputs['has-merges-singlearch'] == 'true'
|
||||
if: ${{ !cancelled() && github.event_name != 'pull_request' && needs.generate-matrix.outputs['has-merges-singlearch'] == 'true' }}
|
||||
uses: ./.github/workflows/backend_merge.yml
|
||||
with:
|
||||
tag-latest: ${{ matrix.tag-latest }}
|
||||
|
||||
94
.github/workflows/image.yml
vendored
94
.github/workflows/image.yml
vendored
@@ -151,7 +151,11 @@
|
||||
ubuntu-codename: 'noble'
|
||||
|
||||
core-image-merge:
|
||||
if: github.repository == 'mudler/LocalAI'
|
||||
# !cancelled(): without it, GHA's default `needs:` cascade skips the
|
||||
# merge whenever any matrix cell of the parent build fails or is
|
||||
# cancelled. Same fix as backend.yml's merge jobs — we still want to
|
||||
# publish the manifest list for tag-suffixes whose legs all succeeded.
|
||||
if: ${{ !cancelled() && github.repository == 'mudler/LocalAI' }}
|
||||
needs: core-image-build
|
||||
uses: ./.github/workflows/image_merge.yml
|
||||
with:
|
||||
@@ -164,7 +168,7 @@
|
||||
quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
|
||||
|
||||
gpu-vulkan-image-merge:
|
||||
if: github.repository == 'mudler/LocalAI'
|
||||
if: ${{ !cancelled() && github.repository == 'mudler/LocalAI' }}
|
||||
needs: core-image-build
|
||||
uses: ./.github/workflows/image_merge.yml
|
||||
with:
|
||||
@@ -175,7 +179,91 @@
|
||||
dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
|
||||
quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
|
||||
quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
|
||||
|
||||
|
||||
# Single-arch server-image merges. Same conceptual fix as the backend
|
||||
# singletons in PR #9781: image_build.yml pushes by canonical digest
|
||||
# only, so without a downstream merge step there's no tag for consumers
|
||||
# (no :latest-gpu-nvidia-cuda-12, no :v<X>-gpu-nvidia-cuda-12, etc.).
|
||||
# Each merge job needs only its parent build matrix and is filtered by
|
||||
# tag-suffix in image_merge.yml's artifact-download pattern.
|
||||
gpu-nvidia-cuda-12-image-merge:
|
||||
if: ${{ !cancelled() && github.repository == 'mudler/LocalAI' }}
|
||||
needs: core-image-build
|
||||
uses: ./.github/workflows/image_merge.yml
|
||||
with:
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: '-gpu-nvidia-cuda-12'
|
||||
secrets:
|
||||
dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||
dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
|
||||
quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
|
||||
quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
|
||||
|
||||
gpu-nvidia-cuda-13-image-merge:
|
||||
if: ${{ !cancelled() && github.repository == 'mudler/LocalAI' }}
|
||||
needs: core-image-build
|
||||
uses: ./.github/workflows/image_merge.yml
|
||||
with:
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: '-gpu-nvidia-cuda-13'
|
||||
secrets:
|
||||
dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||
dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
|
||||
quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
|
||||
quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
|
||||
|
||||
gpu-intel-image-merge:
|
||||
if: ${{ !cancelled() && github.repository == 'mudler/LocalAI' }}
|
||||
needs: core-image-build
|
||||
uses: ./.github/workflows/image_merge.yml
|
||||
with:
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: '-gpu-intel'
|
||||
secrets:
|
||||
dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||
dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
|
||||
quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
|
||||
quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
|
||||
|
||||
gpu-hipblas-image-merge:
|
||||
if: ${{ !cancelled() && github.repository == 'mudler/LocalAI' }}
|
||||
needs: hipblas-jobs
|
||||
uses: ./.github/workflows/image_merge.yml
|
||||
with:
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: '-gpu-hipblas'
|
||||
secrets:
|
||||
dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||
dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
|
||||
quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
|
||||
quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
|
||||
|
||||
nvidia-l4t-arm64-image-merge:
|
||||
if: ${{ !cancelled() && github.repository == 'mudler/LocalAI' }}
|
||||
needs: gh-runner
|
||||
uses: ./.github/workflows/image_merge.yml
|
||||
with:
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: '-nvidia-l4t-arm64'
|
||||
secrets:
|
||||
dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||
dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
|
||||
quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
|
||||
quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
|
||||
|
||||
nvidia-l4t-arm64-cuda-13-image-merge:
|
||||
if: ${{ !cancelled() && github.repository == 'mudler/LocalAI' }}
|
||||
needs: gh-runner
|
||||
uses: ./.github/workflows/image_merge.yml
|
||||
with:
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: '-nvidia-l4t-arm64-cuda-13'
|
||||
secrets:
|
||||
dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||
dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
|
||||
quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
|
||||
quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
|
||||
|
||||
gh-runner:
|
||||
if: github.repository == 'mudler/LocalAI'
|
||||
uses: ./.github/workflows/image_build.yml
|
||||
|
||||
21
.github/workflows/image_build.yml
vendored
21
.github/workflows/image_build.yml
vendored
@@ -185,11 +185,28 @@ jobs:
|
||||
digest="${{ steps.build.outputs.digest }}"
|
||||
touch "/tmp/digests/${digest#sha256:}"
|
||||
|
||||
# See .github/scripts/anchor-digest-in-cache.sh for why this is needed
|
||||
# and how it interacts with image_merge.yml's cleanup step. Mirrors the
|
||||
# same anchor in backend_build.yml — quay's per-repo manifest GC reaps
|
||||
# untagged manifests in local-ai before the merge runs.
|
||||
- name: Anchor digest in ci-cache so quay GC won't reap before merge
|
||||
if: github.event_name != 'pull_request'
|
||||
env:
|
||||
TAG_SUFFIX: ${{ inputs.tag-suffix == '' && '-core' || inputs.tag-suffix }}
|
||||
PLATFORM_TAG: ${{ inputs.platform-tag || 'single' }}
|
||||
DIGEST: ${{ steps.build.outputs.digest }}
|
||||
SOURCE_IMAGE: quay.io/go-skynet/local-ai
|
||||
run: .github/scripts/anchor-digest-in-cache.sh
|
||||
|
||||
- name: Upload digest artifact
|
||||
if: github.event_name != 'pull_request'
|
||||
uses: actions/upload-artifact@v4
|
||||
uses: actions/upload-artifact@v7
|
||||
with:
|
||||
name: digests-localai${{ inputs.tag-suffix == '' && '-core' || inputs.tag-suffix }}-${{ inputs.platform-tag }}
|
||||
# `--` separator + 'single' placeholder for empty platform-tag —
|
||||
# same pattern as backend_build.yml. Prevents prefix collisions
|
||||
# in the merge-side glob (e.g. -nvidia-l4t-arm64 is a prefix of
|
||||
# -nvidia-l4t-arm64-cuda-13).
|
||||
name: digests-localai${{ inputs.tag-suffix == '' && '-core' || inputs.tag-suffix }}--${{ inputs.platform-tag || 'single' }}
|
||||
path: /tmp/digests/*
|
||||
if-no-files-found: error
|
||||
retention-days: 1
|
||||
|
||||
36
.github/workflows/image_merge.yml
vendored
36
.github/workflows/image_merge.yml
vendored
@@ -33,10 +33,22 @@ jobs:
|
||||
env:
|
||||
quay_username: ${{ secrets.quayUsername }}
|
||||
steps:
|
||||
- name: Download digests
|
||||
uses: actions/download-artifact@v4
|
||||
# Sparse checkout: needed for .github/scripts/ (the keepalive cleanup
|
||||
# script). Skips the rest of the source tree.
|
||||
- name: Checkout (.github/scripts only)
|
||||
uses: actions/checkout@v6
|
||||
with:
|
||||
pattern: digests-localai${{ inputs.tag-suffix == '' && '-core' || inputs.tag-suffix }}-*
|
||||
sparse-checkout: |
|
||||
.github/scripts
|
||||
sparse-checkout-cone-mode: false
|
||||
|
||||
- name: Download digests
|
||||
uses: actions/download-artifact@v8
|
||||
with:
|
||||
# `--` separator anchors the glob so we don't over-match sibling
|
||||
# tag-suffixes (e.g. -nvidia-l4t-arm64 vs -nvidia-l4t-arm64-cuda-13).
|
||||
# Must stay in sync with image_build.yml's upload-artifact name.
|
||||
pattern: digests-localai${{ inputs.tag-suffix == '' && '-core' || inputs.tag-suffix }}--*
|
||||
merge-multiple: true
|
||||
path: /tmp/digests
|
||||
|
||||
@@ -72,6 +84,13 @@ jobs:
|
||||
latest=${{ inputs.tag-latest }}
|
||||
suffix=${{ inputs.tag-suffix }},onlatest=true
|
||||
|
||||
# Source from ci-cache, not local-ai. See backend_merge.yml for the
|
||||
# detailed rationale — quay's manifest GC is per-repository, so the
|
||||
# untagged digest in local-ai gets reaped while the same content lives
|
||||
# tagged under ci-cache (anchored by image_build.yml). buildx imagetools
|
||||
# create copies the manifest into local-ai (blobs already cross-mounted)
|
||||
# and publishes the manifest list with user-facing tags. End state in
|
||||
# local-ai is self-contained; no embedded reference to ci-cache.
|
||||
- name: Create manifest list and push (quay)
|
||||
working-directory: /tmp/digests
|
||||
run: |
|
||||
@@ -82,7 +101,7 @@ jobs:
|
||||
else
|
||||
# shellcheck disable=SC2086
|
||||
docker buildx imagetools create $tags \
|
||||
$(printf 'quay.io/go-skynet/local-ai@sha256:%s ' *)
|
||||
$(printf 'quay.io/go-skynet/ci-cache@sha256:%s ' *)
|
||||
fi
|
||||
|
||||
- name: Create manifest list and push (dockerhub)
|
||||
@@ -107,6 +126,15 @@ jobs:
|
||||
docker buildx imagetools inspect "$first_tag"
|
||||
fi
|
||||
|
||||
# See .github/scripts/cleanup-keepalive-tags.sh for the best-effort
|
||||
# semantics — fails soft when the registry credential isn't OAuth-scoped.
|
||||
- name: Cleanup keepalive tags in ci-cache
|
||||
if: github.event_name != 'pull_request' && success()
|
||||
env:
|
||||
TAG_SUFFIX: ${{ inputs.tag-suffix == '' && '-core' || inputs.tag-suffix }}
|
||||
QUAY_TOKEN: ${{ secrets.quayPassword }}
|
||||
run: .github/scripts/cleanup-keepalive-tags.sh
|
||||
|
||||
- name: Job summary
|
||||
run: |
|
||||
set -euo pipefail
|
||||
|
||||
27
.github/workflows/test-extra.yml
vendored
27
.github/workflows/test-extra.yml
vendored
@@ -28,6 +28,7 @@ jobs:
|
||||
qwen-asr: ${{ steps.detect.outputs.qwen-asr }}
|
||||
nemo: ${{ steps.detect.outputs.nemo }}
|
||||
voxcpm: ${{ steps.detect.outputs.voxcpm }}
|
||||
liquid-audio: ${{ steps.detect.outputs.liquid-audio }}
|
||||
llama-cpp-quantization: ${{ steps.detect.outputs.llama-cpp-quantization }}
|
||||
llama-cpp: ${{ steps.detect.outputs.llama-cpp }}
|
||||
ik-llama-cpp: ${{ steps.detect.outputs.ik-llama-cpp }}
|
||||
@@ -447,6 +448,32 @@ jobs:
|
||||
run: |
|
||||
make --jobs=5 --output-sync=target -C backend/python/voxcpm
|
||||
make --jobs=5 --output-sync=target -C backend/python/voxcpm test
|
||||
# liquid-audio: LFM2.5-Audio any-to-any backend. The CI smoke test
|
||||
# exercises Health() and LoadModel(mode:finetune) — fine-tune mode
|
||||
# short-circuits before pulling weights (backend.py:192), so no
|
||||
# HuggingFace download or GPU is needed. The full-inference path is
|
||||
# gated on LIQUID_AUDIO_MODEL_ID, which we don't set here.
|
||||
tests-liquid-audio:
|
||||
needs: detect-changes
|
||||
if: needs.detect-changes.outputs.liquid-audio == 'true' || needs.detect-changes.outputs.run-all == 'true'
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v6
|
||||
with:
|
||||
submodules: true
|
||||
- name: Dependencies
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y build-essential ffmpeg
|
||||
sudo apt-get install -y ca-certificates cmake curl patch python3-pip
|
||||
# Install UV
|
||||
curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||
pip install --user --no-cache-dir grpcio-tools==1.64.1
|
||||
- name: Test liquid-audio
|
||||
run: |
|
||||
make --jobs=5 --output-sync=target -C backend/python/liquid-audio
|
||||
make --jobs=5 --output-sync=target -C backend/python/liquid-audio test
|
||||
tests-llama-cpp-quantization:
|
||||
needs: detect-changes
|
||||
if: needs.detect-changes.outputs.llama-cpp-quantization == 'true' || needs.detect-changes.outputs.run-all == 'true'
|
||||
|
||||
@@ -305,7 +305,7 @@ EOT
|
||||
###################################
|
||||
|
||||
# Build React UI
|
||||
FROM node:25-slim AS react-ui-builder
|
||||
FROM node:26-slim AS react-ui-builder
|
||||
WORKDIR /app
|
||||
COPY core/http/react-ui/package*.json ./
|
||||
RUN npm install
|
||||
|
||||
8
Makefile
8
Makefile
@@ -1,5 +1,5 @@
|
||||
# Disable parallel execution for backend builds
|
||||
.NOTPARALLEL: backends/diffusers backends/llama-cpp backends/turboquant backends/outetts backends/piper backends/stablediffusion-ggml backends/whisper backends/faster-whisper backends/silero-vad backends/local-store backends/huggingface backends/rfdetr backends/insightface backends/speaker-recognition backends/kitten-tts backends/kokoro backends/chatterbox backends/llama-cpp-darwin backends/neutts build-darwin-python-backend build-darwin-go-backend backends/mlx backends/diffuser-darwin backends/mlx-vlm backends/mlx-audio backends/mlx-distributed backends/stablediffusion-ggml-darwin backends/vllm backends/vllm-omni backends/sglang backends/moonshine backends/pocket-tts backends/qwen-tts backends/faster-qwen3-tts backends/qwen-asr backends/nemo backends/voxcpm backends/whisperx backends/ace-step backends/acestep-cpp backends/fish-speech backends/voxtral backends/opus backends/trl backends/llama-cpp-quantization backends/kokoros backends/sam3-cpp backends/qwen3-tts-cpp backends/vibevoice-cpp backends/localvqe backends/tinygrad backends/sherpa-onnx backends/ds4 backends/ds4-darwin
|
||||
.NOTPARALLEL: backends/diffusers backends/llama-cpp backends/turboquant backends/outetts backends/piper backends/stablediffusion-ggml backends/whisper backends/faster-whisper backends/silero-vad backends/local-store backends/huggingface backends/rfdetr backends/insightface backends/speaker-recognition backends/kitten-tts backends/kokoro backends/chatterbox backends/llama-cpp-darwin backends/neutts build-darwin-python-backend build-darwin-go-backend backends/mlx backends/diffuser-darwin backends/mlx-vlm backends/mlx-audio backends/mlx-distributed backends/stablediffusion-ggml-darwin backends/vllm backends/vllm-omni backends/sglang backends/moonshine backends/pocket-tts backends/qwen-tts backends/faster-qwen3-tts backends/qwen-asr backends/nemo backends/voxcpm backends/whisperx backends/ace-step backends/acestep-cpp backends/fish-speech backends/voxtral backends/opus backends/trl backends/llama-cpp-quantization backends/kokoros backends/sam3-cpp backends/qwen3-tts-cpp backends/vibevoice-cpp backends/localvqe backends/tinygrad backends/sherpa-onnx backends/ds4 backends/ds4-darwin backends/liquid-audio
|
||||
|
||||
GOCMD=go
|
||||
GOTEST=$(GOCMD) test
|
||||
@@ -463,6 +463,7 @@ prepare-test-extra: protogen-python
|
||||
$(MAKE) -C backend/python/vllm-omni
|
||||
$(MAKE) -C backend/python/sglang
|
||||
$(MAKE) -C backend/python/vibevoice
|
||||
$(MAKE) -C backend/python/liquid-audio
|
||||
$(MAKE) -C backend/python/moonshine
|
||||
$(MAKE) -C backend/python/pocket-tts
|
||||
$(MAKE) -C backend/python/qwen-tts
|
||||
@@ -488,6 +489,7 @@ test-extra: prepare-test-extra
|
||||
$(MAKE) -C backend/python/vllm test
|
||||
$(MAKE) -C backend/python/vllm-omni test
|
||||
$(MAKE) -C backend/python/vibevoice test
|
||||
$(MAKE) -C backend/python/liquid-audio test
|
||||
$(MAKE) -C backend/python/moonshine test
|
||||
$(MAKE) -C backend/python/pocket-tts test
|
||||
$(MAKE) -C backend/python/qwen-tts test
|
||||
@@ -1092,6 +1094,7 @@ BACKEND_SGLANG = sglang|python|.|false|true
|
||||
BACKEND_DIFFUSERS = diffusers|python|.|--progress=plain|true
|
||||
BACKEND_CHATTERBOX = chatterbox|python|.|false|true
|
||||
BACKEND_VIBEVOICE = vibevoice|python|.|--progress=plain|true
|
||||
BACKEND_LIQUID_AUDIO = liquid-audio|python|.|--progress=plain|true
|
||||
BACKEND_MOONSHINE = moonshine|python|.|false|true
|
||||
BACKEND_POCKET_TTS = pocket-tts|python|.|false|true
|
||||
BACKEND_QWEN_TTS = qwen-tts|python|.|false|true
|
||||
@@ -1169,6 +1172,7 @@ $(eval $(call generate-docker-build-target,$(BACKEND_SGLANG)))
|
||||
$(eval $(call generate-docker-build-target,$(BACKEND_DIFFUSERS)))
|
||||
$(eval $(call generate-docker-build-target,$(BACKEND_CHATTERBOX)))
|
||||
$(eval $(call generate-docker-build-target,$(BACKEND_VIBEVOICE)))
|
||||
$(eval $(call generate-docker-build-target,$(BACKEND_LIQUID_AUDIO)))
|
||||
$(eval $(call generate-docker-build-target,$(BACKEND_MOONSHINE)))
|
||||
$(eval $(call generate-docker-build-target,$(BACKEND_POCKET_TTS)))
|
||||
$(eval $(call generate-docker-build-target,$(BACKEND_QWEN_TTS)))
|
||||
@@ -1197,7 +1201,7 @@ $(eval $(call generate-docker-build-target,$(BACKEND_SHERPA_ONNX)))
|
||||
docker-save-%: backend-images
|
||||
docker save local-ai-backend:$* -o backend-images/$*.tar
|
||||
|
||||
docker-build-backends: docker-build-llama-cpp docker-build-ik-llama-cpp docker-build-turboquant docker-build-ds4 docker-build-rerankers docker-build-vllm docker-build-vllm-omni docker-build-sglang docker-build-transformers docker-build-outetts docker-build-diffusers docker-build-kokoro docker-build-faster-whisper docker-build-coqui docker-build-chatterbox docker-build-vibevoice docker-build-moonshine docker-build-pocket-tts docker-build-qwen-tts docker-build-fish-speech docker-build-faster-qwen3-tts docker-build-qwen-asr docker-build-nemo docker-build-voxcpm docker-build-whisperx docker-build-ace-step docker-build-acestep-cpp docker-build-voxtral docker-build-mlx-distributed docker-build-trl docker-build-llama-cpp-quantization docker-build-tinygrad docker-build-kokoros docker-build-sam3-cpp docker-build-qwen3-tts-cpp docker-build-vibevoice-cpp docker-build-localvqe docker-build-insightface docker-build-speaker-recognition docker-build-sherpa-onnx
|
||||
docker-build-backends: docker-build-llama-cpp docker-build-ik-llama-cpp docker-build-turboquant docker-build-ds4 docker-build-rerankers docker-build-vllm docker-build-vllm-omni docker-build-sglang docker-build-transformers docker-build-outetts docker-build-diffusers docker-build-kokoro docker-build-faster-whisper docker-build-coqui docker-build-chatterbox docker-build-vibevoice docker-build-liquid-audio docker-build-moonshine docker-build-pocket-tts docker-build-qwen-tts docker-build-fish-speech docker-build-faster-qwen3-tts docker-build-qwen-asr docker-build-nemo docker-build-voxcpm docker-build-whisperx docker-build-ace-step docker-build-acestep-cpp docker-build-voxtral docker-build-mlx-distributed docker-build-trl docker-build-llama-cpp-quantization docker-build-tinygrad docker-build-kokoros docker-build-sam3-cpp docker-build-qwen3-tts-cpp docker-build-vibevoice-cpp docker-build-localvqe docker-build-insightface docker-build-speaker-recognition docker-build-sherpa-onnx
|
||||
|
||||
########################################################
|
||||
### Mock Backend for E2E Tests
|
||||
|
||||
@@ -117,6 +117,12 @@ ARG CUDA_DOCKER_ARCH
|
||||
ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH}
|
||||
ARG CMAKE_ARGS
|
||||
ENV CMAKE_ARGS=${CMAKE_ARGS}
|
||||
# AMDGPU_TARGETS must be forwarded into the env here too — backend/cpp/llama-cpp/Makefile
|
||||
# (which the turboquant Makefile reuses via a sibling build dir) errors out when the var
|
||||
# is empty on a hipblas build, and the prebuilt path is what CI exercises most of the
|
||||
# time. The builder-fromsource stage above already does this; mirror it here.
|
||||
ARG AMDGPU_TARGETS
|
||||
ENV AMDGPU_TARGETS=${AMDGPU_TARGETS}
|
||||
ARG TARGETARCH
|
||||
ARG TARGETVARIANT
|
||||
|
||||
|
||||
@@ -48,6 +48,11 @@ service Backend {
|
||||
|
||||
rpc AudioTransform(AudioTransformRequest) returns (AudioTransformResult) {}
|
||||
rpc AudioTransformStream(stream AudioTransformFrameRequest) returns (stream AudioTransformFrameResponse) {}
|
||||
// AudioToAudioStream is the bidirectional any-to-any S2S RPC. Backends
|
||||
// that load a speech-to-speech model consume input audio frames and emit
|
||||
// interleaved audio + transcript + tool-call deltas as typed events.
|
||||
// Backends without S2S support return UNIMPLEMENTED.
|
||||
rpc AudioToAudioStream(stream AudioToAudioRequest) returns (stream AudioToAudioResponse) {}
|
||||
|
||||
rpc ModelMetadata(ModelOptions) returns (ModelMetadataResponse) {}
|
||||
|
||||
@@ -768,6 +773,93 @@ message AudioTransformFrameResponse {
|
||||
int64 frame_index = 2;
|
||||
}
|
||||
|
||||
// === AudioToAudioStream messages =========================================
|
||||
//
|
||||
// Bidirectional stream between the LocalAI core and an any-to-any audio
|
||||
// model. The client opens the stream with a Config payload, then alternates
|
||||
// Frame (input audio) and Control (turn boundaries, function-call results,
|
||||
// session updates) payloads. The server streams back typed events: audio
|
||||
// frames carry PCM in `pcm`; transcript / tool-call deltas carry JSON in
|
||||
// `meta`; the stream ends with a `response.done` (success) or `error` event.
|
||||
|
||||
message AudioToAudioRequest {
|
||||
oneof payload {
|
||||
AudioToAudioConfig config = 1;
|
||||
AudioToAudioFrame frame = 2;
|
||||
AudioToAudioControl control = 3;
|
||||
}
|
||||
}
|
||||
|
||||
message AudioToAudioConfig {
|
||||
// Sample rate (Hz) of the client→server PCM audio. 0 => backend default
|
||||
// (16 kHz for the LFM2-Audio Conformer encoder).
|
||||
int32 input_sample_rate = 1;
|
||||
// Preferred server→client audio rate. 0 => backend default
|
||||
// (24 kHz for the LFM2-Audio vocoder).
|
||||
int32 output_sample_rate = 2;
|
||||
// Optional system prompt override. Empty => backend chooses based on
|
||||
// mode (e.g. "Respond with interleaved text and audio.").
|
||||
string system_prompt = 3;
|
||||
// Optional baked-voice id. Models that only ship a fixed set of
|
||||
// voices (e.g. LFM2-Audio: us_male/us_female/uk_male/uk_female) match
|
||||
// this against their voice table; an empty string keeps the default.
|
||||
string voice = 4;
|
||||
// JSON-encoded array of tool definitions in OpenAI Chat Completions
|
||||
// format. Empty => no tools.
|
||||
string tools = 5;
|
||||
// Free-form sampling / decoding parameters (temperature, top_k,
|
||||
// max_new_tokens, audio_top_k, etc).
|
||||
map<string, string> params = 6;
|
||||
// True => reset any session-scoped state before processing further
|
||||
// frames on this stream. The first Config implicitly resets.
|
||||
bool reset = 7;
|
||||
}
|
||||
|
||||
message AudioToAudioFrame {
|
||||
// Raw PCM s16le mono at config.input_sample_rate. Empty pcm + end_of_input
|
||||
// is a valid "user finished speaking" marker without trailing audio.
|
||||
bytes pcm = 1;
|
||||
// Marks the last frame of a user turn. The backend may begin emitting
|
||||
// a response immediately after seeing this.
|
||||
bool end_of_input = 2;
|
||||
}
|
||||
|
||||
message AudioToAudioControl {
|
||||
// Free-form control event names. Initial set:
|
||||
// "input_audio_buffer.commit" — user finished speaking
|
||||
// "response.cancel" — abort in-flight generation
|
||||
// "conversation.item.create" — inject a non-audio item (e.g.
|
||||
// function_call_output as JSON in
|
||||
// `payload`)
|
||||
// "session.update" — re-configure mid-stream
|
||||
string event = 1;
|
||||
// Event-specific JSON payload.
|
||||
bytes payload = 2;
|
||||
}
|
||||
|
||||
message AudioToAudioResponse {
|
||||
// Event identifies what this frame carries. Mirrors the OpenAI Realtime
|
||||
// API server-event names where applicable. Initial set:
|
||||
// "response.audio.delta"
|
||||
// "response.audio_transcript.delta"
|
||||
// "response.function_call_arguments.delta"
|
||||
// "response.function_call_arguments.done"
|
||||
// "response.done"
|
||||
// "error"
|
||||
string event = 1;
|
||||
// Populated when event = response.audio.delta.
|
||||
bytes pcm = 2;
|
||||
// Populated alongside pcm to identify its rate. 0 => same as the
|
||||
// session's negotiated output_sample_rate.
|
||||
int32 sample_rate = 3;
|
||||
// JSON payload for non-PCM events (transcript chunk, tool args, error
|
||||
// body).
|
||||
bytes meta = 4;
|
||||
// Monotonic per-stream counter, useful for client reordering and
|
||||
// debugging.
|
||||
int64 sequence = 5;
|
||||
}
|
||||
|
||||
message ModelMetadataResponse {
|
||||
bool supports_thinking = 1;
|
||||
string rendered_template = 2; // The rendered chat template with enable_thinking=true (empty if not applicable)
|
||||
|
||||
@@ -1,10 +1,10 @@
|
||||
# ds4 backend Makefile.
|
||||
#
|
||||
# Upstream pin lives below as DS4_VERSION?= so the bump-deps bot
|
||||
# Upstream pin lives below in the DS4_VERSION variable so the bump-deps bot
|
||||
# (.github/bump_deps.sh) can find and update it - matches the
|
||||
# llama-cpp / ik-llama-cpp / turboquant convention.
|
||||
|
||||
DS4_VERSION?=ae302c2fa18cc6d9aefc021d0f27ae03c9ad2fc0
|
||||
DS4_VERSION?=0cba357ca1bc0e7510421cc26888e420ea942123
|
||||
DS4_REPO?=https://github.com/antirez/ds4
|
||||
|
||||
CURRENT_MAKEFILE_DIR := $(dir $(abspath $(lastword $(MAKEFILE_LIST))))
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
|
||||
IK_LLAMA_VERSION?=eb570eb96689c235933b813693ca28ab9d3d26de
|
||||
IK_LLAMA_VERSION?=949bb8f1d660fc1264c137a6f3dbd619375f6134
|
||||
LLAMA_REPO?=https://github.com/ikawrakow/ik_llama.cpp
|
||||
|
||||
CMAKE_ARGS?=
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
|
||||
LLAMA_VERSION?=389ff61d77b5c71cec0cf92fe4e5d01ace80b797
|
||||
LLAMA_VERSION?=a9883db8ee021cf16783016a60996d41820b5195
|
||||
LLAMA_REPO?=https://github.com/ggerganov/llama.cpp
|
||||
|
||||
CMAKE_ARGS?=
|
||||
|
||||
@@ -36,6 +36,8 @@
|
||||
#include <cstdlib>
|
||||
#include <fstream>
|
||||
#include <iterator>
|
||||
#include <list>
|
||||
#include <map>
|
||||
#include <mutex>
|
||||
#include <signal.h>
|
||||
#include <thread>
|
||||
@@ -443,10 +445,22 @@ static void params_parse(server_context& /*ctx_server*/, const backend::ModelOpt
|
||||
// Draft model for speculative decoding
|
||||
if (!request->draftmodel().empty()) {
|
||||
params.speculative.draft.mparams.path = request->draftmodel();
|
||||
// Default to draft type if a draft model is set but no explicit type
|
||||
// Default to draft type if a draft model is set but no explicit type.
|
||||
// Upstream (post ggml-org/llama.cpp#22838) made the speculative type a
|
||||
// vector; the turboquant fork still uses the legacy scalar. The
|
||||
// LOCALAI_LEGACY_LLAMA_CPP_SPEC macro is injected by
|
||||
// backend/cpp/turboquant/patch-grpc-server.sh for fork builds only.
|
||||
#ifdef LOCALAI_LEGACY_LLAMA_CPP_SPEC
|
||||
if (params.speculative.type == COMMON_SPECULATIVE_TYPE_NONE) {
|
||||
params.speculative.type = COMMON_SPECULATIVE_TYPE_DRAFT;
|
||||
}
|
||||
#else
|
||||
const bool no_spec_type = params.speculative.types.empty() ||
|
||||
(params.speculative.types.size() == 1 && params.speculative.types[0] == COMMON_SPECULATIVE_TYPE_NONE);
|
||||
if (no_spec_type) {
|
||||
params.speculative.types = { COMMON_SPECULATIVE_TYPE_DRAFT };
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
// params.model_alias ??
|
||||
@@ -673,10 +687,35 @@ static void params_parse(server_context& /*ctx_server*/, const backend::ModelOpt
|
||||
}
|
||||
// Speculative decoding options
|
||||
} else if (!strcmp(optname, "spec_type") || !strcmp(optname, "speculative_type")) {
|
||||
auto type = common_speculative_type_from_name(optval_str);
|
||||
#ifdef LOCALAI_LEGACY_LLAMA_CPP_SPEC
|
||||
// Fork only knows a single scalar `type`. Take the first comma-
|
||||
// separated value and assign it via the singular helper.
|
||||
std::string first = optval_str;
|
||||
const auto comma = first.find(',');
|
||||
if (comma != std::string::npos) first = first.substr(0, comma);
|
||||
auto type = common_speculative_type_from_name(first);
|
||||
if (type != COMMON_SPECULATIVE_TYPE_COUNT) {
|
||||
params.speculative.type = type;
|
||||
}
|
||||
#else
|
||||
// Upstream switched to a vector of types (comma-separated for multi-type
|
||||
// chaining via common_speculative_types_from_names). We keep accepting a
|
||||
// single value here, but also tolerate comma-separated lists.
|
||||
std::vector<std::string> names;
|
||||
std::string item;
|
||||
for (char c : optval_str) {
|
||||
if (c == ',') {
|
||||
if (!item.empty()) { names.push_back(item); item.clear(); }
|
||||
} else {
|
||||
item.push_back(c);
|
||||
}
|
||||
}
|
||||
if (!item.empty()) names.push_back(item);
|
||||
auto parsed = common_speculative_types_from_names(names);
|
||||
if (!parsed.empty()) {
|
||||
params.speculative.types = parsed;
|
||||
}
|
||||
#endif
|
||||
} else if (!strcmp(optname, "spec_n_max") || !strcmp(optname, "draft_max")) {
|
||||
if (optval != NULL) {
|
||||
try { params.speculative.draft.n_max = std::stoi(optval_str); } catch (...) {}
|
||||
@@ -710,10 +749,155 @@ static void params_parse(server_context& /*ctx_server*/, const backend::ModelOpt
|
||||
try { params.speculative.draft.n_gpu_layers = std::stoi(optval_str); } catch (...) {}
|
||||
}
|
||||
} else if (!strcmp(optname, "draft_ctx_size")) {
|
||||
if (optval != NULL) {
|
||||
try { params.speculative.draft.n_ctx = std::stoi(optval_str); } catch (...) {}
|
||||
}
|
||||
// The draft context size is no longer a separate field upstream: the draft
|
||||
// shares the target context size. Accept the option for backward
|
||||
// compatibility but silently ignore it.
|
||||
|
||||
// Everything below relies on struct shape introduced in ggml-org/llama.cpp#22838
|
||||
// (parallel drafting): `ngram_mod`, `ngram_map_k`, `ngram_map_k4v`,
|
||||
// `ngram_cache`, and the `draft.{cache_type_*, cpuparams*, tensor_buft_overrides}`
|
||||
// fields. The turboquant fork branched before that, so its build defines
|
||||
// LOCALAI_LEGACY_LLAMA_CPP_SPEC via patch-grpc-server.sh and these option
|
||||
// keys become unrecognized (silently dropped, like any unknown opt) for it.
|
||||
//
|
||||
// The `#ifdef LOCALAI_LEGACY_LLAMA_CPP_SPEC` / `#else` split below sits at the
|
||||
// closing-brace position of the `draft_ctx_size` branch on purpose: in the
|
||||
// legacy build the chain ends here (the brace closes draft_ctx_size), and in
|
||||
// the modern build the chain continues with `} else if (...)` instead, so the
|
||||
// brace count stays balanced under both branches of the preprocessor.
|
||||
#ifdef LOCALAI_LEGACY_LLAMA_CPP_SPEC
|
||||
}
|
||||
#else
|
||||
// --- ngram_mod family (upstream --spec-ngram-mod-*) ---
|
||||
} else if (!strcmp(optname, "spec_ngram_mod_n_min")) {
|
||||
if (optval != NULL) {
|
||||
try { params.speculative.ngram_mod.n_min = std::stoi(optval_str); } catch (...) {}
|
||||
}
|
||||
} else if (!strcmp(optname, "spec_ngram_mod_n_max")) {
|
||||
if (optval != NULL) {
|
||||
try { params.speculative.ngram_mod.n_max = std::stoi(optval_str); } catch (...) {}
|
||||
}
|
||||
} else if (!strcmp(optname, "spec_ngram_mod_n_match")) {
|
||||
if (optval != NULL) {
|
||||
try { params.speculative.ngram_mod.n_match = std::stoi(optval_str); } catch (...) {}
|
||||
}
|
||||
|
||||
// --- ngram_map_k family (upstream --spec-ngram-map-k-*) ---
|
||||
} else if (!strcmp(optname, "spec_ngram_map_k_size_n")) {
|
||||
if (optval != NULL) {
|
||||
try { params.speculative.ngram_map_k.size_n = (uint16_t)std::stoi(optval_str); } catch (...) {}
|
||||
}
|
||||
} else if (!strcmp(optname, "spec_ngram_map_k_size_m")) {
|
||||
if (optval != NULL) {
|
||||
try { params.speculative.ngram_map_k.size_m = (uint16_t)std::stoi(optval_str); } catch (...) {}
|
||||
}
|
||||
} else if (!strcmp(optname, "spec_ngram_map_k_min_hits")) {
|
||||
if (optval != NULL) {
|
||||
try { params.speculative.ngram_map_k.min_hits = (uint16_t)std::stoi(optval_str); } catch (...) {}
|
||||
}
|
||||
|
||||
// --- ngram_map_k4v family (upstream --spec-ngram-map-k4v-*) ---
|
||||
} else if (!strcmp(optname, "spec_ngram_map_k4v_size_n")) {
|
||||
if (optval != NULL) {
|
||||
try { params.speculative.ngram_map_k4v.size_n = (uint16_t)std::stoi(optval_str); } catch (...) {}
|
||||
}
|
||||
} else if (!strcmp(optname, "spec_ngram_map_k4v_size_m")) {
|
||||
if (optval != NULL) {
|
||||
try { params.speculative.ngram_map_k4v.size_m = (uint16_t)std::stoi(optval_str); } catch (...) {}
|
||||
}
|
||||
} else if (!strcmp(optname, "spec_ngram_map_k4v_min_hits")) {
|
||||
if (optval != NULL) {
|
||||
try { params.speculative.ngram_map_k4v.min_hits = (uint16_t)std::stoi(optval_str); } catch (...) {}
|
||||
}
|
||||
|
||||
// --- ngram lookup caches (upstream --lookup-cache-static / -dynamic) ---
|
||||
} else if (!strcmp(optname, "spec_lookup_cache_static") || !strcmp(optname, "lookup_cache_static")) {
|
||||
params.speculative.ngram_cache.lookup_cache_static = optval_str;
|
||||
} else if (!strcmp(optname, "spec_lookup_cache_dynamic") || !strcmp(optname, "lookup_cache_dynamic")) {
|
||||
params.speculative.ngram_cache.lookup_cache_dynamic = optval_str;
|
||||
|
||||
// --- draft model KV cache types (upstream --spec-draft-type-k / -v) ---
|
||||
} else if (!strcmp(optname, "draft_cache_type_k") || !strcmp(optname, "spec_draft_cache_type_k")) {
|
||||
params.speculative.draft.cache_type_k = kv_cache_type_from_str(optval_str);
|
||||
} else if (!strcmp(optname, "draft_cache_type_v") || !strcmp(optname, "spec_draft_cache_type_v")) {
|
||||
params.speculative.draft.cache_type_v = kv_cache_type_from_str(optval_str);
|
||||
|
||||
// --- draft model thread counts (upstream --spec-draft-threads / -batch) ---
|
||||
} else if (!strcmp(optname, "draft_threads") || !strcmp(optname, "spec_draft_threads")) {
|
||||
if (optval != NULL) {
|
||||
try {
|
||||
int n = std::stoi(optval_str);
|
||||
if (n <= 0) n = (int)std::thread::hardware_concurrency();
|
||||
params.speculative.draft.cpuparams.n_threads = n;
|
||||
} catch (...) {}
|
||||
}
|
||||
} else if (!strcmp(optname, "draft_threads_batch") || !strcmp(optname, "spec_draft_threads_batch")) {
|
||||
if (optval != NULL) {
|
||||
try {
|
||||
int n = std::stoi(optval_str);
|
||||
if (n <= 0) n = (int)std::thread::hardware_concurrency();
|
||||
params.speculative.draft.cpuparams_batch.n_threads = n;
|
||||
} catch (...) {}
|
||||
}
|
||||
|
||||
// --- draft model MoE on CPU (upstream --spec-draft-cpu-moe / --spec-draft-n-cpu-moe) ---
|
||||
} else if (!strcmp(optname, "draft_cpu_moe") || !strcmp(optname, "spec_draft_cpu_moe")) {
|
||||
// Bool-style flag: a missing optval, or one of "true"/"1"/"yes"/"on"/"enabled", enables it.
|
||||
const bool enable = (optval == NULL) ||
|
||||
optval_str == "true" || optval_str == "1" || optval_str == "yes" ||
|
||||
optval_str == "on" || optval_str == "enabled";
|
||||
if (enable) {
|
||||
params.speculative.draft.tensor_buft_overrides.push_back(llm_ffn_exps_cpu_override());
|
||||
}
|
||||
} else if (!strcmp(optname, "draft_n_cpu_moe") || !strcmp(optname, "spec_draft_n_cpu_moe")) {
|
||||
if (optval != NULL) {
|
||||
try {
|
||||
int n = std::stoi(optval_str);
|
||||
if (n < 0) n = 0;
|
||||
// Keep override-name storage alive for the lifetime of the params struct
|
||||
// (mirrors upstream arg.cpp behavior with a function-local static).
|
||||
static std::list<std::string> buft_overrides_draft;
|
||||
for (int i = 0; i < n; ++i) {
|
||||
buft_overrides_draft.push_back(llm_ffn_exps_block_regex(i));
|
||||
params.speculative.draft.tensor_buft_overrides.push_back(
|
||||
{buft_overrides_draft.back().c_str(), ggml_backend_cpu_buffer_type()});
|
||||
}
|
||||
} catch (...) {}
|
||||
}
|
||||
|
||||
// --- draft model tensor buffer overrides (upstream --spec-draft-override-tensor) ---
|
||||
} else if (!strcmp(optname, "draft_override_tensor") || !strcmp(optname, "spec_draft_override_tensor")) {
|
||||
// Format: <tensor regex>=<buffer type>,<tensor regex>=<buffer type>,...
|
||||
// We replicate upstream's parse_tensor_buffer_overrides (static in arg.cpp).
|
||||
ggml_backend_load_all();
|
||||
std::map<std::string, ggml_backend_buffer_type_t> buft_list;
|
||||
for (size_t i = 0; i < ggml_backend_dev_count(); ++i) {
|
||||
auto * dev = ggml_backend_dev_get(i);
|
||||
auto * buft = ggml_backend_dev_buffer_type(dev);
|
||||
if (buft) {
|
||||
buft_list[ggml_backend_buft_name(buft)] = buft;
|
||||
}
|
||||
}
|
||||
static std::list<std::string> draft_override_names;
|
||||
std::string cur;
|
||||
auto flush = [&](const std::string & spec) {
|
||||
auto pos = spec.find('=');
|
||||
if (pos == std::string::npos) return;
|
||||
const std::string name = spec.substr(0, pos);
|
||||
const std::string type = spec.substr(pos + 1);
|
||||
auto it = buft_list.find(type);
|
||||
if (it == buft_list.end()) return; // unknown buffer type: ignore
|
||||
draft_override_names.push_back(name);
|
||||
params.speculative.draft.tensor_buft_overrides.push_back(
|
||||
{draft_override_names.back().c_str(), it->second});
|
||||
};
|
||||
for (char c : optval_str) {
|
||||
if (c == ',') { if (!cur.empty()) { flush(cur); cur.clear(); } }
|
||||
else { cur.push_back(c); }
|
||||
}
|
||||
if (!cur.empty()) flush(cur);
|
||||
}
|
||||
#endif // LOCALAI_LEGACY_LLAMA_CPP_SPEC — closes the `else`/`#ifdef` opened at draft_ctx_size
|
||||
}
|
||||
|
||||
// Set params.n_parallel from environment variable if not set via options (fallback)
|
||||
@@ -2704,7 +2888,7 @@ public:
|
||||
|
||||
tasks.reserve(documents.size());
|
||||
for (size_t i = 0; i < documents.size(); i++) {
|
||||
auto tmp = format_prompt_rerank(ctx_server.impl->model, ctx_server.impl->vocab, ctx_server.impl->mctx, request->query(), documents[i]);
|
||||
auto tmp = format_prompt_rerank(ctx_server.impl->model_tgt, ctx_server.impl->vocab, ctx_server.impl->mctx, request->query(), documents[i]);
|
||||
server_task task = server_task(SERVER_TASK_TYPE_RERANK);
|
||||
task.id = rd.queue_tasks.get_new_id();
|
||||
task.index = i;
|
||||
@@ -2882,7 +3066,7 @@ public:
|
||||
// Get template source and reconstruct a common_chat_template for analysis
|
||||
std::string tmpl_src = common_chat_templates_source(ctx_server.impl->chat_params.tmpls.get());
|
||||
if (!tmpl_src.empty()) {
|
||||
const auto * vocab = llama_model_get_vocab(ctx_server.impl->model);
|
||||
const auto * vocab = llama_model_get_vocab(ctx_server.impl->model_tgt);
|
||||
std::string token_bos, token_eos;
|
||||
if (vocab) {
|
||||
auto bos_id = llama_vocab_bos(vocab);
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
|
||||
# Pinned to the HEAD of feature/turboquant-kv-cache on https://github.com/TheTom/llama-cpp-turboquant.
|
||||
# Auto-bumped nightly by .github/workflows/bump_deps.yaml.
|
||||
TURBOQUANT_VERSION?=69d8e4be47243e83b3d0d71e932bc7aa61c644dc
|
||||
TURBOQUANT_VERSION?=5aeb2fdbe26cd4c534c6fa15de73cb5749bd0403
|
||||
LLAMA_REPO?=https://github.com/TheTom/llama-cpp-turboquant
|
||||
|
||||
CMAKE_ARGS?=
|
||||
|
||||
@@ -108,4 +108,47 @@ else
|
||||
echo "==> $SRC has no post-#22397 speculative field refs, skipping spec rename patch"
|
||||
fi
|
||||
|
||||
# 4. Revert the `ctx_server.impl->model_tgt` rename introduced by upstream
|
||||
# ggml-org/llama.cpp#22838 (parallel drafting). The turboquant fork still
|
||||
# exposes the field as `model` on `server_context_impl`. The two call sites
|
||||
# are in the Rerank and ModelMetadata RPC handlers.
|
||||
if grep -q 'ctx_server\.impl->model_tgt' "$SRC"; then
|
||||
echo "==> patching $SRC to revert ctx_server.impl->model_tgt -> ctx_server.impl->model"
|
||||
sed -E 's/ctx_server\.impl->model_tgt/ctx_server.impl->model/g' "$SRC" > "$SRC.tmp"
|
||||
mv "$SRC.tmp" "$SRC"
|
||||
echo "==> model_tgt rename OK"
|
||||
else
|
||||
echo "==> $SRC has no ctx_server.impl->model_tgt refs, skipping model_tgt rename patch"
|
||||
fi
|
||||
|
||||
# 5. Define LOCALAI_LEGACY_LLAMA_CPP_SPEC at the top of the file so the
|
||||
# grpc-server option parser skips the new option-handler blocks (ngram_mod,
|
||||
# ngram_map_k, ngram_map_k4v, ngram_cache, draft.cache_type_*, draft.cpuparams*,
|
||||
# draft.tensor_buft_overrides) introduced for the post-#22838 layout. Those
|
||||
# blocks reference struct fields that simply do not exist in the fork.
|
||||
if grep -q '^#define LOCALAI_LEGACY_LLAMA_CPP_SPEC' "$SRC"; then
|
||||
echo "==> $SRC already defines LOCALAI_LEGACY_LLAMA_CPP_SPEC, skipping"
|
||||
else
|
||||
echo "==> patching $SRC to define LOCALAI_LEGACY_LLAMA_CPP_SPEC at the top"
|
||||
# Insert the define before the very first `#include` so it precedes all the
|
||||
# speculative-decoding code paths.
|
||||
awk '
|
||||
!done && /^#include/ {
|
||||
print "#define LOCALAI_LEGACY_LLAMA_CPP_SPEC 1"
|
||||
print "// ^ injected by backend/cpp/turboquant/patch-grpc-server.sh"
|
||||
print ""
|
||||
done = 1
|
||||
}
|
||||
{ print }
|
||||
END {
|
||||
if (!done) {
|
||||
print "patch-grpc-server.sh: no #include anchor found to insert LOCALAI_LEGACY_LLAMA_CPP_SPEC" > "/dev/stderr"
|
||||
exit 1
|
||||
}
|
||||
}
|
||||
' "$SRC" > "$SRC.tmp"
|
||||
mv "$SRC.tmp" "$SRC"
|
||||
echo "==> LOCALAI_LEGACY_LLAMA_CPP_SPEC define OK"
|
||||
fi
|
||||
|
||||
echo "==> all patches applied"
|
||||
|
||||
@@ -8,7 +8,7 @@ JOBS?=$(shell nproc --ignore=1)
|
||||
|
||||
# whisper.cpp version
|
||||
WHISPER_REPO?=https://github.com/ggml-org/whisper.cpp
|
||||
WHISPER_CPP_VERSION?=c33c5618b72bb345df029b730b36bc0e369845a3
|
||||
WHISPER_CPP_VERSION?=3e9b7d0fef3528ee2208da3cdb873a2c53d2ae2f
|
||||
SO_TARGET?=libgowhisper.so
|
||||
|
||||
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF
|
||||
|
||||
@@ -847,6 +847,35 @@
|
||||
nvidia-l4t-cuda-12: "nvidia-l4t-vibevoice"
|
||||
nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-vibevoice"
|
||||
icon: https://avatars.githubusercontent.com/u/6154722?s=200&v=4
|
||||
- &liquid-audio
|
||||
urls:
|
||||
- https://github.com/Liquid4All/liquid-audio
|
||||
- https://huggingface.co/LiquidAI/LFM2.5-Audio-1.5B
|
||||
description: |
|
||||
LiquidAI LFM2 / LFM2.5 Audio Python backend. End-to-end speech-to-speech, ASR,
|
||||
TTS (4 baked voices), and text chat from a single 1.5B model. Wraps the
|
||||
upstream `liquid-audio` package; supports fine-tuning via LocalAI's
|
||||
/v1/fine-tuning/jobs endpoint.
|
||||
tags:
|
||||
- speech-to-speech
|
||||
- any-to-any
|
||||
- text-to-speech
|
||||
- speech-to-text
|
||||
- TTS
|
||||
- ASR
|
||||
- realtime
|
||||
license: LFM-Open-License-v1.0
|
||||
name: "liquid-audio"
|
||||
alias: "liquid-audio"
|
||||
capabilities:
|
||||
nvidia: "cuda12-liquid-audio"
|
||||
intel: "intel-liquid-audio"
|
||||
amd: "rocm-liquid-audio"
|
||||
default: "cpu-liquid-audio"
|
||||
nvidia-cuda-13: "cuda13-liquid-audio"
|
||||
nvidia-cuda-12: "cuda12-liquid-audio"
|
||||
nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-liquid-audio"
|
||||
icon: https://cdn-avatars.huggingface.co/v1/production/uploads/61b8e2ba285851687028d395/7_6D7rWrLxp2hb6OHSV1p.png
|
||||
- &qwen-tts
|
||||
urls:
|
||||
- https://github.com/QwenLM/Qwen3-TTS
|
||||
@@ -3437,6 +3466,77 @@
|
||||
uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-vibevoice"
|
||||
mirrors:
|
||||
- localai/localai-backends:master-metal-darwin-arm64-vibevoice
|
||||
## liquid-audio
|
||||
- !!merge <<: *liquid-audio
|
||||
name: "liquid-audio-development"
|
||||
capabilities:
|
||||
nvidia: "cuda12-liquid-audio-development"
|
||||
intel: "intel-liquid-audio-development"
|
||||
amd: "rocm-liquid-audio-development"
|
||||
default: "cpu-liquid-audio-development"
|
||||
nvidia-cuda-13: "cuda13-liquid-audio-development"
|
||||
nvidia-cuda-12: "cuda12-liquid-audio-development"
|
||||
nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-liquid-audio-development"
|
||||
- !!merge <<: *liquid-audio
|
||||
name: "cpu-liquid-audio"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-liquid-audio"
|
||||
mirrors:
|
||||
- localai/localai-backends:latest-cpu-liquid-audio
|
||||
- !!merge <<: *liquid-audio
|
||||
name: "cpu-liquid-audio-development"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:master-cpu-liquid-audio"
|
||||
mirrors:
|
||||
- localai/localai-backends:master-cpu-liquid-audio
|
||||
- !!merge <<: *liquid-audio
|
||||
name: "cuda12-liquid-audio"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-liquid-audio"
|
||||
mirrors:
|
||||
- localai/localai-backends:latest-gpu-nvidia-cuda-12-liquid-audio
|
||||
- !!merge <<: *liquid-audio
|
||||
name: "cuda12-liquid-audio-development"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-liquid-audio"
|
||||
mirrors:
|
||||
- localai/localai-backends:master-gpu-nvidia-cuda-12-liquid-audio
|
||||
- !!merge <<: *liquid-audio
|
||||
name: "cuda13-liquid-audio"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-13-liquid-audio"
|
||||
mirrors:
|
||||
- localai/localai-backends:latest-gpu-nvidia-cuda-13-liquid-audio
|
||||
- !!merge <<: *liquid-audio
|
||||
name: "cuda13-liquid-audio-development"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-13-liquid-audio"
|
||||
mirrors:
|
||||
- localai/localai-backends:master-gpu-nvidia-cuda-13-liquid-audio
|
||||
- !!merge <<: *liquid-audio
|
||||
name: "intel-liquid-audio"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-liquid-audio"
|
||||
mirrors:
|
||||
- localai/localai-backends:latest-gpu-intel-liquid-audio
|
||||
- !!merge <<: *liquid-audio
|
||||
name: "intel-liquid-audio-development"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-liquid-audio"
|
||||
mirrors:
|
||||
- localai/localai-backends:master-gpu-intel-liquid-audio
|
||||
- !!merge <<: *liquid-audio
|
||||
name: "rocm-liquid-audio"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-rocm-hipblas-liquid-audio"
|
||||
mirrors:
|
||||
- localai/localai-backends:latest-gpu-rocm-hipblas-liquid-audio
|
||||
- !!merge <<: *liquid-audio
|
||||
name: "rocm-liquid-audio-development"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-rocm-hipblas-liquid-audio"
|
||||
mirrors:
|
||||
- localai/localai-backends:master-gpu-rocm-hipblas-liquid-audio
|
||||
- !!merge <<: *liquid-audio
|
||||
name: "cuda13-nvidia-l4t-arm64-liquid-audio"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:latest-nvidia-l4t-cuda-13-arm64-liquid-audio"
|
||||
mirrors:
|
||||
- localai/localai-backends:latest-nvidia-l4t-cuda-13-arm64-liquid-audio
|
||||
- !!merge <<: *liquid-audio
|
||||
name: "cuda13-nvidia-l4t-arm64-liquid-audio-development"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-cuda-13-arm64-liquid-audio"
|
||||
mirrors:
|
||||
- localai/localai-backends:master-nvidia-l4t-cuda-13-arm64-liquid-audio
|
||||
## qwen-tts
|
||||
- !!merge <<: *qwen-tts
|
||||
name: "qwen-tts-development"
|
||||
|
||||
23
backend/python/liquid-audio/Makefile
Normal file
23
backend/python/liquid-audio/Makefile
Normal file
@@ -0,0 +1,23 @@
|
||||
.PHONY: liquid-audio
|
||||
liquid-audio:
|
||||
bash install.sh
|
||||
|
||||
.PHONY: run
|
||||
run: liquid-audio
|
||||
@echo "Running liquid-audio..."
|
||||
bash run.sh
|
||||
@echo "liquid-audio run."
|
||||
|
||||
.PHONY: test
|
||||
test: liquid-audio
|
||||
@echo "Testing liquid-audio..."
|
||||
bash test.sh
|
||||
@echo "liquid-audio tested."
|
||||
|
||||
.PHONY: protogen-clean
|
||||
protogen-clean:
|
||||
$(RM) backend_pb2_grpc.py backend_pb2.py
|
||||
|
||||
.PHONY: clean
|
||||
clean: protogen-clean
|
||||
rm -rf venv __pycache__
|
||||
871
backend/python/liquid-audio/backend.py
Normal file
871
backend/python/liquid-audio/backend.py
Normal file
@@ -0,0 +1,871 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Liquid Audio backend for LocalAI.
|
||||
|
||||
Wraps LiquidAI's `liquid-audio` Python package (https://github.com/Liquid4All/liquid-audio).
|
||||
The same model serves four roles, selected by the `mode` option at load time:
|
||||
chat, asr, tts, s2s. Fine-tuning is exposed via StartFineTune.
|
||||
"""
|
||||
from concurrent import futures
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import queue
|
||||
import signal
|
||||
import sys
|
||||
import threading
|
||||
import time
|
||||
import traceback
|
||||
import uuid
|
||||
|
||||
import grpc
|
||||
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', 'common'))
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'common'))
|
||||
from grpc_auth import get_auth_interceptors # noqa: E402
|
||||
from python_utils import parse_options # noqa: E402
|
||||
|
||||
import backend_pb2 # noqa: E402
|
||||
import backend_pb2_grpc # noqa: E402
|
||||
|
||||
_ONE_DAY_IN_SECONDS = 60 * 60 * 24
|
||||
MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1'))
|
||||
|
||||
# Voice id → system-prompt suffix. The model only ships these four voices.
|
||||
VOICE_PROMPTS = {
|
||||
"us_male": "Perform TTS. Use the US male voice.",
|
||||
"us_female": "Perform TTS. Use the US female voice.",
|
||||
"uk_male": "Perform TTS. Use the UK male voice.",
|
||||
"uk_female": "Perform TTS. Use the UK female voice.",
|
||||
}
|
||||
DEFAULT_VOICE = "us_female"
|
||||
|
||||
# Special-token IDs that LFM2-Audio emits to delimit modality boundaries.
|
||||
# Sourced from liquid_audio/model/lfm2_audio.py (see generate_sequential/_sample_*).
|
||||
TEXT_END_TOKEN = 130 # <|text_end|>
|
||||
AUDIO_START_TOKEN = 128 # <|audio_start|>
|
||||
IM_END_TOKEN = 7 # <|im_end|>
|
||||
AUDIO_EOS_CODE = 2048 # signals end-of-audio in any codebook position
|
||||
|
||||
_PATCHED_LOCAL_PATHS = False
|
||||
|
||||
|
||||
def _patch_liquid_audio_local_paths():
    """Let liquid_audio resolve model directories that already exist on disk.

    Replaces `liquid_audio.utils.snapshot_download` with a wrapper that
    returns its argument unchanged (as a string) when that argument names an
    existing directory, and otherwise defers to the function it replaced.
    This is what allows LocalAI to hand the package an absolute path to
    weights it has already staged, rather than an `owner/repo` id.

    The module-level `_PATCHED_LOCAL_PATHS` flag makes repeated calls no-ops.
    """
    global _PATCHED_LOCAL_PATHS
    if not _PATCHED_LOCAL_PATHS:
        import liquid_audio.utils as la_utils

        # Keep a handle on the function being replaced so non-local ids still
        # go through the regular download path.
        upstream_download = la_utils.snapshot_download

        def _download_unless_local(repo_id, revision=None, **kwargs):
            looks_like_path = isinstance(repo_id, (str, os.PathLike))
            if looks_like_path and os.path.isdir(str(repo_id)):
                return str(repo_id)
            return upstream_download(repo_id, revision=revision, **kwargs)

        la_utils.snapshot_download = _download_unless_local
        _PATCHED_LOCAL_PATHS = True
|
||||
|
||||
|
||||
def _select_device():
|
||||
import torch
|
||||
if torch.cuda.is_available():
|
||||
return "cuda"
|
||||
if hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
|
||||
return "mps"
|
||||
return "cpu"
|
||||
|
||||
|
||||
class ActiveJob:
    """State of one fine-tune run, shared between the training thread and
    the RPC handlers that report on it or stop it."""

    def __init__(self, job_id):
        self.job_id = job_id
        # Worker thread running the training; assigned by whoever starts it.
        self.thread = None
        # Failure message, set when training raises.
        self.error = None
        # Lifecycle flags polled by the progress stream.
        self.completed = False
        self.stopped = False
        # Progress updates for FineTuneProgress; None is the end-of-stream marker.
        self.progress_queue = queue.Queue()
|
||||
|
||||
|
||||
class BackendServicer(backend_pb2_grpc.BackendServicer):
|
||||
def __init__(self):
    # Inference state; filled in by LoadModel.
    self.model = None
    self.processor = None
    self.model_id = None
    # Parsed load-time options (mode, voice, sampling parameters, ...).
    self.options = {}
    # Placement defaults used until LoadModel selects a device and dtype.
    self.device = "cpu"
    self.dtype = None
    # Only one fine-tune job is tracked at a time.
    self.active_job = None
|
||||
|
||||
@property
def mode(self):
    """Lower-cased `mode` option; "chat" when the option is absent."""
    configured = self.options.get("mode", "chat")
    return str(configured).lower()
|
||||
|
||||
@property
def voice(self):
    """Lower-cased `voice` option, or DEFAULT_VOICE when it is absent or
    not one of the VOICE_PROMPTS keys."""
    requested = str(self.options.get("voice", DEFAULT_VOICE)).lower()
    if requested not in VOICE_PROMPTS:
        return DEFAULT_VOICE
    return requested
|
||||
|
||||
|
||||
def Free(self, request, context):
    """Release the loaded model so the next load starts from a clean state.

    Called by LocalAI when unloading the model. Drops the references to the
    model objects, runs the garbage collector and, when CUDA is available,
    empties the CUDA cache.

    Returns:
        backend_pb2.Result with success=True, or success=False carrying the
        exception text if the release itself failed.
    """
    try:
        # Reset to None instead of delattr(): TTS, AudioTranscription and the
        # audio-to-audio stream all guard with `self.model is None`, which
        # raises AttributeError once the attribute no longer exists.
        # Attributes that were never set (e.g. "tokenizer") are left alone.
        for attr in ("model", "processor", "tokenizer"):
            if getattr(self, attr, None) is not None:
                setattr(self, attr, None)

        import gc
        gc.collect()

        try:
            import torch
            if torch.cuda.is_available():
                torch.cuda.empty_cache()
        except Exception as exc:
            # Clearing the cache is best-effort; report it but do not fail Free.
            print(f"Free: could not empty CUDA cache: {exc}", file=sys.stderr)

        return backend_pb2.Result(success=True, message="OK")
    except Exception as exc:
        print(f"Free failed: {exc}", file=sys.stderr)
        return backend_pb2.Result(success=False, message=str(exc))
|
||||
|
||||
|
||||
def Health(self, request, context):
    """Liveness probe: always replies with the UTF-8 bytes of "OK"."""
    return backend_pb2.Reply(message="OK".encode("utf-8"))
|
||||
|
||||
|
||||
def LoadModel(self, request, context):
    """Parse load options, pick device/dtype and load processor + model.

    Options read here: `voice`, `device`, `dtype` and (via the `mode`
    property) `mode`. In "finetune" mode no inference model is loaded; only
    the resolved model id is remembered for StartFineTune.

    Returns:
        backend_pb2.Result; success=False with a message when CUDA was
        requested but is unavailable, when no model identifier was supplied,
        or when loading raised.
    """
    try:
        import torch

        self.options = parse_options(request.Options)

        # Validate case-insensitively, the same way the `voice` property
        # resolves the option, so "US_Male" does not trigger a bogus warning.
        requested_voice = self.options.get("voice")
        if requested_voice and str(requested_voice).lower() not in VOICE_PROMPTS:
            print(f"Warning: unknown voice '{requested_voice}'; defaulting to '{DEFAULT_VOICE}'",
                  file=sys.stderr)

        requested_device = self.options.get("device")
        self.device = requested_device or _select_device()
        if self.device == "cuda" and not torch.cuda.is_available():
            return backend_pb2.Result(success=False, message="CUDA requested but not available")
        if self.device == "mps" and not (hasattr(torch.backends, "mps") and
                                         torch.backends.mps.is_available()):
            print("MPS not available; falling back to CPU", file=sys.stderr)
            self.device = "cpu"

        dtype_name = str(self.options.get("dtype", "bfloat16")).lower()
        dtype_aliases = {
            "bfloat16": torch.bfloat16,
            "bf16": torch.bfloat16,
            "float16": torch.float16,
            "fp16": torch.float16,
            "half": torch.float16,
            "float32": torch.float32,
            "fp32": torch.float32,
        }
        if dtype_name not in dtype_aliases:
            # Make the fallback visible instead of silently ignoring a typo.
            print(f"Warning: unknown dtype '{dtype_name}'; defaulting to 'bfloat16'",
                  file=sys.stderr)
        self.dtype = dtype_aliases.get(dtype_name, torch.bfloat16)

        # request.Model holds the raw `parameters.model` value (an HF
        # repo id like "LiquidAI/LFM2.5-Audio-1.5B"); request.ModelFile
        # is LocalAI's ModelPath-prefixed local copy that exists only
        # when the gallery supplied a `files:` list. Mirror the
        # transformers/vibevoice convention: prefer the repo id and
        # only switch to the local path if it's been staged on disk.
        model_id = request.Model
        if not model_id:
            model_id = request.ModelFile
        if not model_id:
            return backend_pb2.Result(success=False, message="No model identifier provided")
        if request.ModelFile and os.path.isdir(request.ModelFile):
            model_id = request.ModelFile
        self.model_id = model_id

        # Pure fine-tune jobs don't need an in-memory inference model — the
        # Trainer instantiates its own copy at StartFineTune time.
        if self.mode == "finetune":
            print(f"Loaded liquid-audio backend in fine-tune mode (model id: {model_id})",
                  file=sys.stderr)
            return backend_pb2.Result(success=True, message="OK")

        from liquid_audio import LFM2AudioModel, LFM2AudioProcessor

        # liquid_audio's from_pretrained unconditionally routes through
        # huggingface_hub.snapshot_download, which rejects local paths
        # (HFValidationError on `/models/LiquidAI/LFM2.5-Audio-1.5B`).
        # When LocalAI's gallery has already staged the weights on disk,
        # short-circuit the download to return the local directory.
        _patch_liquid_audio_local_paths()

        print(f"Loading liquid-audio model '{model_id}' on {self.device} ({self.dtype})",
              file=sys.stderr)
        self.processor = LFM2AudioProcessor.from_pretrained(model_id, device=self.device).eval()
        self.model = LFM2AudioModel.from_pretrained(
            model_id, device=self.device, dtype=self.dtype
        ).eval()

        print(f"Liquid-audio mode={self.mode}, voice={self.voice}", file=sys.stderr)
        return backend_pb2.Result(success=True, message="OK")

    except Exception as exc:
        print(f"LoadModel failed: {exc}", file=sys.stderr)
        print(traceback.format_exc(), file=sys.stderr)
        return backend_pb2.Result(success=False, message=str(exc))
|
||||
|
||||
|
||||
def Predict(self, request, context):
    """Unary text generation: collect every streamed delta into one Reply.

    On failure the error is logged to stderr, the RPC is marked INTERNAL with
    the exception text as details, and an empty Reply is returned.
    """
    try:
        pieces = []
        for delta in self._generate_text_stream(request):
            pieces.append(delta)
        payload = "".join(pieces).encode("utf-8")
        return backend_pb2.Reply(message=payload)
    except Exception as failure:
        print(f"Predict failed: {failure}", file=sys.stderr)
        print(traceback.format_exc(), file=sys.stderr)
        context.set_code(grpc.StatusCode.INTERNAL)
        context.set_details(str(failure))
        return backend_pb2.Reply()
|
||||
|
||||
def PredictStream(self, request, context):
    """Streaming text generation: one Reply per generated text delta.

    On failure the error is logged to stderr and the RPC is marked INTERNAL
    with the exception text as details; the stream then ends.
    """
    try:
        yield from (
            backend_pb2.Reply(message=piece.encode("utf-8"))
            for piece in self._generate_text_stream(request)
        )
    except Exception as failure:
        print(f"PredictStream failed: {failure}", file=sys.stderr)
        print(traceback.format_exc(), file=sys.stderr)
        context.set_code(grpc.StatusCode.INTERNAL)
        context.set_details(str(failure))
|
||||
|
||||
|
||||
def VAD(self, request, context):
    """Stub voice-activity detector: RMS-energy threshold over 30 ms frames.

    The input is treated as 16 kHz samples. Runs of consecutive frames whose
    RMS exceeds `vad_rms_threshold` (default 0.01) and that last at least
    `vad_min_speech_frames` frames (default 2) are returned as segments with
    start/end in seconds. Only the trailing `vad_window_s` seconds
    (default 5.0) are analysed; a non-positive value analyses the whole
    buffer.

    This is a placeholder that keeps the realtime pipeline usable without a
    separate VAD model; a signal derived from the model's own audio encoder
    is left for follow-up work.

    Returns:
        backend_pb2.VADResponse. On failure the RPC is marked INTERNAL and an
        empty response is returned.
    """
    import numpy as np
    try:
        audio = np.asarray(request.audio, dtype=np.float32)
        if audio.size == 0:
            return backend_pb2.VADResponse(segments=[])

        sample_rate = 16000
        frame_size = sample_rate * 30 // 1000  # 30ms → 480 samples
        threshold = float(self.options.get("vad_rms_threshold", 0.01))
        min_speech_frames = int(self.options.get("vad_min_speech_frames", 2))  # ≥60ms
        # handleVAD ticks every 300 ms and only inspects segment presence
        # + last segment end relative to silence_threshold (~500 ms). Cap
        # the analysed window to the tail of the buffer so we don't redo
        # the entire growing utterance every tick.
        window_s = float(self.options.get("vad_window_s", 5.0))
        window_samples = int(window_s * sample_rate)
        time_offset_s = 0.0
        # Only trim for a positive window. With window_samples == 0 the slice
        # audio[-0:] keeps the whole buffer while the offset would still be
        # advanced by the full length, shifting every reported timestamp; a
        # negative value would slice from the front instead of the tail.
        if 0 < window_samples < audio.size:
            time_offset_s = (audio.size - window_samples) / sample_rate
            audio = audio[-window_samples:]

        n_frames = audio.size // frame_size
        if n_frames == 0:
            return backend_pb2.VADResponse(segments=[])
        # Drop the trailing partial frame so the buffer reshapes cleanly.
        frames = audio[: n_frames * frame_size].reshape(n_frames, frame_size)
        rms = np.sqrt(np.mean(frames ** 2, axis=1))
        speech = rms > threshold

        def _emit(start_idx, end_idx, out):
            # Runs shorter than min_speech_frames are discarded as noise.
            if end_idx - start_idx >= min_speech_frames:
                out.append(backend_pb2.VADSegment(
                    start=time_offset_s + start_idx * frame_size / sample_rate,
                    end=time_offset_s + end_idx * frame_size / sample_rate,
                ))

        segments = []
        start_idx = None
        for i, is_speech in enumerate(speech):
            if is_speech and start_idx is None:
                start_idx = i
            elif not is_speech and start_idx is not None:
                _emit(start_idx, i, segments)
                start_idx = None
        # A run still open at the end of the buffer is closed at the last frame.
        if start_idx is not None:
            _emit(start_idx, n_frames, segments)
        return backend_pb2.VADResponse(segments=segments)
    except Exception as exc:
        print(f"VAD failed: {exc}", file=sys.stderr)
        print(traceback.format_exc(), file=sys.stderr)
        context.set_code(grpc.StatusCode.INTERNAL)
        context.set_details(str(exc))
        return backend_pb2.VADResponse(segments=[])
|
||||
|
||||
|
||||
def TTS(self, request, context):
    """Synthesize `request.text` and write the audio to `request.dst`.

    The voice is taken from `request.voice` (an optional "lfm2:" / "lfm:"
    prefix is stripped), falling back to the configured voice when it is
    missing or unknown. Sampling is controlled by the `audio_top_k`,
    `audio_temperature` and `max_new_tokens` options. The file is written
    with soundfile at 24 kHz.

    Returns:
        backend_pb2.Result; success=False with a message when the model is
        not loaded, `dst` is empty, no audio was generated, or an exception
        occurred.
    """
    try:
        if self.model is None or self.processor is None:
            return backend_pb2.Result(success=False, message="Model not loaded")

        # Validate the destination before generating: a missing path should
        # fail immediately rather than after a full synthesis pass.
        out_path = request.dst
        if not out_path:
            return backend_pb2.Result(success=False, message="dst path is required")

        import torch
        from liquid_audio import ChatState

        voice = request.voice.lower() if request.voice else self.voice
        voice = voice.removeprefix("lfm2:").removeprefix("lfm:")
        if voice not in VOICE_PROMPTS:
            voice = self.voice
        system_prompt = VOICE_PROMPTS[voice]

        chat = ChatState(self.processor)
        chat.new_turn("system")
        chat.add_text(system_prompt)
        chat.end_turn()
        chat.new_turn("user")
        chat.add_text(request.text or "")
        chat.end_turn()
        chat.new_turn("assistant")

        audio_top_k = int(self.options.get("audio_top_k", 64))
        audio_temp = float(self.options.get("audio_temperature", 0.8))
        max_new = int(self.options.get("max_new_tokens", 2048))

        audio_out = []
        for tok in self.model.generate_sequential(
            **chat,
            max_new_tokens=max_new,
            audio_temperature=audio_temp,
            audio_top_k=audio_top_k,
        ):
            # Multi-element tokens are audio frames; single-element tokens
            # are text and are ignored for TTS.
            if tok.numel() > 1:
                audio_out.append(tok)

        if len(audio_out) <= 1:
            return backend_pb2.Result(success=False, message="No audio frames generated")

        # Drop the trailing end-of-audio frame, matching the package's examples.
        audio_codes = torch.stack(audio_out[:-1], 1).unsqueeze(0)
        waveform = self.processor.decode(audio_codes)

        os.makedirs(os.path.dirname(out_path) or ".", exist_ok=True)
        # soundfile in preference to torchaudio.save — the latter routes
        # through torchcodec, whose native libs need NVIDIA NPP that we
        # don't bundle in the cuda13 image.
        import soundfile as _sf
        _sf.write(out_path, waveform.cpu().numpy().squeeze(0).T, 24_000)

        return backend_pb2.Result(success=True)
    except Exception as exc:
        print(f"TTS failed: {exc}", file=sys.stderr)
        print(traceback.format_exc(), file=sys.stderr)
        return backend_pb2.Result(success=False, message=str(exc))
|
||||
|
||||
|
||||
def AudioToAudioStream(self, request_iterator, context):
    """Bidirectional any-to-any speech-to-speech stream.

    Thin error boundary around `_audio_to_audio_stream`: responses are
    forwarded unchanged, and any exception is logged to stderr and reported
    to the client as a final "error" event whose meta is a JSON object with
    a "message" field.

    The wire protocol is described under AudioToAudioStream in
    `backend.proto`. Audio is decoded once per turn; chunked detokenization
    is left to a future iteration.
    """
    try:
        for response in self._audio_to_audio_stream(request_iterator, context):
            yield response
    except Exception as failure:
        print(f"AudioToAudioStream failed: {failure}", file=sys.stderr)
        print(traceback.format_exc(), file=sys.stderr)
        error_meta = json.dumps({"message": str(failure)}).encode("utf-8")
        yield backend_pb2.AudioToAudioResponse(event="error", meta=error_meta)
|
||||
|
||||
def _audio_to_audio_stream(self, request_iterator, context):
    """Drive one speech-to-speech session from a stream of client requests.

    Request payloads handled:
      * config  - stores sample rates / system prompt and resets the chat.
      * frame   - appends s16le PCM to the pending user turn; when
                  `end_of_input` is set, the turn is committed and a
                  response is generated.
      * control - "input_audio_buffer.commit" commits and generates,
                  "response.cancel" is acknowledged with a response.done,
                  "session.update" resets the chat. Other events are ignored.

    Yields:
        backend_pb2.AudioToAudioResponse events carrying an increasing
        `sequence` number: transcript deltas, one audio delta per turn, and
        a closing "response.done".

    Raises:
        RuntimeError: if no model has been loaded.
    """
    if self.model is None or self.processor is None:
        raise RuntimeError("Model not loaded")

    import torch
    import torchaudio
    from liquid_audio import ChatState

    cfg = None
    chat = None
    input_sample_rate = 16000
    output_sample_rate = 24000
    sequence = 0

    def _new_event(event, **kwargs):
        nonlocal sequence
        sequence += 1
        kwargs.setdefault("sequence", sequence)
        return backend_pb2.AudioToAudioResponse(event=event, **kwargs)

    def _ensure_chat():
        """Build a fresh ChatState seeded with the system prompt."""
        nonlocal chat
        chat = ChatState(self.processor)
        system_prompt = (cfg.system_prompt if cfg and cfg.system_prompt
                         else "Respond with interleaved text and audio.")
        chat.new_turn("system")
        chat.add_text(system_prompt)
        chat.end_turn()

    # Buffers for the in-flight user turn
    pcm_buffer = bytearray()

    def _consume_user_turn():
        nonlocal pcm_buffer
        # s16le samples are two bytes each. A stray trailing byte (e.g. a
        # frame split mid-sample) would make np.frombuffer raise ValueError
        # and abort the whole stream, so only whole samples are decoded.
        usable = len(pcm_buffer) - (len(pcm_buffer) % 2)
        if usable == 0:
            pcm_buffer = bytearray()
            return
        # Avoid the bytes(pcm_buffer) copy: numpy views the buffer, the
        # float32 conversion makes the only copy, and the divide is in-place.
        import numpy as np
        arr = np.frombuffer(memoryview(pcm_buffer)[:usable], dtype=np.int16)
        wav = torch.from_numpy(arr).to(torch.float32).div_(32768.0).unsqueeze(0)
        chat.new_turn("user")
        chat.add_audio(wav, input_sample_rate)
        chat.end_turn()
        pcm_buffer = bytearray()

    def _run_generation():
        """Run generate_interleaved; yield response events as we go."""
        # NOTE(review): the generated tokens are not appended to `chat` and
        # the assistant turn is not closed after generation — confirm that
        # multi-turn history behaves as intended.
        chat.new_turn("assistant")
        audio_top_k = int(self.options.get("audio_top_k", 4))
        audio_temp = float(self.options.get("audio_temperature", 1.0))
        # 0 (the default) is mapped to None for the text sampling options.
        text_top_k = int(self.options.get("text_top_k", 0)) or None
        text_temp = float(self.options.get("text_temperature", 0)) or None
        max_new = int(self.options.get("max_new_tokens", 512))

        audio_tokens = []
        for tok in self.model.generate_interleaved(
            **chat,
            max_new_tokens=max_new,
            text_temperature=text_temp,
            text_top_k=text_top_k,
            audio_temperature=audio_temp,
            audio_top_k=audio_top_k,
        ):
            if tok.numel() == 1:
                # Single-element tokens are text; IM_END_TOKEN ends the turn.
                if tok.item() == IM_END_TOKEN:
                    break
                text = self.processor.text.decode(tok)
                if not text:
                    continue
                yield _new_event(
                    "response.audio_transcript.delta",
                    meta=json.dumps({"delta": text}).encode("utf-8"),
                )
            else:
                audio_tokens.append(tok)

        # Detokenize the accumulated audio at end-of-turn — the
        # LFM2AudioDetokenizer is non-streaming today.
        if len(audio_tokens) > 1:
            # The last frame is dropped, as in TTS (end-of-audio marker).
            audio_codes = torch.stack(audio_tokens[:-1], 1).unsqueeze(0)
            waveform = self.processor.decode(audio_codes)
            # Convert to s16le PCM bytes at output_sample_rate
            if output_sample_rate != 24000:
                waveform = torchaudio.functional.resample(
                    waveform.cpu(), 24000, output_sample_rate
                )
            pcm = (waveform.cpu().squeeze(0).clamp(-1, 1) * 32767.0).to(
                torch.int16
            ).numpy().tobytes()
            yield _new_event(
                "response.audio.delta",
                pcm=pcm,
                sample_rate=output_sample_rate,
            )

        yield _new_event("response.done", meta=b"{}")

    for req in request_iterator:
        if not context.is_active():
            return
        payload = req.WhichOneof("payload")
        if payload == "config":
            cfg = req.config
            if cfg.input_sample_rate > 0:
                input_sample_rate = cfg.input_sample_rate
            if cfg.output_sample_rate > 0:
                output_sample_rate = cfg.output_sample_rate
            # The first config implicitly resets state.
            _ensure_chat()
            pcm_buffer = bytearray()
        elif payload == "frame":
            if chat is None:
                _ensure_chat()
            if req.frame.pcm:
                pcm_buffer.extend(req.frame.pcm)
            if req.frame.end_of_input:
                _consume_user_turn()
                yield from _run_generation()
        elif payload == "control":
            event = req.control.event
            if event == "input_audio_buffer.commit":
                _consume_user_turn()
                yield from _run_generation()
            elif event == "response.cancel":
                # Synchronous generation here means cancel can only
                # take effect between turns; we ack so the client unblocks.
                yield _new_event("response.done", meta=b'{"cancelled":true}')
            elif event == "session.update":
                # Free-form session re-config; treat as a soft reset.
                _ensure_chat()
                pcm_buffer = bytearray()
            # Unknown events are ignored — forward-compatible.
|
||||
|
||||
|
||||
def AudioTranscription(self, request, context):
    """Transcribe the audio file at `request.dst` with the "Perform ASR." prompt.

    The file is read with soundfile, down-mixed to mono when it has several
    channels, and fed to the model as a user turn. Generated text tokens are
    concatenated until IM_END_TOKEN or `max_new_tokens` (default 1024).

    Returns:
        backend_pb2.TranscriptResult with a single segment spanning the whole
        file (end in milliseconds). On failure an empty result is returned
        and the RPC status is set (FAILED_PRECONDITION when no model is
        loaded, INVALID_ARGUMENT when `dst` is empty, INTERNAL otherwise) so
        callers can tell an error from genuinely silent audio — the same
        convention Predict and VAD follow.
    """
    try:
        if self.model is None or self.processor is None:
            context.set_code(grpc.StatusCode.FAILED_PRECONDITION)
            context.set_details("Model not loaded")
            return backend_pb2.TranscriptResult(segments=[], text="")

        from liquid_audio import ChatState

        audio_path = request.dst
        if not audio_path:
            context.set_code(grpc.StatusCode.INVALID_ARGUMENT)
            context.set_details("dst (audio path) is required")
            return backend_pb2.TranscriptResult(segments=[], text="")

        chat = ChatState(self.processor)
        chat.new_turn("system")
        chat.add_text("Perform ASR.")
        chat.end_turn()
        chat.new_turn("user")
        # soundfile in preference to torchaudio.load — the latter routes
        # through torchcodec which needs NVIDIA NPP libs we don't bundle.
        import soundfile as _sf
        import torch
        audio_np, sr = _sf.read(audio_path, dtype="float32", always_2d=True)
        wav = torch.from_numpy(audio_np.T)  # (channels, samples)
        if wav.shape[0] > 1:
            # Down-mix to mono — the processor expects a single channel
            wav = wav.mean(dim=0, keepdim=True)
        chat.add_audio(wav, sr)
        chat.end_turn()
        chat.new_turn("assistant")

        max_new = int(self.options.get("max_new_tokens", 1024))

        pieces = []
        for tok in self.model.generate_sequential(**chat, max_new_tokens=max_new):
            # Only single-element (text) tokens contribute to the transcript.
            if tok.numel() == 1:
                if tok.item() == IM_END_TOKEN:
                    break
                pieces.append(self.processor.text.decode(tok))

        text = "".join(pieces).strip()
        duration_ms = int((wav.shape[1] / sr) * 1000)
        segment = backend_pb2.TranscriptSegment(
            id=0, start=0, end=duration_ms, text=text, tokens=[],
        )
        return backend_pb2.TranscriptResult(segments=[segment], text=text)
    except Exception as exc:
        print(f"AudioTranscription failed: {exc}", file=sys.stderr)
        print(traceback.format_exc(), file=sys.stderr)
        context.set_code(grpc.StatusCode.INTERNAL)
        context.set_details(str(exc))
        return backend_pb2.TranscriptResult(segments=[], text="")
|
||||
|
||||
|
||||
def StartFineTune(self, request, context):
    """Start a fine-tune in a daemon thread and return immediately.

    Refuses while a previously started job has not completed. The job id is
    `request.job_id`, or a fresh UUID4 string when that is empty.
    """
    current = self.active_job
    if current is not None and not current.completed:
        return backend_pb2.FineTuneJobResult(
            job_id="",
            success=False,
            message="A fine-tuning job is already running",
        )

    job = ActiveJob(request.job_id or str(uuid.uuid4()))
    self.active_job = job

    # Daemon thread: training must not keep the process alive on shutdown.
    job.thread = threading.Thread(
        target=self._run_training, args=(request, job), daemon=True
    )
    job.thread.start()

    return backend_pb2.FineTuneJobResult(
        job_id=job.job_id, success=True, message="Training started",
    )
|
||||
|
||||
def FineTuneProgress(self, request, context):
    """Stream progress updates for the active fine-tune job.

    Marks the RPC NOT_FOUND when `request.job_id` is not the active job.
    Otherwise polls the job's queue once per second and yields each update
    until a None sentinel, a terminal status, the job finishing or being
    stopped, or the client disconnecting.
    """
    job = self.active_job
    if job is None or job.job_id != request.job_id:
        context.set_code(grpc.StatusCode.NOT_FOUND)
        context.set_details(f"Job {request.job_id} not found")
        return

    terminal_statuses = ("completed", "failed", "stopped")
    while True:
        try:
            update = job.progress_queue.get(timeout=1.0)
        except queue.Empty:
            # Nothing queued: stop only if there is nobody left to serve.
            if job.completed or job.stopped or not context.is_active():
                break
            continue
        if update is None:
            break
        yield update
        if update.status in terminal_statuses:
            break
|
||||
|
||||
def StopFineTune(self, request, context):
    """Flag the matching active job as stopped and wake its progress stream.

    The training loop cannot be killed cleanly mid-step from here; LocalAI's
    job manager kills the backend process on stop. Setting the flag and
    queueing the None sentinel lets the progress stream end quickly.
    Always returns a successful Result, even when no job matches.
    """
    job = self.active_job
    is_target = job is not None and job.job_id == request.job_id
    if is_target:
        job.stopped = True
        job.progress_queue.put(None)
    return backend_pb2.Result(success=True, message="OK")
|
||||
|
||||
def _run_training(self, request, job):
    """Thread target: run the fine-tune and publish its terminal status.

    Exactly one terminal update ("completed", "stopped" or "failed") is
    queued, followed by a None sentinel that ends the progress stream.
    `job.completed` is set on every exit path so StartFineTune accepts a
    new job afterwards.
    """
    try:
        self._do_train(request, job)
        job.completed = True
        job.progress_queue.put(backend_pb2.FineTuneProgressUpdate(
            job_id=job.job_id, status="completed", message="Training completed",
            progress_percent=100.0,
        ))
    except KeyboardInterrupt:
        # The trainer's log hook raises KeyboardInterrupt when a stop was
        # requested. It derives from BaseException, not Exception, so it
        # needs its own branch: otherwise it escapes the thread, the job is
        # never marked completed, and every later StartFineTune is refused.
        job.stopped = True
        job.completed = True
        print("Training stopped on request", file=sys.stderr)
        job.progress_queue.put(backend_pb2.FineTuneProgressUpdate(
            job_id=job.job_id, status="stopped", message="Training stopped",
        ))
    except Exception as exc:
        job.error = str(exc)
        job.completed = True
        print(f"Training failed: {exc}", file=sys.stderr)
        print(traceback.format_exc(), file=sys.stderr)
        job.progress_queue.put(backend_pb2.FineTuneProgressUpdate(
            job_id=job.job_id, status="failed", message=str(exc),
        ))
    finally:
        job.progress_queue.put(None)
|
||||
|
||||
def _do_train(self, request, job):
    """Run one fine-tuning job to completion, reporting progress on ``job.progress_queue``.

    Hyperparameters are read from ``request`` (learning_rate, max_steps,
    warmup_steps, batch_size, save_steps, weight_decay, output_dir); any
    falsy value falls back to the default written next to it below.
    ``request.extra_options`` may carry ``val_dataset`` to enable validation.

    Progress is published as ``backend_pb2.FineTuneProgressUpdate`` messages
    with statuses ``loading_dataset``, ``loading_model``, ``training`` and
    finally ``saving``.

    Raises:
        ValueError: if ``request.dataset_source`` is empty.
        KeyboardInterrupt: raised from the trainer's ``log`` hook when
            ``job.stopped`` is set, to abort the training loop.
    """
    from liquid_audio import LFM2AudioModel  # noqa: F401 (sanity import)
    from liquid_audio.data.dataloader import LFM2DataLoader
    from liquid_audio.trainer import Trainer

    # Precedence: explicit request model, then the loaded model id, then a default.
    model_id = request.model or self.model_id or "LiquidAI/LFM2.5-Audio-1.5B"

    dataset_path = request.dataset_source
    if not dataset_path:
        raise ValueError("dataset_source is required (path to a preprocessed dataset)")

    extras = dict(request.extra_options) if request.extra_options else {}
    val_path = extras.get("val_dataset")

    # Map FineTuneRequest hyperparameters to liquid_audio.Trainer constructor args
    lr = request.learning_rate or 3e-5
    max_steps = request.max_steps or 1000
    # Default warmup is 10% of the run, capped at 100 steps.
    warmup_steps = request.warmup_steps or min(100, max_steps // 10)
    batch_size = request.batch_size or 16
    # Default checkpoint cadence is a quarter of the run, never less than 1 step.
    save_interval = request.save_steps or max(1, max_steps // 4)

    # Default output location: $LIQUID_AUDIO_OUTPUT_DIR (or /tmp) / liquid-audio-<job id>.
    output_dir = request.output_dir or os.path.join(
        os.environ.get("LIQUID_AUDIO_OUTPUT_DIR", "/tmp"),
        f"liquid-audio-{job.job_id}",
    )
    os.makedirs(output_dir, exist_ok=True)

    job.progress_queue.put(backend_pb2.FineTuneProgressUpdate(
        job_id=job.job_id, status="loading_dataset",
        message=f"Loading preprocessed dataset from {dataset_path}",
    ))
    train_data = LFM2DataLoader(dataset_path)
    val_data = LFM2DataLoader(val_path) if val_path else None

    # NOTE(review): nothing is loaded between this message and the trainer
    # construction below; presumably the Trainer constructor loads the model.
    job.progress_queue.put(backend_pb2.FineTuneProgressUpdate(
        job_id=job.job_id, status="loading_model",
        message=f"Loading base model {model_id}",
    ))

    # The Liquid Trainer logs via self.accelerator.print; we subclass it to
    # also push progress events onto the queue every logging_interval steps.
    progress_q = job.progress_queue

    class QueuedTrainer(Trainer):
        # ``self_`` avoids shadowing the enclosing servicer's ``self``.
        def log(self_, model_output):
            if self_.step > 0 and self_.step % self_.logging_interval == 0:
                try:
                    loss = self_.accelerator.reduce(
                        model_output.loss.detach(), reduction="mean"
                    ).item()
                except Exception:
                    # Best effort: a failed loss reduction must not abort training.
                    loss = float("nan")
                lr_now = self_.optimizer.param_groups[0]["lr"]
                pct = (self_.step / self_.max_steps * 100.0) if self_.max_steps else 0.0
                progress_q.put(backend_pb2.FineTuneProgressUpdate(
                    job_id=job.job_id,
                    current_step=int(self_.step),
                    total_steps=int(self_.max_steps),
                    current_epoch=float(self_.epoch),
                    loss=float(loss),
                    learning_rate=float(lr_now),
                    progress_percent=float(pct),
                    status="training",
                ))
            # NOTE(review): the stop check is reconstructed at method level (runs
            # on every log call, not only on logging steps) — confirm nesting.
            # Honour stop requests: raising here terminates the loop cleanly
            if job.stopped:
                raise KeyboardInterrupt("stop requested")
            return super().log(model_output)

        def validate(self_):
            # Announce the validation pass, then defer to the stock implementation.
            progress_q.put(backend_pb2.FineTuneProgressUpdate(
                job_id=job.job_id, current_step=int(self_.step),
                total_steps=int(self_.max_steps), status="training",
                message=f"Running validation at step {self_.step}",
            ))
            return super().validate()

    trainer = QueuedTrainer(
        model_id=model_id,
        train_data=train_data,
        val_data=val_data,
        lr=lr,
        max_steps=max_steps,
        warmup_steps=warmup_steps,
        batch_size=batch_size,
        save_interval=save_interval,
        output_dir=output_dir,
        weight_decay=request.weight_decay or 0.1,
    )

    job.progress_queue.put(backend_pb2.FineTuneProgressUpdate(
        job_id=job.job_id, status="training", message="Training started",
        total_steps=int(max_steps),
    ))
    trainer.train()

    # NOTE(review): assumes Trainer.train() leaves a "final" checkpoint under
    # output_dir — this method does not write one itself.
    job.progress_queue.put(backend_pb2.FineTuneProgressUpdate(
        job_id=job.job_id, status="saving",
        message=f"Saved final model to {output_dir}",
        checkpoint_path=os.path.join(output_dir, "final"),
    ))
|
||||
|
||||
|
||||
def _build_chat_state(self, messages, user_prompt, tools_prelude=None):
|
||||
"""Build a ChatState from a list of (role, content) tuples plus an optional final user turn.
|
||||
|
||||
tools_prelude, when non-empty, is prepended as an extra system turn carrying
|
||||
the LFM2 tool-list block — mirrors gallery/lfm.yaml's `function:` template
|
||||
so the model sees the same prompt shape whether served via llama-cpp or here.
|
||||
"""
|
||||
from liquid_audio import ChatState
|
||||
chat = ChatState(self.processor)
|
||||
if tools_prelude:
|
||||
chat.new_turn("system")
|
||||
chat.add_text(tools_prelude)
|
||||
chat.end_turn()
|
||||
for role, content in messages:
|
||||
chat.new_turn(role)
|
||||
chat.add_text(content)
|
||||
chat.end_turn()
|
||||
if user_prompt:
|
||||
chat.new_turn("user")
|
||||
chat.add_text(user_prompt)
|
||||
chat.end_turn()
|
||||
chat.new_turn("assistant")
|
||||
return chat
|
||||
|
||||
def _collect_messages(self, request):
|
||||
"""Translate PredictOptions.Messages into (role, content) tuples."""
|
||||
out = []
|
||||
for m in request.Messages:
|
||||
role = (m.role or "user").lower()
|
||||
if role not in ("system", "user", "assistant"):
|
||||
role = "user"
|
||||
out.append((role, m.content or ""))
|
||||
return out
|
||||
|
||||
def _render_tools_prelude(self, request):
|
||||
"""Build the LFM2 `<|tool_list_start|>…<|tool_list_end|>` system prelude
|
||||
from request.Tools (OpenAI Chat-Completions tool JSON). Returns "" when
|
||||
no tools are attached. Output mirrors gallery/lfm.yaml's `function:`
|
||||
template so the model sees the same prompt whether routed via llama-cpp
|
||||
or this backend."""
|
||||
tools_raw = getattr(request, "Tools", "") or ""
|
||||
if not tools_raw:
|
||||
return ""
|
||||
try:
|
||||
tools = json.loads(tools_raw)
|
||||
except json.JSONDecodeError:
|
||||
print(f"liquid-audio: ignoring malformed Tools JSON: {tools_raw[:200]!r}",
|
||||
file=sys.stderr)
|
||||
return ""
|
||||
if not isinstance(tools, list) or not tools:
|
||||
return ""
|
||||
# The LFM2 chat template uses single-quoted Python-dict-ish syntax in
|
||||
# examples, but the tokenizer treats this whole block as opaque text;
|
||||
# JSON works fine and is what other backends emit.
|
||||
return (
|
||||
"You are a function calling AI model. You are provided with functions to "
|
||||
"execute. You may call one or more functions to assist with the user query. "
|
||||
"Don't make assumptions about what values to plug into functions.\n"
|
||||
"List of tools: <|tool_list_start|>"
|
||||
+ json.dumps(tools, separators=(",", ":"))
|
||||
+ "<|tool_list_end|>"
|
||||
)
|
||||
|
||||
def _generate_text_stream(self, request):
    """Generator of decoded text deltas from ``generate_sequential``.

    Unary Predict callers join the yielded pieces. Generation ends at the
    first single-element token equal to ``IM_END_TOKEN``; tokens with more
    than one element are skipped without being decoded.

    Raises:
        RuntimeError: if the model or the processor has not been loaded.
    """
    if any(part is None for part in (self.model, self.processor)):
        raise RuntimeError("Model not loaded")

    history = self._collect_messages(request)
    prompt = request.Prompt or None
    prelude = self._render_tools_prelude(request)

    # When Messages is populated, Prompt is the templated rendering of the
    # same conversation; adding it too would duplicate the user turn.
    trailing_user_turn = None if history else prompt
    chat = self._build_chat_state(history, trailing_user_turn, tools_prelude=prelude)

    if request.Tokens > 0:
        token_budget = request.Tokens
    else:
        token_budget = int(self.options.get("max_new_tokens", 512))

    # Non-positive sampling values are forwarded as None.
    sampling = {
        "max_new_tokens": token_budget,
        "text_temperature": request.Temperature if request.Temperature > 0 else None,
        "text_top_k": request.TopK if request.TopK > 0 else None,
    }

    for token in self.model.generate_sequential(**chat, **sampling):
        if token.numel() != 1:
            continue
        if token.item() == IM_END_TOKEN:
            return
        yield self.processor.text.decode(token)
|
||||
|
||||
|
||||
def serve(address):
    """Start the gRPC backend on ``address`` and block until the process is stopped.

    The server is created with 50 MiB message-size limits and the auth
    interceptors from ``get_auth_interceptors()``, listens on an insecure
    port, and is stopped immediately on SIGTERM, SIGINT or KeyboardInterrupt.
    """
    message_limit = 50 * 1024 * 1024
    grpc_options = [
        ('grpc.max_message_length', message_limit),
        ('grpc.max_send_message_length', message_limit),
        ('grpc.max_receive_message_length', message_limit),
    ]
    worker_pool = futures.ThreadPoolExecutor(max_workers=MAX_WORKERS)
    server = grpc.server(
        worker_pool,
        options=grpc_options,
        interceptors=get_auth_interceptors(),
    )
    backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
    server.add_insecure_port(address)
    server.start()
    print(f"Liquid-audio backend listening on {address}", file=sys.stderr, flush=True)

    def _shutdown(_signum, _frame):
        # Stop with no grace period, then exit the process.
        server.stop(0)
        sys.exit(0)

    for signum in (signal.SIGTERM, signal.SIGINT):
        signal.signal(signum, _shutdown)

    # Keep the main thread alive; the gRPC workers run in the pool.
    try:
        while True:
            time.sleep(_ONE_DAY_IN_SECONDS)
    except KeyboardInterrupt:
        server.stop(0)
|
||||
|
||||
|
||||
# Script entry point: parse the listen address and run the server until stopped.
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Liquid Audio gRPC backend")
    # --addr is the host:port the gRPC server binds to.
    parser.add_argument("--addr", default="localhost:50051", help="gRPC server address")
    args = parser.parse_args()
    serve(args.addr)
|
||||
18
backend/python/liquid-audio/install.sh
Executable file
18
backend/python/liquid-audio/install.sh
Executable file
@@ -0,0 +1,18 @@
|
||||
#!/bin/bash
# Installs the liquid-audio backend's Python environment via the shared
# helpers in common/libbackend.sh.
set -e

# liquid-audio requires Python ≥ 3.12 (per its pyproject.toml); the default
# portable Python in libbackend.sh is 3.10. Override before sourcing.
export PYTHON_VERSION="${PYTHON_VERSION:-3.12}"
export PYTHON_PATCH="${PYTHON_PATCH:-11}"

# Quote every path expansion so a checkout path containing spaces or glob
# characters does not get word-split.
backend_dir=$(dirname "$0")
if [ -d "$backend_dir/common" ]; then
    source "$backend_dir/common/libbackend.sh"
else
    source "$backend_dir/../common/libbackend.sh"
fi

# liquid-audio's torch wheels are large; allow upgrades to satisfy transitive pins
EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match"
installRequirements
|
||||
11
backend/python/liquid-audio/protogen.sh
Executable file
11
backend/python/liquid-audio/protogen.sh
Executable file
@@ -0,0 +1,11 @@
|
||||
#!/bin/bash
# Generates the backend's protobuf/gRPC Python stubs via the shared
# runProtogen helper from common/libbackend.sh.
set -e

# Quote every path expansion so a checkout path containing spaces or glob
# characters does not get word-split.
backend_dir=$(dirname "$0")
if [ -d "$backend_dir/common" ]; then
    source "$backend_dir/common/libbackend.sh"
else
    source "$backend_dir/../common/libbackend.sh"
fi

runProtogen
|
||||
13
backend/python/liquid-audio/requirements-cpu.txt
Normal file
13
backend/python/liquid-audio/requirements-cpu.txt
Normal file
@@ -0,0 +1,13 @@
|
||||
--extra-index-url https://download.pytorch.org/whl/cpu
|
||||
torch>=2.8.0
|
||||
torchaudio>=2.8.0
|
||||
torchcodec>=0.9.1
|
||||
transformers>=4.55.4
|
||||
accelerate>=1.10.1
|
||||
datasets>=4.8.4
|
||||
einops>=0.8.1
|
||||
librosa>=0.11.0
|
||||
soundfile>=0.12.1
|
||||
sentencepiece>=0.2.1
|
||||
huggingface-hub>=1.3.0
|
||||
liquid-audio>=1.2.0
|
||||
13
backend/python/liquid-audio/requirements-cublas12.txt
Normal file
13
backend/python/liquid-audio/requirements-cublas12.txt
Normal file
@@ -0,0 +1,13 @@
|
||||
--extra-index-url https://download.pytorch.org/whl/cu121
|
||||
torch>=2.8.0
|
||||
torchaudio>=2.8.0
|
||||
torchcodec>=0.9.1
|
||||
transformers>=4.55.4
|
||||
accelerate>=1.10.1
|
||||
datasets>=4.8.4
|
||||
einops>=0.8.1
|
||||
librosa>=0.11.0
|
||||
soundfile>=0.12.1
|
||||
sentencepiece>=0.2.1
|
||||
huggingface-hub>=1.3.0
|
||||
liquid-audio>=1.2.0
|
||||
13
backend/python/liquid-audio/requirements-cublas13.txt
Normal file
13
backend/python/liquid-audio/requirements-cublas13.txt
Normal file
@@ -0,0 +1,13 @@
|
||||
--extra-index-url https://download.pytorch.org/whl/cu130
|
||||
torch>=2.8.0
|
||||
torchaudio>=2.8.0
|
||||
torchcodec>=0.9.1
|
||||
transformers>=4.55.4
|
||||
accelerate>=1.10.1
|
||||
datasets>=4.8.4
|
||||
einops>=0.8.1
|
||||
librosa>=0.11.0
|
||||
soundfile>=0.12.1
|
||||
sentencepiece>=0.2.1
|
||||
huggingface-hub>=1.3.0
|
||||
liquid-audio>=1.2.0
|
||||
13
backend/python/liquid-audio/requirements-hipblas.txt
Normal file
13
backend/python/liquid-audio/requirements-hipblas.txt
Normal file
@@ -0,0 +1,13 @@
|
||||
--extra-index-url https://download.pytorch.org/whl/rocm7.0
|
||||
torch>=2.8.0
|
||||
torchaudio>=2.8.0
|
||||
torchcodec>=0.9.1
|
||||
transformers>=4.55.4
|
||||
accelerate>=1.10.1
|
||||
datasets>=4.8.4
|
||||
einops>=0.8.1
|
||||
librosa>=0.11.0
|
||||
soundfile>=0.12.1
|
||||
sentencepiece>=0.2.1
|
||||
huggingface-hub>=1.3.0
|
||||
liquid-audio>=1.2.0
|
||||
13
backend/python/liquid-audio/requirements-l4t13.txt
Normal file
13
backend/python/liquid-audio/requirements-l4t13.txt
Normal file
@@ -0,0 +1,13 @@
|
||||
--extra-index-url https://pypi.jetson-ai-lab.io/jp7/cu130
|
||||
torch>=2.8.0
|
||||
torchaudio>=2.8.0
|
||||
torchcodec>=0.9.1
|
||||
transformers>=4.55.4
|
||||
accelerate>=1.10.1
|
||||
datasets>=4.8.4
|
||||
einops>=0.8.1
|
||||
librosa>=0.11.0
|
||||
soundfile>=0.12.1
|
||||
sentencepiece>=0.2.1
|
||||
huggingface-hub>=1.3.0
|
||||
liquid-audio>=1.2.0
|
||||
12
backend/python/liquid-audio/requirements-mps.txt
Normal file
12
backend/python/liquid-audio/requirements-mps.txt
Normal file
@@ -0,0 +1,12 @@
|
||||
torch>=2.8.0
|
||||
torchaudio>=2.8.0
|
||||
torchcodec>=0.9.1
|
||||
transformers>=4.55.4
|
||||
accelerate>=1.10.1
|
||||
datasets>=4.8.4
|
||||
einops>=0.8.1
|
||||
librosa>=0.11.0
|
||||
soundfile>=0.12.1
|
||||
sentencepiece>=0.2.1
|
||||
huggingface-hub>=1.3.0
|
||||
liquid-audio>=1.2.0
|
||||
3
backend/python/liquid-audio/requirements.txt
Normal file
3
backend/python/liquid-audio/requirements.txt
Normal file
@@ -0,0 +1,3 @@
|
||||
grpcio==1.78.1
|
||||
protobuf
|
||||
certifi
|
||||
10
backend/python/liquid-audio/run.sh
Executable file
10
backend/python/liquid-audio/run.sh
Executable file
@@ -0,0 +1,10 @@
|
||||
#!/bin/bash
# Launches the liquid-audio backend through the shared startBackend helper
# from common/libbackend.sh, forwarding all command-line arguments.

# Quote every path expansion so a checkout path containing spaces or glob
# characters does not get word-split.
backend_dir=$(dirname "$0")
if [ -d "$backend_dir/common" ]; then
    source "$backend_dir/common/libbackend.sh"
else
    source "$backend_dir/../common/libbackend.sh"
fi

# "$@" keeps each argument intact; a bare $@ would re-split arguments that
# contain whitespace.
startBackend "$@"
|
||||
89
backend/python/liquid-audio/test.py
Normal file
89
backend/python/liquid-audio/test.py
Normal file
@@ -0,0 +1,89 @@
|
||||
"""Smoke tests for the liquid-audio backend.
|
||||
|
||||
These run without contacting HuggingFace or loading model weights:
|
||||
they only verify that the gRPC service starts and Health() responds.
|
||||
|
||||
To run an end-to-end inference test, set LIQUID_AUDIO_MODEL_ID
|
||||
(e.g. "LiquidAI/LFM2.5-Audio-1.5B") in the environment — see test_inference().
|
||||
"""
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
import time
|
||||
import unittest
|
||||
|
||||
import grpc
|
||||
|
||||
# Ensure generated protobuf stubs are importable
|
||||
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||||
|
||||
import backend_pb2
|
||||
import backend_pb2_grpc
|
||||
|
||||
|
||||
class TestBackend(unittest.TestCase):
    """Smoke tests that run against a backend.py subprocess over gRPC.

    The server is started once per class on ``LIQUID_AUDIO_TEST_ADDR``
    (default ``localhost:50053``) and all tests share a single channel.
    """

    # Upper bound, in seconds, on how long the backend may take to start listening.
    STARTUP_TIMEOUT = 30

    @classmethod
    def setUpClass(cls):
        addr = os.environ.get("LIQUID_AUDIO_TEST_ADDR", "localhost:50053")
        cls.addr = addr
        cls.server = subprocess.Popen(
            [sys.executable, os.path.join(os.path.dirname(__file__), "backend.py"), "--addr", addr],
        )
        # Wait until the server actually accepts connections rather than
        # sleeping for a fixed time, which is flaky on slow or loaded machines.
        cls.channel = grpc.insecure_channel(addr)
        try:
            grpc.channel_ready_future(cls.channel).result(timeout=cls.STARTUP_TIMEOUT)
        except grpc.FutureTimeoutError:
            # tearDownClass is not run when setUpClass fails, so clean up here.
            cls.channel.close()
            cls._stop_server()
            raise RuntimeError(
                f"liquid-audio backend did not become ready on {addr} "
                f"within {cls.STARTUP_TIMEOUT}s"
            )

    @classmethod
    def tearDownClass(cls):
        cls.channel.close()
        cls._stop_server()

    @classmethod
    def _stop_server(cls):
        """Terminate the backend subprocess, escalating to kill after 5 seconds."""
        cls.server.terminate()
        try:
            cls.server.wait(timeout=5)
        except subprocess.TimeoutExpired:
            cls.server.kill()
            cls.server.wait()  # reap the killed process

    def _stub(self):
        """Return a Backend stub bound to the shared class-level channel."""
        return backend_pb2_grpc.BackendStub(self.channel)

    def test_health(self):
        stub = self._stub()
        reply = stub.Health(backend_pb2.HealthMessage(), timeout=5)
        self.assertEqual(reply.message, b"OK")

    def test_load_finetune_mode_without_weights(self):
        """Loading in fine-tune mode should succeed without pulling model weights."""
        stub = self._stub()
        result = stub.LoadModel(
            backend_pb2.ModelOptions(
                Model="LiquidAI/LFM2.5-Audio-1.5B",
                Options=["mode:finetune"],
            ),
            timeout=10,
        )
        self.assertTrue(result.success, msg=result.message)

    @unittest.skipUnless(os.environ.get("LIQUID_AUDIO_MODEL_ID"),
                         "Set LIQUID_AUDIO_MODEL_ID to run an end-to-end inference smoke test")
    def test_inference(self):
        """End-to-end: load a real LFM2-Audio model and run one short prediction."""
        stub = self._stub()
        model_id = os.environ["LIQUID_AUDIO_MODEL_ID"]
        result = stub.LoadModel(
            backend_pb2.ModelOptions(
                Model=model_id,
                Options=["mode:chat"],
            ),
            timeout=600,
        )
        self.assertTrue(result.success, msg=result.message)
        reply = stub.Predict(
            backend_pb2.PredictOptions(
                Prompt="Hello!",
                Tokens=8,
                Temperature=0.0,
            ),
            timeout=120,
        )
        self.assertGreater(len(reply.message), 0)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
11
backend/python/liquid-audio/test.sh
Executable file
11
backend/python/liquid-audio/test.sh
Executable file
@@ -0,0 +1,11 @@
|
||||
#!/bin/bash
# Runs the backend's Python unit tests via the shared runUnittests helper
# from common/libbackend.sh.
set -e

# Quote every path expansion so a checkout path containing spaces or glob
# characters does not get word-split.
backend_dir=$(dirname "$0")
if [ -d "$backend_dir/common" ]; then
    source "$backend_dir/common/libbackend.sh"
else
    source "$backend_dir/../common/libbackend.sh"
fi

runUnittests
|
||||
@@ -2,7 +2,7 @@ torch==2.7.1
|
||||
llvmlite==0.43.0
|
||||
numba==0.60.0
|
||||
accelerate
|
||||
transformers>=5.0.0
|
||||
transformers>=5.8.0
|
||||
bitsandbytes
|
||||
sentence-transformers==5.4.0
|
||||
diffusers
|
||||
|
||||
@@ -2,7 +2,7 @@ torch==2.7.1
|
||||
accelerate
|
||||
llvmlite==0.43.0
|
||||
numba==0.60.0
|
||||
transformers>=5.0.0
|
||||
transformers>=5.8.0
|
||||
bitsandbytes
|
||||
sentence-transformers==5.4.0
|
||||
diffusers
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
torch==2.9.0
|
||||
llvmlite==0.43.0
|
||||
numba==0.60.0
|
||||
transformers>=5.0.0
|
||||
transformers>=5.8.0
|
||||
bitsandbytes
|
||||
sentence-transformers==5.4.0
|
||||
diffusers
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
--extra-index-url https://download.pytorch.org/whl/rocm7.0
|
||||
torch==2.10.0+rocm7.0
|
||||
accelerate
|
||||
transformers>=5.0.0
|
||||
transformers>=5.8.0
|
||||
llvmlite==0.43.0
|
||||
numba==0.60.0
|
||||
bitsandbytes
|
||||
|
||||
@@ -3,7 +3,7 @@ torch
|
||||
optimum[openvino]
|
||||
llvmlite==0.43.0
|
||||
numba==0.60.0
|
||||
transformers>=5.0.0
|
||||
transformers>=5.8.0
|
||||
bitsandbytes
|
||||
sentence-transformers==5.4.0
|
||||
diffusers
|
||||
|
||||
@@ -2,7 +2,7 @@ torch==2.7.1
|
||||
llvmlite==0.43.0
|
||||
numba==0.60.0
|
||||
accelerate
|
||||
transformers>=5.0.0
|
||||
transformers>=5.8.0
|
||||
bitsandbytes
|
||||
sentence-transformers==5.4.0
|
||||
diffusers
|
||||
|
||||
@@ -33,7 +33,7 @@ dependencies = [
|
||||
"certifi",
|
||||
"setuptools",
|
||||
"pillow",
|
||||
"charset-normalizer>=3.4.0",
|
||||
"charset-normalizer>=3.4.7",
|
||||
"chardet",
|
||||
# L4T-specific accelerator stack (sourced from jetson-ai-lab below).
|
||||
"torch",
|
||||
|
||||
@@ -3,5 +3,5 @@ protobuf
|
||||
certifi
|
||||
setuptools
|
||||
pillow
|
||||
charset-normalizer>=3.4.0
|
||||
charset-normalizer>=3.4.7
|
||||
chardet
|
||||
@@ -169,7 +169,7 @@ func initDistributed(cfg *config.ApplicationConfig, authDB *gorm.DB, configLoade
|
||||
cfg.Distributed.HealthCheckIntervalOrDefault(),
|
||||
cfg.Distributed.StaleNodeThresholdOrDefault(),
|
||||
routerAuthToken,
|
||||
cfg.Distributed.PerModelHealthCheck,
|
||||
!cfg.Distributed.DisablePerModelHealthCheck,
|
||||
)
|
||||
|
||||
// Initialize job store
|
||||
|
||||
@@ -24,6 +24,7 @@ const (
|
||||
UsecaseVAD = "vad"
|
||||
UsecaseAudioTransform = "audio_transform"
|
||||
UsecaseDiarization = "diarization"
|
||||
UsecaseRealtimeAudio = "realtime_audio"
|
||||
)
|
||||
|
||||
// GRPCMethod identifies a Backend service RPC from backend.proto.
|
||||
@@ -45,6 +46,7 @@ const (
|
||||
MethodVAD GRPCMethod = "VAD"
|
||||
MethodAudioTransform GRPCMethod = "AudioTransform"
|
||||
MethodDiarize GRPCMethod = "Diarize"
|
||||
MethodAudioToAudioStream GRPCMethod = "AudioToAudioStream"
|
||||
)
|
||||
|
||||
// UsecaseInfo describes a single known_usecase value and how it maps
|
||||
@@ -147,6 +149,11 @@ var UsecaseInfoMap = map[string]UsecaseInfo{
|
||||
GRPCMethod: MethodDiarize,
|
||||
Description: "Speaker diarization (who-spoke-when, per-speaker segments) via the Diarize RPC.",
|
||||
},
|
||||
UsecaseRealtimeAudio: {
|
||||
Flag: FLAG_REALTIME_AUDIO,
|
||||
GRPCMethod: MethodAudioToAudioStream,
|
||||
Description: "Self-contained any-to-any audio model for the Realtime API — accepts microphone audio and emits speech + transcript (+ optional function calls) from a single backend via the AudioToAudioStream RPC.",
|
||||
},
|
||||
}
|
||||
|
||||
// BackendCapability describes which gRPC methods and usecases a backend supports.
|
||||
@@ -397,6 +404,15 @@ var BackendCapabilities = map[string]BackendCapability{
|
||||
Description: "Meta MusicGen via transformers — music generation from text",
|
||||
},
|
||||
|
||||
// --- Any-to-any audio backends ---
|
||||
"liquid-audio": {
|
||||
GRPCMethods: []GRPCMethod{MethodPredict, MethodPredictStream, MethodAudioTranscription, MethodTTS, MethodAudioToAudioStream, MethodVAD},
|
||||
PossibleUsecases: []string{UsecaseChat, UsecaseCompletion, UsecaseTranscript, UsecaseTTS, UsecaseRealtimeAudio, UsecaseVAD},
|
||||
DefaultUsecases: []string{UsecaseRealtimeAudio, UsecaseChat, UsecaseTranscript, UsecaseTTS, UsecaseVAD},
|
||||
AcceptsAudios: true,
|
||||
Description: "LFM2 / LFM2.5-Audio — self-contained any-to-any audio model for the Realtime API; also exposes chat, transcription, TTS and a stub energy-based VAD endpoint",
|
||||
},
|
||||
|
||||
// --- Audio transform backends ---
|
||||
"localvqe": {
|
||||
GRPCMethods: []GRPCMethod{MethodAudioTransform},
|
||||
|
||||
@@ -31,7 +31,15 @@ type DistributedConfig struct {
|
||||
DrainTimeout time.Duration // Time to wait for in-flight requests during drain (default 30s)
|
||||
HealthCheckInterval time.Duration // Health monitor check interval (default 15s)
|
||||
StaleNodeThreshold time.Duration // Time before a node is considered stale (default 60s)
|
||||
PerModelHealthCheck bool // Enable per-model backend health checking (default false)
|
||||
// DisablePerModelHealthCheck turns off the health monitor's per-model
|
||||
// gRPC probe. When probing is enabled (the default, i.e. this field is false), the monitor pings each model's
|
||||
// gRPC address and removes stale node_models rows whose backend has
|
||||
// crashed even though the worker's node-level heartbeat is still arriving.
|
||||
// Without per-model probing, /embeddings and /completions can be dispatched
|
||||
// to a backend that silently returns garbage (see also the cascading
|
||||
// model-row cleanup on MarkUnhealthy / MarkDraining).
|
||||
DisablePerModelHealthCheck bool
|
||||
|
||||
MCPCIJobTimeout time.Duration // MCP CI job execution timeout (default 10m)
|
||||
|
||||
MaxUploadSize int64 // Maximum upload body size in bytes (default 50 GB)
|
||||
|
||||
@@ -636,6 +636,7 @@ const (
|
||||
FLAG_SPEAKER_RECOGNITION ModelConfigUsecase = 0b1000000000000000
|
||||
FLAG_AUDIO_TRANSFORM ModelConfigUsecase = 0b10000000000000000
|
||||
FLAG_DIARIZATION ModelConfigUsecase = 0b100000000000000000
|
||||
FLAG_REALTIME_AUDIO ModelConfigUsecase = 0b1000000000000000000
|
||||
|
||||
// Common Subsets
|
||||
FLAG_LLM ModelConfigUsecase = FLAG_CHAT | FLAG_COMPLETION | FLAG_EDIT
|
||||
@@ -645,12 +646,12 @@ const (
|
||||
// Flags within the same group are NOT orthogonal (e.g., chat and completion are
|
||||
// both text/language). A model is multimodal when its usecases span 2+ groups.
|
||||
var ModalityGroups = []ModelConfigUsecase{
|
||||
FLAG_CHAT | FLAG_COMPLETION | FLAG_EDIT, // text/language
|
||||
FLAG_VISION | FLAG_DETECTION, // visual understanding
|
||||
FLAG_TRANSCRIPT, // speech input
|
||||
FLAG_TTS | FLAG_SOUND_GENERATION, // audio output
|
||||
FLAG_AUDIO_TRANSFORM, // audio in/out transforms
|
||||
FLAG_IMAGE | FLAG_VIDEO, // visual generation
|
||||
FLAG_CHAT | FLAG_COMPLETION | FLAG_EDIT, // text/language
|
||||
FLAG_VISION | FLAG_DETECTION, // visual understanding
|
||||
FLAG_TRANSCRIPT | FLAG_REALTIME_AUDIO, // speech input — realtime_audio is any-to-any, so it counts here too
|
||||
FLAG_TTS | FLAG_SOUND_GENERATION | FLAG_REALTIME_AUDIO, // audio output — and here, so a lone realtime_audio flag still reads as multimodal
|
||||
FLAG_AUDIO_TRANSFORM, // audio in/out transforms
|
||||
FLAG_IMAGE | FLAG_VIDEO, // visual generation
|
||||
}
|
||||
|
||||
// IsMultimodal returns true if the given usecases span two or more orthogonal
|
||||
@@ -692,6 +693,7 @@ func GetAllModelConfigUsecases() map[string]ModelConfigUsecase {
|
||||
"FLAG_SPEAKER_RECOGNITION": FLAG_SPEAKER_RECOGNITION,
|
||||
"FLAG_AUDIO_TRANSFORM": FLAG_AUDIO_TRANSFORM,
|
||||
"FLAG_DIARIZATION": FLAG_DIARIZATION,
|
||||
"FLAG_REALTIME_AUDIO": FLAG_REALTIME_AUDIO,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -866,6 +868,16 @@ func (c *ModelConfig) GuessUsecases(u ModelConfigUsecase) bool {
|
||||
}
|
||||
}
|
||||
|
||||
if (u & FLAG_REALTIME_AUDIO) == FLAG_REALTIME_AUDIO {
|
||||
// Backends that own a single any-to-any loop and implement
|
||||
// AudioToAudioStream — listed here so models without an explicit
|
||||
// known_usecases still surface on the Talk page.
|
||||
realtimeAudioBackends := []string{"liquid-audio"}
|
||||
if !slices.Contains(realtimeAudioBackends, c.Backend) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
return true
|
||||
}
|
||||
|
||||
|
||||
@@ -130,6 +130,8 @@ var defaultImporters = []Importer{
|
||||
// and would otherwise swallow the C++ port's GGUF bundles.
|
||||
&VibeVoiceCppImporter{},
|
||||
&VibeVoiceImporter{},
|
||||
// LiquidAudio (Python) — keep before LlamaCPP so non-GGUF LFM2-Audio repos route here.
|
||||
&LiquidAudioImporter{},
|
||||
&CoquiImporter{},
|
||||
// Image/Video (Batch 3)
|
||||
&StableDiffusionGGMLImporter{},
|
||||
|
||||
145
core/gallery/importers/liquid-audio.go
Normal file
145
core/gallery/importers/liquid-audio.go
Normal file
@@ -0,0 +1,145 @@
|
||||
package importers
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
"github.com/mudler/LocalAI/core/gallery"
|
||||
"github.com/mudler/LocalAI/core/schema"
|
||||
"go.yaml.in/yaml/v2"
|
||||
)
|
||||
|
||||
// Compile-time check that *LiquidAudioImporter satisfies Importer.
var _ Importer = &LiquidAudioImporter{}

// LiquidAudioImporter recognises LiquidAI's LFM2-Audio family (LFM2-Audio-1.5B,
// LFM2.5-Audio-1.5B, community finetunes) and routes them to the Python
// `liquid-audio` backend. Detection is by repo-name substring so third-party
// mirrors still match. preferences.backend="liquid-audio" overrides detection.
//
// Once upstream llama.cpp PR #18641 lands and the GGUF gallery entries are
// added, GGUF mirrors of these models should route to llama-cpp; that's
// handled by ordering LlamaCPPImporter after this one and by the explicit
// "-gguf" exclusion below.
type LiquidAudioImporter struct{}

// Name returns the importer's identifier, "liquid-audio", which is also the
// backend name written into generated model configs.
func (i *LiquidAudioImporter) Name() string { return "liquid-audio" }

// Modality returns "tts".
// NOTE(review): the backend also serves chat, transcription and realtime
// audio; presumably "tts" is the closest existing modality bucket — confirm.
func (i *LiquidAudioImporter) Modality() string { return "tts" }

// AutoDetects returns true: Match can recognise models from their repo name
// without an explicit backend preference.
func (i *LiquidAudioImporter) AutoDetects() bool { return true }
|
||||
|
||||
func (i *LiquidAudioImporter) Match(details Details) bool {
|
||||
preferences, err := details.Preferences.MarshalJSON()
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
preferencesMap := make(map[string]any)
|
||||
if len(preferences) > 0 {
|
||||
if err := json.Unmarshal(preferences, &preferencesMap); err != nil {
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
if b, ok := preferencesMap["backend"].(string); ok && b == "liquid-audio" {
|
||||
return true
|
||||
}
|
||||
|
||||
matchRepo := func(repo string) bool {
|
||||
r := strings.ToLower(repo)
|
||||
// Cede GGUF mirrors to the (later-ordered) llama-cpp importer.
|
||||
if strings.HasSuffix(r, "-gguf") {
|
||||
return false
|
||||
}
|
||||
return strings.Contains(r, "lfm2-audio") || strings.Contains(r, "lfm2.5-audio")
|
||||
}
|
||||
|
||||
if details.HuggingFace != nil {
|
||||
repoName := details.HuggingFace.ModelID
|
||||
if idx := strings.Index(repoName, "/"); idx >= 0 {
|
||||
repoName = repoName[idx+1:]
|
||||
}
|
||||
if matchRepo(repoName) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
if _, repo, ok := HFOwnerRepoFromURI(details.URI); ok {
|
||||
return matchRepo(repo)
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (i *LiquidAudioImporter) Import(details Details) (gallery.ModelConfig, error) {
|
||||
preferences, err := details.Preferences.MarshalJSON()
|
||||
if err != nil {
|
||||
return gallery.ModelConfig{}, err
|
||||
}
|
||||
preferencesMap := make(map[string]any)
|
||||
if len(preferences) > 0 {
|
||||
if err := json.Unmarshal(preferences, &preferencesMap); err != nil {
|
||||
return gallery.ModelConfig{}, err
|
||||
}
|
||||
}
|
||||
|
||||
name, ok := preferencesMap["name"].(string)
|
||||
if !ok {
|
||||
name = filepath.Base(details.URI)
|
||||
}
|
||||
|
||||
description, ok := preferencesMap["description"].(string)
|
||||
if !ok {
|
||||
description = "Imported from " + details.URI
|
||||
}
|
||||
|
||||
model := details.URI
|
||||
if details.HuggingFace != nil && details.HuggingFace.ModelID != "" {
|
||||
model = details.HuggingFace.ModelID
|
||||
}
|
||||
|
||||
// Preferences may pin the mode (chat / asr / tts / s2s / finetune).
|
||||
// Default to s2s — the headline any-to-any use case.
|
||||
mode, _ := preferencesMap["mode"].(string)
|
||||
if mode == "" {
|
||||
mode = "s2s"
|
||||
}
|
||||
|
||||
options := []string{"mode:" + mode}
|
||||
if voice, ok := preferencesMap["voice"].(string); ok && voice != "" {
|
||||
options = append(options, "voice:"+voice)
|
||||
}
|
||||
|
||||
usecases := []string{"chat"}
|
||||
switch mode {
|
||||
case "asr":
|
||||
usecases = []string{"transcript"}
|
||||
case "tts":
|
||||
usecases = []string{"tts"}
|
||||
case "s2s":
|
||||
// realtime_audio surfaces the model on the Talk page; chat/tts/
|
||||
// transcript/vad keep the standalone OpenAI-compatible endpoints
|
||||
// working since liquid-audio implements all of them.
|
||||
usecases = []string{"realtime_audio", "chat", "tts", "transcript", "vad"}
|
||||
}
|
||||
|
||||
modelConfig := config.ModelConfig{
|
||||
Name: name,
|
||||
Description: description,
|
||||
Backend: "liquid-audio",
|
||||
KnownUsecaseStrings: usecases,
|
||||
Options: options,
|
||||
PredictionOptions: schema.PredictionOptions{
|
||||
BasicModelRequest: schema.BasicModelRequest{Model: model},
|
||||
},
|
||||
}
|
||||
|
||||
data, err := yaml.Marshal(modelConfig)
|
||||
if err != nil {
|
||||
return gallery.ModelConfig{}, err
|
||||
}
|
||||
|
||||
return gallery.ModelConfig{
|
||||
Name: name,
|
||||
Description: description,
|
||||
ConfigFile: string(data),
|
||||
}, nil
|
||||
}
|
||||
91
core/gallery/importers/liquid-audio_test.go
Normal file
91
core/gallery/importers/liquid-audio_test.go
Normal file
@@ -0,0 +1,91 @@
|
||||
package importers_test
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
|
||||
"github.com/mudler/LocalAI/core/gallery/importers"
|
||||
. "github.com/onsi/ginkgo/v2"
|
||||
. "github.com/onsi/gomega"
|
||||
)
|
||||
|
||||
var _ = Describe("LiquidAudioImporter", func() {
|
||||
Context("detection from HuggingFace", func() {
|
||||
It("matches LiquidAI/LFM2.5-Audio-1.5B", func() {
|
||||
uri := "https://huggingface.co/LiquidAI/LFM2.5-Audio-1.5B"
|
||||
preferences := json.RawMessage(`{}`)
|
||||
|
||||
modelConfig, err := importers.DiscoverModelConfig(uri, preferences)
|
||||
|
||||
Expect(err).ToNot(HaveOccurred(), fmt.Sprintf("Error: %v", err))
|
||||
Expect(modelConfig.ConfigFile).To(ContainSubstring("backend: liquid-audio"))
|
||||
Expect(modelConfig.ConfigFile).To(ContainSubstring("LiquidAI/LFM2.5-Audio-1.5B"))
|
||||
})
|
||||
|
||||
It("matches LiquidAI/LFM2-Audio-1.5B (older variant)", func() {
|
||||
uri := "https://huggingface.co/LiquidAI/LFM2-Audio-1.5B"
|
||||
preferences := json.RawMessage(`{}`)
|
||||
|
||||
modelConfig, err := importers.DiscoverModelConfig(uri, preferences)
|
||||
|
||||
Expect(err).ToNot(HaveOccurred(), fmt.Sprintf("Error: %v", err))
|
||||
Expect(modelConfig.ConfigFile).To(ContainSubstring("backend: liquid-audio"))
|
||||
})
|
||||
|
||||
It("cedes -GGUF mirrors to the llama-cpp importer", func() {
|
||||
// LiquidAI/LFM2.5-Audio-1.5B-GGUF should NOT route to liquid-audio.
|
||||
// Once upstream PR #18641 lands and the GGUF gallery entry exists,
|
||||
// this is the path that lets users opt into the C++ runtime.
|
||||
uri := "https://huggingface.co/LiquidAI/LFM2.5-Audio-1.5B-GGUF"
|
||||
preferences := json.RawMessage(`{}`)
|
||||
|
||||
modelConfig, err := importers.DiscoverModelConfig(uri, preferences)
|
||||
|
||||
Expect(err).ToNot(HaveOccurred(), fmt.Sprintf("Error: %v", err))
|
||||
Expect(modelConfig.ConfigFile).ToNot(ContainSubstring("backend: liquid-audio"),
|
||||
fmt.Sprintf("GGUF repo should not match Python importer; got: %s", modelConfig.ConfigFile))
|
||||
})
|
||||
})
|
||||
|
||||
Context("preference override", func() {
|
||||
It("honours preferences.backend=liquid-audio for arbitrary URIs", func() {
|
||||
uri := "https://example.com/some-unrelated-model"
|
||||
preferences := json.RawMessage(`{"backend": "liquid-audio"}`)
|
||||
|
||||
modelConfig, err := importers.DiscoverModelConfig(uri, preferences)
|
||||
|
||||
Expect(err).ToNot(HaveOccurred(), fmt.Sprintf("Error: %v", err))
|
||||
Expect(modelConfig.ConfigFile).To(ContainSubstring("backend: liquid-audio"))
|
||||
})
|
||||
|
||||
It("picks up the mode preference", func() {
|
||||
uri := "https://huggingface.co/LiquidAI/LFM2.5-Audio-1.5B"
|
||||
preferences := json.RawMessage(`{"mode": "asr"}`)
|
||||
|
||||
modelConfig, err := importers.DiscoverModelConfig(uri, preferences)
|
||||
|
||||
Expect(err).ToNot(HaveOccurred(), fmt.Sprintf("Error: %v", err))
|
||||
Expect(modelConfig.ConfigFile).To(ContainSubstring("mode:asr"))
|
||||
Expect(modelConfig.ConfigFile).To(ContainSubstring("transcript"))
|
||||
})
|
||||
|
||||
It("picks up the voice preference", func() {
|
||||
uri := "https://huggingface.co/LiquidAI/LFM2.5-Audio-1.5B"
|
||||
preferences := json.RawMessage(`{"mode": "tts", "voice": "uk_male"}`)
|
||||
|
||||
modelConfig, err := importers.DiscoverModelConfig(uri, preferences)
|
||||
|
||||
Expect(err).ToNot(HaveOccurred(), fmt.Sprintf("Error: %v", err))
|
||||
Expect(modelConfig.ConfigFile).To(ContainSubstring("voice:uk_male"))
|
||||
})
|
||||
})
|
||||
|
||||
Context("Importer interface metadata", func() {
|
||||
It("exposes name/modality/autodetect", func() {
|
||||
imp := &importers.LiquidAudioImporter{}
|
||||
Expect(imp.Name()).To(Equal("liquid-audio"))
|
||||
Expect(imp.Modality()).To(Equal("tts"))
|
||||
Expect(imp.AutoDetects()).To(BeTrue())
|
||||
})
|
||||
})
|
||||
})
|
||||
@@ -443,6 +443,25 @@ func API(application *application.Application) (*echo.Echo, error) {
|
||||
baseTag := `<base href="` + httpMiddleware.SecureBaseHref(baseURL) + `" />`
|
||||
indexHTML = []byte(strings.Replace(string(indexHTML), "<head>", "<head>\n "+baseTag, 1))
|
||||
}
|
||||
// <base href> only changes how relative URLs resolve; path-absolute
|
||||
// URLs (those starting with `/`) still resolve against the origin
|
||||
// and would bypass the reverse-proxy prefix. Rewrite the internal
|
||||
// path-absolute references emitted by the build so the browser
|
||||
// requests them through the proxy under the prefix.
|
||||
//
|
||||
// HTML-escape the prefix before interpolating it into attributes:
|
||||
// BasePathPrefix already gates X-Forwarded-Prefix via
|
||||
// SafeForwardedPrefix, but the validator only blocks open-redirect
|
||||
// shapes (// prefix, backslashes, control chars), not attribute
|
||||
// breakout characters like `"`. Escaping makes this resilient
|
||||
// even if the validator ever loosens.
|
||||
if prefix := httpMiddleware.BasePathPrefix(c); prefix != "/" {
|
||||
safePrefix := httpMiddleware.SecureBaseHref(prefix)
|
||||
html := string(indexHTML)
|
||||
html = strings.ReplaceAll(html, `="/assets/`, `="`+safePrefix+`assets/`)
|
||||
html = strings.ReplaceAll(html, `="/favicon.svg"`, `="`+safePrefix+`favicon.svg"`)
|
||||
indexHTML = []byte(html)
|
||||
}
|
||||
return c.HTMLBlob(http.StatusOK, indexHTML)
|
||||
}
|
||||
|
||||
|
||||
@@ -446,6 +446,42 @@ var _ = Describe("API test", func() {
|
||||
Expect(sc).To(Equal(200), "status code")
|
||||
Expect(string(body)).To(ContainSubstring(`<base href="https://example.org/myprefix/" />`), "body")
|
||||
})
|
||||
|
||||
// Caddy's `handle_path` (and similar directives) strip the matched
|
||||
// prefix before forwarding upstream, so LocalAI receives the
|
||||
// already-stripped path together with X-Forwarded-Prefix. The base
|
||||
// href and asset URLs must still include the prefix so the browser
|
||||
// requests them through the proxy.
|
||||
It("Should support reverse-proxy when prefix is stripped by the proxy", func() {
|
||||
|
||||
err, sc, body := getRequest("http://127.0.0.1:9090/app", http.Header{
|
||||
"X-Forwarded-Proto": {"https"},
|
||||
"X-Forwarded-Host": {"example.org"},
|
||||
"X-Forwarded-Prefix": {"/myprefix"},
|
||||
})
|
||||
Expect(err).To(BeNil(), "error")
|
||||
Expect(sc).To(Equal(200), "status code")
|
||||
Expect(string(body)).To(ContainSubstring(`<base href="https://example.org/myprefix/" />`), "body")
|
||||
Expect(string(body)).ToNot(ContainSubstring(`="/assets/`), "asset URLs must include the prefix")
|
||||
Expect(string(body)).ToNot(ContainSubstring(`="/favicon.svg"`), "favicon URL must include the prefix")
|
||||
})
|
||||
|
||||
// X-Forwarded-Prefix is attacker controllable on misconfigured
|
||||
// proxy chains. A value like "//evil.com" would otherwise turn the
|
||||
// asset URL rewrite into a protocol-relative URL that loads JS
|
||||
// from a foreign origin. BasePathPrefix must reject these via
|
||||
// SafeForwardedPrefix and fall back to "/".
|
||||
It("Should ignore an unsafe X-Forwarded-Prefix and not poison asset URLs", func() {
|
||||
err, sc, body := getRequest("http://127.0.0.1:9090/app", http.Header{
|
||||
"X-Forwarded-Proto": {"https"},
|
||||
"X-Forwarded-Host": {"example.org"},
|
||||
"X-Forwarded-Prefix": {"//evil.com"},
|
||||
})
|
||||
Expect(err).To(BeNil(), "error")
|
||||
Expect(sc).To(Equal(200), "status code")
|
||||
Expect(string(body)).ToNot(ContainSubstring("evil.com"), "unsafe prefix must not leak into the response")
|
||||
Expect(string(body)).ToNot(ContainSubstring(`="//`), "asset URLs must not become protocol-relative")
|
||||
})
|
||||
})
|
||||
|
||||
Context("Applying models", func() {
|
||||
|
||||
@@ -8,6 +8,7 @@ import (
|
||||
"fmt"
|
||||
"math"
|
||||
"os"
|
||||
"strconv"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
@@ -20,6 +21,8 @@ import (
|
||||
"github.com/mudler/LocalAI/core/backend"
|
||||
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
"github.com/mudler/LocalAI/core/http/auth"
|
||||
mcpTools "github.com/mudler/LocalAI/core/http/endpoints/mcp"
|
||||
"github.com/mudler/LocalAI/core/http/endpoints/openai/types"
|
||||
"github.com/mudler/LocalAI/core/schema"
|
||||
"github.com/mudler/LocalAI/core/templates"
|
||||
@@ -79,6 +82,26 @@ type Session struct {
|
||||
InputSampleRate int
|
||||
OutputSampleRate int
|
||||
MaxOutputTokens types.IntOrInf
|
||||
// MaxHistoryItems caps the number of MessageItems passed to the LLM each
|
||||
// turn (0 = unlimited). Small models — especially the LFM2.5-Audio 1.5B
|
||||
// served via the liquid-audio backend — degrade quickly past a handful
|
||||
// of turns. Counted from the tail; FunctionCall + FunctionCallOutput
|
||||
// pairs are kept together so we never feed an orphaned tool result.
|
||||
MaxHistoryItems int
|
||||
|
||||
// AssistantExecutor is non-nil when the session opted into the in-process
|
||||
// LocalAI Assistant tool surface. Tool calls whose name matches this
|
||||
// executor's catalog are run inproc and their output is fed back to the
|
||||
// model server-side; the client never sees a function_call_arguments
|
||||
// event for those. Mirrors the chat handler's metadata.localai_assistant
|
||||
// path.
|
||||
AssistantExecutor mcpTools.ToolExecutor
|
||||
|
||||
// AssistantTools is the cached ToolUnion slice we injected at session
|
||||
// creation. Re-applied after every client session.update so a
|
||||
// client-driven tool refresh (e.g. toggling a client MCP server) doesn't
|
||||
// silently strip Manage Mode's tools.
|
||||
AssistantTools []types.ToolUnion
|
||||
|
||||
// Response cancellation: protects activeResponseCancel/activeResponseDone
|
||||
responseMu sync.Mutex
|
||||
@@ -205,6 +228,19 @@ func RealtimeTranscriptionSession(application *application.Application) echo.Han
|
||||
}
|
||||
}
|
||||
|
||||
// RealtimeSessionOptions bundles per-session knobs decoded from the WS query
// string (or the WebRTC handshake body). Mirrors what chat.go pulls off
// `metadata.localai_assistant` — admin-only opt-in to the in-process
// management tool surface.
type RealtimeSessionOptions struct {
	// LocalAIAssistant is the client's opt-in flag: the `localai_assistant`
	// query parameter on the WebSocket path, or the `localai_assistant` field
	// of the WebRTC call request body.
	LocalAIAssistant bool
	// IsAdmin mirrors chat.go's requireAssistantAccess gate. We resolve
	// admin role at handshake time (where the echo.Context has the auth
	// cookie/Bearer) and drop the result here so runRealtimeSession can
	// decide without holding onto the request.
	IsAdmin bool
}
|
||||
|
||||
func Realtime(application *application.Application) echo.HandlerFunc {
|
||||
return func(c echo.Context) error {
|
||||
ws, err := upgrader.Upgrade(c.Response(), c.Request(), nil)
|
||||
@@ -218,25 +254,105 @@ func Realtime(application *application.Application) echo.HandlerFunc {
|
||||
|
||||
// Extract query parameters from Echo context before passing to websocket handler
|
||||
model := c.QueryParam("model")
|
||||
assistantFlag, _ := strconv.ParseBool(c.QueryParam("localai_assistant"))
|
||||
opts := RealtimeSessionOptions{
|
||||
LocalAIAssistant: assistantFlag,
|
||||
IsAdmin: isCurrentUserAdmin(c, application),
|
||||
}
|
||||
|
||||
registerRealtime(application, model)(ws)
|
||||
registerRealtime(application, model, opts)(ws)
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
func registerRealtime(application *application.Application, model string) func(c *websocket.Conn) {
|
||||
// isCurrentUserAdmin replicates the chat-side admin check at the realtime
|
||||
// handshake. When auth is disabled, every caller is treated as admin (same
|
||||
// as chat's requireAssistantAccess).
|
||||
func isCurrentUserAdmin(c echo.Context, application *application.Application) bool {
|
||||
if application == nil || application.ApplicationConfig() == nil || !application.ApplicationConfig().Auth.Enabled {
|
||||
return true
|
||||
}
|
||||
user := auth.GetUser(c)
|
||||
return user != nil && user.Role == auth.RoleAdmin
|
||||
}
|
||||
|
||||
func registerRealtime(application *application.Application, model string, opts RealtimeSessionOptions) func(c *websocket.Conn) {
|
||||
return func(conn *websocket.Conn) {
|
||||
t := NewWebSocketTransport(conn)
|
||||
evaluator := application.TemplatesEvaluator()
|
||||
xlog.Debug("Realtime WebSocket connection established", "address", conn.RemoteAddr().String(), "model", model)
|
||||
runRealtimeSession(application, t, model, evaluator)
|
||||
runRealtimeSession(application, t, model, evaluator, opts)
|
||||
}
|
||||
}
|
||||
|
||||
// defaultMaxHistoryItems picks a sensible default cap for the session.
|
||||
// Small any-to-any audio models degrade quickly past a handful of turns;
|
||||
// legacy pipelines composing larger LLMs keep the historical "unlimited"
|
||||
// default and rely on the LLM's own context window.
|
||||
func defaultMaxHistoryItems(cfg *config.ModelConfig) int {
|
||||
if cfg != nil && cfg.HasUsecases(config.FLAG_REALTIME_AUDIO) {
|
||||
return 6
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
// trimRealtimeItems returns the tail of items capped at maxItems (0 = no cap).
|
||||
// Walks backwards keeping function_call + function_call_output pairs together
|
||||
// so we never feed the LLM an orphaned tool result that references a call it
|
||||
// can't see.
|
||||
func trimRealtimeItems(items []*types.MessageItemUnion, maxItems int) []*types.MessageItemUnion {
|
||||
if maxItems <= 0 || len(items) <= maxItems {
|
||||
return items
|
||||
}
|
||||
// Find the cut point starting from len-maxItems and pull it left until
|
||||
// we're not in the middle of a tool-call pair.
|
||||
cut := len(items) - maxItems
|
||||
for cut > 0 && items[cut] != nil && items[cut].FunctionCallOutput != nil {
|
||||
cut--
|
||||
}
|
||||
return items[cut:]
|
||||
}
|
||||
|
||||
// prepareRealtimeConfig validates a model config for use in a realtime session
|
||||
// and fills in pipeline slots for self-contained any-to-any models. It returns
|
||||
// an error code + message pair suitable for sendError; the bool indicates
|
||||
// whether the caller should proceed. Extracted from runRealtimeSession so the
|
||||
// gate logic can be exercised in unit tests without a full Application.
|
||||
func prepareRealtimeConfig(cfg *config.ModelConfig) (errCode, errMsg string, ok bool) {
|
||||
if cfg == nil {
|
||||
return "invalid_model", "Model is not a pipeline model", false
|
||||
}
|
||||
|
||||
// Self-contained any-to-any models (e.g. liquid-audio) own the whole
|
||||
// loop in one engine — surface them by populating empty pipeline slots
|
||||
// with the model's own name so newModel can resolve a config for each
|
||||
// role. The user can still pin individual slots (e.g. Pipeline.VAD =
|
||||
// silero-vad) and those wins.
|
||||
if cfg.HasUsecases(config.FLAG_REALTIME_AUDIO) {
|
||||
if cfg.Pipeline.VAD == "" {
|
||||
cfg.Pipeline.VAD = cfg.Name
|
||||
}
|
||||
if cfg.Pipeline.Transcription == "" {
|
||||
cfg.Pipeline.Transcription = cfg.Name
|
||||
}
|
||||
if cfg.Pipeline.LLM == "" {
|
||||
cfg.Pipeline.LLM = cfg.Name
|
||||
}
|
||||
if cfg.Pipeline.TTS == "" {
|
||||
cfg.Pipeline.TTS = cfg.Name
|
||||
}
|
||||
return "", "", true
|
||||
}
|
||||
|
||||
if cfg.Pipeline.VAD == "" && cfg.Pipeline.Transcription == "" && cfg.Pipeline.TTS == "" && cfg.Pipeline.LLM == "" {
|
||||
return "invalid_model", "Model is not a pipeline model", false
|
||||
}
|
||||
return "", "", true
|
||||
}
|
||||
|
||||
// runRealtimeSession runs the main event loop for a realtime session.
|
||||
// It is transport-agnostic and works with both WebSocket and WebRTC.
|
||||
func runRealtimeSession(application *application.Application, t Transport, model string, evaluator *templates.Evaluator) {
|
||||
// TODO: Allow any-to-any model to be specified
|
||||
func runRealtimeSession(application *application.Application, t Transport, model string, evaluator *templates.Evaluator, opts RealtimeSessionOptions) {
|
||||
cl := application.ModelConfigLoader()
|
||||
cfg, err := cl.LoadModelConfigFileByNameDefaultOptions(model, application.ApplicationConfig())
|
||||
if err != nil {
|
||||
@@ -245,22 +361,79 @@ func runRealtimeSession(application *application.Application, t Transport, model
|
||||
return
|
||||
}
|
||||
|
||||
if cfg == nil || (cfg.Pipeline.VAD == "" && cfg.Pipeline.Transcription == "" && cfg.Pipeline.TTS == "" && cfg.Pipeline.LLM == "") {
|
||||
if code, msg, ok := prepareRealtimeConfig(cfg); !ok {
|
||||
xlog.Error("model is not a pipeline", "model", model)
|
||||
sendError(t, "invalid_model", "Model is not a pipeline model", "", "")
|
||||
sendError(t, code, msg, "", "")
|
||||
return
|
||||
}
|
||||
|
||||
// LocalAI Assistant opt-in: gate on admin (same rule as chat.go's
|
||||
// requireAssistantAccess) and grab the process-wide holder's executor.
|
||||
// We collect tools + system prompt here and merge them into the session
|
||||
// below so they're live from the first response.create.
|
||||
var assistantTools []types.ToolUnion
|
||||
var assistantSystemPrompt string
|
||||
var assistantExecutor mcpTools.ToolExecutor
|
||||
if opts.LocalAIAssistant {
|
||||
if !opts.IsAdmin {
|
||||
sendError(t, "forbidden", "localai_assistant requires admin", "", "")
|
||||
return
|
||||
}
|
||||
appCfg := application.ApplicationConfig()
|
||||
if appCfg != nil && appCfg.DisableLocalAIAssistant {
|
||||
sendError(t, "unavailable", "LocalAI Assistant is disabled on this server", "", "")
|
||||
return
|
||||
}
|
||||
holder := application.LocalAIAssistant()
|
||||
if holder == nil || !holder.HasTools() {
|
||||
sendError(t, "unavailable", "LocalAI Assistant is not available on this server", "", "")
|
||||
return
|
||||
}
|
||||
exec := holder.Executor()
|
||||
fns, discErr := exec.DiscoverTools(context.Background())
|
||||
if discErr != nil {
|
||||
xlog.Error("realtime: failed to discover LocalAI Assistant tools", "error", discErr)
|
||||
sendError(t, "tool_discovery_failed", "failed to discover assistant tools: "+discErr.Error(), "", "")
|
||||
return
|
||||
}
|
||||
assistantExecutor = exec
|
||||
assistantSystemPrompt = holder.SystemPrompt()
|
||||
assistantTools = make([]types.ToolUnion, 0, len(fns))
|
||||
for _, fn := range fns {
|
||||
fnCopy := fn
|
||||
assistantTools = append(assistantTools, types.ToolUnion{
|
||||
Function: &types.ToolFunction{
|
||||
Name: fnCopy.Name,
|
||||
Description: fnCopy.Description,
|
||||
Parameters: fnCopy.Parameters,
|
||||
},
|
||||
})
|
||||
}
|
||||
xlog.Debug("realtime: LocalAI Assistant tools injected", "count", len(fns))
|
||||
}
|
||||
|
||||
sttModel := cfg.Pipeline.Transcription
|
||||
|
||||
// Compose the system prompt: prepend the assistant prompt when we have
|
||||
// one (it teaches the model the safety rules and tool recipes), then the
|
||||
// session's default voice instructions. Order matches chat.go's
|
||||
// hasSystemMessage check — assistant prompt comes first.
|
||||
instructions := defaultInstructions
|
||||
if assistantSystemPrompt != "" {
|
||||
instructions = assistantSystemPrompt + "\n\n" + defaultInstructions
|
||||
}
|
||||
|
||||
sessionID := generateSessionID()
|
||||
session := &Session{
|
||||
ID: sessionID,
|
||||
TranscriptionOnly: false,
|
||||
Model: model,
|
||||
Voice: cfg.TTSConfig.Voice,
|
||||
Instructions: defaultInstructions,
|
||||
Instructions: instructions,
|
||||
ModelConfig: cfg,
|
||||
Tools: assistantTools,
|
||||
AssistantTools: assistantTools,
|
||||
AssistantExecutor: assistantExecutor,
|
||||
TurnDetection: &types.TurnDetectionUnion{
|
||||
ServerVad: &types.ServerVad{
|
||||
Threshold: 0.5,
|
||||
@@ -275,6 +448,7 @@ func runRealtimeSession(application *application.Application, t Transport, model
|
||||
Conversations: make(map[string]*Conversation),
|
||||
InputSampleRate: defaultRemoteSampleRate,
|
||||
OutputSampleRate: defaultRemoteSampleRate,
|
||||
MaxHistoryItems: defaultMaxHistoryItems(cfg),
|
||||
}
|
||||
|
||||
// Create a default conversation
|
||||
@@ -810,7 +984,28 @@ func updateSession(session *Session, update *types.SessionUnion, cl *config.Mode
|
||||
}
|
||||
|
||||
if rt.Tools != nil {
|
||||
session.Tools = rt.Tools
|
||||
// Manage Mode tools survive a client-driven session.update — the
|
||||
// alternative is silently dropping them whenever the user toggles
|
||||
// a client MCP server, which would break the modality mid-session.
|
||||
// Names from rt.Tools win on collision (the client is explicit;
|
||||
// we preserve, we don't override).
|
||||
merged := append([]types.ToolUnion(nil), rt.Tools...)
|
||||
seen := make(map[string]struct{}, len(merged))
|
||||
for _, t := range merged {
|
||||
if t.Function != nil {
|
||||
seen[t.Function.Name] = struct{}{}
|
||||
}
|
||||
}
|
||||
for _, t := range session.AssistantTools {
|
||||
if t.Function == nil {
|
||||
continue
|
||||
}
|
||||
if _, ok := seen[t.Function.Name]; ok {
|
||||
continue
|
||||
}
|
||||
merged = append(merged, t)
|
||||
}
|
||||
session.Tools = merged
|
||||
}
|
||||
if rt.ToolChoice != nil {
|
||||
session.ToolChoice = rt.ToolChoice
|
||||
@@ -1104,7 +1299,17 @@ func generateResponse(ctx context.Context, session *Session, utt []byte, transcr
|
||||
triggerResponse(ctx, session, conv, t, nil)
|
||||
}
|
||||
|
||||
// maxAssistantToolTurns caps the server-side agentic loop. Mirrors the
|
||||
// chat-page maxToolTurns:10 from useChat.js — the model gets up to this
|
||||
// many consecutive tool round-trips before we return control to the user
|
||||
// without another response cycle.
|
||||
const maxAssistantToolTurns = 10
|
||||
|
||||
// triggerResponse starts a response cycle for conv by delegating to
// triggerResponseAtTurn with the tool-turn counter set to 0, i.e. it is the
// entry point for a response that is not a server-side tool follow-up.
// All arguments are forwarded unchanged.
func triggerResponse(ctx context.Context, session *Session, conv *Conversation, t Transport, overrides *types.ResponseCreateParams) {
	triggerResponseAtTurn(ctx, session, conv, t, overrides, 0)
}
|
||||
|
||||
func triggerResponseAtTurn(ctx context.Context, session *Session, conv *Conversation, t Transport, overrides *types.ResponseCreateParams, toolTurn int) {
|
||||
config := session.ModelInterface.PredictConfig()
|
||||
|
||||
// Default values
|
||||
@@ -1155,7 +1360,8 @@ func triggerResponse(ctx context.Context, session *Session, conv *Conversation,
|
||||
|
||||
imgIndex := 0
|
||||
conv.Lock.Lock()
|
||||
for _, item := range conv.Items {
|
||||
items := trimRealtimeItems(conv.Items, session.MaxHistoryItems)
|
||||
for _, item := range items {
|
||||
if item.User != nil {
|
||||
msg := schema.Message{
|
||||
Role: string(types.MessageRoleUser),
|
||||
@@ -1575,8 +1781,16 @@ func triggerResponse(ctx context.Context, session *Session, conv *Conversation,
|
||||
})
|
||||
}
|
||||
|
||||
// Handle Tool Calls
|
||||
// Handle Tool Calls. Two paths:
|
||||
// - LocalAI Assistant tools (session.AssistantExecutor.IsTool) run
|
||||
// server-side; we append both the call and its output to conv.Items
|
||||
// and re-trigger a follow-up response so the model can speak the
|
||||
// result. The client only sees observability events.
|
||||
// - All other tools follow the standard OpenAI flow: emit
|
||||
// function_call_arguments.done and wait for the client to send
|
||||
// conversation.item.create back.
|
||||
xlog.Debug("About to handle tool calls", "finalToolCallsCount", len(finalToolCalls))
|
||||
executedAssistantTool := false
|
||||
for i, tc := range finalToolCalls {
|
||||
toolCallID := generateItemID()
|
||||
callID := "call_" + generateUniqueID() // OpenAI uses call_xyz
|
||||
@@ -1608,6 +1822,51 @@ func triggerResponse(ctx context.Context, session *Session, conv *Conversation,
|
||||
Item: fcItem,
|
||||
})
|
||||
|
||||
serverSide := session.AssistantExecutor != nil && session.AssistantExecutor.IsTool(tc.Name)
|
||||
if serverSide {
|
||||
output, execErr := session.AssistantExecutor.ExecuteTool(ctx, tc.Name, tc.Arguments)
|
||||
if execErr != nil {
|
||||
output = "Error: " + execErr.Error()
|
||||
xlog.Error("realtime: assistant tool execution failed", "tool", tc.Name, "error", execErr)
|
||||
}
|
||||
foItem := types.MessageItemUnion{
|
||||
FunctionCallOutput: &types.MessageItemFunctionCallOutput{
|
||||
ID: generateItemID(),
|
||||
CallID: callID,
|
||||
Output: output,
|
||||
Status: types.ItemStatusCompleted,
|
||||
},
|
||||
}
|
||||
conv.Lock.Lock()
|
||||
conv.Items = append(conv.Items, &foItem)
|
||||
conv.Lock.Unlock()
|
||||
// Close the call out and emit the output as its own paired
|
||||
// added/done — the OpenAI spec pairs every item-done with a
|
||||
// preceding item-added, so we re-pair here for the output.
|
||||
// The UI renders the transcript entry on item.done for both
|
||||
// shapes (FunctionCall + FunctionCallOutput).
|
||||
sendEvent(t, types.ResponseOutputItemDoneEvent{
|
||||
ServerEventBase: types.ServerEventBase{},
|
||||
ResponseID: responseID,
|
||||
OutputIndex: outputIndex,
|
||||
Item: fcItem,
|
||||
})
|
||||
sendEvent(t, types.ResponseOutputItemAddedEvent{
|
||||
ServerEventBase: types.ServerEventBase{},
|
||||
ResponseID: responseID,
|
||||
OutputIndex: outputIndex,
|
||||
Item: foItem,
|
||||
})
|
||||
sendEvent(t, types.ResponseOutputItemDoneEvent{
|
||||
ServerEventBase: types.ServerEventBase{},
|
||||
ResponseID: responseID,
|
||||
OutputIndex: outputIndex,
|
||||
Item: foItem,
|
||||
})
|
||||
executedAssistantTool = true
|
||||
continue
|
||||
}
|
||||
|
||||
sendEvent(t, types.ResponseFunctionCallArgumentsDeltaEvent{
|
||||
ServerEventBase: types.ServerEventBase{},
|
||||
ResponseID: responseID,
|
||||
@@ -1643,6 +1902,19 @@ func triggerResponse(ctx context.Context, session *Session, conv *Conversation,
|
||||
Status: types.ResponseStatusCompleted,
|
||||
},
|
||||
})
|
||||
|
||||
// If we executed any assistant tools inproc, run another response cycle
|
||||
// so the model can speak the result. Mirrors the chat-side agentic loop
|
||||
// but driven server-side rather than by client round-trip. Bounded so a
|
||||
// degenerate "model keeps calling tools" doesn't blow the stack.
|
||||
if executedAssistantTool {
|
||||
if toolTurn+1 >= maxAssistantToolTurns {
|
||||
xlog.Warn("realtime: assistant tool-turn limit reached, stopping the agentic loop",
|
||||
"limit", maxAssistantToolTurns, "model", session.Model)
|
||||
return
|
||||
}
|
||||
triggerResponseAtTurn(ctx, session, conv, t, nil, toolTurn+1)
|
||||
}
|
||||
}
|
||||
|
||||
// Helper functions to generate unique IDs
|
||||
|
||||
153
core/http/endpoints/openai/realtime_gate_test.go
Normal file
153
core/http/endpoints/openai/realtime_gate_test.go
Normal file
@@ -0,0 +1,153 @@
|
||||
package openai
|
||||
|
||||
import (
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
"github.com/mudler/LocalAI/core/http/endpoints/openai/types"
|
||||
. "github.com/onsi/ginkgo/v2"
|
||||
. "github.com/onsi/gomega"
|
||||
)
|
||||
|
||||
// withUsecases returns a *ModelConfigUsecase pointing at the OR of the given flags.
|
||||
// Helper so each spec keeps its intent obvious.
|
||||
func withUsecases(flags ...config.ModelConfigUsecase) *config.ModelConfigUsecase {
|
||||
var u config.ModelConfigUsecase
|
||||
for _, f := range flags {
|
||||
u |= f
|
||||
}
|
||||
return &u
|
||||
}
|
||||
|
||||
var _ = Describe("prepareRealtimeConfig", func() {
|
||||
It("rejects a nil config", func() {
|
||||
code, msg, ok := prepareRealtimeConfig(nil)
|
||||
Expect(ok).To(BeFalse())
|
||||
Expect(code).To(Equal("invalid_model"))
|
||||
Expect(msg).To(ContainSubstring("not a pipeline model"))
|
||||
})
|
||||
|
||||
It("rejects a model with no pipeline slots and no realtime_audio usecase", func() {
|
||||
cfg := &config.ModelConfig{Name: "plain-chat"}
|
||||
code, msg, ok := prepareRealtimeConfig(cfg)
|
||||
Expect(ok).To(BeFalse())
|
||||
Expect(code).To(Equal("invalid_model"))
|
||||
Expect(msg).To(ContainSubstring("not a pipeline model"))
|
||||
})
|
||||
|
||||
It("accepts a model with a fully populated legacy pipeline", func() {
|
||||
cfg := &config.ModelConfig{
|
||||
Name: "legacy",
|
||||
Pipeline: config.Pipeline{
|
||||
VAD: "silero",
|
||||
Transcription: "whisper",
|
||||
LLM: "llama",
|
||||
TTS: "piper",
|
||||
},
|
||||
}
|
||||
_, _, ok := prepareRealtimeConfig(cfg)
|
||||
Expect(ok).To(BeTrue())
|
||||
Expect(cfg.Pipeline.LLM).To(Equal("llama"), "user-supplied pipeline slot must not be overwritten")
|
||||
})
|
||||
|
||||
It("accepts a self-contained realtime_audio model and self-pipelines empty slots", func() {
|
||||
cfg := &config.ModelConfig{
|
||||
Name: "lfm2.5-audio-realtime",
|
||||
KnownUsecases: withUsecases(config.FLAG_REALTIME_AUDIO),
|
||||
}
|
||||
_, _, ok := prepareRealtimeConfig(cfg)
|
||||
Expect(ok).To(BeTrue())
|
||||
Expect(cfg.Pipeline.VAD).To(Equal("lfm2.5-audio-realtime"))
|
||||
Expect(cfg.Pipeline.Transcription).To(Equal("lfm2.5-audio-realtime"))
|
||||
Expect(cfg.Pipeline.LLM).To(Equal("lfm2.5-audio-realtime"))
|
||||
Expect(cfg.Pipeline.TTS).To(Equal("lfm2.5-audio-realtime"))
|
||||
})
|
||||
|
||||
It("preserves user-pinned pipeline slots on a realtime_audio model", func() {
|
||||
// A user might want a dedicated silero-vad and let the realtime_audio
|
||||
// model own only STT/LLM/TTS.
|
||||
cfg := &config.ModelConfig{
|
||||
Name: "lfm-with-external-vad",
|
||||
KnownUsecases: withUsecases(config.FLAG_REALTIME_AUDIO),
|
||||
Pipeline: config.Pipeline{
|
||||
VAD: "silero-vad",
|
||||
},
|
||||
}
|
||||
_, _, ok := prepareRealtimeConfig(cfg)
|
||||
Expect(ok).To(BeTrue())
|
||||
Expect(cfg.Pipeline.VAD).To(Equal("silero-vad"))
|
||||
Expect(cfg.Pipeline.Transcription).To(Equal("lfm-with-external-vad"))
|
||||
Expect(cfg.Pipeline.LLM).To(Equal("lfm-with-external-vad"))
|
||||
Expect(cfg.Pipeline.TTS).To(Equal("lfm-with-external-vad"))
|
||||
})
|
||||
|
||||
It("accepts a model with at least one legacy pipeline slot set", func() {
|
||||
// Pre-existing behaviour: the gate only rejected when ALL four slots
|
||||
// were empty. Lock that in so the change doesn't tighten the gate.
|
||||
cfg := &config.ModelConfig{
|
||||
Name: "partial",
|
||||
Pipeline: config.Pipeline{
|
||||
LLM: "llama",
|
||||
},
|
||||
}
|
||||
_, _, ok := prepareRealtimeConfig(cfg)
|
||||
Expect(ok).To(BeTrue())
|
||||
})
|
||||
})
|
||||
|
||||
var _ = Describe("defaultMaxHistoryItems", func() {
|
||||
It("caps realtime_audio sessions at 6", func() {
|
||||
cfg := &config.ModelConfig{KnownUsecases: withUsecases(config.FLAG_REALTIME_AUDIO)}
|
||||
Expect(defaultMaxHistoryItems(cfg)).To(Equal(6))
|
||||
})
|
||||
It("leaves legacy pipelines unlimited", func() {
|
||||
cfg := &config.ModelConfig{Pipeline: config.Pipeline{LLM: "llama"}}
|
||||
Expect(defaultMaxHistoryItems(cfg)).To(Equal(0))
|
||||
})
|
||||
It("tolerates nil", func() {
|
||||
Expect(defaultMaxHistoryItems(nil)).To(Equal(0))
|
||||
})
|
||||
})
|
||||
|
||||
var _ = Describe("trimRealtimeItems", func() {
|
||||
user := func(id string) *types.MessageItemUnion {
|
||||
return &types.MessageItemUnion{User: &types.MessageItemUser{ID: id}}
|
||||
}
|
||||
assistant := func(id string) *types.MessageItemUnion {
|
||||
return &types.MessageItemUnion{Assistant: &types.MessageItemAssistant{ID: id}}
|
||||
}
|
||||
fnCall := func(id, callID string) *types.MessageItemUnion {
|
||||
return &types.MessageItemUnion{FunctionCall: &types.MessageItemFunctionCall{ID: id, CallID: callID}}
|
||||
}
|
||||
fnOut := func(id, callID string) *types.MessageItemUnion {
|
||||
return &types.MessageItemUnion{FunctionCallOutput: &types.MessageItemFunctionCallOutput{ID: id, CallID: callID}}
|
||||
}
|
||||
|
||||
It("returns the input unchanged when cap is zero", func() {
|
||||
in := []*types.MessageItemUnion{user("u1"), assistant("a1")}
|
||||
Expect(trimRealtimeItems(in, 0)).To(Equal(in))
|
||||
})
|
||||
|
||||
It("returns the input unchanged when under the cap", func() {
|
||||
in := []*types.MessageItemUnion{user("u1"), assistant("a1")}
|
||||
Expect(trimRealtimeItems(in, 4)).To(Equal(in))
|
||||
})
|
||||
|
||||
It("keeps the tail when over the cap", func() {
|
||||
in := []*types.MessageItemUnion{user("u1"), assistant("a1"), user("u2"), assistant("a2"), user("u3")}
|
||||
out := trimRealtimeItems(in, 3)
|
||||
Expect(out).To(HaveLen(3))
|
||||
Expect(out[0].User.ID).To(Equal("u2"))
|
||||
Expect(out[2].User.ID).To(Equal("u3"))
|
||||
})
|
||||
|
||||
It("pulls the cut left to keep a function_call paired with its output", func() {
|
||||
// 0:user 1:fc 2:fc_out 3:assistant — cap=2 would otherwise start at
|
||||
// index 2 (orphan fc_out). Helper must roll back to include 1.
|
||||
in := []*types.MessageItemUnion{user("u1"), fnCall("fc1", "c1"), fnOut("fo1", "c1"), assistant("a1")}
|
||||
out := trimRealtimeItems(in, 2)
|
||||
// Expect at least the fc + fc_out + assistant (3 items, cap was 2)
|
||||
// — the rollback prefers correctness over the cap.
|
||||
Expect(len(out)).To(BeNumerically(">=", 3))
|
||||
Expect(out[0].FunctionCall).NotTo(BeNil())
|
||||
Expect(out[1].FunctionCallOutput).NotTo(BeNil())
|
||||
})
|
||||
})
|
||||
@@ -15,6 +15,10 @@ import (
|
||||
type RealtimeCallRequest struct {
	// SDP is read from the `sdp` JSON field.
	// NOTE(review): presumably the client's WebRTC session description
	// offer — confirm against the handler.
	SDP string `json:"sdp"`
	// Model is read from the `model` JSON field and is the model name handed
	// to runRealtimeSession for this call.
	Model string `json:"model"`
	// LocalAIAssistant opts the session into the in-process admin tool
	// surface (same modality as the chat page's "Manage Mode"). Admin-only;
	// the realtime entry point gates it the same way the chat handler does.
	LocalAIAssistant bool `json:"localai_assistant,omitempty"`
}
|
||||
|
||||
// RealtimeCallResponse is the JSON response for POST /v1/realtime/calls.
|
||||
@@ -165,9 +169,13 @@ func RealtimeCalls(application *application.Application) echo.HandlerFunc {
|
||||
|
||||
// Start the realtime session in a goroutine
|
||||
evaluator := application.TemplatesEvaluator()
|
||||
opts := RealtimeSessionOptions{
|
||||
LocalAIAssistant: req.LocalAIAssistant,
|
||||
IsAdmin: isCurrentUserAdmin(c, application),
|
||||
}
|
||||
go func() {
|
||||
defer transport.Close()
|
||||
runRealtimeSession(application, transport, req.Model, evaluator)
|
||||
runRealtimeSession(application, transport, req.Model, evaluator, opts)
|
||||
}()
|
||||
|
||||
return c.JSON(http.StatusCreated, RealtimeCallResponse{
|
||||
|
||||
@@ -6,20 +6,55 @@ import (
|
||||
"github.com/labstack/echo/v4"
|
||||
)
|
||||
|
||||
// BasePathPrefix returns the URL path prefix that the request was reached
|
||||
// under (e.g. "/myprefix/"). It always returns a value that starts and ends
|
||||
// with `/`, defaulting to "/" when the app is not behind a path prefix.
|
||||
//
|
||||
// It first looks at the path StripPathPrefix removed (when the proxy forwards
|
||||
// the prefix in the URL), then falls back to the X-Forwarded-Prefix header
|
||||
// (when the proxy strips the prefix before forwarding, e.g. Caddy's
|
||||
// handle_path).
|
||||
//
|
||||
// The header fallback is gated through SafeForwardedPrefix because the value
|
||||
// flows into the SPA HTML response (both <base href> and the path-absolute
|
||||
// asset URL rewrite in serveIndex). X-Forwarded-Prefix is attacker
|
||||
// controllable on misconfigured proxy chains; without that gate a value like
|
||||
// "//evil.com" turns the asset rewrite into a protocol-relative URL that
|
||||
// loads JS from a foreign origin.
|
||||
func BasePathPrefix(c echo.Context) string {
|
||||
path := c.Path()
|
||||
origPath := c.Request().URL.Path
|
||||
|
||||
if storedPath, ok := c.Get("_original_path").(string); ok && storedPath != "" {
|
||||
origPath = storedPath
|
||||
}
|
||||
|
||||
if path != origPath && strings.HasSuffix(origPath, path) && len(path) > 0 {
|
||||
prefixLen := len(origPath) - len(path)
|
||||
if prefixLen > 0 {
|
||||
pathPrefix := origPath[:prefixLen]
|
||||
if !strings.HasSuffix(pathPrefix, "/") {
|
||||
pathPrefix += "/"
|
||||
}
|
||||
return pathPrefix
|
||||
}
|
||||
}
|
||||
|
||||
if validated, ok := SafeForwardedPrefix(c.Request().Header.Get("X-Forwarded-Prefix")); ok {
|
||||
if !strings.HasSuffix(validated, "/") {
|
||||
validated += "/"
|
||||
}
|
||||
return validated
|
||||
}
|
||||
|
||||
return "/"
|
||||
}
|
||||
|
||||
// BaseURL returns the base URL for the given HTTP request context.
|
||||
// It takes into account that the app may be exposed by a reverse-proxy under a different protocol, host and path.
|
||||
// The returned URL is guaranteed to end with `/`.
|
||||
// The method should be used in conjunction with the StripPathPrefix middleware.
|
||||
func BaseURL(c echo.Context) string {
|
||||
path := c.Path()
|
||||
origPath := c.Request().URL.Path
|
||||
|
||||
// Check if StripPathPrefix middleware stored the original path
|
||||
if storedPath, ok := c.Get("_original_path").(string); ok && storedPath != "" {
|
||||
origPath = storedPath
|
||||
}
|
||||
|
||||
// Check X-Forwarded-Proto for scheme
|
||||
scheme := "http"
|
||||
if c.Request().Header.Get("X-Forwarded-Proto") == "https" {
|
||||
scheme = "https"
|
||||
@@ -27,22 +62,10 @@ func BaseURL(c echo.Context) string {
|
||||
scheme = "https"
|
||||
}
|
||||
|
||||
// Check X-Forwarded-Host for host
|
||||
host := c.Request().Host
|
||||
if forwardedHost := c.Request().Header.Get("X-Forwarded-Host"); forwardedHost != "" {
|
||||
host = forwardedHost
|
||||
}
|
||||
|
||||
if path != origPath && strings.HasSuffix(origPath, path) && len(path) > 0 {
|
||||
prefixLen := len(origPath) - len(path)
|
||||
if prefixLen > 0 && prefixLen <= len(origPath) {
|
||||
pathPrefix := origPath[:prefixLen]
|
||||
if !strings.HasSuffix(pathPrefix, "/") {
|
||||
pathPrefix += "/"
|
||||
}
|
||||
return scheme + "://" + host + pathPrefix
|
||||
}
|
||||
}
|
||||
|
||||
return scheme + "://" + host + "/"
|
||||
return scheme + "://" + host + BasePathPrefix(c)
|
||||
}
|
||||
|
||||
@@ -55,4 +55,84 @@ var _ = Describe("BaseURL", func() {
|
||||
Expect(actualURL).To(Equal("http://example.com/myprefix/"), "base URL")
|
||||
})
|
||||
})
|
||||
|
||||
// Caddy's handle_path (and similar reverse-proxy directives) strips the
|
||||
// matched prefix before forwarding upstream, so LocalAI receives the
|
||||
// already-stripped path together with X-Forwarded-Prefix. In that case
|
||||
// StripPathPrefix never stores _original_path, but BaseURL must still
|
||||
// honor the header so that <base href> and asset URLs include the prefix.
|
||||
Context("with X-Forwarded-Prefix header but pre-stripped path", func() {
|
||||
It("should return base URL with prefix from header", func() {
|
||||
app := echo.New()
|
||||
actualURL := ""
|
||||
|
||||
routePath := "/app"
|
||||
app.GET(routePath, func(c echo.Context) error {
|
||||
actualURL = BaseURL(c)
|
||||
return nil
|
||||
})
|
||||
|
||||
req := httptest.NewRequest("GET", "/app", nil)
|
||||
req.Header.Set("X-Forwarded-Prefix", "/localai")
|
||||
rec := httptest.NewRecorder()
|
||||
app.ServeHTTP(rec, req)
|
||||
|
||||
Expect(rec.Code).To(Equal(200), "response status code")
|
||||
Expect(actualURL).To(Equal("http://example.com/localai/"), "base URL")
|
||||
})
|
||||
|
||||
It("should normalize a prefix that already ends with a slash", func() {
|
||||
app := echo.New()
|
||||
actualURL := ""
|
||||
|
||||
routePath := "/app"
|
||||
app.GET(routePath, func(c echo.Context) error {
|
||||
actualURL = BaseURL(c)
|
||||
return nil
|
||||
})
|
||||
|
||||
req := httptest.NewRequest("GET", "/app", nil)
|
||||
req.Header.Set("X-Forwarded-Prefix", "/localai/")
|
||||
rec := httptest.NewRecorder()
|
||||
app.ServeHTTP(rec, req)
|
||||
|
||||
Expect(rec.Code).To(Equal(200), "response status code")
|
||||
Expect(actualURL).To(Equal("http://example.com/localai/"), "base URL")
|
||||
})
|
||||
})
|
||||
|
||||
// X-Forwarded-Prefix is attacker controllable on misconfigured proxy
|
||||
// chains, and the value flows into the SPA HTML response (<base href>
|
||||
// and asset URLs). BasePathPrefix must gate the header through
|
||||
// SafeForwardedPrefix so values that turn the prefix into an open
|
||||
// redirect or a protocol-relative URL are ignored and the base falls
|
||||
// back to "/".
|
||||
Context("with unsafe X-Forwarded-Prefix header", func() {
|
||||
DescribeTable("falls back to / when the header is unsafe",
|
||||
func(header string) {
|
||||
app := echo.New()
|
||||
actualURL := ""
|
||||
|
||||
app.GET("/app", func(c echo.Context) error {
|
||||
actualURL = BaseURL(c)
|
||||
return nil
|
||||
})
|
||||
|
||||
req := httptest.NewRequest("GET", "/app", nil)
|
||||
req.Header.Set("X-Forwarded-Prefix", header)
|
||||
rec := httptest.NewRecorder()
|
||||
app.ServeHTTP(rec, req)
|
||||
|
||||
Expect(rec.Code).To(Equal(200), "response status code")
|
||||
Expect(actualURL).To(Equal("http://example.com/"), "base URL")
|
||||
},
|
||||
Entry("protocol-relative URL", "//evil.com"),
|
||||
Entry("protocol-relative URL with path", "//evil.com/assets"),
|
||||
Entry("backslash path", `/foo\bar`),
|
||||
Entry("embedded NUL", "/foo\x00bar"),
|
||||
Entry("CR injection", "/foo\rbar"),
|
||||
Entry("LF injection", "/foo\nbar"),
|
||||
Entry("missing leading slash", "evil"),
|
||||
)
|
||||
})
|
||||
})
|
||||
|
||||
@@ -14,7 +14,6 @@ import (
|
||||
"github.com/mudler/LocalAI/core/schema"
|
||||
"github.com/mudler/LocalAI/core/services/galleryop"
|
||||
"github.com/mudler/LocalAI/core/templates"
|
||||
"github.com/mudler/LocalAI/pkg/functions"
|
||||
"github.com/mudler/LocalAI/pkg/model"
|
||||
"github.com/mudler/LocalAI/pkg/utils"
|
||||
"github.com/mudler/xlog"
|
||||
@@ -241,6 +240,28 @@ func (re *RequestExtractor) SetOpenAIRequest(c echo.Context) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// extractToolChoiceFunctionName parses a tool_choice map and returns the
// specific function name. Accepts both the OpenAI-spec nested shape
// ({type:function, function:{name:...}}) and the legacy/Anthropic-compat
// flat shape ({type:function, name:...}); the nested form wins when both
// are present. Returns "" for malformed input or when the shape names a
// mode rather than a specific tool.
func extractToolChoiceFunctionName(m map[string]any) string {
	// Anything whose "type" is missing, not a string, or not "function"
	// does not name a specific tool.
	tcType, ok := m["type"].(string)
	if !ok || tcType != "function" {
		return ""
	}

	// Nested shape: a non-empty function.name takes precedence over any
	// top-level name.
	if fn, ok := m["function"].(map[string]any); ok {
		if n, ok := fn["name"].(string); ok && n != "" {
			return n
		}
	}

	// Flat shape: a top-level string name (possibly empty).
	if n, ok := m["name"].(string); ok {
		return n
	}

	return ""
}
|
||||
|
||||
func mergeOpenAIRequestAndModelConfig(config *config.ModelConfig, input *schema.OpenAIRequest) error {
|
||||
if input.Echo {
|
||||
config.Echo = input.Echo
|
||||
@@ -320,17 +341,55 @@ func mergeOpenAIRequestAndModelConfig(config *config.ModelConfig, input *schema.
|
||||
}
|
||||
|
||||
if input.ToolsChoice != nil {
|
||||
var toolChoice functions.Tool
|
||||
|
||||
// OpenAI tool_choice has three valid shapes plus one tolerated
|
||||
// non-spec form seen in the wild:
|
||||
//
|
||||
// 1. string mode: "auto" | "none" | "required"
|
||||
// 2. specific tool: {"type":"function", "function":{"name":"..."}} (current spec)
|
||||
// 3. legacy: {"type":"function", "name":"..."} (older / Anthropic-compat)
|
||||
// 4. double-encoded: "{\"type\":\"function\", ...}" (some clients serialize the object)
|
||||
//
|
||||
// The pre-#9559 code unmarshalled the string case through
|
||||
// json.Unmarshal([]byte(content), &functions.Tool{}), which:
|
||||
// - failed for plain string modes (so "required" / "none" were
|
||||
// silently ignored and tools stayed enabled regardless), but
|
||||
// - happened to handle shape 4 by accident.
|
||||
// It also could not parse shape 3 because functions.Tool has no
|
||||
// flat top-level Name field.
|
||||
//
|
||||
// Mirror the parsing pattern from MergeOpenResponsesConfig (#9509),
|
||||
// route results through the existing input.FunctionCall string/map
|
||||
// dispatch downstream (see the switch on input.FunctionCall in this
|
||||
// same function), and preserve the shape-4 fallback so non-spec
|
||||
// clients don't silently break. Tracked in #9508; sibling fix in #9526.
|
||||
switch content := input.ToolsChoice.(type) {
|
||||
case string:
|
||||
_ = json.Unmarshal([]byte(content), &toolChoice)
|
||||
// "auto" is the default and needs no override. "none" and "required"
|
||||
// both reach SetFunctionCallString via the input.FunctionCall string
|
||||
// branch below; ShouldUseFunctions() then returns false for "none"
|
||||
// (tools disabled) and true for "required" (mode engaged).
|
||||
//
|
||||
// If the string looks like a JSON object, try shape 4 first: parse
|
||||
// it as a tool_choice map and use the resulting name. Falling back
|
||||
// to mode-string handling when the parse yields no usable name keeps
|
||||
// genuinely-malformed input from accidentally engaging a mode.
|
||||
if content == "" || content == "auto" {
|
||||
break
|
||||
}
|
||||
if strings.HasPrefix(strings.TrimSpace(content), "{") {
|
||||
var nested map[string]any
|
||||
if err := json.Unmarshal([]byte(content), &nested); err == nil {
|
||||
if name := extractToolChoiceFunctionName(nested); name != "" {
|
||||
input.FunctionCall = map[string]any{"name": name}
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
input.FunctionCall = content
|
||||
case map[string]any:
|
||||
dat, _ := json.Marshal(content)
|
||||
_ = json.Unmarshal(dat, &toolChoice)
|
||||
}
|
||||
input.FunctionCall = map[string]any{
|
||||
"name": toolChoice.Function.Name,
|
||||
if name := extractToolChoiceFunctionName(content); name != "" {
|
||||
input.FunctionCall = map[string]any{"name": name}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -306,3 +306,248 @@ var _ = Describe("MergeOpenResponsesConfig tool_choice parsing", func() {
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// SetModelAndConfig + SetOpenAIRequest - /v1/chat/completions tool_choice parsing
|
||||
// ---------------------------------------------------------------------------
|
||||
//
|
||||
// Parallel to the MergeOpenResponsesConfig specs above, but for the chat
|
||||
// completions path. The parsing block lives in mergeOpenAIRequestAndModelConfig
|
||||
// (called from SetOpenAIRequest), so these tests drive the full middleware
|
||||
// chain the way the production /v1/chat/completions route does.
|
||||
//
|
||||
// What we assert per shape:
|
||||
// - "required" -> ShouldUseFunctions=true, no specific name
|
||||
// - "none" -> ShouldUseFunctions=false (tools disabled)
|
||||
// - "auto" -> ShouldUseFunctions=true, no specific name
|
||||
// - {type:function, function:{name:"X"}} (spec) -> ShouldCallSpecificFunction=true, FunctionToCall="X"
|
||||
// - {type:function, name:"X"} (legacy) -> ShouldCallSpecificFunction=true, FunctionToCall="X"
|
||||
// - nested+flat both present -> nested wins
|
||||
// - malformed (no type / no name) -> no-op
|
||||
var _ = Describe("SetModelAndConfig tool_choice parsing (chat completions)", func() {
|
||||
var (
|
||||
app *echo.Echo
|
||||
modelDir string
|
||||
capturedConfig *config.ModelConfig
|
||||
)
|
||||
|
||||
BeforeEach(func() {
|
||||
var err error
|
||||
modelDir, err = os.MkdirTemp("", "localai-test-models-*")
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
|
||||
cfgContent := []byte("name: test-model\nbackend: llama-cpp\n")
|
||||
Expect(os.WriteFile(filepath.Join(modelDir, "test-model.yaml"), cfgContent, 0644)).To(Succeed())
|
||||
|
||||
ss := &system.SystemState{
|
||||
Model: system.Model{ModelsPath: modelDir},
|
||||
}
|
||||
appConfig := config.NewApplicationConfig()
|
||||
appConfig.SystemState = ss
|
||||
|
||||
mcl := config.NewModelConfigLoader(modelDir)
|
||||
ml := model.NewModelLoader(ss)
|
||||
re := NewRequestExtractor(mcl, ml, appConfig)
|
||||
|
||||
capturedConfig = nil
|
||||
app = echo.New()
|
||||
app.POST("/v1/chat/completions",
|
||||
func(c echo.Context) error {
|
||||
if cfg, ok := c.Get(CONTEXT_LOCALS_KEY_MODEL_CONFIG).(*config.ModelConfig); ok {
|
||||
capturedConfig = cfg
|
||||
}
|
||||
return c.String(http.StatusOK, "ok")
|
||||
},
|
||||
re.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.OpenAIRequest) }),
|
||||
func(next echo.HandlerFunc) echo.HandlerFunc {
|
||||
return func(c echo.Context) error {
|
||||
if err := re.SetOpenAIRequest(c); err != nil {
|
||||
return err
|
||||
}
|
||||
return next(c)
|
||||
}
|
||||
},
|
||||
)
|
||||
})
|
||||
|
||||
AfterEach(func() {
|
||||
_ = os.RemoveAll(modelDir)
|
||||
})
|
||||
|
||||
// chatReq wraps a tool_choice JSON fragment in a minimal valid chat-completions
|
||||
// payload. The tools array is non-empty so downstream code paths that gate on
|
||||
// len(input.Functions) see something to work with.
|
||||
chatReq := func(toolChoiceJSON string) string {
|
||||
return `{"model":"test-model",` +
|
||||
`"messages":[{"role":"user","content":"hi"}],` +
|
||||
`"tools":[{"type":"function","function":{"name":"get_weather"}}],` +
|
||||
`"tool_choice":` + toolChoiceJSON + `}`
|
||||
}
|
||||
|
||||
Context("string tool_choice", func() {
|
||||
It("engages mode for tool_choice=\"required\"", func() {
|
||||
rec := postJSON(app, "/v1/chat/completions", chatReq(`"required"`))
|
||||
|
||||
Expect(rec.Code).To(Equal(http.StatusOK))
|
||||
Expect(capturedConfig).ToNot(BeNil())
|
||||
Expect(capturedConfig.ShouldCallSpecificFunction()).To(BeFalse())
|
||||
Expect(capturedConfig.ShouldUseFunctions()).To(BeTrue())
|
||||
})
|
||||
|
||||
It("disables tools for tool_choice=\"none\"", func() {
|
||||
// Before #9559 this was a silent no-op (json.Unmarshal of "none"
|
||||
// into functions.Tool failed); now "none" is honored per OpenAI spec.
|
||||
rec := postJSON(app, "/v1/chat/completions", chatReq(`"none"`))
|
||||
|
||||
Expect(rec.Code).To(Equal(http.StatusOK))
|
||||
Expect(capturedConfig).ToNot(BeNil())
|
||||
Expect(capturedConfig.ShouldUseFunctions()).To(BeFalse())
|
||||
Expect(capturedConfig.ShouldCallSpecificFunction()).To(BeFalse())
|
||||
})
|
||||
|
||||
It("leaves config untouched for tool_choice=\"auto\"", func() {
|
||||
rec := postJSON(app, "/v1/chat/completions", chatReq(`"auto"`))
|
||||
|
||||
Expect(rec.Code).To(Equal(http.StatusOK))
|
||||
Expect(capturedConfig).ToNot(BeNil())
|
||||
Expect(capturedConfig.ShouldCallSpecificFunction()).To(BeFalse())
|
||||
// "auto" is the default: tools available, model decides.
|
||||
Expect(capturedConfig.ShouldUseFunctions()).To(BeTrue())
|
||||
Expect(capturedConfig.FunctionToCall()).To(Equal(""))
|
||||
})
|
||||
})
|
||||
|
||||
Context("specific-function tool_choice (OpenAI spec shape)", func() {
|
||||
It("parses {type:function, function:{name:...}} and forces the named function", func() {
|
||||
rec := postJSON(app, "/v1/chat/completions",
|
||||
chatReq(`{"type":"function","function":{"name":"get_weather"}}`))
|
||||
|
||||
Expect(rec.Code).To(Equal(http.StatusOK))
|
||||
Expect(capturedConfig).ToNot(BeNil())
|
||||
// Key invariant: a correctly-formed OpenAI tool_choice must engage
|
||||
// grammar-based forcing via SetFunctionCallNameString.
|
||||
Expect(capturedConfig.ShouldCallSpecificFunction()).To(BeTrue())
|
||||
Expect(capturedConfig.FunctionToCall()).To(Equal("get_weather"))
|
||||
})
|
||||
|
||||
It("prefers the nested function.name over a stray top-level name", func() {
|
||||
rec := postJSON(app, "/v1/chat/completions",
|
||||
chatReq(`{"type":"function","function":{"name":"correct_name"},"name":"legacy_name"}`))
|
||||
|
||||
Expect(rec.Code).To(Equal(http.StatusOK))
|
||||
Expect(capturedConfig).ToNot(BeNil())
|
||||
Expect(capturedConfig.FunctionToCall()).To(Equal("correct_name"))
|
||||
})
|
||||
})
|
||||
|
||||
Context("specific-function tool_choice (legacy Anthropic-compat shape)", func() {
|
||||
It("parses {type:function, name:...} and forces the named function", func() {
|
||||
rec := postJSON(app, "/v1/chat/completions",
|
||||
chatReq(`{"type":"function","name":"get_weather"}`))
|
||||
|
||||
Expect(rec.Code).To(Equal(http.StatusOK))
|
||||
Expect(capturedConfig).ToNot(BeNil())
|
||||
Expect(capturedConfig.ShouldCallSpecificFunction()).To(BeTrue())
|
||||
Expect(capturedConfig.FunctionToCall()).To(Equal("get_weather"))
|
||||
})
|
||||
})
|
||||
|
||||
// Some non-spec clients send the object form serialized as a JSON string.
|
||||
// The pre-#9559 code accepted that by accident; this Context locks in
|
||||
// continued tolerance so those clients do not silently regress.
|
||||
Context("double-encoded tool_choice (JSON string of an object, non-spec)", func() {
|
||||
It("parses a serialized OpenAI-spec nested object", func() {
|
||||
// tool_choice value is itself a JSON-encoded string containing the
|
||||
// object form. Use json.Marshal of the inner blob so the escapes
|
||||
// are correct regardless of the test reader.
|
||||
inner := `{"type":"function","function":{"name":"get_weather"}}`
|
||||
encoded, err := json.Marshal(inner)
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
rec := postJSON(app, "/v1/chat/completions", chatReq(string(encoded)))
|
||||
|
||||
Expect(rec.Code).To(Equal(http.StatusOK))
|
||||
Expect(capturedConfig).ToNot(BeNil())
|
||||
Expect(capturedConfig.ShouldCallSpecificFunction()).To(BeTrue())
|
||||
Expect(capturedConfig.FunctionToCall()).To(Equal("get_weather"))
|
||||
})
|
||||
|
||||
It("parses a serialized legacy/Anthropic flat object", func() {
|
||||
inner := `{"type":"function","name":"get_weather"}`
|
||||
encoded, err := json.Marshal(inner)
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
rec := postJSON(app, "/v1/chat/completions", chatReq(string(encoded)))
|
||||
|
||||
Expect(rec.Code).To(Equal(http.StatusOK))
|
||||
Expect(capturedConfig).ToNot(BeNil())
|
||||
Expect(capturedConfig.ShouldCallSpecificFunction()).To(BeTrue())
|
||||
Expect(capturedConfig.FunctionToCall()).To(Equal("get_weather"))
|
||||
})
|
||||
|
||||
It("falls back to mode-string handling when the JSON string parses but has no usable name", func() {
|
||||
// A JSON-string that decodes to a map without a function name
|
||||
// should not engage specific-function forcing. We expect it to
|
||||
// fall through to the mode-string path; the resulting mode is
|
||||
// the raw blob (nonsense), but ShouldCallSpecificFunction stays
|
||||
// false - the invariant that matters.
|
||||
inner := `{"type":"function"}`
|
||||
encoded, err := json.Marshal(inner)
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
rec := postJSON(app, "/v1/chat/completions", chatReq(string(encoded)))
|
||||
|
||||
Expect(rec.Code).To(Equal(http.StatusOK))
|
||||
Expect(capturedConfig).ToNot(BeNil())
|
||||
Expect(capturedConfig.ShouldCallSpecificFunction()).To(BeFalse())
|
||||
})
|
||||
})
|
||||
|
||||
Context("malformed tool_choice", func() {
|
||||
It("is a no-op when type is missing", func() {
|
||||
rec := postJSON(app, "/v1/chat/completions",
|
||||
chatReq(`{"function":{"name":"get_weather"}}`))
|
||||
|
||||
Expect(rec.Code).To(Equal(http.StatusOK))
|
||||
Expect(capturedConfig).ToNot(BeNil())
|
||||
Expect(capturedConfig.ShouldCallSpecificFunction()).To(BeFalse())
|
||||
})
|
||||
|
||||
It("is a no-op when type is not \"function\"", func() {
|
||||
rec := postJSON(app, "/v1/chat/completions",
|
||||
chatReq(`{"type":"object","function":{"name":"get_weather"}}`))
|
||||
|
||||
Expect(rec.Code).To(Equal(http.StatusOK))
|
||||
Expect(capturedConfig).ToNot(BeNil())
|
||||
Expect(capturedConfig.ShouldCallSpecificFunction()).To(BeFalse())
|
||||
})
|
||||
|
||||
It("is a no-op when name is missing from both shapes", func() {
|
||||
rec := postJSON(app, "/v1/chat/completions",
|
||||
chatReq(`{"type":"function","function":{}}`))
|
||||
|
||||
Expect(rec.Code).To(Equal(http.StatusOK))
|
||||
Expect(capturedConfig).ToNot(BeNil())
|
||||
Expect(capturedConfig.ShouldCallSpecificFunction()).To(BeFalse())
|
||||
Expect(capturedConfig.FunctionToCall()).To(Equal(""))
|
||||
})
|
||||
|
||||
It("is a no-op when name is empty string", func() {
|
||||
rec := postJSON(app, "/v1/chat/completions",
|
||||
chatReq(`{"type":"function","function":{"name":""}}`))
|
||||
|
||||
Expect(rec.Code).To(Equal(http.StatusOK))
|
||||
Expect(capturedConfig).ToNot(BeNil())
|
||||
Expect(capturedConfig.ShouldCallSpecificFunction()).To(BeFalse())
|
||||
})
|
||||
})
|
||||
|
||||
Context("nil tool_choice", func() {
|
||||
It("is a no-op", func() {
|
||||
rec := postJSON(app, "/v1/chat/completions",
|
||||
`{"model":"test-model","messages":[{"role":"user","content":"hi"}]}`)
|
||||
|
||||
Expect(rec.Code).To(Equal(http.StatusOK))
|
||||
Expect(capturedConfig).ToNot(BeNil())
|
||||
Expect(capturedConfig.ShouldCallSpecificFunction()).To(BeFalse())
|
||||
Expect(capturedConfig.FunctionToCall()).To(Equal(""))
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
51
core/http/react-ui/package-lock.json
generated
51
core/http/react-ui/package-lock.json
generated
@@ -16,6 +16,8 @@
|
||||
"@codemirror/search": "^6.5.10",
|
||||
"@codemirror/state": "^6.5.2",
|
||||
"@codemirror/view": "^6.36.8",
|
||||
"@fontsource-variable/geist": "^5.2.8",
|
||||
"@fontsource-variable/geist-mono": "^5.2.7",
|
||||
"@fortawesome/fontawesome-free": "^6.7.2",
|
||||
"@lezer/highlight": "^1.2.1",
|
||||
"@modelcontextprotocol/ext-apps": "^1.2.2",
|
||||
@@ -965,6 +967,24 @@
|
||||
"node": "^18.18.0 || ^20.9.0 || >=21.1.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@fontsource-variable/geist": {
|
||||
"version": "5.2.8",
|
||||
"resolved": "https://registry.npmjs.org/@fontsource-variable/geist/-/geist-5.2.8.tgz",
|
||||
"integrity": "sha512-cJ6m9e+8MQ5dCYJsLylfZrgBh6KkG4bOLckB35Tr9J/EqdkEM6QllH5PxqP1dhTvFup+HtMRPuz9xOjxXJggxw==",
|
||||
"license": "OFL-1.1",
|
||||
"funding": {
|
||||
"url": "https://github.com/sponsors/ayuhito"
|
||||
}
|
||||
},
|
||||
"node_modules/@fontsource-variable/geist-mono": {
|
||||
"version": "5.2.7",
|
||||
"resolved": "https://registry.npmjs.org/@fontsource-variable/geist-mono/-/geist-mono-5.2.7.tgz",
|
||||
"integrity": "sha512-ZKlZ5sjtalb2TwXKs400mAGDlt/+2ENLNySPx0wTz3bP3mWARCsUW+rpxzZc7e05d2qGch70pItt3K4qttbIYA==",
|
||||
"license": "OFL-1.1",
|
||||
"funding": {
|
||||
"url": "https://github.com/sponsors/ayuhito"
|
||||
}
|
||||
},
|
||||
"node_modules/@fortawesome/fontawesome-free": {
|
||||
"version": "6.7.2",
|
||||
"resolved": "https://registry.npmjs.org/@fortawesome/fontawesome-free/-/fontawesome-free-6.7.2.tgz",
|
||||
@@ -2903,11 +2923,12 @@
|
||||
}
|
||||
},
|
||||
"node_modules/express-rate-limit": {
|
||||
"version": "8.3.1",
|
||||
"resolved": "https://registry.npmjs.org/express-rate-limit/-/express-rate-limit-8.3.1.tgz",
|
||||
"integrity": "sha512-D1dKN+cmyPWuvB+G2SREQDzPY1agpBIcTa9sJxOPMCNeH3gwzhqJRDWCXW3gg0y//+LQ/8j52JbMROWyrKdMdw==",
|
||||
"version": "8.5.1",
|
||||
"resolved": "https://registry.npmjs.org/express-rate-limit/-/express-rate-limit-8.5.1.tgz",
|
||||
"integrity": "sha512-5O6KYmyJEpuPJV5hNTXKbAHWRqrzyu+OI3vUnSd2kXFubIVpG7ezpgxQy76Zo5GQZtrQBg86hF+CM/NX+cioiQ==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"ip-address": "10.1.0"
|
||||
"ip-address": "^10.2.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">= 16"
|
||||
@@ -2951,9 +2972,9 @@
|
||||
"dev": true
|
||||
},
|
||||
"node_modules/fast-uri": {
|
||||
"version": "3.1.0",
|
||||
"resolved": "https://registry.npmjs.org/fast-uri/-/fast-uri-3.1.0.tgz",
|
||||
"integrity": "sha512-iPeeDKJSWf4IEOasVVrknXpaBV0IApz/gp7S2bb7Z4Lljbl2MGJRqInZiUrQwV16cpzw/D3S5j5Julj/gT52AA==",
|
||||
"version": "3.1.2",
|
||||
"resolved": "https://registry.npmjs.org/fast-uri/-/fast-uri-3.1.2.tgz",
|
||||
"integrity": "sha512-rVjf7ArG3LTk+FS6Yw81V1DLuZl1bRbNrev6Tmd/9RaroeeRRJhAt7jg/6YFxbvAQXUCavSoZhPPj6oOx+5KjQ==",
|
||||
"funding": [
|
||||
{
|
||||
"type": "github",
|
||||
@@ -2963,7 +2984,8 @@
|
||||
"type": "opencollective",
|
||||
"url": "https://opencollective.com/fastify"
|
||||
}
|
||||
]
|
||||
],
|
||||
"license": "BSD-3-Clause"
|
||||
},
|
||||
"node_modules/fastq": {
|
||||
"version": "1.20.1",
|
||||
@@ -3421,9 +3443,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/hono": {
|
||||
"version": "4.12.14",
|
||||
"resolved": "https://registry.npmjs.org/hono/-/hono-4.12.14.tgz",
|
||||
"integrity": "sha512-am5zfg3yu6sqn5yjKBNqhnTX7Cv+m00ox+7jbaKkrLMRJ4rAdldd1xPd/JzbBWspqaQv6RSTrgFN95EsfhC+7w==",
|
||||
"version": "4.12.18",
|
||||
"resolved": "https://registry.npmjs.org/hono/-/hono-4.12.18.tgz",
|
||||
"integrity": "sha512-RWzP96k/yv0PQfyXnWjs6zot20TqfpfsNXhOnev8d1InAxubW93L11/oNUc3tQqn2G0bSdAOBpX+2uDFHV7kdQ==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">=16.9.0"
|
||||
@@ -3681,9 +3703,10 @@
|
||||
"integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ=="
|
||||
},
|
||||
"node_modules/ip-address": {
|
||||
"version": "10.1.0",
|
||||
"resolved": "https://registry.npmjs.org/ip-address/-/ip-address-10.1.0.tgz",
|
||||
"integrity": "sha512-XXADHxXmvT9+CRxhXg56LJovE+bmWnEWB78LB83VZTprKTmaC5QfruXocxzTZ2Kl0DNwKuBdlIhjL8LeY8Sf8Q==",
|
||||
"version": "10.2.0",
|
||||
"resolved": "https://registry.npmjs.org/ip-address/-/ip-address-10.2.0.tgz",
|
||||
"integrity": "sha512-/+S6j4E9AHvW9SWMSEY9Xfy66O5PWvVEJ08O0y5JGyEKQpojb0K0GKpz/v5HJ/G0vi3D2sjGK78119oXZeE0qA==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">= 12"
|
||||
}
|
||||
|
||||
@@ -24,6 +24,7 @@
|
||||
"diarization": "Diarization",
|
||||
"soundGen": "Sound",
|
||||
"audioTransform": "Audio FX",
|
||||
"realtimeAudio": "Realtime Audio",
|
||||
"embedding": "Embeddings",
|
||||
"rerank": "Rerank",
|
||||
"detection": "Detection",
|
||||
|
||||
@@ -732,6 +732,9 @@ export default function FineTune() {
|
||||
const [seed, setSeed] = useState(0)
|
||||
const [mixedPrecision, setMixedPrecision] = useState('')
|
||||
const [extraOptions, setExtraOptions] = useState([])
|
||||
// liquid-audio specific knobs (folded into extra_options on submit)
|
||||
const [liquidAudioVoice, setLiquidAudioVoice] = useState('')
|
||||
const [liquidAudioValDataset, setLiquidAudioValDataset] = useState('')
|
||||
const [hfToken, setHfToken] = useState('')
|
||||
const [showAdvanced, setShowAdvanced] = useState(false)
|
||||
const [resumeFromCheckpoint, setResumeFromCheckpoint] = useState('')
|
||||
@@ -801,6 +804,12 @@ export default function FineTune() {
|
||||
for (const { key, value } of extraOptions) {
|
||||
if (key.trim()) extra[key.trim()] = value
|
||||
}
|
||||
// Fold liquid-audio specific fields into extra_options. The Python
|
||||
// backend reads `voice` and `val_dataset` directly from there.
|
||||
if (backend === 'liquid-audio') {
|
||||
if (liquidAudioVoice) extra.voice = liquidAudioVoice
|
||||
if (liquidAudioValDataset.trim()) extra.val_dataset = liquidAudioValDataset.trim()
|
||||
}
|
||||
|
||||
const isAdapter = ['lora', 'loha', 'lokr'].includes(trainingType)
|
||||
|
||||
@@ -872,6 +881,10 @@ export default function FineTune() {
|
||||
for (const { key, value } of extraOptions) {
|
||||
if (key.trim()) extra[key.trim()] = value
|
||||
}
|
||||
if (backend === 'liquid-audio') {
|
||||
if (liquidAudioVoice) extra.voice = liquidAudioVoice
|
||||
if (liquidAudioValDataset.trim()) extra.val_dataset = liquidAudioValDataset.trim()
|
||||
}
|
||||
return {
|
||||
model,
|
||||
backend,
|
||||
@@ -965,10 +978,15 @@ export default function FineTune() {
|
||||
setSaveTotalLimit(Number(config.extra_options.save_total_limit))
|
||||
}
|
||||
|
||||
// Restore liquid-audio specific extras (also filtered out of the
|
||||
// freeform list below).
|
||||
if (config.extra_options?.voice != null) setLiquidAudioVoice(String(config.extra_options.voice))
|
||||
if (config.extra_options?.val_dataset != null) setLiquidAudioValDataset(String(config.extra_options.val_dataset))
|
||||
|
||||
// Convert extra_options object to [{key, value}] entries, filtering out handled keys
|
||||
if (config.extra_options && typeof config.extra_options === 'object') {
|
||||
const entries = Object.entries(config.extra_options)
|
||||
.filter(([k]) => !['max_seq_length', 'save_total_limit', 'hf_token', 'eval_strategy', 'eval_steps', 'eval_split', 'eval_dataset_source', 'eval_split_ratio'].includes(k))
|
||||
.filter(([k]) => !['max_seq_length', 'save_total_limit', 'hf_token', 'eval_strategy', 'eval_steps', 'eval_split', 'eval_dataset_source', 'eval_split_ratio', 'voice', 'val_dataset'].includes(k))
|
||||
.map(([key, value]) => ({ key, value: String(value) }))
|
||||
setExtraOptions(entries)
|
||||
}
|
||||
@@ -1458,6 +1476,31 @@ export default function FineTune() {
|
||||
</div>
|
||||
)}
|
||||
|
||||
{backend === 'liquid-audio' && (
|
||||
<div style={{ marginBottom: 'var(--spacing-md)' }}>
|
||||
<label className="form-label">Liquid Audio</label>
|
||||
<div style={{ fontSize: '0.8125rem', color: 'var(--color-text-muted)', marginBottom: 'var(--spacing-sm)' }}>
|
||||
Dataset must be preprocessed by <code>LFM2AudioChatMapper</code> (a directory of LFM2DataLoader-ready arrow files). See <code>liquid_audio/examples/preprocess_jenny_tts.py</code> for the conversion recipe.
|
||||
</div>
|
||||
<div style={{ display: 'grid', gridTemplateColumns: 'repeat(auto-fit, minmax(220px, 1fr))', gap: 'var(--spacing-sm)' }}>
|
||||
<div>
|
||||
<label className="form-label">TTS Voice (optional)</label>
|
||||
<select value={liquidAudioVoice} onChange={e => setLiquidAudioVoice(e.target.value)} className="input">
|
||||
<option value="">— inherit from system prompt —</option>
|
||||
<option value="us_male">us_male</option>
|
||||
<option value="us_female">us_female</option>
|
||||
<option value="uk_male">uk_male</option>
|
||||
<option value="uk_female">uk_female</option>
|
||||
</select>
|
||||
</div>
|
||||
<div>
|
||||
<label className="form-label">Validation Dataset (path)</label>
|
||||
<input type="text" value={liquidAudioValDataset} onChange={e => setLiquidAudioValDataset(e.target.value)} placeholder="e.g. /data/jenny_tts/val" className="input" />
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
|
||||
<div>
|
||||
<label className="form-label">Extra Options (backend-specific key-value pairs)</label>
|
||||
<KeyValueEditor entries={extraOptions} onChange={setExtraOptions} />
|
||||
|
||||
@@ -28,6 +28,7 @@ const FILTERS = [
|
||||
{ key: 'diarization', labelKey: 'filters.diarization', icon: 'fa-users' },
|
||||
{ key: 'sound_generation', labelKey: 'filters.soundGen', icon: 'fa-music' },
|
||||
{ key: 'audio_transform', labelKey: 'filters.audioTransform', icon: 'fa-sliders' },
|
||||
{ key: 'realtime_audio', labelKey: 'filters.realtimeAudio', icon: 'fa-tower-broadcast' },
|
||||
{ key: 'embeddings', labelKey: 'filters.embedding', icon: 'fa-vector-square' },
|
||||
{ key: 'rerank', labelKey: 'filters.rerank', icon: 'fa-sort' },
|
||||
{ key: 'detection', labelKey: 'filters.detection', icon: 'fa-bullseye' },
|
||||
|
||||
@@ -2,6 +2,10 @@ import { useState, useRef, useEffect, useCallback, useMemo } from 'react'
|
||||
import { useOutletContext, useNavigate } from 'react-router-dom'
|
||||
import { realtimeApi } from '../utils/api'
|
||||
import ModelSelector from '../components/ModelSelector'
|
||||
import ClientMCPDropdown from '../components/ClientMCPDropdown'
|
||||
import { useMCPClient } from '../hooks/useMCPClient'
|
||||
import { loadClientMCPServers } from '../utils/mcpClientStorage'
|
||||
import { useAuth } from '../context/AuthContext'
|
||||
|
||||
const STATUS_STYLES = {
|
||||
disconnected: { icon: 'fa-solid fa-circle', color: 'var(--color-text-secondary)', bg: 'transparent' },
|
||||
@@ -40,6 +44,27 @@ export default function Talk() {
|
||||
const [voiceEdited, setVoiceEdited] = useState(false)
|
||||
const [language, setLanguage] = useState('')
|
||||
|
||||
// Client MCP — mirrors the chat page's wiring (useMCPClient + ClientMCPDropdown).
|
||||
// Talk has a single ephemeral session, so the active server set lives in component
|
||||
// state rather than per-chat config.
|
||||
const [clientMCPServers, setClientMCPServers] = useState(() => loadClientMCPServers())
|
||||
const [activeMCPIds, setActiveMCPIds] = useState([])
|
||||
const {
|
||||
connect: mcpConnect,
|
||||
disconnect: mcpDisconnect,
|
||||
getToolsForLLM,
|
||||
isClientTool,
|
||||
executeTool,
|
||||
connectionStatuses,
|
||||
getConnectedTools,
|
||||
} = useMCPClient()
|
||||
|
||||
// LocalAI Assistant ("Manage Mode") — mirrors the chat-page toggle.
|
||||
// Admin-only; the realtime endpoint enforces the gate too. When on, the
|
||||
// backend mounts the in-process MCP admin tool surface for this session.
|
||||
const { isAdmin } = useAuth()
|
||||
const [manageMode, setManageMode] = useState(false)
|
||||
|
||||
// Diagnostics
|
||||
const [diagVisible, setDiagVisible] = useState(false)
|
||||
|
||||
@@ -75,7 +100,7 @@ export default function Talk() {
|
||||
if (!voiceEdited) setVoice(models[0].voice || '')
|
||||
}
|
||||
})
|
||||
.catch(err => addToast(`Failed to load pipeline models: ${err.message}`, 'error', 5000, { link: { href: '/app/traces?tab=backend', text: 'View traces' } }))
|
||||
.catch(err => addToast(`Failed to load realtime models: ${err.message}`, 'error', 5000, { link: { href: '/app/traces?tab=backend', text: 'View traces' } }))
|
||||
.finally(() => setModelsLoading(false))
|
||||
}, [])
|
||||
|
||||
@@ -84,6 +109,32 @@ export default function Talk() {
|
||||
transcriptEndRef.current?.scrollIntoView({ behavior: 'smooth' })
|
||||
}, [transcript])
|
||||
|
||||
// Mirror Chat.jsx: connect / disconnect client MCP servers as the user toggles them.
|
||||
useEffect(() => {
|
||||
const activeSet = new Set(activeMCPIds)
|
||||
for (const server of clientMCPServers) {
|
||||
const status = connectionStatuses[server.id]?.status
|
||||
if (activeSet.has(server.id) && status !== 'connected' && status !== 'connecting') {
|
||||
mcpConnect(server)
|
||||
} else if (!activeSet.has(server.id) && (status === 'connected' || status === 'connecting')) {
|
||||
mcpDisconnect(server.id)
|
||||
}
|
||||
}
|
||||
}, [activeMCPIds.join(','), clientMCPServers, connectionStatuses, mcpConnect, mcpDisconnect])
|
||||
|
||||
const handleClientMCPToggle = useCallback((serverId) => {
|
||||
setActiveMCPIds(prev => prev.includes(serverId) ? prev.filter(s => s !== serverId) : [...prev, serverId])
|
||||
}, [])
|
||||
const handleClientMCPServerAdded = useCallback((server) => {
|
||||
setClientMCPServers(loadClientMCPServers())
|
||||
setActiveMCPIds(prev => prev.includes(server.id) ? prev : [...prev, server.id])
|
||||
}, [])
|
||||
const handleClientMCPServerRemoved = useCallback(async (id) => {
|
||||
await mcpDisconnect(id)
|
||||
setClientMCPServers(loadClientMCPServers())
|
||||
setActiveMCPIds(prev => prev.filter(s => s !== id))
|
||||
}, [mcpDisconnect])
|
||||
|
||||
const selectedModelInfo = pipelineModels.find(m => m.name === selectedModel)
|
||||
|
||||
// ── Status helper ──
|
||||
@@ -96,7 +147,9 @@ export default function Talk() {
|
||||
const sendSessionUpdate = useCallback(() => {
|
||||
const dc = dcRef.current
|
||||
if (!dc || dc.readyState !== 'open') return
|
||||
if (!instructions.trim() && !voice.trim() && !language.trim()) return
|
||||
|
||||
const tools = getToolsForLLM()
|
||||
if (!instructions.trim() && !voice.trim() && !language.trim() && tools.length === 0) return
|
||||
|
||||
const session = {}
|
||||
if (instructions.trim()) session.instructions = instructions.trim()
|
||||
@@ -105,9 +158,57 @@ export default function Talk() {
|
||||
if (voice.trim()) session.audio.output = { voice: voice.trim() }
|
||||
if (language.trim()) session.audio.input = { transcription: { language: language.trim() } }
|
||||
}
|
||||
// Pass MCP-server-advertised tools straight through. Server-side they
|
||||
// get rendered into the model's prompt via the function:/argument_regex
|
||||
// pair on the model config (gallery/lfm.yaml for LFM2.5-Audio).
|
||||
if (tools.length > 0) session.tools = tools
|
||||
|
||||
dc.send(JSON.stringify({ type: 'session.update', session }))
|
||||
}, [instructions, voice, language])
|
||||
}, [instructions, voice, language, getToolsForLLM])
|
||||
|
||||
// Re-send session.update whenever the tool set changes mid-session so the
|
||||
// model sees newly-toggled MCP servers without a reconnect.
|
||||
useEffect(() => {
|
||||
if (isConnected) sendSessionUpdate()
|
||||
// eslint-disable-next-line react-hooks/exhaustive-deps
|
||||
}, [activeMCPIds.join(',')])
|
||||
|
||||
// ── Function-call dispatcher ──
|
||||
// Mirrors the chat-page agentic loop: collect args from the model's
|
||||
// function_call_arguments.done event, hand them to the MCP client's
|
||||
// executeTool, then echo the result back via conversation.item.create +
|
||||
// response.create so the model can complete its turn with the tool output.
|
||||
const handleFunctionCall = useCallback(async (event) => {
|
||||
const dc = dcRef.current
|
||||
if (!dc || dc.readyState !== 'open') return
|
||||
const { call_id: callId, name, arguments: argsJson } = event
|
||||
if (!callId || !name) return
|
||||
if (!isClientTool(name)) {
|
||||
// No MCP server advertises this tool — let the model know so it can
|
||||
// recover instead of hanging.
|
||||
dc.send(JSON.stringify({
|
||||
type: 'conversation.item.create',
|
||||
item: { type: 'function_call_output', call_id: callId, output: `Error: unknown tool "${name}"` },
|
||||
}))
|
||||
dc.send(JSON.stringify({ type: 'response.create' }))
|
||||
return
|
||||
}
|
||||
updateStatus('thinking', `Running tool ${name}...`)
|
||||
try {
|
||||
const result = await executeTool(name, argsJson)
|
||||
dc.send(JSON.stringify({
|
||||
type: 'conversation.item.create',
|
||||
item: { type: 'function_call_output', call_id: callId, output: typeof result === 'string' ? result : JSON.stringify(result) },
|
||||
}))
|
||||
dc.send(JSON.stringify({ type: 'response.create' }))
|
||||
} catch (err) {
|
||||
dc.send(JSON.stringify({
|
||||
type: 'conversation.item.create',
|
||||
item: { type: 'function_call_output', call_id: callId, output: `Error: ${err?.message || err}` },
|
||||
}))
|
||||
dc.send(JSON.stringify({ type: 'response.create' }))
|
||||
}
|
||||
}, [executeTool, isClientTool, updateStatus])
|
||||
|
||||
// ── Server event handler ──
|
||||
const handleServerEvent = useCallback((event) => {
|
||||
@@ -163,6 +264,32 @@ export default function Talk() {
|
||||
case 'response.output_audio.delta':
|
||||
updateStatus('speaking', 'Speaking...')
|
||||
break
|
||||
case 'response.output_item.done': {
|
||||
// Server-executed tools (Manage Mode) surface as output items —
|
||||
// FunctionCall when the model invokes a tool, FunctionCallOutput
|
||||
// once the server has run it. Render both on `done` so we get
|
||||
// each transcript entry exactly once.
|
||||
const item = event.item
|
||||
if (!item) break
|
||||
if (item.FunctionCall) {
|
||||
setTranscript(prev => [...prev, {
|
||||
role: 'tool_call',
|
||||
text: `${item.FunctionCall.name}(${item.FunctionCall.arguments || ''})`,
|
||||
}])
|
||||
} else if (item.FunctionCallOutput) {
|
||||
let preview = item.FunctionCallOutput.output || ''
|
||||
// Pretty-print JSON for readability; fall back to raw string.
|
||||
try { preview = JSON.stringify(JSON.parse(preview), null, 2) } catch (_) { /* keep raw */ }
|
||||
setTranscript(prev => [...prev, { role: 'tool_result', text: preview }])
|
||||
streamingRef.current = null // tool result ends the current assistant text run
|
||||
}
|
||||
break
|
||||
}
|
||||
case 'response.function_call_arguments.done':
|
||||
// Don't await — keep the event loop free; handleFunctionCall sends
|
||||
// conversation.item.create + response.create when it's done.
|
||||
handleFunctionCall(event)
|
||||
break
|
||||
case 'response.done':
|
||||
updateStatus('listening', 'Listening...')
|
||||
break
|
||||
@@ -171,12 +298,12 @@ export default function Talk() {
|
||||
updateStatus('error', 'Error: ' + (event.error?.message || 'Unknown error'))
|
||||
break
|
||||
}
|
||||
}, [sendSessionUpdate, updateStatus])
|
||||
}, [sendSessionUpdate, updateStatus, handleFunctionCall])
|
||||
|
||||
// ── Connect ──
|
||||
const connect = useCallback(async () => {
|
||||
if (!selectedModel) {
|
||||
addToast('Please select a pipeline model first.', 'warning')
|
||||
addToast('Please select a realtime model first.', 'warning')
|
||||
return
|
||||
}
|
||||
if (!navigator.mediaDevices?.getUserMedia) {
|
||||
@@ -237,6 +364,7 @@ export default function Talk() {
|
||||
const data = await realtimeApi.call({
|
||||
sdp: pc.localDescription.sdp,
|
||||
model: selectedModel,
|
||||
localai_assistant: manageMode,
|
||||
})
|
||||
|
||||
await pc.setRemoteDescription({ type: 'answer', sdp: data.sdp })
|
||||
@@ -245,7 +373,7 @@ export default function Talk() {
|
||||
updateStatus('error', 'Connection failed: ' + err.message)
|
||||
disconnect()
|
||||
}
|
||||
}, [selectedModel, diagVisible, handleServerEvent, updateStatus, addToast])
|
||||
}, [selectedModel, manageMode, diagVisible, handleServerEvent, updateStatus, addToast])
|
||||
|
||||
// ── Disconnect ──
|
||||
const disconnect = useCallback(() => {
|
||||
@@ -508,8 +636,58 @@ export default function Talk() {
|
||||
</button>
|
||||
</div>
|
||||
|
||||
{/* Tools (client-side MCP servers, mirroring the chat page) */}
|
||||
<div style={{ marginBottom: 'var(--spacing-md)' }}>
|
||||
<label className="form-label" style={{ fontSize: '0.8125rem' }}>
|
||||
<i className="fas fa-screwdriver-wrench" style={{ color: 'var(--color-primary)', marginRight: 4 }} /> Tools
|
||||
</label>
|
||||
<ClientMCPDropdown
|
||||
activeServerIds={activeMCPIds}
|
||||
onToggleServer={handleClientMCPToggle}
|
||||
onServerAdded={handleClientMCPServerAdded}
|
||||
onServerRemoved={handleClientMCPServerRemoved}
|
||||
connectionStatuses={connectionStatuses}
|
||||
getConnectedTools={getConnectedTools}
|
||||
/>
|
||||
{isAdmin && (
|
||||
<label style={{
|
||||
display: 'flex', alignItems: 'center', gap: 'var(--spacing-xs)',
|
||||
marginTop: 'var(--spacing-xs)', fontSize: '0.8125rem',
|
||||
cursor: isConnected ? 'not-allowed' : 'pointer',
|
||||
color: isConnected ? 'var(--color-text-secondary)' : 'var(--color-text)',
|
||||
}}>
|
||||
<input
|
||||
type="checkbox"
|
||||
checked={manageMode}
|
||||
disabled={isConnected}
|
||||
onChange={(e) => setManageMode(e.target.checked)}
|
||||
/>
|
||||
<i className="fas fa-user-shield" style={{ color: 'var(--color-primary)' }} />
|
||||
Manage Mode
|
||||
<span style={{ color: 'var(--color-text-secondary)', fontSize: '0.75rem' }}>
|
||||
— let the model query LocalAI (models, backends, system info)
|
||||
</span>
|
||||
</label>
|
||||
)}
|
||||
</div>
|
||||
|
||||
{/* Pipeline details */}
|
||||
{selectedModelInfo && (
|
||||
{selectedModelInfo && selectedModelInfo.self_contained && (
|
||||
<div style={{
|
||||
background: 'var(--color-bg-secondary)', borderRadius: 'var(--radius-sm)',
|
||||
padding: 'var(--spacing-xs) var(--spacing-sm)', border: '1px solid var(--color-border)',
|
||||
marginBottom: 'var(--spacing-xs)', fontSize: '0.75rem',
|
||||
display: 'flex', alignItems: 'center', gap: 'var(--spacing-xs)',
|
||||
}}>
|
||||
<i className="fas fa-tower-broadcast" style={{ color: 'var(--color-primary)' }} />
|
||||
<span style={{ color: 'var(--color-text-secondary)' }}>Self-contained any-to-any —</span>
|
||||
<span style={{ fontFamily: 'var(--font-mono)', overflow: 'hidden', textOverflow: 'ellipsis', whiteSpace: 'nowrap' }}>
|
||||
{selectedModelInfo.name}
|
||||
</span>
|
||||
<span style={{ color: 'var(--color-text-secondary)', marginLeft: 'auto' }}>handles VAD · STT · LLM · TTS</span>
|
||||
</div>
|
||||
)}
|
||||
{selectedModelInfo && !selectedModelInfo.self_contained && (
|
||||
<div style={{
|
||||
display: 'grid', gridTemplateColumns: 'repeat(4, 1fr)', gap: 'var(--spacing-xs)',
|
||||
marginBottom: 'var(--spacing-xs)', fontSize: '0.75rem',
|
||||
@@ -533,7 +711,8 @@ export default function Talk() {
|
||||
{selectedModelInfo && !isConnected && (
|
||||
<div style={{ marginBottom: 'var(--spacing-md)' }}>
|
||||
<button className="btn btn-secondary btn-sm" onClick={() => navigate(`/app/model-editor/${encodeURIComponent(selectedModel)}`)}>
|
||||
<i className="fas fa-pen-to-square" style={{ marginRight: 'var(--spacing-xs)' }} /> Edit Pipeline
|
||||
<i className="fas fa-pen-to-square" style={{ marginRight: 'var(--spacing-xs)' }} />
|
||||
{selectedModelInfo.self_contained ? ' Edit Model Config' : ' Edit Pipeline'}
|
||||
</button>
|
||||
</div>
|
||||
)}
|
||||
@@ -600,16 +779,28 @@ export default function Talk() {
|
||||
Conversation will appear here...
|
||||
</p>
|
||||
)}
|
||||
{transcript.map((entry, i) => (
|
||||
<div key={i} style={{ display: 'flex', alignItems: 'flex-start', gap: 'var(--spacing-xs)' }}>
|
||||
<i className={entry.role === 'user' ? 'fa-solid fa-user' : 'fa-solid fa-robot'}
|
||||
style={{
|
||||
color: entry.role === 'user' ? 'var(--color-primary)' : 'var(--color-accent)',
|
||||
marginTop: 3, flexShrink: 0, fontSize: '0.75rem',
|
||||
}} />
|
||||
<p style={{ margin: 0 }}>{entry.text}</p>
|
||||
</div>
|
||||
))}
|
||||
{transcript.map((entry, i) => {
|
||||
const isToolCall = entry.role === 'tool_call'
|
||||
const isToolResult = entry.role === 'tool_result'
|
||||
const isUser = entry.role === 'user'
|
||||
const iconClass = isToolCall ? 'fa-solid fa-screwdriver-wrench'
|
||||
: isToolResult ? 'fa-solid fa-clipboard-list'
|
||||
: isUser ? 'fa-solid fa-user' : 'fa-solid fa-robot'
|
||||
const iconColor = isToolCall || isToolResult ? 'var(--color-text-secondary)'
|
||||
: isUser ? 'var(--color-primary)' : 'var(--color-accent)'
|
||||
return (
|
||||
<div key={i} style={{ display: 'flex', alignItems: 'flex-start', gap: 'var(--spacing-xs)' }}>
|
||||
<i className={iconClass} style={{ color: iconColor, marginTop: 3, flexShrink: 0, fontSize: '0.75rem' }} />
|
||||
<p style={{
|
||||
margin: 0,
|
||||
fontFamily: (isToolCall || isToolResult) ? 'var(--font-mono)' : undefined,
|
||||
fontSize: (isToolCall || isToolResult) ? '0.8125rem' : undefined,
|
||||
color: (isToolCall || isToolResult) ? 'var(--color-text-secondary)' : undefined,
|
||||
whiteSpace: isToolResult ? 'pre-wrap' : undefined,
|
||||
}}>{entry.text}</p>
|
||||
</div>
|
||||
)
|
||||
})}
|
||||
<div ref={transcriptEndRef} />
|
||||
</div>
|
||||
|
||||
|
||||
1
core/http/react-ui/src/utils/capabilities.js
vendored
1
core/http/react-ui/src/utils/capabilities.js
vendored
@@ -20,3 +20,4 @@ export const CAP_DETECTION = 'FLAG_DETECTION'
|
||||
export const CAP_FACE_RECOGNITION = 'FLAG_FACE_RECOGNITION'
|
||||
export const CAP_SPEAKER_RECOGNITION = 'FLAG_SPEAKER_RECOGNITION'
|
||||
export const CAP_AUDIO_TRANSFORM = 'FLAG_AUDIO_TRANSFORM'
|
||||
export const CAP_REALTIME_AUDIO = 'FLAG_REALTIME_AUDIO'
|
||||
|
||||
@@ -18,7 +18,11 @@ func RegisterUIRoutes(app *echo.Echo,
|
||||
// SPA routes are handled by the 404 fallback in app.go which serves
|
||||
// index.html for any unmatched HTML request, enabling client-side routing.
|
||||
|
||||
// Pipeline models API (for the Talk page WebRTC interface)
|
||||
// Pipeline models API (for the Talk page WebRTC interface).
|
||||
// A model qualifies when it either declares an explicit VAD+STT+LLM+TTS
|
||||
// pipeline (legacy/composed) or carries the realtime_audio usecase (a
|
||||
// self-contained any-to-any audio backend like liquid-audio that owns the
|
||||
// full loop in a single AudioToAudioStream RPC).
|
||||
app.GET("/api/pipeline-models", func(c echo.Context) error {
|
||||
type pipelineModelInfo struct {
|
||||
Name string `json:"name"`
|
||||
@@ -27,9 +31,17 @@ func RegisterUIRoutes(app *echo.Echo,
|
||||
LLM string `json:"llm"`
|
||||
TTS string `json:"tts"`
|
||||
Voice string `json:"voice"`
|
||||
// SelfContained is true for any-to-any audio models — the four
|
||||
// pipeline slots are populated with the model's own name so the
|
||||
// UI can render them, but the Realtime API routes the session
|
||||
// directly to the backend's AudioToAudioStream RPC.
|
||||
SelfContained bool `json:"self_contained,omitempty"`
|
||||
}
|
||||
|
||||
pipelineModels := cl.GetModelConfigsByFilter(func(_ string, cfg *config.ModelConfig) bool {
|
||||
if cfg.HasUsecases(config.FLAG_REALTIME_AUDIO) {
|
||||
return true
|
||||
}
|
||||
p := cfg.Pipeline
|
||||
return p.VAD != "" && p.Transcription != "" && p.LLM != "" && p.TTS != ""
|
||||
})
|
||||
@@ -38,8 +50,20 @@ func RegisterUIRoutes(app *echo.Echo,
|
||||
return cmp.Compare(a.Name, b.Name)
|
||||
})
|
||||
|
||||
var models []pipelineModelInfo
|
||||
models := make([]pipelineModelInfo, 0, len(pipelineModels))
|
||||
for _, cfg := range pipelineModels {
|
||||
if cfg.HasUsecases(config.FLAG_REALTIME_AUDIO) {
|
||||
models = append(models, pipelineModelInfo{
|
||||
Name: cfg.Name,
|
||||
VAD: cfg.Name,
|
||||
Transcription: cfg.Name,
|
||||
LLM: cfg.Name,
|
||||
TTS: cfg.Name,
|
||||
Voice: cfg.TTSConfig.Voice,
|
||||
SelfContained: true,
|
||||
})
|
||||
continue
|
||||
}
|
||||
models = append(models, pipelineModelInfo{
|
||||
Name: cfg.Name,
|
||||
VAD: cfg.Pipeline.VAD,
|
||||
|
||||
@@ -54,6 +54,7 @@ var usecaseFilters = map[string]config.ModelConfigUsecase{
|
||||
config.UsecaseVAD: config.FLAG_VAD,
|
||||
config.UsecaseAudioTransform: config.FLAG_AUDIO_TRANSFORM,
|
||||
config.UsecaseDiarization: config.FLAG_DIARIZATION,
|
||||
config.UsecaseRealtimeAudio: config.FLAG_REALTIME_AUDIO,
|
||||
}
|
||||
|
||||
|
||||
|
||||
153
core/http/routes/ui_pipeline_models_test.go
Normal file
153
core/http/routes/ui_pipeline_models_test.go
Normal file
@@ -0,0 +1,153 @@
|
||||
package routes_test
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"io"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"os"
|
||||
"path/filepath"
|
||||
|
||||
"github.com/labstack/echo/v4"
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
"github.com/mudler/LocalAI/core/http/routes"
|
||||
. "github.com/onsi/ginkgo/v2"
|
||||
. "github.com/onsi/gomega"
|
||||
)
|
||||
|
||||
var _ = Describe("Pipeline models API", func() {
|
||||
var (
|
||||
app *echo.Echo
|
||||
tempDir string
|
||||
configLoader *config.ModelConfigLoader
|
||||
)
|
||||
|
||||
BeforeEach(func() {
|
||||
var err error
|
||||
tempDir, err = os.MkdirTemp("", "pipeline-models-test-*")
|
||||
Expect(err).NotTo(HaveOccurred())
|
||||
|
||||
configLoader = config.NewModelConfigLoader(tempDir)
|
||||
})
|
||||
|
||||
AfterEach(func() {
|
||||
Expect(os.RemoveAll(tempDir)).To(Succeed())
|
||||
})
|
||||
|
||||
writeConfig := func(name, body string) {
|
||||
path := filepath.Join(tempDir, name+".yaml")
|
||||
Expect(os.WriteFile(path, []byte(body), 0o644)).To(Succeed())
|
||||
}
|
||||
|
||||
queryPipelineModels := func() []map[string]any {
|
||||
Expect(configLoader.LoadModelConfigsFromPath(tempDir)).To(Succeed())
|
||||
|
||||
app = echo.New()
|
||||
routes.RegisterUIRoutes(app, configLoader, nil, nil, func(next echo.HandlerFunc) echo.HandlerFunc { return next })
|
||||
|
||||
req := httptest.NewRequest(http.MethodGet, "/api/pipeline-models", nil)
|
||||
rec := httptest.NewRecorder()
|
||||
app.ServeHTTP(rec, req)
|
||||
Expect(rec.Code).To(Equal(http.StatusOK))
|
||||
body, err := io.ReadAll(rec.Body)
|
||||
Expect(err).NotTo(HaveOccurred())
|
||||
|
||||
var got []map[string]any
|
||||
Expect(json.Unmarshal(body, &got)).To(Succeed())
|
||||
return got
|
||||
}
|
||||
|
||||
It("returns models with an explicit VAD/STT/LLM/TTS pipeline", func() {
|
||||
writeConfig("legacy-pipeline", `
|
||||
name: legacy-pipeline
|
||||
backend: llama-cpp
|
||||
pipeline:
|
||||
vad: silero
|
||||
transcription: whisper
|
||||
llm: llama
|
||||
tts: piper
|
||||
tts:
|
||||
voice: en-amy
|
||||
`)
|
||||
// A model with a partial pipeline must not appear.
|
||||
writeConfig("half-pipeline", `
|
||||
name: half-pipeline
|
||||
backend: llama-cpp
|
||||
pipeline:
|
||||
vad: silero
|
||||
transcription: whisper
|
||||
`)
|
||||
|
||||
models := queryPipelineModels()
|
||||
Expect(models).To(HaveLen(1))
|
||||
Expect(models[0]["name"]).To(Equal("legacy-pipeline"))
|
||||
Expect(models[0]["vad"]).To(Equal("silero"))
|
||||
Expect(models[0]["llm"]).To(Equal("llama"))
|
||||
Expect(models[0]["voice"]).To(Equal("en-amy"))
|
||||
// self_contained is omitempty — absent for legacy pipelines.
|
||||
_, hasFlag := models[0]["self_contained"]
|
||||
Expect(hasFlag).To(BeFalse())
|
||||
})
|
||||
|
||||
It("surfaces self-contained any-to-any models tagged with realtime_audio", func() {
|
||||
writeConfig("lfm-realtime", `
|
||||
name: lfm-realtime
|
||||
backend: liquid-audio
|
||||
known_usecases:
|
||||
- realtime_audio
|
||||
- chat
|
||||
- tts
|
||||
- transcript
|
||||
tts:
|
||||
voice: us_female
|
||||
`)
|
||||
|
||||
models := queryPipelineModels()
|
||||
Expect(models).To(HaveLen(1))
|
||||
Expect(models[0]["name"]).To(Equal("lfm-realtime"))
|
||||
// All four pipeline slots are populated with the model's own name so
|
||||
// the Talk page UI has something to render.
|
||||
Expect(models[0]["vad"]).To(Equal("lfm-realtime"))
|
||||
Expect(models[0]["transcription"]).To(Equal("lfm-realtime"))
|
||||
Expect(models[0]["llm"]).To(Equal("lfm-realtime"))
|
||||
Expect(models[0]["tts"]).To(Equal("lfm-realtime"))
|
||||
Expect(models[0]["voice"]).To(Equal("us_female"))
|
||||
Expect(models[0]["self_contained"]).To(BeTrue())
|
||||
})
|
||||
|
||||
It("includes both legacy and self-contained models in the same response", func() {
|
||||
writeConfig("legacy", `
|
||||
name: legacy
|
||||
backend: llama-cpp
|
||||
pipeline:
|
||||
vad: silero
|
||||
transcription: whisper
|
||||
llm: llama
|
||||
tts: piper
|
||||
`)
|
||||
writeConfig("realtime", `
|
||||
name: realtime
|
||||
backend: liquid-audio
|
||||
known_usecases:
|
||||
- realtime_audio
|
||||
`)
|
||||
|
||||
models := queryPipelineModels()
|
||||
Expect(models).To(HaveLen(2))
|
||||
// Sorted by name → legacy, realtime.
|
||||
Expect(models[0]["name"]).To(Equal("legacy"))
|
||||
Expect(models[1]["name"]).To(Equal("realtime"))
|
||||
Expect(models[1]["self_contained"]).To(BeTrue())
|
||||
})
|
||||
|
||||
It("excludes models that have neither a pipeline nor realtime_audio", func() {
|
||||
writeConfig("plain-chat", `
|
||||
name: plain-chat
|
||||
backend: llama-cpp
|
||||
known_usecases:
|
||||
- chat
|
||||
`)
|
||||
|
||||
Expect(queryPipelineModels()).To(BeEmpty())
|
||||
})
|
||||
})
|
||||
@@ -120,10 +120,14 @@ type OllamaGenerateResponse struct {
|
||||
EvalDuration int64 `json:"eval_duration,omitempty"`
|
||||
}
|
||||
|
||||
// OllamaEmbedRequest represents a request to the Ollama Embed API
|
||||
// OllamaEmbedRequest represents a request to the Ollama Embed API.
|
||||
// Ollama's /api/embed endpoint accepts both `input` and `prompt` as the
|
||||
// input string value (see https://github.com/ollama/ollama/blob/main/docs/api.md#generate-embeddings),
|
||||
// so both keys are deserialized here for client compatibility.
|
||||
type OllamaEmbedRequest struct {
|
||||
Model string `json:"model"`
|
||||
Input any `json:"input"` // string or []string
|
||||
Model string `json:"model"`
|
||||
Input any `json:"input,omitempty"` // string or []string
|
||||
Prompt any `json:"prompt,omitempty"` // string or []string (Ollama alias for Input)
|
||||
Options *OllamaOptions `json:"options,omitempty"`
|
||||
}
|
||||
|
||||
@@ -135,10 +139,21 @@ func (r *OllamaEmbedRequest) ModelName(s *string) string {
|
||||
return r.Model
|
||||
}
|
||||
|
||||
// GetInputStrings normalizes the Input field to a string slice
|
||||
// GetInputStrings normalizes the Input/Prompt field to a string slice.
|
||||
// Input takes precedence over Prompt when both are provided.
|
||||
func (r *OllamaEmbedRequest) GetInputStrings() []string {
|
||||
switch v := r.Input.(type) {
|
||||
if v := normalizeOllamaEmbedInput(r.Input); v != nil {
|
||||
return v
|
||||
}
|
||||
return normalizeOllamaEmbedInput(r.Prompt)
|
||||
}
|
||||
|
||||
func normalizeOllamaEmbedInput(v any) []string {
|
||||
switch v := v.(type) {
|
||||
case string:
|
||||
if v == "" {
|
||||
return nil
|
||||
}
|
||||
return []string{v}
|
||||
case []any:
|
||||
var result []string
|
||||
|
||||
86
core/schema/ollama_test.go
Normal file
86
core/schema/ollama_test.go
Normal file
@@ -0,0 +1,86 @@
|
||||
package schema_test
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
|
||||
. "github.com/mudler/LocalAI/core/schema"
|
||||
|
||||
. "github.com/onsi/ginkgo/v2"
|
||||
. "github.com/onsi/gomega"
|
||||
)
|
||||
|
||||
var _ = Describe("OllamaEmbedRequest", func() {
|
||||
|
||||
Context("GetInputStrings", func() {
|
||||
It("returns a single string when Input is a string", func() {
|
||||
req := OllamaEmbedRequest{Input: "hello world"}
|
||||
|
||||
Expect(req.GetInputStrings()).To(Equal([]string{"hello world"}))
|
||||
})
|
||||
|
||||
It("returns a list of strings when Input is a []string", func() {
|
||||
req := OllamaEmbedRequest{Input: []string{"hello", "world"}}
|
||||
|
||||
Expect(req.GetInputStrings()).To(Equal([]string{"hello", "world"}))
|
||||
})
|
||||
|
||||
It("returns a list of strings when Input is a []any (post JSON unmarshal)", func() {
|
||||
req := OllamaEmbedRequest{Input: []any{"hello", "world"}}
|
||||
|
||||
Expect(req.GetInputStrings()).To(Equal([]string{"hello", "world"}))
|
||||
})
|
||||
})
|
||||
|
||||
Context("JSON unmarshaling (Ollama API compatibility)", func() {
|
||||
It("accepts the 'input' field as a single string", func() {
|
||||
body := []byte(`{"model": "m", "input": "why is the sky blue?"}`)
|
||||
|
||||
var req OllamaEmbedRequest
|
||||
Expect(json.Unmarshal(body, &req)).To(Succeed())
|
||||
|
||||
Expect(req.Model).To(Equal("m"))
|
||||
Expect(req.GetInputStrings()).To(Equal([]string{"why is the sky blue?"}))
|
||||
})
|
||||
|
||||
It("accepts the 'input' field as an array of strings", func() {
|
||||
body := []byte(`{"model": "m", "input": ["why is the sky blue?", "why is the grass green?"]}`)
|
||||
|
||||
var req OllamaEmbedRequest
|
||||
Expect(json.Unmarshal(body, &req)).To(Succeed())
|
||||
|
||||
Expect(req.GetInputStrings()).To(Equal([]string{"why is the sky blue?", "why is the grass green?"}))
|
||||
})
|
||||
|
||||
// Ollama's embedding endpoint accepts both `input` and `prompt` keys:
|
||||
// https://github.com/ollama/ollama/blob/main/docs/api.md#generate-embeddings
|
||||
// LocalAI must accept `prompt` so client libraries using that key are not broken.
|
||||
// See https://github.com/mudler/LocalAI/issues/9767.
|
||||
It("accepts the 'prompt' field as a single string (Ollama compatibility)", func() {
|
||||
body := []byte(`{"model": "m", "prompt": "why is the sky blue?"}`)
|
||||
|
||||
var req OllamaEmbedRequest
|
||||
Expect(json.Unmarshal(body, &req)).To(Succeed())
|
||||
|
||||
Expect(req.Model).To(Equal("m"))
|
||||
Expect(req.GetInputStrings()).To(Equal([]string{"why is the sky blue?"}))
|
||||
})
|
||||
|
||||
It("accepts the 'prompt' field as an array of strings (Ollama compatibility)", func() {
|
||||
body := []byte(`{"model": "m", "prompt": ["why is the sky blue?", "why is the grass green?"]}`)
|
||||
|
||||
var req OllamaEmbedRequest
|
||||
Expect(json.Unmarshal(body, &req)).To(Succeed())
|
||||
|
||||
Expect(req.GetInputStrings()).To(Equal([]string{"why is the sky blue?", "why is the grass green?"}))
|
||||
})
|
||||
|
||||
It("prefers 'input' when both 'input' and 'prompt' are provided", func() {
|
||||
body := []byte(`{"model": "m", "input": "from input", "prompt": "from prompt"}`)
|
||||
|
||||
var req OllamaEmbedRequest
|
||||
Expect(json.Unmarshal(body, &req)).To(Succeed())
|
||||
|
||||
Expect(req.GetInputStrings()).To(Equal([]string{"from input"}))
|
||||
})
|
||||
})
|
||||
})
|
||||
@@ -11,7 +11,6 @@ import (
|
||||
"io"
|
||||
"net"
|
||||
"net/http"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"slices"
|
||||
"strings"
|
||||
@@ -46,8 +45,6 @@ type AgentJobService struct {
|
||||
tasks *xsync.SyncedMap[string, schema.Task]
|
||||
jobs *xsync.SyncedMap[string, schema.Job]
|
||||
persister JobPersister
|
||||
tasksFile string // Path to agent_tasks.json (kept for backward compat)
|
||||
jobsFile string // Path to agent_jobs.json (kept for backward compat)
|
||||
userID string // Scoping: empty for global (main service), set for per-user instances
|
||||
|
||||
// Job execution channel
|
||||
@@ -70,9 +67,6 @@ type AgentJobService struct {
|
||||
// Service lifecycle
|
||||
ctx context.Context
|
||||
cancel context.CancelFunc
|
||||
|
||||
// Mutex for file operations
|
||||
fileMutex sync.Mutex
|
||||
}
|
||||
|
||||
// DistributedDispatcher is the interface for distributed job dispatching via NATS.
|
||||
@@ -220,8 +214,6 @@ func NewAgentJobServiceWithPaths(
|
||||
tasksFile: tasksFile,
|
||||
jobsFile: jobsFile,
|
||||
},
|
||||
tasksFile: tasksFile,
|
||||
jobsFile: jobsFile,
|
||||
jobQueue: make(chan JobExecution, 100), // Buffer for 100 jobs
|
||||
cancellations: xsync.NewSyncedMap[string, context.CancelFunc](),
|
||||
cronScheduler: cron.New(), // Support seconds in cron
|
||||
@@ -230,127 +222,51 @@ func NewAgentJobServiceWithPaths(
|
||||
}
|
||||
}
|
||||
|
||||
// LoadTasksFromFile loads tasks from agent_tasks.json
|
||||
// LoadTasksFromFile loads tasks from the persister into the in-memory map
|
||||
// and schedules cron entries. Named "FromFile" for backward compat; in DB
|
||||
// mode it loads from the database.
|
||||
func (s *AgentJobService) LoadTasksFromFile() error {
|
||||
if s.tasksFile == "" {
|
||||
return nil // No file path configured
|
||||
}
|
||||
|
||||
s.fileMutex.Lock()
|
||||
defer s.fileMutex.Unlock()
|
||||
|
||||
if _, err := os.Stat(s.tasksFile); os.IsNotExist(err) {
|
||||
xlog.Debug("agent_tasks.json not found, starting with empty tasks")
|
||||
return nil
|
||||
}
|
||||
|
||||
fileContent, err := os.ReadFile(s.tasksFile)
|
||||
tasks, err := s.persister.LoadTasks(s.userID)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to read tasks file: %w", err)
|
||||
return err
|
||||
}
|
||||
|
||||
var tasksFile schema.TasksFile
|
||||
if err := json.Unmarshal(fileContent, &tasksFile); err != nil {
|
||||
return fmt.Errorf("failed to parse tasks file: %w", err)
|
||||
}
|
||||
|
||||
for _, task := range tasksFile.Tasks {
|
||||
for _, task := range tasks {
|
||||
s.tasks.Set(task.ID, task)
|
||||
// Schedule cron if enabled and has cron expression
|
||||
if task.Enabled && task.Cron != "" {
|
||||
if err := s.ScheduleCronTask(task); err != nil {
|
||||
xlog.Warn("Failed to schedule cron task on load", "error", err, "task_id", task.ID)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
xlog.Info("Loaded tasks from file", "count", len(tasksFile.Tasks))
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// SaveTasksToFile saves tasks to agent_tasks.json
|
||||
// SaveTasksToFile flushes the current tasks map via the persister. File
|
||||
// persister bulk-writes the JSON file atomically; DB persister no-ops
|
||||
// because per-task SaveTask calls already wrote through.
|
||||
func (s *AgentJobService) SaveTasksToFile() error {
|
||||
if s.tasksFile == "" {
|
||||
return nil // No file path configured
|
||||
}
|
||||
|
||||
s.fileMutex.Lock()
|
||||
defer s.fileMutex.Unlock()
|
||||
|
||||
tasksFile := schema.TasksFile{
|
||||
Tasks: s.tasks.Values(),
|
||||
}
|
||||
|
||||
fileContent, err := json.MarshalIndent(tasksFile, "", " ")
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to marshal tasks: %w", err)
|
||||
}
|
||||
|
||||
if err := os.WriteFile(s.tasksFile, fileContent, 0600); err != nil {
|
||||
return fmt.Errorf("failed to write tasks file: %w", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
return s.persister.FlushTasks()
|
||||
}
|
||||
|
||||
// LoadJobsFromFile loads jobs from agent_jobs.json
|
||||
// LoadJobsFromFile loads jobs from the persister into the in-memory map.
|
||||
// Named "FromFile" for backward compat; in DB mode it loads from the
|
||||
// database.
|
||||
func (s *AgentJobService) LoadJobsFromFile() error {
|
||||
if s.jobsFile == "" {
|
||||
return nil // No file path configured
|
||||
}
|
||||
|
||||
s.fileMutex.Lock()
|
||||
defer s.fileMutex.Unlock()
|
||||
|
||||
if _, err := os.Stat(s.jobsFile); os.IsNotExist(err) {
|
||||
xlog.Debug("agent_jobs.json not found, starting with empty jobs")
|
||||
return nil
|
||||
}
|
||||
|
||||
fileContent, err := os.ReadFile(s.jobsFile)
|
||||
jobs, err := s.persister.LoadJobs(s.userID)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to read jobs file: %w", err)
|
||||
return err
|
||||
}
|
||||
|
||||
var jobsFile schema.JobsFile
|
||||
if err := json.Unmarshal(fileContent, &jobsFile); err != nil {
|
||||
return fmt.Errorf("failed to parse jobs file: %w", err)
|
||||
}
|
||||
|
||||
// Load jobs into memory
|
||||
for _, job := range jobsFile.Jobs {
|
||||
for _, job := range jobs {
|
||||
s.jobs.Set(job.ID, job)
|
||||
}
|
||||
|
||||
xlog.Info("Loaded jobs from file", "count", len(jobsFile.Jobs))
|
||||
return nil
|
||||
}
|
||||
|
||||
// SaveJobsToFile saves jobs to agent_jobs.json
|
||||
// SaveJobsToFile flushes the current jobs map via the persister. File
|
||||
// persister bulk-writes the JSON file atomically; DB persister no-ops
|
||||
// because per-job SaveJob calls already wrote through.
|
||||
func (s *AgentJobService) SaveJobsToFile() error {
|
||||
if s.jobsFile == "" {
|
||||
return nil // No file path configured
|
||||
}
|
||||
|
||||
s.fileMutex.Lock()
|
||||
defer s.fileMutex.Unlock()
|
||||
|
||||
jobsFile := schema.JobsFile{
|
||||
Jobs: s.jobs.Values(),
|
||||
LastCleanup: time.Now(),
|
||||
}
|
||||
|
||||
fileContent, err := json.MarshalIndent(jobsFile, "", " ")
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to marshal jobs: %w", err)
|
||||
}
|
||||
|
||||
if err := os.WriteFile(s.jobsFile, fileContent, 0600); err != nil {
|
||||
return fmt.Errorf("failed to write jobs file: %w", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
return s.persister.FlushJobs()
|
||||
}
|
||||
|
||||
// CreateTask creates a new task
|
||||
|
||||
@@ -4,6 +4,7 @@ import (
|
||||
"context"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
@@ -281,6 +282,71 @@ var _ = Describe("AgentJobService", func() {
|
||||
Expect(err).NotTo(HaveOccurred())
|
||||
Expect(retrieved.TaskID).To(Equal(taskID))
|
||||
})
|
||||
|
||||
It("does not surface a partial file when saves and loads race", func() {
|
||||
// Regression for the macOS-only CI flake where a concurrent
|
||||
// LoadJobsFromFile landed between os.WriteFile's open(O_TRUNC)
|
||||
// and write, yielding "unexpected end of JSON input" at offset 0.
|
||||
// Atomic temp+rename in the persister eliminates the window.
|
||||
task := schema.Task{
|
||||
Name: "Race Task",
|
||||
Model: "test-model",
|
||||
Prompt: "Test prompt",
|
||||
Enabled: true,
|
||||
}
|
||||
|
||||
taskID, err := service.CreateTask(task)
|
||||
Expect(err).NotTo(HaveOccurred())
|
||||
|
||||
_, err = service.ExecuteJob(taskID, map[string]string{}, "test", nil)
|
||||
Expect(err).NotTo(HaveOccurred())
|
||||
Expect(service.SaveJobsToFile()).To(Succeed())
|
||||
|
||||
newService := agentpool.NewAgentJobService(
|
||||
appConfig,
|
||||
modelLoader,
|
||||
configLoader,
|
||||
evaluator,
|
||||
)
|
||||
|
||||
var wg sync.WaitGroup
|
||||
deadline := time.Now().Add(500 * time.Millisecond)
|
||||
readerErrs := make(chan error, 1024)
|
||||
|
||||
for range 4 {
|
||||
wg.Add(1)
|
||||
go func() {
|
||||
defer wg.Done()
|
||||
for time.Now().Before(deadline) {
|
||||
_ = service.SaveJobsToFile()
|
||||
}
|
||||
}()
|
||||
}
|
||||
|
||||
for range 4 {
|
||||
wg.Add(1)
|
||||
go func() {
|
||||
defer wg.Done()
|
||||
for time.Now().Before(deadline) {
|
||||
if err := newService.LoadJobsFromFile(); err != nil {
|
||||
readerErrs <- err
|
||||
return
|
||||
}
|
||||
}
|
||||
}()
|
||||
}
|
||||
|
||||
wg.Wait()
|
||||
close(readerErrs)
|
||||
|
||||
var firstErr error
|
||||
for err := range readerErrs {
|
||||
if firstErr == nil {
|
||||
firstErr = err
|
||||
}
|
||||
}
|
||||
Expect(firstErr).NotTo(HaveOccurred(), "concurrent load saw a partial/empty file")
|
||||
})
|
||||
})
|
||||
|
||||
Describe("Prompt templating", func() {
|
||||
|
||||
@@ -16,6 +16,12 @@ type JobPersister interface {
|
||||
SaveJob(userID string, job schema.Job) error
|
||||
DeleteJob(jobID string) error
|
||||
|
||||
// Bulk flush of the current in-memory state. File-backed persister
|
||||
// rewrites the whole JSON file; DB-backed persister no-ops because
|
||||
// SaveTask/SaveJob are already write-through.
|
||||
FlushTasks() error
|
||||
FlushJobs() error
|
||||
|
||||
// Authoritative reads — DB returns fresh data; file returns nil, nil
|
||||
GetJob(jobID string) (*schema.Job, error)
|
||||
ListJobs(userID, taskID, status string, limit int) ([]schema.Job, error)
|
||||
|
||||
@@ -32,6 +32,12 @@ func (p *dbJobPersister) DeleteJob(jobID string) error {
|
||||
return p.store.DeleteJob(jobID)
|
||||
}
|
||||
|
||||
// FlushTasks is a no-op: SaveTask already writes through to the DB.
|
||||
func (p *dbJobPersister) FlushTasks() error { return nil }
|
||||
|
||||
// FlushJobs is a no-op: SaveJob already writes through to the DB.
|
||||
func (p *dbJobPersister) FlushJobs() error { return nil }
|
||||
|
||||
func (p *dbJobPersister) GetJob(jobID string) (*schema.Job, error) {
|
||||
rec, err := p.store.GetJob(jobID)
|
||||
if err != nil {
|
||||
|
||||
@@ -4,6 +4,7 @@ import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
@@ -41,6 +42,14 @@ func (p *fileJobPersister) DeleteJob(_ string) error {
|
||||
return p.saveJobsToFile()
|
||||
}
|
||||
|
||||
// FlushTasks persists the tasks by delegating to saveTasksToFile and returns
// whatever error that call reports.
func (p *fileJobPersister) FlushTasks() error {
|
||||
return p.saveTasksToFile()
|
||||
}
|
||||
|
||||
// FlushJobs persists the jobs by delegating to saveJobsToFile and returns
// whatever error that call reports.
func (p *fileJobPersister) FlushJobs() error {
|
||||
return p.saveJobsToFile()
|
||||
}
|
||||
|
||||
// GetJob returns nil — file persister has no authoritative reads.
|
||||
func (p *fileJobPersister) GetJob(_ string) (*schema.Job, error) {
|
||||
return nil, nil
|
||||
@@ -127,7 +136,7 @@ func (p *fileJobPersister) saveTasksToFile() error {
|
||||
return fmt.Errorf("failed to marshal tasks: %w", err)
|
||||
}
|
||||
|
||||
return os.WriteFile(p.tasksFile, data, 0600)
|
||||
return writeFileAtomic(p.tasksFile, data, 0600)
|
||||
}
|
||||
|
||||
// saveJobsToFile serializes the entire jobs map to the JSON file.
|
||||
@@ -149,5 +158,45 @@ func (p *fileJobPersister) saveJobsToFile() error {
|
||||
return fmt.Errorf("failed to marshal jobs: %w", err)
|
||||
}
|
||||
|
||||
return os.WriteFile(p.jobsFile, data, 0600)
|
||||
return writeFileAtomic(p.jobsFile, data, 0600)
|
||||
}
|
||||
|
||||
// writeFileAtomic writes data to path via a same-directory temp file + rename.
// os.WriteFile opens with O_TRUNC, so a concurrent reader can land between the
// truncate and the write and see an empty file ("unexpected end of JSON input").
// rename(2) is atomic on POSIX, so readers see either the prior contents or the
// new contents and never a zero-byte window.
//
// The temp file is created in path's own directory, filled with data, chmod'ed
// to perm, fsynced and closed before it is renamed over path. On any failure
// the temp file is closed (if still open) and removed, a wrapped error is
// returned, and path itself is left as it was.
func writeFileAtomic(path string, data []byte, perm os.FileMode) (err error) {
	tmp, err := os.CreateTemp(filepath.Dir(path), filepath.Base(path)+".tmp-*")
	if err != nil {
		return fmt.Errorf("failed to create temp file: %w", err)
	}
	tmpPath := tmp.Name()

	// Single cleanup point: every error return below leaves through here, so
	// no failure path can leak the descriptor or leave the temp file behind.
	closed := false
	defer func() {
		if err == nil {
			return
		}
		if !closed {
			_ = tmp.Close() // best effort; the original error is the one reported
		}
		_ = os.Remove(tmpPath) // best effort; nothing useful to do if it fails
	}()

	if _, err = tmp.Write(data); err != nil {
		return fmt.Errorf("failed to write temp file: %w", err)
	}
	if err = tmp.Chmod(perm); err != nil {
		return fmt.Errorf("failed to chmod temp file: %w", err)
	}
	// Flush the contents to stable storage before the file becomes visible
	// under its final name.
	if err = tmp.Sync(); err != nil {
		return fmt.Errorf("failed to sync temp file: %w", err)
	}

	// Mark as closed before calling Close so the deferred cleanup never
	// closes the file a second time when Close itself fails.
	closed = true
	if err = tmp.Close(); err != nil {
		return fmt.Errorf("failed to close temp file: %w", err)
	}
	if err = os.Rename(tmpPath, path); err != nil {
		return fmt.Errorf("failed to rename temp file: %w", err)
	}
	return nil
}
|
||||
|
||||
@@ -12,6 +12,24 @@ import (
|
||||
"gorm.io/gorm"
|
||||
)
|
||||
|
||||
// perModelMissThreshold is the number of consecutive failed gRPC probes
|
||||
// against a model's backend before the model is removed from the registry.
|
||||
// A single failure can be transient (network blip, brief GC pause on the
|
||||
// worker, a long-running request hogging the gRPC server thread); requiring
|
||||
// N consecutive misses avoids deleting healthy rows over noise. At the
|
||||
// default 15s tick this means a model has to be unreachable for ~45s before
|
||||
// it gets reaped.
|
||||
const perModelMissThreshold = 3
|
||||
|
||||
// modelKey identifies a specific (node, model, replica) tuple. We track miss
|
||||
// counts per tuple because the same model name can be loaded on multiple
|
||||
// replicas on the same node.
|
||||
type modelKey struct {
|
||||
NodeID string
|
||||
ModelName string
|
||||
ReplicaIndex int
|
||||
}
|
||||
|
||||
// HealthMonitor periodically checks the health of registered backend nodes.
|
||||
type HealthMonitor struct {
|
||||
registry NodeHealthStore
|
||||
@@ -21,6 +39,8 @@ type HealthMonitor struct {
|
||||
autoOffline bool // mark stale nodes as offline (preserves approval status)
|
||||
clientFactory BackendClientFactory // creates gRPC backend clients
|
||||
perModelHealthCheck bool // check each model's backend process individually
|
||||
missesMu sync.Mutex
|
||||
misses map[modelKey]int // consecutive failed-probe counts; reset on success or model removal
|
||||
cancel context.CancelFunc
|
||||
cancelMu sync.Mutex
|
||||
}
|
||||
@@ -46,6 +66,7 @@ func NewHealthMonitor(registry NodeHealthStore, db *gorm.DB, checkInterval, stal
|
||||
autoOffline: true,
|
||||
clientFactory: factory,
|
||||
perModelHealthCheck: perModelHealthCheck,
|
||||
misses: make(map[modelKey]int),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -152,9 +173,11 @@ func (hm *HealthMonitor) doCheckAll(ctx context.Context) {
|
||||
}
|
||||
}
|
||||
|
||||
// Per-model backend health check (opt-in): probe each model's gRPC address
|
||||
// and remove stale model records. This does NOT affect the node's status —
|
||||
// a crashed backend process is a model-level issue, not a node-level one.
|
||||
// Per-model backend health check: probe each model's gRPC address and
|
||||
// remove stale model records. This does NOT affect the node's status —
|
||||
// a crashed backend process is a model-level issue, not a node-level
|
||||
// one. A model is only removed after perModelMissThreshold consecutive
|
||||
// failed probes so a single network/GC blip doesn't force a reload.
|
||||
if hm.perModelHealthCheck {
|
||||
models, _ := hm.registry.GetNodeModels(ctx, node.ID)
|
||||
for _, m := range models {
|
||||
@@ -163,15 +186,43 @@ func (hm *HealthMonitor) doCheckAll(ctx context.Context) {
|
||||
}
|
||||
mClient := hm.clientFactory.NewClient(m.Address, false)
|
||||
mCheckCtx, mCancel := context.WithTimeout(ctx, 5*time.Second)
|
||||
if ok, _ := mClient.HealthCheck(mCheckCtx); !ok {
|
||||
xlog.Warn("Model backend unhealthy, removing from registry",
|
||||
"node", node.ID, "model", m.ModelName, "replica", m.ReplicaIndex, "address", m.Address)
|
||||
hm.registry.RemoveNodeModel(ctx, node.ID, m.ModelName, m.ReplicaIndex)
|
||||
}
|
||||
ok, _ := mClient.HealthCheck(mCheckCtx)
|
||||
mCancel()
|
||||
if closer, ok := mClient.(io.Closer); ok {
|
||||
closer.Close()
|
||||
}
|
||||
|
||||
key := modelKey{NodeID: node.ID, ModelName: m.ModelName, ReplicaIndex: m.ReplicaIndex}
|
||||
hm.missesMu.Lock()
|
||||
if ok {
|
||||
// Probe succeeded — wipe any previous miss streak.
|
||||
delete(hm.misses, key)
|
||||
hm.missesMu.Unlock()
|
||||
continue
|
||||
}
|
||||
hm.misses[key]++
|
||||
misses := hm.misses[key]
|
||||
hm.missesMu.Unlock()
|
||||
|
||||
if misses < perModelMissThreshold {
|
||||
xlog.Debug("Model backend probe failed, awaiting threshold before removal",
|
||||
"node", node.ID, "model", m.ModelName, "replica", m.ReplicaIndex,
|
||||
"address", m.Address, "misses", misses, "threshold", perModelMissThreshold)
|
||||
continue
|
||||
}
|
||||
xlog.Warn("Model backend unhealthy after consecutive misses, removing from registry",
|
||||
"node", node.ID, "model", m.ModelName, "replica", m.ReplicaIndex,
|
||||
"address", m.Address, "misses", misses)
|
||||
if err := hm.registry.RemoveNodeModel(ctx, node.ID, m.ModelName, m.ReplicaIndex); err != nil {
|
||||
xlog.Warn("Failed to remove unhealthy model from registry",
|
||||
"node", node.ID, "model", m.ModelName, "replica", m.ReplicaIndex, "error", err)
|
||||
// Leave the miss counter in place so the next tick retries
|
||||
// the removal rather than starting the streak over.
|
||||
continue
|
||||
}
|
||||
hm.missesMu.Lock()
|
||||
delete(hm.misses, key)
|
||||
hm.missesMu.Unlock()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -232,6 +232,9 @@ func (c *fakeBackendClient) AudioTransform(_ context.Context, _ *pb.AudioTransfo
|
||||
func (c *fakeBackendClient) AudioTransformStream(_ context.Context, _ ...ggrpc.CallOption) (grpc.AudioTransformStreamClient, error) {
|
||||
return nil, nil
|
||||
}
|
||||
func (c *fakeBackendClient) AudioToAudioStream(_ context.Context, _ ...ggrpc.CallOption) (grpc.AudioToAudioStreamClient, error) {
|
||||
return nil, nil
|
||||
}
|
||||
func (c *fakeBackendClient) ModelMetadata(_ context.Context, _ *pb.ModelOptions, _ ...ggrpc.CallOption) (*pb.ModelMetadataResponse, error) {
|
||||
return nil, nil
|
||||
}
|
||||
@@ -321,6 +324,7 @@ func newTestHealthMonitor(store NodeHealthStore, factory BackendClientFactory, a
|
||||
staleThreshold: staleThreshold,
|
||||
autoOffline: autoOffline,
|
||||
clientFactory: factory,
|
||||
misses: make(map[modelKey]int),
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -255,7 +255,7 @@ var _ = Describe("HealthMonitor (mock-based)", func() {
|
||||
Expect(calls).NotTo(ContainElement(ContainSubstring("MarkUnhealthy")))
|
||||
})
|
||||
|
||||
It("removes stale model via per-model health check without affecting node status", func() {
|
||||
It("removes stale model via per-model health check after consecutive failures", func() {
|
||||
store := newFakeNodeHealthStore()
|
||||
factory := newFakeBackendClientFactory()
|
||||
hm := newTestHealthMonitor(store, factory, true, staleThreshold)
|
||||
@@ -268,6 +268,15 @@ var _ = Describe("HealthMonitor (mock-based)", func() {
|
||||
// Model backend is dead
|
||||
factory.setClient("10.0.0.10:50053", &fakeBackendClient{healthy: false, err: fmt.Errorf("connection refused")})
|
||||
|
||||
// First (perModelMissThreshold-1) probes must NOT remove the row —
|
||||
// a single failure could be a transient blip.
|
||||
for i := 0; i < perModelMissThreshold-1; i++ {
|
||||
hm.doCheckAll(context.Background())
|
||||
Expect(store.getCalls()).NotTo(ContainElement(ContainSubstring("RemoveNodeModel")),
|
||||
"removed too early at miss %d", i+1)
|
||||
}
|
||||
|
||||
// Threshold-th consecutive miss triggers removal.
|
||||
hm.doCheckAll(context.Background())
|
||||
|
||||
// Node should remain healthy — only the specific replica record is removed.
|
||||
@@ -275,5 +284,36 @@ var _ = Describe("HealthMonitor (mock-based)", func() {
|
||||
Expect(store.getCalls()).To(ContainElement("RemoveNodeModel:node-model:piper-model:0"))
|
||||
Expect(store.getCalls()).NotTo(ContainElement(ContainSubstring("MarkUnhealthy")))
|
||||
})
|
||||
|
||||
It("preserves model row when an intermittent failure is followed by a success", func() {
|
||||
store := newFakeNodeHealthStore()
|
||||
factory := newFakeBackendClientFactory()
|
||||
hm := newTestHealthMonitor(store, factory, true, staleThreshold)
|
||||
hm.perModelHealthCheck = true
|
||||
|
||||
node := makeTestNode("node-flap", "flap-worker", "10.0.0.11:50051", StatusHealthy, freshTime())
|
||||
store.addNode(node)
|
||||
store.addNodeModel("node-flap", NodeModel{NodeID: "node-flap", ModelName: "piper-model", Address: "10.0.0.11:50053"})
|
||||
|
||||
deadClient := &fakeBackendClient{healthy: false, err: fmt.Errorf("connection refused")}
|
||||
liveClient := &fakeBackendClient{healthy: true}
|
||||
|
||||
// Two failing probes then a recovery — should NOT remove the row,
|
||||
// and should reset the miss counter so two more failures don't tip
|
||||
// it over.
|
||||
factory.setClient("10.0.0.11:50053", deadClient)
|
||||
hm.doCheckAll(context.Background())
|
||||
hm.doCheckAll(context.Background())
|
||||
factory.setClient("10.0.0.11:50053", liveClient)
|
||||
hm.doCheckAll(context.Background())
|
||||
|
||||
Expect(store.getCalls()).NotTo(ContainElement(ContainSubstring("RemoveNodeModel")))
|
||||
|
||||
// Counter is reset; two more failures must not be enough to remove.
|
||||
factory.setClient("10.0.0.11:50053", deadClient)
|
||||
hm.doCheckAll(context.Background())
|
||||
hm.doCheckAll(context.Background())
|
||||
Expect(store.getCalls()).NotTo(ContainElement(ContainSubstring("RemoveNodeModel")))
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
@@ -176,6 +176,10 @@ func (f *fakeGRPCBackend) AudioTransformStream(_ context.Context, _ ...ggrpc.Cal
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func (f *fakeGRPCBackend) AudioToAudioStream(_ context.Context, _ ...ggrpc.CallOption) (grpc.AudioToAudioStreamClient, error) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func (f *fakeGRPCBackend) ModelMetadata(_ context.Context, _ *pb.ModelOptions, _ ...ggrpc.CallOption) (*pb.ModelMetadataResponse, error) {
|
||||
return &pb.ModelMetadataResponse{}, nil
|
||||
}
|
||||
|
||||
@@ -546,7 +546,13 @@ func (r *NodeRegistry) GetByName(ctx context.Context, name string) (*BackendNode
|
||||
return &node, nil
|
||||
}
|
||||
|
||||
// MarkUnhealthy sets a node status to unhealthy.
|
||||
// MarkUnhealthy sets a node status to unhealthy. Deliberately status-only:
|
||||
// callers fire this on transient triggers (a single nats.ErrNoResponders from
|
||||
// managers_distributed / reconciler) where the next heartbeat is expected to
|
||||
// flip the node back to healthy, and cascade-deleting node_models here would
|
||||
// force a full model reload on every brief NATS hiccup. Stale rows are reaped
|
||||
// by the per-model health probe (on by default; see HealthMonitor) and by
|
||||
// MarkOffline when the heartbeat really has gone away.
|
||||
func (r *NodeRegistry) MarkUnhealthy(ctx context.Context, nodeID string) error {
|
||||
return r.setStatus(ctx, nodeID, StatusUnhealthy)
|
||||
}
|
||||
@@ -556,9 +562,23 @@ func (r *NodeRegistry) MarkHealthy(ctx context.Context, nodeID string) error {
|
||||
return r.setStatus(ctx, nodeID, StatusHealthy)
|
||||
}
|
||||
|
||||
// MarkDraining sets a node status to draining (no new requests).
|
||||
// MarkDraining sets a node status to draining (no new requests) and clears its
|
||||
// model records. Routing already filters out non-healthy nodes, so removing
|
||||
// the rows on drain doesn't change new-request behavior — but it does stop the
|
||||
// Models UI from showing the node's models as "running" while the box has been
|
||||
// taken out of rotation, and it prevents stale rows from being selected if
|
||||
// (re)scheduling logic gets relaxed elsewhere. In-flight requests already hold
|
||||
// their gRPC client through Route() and will finish normally; the only
|
||||
// observable effect is that the per-call IncrementInFlight bookkeeping logs a
|
||||
// non-fatal warning, which is acceptable for a drain.
|
||||
func (r *NodeRegistry) MarkDraining(ctx context.Context, nodeID string) error {
|
||||
return r.setStatus(ctx, nodeID, StatusDraining)
|
||||
if err := r.setStatus(ctx, nodeID, StatusDraining); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := r.db.WithContext(ctx).Where("node_id = ?", nodeID).Delete(&NodeModel{}).Error; err != nil {
|
||||
xlog.Warn("Failed to clear model records on draining", "node", nodeID, "error", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// FindStaleNodes returns nodes that haven't sent a heartbeat within the given threshold.
|
||||
@@ -673,9 +693,18 @@ func (r *NodeRegistry) FindAndLockNodeWithModel(ctx context.Context, modelName s
|
||||
// to moderate concurrency where requests don't overlap) collapses to
|
||||
// "biggest GPU wins every time" and one node ends up taking nearly all
|
||||
// the load while replicas on other nodes sit idle.
|
||||
// Filter on backend_nodes.status = healthy in the inner JOIN itself,
|
||||
// not only in the later node-fetch step. The previous version picked
|
||||
// a (node_id, replica) pair purely on node_models state, then bailed
|
||||
// out when the second query couldn't find a healthy node row — but
|
||||
// any concurrent reader of node_models could still pick the same
|
||||
// stale row in the same window, and other helpers that mirror this
|
||||
// JOIN need the same invariant. Belt-and-braces: status filter here
|
||||
// AND the status-checked node fetch below.
|
||||
q := tx.Clauses(clause.Locking{Strength: "UPDATE"}).
|
||||
Joins("JOIN backend_nodes ON backend_nodes.id = node_models.node_id").
|
||||
Where("node_models.model_name = ? AND node_models.state = ?", modelName, "loaded")
|
||||
Where("node_models.model_name = ? AND node_models.state = ? AND backend_nodes.status = ?",
|
||||
modelName, "loaded", StatusHealthy)
|
||||
if len(candidateNodeIDs) > 0 {
|
||||
q = q.Where("node_models.node_id IN ?", candidateNodeIDs)
|
||||
}
|
||||
|
||||
@@ -251,18 +251,68 @@ options:
|
||||
|
||||
These are set via the `options:` array in the model configuration (format: `key:value`):
|
||||
|
||||
**Common options**
|
||||
|
||||
| Option | Type | Default | Description |
|
||||
|--------|------|---------|-------------|
|
||||
| `spec_type` | string | `none` | Speculative decoding type (see table below) |
|
||||
| `spec_type` / `speculative_type` | string | `none` | Speculative decoding type, or comma-separated list to chain multiple (see table below) |
|
||||
| `spec_n_max` / `draft_max` | int | 16 | Maximum number of tokens to draft per step |
|
||||
| `spec_n_min` / `draft_min` | int | 0 | Minimum draft tokens required to use speculation |
|
||||
| `spec_p_min` / `draft_p_min` | float | 0.75 | Minimum probability threshold for greedy acceptance |
|
||||
| `spec_p_split` | float | 0.1 | Split probability for tree-based branching |
|
||||
|
||||
**Draft-model options** (apply when `spec_type=draft`, i.e. a `draft_model` is configured)
|
||||
|
||||
| Option | Type | Default | Description |
|
||||
|--------|------|---------|-------------|
|
||||
| `draft_gpu_layers` | int | -1 | GPU layers for the draft model (-1 = use default) |
|
||||
| `draft_threads` / `spec_draft_threads` | int | same as main | Threads used by the draft model (`<= 0` = hardware concurrency) |
|
||||
| `draft_threads_batch` / `spec_draft_threads_batch` | int | same as `draft_threads` | Threads used by the draft model during batch / prompt processing |
|
||||
| `draft_cache_type_k` / `spec_draft_cache_type_k` | string | `f16` | KV cache K data type for the draft model (same values as `cache_type_k`) |
|
||||
| `draft_cache_type_v` / `spec_draft_cache_type_v` | string | `f16` | KV cache V data type for the draft model |
|
||||
| `draft_cpu_moe` / `spec_draft_cpu_moe` | bool | false | Keep all MoE expert weights of the draft model on CPU |
|
||||
| `draft_n_cpu_moe` / `spec_draft_n_cpu_moe` | int | 0 | Keep MoE expert weights of the first N draft-model layers on CPU |
|
||||
| `draft_override_tensor` / `spec_draft_override_tensor` | string | "" | Comma-separated `<tensor regex>=<buffer type>` overrides for the draft model |
|
||||
| `draft_ctx_size` | int | (ignored) | Deprecated upstream: the draft now shares the target context size. Accepted for backward compatibility but has no effect. |
|
||||
|
||||
**`ngram_simple` options** (used when `spec_type` includes `ngram_simple`)
|
||||
|
||||
| Option | Type | Default | Description |
|
||||
|--------|------|---------|-------------|
|
||||
| `spec_ngram_size_n` / `ngram_size_n` | int | 12 | N-gram lookup size |
|
||||
| `spec_ngram_size_m` / `ngram_size_m` | int | 48 | M-gram proposal size |
|
||||
| `spec_ngram_min_hits` / `ngram_min_hits` | int | 1 | Minimum hits for accepting n-gram proposals |
|
||||
| `draft_gpu_layers` | int | -1 | GPU layers for the draft model (-1 = use default) |
|
||||
| `draft_ctx_size` | int | 0 | Context size for the draft model (0 = auto) |
|
||||
|
||||
**`ngram_mod` options** (used when `spec_type` includes `ngram_mod`)
|
||||
|
||||
| Option | Type | Default | Description |
|
||||
|--------|------|---------|-------------|
|
||||
| `spec_ngram_mod_n_min` | int | 48 | Minimum number of ngram tokens to use |
|
||||
| `spec_ngram_mod_n_max` | int | 64 | Maximum number of ngram tokens to use |
|
||||
| `spec_ngram_mod_n_match` | int | 24 | Ngram lookup length |
|
||||
|
||||
**`ngram_map_k` options** (used when `spec_type` includes `ngram_map_k`)
|
||||
|
||||
| Option | Type | Default | Description |
|
||||
|--------|------|---------|-------------|
|
||||
| `spec_ngram_map_k_size_n` | int | 12 | N-gram lookup size |
|
||||
| `spec_ngram_map_k_size_m` | int | 48 | M-gram proposal size |
|
||||
| `spec_ngram_map_k_min_hits` | int | 1 | Minimum hits for accepting proposals |
|
||||
|
||||
**`ngram_map_k4v` options** (used when `spec_type` includes `ngram_map_k4v`)
|
||||
|
||||
| Option | Type | Default | Description |
|
||||
|--------|------|---------|-------------|
|
||||
| `spec_ngram_map_k4v_size_n` | int | 12 | N-gram lookup size |
|
||||
| `spec_ngram_map_k4v_size_m` | int | 48 | M-gram proposal size |
|
||||
| `spec_ngram_map_k4v_min_hits` | int | 1 | Minimum hits for accepting proposals |
|
||||
|
||||
**`ngram_cache` lookup files**
|
||||
|
||||
| Option | Type | Default | Description |
|
||||
|--------|------|---------|-------------|
|
||||
| `spec_lookup_cache_static` / `lookup_cache_static` | string | "" | Path to a static ngram lookup cache file |
|
||||
| `spec_lookup_cache_dynamic` / `lookup_cache_dynamic` | string | "" | Path to a dynamic ngram lookup cache file (updated by generation) |
|
||||
|
||||
#### Speculative Type Values
|
||||
|
||||
@@ -277,6 +327,8 @@ These are set via the `options:` array in the model configuration (format: `key:
|
||||
| `ngram_mod` | Modified n-gram speculation |
|
||||
| `ngram_cache` | 3-level n-gram cache |
|
||||
|
||||
Multiple types can be chained by passing a comma-separated list to `spec_type` (e.g. `spec_type:ngram_simple,ngram_mod`). The runtime tries them in order and accepts the first proposal that meets the acceptance criteria.
|
||||
|
||||
{{% notice note %}}
|
||||
Speculative decoding is automatically disabled when multimodal models (with `mmproj`) are active. The `n_draft` parameter can also be overridden per-request.
|
||||
{{% /notice %}}
|
||||
|
||||
@@ -1,3 +1,3 @@
|
||||
{
|
||||
"version": "v4.2.0"
|
||||
"version": "v4.2.3"
|
||||
}
|
||||
|
||||
@@ -3197,6 +3197,110 @@
|
||||
- filename: llama-cpp/models/LFM2.5-1.2B-Nova-Function-Calling.Q4_K_M.gguf
|
||||
sha256: 5d039ad4195447cf4b6dbee8f7fe11f985c01d671a18153084c869077e431fbf
|
||||
uri: https://huggingface.co/NovachronoAI/LFM2.5-1.2B-Nova-Function-Calling-GGUF/resolve/main/LFM2.5-1.2B-Nova-Function-Calling.Q4_K_M.gguf
|
||||
- name: lfm2.5-audio-1.5b-realtime
|
||||
url: github:mudler/LocalAI/gallery/liquid-audio.yaml@master
|
||||
urls:
|
||||
- https://huggingface.co/LiquidAI/LFM2.5-Audio-1.5B
|
||||
description: |
|
||||
LFM2.5-Audio-1.5B is LiquidAI's any-to-any audio foundation model. The
|
||||
1.2B LFM2.5 backbone plus a FastConformer audio encoder and an LFM2-based
|
||||
audio detokenizer give real-time speech-to-speech with text + audio output
|
||||
interleaved at 12.5 Hz / 24 kHz. This entry runs in S2S (speech-to-speech)
|
||||
mode and is the model the LocalAI realtime API any-to-any path consumes.
|
||||
Switch to ASR, TTS, or chat by picking the sibling gallery entries.
|
||||
license: LFM-Open-License-v1.0
|
||||
icon: https://cdn-avatars.huggingface.co/v1/production/uploads/61b8e2ba285851687028d395/7_6D7rWrLxp2hb6OHSV1p.png
|
||||
tags:
|
||||
- lfm2
|
||||
- liquid
|
||||
- audio
|
||||
- speech-to-speech
|
||||
- any-to-any
|
||||
- realtime
|
||||
- 1.5b
|
||||
last_checked: "2026-05-11"
|
||||
overrides:
|
||||
backend: liquid-audio
|
||||
# realtime_audio drives the Talk-page filter; the rest let the model
|
||||
# also surface on the chat / transcribe / speech endpoints when called
|
||||
# directly (the backend implements all three RPCs).
|
||||
known_usecases:
|
||||
- realtime_audio
|
||||
- chat
|
||||
- transcript
|
||||
- tts
|
||||
- vad
|
||||
options:
|
||||
- mode:s2s
|
||||
- name: lfm2.5-audio-1.5b-chat
|
||||
url: github:mudler/LocalAI/gallery/liquid-audio.yaml@master
|
||||
urls:
|
||||
- https://huggingface.co/LiquidAI/LFM2.5-Audio-1.5B
|
||||
description: |
|
||||
LFM2.5-Audio-1.5B in text-only chat mode. The model runs `generate_sequential`
|
||||
with no audio modality, behaving like a small LFM2 chat model. Pick this
|
||||
entry for tool-calling experiments without the audio overhead.
|
||||
license: LFM-Open-License-v1.0
|
||||
tags:
|
||||
- lfm2
|
||||
- liquid
|
||||
- audio
|
||||
- chat
|
||||
- 1.5b
|
||||
last_checked: "2026-05-11"
|
||||
overrides:
|
||||
backend: liquid-audio
|
||||
known_usecases:
|
||||
- chat
|
||||
options:
|
||||
- mode:chat
|
||||
- name: lfm2.5-audio-1.5b-asr
|
||||
url: github:mudler/LocalAI/gallery/liquid-audio.yaml@master
|
||||
urls:
|
||||
- https://huggingface.co/LiquidAI/LFM2.5-Audio-1.5B
|
||||
description: |
|
||||
LFM2.5-Audio-1.5B in ASR mode. System prompt `Perform ASR.` is prepended;
|
||||
output is capitalised and punctuated. Wire this entry as a transcription
|
||||
model on the /v1/audio/transcriptions endpoint.
|
||||
license: LFM-Open-License-v1.0
|
||||
tags:
|
||||
- lfm2
|
||||
- liquid
|
||||
- audio
|
||||
- asr
|
||||
- speech-to-text
|
||||
- 1.5b
|
||||
last_checked: "2026-05-11"
|
||||
overrides:
|
||||
backend: liquid-audio
|
||||
known_usecases:
|
||||
- transcript
|
||||
options:
|
||||
- mode:asr
|
||||
- name: lfm2.5-audio-1.5b-tts
|
||||
url: github:mudler/LocalAI/gallery/liquid-audio.yaml@master
|
||||
urls:
|
||||
- https://huggingface.co/LiquidAI/LFM2.5-Audio-1.5B
|
||||
description: |
|
||||
LFM2.5-Audio-1.5B in TTS mode. Four baked voices: us_male, us_female,
|
||||
uk_male, uk_female — pick the default at load time via `voice:` option,
|
||||
or override per-request via the OpenAI `/v1/audio/speech` `voice` field.
|
||||
license: LFM-Open-License-v1.0
|
||||
tags:
|
||||
- lfm2
|
||||
- liquid
|
||||
- audio
|
||||
- tts
|
||||
- text-to-speech
|
||||
- 1.5b
|
||||
last_checked: "2026-05-11"
|
||||
overrides:
|
||||
backend: liquid-audio
|
||||
known_usecases:
|
||||
- tts
|
||||
options:
|
||||
- mode:tts
|
||||
- voice:us_female
|
||||
- name: mistral-nemo-instruct-2407-12b-thinking-m-claude-opus-high-reasoning-i1
|
||||
url: github:mudler/LocalAI/gallery/virtual.yaml@master
|
||||
urls:
|
||||
|
||||
@@ -10,6 +10,16 @@ config_file: |
|
||||
- <dummy32000>
|
||||
- </s>
|
||||
- <|endoftext|>
|
||||
function:
|
||||
# LFM2 Pythonic tool-call syntax: <|tool_call_start|>[name(k="v", ...)]<|tool_call_end|>
|
||||
# Mirrors common_chat_params_init_lfm2 in llama.cpp/common/chat.cpp.
|
||||
response_regex:
|
||||
- '<\|tool_call_start\|>\[(?P<name>\w+)\((?P<arguments>.*?)\)\]<\|tool_call_end\|>'
|
||||
argument_regex:
|
||||
- '(?P<key>\w+)\s*=\s*"(?P<value>[^"]*)"'
|
||||
- '(?P<key>\w+)\s*=\s*(?P<value>-?\d+(?:\.\d+)?|true|false|null)'
|
||||
argument_regex_key_name: key
|
||||
argument_regex_value_name: value
|
||||
template:
|
||||
chat: |
|
||||
{{.Input -}}
|
||||
|
||||
40
gallery/liquid-audio.yaml
Normal file
40
gallery/liquid-audio.yaml
Normal file
@@ -0,0 +1,40 @@
|
||||
---
|
||||
name: "liquid-audio"
|
||||
|
||||
description: |
|
||||
LiquidAI LFM2 / LFM2.5 Audio models served by the Python `liquid-audio` backend.
|
||||
Supports four roles via the `mode:` option:
|
||||
- chat text-only chat completion (generate_sequential, no audio)
|
||||
- asr speech-to-text (Perform ASR. system prompt)
|
||||
- tts text-to-speech in 4 baked voices (us_male/us_female/uk_male/uk_female)
|
||||
- s2s interleaved speech-to-speech (the realtime any-to-any path)
|
||||
|
||||
license: "LFM Open License v1.0"
|
||||
|
||||
urls:
|
||||
- https://huggingface.co/LiquidAI/LFM2.5-Audio-1.5B
|
||||
- https://github.com/Liquid4All/liquid-audio
|
||||
|
||||
config_file: |
|
||||
backend: liquid-audio
|
||||
context_size: 32768
|
||||
f16: true
|
||||
mmap: true
|
||||
# realtime_audio surfaces the model on the Talk page; chat/tts/transcript
|
||||
# let it also serve the standalone /v1/chat/completions, /v1/audio/speech,
|
||||
# and /v1/audio/transcriptions endpoints (backend implements all three).
|
||||
known_usecases:
|
||||
- realtime_audio
|
||||
- chat
|
||||
- tts
|
||||
- transcript
|
||||
- vad
|
||||
parameters:
|
||||
model: LiquidAI/LFM2.5-Audio-1.5B
|
||||
# Special tokens emitted in the text track during interleaved generation.
|
||||
# Included so a future client-side parser can spot them; the LFM2 tool-call
|
||||
# format itself is auto-detected by the upstream llama.cpp parser when the
|
||||
# model loads under that backend.
|
||||
stopwords:
|
||||
- <|im_end|>
|
||||
- <|endoftext|>
|
||||
43
go.mod
43
go.mod
@@ -7,7 +7,7 @@ require (
|
||||
fyne.io/fyne/v2 v2.7.3
|
||||
github.com/Masterminds/sprig/v3 v3.3.0
|
||||
github.com/alecthomas/kong v1.14.0
|
||||
github.com/anthropics/anthropic-sdk-go v1.27.0
|
||||
github.com/anthropics/anthropic-sdk-go v1.42.0
|
||||
github.com/aws/aws-sdk-go-v2 v1.41.6
|
||||
github.com/aws/aws-sdk-go-v2/config v1.32.16
|
||||
github.com/aws/aws-sdk-go-v2/credentials v1.19.15
|
||||
@@ -18,7 +18,7 @@ require (
|
||||
github.com/dhowden/tag v0.0.0-20240417053706-3d75831295e8
|
||||
github.com/ebitengine/purego v0.10.0
|
||||
github.com/emirpasic/gods/v2 v2.0.0-alpha
|
||||
github.com/fsnotify/fsnotify v1.9.0
|
||||
github.com/fsnotify/fsnotify v1.10.1
|
||||
github.com/go-audio/wav v1.1.0
|
||||
github.com/go-skynet/go-llama.cpp v0.0.0-20240314183750-6a8041ef6b46
|
||||
github.com/gofrs/flock v0.13.0
|
||||
@@ -37,14 +37,14 @@ require (
|
||||
github.com/microcosm-cc/bluemonday v1.0.27
|
||||
github.com/modelcontextprotocol/go-sdk v1.5.0
|
||||
github.com/mudler/cogito v0.9.5-0.20260315222927-63abdec7189b
|
||||
github.com/mudler/edgevpn v0.31.1
|
||||
github.com/mudler/edgevpn v0.32.2
|
||||
github.com/mudler/go-processmanager v0.1.1
|
||||
github.com/mudler/memory v0.0.0-20260406210934-424c1ecf2cf8
|
||||
github.com/mudler/xlog v0.0.6
|
||||
github.com/nats-io/nats.go v1.50.0
|
||||
github.com/ollama/ollama v0.20.4
|
||||
github.com/onsi/ginkgo/v2 v2.28.2
|
||||
github.com/onsi/gomega v1.39.1
|
||||
github.com/onsi/gomega v1.40.0
|
||||
github.com/openai/openai-go/v3 v3.26.0
|
||||
github.com/otiai10/copy v1.14.1
|
||||
github.com/otiai10/openaigo v1.7.0
|
||||
@@ -95,7 +95,9 @@ require (
|
||||
github.com/bahlo/generic-list-go v0.2.0 // indirect
|
||||
github.com/buger/jsonparser v1.1.2 // indirect
|
||||
github.com/dunglas/httpsfv v1.1.0 // indirect
|
||||
github.com/filecoin-project/go-clock v0.1.0 // indirect
|
||||
github.com/go-jose/go-jose/v4 v4.1.4 // indirect
|
||||
github.com/invopop/jsonschema v0.13.0 // indirect
|
||||
github.com/jinzhu/inflection v1.0.0 // indirect
|
||||
github.com/jinzhu/now v1.1.5 // indirect
|
||||
github.com/jolestar/go-commons-pool/v2 v2.1.2 // indirect
|
||||
@@ -105,6 +107,7 @@ require (
|
||||
github.com/moby/moby/client v0.4.0 // indirect
|
||||
github.com/nats-io/nkeys v0.4.15 // indirect
|
||||
github.com/nats-io/nuid v1.0.1 // indirect
|
||||
github.com/standard-webhooks/standard-webhooks/libraries v0.0.0-20260508151727-1282bb917829 // indirect
|
||||
github.com/stretchr/testify v1.11.1 // indirect
|
||||
github.com/sv-tools/openapi v0.2.1 // indirect
|
||||
github.com/swaggo/swag/v2 v2.0.0-rc4 // indirect
|
||||
@@ -243,7 +246,7 @@ require (
|
||||
github.com/jeandeaual/go-locale v0.0.0-20250612000132-0ef82f21eade // indirect
|
||||
github.com/json-iterator/go v1.1.12 // indirect
|
||||
github.com/jsummers/gobmp v0.0.0-20230614200233-a9de23ed2e25 // indirect
|
||||
github.com/libp2p/go-yamux/v5 v5.0.1 // indirect
|
||||
github.com/libp2p/go-yamux/v5 v5.1.0 // indirect
|
||||
github.com/magiconair/properties v1.8.10 // indirect
|
||||
github.com/moby/docker-image-spec v1.3.1 // indirect
|
||||
github.com/moby/go-archive v0.2.0 // indirect
|
||||
@@ -280,7 +283,7 @@ require (
|
||||
github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e // indirect
|
||||
github.com/yosida95/uritemplate/v3 v3.0.2 // indirect
|
||||
go.opentelemetry.io/auto/sdk v1.2.1 // indirect
|
||||
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.61.0 // indirect
|
||||
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.65.0 // indirect
|
||||
go.uber.org/mock v0.5.2 // indirect
|
||||
go.yaml.in/yaml/v2 v2.4.4
|
||||
go.yaml.in/yaml/v3 v3.0.4 // indirect
|
||||
@@ -315,7 +318,7 @@ require (
|
||||
github.com/creachadair/otp v0.5.0 // indirect
|
||||
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect
|
||||
github.com/davidlazar/go-crypto v0.0.0-20200604182044-b73af7476f6c // indirect
|
||||
github.com/decred/dcrd/dcrec/secp256k1/v4 v4.4.0 // indirect
|
||||
github.com/decred/dcrd/dcrec/secp256k1/v4 v4.4.1 // indirect
|
||||
github.com/dlclark/regexp2 v1.11.5 // indirect
|
||||
github.com/docker/cli v29.4.0+incompatible // indirect
|
||||
github.com/docker/docker v28.5.2+incompatible
|
||||
@@ -335,7 +338,7 @@ require (
|
||||
github.com/go-openapi/swag v0.23.0 // indirect
|
||||
github.com/gogo/protobuf v1.3.2 // indirect
|
||||
github.com/golang/groupcache v0.0.0-20241129210726-2c02b8208cf8 // indirect
|
||||
github.com/golang/snappy v0.0.4 // indirect
|
||||
github.com/golang/snappy v0.0.5-0.20231225225746-43d5d4cd4e0e // indirect
|
||||
github.com/google/btree v1.1.3 // indirect
|
||||
github.com/google/go-cmp v0.7.0 // indirect
|
||||
github.com/google/gopacket v1.1.19 // indirect
|
||||
@@ -347,10 +350,10 @@ require (
|
||||
github.com/henvic/httpretty v0.1.4 // indirect
|
||||
github.com/huandu/xstrings v1.5.0 // indirect
|
||||
github.com/huin/goupnp v1.3.0 // indirect
|
||||
github.com/ipfs/boxo v0.30.0 // indirect
|
||||
github.com/ipfs/boxo v0.37.0 // indirect
|
||||
github.com/ipfs/go-cid v0.6.1 // indirect
|
||||
github.com/ipfs/go-datastore v0.8.2 // indirect
|
||||
github.com/ipfs/go-log/v2 v2.6.0 // indirect
|
||||
github.com/ipfs/go-datastore v0.9.1 // indirect
|
||||
github.com/ipfs/go-log/v2 v2.9.1 // indirect
|
||||
github.com/ipld/go-ipld-prime v0.23.0 // indirect
|
||||
github.com/jackpal/go-nat-pmp v1.0.2 // indirect
|
||||
github.com/jaypipes/pcidb v1.1.1 // indirect
|
||||
@@ -361,11 +364,11 @@ require (
|
||||
github.com/koron/go-ssdp v0.0.6 // indirect
|
||||
github.com/libp2p/go-buffer-pool v0.1.0 // indirect
|
||||
github.com/libp2p/go-cidranger v1.1.0 // indirect
|
||||
github.com/libp2p/go-flow-metrics v0.2.0 // indirect
|
||||
github.com/libp2p/go-flow-metrics v0.3.0 // indirect
|
||||
github.com/libp2p/go-libp2p-asn-util v0.4.1 // indirect
|
||||
github.com/libp2p/go-libp2p-kad-dht v0.33.1 // indirect
|
||||
github.com/libp2p/go-libp2p-kbucket v0.7.0 // indirect
|
||||
github.com/libp2p/go-libp2p-pubsub v0.14.2 // indirect
|
||||
github.com/libp2p/go-libp2p-kad-dht v0.39.0 // indirect
|
||||
github.com/libp2p/go-libp2p-kbucket v0.8.0 // indirect
|
||||
github.com/libp2p/go-libp2p-pubsub v0.15.0 // indirect
|
||||
github.com/libp2p/go-libp2p-record v0.3.1 // indirect
|
||||
github.com/libp2p/go-libp2p-routing-helpers v0.7.5 // indirect
|
||||
github.com/libp2p/go-msgio v0.3.0 // indirect
|
||||
@@ -379,7 +382,7 @@ require (
|
||||
github.com/mattn/go-colorable v0.1.14 // indirect
|
||||
github.com/mattn/go-isatty v0.0.20 // indirect
|
||||
github.com/mattn/go-runewidth v0.0.17 // indirect
|
||||
github.com/miekg/dns v1.1.66 // indirect
|
||||
github.com/miekg/dns v1.1.72 // indirect
|
||||
github.com/mikioh/tcpinfo v0.0.0-20190314235526-30a79bb1804b // indirect
|
||||
github.com/mikioh/tcpopt v0.0.0-20190314235656-172688c1accc // indirect
|
||||
github.com/minio/sha256-simd v1.0.1 // indirect
|
||||
@@ -397,7 +400,7 @@ require (
|
||||
github.com/multiformats/go-base32 v0.1.0 // indirect
|
||||
github.com/multiformats/go-base36 v0.2.0 // indirect
|
||||
github.com/multiformats/go-multiaddr v0.16.1
|
||||
github.com/multiformats/go-multiaddr-dns v0.4.1 // indirect
|
||||
github.com/multiformats/go-multiaddr-dns v0.5.0 // indirect
|
||||
github.com/multiformats/go-multiaddr-fmt v0.1.0 // indirect
|
||||
github.com/multiformats/go-multibase v0.3.0 // indirect
|
||||
github.com/multiformats/go-multicodec v0.10.0 // indirect
|
||||
@@ -435,7 +438,7 @@ require (
|
||||
github.com/ulikunitz/xz v0.5.14 // indirect
|
||||
github.com/valyala/bytebufferpool v1.0.0 // indirect
|
||||
github.com/vbatts/tar-split v0.12.2 // indirect
|
||||
github.com/vishvananda/netlink v1.3.0 // indirect
|
||||
github.com/vishvananda/netlink v1.3.1 // indirect
|
||||
github.com/vishvananda/netns v0.0.5 // indirect
|
||||
github.com/whyrusleeping/go-keyspace v0.0.0-20160322163242-5b898ac5add1 // indirect
|
||||
github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8 // indirect
|
||||
@@ -448,7 +451,7 @@ require (
|
||||
go.uber.org/dig v1.19.0 // indirect
|
||||
go.uber.org/fx v1.24.0 // indirect
|
||||
go.uber.org/multierr v1.11.0 // indirect
|
||||
go.uber.org/zap v1.27.0 // indirect
|
||||
go.uber.org/zap v1.27.1 // indirect
|
||||
golang.org/x/crypto v0.50.0
|
||||
golang.org/x/exp v0.0.0-20260410095643-746e56fc9e2f // indirect
|
||||
golang.org/x/mod v0.35.0 // indirect
|
||||
@@ -461,7 +464,7 @@ require (
|
||||
golang.zx2c4.com/wireguard v0.0.0-20250521234502-f333402bd9cb // indirect
|
||||
golang.zx2c4.com/wireguard/windows v0.5.3 // indirect
|
||||
gonum.org/v1/gonum v0.17.0 // indirect
|
||||
google.golang.org/genproto/googleapis/rpc v0.0.0-20260120221211-b8f7ae30c516 // indirect
|
||||
google.golang.org/genproto/googleapis/rpc v0.0.0-20260128011058-8636f8732409 // indirect
|
||||
gopkg.in/fsnotify.v1 v1.4.7 // indirect
|
||||
gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 // indirect
|
||||
howett.net/plist v1.0.2-0.20250314012144-ee69052608d9 // indirect
|
||||
|
||||
97
go.sum
97
go.sum
@@ -100,8 +100,8 @@ github.com/antchfx/xmlquery v1.4.4/go.mod h1:AEPEEPYE9GnA2mj5Ur2L5Q5/2PycJ0N9Fus
|
||||
github.com/antchfx/xpath v1.3.3/go.mod h1:i54GszH55fYfBmoZXapTHN8T8tkcHfRgLyVwwqzXNcs=
|
||||
github.com/antchfx/xpath v1.3.6 h1:s0y+ElRRtTQdfHP609qFu0+c6bglDv20pqOViQjjdPI=
|
||||
github.com/antchfx/xpath v1.3.6/go.mod h1:i54GszH55fYfBmoZXapTHN8T8tkcHfRgLyVwwqzXNcs=
|
||||
github.com/anthropics/anthropic-sdk-go v1.27.0 h1:0CWbmBq5ofGAjF2H6lefCNRbnaUMGiTKO+lb7RLhDbI=
|
||||
github.com/anthropics/anthropic-sdk-go v1.27.0/go.mod h1:qUKmaW+uuPB64iy1l+4kOSvaLqPXnHTTBKH6RVZ7q5Q=
|
||||
github.com/anthropics/anthropic-sdk-go v1.42.0 h1:Zv882/dnrE4OHnwhMAsi9lwVVXRF8GtR3ofiBResYUw=
|
||||
github.com/anthropics/anthropic-sdk-go v1.42.0/go.mod h1:r4eaLX9tBolUrXLOrLj7eU8tmeBtoobCkM0kBsivBaY=
|
||||
github.com/antihax/optional v1.0.0/go.mod h1:uupD/76wgC+ih3iEmQUL+0Ugr19nfwCT1kdvxnR2qWY=
|
||||
github.com/armon/circbuf v0.0.0-20150827004946-bbbad097214e/go.mod h1:3U/XgcO3hCbHZ8TKRvWD2dDTCfh9M9ya+I9JpbB7O8o=
|
||||
github.com/armon/go-metrics v0.0.0-20180917152333-f0300d1749da/go.mod h1:Q73ZrmVTwzkszR9V5SSuryQ31EELlFMUz1kKyl939pY=
|
||||
@@ -279,8 +279,8 @@ github.com/davidlazar/go-crypto v0.0.0-20200604182044-b73af7476f6c h1:pFUpOrbxDR
|
||||
github.com/davidlazar/go-crypto v0.0.0-20200604182044-b73af7476f6c/go.mod h1:6UhI8N9EjYm1c2odKpFpAYeR8dsBeM7PtzQhRgxRr9U=
|
||||
github.com/decred/dcrd/crypto/blake256 v1.1.0 h1:zPMNGQCm0g4QTY27fOCorQW7EryeQ/U0x++OzVrdms8=
|
||||
github.com/decred/dcrd/crypto/blake256 v1.1.0/go.mod h1:2OfgNZ5wDpcsFmHmCK5gZTPcCXqlm2ArzUIkw9czNJo=
|
||||
github.com/decred/dcrd/dcrec/secp256k1/v4 v4.4.0 h1:NMZiJj8QnKe1LgsbDayM4UoHwbvwDRwnI3hwNaAHRnc=
|
||||
github.com/decred/dcrd/dcrec/secp256k1/v4 v4.4.0/go.mod h1:ZXNYxsqcloTdSy/rNShjYzMhyjf0LaoftYK0p+A3h40=
|
||||
github.com/decred/dcrd/dcrec/secp256k1/v4 v4.4.1 h1:5RVFMOWjMyRy8cARdy79nAmgYw3hK/4HUq48LQ6Wwqo=
|
||||
github.com/decred/dcrd/dcrec/secp256k1/v4 v4.4.1/go.mod h1:ZXNYxsqcloTdSy/rNShjYzMhyjf0LaoftYK0p+A3h40=
|
||||
github.com/dhowden/tag v0.0.0-20240417053706-3d75831295e8 h1:OtSeLS5y0Uy01jaKK4mA/WVIYtpzVm63vLVAPzJXigg=
|
||||
github.com/dhowden/tag v0.0.0-20240417053706-3d75831295e8/go.mod h1:apkPC/CR3s48O2D7Y++n1XWEpgPNNCjXYga3PPbJe2E=
|
||||
github.com/distribution/reference v0.6.0 h1:0IXCQ5g4/QMHHkarYzh5l+u8T3t73zM5QvfrDyIgxBk=
|
||||
@@ -334,6 +334,8 @@ github.com/felixge/fgprof v0.9.3 h1:VvyZxILNuCiUCSXtPtYmmtGvb65nqXh2QFWc0Wpf2/g=
|
||||
github.com/felixge/fgprof v0.9.3/go.mod h1:RdbpDgzqYVh/T9fPELJyV7EYJuHB55UTEULNun8eiPw=
|
||||
github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg=
|
||||
github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U=
|
||||
github.com/filecoin-project/go-clock v0.1.0 h1:SFbYIM75M8NnFm1yMHhN9Ahy3W5bEZV9gd6MPfXbKVU=
|
||||
github.com/filecoin-project/go-clock v0.1.0/go.mod h1:4uB/O4PvOjlx1VCMdZ9MyDZXRm//gkj1ELEbxfI1AZs=
|
||||
github.com/flynn/noise v1.1.0 h1:KjPQoQCEFdZDiP03phOvGi11+SVVhBG2wOWAorLsstg=
|
||||
github.com/flynn/noise v1.1.0/go.mod h1:xbMo+0i6+IGbYdJhF31t2eR1BIU0CYc12+BNAKwUTag=
|
||||
github.com/fortytw2/leaktest v1.3.0 h1:u8491cBMTQ8ft8aeV+adlcytMZylmA5nnwwkRZjI8vw=
|
||||
@@ -343,8 +345,8 @@ github.com/frankban/quicktest v1.14.6/go.mod h1:4ptaffx2x8+WTWXmUCuVU6aPUX1/Mz7z
|
||||
github.com/fredbi/uri v1.1.1 h1:xZHJC08GZNIUhbP5ImTHnt5Ya0T8FI2VAwI/37kh2Ko=
|
||||
github.com/fredbi/uri v1.1.1/go.mod h1:4+DZQ5zBjEwQCDmXW5JdIjz0PUA+yJbvtBv+u+adr5o=
|
||||
github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ=
|
||||
github.com/fsnotify/fsnotify v1.9.0 h1:2Ml+OJNzbYCTzsxtv8vKSFD9PbJjmhYF14k/jKC7S9k=
|
||||
github.com/fsnotify/fsnotify v1.9.0/go.mod h1:8jBTzvmWwFyi3Pb8djgCCO5IBqzKJ/Jwo8TRcHyHii0=
|
||||
github.com/fsnotify/fsnotify v1.10.1 h1:b0/UzAf9yR5rhf3RPm9gf3ehBPpf0oZKIjtpKrx59Ho=
|
||||
github.com/fsnotify/fsnotify v1.10.1/go.mod h1:TLheqan6HD6GBK6PrDWyDPBaEV8LspOxvPSjC+bVfgo=
|
||||
github.com/fyne-io/gl-js v0.2.0 h1:+EXMLVEa18EfkXBVKhifYB6OGs3HwKO3lUElA0LlAjs=
|
||||
github.com/fyne-io/gl-js v0.2.0/go.mod h1:ZcepK8vmOYLu96JoxbCKJy2ybr+g1pTnaBDdl7c3ajI=
|
||||
github.com/fyne-io/glfw-js v0.3.0 h1:d8k2+Y7l+zy2pc7wlGRyPfTgZoqDf3AI4G+2zOWhWUk=
|
||||
@@ -468,8 +470,8 @@ github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiu
|
||||
github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek=
|
||||
github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps=
|
||||
github.com/golang/snappy v0.0.2/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
|
||||
github.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM=
|
||||
github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
|
||||
github.com/golang/snappy v0.0.5-0.20231225225746-43d5d4cd4e0e h1:4bw4WeyTYPp0smaXiJZCNnLrvVBqirQVreixayXezGc=
|
||||
github.com/golang/snappy v0.0.5-0.20231225225746-43d5d4cd4e0e/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
|
||||
github.com/gomarkdown/markdown v0.0.0-20250311123330-531bef5e742b h1:EY/KpStFl60qA17CptGXhwfZ+k1sFNJIUNR8DdbcuUk=
|
||||
github.com/gomarkdown/markdown v0.0.0-20250311123330-531bef5e742b/go.mod h1:JDGcbDT52eL4fju3sZ4TeHGsQwhG9nbDV21aMyhwPoA=
|
||||
github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ=
|
||||
@@ -576,25 +578,25 @@ github.com/huin/goupnp v1.3.0/go.mod h1:gnGPsThkYa7bFi/KWmEysQRf48l2dvR5bxr2OFck
|
||||
github.com/ianlancetaylor/demangle v0.0.0-20181102032728-5e5cf60278f6/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc=
|
||||
github.com/ianlancetaylor/demangle v0.0.0-20200824232613-28f6c0f3b639/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc=
|
||||
github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8=
|
||||
github.com/ipfs/boxo v0.30.0 h1:7afsoxPGGqfoH7Dum/wOTGUB9M5fb8HyKPMlLfBvIEQ=
|
||||
github.com/ipfs/boxo v0.30.0/go.mod h1:BPqgGGyHB9rZZcPSzah2Dc9C+5Or3U1aQe7EH1H7370=
|
||||
github.com/ipfs/go-block-format v0.2.0 h1:ZqrkxBA2ICbDRbK8KJs/u0O3dlp6gmAuuXUJNiW1Ycs=
|
||||
github.com/ipfs/go-block-format v0.2.0/go.mod h1:+jpL11nFx5A/SPpsoBn6Bzkra/zaArfSmsknbPMYgzM=
|
||||
github.com/invopop/jsonschema v0.13.0 h1:KvpoAJWEjR3uD9Kbm2HWJmqsEaHt8lBUpd0qHcIi21E=
|
||||
github.com/invopop/jsonschema v0.13.0/go.mod h1:ffZ5Km5SWWRAIN6wbDXItl95euhFz2uON45H2qjYt+0=
|
||||
github.com/ipfs/boxo v0.37.0 h1:2E3mZvydMI2t5IkAgtkmZ3sGsld0oS7o3I+xyzDk6uI=
|
||||
github.com/ipfs/boxo v0.37.0/go.mod h1:8yyiRn54F2CsW13n0zwXEPrVsZix/gFj9SYIRYMZ6KE=
|
||||
github.com/ipfs/go-block-format v0.2.3 h1:mpCuDaNXJ4wrBJLrtEaGFGXkferrw5eqVvzaHhtFKQk=
|
||||
github.com/ipfs/go-block-format v0.2.3/go.mod h1:WJaQmPAKhD3LspLixqlqNFxiZ3BZ3xgqxxoSR/76pnA=
|
||||
github.com/ipfs/go-cid v0.6.1 h1:T5TnNb08+ueovG76Z5gx1L4Y7QOaGTXHg1F6raWFxIc=
|
||||
github.com/ipfs/go-cid v0.6.1/go.mod h1:zrY0SwOhjrrIdfPQ/kf+k1sXyJ0QE7cMxfCployLBs0=
|
||||
github.com/ipfs/go-datastore v0.8.2 h1:Jy3wjqQR6sg/LhyY0NIePZC3Vux19nLtg7dx0TVqr6U=
|
||||
github.com/ipfs/go-datastore v0.8.2/go.mod h1:W+pI1NsUsz3tcsAACMtfC+IZdnQTnC/7VfPoJBQuts0=
|
||||
github.com/ipfs/go-datastore v0.9.1 h1:67Po2epre/o0UxrmkzdS9ZTe2GFGODgTd2odx8Wh6Yo=
|
||||
github.com/ipfs/go-datastore v0.9.1/go.mod h1:zi07Nvrpq1bQwSkEnx3bfjz+SQZbdbWyCNvyxMh9pN0=
|
||||
github.com/ipfs/go-detect-race v0.0.1 h1:qX/xay2W3E4Q1U7d9lNs1sU9nvguX0a7319XbyQ6cOk=
|
||||
github.com/ipfs/go-detect-race v0.0.1/go.mod h1:8BNT7shDZPo99Q74BpGMK+4D8Mn4j46UU0LZ723meps=
|
||||
github.com/ipfs/go-ipfs-util v0.0.3 h1:2RFdGez6bu2ZlZdI+rWfIdbQb1KudQp3VGwPtdNCmE0=
|
||||
github.com/ipfs/go-ipfs-util v0.0.3/go.mod h1:LHzG1a0Ig4G+iZ26UUOMjHd+lfM84LZCrn17xAKWBvs=
|
||||
github.com/ipfs/go-log v1.0.5 h1:2dOuUCB1Z7uoczMWgAyDck5JLb72zHzrMnGnCNNbvY8=
|
||||
github.com/ipfs/go-log v1.0.5/go.mod h1:j0b8ZoR+7+R99LD9jZ6+AJsrzkPbSXbZfGakb5JPtIo=
|
||||
github.com/ipfs/go-log/v2 v2.1.3/go.mod h1:/8d0SH3Su5Ooc31QlL1WysJhvyOTDCjcCZ9Axpmri6g=
|
||||
github.com/ipfs/go-log/v2 v2.6.0 h1:2Nu1KKQQ2ayonKp4MPo6pXCjqw1ULc9iohRqWV5EYqg=
|
||||
github.com/ipfs/go-log/v2 v2.6.0/go.mod h1:p+Efr3qaY5YXpx9TX7MoLCSEZX5boSWj9wh86P5HJa8=
|
||||
github.com/ipfs/go-test v0.2.1 h1:/D/a8xZ2JzkYqcVcV/7HYlCnc7bv/pKHQiX5TdClkPE=
|
||||
github.com/ipfs/go-test v0.2.1/go.mod h1:dzu+KB9cmWjuJnXFDYJwC25T3j1GcN57byN+ixmK39M=
|
||||
github.com/ipfs/go-log/v2 v2.9.1 h1:3JXwHWU31dsCpvQ+7asz6/QsFJHqFr4gLgQ0FWteujk=
|
||||
github.com/ipfs/go-log/v2 v2.9.1/go.mod h1:evFx7sBiohUN3AG12mXlZBw5hacBQld3ZPHrowlJYoo=
|
||||
github.com/ipfs/go-test v0.2.3 h1:Z/jXNAReQFtCYyn7bsv/ZqUwS6E7iIcSpJ2CuzCvnrc=
|
||||
github.com/ipfs/go-test v0.2.3/go.mod h1:QW8vSKkwYvWFwIZQLGQXdkt9Ud76eQXRQ9Ao2H+cA1o=
|
||||
github.com/ipld/go-ipld-prime v0.23.0 h1:csqdPZH60BsTC+AZrv7fpa27v+09I/oTqyHYYYE27eE=
|
||||
github.com/ipld/go-ipld-prime v0.23.0/go.mod h1:46YCFSFNFBJHPjB0pfMuv7Ly7df2eChpkpyPo5SE0bA=
|
||||
github.com/jackc/pgpassfile v1.0.0 h1:/6Hmqy13Ss2zCq62VdNG8tM1wchn8zjSGOBJ6icpsIM=
|
||||
@@ -682,18 +684,18 @@ github.com/libp2p/go-buffer-pool v0.1.0 h1:oK4mSFcQz7cTQIfqbe4MIj9gLW+mnanjyFtc6
|
||||
github.com/libp2p/go-buffer-pool v0.1.0/go.mod h1:N+vh8gMqimBzdKkSMVuydVDq+UV5QTWy5HSiZacSbPg=
|
||||
github.com/libp2p/go-cidranger v1.1.0 h1:ewPN8EZ0dd1LSnrtuwd4709PXVcITVeuwbag38yPW7c=
|
||||
github.com/libp2p/go-cidranger v1.1.0/go.mod h1:KWZTfSr+r9qEo9OkI9/SIEeAtw+NNoU0dXIXt15Okic=
|
||||
github.com/libp2p/go-flow-metrics v0.2.0 h1:EIZzjmeOE6c8Dav0sNv35vhZxATIXWZg6j/C08XmmDw=
|
||||
github.com/libp2p/go-flow-metrics v0.2.0/go.mod h1:st3qqfu8+pMfh+9Mzqb2GTiwrAGjIPszEjZmtksN8Jc=
|
||||
github.com/libp2p/go-flow-metrics v0.3.0 h1:q31zcHUvHnwDO0SHaukewPYgwOBSxtt830uJtUx6784=
|
||||
github.com/libp2p/go-flow-metrics v0.3.0/go.mod h1:nuhlreIwEguM1IvHAew3ij7A8BMlyHQJ279ao24eZZo=
|
||||
github.com/libp2p/go-libp2p v0.48.0 h1:h2BrLAgrj7X8bEN05K7qmrjpNHYA+6tnsGRdprjTnvo=
|
||||
github.com/libp2p/go-libp2p v0.48.0/go.mod h1:Q1fBZNdmC2Hf82husCTfkKJVfHm2we5zk+NWmOGEmWk=
|
||||
github.com/libp2p/go-libp2p-asn-util v0.4.1 h1:xqL7++IKD9TBFMgnLPZR6/6iYhawHKHl950SO9L6n94=
|
||||
github.com/libp2p/go-libp2p-asn-util v0.4.1/go.mod h1:d/NI6XZ9qxw67b4e+NgpQexCIiFYJjErASrYW4PFDN8=
|
||||
github.com/libp2p/go-libp2p-kad-dht v0.33.1 h1:hKFhHMf7WH69LDjaxsJUWOU6qZm71uO47M/a5ijkiP0=
|
||||
github.com/libp2p/go-libp2p-kad-dht v0.33.1/go.mod h1:CdmNk4VeGJa9EXM9SLNyNVySEvduKvb+5rSC/H4pLAo=
|
||||
github.com/libp2p/go-libp2p-kbucket v0.7.0 h1:vYDvRjkyJPeWunQXqcW2Z6E93Ywx7fX0jgzb/dGOKCs=
|
||||
github.com/libp2p/go-libp2p-kbucket v0.7.0/go.mod h1:blOINGIj1yiPYlVEX0Rj9QwEkmVnz3EP8LK1dRKBC6g=
|
||||
github.com/libp2p/go-libp2p-pubsub v0.14.2 h1:nT5lFHPQOFJcp9CW8hpKtvbpQNdl2udJuzLQWbgRum8=
|
||||
github.com/libp2p/go-libp2p-pubsub v0.14.2/go.mod h1:MKPU5vMI8RRFyTP0HfdsF9cLmL1nHAeJm44AxJGJx44=
|
||||
github.com/libp2p/go-libp2p-kad-dht v0.39.0 h1:mww38eBYiUvdsu+Xl/GLlBC0Aa8M+5HAwvafkFOygAM=
|
||||
github.com/libp2p/go-libp2p-kad-dht v0.39.0/go.mod h1:Po2JugFEkDq9Vig/JXtc153ntOi0q58o4j7IuITCOVs=
|
||||
github.com/libp2p/go-libp2p-kbucket v0.8.0 h1:QAK7RzKJpYe+EuSEATAaaHYMYLkPDGC18m9jxPLnU8s=
|
||||
github.com/libp2p/go-libp2p-kbucket v0.8.0/go.mod h1:JMlxqcEyKwO6ox716eyC0hmiduSWZZl6JY93mGaaqc4=
|
||||
github.com/libp2p/go-libp2p-pubsub v0.15.0 h1:cG7Cng2BT82WttmPFMi50gDNV+58K626m/wR00vGL1o=
|
||||
github.com/libp2p/go-libp2p-pubsub v0.15.0/go.mod h1:lr4oE8bFgQaifRcoc2uWhWWiK6tPdOEKpUuR408GFN4=
|
||||
github.com/libp2p/go-libp2p-record v0.3.1 h1:cly48Xi5GjNw5Wq+7gmjfBiG9HCzQVkiZOUZ8kUl+Fg=
|
||||
github.com/libp2p/go-libp2p-record v0.3.1/go.mod h1:T8itUkLcWQLCYMqtX7Th6r7SexyUJpIyPgks757td/E=
|
||||
github.com/libp2p/go-libp2p-routing-helpers v0.7.5 h1:HdwZj9NKovMx0vqq6YNPTh6aaNzey5zHD7HeLJtq6fI=
|
||||
@@ -706,8 +708,8 @@ github.com/libp2p/go-netroute v0.4.0 h1:sZZx9hyANYUx9PZyqcgE/E1GUG3iEtTZHUEvdtXT
|
||||
github.com/libp2p/go-netroute v0.4.0/go.mod h1:Nkd5ShYgSMS5MUKy/MU2T57xFoOKvvLR92Lic48LEyA=
|
||||
github.com/libp2p/go-reuseport v0.4.0 h1:nR5KU7hD0WxXCJbmw7r2rhRYruNRl2koHw8fQscQm2s=
|
||||
github.com/libp2p/go-reuseport v0.4.0/go.mod h1:ZtI03j/wO5hZVDFo2jKywN6bYKWLOy8Se6DrI2E1cLU=
|
||||
github.com/libp2p/go-yamux/v5 v5.0.1 h1:f0WoX/bEF2E8SbE4c/k1Mo+/9z0O4oC/hWEA+nfYRSg=
|
||||
github.com/libp2p/go-yamux/v5 v5.0.1/go.mod h1:en+3cdX51U0ZslwRdRLrvQsdayFt3TSUKvBGErzpWbU=
|
||||
github.com/libp2p/go-yamux/v5 v5.1.0 h1:8Qlxj4E9JGJAQVW6+uj2o7mqkqsIVlSUGmTWhlXzoHE=
|
||||
github.com/libp2p/go-yamux/v5 v5.1.0/go.mod h1:tgIQ07ObtRR/I0IWsFOyQIL9/dR5UXgc2s8xKmNZv1o=
|
||||
github.com/libp2p/zeroconf/v2 v2.2.0 h1:Cup06Jv6u81HLhIj1KasuNM/RHHrJ8T7wOTS4+Tv53Q=
|
||||
github.com/libp2p/zeroconf/v2 v2.2.0/go.mod h1:fuJqLnUwZTshS3U/bMRJ3+ow/v9oid1n0DmyYyNO1Xs=
|
||||
github.com/lithammer/fuzzysearch v1.1.8 h1:/HIuJnjHuXS8bKaiTMeeDlW2/AyIWk2brx1V8LFgLN4=
|
||||
@@ -752,8 +754,8 @@ github.com/microcosm-cc/bluemonday v1.0.27 h1:MpEUotklkwCSLeH+Qdx1VJgNqLlpY2KXwX
|
||||
github.com/microcosm-cc/bluemonday v1.0.27/go.mod h1:jFi9vgW+H7c3V0lb6nR74Ib/DIB5OBs92Dimizgw2cA=
|
||||
github.com/miekg/dns v1.0.14/go.mod h1:W1PPwlIAgtquWBMBEV9nkV9Cazfe8ScdGz/Lj7v3Nrg=
|
||||
github.com/miekg/dns v1.1.43/go.mod h1:+evo5L0630/F6ca/Z9+GAqzhjGyn8/c+TBaOyfEl0V4=
|
||||
github.com/miekg/dns v1.1.66 h1:FeZXOS3VCVsKnEAd+wBkjMC3D2K+ww66Cq3VnCINuJE=
|
||||
github.com/miekg/dns v1.1.66/go.mod h1:jGFzBsSNbJw6z1HYut1RKBKHA9PBdxeHrZG8J+gC2WE=
|
||||
github.com/miekg/dns v1.1.72 h1:vhmr+TF2A3tuoGNkLDFK9zi36F2LS+hKTRW0Uf8kbzI=
|
||||
github.com/miekg/dns v1.1.72/go.mod h1:+EuEPhdHOsfk6Wk5TT2CzssZdqkmFhf8r+aVyDEToIs=
|
||||
github.com/mikioh/tcp v0.0.0-20190314235350-803a9b46060c h1:bzE/A84HN25pxAuk9Eej1Kz9OUelF97nAc82bDquQI8=
|
||||
github.com/mikioh/tcp v0.0.0-20190314235350-803a9b46060c/go.mod h1:0SQS9kMwD2VsyFEB++InYyBJroV/FRmBgcydeSUcJms=
|
||||
github.com/mikioh/tcpinfo v0.0.0-20190314235526-30a79bb1804b h1:z78hV3sbSMAUoyUMM0I83AUIT6Hu17AWfgjzIbtrYFc=
|
||||
@@ -819,8 +821,8 @@ github.com/mudler/LocalAGI v0.0.0-20260508125235-37810d918a87 h1:az+2umaD/sT1rRv
|
||||
github.com/mudler/LocalAGI v0.0.0-20260508125235-37810d918a87/go.mod h1:x77p9W1zKZr+W+UcEwg8/qdp00p4XXOI69wE7WlXZc0=
|
||||
github.com/mudler/cogito v0.9.5-0.20260315222927-63abdec7189b h1:A74T2Lauvg61KodYqsjTYDY05kPLcW+efVZjd23dghU=
|
||||
github.com/mudler/cogito v0.9.5-0.20260315222927-63abdec7189b/go.mod h1:6sfja3lcu2nWRzEc0wwqGNu/eCG3EWgij+8s7xyUeQ4=
|
||||
github.com/mudler/edgevpn v0.31.1 h1:7qegiDWd0kAg6ljhNHxqvp8hbo/6BbzSdbb7/2WZfiY=
|
||||
github.com/mudler/edgevpn v0.31.1/go.mod h1:ftV5B0nKFzm4R8vR80UYnCb2nf7lxCRgAALxUEEgCf8=
|
||||
github.com/mudler/edgevpn v0.32.2 h1:umTPyyZgkom/A81Bk4HbP0p1ZSEU5EFPW3Bg+YPxI8A=
|
||||
github.com/mudler/edgevpn v0.32.2/go.mod h1:UaMc8MORbcRsAjuO5gVJj9Bn3Nq2AP5U9NTb6epVyv8=
|
||||
github.com/mudler/go-piper v0.0.0-20241023091659-2494246fd9fc h1:RxwneJl1VgvikiX28EkpdAyL4yQVnJMrbquKospjHyA=
|
||||
github.com/mudler/go-piper v0.0.0-20241023091659-2494246fd9fc/go.mod h1:O7SwdSWMilAWhBZMK9N9Y/oBDyMMzshE3ju8Xkexwig=
|
||||
github.com/mudler/go-processmanager v0.1.1 h1:c/1NRZOZpW8HuFv9RhBG57nQu1oDMRomEHedwBFMlrw=
|
||||
@@ -846,8 +848,8 @@ github.com/multiformats/go-base36 v0.2.0/go.mod h1:qvnKE++v+2MWCfePClUEjE78Z7P2a
|
||||
github.com/multiformats/go-multiaddr v0.1.1/go.mod h1:aMKBKNEYmzmDmxfX88/vz+J5IU55txyt0p4aiWVohjo=
|
||||
github.com/multiformats/go-multiaddr v0.16.1 h1:fgJ0Pitow+wWXzN9do+1b8Pyjmo8m5WhGfzpL82MpCw=
|
||||
github.com/multiformats/go-multiaddr v0.16.1/go.mod h1:JSVUmXDjsVFiW7RjIFMP7+Ev+h1DTbiJgVeTV/tcmP0=
|
||||
github.com/multiformats/go-multiaddr-dns v0.4.1 h1:whi/uCLbDS3mSEUMb1MsoT4uzUeZB0N32yzufqS0i5M=
|
||||
github.com/multiformats/go-multiaddr-dns v0.4.1/go.mod h1:7hfthtB4E4pQwirrz+J0CcDUfbWzTqEzVyYKKIKpgkc=
|
||||
github.com/multiformats/go-multiaddr-dns v0.5.0 h1:p/FTyHKX0nl59f+S+dEUe8HRK+i5Ow/QHMw8Nh3gPCo=
|
||||
github.com/multiformats/go-multiaddr-dns v0.5.0/go.mod h1:yJ349b8TPIAANUyuOzn1oz9o22tV9f+06L+cCeMxC14=
|
||||
github.com/multiformats/go-multiaddr-fmt v0.1.0 h1:WLEFClPycPkp4fnIzoFoV9FVd49/eQsuaL3/CWe167E=
|
||||
github.com/multiformats/go-multiaddr-fmt v0.1.0/go.mod h1:hGtDIW4PU4BqJ50gW2quDuPVjyWNZxToGUh/HwTZYJo=
|
||||
github.com/multiformats/go-multibase v0.3.0 h1:8helZD2+4Db7NNWFiktk2NePbF0boolBe6bDQvM4r68=
|
||||
@@ -887,8 +889,8 @@ github.com/onsi/ginkgo v1.16.5 h1:8xi0RTUf59SOSfEtZMvwTvXYMzG4gV23XVHOZiXNtnE=
|
||||
github.com/onsi/ginkgo v1.16.5/go.mod h1:+E8gABHa3K6zRBolWtd+ROzc/U5bkGt0FwiG042wbpU=
|
||||
github.com/onsi/ginkgo/v2 v2.28.2 h1:DTrMfpqxiNUyQ3Y0zhn1n3cOO2euFgQPYIpkWwxVFps=
|
||||
github.com/onsi/ginkgo/v2 v2.28.2/go.mod h1:CLtbVInNckU3/+gC8LzkGUb9oF+e8W8TdUsxPwvdOgE=
|
||||
github.com/onsi/gomega v1.39.1 h1:1IJLAad4zjPn2PsnhH70V4DKRFlrCzGBNrNaru+Vf28=
|
||||
github.com/onsi/gomega v1.39.1/go.mod h1:hL6yVALoTOxeWudERyfppUcZXjMwIMLnuSfruD2lcfg=
|
||||
github.com/onsi/gomega v1.40.0 h1:Vtol0e1MghCD2ZVIilPDIg44XSL9l2QAn8ZNaljWcJc=
|
||||
github.com/onsi/gomega v1.40.0/go.mod h1:M/Uqpu/8qTjtzCLUA2zJHX9Iilrau25x1PdoSRbWh5A=
|
||||
github.com/openai/openai-go/v3 v3.26.0 h1:bRt6H/ozMNt/dDkN4gobnLqaEGrRGBzmbVs0xxJEnQE=
|
||||
github.com/openai/openai-go/v3 v3.26.0/go.mod h1:cdufnVK14cWcT9qA1rRtrXx4FTRsgbDPW7Ia7SS5cZo=
|
||||
github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U=
|
||||
@@ -1071,6 +1073,8 @@ github.com/srwiley/rasterx v0.0.0-20220730225603-2ab79fcdd4ef h1:Ch6Q+AZUxDBCVqd
|
||||
github.com/srwiley/rasterx v0.0.0-20220730225603-2ab79fcdd4ef/go.mod h1:nXTWP6+gD5+LUJ8krVhhoeHjvHTutPxMYl5SvkcnJNE=
|
||||
github.com/ssor/bom v0.0.0-20170718123548-6386211fdfcf h1:pvbZ0lM0XWPBqUKqFU8cmavspvIl9nulOYwdy6IFRRo=
|
||||
github.com/ssor/bom v0.0.0-20170718123548-6386211fdfcf/go.mod h1:RJID2RhlZKId02nZ62WenDCkgHFerpIOmW0iT7GKmXM=
|
||||
github.com/standard-webhooks/standard-webhooks/libraries v0.0.0-20260508151727-1282bb917829 h1:zGlGD0Zfk2HaIo4EnUVBRhnXQ+cnGQz5X2PdBcplOyw=
|
||||
github.com/standard-webhooks/standard-webhooks/libraries v0.0.0-20260508151727-1282bb917829/go.mod h1:L1MQhA6x4dn9r007T033lsaZMv9EmBAdXyU/+EF40fo=
|
||||
github.com/streamer45/silero-vad-go v0.2.1 h1:Li1/tTC4H/3cyw6q4weX+U8GWwEL3lTekK/nYa1Cvuk=
|
||||
github.com/streamer45/silero-vad-go v0.2.1/go.mod h1:B+2FXs/5fZ6pzl6unUZYhZqkYdOB+3saBVzjOzdZnUs=
|
||||
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
|
||||
@@ -1146,9 +1150,8 @@ github.com/valyala/fasttemplate v1.2.2 h1:lxLXG0uE3Qnshl9QyaK6XJxMXlQZELvChBOCmQ
|
||||
github.com/valyala/fasttemplate v1.2.2/go.mod h1:KHLXt3tVN2HBp8eijSv/kGJopbvo7S+qRAEEKiv+SiQ=
|
||||
github.com/vbatts/tar-split v0.12.2 h1:w/Y6tjxpeiFMR47yzZPlPj/FcPLpXbTUi/9H7d3CPa4=
|
||||
github.com/vbatts/tar-split v0.12.2/go.mod h1:eF6B6i6ftWQcDqEn3/iGFRFRo8cBIMSJVOpnNdfTMFA=
|
||||
github.com/vishvananda/netlink v1.3.0 h1:X7l42GfcV4S6E4vHTsw48qbrV+9PVojNfIhZcwQdrZk=
|
||||
github.com/vishvananda/netlink v1.3.0/go.mod h1:i6NetklAujEcC6fK0JPjT8qSwWyO0HLn4UKG+hGqeJs=
|
||||
github.com/vishvananda/netns v0.0.4/go.mod h1:SpkAiCQRtJ6TvvxPnOSyH3BMl6unz3xZlaprSwhNNJM=
|
||||
github.com/vishvananda/netlink v1.3.1 h1:3AEMt62VKqz90r0tmNhog0r/PpWKmrEShJU0wJW6bV0=
|
||||
github.com/vishvananda/netlink v1.3.1/go.mod h1:ARtKouGSTGchR8aMwmkzC0qiNPrrWO5JS/XMVl45+b4=
|
||||
github.com/vishvananda/netns v0.0.5 h1:DfiHV+j8bA32MFM7bfEunvT8IAqQ/NzSJHtcmW5zdEY=
|
||||
github.com/vishvananda/netns v0.0.5/go.mod h1:SpkAiCQRtJ6TvvxPnOSyH3BMl6unz3xZlaprSwhNNJM=
|
||||
github.com/warpfork/go-wish v0.0.0-20220906213052-39a1cc7a02d0 h1:GDDkbFiaK8jsSDJfjId/PEGEShv6ugrt4kYsC5UIDaQ=
|
||||
@@ -1199,8 +1202,8 @@ go.opencensus.io v0.24.0 h1:y73uSU6J157QMP2kn2r30vwW1A2W2WFwSCGnAVxeaD0=
|
||||
go.opencensus.io v0.24.0/go.mod h1:vNK8G9p7aAivkbmorf4v+7Hgx+Zs0yY+0fOtgBfjQKo=
|
||||
go.opentelemetry.io/auto/sdk v1.2.1 h1:jXsnJ4Lmnqd11kwkBV2LgLoFMZKizbCi5fNZ/ipaZ64=
|
||||
go.opentelemetry.io/auto/sdk v1.2.1/go.mod h1:KRTj+aOaElaLi+wW1kO/DZRXwkF4C5xPbEe3ZiIhN7Y=
|
||||
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.61.0 h1:F7Jx+6hwnZ41NSFTO5q4LYDtJRXBf2PD0rNBkeB/lus=
|
||||
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.61.0/go.mod h1:UHB22Z8QsdRDrnAtX4PntOl36ajSxcdUMt1sF7Y6E7Q=
|
||||
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.65.0 h1:7iP2uCb7sGddAr30RRS6xjKy7AZ2JtTOPA3oolgVSw8=
|
||||
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.65.0/go.mod h1:c7hN3ddxs/z6q9xwvfLPk+UHlWRQyaeR1LdgfL/66l0=
|
||||
go.opentelemetry.io/otel v1.43.0 h1:mYIM03dnh5zfN7HautFE4ieIig9amkNANT+xcVxAj9I=
|
||||
go.opentelemetry.io/otel v1.43.0/go.mod h1:JuG+u74mvjvcm8vj8pI5XiHy1zDeoCS2LB1spIq7Ay0=
|
||||
go.opentelemetry.io/otel/exporters/prometheus v0.65.0 h1:jOveH/b4lU9HT7y+Gfamf18BqlOuz2PWEvs8yM7Q6XE=
|
||||
@@ -1232,8 +1235,8 @@ go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN8
|
||||
go.uber.org/tools v0.0.0-20190618225709-2cfd321de3ee/go.mod h1:vJERXedbb3MVM5f9Ejo0C68/HhF8uaILCdgjnY+goOA=
|
||||
go.uber.org/zap v1.16.0/go.mod h1:MA8QOfq0BHJwdXa996Y4dYkAqRKB8/1K1QMMZVaNZjQ=
|
||||
go.uber.org/zap v1.17.0/go.mod h1:MXVU+bhUf/A7Xi2HNOnopQOrmycQ5Ih87HtOu4q5SSo=
|
||||
go.uber.org/zap v1.27.0 h1:aJMhYGrd5QSmlpLMr2MftRKl7t8J8PTZPA732ud/XR8=
|
||||
go.uber.org/zap v1.27.0/go.mod h1:GB2qFLM7cTU87MWRP2mPIjqfIDnGu+VIO4V/SdhGo2E=
|
||||
go.uber.org/zap v1.27.1 h1:08RqriUEv8+ArZRYSTXy1LeBScaMpVSTBhCeaZYfMYc=
|
||||
go.uber.org/zap v1.27.1/go.mod h1:GB2qFLM7cTU87MWRP2mPIjqfIDnGu+VIO4V/SdhGo2E=
|
||||
go.yaml.in/yaml/v2 v2.4.4 h1:tuyd0P+2Ont/d6e2rl3be67goVK4R6deVxCUX5vyPaQ=
|
||||
go.yaml.in/yaml/v2 v2.4.4/go.mod h1:gMZqIpDtDqOfM0uNfy0SkpRhvUryYH0Z6wdMYcacYXQ=
|
||||
go.yaml.in/yaml/v3 v3.0.4 h1:tfq32ie2Jv2UxXFdLJdh3jXuOzWiL1fo0bu/FbuKpbc=
|
||||
@@ -1641,8 +1644,8 @@ google.golang.org/genproto v0.0.0-20210310155132-4ce2db91004e/go.mod h1:FWY/as6D
|
||||
google.golang.org/genproto v0.0.0-20210319143718-93e7006c17a6/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no=
|
||||
google.golang.org/genproto v0.0.0-20210402141018-6c239bbf2bb1/go.mod h1:9lPAdzaEmUacj36I+k7YKbEc5CXzPIeORRgDAUOu28A=
|
||||
google.golang.org/genproto v0.0.0-20210602131652-f16073e35f0c/go.mod h1:UODoCrxHCcBojKKwX1terBiRUaqAsFqJiF615XL43r0=
|
||||
google.golang.org/genproto/googleapis/rpc v0.0.0-20260120221211-b8f7ae30c516 h1:sNrWoksmOyF5bvJUcnmbeAmQi8baNhqg5IWaI3llQqU=
|
||||
google.golang.org/genproto/googleapis/rpc v0.0.0-20260120221211-b8f7ae30c516/go.mod h1:j9x/tPzZkyxcgEFkiKEEGxfvyumM01BEtsW8xzOahRQ=
|
||||
google.golang.org/genproto/googleapis/rpc v0.0.0-20260128011058-8636f8732409 h1:H86B94AW+VfJWDqFeEbBPhEtHzJwJfTbgE2lZa54ZAQ=
|
||||
google.golang.org/genproto/googleapis/rpc v0.0.0-20260128011058-8636f8732409/go.mod h1:j9x/tPzZkyxcgEFkiKEEGxfvyumM01BEtsW8xzOahRQ=
|
||||
google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c=
|
||||
google.golang.org/grpc v1.20.1/go.mod h1:10oTOabMzJvdu6/UiuZezV6QK5dSlG84ov/aaiqXj38=
|
||||
google.golang.org/grpc v1.21.1/go.mod h1:oYelfM1adQP15Ek0mdvEgi9Df8B9CZIaU1084ijfRaM=
|
||||
|
||||
106
pkg/functions/parse_lfm2_test.go
Normal file
106
pkg/functions/parse_lfm2_test.go
Normal file
@@ -0,0 +1,106 @@
|
||||
package functions_test
|
||||
|
||||
import (
|
||||
. "github.com/mudler/LocalAI/pkg/functions"
|
||||
. "github.com/onsi/ginkgo/v2"
|
||||
. "github.com/onsi/gomega"
|
||||
)
|
||||
|
||||
// LFM2 / LFM2.5 emit tool calls in a Pythonic syntax wrapped in special tokens:
|
||||
//
|
||||
// <|tool_call_start|>[func_name(arg1="value1", arg2="value2")]<|tool_call_end|>
|
||||
//
|
||||
// See backend/cpp/llama-cpp/llama.cpp/common/chat.cpp:1277 (common_chat_params_init_lfm2)
|
||||
// and https://docs.liquid.ai/lfm/key-concepts/tool-use. The format is auto-detected
|
||||
// by upstream llama.cpp when the chat template contains <|tool_list_start|>/<|tool_list_end|>.
|
||||
//
|
||||
// The tests below pin the LocalAI-side parser config (response_regex + argument_regex)
|
||||
// that the lfm gallery template ships, so configurations relying on the gRPC backend
|
||||
// returning raw text (rather than pre-parsed tool_calls via use_jinja) still work.
|
||||
var _ = Describe("LFM2 Pythonic tool-call parsing", func() {
|
||||
// Matches the markers exactly; non-greedy `arguments` so the closing `)]` of one
|
||||
// call doesn't swallow trailing content that happens to share characters.
|
||||
const lfm2ResponseRegex = `<\|tool_call_start\|>\[(?P<name>\w+)\((?P<arguments>.*?)\)\]<\|tool_call_end\|>`
|
||||
|
||||
// Two argument extractors: quoted strings and bare scalars (numbers / true / false / null).
|
||||
// ParseFunctionCallArgs runs every regex in order, so later matches with the same key
|
||||
// would overwrite earlier ones — which is fine here because the patterns are disjoint.
|
||||
var lfm2ArgRegex = []string{
|
||||
`(?P<key>\w+)\s*=\s*"(?P<value>[^"]*)"`,
|
||||
`(?P<key>\w+)\s*=\s*(?P<value>-?\d+(?:\.\d+)?|true|false|null)`,
|
||||
}
|
||||
|
||||
cfg := func() FunctionsConfig {
|
||||
return FunctionsConfig{
|
||||
ResponseRegex: []string{lfm2ResponseRegex},
|
||||
ArgumentRegex: lfm2ArgRegex,
|
||||
ArgumentRegexKey: "key",
|
||||
ArgumentRegexValue: "value",
|
||||
}
|
||||
}
|
||||
|
||||
It("parses a single string-arg call", func() {
|
||||
input := `<|tool_call_start|>[get_weather(city="Berlin")]<|tool_call_end|>`
|
||||
results := ParseFunctionCall(input, cfg())
|
||||
Expect(results).To(HaveLen(1))
|
||||
Expect(results[0].Name).To(Equal("get_weather"))
|
||||
Expect(results[0].Arguments).To(Equal(`{"city":"Berlin"}`))
|
||||
})
|
||||
|
||||
It("parses multiple string args", func() {
|
||||
input := `<|tool_call_start|>[search(query="hello world", source="web")]<|tool_call_end|>`
|
||||
results := ParseFunctionCall(input, cfg())
|
||||
Expect(results).To(HaveLen(1))
|
||||
Expect(results[0].Name).To(Equal("search"))
|
||||
// argument map ordering is not stable; check content as JSON
|
||||
Expect(results[0].Arguments).To(SatisfyAny(
|
||||
Equal(`{"query":"hello world","source":"web"}`),
|
||||
Equal(`{"source":"web","query":"hello world"}`),
|
||||
))
|
||||
})
|
||||
|
||||
It("parses numeric and boolean args", func() {
|
||||
input := `<|tool_call_start|>[set_volume(level=42, mute=false)]<|tool_call_end|>`
|
||||
results := ParseFunctionCall(input, cfg())
|
||||
Expect(results).To(HaveLen(1))
|
||||
Expect(results[0].Name).To(Equal("set_volume"))
|
||||
// ArgumentRegex always emits string values; the JSON we produce represents
|
||||
// them as strings. A typed parser is a future enhancement (PEG parser).
|
||||
Expect(results[0].Arguments).To(SatisfyAny(
|
||||
Equal(`{"level":"42","mute":"false"}`),
|
||||
Equal(`{"mute":"false","level":"42"}`),
|
||||
))
|
||||
})
|
||||
|
||||
It("parses a no-args call", func() {
|
||||
input := `<|tool_call_start|>[get_time()]<|tool_call_end|>`
|
||||
results := ParseFunctionCall(input, cfg())
|
||||
Expect(results).To(HaveLen(1))
|
||||
Expect(results[0].Name).To(Equal("get_time"))
|
||||
Expect(results[0].Arguments).To(Equal(`{}`))
|
||||
})
|
||||
|
||||
It("ignores surrounding text", func() {
|
||||
input := `Sure, let me check.
|
||||
<|tool_call_start|>[get_weather(city="Paris")]<|tool_call_end|>
|
||||
Standby.`
|
||||
results := ParseFunctionCall(input, cfg())
|
||||
Expect(results).To(HaveLen(1))
|
||||
Expect(results[0].Name).To(Equal("get_weather"))
|
||||
Expect(results[0].Arguments).To(Equal(`{"city":"Paris"}`))
|
||||
})
|
||||
|
||||
It("returns no results when the markers are absent", func() {
|
||||
input := `Plain text response with no tool call.`
|
||||
results := ParseFunctionCall(input, cfg())
|
||||
Expect(results).To(BeEmpty())
|
||||
})
|
||||
|
||||
It("preserves quoted argument values that contain spaces and equals signs", func() {
|
||||
input := `<|tool_call_start|>[search(query="x = y + 1")]<|tool_call_end|>`
|
||||
results := ParseFunctionCall(input, cfg())
|
||||
Expect(results).To(HaveLen(1))
|
||||
Expect(results[0].Name).To(Equal("search"))
|
||||
Expect(results[0].Arguments).To(Equal(`{"query":"x = y + 1"}`))
|
||||
})
|
||||
})
|
||||
@@ -82,6 +82,7 @@ type Backend interface {
|
||||
|
||||
AudioTransform(ctx context.Context, in *pb.AudioTransformRequest, opts ...grpc.CallOption) (*pb.AudioTransformResult, error)
|
||||
AudioTransformStream(ctx context.Context, opts ...grpc.CallOption) (AudioTransformStreamClient, error)
|
||||
AudioToAudioStream(ctx context.Context, opts ...grpc.CallOption) (AudioToAudioStreamClient, error)
|
||||
|
||||
ModelMetadata(ctx context.Context, in *pb.ModelOptions, opts ...grpc.CallOption) (*pb.ModelMetadataResponse, error)
|
||||
|
||||
|
||||
@@ -158,6 +158,11 @@ func (llm *Base) AudioTransformStream(in <-chan *pb.AudioTransformFrameRequest,
|
||||
return fmt.Errorf("unimplemented")
|
||||
}
|
||||
|
||||
func (llm *Base) AudioToAudioStream(in <-chan *pb.AudioToAudioRequest, out chan<- *pb.AudioToAudioResponse) error {
|
||||
close(out)
|
||||
return fmt.Errorf("unimplemented")
|
||||
}
|
||||
|
||||
func (llm *Base) StartFineTune(*pb.FineTuneRequest) (*pb.FineTuneJobResult, error) {
|
||||
return nil, fmt.Errorf("unimplemented")
|
||||
}
|
||||
|
||||
@@ -805,6 +805,67 @@ func (c *Client) AudioTransformStream(ctx context.Context, opts ...grpc.CallOpti
|
||||
}, nil
|
||||
}
|
||||
|
||||
// AudioToAudioStreamClient is the duplex interface returned by
|
||||
// (*Client).AudioToAudioStream. Mirrors AudioTransformStreamClient's
|
||||
// shape so realtime-API callers can plug in interchangeable backends.
|
||||
type AudioToAudioStreamClient interface {
|
||||
Send(*pb.AudioToAudioRequest) error
|
||||
Recv() (*pb.AudioToAudioResponse, error)
|
||||
CloseSend() error
|
||||
Context() context.Context
|
||||
}
|
||||
|
||||
type audioToAudioStreamClient struct {
|
||||
pb.Backend_AudioToAudioStreamClient
|
||||
conn *grpc.ClientConn
|
||||
closer func()
|
||||
}
|
||||
|
||||
func (s *audioToAudioStreamClient) CloseSend() error {
|
||||
err := s.Backend_AudioToAudioStreamClient.CloseSend()
|
||||
if s.closer != nil {
|
||||
s.closer()
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
func (c *Client) AudioToAudioStream(ctx context.Context, opts ...grpc.CallOption) (AudioToAudioStreamClient, error) {
|
||||
if !c.parallel {
|
||||
c.opMutex.Lock()
|
||||
}
|
||||
c.setBusy(true)
|
||||
c.wdMark()
|
||||
|
||||
cleanup := func() {
|
||||
c.wdUnMark()
|
||||
c.setBusy(false)
|
||||
if !c.parallel {
|
||||
c.opMutex.Unlock()
|
||||
}
|
||||
}
|
||||
|
||||
conn, err := c.dial()
|
||||
if err != nil {
|
||||
cleanup()
|
||||
return nil, err
|
||||
}
|
||||
client := pb.NewBackendClient(conn)
|
||||
stream, err := client.AudioToAudioStream(ctx, opts...)
|
||||
if err != nil {
|
||||
_ = conn.Close()
|
||||
cleanup()
|
||||
return nil, err
|
||||
}
|
||||
return &audioToAudioStreamClient{
|
||||
Backend_AudioToAudioStreamClient: stream,
|
||||
conn: conn,
|
||||
closer: func() {
|
||||
_ = conn.Close()
|
||||
cleanup()
|
||||
},
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (c *Client) StartFineTune(ctx context.Context, in *pb.FineTuneRequest, opts ...grpc.CallOption) (*pb.FineTuneJobResult, error) {
|
||||
if !c.parallel {
|
||||
c.opMutex.Lock()
|
||||
|
||||
@@ -181,6 +181,31 @@ func (e *embedBackend) AudioTransformStream(ctx context.Context, opts ...grpc.Ca
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (e *embedBackend) AudioToAudioStream(ctx context.Context, opts ...grpc.CallOption) (AudioToAudioStreamClient, error) {
|
||||
reqs := make(chan *pb.AudioToAudioRequest, 8)
|
||||
resps := make(chan *pb.AudioToAudioResponse, 8)
|
||||
srvDone := make(chan error, 1)
|
||||
|
||||
server := &embedBackendAudioToAudioStream{
|
||||
ctx: ctx,
|
||||
reqs: reqs,
|
||||
resps: resps,
|
||||
}
|
||||
|
||||
go func() {
|
||||
err := e.s.AudioToAudioStream(server)
|
||||
close(resps)
|
||||
srvDone <- err
|
||||
}()
|
||||
|
||||
return &embedBackendAudioToAudioStreamClient{
|
||||
ctx: ctx,
|
||||
reqs: reqs,
|
||||
resps: resps,
|
||||
srvDone: srvDone,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (e *embedBackend) ModelMetadata(ctx context.Context, in *pb.ModelOptions, opts ...grpc.CallOption) (*pb.ModelMetadataResponse, error) {
|
||||
return e.s.ModelMetadata(ctx, in)
|
||||
}
|
||||
@@ -236,6 +261,8 @@ func (e *embedBackend) Free(ctx context.Context) error {
|
||||
|
||||
var _ pb.Backend_AudioTransformStreamServer = new(embedBackendAudioTransformStream)
|
||||
var _ AudioTransformStreamClient = new(embedBackendAudioTransformStreamClient)
|
||||
var _ pb.Backend_AudioToAudioStreamServer = new(embedBackendAudioToAudioStream)
|
||||
var _ AudioToAudioStreamClient = new(embedBackendAudioToAudioStreamClient)
|
||||
|
||||
// embedBackendAudioTransformStream is the server side of an in-process bidi
|
||||
// stream. The hosted server reads requests from `reqs` (closed by client when
|
||||
@@ -332,6 +359,99 @@ func (e *embedBackendAudioTransformStreamClient) CloseSend() error {
|
||||
|
||||
func (e *embedBackendAudioTransformStreamClient) Context() context.Context { return e.ctx }
|
||||
|
||||
// embedBackendAudioToAudioStream is the in-process server-side handle for
|
||||
// the bidirectional any-to-any audio RPC. Mirrors embedBackendAudioTransform
|
||||
// Stream — the hosted server reads requests from `reqs` (closed by client
|
||||
// when done sending) and writes responses to `resps`.
|
||||
type embedBackendAudioToAudioStream struct {
|
||||
ctx context.Context
|
||||
reqs <-chan *pb.AudioToAudioRequest
|
||||
resps chan<- *pb.AudioToAudioResponse
|
||||
}
|
||||
|
||||
func (e *embedBackendAudioToAudioStream) Send(resp *pb.AudioToAudioResponse) error {
|
||||
select {
|
||||
case e.resps <- resp:
|
||||
return nil
|
||||
case <-e.ctx.Done():
|
||||
return e.ctx.Err()
|
||||
}
|
||||
}
|
||||
|
||||
func (e *embedBackendAudioToAudioStream) Recv() (*pb.AudioToAudioRequest, error) {
|
||||
select {
|
||||
case req, ok := <-e.reqs:
|
||||
if !ok {
|
||||
return nil, io.EOF
|
||||
}
|
||||
return req, nil
|
||||
case <-e.ctx.Done():
|
||||
return nil, e.ctx.Err()
|
||||
}
|
||||
}
|
||||
|
||||
func (e *embedBackendAudioToAudioStream) SetHeader(md metadata.MD) error { return nil }
|
||||
func (e *embedBackendAudioToAudioStream) SendHeader(md metadata.MD) error { return nil }
|
||||
func (e *embedBackendAudioToAudioStream) SetTrailer(md metadata.MD) {}
|
||||
func (e *embedBackendAudioToAudioStream) Context() context.Context { return e.ctx }
|
||||
func (e *embedBackendAudioToAudioStream) SendMsg(m any) error {
|
||||
if x, ok := m.(*pb.AudioToAudioResponse); ok {
|
||||
return e.Send(x)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
func (e *embedBackendAudioToAudioStream) RecvMsg(m any) error { return nil }
|
||||
|
||||
type embedBackendAudioToAudioStreamClient struct {
|
||||
ctx context.Context
|
||||
reqs chan<- *pb.AudioToAudioRequest
|
||||
resps <-chan *pb.AudioToAudioResponse
|
||||
srvDone <-chan error
|
||||
closeOnce bool
|
||||
}
|
||||
|
||||
func (e *embedBackendAudioToAudioStreamClient) Send(req *pb.AudioToAudioRequest) error {
|
||||
select {
|
||||
case e.reqs <- req:
|
||||
return nil
|
||||
case <-e.ctx.Done():
|
||||
return e.ctx.Err()
|
||||
}
|
||||
}
|
||||
|
||||
func (e *embedBackendAudioToAudioStreamClient) Recv() (*pb.AudioToAudioResponse, error) {
|
||||
select {
|
||||
case resp, ok := <-e.resps:
|
||||
if !ok {
|
||||
// Server goroutine writes to srvDone immediately after closing
|
||||
// resps; block (cap with ctx) so we don't race past a real error.
|
||||
select {
|
||||
case err := <-e.srvDone:
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
case <-e.ctx.Done():
|
||||
return nil, e.ctx.Err()
|
||||
}
|
||||
return nil, io.EOF
|
||||
}
|
||||
return resp, nil
|
||||
case <-e.ctx.Done():
|
||||
return nil, e.ctx.Err()
|
||||
}
|
||||
}
|
||||
|
||||
func (e *embedBackendAudioToAudioStreamClient) CloseSend() error {
|
||||
if e.closeOnce {
|
||||
return nil
|
||||
}
|
||||
e.closeOnce = true
|
||||
close(e.reqs)
|
||||
return nil
|
||||
}
|
||||
|
||||
// Context returns the context the stream was created with.
func (e *embedBackendAudioToAudioStreamClient) Context() context.Context {
	return e.ctx
}
|
||||
|
||||
var _ pb.Backend_AudioTranscriptionStreamServer = new(embedBackendAudioTranscriptionStream)
|
||||
|
||||
type embedBackendAudioTranscriptionStream struct {
|
||||
|
||||
@@ -45,6 +45,7 @@ type AIModel interface {
|
||||
|
||||
AudioTransform(*pb.AudioTransformRequest) (*pb.AudioTransformResult, error)
|
||||
AudioTransformStream(in <-chan *pb.AudioTransformFrameRequest, out chan<- *pb.AudioTransformFrameResponse) error
|
||||
AudioToAudioStream(in <-chan *pb.AudioToAudioRequest, out chan<- *pb.AudioToAudioResponse) error
|
||||
|
||||
ModelMetadata(*pb.ModelOptions) (*pb.ModelMetadataResponse, error)
|
||||
|
||||
|
||||
@@ -487,6 +487,66 @@ func (s *server) AudioTransformStream(stream pb.Backend_AudioTransformStreamServ
|
||||
return recvErr
|
||||
}
|
||||
|
||||
// AudioToAudioStream is the bidirectional any-to-any S2S handler. The
|
||||
// shape mirrors AudioTransformStream exactly (recv → in chan, out chan →
|
||||
// send) so backends can implement either via the same goroutine idiom.
|
||||
func (s *server) AudioToAudioStream(stream pb.Backend_AudioToAudioStreamServer) error {
|
||||
if s.llm.Locking() {
|
||||
s.llm.Lock()
|
||||
defer s.llm.Unlock()
|
||||
}
|
||||
|
||||
in := make(chan *pb.AudioToAudioRequest, 8)
|
||||
out := make(chan *pb.AudioToAudioResponse, 8)
|
||||
|
||||
recvErrCh := make(chan error, 1)
|
||||
go func() {
|
||||
defer close(in)
|
||||
for {
|
||||
req, err := stream.Recv()
|
||||
if err != nil {
|
||||
if errors.Is(err, io.EOF) {
|
||||
recvErrCh <- nil
|
||||
return
|
||||
}
|
||||
recvErrCh <- err
|
||||
return
|
||||
}
|
||||
select {
|
||||
case in <- req:
|
||||
case <-stream.Context().Done():
|
||||
recvErrCh <- stream.Context().Err()
|
||||
return
|
||||
}
|
||||
}
|
||||
}()
|
||||
|
||||
sendDone := make(chan error, 1)
|
||||
go func() {
|
||||
for resp := range out {
|
||||
if err := stream.Send(resp); err != nil {
|
||||
sendDone <- err
|
||||
for range out {
|
||||
}
|
||||
return
|
||||
}
|
||||
}
|
||||
sendDone <- nil
|
||||
}()
|
||||
|
||||
backendErr := s.llm.AudioToAudioStream(in, out)
|
||||
sendErr := <-sendDone
|
||||
recvErr := <-recvErrCh
|
||||
|
||||
if backendErr != nil {
|
||||
return backendErr
|
||||
}
|
||||
if sendErr != nil {
|
||||
return sendErr
|
||||
}
|
||||
return recvErr
|
||||
}
|
||||
|
||||
func (s *server) StartFineTune(ctx context.Context, in *pb.FineTuneRequest) (*pb.FineTuneJobResult, error) {
|
||||
if s.llm.Locking() {
|
||||
s.llm.Lock()
|
||||
|
||||
@@ -1,8 +1,10 @@
|
||||
package xsysinfo
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"io"
|
||||
"os"
|
||||
"os/exec"
|
||||
"strconv"
|
||||
@@ -801,14 +803,15 @@ func GetResourceAggregateInfo() AggregateMemoryInfo {
|
||||
return resourceInfo.Aggregate
|
||||
}
|
||||
|
||||
// getVulkanGPUMemory queries GPUs using vulkaninfo as a fallback
|
||||
// Note: Vulkan provides memory heap info but not real-time usage
|
||||
// getVulkanGPUMemory queries GPUs using vulkaninfo as a fallback.
|
||||
// Note: vulkaninfo JSON is a Vulkan Profiles export and does not include
|
||||
// VkPhysicalDeviceMemoryProperties, so memory heaps are parsed from text output.
|
||||
func getVulkanGPUMemory() []GPUMemoryInfo {
|
||||
if _, err := exec.LookPath("vulkaninfo"); err != nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
cmd := exec.Command("vulkaninfo", "--json")
|
||||
cmd := exec.Command("vulkaninfo", "--text")
|
||||
|
||||
var stdout, stderr bytes.Buffer
|
||||
cmd.Stdout = &stdout
|
||||
@@ -819,60 +822,207 @@ func getVulkanGPUMemory() []GPUMemoryInfo {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Parse Vulkan JSON output
|
||||
var result struct {
|
||||
VkPhysicalDevices []struct {
|
||||
DeviceName string `json:"deviceName"`
|
||||
DeviceType string `json:"deviceType"`
|
||||
VkPhysicalDeviceMemoryProperties struct {
|
||||
MemoryHeaps []struct {
|
||||
Flags int `json:"flags"`
|
||||
Size uint64 `json:"size"`
|
||||
} `json:"memoryHeaps"`
|
||||
} `json:"VkPhysicalDeviceMemoryProperties"`
|
||||
} `json:"VkPhysicalDevices"`
|
||||
}
|
||||
return parseVulkanGPUMemoryText(strings.NewReader(stdout.String()))
|
||||
|
||||
if err := json.Unmarshal(stdout.Bytes(), &result); err != nil {
|
||||
xlog.Debug("failed to parse vulkaninfo output", "error", err)
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// vulkanGPUTextInfo collects what is parsed for a single GPU section of
// the textual vulkaninfo output.
type vulkanGPUTextInfo struct {
	// index is the number taken from the "GPU<n>:" section header.
	index int
	// name is the value of the deviceName line.
	name string
	// deviceType is the value of the deviceType line.
	deviceType string
	// totalVRAM, budgetVRAM and usageVRAM are running totals over this
	// GPU's memory heaps.
	totalVRAM  uint64
	budgetVRAM uint64
	usageVRAM  uint64
}
|
||||
|
||||
func parseVulkanGPUMemoryText(r io.Reader) []GPUMemoryInfo {
|
||||
var gpus []GPUMemoryInfo
|
||||
var current *vulkanGPUTextInfo
|
||||
|
||||
for i, device := range result.VkPhysicalDevices {
|
||||
// Skip non-discrete/integrated GPUs if possible
|
||||
if device.DeviceType == "VK_PHYSICAL_DEVICE_TYPE_CPU" {
|
||||
continue
|
||||
inMemoryProperties := false
|
||||
inMemoryHeaps := false
|
||||
inHeap := false
|
||||
heapSize := uint64(0)
|
||||
heapBudget := uint64(0)
|
||||
heapUsage := uint64(0)
|
||||
heapDeviceLocal := false
|
||||
|
||||
flushHeap := func() {
|
||||
if current != nil && inHeap && heapDeviceLocal {
|
||||
current.totalVRAM += heapSize
|
||||
current.usageVRAM += heapUsage
|
||||
current.budgetVRAM += heapBudget
|
||||
}
|
||||
heapSize = 0
|
||||
heapBudget = 0
|
||||
heapUsage = 0
|
||||
heapDeviceLocal = false
|
||||
inHeap = false
|
||||
}
|
||||
|
||||
flushGPU := func() {
|
||||
if current == nil || current.totalVRAM == 0 || current.deviceType == "PHYSICAL_DEVICE_TYPE_CPU" {
|
||||
return
|
||||
}
|
||||
|
||||
// Sum up device-local memory heaps
|
||||
var totalVRAM uint64
|
||||
for _, heap := range device.VkPhysicalDeviceMemoryProperties.MemoryHeaps {
|
||||
// Flag 1 = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT
|
||||
if heap.Flags&1 != 0 {
|
||||
totalVRAM += heap.Size
|
||||
}
|
||||
if current.usageVRAM == 0 && current.budgetVRAM != 0 {
|
||||
current.usageVRAM = current.totalVRAM - current.budgetVRAM
|
||||
} else if current.usageVRAM != 0 && current.budgetVRAM == 0 {
|
||||
current.budgetVRAM = current.totalVRAM - current.usageVRAM
|
||||
} else if current.usageVRAM == 0 && current.budgetVRAM == 0 {
|
||||
current.usageVRAM = 0
|
||||
current.budgetVRAM = current.totalVRAM
|
||||
}
|
||||
|
||||
if totalVRAM == 0 {
|
||||
continue
|
||||
}
|
||||
usagePercent := float64(current.usageVRAM) / float64(current.totalVRAM) * float64(100.0)
|
||||
|
||||
gpus = append(gpus, GPUMemoryInfo{
|
||||
Index: i,
|
||||
Name: device.DeviceName,
|
||||
Index: current.index,
|
||||
Name: current.name,
|
||||
Vendor: VendorVulkan,
|
||||
TotalVRAM: totalVRAM,
|
||||
UsedVRAM: 0, // Vulkan doesn't provide real-time usage
|
||||
FreeVRAM: totalVRAM,
|
||||
UsagePercent: 0,
|
||||
TotalVRAM: current.totalVRAM,
|
||||
UsedVRAM: current.usageVRAM,
|
||||
FreeVRAM: current.budgetVRAM,
|
||||
UsagePercent: usagePercent,
|
||||
})
|
||||
}
|
||||
|
||||
scanner := bufio.NewScanner(r)
|
||||
for scanner.Scan() {
|
||||
line := strings.TrimSpace(scanner.Text())
|
||||
if line == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
if index, ok := parseVulkanGPUHeader(line); ok {
|
||||
flushHeap()
|
||||
flushGPU()
|
||||
current = &vulkanGPUTextInfo{index: index}
|
||||
inMemoryProperties = false
|
||||
inMemoryHeaps = false
|
||||
continue
|
||||
}
|
||||
|
||||
if current == nil {
|
||||
continue
|
||||
}
|
||||
|
||||
if strings.HasPrefix(line, "deviceType") {
|
||||
current.deviceType = parseVulkanValue(line)
|
||||
continue
|
||||
}
|
||||
|
||||
if strings.HasPrefix(line, "deviceName") {
|
||||
current.name = parseVulkanValue(line)
|
||||
continue
|
||||
}
|
||||
|
||||
if line == "VkPhysicalDeviceMemoryProperties:" {
|
||||
inMemoryProperties = true
|
||||
inMemoryHeaps = false
|
||||
flushHeap()
|
||||
continue
|
||||
}
|
||||
|
||||
if !inMemoryProperties {
|
||||
continue
|
||||
}
|
||||
|
||||
if strings.HasPrefix(line, "memoryHeaps:") {
|
||||
inMemoryHeaps = true
|
||||
continue
|
||||
}
|
||||
|
||||
if strings.HasPrefix(line, "memoryTypes:") {
|
||||
flushHeap()
|
||||
inMemoryProperties = false
|
||||
inMemoryHeaps = false
|
||||
continue
|
||||
}
|
||||
|
||||
if !inMemoryHeaps {
|
||||
continue
|
||||
}
|
||||
|
||||
if strings.HasPrefix(line, "memoryHeaps[") {
|
||||
flushHeap()
|
||||
inHeap = true
|
||||
continue
|
||||
}
|
||||
|
||||
if !inHeap {
|
||||
continue
|
||||
}
|
||||
|
||||
if strings.HasPrefix(line, "size") {
|
||||
if size, ok := parseVulkanUintValue(line); ok {
|
||||
heapSize = size
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
if strings.HasPrefix(line, "budget") {
|
||||
if budget, ok := parseVulkanUintValue(line); ok {
|
||||
heapBudget = budget
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
if strings.HasPrefix(line, "usage") {
|
||||
if usage, ok := parseVulkanUintValue(line); ok {
|
||||
heapUsage = usage
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
if strings.Contains(line, "MEMORY_HEAP_DEVICE_LOCAL_BIT") {
|
||||
heapDeviceLocal = true
|
||||
}
|
||||
}
|
||||
|
||||
flushHeap()
|
||||
flushGPU()
|
||||
|
||||
return gpus
|
||||
}
|
||||
|
||||
// parseVulkanGPUHeader recognizes a section header of the form "GPU<n>:"
// and returns n. The second result is false when the line lacks the "GPU"
// prefix or the trailing colon, or when the text between them is not an
// integer accepted by strconv.Atoi.
func parseVulkanGPUHeader(line string) (int, bool) {
	const (
		prefix = "GPU"
		suffix = ":"
	)

	if strings.HasPrefix(line, prefix) && strings.HasSuffix(line, suffix) {
		// Both checks passed, so the line holds at least prefix+suffix and
		// the slice bounds below are in range.
		number := line[len(prefix) : len(line)-len(suffix)]
		if index, err := strconv.Atoi(number); err == nil {
			return index, true
		}
	}
	return 0, false
}
|
||||
|
||||
// parseVulkanValue returns the text after the first "=" in line with
// surrounding whitespace removed, or "" when the line contains no "=".
func parseVulkanValue(line string) string {
	eq := strings.IndexByte(line, '=')
	if eq < 0 {
		return ""
	}
	return strings.TrimSpace(line[eq+1:])
}
|
||||
|
||||
func parseVulkanUintValue(line string) (uint64, bool) {
|
||||
value := parseVulkanValue(line)
|
||||
fields := strings.Fields(value)
|
||||
if len(fields) == 0 {
|
||||
return 0, false
|
||||
}
|
||||
|
||||
parsed, err := strconv.ParseUint(fields[0], 0, 64)
|
||||
if err != nil {
|
||||
return 0, false
|
||||
}
|
||||
|
||||
return parsed, true
|
||||
}
|
||||
|
||||
// getAppleGPUMemory detects Apple Silicon GPUs using system_profiler (macOS only).
|
||||
// Apple Silicon uses unified memory, so GPU memory is reported as system RAM.
|
||||
func getAppleGPUMemory() []GPUMemoryInfo {
|
||||
|
||||
Reference in New Issue
Block a user