mirror of
https://github.com/mudler/LocalAI.git
synced 2026-06-27 01:47:18 -04:00
Compare commits
13 Commits
v4.5.2
...
feat/syncs
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
a0bdfc23b6 | ||
|
|
c894336898 | ||
|
|
b3d1c3b4a7 | ||
|
|
e4e3fde68b | ||
|
|
64150ca7ab | ||
|
|
f98b0f1c1e | ||
|
|
2c96c2d08e | ||
|
|
f01a969f7b | ||
|
|
56600eec3e | ||
|
|
c4fa256cdf | ||
|
|
17c1fc74b2 | ||
|
|
068d397acf | ||
|
|
5b3572f8b8 |
21
.github/workflows/release.yaml
vendored
21
.github/workflows/release.yaml
vendored
@@ -24,6 +24,11 @@ jobs:
|
||||
args: release --clean
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
MACOS_SIGN_P12: ${{ secrets.MACOS_CERTIFICATE }}
|
||||
MACOS_SIGN_PASSWORD: ${{ secrets.MACOS_CERTIFICATE_PWD }}
|
||||
MACOS_NOTARY_KEY: ${{ secrets.MACOS_NOTARY_KEY }}
|
||||
MACOS_NOTARY_KEY_ID: ${{ secrets.MACOS_NOTARY_KEY_ID }}
|
||||
MACOS_NOTARY_ISSUER_ID: ${{ secrets.MACOS_NOTARY_ISSUER_ID }}
|
||||
launcher-build-darwin:
|
||||
runs-on: macos-latest
|
||||
steps:
|
||||
@@ -35,9 +40,19 @@ jobs:
|
||||
uses: actions/setup-go@v5
|
||||
with:
|
||||
go-version: 1.23
|
||||
- name: Build launcher for macOS ARM64
|
||||
run: |
|
||||
make build-launcher-darwin
|
||||
- name: Import signing certificate
|
||||
env:
|
||||
MACOS_CERTIFICATE: ${{ secrets.MACOS_CERTIFICATE }}
|
||||
MACOS_CERTIFICATE_PWD: ${{ secrets.MACOS_CERTIFICATE_PWD }}
|
||||
MACOS_CI_KEYCHAIN_PWD: ${{ secrets.MACOS_CI_KEYCHAIN_PWD }}
|
||||
run: bash contrib/macos/sign-and-notarize.sh import-cert
|
||||
- name: Build, sign and notarize the DMG
|
||||
env:
|
||||
MACOS_SIGN_IDENTITY: ${{ secrets.MACOS_SIGN_IDENTITY }}
|
||||
MACOS_NOTARY_KEY: ${{ secrets.MACOS_NOTARY_KEY }}
|
||||
MACOS_NOTARY_KEY_ID: ${{ secrets.MACOS_NOTARY_KEY_ID }}
|
||||
MACOS_NOTARY_ISSUER_ID: ${{ secrets.MACOS_NOTARY_ISSUER_ID }}
|
||||
run: make release-launcher-darwin
|
||||
- name: Upload DMG to Release
|
||||
uses: softprops/action-gh-release@v3
|
||||
with:
|
||||
|
||||
3
.gitignore
vendored
3
.gitignore
vendored
@@ -94,3 +94,6 @@ core/http/react-ui/test-results/
|
||||
|
||||
# SDD / brainstorm scratch (agent-driven development)
|
||||
.superpowers/
|
||||
|
||||
# Local Apple signing material (never commit)
|
||||
.certs/
|
||||
|
||||
@@ -9,7 +9,8 @@ source:
|
||||
enabled: true
|
||||
name_template: '{{ .ProjectName }}-{{ .Tag }}-source'
|
||||
builds:
|
||||
- main: ./cmd/local-ai
|
||||
- id: local-ai
|
||||
main: ./cmd/local-ai
|
||||
env:
|
||||
- CGO_ENABLED=0
|
||||
ldflags:
|
||||
@@ -35,3 +36,19 @@ snapshot:
|
||||
version_template: "{{ .Tag }}-next"
|
||||
changelog:
|
||||
use: github-native
|
||||
# Sign + notarize the macOS server binary via the quill backend (runs on Linux,
|
||||
# no macOS runner needed). Disabled automatically when MACOS_SIGN_P12 is unset
|
||||
# (forks / PRs), so those builds stay unsigned and green.
|
||||
notarize:
|
||||
macos:
|
||||
- enabled: '{{ isEnvSet "MACOS_SIGN_P12" }}'
|
||||
ids:
|
||||
- local-ai
|
||||
sign:
|
||||
certificate: "{{.Env.MACOS_SIGN_P12}}"
|
||||
password: "{{.Env.MACOS_SIGN_PASSWORD}}"
|
||||
notarize:
|
||||
issuer_id: "{{.Env.MACOS_NOTARY_ISSUER_ID}}"
|
||||
key_id: "{{.Env.MACOS_NOTARY_KEY_ID}}"
|
||||
key: "{{.Env.MACOS_NOTARY_KEY}}"
|
||||
wait: true
|
||||
|
||||
35
Makefile
35
Makefile
@@ -1453,13 +1453,32 @@ docs: docs/static/gallery.html
|
||||
########################################################
|
||||
|
||||
## fyne cross-platform build
|
||||
build-launcher-darwin: build-launcher
|
||||
go run github.com/tiagomelo/macos-dmg-creator/cmd/createdmg@latest \
|
||||
--appName "LocalAI" \
|
||||
--appBinaryPath "$(LAUNCHER_BINARY_NAME)" \
|
||||
--bundleIdentifier "com.localai.launcher" \
|
||||
--iconPath "core/http/static/logo.png" \
|
||||
--outputDir "dist/"
|
||||
# Build dist/LocalAI.app from the launcher via fyne (the original note says metadata
# is read from cmd/launcher/FyneApp.toml).
# The final step hands the bundle to contrib/macos/sign-and-notarize.sh, which skips
# signing when MACOS_SIGN_IDENTITY is unset, so unsigned local/fork builds keep working.
# The target names a command, not a file: declare it phony so a stray file or
# directory called "build-launcher-darwin" can never mask it.
.PHONY: build-launcher-darwin
build-launcher-darwin:
	rm -rf dist/LocalAI.app cmd/launcher/LocalAI.app
	mkdir -p dist
	cd cmd/launcher && go run fyne.io/tools/cmd/fyne@latest package -os darwin -icon ../../core/http/static/logo.png --executable $(LAUNCHER_BINARY_NAME)
	mv cmd/launcher/LocalAI.app dist/LocalAI.app
	bash contrib/macos/sign-and-notarize.sh sign dist/LocalAI.app
|
||||
|
||||
# Wrap the (signed) app into a drag-to-Applications DMG via hdiutil, then sign the DMG.
# dist/dmg is a staging folder recreated on every run: it holds a copy of the app
# bundle plus an "Applications" symlink, and becomes the root of the disk image.
# Phony: the target name is a command, not the file it produces (dist/LocalAI.dmg).
.PHONY: dmg-launcher-darwin
dmg-launcher-darwin: build-launcher-darwin
	rm -rf dist/dmg dist/LocalAI.dmg
	mkdir -p dist/dmg
	cp -R dist/LocalAI.app dist/dmg/LocalAI.app
	ln -s /Applications dist/dmg/Applications
	hdiutil create -volname "LocalAI" -srcfolder dist/dmg -ov -format UDZO dist/LocalAI.dmg
	bash contrib/macos/sign-and-notarize.sh sign dist/LocalAI.dmg
|
||||
|
||||
# Submit the DMG to Apple notarization and staple the ticket (the script skips this
# step when MACOS_NOTARY_KEY is unset).
# Phony: the target name is a command, not a file.
.PHONY: notarize-launcher-darwin
notarize-launcher-darwin: dmg-launcher-darwin
	bash contrib/macos/sign-and-notarize.sh notarize dist/LocalAI.dmg
|
||||
|
||||
# Single entrypoint for CI: build -> sign app -> dmg -> sign dmg -> notarize -> staple.
# All the work happens in the prerequisite chain; this rule only reports completion.
# Phony: the target name is a command, not a file.
.PHONY: release-launcher-darwin
release-launcher-darwin: notarize-launcher-darwin
	@echo "dist/LocalAI.dmg is ready"
|
||||
|
||||
build-launcher-linux:
|
||||
cd cmd/launcher && go run fyne.io/tools/cmd/fyne@latest package -os linux -icon ../../core/http/static/logo.png --executable $(LAUNCHER_BINARY_NAME)-linux && mv launcher.tar.xz ../../$(LAUNCHER_BINARY_NAME)-linux.tar.xz
|
||||
cd cmd/launcher && go run fyne.io/tools/cmd/fyne@latest package -os linux -icon ../../core/http/static/logo.png --executable $(LAUNCHER_BINARY_NAME)-linux && mv LocalAI.tar.xz ../../$(LAUNCHER_BINARY_NAME)-linux.tar.xz
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# parakeet-cpp backend Makefile.
|
||||
#
|
||||
# Upstream pin lives below as PARAKEET_VERSION?=89f5e2977b4d8bccd45e7bcc6f2ef7c4ed49e89a
|
||||
# Upstream pin lives below as PARAKEET_VERSION?=f469a57270a1cc4554acb15febf60e56619673b9
|
||||
# (.github/bump_deps.sh) can find and update it - matches the
|
||||
# whisper.cpp / ds4 / vibevoice-cpp convention.
|
||||
#
|
||||
@@ -15,7 +15,7 @@
|
||||
# That's what the L0 smoke test uses. The default target below does the
|
||||
# proper clone-at-pin + cmake build so CI doesn't need a side-checkout.
|
||||
|
||||
PARAKEET_VERSION?=89f5e2977b4d8bccd45e7bcc6f2ef7c4ed49e89a
|
||||
PARAKEET_VERSION?=f469a57270a1cc4554acb15febf60e56619673b9
|
||||
PARAKEET_REPO?=https://github.com/mudler/parakeet.cpp
|
||||
|
||||
GOCMD?=go
|
||||
|
||||
@@ -16,7 +16,15 @@ cp -rfv $CURDIR/run.sh $CURDIR/package/
|
||||
cp -rfLv $CURDIR/sources/go-piper/piper-phonemize/pi/lib/* $CURDIR/package/lib/
|
||||
|
||||
# Detect architecture and copy appropriate libraries
|
||||
if [ -f "/lib64/ld-linux-x86-64.so.2" ]; then
|
||||
if [ "$(uname)" = "Darwin" ]; then
|
||||
# macOS has no glibc loader to bundle. The piper binary links its bundled
|
||||
# libs (libucd, libespeak-ng, libpiper_phonemize, libonnxruntime) via
|
||||
# @rpath but ships with no LC_RPATH, so dyld aborts at launch with
|
||||
# "Library not loaded: @rpath/libucd.dylib ... no LC_RPATH's found".
|
||||
# Add an @loader_path/lib rpath so @rpath resolves to package/lib/.
|
||||
echo "Detected macOS; adding @loader_path/lib rpath so bundled libs resolve via @rpath..."
|
||||
install_name_tool -add_rpath @loader_path/lib "$CURDIR/package/piper"
|
||||
elif [ -f "/lib64/ld-linux-x86-64.so.2" ]; then
|
||||
# x86_64 architecture
|
||||
echo "Detected x86_64 architecture, copying x86_64 libraries..."
|
||||
cp -arfLv /lib64/ld-linux-x86-64.so.2 $CURDIR/package/lib/ld.so
|
||||
|
||||
@@ -4,7 +4,12 @@ set -ex
|
||||
CURDIR=$(dirname "$(realpath "$0")")
|
||||
|
||||
export ESPEAK_NG_DATA="$CURDIR"/espeak-ng-data
|
||||
export LD_LIBRARY_PATH="$CURDIR"/lib:$LD_LIBRARY_PATH
|
||||
|
||||
if [ "$(uname)" = "Darwin" ]; then
|
||||
export DYLD_LIBRARY_PATH="$CURDIR"/lib:$DYLD_LIBRARY_PATH
|
||||
else
|
||||
export LD_LIBRARY_PATH="$CURDIR"/lib:$LD_LIBRARY_PATH
|
||||
fi
|
||||
|
||||
# If there is a lib/ld.so, use it
|
||||
if [ -f "$CURDIR"/lib/ld.so ]; then
|
||||
|
||||
@@ -15,7 +15,14 @@ cp -avf $CURDIR/run.sh $CURDIR/package/
|
||||
cp -rfLv $CURDIR/backend-assets/lib/* $CURDIR/package/lib/
|
||||
|
||||
# Detect architecture and copy appropriate libraries
|
||||
if [ -f "/lib64/ld-linux-x86-64.so.2" ]; then
|
||||
if [ "$(uname)" = "Darwin" ]; then
|
||||
# macOS has no glibc loader to bundle. silero-vad links its bundled
|
||||
# libonnxruntime via @rpath but ships with no LC_RPATH, so dyld can't find
|
||||
# it at runtime. Add an @loader_path/lib rpath so @rpath resolves to
|
||||
# package/lib/ (matching the piper darwin fix, #10525).
|
||||
echo "Detected macOS; adding @loader_path/lib rpath so bundled libs resolve via @rpath..."
|
||||
install_name_tool -add_rpath @loader_path/lib "$CURDIR/package/silero-vad"
|
||||
elif [ -f "/lib64/ld-linux-x86-64.so.2" ]; then
|
||||
# x86_64 architecture
|
||||
echo "Detected x86_64 architecture, copying x86_64 libraries..."
|
||||
cp -arfLv /lib64/ld-linux-x86-64.so.2 $CURDIR/package/lib/ld.so
|
||||
|
||||
@@ -3,7 +3,11 @@ set -ex
|
||||
|
||||
CURDIR=$(dirname "$(realpath "$0")")
|
||||
|
||||
export LD_LIBRARY_PATH="$CURDIR"/lib:$LD_LIBRARY_PATH
|
||||
if [ "$(uname)" = "Darwin" ]; then
|
||||
export DYLD_LIBRARY_PATH="$CURDIR"/lib:$DYLD_LIBRARY_PATH
|
||||
else
|
||||
export LD_LIBRARY_PATH="$CURDIR"/lib:$LD_LIBRARY_PATH
|
||||
fi
|
||||
|
||||
# If there is a lib/ld.so, use it
|
||||
if [ -f "$CURDIR"/lib/ld.so ]; then
|
||||
|
||||
8
cmd/launcher/FyneApp.toml
Normal file
8
cmd/launcher/FyneApp.toml
Normal file
@@ -0,0 +1,8 @@
|
||||
Website = "https://localai.io"
|
||||
|
||||
[Details]
|
||||
Icon = "../../core/http/static/logo.png"
|
||||
Name = "LocalAI"
|
||||
ID = "com.localai.launcher"
|
||||
Version = "0.0.0"
|
||||
Build = 1
|
||||
14
contrib/macos/Launcher.entitlements
Normal file
14
contrib/macos/Launcher.entitlements
Normal file
@@ -0,0 +1,14 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
|
||||
<plist version="1.0">
|
||||
<dict>
|
||||
<key>com.apple.security.network.client</key>
|
||||
<true/>
|
||||
<key>com.apple.security.network.server</key>
|
||||
<true/>
|
||||
<key>com.apple.security.cs.allow-jit</key>
|
||||
<true/>
|
||||
<key>com.apple.security.cs.allow-unsigned-executable-memory</key>
|
||||
<true/>
|
||||
</dict>
|
||||
</plist>
|
||||
84
contrib/macos/sign-and-notarize.sh
Executable file
84
contrib/macos/sign-and-notarize.sh
Executable file
@@ -0,0 +1,84 @@
|
||||
#!/usr/bin/env bash
|
||||
# Code-sign and notarize macOS artifacts for LocalAI.
|
||||
# Every sub-command is a no-op (exit 0) when its required secret is unset,
|
||||
# so unsigned builds (forks, local dev, PRs) keep working.
|
||||
set -euo pipefail

# Locate the entitlements plist next to this script instead of relative to the
# caller's working directory, so signing works from any directory (the plist
# lives alongside the script in contrib/macos/).
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
ENTITLEMENTS="$SCRIPT_DIR/Launcher.entitlements"
# Keychain that import-cert creates and loads the signing certificate into.
KEYCHAIN="localai-ci.keychain-db"
|
||||
|
||||
# Import the signing certificate into a dedicated keychain.
#
# Environment:
#   MACOS_CERTIFICATE      base64-encoded .p12; when unset/empty the function
#                          prints a notice and returns 0 (unsigned build).
#   MACOS_CERTIFICATE_PWD  password of the .p12 (required when importing).
#   MACOS_CI_KEYCHAIN_PWD  password for the keychain being created (required).
#
# The decoded .p12 is private key material, so it is written inside a private
# temporary directory that is removed on every exit path, including failures
# that abort the script through `set -e`.
cmd_import_cert() {
  if [ -z "${MACOS_CERTIFICATE:-}" ]; then
    echo "[sign] MACOS_CERTIFICATE unset: skipping cert import (unsigned build)"
    return 0
  fi

  local certdir certfile keychain_pwd default_keychain
  # Validate required secrets before anything sensitive touches the disk.
  keychain_pwd="${MACOS_CI_KEYCHAIN_PWD:?MACOS_CI_KEYCHAIN_PWD required when signing}"
  : "${MACOS_CERTIFICATE_PWD:?}"

  # `mktemp -d` yields a fresh owner-only directory; appending ".p12" to a plain
  # `mktemp` result would leave the original temp file behind and write the
  # certificate to a second path that was never safely created.
  certdir="$(mktemp -d)"
  certfile="$certdir/cert.p12"
  # Expand the path now: the trap may fire after this function's locals are gone.
  # shellcheck disable=SC2064
  trap "rm -rf '$certdir'" EXIT

  printf '%s\n' "$MACOS_CERTIFICATE" | base64 --decode > "$certfile"

  security create-keychain -p "$keychain_pwd" "$KEYCHAIN"
  security set-keychain-settings -lut 21600 "$KEYCHAIN"
  security unlock-keychain -p "$keychain_pwd" "$KEYCHAIN"
  security import "$certfile" -k "$KEYCHAIN" -P "$MACOS_CERTIFICATE_PWD" \
    -T /usr/bin/codesign -T /usr/bin/security
  security set-key-partition-list -S apple-tool:,apple:,codesign: \
    -s -k "$keychain_pwd" "$KEYCHAIN" >/dev/null

  # Put the new keychain on the user search list ahead of the current default.
  default_keychain="$(security default-keychain | tr -d ' "')"
  security list-keychains -d user -s "$KEYCHAIN" "$default_keychain"

  rm -rf "$certdir"
  trap - EXIT
  echo "[sign] certificate imported into $KEYCHAIN"
}
|
||||
|
||||
# Code-sign a single artifact and verify the resulting signature.
#
# Arguments:
#   $1  path to the artifact; paths ending in ".app" are signed as app bundles,
#       anything else (e.g. a .dmg) as a plain container.
# Environment:
#   MACOS_SIGN_IDENTITY  signing identity passed to codesign; when unset/empty
#                        the function prints a notice and returns 0.
cmd_sign() {
  # Fail with a usage message rather than the bare "unbound variable" error
  # that `set -u` produces when the path argument is missing.
  local target="${1:?usage: sign <path>}"
  if [ -z "${MACOS_SIGN_IDENTITY:-}" ]; then
    echo "[sign] MACOS_SIGN_IDENTITY unset: skipping codesign of $target"
    return 0
  fi
  case "$target" in
    *.app)
      # Hardened runtime + entitlements are required for notarizing the app bundle.
      codesign --deep --force --options runtime --timestamp \
        --entitlements "$ENTITLEMENTS" \
        --sign "$MACOS_SIGN_IDENTITY" "$target"
      ;;
    *)
      # A disk image carries no entitlements/runtime; just sign the container.
      codesign --force --timestamp --sign "$MACOS_SIGN_IDENTITY" "$target"
      ;;
  esac
  # Under `set -e`, a signature that fails strict verification aborts the script.
  codesign --verify --strict --verbose=2 "$target"
  echo "[sign] signed $target"
}
|
||||
|
||||
# Submit a disk image for notarization, then staple and validate the ticket.
#
# Arguments:
#   $1  path to the DMG.
# Environment:
#   MACOS_NOTARY_KEY        base64-encoded .p8 API key; when unset/empty the
#                           function prints a notice and returns 0.
#   MACOS_NOTARY_KEY_ID     key id passed to notarytool (required).
#   MACOS_NOTARY_ISSUER_ID  issuer id passed to notarytool (required).
#
# The decoded key is a credential, so it lives in a private temporary directory
# that is removed on every exit path, including a failed or rejected submission
# that aborts the script through `set -e`.
cmd_notarize() {
  local dmg="${1:?usage: notarize <dmg>}"
  if [ -z "${MACOS_NOTARY_KEY:-}" ]; then
    echo "[notarize] MACOS_NOTARY_KEY unset: skipping notarization of $dmg"
    return 0
  fi

  # Check the remaining credentials before the key is written to disk.
  : "${MACOS_NOTARY_KEY_ID:?}" "${MACOS_NOTARY_ISSUER_ID:?}"

  local keydir keyfile
  # `mktemp -d` avoids the stray empty file that "$(mktemp).p8" leaves behind.
  keydir="$(mktemp -d)"
  keyfile="$keydir/notary-key.p8"
  # Expand the path now: the trap may fire after this function's locals are gone.
  # shellcheck disable=SC2064
  trap "rm -rf '$keydir'" EXIT

  printf '%s\n' "$MACOS_NOTARY_KEY" | base64 --decode > "$keyfile"
  xcrun notarytool submit "$dmg" \
    --key "$keyfile" \
    --key-id "$MACOS_NOTARY_KEY_ID" \
    --issuer "$MACOS_NOTARY_ISSUER_ID" \
    --wait

  # The key is no longer needed once the submission has finished.
  rm -rf "$keydir"
  trap - EXIT

  xcrun stapler staple "$dmg"
  xcrun stapler validate "$dmg"
  echo "[notarize] notarized and stapled $dmg"
}
|
||||
|
||||
# Entry point: route the first argument to the matching sub-command and hand
# the remaining arguments to it. An unknown or missing sub-command prints the
# usage line on stderr and exits with status 2.
main() {
  local action="${1:-}"
  # Drop the sub-command name, if one was given, so "$@" holds only its arguments.
  if [ "$#" -gt 0 ]; then
    shift
  fi

  if [ "$action" = "import-cert" ]; then
    cmd_import_cert
  elif [ "$action" = "sign" ]; then
    cmd_sign "$@"
  elif [ "$action" = "notarize" ]; then
    cmd_notarize "$@"
  else
    echo "usage: $0 {import-cert|sign <path>|notarize <dmg>}" >&2
    exit 2
  fi
}

main "$@"
|
||||
@@ -37,6 +37,8 @@ func (a *Application) RestartAgentJobService() error {
|
||||
if d.JobStore != nil {
|
||||
agentJobService.SetDistributedJobStore(d.JobStore)
|
||||
}
|
||||
// Keep agent tasks consistent across replicas (same client the dispatcher uses).
|
||||
agentJobService.SetTaskSyncNATS(d.Nats)
|
||||
}
|
||||
|
||||
// Start the service
|
||||
|
||||
@@ -604,6 +604,10 @@ func (a *Application) StartAgentPool() {
|
||||
usm.SetJobDBStore(s)
|
||||
}
|
||||
}
|
||||
// Keep per-user agent tasks consistent across replicas (nil in standalone).
|
||||
if d := a.Distributed(); d != nil {
|
||||
usm.SetJobSyncNATS(d.Nats)
|
||||
}
|
||||
aps.SetUserServicesManager(usm)
|
||||
|
||||
a.agentPoolService.Store(aps)
|
||||
|
||||
@@ -16,6 +16,7 @@ import (
|
||||
"github.com/mudler/LocalAI/core/services/galleryop"
|
||||
"github.com/mudler/LocalAI/core/services/jobs"
|
||||
"github.com/mudler/LocalAI/core/services/messaging"
|
||||
"github.com/mudler/LocalAI/core/services/modeladmin"
|
||||
"github.com/mudler/LocalAI/core/services/monitoring"
|
||||
"github.com/mudler/LocalAI/core/services/nodes"
|
||||
"github.com/mudler/LocalAI/core/services/routing/admission"
|
||||
@@ -279,6 +280,9 @@ func New(opts ...config.AppOption) (*Application, error) {
|
||||
if application.agentJobService != nil {
|
||||
application.agentJobService.SetDistributedBackends(distSvc.Dispatcher)
|
||||
application.agentJobService.SetDistributedJobStore(distSvc.JobStore)
|
||||
// Keep agent tasks consistent across replicas (jobs already sync via the
|
||||
// dispatcher + DB read-through). Same NATS client the dispatcher uses.
|
||||
application.agentJobService.SetTaskSyncNATS(distSvc.Nats)
|
||||
}
|
||||
// Wire skill store into AgentPoolService (wired at pool start time via closure)
|
||||
// The actual wiring happens in StartAgentPool since the pool doesn't exist yet.
|
||||
@@ -330,9 +334,14 @@ func New(opts ...config.AppOption) (*Application, error) {
|
||||
gs := application.galleryService
|
||||
sys := options.SystemState
|
||||
cfgLoaderOpts := options.ToConfigLoaderOptions()
|
||||
gs.OnModelsChanged = func(_ messaging.CacheInvalidateEvent) {
|
||||
if err := application.ModelConfigLoader().LoadModelConfigsFromPath(sys.Model.ModelsPath, cfgLoaderOpts...); err != nil {
|
||||
xlog.Warn("Failed to reload model configs after peer invalidation", "error", err)
|
||||
gs.OnModelsChanged = func(evt messaging.CacheInvalidateEvent) {
|
||||
// ApplyRemoteChange honors the op: a "delete" prunes the element
|
||||
// (a reload-from-path is additive and cannot drop it), anything
|
||||
// else reloads from disk; a named element's running instance is
|
||||
// shut down so the new config takes effect. The originating
|
||||
// replica reloads inline and never depends on this path.
|
||||
if err := modeladmin.ApplyRemoteChange(application.ModelConfigLoader(), application.modelLoader, sys.Model.ModelsPath, evt, cfgLoaderOpts...); err != nil {
|
||||
xlog.Warn("Failed to apply peer model config change", "error", err)
|
||||
}
|
||||
}
|
||||
if err := application.galleryService.SubscribeBroadcasts(); err != nil {
|
||||
|
||||
@@ -23,8 +23,10 @@ import (
|
||||
|
||||
"github.com/mudler/LocalAI/core/application"
|
||||
"github.com/mudler/LocalAI/core/schema"
|
||||
"github.com/mudler/LocalAI/core/services/distributed"
|
||||
"github.com/mudler/LocalAI/core/services/finetune"
|
||||
"github.com/mudler/LocalAI/core/services/galleryop"
|
||||
"github.com/mudler/LocalAI/core/services/messaging"
|
||||
"github.com/mudler/LocalAI/core/services/nodes"
|
||||
"github.com/mudler/LocalAI/core/services/quantization"
|
||||
|
||||
@@ -400,25 +402,45 @@ func API(application *application.Application) (*echo.Echo, error) {
|
||||
routes.RegisterAgentPoolRoutes(e, application, agentsMw, skillsMw, collectionsMw)
|
||||
// Fine-tuning routes
|
||||
fineTuningMw := auth.RequireFeature(application.AuthDB(), auth.FeatureFineTuning)
|
||||
// In distributed mode pass the shared NATS client + PostgreSQL store so
|
||||
// fine-tune jobs stay consistent across replicas (the SyncedMap broadcasts
|
||||
// mutations and hydrates from the DB); standalone passes nil for both.
|
||||
var ftNats messaging.MessagingClient
|
||||
var ftStore *distributed.FineTuneStore
|
||||
if d := application.Distributed(); d != nil {
|
||||
ftNats = d.Nats
|
||||
if d.DistStores != nil && d.DistStores.FineTune != nil {
|
||||
ftStore = d.DistStores.FineTune
|
||||
}
|
||||
}
|
||||
ftService := finetune.NewFineTuneService(
|
||||
application.ApplicationConfig(),
|
||||
application.ModelLoader(),
|
||||
application.ModelConfigLoader(),
|
||||
ftNats,
|
||||
ftStore,
|
||||
)
|
||||
if d := application.Distributed(); d != nil {
|
||||
ftService.SetNATSClient(d.Nats)
|
||||
if d.DistStores != nil && d.DistStores.FineTune != nil {
|
||||
ftService.SetFineTuneStore(d.DistStores.FineTune)
|
||||
}
|
||||
}
|
||||
routes.RegisterFineTuningRoutes(e, ftService, application.ApplicationConfig(), fineTuningMw)
|
||||
|
||||
// Quantization routes
|
||||
quantizationMw := auth.RequireFeature(application.AuthDB(), auth.FeatureQuantization)
|
||||
// In distributed mode pass the shared NATS client + PostgreSQL store so
|
||||
// quantization jobs stay consistent across replicas (the SyncedMap broadcasts
|
||||
// mutations and hydrates from the DB); standalone passes nil for both.
|
||||
var quantNats messaging.MessagingClient
|
||||
var quantStore *distributed.QuantStore
|
||||
if d := application.Distributed(); d != nil {
|
||||
quantNats = d.Nats
|
||||
if d.DistStores != nil && d.DistStores.Quant != nil {
|
||||
quantStore = d.DistStores.Quant
|
||||
}
|
||||
}
|
||||
qService := quantization.NewQuantizationService(
|
||||
application.ApplicationConfig(),
|
||||
application.ModelLoader(),
|
||||
application.ModelConfigLoader(),
|
||||
quantNats,
|
||||
quantStore,
|
||||
)
|
||||
routes.RegisterQuantizationRoutes(e, qService, application.ApplicationConfig(), quantizationMw)
|
||||
|
||||
|
||||
@@ -155,7 +155,7 @@ func AutocompleteEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, a
|
||||
// @Param name path string true "Model name"
|
||||
// @Success 200 {object} map[string]any "success message"
|
||||
// @Router /api/models/config-json/{name} [patch]
|
||||
func PatchConfigEndpoint(cl *config.ModelConfigLoader, _ *model.ModelLoader, appConfig *config.ApplicationConfig) echo.HandlerFunc {
|
||||
func PatchConfigEndpoint(cl *config.ModelConfigLoader, _ *model.ModelLoader, gs *galleryop.GalleryService, appConfig *config.ApplicationConfig) echo.HandlerFunc {
|
||||
svc := modeladmin.NewConfigService(cl, appConfig)
|
||||
return func(c echo.Context) error {
|
||||
modelName := c.Param("name")
|
||||
@@ -173,6 +173,14 @@ func PatchConfigEndpoint(cl *config.ModelConfigLoader, _ *model.ModelLoader, app
|
||||
if _, err := svc.PatchConfig(c.Request().Context(), modelName, patchMap); err != nil {
|
||||
return c.JSON(httpStatusForModelAdminError(err), map[string]any{"error": err.Error()})
|
||||
}
|
||||
|
||||
// Patch rewrites the config on disk and reloads only the local loader;
|
||||
// tell peers to refresh so the change is consistent across replicas.
|
||||
// No-op in standalone mode.
|
||||
if gs != nil {
|
||||
gs.BroadcastModelsChanged(modelName, "install")
|
||||
}
|
||||
|
||||
return c.JSON(http.StatusOK, map[string]any{
|
||||
"success": true,
|
||||
"message": fmt.Sprintf("Model '%s' updated successfully", modelName),
|
||||
|
||||
@@ -45,7 +45,7 @@ var _ = Describe("Config Metadata Endpoints", func() {
|
||||
app = echo.New()
|
||||
app.GET("/api/models/config-metadata", ConfigMetadataEndpoint())
|
||||
app.GET("/api/models/config-metadata/autocomplete/:provider", AutocompleteEndpoint(configLoader, modelLoader, appConfig))
|
||||
app.PATCH("/api/models/config-json/:name", PatchConfigEndpoint(configLoader, modelLoader, appConfig))
|
||||
app.PATCH("/api/models/config-json/:name", PatchConfigEndpoint(configLoader, modelLoader, nil, appConfig))
|
||||
})
|
||||
|
||||
AfterEach(func() {
|
||||
|
||||
@@ -10,6 +10,7 @@ import (
|
||||
"github.com/labstack/echo/v4"
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
httpUtils "github.com/mudler/LocalAI/core/http/middleware"
|
||||
"github.com/mudler/LocalAI/core/services/galleryop"
|
||||
"github.com/mudler/LocalAI/core/services/modeladmin"
|
||||
"github.com/mudler/LocalAI/internal"
|
||||
"github.com/mudler/LocalAI/pkg/model"
|
||||
@@ -55,7 +56,7 @@ func GetEditModelPage(cl *config.ModelConfigLoader, appConfig *config.Applicatio
|
||||
}
|
||||
|
||||
// EditModelEndpoint handles updating existing model configurations
|
||||
func EditModelEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) echo.HandlerFunc {
|
||||
func EditModelEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, gs *galleryop.GalleryService, appConfig *config.ApplicationConfig) echo.HandlerFunc {
|
||||
svc := modeladmin.NewConfigService(cl, appConfig)
|
||||
return func(c echo.Context) error {
|
||||
modelName := c.Param("name")
|
||||
@@ -70,6 +71,17 @@ func EditModelEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, appC
|
||||
if err != nil {
|
||||
return c.JSON(httpStatusForModelAdminError(err), ModelResponse{Success: false, Error: err.Error()})
|
||||
}
|
||||
|
||||
// Tell peer replicas to refresh their in-memory config: this endpoint
|
||||
// only reloaded the local loader. A rename is a delete of the old name
|
||||
// plus an install of the new one. No-op in standalone mode.
|
||||
if gs != nil {
|
||||
if result.Renamed {
|
||||
gs.BroadcastModelsChanged(result.OldName, "delete")
|
||||
}
|
||||
gs.BroadcastModelsChanged(result.NewName, "install")
|
||||
}
|
||||
|
||||
msg := fmt.Sprintf("Model '%s' updated successfully. Model has been reloaded with new configuration.", result.NewName)
|
||||
if result.Renamed {
|
||||
msg = fmt.Sprintf("Model '%s' renamed to '%s' and updated successfully.", result.OldName, result.NewName)
|
||||
|
||||
@@ -56,7 +56,7 @@ var _ = Describe("Edit Model test", func() {
|
||||
app := echo.New()
|
||||
// Set up a simple renderer for the test
|
||||
app.Renderer = &testRenderer{}
|
||||
app.POST("/import-model", ImportModelEndpoint(modelConfigLoader, applicationConfig))
|
||||
app.POST("/import-model", ImportModelEndpoint(modelConfigLoader, nil, applicationConfig))
|
||||
app.GET("/edit-model/:name", GetEditModelPage(modelConfigLoader, applicationConfig))
|
||||
|
||||
requestBody := bytes.NewBufferString(`{"name": "foo", "backend": "foo", "model": "foo"}`)
|
||||
@@ -106,7 +106,7 @@ var _ = Describe("Edit Model test", func() {
|
||||
Expect(exists).To(BeTrue())
|
||||
|
||||
app := echo.New()
|
||||
app.POST("/models/edit/:name", EditModelEndpoint(modelConfigLoader, modelLoader, applicationConfig))
|
||||
app.POST("/models/edit/:name", EditModelEndpoint(modelConfigLoader, modelLoader, nil, applicationConfig))
|
||||
|
||||
newYAML := "name: newname\nbackend: llama\nmodel: foo\n"
|
||||
req := httptest.NewRequest("POST", "/models/edit/oldname", bytes.NewBufferString(newYAML))
|
||||
@@ -163,7 +163,7 @@ var _ = Describe("Edit Model test", func() {
|
||||
Expect(modelConfigLoader.LoadModelConfigsFromPath(tempDir)).To(Succeed())
|
||||
|
||||
app := echo.New()
|
||||
app.POST("/models/edit/:name", EditModelEndpoint(modelConfigLoader, modelLoader, applicationConfig))
|
||||
app.POST("/models/edit/:name", EditModelEndpoint(modelConfigLoader, modelLoader, nil, applicationConfig))
|
||||
|
||||
req := httptest.NewRequest(
|
||||
"POST",
|
||||
@@ -204,7 +204,7 @@ var _ = Describe("Edit Model test", func() {
|
||||
Expect(modelConfigLoader.LoadModelConfigsFromPath(tempDir)).To(Succeed())
|
||||
|
||||
app := echo.New()
|
||||
app.POST("/models/edit/:name", EditModelEndpoint(modelConfigLoader, modelLoader, applicationConfig))
|
||||
app.POST("/models/edit/:name", EditModelEndpoint(modelConfigLoader, modelLoader, nil, applicationConfig))
|
||||
|
||||
req := httptest.NewRequest(
|
||||
"POST",
|
||||
|
||||
@@ -125,7 +125,7 @@ func ImportModelURIEndpoint(cl *config.ModelConfigLoader, appConfig *config.Appl
|
||||
}
|
||||
|
||||
// ImportModelEndpoint handles creating new model configurations
|
||||
func ImportModelEndpoint(cl *config.ModelConfigLoader, appConfig *config.ApplicationConfig) echo.HandlerFunc {
|
||||
func ImportModelEndpoint(cl *config.ModelConfigLoader, gs *galleryop.GalleryService, appConfig *config.ApplicationConfig) echo.HandlerFunc {
|
||||
return func(c echo.Context) error {
|
||||
// Get the raw body
|
||||
body, err := io.ReadAll(c.Request().Body)
|
||||
@@ -245,6 +245,13 @@ func ImportModelEndpoint(cl *config.ModelConfigLoader, appConfig *config.Applica
|
||||
}
|
||||
return c.JSON(http.StatusInternalServerError, response)
|
||||
}
|
||||
// Tell peer replicas to load the newly-created config from the shared
|
||||
// models dir: this endpoint only reloaded the local loader. No-op in
|
||||
// standalone mode.
|
||||
if gs != nil {
|
||||
gs.BroadcastModelsChanged(modelConfig.Name, "install")
|
||||
}
|
||||
|
||||
// Return success response
|
||||
response := ModelResponse{
|
||||
Success: true,
|
||||
|
||||
@@ -60,7 +60,10 @@ func GetNodeEndpoint(registry *nodes.NodeRegistry) echo.HandlerFunc {
|
||||
return func(c echo.Context) error {
|
||||
ctx := c.Request().Context()
|
||||
id := c.Param("id")
|
||||
node, err := registry.Get(ctx, id)
|
||||
// GetWithExtras (not Get) so the response carries the node's labels,
|
||||
// loaded-model count, and in-flight total — the bare BackendNode keeps
|
||||
// labels in a separate table, leaving the detail view's label list empty.
|
||||
node, err := registry.GetWithExtras(ctx, id)
|
||||
if err != nil {
|
||||
return c.JSON(http.StatusNotFound, nodeError(http.StatusNotFound, "node not found"))
|
||||
}
|
||||
|
||||
@@ -7,6 +7,7 @@ import (
|
||||
|
||||
"github.com/labstack/echo/v4"
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
"github.com/mudler/LocalAI/core/services/galleryop"
|
||||
"github.com/mudler/LocalAI/core/services/modeladmin"
|
||||
"github.com/mudler/LocalAI/pkg/model"
|
||||
)
|
||||
@@ -24,7 +25,7 @@ import (
|
||||
// @Failure 404 {object} ModelResponse
|
||||
// @Failure 500 {object} ModelResponse
|
||||
// @Router /api/models/{name}/{action} [put]
|
||||
func ToggleStateModelEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) echo.HandlerFunc {
|
||||
func ToggleStateModelEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, gs *galleryop.GalleryService, appConfig *config.ApplicationConfig) echo.HandlerFunc {
|
||||
svc := modeladmin.NewConfigService(cl, appConfig)
|
||||
return func(c echo.Context) error {
|
||||
modelName := c.Param("name")
|
||||
@@ -36,6 +37,14 @@ func ToggleStateModelEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoade
|
||||
if err != nil {
|
||||
return c.JSON(httpStatusForModelAdminError(err), ModelResponse{Success: false, Error: err.Error()})
|
||||
}
|
||||
|
||||
// Enabling/disabling rewrites the config on disk and reloads only the
|
||||
// local loader; tell peers to refresh so the model's availability is
|
||||
// consistent across replicas. No-op in standalone mode.
|
||||
if gs != nil {
|
||||
gs.BroadcastModelsChanged(modelName, "install")
|
||||
}
|
||||
|
||||
msg := fmt.Sprintf("Model '%s' has been %sd successfully.", modelName, action)
|
||||
if action == modeladmin.ActionDisable {
|
||||
msg += " The model will not be loaded on demand until re-enabled."
|
||||
|
||||
@@ -72,19 +72,19 @@ func RegisterLocalAIRoutes(router *echo.Echo,
|
||||
router.POST("/backends/upgrades/check", backendGalleryEndpointService.CheckUpgradesEndpoint(), adminMiddleware)
|
||||
router.POST("/backends/upgrade/:name", backendGalleryEndpointService.UpgradeBackendEndpoint(), adminMiddleware)
|
||||
// Custom model import endpoint
|
||||
router.POST("/models/import", localai.ImportModelEndpoint(cl, appConfig), adminMiddleware)
|
||||
router.POST("/models/import", localai.ImportModelEndpoint(cl, galleryService, appConfig), adminMiddleware)
|
||||
|
||||
// URI model import endpoint
|
||||
router.POST("/models/import-uri", localai.ImportModelURIEndpoint(cl, appConfig, galleryService, opcache), adminMiddleware)
|
||||
|
||||
// Custom model edit endpoint
|
||||
router.POST("/models/edit/:name", localai.EditModelEndpoint(cl, ml, appConfig), adminMiddleware)
|
||||
router.POST("/models/edit/:name", localai.EditModelEndpoint(cl, ml, galleryService, appConfig), adminMiddleware)
|
||||
|
||||
// List model aliases endpoint
|
||||
router.GET("/api/aliases", localai.ListAliasesEndpoint(cl), adminMiddleware)
|
||||
|
||||
// Toggle model enable/disable endpoint
|
||||
router.PUT("/models/toggle-state/:name/:action", localai.ToggleStateModelEndpoint(cl, ml, appConfig), adminMiddleware)
|
||||
router.PUT("/models/toggle-state/:name/:action", localai.ToggleStateModelEndpoint(cl, ml, galleryService, appConfig), adminMiddleware)
|
||||
|
||||
// Toggle model pinned status endpoint
|
||||
router.PUT("/models/toggle-pinned/:name/:action", localai.TogglePinnedModelEndpoint(cl, appConfig, func() {
|
||||
|
||||
@@ -922,7 +922,7 @@ func RegisterUIAPIRoutes(app *echo.Echo, cl *config.ModelConfigLoader, ml *model
|
||||
app.GET("/api/models/config-metadata/autocomplete/:provider", localai.AutocompleteEndpoint(cl, ml, appConfig), adminMiddleware)
|
||||
|
||||
// PATCH config endpoint - partial update using nested JSON merge
|
||||
app.PATCH("/api/models/config-json/:name", localai.PatchConfigEndpoint(cl, ml, appConfig), adminMiddleware)
|
||||
app.PATCH("/api/models/config-json/:name", localai.PatchConfigEndpoint(cl, ml, galleryService, appConfig), adminMiddleware)
|
||||
|
||||
// VRAM estimation endpoint
|
||||
app.POST("/api/models/vram-estimate", localai.VRAMEstimateEndpoint(cl, appConfig), adminMiddleware)
|
||||
|
||||
@@ -30,6 +30,8 @@ import (
|
||||
mcpTools "github.com/mudler/LocalAI/core/http/endpoints/mcp"
|
||||
"github.com/mudler/LocalAI/core/schema"
|
||||
"github.com/mudler/LocalAI/core/services/jobs"
|
||||
"github.com/mudler/LocalAI/core/services/messaging"
|
||||
"github.com/mudler/LocalAI/core/services/syncstate"
|
||||
"github.com/mudler/LocalAI/core/templates"
|
||||
"github.com/mudler/LocalAI/pkg/httpclient"
|
||||
"github.com/mudler/LocalAI/pkg/model"
|
||||
@@ -43,8 +45,18 @@ type AgentJobService struct {
|
||||
configLoader *config.ModelConfigLoader
|
||||
evaluator *templates.Evaluator
|
||||
|
||||
// tasks is the cross-replica task store: an in-memory map kept consistent
|
||||
// across replicas via NATS, with read-through to the configured persister
|
||||
// (file in standalone, PostgreSQL in distributed). Unlike jobs - which already
|
||||
// converge via the dispatcher + DB read-through - tasks previously read
|
||||
// in-memory only, so ListTasks went stale on non-originating replicas.
|
||||
tasks *syncstate.SyncedMap[string, schema.Task]
|
||||
// taskNats is the distributed NATS client backing the tasks SyncedMap. It is
|
||||
// not available at construction time, so it is injected via SetTaskSyncNATS
|
||||
// during distributed wiring; nil keeps tasks in-memory-only (standalone).
|
||||
taskNats messaging.MessagingClient
|
||||
|
||||
// Storage (in-memory primary, persister for secondary persistence)
|
||||
tasks *xsync.SyncedMap[string, schema.Task]
|
||||
jobs *xsync.SyncedMap[string, schema.Job]
|
||||
persister JobPersister
|
||||
userID string // Scoping: empty for global (main service), set for per-user instances
|
||||
@@ -96,6 +108,31 @@ func (s *AgentJobService) SetDistributedJobStore(store *jobs.JobStore) {
|
||||
s.persister = &dbJobPersister{store: store}
|
||||
}
|
||||
|
||||
// SetTaskSyncNATS wires the distributed NATS client used to keep agent *tasks*
|
||||
// consistent across replicas (jobs already converge via the dispatcher + DB
|
||||
// read-through, so they are left untouched). The client is not available when the
|
||||
// service is constructed, so it is injected here during distributed wiring and the
|
||||
// tasks SyncedMap is rebuilt to pick it up. It is always called before Start /
|
||||
// hydrate, while the map is still empty, so rebuilding loses no state. Passing nil
|
||||
// (standalone) keeps the map in-memory-only with no broadcast.
|
||||
func (s *AgentJobService) SetTaskSyncNATS(nats messaging.MessagingClient) {
|
||||
s.taskNats = nats
|
||||
s.buildTasksMap()
|
||||
}
|
||||
|
||||
// buildTasksMap (re)constructs the cross-replica tasks SyncedMap from the current
|
||||
// taskNats. The Store adapter reads s.persister/s.userID live, so a persister swap
|
||||
// (SetDistributedJobStore) needs no rebuild; only the NATS client, fixed at
|
||||
// New-time, forces one - hence SetTaskSyncNATS calls this.
|
||||
func (s *AgentJobService) buildTasksMap() {
|
||||
s.tasks = syncstate.New(syncstate.Config[string, schema.Task]{
|
||||
Name: "agent.tasks",
|
||||
Key: func(t schema.Task) string { return t.ID },
|
||||
Nats: s.taskNats,
|
||||
Store: &taskStoreAdapter{svc: s},
|
||||
})
|
||||
}
|
||||
|
||||
// Dispatcher returns the distributed dispatcher (nil if not in distributed mode).
|
||||
func (s *AgentJobService) Dispatcher() DistributedDispatcher {
|
||||
return s.dispatcher
|
||||
@@ -106,13 +143,6 @@ func (s *AgentJobService) DBStore() *jobs.JobStore {
|
||||
return s.rawDBStore
|
||||
}
|
||||
|
||||
// saveTasks persists tasks via the configured persister (file or DB).
|
||||
func (s *AgentJobService) saveTasks(task schema.Task) {
|
||||
if err := s.persister.SaveTask(s.userID, task); err != nil {
|
||||
xlog.Warn("Failed to persist task", "error", err, "task_id", task.ID)
|
||||
}
|
||||
}
|
||||
|
||||
// saveJobs persists jobs via the configured persister (file or DB).
|
||||
func (s *AgentJobService) saveJobs(job schema.Job) {
|
||||
if err := s.persister.SaveJob(s.userID, job); err != nil {
|
||||
@@ -129,18 +159,8 @@ func (s *AgentJobService) LoadFromDB() {
|
||||
|
||||
// loadFromPersister loads tasks and jobs from the configured persister into memory.
|
||||
func (s *AgentJobService) loadFromPersister() {
|
||||
if tasks, err := s.persister.LoadTasks(s.userID); err != nil {
|
||||
if err := s.hydrateTasks(s.appConfig.Context); err != nil {
|
||||
xlog.Warn("Failed to load tasks from persister", "error", err)
|
||||
} else {
|
||||
for _, task := range tasks {
|
||||
s.tasks.Set(task.ID, task)
|
||||
if task.Enabled && task.Cron != "" {
|
||||
if err := s.ScheduleCronTask(task); err != nil {
|
||||
xlog.Warn("Failed to schedule cron task on load", "error", err, "task_id", task.ID)
|
||||
}
|
||||
}
|
||||
}
|
||||
xlog.Info("Loaded tasks from persister", "count", len(tasks))
|
||||
}
|
||||
|
||||
if loadedJobs, err := s.persister.LoadJobs(s.userID); err != nil {
|
||||
@@ -153,6 +173,27 @@ func (s *AgentJobService) loadFromPersister() {
|
||||
}
|
||||
}
|
||||
|
||||
// hydrateTasks loads tasks into the cross-replica SyncedMap and (re)schedules
|
||||
// cron entries for enabled tasks. Hydration goes through the SyncedMap's Store
|
||||
// read-through (Start), not Set, so it neither re-persists nor re-broadcasts the
|
||||
// loaded tasks. Each service instance hydrates exactly once: the main service via
|
||||
// Start -> loadFromPersister, per-user services via LoadFromDB or LoadTasksFromFile.
|
||||
func (s *AgentJobService) hydrateTasks(ctx context.Context) error {
|
||||
if err := s.tasks.Start(ctx); err != nil {
|
||||
return err
|
||||
}
|
||||
tasks := s.tasks.List()
|
||||
for _, task := range tasks {
|
||||
if task.Enabled && task.Cron != "" {
|
||||
if err := s.ScheduleCronTask(task); err != nil {
|
||||
xlog.Warn("Failed to schedule cron task on load", "error", err, "task_id", task.ID)
|
||||
}
|
||||
}
|
||||
}
|
||||
xlog.Info("Loaded tasks from persister", "count", len(tasks))
|
||||
return nil
|
||||
}
|
||||
|
||||
// JobExecution represents a job to be executed
|
||||
type JobExecution struct {
|
||||
Job schema.Job
|
||||
@@ -200,21 +241,19 @@ func NewAgentJobServiceWithPaths(
|
||||
) *AgentJobService {
|
||||
retentionDays := cmp.Or(appConfig.AgentJobRetentionDays, 30)
|
||||
|
||||
tasks := xsync.NewSyncedMap[string, schema.Task]()
|
||||
jobsMap := xsync.NewSyncedMap[string, schema.Job]()
|
||||
|
||||
return &AgentJobService{
|
||||
s := &AgentJobService{
|
||||
appConfig: appConfig,
|
||||
modelLoader: modelLoader,
|
||||
configLoader: configLoader,
|
||||
evaluator: evaluator,
|
||||
tasks: tasks,
|
||||
jobs: jobsMap,
|
||||
persister: &fileJobPersister{
|
||||
tasks: tasks,
|
||||
jobs: jobsMap,
|
||||
tasksFile: tasksFile,
|
||||
jobsFile: jobsFile,
|
||||
taskSet: make(map[string]schema.Task),
|
||||
},
|
||||
jobQueue: make(chan JobExecution, 100), // Buffer for 100 jobs
|
||||
cancellations: xsync.NewSyncedMap[string, context.CancelFunc](),
|
||||
@@ -222,25 +261,17 @@ func NewAgentJobServiceWithPaths(
|
||||
cronEntries: xsync.NewSyncedMap[string, cron.EntryID](),
|
||||
retentionDays: retentionDays,
|
||||
}
|
||||
// Build the cross-replica tasks map standalone (nil NATS); SetTaskSyncNATS
|
||||
// rebuilds it with the distributed client once that is available, before Start.
|
||||
s.buildTasksMap()
|
||||
return s
|
||||
}
|
||||
|
||||
// LoadTasksFromFile loads tasks from the persister into the in-memory map
|
||||
// and schedules cron entries. Named "FromFile" for backward compat; in DB
|
||||
// mode it loads from the database.
|
||||
func (s *AgentJobService) LoadTasksFromFile() error {
|
||||
tasks, err := s.persister.LoadTasks(s.userID)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
for _, task := range tasks {
|
||||
s.tasks.Set(task.ID, task)
|
||||
if task.Enabled && task.Cron != "" {
|
||||
if err := s.ScheduleCronTask(task); err != nil {
|
||||
xlog.Warn("Failed to schedule cron task on load", "error", err, "task_id", task.ID)
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
return s.hydrateTasks(s.appConfig.Context)
|
||||
}
|
||||
|
||||
// SaveTasksToFile flushes the current tasks map via the persister. File
|
||||
@@ -293,8 +324,12 @@ func (s *AgentJobService) CreateTask(task schema.Task) (string, error) {
|
||||
task.Enabled = true // Default to enabled
|
||||
}
|
||||
|
||||
// Store task
|
||||
s.tasks.Set(id, task)
|
||||
// Store task: Set updates the in-memory map, write-throughs to the persister
|
||||
// (file or DB), and broadcasts the create to peer replicas. Background ctx
|
||||
// because CreateTask carries no request ctx (mirrors the finetune service).
|
||||
if err := s.tasks.Set(context.Background(), task); err != nil {
|
||||
return "", fmt.Errorf("failed to persist task: %w", err)
|
||||
}
|
||||
|
||||
// Schedule cron if enabled and has cron expression
|
||||
if task.Enabled && task.Cron != "" {
|
||||
@@ -303,16 +338,15 @@ func (s *AgentJobService) CreateTask(task schema.Task) (string, error) {
|
||||
}
|
||||
}
|
||||
|
||||
s.saveTasks(task)
|
||||
return id, nil
|
||||
}
|
||||
|
||||
// UpdateTask updates an existing task
|
||||
func (s *AgentJobService) UpdateTask(id string, task schema.Task) error {
|
||||
if !s.tasks.Exists(id) {
|
||||
existing, ok := s.tasks.Get(id)
|
||||
if !ok {
|
||||
return fmt.Errorf("%w: %s", ErrTaskNotFound, id)
|
||||
}
|
||||
existing := s.tasks.Get(id)
|
||||
|
||||
// Preserve ID and CreatedAt
|
||||
task.ID = id
|
||||
@@ -324,8 +358,10 @@ func (s *AgentJobService) UpdateTask(id string, task schema.Task) error {
|
||||
s.UnscheduleCronTask(id)
|
||||
}
|
||||
|
||||
// Store updated task
|
||||
s.tasks.Set(id, task)
|
||||
// Store updated task: write-through + broadcast (see CreateTask).
|
||||
if err := s.tasks.Set(context.Background(), task); err != nil {
|
||||
return fmt.Errorf("failed to persist task: %w", err)
|
||||
}
|
||||
|
||||
// Schedule new cron if enabled and has cron expression
|
||||
if task.Enabled && task.Cron != "" {
|
||||
@@ -334,24 +370,22 @@ func (s *AgentJobService) UpdateTask(id string, task schema.Task) error {
|
||||
}
|
||||
}
|
||||
|
||||
s.saveTasks(task)
|
||||
return nil
|
||||
}
|
||||
|
||||
// DeleteTask deletes a task
|
||||
func (s *AgentJobService) DeleteTask(id string) error {
|
||||
if !s.tasks.Exists(id) {
|
||||
if _, ok := s.tasks.Get(id); !ok {
|
||||
return fmt.Errorf("%w: %s", ErrTaskNotFound, id)
|
||||
}
|
||||
|
||||
// Unschedule cron
|
||||
s.UnscheduleCronTask(id)
|
||||
|
||||
// Remove from memory
|
||||
s.tasks.Delete(id)
|
||||
|
||||
if err := s.persister.DeleteTask(id); err != nil {
|
||||
xlog.Warn("Failed to delete task from persister", "error", err, "task_id", id)
|
||||
// Delete removes from the in-memory map, deletes from the persister, and
|
||||
// broadcasts the removal to peer replicas.
|
||||
if err := s.tasks.Delete(context.Background(), id); err != nil {
|
||||
xlog.Warn("Failed to delete task from store", "error", err, "task_id", id)
|
||||
}
|
||||
|
||||
return nil
|
||||
@@ -359,8 +393,8 @@ func (s *AgentJobService) DeleteTask(id string) error {
|
||||
|
||||
// GetTask retrieves a task by ID
|
||||
func (s *AgentJobService) GetTask(id string) (*schema.Task, error) {
|
||||
task := s.tasks.Get(id)
|
||||
if task.ID == "" {
|
||||
task, ok := s.tasks.Get(id)
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("%w: %s", ErrTaskNotFound, id)
|
||||
}
|
||||
return &task, nil
|
||||
@@ -368,7 +402,7 @@ func (s *AgentJobService) GetTask(id string) (*schema.Task, error) {
|
||||
|
||||
// ListTasks returns all tasks, sorted by creation date (newest first)
|
||||
func (s *AgentJobService) ListTasks() []schema.Task {
|
||||
tasks := s.tasks.Values()
|
||||
tasks := s.tasks.List()
|
||||
// Sort by CreatedAt descending (newest first), then by Name for stability
|
||||
slices.SortFunc(tasks, func(a, b schema.Task) int {
|
||||
if a.CreatedAt.Equal(b.CreatedAt) {
|
||||
@@ -397,8 +431,8 @@ func (s *AgentJobService) buildPrompt(templateStr string, params map[string]stri
|
||||
// ExecuteJob creates and queues a job for execution
|
||||
// multimedia can be nil for backward compatibility
|
||||
func (s *AgentJobService) ExecuteJob(taskID string, params map[string]string, triggeredBy string, multimedia *schema.MultimediaAttachment) (string, error) {
|
||||
task := s.tasks.Get(taskID)
|
||||
if task.ID == "" {
|
||||
task, ok := s.tasks.Get(taskID)
|
||||
if !ok {
|
||||
return "", fmt.Errorf("%w: %s", ErrTaskNotFound, taskID)
|
||||
}
|
||||
|
||||
@@ -1451,6 +1485,12 @@ func (s *AgentJobService) Stop() error {
|
||||
if s.cronScheduler != nil {
|
||||
s.cronScheduler.Stop()
|
||||
}
|
||||
// Release the tasks SyncedMap subscription / background workers.
|
||||
if s.tasks != nil {
|
||||
if err := s.tasks.Close(); err != nil {
|
||||
xlog.Warn("Error closing tasks sync map", "error", err)
|
||||
}
|
||||
}
|
||||
xlog.Info("AgentJobService stopped")
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -14,24 +14,38 @@ import (
|
||||
)
|
||||
|
||||
// fileJobPersister persists tasks and jobs to JSON files.
|
||||
// It holds references to the service's syncmaps and serializes the entire
|
||||
// map contents on each save (bulk write). Reads at runtime return nil
|
||||
// (the in-memory map is the authoritative source); LoadTasks/LoadJobs
|
||||
// are used only at startup to bootstrap the syncmaps.
|
||||
//
|
||||
// Jobs serialize the service's in-memory jobs syncmap on each save (bulk write).
|
||||
// Tasks are kept in this persister's own taskSet map instead: the tasks SyncedMap
|
||||
// calls SaveTask/DeleteTask while holding its internal lock (write-through), so
|
||||
// reading back the SyncedMap here would re-enter that lock and deadlock. The
|
||||
// self-contained taskSet, seeded by LoadTasks, lets a per-task write rewrite the
|
||||
// whole bulk file without touching the SyncedMap.
|
||||
//
|
||||
// Runtime reads (GetJob/ListJobs) return nil (the in-memory state is the
|
||||
// authoritative source); LoadTasks/LoadJobs bootstrap state at startup.
|
||||
type fileJobPersister struct {
|
||||
tasks *xsync.SyncedMap[string, schema.Task]
|
||||
jobs *xsync.SyncedMap[string, schema.Job]
|
||||
tasksFile string
|
||||
jobsFile string
|
||||
mu sync.Mutex
|
||||
// taskSet is the persister's own view of all tasks, seeded by LoadTasks and
|
||||
// updated by SaveTask/DeleteTask. The bulk JSON file is rewritten from it.
|
||||
taskSet map[string]schema.Task
|
||||
}
|
||||
|
||||
func (p *fileJobPersister) SaveTask(_ string, _ schema.Task) error {
|
||||
return p.saveTasksToFile()
|
||||
func (p *fileJobPersister) SaveTask(_ string, task schema.Task) error {
|
||||
p.mu.Lock()
|
||||
defer p.mu.Unlock()
|
||||
p.taskSet[task.ID] = task
|
||||
return p.writeTasksLocked()
|
||||
}
|
||||
|
||||
func (p *fileJobPersister) DeleteTask(_ string) error {
|
||||
return p.saveTasksToFile()
|
||||
func (p *fileJobPersister) DeleteTask(taskID string) error {
|
||||
p.mu.Lock()
|
||||
defer p.mu.Unlock()
|
||||
delete(p.taskSet, taskID)
|
||||
return p.writeTasksLocked()
|
||||
}
|
||||
|
||||
func (p *fileJobPersister) SaveJob(_ string, _ schema.Job) error {
|
||||
@@ -43,7 +57,9 @@ func (p *fileJobPersister) DeleteJob(_ string) error {
|
||||
}
|
||||
|
||||
func (p *fileJobPersister) FlushTasks() error {
|
||||
return p.saveTasksToFile()
|
||||
p.mu.Lock()
|
||||
defer p.mu.Unlock()
|
||||
return p.writeTasksLocked()
|
||||
}
|
||||
|
||||
func (p *fileJobPersister) FlushJobs() error {
|
||||
@@ -83,6 +99,12 @@ func (p *fileJobPersister) LoadTasks(_ string) ([]schema.Task, error) {
|
||||
return nil, fmt.Errorf("failed to parse tasks file: %w", err)
|
||||
}
|
||||
|
||||
// Seed the in-memory set so subsequent per-task SaveTask/DeleteTask merge into
|
||||
// (rather than overwrite) the persisted tasks when the bulk file is rewritten.
|
||||
for _, t := range tf.Tasks {
|
||||
p.taskSet[t.ID] = t
|
||||
}
|
||||
|
||||
xlog.Info("Loaded tasks from file", "count", len(tf.Tasks))
|
||||
return tf.Tasks, nil
|
||||
}
|
||||
@@ -118,19 +140,20 @@ func (p *fileJobPersister) CleanupOldJobs(_ time.Duration) (int64, error) {
|
||||
return 0, nil // cleanup handled via in-memory filtering
|
||||
}
|
||||
|
||||
// saveTasksToFile serializes the entire tasks map to the JSON file.
|
||||
func (p *fileJobPersister) saveTasksToFile() error {
|
||||
// writeTasksLocked serializes the persister's task set to the JSON file. Callers
|
||||
// must hold p.mu.
|
||||
func (p *fileJobPersister) writeTasksLocked() error {
|
||||
if p.tasksFile == "" {
|
||||
return nil
|
||||
}
|
||||
|
||||
p.mu.Lock()
|
||||
defer p.mu.Unlock()
|
||||
|
||||
tf := schema.TasksFile{
|
||||
Tasks: p.tasks.Values(),
|
||||
tasks := make([]schema.Task, 0, len(p.taskSet))
|
||||
for _, t := range p.taskSet {
|
||||
tasks = append(tasks, t)
|
||||
}
|
||||
|
||||
tf := schema.TasksFile{Tasks: tasks}
|
||||
|
||||
data, err := json.MarshalIndent(tf, "", " ")
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to marshal tasks: %w", err)
|
||||
|
||||
@@ -20,28 +20,26 @@ var _ = Describe("JobPersister", func() {
|
||||
Context("fileJobPersister", func() {
|
||||
var (
|
||||
p *fileJobPersister
|
||||
tasks *xsync.SyncedMap[string, schema.Task]
|
||||
jobsMap *xsync.SyncedMap[string, schema.Job]
|
||||
tmpDir string
|
||||
)
|
||||
|
||||
BeforeEach(func() {
|
||||
tmpDir = GinkgoT().TempDir()
|
||||
tasks = xsync.NewSyncedMap[string, schema.Task]()
|
||||
jobsMap = xsync.NewSyncedMap[string, schema.Job]()
|
||||
p = &fileJobPersister{
|
||||
tasks: tasks,
|
||||
jobs: jobsMap,
|
||||
tasksFile: filepath.Join(tmpDir, "tasks.json"),
|
||||
jobsFile: filepath.Join(tmpDir, "jobs.json"),
|
||||
// taskSet is the persister's own task view (decoupled from the tasks
|
||||
// SyncedMap to avoid re-entering its lock during write-through).
|
||||
taskSet: make(map[string]schema.Task),
|
||||
}
|
||||
})
|
||||
|
||||
It("SaveTask writes all tasks to file", func() {
|
||||
tasks.Set("t1", schema.Task{ID: "t1", Name: "Task One", Model: "m", Prompt: "p"})
|
||||
tasks.Set("t2", schema.Task{ID: "t2", Name: "Task Two", Model: "m", Prompt: "p"})
|
||||
|
||||
Expect(p.SaveTask("", schema.Task{})).To(Succeed())
|
||||
Expect(p.SaveTask("", schema.Task{ID: "t1", Name: "Task One", Model: "m", Prompt: "p"})).To(Succeed())
|
||||
Expect(p.SaveTask("", schema.Task{ID: "t2", Name: "Task Two", Model: "m", Prompt: "p"})).To(Succeed())
|
||||
|
||||
// Verify file contents
|
||||
data, err := os.ReadFile(p.tasksFile)
|
||||
@@ -52,11 +50,9 @@ var _ = Describe("JobPersister", func() {
|
||||
})
|
||||
|
||||
It("DeleteTask writes updated tasks to file", func() {
|
||||
tasks.Set("t1", schema.Task{ID: "t1", Name: "Keep"})
|
||||
tasks.Set("t2", schema.Task{ID: "t2", Name: "Delete"})
|
||||
Expect(p.SaveTask("", schema.Task{ID: "t1", Name: "Keep"})).To(Succeed())
|
||||
Expect(p.SaveTask("", schema.Task{ID: "t2", Name: "Delete"})).To(Succeed())
|
||||
|
||||
// Simulate deletion from memory (caller does this before calling persister)
|
||||
tasks.Delete("t2")
|
||||
Expect(p.DeleteTask("t2")).To(Succeed())
|
||||
|
||||
data, err := os.ReadFile(p.tasksFile)
|
||||
|
||||
152
core/services/agentpool/task_sync_test.go
Normal file
152
core/services/agentpool/task_sync_test.go
Normal file
@@ -0,0 +1,152 @@
|
||||
package agentpool
|
||||
|
||||
// White-box tests (package agentpool) so a spec can build two AgentJobService
|
||||
// instances sharing one in-memory bus and assert that agent *tasks* converge
|
||||
// across replicas - the bug this migration fixes (ListTasks used to read
|
||||
// in-memory only, so a task created on replica A was invisible on replica B).
|
||||
// Jobs are deliberately untouched here: they already converge via the dispatcher
|
||||
// + DB read-through.
|
||||
|
||||
import (
|
||||
"context"
|
||||
"time"
|
||||
|
||||
. "github.com/onsi/ginkgo/v2"
|
||||
. "github.com/onsi/gomega"
|
||||
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
"github.com/mudler/LocalAI/core/schema"
|
||||
"github.com/mudler/LocalAI/core/services/messaging"
|
||||
"github.com/mudler/LocalAI/core/services/syncstate"
|
||||
"github.com/mudler/LocalAI/core/services/testutil"
|
||||
"github.com/mudler/LocalAI/pkg/system"
|
||||
)
|
||||
|
||||
// newTaskSyncService builds an AgentJobService wired to the given bus and a
|
||||
// throwaway data dir (so the file persister has somewhere to write). Model/config
|
||||
// loaders are nil because the task sync paths under test never touch them.
|
||||
func newTaskSyncService(bus messaging.MessagingClient) *AgentJobService {
|
||||
tmpDir := GinkgoT().TempDir()
|
||||
sysState := &system.SystemState{}
|
||||
sysState.Model.ModelsPath = tmpDir
|
||||
appConfig := config.NewApplicationConfig(
|
||||
config.WithDynamicConfigDir(tmpDir),
|
||||
config.WithContext(context.Background()),
|
||||
)
|
||||
appConfig.SystemState = sysState
|
||||
|
||||
svc := NewAgentJobServiceWithPaths(appConfig, nil, nil, nil,
|
||||
// Distinct per-replica files so the file persister write-through never
|
||||
// crosses replicas: convergence here must be proven via the bus alone.
|
||||
tmpDir+"/tasks.json", tmpDir+"/jobs.json")
|
||||
svc.SetTaskSyncNATS(bus)
|
||||
return svc
|
||||
}
|
||||
|
||||
var _ = Describe("AgentJobService task cross-replica sync", func() {
|
||||
Describe("two replicas sharing one bus", func() {
|
||||
var (
|
||||
bus *testutil.FakeBus
|
||||
a, b *AgentJobService
|
||||
)
|
||||
|
||||
BeforeEach(func() {
|
||||
// One shared bus, two replicas: exactly the distributed topology where a
|
||||
// round-robin request may land on a replica that did not originate the
|
||||
// change.
|
||||
bus = testutil.NewFakeBus()
|
||||
a = newTaskSyncService(bus)
|
||||
b = newTaskSyncService(bus)
|
||||
// Start hydrates (empty here) and subscribes both replicas to deltas.
|
||||
Expect(a.Start(context.Background())).To(Succeed())
|
||||
Expect(b.Start(context.Background())).To(Succeed())
|
||||
})
|
||||
|
||||
AfterEach(func() {
|
||||
Expect(a.Stop()).To(Succeed())
|
||||
Expect(b.Stop()).To(Succeed())
|
||||
})
|
||||
|
||||
It("makes a task created on A visible via B's GetTask and ListTasks", func() {
|
||||
id, err := a.CreateTask(schema.Task{Name: "Shared", Model: "m", Prompt: "p"})
|
||||
Expect(err).NotTo(HaveOccurred())
|
||||
|
||||
got, err := b.GetTask(id)
|
||||
Expect(err).NotTo(HaveOccurred(), "B must see a task A just created")
|
||||
Expect(got.Name).To(Equal("Shared"))
|
||||
|
||||
listed := b.ListTasks()
|
||||
Expect(listed).To(HaveLen(1))
|
||||
Expect(listed[0].ID).To(Equal(id))
|
||||
})
|
||||
|
||||
It("propagates a task update from A to B", func() {
|
||||
id, err := a.CreateTask(schema.Task{Name: "Before", Model: "m", Prompt: "p"})
|
||||
Expect(err).NotTo(HaveOccurred())
|
||||
|
||||
Expect(a.UpdateTask(id, schema.Task{Name: "After", Model: "m", Prompt: "p"})).To(Succeed())
|
||||
|
||||
got, err := b.GetTask(id)
|
||||
Expect(err).NotTo(HaveOccurred())
|
||||
Expect(got.Name).To(Equal("After"), "an update on A must be visible on B")
|
||||
})
|
||||
|
||||
It("removes a task from B when it is deleted on A", func() {
|
||||
id, err := a.CreateTask(schema.Task{Name: "Doomed", Model: "m", Prompt: "p"})
|
||||
Expect(err).NotTo(HaveOccurred())
|
||||
_, err = b.GetTask(id)
|
||||
Expect(err).NotTo(HaveOccurred(), "precondition: B must have the task before the delete")
|
||||
|
||||
Expect(a.DeleteTask(id)).To(Succeed())
|
||||
|
||||
_, err = b.GetTask(id)
|
||||
Expect(err).To(HaveOccurred(), "a delete on A must remove the task from B")
|
||||
Expect(b.ListTasks()).To(BeEmpty())
|
||||
})
|
||||
|
||||
It("does not re-broadcast a delta it received (echo-loop guard)", func() {
|
||||
subject := messaging.SubjectSyncStateDelta("agent.tasks")
|
||||
|
||||
_, err := a.CreateTask(schema.Task{Name: "Once", Model: "m", Prompt: "p"})
|
||||
Expect(err).NotTo(HaveOccurred())
|
||||
|
||||
// Exactly one publish: A's create. B applies it without re-publishing,
|
||||
// otherwise this would be 2+ and a real bus would storm.
|
||||
Expect(bus.PublishCount(subject)).To(Equal(1))
|
||||
})
|
||||
})
|
||||
|
||||
Describe("ListTasks ordering and scoping", func() {
|
||||
var svc *AgentJobService
|
||||
|
||||
BeforeEach(func() {
|
||||
svc = newTaskSyncService(testutil.NewFakeBus())
|
||||
Expect(svc.Start(context.Background())).To(Succeed())
|
||||
})
|
||||
AfterEach(func() { Expect(svc.Stop()).To(Succeed()) })
|
||||
|
||||
It("sorts newest-first, breaking ties by name", func() {
|
||||
// CreateTask stamps CreatedAt with time.Now(); space them out so ordering
|
||||
// is deterministic rather than relying on the sub-millisecond gap.
|
||||
oldID, err := svc.CreateTask(schema.Task{Name: "Old", Model: "m", Prompt: "p"})
|
||||
Expect(err).NotTo(HaveOccurred())
|
||||
time.Sleep(5 * time.Millisecond)
|
||||
newID, err := svc.CreateTask(schema.Task{Name: "New", Model: "m", Prompt: "p"})
|
||||
Expect(err).NotTo(HaveOccurred())
|
||||
|
||||
listed := svc.ListTasks()
|
||||
Expect(listed).To(HaveLen(2))
|
||||
Expect(listed[0].ID).To(Equal(newID), "newest first")
|
||||
Expect(listed[1].ID).To(Equal(oldID))
|
||||
})
|
||||
})
|
||||
|
||||
Describe("compile-time adapter contract", func() {
|
||||
It("satisfies syncstate.Store for tasks", func() {
|
||||
// Mirrors the var assertion in task_syncstore.go; keeps the type
|
||||
// referenced from a spec so drift surfaces here too.
|
||||
var _ syncstate.Store[string, schema.Task] = (*taskStoreAdapter)(nil)
|
||||
Expect(&taskStoreAdapter{}).ToNot(BeNil())
|
||||
})
|
||||
})
|
||||
})
|
||||
47
core/services/agentpool/task_syncstore.go
Normal file
47
core/services/agentpool/task_syncstore.go
Normal file
@@ -0,0 +1,47 @@
|
||||
package agentpool
|
||||
|
||||
import (
|
||||
"context"
|
||||
|
||||
"github.com/mudler/LocalAI/core/schema"
|
||||
"github.com/mudler/LocalAI/core/services/syncstate"
|
||||
)
|
||||
|
||||
// taskStoreAdapter bridges the existing JobPersister (file- or DB-backed) to the
|
||||
// generic syncstate.Store the tasks SyncedMap consumes. Only tasks are migrated:
|
||||
// jobs already converge across replicas via the dispatcher (NATS) plus the DB
|
||||
// read-through in ListJobs/GetJob, whereas ListTasks read in-memory only and so
|
||||
// went stale on replicas that did not originate the change.
|
||||
//
|
||||
// The adapter reads svc.persister and svc.userID live (rather than capturing
|
||||
// them) because both are configured by setters - SetDistributedJobStore swaps the
|
||||
// file persister for the DB one, SetUserID scopes per-user queries - AFTER the
|
||||
// service, and thus this adapter, is constructed. Reading them at call time means
|
||||
// the SyncedMap never has to be rebuilt when the persister is swapped.
|
||||
//
|
||||
// The SyncedMap value type is schema.Task: the exact shape ListTasks returns, so
|
||||
// reads need no conversion and REST responses are provably unchanged.
|
||||
type taskStoreAdapter struct {
|
||||
svc *AgentJobService
|
||||
}
|
||||
|
||||
// compile-time assertion that the adapter satisfies the component's Store.
|
||||
var _ syncstate.Store[string, schema.Task] = (*taskStoreAdapter)(nil)
|
||||
|
||||
// List hydrates the map from durable storage on Start/reconnect: the file's task
|
||||
// list (standalone) or every task row (DB / distributed).
|
||||
func (a *taskStoreAdapter) List(_ context.Context) ([]schema.Task, error) {
|
||||
return a.svc.persister.LoadTasks(a.svc.userID)
|
||||
}
|
||||
|
||||
// Upsert write-through persists a single task created/updated locally; the
|
||||
// SyncedMap then broadcasts the delta to peers.
|
||||
func (a *taskStoreAdapter) Upsert(_ context.Context, task schema.Task) error {
|
||||
return a.svc.persister.SaveTask(a.svc.userID, task)
|
||||
}
|
||||
|
||||
// Delete write-through removes a task locally; the SyncedMap then broadcasts the
|
||||
// removal to peers.
|
||||
func (a *taskStoreAdapter) Delete(_ context.Context, id string) error {
|
||||
return a.svc.persister.DeleteTask(id)
|
||||
}
|
||||
@@ -7,6 +7,7 @@ import (
|
||||
"github.com/mudler/LocalAGI/webui/collections"
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
"github.com/mudler/LocalAI/core/services/jobs"
|
||||
"github.com/mudler/LocalAI/core/services/messaging"
|
||||
"github.com/mudler/LocalAI/core/templates"
|
||||
"github.com/mudler/LocalAI/pkg/model"
|
||||
"github.com/mudler/xlog"
|
||||
@@ -28,6 +29,9 @@ type UserServicesManager struct {
|
||||
// Shared distributed backends (set once, inherited by per-user job services)
|
||||
jobDispatcher DistributedDispatcher
|
||||
jobDBStore *jobs.JobStore
|
||||
// jobNats keeps per-user agent tasks consistent across replicas (nil in
|
||||
// standalone). Inherited by each per-user AgentJobService.
|
||||
jobNats messaging.MessagingClient
|
||||
}
|
||||
|
||||
// NewUserServicesManager creates a new UserServicesManager.
|
||||
@@ -162,6 +166,10 @@ func (m *UserServicesManager) GetJobs(userID string) (*AgentJobService, error) {
|
||||
if m.jobDispatcher != nil {
|
||||
svc.SetDistributedBackends(m.jobDispatcher)
|
||||
}
|
||||
// Inherit the NATS client so per-user tasks broadcast across replicas. Must be
|
||||
// set before the hydrate below (LoadFromDB / LoadTasksFromFile) so the tasks
|
||||
// SyncedMap is rebuilt with the client while it is still empty.
|
||||
svc.SetTaskSyncNATS(m.jobNats)
|
||||
if m.jobDBStore != nil {
|
||||
svc.SetDistributedJobStore(m.jobDBStore)
|
||||
// Load tasks/jobs from DB immediately (per-user services skip Start())
|
||||
@@ -189,6 +197,12 @@ func (m *UserServicesManager) SetJobDBStore(s *jobs.JobStore) {
|
||||
m.jobDBStore = s
|
||||
}
|
||||
|
||||
// SetJobSyncNATS sets the NATS client used to keep per-user agent tasks consistent
|
||||
// across replicas.
|
||||
func (m *UserServicesManager) SetJobSyncNATS(nats messaging.MessagingClient) {
|
||||
m.jobNats = nats
|
||||
}
|
||||
|
||||
// ListAllUserIDs returns all user IDs that have scoped data directories.
|
||||
func (m *UserServicesManager) ListAllUserIDs() ([]string, error) {
|
||||
return m.storage.ListUserDirs()
|
||||
|
||||
@@ -8,6 +8,7 @@ import (
|
||||
"github.com/google/uuid"
|
||||
"github.com/mudler/LocalAI/core/services/advisorylock"
|
||||
"gorm.io/gorm"
|
||||
"gorm.io/gorm/clause"
|
||||
)
|
||||
|
||||
// FineTuneJobRecord tracks fine-tune jobs in PostgreSQL.
|
||||
@@ -80,6 +81,34 @@ func (s *FineTuneStore) List(userID string) ([]FineTuneJobRecord, error) {
|
||||
return jobs, q.Find(&jobs).Error
|
||||
}
|
||||
|
||||
// ListAll returns every fine-tune job across all users. The SyncedMap that backs
|
||||
// FineTuneService is a single global map (the REST API filters by user at read
|
||||
// time), so hydrate needs the full set rather than the per-user List above.
|
||||
func (s *FineTuneStore) ListAll() ([]FineTuneJobRecord, error) {
|
||||
var jobs []FineTuneJobRecord
|
||||
return jobs, s.db.Order("created_at DESC").Find(&jobs).Error
|
||||
}
|
||||
|
||||
// Upsert idempotently inserts or fully replaces a job row by primary key. The
|
||||
// SyncedMap write-through path issues a single Set per mutation regardless of
|
||||
// whether the job already exists, so it needs one create-or-update primitive
|
||||
// (Create alone fails on a duplicate key, UpdateStatus alone misses new rows and
|
||||
// only touches a few columns).
|
||||
func (s *FineTuneStore) Upsert(job *FineTuneJobRecord) error {
|
||||
if job.ID == "" {
|
||||
job.ID = uuid.New().String()
|
||||
}
|
||||
now := time.Now()
|
||||
if job.CreatedAt.IsZero() {
|
||||
job.CreatedAt = now
|
||||
}
|
||||
job.UpdatedAt = now
|
||||
return s.db.Clauses(clause.OnConflict{
|
||||
Columns: []clause.Column{{Name: "id"}},
|
||||
UpdateAll: true,
|
||||
}).Create(job).Error
|
||||
}
|
||||
|
||||
// UpdateStatus updates the status and message of a fine-tune job.
|
||||
func (s *FineTuneStore) UpdateStatus(id, status, message string) error {
|
||||
return s.db.Model(&FineTuneJobRecord{}).Where("id = ?", id).Updates(map[string]any{
|
||||
|
||||
13
core/services/distributed/finetune_suite_test.go
Normal file
13
core/services/distributed/finetune_suite_test.go
Normal file
@@ -0,0 +1,13 @@
|
||||
package distributed_test
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
. "github.com/onsi/ginkgo/v2"
|
||||
. "github.com/onsi/gomega"
|
||||
)
|
||||
|
||||
func TestDistributed(t *testing.T) {
|
||||
RegisterFailHandler(Fail)
|
||||
RunSpecs(t, "Distributed Suite")
|
||||
}
|
||||
61
core/services/distributed/finetune_test.go
Normal file
61
core/services/distributed/finetune_test.go
Normal file
@@ -0,0 +1,61 @@
|
||||
package distributed_test
|
||||
|
||||
import (
|
||||
. "github.com/onsi/ginkgo/v2"
|
||||
. "github.com/onsi/gomega"
|
||||
|
||||
"github.com/mudler/LocalAI/core/services/distributed"
|
||||
"github.com/mudler/LocalAI/core/services/testutil"
|
||||
)
|
||||
|
||||
var _ = Describe("FineTuneStore", func() {
|
||||
var store *distributed.FineTuneStore
|
||||
|
||||
BeforeEach(func() {
|
||||
db := testutil.SetupTestDB()
|
||||
var err error
|
||||
store, err = distributed.NewFineTuneStore(db)
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
})
|
||||
|
||||
Describe("ListAll", func() {
|
||||
It("returns jobs across all users (unlike per-user List)", func() {
|
||||
Expect(store.Create(&distributed.FineTuneJobRecord{ID: "j1", UserID: "u1", Status: "queued"})).To(Succeed())
|
||||
Expect(store.Create(&distributed.FineTuneJobRecord{ID: "j2", UserID: "u2", Status: "queued"})).To(Succeed())
|
||||
|
||||
all, err := store.ListAll()
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(all).To(HaveLen(2))
|
||||
|
||||
perUser, err := store.List("u1")
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(perUser).To(HaveLen(1), "List stays per-user")
|
||||
})
|
||||
})
|
||||
|
||||
Describe("Upsert", func() {
|
||||
It("inserts a new row", func() {
|
||||
Expect(store.Upsert(&distributed.FineTuneJobRecord{ID: "up-1", UserID: "u1", Status: "queued"})).To(Succeed())
|
||||
|
||||
got, err := store.Get("up-1")
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(got.Status).To(Equal("queued"))
|
||||
})
|
||||
|
||||
It("idempotently updates an existing row on a repeated key", func() {
|
||||
Expect(store.Upsert(&distributed.FineTuneJobRecord{ID: "up-2", UserID: "u1", Status: "queued"})).To(Succeed())
|
||||
// Second Upsert with the same primary key must update, not error on a
|
||||
// duplicate-key violation (this is the SyncedMap write-through contract).
|
||||
Expect(store.Upsert(&distributed.FineTuneJobRecord{ID: "up-2", UserID: "u1", Status: "completed", Message: "done"})).To(Succeed())
|
||||
|
||||
got, err := store.Get("up-2")
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(got.Status).To(Equal("completed"))
|
||||
Expect(got.Message).To(Equal("done"))
|
||||
|
||||
all, err := store.ListAll()
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(all).To(HaveLen(1), "upsert must not create a duplicate")
|
||||
})
|
||||
})
|
||||
})
|
||||
@@ -11,6 +11,7 @@ import (
|
||||
type Stores struct {
|
||||
Gallery *GalleryStore
|
||||
FineTune *FineTuneStore
|
||||
Quant *QuantStore
|
||||
Skills *SkillStore
|
||||
}
|
||||
|
||||
@@ -26,15 +27,21 @@ func InitStores(db *gorm.DB) (*Stores, error) {
|
||||
return nil, fmt.Errorf("fine-tune store: %w", err)
|
||||
}
|
||||
|
||||
quant, err := NewQuantStore(db)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("quantization store: %w", err)
|
||||
}
|
||||
|
||||
skills, err := NewSkillStore(db)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("skills store: %w", err)
|
||||
}
|
||||
|
||||
xlog.Info("Distributed stores initialized (Gallery, FineTune, Skills)")
|
||||
xlog.Info("Distributed stores initialized (Gallery, FineTune, Quant, Skills)")
|
||||
return &Stores{
|
||||
Gallery: gallery,
|
||||
FineTune: ft,
|
||||
Quant: quant,
|
||||
Skills: skills,
|
||||
}, nil
|
||||
}
|
||||
|
||||
105
core/services/distributed/quant.go
Normal file
105
core/services/distributed/quant.go
Normal file
@@ -0,0 +1,105 @@
|
||||
package distributed
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"github.com/google/uuid"
|
||||
"github.com/mudler/LocalAI/core/services/advisorylock"
|
||||
"gorm.io/gorm"
|
||||
"gorm.io/gorm/clause"
|
||||
)
|
||||
|
||||
// QuantJobRecord is the PostgreSQL row for one quantization job. Its columns
// follow the API shape (schema.QuantizationJob); the structured Config and
// ExtraOptions are stored as JSON text so a row can fully reconstruct the job.
type QuantJobRecord struct {
	// Identity and ownership.
	ID     string `gorm:"primaryKey;size:36" json:"id"`
	UserID string `gorm:"index;size:36" json:"user_id,omitempty"`

	// What is being quantized, and how.
	Model            string `gorm:"size:255" json:"model"`
	Backend          string `gorm:"size:64" json:"backend"`
	ModelID          string `gorm:"size:255" json:"model_id,omitempty"`
	QuantizationType string `gorm:"size:32" json:"quantization_type"`

	// Status is one of: queued, downloading, converting, quantizing,
	// completed, failed, stopped.
	Status  string `gorm:"index;size:32;default:queued" json:"status"`
	Message string `gorm:"type:text" json:"message,omitempty"`

	// Output location.
	OutputDir  string `gorm:"size:512" json:"output_dir,omitempty"`
	OutputFile string `gorm:"size:512" json:"output_file,omitempty"`

	// JSON-serialized request config and extra options (columns "config" and
	// "extra_options"); excluded from the record's own JSON encoding.
	ConfigJSON    string `gorm:"column:config;type:text" json:"-"`
	ExtraOptsJSON string `gorm:"column:extra_options;type:text" json:"-"`

	// Import state.
	ImportStatus    string `gorm:"size:32" json:"import_status,omitempty"`
	ImportMessage   string `gorm:"type:text" json:"import_message,omitempty"`
	ImportModelName string `gorm:"size:255" json:"import_model_name,omitempty"`

	// Row timestamps; Create and Upsert set these explicitly.
	CreatedAt time.Time `json:"created_at"`
	UpdatedAt time.Time `json:"updated_at"`
}
|
||||
|
||||
// TableName fixes the table name for QuantJobRecord to "quantization_jobs".
func (QuantJobRecord) TableName() string {
	const table = "quantization_jobs"
	return table
}
|
||||
|
||||
// QuantStore manages quantization job state in PostgreSQL.
|
||||
type QuantStore struct {
|
||||
db *gorm.DB
|
||||
}
|
||||
|
||||
// NewQuantStore creates a new QuantStore and auto-migrates.
|
||||
// Uses a PostgreSQL advisory lock to prevent concurrent migration races
|
||||
// when multiple instances (frontend + workers) start at the same time.
|
||||
func NewQuantStore(db *gorm.DB) (*QuantStore, error) {
|
||||
if err := advisorylock.WithLockCtx(context.Background(), db, advisorylock.KeySchemaMigrate, func() error {
|
||||
return db.AutoMigrate(&QuantJobRecord{})
|
||||
}); err != nil {
|
||||
return nil, fmt.Errorf("migrating quantization_jobs: %w", err)
|
||||
}
|
||||
return &QuantStore{db: db}, nil
|
||||
}
|
||||
|
||||
// Create stores a new quantization job.
|
||||
func (s *QuantStore) Create(job *QuantJobRecord) error {
|
||||
if job.ID == "" {
|
||||
job.ID = uuid.New().String()
|
||||
}
|
||||
job.CreatedAt = time.Now()
|
||||
job.UpdatedAt = job.CreatedAt
|
||||
return s.db.Create(job).Error
|
||||
}
|
||||
|
||||
// Get retrieves a quantization job by ID.
|
||||
func (s *QuantStore) Get(id string) (*QuantJobRecord, error) {
|
||||
var job QuantJobRecord
|
||||
if err := s.db.First(&job, "id = ?", id).Error; err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &job, nil
|
||||
}
|
||||
|
||||
// ListAll returns every quantization job across all users. The SyncedMap that
|
||||
// backs QuantizationService is a single global map (the REST API filters by user
|
||||
// at read time), so hydrate needs the full set.
|
||||
func (s *QuantStore) ListAll() ([]QuantJobRecord, error) {
|
||||
var jobs []QuantJobRecord
|
||||
return jobs, s.db.Order("created_at DESC").Find(&jobs).Error
|
||||
}
|
||||
|
||||
// Upsert idempotently inserts or fully replaces a job row by primary key. The
|
||||
// SyncedMap write-through path issues a single Set per mutation regardless of
|
||||
// whether the job already exists, so it needs one create-or-update primitive
|
||||
// (Create alone fails on a duplicate key).
|
||||
func (s *QuantStore) Upsert(job *QuantJobRecord) error {
|
||||
if job.ID == "" {
|
||||
job.ID = uuid.New().String()
|
||||
}
|
||||
now := time.Now()
|
||||
if job.CreatedAt.IsZero() {
|
||||
job.CreatedAt = now
|
||||
}
|
||||
job.UpdatedAt = now
|
||||
return s.db.Clauses(clause.OnConflict{
|
||||
Columns: []clause.Column{{Name: "id"}},
|
||||
UpdateAll: true,
|
||||
}).Create(job).Error
|
||||
}
|
||||
|
||||
// Delete removes a quantization job.
|
||||
func (s *QuantStore) Delete(id string) error {
|
||||
return s.db.Where("id = ?", id).Delete(&QuantJobRecord{}).Error
|
||||
}
|
||||
57
core/services/distributed/quant_test.go
Normal file
57
core/services/distributed/quant_test.go
Normal file
@@ -0,0 +1,57 @@
|
||||
package distributed_test
|
||||
|
||||
import (
|
||||
. "github.com/onsi/ginkgo/v2"
|
||||
. "github.com/onsi/gomega"
|
||||
|
||||
"github.com/mudler/LocalAI/core/services/distributed"
|
||||
"github.com/mudler/LocalAI/core/services/testutil"
|
||||
)
|
||||
|
||||
var _ = Describe("QuantStore", func() {
|
||||
var store *distributed.QuantStore
|
||||
|
||||
BeforeEach(func() {
|
||||
db := testutil.SetupTestDB()
|
||||
var err error
|
||||
store, err = distributed.NewQuantStore(db)
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
})
|
||||
|
||||
Describe("ListAll", func() {
|
||||
It("returns jobs across all users", func() {
|
||||
Expect(store.Create(&distributed.QuantJobRecord{ID: "j1", UserID: "u1", Status: "queued"})).To(Succeed())
|
||||
Expect(store.Create(&distributed.QuantJobRecord{ID: "j2", UserID: "u2", Status: "queued"})).To(Succeed())
|
||||
|
||||
all, err := store.ListAll()
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(all).To(HaveLen(2))
|
||||
})
|
||||
})
|
||||
|
||||
Describe("Upsert", func() {
|
||||
It("inserts a new row", func() {
|
||||
Expect(store.Upsert(&distributed.QuantJobRecord{ID: "up-1", UserID: "u1", Status: "queued"})).To(Succeed())
|
||||
|
||||
got, err := store.Get("up-1")
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(got.Status).To(Equal("queued"))
|
||||
})
|
||||
|
||||
It("idempotently updates an existing row on a repeated key", func() {
|
||||
Expect(store.Upsert(&distributed.QuantJobRecord{ID: "up-2", UserID: "u1", Status: "queued"})).To(Succeed())
|
||||
// Second Upsert with the same primary key must update, not error on a
|
||||
// duplicate-key violation (this is the SyncedMap write-through contract).
|
||||
Expect(store.Upsert(&distributed.QuantJobRecord{ID: "up-2", UserID: "u1", Status: "completed", Message: "done"})).To(Succeed())
|
||||
|
||||
got, err := store.Get("up-2")
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(got.Status).To(Equal("completed"))
|
||||
Expect(got.Message).To(Equal("done"))
|
||||
|
||||
all, err := store.ListAll()
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(all).To(HaveLen(1), "upsert must not create a duplicate")
|
||||
})
|
||||
})
|
||||
})
|
||||
13
core/services/finetune/finetune_suite_test.go
Normal file
13
core/services/finetune/finetune_suite_test.go
Normal file
@@ -0,0 +1,13 @@
|
||||
package finetune
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
. "github.com/onsi/ginkgo/v2"
|
||||
. "github.com/onsi/gomega"
|
||||
)
|
||||
|
||||
func TestFinetune(t *testing.T) {
|
||||
RegisterFailHandler(Fail)
|
||||
RunSpecs(t, "Finetune Suite")
|
||||
}
|
||||
@@ -19,6 +19,7 @@ import (
|
||||
"github.com/mudler/LocalAI/core/schema"
|
||||
"github.com/mudler/LocalAI/core/services/distributed"
|
||||
"github.com/mudler/LocalAI/core/services/messaging"
|
||||
"github.com/mudler/LocalAI/core/services/syncstate"
|
||||
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
|
||||
"github.com/mudler/LocalAI/pkg/model"
|
||||
"github.com/mudler/LocalAI/pkg/utils"
|
||||
@@ -32,44 +33,63 @@ type FineTuneService struct {
|
||||
modelLoader *model.ModelLoader
|
||||
configLoader *config.ModelConfigLoader
|
||||
|
||||
mu sync.Mutex
|
||||
jobs map[string]*schema.FineTuneJob
|
||||
// mu serializes the read-modify-write of job values. The SyncedMap guards its
|
||||
// own map structure, but a job is a pointer mutated in place (e.g. the export
|
||||
// goroutine), so the service still needs a lock to keep those field updates
|
||||
// and the subsequent Set atomic with respect to readers.
|
||||
mu sync.Mutex
|
||||
|
||||
// Distributed mode (nil when not in distributed mode)
|
||||
natsClient messaging.Publisher
|
||||
fineTuneStore *distributed.FineTuneStore
|
||||
// jobs is the cross-replica job store: an in-memory map kept consistent across
|
||||
// replicas via NATS, optionally read-through to PostgreSQL in distributed mode.
|
||||
jobs *syncstate.SyncedMap[string, *schema.FineTuneJob]
|
||||
}
|
||||
|
||||
// SetNATSClient sets the NATS client for distributed progress publishing.
|
||||
func (s *FineTuneService) SetNATSClient(nc messaging.Publisher) {
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
s.natsClient = nc
|
||||
}
|
||||
|
||||
// SetFineTuneStore sets the PostgreSQL fine-tune store for distributed persistence.
|
||||
func (s *FineTuneService) SetFineTuneStore(store *distributed.FineTuneStore) {
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
s.fineTuneStore = store
|
||||
}
|
||||
|
||||
// NewFineTuneService creates a new FineTuneService.
|
||||
// NewFineTuneService creates a new FineTuneService. In distributed mode pass the
|
||||
// shared NATS client and PostgreSQL store so jobs stay consistent across
|
||||
// replicas; pass nil for both in standalone mode, where the disk Loader hydrates
|
||||
// the map and there is nothing to broadcast.
|
||||
func NewFineTuneService(
|
||||
appConfig *config.ApplicationConfig,
|
||||
modelLoader *model.ModelLoader,
|
||||
configLoader *config.ModelConfigLoader,
|
||||
nats messaging.MessagingClient,
|
||||
store *distributed.FineTuneStore,
|
||||
) *FineTuneService {
|
||||
s := &FineTuneService{
|
||||
appConfig: appConfig,
|
||||
modelLoader: modelLoader,
|
||||
configLoader: configLoader,
|
||||
jobs: make(map[string]*schema.FineTuneJob),
|
||||
}
|
||||
s.loadAllJobs()
|
||||
|
||||
// Only attach a Store interface when a concrete store exists, otherwise the
|
||||
// SyncedMap would see a non-nil interface wrapping a nil pointer and try to
|
||||
// hydrate/write through a nil DB.
|
||||
var syncStore syncstate.Store[string, *schema.FineTuneJob]
|
||||
if store != nil {
|
||||
syncStore = &fineTuneStoreAdapter{store: store}
|
||||
}
|
||||
|
||||
s.jobs = syncstate.New(syncstate.Config[string, *schema.FineTuneJob]{
|
||||
Name: "finetune.jobs",
|
||||
Key: func(j *schema.FineTuneJob) string { return j.ID },
|
||||
Nats: nats,
|
||||
Store: syncStore,
|
||||
Loader: s.loadJobsFromDisk, // ignored when Store is set (distributed mode)
|
||||
})
|
||||
|
||||
// Hydrate + subscribe. A hydrate failure must not take the server down: log
|
||||
// and continue degraded (standalone), mirroring the OpCache wiring.
|
||||
if err := s.jobs.Start(appConfig.Context); err != nil {
|
||||
xlog.Warn("FineTune SyncedMap start failed; running degraded", "error", err)
|
||||
}
|
||||
return s
|
||||
}
|
||||
|
||||
// Close releases the SyncedMap subscription and background workers.
|
||||
func (s *FineTuneService) Close() error {
|
||||
return s.jobs.Close()
|
||||
}
|
||||
|
||||
// fineTuneBaseDir returns the base directory for fine-tune job data.
|
||||
func (s *FineTuneService) fineTuneBaseDir() string {
|
||||
return filepath.Join(s.appConfig.DataPath, "fine-tune")
|
||||
@@ -100,15 +120,18 @@ func (s *FineTuneService) saveJobState(job *schema.FineTuneJob) {
|
||||
}
|
||||
}
|
||||
|
||||
// loadAllJobs scans the fine-tune directory for persisted jobs and loads them.
|
||||
func (s *FineTuneService) loadAllJobs() {
|
||||
// loadJobsFromDisk scans the fine-tune directory for persisted jobs and returns
|
||||
// them. It is the SyncedMap Loader used in standalone mode (no DB); the returned
|
||||
// slice hydrates the map on Start.
|
||||
func (s *FineTuneService) loadJobsFromDisk(_ context.Context) ([]*schema.FineTuneJob, error) {
|
||||
baseDir := s.fineTuneBaseDir()
|
||||
entries, err := os.ReadDir(baseDir)
|
||||
if err != nil {
|
||||
// Directory doesn't exist yet — that's fine
|
||||
return
|
||||
// Directory doesn't exist yet — that's fine, start empty.
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
var jobs []*schema.FineTuneJob
|
||||
for _, entry := range entries {
|
||||
if !entry.IsDir() {
|
||||
continue
|
||||
@@ -137,12 +160,13 @@ func (s *FineTuneService) loadAllJobs() {
|
||||
job.ExportMessage = "Server restarted while export was running"
|
||||
}
|
||||
|
||||
s.jobs[job.ID] = &job
|
||||
jobs = append(jobs, &job)
|
||||
}
|
||||
|
||||
if len(s.jobs) > 0 {
|
||||
xlog.Info("Loaded persisted fine-tune jobs", "count", len(s.jobs))
|
||||
if len(jobs) > 0 {
|
||||
xlog.Info("Loaded persisted fine-tune jobs", "count", len(jobs))
|
||||
}
|
||||
return jobs, nil
|
||||
}
|
||||
|
||||
// StartJob starts a new fine-tuning job.
|
||||
@@ -236,27 +260,13 @@ func (s *FineTuneService) StartJob(ctx context.Context, userID string, req schem
|
||||
CreatedAt: time.Now().UTC().Format(time.RFC3339),
|
||||
Config: &req,
|
||||
}
|
||||
s.jobs[jobID] = job
|
||||
s.saveJobState(job)
|
||||
|
||||
// Persist to PostgreSQL in distributed mode
|
||||
if s.fineTuneStore != nil {
|
||||
configJSON, _ := json.Marshal(req)
|
||||
extraJSON, _ := json.Marshal(req.ExtraOptions)
|
||||
s.fineTuneStore.Create(&distributed.FineTuneJobRecord{
|
||||
ID: jobID,
|
||||
UserID: userID,
|
||||
Model: req.Model,
|
||||
Backend: backendName,
|
||||
ModelID: modelID,
|
||||
TrainingType: req.TrainingType,
|
||||
TrainingMethod: req.TrainingMethod,
|
||||
Status: "queued",
|
||||
OutputDir: outputDir,
|
||||
ConfigJSON: string(configJSON),
|
||||
ExtraOptsJSON: string(extraJSON),
|
||||
})
|
||||
// Set write-through persists to PostgreSQL (distributed) and broadcasts to
|
||||
// peer replicas; the disk state.json is written separately for restart
|
||||
// recovery / standalone hydrate.
|
||||
if err := s.jobs.Set(ctx, job); err != nil {
|
||||
return nil, fmt.Errorf("failed to persist job: %w", err)
|
||||
}
|
||||
s.saveJobState(job)
|
||||
|
||||
return &schema.FineTuneJobResponse{
|
||||
ID: jobID,
|
||||
@@ -270,7 +280,7 @@ func (s *FineTuneService) GetJob(userID, jobID string) (*schema.FineTuneJob, err
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
|
||||
job, ok := s.jobs[jobID]
|
||||
job, ok := s.jobs.Get(jobID)
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("job not found: %s", jobID)
|
||||
}
|
||||
@@ -286,7 +296,7 @@ func (s *FineTuneService) ListJobs(userID string) []*schema.FineTuneJob {
|
||||
defer s.mu.Unlock()
|
||||
|
||||
var result []*schema.FineTuneJob
|
||||
for _, job := range s.jobs {
|
||||
for _, job := range s.jobs.List() {
|
||||
if userID == "" || job.UserID == userID {
|
||||
result = append(result, job)
|
||||
}
|
||||
@@ -302,7 +312,7 @@ func (s *FineTuneService) ListJobs(userID string) []*schema.FineTuneJob {
|
||||
// StopJob stops a running fine-tuning job.
|
||||
func (s *FineTuneService) StopJob(ctx context.Context, userID, jobID string, saveCheckpoint bool) error {
|
||||
s.mu.Lock()
|
||||
job, ok := s.jobs[jobID]
|
||||
job, ok := s.jobs.Get(jobID)
|
||||
if !ok {
|
||||
s.mu.Unlock()
|
||||
return fmt.Errorf("job not found: %s", jobID)
|
||||
@@ -323,10 +333,10 @@ func (s *FineTuneService) StopJob(ctx context.Context, userID, jobID string, sav
|
||||
s.mu.Lock()
|
||||
job.Status = "stopped"
|
||||
job.Message = "Training stopped by user"
|
||||
s.saveJobState(job)
|
||||
if s.fineTuneStore != nil {
|
||||
s.fineTuneStore.UpdateStatus(jobID, "stopped", "Training stopped by user")
|
||||
if err := s.jobs.Set(ctx, job); err != nil {
|
||||
xlog.Warn("Failed to persist stopped job", "job_id", jobID, "error", err)
|
||||
}
|
||||
s.saveJobState(job)
|
||||
s.mu.Unlock()
|
||||
|
||||
return nil
|
||||
@@ -335,7 +345,7 @@ func (s *FineTuneService) StopJob(ctx context.Context, userID, jobID string, sav
|
||||
// DeleteJob removes a fine-tuning job and its associated data from disk.
|
||||
func (s *FineTuneService) DeleteJob(userID, jobID string) error {
|
||||
s.mu.Lock()
|
||||
job, ok := s.jobs[jobID]
|
||||
job, ok := s.jobs.Get(jobID)
|
||||
if !ok {
|
||||
s.mu.Unlock()
|
||||
return fmt.Errorf("job not found: %s", jobID)
|
||||
@@ -360,9 +370,10 @@ func (s *FineTuneService) DeleteJob(userID, jobID string) error {
|
||||
}
|
||||
|
||||
exportModelName := job.ExportModelName
|
||||
delete(s.jobs, jobID)
|
||||
if s.fineTuneStore != nil {
|
||||
s.fineTuneStore.Delete(jobID)
|
||||
// Delete write-through removes the DB row (distributed) and broadcasts the
|
||||
// removal to peer replicas. DeleteJob has no ctx, so use Background.
|
||||
if err := s.jobs.Delete(context.Background(), jobID); err != nil {
|
||||
xlog.Warn("Failed to delete job from store", "job_id", jobID, "error", err)
|
||||
}
|
||||
s.mu.Unlock()
|
||||
|
||||
@@ -398,7 +409,7 @@ func (s *FineTuneService) DeleteJob(userID, jobID string) error {
|
||||
// StreamProgress opens a gRPC progress stream and calls the callback for each update.
|
||||
func (s *FineTuneService) StreamProgress(ctx context.Context, userID, jobID string, callback func(event *schema.FineTuneProgressEvent)) error {
|
||||
s.mu.Lock()
|
||||
job, ok := s.jobs[jobID]
|
||||
job, ok := s.jobs.Get(jobID)
|
||||
if !ok {
|
||||
s.mu.Unlock()
|
||||
return fmt.Errorf("job not found: %s", jobID)
|
||||
@@ -427,7 +438,7 @@ func (s *FineTuneService) StreamProgress(ctx context.Context, userID, jobID stri
|
||||
}, func(update *pb.FineTuneProgressUpdate) {
|
||||
// Update job status and persist
|
||||
s.mu.Lock()
|
||||
if j, ok := s.jobs[jobID]; ok {
|
||||
if j, ok := s.jobs.Get(jobID); ok {
|
||||
// Don't let progress updates overwrite terminal states
|
||||
isTerminal := j.Status == "stopped" || j.Status == "completed" || j.Status == "failed"
|
||||
if !isTerminal {
|
||||
@@ -436,10 +447,10 @@ func (s *FineTuneService) StreamProgress(ctx context.Context, userID, jobID stri
|
||||
if update.Message != "" {
|
||||
j.Message = update.Message
|
||||
}
|
||||
s.saveJobState(j)
|
||||
if s.fineTuneStore != nil {
|
||||
s.fineTuneStore.UpdateStatus(jobID, j.Status, j.Message)
|
||||
if err := s.jobs.Set(ctx, j); err != nil {
|
||||
xlog.Warn("Failed to persist progress update", "job_id", jobID, "error", err)
|
||||
}
|
||||
s.saveJobState(j)
|
||||
}
|
||||
s.mu.Unlock()
|
||||
|
||||
@@ -474,7 +485,7 @@ func (s *FineTuneService) StreamProgress(ctx context.Context, userID, jobID stri
|
||||
// ListCheckpoints lists checkpoints for a job.
|
||||
func (s *FineTuneService) ListCheckpoints(ctx context.Context, userID, jobID string) ([]*pb.CheckpointInfo, error) {
|
||||
s.mu.Lock()
|
||||
job, ok := s.jobs[jobID]
|
||||
job, ok := s.jobs.Get(jobID)
|
||||
if !ok {
|
||||
s.mu.Unlock()
|
||||
return nil, fmt.Errorf("job not found: %s", jobID)
|
||||
@@ -520,7 +531,7 @@ func sanitizeModelName(s string) string {
|
||||
// ExportModel starts an async model export from a checkpoint and returns the intended model name immediately.
|
||||
func (s *FineTuneService) ExportModel(ctx context.Context, userID, jobID string, req schema.ExportRequest) (string, error) {
|
||||
s.mu.Lock()
|
||||
job, ok := s.jobs[jobID]
|
||||
job, ok := s.jobs.Get(jobID)
|
||||
if !ok {
|
||||
s.mu.Unlock()
|
||||
return "", fmt.Errorf("job not found: %s", jobID)
|
||||
@@ -572,6 +583,9 @@ func (s *FineTuneService) ExportModel(ctx context.Context, userID, jobID string,
|
||||
job.ExportStatus = "exporting"
|
||||
job.ExportMessage = ""
|
||||
job.ExportModelName = ""
|
||||
if err := s.jobs.Set(ctx, job); err != nil {
|
||||
xlog.Warn("Failed to persist export start", "job_id", jobID, "error", err)
|
||||
}
|
||||
s.saveJobState(job)
|
||||
s.mu.Unlock()
|
||||
|
||||
@@ -662,24 +676,30 @@ func (s *FineTuneService) ExportModel(ctx context.Context, userID, jobID string,
|
||||
|
||||
xlog.Info("Model exported and registered", "job_id", jobID, "model_name", modelName, "format", req.ExportFormat)
|
||||
|
||||
// Runs after the HTTP request returns, so use Background rather than the
|
||||
// (now likely cancelled) request ctx for the write-through.
|
||||
s.mu.Lock()
|
||||
job.ExportStatus = "completed"
|
||||
job.ExportModelName = modelName
|
||||
job.ExportMessage = ""
|
||||
s.saveJobState(job)
|
||||
if s.fineTuneStore != nil {
|
||||
s.fineTuneStore.UpdateExportStatus(jobID, "completed", "", modelName)
|
||||
if err := s.jobs.Set(context.Background(), job); err != nil {
|
||||
xlog.Warn("Failed to persist export completion", "job_id", jobID, "error", err)
|
||||
}
|
||||
s.saveJobState(job)
|
||||
s.mu.Unlock()
|
||||
}()
|
||||
|
||||
return modelName, nil
|
||||
}
|
||||
|
||||
// setExportMessage updates the export message and persists the job state.
|
||||
// setExportMessage updates the export message and persists the job state. Called
|
||||
// from the background export goroutine, so it uses Background for write-through.
|
||||
func (s *FineTuneService) setExportMessage(job *schema.FineTuneJob, msg string) {
|
||||
s.mu.Lock()
|
||||
job.ExportMessage = msg
|
||||
if err := s.jobs.Set(context.Background(), job); err != nil {
|
||||
xlog.Warn("Failed to persist export message", "job_id", job.ID, "error", err)
|
||||
}
|
||||
s.saveJobState(job)
|
||||
s.mu.Unlock()
|
||||
}
|
||||
@@ -687,7 +707,7 @@ func (s *FineTuneService) setExportMessage(job *schema.FineTuneJob, msg string)
|
||||
// GetExportedModelPath returns the path to the exported model directory and its name.
|
||||
func (s *FineTuneService) GetExportedModelPath(userID, jobID string) (string, string, error) {
|
||||
s.mu.Lock()
|
||||
job, ok := s.jobs[jobID]
|
||||
job, ok := s.jobs.Get(jobID)
|
||||
if !ok {
|
||||
s.mu.Unlock()
|
||||
return "", "", fmt.Errorf("job not found: %s", jobID)
|
||||
@@ -723,10 +743,10 @@ func (s *FineTuneService) setExportFailed(job *schema.FineTuneJob, message strin
|
||||
s.mu.Lock()
|
||||
job.ExportStatus = "failed"
|
||||
job.ExportMessage = message
|
||||
s.saveJobState(job)
|
||||
if s.fineTuneStore != nil {
|
||||
s.fineTuneStore.UpdateExportStatus(job.ID, "failed", message, "")
|
||||
if err := s.jobs.Set(context.Background(), job); err != nil {
|
||||
xlog.Warn("Failed to persist export failure", "job_id", job.ID, "error", err)
|
||||
}
|
||||
s.saveJobState(job)
|
||||
s.mu.Unlock()
|
||||
}
|
||||
|
||||
|
||||
185
core/services/finetune/service_test.go
Normal file
185
core/services/finetune/service_test.go
Normal file
@@ -0,0 +1,185 @@
|
||||
package finetune
|
||||
|
||||
// White-box tests (package finetune) so a spec can drive the service's internal
|
||||
// SyncedMap the same way StartJob does (via jobs.Set) without standing up a
|
||||
// training backend, then assert the cross-replica reads (GetJob/ListJobs) and
|
||||
// the adapter conversions that keep REST responses byte-for-byte unchanged.
|
||||
|
||||
import (
|
||||
"context"
|
||||
|
||||
. "github.com/onsi/ginkgo/v2"
|
||||
. "github.com/onsi/gomega"
|
||||
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
"github.com/mudler/LocalAI/core/schema"
|
||||
"github.com/mudler/LocalAI/core/services/distributed"
|
||||
"github.com/mudler/LocalAI/core/services/testutil"
|
||||
)
|
||||
|
||||
// newTestService builds a standalone FineTuneService wired to the given bus. The
|
||||
// model/config loaders are nil because the read/sync paths under test never touch
|
||||
// them; the data dir is a throwaway temp dir so the disk Loader finds nothing.
|
||||
func newTestService(bus *testutil.FakeBus) *FineTuneService {
|
||||
appConfig := &config.ApplicationConfig{
|
||||
Context: context.Background(),
|
||||
DataPath: GinkgoT().TempDir(),
|
||||
}
|
||||
return NewFineTuneService(appConfig, nil, nil, bus, nil)
|
||||
}
|
||||
|
||||
var _ = Describe("FineTuneService", func() {
|
||||
ctx := context.Background()
|
||||
|
||||
Describe("cross-replica job visibility", func() {
|
||||
var (
|
||||
bus *testutil.FakeBus
|
||||
a, b *FineTuneService
|
||||
)
|
||||
|
||||
BeforeEach(func() {
|
||||
// One shared bus, two replicas: exactly the distributed topology where
|
||||
// a round-robin request may land on a replica that did not originate
|
||||
// the change.
|
||||
bus = testutil.NewFakeBus()
|
||||
a = newTestService(bus)
|
||||
b = newTestService(bus)
|
||||
})
|
||||
|
||||
AfterEach(func() {
|
||||
Expect(a.Close()).To(Succeed())
|
||||
Expect(b.Close()).To(Succeed())
|
||||
})
|
||||
|
||||
It("makes a job created on A visible via B's GetJob and ListJobs", func() {
|
||||
job := &schema.FineTuneJob{ID: "job-1", UserID: "user-1", Status: "queued", CreatedAt: "2026-06-27T10:00:00Z"}
|
||||
// StartJob persists via jobs.Set; drive that directly to avoid a backend.
|
||||
Expect(a.jobs.Set(ctx, job)).To(Succeed())
|
||||
|
||||
got, err := b.GetJob("user-1", "job-1")
|
||||
Expect(err).ToNot(HaveOccurred(), "B must see a job A just created")
|
||||
Expect(got.Status).To(Equal("queued"))
|
||||
|
||||
listed := b.ListJobs("user-1")
|
||||
Expect(listed).To(HaveLen(1))
|
||||
Expect(listed[0].ID).To(Equal("job-1"))
|
||||
})
|
||||
|
||||
It("removes a job from B when it is deleted on A", func() {
|
||||
job := &schema.FineTuneJob{ID: "job-2", UserID: "user-1", Status: "completed", CreatedAt: "2026-06-27T10:00:00Z"}
|
||||
Expect(a.jobs.Set(ctx, job)).To(Succeed())
|
||||
_, err := b.GetJob("user-1", "job-2")
|
||||
Expect(err).ToNot(HaveOccurred(), "precondition: B must have the job before the delete")
|
||||
|
||||
Expect(a.jobs.Delete(ctx, "job-2")).To(Succeed())
|
||||
|
||||
_, err = b.GetJob("user-1", "job-2")
|
||||
Expect(err).To(HaveOccurred(), "a delete on A must remove the job from B")
|
||||
})
|
||||
|
||||
It("propagates a status update from A to B", func() {
|
||||
job := &schema.FineTuneJob{ID: "job-3", UserID: "user-1", Status: "training", CreatedAt: "2026-06-27T10:00:00Z"}
|
||||
Expect(a.jobs.Set(ctx, job)).To(Succeed())
|
||||
|
||||
updated := &schema.FineTuneJob{ID: "job-3", UserID: "user-1", Status: "completed", CreatedAt: "2026-06-27T10:00:00Z"}
|
||||
Expect(a.jobs.Set(ctx, updated)).To(Succeed())
|
||||
|
||||
got, err := b.GetJob("user-1", "job-3")
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(got.Status).To(Equal("completed"))
|
||||
})
|
||||
})
|
||||
|
||||
Describe("ListJobs", func() {
|
||||
var svc *FineTuneService
|
||||
|
||||
BeforeEach(func() {
|
||||
svc = newTestService(testutil.NewFakeBus())
|
||||
})
|
||||
AfterEach(func() { Expect(svc.Close()).To(Succeed()) })
|
||||
|
||||
It("filters by user and sorts newest-first", func() {
|
||||
Expect(svc.jobs.Set(ctx, &schema.FineTuneJob{ID: "old", UserID: "u1", CreatedAt: "2026-06-25T10:00:00Z"})).To(Succeed())
|
||||
Expect(svc.jobs.Set(ctx, &schema.FineTuneJob{ID: "new", UserID: "u1", CreatedAt: "2026-06-27T10:00:00Z"})).To(Succeed())
|
||||
Expect(svc.jobs.Set(ctx, &schema.FineTuneJob{ID: "other", UserID: "u2", CreatedAt: "2026-06-26T10:00:00Z"})).To(Succeed())
|
||||
|
||||
jobs := svc.ListJobs("u1")
|
||||
Expect(jobs).To(HaveLen(2), "only u1's jobs")
|
||||
Expect(jobs[0].ID).To(Equal("new"), "newest first")
|
||||
Expect(jobs[1].ID).To(Equal("old"))
|
||||
})
|
||||
|
||||
It("returns every user's jobs when the userID filter is empty", func() {
|
||||
Expect(svc.jobs.Set(ctx, &schema.FineTuneJob{ID: "a", UserID: "u1", CreatedAt: "2026-06-25T10:00:00Z"})).To(Succeed())
|
||||
Expect(svc.jobs.Set(ctx, &schema.FineTuneJob{ID: "b", UserID: "u2", CreatedAt: "2026-06-26T10:00:00Z"})).To(Succeed())
|
||||
|
||||
Expect(svc.ListJobs("")).To(HaveLen(2))
|
||||
})
|
||||
|
||||
It("rejects GetJob for a job owned by another user", func() {
|
||||
Expect(svc.jobs.Set(ctx, &schema.FineTuneJob{ID: "x", UserID: "owner", CreatedAt: "2026-06-25T10:00:00Z"})).To(Succeed())
|
||||
|
||||
_, err := svc.GetJob("intruder", "x")
|
||||
Expect(err).To(HaveOccurred(), "a different user must not read someone else's job")
|
||||
})
|
||||
})
|
||||
|
||||
Describe("store adapter conversion", func() {
	// The map holds *schema.FineTuneJob values, i.e. the REST response shape.
	// This spec pushes a fully populated job through jobToRecord and back
	// through recordToJob and checks that the fields come back unchanged, so
	// hydrate and write-through in distributed mode keep responses the same.
	It("round-trips a job through jobToRecord/recordToJob preserving the API shape", func() {
		want := &schema.FineTuneJob{
			ID:              "rt-1",
			UserID:          "user-1",
			Model:           "base-model",
			Backend:         "trl",
			ModelID:         "trl-finetune-rt-1",
			TrainingType:    "lora",
			TrainingMethod:  "sft",
			Status:          "completed",
			Message:         "done",
			OutputDir:       "/data/fine-tune/rt-1",
			ExtraOptions:    map[string]string{"hf_token": "secret"},
			CreatedAt:       "2026-06-27T10:00:00Z",
			ExportStatus:    "completed",
			ExportMessage:   "",
			ExportModelName: "base-model-ft-rt-1",
			Config: &schema.FineTuneJobRequest{
				Model:         "base-model",
				Backend:       "trl",
				DatasetSource: "data.jsonl",
			},
		}

		// API shape -> DB record: both structured fields must land in their
		// JSON columns.
		record := jobToRecord(want)
		Expect(record.ID).To(Equal("rt-1"))
		Expect(record.ConfigJSON).ToNot(BeEmpty(), "structured config must serialize into the JSON column")
		Expect(record.ExtraOptsJSON).ToNot(BeEmpty())

		// DB record -> API shape: every compared field must match the input.
		got := recordToJob(record)
		Expect(got.ID).To(Equal(want.ID))
		Expect(got.UserID).To(Equal(want.UserID))
		Expect(got.Model).To(Equal(want.Model))
		Expect(got.Backend).To(Equal(want.Backend))
		Expect(got.ModelID).To(Equal(want.ModelID))
		Expect(got.TrainingType).To(Equal(want.TrainingType))
		Expect(got.TrainingMethod).To(Equal(want.TrainingMethod))
		Expect(got.Status).To(Equal(want.Status))
		Expect(got.Message).To(Equal(want.Message))
		Expect(got.OutputDir).To(Equal(want.OutputDir))
		Expect(got.ExportStatus).To(Equal(want.ExportStatus))
		Expect(got.ExportModelName).To(Equal(want.ExportModelName))
		Expect(got.CreatedAt).To(Equal(want.CreatedAt))
		Expect(got.ExtraOptions).To(Equal(want.ExtraOptions))

		Expect(got.Config).ToNot(BeNil())
		Expect(got.Config.DatasetSource).To(Equal("data.jsonl"))
	})
})
|
||||
|
||||
Describe("compile-time adapter contract", func() {
	It("satisfies syncstate.Store for *distributed.FineTuneStore", func() {
		// The real guarantee is the package-level var assertion in
		// syncstore.go, which fails the build if the adapter drifts from the
		// component interface. This spec only keeps both types referenced
		// from the test suite as well.
		var _ *distributed.FineTuneStore

		adapter := &fineTuneStoreAdapter{}
		Expect(adapter).ToNot(BeNil())
	})
})
|
||||
})
|
||||
114
core/services/finetune/syncstore.go
Normal file
114
core/services/finetune/syncstore.go
Normal file
@@ -0,0 +1,114 @@
|
||||
package finetune
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"time"
|
||||
|
||||
"github.com/mudler/LocalAI/core/schema"
|
||||
"github.com/mudler/LocalAI/core/services/distributed"
|
||||
"github.com/mudler/LocalAI/core/services/syncstate"
|
||||
)
|
||||
|
||||
// fineTuneStoreAdapter bridges the distributed PostgreSQL FineTuneStore to the
|
||||
// generic syncstate.Store the SyncedMap consumes. It is only wired in distributed
|
||||
// mode; standalone leaves Store nil and hydrates from disk via a Loader instead.
|
||||
//
|
||||
// The SyncedMap value type is *schema.FineTuneJob (the exact shape the REST API
|
||||
// returns) so reads need no conversion and the response JSON is provably
|
||||
// unchanged. The adapter is the single place that translates between that API
|
||||
// shape and the DB FineTuneJobRecord.
|
||||
type fineTuneStoreAdapter struct {
|
||||
store *distributed.FineTuneStore
|
||||
}
|
||||
|
||||
// compile-time assertion that the adapter satisfies the component's Store.
|
||||
var _ syncstate.Store[string, *schema.FineTuneJob] = (*fineTuneStoreAdapter)(nil)
|
||||
|
||||
func (a *fineTuneStoreAdapter) List(_ context.Context) ([]*schema.FineTuneJob, error) {
|
||||
records, err := a.store.ListAll()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
jobs := make([]*schema.FineTuneJob, 0, len(records))
|
||||
for i := range records {
|
||||
jobs = append(jobs, recordToJob(&records[i]))
|
||||
}
|
||||
return jobs, nil
|
||||
}
|
||||
|
||||
func (a *fineTuneStoreAdapter) Upsert(_ context.Context, job *schema.FineTuneJob) error {
|
||||
return a.store.Upsert(jobToRecord(job))
|
||||
}
|
||||
|
||||
func (a *fineTuneStoreAdapter) Delete(_ context.Context, id string) error {
|
||||
return a.store.Delete(id)
|
||||
}
|
||||
|
||||
// recordToJob maps a persisted DB record back to the API shape, reconstructing
|
||||
// the structured Config / ExtraOptions from their JSON columns.
|
||||
func recordToJob(r *distributed.FineTuneJobRecord) *schema.FineTuneJob {
|
||||
job := &schema.FineTuneJob{
|
||||
ID: r.ID,
|
||||
UserID: r.UserID,
|
||||
Model: r.Model,
|
||||
Backend: r.Backend,
|
||||
ModelID: r.ModelID,
|
||||
TrainingType: r.TrainingType,
|
||||
TrainingMethod: r.TrainingMethod,
|
||||
Status: r.Status,
|
||||
Message: r.Message,
|
||||
OutputDir: r.OutputDir,
|
||||
ExportStatus: r.ExportStatus,
|
||||
ExportMessage: r.ExportMessage,
|
||||
ExportModelName: r.ExportModelName,
|
||||
CreatedAt: r.CreatedAt.UTC().Format(time.RFC3339),
|
||||
}
|
||||
if r.ExtraOptsJSON != "" {
|
||||
// Best-effort: a malformed column must not drop the whole job from the API.
|
||||
_ = json.Unmarshal([]byte(r.ExtraOptsJSON), &job.ExtraOptions)
|
||||
}
|
||||
if r.ConfigJSON != "" {
|
||||
var cfg schema.FineTuneJobRequest
|
||||
if err := json.Unmarshal([]byte(r.ConfigJSON), &cfg); err == nil {
|
||||
job.Config = &cfg
|
||||
}
|
||||
}
|
||||
return job
|
||||
}
|
||||
|
||||
// jobToRecord maps the API shape to a DB record for write-through, serializing
|
||||
// the structured Config / ExtraOptions into their JSON columns. CreatedAt is
|
||||
// parsed back from the RFC3339 string the service stamps; an unparseable value
|
||||
// is left zero so FineTuneStore.Upsert stamps "now".
|
||||
func jobToRecord(job *schema.FineTuneJob) *distributed.FineTuneJobRecord {
|
||||
rec := &distributed.FineTuneJobRecord{
|
||||
ID: job.ID,
|
||||
UserID: job.UserID,
|
||||
Model: job.Model,
|
||||
Backend: job.Backend,
|
||||
ModelID: job.ModelID,
|
||||
TrainingType: job.TrainingType,
|
||||
TrainingMethod: job.TrainingMethod,
|
||||
Status: job.Status,
|
||||
Message: job.Message,
|
||||
OutputDir: job.OutputDir,
|
||||
ExportStatus: job.ExportStatus,
|
||||
ExportMessage: job.ExportMessage,
|
||||
ExportModelName: job.ExportModelName,
|
||||
}
|
||||
if job.Config != nil {
|
||||
if data, err := json.Marshal(job.Config); err == nil {
|
||||
rec.ConfigJSON = string(data)
|
||||
}
|
||||
}
|
||||
if job.ExtraOptions != nil {
|
||||
if data, err := json.Marshal(job.ExtraOptions); err == nil {
|
||||
rec.ExtraOptsJSON = string(data)
|
||||
}
|
||||
}
|
||||
if t, err := time.Parse(time.RFC3339, job.CreatedAt); err == nil {
|
||||
rec.CreatedAt = t
|
||||
}
|
||||
return rec
|
||||
}
|
||||
@@ -404,6 +404,36 @@ var _ = Describe("GalleryService cache invalidation broadcasts", func() {
|
||||
Element: "x", Op: "install",
|
||||
})).To(Succeed())
|
||||
})
|
||||
|
||||
It("BroadcastModelsChanged delivers the element and op to a peer's OnModelsChanged", func() {
	var (
		guard    sync.Mutex
		received []messaging.CacheInvalidateEvent
	)

	// Record every event replica B is told about; the mutex protects the
	// slice because the callback is invoked by the messaging layer.
	svcB.OnModelsChanged = func(evt messaging.CacheInvalidateEvent) {
		guard.Lock()
		defer guard.Unlock()
		received = append(received, evt)
	}

	Expect(svcA.SubscribeBroadcasts()).To(Succeed())
	Expect(svcB.SubscribeBroadcasts()).To(Succeed())

	// An admin edit on replica A must reach replica B over the same subject
	// the gallery path uses, so B refreshes its in-memory config loader.
	svcA.BroadcastModelsChanged("my-alias", "install")

	expected := messaging.CacheInvalidateEvent{
		Element: "my-alias",
		Op:      "install",
	}

	guard.Lock()
	defer guard.Unlock()
	Expect(received).To(ContainElement(expected))
})
|
||||
|
||||
It("BroadcastModelsChanged is a no-op when NATS is not wired (standalone)", func() {
	standalone := galleryop.NewGalleryService(&config.ApplicationConfig{}, nil)

	// SetNATSClient is never called here, so the broadcast has no client to
	// publish through: it must not panic and must simply do nothing.
	broadcast := func() {
		standalone.BroadcastModelsChanged("x", "delete")
	}
	Expect(broadcast).ToNot(Panic())
})
|
||||
})
|
||||
|
||||
var _ = Describe("GalleryService PostgreSQL hydration", func() {
|
||||
|
||||
@@ -201,6 +201,24 @@ func (g *GalleryService) publishCacheInvalidate(subject string, evt messaging.Ca
|
||||
}
|
||||
}
|
||||
|
||||
// BroadcastModelsChanged notifies peer replicas that a model config was
|
||||
// created, edited, or removed out-of-band of the gallery install/delete
|
||||
// channel (e.g. the admin /models/edit, /models/import and
|
||||
// /models/toggle-state endpoints, which write the YAML and reload only the
|
||||
// local in-memory loader). Peers receive it via OnModelsChanged and refresh
|
||||
// their own ModelConfigLoader so a request load-balanced to any replica sees
|
||||
// the same config. No-op in standalone mode (no NATS client).
|
||||
//
|
||||
// op is "install" for a create/edit (the element must be (re)loaded from
|
||||
// disk) or "delete" for a removal (the element must be pruned from memory,
|
||||
// which a reload-from-path cannot do because the loader is additive).
|
||||
func (g *GalleryService) BroadcastModelsChanged(element, op string) {
|
||||
g.publishCacheInvalidate(messaging.SubjectCacheInvalidateModels, messaging.CacheInvalidateEvent{
|
||||
Element: element,
|
||||
Op: op,
|
||||
})
|
||||
}
|
||||
|
||||
// mergeStatus is the broadcast-side merge: it updates the in-memory map from
|
||||
// a peer's GalleryProgressEvent without re-publishing to NATS or re-writing
|
||||
// to PostgreSQL. UpdateStatus is the local-write entry point and does both;
|
||||
|
||||
@@ -22,6 +22,14 @@ const subscribeConfirmTimeout = 5 * time.Second
|
||||
type Client struct {
|
||||
conn *nats.Conn
|
||||
mu sync.RWMutex
|
||||
|
||||
// reconnectCbs are invoked after the underlying connection is
|
||||
// re-established. nats.go transparently resubscribes existing
|
||||
// subscriptions on reconnect, but it cannot know that a consumer kept
|
||||
// derived in-memory state (e.g. syncstate.SyncedMap) that may have drifted
|
||||
// while the link was down — these callbacks let such consumers re-hydrate.
|
||||
cbMu sync.Mutex
|
||||
reconnectCbs []func()
|
||||
}
|
||||
|
||||
// New creates a new NATS client with auto-reconnect.
|
||||
@@ -31,6 +39,10 @@ func New(url string, opts ...Option) (*Client, error) {
|
||||
o(&cfg)
|
||||
}
|
||||
|
||||
// Allocate the client up front so the reconnect handler closure can reach
|
||||
// it; conn is populated after nats.Connect succeeds below.
|
||||
c := &Client{}
|
||||
|
||||
natsOpts := []nats.Option{
|
||||
nats.RetryOnFailedConnect(true),
|
||||
nats.MaxReconnects(-1),
|
||||
@@ -41,6 +53,7 @@ func New(url string, opts ...Option) (*Client, error) {
|
||||
}),
|
||||
nats.ReconnectHandler(func(_ *nats.Conn) {
|
||||
xlog.Info("NATS reconnected")
|
||||
c.runReconnectCallbacks()
|
||||
}),
|
||||
nats.ClosedHandler(func(_ *nats.Conn) {
|
||||
xlog.Info("NATS connection closed")
|
||||
@@ -103,7 +116,33 @@ func New(url string, opts ...Option) (*Client, error) {
|
||||
return nil, fmt.Errorf("connecting to NATS at %s: %w", sanitize.URL(url), err)
|
||||
}
|
||||
|
||||
return &Client{conn: nc}, nil
|
||||
c.conn = nc
|
||||
return c, nil
|
||||
}
|
||||
|
||||
// OnReconnect registers a callback invoked after the NATS connection is
|
||||
// re-established. It is consumed via an optional interface type-assertion
|
||||
// (interface{ OnReconnect(func()) }) rather than being added to MessagingClient,
|
||||
// so the messaging abstraction stays minimal and standalone/test clients are not
|
||||
// forced to implement reconnect semantics. A nil callback is ignored.
|
||||
func (c *Client) OnReconnect(cb func()) {
|
||||
if cb == nil {
|
||||
return
|
||||
}
|
||||
c.cbMu.Lock()
|
||||
c.reconnectCbs = append(c.reconnectCbs, cb)
|
||||
c.cbMu.Unlock()
|
||||
}
|
||||
|
||||
// runReconnectCallbacks invokes registered reconnect callbacks. It copies the
|
||||
// slice under the lock so a callback that (re)registers cannot deadlock.
|
||||
func (c *Client) runReconnectCallbacks() {
|
||||
c.cbMu.Lock()
|
||||
cbs := append([]func(){}, c.reconnectCbs...)
|
||||
c.cbMu.Unlock()
|
||||
for _, cb := range cbs {
|
||||
cb()
|
||||
}
|
||||
}
|
||||
|
||||
// Publish marshals data as JSON and publishes it to the given subject.
|
||||
|
||||
@@ -380,6 +380,20 @@ func SubjectCacheInvalidateCollection(name string) string {
|
||||
return "cache.invalidate.collections." + sanitizeSubjectToken(name)
|
||||
}
|
||||
|
||||
// SyncedMap State Sync (Pub/Sub — broadcast to all frontends)
|
||||
//
|
||||
// The reusable syncstate.SyncedMap component publishes a {op,key,value} delta on
|
||||
// this subject whenever a replica mutates a piece of cross-replica in-memory
|
||||
// state. Peers subscribe and apply the delta to their own map, so a round-robin
|
||||
// API request that lands on a replica which did not originate the change still
|
||||
// sees it. Convergence on (re)connect is done by re-hydrating from the durable
|
||||
// source, so no request/reply snapshot subject is needed here.
|
||||
func SubjectSyncStateDelta(name string) string {
|
||||
return subjectSyncStatePrefix + sanitizeSubjectToken(name) + ".delta"
|
||||
}
|
||||
|
||||
const subjectSyncStatePrefix = "state."
|
||||
|
||||
// Prefix-Cache Routing Sync (Pub/Sub - broadcast to all frontends)
|
||||
//
|
||||
// Frontends share prefix-cache observations so a request routed to any replica
|
||||
|
||||
53
core/services/modeladmin/remote_sync.go
Normal file
53
core/services/modeladmin/remote_sync.go
Normal file
@@ -0,0 +1,53 @@
|
||||
package modeladmin
|
||||
|
||||
import (
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
"github.com/mudler/LocalAI/core/services/messaging"
|
||||
"github.com/mudler/LocalAI/pkg/model"
|
||||
|
||||
"github.com/mudler/xlog"
|
||||
)
|
||||
|
||||
// opDelete is the CacheInvalidateEvent.Op value the gallery delete path and the
|
||||
// admin delete endpoint use; a delete must prune (a reload-from-path cannot).
|
||||
const opDelete = "delete"
|
||||
|
||||
// ApplyRemoteChange refreshes this replica's in-memory model state from a peer
|
||||
// replica's model-config change broadcast (messaging.CacheInvalidateEvent on
|
||||
// SubjectCacheInvalidateModels). It is the subscriber-side counterpart to
|
||||
// GalleryService.BroadcastModelsChanged.
|
||||
//
|
||||
// The op matters because LoadModelConfigsFromPath is additive: it loads every
|
||||
// YAML on disk into the loader but never removes an entry whose file is gone.
|
||||
// So a delete cannot be propagated by a plain reload - the deleted element must
|
||||
// be explicitly pruned. Specifically:
|
||||
//
|
||||
// - op == "delete" with a named element: prune that element from the loader.
|
||||
// - otherwise: reload all configs from disk (picks up creates and edits).
|
||||
//
|
||||
// In both cases, when an element is named, any running instance on this replica
|
||||
// is shut down (best-effort) so the next request rebuilds it from the new
|
||||
// config instead of serving the stale one - mirroring what the originating
|
||||
// replica does on a local edit/delete.
|
||||
//
|
||||
// ml may be nil (no running instances to shut down). modelsPath and opts are
|
||||
// forwarded to LoadModelConfigsFromPath.
|
||||
func ApplyRemoteChange(cl *config.ModelConfigLoader, ml *model.ModelLoader, modelsPath string, evt messaging.CacheInvalidateEvent, opts ...config.ConfigLoaderOption) error {
|
||||
if evt.Op == opDelete && evt.Element != "" {
|
||||
cl.RemoveModelConfig(evt.Element)
|
||||
} else if err := cl.LoadModelConfigsFromPath(modelsPath, opts...); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Drop any running instance of the affected model so the next request
|
||||
// rebuilds it from the refreshed config instead of serving the stale one.
|
||||
// Best-effort: the model may not be loaded on this replica, which surfaces
|
||||
// as a benign error here.
|
||||
if ml != nil && evt.Element != "" {
|
||||
if err := ml.ShutdownModel(evt.Element); err != nil {
|
||||
xlog.Debug("ApplyRemoteChange: could not shut down model instance (likely not loaded)",
|
||||
"model", evt.Element, "error", err)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
80
core/services/modeladmin/remote_sync_test.go
Normal file
80
core/services/modeladmin/remote_sync_test.go
Normal file
@@ -0,0 +1,80 @@
|
||||
package modeladmin
|
||||
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
|
||||
. "github.com/onsi/ginkgo/v2"
|
||||
. "github.com/onsi/gomega"
|
||||
"gopkg.in/yaml.v3"
|
||||
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
"github.com/mudler/LocalAI/core/services/messaging"
|
||||
)
|
||||
|
||||
var _ = Describe("ApplyRemoteChange", func() {
|
||||
var (
|
||||
dir string
|
||||
loader *config.ModelConfigLoader
|
||||
)
|
||||
|
||||
BeforeEach(func() {
|
||||
dir = GinkgoT().TempDir()
|
||||
loader = config.NewModelConfigLoader(dir)
|
||||
})
|
||||
|
||||
writeYAML := func(name string, body map[string]any) {
|
||||
body["name"] = name
|
||||
data, err := yaml.Marshal(body)
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(os.WriteFile(filepath.Join(dir, name+".yaml"), data, 0644)).To(Succeed())
|
||||
}
|
||||
|
||||
It("loads a peer-created config from disk on an install event", func() {
|
||||
// Peer wrote the YAML to the shared models dir; this replica has not
|
||||
// loaded it yet (empty in-memory loader).
|
||||
writeYAML("peer-alias", map[string]any{"alias": "qwen"})
|
||||
_, ok := loader.GetModelConfig("peer-alias")
|
||||
Expect(ok).To(BeFalse(), "precondition: not yet in memory")
|
||||
|
||||
err := ApplyRemoteChange(loader, nil, dir, messaging.CacheInvalidateEvent{
|
||||
Element: "peer-alias", Op: "install",
|
||||
})
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
|
||||
_, ok = loader.GetModelConfig("peer-alias")
|
||||
Expect(ok).To(BeTrue(), "install event must reload the new config from disk")
|
||||
})
|
||||
|
||||
It("prunes a peer-deleted config that a reload-from-path cannot drop", func() {
|
||||
// Model is present in memory (loaded earlier) but its file is now gone
|
||||
// from the shared dir. LoadModelConfigsFromPath is additive, so only an
|
||||
// explicit prune can remove it - this is the cross-replica delete bug.
|
||||
writeYAML("doomed", map[string]any{"alias": "qwen"})
|
||||
Expect(loader.LoadModelConfigsFromPath(dir)).To(Succeed())
|
||||
_, ok := loader.GetModelConfig("doomed")
|
||||
Expect(ok).To(BeTrue(), "precondition: in memory")
|
||||
Expect(os.Remove(filepath.Join(dir, "doomed.yaml"))).To(Succeed())
|
||||
|
||||
err := ApplyRemoteChange(loader, nil, dir, messaging.CacheInvalidateEvent{
|
||||
Element: "doomed", Op: "delete",
|
||||
})
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
|
||||
_, ok = loader.GetModelConfig("doomed")
|
||||
Expect(ok).To(BeFalse(), "delete event must prune the element from memory")
|
||||
})
|
||||
|
||||
It("does a full reload when no element is named", func() {
|
||||
writeYAML("m1", map[string]any{"alias": "qwen"})
|
||||
writeYAML("m2", map[string]any{"alias": "qwen"})
|
||||
|
||||
err := ApplyRemoteChange(loader, nil, dir, messaging.CacheInvalidateEvent{})
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
|
||||
_, ok1 := loader.GetModelConfig("m1")
|
||||
_, ok2 := loader.GetModelConfig("m2")
|
||||
Expect(ok1).To(BeTrue())
|
||||
Expect(ok2).To(BeTrue())
|
||||
})
|
||||
})
|
||||
@@ -673,6 +673,49 @@ func (r *NodeRegistry) Get(ctx context.Context, nodeID string) (*BackendNode, er
|
||||
return &node, nil
|
||||
}
|
||||
|
||||
// GetWithExtras returns a single node enriched with the same computed fields as
|
||||
// ListWithExtras (labels, loaded-model count, in-flight total). The plain Get
|
||||
// returns a bare BackendNode whose Labels live in a separate table, so the node
|
||||
// detail view needs this to show a node's existing labels and live counts.
|
||||
func (r *NodeRegistry) GetWithExtras(ctx context.Context, nodeID string) (*NodeWithExtras, error) {
|
||||
node, err := r.Get(ctx, nodeID)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
labels := make(map[string]string)
|
||||
nodeLabels, err := r.GetNodeLabels(ctx, nodeID)
|
||||
if err != nil {
|
||||
xlog.Warn("GetWithExtras: failed to get labels", "node", nodeID, "error", err)
|
||||
} else {
|
||||
for _, l := range nodeLabels {
|
||||
labels[l.Key] = l.Value
|
||||
}
|
||||
}
|
||||
|
||||
var modelCount int64
|
||||
if err := r.db.WithContext(ctx).Model(&NodeModel{}).
|
||||
Where("node_id = ? AND state = ?", nodeID, "loaded").
|
||||
Count(&modelCount).Error; err != nil {
|
||||
xlog.Warn("GetWithExtras: failed to get model count", "node", nodeID, "error", err)
|
||||
}
|
||||
|
||||
var inFlight struct{ Total int }
|
||||
if err := r.db.WithContext(ctx).Model(&NodeModel{}).
|
||||
Select("COALESCE(SUM(in_flight), 0) as total").
|
||||
Where("node_id = ? AND state IN ?", nodeID, []string{"loaded", "unloading"}).
|
||||
Scan(&inFlight).Error; err != nil {
|
||||
xlog.Warn("GetWithExtras: failed to get in-flight count", "node", nodeID, "error", err)
|
||||
}
|
||||
|
||||
return &NodeWithExtras{
|
||||
BackendNode: *node,
|
||||
ModelCount: int(modelCount),
|
||||
InFlightCount: inFlight.Total,
|
||||
Labels: labels,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// GetByName returns a single node by name.
|
||||
func (r *NodeRegistry) GetByName(ctx context.Context, name string) (*BackendNode, error) {
|
||||
var node BackendNode
|
||||
|
||||
@@ -646,6 +646,38 @@ var _ = Describe("NodeRegistry", func() {
|
||||
})
|
||||
})
|
||||
|
||||
Describe("GetWithExtras", func() {
	It("returns the node enriched with its labels map", func() {
		bg := context.Background()

		node := makeNode("extras-node", "10.0.0.80:50051", 8_000_000_000)
		Expect(registry.Register(bg, node, true)).To(Succeed())
		Expect(registry.SetNodeLabel(bg, node.ID, "env", "prod")).To(Succeed())
		Expect(registry.SetNodeLabel(bg, node.ID, "region", "us-east")).To(Succeed())

		enriched, err := registry.GetWithExtras(bg, node.ID)
		Expect(err).ToNot(HaveOccurred())
		Expect(enriched).ToNot(BeNil())
		Expect(enriched.ID).To(Equal(node.ID))
		Expect(enriched.Name).To(Equal("extras-node"))

		wantLabels := map[string]string{"env": "prod", "region": "us-east"}
		Expect(enriched.Labels).To(Equal(wantLabels))
	})

	It("returns an empty (non-nil) labels map when the node has none", func() {
		bg := context.Background()

		node := makeNode("extras-no-labels", "10.0.0.81:50051", 8_000_000_000)
		Expect(registry.Register(bg, node, true)).To(Succeed())

		enriched, err := registry.GetWithExtras(bg, node.ID)
		Expect(err).ToNot(HaveOccurred())
		Expect(enriched).ToNot(BeNil())
		// The map must be present but empty, not nil.
		Expect(enriched.Labels).ToNot(BeNil())
		Expect(enriched.Labels).To(BeEmpty())
	})

	It("returns an error for an unknown node", func() {
		_, lookupErr := registry.GetWithExtras(context.Background(), "does-not-exist")
		Expect(lookupErr).To(HaveOccurred())
	})
})
|
||||
|
||||
Describe("FindNodesBySelector", func() {
|
||||
It("returns nodes matching all labels in selector", func() {
|
||||
n1 := makeNode("sel-match", "10.0.0.80:50051", 8_000_000_000)
|
||||
|
||||
13
core/services/quantization/quantization_suite_test.go
Normal file
13
core/services/quantization/quantization_suite_test.go
Normal file
@@ -0,0 +1,13 @@
|
||||
package quantization
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
. "github.com/onsi/ginkgo/v2"
|
||||
. "github.com/onsi/gomega"
|
||||
)
|
||||
|
||||
func TestQuantization(t *testing.T) {
|
||||
RegisterFailHandler(Fail)
|
||||
RunSpecs(t, "Quantization Suite")
|
||||
}
|
||||
@@ -17,6 +17,9 @@ import (
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
"github.com/mudler/LocalAI/core/gallery/importers"
|
||||
"github.com/mudler/LocalAI/core/schema"
|
||||
"github.com/mudler/LocalAI/core/services/distributed"
|
||||
"github.com/mudler/LocalAI/core/services/messaging"
|
||||
"github.com/mudler/LocalAI/core/services/syncstate"
|
||||
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
|
||||
"github.com/mudler/LocalAI/pkg/model"
|
||||
"github.com/mudler/LocalAI/pkg/utils"
|
||||
@@ -30,26 +33,63 @@ type QuantizationService struct {
|
||||
modelLoader *model.ModelLoader
|
||||
configLoader *config.ModelConfigLoader
|
||||
|
||||
mu sync.Mutex
|
||||
jobs map[string]*schema.QuantizationJob
|
||||
// mu serializes the read-modify-write of job values. The SyncedMap guards its
|
||||
// own map structure, but a job is a pointer mutated in place (e.g. the import
|
||||
// goroutine), so the service still needs a lock to keep those field updates and
|
||||
// the subsequent Set atomic with respect to readers.
|
||||
mu sync.Mutex
|
||||
|
||||
// jobs is the cross-replica job store: an in-memory map kept consistent across
|
||||
// replicas via NATS, optionally read-through to PostgreSQL in distributed mode.
|
||||
jobs *syncstate.SyncedMap[string, *schema.QuantizationJob]
|
||||
}
|
||||
|
||||
// NewQuantizationService creates a new QuantizationService.
|
||||
// NewQuantizationService creates a new QuantizationService. In distributed mode
|
||||
// pass the shared NATS client and PostgreSQL store so jobs stay consistent across
|
||||
// replicas; pass nil for both in standalone mode, where the disk Loader hydrates
|
||||
// the map and there is nothing to broadcast.
|
||||
func NewQuantizationService(
|
||||
appConfig *config.ApplicationConfig,
|
||||
modelLoader *model.ModelLoader,
|
||||
configLoader *config.ModelConfigLoader,
|
||||
nats messaging.MessagingClient,
|
||||
store *distributed.QuantStore,
|
||||
) *QuantizationService {
|
||||
s := &QuantizationService{
|
||||
appConfig: appConfig,
|
||||
modelLoader: modelLoader,
|
||||
configLoader: configLoader,
|
||||
jobs: make(map[string]*schema.QuantizationJob),
|
||||
}
|
||||
s.loadAllJobs()
|
||||
|
||||
// Only attach a Store interface when a concrete store exists, otherwise the
|
||||
// SyncedMap would see a non-nil interface wrapping a nil pointer and try to
|
||||
// hydrate/write through a nil DB.
|
||||
var syncStore syncstate.Store[string, *schema.QuantizationJob]
|
||||
if store != nil {
|
||||
syncStore = &quantStoreAdapter{store: store}
|
||||
}
|
||||
|
||||
s.jobs = syncstate.New(syncstate.Config[string, *schema.QuantizationJob]{
|
||||
Name: "quant.jobs",
|
||||
Key: func(j *schema.QuantizationJob) string { return j.ID },
|
||||
Nats: nats,
|
||||
Store: syncStore,
|
||||
Loader: s.loadJobsFromDisk, // ignored when Store is set (distributed mode)
|
||||
})
|
||||
|
||||
// Hydrate + subscribe. A hydrate failure must not take the server down: log and
|
||||
// continue degraded (standalone), mirroring the FineTune/OpCache wiring.
|
||||
if err := s.jobs.Start(appConfig.Context); err != nil {
|
||||
xlog.Warn("Quantization SyncedMap start failed; running degraded", "error", err)
|
||||
}
|
||||
return s
|
||||
}
|
||||
|
||||
// Close releases the SyncedMap subscription and background workers.
|
||||
func (s *QuantizationService) Close() error {
|
||||
return s.jobs.Close()
|
||||
}
|
||||
|
||||
// quantizationBaseDir returns the base directory for quantization job data.
|
||||
func (s *QuantizationService) quantizationBaseDir() string {
|
||||
return filepath.Join(s.appConfig.DataPath, "quantization")
|
||||
@@ -80,15 +120,18 @@ func (s *QuantizationService) saveJobState(job *schema.QuantizationJob) {
|
||||
}
|
||||
}
|
||||
|
||||
// loadAllJobs scans the quantization directory for persisted jobs and loads them.
|
||||
func (s *QuantizationService) loadAllJobs() {
|
||||
// loadJobsFromDisk scans the quantization directory for persisted jobs and
|
||||
// returns them. It is the SyncedMap Loader used in standalone mode (no DB); the
|
||||
// returned slice hydrates the map on Start.
|
||||
func (s *QuantizationService) loadJobsFromDisk(_ context.Context) ([]*schema.QuantizationJob, error) {
|
||||
baseDir := s.quantizationBaseDir()
|
||||
entries, err := os.ReadDir(baseDir)
|
||||
if err != nil {
|
||||
// Directory doesn't exist yet — that's fine
|
||||
return
|
||||
// Directory doesn't exist yet — that's fine, start empty.
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
var jobs []*schema.QuantizationJob
|
||||
for _, entry := range entries {
|
||||
if !entry.IsDir() {
|
||||
continue
|
||||
@@ -117,12 +160,13 @@ func (s *QuantizationService) loadAllJobs() {
|
||||
job.ImportMessage = "Server restarted while import was running"
|
||||
}
|
||||
|
||||
s.jobs[job.ID] = &job
|
||||
jobs = append(jobs, &job)
|
||||
}
|
||||
|
||||
if len(s.jobs) > 0 {
|
||||
xlog.Info("Loaded persisted quantization jobs", "count", len(s.jobs))
|
||||
if len(jobs) > 0 {
|
||||
xlog.Info("Loaded persisted quantization jobs", "count", len(jobs))
|
||||
}
|
||||
return jobs, nil
|
||||
}
|
||||
|
||||
// StartJob starts a new quantization job.
|
||||
@@ -188,7 +232,12 @@ func (s *QuantizationService) StartJob(ctx context.Context, userID string, req s
|
||||
CreatedAt: time.Now().UTC().Format(time.RFC3339),
|
||||
Config: &req,
|
||||
}
|
||||
s.jobs[jobID] = job
|
||||
// Set write-through persists to PostgreSQL (distributed) and broadcasts to
|
||||
// peer replicas; the disk state.json is written separately for restart
|
||||
// recovery / standalone hydrate.
|
||||
if err := s.jobs.Set(ctx, job); err != nil {
|
||||
return nil, fmt.Errorf("failed to persist job: %w", err)
|
||||
}
|
||||
s.saveJobState(job)
|
||||
|
||||
return &schema.QuantizationJobResponse{
|
||||
@@ -203,7 +252,7 @@ func (s *QuantizationService) GetJob(userID, jobID string) (*schema.Quantization
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
|
||||
job, ok := s.jobs[jobID]
|
||||
job, ok := s.jobs.Get(jobID)
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("job not found: %s", jobID)
|
||||
}
|
||||
@@ -219,7 +268,7 @@ func (s *QuantizationService) ListJobs(userID string) []*schema.QuantizationJob
|
||||
defer s.mu.Unlock()
|
||||
|
||||
var result []*schema.QuantizationJob
|
||||
for _, job := range s.jobs {
|
||||
for _, job := range s.jobs.List() {
|
||||
if userID == "" || job.UserID == userID {
|
||||
result = append(result, job)
|
||||
}
|
||||
@@ -235,7 +284,7 @@ func (s *QuantizationService) ListJobs(userID string) []*schema.QuantizationJob
|
||||
// StopJob stops a running quantization job.
|
||||
func (s *QuantizationService) StopJob(ctx context.Context, userID, jobID string) error {
|
||||
s.mu.Lock()
|
||||
job, ok := s.jobs[jobID]
|
||||
job, ok := s.jobs.Get(jobID)
|
||||
if !ok {
|
||||
s.mu.Unlock()
|
||||
return fmt.Errorf("job not found: %s", jobID)
|
||||
@@ -256,6 +305,9 @@ func (s *QuantizationService) StopJob(ctx context.Context, userID, jobID string)
|
||||
s.mu.Lock()
|
||||
job.Status = "stopped"
|
||||
job.Message = "Quantization stopped by user"
|
||||
if err := s.jobs.Set(ctx, job); err != nil {
|
||||
xlog.Warn("Failed to persist stopped job", "job_id", jobID, "error", err)
|
||||
}
|
||||
s.saveJobState(job)
|
||||
s.mu.Unlock()
|
||||
|
||||
@@ -265,7 +317,7 @@ func (s *QuantizationService) StopJob(ctx context.Context, userID, jobID string)
|
||||
// DeleteJob removes a quantization job and its associated data from disk.
|
||||
func (s *QuantizationService) DeleteJob(userID, jobID string) error {
|
||||
s.mu.Lock()
|
||||
job, ok := s.jobs[jobID]
|
||||
job, ok := s.jobs.Get(jobID)
|
||||
if !ok {
|
||||
s.mu.Unlock()
|
||||
return fmt.Errorf("job not found: %s", jobID)
|
||||
@@ -289,7 +341,11 @@ func (s *QuantizationService) DeleteJob(userID, jobID string) error {
|
||||
}
|
||||
|
||||
importModelName := job.ImportModelName
|
||||
delete(s.jobs, jobID)
|
||||
// Delete write-through removes the DB row (distributed) and broadcasts the
|
||||
// removal to peer replicas. DeleteJob has no ctx, so use Background.
|
||||
if err := s.jobs.Delete(context.Background(), jobID); err != nil {
|
||||
xlog.Warn("Failed to delete job from store", "job_id", jobID, "error", err)
|
||||
}
|
||||
s.mu.Unlock()
|
||||
|
||||
// Remove job directory (state.json, output files)
|
||||
@@ -324,7 +380,7 @@ func (s *QuantizationService) DeleteJob(userID, jobID string) error {
|
||||
// StreamProgress opens a gRPC progress stream and calls the callback for each update.
|
||||
func (s *QuantizationService) StreamProgress(ctx context.Context, userID, jobID string, callback func(event *schema.QuantizationProgressEvent)) error {
|
||||
s.mu.Lock()
|
||||
job, ok := s.jobs[jobID]
|
||||
job, ok := s.jobs.Get(jobID)
|
||||
if !ok {
|
||||
s.mu.Unlock()
|
||||
return fmt.Errorf("job not found: %s", jobID)
|
||||
@@ -353,7 +409,7 @@ func (s *QuantizationService) StreamProgress(ctx context.Context, userID, jobID
|
||||
}, func(update *pb.QuantizationProgressUpdate) {
|
||||
// Update job status and persist
|
||||
s.mu.Lock()
|
||||
if j, ok := s.jobs[jobID]; ok {
|
||||
if j, ok := s.jobs.Get(jobID); ok {
|
||||
// Don't let progress updates overwrite terminal states
|
||||
isTerminal := j.Status == "stopped" || j.Status == "completed" || j.Status == "failed"
|
||||
if !isTerminal {
|
||||
@@ -365,6 +421,9 @@ func (s *QuantizationService) StreamProgress(ctx context.Context, userID, jobID
|
||||
if update.OutputFile != "" {
|
||||
j.OutputFile = update.OutputFile
|
||||
}
|
||||
if err := s.jobs.Set(ctx, j); err != nil {
|
||||
xlog.Warn("Failed to persist progress update", "job_id", jobID, "error", err)
|
||||
}
|
||||
s.saveJobState(j)
|
||||
}
|
||||
s.mu.Unlock()
|
||||
@@ -399,7 +458,7 @@ func sanitizeQuantModelName(s string) string {
|
||||
// ImportModel imports a quantized model into LocalAI asynchronously.
|
||||
func (s *QuantizationService) ImportModel(ctx context.Context, userID, jobID string, req schema.QuantizationImportRequest) (string, error) {
|
||||
s.mu.Lock()
|
||||
job, ok := s.jobs[jobID]
|
||||
job, ok := s.jobs.Get(jobID)
|
||||
if !ok {
|
||||
s.mu.Unlock()
|
||||
return "", fmt.Errorf("job not found: %s", jobID)
|
||||
@@ -459,6 +518,9 @@ func (s *QuantizationService) ImportModel(ctx context.Context, userID, jobID str
|
||||
job.ImportStatus = "importing"
|
||||
job.ImportMessage = ""
|
||||
job.ImportModelName = ""
|
||||
if err := s.jobs.Set(ctx, job); err != nil {
|
||||
xlog.Warn("Failed to persist import start", "job_id", jobID, "error", err)
|
||||
}
|
||||
s.saveJobState(job)
|
||||
s.mu.Unlock()
|
||||
|
||||
@@ -514,10 +576,15 @@ func (s *QuantizationService) ImportModel(ctx context.Context, userID, jobID str
|
||||
|
||||
xlog.Info("Quantized model imported and registered", "job_id", jobID, "model_name", modelName)
|
||||
|
||||
// Runs after the HTTP request returns, so use Background rather than the
|
||||
// (now likely cancelled) request ctx for the write-through.
|
||||
s.mu.Lock()
|
||||
job.ImportStatus = "completed"
|
||||
job.ImportModelName = modelName
|
||||
job.ImportMessage = ""
|
||||
if err := s.jobs.Set(context.Background(), job); err != nil {
|
||||
xlog.Warn("Failed to persist import completion", "job_id", jobID, "error", err)
|
||||
}
|
||||
s.saveJobState(job)
|
||||
s.mu.Unlock()
|
||||
}()
|
||||
@@ -525,10 +592,14 @@ func (s *QuantizationService) ImportModel(ctx context.Context, userID, jobID str
|
||||
return modelName, nil
|
||||
}
|
||||
|
||||
// setImportMessage updates the import message and persists the job state.
|
||||
// setImportMessage updates the import message and persists the job state. Called
|
||||
// from the background import goroutine, so it uses Background for write-through.
|
||||
func (s *QuantizationService) setImportMessage(job *schema.QuantizationJob, msg string) {
|
||||
s.mu.Lock()
|
||||
job.ImportMessage = msg
|
||||
if err := s.jobs.Set(context.Background(), job); err != nil {
|
||||
xlog.Warn("Failed to persist import message", "job_id", job.ID, "error", err)
|
||||
}
|
||||
s.saveJobState(job)
|
||||
s.mu.Unlock()
|
||||
}
|
||||
@@ -539,6 +610,9 @@ func (s *QuantizationService) setImportFailed(job *schema.QuantizationJob, messa
|
||||
s.mu.Lock()
|
||||
job.ImportStatus = "failed"
|
||||
job.ImportMessage = message
|
||||
if err := s.jobs.Set(context.Background(), job); err != nil {
|
||||
xlog.Warn("Failed to persist import failure", "job_id", job.ID, "error", err)
|
||||
}
|
||||
s.saveJobState(job)
|
||||
s.mu.Unlock()
|
||||
}
|
||||
@@ -546,7 +620,7 @@ func (s *QuantizationService) setImportFailed(job *schema.QuantizationJob, messa
|
||||
// GetOutputPath returns the path to the quantized model file and a download name.
|
||||
func (s *QuantizationService) GetOutputPath(userID, jobID string) (string, string, error) {
|
||||
s.mu.Lock()
|
||||
job, ok := s.jobs[jobID]
|
||||
job, ok := s.jobs.Get(jobID)
|
||||
if !ok {
|
||||
s.mu.Unlock()
|
||||
return "", "", fmt.Errorf("job not found: %s", jobID)
|
||||
|
||||
187
core/services/quantization/service_test.go
Normal file
187
core/services/quantization/service_test.go
Normal file
@@ -0,0 +1,187 @@
|
||||
package quantization
|
||||
|
||||
// White-box tests (package quantization) so a spec can drive the service's
|
||||
// internal SyncedMap the same way StartJob does (via jobs.Set) without standing
|
||||
// up a quantization backend, then assert the cross-replica reads
|
||||
// (GetJob/ListJobs) and the adapter conversions that keep REST responses
|
||||
// byte-for-byte unchanged.
|
||||
|
||||
import (
|
||||
"context"
|
||||
|
||||
. "github.com/onsi/ginkgo/v2"
|
||||
. "github.com/onsi/gomega"
|
||||
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
"github.com/mudler/LocalAI/core/schema"
|
||||
"github.com/mudler/LocalAI/core/services/distributed"
|
||||
"github.com/mudler/LocalAI/core/services/testutil"
|
||||
)
|
||||
|
||||
// newTestService builds a standalone QuantizationService wired to the given bus.
|
||||
// The model/config loaders are nil because the read/sync paths under test never
|
||||
// touch them; the data dir is a throwaway temp dir so the disk Loader finds
|
||||
// nothing.
|
||||
func newTestService(bus *testutil.FakeBus) *QuantizationService {
|
||||
appConfig := &config.ApplicationConfig{
|
||||
Context: context.Background(),
|
||||
DataPath: GinkgoT().TempDir(),
|
||||
}
|
||||
return NewQuantizationService(appConfig, nil, nil, bus, nil)
|
||||
}
|
||||
|
||||
var _ = Describe("QuantizationService", func() {
|
||||
ctx := context.Background()
|
||||
|
||||
Describe("cross-replica job visibility", func() {
	const (
		owner     = "user-1"
		createdAt = "2026-06-27T10:00:00Z"
	)

	var (
		bus             *testutil.FakeBus
		origin, replica *QuantizationService
	)

	// newJob builds the minimal job the specs write through origin.jobs.Set,
	// which is how StartJob persists without needing a backend.
	newJob := func(id, status string) *schema.QuantizationJob {
		return &schema.QuantizationJob{ID: id, UserID: owner, Status: status, CreatedAt: createdAt}
	}

	BeforeEach(func() {
		// Two services sharing one bus: the write lands on origin, the read
		// is served by replica.
		bus = testutil.NewFakeBus()
		origin = newTestService(bus)
		replica = newTestService(bus)
	})

	AfterEach(func() {
		Expect(origin.Close()).To(Succeed())
		Expect(replica.Close()).To(Succeed())
	})

	It("makes a job created on A visible via B's GetJob and ListJobs", func() {
		Expect(origin.jobs.Set(ctx, newJob("job-1", "queued"))).To(Succeed())

		seen, err := replica.GetJob(owner, "job-1")
		Expect(err).ToNot(HaveOccurred(), "B must see a job A just created")
		Expect(seen.Status).To(Equal("queued"))

		all := replica.ListJobs(owner)
		Expect(all).To(HaveLen(1))
		Expect(all[0].ID).To(Equal("job-1"))
	})

	It("removes a job from B when it is deleted on A", func() {
		Expect(origin.jobs.Set(ctx, newJob("job-2", "completed"))).To(Succeed())

		_, err := replica.GetJob(owner, "job-2")
		Expect(err).ToNot(HaveOccurred(), "precondition: B must have the job before the delete")

		Expect(origin.jobs.Delete(ctx, "job-2")).To(Succeed())

		_, err = replica.GetJob(owner, "job-2")
		Expect(err).To(HaveOccurred(), "a delete on A must remove the job from B")
	})

	It("propagates a status update from A to B", func() {
		// Two distinct values with the same ID: the second Set must win on B.
		Expect(origin.jobs.Set(ctx, newJob("job-3", "quantizing"))).To(Succeed())
		Expect(origin.jobs.Set(ctx, newJob("job-3", "completed"))).To(Succeed())

		seen, err := replica.GetJob(owner, "job-3")
		Expect(err).ToNot(HaveOccurred())
		Expect(seen.Status).To(Equal("completed"))
	})
})
|
||||
|
||||
Describe("ListJobs", func() {
	var svc *QuantizationService

	// put stores one job directly in the service's map; the offset keeps a
	// failure pointing at the calling spec line rather than at this helper.
	put := func(id, user, created string) {
		stored := &schema.QuantizationJob{ID: id, UserID: user, CreatedAt: created}
		ExpectWithOffset(1, svc.jobs.Set(ctx, stored)).To(Succeed())
	}

	BeforeEach(func() {
		svc = newTestService(testutil.NewFakeBus())
	})

	AfterEach(func() {
		Expect(svc.Close()).To(Succeed())
	})

	It("filters by user and sorts newest-first", func() {
		put("old", "u1", "2026-06-25T10:00:00Z")
		put("new", "u1", "2026-06-27T10:00:00Z")
		put("other", "u2", "2026-06-26T10:00:00Z")

		mine := svc.ListJobs("u1")
		Expect(mine).To(HaveLen(2), "only u1's jobs")
		Expect(mine[0].ID).To(Equal("new"), "newest first")
		Expect(mine[1].ID).To(Equal("old"))
	})

	It("returns every user's jobs when the userID filter is empty", func() {
		put("a", "u1", "2026-06-25T10:00:00Z")
		put("b", "u2", "2026-06-26T10:00:00Z")

		everything := svc.ListJobs("")
		Expect(everything).To(HaveLen(2))
	})

	It("rejects GetJob for a job owned by another user", func() {
		put("x", "owner", "2026-06-25T10:00:00Z")

		_, err := svc.GetJob("intruder", "x")
		Expect(err).To(HaveOccurred(), "a different user must not read someone else's job")
	})
})
|
||||
|
||||
Describe("store adapter conversion", func() {
	// jobToRecord and recordToJob translate between *schema.QuantizationJob and
	// the DB record; this spec pushes a fully populated job through both and
	// compares the result field by field.
	It("round-trips a job through jobToRecord/recordToJob preserving the API shape", func() {
		request := &schema.QuantizationJobRequest{
			Model:            "base-model",
			Backend:          "llama-cpp-quantization",
			QuantizationType: "q4_k_m",
		}
		src := &schema.QuantizationJob{
			ID:               "rt-1",
			UserID:           "user-1",
			Model:            "base-model",
			Backend:          "llama-cpp-quantization",
			ModelID:          "llama-cpp-quantization-quantize-rt-1",
			QuantizationType: "q4_k_m",
			Status:           "completed",
			Message:          "done",
			OutputDir:        "/data/quantization/rt-1",
			OutputFile:       "/data/quantization/rt-1/model.gguf",
			ExtraOptions:     map[string]string{"hf_token": "secret"},
			CreatedAt:        "2026-06-27T10:00:00Z",
			ImportStatus:     "completed",
			ImportMessage:    "",
			ImportModelName:  "base-model-q4_k_m-rt-1",
			Config:           request,
		}

		// Forward leg: API shape -> DB record.
		record := jobToRecord(src)
		Expect(record.ID).To(Equal("rt-1"))
		Expect(record.ConfigJSON).ToNot(BeEmpty(), "structured config must serialize into the JSON column")
		Expect(record.ExtraOptsJSON).ToNot(BeEmpty())

		// Return leg: DB record -> API shape.
		restored := recordToJob(record)

		Expect(restored.ID).To(Equal(src.ID))
		Expect(restored.UserID).To(Equal(src.UserID))
		Expect(restored.Model).To(Equal(src.Model))
		Expect(restored.Backend).To(Equal(src.Backend))
		Expect(restored.ModelID).To(Equal(src.ModelID))
		Expect(restored.QuantizationType).To(Equal(src.QuantizationType))

		Expect(restored.Status).To(Equal(src.Status))
		Expect(restored.Message).To(Equal(src.Message))
		Expect(restored.OutputDir).To(Equal(src.OutputDir))
		Expect(restored.OutputFile).To(Equal(src.OutputFile))

		Expect(restored.ImportStatus).To(Equal(src.ImportStatus))
		Expect(restored.ImportModelName).To(Equal(src.ImportModelName))
		Expect(restored.CreatedAt).To(Equal(src.CreatedAt))
		Expect(restored.ExtraOptions).To(Equal(src.ExtraOptions))

		Expect(restored.Config).ToNot(BeNil())
		Expect(restored.Config.QuantizationType).To(Equal("q4_k_m"))
	})
})
|
||||
|
||||
Describe("compile-time adapter contract", func() {
	It("satisfies syncstate.Store for *distributed.QuantStore", func() {
		// The var assertion in syncstore.go covers the contract at build time.
		// This spec additionally checks, at run time, that the adapter exposes
		// the List/Upsert/Delete method set with the signatures the adapter
		// declares for *schema.QuantizationJob values keyed by string, so the
		// spec fails if a method is renamed or its signature drifts.
		//
		// The typed nil backing store keeps the distributed package referenced
		// from this file; no method is invoked on it.
		var backing *distributed.QuantStore
		var adapter any = &quantStoreAdapter{store: backing}

		_, ok := adapter.(interface {
			List(context.Context) ([]*schema.QuantizationJob, error)
			Upsert(context.Context, *schema.QuantizationJob) error
			Delete(context.Context, string) error
		})
		Expect(ok).To(BeTrue(), "quantStoreAdapter must expose the List/Upsert/Delete method set")
	})
})
|
||||
})
|
||||
114
core/services/quantization/syncstore.go
Normal file
114
core/services/quantization/syncstore.go
Normal file
@@ -0,0 +1,114 @@
|
||||
package quantization
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"time"
|
||||
|
||||
"github.com/mudler/LocalAI/core/schema"
|
||||
"github.com/mudler/LocalAI/core/services/distributed"
|
||||
"github.com/mudler/LocalAI/core/services/syncstate"
|
||||
)
|
||||
|
||||
// quantStoreAdapter bridges the distributed PostgreSQL QuantStore to the generic
|
||||
// syncstate.Store the SyncedMap consumes. It is only wired in distributed mode;
|
||||
// standalone leaves Store nil and hydrates from disk via a Loader instead.
|
||||
//
|
||||
// The SyncedMap value type is *schema.QuantizationJob (the exact shape the REST
|
||||
// API returns) so reads need no conversion and the response JSON is provably
|
||||
// unchanged. The adapter is the single place that translates between that API
|
||||
// shape and the DB QuantJobRecord.
|
||||
type quantStoreAdapter struct {
|
||||
store *distributed.QuantStore
|
||||
}
|
||||
|
||||
// compile-time assertion that the adapter satisfies the component's Store.
|
||||
var _ syncstate.Store[string, *schema.QuantizationJob] = (*quantStoreAdapter)(nil)
|
||||
|
||||
func (a *quantStoreAdapter) List(_ context.Context) ([]*schema.QuantizationJob, error) {
|
||||
records, err := a.store.ListAll()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
jobs := make([]*schema.QuantizationJob, 0, len(records))
|
||||
for i := range records {
|
||||
jobs = append(jobs, recordToJob(&records[i]))
|
||||
}
|
||||
return jobs, nil
|
||||
}
|
||||
|
||||
func (a *quantStoreAdapter) Upsert(_ context.Context, job *schema.QuantizationJob) error {
|
||||
return a.store.Upsert(jobToRecord(job))
|
||||
}
|
||||
|
||||
func (a *quantStoreAdapter) Delete(_ context.Context, id string) error {
|
||||
return a.store.Delete(id)
|
||||
}
|
||||
|
||||
// recordToJob maps a persisted DB record back to the API shape, reconstructing
|
||||
// the structured Config / ExtraOptions from their JSON columns.
|
||||
func recordToJob(r *distributed.QuantJobRecord) *schema.QuantizationJob {
|
||||
job := &schema.QuantizationJob{
|
||||
ID: r.ID,
|
||||
UserID: r.UserID,
|
||||
Model: r.Model,
|
||||
Backend: r.Backend,
|
||||
ModelID: r.ModelID,
|
||||
QuantizationType: r.QuantizationType,
|
||||
Status: r.Status,
|
||||
Message: r.Message,
|
||||
OutputDir: r.OutputDir,
|
||||
OutputFile: r.OutputFile,
|
||||
ImportStatus: r.ImportStatus,
|
||||
ImportMessage: r.ImportMessage,
|
||||
ImportModelName: r.ImportModelName,
|
||||
CreatedAt: r.CreatedAt.UTC().Format(time.RFC3339),
|
||||
}
|
||||
if r.ExtraOptsJSON != "" {
|
||||
// Best-effort: a malformed column must not drop the whole job from the API.
|
||||
_ = json.Unmarshal([]byte(r.ExtraOptsJSON), &job.ExtraOptions)
|
||||
}
|
||||
if r.ConfigJSON != "" {
|
||||
var cfg schema.QuantizationJobRequest
|
||||
if err := json.Unmarshal([]byte(r.ConfigJSON), &cfg); err == nil {
|
||||
job.Config = &cfg
|
||||
}
|
||||
}
|
||||
return job
|
||||
}
|
||||
|
||||
// jobToRecord maps the API shape to a DB record for write-through, serializing
|
||||
// the structured Config / ExtraOptions into their JSON columns. CreatedAt is
|
||||
// parsed back from the RFC3339 string the service stamps; an unparseable value is
|
||||
// left zero so QuantStore.Upsert stamps "now".
|
||||
func jobToRecord(job *schema.QuantizationJob) *distributed.QuantJobRecord {
|
||||
rec := &distributed.QuantJobRecord{
|
||||
ID: job.ID,
|
||||
UserID: job.UserID,
|
||||
Model: job.Model,
|
||||
Backend: job.Backend,
|
||||
ModelID: job.ModelID,
|
||||
QuantizationType: job.QuantizationType,
|
||||
Status: job.Status,
|
||||
Message: job.Message,
|
||||
OutputDir: job.OutputDir,
|
||||
OutputFile: job.OutputFile,
|
||||
ImportStatus: job.ImportStatus,
|
||||
ImportMessage: job.ImportMessage,
|
||||
ImportModelName: job.ImportModelName,
|
||||
}
|
||||
if job.Config != nil {
|
||||
if data, err := json.Marshal(job.Config); err == nil {
|
||||
rec.ConfigJSON = string(data)
|
||||
}
|
||||
}
|
||||
if job.ExtraOptions != nil {
|
||||
if data, err := json.Marshal(job.ExtraOptions); err == nil {
|
||||
rec.ExtraOptsJSON = string(data)
|
||||
}
|
||||
}
|
||||
if t, err := time.Parse(time.RFC3339, job.CreatedAt); err == nil {
|
||||
rec.CreatedAt = t
|
||||
}
|
||||
return rec
|
||||
}
|
||||
286
core/services/syncstate/syncstate.go
Normal file
286
core/services/syncstate/syncstate.go
Normal file
@@ -0,0 +1,286 @@
|
||||
// Package syncstate provides SyncedMap, a reusable cross-replica in-memory map.
|
||||
//
|
||||
// LocalAI in distributed mode runs multiple frontend replicas behind a
|
||||
// round-robin load balancer. Several features keep process-local in-memory state
|
||||
// that is surfaced to the HTTP/UI API; without cross-replica sync a poll that
|
||||
// lands on a replica which did not originate a change sees stale or missing data.
|
||||
// SyncedMap collapses the three legs each feature otherwise hand-wires - an
|
||||
// in-memory map, a NATS broadcast/apply path, and optional durable read-through -
|
||||
// into one well-tested component so cross-replica consistency is a configuration
|
||||
// choice rather than a bespoke re-implementation.
|
||||
package syncstate
|
||||
|
||||
import (
|
||||
"context"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/mudler/LocalAI/core/services/messaging"
|
||||
"github.com/mudler/xlog"
|
||||
)
|
||||
|
||||
// Op values carried on the wire and passed to OnApply.
|
||||
const (
|
||||
opSet = "set"
|
||||
opDelete = "delete"
|
||||
)
|
||||
|
||||
// Store is optional durable backing for a SyncedMap. In distributed mode it is a
|
||||
// single shared DB, so the apply path (a delta received from a peer) updates
|
||||
// memory only and never re-writes the Store.
|
||||
type Store[K comparable, V any] interface {
	// List returns every persisted value; SyncedMap calls it when hydrating.
	List(ctx context.Context) (values []V, err error)
	// Upsert persists one value; SyncedMap calls it from Set.
	Upsert(ctx context.Context, value V) error
	// Delete removes the value stored under key; SyncedMap calls it from Delete.
	Delete(ctx context.Context, key K) error
}
|
||||
|
||||
// Config configures a SyncedMap.
|
||||
type Config[K comparable, V any] struct {
|
||||
Name string // subject namespace, e.g. "finetune.jobs"
|
||||
Key func(V) K // extract the key from a value
|
||||
Nats messaging.MessagingClient // nil => standalone: in-memory only, no broadcast/subscribe
|
||||
Store Store[K, V] // optional read-through persistence
|
||||
Loader func(ctx context.Context) ([]V, error) // source when there is no Store (e.g. disk reload)
|
||||
OnApply func(op string, k K, v V) // optional hook after an applied change (e.g. ShutdownModel)
|
||||
Reconcile time.Duration // optional periodic re-hydrate; 0 = off
|
||||
}
|
||||
|
||||
// delta is the JSON wire envelope broadcast on every local mutation. Value is
|
||||
// omitempty so a delete carries only op+key.
|
||||
type delta[K comparable, V any] struct {
	// Op is one of the op* constants ("set" or "delete").
	Op string `json:"op"`

	// Key identifies the entry the operation targets.
	Key K `json:"key"`

	// Value is the new value for a set; a delete publishes the zero value here.
	Value V `json:"value,omitempty"`
}
|
||||
|
||||
// SyncedMap is a cross-replica in-memory map. A local write (Set/Delete) updates
|
||||
// memory, the optional durable Store, then broadcasts a delta to peers. A peer's
|
||||
// delta updates memory only and fires OnApply - it never re-broadcasts and never
|
||||
// writes the Store. That structural split is the echo-loop guard (same pattern as
|
||||
// galleryop.mergeStatus / OpCache.applyStart): receiving your own broadcast just
|
||||
// re-applies an idempotent value to memory, so there is no storm and no
|
||||
// double-write.
|
||||
type SyncedMap[K comparable, V any] struct {
|
||||
cfg Config[K, V]
|
||||
|
||||
mu sync.RWMutex
|
||||
data map[K]V
|
||||
|
||||
sub Subscription
|
||||
|
||||
// lifeCtx outlives Start's argument: a reconnect callback or reconcile tick
|
||||
// can fire long after Start returns, so they must not be tied to a ctx the
|
||||
// caller may cancel. Close cancels it.
|
||||
lifeCtx context.Context
|
||||
cancel context.CancelFunc
|
||||
wg sync.WaitGroup
|
||||
}
|
||||
|
||||
// Subscription is the subset of messaging.Subscription the component holds onto.
|
||||
type Subscription = messaging.Subscription
|
||||
|
||||
// New constructs a SyncedMap. Call Start to hydrate and begin syncing.
|
||||
func New[K comparable, V any](cfg Config[K, V]) *SyncedMap[K, V] {
|
||||
return &SyncedMap[K, V]{cfg: cfg, data: make(map[K]V)}
|
||||
}
|
||||
|
||||
func (m *SyncedMap[K, V]) subject() string {
|
||||
return messaging.SubjectSyncStateDelta(m.cfg.Name)
|
||||
}
|
||||
|
||||
// Start hydrates from the source, subscribes for peer deltas, registers a
|
||||
// reconnect re-hydrate (when the client supports it), and starts the optional
|
||||
// reconcile ticker.
|
||||
func (m *SyncedMap[K, V]) Start(ctx context.Context) error {
|
||||
if err := m.hydrate(ctx); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
m.lifeCtx, m.cancel = context.WithCancel(context.Background())
|
||||
|
||||
if m.cfg.Nats != nil {
|
||||
sub, err := messaging.SubscribeJSON(m.cfg.Nats, m.subject(), m.apply)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
m.sub = sub
|
||||
|
||||
// nats.go transparently resubscribes on reconnect, but it cannot know we
|
||||
// kept derived in-memory state that may have drifted while the link was
|
||||
// down, so re-hydrate from the durable source. Detected via an optional
|
||||
// interface so MessagingClient itself stays minimal; standalone/test
|
||||
// clients without the method simply fall back to the reconcile ticker.
|
||||
if r, ok := m.cfg.Nats.(interface{ OnReconnect(func()) }); ok {
|
||||
r.OnReconnect(func() {
|
||||
if err := m.hydrate(m.lifeCtx); err != nil {
|
||||
xlog.Warn("syncstate: reconnect re-hydrate failed", "name", m.cfg.Name, "error", err)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
if m.cfg.Reconcile > 0 {
|
||||
m.wg.Add(1)
|
||||
go m.reconcileLoop()
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Close unsubscribes and stops the reconcile ticker.
|
||||
func (m *SyncedMap[K, V]) Close() error {
|
||||
if m.cancel != nil {
|
||||
m.cancel()
|
||||
}
|
||||
m.wg.Wait()
|
||||
if m.sub != nil {
|
||||
return m.sub.Unsubscribe()
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Set updates the value locally, writes through the Store, then broadcasts.
|
||||
// Per the data-flow contract the Store write happens under the lock so memory and
|
||||
// durable state move together; the broadcast is best-effort after unlocking.
|
||||
func (m *SyncedMap[K, V]) Set(ctx context.Context, v V) error {
|
||||
k := m.cfg.Key(v)
|
||||
m.mu.Lock()
|
||||
m.data[k] = v
|
||||
if m.cfg.Store != nil {
|
||||
if err := m.cfg.Store.Upsert(ctx, v); err != nil {
|
||||
m.mu.Unlock()
|
||||
return err
|
||||
}
|
||||
}
|
||||
m.mu.Unlock()
|
||||
m.publish(opSet, k, v)
|
||||
return nil
|
||||
}
|
||||
|
||||
// Delete removes the key locally, deletes it from the Store, then broadcasts.
|
||||
func (m *SyncedMap[K, V]) Delete(ctx context.Context, k K) error {
|
||||
m.mu.Lock()
|
||||
delete(m.data, k)
|
||||
if m.cfg.Store != nil {
|
||||
if err := m.cfg.Store.Delete(ctx, k); err != nil {
|
||||
m.mu.Unlock()
|
||||
return err
|
||||
}
|
||||
}
|
||||
m.mu.Unlock()
|
||||
var zero V
|
||||
m.publish(opDelete, k, zero)
|
||||
return nil
|
||||
}
|
||||
|
||||
// Get returns the value for k and whether it was present.
|
||||
func (m *SyncedMap[K, V]) Get(k K) (V, bool) {
|
||||
m.mu.RLock()
|
||||
defer m.mu.RUnlock()
|
||||
v, ok := m.data[k]
|
||||
return v, ok
|
||||
}
|
||||
|
||||
// List returns a snapshot slice of all values.
|
||||
func (m *SyncedMap[K, V]) List() []V {
|
||||
m.mu.RLock()
|
||||
defer m.mu.RUnlock()
|
||||
out := make([]V, 0, len(m.data))
|
||||
for _, v := range m.data {
|
||||
out = append(out, v)
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
// Snapshot returns a copy of the underlying map.
|
||||
func (m *SyncedMap[K, V]) Snapshot() map[K]V {
|
||||
m.mu.RLock()
|
||||
defer m.mu.RUnlock()
|
||||
out := make(map[K]V, len(m.data))
|
||||
for k, v := range m.data {
|
||||
out[k] = v
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
// publish broadcasts a delta. Standalone (nil Nats) is a strict no-op.
|
||||
func (m *SyncedMap[K, V]) publish(op string, k K, v V) {
|
||||
if m.cfg.Nats == nil {
|
||||
return
|
||||
}
|
||||
if err := m.cfg.Nats.Publish(m.subject(), delta[K, V]{Op: op, Key: k, Value: v}); err != nil {
|
||||
xlog.Warn("syncstate: failed to broadcast delta", "name", m.cfg.Name, "op", op, "error", err)
|
||||
}
|
||||
}
|
||||
|
||||
// apply handles a peer's delta: memory-only update plus OnApply. It deliberately
|
||||
// never writes the Store nor re-publishes - that is the echo-loop guard.
|
||||
func (m *SyncedMap[K, V]) apply(d delta[K, V]) {
|
||||
switch d.Op {
|
||||
case opSet:
|
||||
m.mu.Lock()
|
||||
m.data[d.Key] = d.Value
|
||||
m.mu.Unlock()
|
||||
case opDelete:
|
||||
m.mu.Lock()
|
||||
delete(m.data, d.Key)
|
||||
m.mu.Unlock()
|
||||
default:
|
||||
xlog.Warn("syncstate: ignoring delta with unknown op", "name", m.cfg.Name, "op", d.Op)
|
||||
return
|
||||
}
|
||||
if m.cfg.OnApply != nil {
|
||||
m.cfg.OnApply(d.Op, d.Key, d.Value)
|
||||
}
|
||||
}
|
||||
|
||||
// hydrate replaces the whole map from the durable source: Store if present, else
|
||||
// Loader. With neither, a late joiner starts empty and catches up via deltas
|
||||
// (acceptable only for ephemeral state).
|
||||
func (m *SyncedMap[K, V]) hydrate(ctx context.Context) error {
|
||||
var (
|
||||
vals []V
|
||||
err error
|
||||
)
|
||||
switch {
|
||||
case m.cfg.Store != nil:
|
||||
vals, err = m.cfg.Store.List(ctx)
|
||||
case m.cfg.Loader != nil:
|
||||
vals, err = m.cfg.Loader(ctx)
|
||||
default:
|
||||
return nil
|
||||
}
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
m.replaceAll(vals)
|
||||
return nil
|
||||
}
|
||||
|
||||
// replaceAll atomically swaps the map contents for the given values, keyed via
|
||||
// cfg.Key.
|
||||
func (m *SyncedMap[K, V]) replaceAll(vals []V) {
|
||||
next := make(map[K]V, len(vals))
|
||||
for _, v := range vals {
|
||||
next[m.cfg.Key(v)] = v
|
||||
}
|
||||
m.mu.Lock()
|
||||
m.data = next
|
||||
m.mu.Unlock()
|
||||
}
|
||||
|
||||
// reconcileLoop periodically re-hydrates to repair silent drift (missed deltas).
|
||||
func (m *SyncedMap[K, V]) reconcileLoop() {
|
||||
defer m.wg.Done()
|
||||
t := time.NewTicker(m.cfg.Reconcile)
|
||||
defer t.Stop()
|
||||
for {
|
||||
select {
|
||||
case <-m.lifeCtx.Done():
|
||||
return
|
||||
case <-t.C:
|
||||
if err := m.hydrate(m.lifeCtx); err != nil {
|
||||
xlog.Warn("syncstate: reconcile re-hydrate failed", "name", m.cfg.Name, "error", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
13
core/services/syncstate/syncstate_suite_test.go
Normal file
13
core/services/syncstate/syncstate_suite_test.go
Normal file
@@ -0,0 +1,13 @@
|
||||
package syncstate_test
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
. "github.com/onsi/ginkgo/v2"
|
||||
. "github.com/onsi/gomega"
|
||||
)
|
||||
|
||||
func TestSyncstate(t *testing.T) {
|
||||
RegisterFailHandler(Fail)
|
||||
RunSpecs(t, "Syncstate Suite")
|
||||
}
|
||||
291
core/services/syncstate/syncstate_test.go
Normal file
291
core/services/syncstate/syncstate_test.go
Normal file
@@ -0,0 +1,291 @@
|
||||
package syncstate_test
|
||||
|
||||
import (
|
||||
"context"
|
||||
"sync"
|
||||
|
||||
. "github.com/onsi/ginkgo/v2"
|
||||
. "github.com/onsi/gomega"
|
||||
|
||||
"github.com/mudler/LocalAI/core/services/messaging"
|
||||
"github.com/mudler/LocalAI/core/services/syncstate"
|
||||
"github.com/mudler/LocalAI/core/services/testutil"
|
||||
)
|
||||
|
||||
// job is a minimal JSON-serializable value stand-in for the real cross-replica
|
||||
// records (finetune/quant/agent jobs) the component is built for.
|
||||
type job struct {
	// ID is the value jobKey returns as the map key.
	ID string `json:"id"`
	// Status is the payload field the specs assert on.
	Status string `json:"status"`
}

// jobKey extracts the key of a job: its ID.
func jobKey(value *job) string {
	return value.ID
}
|
||||
|
||||
const stateName = "test.jobs"
|
||||
|
||||
// deltaSubject returns the messaging subject derived from stateName.
func deltaSubject() string {
	subject := messaging.SubjectSyncStateDelta(stateName)
	return subject
}
|
||||
|
||||
// fakeStore is an in-memory Store that records call counts so specs can assert
|
||||
// the write-through-vs-apply split (local writes hit the Store; applied deltas
|
||||
// must not).
|
||||
type fakeStore struct {
|
||||
mu sync.Mutex
|
||||
data map[string]*job
|
||||
upsertCalls int
|
||||
deleteCalls int
|
||||
listCalls int
|
||||
}
|
||||
|
||||
func newFakeStore(seed ...*job) *fakeStore {
|
||||
s := &fakeStore{data: map[string]*job{}}
|
||||
for _, j := range seed {
|
||||
s.data[j.ID] = j
|
||||
}
|
||||
return s
|
||||
}
|
||||
|
||||
func (s *fakeStore) List(_ context.Context) ([]*job, error) {
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
s.listCalls++
|
||||
out := make([]*job, 0, len(s.data))
|
||||
for _, j := range s.data {
|
||||
out = append(out, j)
|
||||
}
|
||||
return out, nil
|
||||
}
|
||||
|
||||
func (s *fakeStore) Upsert(_ context.Context, j *job) error {
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
s.upsertCalls++
|
||||
s.data[j.ID] = j
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *fakeStore) Delete(_ context.Context, k string) error {
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
s.deleteCalls++
|
||||
delete(s.data, k)
|
||||
return nil
|
||||
}
|
||||
|
||||
// add simulates a peer replica writing to the shared DB out-of-band (e.g. while
|
||||
// this replica was partitioned), so a re-hydrate can be observed to pick it up.
|
||||
func (s *fakeStore) add(j *job) {
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
s.data[j.ID] = j
|
||||
}
|
||||
|
||||
func (s *fakeStore) counts() (upsert, del, list int) {
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
return s.upsertCalls, s.deleteCalls, s.listCalls
|
||||
}
|
||||
|
||||
var _ = Describe("SyncedMap", func() {
|
||||
ctx := context.Background()
|
||||
|
||||
Describe("cross-replica delta propagation", func() {
|
||||
var (
|
||||
bus *testutil.FakeBus
|
||||
a, b *syncstate.SyncedMap[string, *job]
|
||||
)
|
||||
|
||||
BeforeEach(func() {
|
||||
bus = testutil.NewFakeBus()
|
||||
a = syncstate.New(syncstate.Config[string, *job]{Name: stateName, Key: jobKey, Nats: bus})
|
||||
b = syncstate.New(syncstate.Config[string, *job]{Name: stateName, Key: jobKey, Nats: bus})
|
||||
Expect(a.Start(ctx)).To(Succeed())
|
||||
Expect(b.Start(ctx)).To(Succeed())
|
||||
})
|
||||
|
||||
AfterEach(func() {
|
||||
Expect(a.Close()).To(Succeed())
|
||||
Expect(b.Close()).To(Succeed())
|
||||
})
|
||||
|
||||
It("propagates a Set on A to B", func() {
|
||||
Expect(a.Set(ctx, &job{ID: "1", Status: "running"})).To(Succeed())
|
||||
|
||||
got, ok := b.Get("1")
|
||||
Expect(ok).To(BeTrue(), "replica B should see the value A just set")
|
||||
Expect(got.Status).To(Equal("running"))
|
||||
})
|
||||
|
||||
It("prunes a Delete on A from B", func() {
|
||||
Expect(a.Set(ctx, &job{ID: "1", Status: "running"})).To(Succeed())
|
||||
_, present := b.Get("1")
|
||||
Expect(present).To(BeTrue(), "precondition: B must have the value before the delete")
|
||||
|
||||
Expect(a.Delete(ctx, "1")).To(Succeed())
|
||||
|
||||
_, ok := b.Get("1")
|
||||
Expect(ok).To(BeFalse(), "a delete on A must remove the key from B")
|
||||
})
|
||||
})
|
||||
|
||||
Describe("hydration", func() {
|
||||
It("hydrates on Start from a preloaded Store", func() {
|
||||
store := newFakeStore(&job{ID: "x", Status: "done"})
|
||||
m := syncstate.New(syncstate.Config[string, *job]{Name: stateName, Key: jobKey, Store: store})
|
||||
Expect(m.Start(ctx)).To(Succeed())
|
||||
|
||||
got, ok := m.Get("x")
|
||||
Expect(ok).To(BeTrue(), "Start must populate the map from the Store")
|
||||
Expect(got.Status).To(Equal("done"))
|
||||
})
|
||||
|
||||
It("uses the Loader when Store is nil", func() {
|
||||
m := syncstate.New(syncstate.Config[string, *job]{
|
||||
Name: stateName,
|
||||
Key: jobKey,
|
||||
Loader: func(_ context.Context) ([]*job, error) {
|
||||
return []*job{{ID: "l", Status: "loaded"}}, nil
|
||||
},
|
||||
})
|
||||
Expect(m.Start(ctx)).To(Succeed())
|
||||
|
||||
got, ok := m.Get("l")
|
||||
Expect(ok).To(BeTrue(), "Loader output must hydrate the map when there is no Store")
|
||||
Expect(got.Status).To(Equal("loaded"))
|
||||
})
|
||||
})
|
||||
|
||||
Describe("echo-loop guard", func() {
|
||||
It("applies its own broadcast once and does not re-publish", func() {
|
||||
bus := testutil.NewFakeBus()
|
||||
a := syncstate.New(syncstate.Config[string, *job]{Name: stateName, Key: jobKey, Nats: bus})
|
||||
b := syncstate.New(syncstate.Config[string, *job]{Name: stateName, Key: jobKey, Nats: bus})
|
||||
Expect(a.Start(ctx)).To(Succeed())
|
||||
Expect(b.Start(ctx)).To(Succeed())
|
||||
defer func() {
|
||||
Expect(a.Close()).To(Succeed())
|
||||
Expect(b.Close()).To(Succeed())
|
||||
}()
|
||||
|
||||
Expect(a.Set(ctx, &job{ID: "e", Status: "running"})).To(Succeed())
|
||||
|
||||
// One local write must produce exactly one broadcast: A and B both
|
||||
// receive it and apply to memory, but the apply path never re-publishes.
|
||||
Expect(bus.PublishCount(deltaSubject())).To(Equal(1),
|
||||
"the apply path must not re-broadcast, otherwise replicas storm")
|
||||
Expect(a.List()).To(HaveLen(1), "A must not double-store its own echo")
|
||||
_, ok := b.Get("e")
|
||||
Expect(ok).To(BeTrue())
|
||||
})
|
||||
})
|
||||
|
||||
Describe("Store write-through vs apply", func() {
|
||||
It("writes the Store on local Set/Delete but not on an applied delta", func() {
|
||||
bus := testutil.NewFakeBus()
|
||||
storeA := newFakeStore()
|
||||
storeB := newFakeStore()
|
||||
a := syncstate.New(syncstate.Config[string, *job]{Name: stateName, Key: jobKey, Nats: bus, Store: storeA})
|
||||
b := syncstate.New(syncstate.Config[string, *job]{Name: stateName, Key: jobKey, Nats: bus, Store: storeB})
|
||||
Expect(a.Start(ctx)).To(Succeed())
|
||||
Expect(b.Start(ctx)).To(Succeed())
|
||||
defer func() {
|
||||
Expect(a.Close()).To(Succeed())
|
||||
Expect(b.Close()).To(Succeed())
|
||||
}()
|
||||
|
||||
Expect(a.Set(ctx, &job{ID: "w", Status: "running"})).To(Succeed())
|
||||
|
||||
upA, _, _ := storeA.counts()
|
||||
upB, _, _ := storeB.counts()
|
||||
Expect(upA).To(Equal(1), "local Set must write through to its own Store")
|
||||
Expect(upB).To(Equal(0), "the apply path must never write the peer's Store")
|
||||
|
||||
Expect(a.Delete(ctx, "w")).To(Succeed())
|
||||
_, delA, _ := storeA.counts()
|
||||
_, delB, _ := storeB.counts()
|
||||
Expect(delA).To(Equal(1), "local Delete must delete from its own Store")
|
||||
Expect(delB).To(Equal(0), "the apply path must never delete from the peer's Store")
|
||||
})
|
||||
})
|
||||
|
||||
Describe("OnApply hook", func() {
|
||||
It("fires with the correct op and key on an applied delta", func() {
|
||||
bus := testutil.NewFakeBus()
|
||||
var (
|
||||
mu sync.Mutex
|
||||
ops []string
|
||||
keys []string
|
||||
)
|
||||
a := syncstate.New(syncstate.Config[string, *job]{Name: stateName, Key: jobKey, Nats: bus})
|
||||
b := syncstate.New(syncstate.Config[string, *job]{
|
||||
Name: stateName, Key: jobKey, Nats: bus,
|
||||
OnApply: func(op string, k string, _ *job) {
|
||||
mu.Lock()
|
||||
ops = append(ops, op)
|
||||
keys = append(keys, k)
|
||||
mu.Unlock()
|
||||
},
|
||||
})
|
||||
Expect(a.Start(ctx)).To(Succeed())
|
||||
Expect(b.Start(ctx)).To(Succeed())
|
||||
defer func() {
|
||||
Expect(a.Close()).To(Succeed())
|
||||
Expect(b.Close()).To(Succeed())
|
||||
}()
|
||||
|
||||
Expect(a.Set(ctx, &job{ID: "o", Status: "running"})).To(Succeed())
|
||||
Expect(a.Delete(ctx, "o")).To(Succeed())
|
||||
|
||||
mu.Lock()
|
||||
defer mu.Unlock()
|
||||
Expect(ops).To(Equal([]string{"set", "delete"}))
|
||||
Expect(keys).To(Equal([]string{"o", "o"}))
|
||||
})
|
||||
})
|
||||
|
||||
Describe("standalone (nil Nats)", func() {
|
||||
It("works in-memory with no panic and nothing to broadcast", func() {
|
||||
m := syncstate.New(syncstate.Config[string, *job]{Name: stateName, Key: jobKey})
|
||||
Expect(m.Start(ctx)).To(Succeed())
|
||||
defer func() { Expect(m.Close()).To(Succeed()) }()
|
||||
|
||||
Expect(func() {
|
||||
Expect(m.Set(ctx, &job{ID: "s", Status: "running"})).To(Succeed())
|
||||
}).ToNot(Panic())
|
||||
|
||||
got, ok := m.Get("s")
|
||||
Expect(ok).To(BeTrue())
|
||||
Expect(got.Status).To(Equal("running"))
|
||||
Expect(m.List()).To(HaveLen(1))
|
||||
Expect(m.Snapshot()).To(HaveKey("s"))
|
||||
|
||||
Expect(m.Delete(ctx, "s")).To(Succeed())
|
||||
_, ok = m.Get("s")
|
||||
Expect(ok).To(BeFalse())
|
||||
})
|
||||
})
|
||||
|
||||
Describe("reconnect re-hydrate", func() {
|
||||
It("re-reads the source when the messaging client reconnects", func() {
|
||||
bus := testutil.NewFakeBus()
|
||||
store := newFakeStore(&job{ID: "init", Status: "running"})
|
||||
m := syncstate.New(syncstate.Config[string, *job]{Name: stateName, Key: jobKey, Nats: bus, Store: store})
|
||||
Expect(m.Start(ctx)).To(Succeed())
|
||||
defer func() { Expect(m.Close()).To(Succeed()) }()
|
||||
|
||||
_, ok := m.Get("init")
|
||||
Expect(ok).To(BeTrue())
|
||||
|
||||
// A peer writes to the shared DB while we are unaware (no delta seen).
|
||||
store.add(&job{ID: "late", Status: "running"})
|
||||
_, ok = m.Get("late")
|
||||
Expect(ok).To(BeFalse(), "the new row should not appear before a re-hydrate")
|
||||
|
||||
bus.TriggerReconnect()
|
||||
|
||||
_, ok = m.Get("late")
|
||||
Expect(ok).To(BeTrue(), "reconnect must re-hydrate from the source and pick up drift")
|
||||
_, _, list := store.counts()
|
||||
Expect(list).To(Equal(2), "exactly one Start hydrate plus one reconnect re-hydrate")
|
||||
})
|
||||
})
|
||||
})
|
||||
160
core/services/testutil/fakebus.go
Normal file
160
core/services/testutil/fakebus.go
Normal file
@@ -0,0 +1,160 @@
|
||||
package testutil
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/mudler/LocalAI/core/services/messaging"
|
||||
)
|
||||
|
||||
// FakeBus is an in-memory messaging.MessagingClient that delivers each published
|
||||
// message synchronously to every registered subscriber whose subject filter
|
||||
// matches, including NATS-style wildcard subjects (`*` matches exactly one
|
||||
// token).
|
||||
//
|
||||
// Synchronous delivery keeps specs deterministic: the moment Publish returns,
|
||||
// every matching subscriber's handler has already run, so the spec body can read
|
||||
// the resulting state without polling. It is the shared test double for every
|
||||
// cross-replica-sync adopter (gallery, syncstate, ...) so they exercise the same
|
||||
// delivery semantics. It deliberately depends only on the standard library and
|
||||
// the messaging package — no test framework — so it is importable anywhere.
|
||||
type FakeBus struct {
|
||||
mu sync.Mutex
|
||||
subs []fakeBusSub
|
||||
// publishCounts records how many messages were published per subject, so a
|
||||
// spec can assert the echo-loop guard (an applied delta must not re-publish).
|
||||
publishCounts map[string]int
|
||||
|
||||
// reconnectCbs back the optional OnReconnect/TriggerReconnect pair, letting a
|
||||
// spec exercise the component's reconnect re-hydrate path without a real
|
||||
// NATS server.
|
||||
reconnectCbs []func()
|
||||
}
|
||||
|
||||
type fakeBusSub struct {
|
||||
subject string
|
||||
handler func([]byte)
|
||||
}
|
||||
|
||||
// NewFakeBus returns a ready-to-use in-memory bus.
|
||||
func NewFakeBus() *FakeBus {
|
||||
return &FakeBus{publishCounts: map[string]int{}}
|
||||
}
|
||||
|
||||
// subjectMatches reports whether a subscription filter matches a concrete
|
||||
// subject, honoring the single-token `*` wildcard used by NATS.
|
||||
func subjectMatches(filter, subject string) bool {
|
||||
if filter == subject {
|
||||
return true
|
||||
}
|
||||
fp := strings.Split(filter, ".")
|
||||
sp := strings.Split(subject, ".")
|
||||
if len(fp) != len(sp) {
|
||||
return false
|
||||
}
|
||||
for i := range fp {
|
||||
if fp[i] == "*" {
|
||||
continue
|
||||
}
|
||||
if fp[i] != sp[i] {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// Publish marshals data as JSON and delivers it synchronously to every matching
|
||||
// subscriber.
|
||||
func (b *FakeBus) Publish(subject string, data any) error {
|
||||
payload, err := json.Marshal(data)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
b.mu.Lock()
|
||||
b.publishCounts[subject]++
|
||||
subs := append([]fakeBusSub(nil), b.subs...)
|
||||
b.mu.Unlock()
|
||||
for _, s := range subs {
|
||||
if subjectMatches(s.subject, subject) {
|
||||
s.handler(payload)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// PublishCount returns how many messages were published on the exact subject.
|
||||
func (b *FakeBus) PublishCount(subject string) int {
|
||||
b.mu.Lock()
|
||||
defer b.mu.Unlock()
|
||||
return b.publishCounts[subject]
|
||||
}
|
||||
|
||||
type fakeBusSubscription struct {
|
||||
bus *FakeBus
|
||||
subRef fakeBusSub
|
||||
}
|
||||
|
||||
func (s *fakeBusSubscription) Unsubscribe() error {
|
||||
s.bus.mu.Lock()
|
||||
defer s.bus.mu.Unlock()
|
||||
for i, candidate := range s.bus.subs {
|
||||
if candidate.subject == s.subRef.subject {
|
||||
s.bus.subs = append(s.bus.subs[:i], s.bus.subs[i+1:]...)
|
||||
return nil
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (b *FakeBus) Subscribe(subject string, handler func([]byte)) (messaging.Subscription, error) {
|
||||
sub := fakeBusSub{subject: subject, handler: handler}
|
||||
b.mu.Lock()
|
||||
b.subs = append(b.subs, sub)
|
||||
b.mu.Unlock()
|
||||
return &fakeBusSubscription{bus: b, subRef: sub}, nil
|
||||
}
|
||||
|
||||
func (b *FakeBus) QueueSubscribe(subject, _ string, handler func([]byte)) (messaging.Subscription, error) {
|
||||
return b.Subscribe(subject, handler)
|
||||
}
|
||||
|
||||
func (b *FakeBus) QueueSubscribeReply(string, string, func([]byte, func([]byte))) (messaging.Subscription, error) {
|
||||
return &fakeBusSubscription{bus: b}, nil
|
||||
}
|
||||
|
||||
func (b *FakeBus) SubscribeReply(string, func([]byte, func([]byte))) (messaging.Subscription, error) {
|
||||
return &fakeBusSubscription{bus: b}, nil
|
||||
}
|
||||
|
||||
func (b *FakeBus) Request(string, []byte, time.Duration) ([]byte, error) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func (b *FakeBus) IsConnected() bool { return true }
|
||||
func (b *FakeBus) Close() {}
|
||||
|
||||
// OnReconnect mirrors *messaging.Client.OnReconnect so a spec can drive the
|
||||
// component's reconnect re-hydrate path. The component detects this method via an
|
||||
// optional interface assertion; implementing it here keeps the fake a faithful
|
||||
// stand-in for the concrete client.
|
||||
func (b *FakeBus) OnReconnect(cb func()) {
|
||||
if cb == nil {
|
||||
return
|
||||
}
|
||||
b.mu.Lock()
|
||||
b.reconnectCbs = append(b.reconnectCbs, cb)
|
||||
b.mu.Unlock()
|
||||
}
|
||||
|
||||
// TriggerReconnect runs every registered reconnect callback, simulating a NATS
|
||||
// reconnect event.
|
||||
func (b *FakeBus) TriggerReconnect() {
|
||||
b.mu.Lock()
|
||||
cbs := append([]func(){}, b.reconnectCbs...)
|
||||
b.mu.Unlock()
|
||||
for _, cb := range cbs {
|
||||
cb()
|
||||
}
|
||||
}
|
||||
@@ -22,13 +22,16 @@ Download the latest DMG from GitHub releases:
|
||||
3. Drag the LocalAI application to your Applications folder
|
||||
4. Launch LocalAI from your Applications folder
|
||||
|
||||
## Known Issues
|
||||
## Verification
|
||||
|
||||
> **Note**: The DMGs are not signed by Apple and may show as quarantined.
|
||||
>
|
||||
> **Workaround**: See [this issue](https://github.com/mudler/LocalAI/issues/6268) for details on how to bypass the quarantine.
|
||||
>
|
||||
> **Fix tracking**: The signing issue is being tracked in [this issue](https://github.com/mudler/LocalAI/issues/6244).
|
||||
The `LocalAI.dmg` (and the app inside it) and the `local-ai` server binary are
|
||||
signed with an Apple Developer ID and notarized by Apple, so they launch with no
|
||||
quarantine prompt or workaround. To inspect the signature yourself:
|
||||
|
||||
```bash
|
||||
spctl --assess --type execute -v /Applications/LocalAI.app
|
||||
codesign --verify --deep --strict --verbose=2 /Applications/LocalAI.app
|
||||
```
|
||||
|
||||
## Next Steps
|
||||
|
||||
|
||||
@@ -1,3 +1,3 @@
|
||||
{
|
||||
"version": "v4.5.0"
|
||||
"version": "v4.5.2"
|
||||
}
|
||||
|
||||
@@ -1,4 +1,58 @@
|
||||
---
|
||||
- name: "ornith-1.0-9b"
|
||||
url: "github:mudler/LocalAI/gallery/virtual.yaml@master"
|
||||
urls:
|
||||
- https://huggingface.co/deepreinforce-ai/Ornith-1.0-9B-GGUF
|
||||
description: |
|
||||
[Ornith-1.0](https://deep-reinforce.com/ornith.html)
|
||||
|
||||
# Ornith-1.0-9B-GGUF
|
||||
|
||||
Aloha! 🌺 Today, we are releasing Ornith-1.0, a self-improving family of open-source models for agentic coding.
|
||||
|
||||
Highlights:
|
||||
|
||||
- **State-of-the-Art Coding Agents**: Available in 9B-Dense, 31B-Dense, 35B-MoE, and 397B-MoE (post-trained on top of Gemma 4 and Qwen 3.5), achieving state-of-the-art performance among open-source models of comparable size on coding benchmarks such as Terminal-Bench 2.1, SWE-Bench, NL2Repo and OpenClaw.
|
||||
- **Self-Improving Training Framework**: Ornith-1.0 employs RL to learn to generate not only solution rollouts, but also the scaffold that drives those rollouts. By jointly optimizing the scaffold and the resulting solution, the model discovers better search trajectories and generates higher-quality solutions.
|
||||
- **Licence**: MIT licensed, globally accessible, and free from regional limitations.
|
||||
|
||||
## Ornith 1.0 9B
|
||||
|
||||
This model card documents **Ornith-1.0-9B**, the most lightweight member of the Ornith family, designed for efficient single-GPU deployment.
|
||||
|
||||
### Benchmarks
|
||||
|
||||
Ornith-1.0-9B
|
||||
Qwen3.5-9B
|
||||
Qwen3.5-35B
|
||||
Gemma4-12B
|
||||
Gemma4-31B
|
||||
|
||||
Agentic Coding
|
||||
|
||||
...
|
||||
license: "mit"
|
||||
tags:
|
||||
- llm
|
||||
- gguf
|
||||
overrides:
|
||||
backend: llama-cpp
|
||||
function:
|
||||
automatic_tool_parsing_fallback: true
|
||||
grammar:
|
||||
disable: true
|
||||
known_usecases:
|
||||
- chat
|
||||
options:
|
||||
- use_jinja:true
|
||||
parameters:
|
||||
model: llama-cpp/models/Ornith-1.0-9B-GGUF/ornith-1.0-9b-Q4_K_M.gguf
|
||||
template:
|
||||
use_tokenizer_template: true
|
||||
files:
|
||||
- filename: llama-cpp/models/Ornith-1.0-9B-GGUF/ornith-1.0-9b-Q4_K_M.gguf
|
||||
sha256: 5720d1f671b4996481274fffe01868c3c36e87c135cc8538471cc7bd6087b106
|
||||
uri: https://huggingface.co/deepreinforce-ai/Ornith-1.0-9B-GGUF/resolve/main/ornith-1.0-9b-Q4_K_M.gguf
|
||||
- name: "ornith-1.0-35b"
|
||||
url: "github:mudler/LocalAI/gallery/virtual.yaml@master"
|
||||
urls:
|
||||
|
||||
@@ -141,6 +141,38 @@ copy_elf_deps() {
|
||||
done < <(ldd "$elf" 2>/dev/null | awk '/=>/ && $3 ~ /^\// {print $3}')
|
||||
}
|
||||
|
||||
# Bundle the transitive shared-library dependencies of every library already
# present in a lib dir (first argument, defaulting to $TARGET_LIB_DIR).
#
# The per-vendor packagers below copy an explicit allowlist of top-level
# runtime libs, and those libs pull in transitive deps that are not on the list
# (e.g. ROCm's librocprofiler-register.so.0, libnuma, libdrm_amdgpu). Backends
# run through the bundled lib/ld.so with LD_LIBRARY_PATH=lib (see run.sh), so an
# unbundled transitive dep is a hard load failure (issue #10537:
# "librocprofiler-register.so.0: cannot open shared object file"). ldd resolves
# the full recursive closure, so one pass over the already-bundled libs is
# enough; core libc-family deps are skipped via copy_elf_deps/is_core_lib so we
# never shadow the loader's own libc/libstdc++.
#
# Returns 0 without doing anything when ldd is not installed.
sweep_transitive_deps() {
    local dir="${1:-$TARGET_LIB_DIR}"
    if ! command -v ldd >/dev/null 2>&1; then
        return 0
    fi

    # Take the snapshot before sweeping: copy_elf_deps adds files as it runs,
    # and since ldd already reports the full recursive closure only the libs
    # present at this point need to be visited.
    # `local x=$(...)` keeps set -e from tripping on shopt -p's nonzero exit.
    local old_nullglob=$(shopt -p nullglob)
    shopt -s nullglob
    local libs=("$dir"/*.so*)
    eval "$old_nullglob"

    local lib
    for lib in "${libs[@]}"; do
        # Skip entries that no longer exist, and symlinks: a symlink's real
        # target is in the snapshot and gets swept on its own.
        if [ ! -e "$lib" ] || [ -L "$lib" ]; then
            continue
        fi
        copy_elf_deps "$lib"
    done
}
|
||||
|
||||
# Package NVIDIA CUDA libraries
|
||||
package_cuda_libs() {
|
||||
echo "Packaging CUDA libraries for BUILD_TYPE=${BUILD_TYPE}..."
|
||||
@@ -185,6 +217,10 @@ package_cuda_libs() {
|
||||
# cp -arfL /usr/local/cuda/targets "$TARGET_LIB_DIR/../cuda/" 2>/dev/null || true
|
||||
# fi
|
||||
|
||||
# Pull in transitive deps the allowlist misses so the backend is
|
||||
# self-contained (same class of failure as #10537).
|
||||
sweep_transitive_deps "$TARGET_LIB_DIR"
|
||||
|
||||
echo "CUDA libraries packaged successfully"
|
||||
}
|
||||
|
||||
@@ -261,6 +297,10 @@ package_rocm_libs() {
|
||||
fi
|
||||
done
|
||||
|
||||
# Pull in transitive deps the allowlist misses (librocprofiler-register.so.0,
|
||||
# libnuma, libdrm_amdgpu, ...) so the backend is self-contained. See #10537.
|
||||
sweep_transitive_deps "$TARGET_LIB_DIR"
|
||||
|
||||
echo "ROCm libraries packaged successfully"
|
||||
}
|
||||
|
||||
@@ -303,6 +343,10 @@ package_intel_libs() {
|
||||
fi
|
||||
done
|
||||
|
||||
# Pull in transitive deps the allowlist misses so the backend is
|
||||
# self-contained (same class of failure as #10537).
|
||||
sweep_transitive_deps "$TARGET_LIB_DIR"
|
||||
|
||||
echo "Intel oneAPI libraries packaged successfully"
|
||||
}
|
||||
|
||||
@@ -432,6 +476,7 @@ export -f copy_lib
|
||||
export -f copy_libs_glob
|
||||
export -f is_core_lib
|
||||
export -f copy_elf_deps
|
||||
export -f sweep_transitive_deps
|
||||
export -f package_cuda_libs
|
||||
export -f package_rocm_libs
|
||||
export -f package_intel_libs
|
||||
|
||||
54
scripts/build/package-gpu-libs_test.sh
Executable file
54
scripts/build/package-gpu-libs_test.sh
Executable file
@@ -0,0 +1,54 @@
|
||||
#!/bin/bash
# Regression test for scripts/build/package-gpu-libs.sh (issue #10537).
#
# The per-vendor packagers copy an explicit allowlist of top-level GPU runtime
# libs but used to miss their transitive dependencies (e.g. ROCm's
# librocprofiler-register.so.0). Backends run through the bundled lib/ld.so with
# LD_LIBRARY_PATH=lib, so an unbundled transitive dep is a fatal
# "cannot open shared object file" at load time.
#
# Steps: build a primary lib that links a transitive lib, simulate the allowlist
# step (primary copied, transitive not), run the sweep, and assert that the
# transitive dependency ended up in the target dir.
# Requires gcc + ldd (present in build images); skipped when either is missing.
set -euo pipefail

CURDIR=$(dirname "$(realpath "$0")")
SCRIPT="$CURDIR/package-gpu-libs.sh"

for tool in gcc ldd; do
    if ! command -v "$tool" >/dev/null 2>&1; then
        echo "SKIP: gcc/ldd not available"
        exit 0
    fi
done

# Scratch directory, removed on any exit.
WORK=$(mktemp -d)
trap 'rm -rf "$WORK"' EXIT

# Transitive dependency (stand-in for librocprofiler-register.so.0).
printf '%s\n' 'int transitive_fn(void){return 42;}' > "$WORK/transitive.c"
gcc -shared -fPIC -o "$WORK/libfaketransitive.so.0" "$WORK/transitive.c"

# Primary allowlisted lib (stand-in for libhipblas.so) that links it.
printf '%s\n' 'int transitive_fn(void); int primary_fn(void){return transitive_fn();}' > "$WORK/primary.c"
gcc -shared -fPIC -o "$WORK/libfakeprimary.so.0" "$WORK/primary.c" \
    -L"$WORK" -l:libfaketransitive.so.0 -Wl,-rpath,"$WORK"

# Simulate the allowlist step: primary already bundled, transitive not.
TARGET="$WORK/target"
mkdir -p "$TARGET"
cp "$WORK/libfakeprimary.so.0" "$TARGET/"

# Make the transitive dep resolvable like /opt/rocm libs are in the build image.
export LD_LIBRARY_PATH="$WORK:${LD_LIBRARY_PATH:-}"

# shellcheck source=/dev/null
source "$SCRIPT" "$TARGET"
sweep_transitive_deps "$TARGET"

if [ ! -e "$TARGET/libfaketransitive.so.0" ]; then
    echo "FAIL: transitive dependency was NOT bundled (regression of #10537)"
    ls -la "$TARGET"
    exit 1
fi

echo "PASS: transitive dependency was bundled by sweep_transitive_deps"
exit 0
|
||||
Reference in New Issue
Block a user