Compare commits

..

1 Commits

Author SHA1 Message Date
dependabot[bot]
738fe0f49d chore(deps): bump the pip group across 3 directories with 1 update
Bumps the pip group with 1 update in the /backend/python/sglang directory: torch.
Bumps the pip group with 1 update in the /backend/python/trl directory: torch.
Bumps the pip group with 1 update in the /backend/python/vllm-omni directory: torch.


Updates `torch` from 2.9.0 to 2.12.0+cpu

Updates `torch` from 2.10.0 to 2.12.0+cpu

Updates `torch` from 2.7.0 to 2.12.0+cu130

---
updated-dependencies:
- dependency-name: torch
  dependency-version: 2.12.0+cpu
  dependency-type: direct:production
  dependency-group: pip
- dependency-name: torch
  dependency-version: 2.12.0+cpu
  dependency-type: direct:production
  dependency-group: pip
- dependency-name: torch
  dependency-version: 2.12.0+cu130
  dependency-type: direct:production
  dependency-group: pip
...

Signed-off-by: dependabot[bot] <support@github.com>
2026-06-28 09:54:23 +00:00
29 changed files with 360 additions and 895 deletions

View File

@@ -82,7 +82,7 @@ jobs:
# as the Linux registry cache.
- name: Restore Homebrew cache
id: brew-cache
uses: actions/cache/restore@v6
uses: actions/cache/restore@v4
with:
path: |
~/Library/Caches/Homebrew/downloads
@@ -142,7 +142,7 @@ jobs:
- name: Save Homebrew cache
if: github.event_name != 'pull_request' && steps.brew-cache.outputs.cache-hit != 'true'
uses: actions/cache/save@v6
uses: actions/cache/save@v4
with:
path: |
~/Library/Caches/Homebrew/downloads
@@ -178,7 +178,7 @@ jobs:
- name: Restore ccache
if: inputs.backend == 'llama-cpp'
id: ccache-cache
uses: actions/cache/restore@v6
uses: actions/cache/restore@v4
with:
path: ~/Library/Caches/ccache
key: ccache-llama-${{ runner.arch }}-${{ steps.llama-version.outputs.version }}-${{ github.run_id }}
@@ -211,7 +211,7 @@ jobs:
- name: Restore Python wheel cache
if: inputs.lang == 'python'
id: pyenv-cache
uses: actions/cache/restore@v6
uses: actions/cache/restore@v4
with:
path: |
~/Library/Caches/pip
@@ -256,14 +256,14 @@ jobs:
- name: Save ccache
if: inputs.backend == 'llama-cpp' && github.event_name != 'pull_request'
uses: actions/cache/save@v6
uses: actions/cache/save@v4
with:
path: ~/Library/Caches/ccache
key: ccache-llama-${{ runner.arch }}-${{ steps.llama-version.outputs.version }}-${{ github.run_id }}
- name: Save Python wheel cache
if: inputs.lang == 'python' && github.event_name != 'pull_request' && steps.pyenv-cache.outputs.cache-hit != 'true'
uses: actions/cache/save@v6
uses: actions/cache/save@v4
with:
path: |
~/Library/Caches/pip

View File

@@ -1,5 +1,5 @@
LLAMA_VERSION?=dbdaece23de9ac63f2e7ca9e6bfcdc4fc156a3fa
LLAMA_VERSION?=0ed235ea2c17a19fc8238668653946721ed136fd
LLAMA_REPO?=https://github.com/ggerganov/llama.cpp
CMAKE_ARGS?=

View File

@@ -8,7 +8,7 @@ JOBS?=$(shell nproc --ignore=1)
# CrispASR version (release tag)
CRISPASR_REPO?=https://github.com/CrispStrobe/CrispASR
CRISPASR_VERSION?=6b50f76e59700665358a1aabf5295597fa318e06
CRISPASR_VERSION?=6514c9da00b03a2f0f1b49a43fae4f3a01a41844
SO_TARGET?=libgocrispasr.so
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF

View File

@@ -1,6 +1,6 @@
# face-detect backend Makefile.
#
# Upstream pin lives below as FACEDETECT_VERSION?=e22260d5d5490b37b021b7f795079f386d553afd
# Upstream pin lives below as FACEDETECT_VERSION?=06914b0... (.github/bump_deps.sh
# can find and update it - matches the voice-detect / parakeet.cpp / whisper.cpp
# convention).
#
@@ -14,7 +14,7 @@
# The default target below does the proper clone-at-pin + cmake build so CI does
# not need a side-checkout.
FACEDETECT_VERSION?=e22260d5d5490b37b021b7f795079f386d553afd
FACEDETECT_VERSION?=06914b077d52f90d5421299138e7be6bdd06b5e8
FACEDETECT_REPO?=https://github.com/mudler/face-detect.cpp
GOCMD?=go

View File

@@ -8,7 +8,7 @@ JOBS?=$(shell nproc --ignore=1)
# stablediffusion.cpp (ggml)
STABLEDIFFUSION_GGML_REPO?=https://github.com/leejet/stable-diffusion.cpp
STABLEDIFFUSION_GGML_VERSION?=c1790754d31bec0731ed5fddc9d5b9ff22ee19cd
STABLEDIFFUSION_GGML_VERSION?=9956436c925a367daeab097598b1ea1f32d3503f
CMAKE_ARGS+=-DGGML_MAX_NAME=128

View File

@@ -1,6 +1,6 @@
# voice-detect backend Makefile.
#
# Upstream pin lives below as VOICEDETECT_VERSION?=1db1759572c90faef6f3a78c36b5941a096a9f89
# Upstream pin lives below as VOICEDETECT_VERSION?=3d51077... (.github/bump_deps.sh
# can find and update it - matches the parakeet.cpp / whisper.cpp / ds4 convention).
#
# Local dev shortcut: if you already have an out-of-tree voice-detect.cpp build,
@@ -13,7 +13,7 @@
# The default target below does the proper clone-at-pin + cmake build so CI does
# not need a side-checkout.
VOICEDETECT_VERSION?=1db1759572c90faef6f3a78c36b5941a096a9f89
VOICEDETECT_VERSION?=3d510772357538c5182808ac7de2278b84824e24
VOICEDETECT_REPO?=https://github.com/mudler/voice-detect.cpp
GOCMD?=go

View File

@@ -13,17 +13,6 @@ fi
# fish-speech uses pyrootutils which requires a .project-root marker
touch "${backend_dir}/.project-root"
# On darwin arm64 the transitive `tokenizers` dep compiles its Rust extension
# from source (Linux uses prebuilt manylinux wheels, so it never compiles
# there). The pinned tokenizers crate that fish-speech's stack resolves to
# contains a `&T` -> `&mut T` cast that trips the now-deny-by-default
# `invalid_reference_casting` lint in the macOS runner's newer Rust toolchain,
# breaking the build (seen in the v4.5.5 release CI fish-speech darwin/metal
# job). Allow that lint so the unchanged third-party crate compiles as before.
# Append rather than clobber any pre-existing RUSTFLAGS; harmless on Linux
# where no Rust compile happens.
export RUSTFLAGS="${RUSTFLAGS:-} -A invalid_reference_casting"
installRequirements
# Clone fish-speech source (the pip package doesn't include inference modules)

View File

@@ -1,6 +1,6 @@
--extra-index-url https://download.pytorch.org/whl/cpu
accelerate
torch==2.9.0
torch==2.12.0+cpu
torchvision
torchaudio
transformers

View File

@@ -6,7 +6,7 @@
# for cublas12 so uv consults this index alongside PyPI.
--extra-index-url https://download.pytorch.org/whl/cu128
accelerate
torch==2.9.1
torch==2.12.0+cpu
torchvision
torchaudio
transformers

View File

@@ -1,9 +1,9 @@
--extra-index-url https://download.pytorch.org/whl/cpu
torch==2.10.0
torch==2.12.0+cpu
trl
peft
datasets>=3.0.0
transformers>=4.56.2
transformers>=5.12.1
accelerate>=1.4.0
huggingface-hub>=1.3.0
sentencepiece

View File

@@ -1,8 +1,8 @@
torch==2.10.0
torch==2.12.0+cpu
trl
peft
datasets>=3.0.0
transformers>=4.56.2
transformers>=5.12.1
accelerate>=1.4.0
huggingface-hub>=1.3.0
sentencepiece

View File

@@ -1,8 +1,8 @@
torch==2.10.0
torch==2.12.0+cpu
trl
peft
datasets>=3.0.0
transformers>=4.56.2
transformers>=5.12.1
accelerate>=1.4.0
huggingface-hub>=1.3.0
sentencepiece

View File

@@ -1,8 +1,8 @@
torch==2.10.0
torch==2.12.0+cpu
trl
peft
datasets>=3.0.0
transformers>=4.56.2
transformers>=5.12.1
accelerate>=1.4.0
huggingface-hub>=1.3.0
sentencepiece

View File

@@ -1,4 +1,4 @@
accelerate
torch==2.7.0
torch==2.12.0+cu130
transformers
bitsandbytes

View File

@@ -104,7 +104,7 @@ if [ "$(uname -s)" = "Darwin" ]; then
# can rewrite it. Darwin therefore follows vllm-metal and can lag the Linux
# vllm pin (requirements-cublas13-after.txt, bumped independently against
# vllm/vllm) until vllm-metal supports a newer vLLM.
VLLM_METAL_VERSION="v0.3.0.dev20260628073537"
VLLM_METAL_VERSION="v0.3.0.dev20260622062346"
# The coupled vLLM source version is whatever this vllm-metal release builds
# against -- it declares it in its own installer as `vllm_v=`. Derive it from

View File

@@ -429,7 +429,7 @@ func (l *Launcher) CheckForUpdates() (bool, string, error) {
}
// DownloadUpdate downloads the latest version
func (l *Launcher) DownloadUpdate(version string, progressCallback func(downloaded, total int64)) error {
func (l *Launcher) DownloadUpdate(version string, progressCallback func(float64)) error {
return l.releaseManager.DownloadRelease(version, progressCallback)
}
@@ -486,6 +486,7 @@ func (l *Launcher) showDownloadLocalAIDialog() {
fyne.DoAndWait(func() {
// Create a standalone window for the download dialog
dialogWindow := l.app.NewWindow("LocalAI Installation Required")
dialogWindow.Resize(fyne.NewSize(500, 350))
dialogWindow.CenterOnScreen()
dialogWindow.SetCloseIntercept(func() {
dialogWindow.Close()
@@ -547,7 +548,6 @@ func (l *Launcher) showDownloadLocalAIDialog() {
)
dialogWindow.SetContent(content)
resizeToContent(dialogWindow, content)
dialogWindow.Show()
})
}
@@ -621,134 +621,88 @@ func (l *Launcher) showDownloadError(title, message string) {
}
// showDownloadProgress shows a standalone progress window for downloading LocalAI
// after a fresh install (no LocalAI binary present yet).
func (l *Launcher) showDownloadProgress(version, title string) {
l.showDownloadProgressWindow(version, title, func(win fyne.Window) {
dialog.ShowConfirm("Installation Complete",
"LocalAI has been downloaded and installed successfully. You can now start LocalAI from the launcher.",
func(bool) {
win.Close()
l.updateStatus("LocalAI installed successfully")
if l.systray != nil {
l.systray.recreateMenu()
}
}, win)
})
}
// showDownloadProgressWindow renders the download progress popup shared by every
// "download/upgrade LocalAI" entry point. It owns the progress bar, the
// human-readable byte readout, resume-aware retry, and content-fit window
// sizing so the behaviour stays identical everywhere. onSuccess runs (on the UI
// goroutine) once the download verifies, and is responsible for the success
// dialog and any follow-up; the window is passed in so it can be parented/closed.
func (l *Launcher) showDownloadProgressWindow(version, title string, onSuccess func(win fyne.Window)) {
fyne.DoAndWait(func() {
// Create progress window
progressWindow := l.app.NewWindow("Downloading LocalAI")
progressWindow.Resize(fyne.NewSize(400, 250))
progressWindow.CenterOnScreen()
progressWindow.SetCloseIntercept(func() {
progressWindow.Close()
})
// Progress bar
progressBar := widget.NewProgressBar()
progressBar.SetValue(0)
// Status label. Truncate with an ellipsis so a long "Download failed:
// <url>" message can't stretch the window (and progress bar) to fit the
// whole error on one line.
// whole error on one line; the full error is shown in the dialog below.
statusLabel := widget.NewLabel("Preparing download...")
statusLabel.Truncation = fyne.TextTruncateEllipsis
// Release notes button
releaseNotesButton := widget.NewButton("View Release Notes", func() {
releaseNotesURL, err := l.githubReleaseNotesURL(version)
if err != nil {
log.Printf("Failed to parse URL: %v", err)
return
}
l.app.OpenURL(releaseNotesURL)
})
// Retry button: hidden until a download fails. GitHub downloads are
// flaky, and the underlying download resumes from the partial file, so
// a retry continues where it left off rather than starting over.
retryButton := widget.NewButton("Retry", nil)
retryButton.Importance = widget.HighImportance
retryButton.Hide()
buttonRow := container.NewHBox(releaseNotesButton, retryButton)
content := container.NewVBox(
// Progress container
progressContainer := container.NewVBox(
widget.NewLabel(title),
progressBar,
statusLabel,
widget.NewSeparator(),
buttonRow,
releaseNotesButton,
)
progressWindow.SetContent(content)
resizeToContent(progressWindow, content)
var startDownload func()
startDownload = func() {
retryButton.Hide()
progressBar.SetValue(0)
statusLabel.SetText("Preparing download...")
resizeToContent(progressWindow, content)
go func() {
err := l.DownloadUpdate(version, func(downloaded, total int64) {
fyne.Do(func() {
if total > 0 {
progressBar.SetValue(float64(downloaded) / float64(total))
statusLabel.SetText(fmt.Sprintf("Downloading… %s / %s", formatBytes(downloaded), formatBytes(total)))
} else {
statusLabel.SetText(fmt.Sprintf("Downloading… %s", formatBytes(downloaded)))
}
})
})
fyne.Do(func() {
if err != nil {
statusLabel.SetText(fmt.Sprintf("Download failed: %v", err))
retryButton.Show()
resizeToContent(progressWindow, content)
return
}
progressBar.SetValue(1.0)
statusLabel.SetText("Download complete")
onSuccess(progressWindow)
})
}()
}
retryButton.OnTapped = startDownload
progressWindow.SetContent(progressContainer)
progressWindow.Show()
startDownload()
// Start download in background
go func() {
err := l.DownloadUpdate(version, func(progress float64) {
// Update progress bar
fyne.Do(func() {
progressBar.SetValue(progress)
percentage := int(progress * 100)
statusLabel.SetText(fmt.Sprintf("Downloading... %d%%", percentage))
})
})
// Handle completion
fyne.Do(func() {
if err != nil {
statusLabel.SetText(fmt.Sprintf("Download failed: %v", err))
// Show error dialog
dialog.ShowError(err, progressWindow)
} else {
statusLabel.SetText("Download completed successfully!")
progressBar.SetValue(1.0)
// Show success dialog
dialog.ShowConfirm("Installation Complete",
"LocalAI has been downloaded and installed successfully. You can now start LocalAI from the launcher.",
func(close bool) {
progressWindow.Close()
// Update status and refresh systray menu
l.updateStatus("LocalAI installed successfully")
if l.systray != nil {
l.systray.recreateMenu()
}
}, progressWindow)
}
})
}()
})
}
// resizeToContent sizes a window to fit its content (with a sane minimum width)
// so the dialog doesn't show a large blank gap below the last widget.
func resizeToContent(w fyne.Window, content fyne.CanvasObject) {
size := content.MinSize()
if size.Width < 400 {
size.Width = 400
}
w.Resize(size)
}
// formatBytes renders a byte count as a human-readable size (e.g. "12.3 MB").
func formatBytes(b int64) string {
const unit = 1024
if b < unit {
return fmt.Sprintf("%d B", b)
}
div, exp := int64(unit), 0
for n := b / unit; n >= unit; n /= unit {
div *= unit
exp++
}
return fmt.Sprintf("%.1f %cB", float64(b)/float64(div), "KMGTPE"[exp])
}
// monitorLogs monitors the output of LocalAI and adds it to the log buffer
func (l *Launcher) monitorLogs(reader io.Reader, prefix string) {
scanner := bufio.NewScanner(reader)

View File

@@ -11,7 +11,6 @@ import (
"net/http"
"os"
"os/exec"
"path"
"path/filepath"
"runtime"
"strings"
@@ -51,12 +50,6 @@ type ReleaseManager struct {
ChecksumsPath string
// MetadataPath is where version metadata is stored
MetadataPath string
// BaseDownloadURL is the base URL release assets are downloaded from
// (defaults to https://github.com; overridable for testing)
BaseDownloadURL string
// RetryBackoff is the base wait between download attempts; the Nth retry
// waits N*RetryBackoff (defaults to 1s; lowered in tests)
RetryBackoff time.Duration
// HTTPClient is the HTTP client used for downloads
HTTPClient *http.Client
}
@@ -69,94 +62,28 @@ func NewReleaseManager() *ReleaseManager {
metadataPath := filepath.Join(homeDir, ".localai", "metadata")
return &ReleaseManager{
GitHubOwner: "mudler",
GitHubRepo: "LocalAI",
BinaryPath: binaryPath,
CurrentVersion: internal.PrintableVersion(),
ChecksumsPath: checksumsPath,
MetadataPath: metadataPath,
BaseDownloadURL: "https://github.com",
RetryBackoff: 1 * time.Second,
HTTPClient: httpclient.NewWithTimeout(30*time.Second, httpclient.WithFollowRedirects()),
GitHubOwner: "mudler",
GitHubRepo: "LocalAI",
BinaryPath: binaryPath,
CurrentVersion: internal.PrintableVersion(),
ChecksumsPath: checksumsPath,
MetadataPath: metadataPath,
HTTPClient: httpclient.NewWithTimeout(30*time.Second, httpclient.WithFollowRedirects()),
}
}
// GetLatestRelease resolves the latest LocalAI release.
//
// It first follows the github.com "releases/latest" redirect, which reveals the
// latest tag in the final URL and—crucially—is NOT subject to the
// 60-requests/hour unauthenticated rate limit of api.github.com. That limit is
// per-IP, so on shared/NAT/CGNAT/cloud addresses the API returns 403 almost
// immediately (e.g. on a fresh install with no LocalAI present yet). The
// redirect avoids that entirely. The richer JSON API is kept only as a fallback.
//
// Only the version is consumed by callers, so the redirect's tag is sufficient.
// GetLatestRelease fetches the latest release information from GitHub
func (rm *ReleaseManager) GetLatestRelease() (*Release, error) {
version, redirectErr := rm.latestVersionFromRedirect()
if redirectErr == nil {
return &Release{Version: version}, nil
}
log.Printf("Could not resolve latest version via release redirect (%v); falling back to GitHub API", redirectErr)
release, apiErr := rm.latestReleaseFromAPI()
if apiErr != nil {
// Surface both failures so a rate-limited API doesn't mask the (usually
// more relevant) redirect error.
return nil, fmt.Errorf("failed to fetch latest release: %v (redirect: %v)", apiErr, redirectErr)
}
return release, nil
}
// latestVersionFromRedirect returns the latest tag by following the github.com
// "releases/latest" redirect to ".../releases/tag/<tag>".
func (rm *ReleaseManager) latestVersionFromRedirect() (string, error) {
url := fmt.Sprintf("%s/%s/%s/releases/latest", rm.BaseDownloadURL, rm.GitHubOwner, rm.GitHubRepo)
resp, err := rm.HTTPClient.Get(url)
if err != nil {
return "", err
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
return "", fmt.Errorf("unexpected status %s", resp.Status)
}
// After the redirect is followed, the final request URL is the tag page.
version := path.Base(resp.Request.URL.Path)
if version == "" || version == "." || version == "latest" {
return "", fmt.Errorf("could not determine version from %s", resp.Request.URL.String())
}
return version, nil
}
// latestReleaseFromAPI fetches the latest release JSON from api.github.com. This
// is the fallback path; it is rate-limited unless GITHUB_TOKEN is set.
func (rm *ReleaseManager) latestReleaseFromAPI() (*Release, error) {
url := fmt.Sprintf("https://api.github.com/repos/%s/%s/releases/latest", rm.GitHubOwner, rm.GitHubRepo)
req, err := http.NewRequest(http.MethodGet, url, nil)
if err != nil {
return nil, err
}
req.Header.Set("Accept", "application/vnd.github+json")
// An optional token lifts the unauthenticated 60/hour limit to 5000/hour.
if token := os.Getenv("GITHUB_TOKEN"); token != "" {
req.Header.Set("Authorization", "Bearer "+token)
}
resp, err := rm.HTTPClient.Do(req)
resp, err := rm.HTTPClient.Get(url)
if err != nil {
return nil, fmt.Errorf("failed to fetch latest release: %w", err)
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
if (resp.StatusCode == http.StatusForbidden || resp.StatusCode == http.StatusTooManyRequests) &&
resp.Header.Get("X-RateLimit-Remaining") == "0" {
return nil, fmt.Errorf("GitHub API rate limit exceeded (status %d); retry later or set GITHUB_TOKEN to raise the limit", resp.StatusCode)
}
return nil, fmt.Errorf("status %d", resp.StatusCode)
return nil, fmt.Errorf("failed to fetch latest release: status %d", resp.StatusCode)
}
// Parse the JSON response properly
@@ -179,7 +106,7 @@ func (rm *ReleaseManager) latestReleaseFromAPI() (*Release, error) {
}
// DownloadRelease downloads a specific version of LocalAI
func (rm *ReleaseManager) DownloadRelease(version string, progressCallback func(downloaded, total int64)) error {
func (rm *ReleaseManager) DownloadRelease(version string, progressCallback func(float64)) error {
// Ensure the binary directory exists
if err := os.MkdirAll(rm.BinaryPath, 0755); err != nil {
return fmt.Errorf("failed to create binary directory: %w", err)
@@ -190,16 +117,16 @@ func (rm *ReleaseManager) DownloadRelease(version string, progressCallback func(
localPath := filepath.Join(rm.BinaryPath, "local-ai")
// Download the binary
downloadURL := fmt.Sprintf("%s/%s/%s/releases/download/%s/%s",
rm.BaseDownloadURL, rm.GitHubOwner, rm.GitHubRepo, version, binaryName)
downloadURL := fmt.Sprintf("https://github.com/%s/%s/releases/download/%s/%s",
rm.GitHubOwner, rm.GitHubRepo, version, binaryName)
if err := rm.downloadFile(downloadURL, localPath, progressCallback); err != nil {
return fmt.Errorf("failed to download binary: %w", err)
}
// Download and verify checksums
checksumURL := fmt.Sprintf("%s/%s/%s/releases/download/%s/LocalAI-%s-checksums.txt",
rm.BaseDownloadURL, rm.GitHubOwner, rm.GitHubRepo, version, version)
checksumURL := fmt.Sprintf("https://github.com/%s/%s/releases/download/%s/LocalAI-%s-checksums.txt",
rm.GitHubOwner, rm.GitHubRepo, version, version)
checksumPath := filepath.Join(rm.BinaryPath, "checksums.txt")
manualChecksumPath := filepath.Join(rm.ChecksumsPath, fmt.Sprintf("checksums-%s.txt", version))
@@ -227,10 +154,6 @@ func (rm *ReleaseManager) DownloadRelease(version string, progressCallback func(
// Verify the checksum if we have a checksum file
if _, err := os.Stat(checksumPath); err == nil {
if err := rm.VerifyChecksum(localPath, checksumPath, binaryName); err != nil {
// Discard the corrupt binary (and any leftover partial) so the next
// retry starts from a clean slate rather than resuming corruption.
os.Remove(localPath)
os.Remove(localPath + ".part")
return fmt.Errorf("checksum verification failed: %w", err)
}
log.Printf("Checksum verification successful")
@@ -273,88 +196,44 @@ func (rm *ReleaseManager) GetBinaryName(version string) string {
}
// downloadFile downloads a file from a URL to a local path with optional progress callback
func (rm *ReleaseManager) downloadFile(url, filepath string, progressCallback func(downloaded, total int64)) error {
func (rm *ReleaseManager) downloadFile(url, filepath string, progressCallback func(float64)) error {
return rm.downloadFileWithRetry(url, filepath, progressCallback, 3)
}
// downloadFileWithRetry downloads a file with retry and HTTP Range resume.
//
// The body is streamed to "<dest>.part" and only renamed to dest on success, so
// a dropped connection leaves a partial file that the next attempt continues via
// a "Range: bytes=N-" request instead of restarting from zero. This matters for
// GitHub release downloads, which are large and flaky.
func (rm *ReleaseManager) downloadFileWithRetry(url, dest string, progressCallback func(downloaded, total int64), maxRetries int) error {
partPath := dest + ".part"
// downloadFileWithRetry downloads a file from a URL with retry logic
func (rm *ReleaseManager) downloadFileWithRetry(url, filepath string, progressCallback func(float64), maxRetries int) error {
var lastErr error
for attempt := 1; attempt <= maxRetries; attempt++ {
if attempt > 1 {
log.Printf("Retrying download (attempt %d/%d): %s", attempt, maxRetries, url)
time.Sleep(time.Duration(attempt) * rm.RetryBackoff)
time.Sleep(time.Duration(attempt) * time.Second)
}
// Resume from however much we already have on disk.
var offset int64
if fi, err := os.Stat(partPath); err == nil {
offset = fi.Size()
}
req, err := http.NewRequest(http.MethodGet, url, nil)
if err != nil {
return err
}
if offset > 0 {
req.Header.Set("Range", fmt.Sprintf("bytes=%d-", offset))
}
resp, err := rm.HTTPClient.Do(req)
resp, err := rm.HTTPClient.Get(url)
if err != nil {
lastErr = err
continue
}
switch resp.StatusCode {
case http.StatusOK:
// Server ignored the Range (or we had nothing): start fresh.
offset = 0
case http.StatusPartialContent:
// Resume: append to the existing partial file.
case http.StatusRequestedRangeNotSatisfiable:
// Stale or already-complete partial: discard and restart fresh.
resp.Body.Close()
os.Remove(partPath)
lastErr = fmt.Errorf("partial download no longer valid (status %s), restarting", resp.Status)
continue
default:
if resp.StatusCode != http.StatusOK {
resp.Body.Close()
lastErr = fmt.Errorf("bad status: %s", resp.Status)
continue
}
var out *os.File
if offset > 0 {
out, err = os.OpenFile(partPath, os.O_WRONLY|os.O_APPEND, 0644)
} else {
out, err = os.Create(partPath)
}
out, err := os.Create(filepath)
if err != nil {
resp.Body.Close()
return err
}
// On a 206 the Content-Length is the remaining bytes, so the full size
// is what we already have plus what's still to come.
total := resp.ContentLength
if offset > 0 && total > 0 {
total += offset
}
// Create a progress reader if callback is provided
var reader io.Reader = resp.Body
if progressCallback != nil && total > 0 {
if progressCallback != nil && resp.ContentLength > 0 {
reader = &progressReader{
Reader: resp.Body,
Total: total,
Current: offset,
Total: resp.ContentLength,
Callback: progressCallback,
}
}
@@ -364,14 +243,11 @@ func (rm *ReleaseManager) downloadFileWithRetry(url, dest string, progressCallba
out.Close()
if err != nil {
// Keep the partial file so the next attempt can resume from it.
lastErr = err
os.Remove(filepath)
continue
}
if err := os.Rename(partPath, dest); err != nil {
return err
}
return nil
}
@@ -446,21 +322,20 @@ func (rm *ReleaseManager) saveVersionMetadata(version string) error {
return nil
}
// progressReader wraps an io.Reader to provide download progress as a
// (downloaded, total) byte count so callers can render both a progress bar and
// a human-readable size.
// progressReader wraps an io.Reader to provide download progress
type progressReader struct {
io.Reader
Total int64
Current int64
Callback func(downloaded, total int64)
Callback func(float64)
}
func (pr *progressReader) Read(p []byte) (int, error) {
n, err := pr.Reader.Read(p)
pr.Current += int64(n)
if pr.Callback != nil {
pr.Callback(pr.Current, pr.Total)
progress := float64(pr.Current) / float64(pr.Total)
pr.Callback(progress)
}
return n, err
}

View File

@@ -1,17 +1,9 @@
package launcher_test
import (
"crypto/sha256"
"encoding/hex"
"fmt"
"net/http"
"net/http/httptest"
"os"
"path/filepath"
"runtime"
"strconv"
"strings"
"sync"
"time"
. "github.com/onsi/ginkgo/v2"
@@ -186,221 +178,4 @@ var _ = Describe("ReleaseManager", func() {
Expect(err.Error()).To(ContainSubstring("checksum not found"))
})
})
Describe("DownloadRelease resume and retry", func() {
var (
version string
binaryName string
content []byte
checksums string
finalPath string
partPath string
)
BeforeEach(func() {
version = "v9.9.9"
binaryName = rm.GetBinaryName(version)
// Deterministic, non-trivial content so resume/append bugs surface.
content = make([]byte, 4096)
for i := range content {
content[i] = byte(i % 251)
}
sum := sha256.Sum256(content)
checksums = fmt.Sprintf("%s %s\n", hex.EncodeToString(sum[:]), binaryName)
finalPath = filepath.Join(tempDir, "local-ai")
partPath = finalPath + ".part"
// Isolate the persistent checksum/metadata dirs to the temp dir so
// the test never touches the real ~/.localai and existing checksum
// files don't short-circuit the download.
rm.ChecksumsPath = filepath.Join(tempDir, "checksums")
rm.MetadataPath = filepath.Join(tempDir, "metadata")
rm.GitHubOwner = "owner"
rm.GitHubRepo = "repo"
rm.RetryBackoff = time.Millisecond
Expect(os.MkdirAll(tempDir, 0755)).To(Succeed())
})
It("resumes from a partial .part file using a Range request", func() {
Expect(os.WriteFile(partPath, content[:1024], 0644)).To(Succeed())
var mu sync.Mutex
sawRange := false
binBytesServed := 0
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if strings.HasSuffix(r.URL.Path, "checksums.txt") {
_, _ = w.Write([]byte(checksums))
return
}
if rangeHdr := r.Header.Get("Range"); rangeHdr != "" {
var start int
_, _ = fmt.Sscanf(rangeHdr, "bytes=%d-", &start)
mu.Lock()
sawRange = true
mu.Unlock()
w.Header().Set("Content-Range", fmt.Sprintf("bytes %d-%d/%d", start, len(content)-1, len(content)))
w.WriteHeader(http.StatusPartialContent)
n, _ := w.Write(content[start:])
mu.Lock()
binBytesServed += n
mu.Unlock()
return
}
w.WriteHeader(http.StatusOK)
n, _ := w.Write(content)
mu.Lock()
binBytesServed += n
mu.Unlock()
}))
defer srv.Close()
rm.BaseDownloadURL = srv.URL
err := rm.DownloadRelease(version, nil)
Expect(err).ToNot(HaveOccurred())
got, err := os.ReadFile(finalPath)
Expect(err).ToNot(HaveOccurred())
Expect(got).To(Equal(content))
Expect(sawRange).To(BeTrue(), "expected the download to resume with a Range request")
Expect(binBytesServed).To(Equal(len(content)-1024), "expected only the remaining bytes to be served")
Expect(partPath).ToNot(BeAnExistingFile())
})
It("starts fresh when the server ignores the Range header (200)", func() {
// A stale/garbage partial that must NOT be appended to.
Expect(os.WriteFile(partPath, []byte("garbage-garbage-garbage"), 0644)).To(Succeed())
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if strings.HasSuffix(r.URL.Path, "checksums.txt") {
_, _ = w.Write([]byte(checksums))
return
}
// Ignore any Range and always serve the full body.
w.WriteHeader(http.StatusOK)
_, _ = w.Write(content)
}))
defer srv.Close()
rm.BaseDownloadURL = srv.URL
err := rm.DownloadRelease(version, nil)
Expect(err).ToNot(HaveOccurred())
got, err := os.ReadFile(finalPath)
Expect(err).ToNot(HaveOccurred())
Expect(got).To(Equal(content))
})
It("restarts the download when the partial is stale (416)", func() {
// Oversized partial -> requested Range start is beyond the content.
Expect(os.WriteFile(partPath, make([]byte, len(content)+10), 0644)).To(Succeed())
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if strings.HasSuffix(r.URL.Path, "checksums.txt") {
_, _ = w.Write([]byte(checksums))
return
}
if rangeHdr := r.Header.Get("Range"); rangeHdr != "" {
var start int
_, _ = fmt.Sscanf(rangeHdr, "bytes=%d-", &start)
if start >= len(content) {
w.WriteHeader(http.StatusRequestedRangeNotSatisfiable)
return
}
w.Header().Set("Content-Range", fmt.Sprintf("bytes %d-%d/%d", start, len(content)-1, len(content)))
w.WriteHeader(http.StatusPartialContent)
_, _ = w.Write(content[start:])
return
}
w.WriteHeader(http.StatusOK)
_, _ = w.Write(content)
}))
defer srv.Close()
rm.BaseDownloadURL = srv.URL
err := rm.DownloadRelease(version, nil)
Expect(err).ToNot(HaveOccurred())
got, err := os.ReadFile(finalPath)
Expect(err).ToNot(HaveOccurred())
Expect(got).To(Equal(content))
})
It("removes the downloaded file when checksum verification fails", func() {
bad := []byte("this is definitely not the expected binary content")
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if strings.HasSuffix(r.URL.Path, "checksums.txt") {
// Checksums are for `content`, but we serve `bad`.
_, _ = w.Write([]byte(checksums))
return
}
w.WriteHeader(http.StatusOK)
_, _ = w.Write(bad)
}))
defer srv.Close()
rm.BaseDownloadURL = srv.URL
err := rm.DownloadRelease(version, nil)
Expect(err).To(HaveOccurred())
Expect(err.Error()).To(ContainSubstring("checksum"))
Expect(finalPath).ToNot(BeAnExistingFile())
Expect(partPath).ToNot(BeAnExistingFile())
})
It("reports progress as downloaded and total byte counts", func() {
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if strings.HasSuffix(r.URL.Path, "checksums.txt") {
_, _ = w.Write([]byte(checksums))
return
}
w.Header().Set("Content-Length", strconv.Itoa(len(content)))
w.WriteHeader(http.StatusOK)
_, _ = w.Write(content)
}))
defer srv.Close()
rm.BaseDownloadURL = srv.URL
var mu sync.Mutex
var lastDownloaded, lastTotal int64
err := rm.DownloadRelease(version, func(downloaded, total int64) {
mu.Lock()
lastDownloaded = downloaded
lastTotal = total
mu.Unlock()
})
Expect(err).ToNot(HaveOccurred())
Expect(lastTotal).To(Equal(int64(len(content))))
Expect(lastDownloaded).To(Equal(int64(len(content))))
})
})
Describe("GetLatestRelease", func() {
It("resolves the latest version from the releases/latest redirect", func() {
// The github.com redirect path must be preferred over the
// rate-limited api.github.com, so a working redirect yields the tag
// without ever needing the API.
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
switch {
case strings.HasSuffix(r.URL.Path, "/releases/latest"):
http.Redirect(w, r, "/owner/repo/releases/tag/v9.9.9", http.StatusFound)
case strings.HasSuffix(r.URL.Path, "/releases/tag/v9.9.9"):
w.WriteHeader(http.StatusOK)
default:
w.WriteHeader(http.StatusNotFound)
}
}))
defer srv.Close()
rm.BaseDownloadURL = srv.URL
rm.GitHubOwner = "owner"
rm.GitHubRepo = "repo"
release, err := rm.GetLatestRelease()
Expect(err).ToNot(HaveOccurred())
Expect(release.Version).To(Equal("v9.9.9"))
})
})
})

View File

@@ -443,23 +443,84 @@ func (sm *SystrayManager) showStartupErrorDialog(err error) {
})
}
// showDownloadProgress shows a progress window for downloading updates. The
// progress UI (byte readout, resume-aware retry, sizing) is shared with the
// other download entry points via the launcher; only the post-success behaviour
// (restart prompt + systray refresh) is specific to the update flow.
// showDownloadProgress shows a progress window for downloading updates
func (sm *SystrayManager) showDownloadProgress(version string) {
sm.launcher.showDownloadProgressWindow(version, fmt.Sprintf("Downloading LocalAI version %s", version), func(win fyne.Window) {
dialog.ShowConfirm("Update Downloaded",
"LocalAI has been updated successfully. Please restart the launcher to use the new version.",
func(restart bool) {
if restart {
sm.app.Quit()
}
win.Close()
}, win)
// Create a new window for download progress
progressWindow := sm.app.NewWindow("Downloading LocalAI Update")
progressWindow.Resize(fyne.NewSize(400, 250))
progressWindow.CenterOnScreen()
sm.hasUpdateAvailable = false
sm.latestVersion = ""
sm.recreateMenu()
// Progress bar
progressBar := widget.NewProgressBar()
progressBar.SetValue(0)
// Status label. Truncate with an ellipsis so a long "Download failed:
// <url>" message can't stretch the window (and progress bar) to fit the
// whole error on one line; the full error is shown in the dialog below.
statusLabel := widget.NewLabel("Preparing download...")
statusLabel.Truncation = fyne.TextTruncateEllipsis
// Release notes button
releaseNotesButton := widget.NewButton("View Release Notes", func() {
releaseNotesURL, err := sm.launcher.githubReleaseNotesURL(version)
if err != nil {
log.Printf("Failed to parse URL: %v", err)
return
}
sm.app.OpenURL(releaseNotesURL)
})
// Progress container
progressContainer := container.NewVBox(
widget.NewLabel(fmt.Sprintf("Downloading LocalAI version %s", version)),
progressBar,
statusLabel,
widget.NewSeparator(),
releaseNotesButton,
)
progressWindow.SetContent(progressContainer)
progressWindow.Show()
// Start download in background
go func() {
err := sm.launcher.DownloadUpdate(version, func(progress float64) {
// Update progress bar
fyne.Do(func() {
progressBar.SetValue(progress)
percentage := int(progress * 100)
statusLabel.SetText(fmt.Sprintf("Downloading... %d%%", percentage))
})
})
// Handle completion
fyne.Do(func() {
if err != nil {
statusLabel.SetText(fmt.Sprintf("Download failed: %v", err))
// Show error dialog
dialog.ShowError(err, progressWindow)
} else {
statusLabel.SetText("Download completed successfully!")
progressBar.SetValue(1.0)
// Show restart dialog
dialog.ShowConfirm("Update Downloaded",
"LocalAI has been updated successfully. Please restart the launcher to use the new version.",
func(restart bool) {
if restart {
sm.app.Quit()
}
progressWindow.Close()
}, progressWindow)
}
})
// Update systray menu
if err == nil {
sm.hasUpdateAvailable = false
sm.latestVersion = ""
sm.recreateMenu()
}
}()
}

View File

@@ -490,19 +490,14 @@ func (ui *LauncherUI) downloadUpdate() {
ui.UpdateStatus("Downloading update " + version + "...")
go func() {
err := ui.launcher.DownloadUpdate(version, func(downloaded, total int64) {
err := ui.launcher.DownloadUpdate(version, func(progress float64) {
// Update progress bar
fyne.Do(func() {
if total > 0 {
ui.progressBar.SetValue(float64(downloaded) / float64(total))
}
ui.progressBar.SetValue(progress)
})
// The progress bar already shows the percentage, so report the
// human-readable size here instead of repeating the percent.
if total > 0 {
ui.UpdateStatus(fmt.Sprintf("Downloading update %s… %s / %s", version, formatBytes(downloaded), formatBytes(total)))
} else {
ui.UpdateStatus(fmt.Sprintf("Downloading update %s… %s", version, formatBytes(downloaded)))
}
// Update status with percentage
percentage := int(progress * 100)
ui.UpdateStatus(fmt.Sprintf("Downloading update %s... %d%%", version, percentage))
})
fyne.Do(func() {
@@ -603,6 +598,82 @@ func (ui *LauncherUI) LoadConfiguration() {
log.Printf("UI LoadConfiguration: configuration loaded successfully")
}
// showDownloadProgress shows a progress window for downloading LocalAI
func (ui *LauncherUI) showDownloadProgress(version, title string) {
fyne.DoAndWait(func() {
// Create progress window using the launcher's app
progressWindow := ui.launcher.app.NewWindow("Downloading LocalAI")
progressWindow.Resize(fyne.NewSize(400, 250))
progressWindow.CenterOnScreen()
// Progress bar
progressBar := widget.NewProgressBar()
progressBar.SetValue(0)
// Status label. Truncate with an ellipsis so a long "Download failed:
// <url>" message can't stretch the window (and progress bar) to fit the
// whole error on one line; the full error is shown in the dialog below.
statusLabel := widget.NewLabel("Preparing download...")
statusLabel.Truncation = fyne.TextTruncateEllipsis
// Release notes button
releaseNotesButton := widget.NewButton("View Release Notes", func() {
releaseNotesURL, err := ui.launcher.githubReleaseNotesURL(version)
if err != nil {
log.Printf("Failed to parse URL: %v", err)
return
}
ui.launcher.app.OpenURL(releaseNotesURL)
})
// Progress container
progressContainer := container.NewVBox(
widget.NewLabel(title),
progressBar,
statusLabel,
widget.NewSeparator(),
releaseNotesButton,
)
progressWindow.SetContent(progressContainer)
progressWindow.Show()
// Start download in background
go func() {
err := ui.launcher.DownloadUpdate(version, func(progress float64) {
// Update progress bar
fyne.Do(func() {
progressBar.SetValue(progress)
percentage := int(progress * 100)
statusLabel.SetText(fmt.Sprintf("Downloading... %d%%", percentage))
})
})
// Handle completion
fyne.Do(func() {
if err != nil {
statusLabel.SetText(fmt.Sprintf("Download failed: %v", err))
// Show error dialog
dialog.ShowError(err, progressWindow)
} else {
statusLabel.SetText("Download completed successfully!")
progressBar.SetValue(1.0)
// Show success dialog
dialog.ShowConfirm("Installation Complete",
"LocalAI has been downloaded and installed successfully. You can now start LocalAI from the launcher.",
func(close bool) {
progressWindow.Close()
// Update status
ui.UpdateStatus("LocalAI installed successfully")
}, progressWindow)
}
})
}()
})
}
// UpdateRunningState updates UI based on LocalAI running state
func (ui *LauncherUI) UpdateRunningState(isRunning bool) {
fyne.Do(func() {

View File

@@ -356,12 +356,6 @@ func initDistributed(cfg *config.ApplicationConfig, authDB *gorm.DB, configLoade
PrefixConfig: prefixCfg,
Pressure: pressure,
SharedModels: cfg.Distributed.SharedModels,
// Cap how long a cold load may hold the per-model advisory lock: the
// configured backend.install deadline plus a margin for file staging and
// the remote LoadModel. Derived from the install timeout so raising it
// (for slow links pulling multi-GB images) widens the ceiling too,
// instead of letting the static default cut a legitimately slow load.
ModelLoadCeiling: cfg.Distributed.BackendInstallTimeoutOrDefault() + 10*time.Minute,
})
// Wire staging-progress broadcasting so file-staging shows up on every

View File

@@ -130,20 +130,6 @@ func WithLockCtx(ctx context.Context, db *gorm.DB, key int64, fn func() error) e
}
defer conn.Close()
// Neutralize any deployment-wide lock_timeout on this dedicated connection.
// Operators commonly set a short global lock_timeout (on the role or
// database) to bound ordinary row-lock waits. Applied to the blocking
// pg_advisory_lock below, it aborts the wait with SQLSTATE 55P03 and turns
// LocalAI's intentional cross-replica "wait your turn, then re-check"
// coordination into a hard error for the caller (e.g. a chat request that
// just wanted to reuse a model another replica is loading). Let the Go
// context be the single source of truth for how long we wait instead.
if _, err := conn.ExecContext(ctx, "SET lock_timeout = 0"); err != nil {
return fmt.Errorf("advisorylock: disabling lock_timeout: %w", err)
}
// Restore the session default before this pooled connection is reused.
defer func() { _, _ = conn.ExecContext(context.Background(), "RESET lock_timeout") }()
if _, err := conn.ExecContext(ctx, "SELECT pg_advisory_lock($1)", key); err != nil {
return fmt.Errorf("advisorylock: acquiring lock %d: %w", key, err)
}

View File

@@ -158,53 +158,6 @@ var _ = Describe("AdvisoryLock", func() {
Expect(err).To(HaveOccurred())
})
It("waits out a short server-side lock_timeout instead of failing with 55P03", func() {
const lockKey int64 = 703
// Reproduce the production deployment that triggered this: a short
// global lock_timeout set on the database. Without the fix, a waiter
// blocked on pg_advisory_lock() is aborted by the server after this
// window and surfaces SQLSTATE 55P03 ("canceling statement due to
// lock timeout") to the caller instead of waiting for its turn.
Expect(db.Exec("ALTER DATABASE testdb SET lock_timeout = '300ms'").Error).ToNot(HaveOccurred())
sqlDB, err := db.DB()
Expect(err).ToNot(HaveOccurred())
// Drop pooled connections so subsequent ones reconnect and inherit
// the new database-level lock_timeout default.
sqlDB.SetMaxIdleConns(0)
holding := make(chan struct{})
released := make(chan struct{})
go func() {
defer GinkgoRecover()
herr := WithLockCtx(context.Background(), db, lockKey, func() error {
close(holding)
// Hold well past the 300ms server lock_timeout.
time.Sleep(1 * time.Second)
return nil
})
Expect(herr).ToNot(HaveOccurred())
close(released)
}()
<-holding // ensure the holder owns the lock before we contend
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
defer cancel()
executed := false
start := time.Now()
werr := WithLockCtx(ctx, db, lockKey, func() error {
executed = true
return nil
})
Expect(werr).ToNot(HaveOccurred(),
"waiter should wait out the in-progress hold, not fail with lock_timeout (55P03)")
Expect(executed).To(BeTrue())
Expect(time.Since(start)).To(BeNumerically(">=", 400*time.Millisecond),
"waiter should have actually waited for the holder to release")
<-released
})
It("serializes concurrent WithLockCtx on same key", func() {
const lockKey int64 = 702

View File

@@ -68,13 +68,6 @@ type SmartRouterOptions struct {
// the absolute model paths untouched so the worker loads them directly from
// the shared volume (#10556). See config.DistributedConfig.SharedModels.
SharedModels bool
// ModelLoadCeiling is the hard upper bound on how long a single cold-load
// attempt (node selection -> backend install -> file staging -> LoadModel)
// may run while holding the per-model advisory lock. It backstops every
// sub-step's own timeout so a wedged worker can never pin the lock - and
// every other replica's request for that model - indefinitely. Zero selects
// defaultModelLoadCeiling.
ModelLoadCeiling time.Duration
}
// SmartRouter routes inference requests to the best available backend node.
@@ -108,18 +101,8 @@ type SmartRouter struct {
// sharedModels skips file staging when all nodes mount the same models
// directory at the same path (see SmartRouterOptions.SharedModels).
sharedModels bool
// modelLoadCeiling bounds how long a cold load may hold the per-model
// advisory lock (see SmartRouterOptions.ModelLoadCeiling).
modelLoadCeiling time.Duration
}
// defaultModelLoadCeiling is the fallback hold ceiling for a cold model load.
// It must comfortably exceed the slowest legitimate load - a multi-GB backend
// install (DefaultBackendInstallTimeout, 15m) plus staging and the remote
// LoadModel (5m) - so it never cuts a real load short; it only ever fires when
// a step is genuinely wedged (e.g. a worker that died mid-install).
const defaultModelLoadCeiling = 25 * time.Minute
// probeCacheTTL is how long a successful gRPC HealthCheck on a backend is
// trusted before the next request re-probes. Matches healthCheckTTL in
// pkg/model/model.go so the single-process and distributed paths share a
@@ -134,10 +117,6 @@ func NewSmartRouter(registry ModelRouter, opts SmartRouterOptions) *SmartRouter
if factory == nil {
factory = &tokenClientFactory{token: opts.AuthToken}
}
ceiling := opts.ModelLoadCeiling
if ceiling <= 0 {
ceiling = defaultModelLoadCeiling
}
return &SmartRouter{
registry: registry,
unloader: opts.Unloader,
@@ -152,7 +131,6 @@ func NewSmartRouter(registry ModelRouter, opts SmartRouterOptions) *SmartRouter
prefixConfig: opts.PrefixConfig,
pressure: opts.Pressure,
sharedModels: opts.SharedModels,
modelLoadCeiling: ceiling,
}
}
@@ -405,19 +383,11 @@ func (r *SmartRouter) Route(ctx context.Context, modelID, modelName, backendType
// the request context. If staging were bound to it, the multi-GB upload
// aborts with "context canceled" mid-transfer and large models can never
// finish staging (the model-load outage). WithoutCancel keeps the request's
// values (prefix chain, etc.) but drops its cancellation/deadline.
//
// Detaching from the caller is necessary, but it must not be unbounded: the
// load runs while holding the per-model advisory lock, and a worker that
// dies mid-install (its backend.install never replies) would otherwise pin
// that lock (and every other replica's request for the same model) until
// the NATS install deadline alone expires. Re-impose a single hard ceiling
// over the whole sequence so the lock is always released in bounded time,
// even if a sub-step wedges. Each long step still has its own (tighter)
// bound; this only backstops them. The per-model advisory lock below
// de-dupes concurrent loaders across replicas.
loadCtx, cancelLoad := context.WithTimeout(context.WithoutCancel(ctx), r.modelLoadCeiling)
defer cancelLoad()
// values (prefix chain, etc.) but drops its cancellation/deadline. Each
// long step still has its own bound (the file stager's resume budget,
// LoadModel's 5m timeout), and the per-model advisory lock below de-dupes
// concurrent loaders across replicas.
loadCtx := context.WithoutCancel(ctx)
loadModel := func(ctx context.Context) (*RouteResult, error) {
// Re-check after acquiring lock — another request may have loaded it
node, nm, err := r.registry.FindAndLockNodeWithModel(ctx, trackingKey, candidateNodeIDs, pref)
@@ -946,14 +916,7 @@ func (r *SmartRouter) installBackendOnNode(ctx context.Context, node *BackendNod
}
key := fmt.Sprintf("%s|%s|%s|%d", node.ID, backendType, modelID, replicaIndex)
// DoChan rather than Do so this wait honors ctx cancellation. InstallBackend
// blocks for its full NATS deadline (15m by default) when a worker accepts
// the request but never replies (e.g. it died mid-install). Without ctx
// awareness the caller (holding the per-model advisory lock) would sit there
// the whole time; here a cancelled ctx (typically the model-load ceiling)
// frees the caller promptly. The shared install keeps running in the
// background and still coalesces other callers via singleflight.
resCh := r.installFlight.DoChan(key, func() (any, error) {
v, err, _ := r.installFlight.Do(key, func() (any, error) {
reply, err := r.unloader.InstallBackend(node.ID, backendType, modelID, r.galleriesJSON, "", "", "", replicaIndex, "", nil)
if err != nil {
return "", err
@@ -968,15 +931,10 @@ func (r *SmartRouter) installBackendOnNode(ctx context.Context, node *BackendNod
}
return addr, nil
})
select {
case <-ctx.Done():
return "", ctx.Err()
case res := <-resCh:
if res.Err != nil {
return "", res.Err
}
return res.Val.(string), nil
if err != nil {
return "", err
}
return v.(string), nil
}
func (r *SmartRouter) buildClientForAddr(node *BackendNode, addr string, parallel bool) grpc.Backend {

View File

@@ -493,44 +493,6 @@ var _ = Describe("SmartRouter", func() {
Expect(result.Node.ID).To(Equal("n3"))
})
})
Context("worker wedges mid-install (dead node holding the lock)", func() {
It("aborts the load at the ModelLoadCeiling instead of blocking forever", func() {
// Simulate the production incident: the chosen worker accepts the
// backend.install but never replies (it died), so InstallBackend
// would otherwise block for its full NATS deadline (15m by
// default) while pinning the per-model advisory lock. Route must
// give up at the ceiling so the lock is released promptly.
reg.findAndLockErr = errors.New("not found")
reg.findIdleNode = &BackendNode{ID: "n4", Name: "dead-node", Address: "10.0.0.4:50051"}
block := make(chan struct{})
defer close(block) // let the background install goroutine drain at test end
unloader.installHook = func() { <-block }
router := NewSmartRouter(reg, SmartRouterOptions{
Unloader: unloader,
ClientFactory: factory,
ModelLoadCeiling: 200 * time.Millisecond,
})
done := make(chan error, 1)
start := time.Now()
go func() {
defer GinkgoRecover()
_, err := router.Route(context.Background(), "wedged-model",
"models/wedged.gguf", "llama-cpp",
&pb.ModelOptions{Model: "models/wedged.gguf"}, false)
done <- err
}()
var routeErr error
Eventually(done, 5*time.Second).Should(Receive(&routeErr),
"Route must not block on a wedged install past the ceiling")
Expect(routeErr).To(HaveOccurred())
Expect(time.Since(start)).To(BeNumerically("<", 5*time.Second))
})
})
})
Describe("scheduleNewModel (mock-based, via Route)", func() {

View File

@@ -3,7 +3,26 @@
url: "github:mudler/LocalAI/gallery/virtual.yaml@master"
urls:
- https://huggingface.co/unsloth/Qwen-AgentWorld-35B-A3B-GGUF
description: "# Qwen-AgentWorld-35B-A3B\n\n\U0001F4D1 Technical Report |\n\U0001F4D6 Blog |\n\U0001F917 Hugging Face |\n\U0001F916 ModelScope |\n\U0001F4BB GitHub |\n\U0001F5A5 Demo\n\n> [!Note]\n> This repository contains the model weights and configuration files for **Qwen-AgentWorld-35B-A3B**, a native language world model trained for agentic environment simulation.\n>\n> These artifacts are compatible with Hugging Face Transformers, vLLM, SGLang, etc.\n\n**Qwen-AgentWorld** is the first language world model to cover seven agent interaction domains within a single model. It simulates agentic environments via long chain-of-thought reasoning, predicting the next environment state given an agent's action and interaction history. Trained through a three-stage pipeline — CPT injects environment knowledge, SFT activates next-state-prediction reasoning, RL sharpens simulation fidelity — Qwen-AgentWorld is a **native world model**: environment modeling is the training objective from the CPT stage onward, not a post-hoc add-on.\n\n## Highlights\n\n...\n"
description: |
# Qwen-AgentWorld-35B-A3B
📑 Technical Report |
📖 Blog |
🤗 Hugging Face |
🤖 ModelScope |
💻 GitHub |
🖥️ Demo
> [!Note]
> This repository contains the model weights and configuration files for **Qwen-AgentWorld-35B-A3B**, a native language world model trained for agentic environment simulation.
>
> These artifacts are compatible with Hugging Face Transformers, vLLM, SGLang, etc.
**Qwen-AgentWorld** is the first language world model to cover seven agent interaction domains within a single model. It simulates agentic environments via long chain-of-thought reasoning, predicting the next environment state given an agent's action and interaction history. Trained through a three-stage pipeline — CPT injects environment knowledge, SFT activates next-state-prediction reasoning, RL sharpens simulation fidelity — Qwen-AgentWorld is a **native world model**: environment modeling is the training objective from the CPT stage onward, not a post-hoc add-on.
## Highlights
...
license: "apache-2.0"
tags:
- llm
@@ -32,7 +51,34 @@
url: "github:mudler/LocalAI/gallery/virtual.yaml@master"
urls:
- https://huggingface.co/deepreinforce-ai/Ornith-1.0-9B-GGUF
description: "[](https://deep-reinforce.com/ornith.html)\n\n# Ornith-1.0-9B-GGUF\n\nAloha! \U0001F33A Today, we are releasing Ornith-1.0, a self-improving family of open-source models for agentic coding.\n\nHighlights:\n\n - **State-of-the-Art Coding Agents**: Available in 9B-Dense, 31B-Dense, 35B-MoE, and 397B-MoE (post-trained on top of Gemma 4 and Qwen 3.5), achieving state-of-the-art performance among open-source models of comparable size on coding benchmarks such as Terminal-Bench 2.1, SWE-Bench, NL2Repo and OpenClaw.\n - **Self-Improving Training Framework**:  Ornith-1.0 employs RL to learn to generate not only solution rollouts, but also the scallfold that drive those rollouts. By jointly optimizing the scaffold and the resulting solution, the model discovers better search trajectories and generates higher-quality solutions.\n - **Licence**: MIT licensed, globally accessible, and free from regional limitations.\n\n## Ornith 1.0 9B\n\nThis model card documents **Ornith-1.0-9B**, the most lightweight member of the Ornith family, designed for efficient single-GPU deployment.\n\n### Benchmarks\n\nOrnith-1.0-9B\nQwen3.5-9B\nQwen3.5-35B\nGemma4-12B\nGemma4-31B\n\nAgentic Coding\n\n...\n"
description: |
[](https://deep-reinforce.com/ornith.html)
# Ornith-1.0-9B-GGUF
Aloha! 🌺 Today, we are releasing Ornith-1.0, a self-improving family of open-source models for agentic coding.
Highlights:
- **State-of-the-Art Coding Agents**: Available in 9B-Dense, 31B-Dense, 35B-MoE, and 397B-MoE (post-trained on top of Gemma 4 and Qwen 3.5), achieving state-of-the-art performance among open-source models of comparable size on coding benchmarks such as Terminal-Bench 2.1, SWE-Bench, NL2Repo and OpenClaw.
- **Self-Improving Training Framework**:  Ornith-1.0 employs RL to learn to generate not only solution rollouts, but also the scallfold that drive those rollouts. By jointly optimizing the scaffold and the resulting solution, the model discovers better search trajectories and generates higher-quality solutions.
- **Licence**: MIT licensed, globally accessible, and free from regional limitations.
## Ornith 1.0 9B
This model card documents **Ornith-1.0-9B**, the most lightweight member of the Ornith family, designed for efficient single-GPU deployment.
### Benchmarks
Ornith-1.0-9B
Qwen3.5-9B
Qwen3.5-35B
Gemma4-12B
Gemma4-31B
Agentic Coding
...
license: "mit"
tags:
- llm
@@ -59,7 +105,34 @@
url: "github:mudler/LocalAI/gallery/virtual.yaml@master"
urls:
- https://huggingface.co/deepreinforce-ai/Ornith-1.0-35B-GGUF
description: "[](https://deep-reinforce.com/ornith.html)\n\n# Ornith-1.0-35B-GGUF\n\nAloha! \U0001F33A Today, we are releasing Ornith-1.0, a self-improving family of open-source models for agentic coding.\n\nHighlights:\n\n - **State-of-the-Art Coding Agents**: Available in 9B-Dense, 31B-Dense, 35B-MoE, and 397B-MoE (post-trained on top of Gemma 4 and Qwen 3.5), achieving state-of-the-art performance among open-source models of comparable size on coding benchmarks such as Terminal-Bench 2.1, SWE-Bench, NL2Repo and OpenClaw.\n - **Self-Improving Training Framework**: Ornith-1.0 employs RL to learn to generate not only solution rollouts, but also the scallfold that drive those rollouts. By jointly optimizing the scaffold and the resulting solution, the model discovers better search trajectories and generates higher-quality solutions.\n - **Licence**: MIT licensed, globally accessible, and free from regional limitations.\n\n## Ornith 1.0 35B\n\nThis model card documents **Ornith-1.0-35B**, the lightweight member of the Ornith family, designed for efficient single-GPU deployment.\n\n### Benchmarks\n\nOrnith-1.0-35B\nQwen3.5-35B\nQwen3.6-35B\nGemma4-31B\nQwen3.5-397B\n\nAgentic Coding\n\n...\n"
description: |
[](https://deep-reinforce.com/ornith.html)
# Ornith-1.0-35B-GGUF
Aloha! 🌺 Today, we are releasing Ornith-1.0, a self-improving family of open-source models for agentic coding.
Highlights:
- **State-of-the-Art Coding Agents**: Available in 9B-Dense, 31B-Dense, 35B-MoE, and 397B-MoE (post-trained on top of Gemma 4 and Qwen 3.5), achieving state-of-the-art performance among open-source models of comparable size on coding benchmarks such as Terminal-Bench 2.1, SWE-Bench, NL2Repo and OpenClaw.
- **Self-Improving Training Framework**: Ornith-1.0 employs RL to learn to generate not only solution rollouts, but also the scallfold that drive those rollouts. By jointly optimizing the scaffold and the resulting solution, the model discovers better search trajectories and generates higher-quality solutions.
- **Licence**: MIT licensed, globally accessible, and free from regional limitations.
## Ornith 1.0 35B
This model card documents **Ornith-1.0-35B**, the lightweight member of the Ornith family, designed for efficient single-GPU deployment.
### Benchmarks
Ornith-1.0-35B
Qwen3.5-35B
Qwen3.6-35B
Gemma4-31B
Qwen3.5-397B
Agentic Coding
...
license: "mit"
tags:
- llm
@@ -400,8 +473,8 @@
use_tokenizer_template: true
files:
- filename: llama-cpp/models/Qwythos-9B-Claude-Mythos-5-1M-GGUF/Qwythos-9B-Claude-Mythos-5-1M-MTP-Q4_K_M.gguf
sha256: 24ee22e0f5d9f0d3d615809607f365c728d9b0c3f3fb6eb19d8bd83a1c2933d8
uri: https://huggingface.co/empero-ai/Qwythos-9B-Claude-Mythos-5-1M-GGUF/resolve/main/Qwythos-9B-Claude-Mythos-5-1M-MTP-Q4_K_M.gguf
sha256: 671c430bf18c961251338d639a3c02aac7451c39eed25874cad74287ac6cd38a
- filename: llama-cpp/mmproj/Qwythos-9B-Claude-Mythos-5-1M-GGUF/mmproj-Qwythos-9B-Claude-Mythos-5-1M-f16.gguf
sha256: f70dc3509053962b0d0d3ee8a7eacebf5d60aa560cad78254ae8698516ae029f
uri: https://huggingface.co/empero-ai/Qwythos-9B-Claude-Mythos-5-1M-GGUF/resolve/main/mmproj-Qwythos-9B-Claude-Mythos-5-1M-f16.gguf

View File

@@ -20,8 +20,6 @@ func WorkerPermissions(nodeID, nodeType string) (pubAllow, subAllow []string) {
subAllow = []string{
"agent.execute",
"agent.*.cancel",
"gallery.*.cancel",
"gallery.*.progress",
"jobs.*.cancel",
"jobs.*.progress",
"jobs.*.result",
@@ -29,7 +27,6 @@ func WorkerPermissions(nodeID, nodeType string) (pubAllow, subAllow []string) {
"mcp.tools.execute",
"mcp.discovery",
prefix + ".backend.stop", // stop events drive MCP session cleanup
"staging.*.progress",
"_INBOX.>",
}
pubAllow = []string{

View File

@@ -63,72 +63,6 @@ var defaultRetryPredicate = func(err error) bool {
return false
}
// layerDownloadRetries is the number of additional attempts made when a layer
// download fails with a transient/retryable network error.
var layerDownloadRetries = 3
// layerRetryBackoff returns the wait before retry attempt n (1-indexed). It is a
// variable so tests can eliminate the wait.
var layerRetryBackoff = func(attempt int) time.Duration {
d := defaultRetryBackoff.Duration
for i := 1; i < attempt; i++ {
d = time.Duration(float64(d) * defaultRetryBackoff.Factor)
}
return d
}
// downloadLayerToFile streams a single compressed layer into dst, retrying on
// transient network errors (unexpected EOF, connection reset, ...). Large
// backend images (e.g. vLLM) are several GiB and a single dropped connection
// mid-stream previously failed the whole install with "unexpected EOF" and no
// recovery. The registry transport already retries manifest fetches via
// defaultRetryPredicate (see GetImage/GetImageDigest); this extends the same
// behaviour to the layer data stream. See issue #10577.
func downloadLayerToFile(ctx context.Context, layer v1.Layer, dst *os.File, progress *progressWriter) error {
var lastErr error
for attempt := 0; attempt <= layerDownloadRetries; attempt++ {
if attempt > 0 {
// Discard any partial data from the previous failed attempt.
if _, err := dst.Seek(0, io.SeekStart); err != nil {
return err
}
if err := dst.Truncate(0); err != nil {
return err
}
if progress != nil {
progress.written = 0
}
select {
case <-ctx.Done():
return ctx.Err()
case <-time.After(layerRetryBackoff(attempt)):
}
}
var w io.Writer = dst
if progress != nil {
w = io.MultiWriter(dst, progress)
}
var reader io.ReadCloser
reader, lastErr = layer.Compressed()
if lastErr == nil {
_, lastErr = xio.Copy(ctx, w, reader)
_ = reader.Close()
}
if lastErr == nil {
return nil
}
// Stop early on context cancellation or non-retryable errors.
if ctx.Err() != nil || !defaultRetryPredicate(lastErr) {
return lastErr
}
logs.Warn.Printf("layer download failed (attempt %d/%d), retrying: %v", attempt+1, layerDownloadRetries+1, lastErr)
}
return lastErr
}
type progressWriter struct {
written int64
total int64
@@ -370,17 +304,23 @@ func DownloadOCIImageTar(ctx context.Context, img v1.Image, imageRef string, tar
}
// Create progress writer for this layer
var progress *progressWriter
var writer io.Writer = file
if downloadStatus != nil {
progress = &progressWriter{
writer = io.MultiWriter(file, &progressWriter{
total: totalCompressedSize,
fileName: fmt.Sprintf("Downloading %d/%d %s", i+1, len(layers), imageName),
downloadStatus: downloadStatus,
}
})
}
// Download the compressed layer, retrying on transient network errors.
err = downloadLayerToFile(ctx, layer, file, progress)
// Download the compressed layer
layerReader, err := layer.Compressed()
if err != nil {
file.Close()
return fmt.Errorf("failed to get compressed layer: %v", err)
}
_, err = xio.Copy(ctx, writer, layerReader)
file.Close()
if err != nil {
return fmt.Errorf("failed to download layer %d: %v", i, err)

View File

@@ -1,123 +0,0 @@
package oci
import (
"bytes"
"context"
"errors"
"io"
"os"
"time"
v1 "github.com/google/go-containerregistry/pkg/v1"
"github.com/google/go-containerregistry/pkg/v1/types"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)
// failingReader yields prefix bytes then returns err, simulating a connection
// dropped mid-stream while downloading a layer.
type failingReader struct {
prefix []byte
off int
err error
}
func (r *failingReader) Read(p []byte) (int, error) {
if r.off < len(r.prefix) {
n := copy(p, r.prefix[r.off:])
r.off += n
return n, nil
}
return 0, r.err
}
// fakeLayer is a minimal v1.Layer whose Compressed() fails failUntil times with
// err (after emitting a partial prefix) before finally returning data in full.
type fakeLayer struct {
data []byte
failUntil int
err error
calls int
}
func (f *fakeLayer) Digest() (v1.Hash, error) { return v1.Hash{}, nil }
func (f *fakeLayer) DiffID() (v1.Hash, error) { return v1.Hash{}, nil }
func (f *fakeLayer) Size() (int64, error) { return int64(len(f.data)), nil }
func (f *fakeLayer) MediaType() (types.MediaType, error) { return types.DockerLayer, nil }
func (f *fakeLayer) Uncompressed() (io.ReadCloser, error) {
return nil, errors.New("not implemented")
}
func (f *fakeLayer) Compressed() (io.ReadCloser, error) {
f.calls++
if f.calls <= f.failUntil {
return io.NopCloser(&failingReader{prefix: []byte("partial-garbage"), err: f.err}), nil
}
return io.NopCloser(bytes.NewReader(f.data)), nil
}
var _ = Describe("downloadLayerToFile", func() {
var (
dst *os.File
restoreWait func()
)
BeforeEach(func() {
var err error
dst, err = os.CreateTemp("", "layer-retry-*.tar.gz")
Expect(err).NotTo(HaveOccurred())
// Eliminate the real backoff sleep so the test is fast.
prev := layerRetryBackoff
layerRetryBackoff = func(int) time.Duration { return 0 }
restoreWait = func() { layerRetryBackoff = prev }
})
AfterEach(func() {
restoreWait()
_ = dst.Close()
_ = os.Remove(dst.Name())
})
It("retries on unexpected EOF and writes the complete layer", func() {
layer := &fakeLayer{
data: []byte("the-real-layer-contents"),
failUntil: 2,
err: io.ErrUnexpectedEOF,
}
err := downloadLayerToFile(context.Background(), layer, dst, nil)
Expect(err).NotTo(HaveOccurred())
Expect(layer.calls).To(Equal(3))
got, err := os.ReadFile(dst.Name())
Expect(err).NotTo(HaveOccurred())
// The partial data from the two failed attempts must have been
// discarded, leaving exactly the real contents.
Expect(string(got)).To(Equal("the-real-layer-contents"))
})
It("does not retry on a non-retryable error", func() {
layer := &fakeLayer{
data: []byte("never-reached"),
failUntil: 1,
err: errors.New("permission denied"),
}
err := downloadLayerToFile(context.Background(), layer, dst, nil)
Expect(err).To(HaveOccurred())
Expect(layer.calls).To(Equal(1))
})
It("gives up after exhausting retries on a persistent transient error", func() {
layer := &fakeLayer{
data: []byte("unreachable"),
failUntil: 1000,
err: io.ErrUnexpectedEOF,
}
err := downloadLayerToFile(context.Background(), layer, dst, nil)
Expect(err).To(MatchError(io.ErrUnexpectedEOF))
Expect(layer.calls).To(Equal(layerDownloadRetries + 1))
})
})