mirror of
https://github.com/mudler/LocalAI.git
synced 2026-06-30 03:17:01 -04:00
Compare commits
15 Commits
worktree-f
...
fix/distri
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
a73516f9b4 | ||
|
|
036f950b1b | ||
|
|
5b7b914b4f | ||
|
|
d1cee4c52a | ||
|
|
baaa0fe94f | ||
|
|
c3b5c7c3fa | ||
|
|
bd1ec8f2c2 | ||
|
|
135debf9af | ||
|
|
e8c18ae28e | ||
|
|
c4d302e1ab | ||
|
|
323b57a4bc | ||
|
|
3d2f639213 | ||
|
|
be1ae9338b | ||
|
|
923c47020d | ||
|
|
b7a1dec773 |
12
.github/workflows/backend_build_darwin.yml
vendored
12
.github/workflows/backend_build_darwin.yml
vendored
@@ -82,7 +82,7 @@ jobs:
|
|||||||
# as the Linux registry cache.
|
# as the Linux registry cache.
|
||||||
- name: Restore Homebrew cache
|
- name: Restore Homebrew cache
|
||||||
id: brew-cache
|
id: brew-cache
|
||||||
uses: actions/cache/restore@v4
|
uses: actions/cache/restore@v6
|
||||||
with:
|
with:
|
||||||
path: |
|
path: |
|
||||||
~/Library/Caches/Homebrew/downloads
|
~/Library/Caches/Homebrew/downloads
|
||||||
@@ -142,7 +142,7 @@ jobs:
|
|||||||
|
|
||||||
- name: Save Homebrew cache
|
- name: Save Homebrew cache
|
||||||
if: github.event_name != 'pull_request' && steps.brew-cache.outputs.cache-hit != 'true'
|
if: github.event_name != 'pull_request' && steps.brew-cache.outputs.cache-hit != 'true'
|
||||||
uses: actions/cache/save@v4
|
uses: actions/cache/save@v6
|
||||||
with:
|
with:
|
||||||
path: |
|
path: |
|
||||||
~/Library/Caches/Homebrew/downloads
|
~/Library/Caches/Homebrew/downloads
|
||||||
@@ -178,7 +178,7 @@ jobs:
|
|||||||
- name: Restore ccache
|
- name: Restore ccache
|
||||||
if: inputs.backend == 'llama-cpp'
|
if: inputs.backend == 'llama-cpp'
|
||||||
id: ccache-cache
|
id: ccache-cache
|
||||||
uses: actions/cache/restore@v4
|
uses: actions/cache/restore@v6
|
||||||
with:
|
with:
|
||||||
path: ~/Library/Caches/ccache
|
path: ~/Library/Caches/ccache
|
||||||
key: ccache-llama-${{ runner.arch }}-${{ steps.llama-version.outputs.version }}-${{ github.run_id }}
|
key: ccache-llama-${{ runner.arch }}-${{ steps.llama-version.outputs.version }}-${{ github.run_id }}
|
||||||
@@ -211,7 +211,7 @@ jobs:
|
|||||||
- name: Restore Python wheel cache
|
- name: Restore Python wheel cache
|
||||||
if: inputs.lang == 'python'
|
if: inputs.lang == 'python'
|
||||||
id: pyenv-cache
|
id: pyenv-cache
|
||||||
uses: actions/cache/restore@v4
|
uses: actions/cache/restore@v6
|
||||||
with:
|
with:
|
||||||
path: |
|
path: |
|
||||||
~/Library/Caches/pip
|
~/Library/Caches/pip
|
||||||
@@ -256,14 +256,14 @@ jobs:
|
|||||||
|
|
||||||
- name: Save ccache
|
- name: Save ccache
|
||||||
if: inputs.backend == 'llama-cpp' && github.event_name != 'pull_request'
|
if: inputs.backend == 'llama-cpp' && github.event_name != 'pull_request'
|
||||||
uses: actions/cache/save@v4
|
uses: actions/cache/save@v6
|
||||||
with:
|
with:
|
||||||
path: ~/Library/Caches/ccache
|
path: ~/Library/Caches/ccache
|
||||||
key: ccache-llama-${{ runner.arch }}-${{ steps.llama-version.outputs.version }}-${{ github.run_id }}
|
key: ccache-llama-${{ runner.arch }}-${{ steps.llama-version.outputs.version }}-${{ github.run_id }}
|
||||||
|
|
||||||
- name: Save Python wheel cache
|
- name: Save Python wheel cache
|
||||||
if: inputs.lang == 'python' && github.event_name != 'pull_request' && steps.pyenv-cache.outputs.cache-hit != 'true'
|
if: inputs.lang == 'python' && github.event_name != 'pull_request' && steps.pyenv-cache.outputs.cache-hit != 'true'
|
||||||
uses: actions/cache/save@v4
|
uses: actions/cache/save@v6
|
||||||
with:
|
with:
|
||||||
path: |
|
path: |
|
||||||
~/Library/Caches/pip
|
~/Library/Caches/pip
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
|
|
||||||
LLAMA_VERSION?=0ed235ea2c17a19fc8238668653946721ed136fd
|
LLAMA_VERSION?=dbdaece23de9ac63f2e7ca9e6bfcdc4fc156a3fa
|
||||||
LLAMA_REPO?=https://github.com/ggerganov/llama.cpp
|
LLAMA_REPO?=https://github.com/ggerganov/llama.cpp
|
||||||
|
|
||||||
CMAKE_ARGS?=
|
CMAKE_ARGS?=
|
||||||
|
|||||||
@@ -8,7 +8,7 @@ JOBS?=$(shell nproc --ignore=1)
|
|||||||
|
|
||||||
# CrispASR version (release tag)
|
# CrispASR version (release tag)
|
||||||
CRISPASR_REPO?=https://github.com/CrispStrobe/CrispASR
|
CRISPASR_REPO?=https://github.com/CrispStrobe/CrispASR
|
||||||
CRISPASR_VERSION?=6514c9da00b03a2f0f1b49a43fae4f3a01a41844
|
CRISPASR_VERSION?=6b50f76e59700665358a1aabf5295597fa318e06
|
||||||
SO_TARGET?=libgocrispasr.so
|
SO_TARGET?=libgocrispasr.so
|
||||||
|
|
||||||
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF
|
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
# face-detect backend Makefile.
|
# face-detect backend Makefile.
|
||||||
#
|
#
|
||||||
# Upstream pin lives below as FACEDETECT_VERSION?=06914b0... (.github/bump_deps.sh
|
# Upstream pin lives below as FACEDETECT_VERSION?=e22260d5d5490b37b021b7f795079f386d553afd
|
||||||
# can find and update it - matches the voice-detect / parakeet.cpp / whisper.cpp
|
# can find and update it - matches the voice-detect / parakeet.cpp / whisper.cpp
|
||||||
# convention).
|
# convention).
|
||||||
#
|
#
|
||||||
@@ -14,7 +14,7 @@
|
|||||||
# The default target below does the proper clone-at-pin + cmake build so CI does
|
# The default target below does the proper clone-at-pin + cmake build so CI does
|
||||||
# not need a side-checkout.
|
# not need a side-checkout.
|
||||||
|
|
||||||
FACEDETECT_VERSION?=06914b077d52f90d5421299138e7be6bdd06b5e8
|
FACEDETECT_VERSION?=e22260d5d5490b37b021b7f795079f386d553afd
|
||||||
FACEDETECT_REPO?=https://github.com/mudler/face-detect.cpp
|
FACEDETECT_REPO?=https://github.com/mudler/face-detect.cpp
|
||||||
|
|
||||||
GOCMD?=go
|
GOCMD?=go
|
||||||
|
|||||||
@@ -8,7 +8,7 @@ JOBS?=$(shell nproc --ignore=1)
|
|||||||
|
|
||||||
# stablediffusion.cpp (ggml)
|
# stablediffusion.cpp (ggml)
|
||||||
STABLEDIFFUSION_GGML_REPO?=https://github.com/leejet/stable-diffusion.cpp
|
STABLEDIFFUSION_GGML_REPO?=https://github.com/leejet/stable-diffusion.cpp
|
||||||
STABLEDIFFUSION_GGML_VERSION?=9956436c925a367daeab097598b1ea1f32d3503f
|
STABLEDIFFUSION_GGML_VERSION?=c1790754d31bec0731ed5fddc9d5b9ff22ee19cd
|
||||||
|
|
||||||
CMAKE_ARGS+=-DGGML_MAX_NAME=128
|
CMAKE_ARGS+=-DGGML_MAX_NAME=128
|
||||||
|
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
# voice-detect backend Makefile.
|
# voice-detect backend Makefile.
|
||||||
#
|
#
|
||||||
# Upstream pin lives below as VOICEDETECT_VERSION?=3d51077... (.github/bump_deps.sh
|
# Upstream pin lives below as VOICEDETECT_VERSION?=1db1759572c90faef6f3a78c36b5941a096a9f89
|
||||||
# can find and update it - matches the parakeet.cpp / whisper.cpp / ds4 convention).
|
# can find and update it - matches the parakeet.cpp / whisper.cpp / ds4 convention).
|
||||||
#
|
#
|
||||||
# Local dev shortcut: if you already have an out-of-tree voice-detect.cpp build,
|
# Local dev shortcut: if you already have an out-of-tree voice-detect.cpp build,
|
||||||
@@ -13,7 +13,7 @@
|
|||||||
# The default target below does the proper clone-at-pin + cmake build so CI does
|
# The default target below does the proper clone-at-pin + cmake build so CI does
|
||||||
# not need a side-checkout.
|
# not need a side-checkout.
|
||||||
|
|
||||||
VOICEDETECT_VERSION?=3d510772357538c5182808ac7de2278b84824e24
|
VOICEDETECT_VERSION?=1db1759572c90faef6f3a78c36b5941a096a9f89
|
||||||
VOICEDETECT_REPO?=https://github.com/mudler/voice-detect.cpp
|
VOICEDETECT_REPO?=https://github.com/mudler/voice-detect.cpp
|
||||||
|
|
||||||
GOCMD?=go
|
GOCMD?=go
|
||||||
|
|||||||
@@ -13,6 +13,17 @@ fi
|
|||||||
# fish-speech uses pyrootutils which requires a .project-root marker
|
# fish-speech uses pyrootutils which requires a .project-root marker
|
||||||
touch "${backend_dir}/.project-root"
|
touch "${backend_dir}/.project-root"
|
||||||
|
|
||||||
|
# On darwin arm64 the transitive `tokenizers` dep compiles its Rust extension
|
||||||
|
# from source (Linux uses prebuilt manylinux wheels, so it never compiles
|
||||||
|
# there). The pinned tokenizers crate that fish-speech's stack resolves to
|
||||||
|
# contains a `&T` -> `&mut T` cast that trips the now-deny-by-default
|
||||||
|
# `invalid_reference_casting` lint in the macOS runner's newer Rust toolchain,
|
||||||
|
# breaking the build (seen in the v4.5.5 release CI fish-speech darwin/metal
|
||||||
|
# job). Allow that lint so the unchanged third-party crate compiles as before.
|
||||||
|
# Append rather than clobber any pre-existing RUSTFLAGS; harmless on Linux
|
||||||
|
# where no Rust compile happens.
|
||||||
|
export RUSTFLAGS="${RUSTFLAGS:-} -A invalid_reference_casting"
|
||||||
|
|
||||||
installRequirements
|
installRequirements
|
||||||
|
|
||||||
# Clone fish-speech source (the pip package doesn't include inference modules)
|
# Clone fish-speech source (the pip package doesn't include inference modules)
|
||||||
|
|||||||
@@ -3,4 +3,5 @@ protobuf
|
|||||||
certifi
|
certifi
|
||||||
packaging==24.1
|
packaging==24.1
|
||||||
pip
|
pip
|
||||||
chardet
|
chardet
|
||||||
|
click
|
||||||
|
|||||||
@@ -104,7 +104,7 @@ if [ "$(uname -s)" = "Darwin" ]; then
|
|||||||
# can rewrite it. Darwin therefore follows vllm-metal and can lag the Linux
|
# can rewrite it. Darwin therefore follows vllm-metal and can lag the Linux
|
||||||
# vllm pin (requirements-cublas13-after.txt, bumped independently against
|
# vllm pin (requirements-cublas13-after.txt, bumped independently against
|
||||||
# vllm/vllm) until vllm-metal supports a newer vLLM.
|
# vllm/vllm) until vllm-metal supports a newer vLLM.
|
||||||
VLLM_METAL_VERSION="v0.3.0.dev20260622062346"
|
VLLM_METAL_VERSION="v0.3.0.dev20260628073537"
|
||||||
|
|
||||||
# The coupled vLLM source version is whatever this vllm-metal release builds
|
# The coupled vLLM source version is whatever this vllm-metal release builds
|
||||||
# against -- it declares it in its own installer as `vllm_v=`. Derive it from
|
# against -- it declares it in its own installer as `vllm_v=`. Derive it from
|
||||||
|
|||||||
@@ -429,7 +429,7 @@ func (l *Launcher) CheckForUpdates() (bool, string, error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// DownloadUpdate downloads the latest version
|
// DownloadUpdate downloads the latest version
|
||||||
func (l *Launcher) DownloadUpdate(version string, progressCallback func(float64)) error {
|
func (l *Launcher) DownloadUpdate(version string, progressCallback func(downloaded, total int64)) error {
|
||||||
return l.releaseManager.DownloadRelease(version, progressCallback)
|
return l.releaseManager.DownloadRelease(version, progressCallback)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -486,7 +486,6 @@ func (l *Launcher) showDownloadLocalAIDialog() {
|
|||||||
fyne.DoAndWait(func() {
|
fyne.DoAndWait(func() {
|
||||||
// Create a standalone window for the download dialog
|
// Create a standalone window for the download dialog
|
||||||
dialogWindow := l.app.NewWindow("LocalAI Installation Required")
|
dialogWindow := l.app.NewWindow("LocalAI Installation Required")
|
||||||
dialogWindow.Resize(fyne.NewSize(500, 350))
|
|
||||||
dialogWindow.CenterOnScreen()
|
dialogWindow.CenterOnScreen()
|
||||||
dialogWindow.SetCloseIntercept(func() {
|
dialogWindow.SetCloseIntercept(func() {
|
||||||
dialogWindow.Close()
|
dialogWindow.Close()
|
||||||
@@ -548,6 +547,7 @@ func (l *Launcher) showDownloadLocalAIDialog() {
|
|||||||
)
|
)
|
||||||
|
|
||||||
dialogWindow.SetContent(content)
|
dialogWindow.SetContent(content)
|
||||||
|
resizeToContent(dialogWindow, content)
|
||||||
dialogWindow.Show()
|
dialogWindow.Show()
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
@@ -621,88 +621,134 @@ func (l *Launcher) showDownloadError(title, message string) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// showDownloadProgress shows a standalone progress window for downloading LocalAI
|
// showDownloadProgress shows a standalone progress window for downloading LocalAI
|
||||||
|
// after a fresh install (no LocalAI binary present yet).
|
||||||
func (l *Launcher) showDownloadProgress(version, title string) {
|
func (l *Launcher) showDownloadProgress(version, title string) {
|
||||||
|
l.showDownloadProgressWindow(version, title, func(win fyne.Window) {
|
||||||
|
dialog.ShowConfirm("Installation Complete",
|
||||||
|
"LocalAI has been downloaded and installed successfully. You can now start LocalAI from the launcher.",
|
||||||
|
func(bool) {
|
||||||
|
win.Close()
|
||||||
|
l.updateStatus("LocalAI installed successfully")
|
||||||
|
if l.systray != nil {
|
||||||
|
l.systray.recreateMenu()
|
||||||
|
}
|
||||||
|
}, win)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
// showDownloadProgressWindow renders the download progress popup shared by every
|
||||||
|
// "download/upgrade LocalAI" entry point. It owns the progress bar, the
|
||||||
|
// human-readable byte readout, resume-aware retry, and content-fit window
|
||||||
|
// sizing so the behaviour stays identical everywhere. onSuccess runs (on the UI
|
||||||
|
// goroutine) once the download verifies, and is responsible for the success
|
||||||
|
// dialog and any follow-up; the window is passed in so it can be parented/closed.
|
||||||
|
func (l *Launcher) showDownloadProgressWindow(version, title string, onSuccess func(win fyne.Window)) {
|
||||||
fyne.DoAndWait(func() {
|
fyne.DoAndWait(func() {
|
||||||
// Create progress window
|
|
||||||
progressWindow := l.app.NewWindow("Downloading LocalAI")
|
progressWindow := l.app.NewWindow("Downloading LocalAI")
|
||||||
progressWindow.Resize(fyne.NewSize(400, 250))
|
|
||||||
progressWindow.CenterOnScreen()
|
progressWindow.CenterOnScreen()
|
||||||
progressWindow.SetCloseIntercept(func() {
|
progressWindow.SetCloseIntercept(func() {
|
||||||
progressWindow.Close()
|
progressWindow.Close()
|
||||||
})
|
})
|
||||||
|
|
||||||
// Progress bar
|
|
||||||
progressBar := widget.NewProgressBar()
|
progressBar := widget.NewProgressBar()
|
||||||
progressBar.SetValue(0)
|
progressBar.SetValue(0)
|
||||||
|
|
||||||
// Status label. Truncate with an ellipsis so a long "Download failed:
|
// Status label. Truncate with an ellipsis so a long "Download failed:
|
||||||
// <url>" message can't stretch the window (and progress bar) to fit the
|
// <url>" message can't stretch the window (and progress bar) to fit the
|
||||||
// whole error on one line; the full error is shown in the dialog below.
|
// whole error on one line.
|
||||||
statusLabel := widget.NewLabel("Preparing download...")
|
statusLabel := widget.NewLabel("Preparing download...")
|
||||||
statusLabel.Truncation = fyne.TextTruncateEllipsis
|
statusLabel.Truncation = fyne.TextTruncateEllipsis
|
||||||
|
|
||||||
// Release notes button
|
|
||||||
releaseNotesButton := widget.NewButton("View Release Notes", func() {
|
releaseNotesButton := widget.NewButton("View Release Notes", func() {
|
||||||
releaseNotesURL, err := l.githubReleaseNotesURL(version)
|
releaseNotesURL, err := l.githubReleaseNotesURL(version)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Printf("Failed to parse URL: %v", err)
|
log.Printf("Failed to parse URL: %v", err)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
l.app.OpenURL(releaseNotesURL)
|
l.app.OpenURL(releaseNotesURL)
|
||||||
})
|
})
|
||||||
|
|
||||||
// Progress container
|
// Retry button: hidden until a download fails. GitHub downloads are
|
||||||
progressContainer := container.NewVBox(
|
// flaky, and the underlying download resumes from the partial file, so
|
||||||
|
// a retry continues where it left off rather than starting over.
|
||||||
|
retryButton := widget.NewButton("Retry", nil)
|
||||||
|
retryButton.Importance = widget.HighImportance
|
||||||
|
retryButton.Hide()
|
||||||
|
|
||||||
|
buttonRow := container.NewHBox(releaseNotesButton, retryButton)
|
||||||
|
content := container.NewVBox(
|
||||||
widget.NewLabel(title),
|
widget.NewLabel(title),
|
||||||
progressBar,
|
progressBar,
|
||||||
statusLabel,
|
statusLabel,
|
||||||
widget.NewSeparator(),
|
widget.NewSeparator(),
|
||||||
releaseNotesButton,
|
buttonRow,
|
||||||
)
|
)
|
||||||
|
progressWindow.SetContent(content)
|
||||||
|
resizeToContent(progressWindow, content)
|
||||||
|
|
||||||
progressWindow.SetContent(progressContainer)
|
var startDownload func()
|
||||||
progressWindow.Show()
|
startDownload = func() {
|
||||||
|
retryButton.Hide()
|
||||||
|
progressBar.SetValue(0)
|
||||||
|
statusLabel.SetText("Preparing download...")
|
||||||
|
resizeToContent(progressWindow, content)
|
||||||
|
|
||||||
// Start download in background
|
go func() {
|
||||||
go func() {
|
err := l.DownloadUpdate(version, func(downloaded, total int64) {
|
||||||
err := l.DownloadUpdate(version, func(progress float64) {
|
fyne.Do(func() {
|
||||||
// Update progress bar
|
if total > 0 {
|
||||||
fyne.Do(func() {
|
progressBar.SetValue(float64(downloaded) / float64(total))
|
||||||
progressBar.SetValue(progress)
|
statusLabel.SetText(fmt.Sprintf("Downloading… %s / %s", formatBytes(downloaded), formatBytes(total)))
|
||||||
percentage := int(progress * 100)
|
} else {
|
||||||
statusLabel.SetText(fmt.Sprintf("Downloading... %d%%", percentage))
|
statusLabel.SetText(fmt.Sprintf("Downloading… %s", formatBytes(downloaded)))
|
||||||
|
}
|
||||||
|
})
|
||||||
})
|
})
|
||||||
})
|
|
||||||
|
|
||||||
// Handle completion
|
fyne.Do(func() {
|
||||||
fyne.Do(func() {
|
if err != nil {
|
||||||
if err != nil {
|
statusLabel.SetText(fmt.Sprintf("Download failed: %v", err))
|
||||||
statusLabel.SetText(fmt.Sprintf("Download failed: %v", err))
|
retryButton.Show()
|
||||||
// Show error dialog
|
resizeToContent(progressWindow, content)
|
||||||
dialog.ShowError(err, progressWindow)
|
return
|
||||||
} else {
|
}
|
||||||
statusLabel.SetText("Download completed successfully!")
|
|
||||||
progressBar.SetValue(1.0)
|
progressBar.SetValue(1.0)
|
||||||
|
statusLabel.SetText("Download complete")
|
||||||
|
onSuccess(progressWindow)
|
||||||
|
})
|
||||||
|
}()
|
||||||
|
}
|
||||||
|
retryButton.OnTapped = startDownload
|
||||||
|
|
||||||
// Show success dialog
|
progressWindow.Show()
|
||||||
dialog.ShowConfirm("Installation Complete",
|
startDownload()
|
||||||
"LocalAI has been downloaded and installed successfully. You can now start LocalAI from the launcher.",
|
|
||||||
func(close bool) {
|
|
||||||
progressWindow.Close()
|
|
||||||
// Update status and refresh systray menu
|
|
||||||
l.updateStatus("LocalAI installed successfully")
|
|
||||||
|
|
||||||
if l.systray != nil {
|
|
||||||
l.systray.recreateMenu()
|
|
||||||
}
|
|
||||||
}, progressWindow)
|
|
||||||
}
|
|
||||||
})
|
|
||||||
}()
|
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// resizeToContent sizes a window to fit its content (with a sane minimum width)
|
||||||
|
// so the dialog doesn't show a large blank gap below the last widget.
|
||||||
|
func resizeToContent(w fyne.Window, content fyne.CanvasObject) {
|
||||||
|
size := content.MinSize()
|
||||||
|
if size.Width < 400 {
|
||||||
|
size.Width = 400
|
||||||
|
}
|
||||||
|
w.Resize(size)
|
||||||
|
}
|
||||||
|
|
||||||
|
// formatBytes renders a byte count as a human-readable size (e.g. "12.3 MB").
|
||||||
|
func formatBytes(b int64) string {
|
||||||
|
const unit = 1024
|
||||||
|
if b < unit {
|
||||||
|
return fmt.Sprintf("%d B", b)
|
||||||
|
}
|
||||||
|
div, exp := int64(unit), 0
|
||||||
|
for n := b / unit; n >= unit; n /= unit {
|
||||||
|
div *= unit
|
||||||
|
exp++
|
||||||
|
}
|
||||||
|
return fmt.Sprintf("%.1f %cB", float64(b)/float64(div), "KMGTPE"[exp])
|
||||||
|
}
|
||||||
|
|
||||||
// monitorLogs monitors the output of LocalAI and adds it to the log buffer
|
// monitorLogs monitors the output of LocalAI and adds it to the log buffer
|
||||||
func (l *Launcher) monitorLogs(reader io.Reader, prefix string) {
|
func (l *Launcher) monitorLogs(reader io.Reader, prefix string) {
|
||||||
scanner := bufio.NewScanner(reader)
|
scanner := bufio.NewScanner(reader)
|
||||||
|
|||||||
@@ -11,6 +11,7 @@ import (
|
|||||||
"net/http"
|
"net/http"
|
||||||
"os"
|
"os"
|
||||||
"os/exec"
|
"os/exec"
|
||||||
|
"path"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"runtime"
|
"runtime"
|
||||||
"strings"
|
"strings"
|
||||||
@@ -50,6 +51,12 @@ type ReleaseManager struct {
|
|||||||
ChecksumsPath string
|
ChecksumsPath string
|
||||||
// MetadataPath is where version metadata is stored
|
// MetadataPath is where version metadata is stored
|
||||||
MetadataPath string
|
MetadataPath string
|
||||||
|
// BaseDownloadURL is the base URL release assets are downloaded from
|
||||||
|
// (defaults to https://github.com; overridable for testing)
|
||||||
|
BaseDownloadURL string
|
||||||
|
// RetryBackoff is the base wait between download attempts; the Nth retry
|
||||||
|
// waits N*RetryBackoff (defaults to 1s; lowered in tests)
|
||||||
|
RetryBackoff time.Duration
|
||||||
// HTTPClient is the HTTP client used for downloads
|
// HTTPClient is the HTTP client used for downloads
|
||||||
HTTPClient *http.Client
|
HTTPClient *http.Client
|
||||||
}
|
}
|
||||||
@@ -62,28 +69,94 @@ func NewReleaseManager() *ReleaseManager {
|
|||||||
metadataPath := filepath.Join(homeDir, ".localai", "metadata")
|
metadataPath := filepath.Join(homeDir, ".localai", "metadata")
|
||||||
|
|
||||||
return &ReleaseManager{
|
return &ReleaseManager{
|
||||||
GitHubOwner: "mudler",
|
GitHubOwner: "mudler",
|
||||||
GitHubRepo: "LocalAI",
|
GitHubRepo: "LocalAI",
|
||||||
BinaryPath: binaryPath,
|
BinaryPath: binaryPath,
|
||||||
CurrentVersion: internal.PrintableVersion(),
|
CurrentVersion: internal.PrintableVersion(),
|
||||||
ChecksumsPath: checksumsPath,
|
ChecksumsPath: checksumsPath,
|
||||||
MetadataPath: metadataPath,
|
MetadataPath: metadataPath,
|
||||||
HTTPClient: httpclient.NewWithTimeout(30*time.Second, httpclient.WithFollowRedirects()),
|
BaseDownloadURL: "https://github.com",
|
||||||
|
RetryBackoff: 1 * time.Second,
|
||||||
|
HTTPClient: httpclient.NewWithTimeout(30*time.Second, httpclient.WithFollowRedirects()),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// GetLatestRelease fetches the latest release information from GitHub
|
// GetLatestRelease resolves the latest LocalAI release.
|
||||||
|
//
|
||||||
|
// It first follows the github.com "releases/latest" redirect, which reveals the
|
||||||
|
// latest tag in the final URL and—crucially—is NOT subject to the
|
||||||
|
// 60-requests/hour unauthenticated rate limit of api.github.com. That limit is
|
||||||
|
// per-IP, so on shared/NAT/CGNAT/cloud addresses the API returns 403 almost
|
||||||
|
// immediately (e.g. on a fresh install with no LocalAI present yet). The
|
||||||
|
// redirect avoids that entirely. The richer JSON API is kept only as a fallback.
|
||||||
|
//
|
||||||
|
// Only the version is consumed by callers, so the redirect's tag is sufficient.
|
||||||
func (rm *ReleaseManager) GetLatestRelease() (*Release, error) {
|
func (rm *ReleaseManager) GetLatestRelease() (*Release, error) {
|
||||||
url := fmt.Sprintf("https://api.github.com/repos/%s/%s/releases/latest", rm.GitHubOwner, rm.GitHubRepo)
|
version, redirectErr := rm.latestVersionFromRedirect()
|
||||||
|
if redirectErr == nil {
|
||||||
|
return &Release{Version: version}, nil
|
||||||
|
}
|
||||||
|
log.Printf("Could not resolve latest version via release redirect (%v); falling back to GitHub API", redirectErr)
|
||||||
|
|
||||||
|
release, apiErr := rm.latestReleaseFromAPI()
|
||||||
|
if apiErr != nil {
|
||||||
|
// Surface both failures so a rate-limited API doesn't mask the (usually
|
||||||
|
// more relevant) redirect error.
|
||||||
|
return nil, fmt.Errorf("failed to fetch latest release: %v (redirect: %v)", apiErr, redirectErr)
|
||||||
|
}
|
||||||
|
return release, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// latestVersionFromRedirect returns the latest tag by following the github.com
|
||||||
|
// "releases/latest" redirect to ".../releases/tag/<tag>".
|
||||||
|
func (rm *ReleaseManager) latestVersionFromRedirect() (string, error) {
|
||||||
|
url := fmt.Sprintf("%s/%s/%s/releases/latest", rm.BaseDownloadURL, rm.GitHubOwner, rm.GitHubRepo)
|
||||||
|
|
||||||
resp, err := rm.HTTPClient.Get(url)
|
resp, err := rm.HTTPClient.Get(url)
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
defer resp.Body.Close()
|
||||||
|
|
||||||
|
if resp.StatusCode != http.StatusOK {
|
||||||
|
return "", fmt.Errorf("unexpected status %s", resp.Status)
|
||||||
|
}
|
||||||
|
|
||||||
|
// After the redirect is followed, the final request URL is the tag page.
|
||||||
|
version := path.Base(resp.Request.URL.Path)
|
||||||
|
if version == "" || version == "." || version == "latest" {
|
||||||
|
return "", fmt.Errorf("could not determine version from %s", resp.Request.URL.String())
|
||||||
|
}
|
||||||
|
return version, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// latestReleaseFromAPI fetches the latest release JSON from api.github.com. This
|
||||||
|
// is the fallback path; it is rate-limited unless GITHUB_TOKEN is set.
|
||||||
|
func (rm *ReleaseManager) latestReleaseFromAPI() (*Release, error) {
|
||||||
|
url := fmt.Sprintf("https://api.github.com/repos/%s/%s/releases/latest", rm.GitHubOwner, rm.GitHubRepo)
|
||||||
|
|
||||||
|
req, err := http.NewRequest(http.MethodGet, url, nil)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
req.Header.Set("Accept", "application/vnd.github+json")
|
||||||
|
// An optional token lifts the unauthenticated 60/hour limit to 5000/hour.
|
||||||
|
if token := os.Getenv("GITHUB_TOKEN"); token != "" {
|
||||||
|
req.Header.Set("Authorization", "Bearer "+token)
|
||||||
|
}
|
||||||
|
|
||||||
|
resp, err := rm.HTTPClient.Do(req)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("failed to fetch latest release: %w", err)
|
return nil, fmt.Errorf("failed to fetch latest release: %w", err)
|
||||||
}
|
}
|
||||||
defer resp.Body.Close()
|
defer resp.Body.Close()
|
||||||
|
|
||||||
if resp.StatusCode != http.StatusOK {
|
if resp.StatusCode != http.StatusOK {
|
||||||
return nil, fmt.Errorf("failed to fetch latest release: status %d", resp.StatusCode)
|
if (resp.StatusCode == http.StatusForbidden || resp.StatusCode == http.StatusTooManyRequests) &&
|
||||||
|
resp.Header.Get("X-RateLimit-Remaining") == "0" {
|
||||||
|
return nil, fmt.Errorf("GitHub API rate limit exceeded (status %d); retry later or set GITHUB_TOKEN to raise the limit", resp.StatusCode)
|
||||||
|
}
|
||||||
|
return nil, fmt.Errorf("status %d", resp.StatusCode)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Parse the JSON response properly
|
// Parse the JSON response properly
|
||||||
@@ -106,7 +179,7 @@ func (rm *ReleaseManager) GetLatestRelease() (*Release, error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// DownloadRelease downloads a specific version of LocalAI
|
// DownloadRelease downloads a specific version of LocalAI
|
||||||
func (rm *ReleaseManager) DownloadRelease(version string, progressCallback func(float64)) error {
|
func (rm *ReleaseManager) DownloadRelease(version string, progressCallback func(downloaded, total int64)) error {
|
||||||
// Ensure the binary directory exists
|
// Ensure the binary directory exists
|
||||||
if err := os.MkdirAll(rm.BinaryPath, 0755); err != nil {
|
if err := os.MkdirAll(rm.BinaryPath, 0755); err != nil {
|
||||||
return fmt.Errorf("failed to create binary directory: %w", err)
|
return fmt.Errorf("failed to create binary directory: %w", err)
|
||||||
@@ -117,16 +190,16 @@ func (rm *ReleaseManager) DownloadRelease(version string, progressCallback func(
|
|||||||
localPath := filepath.Join(rm.BinaryPath, "local-ai")
|
localPath := filepath.Join(rm.BinaryPath, "local-ai")
|
||||||
|
|
||||||
// Download the binary
|
// Download the binary
|
||||||
downloadURL := fmt.Sprintf("https://github.com/%s/%s/releases/download/%s/%s",
|
downloadURL := fmt.Sprintf("%s/%s/%s/releases/download/%s/%s",
|
||||||
rm.GitHubOwner, rm.GitHubRepo, version, binaryName)
|
rm.BaseDownloadURL, rm.GitHubOwner, rm.GitHubRepo, version, binaryName)
|
||||||
|
|
||||||
if err := rm.downloadFile(downloadURL, localPath, progressCallback); err != nil {
|
if err := rm.downloadFile(downloadURL, localPath, progressCallback); err != nil {
|
||||||
return fmt.Errorf("failed to download binary: %w", err)
|
return fmt.Errorf("failed to download binary: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Download and verify checksums
|
// Download and verify checksums
|
||||||
checksumURL := fmt.Sprintf("https://github.com/%s/%s/releases/download/%s/LocalAI-%s-checksums.txt",
|
checksumURL := fmt.Sprintf("%s/%s/%s/releases/download/%s/LocalAI-%s-checksums.txt",
|
||||||
rm.GitHubOwner, rm.GitHubRepo, version, version)
|
rm.BaseDownloadURL, rm.GitHubOwner, rm.GitHubRepo, version, version)
|
||||||
|
|
||||||
checksumPath := filepath.Join(rm.BinaryPath, "checksums.txt")
|
checksumPath := filepath.Join(rm.BinaryPath, "checksums.txt")
|
||||||
manualChecksumPath := filepath.Join(rm.ChecksumsPath, fmt.Sprintf("checksums-%s.txt", version))
|
manualChecksumPath := filepath.Join(rm.ChecksumsPath, fmt.Sprintf("checksums-%s.txt", version))
|
||||||
@@ -154,6 +227,10 @@ func (rm *ReleaseManager) DownloadRelease(version string, progressCallback func(
|
|||||||
// Verify the checksum if we have a checksum file
|
// Verify the checksum if we have a checksum file
|
||||||
if _, err := os.Stat(checksumPath); err == nil {
|
if _, err := os.Stat(checksumPath); err == nil {
|
||||||
if err := rm.VerifyChecksum(localPath, checksumPath, binaryName); err != nil {
|
if err := rm.VerifyChecksum(localPath, checksumPath, binaryName); err != nil {
|
||||||
|
// Discard the corrupt binary (and any leftover partial) so the next
|
||||||
|
// retry starts from a clean slate rather than resuming corruption.
|
||||||
|
os.Remove(localPath)
|
||||||
|
os.Remove(localPath + ".part")
|
||||||
return fmt.Errorf("checksum verification failed: %w", err)
|
return fmt.Errorf("checksum verification failed: %w", err)
|
||||||
}
|
}
|
||||||
log.Printf("Checksum verification successful")
|
log.Printf("Checksum verification successful")
|
||||||
@@ -196,44 +273,88 @@ func (rm *ReleaseManager) GetBinaryName(version string) string {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// downloadFile downloads a file from a URL to a local path with optional progress callback
|
// downloadFile downloads a file from a URL to a local path with optional progress callback
|
||||||
func (rm *ReleaseManager) downloadFile(url, filepath string, progressCallback func(float64)) error {
|
func (rm *ReleaseManager) downloadFile(url, filepath string, progressCallback func(downloaded, total int64)) error {
|
||||||
return rm.downloadFileWithRetry(url, filepath, progressCallback, 3)
|
return rm.downloadFileWithRetry(url, filepath, progressCallback, 3)
|
||||||
}
|
}
|
||||||
|
|
||||||
// downloadFileWithRetry downloads a file from a URL with retry logic
|
// downloadFileWithRetry downloads a file with retry and HTTP Range resume.
|
||||||
func (rm *ReleaseManager) downloadFileWithRetry(url, filepath string, progressCallback func(float64), maxRetries int) error {
|
//
|
||||||
|
// The body is streamed to "<dest>.part" and only renamed to dest on success, so
|
||||||
|
// a dropped connection leaves a partial file that the next attempt continues via
|
||||||
|
// a "Range: bytes=N-" request instead of restarting from zero. This matters for
|
||||||
|
// GitHub release downloads, which are large and flaky.
|
||||||
|
func (rm *ReleaseManager) downloadFileWithRetry(url, dest string, progressCallback func(downloaded, total int64), maxRetries int) error {
|
||||||
|
partPath := dest + ".part"
|
||||||
var lastErr error
|
var lastErr error
|
||||||
|
|
||||||
for attempt := 1; attempt <= maxRetries; attempt++ {
|
for attempt := 1; attempt <= maxRetries; attempt++ {
|
||||||
if attempt > 1 {
|
if attempt > 1 {
|
||||||
log.Printf("Retrying download (attempt %d/%d): %s", attempt, maxRetries, url)
|
log.Printf("Retrying download (attempt %d/%d): %s", attempt, maxRetries, url)
|
||||||
time.Sleep(time.Duration(attempt) * time.Second)
|
time.Sleep(time.Duration(attempt) * rm.RetryBackoff)
|
||||||
}
|
}
|
||||||
|
|
||||||
resp, err := rm.HTTPClient.Get(url)
|
// Resume from however much we already have on disk.
|
||||||
|
var offset int64
|
||||||
|
if fi, err := os.Stat(partPath); err == nil {
|
||||||
|
offset = fi.Size()
|
||||||
|
}
|
||||||
|
|
||||||
|
req, err := http.NewRequest(http.MethodGet, url, nil)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if offset > 0 {
|
||||||
|
req.Header.Set("Range", fmt.Sprintf("bytes=%d-", offset))
|
||||||
|
}
|
||||||
|
|
||||||
|
resp, err := rm.HTTPClient.Do(req)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
lastErr = err
|
lastErr = err
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
if resp.StatusCode != http.StatusOK {
|
switch resp.StatusCode {
|
||||||
|
case http.StatusOK:
|
||||||
|
// Server ignored the Range (or we had nothing): start fresh.
|
||||||
|
offset = 0
|
||||||
|
case http.StatusPartialContent:
|
||||||
|
// Resume: append to the existing partial file.
|
||||||
|
case http.StatusRequestedRangeNotSatisfiable:
|
||||||
|
// Stale or already-complete partial: discard and restart fresh.
|
||||||
|
resp.Body.Close()
|
||||||
|
os.Remove(partPath)
|
||||||
|
lastErr = fmt.Errorf("partial download no longer valid (status %s), restarting", resp.Status)
|
||||||
|
continue
|
||||||
|
default:
|
||||||
resp.Body.Close()
|
resp.Body.Close()
|
||||||
lastErr = fmt.Errorf("bad status: %s", resp.Status)
|
lastErr = fmt.Errorf("bad status: %s", resp.Status)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
out, err := os.Create(filepath)
|
var out *os.File
|
||||||
|
if offset > 0 {
|
||||||
|
out, err = os.OpenFile(partPath, os.O_WRONLY|os.O_APPEND, 0644)
|
||||||
|
} else {
|
||||||
|
out, err = os.Create(partPath)
|
||||||
|
}
|
||||||
if err != nil {
|
if err != nil {
|
||||||
resp.Body.Close()
|
resp.Body.Close()
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
// Create a progress reader if callback is provided
|
// On a 206 the Content-Length is the remaining bytes, so the full size
|
||||||
|
// is what we already have plus what's still to come.
|
||||||
|
total := resp.ContentLength
|
||||||
|
if offset > 0 && total > 0 {
|
||||||
|
total += offset
|
||||||
|
}
|
||||||
|
|
||||||
var reader io.Reader = resp.Body
|
var reader io.Reader = resp.Body
|
||||||
if progressCallback != nil && resp.ContentLength > 0 {
|
if progressCallback != nil && total > 0 {
|
||||||
reader = &progressReader{
|
reader = &progressReader{
|
||||||
Reader: resp.Body,
|
Reader: resp.Body,
|
||||||
Total: resp.ContentLength,
|
Total: total,
|
||||||
|
Current: offset,
|
||||||
Callback: progressCallback,
|
Callback: progressCallback,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -243,11 +364,14 @@ func (rm *ReleaseManager) downloadFileWithRetry(url, filepath string, progressCa
|
|||||||
out.Close()
|
out.Close()
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
// Keep the partial file so the next attempt can resume from it.
|
||||||
lastErr = err
|
lastErr = err
|
||||||
os.Remove(filepath)
|
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if err := os.Rename(partPath, dest); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -322,20 +446,21 @@ func (rm *ReleaseManager) saveVersionMetadata(version string) error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// progressReader wraps an io.Reader to provide download progress
|
// progressReader wraps an io.Reader to provide download progress as a
|
||||||
|
// (downloaded, total) byte count so callers can render both a progress bar and
|
||||||
|
// a human-readable size.
|
||||||
type progressReader struct {
|
type progressReader struct {
|
||||||
io.Reader
|
io.Reader
|
||||||
Total int64
|
Total int64
|
||||||
Current int64
|
Current int64
|
||||||
Callback func(float64)
|
Callback func(downloaded, total int64)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (pr *progressReader) Read(p []byte) (int, error) {
|
func (pr *progressReader) Read(p []byte) (int, error) {
|
||||||
n, err := pr.Reader.Read(p)
|
n, err := pr.Reader.Read(p)
|
||||||
pr.Current += int64(n)
|
pr.Current += int64(n)
|
||||||
if pr.Callback != nil {
|
if pr.Callback != nil {
|
||||||
progress := float64(pr.Current) / float64(pr.Total)
|
pr.Callback(pr.Current, pr.Total)
|
||||||
pr.Callback(progress)
|
|
||||||
}
|
}
|
||||||
return n, err
|
return n, err
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,9 +1,17 @@
|
|||||||
package launcher_test
|
package launcher_test
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"crypto/sha256"
|
||||||
|
"encoding/hex"
|
||||||
|
"fmt"
|
||||||
|
"net/http"
|
||||||
|
"net/http/httptest"
|
||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"runtime"
|
"runtime"
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
. "github.com/onsi/ginkgo/v2"
|
. "github.com/onsi/ginkgo/v2"
|
||||||
@@ -178,4 +186,221 @@ var _ = Describe("ReleaseManager", func() {
|
|||||||
Expect(err.Error()).To(ContainSubstring("checksum not found"))
|
Expect(err.Error()).To(ContainSubstring("checksum not found"))
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
|
|
||||||
|
Describe("DownloadRelease resume and retry", func() {
|
||||||
|
var (
|
||||||
|
version string
|
||||||
|
binaryName string
|
||||||
|
content []byte
|
||||||
|
checksums string
|
||||||
|
finalPath string
|
||||||
|
partPath string
|
||||||
|
)
|
||||||
|
|
||||||
|
BeforeEach(func() {
|
||||||
|
version = "v9.9.9"
|
||||||
|
binaryName = rm.GetBinaryName(version)
|
||||||
|
|
||||||
|
// Deterministic, non-trivial content so resume/append bugs surface.
|
||||||
|
content = make([]byte, 4096)
|
||||||
|
for i := range content {
|
||||||
|
content[i] = byte(i % 251)
|
||||||
|
}
|
||||||
|
sum := sha256.Sum256(content)
|
||||||
|
checksums = fmt.Sprintf("%s %s\n", hex.EncodeToString(sum[:]), binaryName)
|
||||||
|
|
||||||
|
finalPath = filepath.Join(tempDir, "local-ai")
|
||||||
|
partPath = finalPath + ".part"
|
||||||
|
|
||||||
|
// Isolate the persistent checksum/metadata dirs to the temp dir so
|
||||||
|
// the test never touches the real ~/.localai and existing checksum
|
||||||
|
// files don't short-circuit the download.
|
||||||
|
rm.ChecksumsPath = filepath.Join(tempDir, "checksums")
|
||||||
|
rm.MetadataPath = filepath.Join(tempDir, "metadata")
|
||||||
|
rm.GitHubOwner = "owner"
|
||||||
|
rm.GitHubRepo = "repo"
|
||||||
|
rm.RetryBackoff = time.Millisecond
|
||||||
|
|
||||||
|
Expect(os.MkdirAll(tempDir, 0755)).To(Succeed())
|
||||||
|
})
|
||||||
|
|
||||||
|
It("resumes from a partial .part file using a Range request", func() {
|
||||||
|
Expect(os.WriteFile(partPath, content[:1024], 0644)).To(Succeed())
|
||||||
|
|
||||||
|
var mu sync.Mutex
|
||||||
|
sawRange := false
|
||||||
|
binBytesServed := 0
|
||||||
|
|
||||||
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
if strings.HasSuffix(r.URL.Path, "checksums.txt") {
|
||||||
|
_, _ = w.Write([]byte(checksums))
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if rangeHdr := r.Header.Get("Range"); rangeHdr != "" {
|
||||||
|
var start int
|
||||||
|
_, _ = fmt.Sscanf(rangeHdr, "bytes=%d-", &start)
|
||||||
|
mu.Lock()
|
||||||
|
sawRange = true
|
||||||
|
mu.Unlock()
|
||||||
|
w.Header().Set("Content-Range", fmt.Sprintf("bytes %d-%d/%d", start, len(content)-1, len(content)))
|
||||||
|
w.WriteHeader(http.StatusPartialContent)
|
||||||
|
n, _ := w.Write(content[start:])
|
||||||
|
mu.Lock()
|
||||||
|
binBytesServed += n
|
||||||
|
mu.Unlock()
|
||||||
|
return
|
||||||
|
}
|
||||||
|
w.WriteHeader(http.StatusOK)
|
||||||
|
n, _ := w.Write(content)
|
||||||
|
mu.Lock()
|
||||||
|
binBytesServed += n
|
||||||
|
mu.Unlock()
|
||||||
|
}))
|
||||||
|
defer srv.Close()
|
||||||
|
rm.BaseDownloadURL = srv.URL
|
||||||
|
|
||||||
|
err := rm.DownloadRelease(version, nil)
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
|
||||||
|
got, err := os.ReadFile(finalPath)
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
Expect(got).To(Equal(content))
|
||||||
|
Expect(sawRange).To(BeTrue(), "expected the download to resume with a Range request")
|
||||||
|
Expect(binBytesServed).To(Equal(len(content)-1024), "expected only the remaining bytes to be served")
|
||||||
|
Expect(partPath).ToNot(BeAnExistingFile())
|
||||||
|
})
|
||||||
|
|
||||||
|
It("starts fresh when the server ignores the Range header (200)", func() {
|
||||||
|
// A stale/garbage partial that must NOT be appended to.
|
||||||
|
Expect(os.WriteFile(partPath, []byte("garbage-garbage-garbage"), 0644)).To(Succeed())
|
||||||
|
|
||||||
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
if strings.HasSuffix(r.URL.Path, "checksums.txt") {
|
||||||
|
_, _ = w.Write([]byte(checksums))
|
||||||
|
return
|
||||||
|
}
|
||||||
|
// Ignore any Range and always serve the full body.
|
||||||
|
w.WriteHeader(http.StatusOK)
|
||||||
|
_, _ = w.Write(content)
|
||||||
|
}))
|
||||||
|
defer srv.Close()
|
||||||
|
rm.BaseDownloadURL = srv.URL
|
||||||
|
|
||||||
|
err := rm.DownloadRelease(version, nil)
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
|
||||||
|
got, err := os.ReadFile(finalPath)
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
Expect(got).To(Equal(content))
|
||||||
|
})
|
||||||
|
|
||||||
|
It("restarts the download when the partial is stale (416)", func() {
|
||||||
|
// Oversized partial -> requested Range start is beyond the content.
|
||||||
|
Expect(os.WriteFile(partPath, make([]byte, len(content)+10), 0644)).To(Succeed())
|
||||||
|
|
||||||
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
if strings.HasSuffix(r.URL.Path, "checksums.txt") {
|
||||||
|
_, _ = w.Write([]byte(checksums))
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if rangeHdr := r.Header.Get("Range"); rangeHdr != "" {
|
||||||
|
var start int
|
||||||
|
_, _ = fmt.Sscanf(rangeHdr, "bytes=%d-", &start)
|
||||||
|
if start >= len(content) {
|
||||||
|
w.WriteHeader(http.StatusRequestedRangeNotSatisfiable)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
w.Header().Set("Content-Range", fmt.Sprintf("bytes %d-%d/%d", start, len(content)-1, len(content)))
|
||||||
|
w.WriteHeader(http.StatusPartialContent)
|
||||||
|
_, _ = w.Write(content[start:])
|
||||||
|
return
|
||||||
|
}
|
||||||
|
w.WriteHeader(http.StatusOK)
|
||||||
|
_, _ = w.Write(content)
|
||||||
|
}))
|
||||||
|
defer srv.Close()
|
||||||
|
rm.BaseDownloadURL = srv.URL
|
||||||
|
|
||||||
|
err := rm.DownloadRelease(version, nil)
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
|
||||||
|
got, err := os.ReadFile(finalPath)
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
Expect(got).To(Equal(content))
|
||||||
|
})
|
||||||
|
|
||||||
|
It("removes the downloaded file when checksum verification fails", func() {
|
||||||
|
bad := []byte("this is definitely not the expected binary content")
|
||||||
|
|
||||||
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
if strings.HasSuffix(r.URL.Path, "checksums.txt") {
|
||||||
|
// Checksums are for `content`, but we serve `bad`.
|
||||||
|
_, _ = w.Write([]byte(checksums))
|
||||||
|
return
|
||||||
|
}
|
||||||
|
w.WriteHeader(http.StatusOK)
|
||||||
|
_, _ = w.Write(bad)
|
||||||
|
}))
|
||||||
|
defer srv.Close()
|
||||||
|
rm.BaseDownloadURL = srv.URL
|
||||||
|
|
||||||
|
err := rm.DownloadRelease(version, nil)
|
||||||
|
Expect(err).To(HaveOccurred())
|
||||||
|
Expect(err.Error()).To(ContainSubstring("checksum"))
|
||||||
|
Expect(finalPath).ToNot(BeAnExistingFile())
|
||||||
|
Expect(partPath).ToNot(BeAnExistingFile())
|
||||||
|
})
|
||||||
|
|
||||||
|
It("reports progress as downloaded and total byte counts", func() {
|
||||||
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
if strings.HasSuffix(r.URL.Path, "checksums.txt") {
|
||||||
|
_, _ = w.Write([]byte(checksums))
|
||||||
|
return
|
||||||
|
}
|
||||||
|
w.Header().Set("Content-Length", strconv.Itoa(len(content)))
|
||||||
|
w.WriteHeader(http.StatusOK)
|
||||||
|
_, _ = w.Write(content)
|
||||||
|
}))
|
||||||
|
defer srv.Close()
|
||||||
|
rm.BaseDownloadURL = srv.URL
|
||||||
|
|
||||||
|
var mu sync.Mutex
|
||||||
|
var lastDownloaded, lastTotal int64
|
||||||
|
err := rm.DownloadRelease(version, func(downloaded, total int64) {
|
||||||
|
mu.Lock()
|
||||||
|
lastDownloaded = downloaded
|
||||||
|
lastTotal = total
|
||||||
|
mu.Unlock()
|
||||||
|
})
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
Expect(lastTotal).To(Equal(int64(len(content))))
|
||||||
|
Expect(lastDownloaded).To(Equal(int64(len(content))))
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
Describe("GetLatestRelease", func() {
|
||||||
|
It("resolves the latest version from the releases/latest redirect", func() {
|
||||||
|
// The github.com redirect path must be preferred over the
|
||||||
|
// rate-limited api.github.com, so a working redirect yields the tag
|
||||||
|
// without ever needing the API.
|
||||||
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
switch {
|
||||||
|
case strings.HasSuffix(r.URL.Path, "/releases/latest"):
|
||||||
|
http.Redirect(w, r, "/owner/repo/releases/tag/v9.9.9", http.StatusFound)
|
||||||
|
case strings.HasSuffix(r.URL.Path, "/releases/tag/v9.9.9"):
|
||||||
|
w.WriteHeader(http.StatusOK)
|
||||||
|
default:
|
||||||
|
w.WriteHeader(http.StatusNotFound)
|
||||||
|
}
|
||||||
|
}))
|
||||||
|
defer srv.Close()
|
||||||
|
rm.BaseDownloadURL = srv.URL
|
||||||
|
rm.GitHubOwner = "owner"
|
||||||
|
rm.GitHubRepo = "repo"
|
||||||
|
|
||||||
|
release, err := rm.GetLatestRelease()
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
Expect(release.Version).To(Equal("v9.9.9"))
|
||||||
|
})
|
||||||
|
})
|
||||||
})
|
})
|
||||||
|
|||||||
@@ -443,84 +443,23 @@ func (sm *SystrayManager) showStartupErrorDialog(err error) {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
// showDownloadProgress shows a progress window for downloading updates
|
// showDownloadProgress shows a progress window for downloading updates. The
|
||||||
|
// progress UI (byte readout, resume-aware retry, sizing) is shared with the
|
||||||
|
// other download entry points via the launcher; only the post-success behaviour
|
||||||
|
// (restart prompt + systray refresh) is specific to the update flow.
|
||||||
func (sm *SystrayManager) showDownloadProgress(version string) {
|
func (sm *SystrayManager) showDownloadProgress(version string) {
|
||||||
// Create a new window for download progress
|
sm.launcher.showDownloadProgressWindow(version, fmt.Sprintf("Downloading LocalAI version %s", version), func(win fyne.Window) {
|
||||||
progressWindow := sm.app.NewWindow("Downloading LocalAI Update")
|
dialog.ShowConfirm("Update Downloaded",
|
||||||
progressWindow.Resize(fyne.NewSize(400, 250))
|
"LocalAI has been updated successfully. Please restart the launcher to use the new version.",
|
||||||
progressWindow.CenterOnScreen()
|
func(restart bool) {
|
||||||
|
if restart {
|
||||||
|
sm.app.Quit()
|
||||||
|
}
|
||||||
|
win.Close()
|
||||||
|
}, win)
|
||||||
|
|
||||||
// Progress bar
|
sm.hasUpdateAvailable = false
|
||||||
progressBar := widget.NewProgressBar()
|
sm.latestVersion = ""
|
||||||
progressBar.SetValue(0)
|
sm.recreateMenu()
|
||||||
|
|
||||||
// Status label. Truncate with an ellipsis so a long "Download failed:
|
|
||||||
// <url>" message can't stretch the window (and progress bar) to fit the
|
|
||||||
// whole error on one line; the full error is shown in the dialog below.
|
|
||||||
statusLabel := widget.NewLabel("Preparing download...")
|
|
||||||
statusLabel.Truncation = fyne.TextTruncateEllipsis
|
|
||||||
|
|
||||||
// Release notes button
|
|
||||||
releaseNotesButton := widget.NewButton("View Release Notes", func() {
|
|
||||||
releaseNotesURL, err := sm.launcher.githubReleaseNotesURL(version)
|
|
||||||
if err != nil {
|
|
||||||
log.Printf("Failed to parse URL: %v", err)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
sm.app.OpenURL(releaseNotesURL)
|
|
||||||
})
|
})
|
||||||
|
|
||||||
// Progress container
|
|
||||||
progressContainer := container.NewVBox(
|
|
||||||
widget.NewLabel(fmt.Sprintf("Downloading LocalAI version %s", version)),
|
|
||||||
progressBar,
|
|
||||||
statusLabel,
|
|
||||||
widget.NewSeparator(),
|
|
||||||
releaseNotesButton,
|
|
||||||
)
|
|
||||||
|
|
||||||
progressWindow.SetContent(progressContainer)
|
|
||||||
progressWindow.Show()
|
|
||||||
|
|
||||||
// Start download in background
|
|
||||||
go func() {
|
|
||||||
err := sm.launcher.DownloadUpdate(version, func(progress float64) {
|
|
||||||
// Update progress bar
|
|
||||||
fyne.Do(func() {
|
|
||||||
progressBar.SetValue(progress)
|
|
||||||
percentage := int(progress * 100)
|
|
||||||
statusLabel.SetText(fmt.Sprintf("Downloading... %d%%", percentage))
|
|
||||||
})
|
|
||||||
})
|
|
||||||
|
|
||||||
// Handle completion
|
|
||||||
fyne.Do(func() {
|
|
||||||
if err != nil {
|
|
||||||
statusLabel.SetText(fmt.Sprintf("Download failed: %v", err))
|
|
||||||
// Show error dialog
|
|
||||||
dialog.ShowError(err, progressWindow)
|
|
||||||
} else {
|
|
||||||
statusLabel.SetText("Download completed successfully!")
|
|
||||||
progressBar.SetValue(1.0)
|
|
||||||
|
|
||||||
// Show restart dialog
|
|
||||||
dialog.ShowConfirm("Update Downloaded",
|
|
||||||
"LocalAI has been updated successfully. Please restart the launcher to use the new version.",
|
|
||||||
func(restart bool) {
|
|
||||||
if restart {
|
|
||||||
sm.app.Quit()
|
|
||||||
}
|
|
||||||
progressWindow.Close()
|
|
||||||
}, progressWindow)
|
|
||||||
}
|
|
||||||
})
|
|
||||||
|
|
||||||
// Update systray menu
|
|
||||||
if err == nil {
|
|
||||||
sm.hasUpdateAvailable = false
|
|
||||||
sm.latestVersion = ""
|
|
||||||
sm.recreateMenu()
|
|
||||||
}
|
|
||||||
}()
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -490,14 +490,19 @@ func (ui *LauncherUI) downloadUpdate() {
|
|||||||
ui.UpdateStatus("Downloading update " + version + "...")
|
ui.UpdateStatus("Downloading update " + version + "...")
|
||||||
|
|
||||||
go func() {
|
go func() {
|
||||||
err := ui.launcher.DownloadUpdate(version, func(progress float64) {
|
err := ui.launcher.DownloadUpdate(version, func(downloaded, total int64) {
|
||||||
// Update progress bar
|
|
||||||
fyne.Do(func() {
|
fyne.Do(func() {
|
||||||
ui.progressBar.SetValue(progress)
|
if total > 0 {
|
||||||
|
ui.progressBar.SetValue(float64(downloaded) / float64(total))
|
||||||
|
}
|
||||||
})
|
})
|
||||||
// Update status with percentage
|
// The progress bar already shows the percentage, so report the
|
||||||
percentage := int(progress * 100)
|
// human-readable size here instead of repeating the percent.
|
||||||
ui.UpdateStatus(fmt.Sprintf("Downloading update %s... %d%%", version, percentage))
|
if total > 0 {
|
||||||
|
ui.UpdateStatus(fmt.Sprintf("Downloading update %s… %s / %s", version, formatBytes(downloaded), formatBytes(total)))
|
||||||
|
} else {
|
||||||
|
ui.UpdateStatus(fmt.Sprintf("Downloading update %s… %s", version, formatBytes(downloaded)))
|
||||||
|
}
|
||||||
})
|
})
|
||||||
|
|
||||||
fyne.Do(func() {
|
fyne.Do(func() {
|
||||||
@@ -598,82 +603,6 @@ func (ui *LauncherUI) LoadConfiguration() {
|
|||||||
log.Printf("UI LoadConfiguration: configuration loaded successfully")
|
log.Printf("UI LoadConfiguration: configuration loaded successfully")
|
||||||
}
|
}
|
||||||
|
|
||||||
// showDownloadProgress shows a progress window for downloading LocalAI
|
|
||||||
func (ui *LauncherUI) showDownloadProgress(version, title string) {
|
|
||||||
fyne.DoAndWait(func() {
|
|
||||||
// Create progress window using the launcher's app
|
|
||||||
progressWindow := ui.launcher.app.NewWindow("Downloading LocalAI")
|
|
||||||
progressWindow.Resize(fyne.NewSize(400, 250))
|
|
||||||
progressWindow.CenterOnScreen()
|
|
||||||
|
|
||||||
// Progress bar
|
|
||||||
progressBar := widget.NewProgressBar()
|
|
||||||
progressBar.SetValue(0)
|
|
||||||
|
|
||||||
// Status label. Truncate with an ellipsis so a long "Download failed:
|
|
||||||
// <url>" message can't stretch the window (and progress bar) to fit the
|
|
||||||
// whole error on one line; the full error is shown in the dialog below.
|
|
||||||
statusLabel := widget.NewLabel("Preparing download...")
|
|
||||||
statusLabel.Truncation = fyne.TextTruncateEllipsis
|
|
||||||
|
|
||||||
// Release notes button
|
|
||||||
releaseNotesButton := widget.NewButton("View Release Notes", func() {
|
|
||||||
releaseNotesURL, err := ui.launcher.githubReleaseNotesURL(version)
|
|
||||||
if err != nil {
|
|
||||||
log.Printf("Failed to parse URL: %v", err)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
ui.launcher.app.OpenURL(releaseNotesURL)
|
|
||||||
})
|
|
||||||
|
|
||||||
// Progress container
|
|
||||||
progressContainer := container.NewVBox(
|
|
||||||
widget.NewLabel(title),
|
|
||||||
progressBar,
|
|
||||||
statusLabel,
|
|
||||||
widget.NewSeparator(),
|
|
||||||
releaseNotesButton,
|
|
||||||
)
|
|
||||||
|
|
||||||
progressWindow.SetContent(progressContainer)
|
|
||||||
progressWindow.Show()
|
|
||||||
|
|
||||||
// Start download in background
|
|
||||||
go func() {
|
|
||||||
err := ui.launcher.DownloadUpdate(version, func(progress float64) {
|
|
||||||
// Update progress bar
|
|
||||||
fyne.Do(func() {
|
|
||||||
progressBar.SetValue(progress)
|
|
||||||
percentage := int(progress * 100)
|
|
||||||
statusLabel.SetText(fmt.Sprintf("Downloading... %d%%", percentage))
|
|
||||||
})
|
|
||||||
})
|
|
||||||
|
|
||||||
// Handle completion
|
|
||||||
fyne.Do(func() {
|
|
||||||
if err != nil {
|
|
||||||
statusLabel.SetText(fmt.Sprintf("Download failed: %v", err))
|
|
||||||
// Show error dialog
|
|
||||||
dialog.ShowError(err, progressWindow)
|
|
||||||
} else {
|
|
||||||
statusLabel.SetText("Download completed successfully!")
|
|
||||||
progressBar.SetValue(1.0)
|
|
||||||
|
|
||||||
// Show success dialog
|
|
||||||
dialog.ShowConfirm("Installation Complete",
|
|
||||||
"LocalAI has been downloaded and installed successfully. You can now start LocalAI from the launcher.",
|
|
||||||
func(close bool) {
|
|
||||||
progressWindow.Close()
|
|
||||||
// Update status
|
|
||||||
ui.UpdateStatus("LocalAI installed successfully")
|
|
||||||
}, progressWindow)
|
|
||||||
}
|
|
||||||
})
|
|
||||||
}()
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
// UpdateRunningState updates UI based on LocalAI running state
|
// UpdateRunningState updates UI based on LocalAI running state
|
||||||
func (ui *LauncherUI) UpdateRunningState(isRunning bool) {
|
func (ui *LauncherUI) UpdateRunningState(isRunning bool) {
|
||||||
fyne.Do(func() {
|
fyne.Do(func() {
|
||||||
|
|||||||
@@ -356,6 +356,12 @@ func initDistributed(cfg *config.ApplicationConfig, authDB *gorm.DB, configLoade
|
|||||||
PrefixConfig: prefixCfg,
|
PrefixConfig: prefixCfg,
|
||||||
Pressure: pressure,
|
Pressure: pressure,
|
||||||
SharedModels: cfg.Distributed.SharedModels,
|
SharedModels: cfg.Distributed.SharedModels,
|
||||||
|
// Cap how long a cold load may hold the per-model advisory lock: the
|
||||||
|
// configured backend.install deadline plus a margin for file staging and
|
||||||
|
// the remote LoadModel. Derived from the install timeout so raising it
|
||||||
|
// (for slow links pulling multi-GB images) widens the ceiling too,
|
||||||
|
// instead of letting the static default cut a legitimately slow load.
|
||||||
|
ModelLoadCeiling: cfg.Distributed.BackendInstallTimeoutOrDefault() + 10*time.Minute,
|
||||||
})
|
})
|
||||||
|
|
||||||
// Wire staging-progress broadcasting so file-staging shows up on every
|
// Wire staging-progress broadcasting so file-staging shows up on every
|
||||||
|
|||||||
@@ -130,6 +130,20 @@ func WithLockCtx(ctx context.Context, db *gorm.DB, key int64, fn func() error) e
|
|||||||
}
|
}
|
||||||
defer conn.Close()
|
defer conn.Close()
|
||||||
|
|
||||||
|
// Neutralize any deployment-wide lock_timeout on this dedicated connection.
|
||||||
|
// Operators commonly set a short global lock_timeout (on the role or
|
||||||
|
// database) to bound ordinary row-lock waits. Applied to the blocking
|
||||||
|
// pg_advisory_lock below, it aborts the wait with SQLSTATE 55P03 and turns
|
||||||
|
// LocalAI's intentional cross-replica "wait your turn, then re-check"
|
||||||
|
// coordination into a hard error for the caller (e.g. a chat request that
|
||||||
|
// just wanted to reuse a model another replica is loading). Let the Go
|
||||||
|
// context be the single source of truth for how long we wait instead.
|
||||||
|
if _, err := conn.ExecContext(ctx, "SET lock_timeout = 0"); err != nil {
|
||||||
|
return fmt.Errorf("advisorylock: disabling lock_timeout: %w", err)
|
||||||
|
}
|
||||||
|
// Restore the session default before this pooled connection is reused.
|
||||||
|
defer func() { _, _ = conn.ExecContext(context.Background(), "RESET lock_timeout") }()
|
||||||
|
|
||||||
if _, err := conn.ExecContext(ctx, "SELECT pg_advisory_lock($1)", key); err != nil {
|
if _, err := conn.ExecContext(ctx, "SELECT pg_advisory_lock($1)", key); err != nil {
|
||||||
return fmt.Errorf("advisorylock: acquiring lock %d: %w", key, err)
|
return fmt.Errorf("advisorylock: acquiring lock %d: %w", key, err)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -158,6 +158,53 @@ var _ = Describe("AdvisoryLock", func() {
|
|||||||
Expect(err).To(HaveOccurred())
|
Expect(err).To(HaveOccurred())
|
||||||
})
|
})
|
||||||
|
|
||||||
|
It("waits out a short server-side lock_timeout instead of failing with 55P03", func() {
|
||||||
|
const lockKey int64 = 703
|
||||||
|
|
||||||
|
// Reproduce the production deployment that triggered this: a short
|
||||||
|
// global lock_timeout set on the database. Without the fix, a waiter
|
||||||
|
// blocked on pg_advisory_lock() is aborted by the server after this
|
||||||
|
// window and surfaces SQLSTATE 55P03 ("canceling statement due to
|
||||||
|
// lock timeout") to the caller instead of waiting for its turn.
|
||||||
|
Expect(db.Exec("ALTER DATABASE testdb SET lock_timeout = '300ms'").Error).ToNot(HaveOccurred())
|
||||||
|
sqlDB, err := db.DB()
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
// Drop pooled connections so subsequent ones reconnect and inherit
|
||||||
|
// the new database-level lock_timeout default.
|
||||||
|
sqlDB.SetMaxIdleConns(0)
|
||||||
|
|
||||||
|
holding := make(chan struct{})
|
||||||
|
released := make(chan struct{})
|
||||||
|
go func() {
|
||||||
|
defer GinkgoRecover()
|
||||||
|
herr := WithLockCtx(context.Background(), db, lockKey, func() error {
|
||||||
|
close(holding)
|
||||||
|
// Hold well past the 300ms server lock_timeout.
|
||||||
|
time.Sleep(1 * time.Second)
|
||||||
|
return nil
|
||||||
|
})
|
||||||
|
Expect(herr).ToNot(HaveOccurred())
|
||||||
|
close(released)
|
||||||
|
}()
|
||||||
|
|
||||||
|
<-holding // ensure the holder owns the lock before we contend
|
||||||
|
|
||||||
|
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
|
||||||
|
defer cancel()
|
||||||
|
executed := false
|
||||||
|
start := time.Now()
|
||||||
|
werr := WithLockCtx(ctx, db, lockKey, func() error {
|
||||||
|
executed = true
|
||||||
|
return nil
|
||||||
|
})
|
||||||
|
Expect(werr).ToNot(HaveOccurred(),
|
||||||
|
"waiter should wait out the in-progress hold, not fail with lock_timeout (55P03)")
|
||||||
|
Expect(executed).To(BeTrue())
|
||||||
|
Expect(time.Since(start)).To(BeNumerically(">=", 400*time.Millisecond),
|
||||||
|
"waiter should have actually waited for the holder to release")
|
||||||
|
<-released
|
||||||
|
})
|
||||||
|
|
||||||
It("serializes concurrent WithLockCtx on same key", func() {
|
It("serializes concurrent WithLockCtx on same key", func() {
|
||||||
const lockKey int64 = 702
|
const lockKey int64 = 702
|
||||||
|
|
||||||
|
|||||||
@@ -68,6 +68,13 @@ type SmartRouterOptions struct {
|
|||||||
// the absolute model paths untouched so the worker loads them directly from
|
// the absolute model paths untouched so the worker loads them directly from
|
||||||
// the shared volume (#10556). See config.DistributedConfig.SharedModels.
|
// the shared volume (#10556). See config.DistributedConfig.SharedModels.
|
||||||
SharedModels bool
|
SharedModels bool
|
||||||
|
// ModelLoadCeiling is the hard upper bound on how long a single cold-load
|
||||||
|
// attempt (node selection -> backend install -> file staging -> LoadModel)
|
||||||
|
// may run while holding the per-model advisory lock. It backstops every
|
||||||
|
// sub-step's own timeout so a wedged worker can never pin the lock - and
|
||||||
|
// every other replica's request for that model - indefinitely. Zero selects
|
||||||
|
// defaultModelLoadCeiling.
|
||||||
|
ModelLoadCeiling time.Duration
|
||||||
}
|
}
|
||||||
|
|
||||||
// SmartRouter routes inference requests to the best available backend node.
|
// SmartRouter routes inference requests to the best available backend node.
|
||||||
@@ -101,8 +108,18 @@ type SmartRouter struct {
|
|||||||
// sharedModels skips file staging when all nodes mount the same models
|
// sharedModels skips file staging when all nodes mount the same models
|
||||||
// directory at the same path (see SmartRouterOptions.SharedModels).
|
// directory at the same path (see SmartRouterOptions.SharedModels).
|
||||||
sharedModels bool
|
sharedModels bool
|
||||||
|
// modelLoadCeiling bounds how long a cold load may hold the per-model
|
||||||
|
// advisory lock (see SmartRouterOptions.ModelLoadCeiling).
|
||||||
|
modelLoadCeiling time.Duration
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// defaultModelLoadCeiling is the fallback hold ceiling for a cold model load.
|
||||||
|
// It must comfortably exceed the slowest legitimate load - a multi-GB backend
|
||||||
|
// install (DefaultBackendInstallTimeout, 15m) plus staging and the remote
|
||||||
|
// LoadModel (5m) - so it never cuts a real load short; it only ever fires when
|
||||||
|
// a step is genuinely wedged (e.g. a worker that died mid-install).
|
||||||
|
const defaultModelLoadCeiling = 25 * time.Minute
|
||||||
|
|
||||||
// probeCacheTTL is how long a successful gRPC HealthCheck on a backend is
|
// probeCacheTTL is how long a successful gRPC HealthCheck on a backend is
|
||||||
// trusted before the next request re-probes. Matches healthCheckTTL in
|
// trusted before the next request re-probes. Matches healthCheckTTL in
|
||||||
// pkg/model/model.go so the single-process and distributed paths share a
|
// pkg/model/model.go so the single-process and distributed paths share a
|
||||||
@@ -117,6 +134,10 @@ func NewSmartRouter(registry ModelRouter, opts SmartRouterOptions) *SmartRouter
|
|||||||
if factory == nil {
|
if factory == nil {
|
||||||
factory = &tokenClientFactory{token: opts.AuthToken}
|
factory = &tokenClientFactory{token: opts.AuthToken}
|
||||||
}
|
}
|
||||||
|
ceiling := opts.ModelLoadCeiling
|
||||||
|
if ceiling <= 0 {
|
||||||
|
ceiling = defaultModelLoadCeiling
|
||||||
|
}
|
||||||
return &SmartRouter{
|
return &SmartRouter{
|
||||||
registry: registry,
|
registry: registry,
|
||||||
unloader: opts.Unloader,
|
unloader: opts.Unloader,
|
||||||
@@ -131,6 +152,7 @@ func NewSmartRouter(registry ModelRouter, opts SmartRouterOptions) *SmartRouter
|
|||||||
prefixConfig: opts.PrefixConfig,
|
prefixConfig: opts.PrefixConfig,
|
||||||
pressure: opts.Pressure,
|
pressure: opts.Pressure,
|
||||||
sharedModels: opts.SharedModels,
|
sharedModels: opts.SharedModels,
|
||||||
|
modelLoadCeiling: ceiling,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -383,11 +405,19 @@ func (r *SmartRouter) Route(ctx context.Context, modelID, modelName, backendType
|
|||||||
// the request context. If staging were bound to it, the multi-GB upload
|
// the request context. If staging were bound to it, the multi-GB upload
|
||||||
// aborts with "context canceled" mid-transfer and large models can never
|
// aborts with "context canceled" mid-transfer and large models can never
|
||||||
// finish staging (the model-load outage). WithoutCancel keeps the request's
|
// finish staging (the model-load outage). WithoutCancel keeps the request's
|
||||||
// values (prefix chain, etc.) but drops its cancellation/deadline. Each
|
// values (prefix chain, etc.) but drops its cancellation/deadline.
|
||||||
// long step still has its own bound (the file stager's resume budget,
|
//
|
||||||
// LoadModel's 5m timeout), and the per-model advisory lock below de-dupes
|
// Detaching from the caller is necessary, but it must not be unbounded: the
|
||||||
// concurrent loaders across replicas.
|
// load runs while holding the per-model advisory lock, and a worker that
|
||||||
loadCtx := context.WithoutCancel(ctx)
|
// dies mid-install (its backend.install never replies) would otherwise pin
|
||||||
|
// that lock (and every other replica's request for the same model) until
|
||||||
|
// the NATS install deadline alone expires. Re-impose a single hard ceiling
|
||||||
|
// over the whole sequence so the lock is always released in bounded time,
|
||||||
|
// even if a sub-step wedges. Each long step still has its own (tighter)
|
||||||
|
// bound; this only backstops them. The per-model advisory lock below
|
||||||
|
// de-dupes concurrent loaders across replicas.
|
||||||
|
loadCtx, cancelLoad := context.WithTimeout(context.WithoutCancel(ctx), r.modelLoadCeiling)
|
||||||
|
defer cancelLoad()
|
||||||
loadModel := func(ctx context.Context) (*RouteResult, error) {
|
loadModel := func(ctx context.Context) (*RouteResult, error) {
|
||||||
// Re-check after acquiring lock — another request may have loaded it
|
// Re-check after acquiring lock — another request may have loaded it
|
||||||
node, nm, err := r.registry.FindAndLockNodeWithModel(ctx, trackingKey, candidateNodeIDs, pref)
|
node, nm, err := r.registry.FindAndLockNodeWithModel(ctx, trackingKey, candidateNodeIDs, pref)
|
||||||
@@ -916,7 +946,14 @@ func (r *SmartRouter) installBackendOnNode(ctx context.Context, node *BackendNod
|
|||||||
}
|
}
|
||||||
|
|
||||||
key := fmt.Sprintf("%s|%s|%s|%d", node.ID, backendType, modelID, replicaIndex)
|
key := fmt.Sprintf("%s|%s|%s|%d", node.ID, backendType, modelID, replicaIndex)
|
||||||
v, err, _ := r.installFlight.Do(key, func() (any, error) {
|
// DoChan rather than Do so this wait honors ctx cancellation. InstallBackend
|
||||||
|
// blocks for its full NATS deadline (15m by default) when a worker accepts
|
||||||
|
// the request but never replies (e.g. it died mid-install). Without ctx
|
||||||
|
// awareness the caller (holding the per-model advisory lock) would sit there
|
||||||
|
// the whole time; here a cancelled ctx (typically the model-load ceiling)
|
||||||
|
// frees the caller promptly. The shared install keeps running in the
|
||||||
|
// background and still coalesces other callers via singleflight.
|
||||||
|
resCh := r.installFlight.DoChan(key, func() (any, error) {
|
||||||
reply, err := r.unloader.InstallBackend(node.ID, backendType, modelID, r.galleriesJSON, "", "", "", replicaIndex, "", nil)
|
reply, err := r.unloader.InstallBackend(node.ID, backendType, modelID, r.galleriesJSON, "", "", "", replicaIndex, "", nil)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return "", err
|
return "", err
|
||||||
@@ -931,10 +968,15 @@ func (r *SmartRouter) installBackendOnNode(ctx context.Context, node *BackendNod
|
|||||||
}
|
}
|
||||||
return addr, nil
|
return addr, nil
|
||||||
})
|
})
|
||||||
if err != nil {
|
select {
|
||||||
return "", err
|
case <-ctx.Done():
|
||||||
|
return "", ctx.Err()
|
||||||
|
case res := <-resCh:
|
||||||
|
if res.Err != nil {
|
||||||
|
return "", res.Err
|
||||||
|
}
|
||||||
|
return res.Val.(string), nil
|
||||||
}
|
}
|
||||||
return v.(string), nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (r *SmartRouter) buildClientForAddr(node *BackendNode, addr string, parallel bool) grpc.Backend {
|
func (r *SmartRouter) buildClientForAddr(node *BackendNode, addr string, parallel bool) grpc.Backend {
|
||||||
|
|||||||
@@ -493,6 +493,44 @@ var _ = Describe("SmartRouter", func() {
|
|||||||
Expect(result.Node.ID).To(Equal("n3"))
|
Expect(result.Node.ID).To(Equal("n3"))
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
|
|
||||||
|
Context("worker wedges mid-install (dead node holding the lock)", func() {
|
||||||
|
It("aborts the load at the ModelLoadCeiling instead of blocking forever", func() {
|
||||||
|
// Simulate the production incident: the chosen worker accepts the
|
||||||
|
// backend.install but never replies (it died), so InstallBackend
|
||||||
|
// would otherwise block for its full NATS deadline (15m by
|
||||||
|
// default) while pinning the per-model advisory lock. Route must
|
||||||
|
// give up at the ceiling so the lock is released promptly.
|
||||||
|
reg.findAndLockErr = errors.New("not found")
|
||||||
|
reg.findIdleNode = &BackendNode{ID: "n4", Name: "dead-node", Address: "10.0.0.4:50051"}
|
||||||
|
|
||||||
|
block := make(chan struct{})
|
||||||
|
defer close(block) // let the background install goroutine drain at test end
|
||||||
|
unloader.installHook = func() { <-block }
|
||||||
|
|
||||||
|
router := NewSmartRouter(reg, SmartRouterOptions{
|
||||||
|
Unloader: unloader,
|
||||||
|
ClientFactory: factory,
|
||||||
|
ModelLoadCeiling: 200 * time.Millisecond,
|
||||||
|
})
|
||||||
|
|
||||||
|
done := make(chan error, 1)
|
||||||
|
start := time.Now()
|
||||||
|
go func() {
|
||||||
|
defer GinkgoRecover()
|
||||||
|
_, err := router.Route(context.Background(), "wedged-model",
|
||||||
|
"models/wedged.gguf", "llama-cpp",
|
||||||
|
&pb.ModelOptions{Model: "models/wedged.gguf"}, false)
|
||||||
|
done <- err
|
||||||
|
}()
|
||||||
|
|
||||||
|
var routeErr error
|
||||||
|
Eventually(done, 5*time.Second).Should(Receive(&routeErr),
|
||||||
|
"Route must not block on a wedged install past the ceiling")
|
||||||
|
Expect(routeErr).To(HaveOccurred())
|
||||||
|
Expect(time.Since(start)).To(BeNumerically("<", 5*time.Second))
|
||||||
|
})
|
||||||
|
})
|
||||||
})
|
})
|
||||||
|
|
||||||
Describe("scheduleNewModel (mock-based, via Route)", func() {
|
Describe("scheduleNewModel (mock-based, via Route)", func() {
|
||||||
|
|||||||
@@ -3,26 +3,7 @@
|
|||||||
url: "github:mudler/LocalAI/gallery/virtual.yaml@master"
|
url: "github:mudler/LocalAI/gallery/virtual.yaml@master"
|
||||||
urls:
|
urls:
|
||||||
- https://huggingface.co/unsloth/Qwen-AgentWorld-35B-A3B-GGUF
|
- https://huggingface.co/unsloth/Qwen-AgentWorld-35B-A3B-GGUF
|
||||||
description: |
|
description: "# Qwen-AgentWorld-35B-A3B\n\n\U0001F4D1 Technical Report |\n\U0001F4D6 Blog |\n\U0001F917 Hugging Face |\n\U0001F916 ModelScope |\n\U0001F4BB GitHub |\n\U0001F5A5️ Demo\n\n> [!Note]\n> This repository contains the model weights and configuration files for **Qwen-AgentWorld-35B-A3B**, a native language world model trained for agentic environment simulation.\n>\n> These artifacts are compatible with Hugging Face Transformers, vLLM, SGLang, etc.\n\n**Qwen-AgentWorld** is the first language world model to cover seven agent interaction domains within a single model. It simulates agentic environments via long chain-of-thought reasoning, predicting the next environment state given an agent's action and interaction history. Trained through a three-stage pipeline — CPT injects environment knowledge, SFT activates next-state-prediction reasoning, RL sharpens simulation fidelity — Qwen-AgentWorld is a **native world model**: environment modeling is the training objective from the CPT stage onward, not a post-hoc add-on.\n\n## Highlights\n\n...\n"
|
||||||
# Qwen-AgentWorld-35B-A3B
|
|
||||||
|
|
||||||
📑 Technical Report |
|
|
||||||
📖 Blog |
|
|
||||||
🤗 Hugging Face |
|
|
||||||
🤖 ModelScope |
|
|
||||||
💻 GitHub |
|
|
||||||
🖥️ Demo
|
|
||||||
|
|
||||||
> [!Note]
|
|
||||||
> This repository contains the model weights and configuration files for **Qwen-AgentWorld-35B-A3B**, a native language world model trained for agentic environment simulation.
|
|
||||||
>
|
|
||||||
> These artifacts are compatible with Hugging Face Transformers, vLLM, SGLang, etc.
|
|
||||||
|
|
||||||
**Qwen-AgentWorld** is the first language world model to cover seven agent interaction domains within a single model. It simulates agentic environments via long chain-of-thought reasoning, predicting the next environment state given an agent's action and interaction history. Trained through a three-stage pipeline — CPT injects environment knowledge, SFT activates next-state-prediction reasoning, RL sharpens simulation fidelity — Qwen-AgentWorld is a **native world model**: environment modeling is the training objective from the CPT stage onward, not a post-hoc add-on.
|
|
||||||
|
|
||||||
## Highlights
|
|
||||||
|
|
||||||
...
|
|
||||||
license: "apache-2.0"
|
license: "apache-2.0"
|
||||||
tags:
|
tags:
|
||||||
- llm
|
- llm
|
||||||
@@ -51,34 +32,7 @@
|
|||||||
url: "github:mudler/LocalAI/gallery/virtual.yaml@master"
|
url: "github:mudler/LocalAI/gallery/virtual.yaml@master"
|
||||||
urls:
|
urls:
|
||||||
- https://huggingface.co/deepreinforce-ai/Ornith-1.0-9B-GGUF
|
- https://huggingface.co/deepreinforce-ai/Ornith-1.0-9B-GGUF
|
||||||
description: |
|
description: "[](https://deep-reinforce.com/ornith.html)\n\n# Ornith-1.0-9B-GGUF\n\nAloha! \U0001F33A Today, we are releasing Ornith-1.0, a self-improving family of open-source models for agentic coding.\n\nHighlights:\n\n - **State-of-the-Art Coding Agents**: Available in 9B-Dense, 31B-Dense, 35B-MoE, and 397B-MoE (post-trained on top of Gemma 4 and Qwen 3.5), achieving state-of-the-art performance among open-source models of comparable size on coding benchmarks such as Terminal-Bench 2.1, SWE-Bench, NL2Repo and OpenClaw.\n - **Self-Improving Training Framework**: Ornith-1.0 employs RL to learn to generate not only solution rollouts, but also the scallfold that drive those rollouts. By jointly optimizing the scaffold and the resulting solution, the model discovers better search trajectories and generates higher-quality solutions.\n - **Licence**: MIT licensed, globally accessible, and free from regional limitations.\n\n## Ornith 1.0 9B\n\nThis model card documents **Ornith-1.0-9B**, the most lightweight member of the Ornith family, designed for efficient single-GPU deployment.\n\n### Benchmarks\n\nOrnith-1.0-9B\nQwen3.5-9B\nQwen3.5-35B\nGemma4-12B\nGemma4-31B\n\nAgentic Coding\n\n...\n"
|
||||||
[](https://deep-reinforce.com/ornith.html)
|
|
||||||
|
|
||||||
# Ornith-1.0-9B-GGUF
|
|
||||||
|
|
||||||
Aloha! 🌺 Today, we are releasing Ornith-1.0, a self-improving family of open-source models for agentic coding.
|
|
||||||
|
|
||||||
Highlights:
|
|
||||||
|
|
||||||
- **State-of-the-Art Coding Agents**: Available in 9B-Dense, 31B-Dense, 35B-MoE, and 397B-MoE (post-trained on top of Gemma 4 and Qwen 3.5), achieving state-of-the-art performance among open-source models of comparable size on coding benchmarks such as Terminal-Bench 2.1, SWE-Bench, NL2Repo and OpenClaw.
|
|
||||||
- **Self-Improving Training Framework**: Ornith-1.0 employs RL to learn to generate not only solution rollouts, but also the scallfold that drive those rollouts. By jointly optimizing the scaffold and the resulting solution, the model discovers better search trajectories and generates higher-quality solutions.
|
|
||||||
- **Licence**: MIT licensed, globally accessible, and free from regional limitations.
|
|
||||||
|
|
||||||
## Ornith 1.0 9B
|
|
||||||
|
|
||||||
This model card documents **Ornith-1.0-9B**, the most lightweight member of the Ornith family, designed for efficient single-GPU deployment.
|
|
||||||
|
|
||||||
### Benchmarks
|
|
||||||
|
|
||||||
Ornith-1.0-9B
|
|
||||||
Qwen3.5-9B
|
|
||||||
Qwen3.5-35B
|
|
||||||
Gemma4-12B
|
|
||||||
Gemma4-31B
|
|
||||||
|
|
||||||
Agentic Coding
|
|
||||||
|
|
||||||
...
|
|
||||||
license: "mit"
|
license: "mit"
|
||||||
tags:
|
tags:
|
||||||
- llm
|
- llm
|
||||||
@@ -105,34 +59,7 @@
|
|||||||
url: "github:mudler/LocalAI/gallery/virtual.yaml@master"
|
url: "github:mudler/LocalAI/gallery/virtual.yaml@master"
|
||||||
urls:
|
urls:
|
||||||
- https://huggingface.co/deepreinforce-ai/Ornith-1.0-35B-GGUF
|
- https://huggingface.co/deepreinforce-ai/Ornith-1.0-35B-GGUF
|
||||||
description: |
|
description: "[](https://deep-reinforce.com/ornith.html)\n\n# Ornith-1.0-35B-GGUF\n\nAloha! \U0001F33A Today, we are releasing Ornith-1.0, a self-improving family of open-source models for agentic coding.\n\nHighlights:\n\n - **State-of-the-Art Coding Agents**: Available in 9B-Dense, 31B-Dense, 35B-MoE, and 397B-MoE (post-trained on top of Gemma 4 and Qwen 3.5), achieving state-of-the-art performance among open-source models of comparable size on coding benchmarks such as Terminal-Bench 2.1, SWE-Bench, NL2Repo and OpenClaw.\n - **Self-Improving Training Framework**: Ornith-1.0 employs RL to learn to generate not only solution rollouts, but also the scallfold that drive those rollouts. By jointly optimizing the scaffold and the resulting solution, the model discovers better search trajectories and generates higher-quality solutions.\n - **Licence**: MIT licensed, globally accessible, and free from regional limitations.\n\n## Ornith 1.0 35B\n\nThis model card documents **Ornith-1.0-35B**, the lightweight member of the Ornith family, designed for efficient single-GPU deployment.\n\n### Benchmarks\n\nOrnith-1.0-35B\nQwen3.5-35B\nQwen3.6-35B\nGemma4-31B\nQwen3.5-397B\n\nAgentic Coding\n\n...\n"
|
||||||
[](https://deep-reinforce.com/ornith.html)
|
|
||||||
|
|
||||||
# Ornith-1.0-35B-GGUF
|
|
||||||
|
|
||||||
Aloha! 🌺 Today, we are releasing Ornith-1.0, a self-improving family of open-source models for agentic coding.
|
|
||||||
|
|
||||||
Highlights:
|
|
||||||
|
|
||||||
- **State-of-the-Art Coding Agents**: Available in 9B-Dense, 31B-Dense, 35B-MoE, and 397B-MoE (post-trained on top of Gemma 4 and Qwen 3.5), achieving state-of-the-art performance among open-source models of comparable size on coding benchmarks such as Terminal-Bench 2.1, SWE-Bench, NL2Repo and OpenClaw.
|
|
||||||
- **Self-Improving Training Framework**: Ornith-1.0 employs RL to learn to generate not only solution rollouts, but also the scallfold that drive those rollouts. By jointly optimizing the scaffold and the resulting solution, the model discovers better search trajectories and generates higher-quality solutions.
|
|
||||||
- **Licence**: MIT licensed, globally accessible, and free from regional limitations.
|
|
||||||
|
|
||||||
## Ornith 1.0 35B
|
|
||||||
|
|
||||||
This model card documents **Ornith-1.0-35B**, the lightweight member of the Ornith family, designed for efficient single-GPU deployment.
|
|
||||||
|
|
||||||
### Benchmarks
|
|
||||||
|
|
||||||
Ornith-1.0-35B
|
|
||||||
Qwen3.5-35B
|
|
||||||
Qwen3.6-35B
|
|
||||||
Gemma4-31B
|
|
||||||
Qwen3.5-397B
|
|
||||||
|
|
||||||
Agentic Coding
|
|
||||||
|
|
||||||
...
|
|
||||||
license: "mit"
|
license: "mit"
|
||||||
tags:
|
tags:
|
||||||
- llm
|
- llm
|
||||||
@@ -473,8 +400,8 @@
|
|||||||
use_tokenizer_template: true
|
use_tokenizer_template: true
|
||||||
files:
|
files:
|
||||||
- filename: llama-cpp/models/Qwythos-9B-Claude-Mythos-5-1M-GGUF/Qwythos-9B-Claude-Mythos-5-1M-MTP-Q4_K_M.gguf
|
- filename: llama-cpp/models/Qwythos-9B-Claude-Mythos-5-1M-GGUF/Qwythos-9B-Claude-Mythos-5-1M-MTP-Q4_K_M.gguf
|
||||||
sha256: 24ee22e0f5d9f0d3d615809607f365c728d9b0c3f3fb6eb19d8bd83a1c2933d8
|
|
||||||
uri: https://huggingface.co/empero-ai/Qwythos-9B-Claude-Mythos-5-1M-GGUF/resolve/main/Qwythos-9B-Claude-Mythos-5-1M-MTP-Q4_K_M.gguf
|
uri: https://huggingface.co/empero-ai/Qwythos-9B-Claude-Mythos-5-1M-GGUF/resolve/main/Qwythos-9B-Claude-Mythos-5-1M-MTP-Q4_K_M.gguf
|
||||||
|
sha256: 671c430bf18c961251338d639a3c02aac7451c39eed25874cad74287ac6cd38a
|
||||||
- filename: llama-cpp/mmproj/Qwythos-9B-Claude-Mythos-5-1M-GGUF/mmproj-Qwythos-9B-Claude-Mythos-5-1M-f16.gguf
|
- filename: llama-cpp/mmproj/Qwythos-9B-Claude-Mythos-5-1M-GGUF/mmproj-Qwythos-9B-Claude-Mythos-5-1M-f16.gguf
|
||||||
sha256: f70dc3509053962b0d0d3ee8a7eacebf5d60aa560cad78254ae8698516ae029f
|
sha256: f70dc3509053962b0d0d3ee8a7eacebf5d60aa560cad78254ae8698516ae029f
|
||||||
uri: https://huggingface.co/empero-ai/Qwythos-9B-Claude-Mythos-5-1M-GGUF/resolve/main/mmproj-Qwythos-9B-Claude-Mythos-5-1M-f16.gguf
|
uri: https://huggingface.co/empero-ai/Qwythos-9B-Claude-Mythos-5-1M-GGUF/resolve/main/mmproj-Qwythos-9B-Claude-Mythos-5-1M-f16.gguf
|
||||||
|
|||||||
@@ -20,6 +20,8 @@ func WorkerPermissions(nodeID, nodeType string) (pubAllow, subAllow []string) {
|
|||||||
subAllow = []string{
|
subAllow = []string{
|
||||||
"agent.execute",
|
"agent.execute",
|
||||||
"agent.*.cancel",
|
"agent.*.cancel",
|
||||||
|
"gallery.*.cancel",
|
||||||
|
"gallery.*.progress",
|
||||||
"jobs.*.cancel",
|
"jobs.*.cancel",
|
||||||
"jobs.*.progress",
|
"jobs.*.progress",
|
||||||
"jobs.*.result",
|
"jobs.*.result",
|
||||||
@@ -27,6 +29,7 @@ func WorkerPermissions(nodeID, nodeType string) (pubAllow, subAllow []string) {
|
|||||||
"mcp.tools.execute",
|
"mcp.tools.execute",
|
||||||
"mcp.discovery",
|
"mcp.discovery",
|
||||||
prefix + ".backend.stop", // stop events drive MCP session cleanup
|
prefix + ".backend.stop", // stop events drive MCP session cleanup
|
||||||
|
"staging.*.progress",
|
||||||
"_INBOX.>",
|
"_INBOX.>",
|
||||||
}
|
}
|
||||||
pubAllow = []string{
|
pubAllow = []string{
|
||||||
|
|||||||
@@ -63,6 +63,72 @@ var defaultRetryPredicate = func(err error) bool {
|
|||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// layerDownloadRetries is the number of additional attempts made when a layer
|
||||||
|
// download fails with a transient/retryable network error.
|
||||||
|
var layerDownloadRetries = 3
|
||||||
|
|
||||||
|
// layerRetryBackoff returns the wait before retry attempt n (1-indexed). It is a
|
||||||
|
// variable so tests can eliminate the wait.
|
||||||
|
var layerRetryBackoff = func(attempt int) time.Duration {
|
||||||
|
d := defaultRetryBackoff.Duration
|
||||||
|
for i := 1; i < attempt; i++ {
|
||||||
|
d = time.Duration(float64(d) * defaultRetryBackoff.Factor)
|
||||||
|
}
|
||||||
|
return d
|
||||||
|
}
|
||||||
|
|
||||||
|
// downloadLayerToFile streams a single compressed layer into dst, retrying on
|
||||||
|
// transient network errors (unexpected EOF, connection reset, ...). Large
|
||||||
|
// backend images (e.g. vLLM) are several GiB and a single dropped connection
|
||||||
|
// mid-stream previously failed the whole install with "unexpected EOF" and no
|
||||||
|
// recovery. The registry transport already retries manifest fetches via
|
||||||
|
// defaultRetryPredicate (see GetImage/GetImageDigest); this extends the same
|
||||||
|
// behaviour to the layer data stream. See issue #10577.
|
||||||
|
func downloadLayerToFile(ctx context.Context, layer v1.Layer, dst *os.File, progress *progressWriter) error {
|
||||||
|
var lastErr error
|
||||||
|
for attempt := 0; attempt <= layerDownloadRetries; attempt++ {
|
||||||
|
if attempt > 0 {
|
||||||
|
// Discard any partial data from the previous failed attempt.
|
||||||
|
if _, err := dst.Seek(0, io.SeekStart); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if err := dst.Truncate(0); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if progress != nil {
|
||||||
|
progress.written = 0
|
||||||
|
}
|
||||||
|
select {
|
||||||
|
case <-ctx.Done():
|
||||||
|
return ctx.Err()
|
||||||
|
case <-time.After(layerRetryBackoff(attempt)):
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
var w io.Writer = dst
|
||||||
|
if progress != nil {
|
||||||
|
w = io.MultiWriter(dst, progress)
|
||||||
|
}
|
||||||
|
|
||||||
|
var reader io.ReadCloser
|
||||||
|
reader, lastErr = layer.Compressed()
|
||||||
|
if lastErr == nil {
|
||||||
|
_, lastErr = xio.Copy(ctx, w, reader)
|
||||||
|
_ = reader.Close()
|
||||||
|
}
|
||||||
|
if lastErr == nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Stop early on context cancellation or non-retryable errors.
|
||||||
|
if ctx.Err() != nil || !defaultRetryPredicate(lastErr) {
|
||||||
|
return lastErr
|
||||||
|
}
|
||||||
|
logs.Warn.Printf("layer download failed (attempt %d/%d), retrying: %v", attempt+1, layerDownloadRetries+1, lastErr)
|
||||||
|
}
|
||||||
|
return lastErr
|
||||||
|
}
|
||||||
|
|
||||||
type progressWriter struct {
|
type progressWriter struct {
|
||||||
written int64
|
written int64
|
||||||
total int64
|
total int64
|
||||||
@@ -304,23 +370,17 @@ func DownloadOCIImageTar(ctx context.Context, img v1.Image, imageRef string, tar
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Create progress writer for this layer
|
// Create progress writer for this layer
|
||||||
var writer io.Writer = file
|
var progress *progressWriter
|
||||||
if downloadStatus != nil {
|
if downloadStatus != nil {
|
||||||
writer = io.MultiWriter(file, &progressWriter{
|
progress = &progressWriter{
|
||||||
total: totalCompressedSize,
|
total: totalCompressedSize,
|
||||||
fileName: fmt.Sprintf("Downloading %d/%d %s", i+1, len(layers), imageName),
|
fileName: fmt.Sprintf("Downloading %d/%d %s", i+1, len(layers), imageName),
|
||||||
downloadStatus: downloadStatus,
|
downloadStatus: downloadStatus,
|
||||||
})
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Download the compressed layer
|
// Download the compressed layer, retrying on transient network errors.
|
||||||
layerReader, err := layer.Compressed()
|
err = downloadLayerToFile(ctx, layer, file, progress)
|
||||||
if err != nil {
|
|
||||||
file.Close()
|
|
||||||
return fmt.Errorf("failed to get compressed layer: %v", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
_, err = xio.Copy(ctx, writer, layerReader)
|
|
||||||
file.Close()
|
file.Close()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("failed to download layer %d: %v", i, err)
|
return fmt.Errorf("failed to download layer %d: %v", i, err)
|
||||||
|
|||||||
123
pkg/oci/layer_internal_test.go
Normal file
123
pkg/oci/layer_internal_test.go
Normal file
@@ -0,0 +1,123 @@
|
|||||||
|
package oci
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"context"
|
||||||
|
"errors"
|
||||||
|
"io"
|
||||||
|
"os"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
v1 "github.com/google/go-containerregistry/pkg/v1"
|
||||||
|
"github.com/google/go-containerregistry/pkg/v1/types"
|
||||||
|
. "github.com/onsi/ginkgo/v2"
|
||||||
|
. "github.com/onsi/gomega"
|
||||||
|
)
|
||||||
|
|
||||||
|
// failingReader yields prefix bytes then returns err, simulating a connection
|
||||||
|
// dropped mid-stream while downloading a layer.
|
||||||
|
type failingReader struct {
|
||||||
|
prefix []byte
|
||||||
|
off int
|
||||||
|
err error
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r *failingReader) Read(p []byte) (int, error) {
|
||||||
|
if r.off < len(r.prefix) {
|
||||||
|
n := copy(p, r.prefix[r.off:])
|
||||||
|
r.off += n
|
||||||
|
return n, nil
|
||||||
|
}
|
||||||
|
return 0, r.err
|
||||||
|
}
|
||||||
|
|
||||||
|
// fakeLayer is a minimal v1.Layer whose Compressed() fails failUntil times with
|
||||||
|
// err (after emitting a partial prefix) before finally returning data in full.
|
||||||
|
type fakeLayer struct {
|
||||||
|
data []byte
|
||||||
|
failUntil int
|
||||||
|
err error
|
||||||
|
calls int
|
||||||
|
}
|
||||||
|
|
||||||
|
func (f *fakeLayer) Digest() (v1.Hash, error) { return v1.Hash{}, nil }
|
||||||
|
func (f *fakeLayer) DiffID() (v1.Hash, error) { return v1.Hash{}, nil }
|
||||||
|
func (f *fakeLayer) Size() (int64, error) { return int64(len(f.data)), nil }
|
||||||
|
func (f *fakeLayer) MediaType() (types.MediaType, error) { return types.DockerLayer, nil }
|
||||||
|
func (f *fakeLayer) Uncompressed() (io.ReadCloser, error) {
|
||||||
|
return nil, errors.New("not implemented")
|
||||||
|
}
|
||||||
|
|
||||||
|
func (f *fakeLayer) Compressed() (io.ReadCloser, error) {
|
||||||
|
f.calls++
|
||||||
|
if f.calls <= f.failUntil {
|
||||||
|
return io.NopCloser(&failingReader{prefix: []byte("partial-garbage"), err: f.err}), nil
|
||||||
|
}
|
||||||
|
return io.NopCloser(bytes.NewReader(f.data)), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
var _ = Describe("downloadLayerToFile", func() {
|
||||||
|
var (
|
||||||
|
dst *os.File
|
||||||
|
restoreWait func()
|
||||||
|
)
|
||||||
|
|
||||||
|
BeforeEach(func() {
|
||||||
|
var err error
|
||||||
|
dst, err = os.CreateTemp("", "layer-retry-*.tar.gz")
|
||||||
|
Expect(err).NotTo(HaveOccurred())
|
||||||
|
|
||||||
|
// Eliminate the real backoff sleep so the test is fast.
|
||||||
|
prev := layerRetryBackoff
|
||||||
|
layerRetryBackoff = func(int) time.Duration { return 0 }
|
||||||
|
restoreWait = func() { layerRetryBackoff = prev }
|
||||||
|
})
|
||||||
|
|
||||||
|
AfterEach(func() {
|
||||||
|
restoreWait()
|
||||||
|
_ = dst.Close()
|
||||||
|
_ = os.Remove(dst.Name())
|
||||||
|
})
|
||||||
|
|
||||||
|
It("retries on unexpected EOF and writes the complete layer", func() {
|
||||||
|
layer := &fakeLayer{
|
||||||
|
data: []byte("the-real-layer-contents"),
|
||||||
|
failUntil: 2,
|
||||||
|
err: io.ErrUnexpectedEOF,
|
||||||
|
}
|
||||||
|
|
||||||
|
err := downloadLayerToFile(context.Background(), layer, dst, nil)
|
||||||
|
Expect(err).NotTo(HaveOccurred())
|
||||||
|
Expect(layer.calls).To(Equal(3))
|
||||||
|
|
||||||
|
got, err := os.ReadFile(dst.Name())
|
||||||
|
Expect(err).NotTo(HaveOccurred())
|
||||||
|
// The partial data from the two failed attempts must have been
|
||||||
|
// discarded, leaving exactly the real contents.
|
||||||
|
Expect(string(got)).To(Equal("the-real-layer-contents"))
|
||||||
|
})
|
||||||
|
|
||||||
|
It("does not retry on a non-retryable error", func() {
|
||||||
|
layer := &fakeLayer{
|
||||||
|
data: []byte("never-reached"),
|
||||||
|
failUntil: 1,
|
||||||
|
err: errors.New("permission denied"),
|
||||||
|
}
|
||||||
|
|
||||||
|
err := downloadLayerToFile(context.Background(), layer, dst, nil)
|
||||||
|
Expect(err).To(HaveOccurred())
|
||||||
|
Expect(layer.calls).To(Equal(1))
|
||||||
|
})
|
||||||
|
|
||||||
|
It("gives up after exhausting retries on a persistent transient error", func() {
|
||||||
|
layer := &fakeLayer{
|
||||||
|
data: []byte("unreachable"),
|
||||||
|
failUntil: 1000,
|
||||||
|
err: io.ErrUnexpectedEOF,
|
||||||
|
}
|
||||||
|
|
||||||
|
err := downloadLayerToFile(context.Background(), layer, dst, nil)
|
||||||
|
Expect(err).To(MatchError(io.ErrUnexpectedEOF))
|
||||||
|
Expect(layer.calls).To(Equal(layerDownloadRetries + 1))
|
||||||
|
})
|
||||||
|
})
|
||||||
Reference in New Issue
Block a user