chore(deps): bump the pip group across 3 directories with 1 update

Bumps the pip group with 1 update in the /backend/python/sglang directory: torch.
Bumps the pip group with 1 update in the /backend/python/trl directory: torch.
Bumps the pip group with 1 update in the /backend/python/vllm-omni directory: torch.

Updates `torch` from 2.9.0 to 2.12.0+cpu
Updates `torch` from 2.10.0 to 2.12.0+cpu
Updates `torch` from 2.7.0 to 2.12.0+cu130

---
updated-dependencies:
- dependency-name: torch
  dependency-version: 2.12.0+cpu
  dependency-type: direct:production
  dependency-group: pip
- dependency-name: torch
  dependency-version: 2.12.0+cpu
  dependency-type: direct:production
  dependency-group: pip
- dependency-name: torch
  dependency-version: 2.12.0+cu130
  dependency-type: direct:production
  dependency-group: pip
...

Signed-off-by: dependabot[bot] <support@github.com>
2026-06-29 19:06:43 -04:00 · 2026-06-28 09:54:23 +00:00
29 changed files with 360 additions and 895 deletions
--- a/.github/workflows/backend_build_darwin.yml
+++ b/.github/workflows/backend_build_darwin.yml
@@ -82,7 +82,7 @@ jobs:
      # as the Linux registry cache.
      - name: Restore Homebrew cache
        id: brew-cache
-        uses: actions/cache/restore@v6
+        uses: actions/cache/restore@v4
        with:
          path: |
            ~/Library/Caches/Homebrew/downloads
@@ -142,7 +142,7 @@ jobs:

      - name: Save Homebrew cache
        if: github.event_name != 'pull_request' && steps.brew-cache.outputs.cache-hit != 'true'
-        uses: actions/cache/save@v6
+        uses: actions/cache/save@v4
        with:
          path: |
            ~/Library/Caches/Homebrew/downloads
@@ -178,7 +178,7 @@ jobs:
      - name: Restore ccache
        if: inputs.backend == 'llama-cpp'
        id: ccache-cache
-        uses: actions/cache/restore@v6
+        uses: actions/cache/restore@v4
        with:
          path: ~/Library/Caches/ccache
          key: ccache-llama-${{ runner.arch }}-${{ steps.llama-version.outputs.version }}-${{ github.run_id }}
@@ -211,7 +211,7 @@ jobs:
      - name: Restore Python wheel cache
        if: inputs.lang == 'python'
        id: pyenv-cache
-        uses: actions/cache/restore@v6
+        uses: actions/cache/restore@v4
        with:
          path: |
            ~/Library/Caches/pip
@@ -256,14 +256,14 @@ jobs:

      - name: Save ccache
        if: inputs.backend == 'llama-cpp' && github.event_name != 'pull_request'
-        uses: actions/cache/save@v6
+        uses: actions/cache/save@v4
        with:
          path: ~/Library/Caches/ccache
          key: ccache-llama-${{ runner.arch }}-${{ steps.llama-version.outputs.version }}-${{ github.run_id }}

      - name: Save Python wheel cache
        if: inputs.lang == 'python' && github.event_name != 'pull_request' && steps.pyenv-cache.outputs.cache-hit != 'true'
-        uses: actions/cache/save@v6
+        uses: actions/cache/save@v4
        with:
          path: |
            ~/Library/Caches/pip
--- a/backend/cpp/llama-cpp/Makefile
+++ b/backend/cpp/llama-cpp/Makefile
@@ -1,5 +1,5 @@

-LLAMA_VERSION?=dbdaece23de9ac63f2e7ca9e6bfcdc4fc156a3fa
+LLAMA_VERSION?=0ed235ea2c17a19fc8238668653946721ed136fd
 LLAMA_REPO?=https://github.com/ggerganov/llama.cpp

 CMAKE_ARGS?=
--- a/backend/go/crispasr/Makefile
+++ b/backend/go/crispasr/Makefile
@@ -8,7 +8,7 @@ JOBS?=$(shell nproc --ignore=1)

 # CrispASR version (release tag)
 CRISPASR_REPO?=https://github.com/CrispStrobe/CrispASR
-CRISPASR_VERSION?=6b50f76e59700665358a1aabf5295597fa318e06
+CRISPASR_VERSION?=6514c9da00b03a2f0f1b49a43fae4f3a01a41844
 SO_TARGET?=libgocrispasr.so

 CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF
--- a/backend/go/face-detect/Makefile
+++ b/backend/go/face-detect/Makefile
@@ -1,6 +1,6 @@
 # face-detect backend Makefile.
 #
-# Upstream pin lives below as FACEDETECT_VERSION?=e22260d5d5490b37b021b7f795079f386d553afd
+# Upstream pin lives below as FACEDETECT_VERSION?=06914b0... (.github/bump_deps.sh
 # can find and update it - matches the voice-detect / parakeet.cpp / whisper.cpp
 # convention).
 #
@@ -14,7 +14,7 @@
 # The default target below does the proper clone-at-pin + cmake build so CI does
 # not need a side-checkout.

-FACEDETECT_VERSION?=e22260d5d5490b37b021b7f795079f386d553afd
+FACEDETECT_VERSION?=06914b077d52f90d5421299138e7be6bdd06b5e8
 FACEDETECT_REPO?=https://github.com/mudler/face-detect.cpp

 GOCMD?=go
--- a/backend/go/stablediffusion-ggml/Makefile
+++ b/backend/go/stablediffusion-ggml/Makefile
@@ -8,7 +8,7 @@ JOBS?=$(shell nproc --ignore=1)

 # stablediffusion.cpp (ggml)
 STABLEDIFFUSION_GGML_REPO?=https://github.com/leejet/stable-diffusion.cpp
-STABLEDIFFUSION_GGML_VERSION?=c1790754d31bec0731ed5fddc9d5b9ff22ee19cd
+STABLEDIFFUSION_GGML_VERSION?=9956436c925a367daeab097598b1ea1f32d3503f

 CMAKE_ARGS+=-DGGML_MAX_NAME=128

--- a/backend/go/voice-detect/Makefile
+++ b/backend/go/voice-detect/Makefile
@@ -1,6 +1,6 @@
 # voice-detect backend Makefile.
 #
-# Upstream pin lives below as VOICEDETECT_VERSION?=1db1759572c90faef6f3a78c36b5941a096a9f89
+# Upstream pin lives below as VOICEDETECT_VERSION?=3d51077... (.github/bump_deps.sh
 # can find and update it - matches the parakeet.cpp / whisper.cpp / ds4 convention).
 #
 # Local dev shortcut: if you already have an out-of-tree voice-detect.cpp build,
@@ -13,7 +13,7 @@
 # The default target below does the proper clone-at-pin + cmake build so CI does
 # not need a side-checkout.

-VOICEDETECT_VERSION?=1db1759572c90faef6f3a78c36b5941a096a9f89
+VOICEDETECT_VERSION?=3d510772357538c5182808ac7de2278b84824e24
 VOICEDETECT_REPO?=https://github.com/mudler/voice-detect.cpp

 GOCMD?=go
--- a/backend/python/fish-speech/install.sh
+++ b/backend/python/fish-speech/install.sh
@@ -13,17 +13,6 @@ fi
 # fish-speech uses pyrootutils which requires a .project-root marker
 touch "${backend_dir}/.project-root"

-# On darwin arm64 the transitive `tokenizers` dep compiles its Rust extension
-# from source (Linux uses prebuilt manylinux wheels, so it never compiles
-# there). The pinned tokenizers crate that fish-speech's stack resolves to
-# contains a `&T` -> `&mut T` cast that trips the now-deny-by-default
-# `invalid_reference_casting` lint in the macOS runner's newer Rust toolchain,
-# breaking the build (seen in the v4.5.5 release CI fish-speech darwin/metal
-# job). Allow that lint so the unchanged third-party crate compiles as before.
-# Append rather than clobber any pre-existing RUSTFLAGS; harmless on Linux
-# where no Rust compile happens.
-export RUSTFLAGS="${RUSTFLAGS:-} -A invalid_reference_casting"
-
 installRequirements

 # Clone fish-speech source (the pip package doesn't include inference modules)
--- a/backend/python/sglang/requirements-cpu.txt
+++ b/backend/python/sglang/requirements-cpu.txt
@@ -1,6 +1,6 @@
 --extra-index-url https://download.pytorch.org/whl/cpu
 accelerate
-torch==2.9.0
+torch==2.12.0+cpu
 torchvision
 torchaudio
 transformers
--- a/backend/python/sglang/requirements-cublas12.txt
+++ b/backend/python/sglang/requirements-cublas12.txt
@@ -6,7 +6,7 @@
 # for cublas12 so uv consults this index alongside PyPI.
 --extra-index-url https://download.pytorch.org/whl/cu128
 accelerate
-torch==2.9.1
+torch==2.12.0+cpu
 torchvision
 torchaudio
 transformers
--- a/backend/python/trl/requirements-cpu.txt
+++ b/backend/python/trl/requirements-cpu.txt
@@ -1,9 +1,9 @@
 --extra-index-url https://download.pytorch.org/whl/cpu
-torch==2.10.0
+torch==2.12.0+cpu
 trl
 peft
 datasets>=3.0.0
-transformers>=4.56.2
+transformers>=5.12.1
 accelerate>=1.4.0
 huggingface-hub>=1.3.0
 sentencepiece
--- a/backend/python/trl/requirements-cublas12.txt
+++ b/backend/python/trl/requirements-cublas12.txt
@@ -1,8 +1,8 @@
-torch==2.10.0
+torch==2.12.0+cpu
 trl
 peft
 datasets>=3.0.0
-transformers>=4.56.2
+transformers>=5.12.1
 accelerate>=1.4.0
 huggingface-hub>=1.3.0
 sentencepiece
--- a/backend/python/trl/requirements-cublas13.txt
+++ b/backend/python/trl/requirements-cublas13.txt
@@ -1,8 +1,8 @@
-torch==2.10.0
+torch==2.12.0+cpu
 trl
 peft
 datasets>=3.0.0
-transformers>=4.56.2
+transformers>=5.12.1
 accelerate>=1.4.0
 huggingface-hub>=1.3.0
 sentencepiece
--- a/backend/python/trl/requirements-mps.txt
+++ b/backend/python/trl/requirements-mps.txt
@@ -1,8 +1,8 @@
-torch==2.10.0
+torch==2.12.0+cpu
 trl
 peft
 datasets>=3.0.0
-transformers>=4.56.2
+transformers>=5.12.1
 accelerate>=1.4.0
 huggingface-hub>=1.3.0
 sentencepiece
--- a/backend/python/vllm-omni/requirements-cublas12.txt
+++ b/backend/python/vllm-omni/requirements-cublas12.txt
@@ -1,4 +1,4 @@
 accelerate
-torch==2.7.0
+torch==2.12.0+cu130
 transformers
 bitsandbytes
--- a/backend/python/vllm/install.sh
+++ b/backend/python/vllm/install.sh
@@ -104,7 +104,7 @@ if [ "$(uname -s)" = "Darwin" ]; then
    # can rewrite it. Darwin therefore follows vllm-metal and can lag the Linux
    # vllm pin (requirements-cublas13-after.txt, bumped independently against
    # vllm/vllm) until vllm-metal supports a newer vLLM.
-    VLLM_METAL_VERSION="v0.3.0.dev20260628073537"
+    VLLM_METAL_VERSION="v0.3.0.dev20260622062346"

    # The coupled vLLM source version is whatever this vllm-metal release builds
    # against -- it declares it in its own installer as `vllm_v=`. Derive it from
--- a/cmd/launcher/internal/launcher.go
+++ b/cmd/launcher/internal/launcher.go
@@ -429,7 +429,7 @@ func (l *Launcher) CheckForUpdates() (bool, string, error) {
 }

 // DownloadUpdate downloads the latest version
-func (l *Launcher) DownloadUpdate(version string, progressCallback func(downloaded, total int64)) error {
+func (l *Launcher) DownloadUpdate(version string, progressCallback func(float64)) error {
 	return l.releaseManager.DownloadRelease(version, progressCallback)
 }

@@ -486,6 +486,7 @@ func (l *Launcher) showDownloadLocalAIDialog() {
 	fyne.DoAndWait(func() {
 		// Create a standalone window for the download dialog
 		dialogWindow := l.app.NewWindow("LocalAI Installation Required")
+		dialogWindow.Resize(fyne.NewSize(500, 350))
 		dialogWindow.CenterOnScreen()
 		dialogWindow.SetCloseIntercept(func() {
 			dialogWindow.Close()
@@ -547,7 +548,6 @@ func (l *Launcher) showDownloadLocalAIDialog() {
 		)

 		dialogWindow.SetContent(content)
-		resizeToContent(dialogWindow, content)
 		dialogWindow.Show()
 	})
 }
@@ -621,134 +621,88 @@ func (l *Launcher) showDownloadError(title, message string) {
 }

 // showDownloadProgress shows a standalone progress window for downloading LocalAI
-// after a fresh install (no LocalAI binary present yet).
 func (l *Launcher) showDownloadProgress(version, title string) {
-	l.showDownloadProgressWindow(version, title, func(win fyne.Window) {
-		dialog.ShowConfirm("Installation Complete",
-			"LocalAI has been downloaded and installed successfully. You can now start LocalAI from the launcher.",
-			func(bool) {
-				win.Close()
-				l.updateStatus("LocalAI installed successfully")
-				if l.systray != nil {
-					l.systray.recreateMenu()
-				}
-			}, win)
-	})
-}
-
-// showDownloadProgressWindow renders the download progress popup shared by every
-// "download/upgrade LocalAI" entry point. It owns the progress bar, the
-// human-readable byte readout, resume-aware retry, and content-fit window
-// sizing so the behaviour stays identical everywhere. onSuccess runs (on the UI
-// goroutine) once the download verifies, and is responsible for the success
-// dialog and any follow-up; the window is passed in so it can be parented/closed.
-func (l *Launcher) showDownloadProgressWindow(version, title string, onSuccess func(win fyne.Window)) {
 	fyne.DoAndWait(func() {
+		// Create progress window
 		progressWindow := l.app.NewWindow("Downloading LocalAI")
+		progressWindow.Resize(fyne.NewSize(400, 250))
 		progressWindow.CenterOnScreen()
 		progressWindow.SetCloseIntercept(func() {
 			progressWindow.Close()
 		})

+		// Progress bar
 		progressBar := widget.NewProgressBar()
 		progressBar.SetValue(0)

 		// Status label. Truncate with an ellipsis so a long "Download failed:
 		// <url>" message can't stretch the window (and progress bar) to fit the
-		// whole error on one line.
+		// whole error on one line; the full error is shown in the dialog below.
 		statusLabel := widget.NewLabel("Preparing download...")
 		statusLabel.Truncation = fyne.TextTruncateEllipsis

+		// Release notes button
 		releaseNotesButton := widget.NewButton("View Release Notes", func() {
 			releaseNotesURL, err := l.githubReleaseNotesURL(version)
 			if err != nil {
 				log.Printf("Failed to parse URL: %v", err)
 				return
 			}
+
 			l.app.OpenURL(releaseNotesURL)
 		})

-		// Retry button: hidden until a download fails. GitHub downloads are
-		// flaky, and the underlying download resumes from the partial file, so
-		// a retry continues where it left off rather than starting over.
-		retryButton := widget.NewButton("Retry", nil)
-		retryButton.Importance = widget.HighImportance
-		retryButton.Hide()
-
-		buttonRow := container.NewHBox(releaseNotesButton, retryButton)
-		content := container.NewVBox(
+		// Progress container
+		progressContainer := container.NewVBox(
 			widget.NewLabel(title),
 			progressBar,
 			statusLabel,
 			widget.NewSeparator(),
-			buttonRow,
+			releaseNotesButton,
 		)
-		progressWindow.SetContent(content)
-		resizeToContent(progressWindow, content)
-
-		var startDownload func()
-		startDownload = func() {
-			retryButton.Hide()
-			progressBar.SetValue(0)
-			statusLabel.SetText("Preparing download...")
-			resizeToContent(progressWindow, content)
-
-			go func() {
-				err := l.DownloadUpdate(version, func(downloaded, total int64) {
-					fyne.Do(func() {
-						if total > 0 {
-							progressBar.SetValue(float64(downloaded) / float64(total))
-							statusLabel.SetText(fmt.Sprintf("Downloading… %s / %s", formatBytes(downloaded), formatBytes(total)))
-						} else {
-							statusLabel.SetText(fmt.Sprintf("Downloading… %s", formatBytes(downloaded)))
-						}
-					})
-				})
-
-				fyne.Do(func() {
-					if err != nil {
-						statusLabel.SetText(fmt.Sprintf("Download failed: %v", err))
-						retryButton.Show()
-						resizeToContent(progressWindow, content)
-						return
-					}
-					progressBar.SetValue(1.0)
-					statusLabel.SetText("Download complete")
-					onSuccess(progressWindow)
-				})
-			}()
-		}
-		retryButton.OnTapped = startDownload

+		progressWindow.SetContent(progressContainer)
 		progressWindow.Show()
-		startDownload()
+
+		// Start download in background
+		go func() {
+			err := l.DownloadUpdate(version, func(progress float64) {
+				// Update progress bar
+				fyne.Do(func() {
+					progressBar.SetValue(progress)
+					percentage := int(progress * 100)
+					statusLabel.SetText(fmt.Sprintf("Downloading... %d%%", percentage))
+				})
+			})
+
+			// Handle completion
+			fyne.Do(func() {
+				if err != nil {
+					statusLabel.SetText(fmt.Sprintf("Download failed: %v", err))
+					// Show error dialog
+					dialog.ShowError(err, progressWindow)
+				} else {
+					statusLabel.SetText("Download completed successfully!")
+					progressBar.SetValue(1.0)
+
+					// Show success dialog
+					dialog.ShowConfirm("Installation Complete",
+						"LocalAI has been downloaded and installed successfully. You can now start LocalAI from the launcher.",
+						func(close bool) {
+							progressWindow.Close()
+							// Update status and refresh systray menu
+							l.updateStatus("LocalAI installed successfully")
+
+							if l.systray != nil {
+								l.systray.recreateMenu()
+							}
+						}, progressWindow)
+				}
+			})
+		}()
 	})
 }

-// resizeToContent sizes a window to fit its content (with a sane minimum width)
-// so the dialog doesn't show a large blank gap below the last widget.
-func resizeToContent(w fyne.Window, content fyne.CanvasObject) {
-	size := content.MinSize()
-	if size.Width < 400 {
-		size.Width = 400
-	}
-	w.Resize(size)
-}
-
-// formatBytes renders a byte count as a human-readable size (e.g. "12.3 MB").
-func formatBytes(b int64) string {
-	const unit = 1024
-	if b < unit {
-		return fmt.Sprintf("%d B", b)
-	}
-	div, exp := int64(unit), 0
-	for n := b / unit; n >= unit; n /= unit {
-		div *= unit
-		exp++
-	}
-	return fmt.Sprintf("%.1f %cB", float64(b)/float64(div), "KMGTPE"[exp])
-}
-
 // monitorLogs monitors the output of LocalAI and adds it to the log buffer
 func (l *Launcher) monitorLogs(reader io.Reader, prefix string) {
 	scanner := bufio.NewScanner(reader)
--- a/cmd/launcher/internal/release_manager.go
+++ b/cmd/launcher/internal/release_manager.go
@@ -11,7 +11,6 @@ import (
 	"net/http"
 	"os"
 	"os/exec"
-	"path"
 	"path/filepath"
 	"runtime"
 	"strings"
@@ -51,12 +50,6 @@ type ReleaseManager struct {
 	ChecksumsPath string
 	// MetadataPath is where version metadata is stored
 	MetadataPath string
-	// BaseDownloadURL is the base URL release assets are downloaded from
-	// (defaults to https://github.com; overridable for testing)
-	BaseDownloadURL string
-	// RetryBackoff is the base wait between download attempts; the Nth retry
-	// waits N*RetryBackoff (defaults to 1s; lowered in tests)
-	RetryBackoff time.Duration
 	// HTTPClient is the HTTP client used for downloads
 	HTTPClient *http.Client
 }
@@ -69,94 +62,28 @@ func NewReleaseManager() *ReleaseManager {
 	metadataPath := filepath.Join(homeDir, ".localai", "metadata")

 	return &ReleaseManager{
-		GitHubOwner:     "mudler",
-		GitHubRepo:      "LocalAI",
-		BinaryPath:      binaryPath,
-		CurrentVersion:  internal.PrintableVersion(),
-		ChecksumsPath:   checksumsPath,
-		MetadataPath:    metadataPath,
-		BaseDownloadURL: "https://github.com",
-		RetryBackoff:    1 * time.Second,
-		HTTPClient:      httpclient.NewWithTimeout(30*time.Second, httpclient.WithFollowRedirects()),
+		GitHubOwner:    "mudler",
+		GitHubRepo:     "LocalAI",
+		BinaryPath:     binaryPath,
+		CurrentVersion: internal.PrintableVersion(),
+		ChecksumsPath:  checksumsPath,
+		MetadataPath:   metadataPath,
+		HTTPClient:     httpclient.NewWithTimeout(30*time.Second, httpclient.WithFollowRedirects()),
 	}
 }

-// GetLatestRelease resolves the latest LocalAI release.
-//
-// It first follows the github.com "releases/latest" redirect, which reveals the
-// latest tag in the final URL and—crucially—is NOT subject to the
-// 60-requests/hour unauthenticated rate limit of api.github.com. That limit is
-// per-IP, so on shared/NAT/CGNAT/cloud addresses the API returns 403 almost
-// immediately (e.g. on a fresh install with no LocalAI present yet). The
-// redirect avoids that entirely. The richer JSON API is kept only as a fallback.
-//
-// Only the version is consumed by callers, so the redirect's tag is sufficient.
+// GetLatestRelease fetches the latest release information from GitHub
 func (rm *ReleaseManager) GetLatestRelease() (*Release, error) {
-	version, redirectErr := rm.latestVersionFromRedirect()
-	if redirectErr == nil {
-		return &Release{Version: version}, nil
-	}
-	log.Printf("Could not resolve latest version via release redirect (%v); falling back to GitHub API", redirectErr)
-
-	release, apiErr := rm.latestReleaseFromAPI()
-	if apiErr != nil {
-		// Surface both failures so a rate-limited API doesn't mask the (usually
-		// more relevant) redirect error.
-		return nil, fmt.Errorf("failed to fetch latest release: %v (redirect: %v)", apiErr, redirectErr)
-	}
-	return release, nil
-}
-
-// latestVersionFromRedirect returns the latest tag by following the github.com
-// "releases/latest" redirect to ".../releases/tag/<tag>".
-func (rm *ReleaseManager) latestVersionFromRedirect() (string, error) {
-	url := fmt.Sprintf("%s/%s/%s/releases/latest", rm.BaseDownloadURL, rm.GitHubOwner, rm.GitHubRepo)
-
-	resp, err := rm.HTTPClient.Get(url)
-	if err != nil {
-		return "", err
-	}
-	defer resp.Body.Close()
-
-	if resp.StatusCode != http.StatusOK {
-		return "", fmt.Errorf("unexpected status %s", resp.Status)
-	}
-
-	// After the redirect is followed, the final request URL is the tag page.
-	version := path.Base(resp.Request.URL.Path)
-	if version == "" || version == "." || version == "latest" {
-		return "", fmt.Errorf("could not determine version from %s", resp.Request.URL.String())
-	}
-	return version, nil
-}
-
-// latestReleaseFromAPI fetches the latest release JSON from api.github.com. This
-// is the fallback path; it is rate-limited unless GITHUB_TOKEN is set.
-func (rm *ReleaseManager) latestReleaseFromAPI() (*Release, error) {
 	url := fmt.Sprintf("https://api.github.com/repos/%s/%s/releases/latest", rm.GitHubOwner, rm.GitHubRepo)

-	req, err := http.NewRequest(http.MethodGet, url, nil)
-	if err != nil {
-		return nil, err
-	}
-	req.Header.Set("Accept", "application/vnd.github+json")
-	// An optional token lifts the unauthenticated 60/hour limit to 5000/hour.
-	if token := os.Getenv("GITHUB_TOKEN"); token != "" {
-		req.Header.Set("Authorization", "Bearer "+token)
-	}
-
-	resp, err := rm.HTTPClient.Do(req)
+	resp, err := rm.HTTPClient.Get(url)
 	if err != nil {
 		return nil, fmt.Errorf("failed to fetch latest release: %w", err)
 	}
 	defer resp.Body.Close()

 	if resp.StatusCode != http.StatusOK {
-		if (resp.StatusCode == http.StatusForbidden || resp.StatusCode == http.StatusTooManyRequests) &&
-			resp.Header.Get("X-RateLimit-Remaining") == "0" {
-			return nil, fmt.Errorf("GitHub API rate limit exceeded (status %d); retry later or set GITHUB_TOKEN to raise the limit", resp.StatusCode)
-		}
-		return nil, fmt.Errorf("status %d", resp.StatusCode)
+		return nil, fmt.Errorf("failed to fetch latest release: status %d", resp.StatusCode)
 	}

 	// Parse the JSON response properly
@@ -179,7 +106,7 @@ func (rm *ReleaseManager) latestReleaseFromAPI() (*Release, error) {
 }

 // DownloadRelease downloads a specific version of LocalAI
-func (rm *ReleaseManager) DownloadRelease(version string, progressCallback func(downloaded, total int64)) error {
+func (rm *ReleaseManager) DownloadRelease(version string, progressCallback func(float64)) error {
 	// Ensure the binary directory exists
 	if err := os.MkdirAll(rm.BinaryPath, 0755); err != nil {
 		return fmt.Errorf("failed to create binary directory: %w", err)
@@ -190,16 +117,16 @@ func (rm *ReleaseManager) DownloadRelease(version string, progressCallback func(
 	localPath := filepath.Join(rm.BinaryPath, "local-ai")

 	// Download the binary
-	downloadURL := fmt.Sprintf("%s/%s/%s/releases/download/%s/%s",
-		rm.BaseDownloadURL, rm.GitHubOwner, rm.GitHubRepo, version, binaryName)
+	downloadURL := fmt.Sprintf("https://github.com/%s/%s/releases/download/%s/%s",
+		rm.GitHubOwner, rm.GitHubRepo, version, binaryName)

 	if err := rm.downloadFile(downloadURL, localPath, progressCallback); err != nil {
 		return fmt.Errorf("failed to download binary: %w", err)
 	}

 	// Download and verify checksums
-	checksumURL := fmt.Sprintf("%s/%s/%s/releases/download/%s/LocalAI-%s-checksums.txt",
-		rm.BaseDownloadURL, rm.GitHubOwner, rm.GitHubRepo, version, version)
+	checksumURL := fmt.Sprintf("https://github.com/%s/%s/releases/download/%s/LocalAI-%s-checksums.txt",
+		rm.GitHubOwner, rm.GitHubRepo, version, version)

 	checksumPath := filepath.Join(rm.BinaryPath, "checksums.txt")
 	manualChecksumPath := filepath.Join(rm.ChecksumsPath, fmt.Sprintf("checksums-%s.txt", version))
@@ -227,10 +154,6 @@ func (rm *ReleaseManager) DownloadRelease(version string, progressCallback func(
 	// Verify the checksum if we have a checksum file
 	if _, err := os.Stat(checksumPath); err == nil {
 		if err := rm.VerifyChecksum(localPath, checksumPath, binaryName); err != nil {
-			// Discard the corrupt binary (and any leftover partial) so the next
-			// retry starts from a clean slate rather than resuming corruption.
-			os.Remove(localPath)
-			os.Remove(localPath + ".part")
 			return fmt.Errorf("checksum verification failed: %w", err)
 		}
 		log.Printf("Checksum verification successful")
@@ -273,88 +196,44 @@ func (rm *ReleaseManager) GetBinaryName(version string) string {
 }

 // downloadFile downloads a file from a URL to a local path with optional progress callback
-func (rm *ReleaseManager) downloadFile(url, filepath string, progressCallback func(downloaded, total int64)) error {
+func (rm *ReleaseManager) downloadFile(url, filepath string, progressCallback func(float64)) error {
 	return rm.downloadFileWithRetry(url, filepath, progressCallback, 3)
 }

-// downloadFileWithRetry downloads a file with retry and HTTP Range resume.
-//
-// The body is streamed to "<dest>.part" and only renamed to dest on success, so
-// a dropped connection leaves a partial file that the next attempt continues via
-// a "Range: bytes=N-" request instead of restarting from zero. This matters for
-// GitHub release downloads, which are large and flaky.
-func (rm *ReleaseManager) downloadFileWithRetry(url, dest string, progressCallback func(downloaded, total int64), maxRetries int) error {
-	partPath := dest + ".part"
+// downloadFileWithRetry downloads a file from a URL with retry logic
+func (rm *ReleaseManager) downloadFileWithRetry(url, filepath string, progressCallback func(float64), maxRetries int) error {
 	var lastErr error

 	for attempt := 1; attempt <= maxRetries; attempt++ {
 		if attempt > 1 {
 			log.Printf("Retrying download (attempt %d/%d): %s", attempt, maxRetries, url)
-			time.Sleep(time.Duration(attempt) * rm.RetryBackoff)
+			time.Sleep(time.Duration(attempt) * time.Second)
 		}

-		// Resume from however much we already have on disk.
-		var offset int64
-		if fi, err := os.Stat(partPath); err == nil {
-			offset = fi.Size()
-		}
-
-		req, err := http.NewRequest(http.MethodGet, url, nil)
-		if err != nil {
-			return err
-		}
-		if offset > 0 {
-			req.Header.Set("Range", fmt.Sprintf("bytes=%d-", offset))
-		}
-
-		resp, err := rm.HTTPClient.Do(req)
+		resp, err := rm.HTTPClient.Get(url)
 		if err != nil {
 			lastErr = err
 			continue
 		}

-		switch resp.StatusCode {
-		case http.StatusOK:
-			// Server ignored the Range (or we had nothing): start fresh.
-			offset = 0
-		case http.StatusPartialContent:
-			// Resume: append to the existing partial file.
-		case http.StatusRequestedRangeNotSatisfiable:
-			// Stale or already-complete partial: discard and restart fresh.
-			resp.Body.Close()
-			os.Remove(partPath)
-			lastErr = fmt.Errorf("partial download no longer valid (status %s), restarting", resp.Status)
-			continue
-		default:
+		if resp.StatusCode != http.StatusOK {
 			resp.Body.Close()
 			lastErr = fmt.Errorf("bad status: %s", resp.Status)
 			continue
 		}

-		var out *os.File
-		if offset > 0 {
-			out, err = os.OpenFile(partPath, os.O_WRONLY|os.O_APPEND, 0644)
-		} else {
-			out, err = os.Create(partPath)
-		}
+		out, err := os.Create(filepath)
 		if err != nil {
 			resp.Body.Close()
 			return err
 		}

-		// On a 206 the Content-Length is the remaining bytes, so the full size
-		// is what we already have plus what's still to come.
-		total := resp.ContentLength
-		if offset > 0 && total > 0 {
-			total += offset
-		}
-
+		// Create a progress reader if callback is provided
 		var reader io.Reader = resp.Body
-		if progressCallback != nil && total > 0 {
+		if progressCallback != nil && resp.ContentLength > 0 {
 			reader = &progressReader{
 				Reader:   resp.Body,
-				Total:    total,
-				Current:  offset,
+				Total:    resp.ContentLength,
 				Callback: progressCallback,
 			}
 		}
@@ -364,14 +243,11 @@ func (rm *ReleaseManager) downloadFileWithRetry(url, dest string, progressCallba
 		out.Close()

 		if err != nil {
-			// Keep the partial file so the next attempt can resume from it.
 			lastErr = err
+			os.Remove(filepath)
 			continue
 		}

-		if err := os.Rename(partPath, dest); err != nil {
-			return err
-		}
 		return nil
 	}

@@ -446,21 +322,20 @@ func (rm *ReleaseManager) saveVersionMetadata(version string) error {
 	return nil
 }

-// progressReader wraps an io.Reader to provide download progress as a
-// (downloaded, total) byte count so callers can render both a progress bar and
-// a human-readable size.
+// progressReader wraps an io.Reader to provide download progress
 type progressReader struct {
 	io.Reader
 	Total    int64
 	Current  int64
-	Callback func(downloaded, total int64)
+	Callback func(float64)
 }

 func (pr *progressReader) Read(p []byte) (int, error) {
 	n, err := pr.Reader.Read(p)
 	pr.Current += int64(n)
 	if pr.Callback != nil {
-		pr.Callback(pr.Current, pr.Total)
+		progress := float64(pr.Current) / float64(pr.Total)
+		pr.Callback(progress)
 	}
 	return n, err
 }
--- a/cmd/launcher/internal/release_manager_test.go
+++ b/cmd/launcher/internal/release_manager_test.go
@@ -1,17 +1,9 @@
 package launcher_test

 import (
-	"crypto/sha256"
-	"encoding/hex"
-	"fmt"
-	"net/http"
-	"net/http/httptest"
 	"os"
 	"path/filepath"
 	"runtime"
-	"strconv"
-	"strings"
-	"sync"
 	"time"

 	. "github.com/onsi/ginkgo/v2"
@@ -186,221 +178,4 @@ var _ = Describe("ReleaseManager", func() {
 			Expect(err.Error()).To(ContainSubstring("checksum not found"))
 		})
 	})
-
-	Describe("DownloadRelease resume and retry", func() {
-		var (
-			version    string
-			binaryName string
-			content    []byte
-			checksums  string
-			finalPath  string
-			partPath   string
-		)
-
-		BeforeEach(func() {
-			version = "v9.9.9"
-			binaryName = rm.GetBinaryName(version)
-
-			// Deterministic, non-trivial content so resume/append bugs surface.
-			content = make([]byte, 4096)
-			for i := range content {
-				content[i] = byte(i % 251)
-			}
-			sum := sha256.Sum256(content)
-			checksums = fmt.Sprintf("%s  %s\n", hex.EncodeToString(sum[:]), binaryName)
-
-			finalPath = filepath.Join(tempDir, "local-ai")
-			partPath = finalPath + ".part"
-
-			// Isolate the persistent checksum/metadata dirs to the temp dir so
-			// the test never touches the real ~/.localai and existing checksum
-			// files don't short-circuit the download.
-			rm.ChecksumsPath = filepath.Join(tempDir, "checksums")
-			rm.MetadataPath = filepath.Join(tempDir, "metadata")
-			rm.GitHubOwner = "owner"
-			rm.GitHubRepo = "repo"
-			rm.RetryBackoff = time.Millisecond
-
-			Expect(os.MkdirAll(tempDir, 0755)).To(Succeed())
-		})
-
-		It("resumes from a partial .part file using a Range request", func() {
-			Expect(os.WriteFile(partPath, content[:1024], 0644)).To(Succeed())
-
-			var mu sync.Mutex
-			sawRange := false
-			binBytesServed := 0
-
-			srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
-				if strings.HasSuffix(r.URL.Path, "checksums.txt") {
-					_, _ = w.Write([]byte(checksums))
-					return
-				}
-				if rangeHdr := r.Header.Get("Range"); rangeHdr != "" {
-					var start int
-					_, _ = fmt.Sscanf(rangeHdr, "bytes=%d-", &start)
-					mu.Lock()
-					sawRange = true
-					mu.Unlock()
-					w.Header().Set("Content-Range", fmt.Sprintf("bytes %d-%d/%d", start, len(content)-1, len(content)))
-					w.WriteHeader(http.StatusPartialContent)
-					n, _ := w.Write(content[start:])
-					mu.Lock()
-					binBytesServed += n
-					mu.Unlock()
-					return
-				}
-				w.WriteHeader(http.StatusOK)
-				n, _ := w.Write(content)
-				mu.Lock()
-				binBytesServed += n
-				mu.Unlock()
-			}))
-			defer srv.Close()
-			rm.BaseDownloadURL = srv.URL
-
-			err := rm.DownloadRelease(version, nil)
-			Expect(err).ToNot(HaveOccurred())
-
-			got, err := os.ReadFile(finalPath)
-			Expect(err).ToNot(HaveOccurred())
-			Expect(got).To(Equal(content))
-			Expect(sawRange).To(BeTrue(), "expected the download to resume with a Range request")
-			Expect(binBytesServed).To(Equal(len(content)-1024), "expected only the remaining bytes to be served")
-			Expect(partPath).ToNot(BeAnExistingFile())
-		})
-
-		It("starts fresh when the server ignores the Range header (200)", func() {
-			// A stale/garbage partial that must NOT be appended to.
-			Expect(os.WriteFile(partPath, []byte("garbage-garbage-garbage"), 0644)).To(Succeed())
-
-			srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
-				if strings.HasSuffix(r.URL.Path, "checksums.txt") {
-					_, _ = w.Write([]byte(checksums))
-					return
-				}
-				// Ignore any Range and always serve the full body.
-				w.WriteHeader(http.StatusOK)
-				_, _ = w.Write(content)
-			}))
-			defer srv.Close()
-			rm.BaseDownloadURL = srv.URL
-
-			err := rm.DownloadRelease(version, nil)
-			Expect(err).ToNot(HaveOccurred())
-
-			got, err := os.ReadFile(finalPath)
-			Expect(err).ToNot(HaveOccurred())
-			Expect(got).To(Equal(content))
-		})
-
-		It("restarts the download when the partial is stale (416)", func() {
-			// Oversized partial -> requested Range start is beyond the content.
-			Expect(os.WriteFile(partPath, make([]byte, len(content)+10), 0644)).To(Succeed())
-
-			srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
-				if strings.HasSuffix(r.URL.Path, "checksums.txt") {
-					_, _ = w.Write([]byte(checksums))
-					return
-				}
-				if rangeHdr := r.Header.Get("Range"); rangeHdr != "" {
-					var start int
-					_, _ = fmt.Sscanf(rangeHdr, "bytes=%d-", &start)
-					if start >= len(content) {
-						w.WriteHeader(http.StatusRequestedRangeNotSatisfiable)
-						return
-					}
-					w.Header().Set("Content-Range", fmt.Sprintf("bytes %d-%d/%d", start, len(content)-1, len(content)))
-					w.WriteHeader(http.StatusPartialContent)
-					_, _ = w.Write(content[start:])
-					return
-				}
-				w.WriteHeader(http.StatusOK)
-				_, _ = w.Write(content)
-			}))
-			defer srv.Close()
-			rm.BaseDownloadURL = srv.URL
-
-			err := rm.DownloadRelease(version, nil)
-			Expect(err).ToNot(HaveOccurred())
-
-			got, err := os.ReadFile(finalPath)
-			Expect(err).ToNot(HaveOccurred())
-			Expect(got).To(Equal(content))
-		})
-
-		It("removes the downloaded file when checksum verification fails", func() {
-			bad := []byte("this is definitely not the expected binary content")
-
-			srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
-				if strings.HasSuffix(r.URL.Path, "checksums.txt") {
-					// Checksums are for `content`, but we serve `bad`.
-					_, _ = w.Write([]byte(checksums))
-					return
-				}
-				w.WriteHeader(http.StatusOK)
-				_, _ = w.Write(bad)
-			}))
-			defer srv.Close()
-			rm.BaseDownloadURL = srv.URL
-
-			err := rm.DownloadRelease(version, nil)
-			Expect(err).To(HaveOccurred())
-			Expect(err.Error()).To(ContainSubstring("checksum"))
-			Expect(finalPath).ToNot(BeAnExistingFile())
-			Expect(partPath).ToNot(BeAnExistingFile())
-		})
-
-		It("reports progress as downloaded and total byte counts", func() {
-			srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
-				if strings.HasSuffix(r.URL.Path, "checksums.txt") {
-					_, _ = w.Write([]byte(checksums))
-					return
-				}
-				w.Header().Set("Content-Length", strconv.Itoa(len(content)))
-				w.WriteHeader(http.StatusOK)
-				_, _ = w.Write(content)
-			}))
-			defer srv.Close()
-			rm.BaseDownloadURL = srv.URL
-
-			var mu sync.Mutex
-			var lastDownloaded, lastTotal int64
-			err := rm.DownloadRelease(version, func(downloaded, total int64) {
-				mu.Lock()
-				lastDownloaded = downloaded
-				lastTotal = total
-				mu.Unlock()
-			})
-			Expect(err).ToNot(HaveOccurred())
-			Expect(lastTotal).To(Equal(int64(len(content))))
-			Expect(lastDownloaded).To(Equal(int64(len(content))))
-		})
-	})
-
-	Describe("GetLatestRelease", func() {
-		It("resolves the latest version from the releases/latest redirect", func() {
-			// The github.com redirect path must be preferred over the
-			// rate-limited api.github.com, so a working redirect yields the tag
-			// without ever needing the API.
-			srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
-				switch {
-				case strings.HasSuffix(r.URL.Path, "/releases/latest"):
-					http.Redirect(w, r, "/owner/repo/releases/tag/v9.9.9", http.StatusFound)
-				case strings.HasSuffix(r.URL.Path, "/releases/tag/v9.9.9"):
-					w.WriteHeader(http.StatusOK)
-				default:
-					w.WriteHeader(http.StatusNotFound)
-				}
-			}))
-			defer srv.Close()
-			rm.BaseDownloadURL = srv.URL
-			rm.GitHubOwner = "owner"
-			rm.GitHubRepo = "repo"
-
-			release, err := rm.GetLatestRelease()
-			Expect(err).ToNot(HaveOccurred())
-			Expect(release.Version).To(Equal("v9.9.9"))
-		})
-	})
 })
--- a/cmd/launcher/internal/systray_manager.go
+++ b/cmd/launcher/internal/systray_manager.go
@@ -443,23 +443,84 @@ func (sm *SystrayManager) showStartupErrorDialog(err error) {
 	})
 }

-// showDownloadProgress shows a progress window for downloading updates. The
-// progress UI (byte readout, resume-aware retry, sizing) is shared with the
-// other download entry points via the launcher; only the post-success behaviour
-// (restart prompt + systray refresh) is specific to the update flow.
+// showDownloadProgress shows a progress window for downloading updates
 func (sm *SystrayManager) showDownloadProgress(version string) {
-	sm.launcher.showDownloadProgressWindow(version, fmt.Sprintf("Downloading LocalAI version %s", version), func(win fyne.Window) {
-		dialog.ShowConfirm("Update Downloaded",
-			"LocalAI has been updated successfully. Please restart the launcher to use the new version.",
-			func(restart bool) {
-				if restart {
-					sm.app.Quit()
-				}
-				win.Close()
-			}, win)
+	// Create a new window for download progress
+	progressWindow := sm.app.NewWindow("Downloading LocalAI Update")
+	progressWindow.Resize(fyne.NewSize(400, 250))
+	progressWindow.CenterOnScreen()

-		sm.hasUpdateAvailable = false
-		sm.latestVersion = ""
-		sm.recreateMenu()
+	// Progress bar
+	progressBar := widget.NewProgressBar()
+	progressBar.SetValue(0)
+
+	// Status label. Truncate with an ellipsis so a long "Download failed:
+	// <url>" message can't stretch the window (and progress bar) to fit the
+	// whole error on one line; the full error is shown in the dialog below.
+	statusLabel := widget.NewLabel("Preparing download...")
+	statusLabel.Truncation = fyne.TextTruncateEllipsis
+
+	// Release notes button
+	releaseNotesButton := widget.NewButton("View Release Notes", func() {
+		releaseNotesURL, err := sm.launcher.githubReleaseNotesURL(version)
+		if err != nil {
+			log.Printf("Failed to parse URL: %v", err)
+			return
+		}
+
+		sm.app.OpenURL(releaseNotesURL)
 	})
+
+	// Progress container
+	progressContainer := container.NewVBox(
+		widget.NewLabel(fmt.Sprintf("Downloading LocalAI version %s", version)),
+		progressBar,
+		statusLabel,
+		widget.NewSeparator(),
+		releaseNotesButton,
+	)
+
+	progressWindow.SetContent(progressContainer)
+	progressWindow.Show()
+
+	// Start download in background
+	go func() {
+		err := sm.launcher.DownloadUpdate(version, func(progress float64) {
+			// Update progress bar
+			fyne.Do(func() {
+				progressBar.SetValue(progress)
+				percentage := int(progress * 100)
+				statusLabel.SetText(fmt.Sprintf("Downloading... %d%%", percentage))
+			})
+		})
+
+		// Handle completion
+		fyne.Do(func() {
+			if err != nil {
+				statusLabel.SetText(fmt.Sprintf("Download failed: %v", err))
+				// Show error dialog
+				dialog.ShowError(err, progressWindow)
+			} else {
+				statusLabel.SetText("Download completed successfully!")
+				progressBar.SetValue(1.0)
+
+				// Show restart dialog
+				dialog.ShowConfirm("Update Downloaded",
+					"LocalAI has been updated successfully. Please restart the launcher to use the new version.",
+					func(restart bool) {
+						if restart {
+							sm.app.Quit()
+						}
+						progressWindow.Close()
+					}, progressWindow)
+			}
+		})
+
+		// Update systray menu
+		if err == nil {
+			sm.hasUpdateAvailable = false
+			sm.latestVersion = ""
+			sm.recreateMenu()
+		}
+	}()
 }
--- a/cmd/launcher/internal/ui.go
+++ b/cmd/launcher/internal/ui.go
@@ -490,19 +490,14 @@ func (ui *LauncherUI) downloadUpdate() {
 	ui.UpdateStatus("Downloading update " + version + "...")

 	go func() {
-		err := ui.launcher.DownloadUpdate(version, func(downloaded, total int64) {
+		err := ui.launcher.DownloadUpdate(version, func(progress float64) {
+			// Update progress bar
 			fyne.Do(func() {
-				if total > 0 {
-					ui.progressBar.SetValue(float64(downloaded) / float64(total))
-				}
+				ui.progressBar.SetValue(progress)
 			})
-			// The progress bar already shows the percentage, so report the
-			// human-readable size here instead of repeating the percent.
-			if total > 0 {
-				ui.UpdateStatus(fmt.Sprintf("Downloading update %s… %s / %s", version, formatBytes(downloaded), formatBytes(total)))
-			} else {
-				ui.UpdateStatus(fmt.Sprintf("Downloading update %s… %s", version, formatBytes(downloaded)))
-			}
+			// Update status with percentage
+			percentage := int(progress * 100)
+			ui.UpdateStatus(fmt.Sprintf("Downloading update %s... %d%%", version, percentage))
 		})

 		fyne.Do(func() {
@@ -603,6 +598,82 @@ func (ui *LauncherUI) LoadConfiguration() {
 	log.Printf("UI LoadConfiguration: configuration loaded successfully")
 }

+// showDownloadProgress shows a progress window for downloading LocalAI
+func (ui *LauncherUI) showDownloadProgress(version, title string) {
+	fyne.DoAndWait(func() {
+		// Create progress window using the launcher's app
+		progressWindow := ui.launcher.app.NewWindow("Downloading LocalAI")
+		progressWindow.Resize(fyne.NewSize(400, 250))
+		progressWindow.CenterOnScreen()
+
+		// Progress bar
+		progressBar := widget.NewProgressBar()
+		progressBar.SetValue(0)
+
+		// Status label. Truncate with an ellipsis so a long "Download failed:
+		// <url>" message can't stretch the window (and progress bar) to fit the
+		// whole error on one line; the full error is shown in the dialog below.
+		statusLabel := widget.NewLabel("Preparing download...")
+		statusLabel.Truncation = fyne.TextTruncateEllipsis
+
+		// Release notes button
+		releaseNotesButton := widget.NewButton("View Release Notes", func() {
+			releaseNotesURL, err := ui.launcher.githubReleaseNotesURL(version)
+			if err != nil {
+				log.Printf("Failed to parse URL: %v", err)
+				return
+			}
+
+			ui.launcher.app.OpenURL(releaseNotesURL)
+		})
+
+		// Progress container
+		progressContainer := container.NewVBox(
+			widget.NewLabel(title),
+			progressBar,
+			statusLabel,
+			widget.NewSeparator(),
+			releaseNotesButton,
+		)
+
+		progressWindow.SetContent(progressContainer)
+		progressWindow.Show()
+
+		// Start download in background
+		go func() {
+			err := ui.launcher.DownloadUpdate(version, func(progress float64) {
+				// Update progress bar
+				fyne.Do(func() {
+					progressBar.SetValue(progress)
+					percentage := int(progress * 100)
+					statusLabel.SetText(fmt.Sprintf("Downloading... %d%%", percentage))
+				})
+			})
+
+			// Handle completion
+			fyne.Do(func() {
+				if err != nil {
+					statusLabel.SetText(fmt.Sprintf("Download failed: %v", err))
+					// Show error dialog
+					dialog.ShowError(err, progressWindow)
+				} else {
+					statusLabel.SetText("Download completed successfully!")
+					progressBar.SetValue(1.0)
+
+					// Show success dialog
+					dialog.ShowConfirm("Installation Complete",
+						"LocalAI has been downloaded and installed successfully. You can now start LocalAI from the launcher.",
+						func(close bool) {
+							progressWindow.Close()
+							// Update status
+							ui.UpdateStatus("LocalAI installed successfully")
+						}, progressWindow)
+				}
+			})
+		}()
+	})
+}
+
 // UpdateRunningState updates UI based on LocalAI running state
 func (ui *LauncherUI) UpdateRunningState(isRunning bool) {
 	fyne.Do(func() {
--- a/core/application/distributed.go
+++ b/core/application/distributed.go
@@ -356,12 +356,6 @@ func initDistributed(cfg *config.ApplicationConfig, authDB *gorm.DB, configLoade
 		PrefixConfig:     prefixCfg,
 		Pressure:         pressure,
 		SharedModels:     cfg.Distributed.SharedModels,
-		// Cap how long a cold load may hold the per-model advisory lock: the
-		// configured backend.install deadline plus a margin for file staging and
-		// the remote LoadModel. Derived from the install timeout so raising it
-		// (for slow links pulling multi-GB images) widens the ceiling too,
-		// instead of letting the static default cut a legitimately slow load.
-		ModelLoadCeiling: cfg.Distributed.BackendInstallTimeoutOrDefault() + 10*time.Minute,
 	})

 	// Wire staging-progress broadcasting so file-staging shows up on every
--- a/core/services/advisorylock/advisorylock.go
+++ b/core/services/advisorylock/advisorylock.go
@@ -130,20 +130,6 @@ func WithLockCtx(ctx context.Context, db *gorm.DB, key int64, fn func() error) e
 	}
 	defer conn.Close()

-	// Neutralize any deployment-wide lock_timeout on this dedicated connection.
-	// Operators commonly set a short global lock_timeout (on the role or
-	// database) to bound ordinary row-lock waits. Applied to the blocking
-	// pg_advisory_lock below, it aborts the wait with SQLSTATE 55P03 and turns
-	// LocalAI's intentional cross-replica "wait your turn, then re-check"
-	// coordination into a hard error for the caller (e.g. a chat request that
-	// just wanted to reuse a model another replica is loading). Let the Go
-	// context be the single source of truth for how long we wait instead.
-	if _, err := conn.ExecContext(ctx, "SET lock_timeout = 0"); err != nil {
-		return fmt.Errorf("advisorylock: disabling lock_timeout: %w", err)
-	}
-	// Restore the session default before this pooled connection is reused.
-	defer func() { _, _ = conn.ExecContext(context.Background(), "RESET lock_timeout") }()
-
 	if _, err := conn.ExecContext(ctx, "SELECT pg_advisory_lock($1)", key); err != nil {
 		return fmt.Errorf("advisorylock: acquiring lock %d: %w", key, err)
 	}
--- a/core/services/advisorylock/advisorylock_test.go
+++ b/core/services/advisorylock/advisorylock_test.go
@@ -158,53 +158,6 @@ var _ = Describe("AdvisoryLock", func() {
 			Expect(err).To(HaveOccurred())
 		})

-		It("waits out a short server-side lock_timeout instead of failing with 55P03", func() {
-			const lockKey int64 = 703
-
-			// Reproduce the production deployment that triggered this: a short
-			// global lock_timeout set on the database. Without the fix, a waiter
-			// blocked on pg_advisory_lock() is aborted by the server after this
-			// window and surfaces SQLSTATE 55P03 ("canceling statement due to
-			// lock timeout") to the caller instead of waiting for its turn.
-			Expect(db.Exec("ALTER DATABASE testdb SET lock_timeout = '300ms'").Error).ToNot(HaveOccurred())
-			sqlDB, err := db.DB()
-			Expect(err).ToNot(HaveOccurred())
-			// Drop pooled connections so subsequent ones reconnect and inherit
-			// the new database-level lock_timeout default.
-			sqlDB.SetMaxIdleConns(0)
-
-			holding := make(chan struct{})
-			released := make(chan struct{})
-			go func() {
-				defer GinkgoRecover()
-				herr := WithLockCtx(context.Background(), db, lockKey, func() error {
-					close(holding)
-					// Hold well past the 300ms server lock_timeout.
-					time.Sleep(1 * time.Second)
-					return nil
-				})
-				Expect(herr).ToNot(HaveOccurred())
-				close(released)
-			}()
-
-			<-holding // ensure the holder owns the lock before we contend
-
-			ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
-			defer cancel()
-			executed := false
-			start := time.Now()
-			werr := WithLockCtx(ctx, db, lockKey, func() error {
-				executed = true
-				return nil
-			})
-			Expect(werr).ToNot(HaveOccurred(),
-				"waiter should wait out the in-progress hold, not fail with lock_timeout (55P03)")
-			Expect(executed).To(BeTrue())
-			Expect(time.Since(start)).To(BeNumerically(">=", 400*time.Millisecond),
-				"waiter should have actually waited for the holder to release")
-			<-released
-		})
-
 		It("serializes concurrent WithLockCtx on same key", func() {
 			const lockKey int64 = 702

--- a/core/services/nodes/router.go
+++ b/core/services/nodes/router.go
@@ -68,13 +68,6 @@ type SmartRouterOptions struct {
 	// the absolute model paths untouched so the worker loads them directly from
 	// the shared volume (#10556). See config.DistributedConfig.SharedModels.
 	SharedModels bool
-	// ModelLoadCeiling is the hard upper bound on how long a single cold-load
-	// attempt (node selection -> backend install -> file staging -> LoadModel)
-	// may run while holding the per-model advisory lock. It backstops every
-	// sub-step's own timeout so a wedged worker can never pin the lock - and
-	// every other replica's request for that model - indefinitely. Zero selects
-	// defaultModelLoadCeiling.
-	ModelLoadCeiling time.Duration
 }

 // SmartRouter routes inference requests to the best available backend node.
@@ -108,18 +101,8 @@ type SmartRouter struct {
 	// sharedModels skips file staging when all nodes mount the same models
 	// directory at the same path (see SmartRouterOptions.SharedModels).
 	sharedModels bool
-	// modelLoadCeiling bounds how long a cold load may hold the per-model
-	// advisory lock (see SmartRouterOptions.ModelLoadCeiling).
-	modelLoadCeiling time.Duration
 }

-// defaultModelLoadCeiling is the fallback hold ceiling for a cold model load.
-// It must comfortably exceed the slowest legitimate load - a multi-GB backend
-// install (DefaultBackendInstallTimeout, 15m) plus staging and the remote
-// LoadModel (5m) - so it never cuts a real load short; it only ever fires when
-// a step is genuinely wedged (e.g. a worker that died mid-install).
-const defaultModelLoadCeiling = 25 * time.Minute
-
 // probeCacheTTL is how long a successful gRPC HealthCheck on a backend is
 // trusted before the next request re-probes. Matches healthCheckTTL in
 // pkg/model/model.go so the single-process and distributed paths share a
@@ -134,10 +117,6 @@ func NewSmartRouter(registry ModelRouter, opts SmartRouterOptions) *SmartRouter
 	if factory == nil {
 		factory = &tokenClientFactory{token: opts.AuthToken}
 	}
-	ceiling := opts.ModelLoadCeiling
-	if ceiling <= 0 {
-		ceiling = defaultModelLoadCeiling
-	}
 	return &SmartRouter{
 		registry:         registry,
 		unloader:         opts.Unloader,
@@ -152,7 +131,6 @@ func NewSmartRouter(registry ModelRouter, opts SmartRouterOptions) *SmartRouter
 		prefixConfig:     opts.PrefixConfig,
 		pressure:         opts.Pressure,
 		sharedModels:     opts.SharedModels,
-		modelLoadCeiling: ceiling,
 	}
 }

@@ -405,19 +383,11 @@ func (r *SmartRouter) Route(ctx context.Context, modelID, modelName, backendType
 	// the request context. If staging were bound to it, the multi-GB upload
 	// aborts with "context canceled" mid-transfer and large models can never
 	// finish staging (the model-load outage). WithoutCancel keeps the request's
-	// values (prefix chain, etc.) but drops its cancellation/deadline.
-	//
-	// Detaching from the caller is necessary, but it must not be unbounded: the
-	// load runs while holding the per-model advisory lock, and a worker that
-	// dies mid-install (its backend.install never replies) would otherwise pin
-	// that lock (and every other replica's request for the same model) until
-	// the NATS install deadline alone expires. Re-impose a single hard ceiling
-	// over the whole sequence so the lock is always released in bounded time,
-	// even if a sub-step wedges. Each long step still has its own (tighter)
-	// bound; this only backstops them. The per-model advisory lock below
-	// de-dupes concurrent loaders across replicas.
-	loadCtx, cancelLoad := context.WithTimeout(context.WithoutCancel(ctx), r.modelLoadCeiling)
-	defer cancelLoad()
+	// values (prefix chain, etc.) but drops its cancellation/deadline. Each
+	// long step still has its own bound (the file stager's resume budget,
+	// LoadModel's 5m timeout), and the per-model advisory lock below de-dupes
+	// concurrent loaders across replicas.
+	loadCtx := context.WithoutCancel(ctx)
 	loadModel := func(ctx context.Context) (*RouteResult, error) {
 		// Re-check after acquiring lock — another request may have loaded it
 		node, nm, err := r.registry.FindAndLockNodeWithModel(ctx, trackingKey, candidateNodeIDs, pref)
@@ -946,14 +916,7 @@ func (r *SmartRouter) installBackendOnNode(ctx context.Context, node *BackendNod
 	}

 	key := fmt.Sprintf("%s|%s|%s|%d", node.ID, backendType, modelID, replicaIndex)
-	// DoChan rather than Do so this wait honors ctx cancellation. InstallBackend
-	// blocks for its full NATS deadline (15m by default) when a worker accepts
-	// the request but never replies (e.g. it died mid-install). Without ctx
-	// awareness the caller (holding the per-model advisory lock) would sit there
-	// the whole time; here a cancelled ctx (typically the model-load ceiling)
-	// frees the caller promptly. The shared install keeps running in the
-	// background and still coalesces other callers via singleflight.
-	resCh := r.installFlight.DoChan(key, func() (any, error) {
+	v, err, _ := r.installFlight.Do(key, func() (any, error) {
 		reply, err := r.unloader.InstallBackend(node.ID, backendType, modelID, r.galleriesJSON, "", "", "", replicaIndex, "", nil)
 		if err != nil {
 			return "", err
@@ -968,15 +931,10 @@ func (r *SmartRouter) installBackendOnNode(ctx context.Context, node *BackendNod
 		}
 		return addr, nil
 	})
-	select {
-	case <-ctx.Done():
-		return "", ctx.Err()
-	case res := <-resCh:
-		if res.Err != nil {
-			return "", res.Err
-		}
-		return res.Val.(string), nil
+	if err != nil {
+		return "", err
 	}
+	return v.(string), nil
 }

 func (r *SmartRouter) buildClientForAddr(node *BackendNode, addr string, parallel bool) grpc.Backend {
--- a/core/services/nodes/router_test.go
+++ b/core/services/nodes/router_test.go
@@ -493,44 +493,6 @@ var _ = Describe("SmartRouter", func() {
 				Expect(result.Node.ID).To(Equal("n3"))
 			})
 		})
-
-		Context("worker wedges mid-install (dead node holding the lock)", func() {
-			It("aborts the load at the ModelLoadCeiling instead of blocking forever", func() {
-				// Simulate the production incident: the chosen worker accepts the
-				// backend.install but never replies (it died), so InstallBackend
-				// would otherwise block for its full NATS deadline (15m by
-				// default) while pinning the per-model advisory lock. Route must
-				// give up at the ceiling so the lock is released promptly.
-				reg.findAndLockErr = errors.New("not found")
-				reg.findIdleNode = &BackendNode{ID: "n4", Name: "dead-node", Address: "10.0.0.4:50051"}
-
-				block := make(chan struct{})
-				defer close(block) // let the background install goroutine drain at test end
-				unloader.installHook = func() { <-block }
-
-				router := NewSmartRouter(reg, SmartRouterOptions{
-					Unloader:         unloader,
-					ClientFactory:    factory,
-					ModelLoadCeiling: 200 * time.Millisecond,
-				})
-
-				done := make(chan error, 1)
-				start := time.Now()
-				go func() {
-					defer GinkgoRecover()
-					_, err := router.Route(context.Background(), "wedged-model",
-						"models/wedged.gguf", "llama-cpp",
-						&pb.ModelOptions{Model: "models/wedged.gguf"}, false)
-					done <- err
-				}()
-
-				var routeErr error
-				Eventually(done, 5*time.Second).Should(Receive(&routeErr),
-					"Route must not block on a wedged install past the ceiling")
-				Expect(routeErr).To(HaveOccurred())
-				Expect(time.Since(start)).To(BeNumerically("<", 5*time.Second))
-			})
-		})
 	})

 	Describe("scheduleNewModel (mock-based, via Route)", func() {
--- a/gallery/index.yaml
+++ b/gallery/index.yaml
@@ -3,7 +3,26 @@
  url: "github:mudler/LocalAI/gallery/virtual.yaml@master"
  urls:
    - https://huggingface.co/unsloth/Qwen-AgentWorld-35B-A3B-GGUF
-  description: "# Qwen-AgentWorld-35B-A3B\n\n\U0001F4D1 Technical Report |\n\U0001F4D6 Blog |\n\U0001F917 Hugging Face |\n\U0001F916 ModelScope |\n\U0001F4BB GitHub |\n\U0001F5A5️ Demo\n\n> [!Note]\n> This repository contains the model weights and configuration files for **Qwen-AgentWorld-35B-A3B**, a native language world model trained for agentic environment simulation.\n>\n> These artifacts are compatible with Hugging Face Transformers, vLLM, SGLang, etc.\n\n**Qwen-AgentWorld** is the first language world model to cover seven agent interaction domains within a single model. It simulates agentic environments via long chain-of-thought reasoning, predicting the next environment state given an agent's action and interaction history. Trained through a three-stage pipeline — CPT injects environment knowledge, SFT activates next-state-prediction reasoning, RL sharpens simulation fidelity — Qwen-AgentWorld is a **native world model**: environment modeling is the training objective from the CPT stage onward, not a post-hoc add-on.\n\n## Highlights\n\n...\n"
+  description: |
+    # Qwen-AgentWorld-35B-A3B
+
+    📑 Technical Report |
+    📖 Blog |
+    🤗 Hugging Face |
+    🤖 ModelScope |
+    💻 GitHub |
+    🖥️ Demo
+
+    > [!Note]
+    > This repository contains the model weights and configuration files for **Qwen-AgentWorld-35B-A3B**, a native language world model trained for agentic environment simulation.
+    >
+    > These artifacts are compatible with Hugging Face Transformers, vLLM, SGLang, etc.
+
+    **Qwen-AgentWorld** is the first language world model to cover seven agent interaction domains within a single model. It simulates agentic environments via long chain-of-thought reasoning, predicting the next environment state given an agent's action and interaction history. Trained through a three-stage pipeline — CPT injects environment knowledge, SFT activates next-state-prediction reasoning, RL sharpens simulation fidelity — Qwen-AgentWorld is a **native world model**: environment modeling is the training objective from the CPT stage onward, not a post-hoc add-on.
+
+    ## Highlights
+
+    ...
  license: "apache-2.0"
  tags:
    - llm
@@ -32,7 +51,34 @@
  url: "github:mudler/LocalAI/gallery/virtual.yaml@master"
  urls:
    - https://huggingface.co/deepreinforce-ai/Ornith-1.0-9B-GGUF
-  description: "[](https://deep-reinforce.com/ornith.html)\n\n# Ornith-1.0-9B-GGUF\n\nAloha! \U0001F33A Today, we are releasing Ornith-1.0, a self-improving family of open-source models for agentic coding.\n\nHighlights:\n\n  - **State-of-the-Art Coding Agents**: Available in 9B-Dense, 31B-Dense, 35B-MoE, and 397B-MoE (post-trained on top of Gemma 4 and Qwen 3.5), achieving state-of-the-art performance among open-source models of comparable size on coding benchmarks such as Terminal-Bench 2.1, SWE-Bench, NL2Repo and OpenClaw.\n  - **Self-Improving Training Framework**:  Ornith-1.0 employs RL to learn to generate not only solution rollouts, but also the scallfold that drive those rollouts. By jointly optimizing the scaffold and the resulting solution, the model  discovers better search trajectories and generates higher-quality solutions.\n  - **Licence**: MIT licensed, globally accessible, and free from regional limitations.\n\n## Ornith 1.0 9B\n\nThis model card documents **Ornith-1.0-9B**, the most lightweight member of the Ornith family, designed for efficient single-GPU deployment.\n\n### Benchmarks\n\nOrnith-1.0-9B\nQwen3.5-9B\nQwen3.5-35B\nGemma4-12B\nGemma4-31B\n\nAgentic Coding\n\n...\n"
+  description: |
+    [](https://deep-reinforce.com/ornith.html)
+
+    # Ornith-1.0-9B-GGUF
+
+    Aloha! 🌺 Today, we are releasing Ornith-1.0, a self-improving family of open-source models for agentic coding.
+
+    Highlights:
+
+      - **State-of-the-Art Coding Agents**: Available in 9B-Dense, 31B-Dense, 35B-MoE, and 397B-MoE (post-trained on top of Gemma 4 and Qwen 3.5), achieving state-of-the-art performance among open-source models of comparable size on coding benchmarks such as Terminal-Bench 2.1, SWE-Bench, NL2Repo and OpenClaw.
+      - **Self-Improving Training Framework**: Ornith-1.0 employs RL to learn to generate not only solution rollouts, but also the scaffold that drives those rollouts. By jointly optimizing the scaffold and the resulting solution, the model discovers better search trajectories and generates higher-quality solutions.
+      - **Licence**: MIT licensed, globally accessible, and free from regional limitations.
+
+    ## Ornith 1.0 9B
+
+    This model card documents **Ornith-1.0-9B**, the most lightweight member of the Ornith family, designed for efficient single-GPU deployment.
+
+    ### Benchmarks
+
+    Ornith-1.0-9B
+    Qwen3.5-9B
+    Qwen3.5-35B
+    Gemma4-12B
+    Gemma4-31B
+
+    Agentic Coding
+
+    ...
  license: "mit"
  tags:
    - llm
@@ -59,7 +105,34 @@
  url: "github:mudler/LocalAI/gallery/virtual.yaml@master"
  urls:
    - https://huggingface.co/deepreinforce-ai/Ornith-1.0-35B-GGUF
-  description: "[](https://deep-reinforce.com/ornith.html)\n\n# Ornith-1.0-35B-GGUF\n\nAloha! \U0001F33A Today, we are releasing Ornith-1.0, a self-improving family of open-source models for agentic coding.\n\nHighlights:\n\n  - **State-of-the-Art Coding Agents**: Available in 9B-Dense, 31B-Dense, 35B-MoE, and 397B-MoE (post-trained on top of Gemma 4 and Qwen 3.5), achieving state-of-the-art performance among open-source models of comparable size on coding benchmarks such as Terminal-Bench 2.1, SWE-Bench, NL2Repo and OpenClaw.\n  - **Self-Improving Training Framework**:  Ornith-1.0 employs RL to learn to generate not only solution rollouts, but also the scallfold that drive those rollouts. By jointly optimizing the scaffold and the resulting solution, the model  discovers better search trajectories and generates higher-quality solutions.\n  - **Licence**: MIT licensed, globally accessible, and free from regional limitations.\n\n## Ornith 1.0 35B\n\nThis model card documents **Ornith-1.0-35B**, the lightweight member of the Ornith family, designed for efficient single-GPU deployment.\n\n### Benchmarks\n\nOrnith-1.0-35B\nQwen3.5-35B\nQwen3.6-35B\nGemma4-31B\nQwen3.5-397B\n\nAgentic Coding\n\n...\n"
+  description: |
+    [](https://deep-reinforce.com/ornith.html)
+
+    # Ornith-1.0-35B-GGUF
+
+    Aloha! 🌺 Today, we are releasing Ornith-1.0, a self-improving family of open-source models for agentic coding.
+
+    Highlights:
+
+      - **State-of-the-Art Coding Agents**: Available in 9B-Dense, 31B-Dense, 35B-MoE, and 397B-MoE (post-trained on top of Gemma 4 and Qwen 3.5), achieving state-of-the-art performance among open-source models of comparable size on coding benchmarks such as Terminal-Bench 2.1, SWE-Bench, NL2Repo and OpenClaw.
+      - **Self-Improving Training Framework**: Ornith-1.0 employs RL to learn to generate not only solution rollouts, but also the scaffold that drives those rollouts. By jointly optimizing the scaffold and the resulting solution, the model discovers better search trajectories and generates higher-quality solutions.
+      - **Licence**: MIT licensed, globally accessible, and free from regional limitations.
+
+    ## Ornith 1.0 35B
+
+    This model card documents **Ornith-1.0-35B**, the lightweight member of the Ornith family, designed for efficient single-GPU deployment.
+
+    ### Benchmarks
+
+    Ornith-1.0-35B
+    Qwen3.5-35B
+    Qwen3.6-35B
+    Gemma4-31B
+    Qwen3.5-397B
+
+    Agentic Coding
+
+    ...
  license: "mit"
  tags:
    - llm
@@ -400,8 +473,8 @@
      use_tokenizer_template: true
  files:
    - filename: llama-cpp/models/Qwythos-9B-Claude-Mythos-5-1M-GGUF/Qwythos-9B-Claude-Mythos-5-1M-MTP-Q4_K_M.gguf
+      sha256: 24ee22e0f5d9f0d3d615809607f365c728d9b0c3f3fb6eb19d8bd83a1c2933d8
      uri: https://huggingface.co/empero-ai/Qwythos-9B-Claude-Mythos-5-1M-GGUF/resolve/main/Qwythos-9B-Claude-Mythos-5-1M-MTP-Q4_K_M.gguf
-      sha256: 671c430bf18c961251338d639a3c02aac7451c39eed25874cad74287ac6cd38a
    - filename: llama-cpp/mmproj/Qwythos-9B-Claude-Mythos-5-1M-GGUF/mmproj-Qwythos-9B-Claude-Mythos-5-1M-f16.gguf
      sha256: f70dc3509053962b0d0d3ee8a7eacebf5d60aa560cad78254ae8698516ae029f
      uri: https://huggingface.co/empero-ai/Qwythos-9B-Claude-Mythos-5-1M-GGUF/resolve/main/mmproj-Qwythos-9B-Claude-Mythos-5-1M-f16.gguf
--- a/pkg/natsauth/permissions.go
+++ b/pkg/natsauth/permissions.go
@@ -20,8 +20,6 @@ func WorkerPermissions(nodeID, nodeType string) (pubAllow, subAllow []string) {
 		subAllow = []string{
 			"agent.execute",
 			"agent.*.cancel",
-			"gallery.*.cancel",
-			"gallery.*.progress",
 			"jobs.*.cancel",
 			"jobs.*.progress",
 			"jobs.*.result",
@@ -29,7 +27,6 @@ func WorkerPermissions(nodeID, nodeType string) (pubAllow, subAllow []string) {
 			"mcp.tools.execute",
 			"mcp.discovery",
 			prefix + ".backend.stop", // stop events drive MCP session cleanup
-			"staging.*.progress",
 			"_INBOX.>",
 		}
 		pubAllow = []string{
--- a/pkg/oci/image.go
+++ b/pkg/oci/image.go
@@ -63,72 +63,6 @@ var defaultRetryPredicate = func(err error) bool {
 	return false
 }

-// layerDownloadRetries is the number of additional attempts made when a layer
-// download fails with a transient/retryable network error.
-var layerDownloadRetries = 3
-
-// layerRetryBackoff returns the wait before retry attempt n (1-indexed). It is a
-// variable so tests can eliminate the wait.
-var layerRetryBackoff = func(attempt int) time.Duration {
-	d := defaultRetryBackoff.Duration
-	for i := 1; i < attempt; i++ {
-		d = time.Duration(float64(d) * defaultRetryBackoff.Factor)
-	}
-	return d
-}
-
-// downloadLayerToFile streams a single compressed layer into dst, retrying on
-// transient network errors (unexpected EOF, connection reset, ...). Large
-// backend images (e.g. vLLM) are several GiB and a single dropped connection
-// mid-stream previously failed the whole install with "unexpected EOF" and no
-// recovery. The registry transport already retries manifest fetches via
-// defaultRetryPredicate (see GetImage/GetImageDigest); this extends the same
-// behaviour to the layer data stream. See issue #10577.
-func downloadLayerToFile(ctx context.Context, layer v1.Layer, dst *os.File, progress *progressWriter) error {
-	var lastErr error
-	for attempt := 0; attempt <= layerDownloadRetries; attempt++ {
-		if attempt > 0 {
-			// Discard any partial data from the previous failed attempt.
-			if _, err := dst.Seek(0, io.SeekStart); err != nil {
-				return err
-			}
-			if err := dst.Truncate(0); err != nil {
-				return err
-			}
-			if progress != nil {
-				progress.written = 0
-			}
-			select {
-			case <-ctx.Done():
-				return ctx.Err()
-			case <-time.After(layerRetryBackoff(attempt)):
-			}
-		}
-
-		var w io.Writer = dst
-		if progress != nil {
-			w = io.MultiWriter(dst, progress)
-		}
-
-		var reader io.ReadCloser
-		reader, lastErr = layer.Compressed()
-		if lastErr == nil {
-			_, lastErr = xio.Copy(ctx, w, reader)
-			_ = reader.Close()
-		}
-		if lastErr == nil {
-			return nil
-		}
-
-		// Stop early on context cancellation or non-retryable errors.
-		if ctx.Err() != nil || !defaultRetryPredicate(lastErr) {
-			return lastErr
-		}
-		logs.Warn.Printf("layer download failed (attempt %d/%d), retrying: %v", attempt+1, layerDownloadRetries+1, lastErr)
-	}
-	return lastErr
-}
-
 type progressWriter struct {
 	written        int64
 	total          int64
@@ -370,17 +304,23 @@ func DownloadOCIImageTar(ctx context.Context, img v1.Image, imageRef string, tar
 		}

 		// Create progress writer for this layer
-		var progress *progressWriter
+		var writer io.Writer = file
 		if downloadStatus != nil {
-			progress = &progressWriter{
+			writer = io.MultiWriter(file, &progressWriter{
 				total:          totalCompressedSize,
 				fileName:       fmt.Sprintf("Downloading %d/%d %s", i+1, len(layers), imageName),
 				downloadStatus: downloadStatus,
-			}
+			})
 		}

-		// Download the compressed layer, retrying on transient network errors.
-		err = downloadLayerToFile(ctx, layer, file, progress)
+		// Download the compressed layer
+		layerReader, err := layer.Compressed()
+		if err != nil {
+			file.Close()
+			return fmt.Errorf("failed to get compressed layer: %v", err)
+		}
+
+		_, err = xio.Copy(ctx, writer, layerReader)
 		file.Close()
 		if err != nil {
 			return fmt.Errorf("failed to download layer %d: %v", i, err)
--- a/pkg/oci/layer_internal_test.go
+++ b/pkg/oci/layer_internal_test.go
@@ -1,123 +0,0 @@
-package oci
-
-import (
-	"bytes"
-	"context"
-	"errors"
-	"io"
-	"os"
-	"time"
-
-	v1 "github.com/google/go-containerregistry/pkg/v1"
-	"github.com/google/go-containerregistry/pkg/v1/types"
-	. "github.com/onsi/ginkgo/v2"
-	. "github.com/onsi/gomega"
-)
-
-// failingReader yields prefix bytes then returns err, simulating a connection
-// dropped mid-stream while downloading a layer.
-type failingReader struct {
-	prefix []byte
-	off    int
-	err    error
-}
-
-func (r *failingReader) Read(p []byte) (int, error) {
-	if r.off < len(r.prefix) {
-		n := copy(p, r.prefix[r.off:])
-		r.off += n
-		return n, nil
-	}
-	return 0, r.err
-}
-
-// fakeLayer is a minimal v1.Layer whose Compressed() fails failUntil times with
-// err (after emitting a partial prefix) before finally returning data in full.
-type fakeLayer struct {
-	data      []byte
-	failUntil int
-	err       error
-	calls     int
-}
-
-func (f *fakeLayer) Digest() (v1.Hash, error)            { return v1.Hash{}, nil }
-func (f *fakeLayer) DiffID() (v1.Hash, error)            { return v1.Hash{}, nil }
-func (f *fakeLayer) Size() (int64, error)                { return int64(len(f.data)), nil }
-func (f *fakeLayer) MediaType() (types.MediaType, error) { return types.DockerLayer, nil }
-func (f *fakeLayer) Uncompressed() (io.ReadCloser, error) {
-	return nil, errors.New("not implemented")
-}
-
-func (f *fakeLayer) Compressed() (io.ReadCloser, error) {
-	f.calls++
-	if f.calls <= f.failUntil {
-		return io.NopCloser(&failingReader{prefix: []byte("partial-garbage"), err: f.err}), nil
-	}
-	return io.NopCloser(bytes.NewReader(f.data)), nil
-}
-
-var _ = Describe("downloadLayerToFile", func() {
-	var (
-		dst         *os.File
-		restoreWait func()
-	)
-
-	BeforeEach(func() {
-		var err error
-		dst, err = os.CreateTemp("", "layer-retry-*.tar.gz")
-		Expect(err).NotTo(HaveOccurred())
-
-		// Eliminate the real backoff sleep so the test is fast.
-		prev := layerRetryBackoff
-		layerRetryBackoff = func(int) time.Duration { return 0 }
-		restoreWait = func() { layerRetryBackoff = prev }
-	})
-
-	AfterEach(func() {
-		restoreWait()
-		_ = dst.Close()
-		_ = os.Remove(dst.Name())
-	})
-
-	It("retries on unexpected EOF and writes the complete layer", func() {
-		layer := &fakeLayer{
-			data:      []byte("the-real-layer-contents"),
-			failUntil: 2,
-			err:       io.ErrUnexpectedEOF,
-		}
-
-		err := downloadLayerToFile(context.Background(), layer, dst, nil)
-		Expect(err).NotTo(HaveOccurred())
-		Expect(layer.calls).To(Equal(3))
-
-		got, err := os.ReadFile(dst.Name())
-		Expect(err).NotTo(HaveOccurred())
-		// The partial data from the two failed attempts must have been
-		// discarded, leaving exactly the real contents.
-		Expect(string(got)).To(Equal("the-real-layer-contents"))
-	})
-
-	It("does not retry on a non-retryable error", func() {
-		layer := &fakeLayer{
-			data:      []byte("never-reached"),
-			failUntil: 1,
-			err:       errors.New("permission denied"),
-		}
-
-		err := downloadLayerToFile(context.Background(), layer, dst, nil)
-		Expect(err).To(HaveOccurred())
-		Expect(layer.calls).To(Equal(1))
-	})
-
-	It("gives up after exhausting retries on a persistent transient error", func() {
-		layer := &fakeLayer{
-			data:      []byte("unreachable"),
-			failUntil: 1000,
-			err:       io.ErrUnexpectedEOF,
-		}
-
-		err := downloadLayerToFile(context.Background(), layer, dst, nil)
-		Expect(err).To(MatchError(io.ErrUnexpectedEOF))
-		Expect(layer.calls).To(Equal(layerDownloadRetries + 1))
-	})
-})