From 9d42a16c20524fda40f96bd11d20bc56fe74f23b Mon Sep 17 00:00:00 2001
From: Richard Palethorpe <io@richiejp.com>
Date: Wed, 6 May 2026 15:40:16 +0100
Subject: [PATCH] ci: publish base images to ci-cache instead of localai-base
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The previous tag scheme pushed to quay.io/go-skynet/localai-base, which
required a separate quay repo + a write-permission grant for the CI
robot. PR #9672 hit a 401 on push because that grant was missing — the
robot can log in but not write to localai-base.

ci-cache already exists, the robot already has write access (it writes
the buildkit cache there on every backend build), and OCI tags namespace
cleanly within a repo. So publish base images to
quay.io/go-skynet/ci-cache:base-image-<stem>[-pr<N>]. The `base-image-`
prefix doesn't collide with the existing tag prefixes, provided no stem
ever starts with `image-` (which would make `base-<stem>` alias it):
  - cache<tag-suffix>           per-backend buildkit cache
  - cache-localai<tag-suffix>   root image buildkit cache
  - base-<stem>                 base image's own buildkit cache
  - base-image-<stem>           the published OCI image (new)

base_images.yml's compute_ref step and prebuiltRef() in
scripts/changed-backends.js are kept in lock-step. Local Makefile tags
are unchanged (they're just local Docker image tags with no remote
counterpart).

Assisted-by: Claude:opus-4-7-1m [Claude Code]
Signed-off-by: Richard Palethorpe <io@richiejp.com>
---
 .agents/ci-caching.md               | 39 +++++++++++++++++++----------
 .docker/bases/Dockerfile.cpp        |  2 +-
 .docker/bases/Dockerfile.golang     |  2 +-
 .docker/bases/Dockerfile.python     |  9 +++----
 .docker/bases/Dockerfile.rust       |  8 +++---
 .github/workflows/backend_build.yml |  6 ++---
 .github/workflows/base_images.yml   | 19 +++++++++-----
 scripts/changed-backends.js         |  6 ++++-
 8 files changed, 56 insertions(+), 35 deletions(-)
diff --git a/.agents/ci-caching.md b/.agents/ci-caching.md
index 1c8c65470..32ce9c324 100644
--- a/.agents/ci-caching.md
+++ b/.agents/ci-caching.md
@@ -5,12 +5,16 @@ Container builds — both the root LocalAI image (`Dockerfile`) and the per-back
 ## Cache layout
 
 - **Cache registry**: `quay.io/go-skynet/ci-cache`
-- **One tag per matrix entry**, derived from the existing `tag-suffix`:
-  - Backend builds (`backend_build.yml`): `cache<tag-suffix>`
+- **Tag prefixes**:
+  - Backend builds (`backend_build.yml`) buildkit cache: `cache<tag-suffix>`
     - e.g. `cache-gpu-nvidia-cuda-12-llama-cpp`, `cache-cpu-vllm`, `cache-nvidia-l4t-cuda-13-arm64-vllm`
-  - Root image builds (`image_build.yml`): `cache-localai<tag-suffix>`
+  - Root image builds (`image_build.yml`) buildkit cache: `cache-localai<tag-suffix>`
     - e.g. `cache-localai-gpu-nvidia-cuda-12`, `cache-localai-gpu-vulkan`
-- Each tag stores a multi-arch BuildKit cache manifest (`mode=max`), so every intermediate stage is re-usable, not just the final image.
+  - Layered base builds (`base_images.yml`) buildkit cache: `base-<stem>`
+    - e.g. `base-python-cpu-2404`, `base-cpp-cublas-2404-cuda13.0`
+  - Layered base **images** (the OCI image manifests that consumers reference in `FROM`): `base-image-<stem>[-pr<N>]`
+    - e.g. `base-image-python-cpu-2404`, `base-image-cpp-cublas-2404-cuda13.0-pr9672`
+- The cache tags store multi-arch BuildKit cache manifests (`mode=max`); the `base-image-*` tags store ordinary OCI image manifests.
 
 ## Read/write semantics
 
@@ -101,14 +105,22 @@ For ccache, the workflow exports `CMAKE_ARGS=… -DCMAKE_C_COMPILER_LAUNCHER=cca
 
 GitHub Actions caches are limited to 10 GB per repo. Steady-state worst case: ~800 MB Go cache + ~2 GB brew Cellar + up to 2 GB ccache + ~1.5 GB × 5 python backends. If the cap is hit, prefer collapsing the per-backend Python keys into a shared `pyenv-darwin-shared-<week>` key (accepts more cross-backend churn for a smaller footprint) before reducing other caches.
 
-## Layered base images (`localai-base`)
+## Layered base images (`ci-cache:base-image-*`)
 
 The registry-backed BuildKit cache deduplicates **within** a matrix entry's
 cache tag, but each matrix entry has its own tag — so the same `apt-get`,
 GPU SDK install, and language toolchain bootstrap runs into N different
-cache tags across the backend matrix. The `localai-base` images factor that
+cache tags across the backend matrix. The layered base images factor that
 shared work out of the per-backend builds.
 
+They live in the same `quay.io/go-skynet/ci-cache` repo as the buildkit
+caches, under a distinct `base-image-` tag prefix so the OCI image
+manifests coexist with `base-<stem>` (the cache for building the base),
+`cache<tag-suffix>` (per-backend caches), and `cache-localai<tag-suffix>`
+(root image caches). Reusing `ci-cache` means no new quay repo or robot
+grant is needed — the same credentials that write the cache also write
+the image.
+
 ### How it fits together
 
 ```
@@ -128,12 +140,12 @@ backend.yml / backend_pr.yml
   ├── build-bases  (matrix: bases-matrix)
   │     uses base_images.yml
   │       FROM .docker/bases/Dockerfile.<lang>
-  │       pushes quay.io/go-skynet/localai-base:<stem>[-pr<N>]
+  │       pushes quay.io/go-skynet/ci-cache:base-image-<stem>[-pr<N>]
   │
   └── backend-jobs  (matrix: matrix; needs build-bases)
         uses backend_build.yml
           FROM ${BASE_IMAGE_PREBUILT}
-            i.e. quay.io/go-skynet/localai-base:<stem>[-pr<N>]
+            i.e. quay.io/go-skynet/ci-cache:base-image-<stem>[-pr<N>]
           only the backend source COPY + `make` remain.
 ```
 
@@ -160,14 +172,15 @@ The base-image slug is empty for the default `ubuntu:24.04` and a short
 parseable suffix otherwise (`jetpack-r36.4.0`, `rocm-7.2.1`,
 `oneapi-2025.3.2`, etc.).
 
-| Event | Pushed tag |
+| Event | Pushed tag (in `quay.io/go-skynet/ci-cache`) |
 |---|---|
-| `push` (master/tag) | `:<stem>` |
-| `pull_request` | `:<stem>-pr<PR_NUMBER>` |
+| `push` (master/tag) | `:base-image-<stem>` |
+| `pull_request` | `:base-image-<stem>-pr<PR_NUMBER>` |
 
-The cache for the base build itself lives at
+The buildkit cache for the base build itself lives at
 `quay.io/go-skynet/ci-cache:base-<stem>` (`mode=max,ignore-error=true`),
-parallel to the per-matrix-entry caches.
+parallel to the per-matrix-entry caches. The `base-` (cache) and `base-image-`
+(image) tags cannot collide as long as no stem begins with `image-`.
 
 The script also runs a collision check across consumers of each stem: if
 two consumers map to the same stem but disagree on `base-image` or
diff --git a/.docker/bases/Dockerfile.cpp b/.docker/bases/Dockerfile.cpp
index e7ab763bb..8219b5574 100644
--- a/.docker/bases/Dockerfile.cpp
+++ b/.docker/bases/Dockerfile.cpp
@@ -4,7 +4,7 @@
 #
 # Built once per (build-type, arch, ubuntu-version, cuda-version) combination
 # by .github/workflows/base_images.yml and pushed to
-# quay.io/go-skynet/localai-base:<tag-stem>[-pr<N>]. Consumed by
+# quay.io/go-skynet/ci-cache:base-image-<tag-stem>[-pr<N>]. Consumed by
 # backend/Dockerfile.{llama-cpp,ik-llama-cpp,turboquant} via the
 # BASE_IMAGE_PREBUILT build-arg. See .agents/ci-caching.md.
 
diff --git a/.docker/bases/Dockerfile.golang b/.docker/bases/Dockerfile.golang
index da9423c79..c103bbda3 100644
--- a/.docker/bases/Dockerfile.golang
+++ b/.docker/bases/Dockerfile.golang
@@ -2,7 +2,7 @@
 #
 # Built once per (build-type, arch, ubuntu-version, cuda-version) combination
 # by .github/workflows/base_images.yml and pushed to
-# quay.io/go-skynet/localai-base:<tag-stem>[-pr<N>]. Consumed by
+# quay.io/go-skynet/ci-cache:base-image-<tag-stem>[-pr<N>]. Consumed by
 # backend/Dockerfile.golang via the BASE_IMAGE_PREBUILT build-arg.
 #
 # Mirrors the GPU stack stanzas in Dockerfile.python; the language-specific
diff --git a/.docker/bases/Dockerfile.python b/.docker/bases/Dockerfile.python
index 76f4b70eb..70cb7ed9d 100644
--- a/.docker/bases/Dockerfile.python
+++ b/.docker/bases/Dockerfile.python
@@ -1,13 +1,10 @@
 # Shared Python + accelerator base image.
 #
 # Built once per (build-type, arch, ubuntu-version, cuda-version) combination
-# by .github/workflows/base_images_python.yml and pushed to
-# quay.io/go-skynet/localai-base:<tag-stem>[-pr<N>]. Consumed by
+# by .github/workflows/base_images.yml and pushed to
+# quay.io/go-skynet/ci-cache:base-image-<tag-stem>[-pr<N>]. Consumed by
 # backend/Dockerfile.python via the BASE_IMAGE_PREBUILT build-arg.
-#
-# Keep the install steps below in lock-step with backend/Dockerfile.python's
-# accel-inline stage until the inline fallback is removed. See
-# .agents/ci-caching.md for the migration plan.
+# See .agents/ci-caching.md.
 
 ARG BASE_IMAGE=ubuntu:24.04
 ARG APT_MIRROR=""
diff --git a/.docker/bases/Dockerfile.rust b/.docker/bases/Dockerfile.rust
index 0201e5978..a9b0e1396 100644
--- a/.docker/bases/Dockerfile.rust
+++ b/.docker/bases/Dockerfile.rust
@@ -1,10 +1,10 @@
 # Shared Rust base image for the kokoros backend.
 #
 # Built once per (ubuntu-version) by .github/workflows/base_images.yml and
-# pushed to quay.io/go-skynet/localai-base:<tag-stem>[-pr<N>]. The current
-# rust matrix is CPU-only, so this base skips the GPU SDK stanzas; if a
-# future rust backend needs cublas/rocm/etc., promote this recipe to mirror
-# Dockerfile.python's GPU stack. See .agents/ci-caching.md.
+# pushed to quay.io/go-skynet/ci-cache:base-image-<tag-stem>[-pr<N>]. The
+# current rust matrix is CPU-only, so this base skips the GPU SDK stanzas;
+# if a future rust backend needs cublas/rocm/etc., promote this recipe to
+# mirror Dockerfile.python's GPU stack. See .agents/ci-caching.md.
 
 ARG BASE_IMAGE=ubuntu:24.04
 ARG APT_MIRROR=""
diff --git a/.github/workflows/backend_build.yml b/.github/workflows/backend_build.yml
index 5f68af3b0..188d3486f 100644
--- a/.github/workflows/backend_build.yml
+++ b/.github/workflows/backend_build.yml
@@ -66,9 +66,9 @@ on:
       base-image-prebuilt:
         description: |
           Optional reference to a prebuilt accel/lang base image
-          (quay.io/go-skynet/localai-base:<tag>). When set, the backend
-          Dockerfile FROMs this image instead of running the inline
-          bootstrap. See .github/workflows/base_images_python.yml and
+          (quay.io/go-skynet/ci-cache:base-image-<stem>[-pr<N>]). When
+          set, the backend Dockerfile FROMs this image instead of running
+          an inline bootstrap. See .github/workflows/base_images.yml and
           .agents/ci-caching.md.
         required: false
         default: ''
diff --git a/.github/workflows/base_images.yml b/.github/workflows/base_images.yml
index 141c7411b..d36dfc718 100644
--- a/.github/workflows/base_images.yml
+++ b/.github/workflows/base_images.yml
@@ -2,11 +2,14 @@
 name: 'build base image (reusable)'
 
 # Builds and pushes one (lang, accel, arch, ubuntu, cuda) base image flavour
-# to quay.io/go-skynet/localai-base. Consumed by backend builds via the
-# BASE_IMAGE_PREBUILT build-arg. PR builds tag with `-pr${PR_NUMBER}` so the
-# same PR's backend matrix can opt-in to the freshly-built base; master
-# builds overwrite the unsuffixed tag for downstream consumption. See
-# .agents/ci-caching.md for the full tagging scheme.
+# to quay.io/go-skynet/ci-cache:base-image-<stem>[-pr<N>]. Consumed by
+# backend builds via the BASE_IMAGE_PREBUILT build-arg. PR builds tag with
+# `-pr${PR_NUMBER}` so the same PR's backend matrix can opt-in to the
+# freshly-built base; master builds overwrite the unsuffixed tag for
+# downstream consumption. The image lives in the same ci-cache repo as the
+# buildkit cache (under a `base-image-` prefix that doesn't collide with
+# the `base-<stem>` cache prefix), so no separate quay repo + grant is
+# needed. See .agents/ci-caching.md for the full tagging scheme.
 
 on:
   workflow_call:
@@ -98,7 +101,11 @@ jobs:
             tag="${stem}"
           fi
           echo "tag=${tag}" >> "$GITHUB_OUTPUT"
-          echo "ref=quay.io/go-skynet/localai-base:${tag}" >> "$GITHUB_OUTPUT"
+          # Published into the existing ci-cache repo (the CI robot already
+          # has write access there) under a distinct `base-image-` prefix so
+          # the OCI image tags coexist with the buildkit cache tags
+          # (`base-<stem>`, `cache<tag-suffix>`, `cache-localai<tag-suffix>`).
+          echo "ref=quay.io/go-skynet/ci-cache:base-image-${tag}" >> "$GITHUB_OUTPUT"
 
       - name: Set up QEMU
         uses: docker/setup-qemu-action@master
diff --git a/scripts/changed-backends.js b/scripts/changed-backends.js
index 6618a75eb..9876923c1 100644
--- a/scripts/changed-backends.js
+++ b/scripts/changed-backends.js
@@ -164,7 +164,11 @@ function tagStem(item) {
 function prebuiltRef(stem) {
   if (!stem) return "";
   const suffix = isPR ? `-pr${prNumber}` : "";
-  return `quay.io/go-skynet/localai-base:${stem}${suffix}`;
+  // Must match the ref computed in .github/workflows/base_images.yml.
+  // Bases live in the existing ci-cache repo under a distinct
+  // `base-image-` prefix so the CI robot's existing write access there
+  // covers the layered base flow without a new quay repo + grant.
+  return `quay.io/go-skynet/ci-cache:base-image-${stem}${suffix}`;
 }
 
 // Build-types that actually exercise the SKIP_DRIVERS branch in the base