mirror of
https://github.com/mudler/LocalAI.git
synced 2026-06-28 10:27:30 -04:00
chore(paged): drop CUDA-12 variants of llama-cpp-localai-paged, keep CUDA-13 only
The paged backend targets Blackwell sm_121a, which CUDA 12.0 cannot target
at all, so the CUDA-12 variants were pointless. They were also broken: the
cublas-12 / nvidia-l4t / arm64 build failed to compile paged-kv-manager.cpp
("no declaration matches ...", a ~10-function mismatch the older
cuda-12-base gcc rejects). CUDA-13 compiles it fine (confirmed on GB10).
Removed (config-only, scoped to the paged backend):
- backend-matrix.yml: the two CUDA-12 paged rows
(-gpu-nvidia-cuda-12-llama-cpp-localai-paged,
-nvidia-l4t-arm64-llama-cpp-localai-paged)
- backend/index.yaml: CUDA-12 capability keys (nvidia-cuda-12,
nvidia-l4t-cuda-12, nvidia-l4t) on both meta-backends, repointed
default/nvidia to the cuda13 amd64 variant, and dropped the orphaned
cuda12-* / nvidia-l4t-arm64-* variant definitions (latest + -development).
Kept CUDA-13 only: cuda13-llama-cpp-localai-paged (amd64) and
cuda13-nvidia-l4t-arm64-llama-cpp-localai-paged (l4t arm64). Matrix
tag-suffixes <-> index variant URIs form a clean 2:2 bijection.
Assisted-by: Claude:opus-4.8 [Claude Code]
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
28
.github/backend-matrix.yml
vendored
28
.github/backend-matrix.yml
vendored
@@ -4886,20 +4886,6 @@ include:
|
||||
# swapped; builder-base-image is left UNCHANGED so these reuse the same
|
||||
# base-grpc-* prebuilt bases (same gRPC + same toolchain), needing no new
|
||||
# base-images.yml variant.
|
||||
- build-type: 'cublas'
|
||||
cuda-major-version: "12"
|
||||
cuda-minor-version: "8"
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: '-gpu-nvidia-cuda-12-llama-cpp-localai-paged'
|
||||
builder-base-image: 'quay.io/go-skynet/ci-cache:base-grpc-cuda-12-amd64'
|
||||
runs-on: 'bigger-runner'
|
||||
base-image: "ubuntu:24.04"
|
||||
skip-drivers: 'false'
|
||||
backend: "llama-cpp-localai-paged"
|
||||
dockerfile: "./backend/Dockerfile.llama-cpp-localai-paged"
|
||||
context: "./"
|
||||
ubuntu-version: '2404'
|
||||
- build-type: 'cublas'
|
||||
cuda-major-version: "13"
|
||||
cuda-minor-version: "0"
|
||||
@@ -4928,20 +4914,6 @@ include:
|
||||
backend: "llama-cpp-localai-paged"
|
||||
dockerfile: "./backend/Dockerfile.llama-cpp-localai-paged"
|
||||
context: "./"
|
||||
- build-type: 'cublas'
|
||||
cuda-major-version: "12"
|
||||
cuda-minor-version: "0"
|
||||
platforms: 'linux/arm64'
|
||||
skip-drivers: 'false'
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: '-nvidia-l4t-arm64-llama-cpp-localai-paged'
|
||||
builder-base-image: 'quay.io/go-skynet/ci-cache:base-grpc-l4t-cuda-12-arm64'
|
||||
base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
|
||||
runs-on: 'ubuntu-24.04-arm'
|
||||
backend: "llama-cpp-localai-paged"
|
||||
dockerfile: "./backend/Dockerfile.llama-cpp-localai-paged"
|
||||
context: "./"
|
||||
ubuntu-version: '2204'
|
||||
|
||||
# Darwin matrix (consumed by backend-jobs-darwin).
|
||||
includeDarwin:
|
||||
|
||||
@@ -102,12 +102,9 @@
|
||||
# backend. default points at cuda13 (a CUDA default, as in faster-qwen3-tts) so the gallery
|
||||
# entries always resolve to a CUDA variant.
|
||||
capabilities:
|
||||
default: "cuda12-llama-cpp-localai-paged"
|
||||
nvidia: "cuda12-llama-cpp-localai-paged"
|
||||
nvidia-l4t: "nvidia-l4t-arm64-llama-cpp-localai-paged"
|
||||
default: "cuda13-llama-cpp-localai-paged"
|
||||
nvidia: "cuda13-llama-cpp-localai-paged"
|
||||
nvidia-cuda-13: "cuda13-llama-cpp-localai-paged"
|
||||
nvidia-cuda-12: "cuda12-llama-cpp-localai-paged"
|
||||
nvidia-l4t-cuda-12: "nvidia-l4t-arm64-llama-cpp-localai-paged"
|
||||
nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-llama-cpp-localai-paged"
|
||||
- &ds4
|
||||
name: "ds4"
|
||||
@@ -1679,12 +1676,9 @@
|
||||
- !!merge <<: *llamacpplocalaipaged
|
||||
name: "llama-cpp-localai-paged-development"
|
||||
capabilities:
|
||||
default: "cuda12-llama-cpp-localai-paged-development"
|
||||
nvidia: "cuda12-llama-cpp-localai-paged-development"
|
||||
nvidia-l4t: "nvidia-l4t-arm64-llama-cpp-localai-paged-development"
|
||||
default: "cuda13-llama-cpp-localai-paged-development"
|
||||
nvidia: "cuda13-llama-cpp-localai-paged-development"
|
||||
nvidia-cuda-13: "cuda13-llama-cpp-localai-paged-development"
|
||||
nvidia-cuda-12: "cuda12-llama-cpp-localai-paged-development"
|
||||
nvidia-l4t-cuda-12: "nvidia-l4t-arm64-llama-cpp-localai-paged-development"
|
||||
nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-llama-cpp-localai-paged-development"
|
||||
- !!merge <<: *ds4
|
||||
name: "ds4-development"
|
||||
@@ -2355,16 +2349,6 @@
|
||||
mirrors:
|
||||
- localai/localai-backends:master-nvidia-l4t-cuda-13-arm64-turboquant
|
||||
## llama-cpp-localai-paged (CUDA-only; see backend/cpp/llama-cpp-localai-paged/README.md section 4c)
|
||||
- !!merge <<: *llamacpplocalaipaged
|
||||
name: "cuda12-llama-cpp-localai-paged"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-llama-cpp-localai-paged"
|
||||
mirrors:
|
||||
- localai/localai-backends:latest-gpu-nvidia-cuda-12-llama-cpp-localai-paged
|
||||
- !!merge <<: *llamacpplocalaipaged
|
||||
name: "cuda12-llama-cpp-localai-paged-development"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-llama-cpp-localai-paged"
|
||||
mirrors:
|
||||
- localai/localai-backends:master-gpu-nvidia-cuda-12-llama-cpp-localai-paged
|
||||
- !!merge <<: *llamacpplocalaipaged
|
||||
name: "cuda13-llama-cpp-localai-paged"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-13-llama-cpp-localai-paged"
|
||||
@@ -2375,16 +2359,6 @@
|
||||
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-13-llama-cpp-localai-paged"
|
||||
mirrors:
|
||||
- localai/localai-backends:master-gpu-nvidia-cuda-13-llama-cpp-localai-paged
|
||||
- !!merge <<: *llamacpplocalaipaged
|
||||
name: "nvidia-l4t-arm64-llama-cpp-localai-paged"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:latest-nvidia-l4t-arm64-llama-cpp-localai-paged"
|
||||
mirrors:
|
||||
- localai/localai-backends:latest-nvidia-l4t-arm64-llama-cpp-localai-paged
|
||||
- !!merge <<: *llamacpplocalaipaged
|
||||
name: "nvidia-l4t-arm64-llama-cpp-localai-paged-development"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-arm64-llama-cpp-localai-paged"
|
||||
mirrors:
|
||||
- localai/localai-backends:master-nvidia-l4t-arm64-llama-cpp-localai-paged
|
||||
- !!merge <<: *llamacpplocalaipaged
|
||||
name: "cuda13-nvidia-l4t-arm64-llama-cpp-localai-paged"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:latest-nvidia-l4t-cuda-13-arm64-llama-cpp-localai-paged"
|
||||
|
||||
Reference in New Issue
Block a user