feat(dllm): backend packaging, gallery index, CI matrix

Registers the dllm backend across every surface: backend gallery index
(cpu amd64+arm64 with manifest merge, cuda13, l4t-cuda13 for GB10-class
hardware; no darwin per engine scope), top-level Makefile targets,
bump_deps pin tracking for DLLM_VERSION, and the curated known-backends
list for /backends/known (pref-only: auto-detecting on .gguf would
shadow llama-cpp). Note: image builds and the nightly bump leg stay red
until github.com/mudler/dllm.cpp is published (planned at merge time).

Assisted-by: Claude Code (Fable 5)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
Ettore Di Giacinto
2026-06-11 17:05:18 +00:00
parent 99184809fa
commit 52b3b68cea
8 changed files with 136 additions and 3 deletions

4
backend/go/dllm/Makefile Normal file → Executable file
View File

@@ -14,6 +14,10 @@
# That's what the gated C-ABI binding smoke uses (DLLM_TEST_LIBRARY). The
# default target below does the proper clone-at-pin + cmake build so CI
# doesn't need a side-checkout.
#
# NOTE: github.com/mudler/dllm.cpp is still private (publishing is planned);
# until then the anonymous clone below fails. Use the symlink shortcut above
# with a local checkout, or a git credential helper with access to the repo.
#
# Pinned dllm.cpp commit that the default target clones and builds. `?=`
# only assigns when the variable is not already set, so the pin can be
# overridden from the environment or the make command line.
DLLM_VERSION?=b22fcebebfb225131113188599a9ae542b2935d7
# Repository the pinned commit is cloned from; overridable the same way
# (e.g. to point at a fork or a mirror you have access to).
DLLM_REPO?=https://github.com/mudler/dllm.cpp

View File

@@ -275,8 +275,8 @@ func (d *Dllm) requestOptsJSON(opts *pb.PredictOptions) (string, error) {
// The engine rounds n_predict UP to a whole number of diffusion
// blocks (the canvas is denoised block-wise), so the completion may
// run slightly past the requested budget. Tokens==0 omits the key so
-// the engine's GGUF-metadata default applies (the C-ABI documents
-// per-key defaults; no hardcoded 256 like ds4's grpc-server).
+// the C-ABI default of 256 applies (hardcoded in capi.cpp's
+// parse_gen_opts, independent of canvas_length).
m["n_predict"] = n
}
if s := opts.GetSeed(); s > 0 {

View File

@@ -95,6 +95,29 @@
nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-ds4"
metal: "metal-ds4"
metal-darwin-arm64: "metal-ds4"
# dllm meta entry. Anchored as &dllm so the per-image entries and the
# -development variant can merge its shared fields (license, description,
# urls, tags, ...) and override only what differs.
- &dllm
name: "dllm"
alias: "dllm"
license: mit
description: |
mudler/dllm.cpp - DiffusionGemma block-diffusion LLM inference engine
(C++/ggml, GGUF weights). Decodes whole token canvases per diffusion
round instead of autoregressive sampling. Runs on CPU and NVIDIA CUDA 13
(including Jetson/GB10 L4T targets).
urls:
- https://github.com/mudler/dllm.cpp
tags:
- text-to-text
- LLM
- gguf
- diffusion
- CPU
- CUDA
# Each value is the `name` of a concrete image entry in the "## dllm" list
# further down. NOTE(review): presumably the gallery resolves the key from
# the detected hardware - confirm against the loader.
# There are no metal/darwin keys (unlike ds4 above): dllm has no darwin build.
capabilities:
default: "cpu-dllm"
# The only CUDA image defined for dllm is the CUDA 13 one, so the generic
# nvidia key points at it as well.
nvidia: "cuda13-dllm"
nvidia-cuda-13: "cuda13-dllm"
nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-dllm"
- &whispercpp
name: "whisper"
alias: "whisper"
@@ -1272,6 +1295,13 @@
nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-ds4-development"
metal: "metal-ds4-development"
metal-darwin-arm64: "metal-ds4-development"
# Development variant of the dllm meta entry: merges every field from &dllm,
# then overrides the name and the capabilities map so each capability key
# points at the matching "-development" image entry.
- !!merge <<: *dllm
name: "dllm-development"
capabilities:
default: "cpu-dllm-development"
nvidia: "cuda13-dllm-development"
nvidia-cuda-13: "cuda13-dllm-development"
nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-dllm-development"
- !!merge <<: *stablediffusionggml
name: "stablediffusion-ggml-development"
capabilities:
@@ -1859,6 +1889,37 @@
uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-ds4"
mirrors:
- localai/localai-backends:master-metal-darwin-arm64-ds4
## dllm
# Concrete dllm images. Every hardware flavour comes as a pair: one entry on
# the `latest-` tag and a "-development" entry on the `master-` tag. Each
# entry merges the shared metadata from &dllm and sets its own name, uri and
# mirror (the same tag under localai/localai-backends).
# CPU
- !!merge <<: *dllm
name: "cpu-dllm"
uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-dllm"
mirrors:
- localai/localai-backends:latest-cpu-dllm
- !!merge <<: *dllm
name: "cpu-dllm-development"
uri: "quay.io/go-skynet/local-ai-backends:master-cpu-dllm"
mirrors:
- localai/localai-backends:master-cpu-dllm
# NVIDIA CUDA 13
- !!merge <<: *dllm
name: "cuda13-dllm"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-13-dllm"
mirrors:
- localai/localai-backends:latest-gpu-nvidia-cuda-13-dllm
- !!merge <<: *dllm
name: "cuda13-dllm-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-13-dllm"
mirrors:
- localai/localai-backends:master-gpu-nvidia-cuda-13-dllm
# NVIDIA L4T (arm64) with CUDA 13
- !!merge <<: *dllm
name: "cuda13-nvidia-l4t-arm64-dllm"
uri: "quay.io/go-skynet/local-ai-backends:latest-nvidia-l4t-cuda-13-arm64-dllm"
mirrors:
- localai/localai-backends:latest-nvidia-l4t-cuda-13-arm64-dllm
- !!merge <<: *dllm
name: "cuda13-nvidia-l4t-arm64-dllm-development"
uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-cuda-13-arm64-dllm"
mirrors:
- localai/localai-backends:master-nvidia-l4t-cuda-13-arm64-dllm
## whisper
- !!merge <<: *whispercpp
name: "whisper-development"