mirror of
https://github.com/mudler/LocalAI.git
synced 2026-06-12 02:38:19 -04:00
feat(dllm): backend packaging, gallery index, CI matrix
Registers the dllm backend across every surface: backend gallery index (cpu amd64+arm64 with manifest merge, cuda13, l4t-cuda13 for GB10-class hardware; no darwin per engine scope), top-level Makefile targets, bump_deps pin tracking for DLLM_VERSION, and the curated known-backends list for /backends/known (pref-only: auto-detecting on .gguf would shadow llama-cpp). Note: image builds and the nightly bump leg stay red until github.com/mudler/dllm.cpp is published (planned at merge time). Assisted-by: Claude Code (Fable 5) Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
4
backend/go/dllm/Makefile
Normal file → Executable file
4
backend/go/dllm/Makefile
Normal file → Executable file
@@ -14,6 +14,10 @@
|
||||
# That's what the gated C-ABI binding smoke uses (DLLM_TEST_LIBRARY). The
|
||||
# default target below does the proper clone-at-pin + cmake build so CI
|
||||
# doesn't need a side-checkout.
|
||||
#
|
||||
# NOTE: github.com/mudler/dllm.cpp is still private (publishing is planned);
|
||||
# until then the anonymous clone below fails. Use the symlink shortcut above
|
||||
# with a local checkout, or a git credential helper with access to the repo.
|
||||
|
||||
DLLM_VERSION?=b22fcebebfb225131113188599a9ae542b2935d7
|
||||
DLLM_REPO?=https://github.com/mudler/dllm.cpp
|
||||
|
||||
@@ -275,8 +275,8 @@ func (d *Dllm) requestOptsJSON(opts *pb.PredictOptions) (string, error) {
|
||||
// The engine rounds n_predict UP to a whole number of diffusion
|
||||
// blocks (the canvas is denoised block-wise), so the completion may
|
||||
// run slightly past the requested budget. Tokens==0 omits the key so
|
||||
// the engine's GGUF-metadata default applies (the C-ABI documents
|
||||
// per-key defaults; no hardcoded 256 like ds4's grpc-server).
|
||||
// the C-ABI default of 256 applies (hardcoded in capi.cpp's
|
||||
// parse_gen_opts, independent of canvas_length).
|
||||
m["n_predict"] = n
|
||||
}
|
||||
if s := opts.GetSeed(); s > 0 {
|
||||
|
||||
@@ -95,6 +95,29 @@
|
||||
nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-ds4"
|
||||
metal: "metal-ds4"
|
||||
metal-darwin-arm64: "metal-ds4"
|
||||
- &dllm
|
||||
name: "dllm"
|
||||
alias: "dllm"
|
||||
license: mit
|
||||
description: |
|
||||
mudler/dllm.cpp - DiffusionGemma block-diffusion LLM inference engine
|
||||
(C++/ggml, GGUF weights). Decodes whole token canvases per diffusion
|
||||
round instead of autoregressive sampling. Runs on CPU and NVIDIA CUDA 13
|
||||
(including Jetson/GB10 L4T targets).
|
||||
urls:
|
||||
- https://github.com/mudler/dllm.cpp
|
||||
tags:
|
||||
- text-to-text
|
||||
- LLM
|
||||
- gguf
|
||||
- diffusion
|
||||
- CPU
|
||||
- CUDA
|
||||
capabilities:
|
||||
default: "cpu-dllm"
|
||||
nvidia: "cuda13-dllm"
|
||||
nvidia-cuda-13: "cuda13-dllm"
|
||||
nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-dllm"
|
||||
- &whispercpp
|
||||
name: "whisper"
|
||||
alias: "whisper"
|
||||
@@ -1272,6 +1295,13 @@
|
||||
nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-ds4-development"
|
||||
metal: "metal-ds4-development"
|
||||
metal-darwin-arm64: "metal-ds4-development"
|
||||
- !!merge <<: *dllm
|
||||
name: "dllm-development"
|
||||
capabilities:
|
||||
default: "cpu-dllm-development"
|
||||
nvidia: "cuda13-dllm-development"
|
||||
nvidia-cuda-13: "cuda13-dllm-development"
|
||||
nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-dllm-development"
|
||||
- !!merge <<: *stablediffusionggml
|
||||
name: "stablediffusion-ggml-development"
|
||||
capabilities:
|
||||
@@ -1859,6 +1889,37 @@
|
||||
uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-ds4"
|
||||
mirrors:
|
||||
- localai/localai-backends:master-metal-darwin-arm64-ds4
|
||||
## dllm
|
||||
- !!merge <<: *dllm
|
||||
name: "cpu-dllm"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-dllm"
|
||||
mirrors:
|
||||
- localai/localai-backends:latest-cpu-dllm
|
||||
- !!merge <<: *dllm
|
||||
name: "cpu-dllm-development"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:master-cpu-dllm"
|
||||
mirrors:
|
||||
- localai/localai-backends:master-cpu-dllm
|
||||
- !!merge <<: *dllm
|
||||
name: "cuda13-dllm"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-13-dllm"
|
||||
mirrors:
|
||||
- localai/localai-backends:latest-gpu-nvidia-cuda-13-dllm
|
||||
- !!merge <<: *dllm
|
||||
name: "cuda13-dllm-development"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-13-dllm"
|
||||
mirrors:
|
||||
- localai/localai-backends:master-gpu-nvidia-cuda-13-dllm
|
||||
- !!merge <<: *dllm
|
||||
name: "cuda13-nvidia-l4t-arm64-dllm"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:latest-nvidia-l4t-cuda-13-arm64-dllm"
|
||||
mirrors:
|
||||
- localai/localai-backends:latest-nvidia-l4t-cuda-13-arm64-dllm
|
||||
- !!merge <<: *dllm
|
||||
name: "cuda13-nvidia-l4t-arm64-dllm-development"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-cuda-13-arm64-dllm"
|
||||
mirrors:
|
||||
- localai/localai-backends:master-nvidia-l4t-cuda-13-arm64-dllm
|
||||
## whisper
|
||||
- !!merge <<: *whispercpp
|
||||
name: "whisper-development"
|
||||
|
||||
Reference in New Issue
Block a user