mirror of
https://github.com/mudler/LocalAI.git
synced 2026-06-11 18:27:32 -04:00
feat(dllm): purego backend scaffold over the dllm.cpp C-ABI
Binds the 9-symbol flat C-ABI of dllm.cpp (DiffusionGemma engine) via purego: typed wrappers with correct string ownership (malloc'd returns freed via dllm_capi_free_string, borrowed last_error never freed), once-allocated stream-callback trampolines, and a gated Ginkgo binding smoke against the tiny fixture model. Assisted-by: Claude Code (Fable 5) Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
10
backend/go/dllm/.gitignore
vendored
Normal file
10
backend/go/dllm/.gitignore
vendored
Normal file
@@ -0,0 +1,10 @@
|
||||
.cache/
|
||||
sources/
|
||||
build/
|
||||
package/
|
||||
dllm-grpc
|
||||
# build artifacts staged in-tree by the Makefile (cp from sources/) or
|
||||
# symlinked for local dev; the real sources live in dllm.cpp upstream.
|
||||
*.so
|
||||
*.so.*
|
||||
compile_commands.json
|
||||
89
backend/go/dllm/Makefile
Normal file
89
backend/go/dllm/Makefile
Normal file
@@ -0,0 +1,89 @@
|
||||
# dllm backend Makefile.
|
||||
#
|
||||
# Upstream pin lives below as DLLM_VERSION?=<sha> so .github/bump_deps.sh
|
||||
# can find and update it - matches the whisper.cpp / parakeet-cpp / ds4
|
||||
# convention.
|
||||
#
|
||||
# Local dev shortcut: if you already have an out-of-tree dllm.cpp build,
|
||||
# you can symlink the .so into this directory and skip the clone/cmake
|
||||
# steps entirely, e.g.:
|
||||
#
|
||||
# ln -sf /path/to/dllm.cpp/build/libdllm.so .
|
||||
# go build -o dllm-grpc .
|
||||
#
|
||||
# That's what the gated C-ABI binding smoke uses (DLLM_TEST_LIBRARY). The
|
||||
# default target below does the proper clone-at-pin + cmake build so CI
|
||||
# doesn't need a side-checkout.
|
||||
|
||||
DLLM_VERSION?=b22fcebebfb225131113188599a9ae542b2935d7
|
||||
DLLM_REPO?=https://github.com/mudler/dllm.cpp
|
||||
|
||||
GOCMD?=go
|
||||
GO_TAGS?=
|
||||
JOBS?=$(shell nproc 2>/dev/null || sysctl -n hw.ncpu 2>/dev/null || echo 4)
|
||||
|
||||
BUILD_TYPE?=
|
||||
NATIVE?=false
|
||||
|
||||
# libdllm.so ships self-contained: dllm.cpp's CMakeLists folds ggml in
# statically (BUILD_SHARED_LIBS=OFF + PIC), so dlopen needs no libggml*.so
# beside it - only the system libs (libstdc++/libgomp/libc) that the runtime
# image already provides. Upstream's tests/CLI are not built here.
CMAKE_ARGS ?= -DCMAKE_BUILD_TYPE=Release -DDLLM_BUILD_TESTS=OFF

# NATIVE=false (the default) forwards -DGGML_NATIVE=OFF to cmake.
ifeq ($(NATIVE),false)
CMAKE_ARGS += -DGGML_NATIVE=OFF
endif

# Arch list shared with the sibling ggml backends (acestep/vibevoice/
# qwen3-tts) for their cublas images; override it for a native build.
CUDA_ARCHITECTURES ?= 75-virtual;80-virtual;86-real;89-real

# CUDA is switched on through DLLM_CUDA rather than a bare -DGGML_CUDA=ON,
# because dllm.cpp gates it there (set(GGML_CUDA ... CACHE FORCE)).
ifeq ($(BUILD_TYPE),cublas)
CMAKE_ARGS += -DDLLM_CUDA=ON -DCMAKE_CUDA_ARCHITECTURES="$(CUDA_ARCHITECTURES)"
endif
|
||||
|
||||
# These targets are always considered out of date. dllm-grpc is listed as
# well, so the Go build step runs on every invocation.
.PHONY: all build clean dllm-grpc package purge test

# Default goal: the gRPC backend binary.
all: dllm-grpc
|
||||
|
||||
# Clone the upstream dllm.cpp source at the pinned commit (ggml comes in as
# a submodule). The directory is the target, so make only re-clones when it
# is missing. The checkout is assembled in a scratch directory and renamed
# into place only after every git step succeeded: a failed fetch (network
# error, bad DLLM_VERSION) must not leave a half-initialised
# sources/dllm.cpp behind, because make would then treat the target as up to
# date and fail later in cmake. After a DLLM_VERSION bump, run
# 'make purge && make' to refetch.
sources/dllm.cpp:
	rm -rf $@.tmp
	mkdir -p $@.tmp
	cd $@.tmp && \
	git init -q && \
	git remote add origin $(DLLM_REPO) && \
	git fetch --depth 1 origin $(DLLM_VERSION) && \
	git checkout FETCH_HEAD && \
	git submodule update --init --recursive --depth 1 --single-branch
	mv $@.tmp $@
|
||||
|
||||
# Configure and compile dllm.cpp in its own build tree, then copy the
# resulting shared library beside the Go sources, where both
# purego.Dlopen("libdllm.so") and the packaging step look for it.
# The checkout stays a normal prerequisite on purpose: a fresh clone after
# 'make purge' is newer than an existing libdllm.so and forces the rebuild.
libdllm.so: sources/dllm.cpp
	cmake -S $< -B $</build $(CMAKE_ARGS)
	cmake --build $</build --config Release -j$(JOBS)
	cp -fv $</build/$@ ./
|
||||
|
||||
# Backend binary. Built with cgo disabled: the shared library is loaded at
# runtime through purego instead of being linked.
dllm-grpc: libdllm.so main.go capi.go
	CGO_ENABLED=0 $(GOCMD) build -tags "$(GO_TAGS)" -o $@ .

# Stage the binary, run.sh and the shared library under package/.
package: dllm-grpc
	bash package.sh

# Alias for package.
build: package
|
||||
|
||||
# Run the Go specs. The C-ABI binding smoke only runs when both
# DLLM_TEST_LIBRARY and DLLM_TEST_TINY_MODEL are set; otherwise those specs
# skip themselves and just the pure-Go helper specs execute.
test:
	LD_LIBRARY_PATH=$(CURDIR):$$LD_LIBRARY_PATH $(GOCMD) test -count=1 ./...

# Remove the staged library, the package tree and the binary (and, through
# purge, the upstream checkout).
clean: purge
	$(RM) -r libdllm.so* package dllm-grpc

# Drop the upstream checkout so the next build re-clones it.
purge:
	$(RM) -r sources/dllm.cpp
|
||||
265
backend/go/dllm/capi.go
Normal file
265
backend/go/dllm/capi.go
Normal file
@@ -0,0 +1,265 @@
|
||||
package main
|
||||
|
||||
// Typed Go wrappers over dllm.cpp's flat C-ABI (include/dllm_capi.h, ABI v1).
|
||||
//
|
||||
// Contract highlights the wrappers encode (see the header + src/capi.cpp):
|
||||
// - tokenize_json/generate return malloc'd char* the CALLER owns: bound as
|
||||
// uintptr, copied with goStringFromCPtr, released via dllm_capi_free_string.
|
||||
// - last_error returns a BORROWED pointer (valid until the next call on the
|
||||
// same ctx): bound as a plain string (purego copies), never freed, and only
|
||||
// read AFTER the failing call has returned - reading it while a generate is
|
||||
// in flight on the same ctx violates the per-ctx serialization contract.
|
||||
// - All entry points except dllm_capi_cancel must be externally serialized
|
||||
// per ctx (one ctx = one concurrent generate/tokenize). Cancel only flips
|
||||
// an atomic and may be called from any goroutine mid-generate.
|
||||
// - No C++ exception crosses the boundary; failures land in last_error.
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"unsafe"
|
||||
|
||||
"github.com/ebitengine/purego"
|
||||
"github.com/mudler/LocalAI/pkg/grpc/base"
|
||||
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
|
||||
)
|
||||
|
||||
// dllmABIVersion is the DLLM_CAPI_ABI_VERSION this binding was written
|
||||
// against; main.go refuses to start against a libdllm.so reporting another.
|
||||
const dllmABIVersion = 1
|
||||
|
||||
// purego-bound entry points from libdllm.so. Names match dllm_capi.h
|
||||
// exactly; loadCAPI (main.go) fills these in at boot.
|
||||
var (
|
||||
cppAbiVersion func() int32
|
||||
cppLoad func(ggufPath, paramsJSON string) uintptr
|
||||
cppFree func(ctx uintptr)
|
||||
cppLastError func(ctx uintptr) string // borrowed pointer: purego copies, do NOT free
|
||||
cppFreeString func(s uintptr)
|
||||
// malloc'd char* returns, hence uintptr (see loadCAPI's doc comment).
|
||||
cppTokenizeJSON func(ctx uintptr, text string) uintptr
|
||||
cppGenerate func(ctx uintptr, prompt, optsJSON string) uintptr
|
||||
// on_block/on_step are C function pointers produced by purego.NewCallback;
|
||||
// userData carries the streamCallStates registry key.
|
||||
cppGenerateStream func(ctx uintptr, prompt, optsJSON string, onBlock, onStep, userData uintptr) int32
|
||||
cppCancel func(ctx uintptr)
|
||||
)
|
||||
|
||||
// Dllm is the LocalAI gRPC backend over the dllm.cpp C-ABI. T1 ships only
|
||||
// the binding scaffold; Load/PredictRich/PredictStreamRich (and the move to
|
||||
// a dedicated dllm.go with the per-model worker goroutine) land in T4.
|
||||
type Dllm struct {
|
||||
base.Base
|
||||
}
|
||||
|
||||
// Load is not wired yet: the binding smoke drives the C functions directly.
|
||||
func (d *Dllm) Load(opts *pb.ModelOptions) error {
|
||||
return errors.New("dllm: model loading not implemented yet (backend wiring lands in T4)")
|
||||
}
|
||||
|
||||
// cAbiVersion returns the library's DLLM_CAPI_ABI_VERSION.
|
||||
func cAbiVersion() int32 {
|
||||
return cppAbiVersion()
|
||||
}
|
||||
|
||||
// cLoad opens the GGUF at path with the flat params JSON (e.g.
|
||||
// {"n_gpu_layers":99}). Returns 0 on failure; per the header contract there
|
||||
// is no ctx to carry the reason, the C side logs it to stderr (and
|
||||
// cLastError(0) only yields the static NULL-ctx message).
|
||||
func cLoad(path, paramsJSON string) uintptr {
|
||||
return cppLoad(path, paramsJSON)
|
||||
}
|
||||
|
||||
// cFree releases a ctx; safe on 0 (delete nullptr).
|
||||
func cFree(h uintptr) {
|
||||
cppFree(h)
|
||||
}
|
||||
|
||||
// cLastError returns the ctx's last error message (or the static NULL-ctx
|
||||
// message for h==0). The C pointer is borrowed and only valid until the next
|
||||
// call on the same ctx; purego's string return copies it immediately, so the
|
||||
// returned Go string is safe to keep. Must not be called while another call
|
||||
// on the same ctx is in flight.
|
||||
func cLastError(h uintptr) string {
|
||||
return cppLastError(h)
|
||||
}
|
||||
|
||||
// lastErrorOr is cLastError with a fallback for the empty-message case, so
|
||||
// wrapped errors never end in ": ".
|
||||
func lastErrorOr(h uintptr, fallback string) string {
|
||||
if msg := cLastError(h); msg != "" {
|
||||
return msg
|
||||
}
|
||||
return fallback
|
||||
}
|
||||
|
||||
// cTokenizeJSON tokenizes text (the C side prepends bos per vocab.add_bos)
|
||||
// and returns the token ids as a JSON array string, e.g. "[2,18]".
|
||||
func cTokenizeJSON(h uintptr, text string) (string, error) {
|
||||
ret := cppTokenizeJSON(h, text)
|
||||
if ret == 0 {
|
||||
return "", fmt.Errorf("dllm: tokenize failed: %s", lastErrorOr(h, "unknown error"))
|
||||
}
|
||||
out := goStringFromCPtr(ret)
|
||||
cppFreeString(ret)
|
||||
return out, nil
|
||||
}
|
||||
|
||||
// cGenerate runs a blocking generation and returns the detokenized text.
|
||||
// optsJSON must be a FLAT JSON object of scalars (use buildOptsJSON); the C
|
||||
// parser rejects nested objects/arrays. NULL return -> last_error (read only
|
||||
// after the call returned, per the serialization contract); a cancelled call
|
||||
// surfaces as the "cancelled" message.
|
||||
func cGenerate(h uintptr, prompt, optsJSON string) (string, error) {
|
||||
ret := cppGenerate(h, prompt, optsJSON)
|
||||
if ret == 0 {
|
||||
return "", fmt.Errorf("dllm: generate failed: %s", lastErrorOr(h, "unknown error"))
|
||||
}
|
||||
out := goStringFromCPtr(ret)
|
||||
cppFreeString(ret)
|
||||
return out, nil
|
||||
}
|
||||
|
||||
// streamCallState holds the Go callbacks of a single in-flight
// cGenerateStream call. cGenerateStream stores it in streamCallStates under
// a fresh sequence number and passes that number through C as user_data, so
// the trampolines can find it again. The registry shape follows the whisper
// backend's streamCallStates: the C-ABI is serialized per ctx, so at most
// one entry per ctx is live, while keying by call keeps concurrent calls on
// different ctxs apart.
type streamCallState struct {
	// onBlock receives each committed block's text; may be nil.
	onBlock func(text string)
	// onStep receives the step index, the step count and a canvas preview;
	// may be nil.
	onStep func(step, total int, preview string)
}
|
||||
|
||||
var (
|
||||
streamCallStates sync.Map // uint64 -> *streamCallState
|
||||
streamCallSeq atomic.Uint64
|
||||
|
||||
// purego.NewCallback allocates a finite, never-released callback slot, so
|
||||
// the two trampolines are created exactly once and reused across calls.
|
||||
streamCbOnce sync.Once
|
||||
blockCbPtr uintptr
|
||||
stepCbPtr uintptr
|
||||
)
|
||||
|
||||
// onBlockTrampoline is the Go side of dllm_block_cb. It runs on the C
|
||||
// calling thread, mid-generate: keep it tiny and non-blocking (callers that
|
||||
// bridge to goroutines must hand off via buffered channels). The text
|
||||
// pointer is only valid for the duration of the invocation, so it is copied
|
||||
// to a Go string immediately.
|
||||
func onBlockTrampoline(text uintptr, userData uintptr) {
|
||||
v, ok := streamCallStates.Load(uint64(userData))
|
||||
if !ok {
|
||||
return // call already torn down
|
||||
}
|
||||
state := v.(*streamCallState)
|
||||
if state.onBlock != nil {
|
||||
state.onBlock(goStringFromCPtr(text))
|
||||
}
|
||||
}
|
||||
|
||||
// onStepTrampoline is the Go side of dllm_step_cb; same threading and
|
||||
// lifetime caveats as onBlockTrampoline.
|
||||
func onStepTrampoline(step int32, totalSteps int32, canvasPreview uintptr, userData uintptr) {
|
||||
v, ok := streamCallStates.Load(uint64(userData))
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
state := v.(*streamCallState)
|
||||
if state.onStep != nil {
|
||||
state.onStep(int(step), int(totalSteps), goStringFromCPtr(canvasPreview))
|
||||
}
|
||||
}
|
||||
|
||||
// cGenerateStream runs a generation with per-committed-block (onBlock) and
|
||||
// per-denoising-step (onStep) callbacks; either may be nil. The callbacks
|
||||
// run on the C thread (see the trampoline docs). Returns an error carrying
|
||||
// last_error on failure; cancellation surfaces as the "cancelled" message.
|
||||
func cGenerateStream(h uintptr, prompt, optsJSON string, onBlock func(text string), onStep func(step, total int, preview string)) error {
|
||||
streamCbOnce.Do(func() {
|
||||
blockCbPtr = purego.NewCallback(onBlockTrampoline)
|
||||
stepCbPtr = purego.NewCallback(onStepTrampoline)
|
||||
})
|
||||
|
||||
id := streamCallSeq.Add(1)
|
||||
streamCallStates.Store(id, &streamCallState{onBlock: onBlock, onStep: onStep})
|
||||
defer streamCallStates.Delete(id)
|
||||
|
||||
// Pass NULL for absent callbacks so the C side skips the per-block /
|
||||
// per-step detokenize work entirely.
|
||||
var blockPtr, stepPtr uintptr
|
||||
if onBlock != nil {
|
||||
blockPtr = blockCbPtr
|
||||
}
|
||||
if onStep != nil {
|
||||
stepPtr = stepCbPtr
|
||||
}
|
||||
|
||||
if rc := cppGenerateStream(h, prompt, optsJSON, blockPtr, stepPtr, uintptr(id)); rc != 0 {
|
||||
return fmt.Errorf("dllm: generate_stream failed: %s", lastErrorOr(h, "unknown error"))
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// cCancel requests cancellation of the in-flight generate on h. This is the
|
||||
// ONE entry point safe to call from any goroutine while a generate runs (it
|
||||
// only flips an atomic). Note the cancel-reset race from the header: each
|
||||
// generate resets the flag on entry, so a watchdog should re-issue cancel if
|
||||
// the call has not returned.
|
||||
func cCancel(h uintptr) {
|
||||
cppCancel(h)
|
||||
}
|
||||
|
||||
// buildOptsJSON encodes generation options as the flat JSON object the
// C-ABI expects (known keys: n_predict, blocks, seed, eb_*, kv_cache).
//
// Only string, the built-in integer and float types, and json.Number are
// accepted as values. Everything else - nested maps, slices, and also bools,
// which the C-side scanner has no notion of - is rejected with an error
// naming the key, so an option is never silently misread. A nil or empty
// map yields "{}".
func buildOptsJSON(opts map[string]any) (string, error) {
	if len(opts) == 0 {
		return "{}", nil
	}

	// isScalar reports whether v is one of the value types the C-side
	// scanner can read.
	isScalar := func(v any) bool {
		switch v.(type) {
		case string, json.Number:
			return true
		case int, int8, int16, int32, int64:
			return true
		case uint, uint8, uint16, uint32, uint64:
			return true
		case float32, float64:
			return true
		}
		return false
	}

	for key, val := range opts {
		if !isScalar(val) {
			return "", fmt.Errorf("dllm: opts key %q has non-scalar value %T (the C-ABI only accepts flat number/string scalars)", key, val)
		}
	}

	encoded, err := json.Marshal(opts)
	if err != nil {
		return "", fmt.Errorf("dllm: marshal opts: %w", err)
	}
	return string(encoded), nil
}
|
||||
|
||||
// goStringFromCPtr copies a NUL-terminated C string into Go memory. cptr is
|
||||
// the raw pointer returned by purego from the C-ABI (a malloc'd buffer the
|
||||
// caller owns, or a callback argument only valid during the invocation);
|
||||
// owning callers must free it via cppFreeString after the copy lands.
|
||||
//
|
||||
// The uintptr->unsafe.Pointer conversion below trips go vet's unsafeptr
|
||||
// check, which can't distinguish a C-owned heap pointer from Go-managed
|
||||
// memory. It is safe here: the pointer addresses C memory the Go GC neither
|
||||
// tracks nor moves, and we dereference it immediately to copy the bytes out,
|
||||
// the same pattern (and the same tolerated warning) as the parakeet-cpp and
|
||||
// whisper backends.
|
||||
func goStringFromCPtr(cptr uintptr) string {
|
||||
if cptr == 0 {
|
||||
return ""
|
||||
}
|
||||
p := unsafe.Pointer(cptr) //nolint:govet // C-owned buffer, not Go-GC memory (see doc above)
|
||||
n := 0
|
||||
for *(*byte)(unsafe.Add(p, n)) != 0 {
|
||||
n++
|
||||
}
|
||||
return string(unsafe.Slice((*byte)(p), n))
|
||||
}
|
||||
144
backend/go/dllm/dllm_test.go
Normal file
144
backend/go/dllm/dllm_test.go
Normal file
@@ -0,0 +1,144 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"os"
|
||||
"sync"
|
||||
"testing"
|
||||
"unsafe"
|
||||
|
||||
. "github.com/onsi/ginkgo/v2"
|
||||
. "github.com/onsi/gomega"
|
||||
)
|
||||
|
||||
func TestDllm(t *testing.T) {
|
||||
RegisterFailHandler(Fail)
|
||||
RunSpecs(t, "dllm Backend Suite")
|
||||
}
|
||||
|
||||
var (
|
||||
libLoadOnce sync.Once
|
||||
libLoadErr error
|
||||
)
|
||||
|
||||
// ensureLibLoaded mirrors main.go's bootstrap so a Go test can drive the
|
||||
// C-ABI bridge without spinning up the gRPC server. The library path comes
|
||||
// from DLLM_TEST_LIBRARY (gated specs Skip when it is unset).
|
||||
func ensureLibLoaded() {
|
||||
libLoadOnce.Do(func() {
|
||||
libLoadErr = loadCAPI(os.Getenv("DLLM_TEST_LIBRARY"))
|
||||
})
|
||||
}
|
||||
|
||||
// C-ABI binding smoke: drives the real libdllm.so against the tiny GGUF
|
||||
// fixture from dllm.cpp (tests/fixtures/tiny_with_vocab.gguf). Gated on:
|
||||
//
|
||||
// DLLM_TEST_LIBRARY absolute path to libdllm.so
|
||||
// DLLM_TEST_TINY_MODEL absolute path to tiny_with_vocab.gguf
|
||||
var _ = Describe("C-ABI binding", func() {
|
||||
BeforeEach(func() {
|
||||
if os.Getenv("DLLM_TEST_LIBRARY") == "" || os.Getenv("DLLM_TEST_TINY_MODEL") == "" {
|
||||
Skip("set DLLM_TEST_LIBRARY and DLLM_TEST_TINY_MODEL to run the C-ABI binding smoke")
|
||||
}
|
||||
ensureLibLoaded()
|
||||
Expect(libLoadErr).ToNot(HaveOccurred())
|
||||
})
|
||||
|
||||
It("binds the 9 symbols and round-trips the tiny model", func() {
|
||||
Expect(cAbiVersion()).To(Equal(int32(1)))
|
||||
|
||||
h := cLoad(os.Getenv("DLLM_TEST_TINY_MODEL"), "{}")
|
||||
Expect(h).ToNot(BeZero(), "dllm_capi_load of the tiny fixture")
|
||||
|
||||
// Tiny fixture vocab: "hello" tokenizes to ids [2,18] (bos prepended
|
||||
// by the C side: vocab.add_bos).
|
||||
toks, err := cTokenizeJSON(h, "hello")
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(toks).To(Equal("[2,18]"))
|
||||
|
||||
// Deterministic generation: an explicit non-negative seed seeds
|
||||
// mt19937, so two identical calls must produce identical text.
|
||||
out1, err := cGenerate(h, "hello", `{"n_predict":16,"seed":7}`)
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(out1).ToNot(BeEmpty())
|
||||
// Cancel with no call in flight is dropped: each generate resets the
|
||||
// cancel flag on entry (header contract), so this must not affect
|
||||
// the next call. Also binds the 9th symbol; safe on NULL too.
|
||||
cCancel(h)
|
||||
cCancel(0)
|
||||
|
||||
out2, err := cGenerate(h, "hello", `{"n_predict":16,"seed":7}`)
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(out2).To(Equal(out1))
|
||||
|
||||
// Streaming variant: same opts, blocks arrive via the purego
|
||||
// callback trampoline. The per-block detokenize can differ from the
|
||||
// seamless full-text decode at block boundaries, so only assert that
|
||||
// blocks arrived and were non-trivial, not byte equality with out1.
|
||||
var blocks []string
|
||||
var steps int
|
||||
err = cGenerateStream(h, "hello", `{"n_predict":16,"seed":7}`,
|
||||
func(text string) { blocks = append(blocks, text) },
|
||||
func(step, total int, preview string) { steps++ },
|
||||
)
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(blocks).ToNot(BeEmpty())
|
||||
Expect(steps).To(BeNumerically(">", 0))
|
||||
|
||||
// Load failure path: NULL ctx back, and last_error(NULL) returns the
|
||||
// static NULL-ctx message (there is no ctx to carry the real reason).
|
||||
bad := cLoad("/nonexistent/dllm-model.gguf", "{}")
|
||||
Expect(bad).To(BeZero())
|
||||
Expect(cLastError(0)).ToNot(BeEmpty())
|
||||
|
||||
// Free is safe on a live handle and a NULL one (delete nullptr).
|
||||
cFree(h)
|
||||
cFree(0)
|
||||
})
|
||||
})
|
||||
|
||||
// Ungated specs for the pure-Go helpers (no libdllm.so required).
|
||||
var _ = Describe("buildOptsJSON", func() {
|
||||
It("renders flat scalars as a JSON object", func() {
|
||||
out, err := buildOptsJSON(map[string]any{
|
||||
"n_predict": 16,
|
||||
"seed": int64(7),
|
||||
"eb_t_min": 0.5,
|
||||
"kv_cache": "auto",
|
||||
})
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(out).To(MatchJSON(`{"n_predict":16,"seed":7,"eb_t_min":0.5,"kv_cache":"auto"}`))
|
||||
})
|
||||
|
||||
It("renders an empty object for no options", func() {
|
||||
out, err := buildOptsJSON(nil)
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(out).To(Equal("{}"))
|
||||
})
|
||||
|
||||
It("rejects nested objects (the C-side scanner only reads flat scalars)", func() {
|
||||
_, err := buildOptsJSON(map[string]any{"sampler": map[string]any{"seed": 1}})
|
||||
Expect(err).To(HaveOccurred())
|
||||
})
|
||||
|
||||
It("rejects arrays", func() {
|
||||
_, err := buildOptsJSON(map[string]any{"stop": []string{"a"}})
|
||||
Expect(err).To(HaveOccurred())
|
||||
})
|
||||
|
||||
It("rejects booleans (the C-side scanner only understands numbers and strings)", func() {
|
||||
_, err := buildOptsJSON(map[string]any{"flag": true})
|
||||
Expect(err).To(HaveOccurred())
|
||||
})
|
||||
})
|
||||
|
||||
var _ = Describe("goStringFromCPtr", func() {
|
||||
It("copies a NUL-terminated buffer", func() {
|
||||
buf := []byte("dllm\x00")
|
||||
s := goStringFromCPtr(uintptr(unsafe.Pointer(&buf[0])))
|
||||
Expect(s).To(Equal("dllm"))
|
||||
})
|
||||
|
||||
It("returns the empty string for NULL", func() {
|
||||
Expect(goStringFromCPtr(0)).To(Equal(""))
|
||||
})
|
||||
})
|
||||
85
backend/go/dllm/main.go
Normal file
85
backend/go/dllm/main.go
Normal file
@@ -0,0 +1,85 @@
|
||||
package main
|
||||
|
||||
// Started internally by LocalAI - one gRPC server per loaded model.
|
||||
//
|
||||
// Loads libdllm.so via purego and registers the 9-symbol flat C-ABI
|
||||
// declared in dllm.cpp's include/dllm_capi.h (ABI v1). The library name can
|
||||
// be overridden with DLLM_LIBRARY (mirrors the PARAKEET_LIBRARY /
|
||||
// WHISPER_LIBRARY convention in the sibling backends); the default looks
|
||||
// for the .so next to this binary (run.sh puts the package dir on
|
||||
// LD_LIBRARY_PATH).
|
||||
import (
|
||||
"flag"
|
||||
"fmt"
|
||||
"os"
|
||||
|
||||
"github.com/ebitengine/purego"
|
||||
grpc "github.com/mudler/LocalAI/pkg/grpc"
|
||||
)
|
||||
|
||||
var (
|
||||
addr = flag.String("addr", "localhost:50051", "the address to connect to")
|
||||
)
|
||||
|
||||
// LibFuncs pairs a purego-bound Go function variable with the name of the C
// symbol it is bound to. loadCAPI fills a slice of these positionally, so
// the field order must not change.
type LibFuncs struct {
	// FuncPtr is a pointer to the Go func variable handed to purego.
	FuncPtr any
	// Name is the exported symbol looked up in the loaded library.
	Name string
}
|
||||
|
||||
// loadCAPI dlopens libName and binds the 9 dllm_capi_* entry points 1:1 to
|
||||
// dllm_capi.h, so an `nm libdllm.so | grep dllm_capi` is enough to spot
|
||||
// drift. Shared with the test suite (ensureLibLoaded), which drives the
|
||||
// bridge without the gRPC server.
|
||||
//
|
||||
// The C-ABI returns malloc'd char* buffers from tokenize_json/generate; we
|
||||
// register those as uintptr so we get the raw pointer back and can call
|
||||
// dllm_capi_free_string on it (purego's string return would copy and forget
|
||||
// the original pointer, leaking it on every call). last_error returns a
|
||||
// BORROWED pointer instead, so it is registered as a plain string: purego
|
||||
// copies it and nothing must be freed.
|
||||
func loadCAPI(libName string) error {
|
||||
lib, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL)
|
||||
if err != nil {
|
||||
return fmt.Errorf("dllm: dlopen %q: %w", libName, err)
|
||||
}
|
||||
|
||||
libFuncs := []LibFuncs{
|
||||
{&cppAbiVersion, "dllm_capi_abi_version"},
|
||||
{&cppLoad, "dllm_capi_load"},
|
||||
{&cppFree, "dllm_capi_free"},
|
||||
{&cppLastError, "dllm_capi_last_error"},
|
||||
{&cppFreeString, "dllm_capi_free_string"},
|
||||
{&cppTokenizeJSON, "dllm_capi_tokenize_json"},
|
||||
{&cppGenerate, "dllm_capi_generate"},
|
||||
{&cppGenerateStream, "dllm_capi_generate_stream"},
|
||||
{&cppCancel, "dllm_capi_cancel"},
|
||||
}
|
||||
for _, lf := range libFuncs {
|
||||
purego.RegisterLibFunc(lf.FuncPtr, lib, lf.Name)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func main() {
|
||||
libName := os.Getenv("DLLM_LIBRARY")
|
||||
if libName == "" {
|
||||
libName = "libdllm.so"
|
||||
}
|
||||
|
||||
if err := loadCAPI(libName); err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
// Hard-fail on an ABI mismatch: the flat-pointer bindings above would
|
||||
// otherwise misbehave silently against a future libdllm.so.
|
||||
if v := cAbiVersion(); v != dllmABIVersion {
|
||||
panic(fmt.Errorf("dllm: libdllm.so ABI=%d, this backend speaks ABI=%d", v, dllmABIVersion))
|
||||
}
|
||||
fmt.Fprintf(os.Stderr, "[dllm] ABI=%d\n", cAbiVersion())
|
||||
|
||||
flag.Parse()
|
||||
|
||||
if err := grpc.StartServer(*addr, &Dllm{}); err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
24
backend/go/dllm/package.sh
Executable file
24
backend/go/dllm/package.sh
Executable file
@@ -0,0 +1,24 @@
|
||||
#!/bin/bash
#
# T1 packaging stub: stage the binary, run.sh and libdllm.so under package/.
# The full ldd walk (libc, libstdc++, libgomp, GPU runtimes, arch detection)
# arrives with the registration task and will mirror
# backend/go/whisper/package.sh.

set -e

CURDIR=$(dirname "$(realpath "$0")")
PKGDIR="$CURDIR/package"
LIBDIR="$PKGDIR/lib"

mkdir -p "$LIBDIR"

# Binary and launcher go to the package root.
for artifact in dllm-grpc run.sh; do
    cp -avf "$CURDIR/$artifact" "$PKGDIR/"
done

# libdllm.so plus any soname symlinks, should upstream ever add them. cp's
# own diagnostics are silenced in favour of the hint below.
if ! cp -avf "$CURDIR"/libdllm.so* "$LIBDIR/" 2>/dev/null; then
    echo "ERROR: libdllm.so not found in $CURDIR, run 'make' first" >&2
    exit 1
fi

echo "T1 package layout (full ldd walk lands with registration):"
ls -liah "$PKGDIR/" "$LIBDIR/"
|
||||
16
backend/go/dllm/run.sh
Executable file
16
backend/go/dllm/run.sh
Executable file
@@ -0,0 +1,16 @@
|
||||
#!/bin/bash
#
# Launcher for the packaged dllm backend: puts the package's library
# directories on the loader search path and execs dllm-grpc with the
# arguments it was given.
set -e

CURDIR=$(dirname "$(realpath "$0")")

# Prepend the packaged library dirs. The inherited value is appended only
# when it is non-empty: a trailing ':' would add an empty entry, which the
# dynamic loader interprets as the current working directory.
export LD_LIBRARY_PATH="$CURDIR/lib:$CURDIR${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"

# If a self-contained ld.so was packaged, route through it so the
# packaged libc / libstdc++ are used instead of the host's (matches the
# whisper / parakeet-cpp backends' runtime layout).
if [ -f "$CURDIR/lib/ld.so" ]; then
    echo "Using lib/ld.so"
    exec "$CURDIR/lib/ld.so" "$CURDIR/dllm-grpc" "$@"
fi

exec "$CURDIR/dllm-grpc" "$@"
|
||||
Reference in New Issue
Block a user