diff --git a/backend/go/dllm/.gitignore b/backend/go/dllm/.gitignore new file mode 100644 index 000000000..5b1edf6d3 --- /dev/null +++ b/backend/go/dllm/.gitignore @@ -0,0 +1,10 @@ +.cache/ +sources/ +build/ +package/ +dllm-grpc +# build artifacts staged in-tree by the Makefile (cp from sources/) or +# symlinked for local dev; the real sources live in dllm.cpp upstream. +*.so +*.so.* +compile_commands.json diff --git a/backend/go/dllm/Makefile b/backend/go/dllm/Makefile new file mode 100644 index 000000000..3b7114c12 --- /dev/null +++ b/backend/go/dllm/Makefile @@ -0,0 +1,89 @@ +# dllm backend Makefile. +# +# Upstream pin lives below as DLLM_VERSION?= so .github/bump_deps.sh +# can find and update it - matches the whisper.cpp / parakeet-cpp / ds4 +# convention. +# +# Local dev shortcut: if you already have an out-of-tree dllm.cpp build, +# you can symlink the .so into this directory and skip the clone/cmake +# steps entirely, e.g.: +# +# ln -sf /path/to/dllm.cpp/build/libdllm.so . +# go build -o dllm-grpc . +# +# That's what the gated C-ABI binding smoke uses (DLLM_TEST_LIBRARY). The +# default target below does the proper clone-at-pin + cmake build so CI +# doesn't need a side-checkout. + +DLLM_VERSION?=b22fcebebfb225131113188599a9ae542b2935d7 +DLLM_REPO?=https://github.com/mudler/dllm.cpp + +GOCMD?=go +GO_TAGS?= +JOBS?=$(shell nproc 2>/dev/null || sysctl -n hw.ncpu 2>/dev/null || echo 4) + +BUILD_TYPE?= +NATIVE?=false + +# libdllm.so is self-contained: dllm.cpp's CMakeLists statically absorbs ggml +# (BUILD_SHARED_LIBS=OFF + PIC) into the shared lib, so dlopen needs no +# libggml*.so alongside it, only system libs (libstdc++/libgomp/libc) the +# runtime image already provides. Tests/CLI are upstream-only concerns. +CMAKE_ARGS?=-DCMAKE_BUILD_TYPE=Release -DDLLM_BUILD_TESTS=OFF + +ifeq ($(NATIVE),false) + CMAKE_ARGS+=-DGGML_NATIVE=OFF +endif + +# Same arch set the sibling ggml backends (acestep/vibevoice/qwen3-tts) bake +# for their cublas images; override for a native build. +CUDA_ARCHITECTURES?=75-virtual;80-virtual;86-real;89-real + +# dllm.cpp gates CUDA behind DLLM_CUDA (set(GGML_CUDA ... CACHE FORCE)), so +# forward that instead of a bare -DGGML_CUDA=ON. +ifeq ($(BUILD_TYPE),cublas) + CMAKE_ARGS+=-DDLLM_CUDA=ON -DCMAKE_CUDA_ARCHITECTURES="$(CUDA_ARCHITECTURES)" +endif + +.PHONY: dllm-grpc package build clean purge test all + +all: dllm-grpc + +# Clone the upstream dllm.cpp source at the pinned commit (ggml comes in as +# a submodule). Directory acts as the target so make only re-clones when +# missing. After a DLLM_VERSION bump, run 'make purge && make' to refetch. +sources/dllm.cpp: + mkdir -p sources/dllm.cpp + cd sources/dllm.cpp && \ + git init -q && \ + git remote add origin $(DLLM_REPO) && \ + git fetch --depth 1 origin $(DLLM_VERSION) && \ + git checkout FETCH_HEAD && \ + git submodule update --init --recursive --depth 1 --single-branch + +# Build the shared lib out-of-tree, then stage it next to the Go sources so +# purego.Dlopen("libdllm.so") and the packaging step both pick it up. +libdllm.so: sources/dllm.cpp + cmake -B sources/dllm.cpp/build -S sources/dllm.cpp $(CMAKE_ARGS) + cmake --build sources/dllm.cpp/build --config Release -j$(JOBS) + cp -fv sources/dllm.cpp/build/libdllm.so ./ + +dllm-grpc: libdllm.so main.go capi.go + CGO_ENABLED=0 $(GOCMD) build -tags "$(GO_TAGS)" -o dllm-grpc . + +package: dllm-grpc + bash package.sh + +build: package + +# Test target. The C-ABI binding smoke is gated on DLLM_TEST_LIBRARY + +# DLLM_TEST_TINY_MODEL; without them the gated specs auto-skip and only the +# pure-Go helper specs run. +test: + LD_LIBRARY_PATH=$(CURDIR):$$LD_LIBRARY_PATH $(GOCMD) test ./... -count=1 + +clean: purge + rm -rf libdllm.so* package dllm-grpc + +purge: + rm -rf sources/dllm.cpp diff --git a/backend/go/dllm/capi.go b/backend/go/dllm/capi.go new file mode 100644 index 000000000..d8c0ca11e --- /dev/null +++ b/backend/go/dllm/capi.go @@ -0,0 +1,265 @@ +package main + +// Typed Go wrappers over dllm.cpp's flat C-ABI (include/dllm_capi.h, ABI v1). +// +// Contract highlights the wrappers encode (see the header + src/capi.cpp): +// - tokenize_json/generate return malloc'd char* the CALLER owns: bound as +// uintptr, copied with goStringFromCPtr, released via dllm_capi_free_string. +// - last_error returns a BORROWED pointer (valid until the next call on the +// same ctx): bound as a plain string (purego copies), never freed, and only +// read AFTER the failing call has returned - reading it while a generate is +// in flight on the same ctx violates the per-ctx serialization contract. +// - All entry points except dllm_capi_cancel must be externally serialized +// per ctx (one ctx = one concurrent generate/tokenize). Cancel only flips +// an atomic and may be called from any goroutine mid-generate. +// - No C++ exception crosses the boundary; failures land in last_error. + +import ( + "encoding/json" + "errors" + "fmt" + "sync" + "sync/atomic" + "unsafe" + + "github.com/ebitengine/purego" + "github.com/mudler/LocalAI/pkg/grpc/base" + pb "github.com/mudler/LocalAI/pkg/grpc/proto" +) + +// dllmABIVersion is the DLLM_CAPI_ABI_VERSION this binding was written +// against; main.go refuses to start against a libdllm.so reporting another. +const dllmABIVersion = 1 + +// purego-bound entry points from libdllm.so. Names match dllm_capi.h +// exactly; loadCAPI (main.go) fills these in at boot. +var ( + cppAbiVersion func() int32 + cppLoad func(ggufPath, paramsJSON string) uintptr + cppFree func(ctx uintptr) + cppLastError func(ctx uintptr) string // borrowed pointer: purego copies, do NOT free + cppFreeString func(s uintptr) + // malloc'd char* returns, hence uintptr (see loadCAPI's doc comment). + cppTokenizeJSON func(ctx uintptr, text string) uintptr + cppGenerate func(ctx uintptr, prompt, optsJSON string) uintptr + // on_block/on_step are C function pointers produced by purego.NewCallback; + // userData carries the streamCallStates registry key. + cppGenerateStream func(ctx uintptr, prompt, optsJSON string, onBlock, onStep, userData uintptr) int32 + cppCancel func(ctx uintptr) +) + +// Dllm is the LocalAI gRPC backend over the dllm.cpp C-ABI. T1 ships only +// the binding scaffold; Load/PredictRich/PredictStreamRich (and the move to +// a dedicated dllm.go with the per-model worker goroutine) land in T4. +type Dllm struct { + base.Base +} + +// Load is not wired yet: the binding smoke drives the C functions directly. +func (d *Dllm) Load(opts *pb.ModelOptions) error { + return errors.New("dllm: model loading not implemented yet (backend wiring lands in T4)") +} + +// cAbiVersion returns the library's DLLM_CAPI_ABI_VERSION. +func cAbiVersion() int32 { + return cppAbiVersion() +} + +// cLoad opens the GGUF at path with the flat params JSON (e.g. +// {"n_gpu_layers":99}). Returns 0 on failure; per the header contract there +// is no ctx to carry the reason, the C side logs it to stderr (and +// cLastError(0) only yields the static NULL-ctx message). +func cLoad(path, paramsJSON string) uintptr { + return cppLoad(path, paramsJSON) +} + +// cFree releases a ctx; safe on 0 (delete nullptr). +func cFree(h uintptr) { + cppFree(h) +} + +// cLastError returns the ctx's last error message (or the static NULL-ctx +// message for h==0). The C pointer is borrowed and only valid until the next +// call on the same ctx; purego's string return copies it immediately, so the +// returned Go string is safe to keep. Must not be called while another call +// on the same ctx is in flight. +func cLastError(h uintptr) string { + return cppLastError(h) +} + +// lastErrorOr is cLastError with a fallback for the empty-message case, so +// wrapped errors never end in ": ". +func lastErrorOr(h uintptr, fallback string) string { + if msg := cLastError(h); msg != "" { + return msg + } + return fallback +} + +// cTokenizeJSON tokenizes text (the C side prepends bos per vocab.add_bos) +// and returns the token ids as a JSON array string, e.g. "[2,18]". +func cTokenizeJSON(h uintptr, text string) (string, error) { + ret := cppTokenizeJSON(h, text) + if ret == 0 { + return "", fmt.Errorf("dllm: tokenize failed: %s", lastErrorOr(h, "unknown error")) + } + out := goStringFromCPtr(ret) + cppFreeString(ret) + return out, nil +} + +// cGenerate runs a blocking generation and returns the detokenized text. +// optsJSON must be a FLAT JSON object of scalars (use buildOptsJSON); the C +// parser rejects nested objects/arrays. NULL return -> last_error (read only +// after the call returned, per the serialization contract); a cancelled call +// surfaces as the "cancelled" message. +func cGenerate(h uintptr, prompt, optsJSON string) (string, error) { + ret := cppGenerate(h, prompt, optsJSON) + if ret == 0 { + return "", fmt.Errorf("dllm: generate failed: %s", lastErrorOr(h, "unknown error")) + } + out := goStringFromCPtr(ret) + cppFreeString(ret) + return out, nil +} + +// streamCallState carries the Go callbacks for one in-flight +// cGenerateStream call; the registry key travels through C as user_data. +// The map shape mirrors the whisper backend's streamCallStates: only one +// entry per ctx is ever live (the C-ABI is serialized per ctx), but keying +// by call survives multiple models/processes sharing the package. +type streamCallState struct { + onBlock func(text string) + onStep func(step, total int, preview string) +} + +var ( + streamCallStates sync.Map // uint64 -> *streamCallState + streamCallSeq atomic.Uint64 + + // purego.NewCallback allocates a finite, never-released callback slot, so + // the two trampolines are created exactly once and reused across calls. + streamCbOnce sync.Once + blockCbPtr uintptr + stepCbPtr uintptr +) + +// onBlockTrampoline is the Go side of dllm_block_cb. It runs on the C +// calling thread, mid-generate: keep it tiny and non-blocking (callers that +// bridge to goroutines must hand off via buffered channels). The text +// pointer is only valid for the duration of the invocation, so it is copied +// to a Go string immediately. +func onBlockTrampoline(text uintptr, userData uintptr) { + v, ok := streamCallStates.Load(uint64(userData)) + if !ok { + return // call already torn down + } + state := v.(*streamCallState) + if state.onBlock != nil { + state.onBlock(goStringFromCPtr(text)) + } +} + +// onStepTrampoline is the Go side of dllm_step_cb; same threading and +// lifetime caveats as onBlockTrampoline. +func onStepTrampoline(step int32, totalSteps int32, canvasPreview uintptr, userData uintptr) { + v, ok := streamCallStates.Load(uint64(userData)) + if !ok { + return + } + state := v.(*streamCallState) + if state.onStep != nil { + state.onStep(int(step), int(totalSteps), goStringFromCPtr(canvasPreview)) + } +} + +// cGenerateStream runs a generation with per-committed-block (onBlock) and +// per-denoising-step (onStep) callbacks; either may be nil. The callbacks +// run on the C thread (see the trampoline docs). Returns an error carrying +// last_error on failure; cancellation surfaces as the "cancelled" message. +func cGenerateStream(h uintptr, prompt, optsJSON string, onBlock func(text string), onStep func(step, total int, preview string)) error { + streamCbOnce.Do(func() { + blockCbPtr = purego.NewCallback(onBlockTrampoline) + stepCbPtr = purego.NewCallback(onStepTrampoline) + }) + + id := streamCallSeq.Add(1) + streamCallStates.Store(id, &streamCallState{onBlock: onBlock, onStep: onStep}) + defer streamCallStates.Delete(id) + + // Pass NULL for absent callbacks so the C side skips the per-block / + // per-step detokenize work entirely. + var blockPtr, stepPtr uintptr + if onBlock != nil { + blockPtr = blockCbPtr + } + if onStep != nil { + stepPtr = stepCbPtr + } + + if rc := cppGenerateStream(h, prompt, optsJSON, blockPtr, stepPtr, uintptr(id)); rc != 0 { + return fmt.Errorf("dllm: generate_stream failed: %s", lastErrorOr(h, "unknown error")) + } + return nil +} + +// cCancel requests cancellation of the in-flight generate on h. This is the +// ONE entry point safe to call from any goroutine while a generate runs (it +// only flips an atomic). Note the cancel-reset race from the header: each +// generate resets the flag on entry, so a watchdog should re-issue cancel if +// the call has not returned. +func cCancel(h uintptr) { + cppCancel(h) +} + +// buildOptsJSON renders generation options as the flat JSON object the +// C-ABI expects (known keys: n_predict, blocks, seed, eb_*, kv_cache). The +// C-side scanner only understands scalar number/string values and rejects +// nested objects/arrays loudly; bools are rejected here too because the +// scanner has no concept of them. Fail loud rather than let an option be +// silently misread. +func buildOptsJSON(opts map[string]any) (string, error) { + if len(opts) == 0 { + return "{}", nil + } + for k, v := range opts { + switch v.(type) { + case string, + int, int8, int16, int32, int64, + uint, uint8, uint16, uint32, uint64, + float32, float64, + json.Number: + // scalar: fine + default: + return "", fmt.Errorf("dllm: opts key %q has non-scalar value %T (the C-ABI only accepts flat number/string scalars)", k, v) + } + } + b, err := json.Marshal(opts) + if err != nil { + return "", fmt.Errorf("dllm: marshal opts: %w", err) + } + return string(b), nil +} + +// goStringFromCPtr copies a NUL-terminated C string into Go memory. cptr is +// the raw pointer returned by purego from the C-ABI (a malloc'd buffer the +// caller owns, or a callback argument only valid during the invocation); +// owning callers must free it via cppFreeString after the copy lands. +// +// The uintptr->unsafe.Pointer conversion below trips go vet's unsafeptr +// check, which can't distinguish a C-owned heap pointer from Go-managed +// memory. It is safe here: the pointer addresses C memory the Go GC neither +// tracks nor moves, and we dereference it immediately to copy the bytes out, +// the same pattern (and the same tolerated warning) as the parakeet-cpp and +// whisper backends. +func goStringFromCPtr(cptr uintptr) string { + if cptr == 0 { + return "" + } + p := unsafe.Pointer(cptr) //nolint:govet // C-owned buffer, not Go-GC memory (see doc above) + n := 0 + for *(*byte)(unsafe.Add(p, n)) != 0 { + n++ + } + return string(unsafe.Slice((*byte)(p), n)) +} diff --git a/backend/go/dllm/dllm_test.go b/backend/go/dllm/dllm_test.go new file mode 100644 index 000000000..a6f1e5697 --- /dev/null +++ b/backend/go/dllm/dllm_test.go @@ -0,0 +1,144 @@ +package main + +import ( + "os" + "sync" + "testing" + "unsafe" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" +) + +func TestDllm(t *testing.T) { + RegisterFailHandler(Fail) + RunSpecs(t, "dllm Backend Suite") +} + +var ( + libLoadOnce sync.Once + libLoadErr error +) + +// ensureLibLoaded mirrors main.go's bootstrap so a Go test can drive the +// C-ABI bridge without spinning up the gRPC server. The library path comes +// from DLLM_TEST_LIBRARY (gated specs Skip when it is unset). +func ensureLibLoaded() { + libLoadOnce.Do(func() { + libLoadErr = loadCAPI(os.Getenv("DLLM_TEST_LIBRARY")) + }) +} + +// C-ABI binding smoke: drives the real libdllm.so against the tiny GGUF +// fixture from dllm.cpp (tests/fixtures/tiny_with_vocab.gguf). Gated on: +// +// DLLM_TEST_LIBRARY absolute path to libdllm.so +// DLLM_TEST_TINY_MODEL absolute path to tiny_with_vocab.gguf +var _ = Describe("C-ABI binding", func() { + BeforeEach(func() { + if os.Getenv("DLLM_TEST_LIBRARY") == "" || os.Getenv("DLLM_TEST_TINY_MODEL") == "" { + Skip("set DLLM_TEST_LIBRARY and DLLM_TEST_TINY_MODEL to run the C-ABI binding smoke") + } + ensureLibLoaded() + Expect(libLoadErr).ToNot(HaveOccurred()) + }) + + It("binds the 9 symbols and round-trips the tiny model", func() { + Expect(cAbiVersion()).To(Equal(int32(1))) + + h := cLoad(os.Getenv("DLLM_TEST_TINY_MODEL"), "{}") + Expect(h).ToNot(BeZero(), "dllm_capi_load of the tiny fixture") + + // Tiny fixture vocab: "hello" tokenizes to ids [2,18] (bos prepended + // by the C side: vocab.add_bos). + toks, err := cTokenizeJSON(h, "hello") + Expect(err).ToNot(HaveOccurred()) + Expect(toks).To(Equal("[2,18]")) + + // Deterministic generation: an explicit non-negative seed seeds + // mt19937, so two identical calls must produce identical text. + out1, err := cGenerate(h, "hello", `{"n_predict":16,"seed":7}`) + Expect(err).ToNot(HaveOccurred()) + Expect(out1).ToNot(BeEmpty()) + // Cancel with no call in flight is dropped: each generate resets the + // cancel flag on entry (header contract), so this must not affect + // the next call. Also binds the 9th symbol; safe on NULL too. + cCancel(h) + cCancel(0) + + out2, err := cGenerate(h, "hello", `{"n_predict":16,"seed":7}`) + Expect(err).ToNot(HaveOccurred()) + Expect(out2).To(Equal(out1)) + + // Streaming variant: same opts, blocks arrive via the purego + // callback trampoline. The per-block detokenize can differ from the + // seamless full-text decode at block boundaries, so only assert that + // blocks arrived and were non-trivial, not byte equality with out1. + var blocks []string + var steps int + err = cGenerateStream(h, "hello", `{"n_predict":16,"seed":7}`, + func(text string) { blocks = append(blocks, text) }, + func(step, total int, preview string) { steps++ }, + ) + Expect(err).ToNot(HaveOccurred()) + Expect(blocks).ToNot(BeEmpty()) + Expect(steps).To(BeNumerically(">", 0)) + + // Load failure path: NULL ctx back, and last_error(NULL) returns the + // static NULL-ctx message (there is no ctx to carry the real reason). + bad := cLoad("/nonexistent/dllm-model.gguf", "{}") + Expect(bad).To(BeZero()) + Expect(cLastError(0)).ToNot(BeEmpty()) + + // Free is safe on a live handle and a NULL one (delete nullptr). + cFree(h) + cFree(0) + }) +}) + +// Ungated specs for the pure-Go helpers (no libdllm.so required). +var _ = Describe("buildOptsJSON", func() { + It("renders flat scalars as a JSON object", func() { + out, err := buildOptsJSON(map[string]any{ + "n_predict": 16, + "seed": int64(7), + "eb_t_min": 0.5, + "kv_cache": "auto", + }) + Expect(err).ToNot(HaveOccurred()) + Expect(out).To(MatchJSON(`{"n_predict":16,"seed":7,"eb_t_min":0.5,"kv_cache":"auto"}`)) + }) + + It("renders an empty object for no options", func() { + out, err := buildOptsJSON(nil) + Expect(err).ToNot(HaveOccurred()) + Expect(out).To(Equal("{}")) + }) + + It("rejects nested objects (the C-side scanner only reads flat scalars)", func() { + _, err := buildOptsJSON(map[string]any{"sampler": map[string]any{"seed": 1}}) + Expect(err).To(HaveOccurred()) + }) + + It("rejects arrays", func() { + _, err := buildOptsJSON(map[string]any{"stop": []string{"a"}}) + Expect(err).To(HaveOccurred()) + }) + + It("rejects booleans (the C-side scanner only understands numbers and strings)", func() { + _, err := buildOptsJSON(map[string]any{"flag": true}) + Expect(err).To(HaveOccurred()) + }) +}) + +var _ = Describe("goStringFromCPtr", func() { + It("copies a NUL-terminated buffer", func() { + buf := []byte("dllm\x00") + s := goStringFromCPtr(uintptr(unsafe.Pointer(&buf[0]))) + Expect(s).To(Equal("dllm")) + }) + + It("returns the empty string for NULL", func() { + Expect(goStringFromCPtr(0)).To(Equal("")) + }) +}) diff --git a/backend/go/dllm/main.go b/backend/go/dllm/main.go new file mode 100644 index 000000000..41d4368f2 --- /dev/null +++ b/backend/go/dllm/main.go @@ -0,0 +1,85 @@ +package main + +// Started internally by LocalAI - one gRPC server per loaded model. +// +// Loads libdllm.so via purego and registers the 9-symbol flat C-ABI +// declared in dllm.cpp's include/dllm_capi.h (ABI v1). The library name can +// be overridden with DLLM_LIBRARY (mirrors the PARAKEET_LIBRARY / +// WHISPER_LIBRARY convention in the sibling backends); the default looks +// for the .so next to this binary (run.sh puts the package dir on +// LD_LIBRARY_PATH). +import ( + "flag" + "fmt" + "os" + + "github.com/ebitengine/purego" + grpc "github.com/mudler/LocalAI/pkg/grpc" +) + +var ( + addr = flag.String("addr", "localhost:50051", "the address to connect to") +) + +type LibFuncs struct { + FuncPtr any + Name string +} + +// loadCAPI dlopens libName and binds the 9 dllm_capi_* entry points 1:1 to +// dllm_capi.h, so an `nm libdllm.so | grep dllm_capi` is enough to spot +// drift. Shared with the test suite (ensureLibLoaded), which drives the +// bridge without the gRPC server. +// +// The C-ABI returns malloc'd char* buffers from tokenize_json/generate; we +// register those as uintptr so we get the raw pointer back and can call +// dllm_capi_free_string on it (purego's string return would copy and forget +// the original pointer, leaking it on every call). last_error returns a +// BORROWED pointer instead, so it is registered as a plain string: purego +// copies it and nothing must be freed. +func loadCAPI(libName string) error { + lib, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL) + if err != nil { + return fmt.Errorf("dllm: dlopen %q: %w", libName, err) + } + + libFuncs := []LibFuncs{ + {&cppAbiVersion, "dllm_capi_abi_version"}, + {&cppLoad, "dllm_capi_load"}, + {&cppFree, "dllm_capi_free"}, + {&cppLastError, "dllm_capi_last_error"}, + {&cppFreeString, "dllm_capi_free_string"}, + {&cppTokenizeJSON, "dllm_capi_tokenize_json"}, + {&cppGenerate, "dllm_capi_generate"}, + {&cppGenerateStream, "dllm_capi_generate_stream"}, + {&cppCancel, "dllm_capi_cancel"}, + } + for _, lf := range libFuncs { + purego.RegisterLibFunc(lf.FuncPtr, lib, lf.Name) + } + return nil +} + +func main() { + libName := os.Getenv("DLLM_LIBRARY") + if libName == "" { + libName = "libdllm.so" + } + + if err := loadCAPI(libName); err != nil { + panic(err) + } + + // Hard-fail on an ABI mismatch: the flat-pointer bindings above would + // otherwise misbehave silently against a future libdllm.so. + if v := cAbiVersion(); v != dllmABIVersion { + panic(fmt.Errorf("dllm: libdllm.so ABI=%d, this backend speaks ABI=%d", v, dllmABIVersion)) + } + fmt.Fprintf(os.Stderr, "[dllm] ABI=%d\n", cAbiVersion()) + + flag.Parse() + + if err := grpc.StartServer(*addr, &Dllm{}); err != nil { + panic(err) + } +} diff --git a/backend/go/dllm/package.sh b/backend/go/dllm/package.sh new file mode 100755 index 000000000..5b2b8f8b9 --- /dev/null +++ b/backend/go/dllm/package.sh @@ -0,0 +1,24 @@ +#!/bin/bash +# +# T1 packaging stub: copy the binary, run.sh and libdllm.so into package/. +# The full ldd walk (libc, libstdc++, libgomp, GPU runtimes, arch +# detection) lands with the registration task, mirroring +# backend/go/whisper/package.sh. + +set -e + +CURDIR=$(dirname "$(realpath "$0")") + +mkdir -p "$CURDIR/package/lib" + +cp -avf "$CURDIR/dllm-grpc" "$CURDIR/package/" +cp -avf "$CURDIR/run.sh" "$CURDIR/package/" + +# libdllm.so + any soname symlinks, should upstream ever add them. +cp -avf "$CURDIR"/libdllm.so* "$CURDIR/package/lib/" 2>/dev/null || { + echo "ERROR: libdllm.so not found in $CURDIR, run 'make' first" >&2 + exit 1 +} + +echo "T1 package layout (full ldd walk lands with registration):" +ls -liah "$CURDIR/package/" "$CURDIR/package/lib/" diff --git a/backend/go/dllm/run.sh b/backend/go/dllm/run.sh new file mode 100755 index 000000000..ab30af4b0 --- /dev/null +++ b/backend/go/dllm/run.sh @@ -0,0 +1,16 @@ +#!/bin/bash +set -e + +CURDIR=$(dirname "$(realpath "$0")") + +export LD_LIBRARY_PATH="$CURDIR/lib:$CURDIR:${LD_LIBRARY_PATH:-}" + +# If a self-contained ld.so was packaged, route through it so the +# packaged libc / libstdc++ are used instead of the host's (matches the +# whisper / parakeet-cpp backends' runtime layout). +if [ -f "$CURDIR/lib/ld.so" ]; then + echo "Using lib/ld.so" + exec "$CURDIR/lib/ld.so" "$CURDIR/dllm-grpc" "$@" +fi + +exec "$CURDIR/dllm-grpc" "$@"