mirror of
https://github.com/mudler/LocalAI.git
synced 2026-06-05 07:16:10 -04:00
* feat(parakeet-cpp): dynamic-batching scheduler (queue + dispatcher) Assisted-by: Claude:claude-opus-4-8 [Claude Code] Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * feat(parakeet-cpp): dynamic batching for AudioTranscription via batched JSON C-API Drop SingleThread; route unary transcription through the in-process batcher which coalesces concurrent requests into one batched engine call. Streaming stays mutually exclusive via engineMu. Adds batch_max_size / batch_max_wait_ms options (size=1 disables; recommended on CPU). Assisted-by: Claude:claude-opus-4-8 [Claude Code] Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fix(parakeet-cpp): tear down dispatcher in Free; log batch config; preallocate; clarify stream lock Assisted-by: Claude:claude-opus-4-8 [Claude Code] Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fix(parakeet-cpp): Ginkgo batcher tests; optional batch C-API binding with per-request fallback The batched JSON C-API symbol exists only in newer libparakeet.so (ABI >= 2); probe it with Dlsym and register optionally so the backend still loads against an older library, falling back to per-request transcription. Rewrites the batcher unit tests as Ginkgo/Gomega specs (forbidigo bans t.Fatal in tests). Assisted-by: Claude:claude-opus-4-8 [Claude Code] Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * feat(parakeet-cpp): debug-log coalesced batch size in runBatch Assisted-by: Claude:claude-opus-4-8 [Claude Code] Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fix(parakeet-cpp): default batch_max_size to 1 (batching opt-in) Dynamic batching now defaults off (batch_max_size:1, one request at a time). Raise batch_max_size to opt in: it is a large throughput win on GPU under concurrent load, but on CPU and low-concurrency setups it only adds latency, so off is the safer default. The startup log now states whether batching is on or off, and the audio-to-text docs are updated to match. 
Signed-off-by: Ettore Di Giacinto <mudler@localai.io> Assisted-by: Claude:claude-opus-4-8 [Claude Code] * chore(parakeet-cpp): bump parakeet.cpp to 8a7c482 (batched decode + B=1 fast-path) parakeet.cpp PR #1 merged the batched encoder/decode and the B=1 encoder fast-path to master. Point PARAKEET_VERSION at that commit so the backend builds the batched C-API (parakeet_capi_transcribe_pcm_batch_json) that the dynamic batcher calls; the prior pin (30a3075) predated it, so only the per-request fallback path was exercised. Verified the shared lib builds with the backend's CMake flags and exports the batch symbol. Signed-off-by: Ettore Di Giacinto <mudler@localai.io> Assisted-by: Claude:claude-opus-4-8 [Claude Code] --------- Signed-off-by: Ettore Di Giacinto <mudler@localai.io> Co-authored-by: Ettore Di Giacinto <mudler@localai.io>
76 lines
2.4 KiB
Go
76 lines
2.4 KiB
Go
package main
|
|
|
|
// Started internally by LocalAI - one gRPC server per loaded model.
//
// Loads libparakeet.so via purego and registers the flat C-API entry
// points declared in parakeet_capi.h. The library name can be overridden
// with PARAKEET_LIBRARY (mirrors the WHISPER_LIBRARY / VIBEVOICECPP_LIBRARY
// convention in the sibling backends); the default is the bare name
// "libparakeet.so", which is left to the dynamic loader to resolve, so it is
// found next to this binary only when the loader's search path points there.
|
|
import (
|
|
"flag"
|
|
"fmt"
|
|
"os"
|
|
|
|
"github.com/ebitengine/purego"
|
|
grpc "github.com/mudler/LocalAI/pkg/grpc"
|
|
)
|
|
|
|
// addr holds the value of the -addr command-line flag. main passes it to
// grpc.StartServer once flag.Parse has run; the default is localhost:50051.
var addr = flag.String("addr", "localhost:50051", "the address to connect to")
|
|
|
|
// LibFuncs pairs a Go function variable with the name of the C symbol that
// should be bound to it.
type LibFuncs struct {
	// FuncPtr is a pointer to the Go function variable that receives the
	// binding; main hands it to purego.RegisterLibFunc as the target.
	FuncPtr any
	// Name is the exported symbol name to look up in the loaded library.
	Name string
}
|
|
|
|
func main() {
|
|
libName := os.Getenv("PARAKEET_LIBRARY")
|
|
if libName == "" {
|
|
libName = "libparakeet.so"
|
|
}
|
|
|
|
lib, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL)
|
|
if err != nil {
|
|
panic(fmt.Errorf("parakeet-cpp: dlopen %q: %w", libName, err))
|
|
}
|
|
|
|
// Bound 1:1 to parakeet_capi.h. The C-API returns malloc'd char*
|
|
// buffers from transcribe_*; we register those as uintptr so we get
|
|
// the raw pointer back and can call parakeet_capi_free_string on it
|
|
// (purego's string return would copy and forget the original pointer,
|
|
// leaking it on every call).
|
|
libFuncs := []LibFuncs{
|
|
{&CppAbiVersion, "parakeet_capi_abi_version"},
|
|
{&CppLoad, "parakeet_capi_load"},
|
|
{&CppFree, "parakeet_capi_free"},
|
|
{&CppTranscribePath, "parakeet_capi_transcribe_path"},
|
|
{&CppTranscribePathJSON, "parakeet_capi_transcribe_path_json"},
|
|
{&CppStreamBegin, "parakeet_capi_stream_begin"},
|
|
{&CppStreamFeed, "parakeet_capi_stream_feed"},
|
|
{&CppStreamFinalize, "parakeet_capi_stream_finalize"},
|
|
{&CppStreamFree, "parakeet_capi_stream_free"},
|
|
{&CppFreeString, "parakeet_capi_free_string"},
|
|
{&CppLastError, "parakeet_capi_last_error"},
|
|
}
|
|
for _, lf := range libFuncs {
|
|
purego.RegisterLibFunc(lf.FuncPtr, lib, lf.Name)
|
|
}
|
|
|
|
// The batched-JSON entry point exists only in newer libparakeet.so (ABI >= 2).
|
|
// Probe with Dlsym and register only if present, so the backend still loads
|
|
// against an older library (it falls back to per-request transcription).
|
|
if sym, err := purego.Dlsym(lib, "parakeet_capi_transcribe_pcm_batch_json"); err == nil && sym != 0 {
|
|
purego.RegisterLibFunc(&CppTranscribePcmBatchJSON, lib, "parakeet_capi_transcribe_pcm_batch_json")
|
|
}
|
|
|
|
fmt.Fprintf(os.Stderr, "[parakeet-cpp] ABI=%d\n", CppAbiVersion())
|
|
|
|
flag.Parse()
|
|
|
|
if err := grpc.StartServer(*addr, &ParakeetCpp{}); err != nil {
|
|
panic(err)
|
|
}
|
|
}
|