mirror of
https://github.com/mudler/LocalAI.git
synced 2026-06-05 15:26:14 -04:00
* feat(parakeet-cpp): dynamic-batching scheduler (queue + dispatcher) Assisted-by: Claude:claude-opus-4-8 [Claude Code] Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * feat(parakeet-cpp): dynamic batching for AudioTranscription via batched JSON C-API Drop SingleThread; route unary transcription through the in-process batcher which coalesces concurrent requests into one batched engine call. Streaming stays mutually exclusive via engineMu. Adds batch_max_size / batch_max_wait_ms options (size=1 disables; recommended on CPU). Assisted-by: Claude:claude-opus-4-8 [Claude Code] Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fix(parakeet-cpp): tear down dispatcher in Free; log batch config; preallocate; clarify stream lock Assisted-by: Claude:claude-opus-4-8 [Claude Code] Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fix(parakeet-cpp): Ginkgo batcher tests; optional batch C-API binding with per-request fallback The batched JSON C-API symbol exists only in newer libparakeet.so (ABI >= 2); probe it with Dlsym and register optionally so the backend still loads against an older library, falling back to per-request transcription. Rewrites the batcher unit tests as Ginkgo/Gomega specs (forbidigo bans t.Fatal in tests). Assisted-by: Claude:claude-opus-4-8 [Claude Code] Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * feat(parakeet-cpp): debug-log coalesced batch size in runBatch Assisted-by: Claude:claude-opus-4-8 [Claude Code] Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fix(parakeet-cpp): default batch_max_size to 1 (batching opt-in) Dynamic batching now defaults off (batch_max_size:1, one request at a time). Raise batch_max_size to opt in: it is a large throughput win on GPU under concurrent load, but on CPU and low-concurrency setups it only adds latency, so off is the safer default. The startup log now states whether batching is on or off, and the audio-to-text docs are updated to match. Signed-off-by: Ettore Di Giacinto <mudler@localai.io> Assisted-by: Claude:claude-opus-4-8 [Claude Code] * chore(parakeet-cpp): bump parakeet.cpp to 8a7c482 (batched decode + B=1 fast-path) parakeet.cpp PR #1 merged the batched encoder/decode and the B=1 encoder fast-path to master. Point PARAKEET_VERSION at that commit so the backend builds the batched C-API (parakeet_capi_transcribe_pcm_batch_json) that the dynamic batcher calls; the prior pin (30a3075) predated it, so only the per-request fallback path was exercised. Verified the shared lib builds with the backend's CMake flags and exports the batch symbol. Signed-off-by: Ettore Di Giacinto <mudler@localai.io> Assisted-by: Claude:claude-opus-4-8 [Claude Code] --------- Signed-off-by: Ettore Di Giacinto <mudler@localai.io> Co-authored-by: Ettore Di Giacinto <mudler@localai.io>
109 lines
2.6 KiB
Go
109 lines
2.6 KiB
Go
package main
|
|
|
|
import (
|
|
"sync"
|
|
"time"
|
|
|
|
. "github.com/onsi/ginkgo/v2"
|
|
. "github.com/onsi/gomega"
|
|
)
|
|
|
|
var _ = Describe("batcher", func() {
|
|
echoReply := func(reqs []*batchRequest) {
|
|
for _, r := range reqs {
|
|
r.reply <- batchReply{json: r.tag}
|
|
}
|
|
}
|
|
|
|
It("coalesces concurrent submits into batches", func() {
|
|
var mu sync.Mutex
|
|
var sizes []int
|
|
run := func(reqs []*batchRequest) {
|
|
mu.Lock()
|
|
sizes = append(sizes, len(reqs))
|
|
mu.Unlock()
|
|
echoReply(reqs)
|
|
}
|
|
b := newBatcher(4, 50*time.Millisecond, run)
|
|
stop := make(chan struct{})
|
|
go b.run(stop)
|
|
defer close(stop)
|
|
|
|
const N = 4
|
|
var wg sync.WaitGroup
|
|
got := make([]string, N)
|
|
for i := 0; i < N; i++ {
|
|
wg.Add(1)
|
|
go func(i int) {
|
|
defer wg.Done()
|
|
rep := make(chan batchReply, 1)
|
|
b.submit <- &batchRequest{tag: string(rune('a' + i)), reply: rep}
|
|
got[i] = (<-rep).json
|
|
}(i)
|
|
}
|
|
wg.Wait()
|
|
|
|
mu.Lock()
|
|
defer mu.Unlock()
|
|
total, maxBatch := 0, 0
|
|
for _, s := range sizes {
|
|
total += s
|
|
if s > maxBatch {
|
|
maxBatch = s
|
|
}
|
|
}
|
|
Expect(total).To(Equal(N))
|
|
Expect(maxBatch).To(BeNumerically(">=", 2), "expected at least one batch to coalesce >1 request")
|
|
})
|
|
|
|
It("dispatches when max size is reached", func() {
|
|
dispatched := make(chan int, 8)
|
|
run := func(reqs []*batchRequest) {
|
|
dispatched <- len(reqs)
|
|
echoReply(reqs)
|
|
}
|
|
b := newBatcher(2, time.Hour, run) // huge window: only size can trigger
|
|
stop := make(chan struct{})
|
|
go b.run(stop)
|
|
defer close(stop)
|
|
for i := 0; i < 2; i++ {
|
|
rep := make(chan batchReply, 1)
|
|
b.submit <- &batchRequest{tag: "x", reply: rep}
|
|
go func(rep chan batchReply) { <-rep }(rep)
|
|
}
|
|
Eventually(dispatched, "2s").Should(Receive(Equal(2)))
|
|
})
|
|
|
|
It("dispatches when the wait window elapses", func() {
|
|
dispatched := make(chan int, 8)
|
|
run := func(reqs []*batchRequest) {
|
|
dispatched <- len(reqs)
|
|
echoReply(reqs)
|
|
}
|
|
b := newBatcher(8, 20*time.Millisecond, run) // size unreachable; window fires
|
|
stop := make(chan struct{})
|
|
go b.run(stop)
|
|
defer close(stop)
|
|
rep := make(chan batchReply, 1)
|
|
b.submit <- &batchRequest{tag: "x", reply: rep}
|
|
go func() { <-rep }()
|
|
Eventually(dispatched, "2s").Should(Receive(Equal(1)))
|
|
})
|
|
|
|
It("bypasses batching when max size is 1", func() {
|
|
dispatched := make(chan int, 8)
|
|
run := func(reqs []*batchRequest) {
|
|
dispatched <- len(reqs)
|
|
echoReply(reqs)
|
|
}
|
|
b := newBatcher(1, time.Hour, run) // size 1 => immediate dispatch
|
|
stop := make(chan struct{})
|
|
go b.run(stop)
|
|
defer close(stop)
|
|
rep := make(chan batchReply, 1)
|
|
b.submit <- &batchRequest{tag: "x", reply: rep}
|
|
go func() { <-rep }()
|
|
Eventually(dispatched, "2s").Should(Receive(Equal(1)))
|
|
})
|
|
})
|