Files
LocalAI/backend/go/parakeet-cpp/main.go
Ettore Di Giacinto 071872bb53 feat(parakeet-cpp): real segment timestamps (NeMo-faithful)
Offline: replace the single synthetic whole-clip segment with multiple
segments grouped exactly like NeMo's get_segment_offsets - a new segment
after sentence-ending punctuation ('. ? !'), each carrying start/end and
its time-window token ids. The optional model option segment_gap_threshold
(NeMo's unit: encoder FRAMES, default 0=off) adds NeMo's silence-gap split,
converted to seconds via the JSON frame_sec the engine now reports.
Per-segment words are still gated behind timestamp_granularities=["word"];
a zero-word document falls back to a single text segment.

Streaming: when libparakeet.so exposes the ABI v4 JSON entry points
(probed), drive parakeet_capi_stream_feed_json / _finalize_json and
accumulate the streamed per-word timestamps into per-utterance segments
(EOU stays the boundary), so streaming FinalResult segments now carry
start/end. Falls back to the text-only feed against an older library.

Pure-Go specs cover splitWordsIntoSegments (punctuation + gap rules, NeMo
elif order, fallback), transcriptResultFromDoc (multi-segment, token
windows, word-granularity gate), and the streaming segmenter.

Assisted-by: Claude:claude-opus-4-8 [Claude Code]
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2026-06-07 08:47:12 +00:00

95 lines
3.6 KiB
Go

package main
// Started internally by LocalAI - one gRPC server per loaded model.
//
// Loads libparakeet.so via purego and registers the flat C-API entry
// points declared in parakeet_capi.h. The library name can be overridden
// with PARAKEET_LIBRARY (mirrors the WHISPER_LIBRARY / VIBEVOICECPP_LIBRARY
// convention in the sibling backends); the default looks for the .so next
// to this binary.
import (
"flag"
"fmt"
"os"
"github.com/ebitengine/purego"
grpc "github.com/mudler/LocalAI/pkg/grpc"
)
var (
addr = flag.String("addr", "localhost:50051", "the address to connect to")
)
type LibFuncs struct {
FuncPtr any
Name string
}
func main() {
libName := os.Getenv("PARAKEET_LIBRARY")
if libName == "" {
libName = "libparakeet.so"
}
lib, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL)
if err != nil {
panic(fmt.Errorf("parakeet-cpp: dlopen %q: %w", libName, err))
}
// Bound 1:1 to parakeet_capi.h. The C-API returns malloc'd char*
// buffers from transcribe_*; we register those as uintptr so we get
// the raw pointer back and can call parakeet_capi_free_string on it
// (purego's string return would copy and forget the original pointer,
// leaking it on every call).
libFuncs := []LibFuncs{
{&CppAbiVersion, "parakeet_capi_abi_version"},
{&CppLoad, "parakeet_capi_load"},
{&CppFree, "parakeet_capi_free"},
{&CppTranscribePath, "parakeet_capi_transcribe_path"},
{&CppTranscribePathJSON, "parakeet_capi_transcribe_path_json"},
{&CppStreamBegin, "parakeet_capi_stream_begin"},
{&CppStreamFeed, "parakeet_capi_stream_feed"},
{&CppStreamFinalize, "parakeet_capi_stream_finalize"},
{&CppStreamFree, "parakeet_capi_stream_free"},
{&CppFreeString, "parakeet_capi_free_string"},
{&CppLastError, "parakeet_capi_last_error"},
}
for _, lf := range libFuncs {
purego.RegisterLibFunc(lf.FuncPtr, lib, lf.Name)
}
// The batched-JSON entry point exists only in newer libparakeet.so (ABI >= 2).
// Probe with Dlsym and register only if present, so the backend still loads
// against an older library (it falls back to per-request transcription).
if sym, err := purego.Dlsym(lib, "parakeet_capi_transcribe_pcm_batch_json"); err == nil && sym != 0 {
purego.RegisterLibFunc(&CppTranscribePcmBatchJSON, lib, "parakeet_capi_transcribe_pcm_batch_json")
}
// Per-request language variants (multilingual nemotron). Same probe pattern:
// present only in libparakeet.so built with multilingual support, so the
// backend still loads against an older library and falls back to the
// non-lang batched + streaming entry points (model default / "auto").
if sym, err := purego.Dlsym(lib, "parakeet_capi_transcribe_pcm_batch_json_lang"); err == nil && sym != 0 {
purego.RegisterLibFunc(&CppTranscribePcmBatchJSONLang, lib, "parakeet_capi_transcribe_pcm_batch_json_lang")
}
if sym, err := purego.Dlsym(lib, "parakeet_capi_stream_begin_lang"); err == nil && sym != 0 {
purego.RegisterLibFunc(&CppStreamBeginLang, lib, "parakeet_capi_stream_begin_lang")
}
// Streaming JSON entry points (ABI v4): surface per-word timestamps on the
// streaming path. Same probe pattern; absent in older libparakeet.so, where
// the backend falls back to the text-only streaming feed.
if sym, err := purego.Dlsym(lib, "parakeet_capi_stream_feed_json"); err == nil && sym != 0 {
purego.RegisterLibFunc(&CppStreamFeedJSON, lib, "parakeet_capi_stream_feed_json")
purego.RegisterLibFunc(&CppStreamFinalizeJSON, lib, "parakeet_capi_stream_finalize_json")
}
fmt.Fprintf(os.Stderr, "[parakeet-cpp] ABI=%d\n", CppAbiVersion())
flag.Parse()
if err := grpc.StartServer(*addr, &ParakeetCpp{}); err != nil {
panic(err)
}
}