mirror of
https://github.com/mudler/LocalAI.git
synced 2026-06-14 11:49:33 -04:00
* feat(omnivoice-cpp): add C wrapper + CMake/Makefile build over OmniVoice ov_* ABI Assisted-by: claude:claude-opus-4-8 [Claude Code] Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * feat(omnivoice-cpp): add option/language parsing + WAV framing helpers with tests Assisted-by: claude:claude-opus-4-8 [Claude Code] Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * feat(omnivoice-cpp): wire purego binding with TTS + streaming TTSStream Assisted-by: claude:claude-opus-4-8 [Claude Code] Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * build(omnivoice-cpp): wire backend into root Makefile Assisted-by: claude:claude-opus-4-8 [Claude Code] Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * ci(omnivoice-cpp): add build matrix entries + dep-bump registration Assisted-by: claude:claude-opus-4-8 [Claude Code] Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * feat(omnivoice-cpp): register backend meta + image entries Assisted-by: claude:claude-opus-4-8 [Claude Code] Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * feat(omnivoice-cpp): expose as preference-only importable backend Assisted-by: claude:claude-opus-4-8 [Claude Code] Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * feat(gallery): add omnivoice-cpp TTS models (Q8_0 default + BF16 HQ) Assisted-by: claude:claude-opus-4-8 [Claude Code] Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * docs(omnivoice-cpp): document the OmniVoice TTS backend Assisted-by: claude:claude-opus-4-8 [Claude Code] Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * test(omnivoice-cpp): add env-gated e2e for TTS + streaming Assisted-by: claude:claude-opus-4-8 [Claude Code] Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * feat(omnivoice-cpp): honor tts.audio_path/tts.voice config as default cloning reference The model config tts.audio_path (ModelOptions.AudioPath) and tts.voice now provide a default voice-cloning reference used when a request omits Voice, so a cloned voice can be pinned in the model YAML instead of passed per request. A per-request voice still overrides. Paths resolve relative to the model dir. Assisted-by: claude:claude-opus-4-8 [Claude Code] Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fix(omnivoice-cpp): add missing omnivoice-cpp-development backend meta Mirrors the whisper/vibevoice convention: a -development meta aggregating the master-tagged image variants (the production meta and per-variant prod+dev image entries already existed; only the development meta aggregator was missing). Assisted-by: claude:claude-opus-4-8 [Claude Code] Signed-off-by: Ettore Di Giacinto <mudler@localai.io> --------- Signed-off-by: Ettore Di Giacinto <mudler@localai.io> Co-authored-by: Ettore Di Giacinto <mudler@localai.io>
75 lines
2.2 KiB
Go
75 lines
2.2 KiB
Go
package main
|
|
|
|
import (
|
|
"os"
|
|
"strings"
|
|
|
|
"github.com/ebitengine/purego"
|
|
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
|
|
. "github.com/onsi/ginkgo/v2"
|
|
. "github.com/onsi/gomega"
|
|
)
|
|
|
|
func ttsReq(text, voice string, lang *string, dst string) *pb.TTSRequest {
|
|
return &pb.TTSRequest{Text: text, Voice: voice, Language: lang, Dst: dst}
|
|
}
|
|
|
|
var _ = Describe("OmniVoice e2e", Label("e2e"), func() {
|
|
var loaded bool
|
|
|
|
BeforeEach(func() {
|
|
modelPath := os.Getenv("OMNIVOICE_MODEL")
|
|
codecPath := os.Getenv("OMNIVOICE_CODEC")
|
|
if modelPath == "" || codecPath == "" {
|
|
Skip("OMNIVOICE_MODEL / OMNIVOICE_CODEC not set; skipping e2e")
|
|
}
|
|
if !loaded {
|
|
lib := os.Getenv("OMNIVOICE_LIBRARY")
|
|
if lib == "" {
|
|
lib = "./libgomnivoicecpp-fallback.so"
|
|
}
|
|
h, err := purego.Dlopen(lib, purego.RTLD_NOW|purego.RTLD_GLOBAL)
|
|
Expect(err).ToNot(HaveOccurred())
|
|
purego.RegisterLibFunc(&CppLoad, h, "omni_load")
|
|
purego.RegisterLibFunc(&CppTTS, h, "omni_tts")
|
|
purego.RegisterLibFunc(&CppTTSStream, h, "omni_tts_stream")
|
|
purego.RegisterLibFunc(&CppPCMFree, h, "omni_pcm_free")
|
|
purego.RegisterLibFunc(&CppUnload, h, "omni_unload")
|
|
Expect(CppLoad(modelPath, codecPath, 0, 0)).To(Equal(0))
|
|
loaded = true
|
|
}
|
|
})
|
|
|
|
It("synthesizes a WAV file via TTS", func() {
|
|
b := &OmnivoiceCpp{opts: loadOptions{seed: 42, denoise: true}}
|
|
dst := GinkgoT().TempDir() + "/out.wav"
|
|
lang := "en"
|
|
err := b.TTS(ttsReq("Hello world.", "", &lang, dst))
|
|
Expect(err).ToNot(HaveOccurred())
|
|
fi, err := os.Stat(dst)
|
|
Expect(err).ToNot(HaveOccurred())
|
|
Expect(fi.Size()).To(BeNumerically(">", int64(44)))
|
|
})
|
|
|
|
It("streams audio chunks via TTSStream", func() {
|
|
b := &OmnivoiceCpp{opts: loadOptions{seed: 42, denoise: true}}
|
|
results := make(chan []byte, 1024)
|
|
lang := "en"
|
|
done := make(chan error, 1)
|
|
go func() { done <- b.TTSStream(ttsReq("Hello there, streaming test.", "", &lang, ""), results) }()
|
|
|
|
var chunks int
|
|
var first []byte
|
|
for c := range results {
|
|
if chunks == 0 {
|
|
first = c
|
|
}
|
|
chunks++
|
|
}
|
|
Expect(<-done).ToNot(HaveOccurred())
|
|
Expect(chunks).To(BeNumerically(">=", 2))
|
|
Expect(string(first[0:4])).To(Equal("RIFF"))
|
|
Expect(strings.HasPrefix(string(first[8:12]), "WAVE")).To(BeTrue())
|
|
})
|
|
})
|