mirror of
https://github.com/mudler/LocalAI.git
synced 2026-06-14 03:37:47 -04:00
* feat(qwen3-tts-cpp): repoint upstream to ServeurpersoCom/qwentts.cpp Signed-off-by: Ettore Di Giacinto <mudler@localai.io> Assisted-by: Claude:claude-opus-4-8 [Claude Code] * feat(qwen3-tts-cpp): flatten qt_* ABI into qt3_* purego shim Signed-off-by: Ettore Di Giacinto <mudler@localai.io> Assisted-by: Claude:claude-opus-4-8 [Claude Code] * feat(qwen3-tts-cpp): build shim against upstream qwen-core static lib Signed-off-by: Ettore Di Giacinto <mudler@localai.io> Assisted-by: Claude:claude-opus-4-8 [Claude Code] * feat(qwen3-tts-cpp): add option/language/voice/sampling parsing Signed-off-by: Ettore Di Giacinto <mudler@localai.io> Assisted-by: Claude:claude-opus-4-8 [Claude Code] * feat(qwen3-tts-cpp): add 24kHz WAV encode/decode/stream-header helpers Signed-off-by: Ettore Di Giacinto <mudler@localai.io> Assisted-by: Claude:claude-opus-4-8 [Claude Code] * feat(qwen3-tts-cpp): purego backend with streaming, speakers, voice design Map TTSRequest onto qwentts.cpp: instructions->instruct, voice->named speaker or clone-reference path, params map->ref_text + sampling. Add TTSStream over the qt chunk callback. Signed-off-by: Ettore Di Giacinto <mudler@localai.io> Assisted-by: Claude:claude-opus-4-8 [Claude Code] * test(qwen3-tts-cpp): unit specs + build-gated TTS/TTSStream e2e Signed-off-by: Ettore Di Giacinto <mudler@localai.io> Assisted-by: Claude:claude-opus-4-8 [Claude Code] * fix(qwen3-tts-cpp): close defensive PCM-free gap on zero-sample result Register CppPCMFree before the n<=0 guard so a non-null buffer with zero samples cannot leak (the C contract returns NULL on failure, so this is defensive). Raised in code review. Signed-off-by: Ettore Di Giacinto <mudler@localai.io> Assisted-by: Claude:claude-opus-4-8 [Claude Code] * feat(qwen3-tts-cpp): advertise TTSStream capability Signed-off-by: Ettore Di Giacinto <mudler@localai.io> Assisted-by: Claude:claude-opus-4-8 [Claude Code] * chore(qwen3-tts-cpp): update backend index metadata for qwentts.cpp Signed-off-by: Ettore Di Giacinto <mudler@localai.io> Assisted-by: Claude:claude-opus-4-8 [Claude Code] * feat(gallery): qwentts.cpp models - base/customvoice/voicedesign, Q8_0 & Q4_K_M Signed-off-by: Ettore Di Giacinto <mudler@localai.io> Assisted-by: Claude:claude-opus-4-8 [Claude Code] * docs(qwen3-tts-cpp): release note for qwentts.cpp migration Signed-off-by: Ettore Di Giacinto <mudler@localai.io> Assisted-by: Claude:claude-opus-4-8 [Claude Code] * test(qwen3-tts-cpp): cover audio_path voice-cloning fallback Add resolveRequest unit specs (config audio_path used as the clone reference when Voice is empty; per-request audio Voice overrides it; a named-speaker Voice does not trigger cloning) plus a real-inference e2e that clones from audio_path (confirmed ref_spk_emb=yes in the pipeline). Signed-off-by: Ettore Di Giacinto <mudler@localai.io> Assisted-by: Claude:claude-opus-4-8 [Claude Code] * chore(qwen3-tts-cpp): drop the release-note doc Signed-off-by: Ettore Di Giacinto <mudler@localai.io> Assisted-by: Claude:claude-opus-4-8 [Claude Code] --------- Signed-off-by: Ettore Di Giacinto <mudler@localai.io> Co-authored-by: Ettore Di Giacinto <mudler@localai.io>
96 lines
3.0 KiB
Go
96 lines
3.0 KiB
Go
package main
|
|
|
|
import (
|
|
"math"
|
|
"os"
|
|
"strings"
|
|
|
|
"github.com/ebitengine/purego"
|
|
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
|
|
. "github.com/onsi/ginkgo/v2"
|
|
. "github.com/onsi/gomega"
|
|
)
|
|
|
|
func ttsReq(text, voice string, lang *string, dst string) *pb.TTSRequest {
|
|
return &pb.TTSRequest{Text: text, Voice: voice, Language: lang, Dst: dst}
|
|
}
|
|
|
|
var _ = Describe("qwen3-tts-cpp e2e", Label("e2e"), func() {
|
|
var loaded bool
|
|
|
|
BeforeEach(func() {
|
|
modelPath := os.Getenv("QWEN3TTS_MODEL")
|
|
codecPath := os.Getenv("QWEN3TTS_CODEC")
|
|
if modelPath == "" || codecPath == "" {
|
|
Skip("QWEN3TTS_MODEL / QWEN3TTS_CODEC not set; skipping e2e")
|
|
}
|
|
if !loaded {
|
|
lib := os.Getenv("QWEN3TTS_LIBRARY")
|
|
if lib == "" {
|
|
lib = "./libgoqwen3ttscpp-fallback.so"
|
|
}
|
|
h, err := purego.Dlopen(lib, purego.RTLD_NOW|purego.RTLD_GLOBAL)
|
|
Expect(err).ToNot(HaveOccurred())
|
|
purego.RegisterLibFunc(&CppLoad, h, "qt3_load")
|
|
purego.RegisterLibFunc(&CppTTS, h, "qt3_tts")
|
|
purego.RegisterLibFunc(&CppTTSStream, h, "qt3_tts_stream")
|
|
purego.RegisterLibFunc(&CppPCMFree, h, "qt3_pcm_free")
|
|
purego.RegisterLibFunc(&CppUnload, h, "qt3_unload")
|
|
Expect(CppLoad(modelPath, codecPath, 1, 0)).To(Equal(0))
|
|
loaded = true
|
|
}
|
|
})
|
|
|
|
It("synthesizes a WAV file via TTS", func() {
|
|
b := &Qwen3TtsCpp{opts: loadOptions{seed: 42, useFA: true}}
|
|
dst := GinkgoT().TempDir() + "/out.wav"
|
|
lang := "english"
|
|
err := b.TTS(ttsReq("Hello world.", "", &lang, dst))
|
|
Expect(err).ToNot(HaveOccurred())
|
|
fi, err := os.Stat(dst)
|
|
Expect(err).ToNot(HaveOccurred())
|
|
Expect(fi.Size()).To(BeNumerically(">", int64(44)))
|
|
})
|
|
|
|
It("streams audio chunks via TTSStream", func() {
|
|
b := &Qwen3TtsCpp{opts: loadOptions{seed: 42, useFA: true}}
|
|
results := make(chan []byte, 1024)
|
|
lang := "english"
|
|
done := make(chan error, 1)
|
|
go func() { done <- b.TTSStream(ttsReq("Hello there, streaming test.", "", &lang, ""), results) }()
|
|
|
|
var chunks int
|
|
var first []byte
|
|
for c := range results {
|
|
if chunks == 0 {
|
|
first = c
|
|
}
|
|
chunks++
|
|
}
|
|
Expect(<-done).ToNot(HaveOccurred())
|
|
Expect(chunks).To(BeNumerically(">=", 2))
|
|
Expect(string(first[0:4])).To(Equal("RIFF"))
|
|
Expect(strings.HasPrefix(string(first[8:12]), "WAVE")).To(BeTrue())
|
|
})
|
|
|
|
It("clones a voice from the config audio_path reference", func() {
|
|
// 1s of 24kHz mono audio as a clone reference; the base model carries
|
|
// a speaker encoder, so audio_path drives x-vector voice cloning.
|
|
ref := GinkgoT().TempDir() + "/ref.wav"
|
|
samples := make([]float32, qwen3ttsSampleRate)
|
|
for i := range samples {
|
|
samples[i] = float32(0.05 * math.Sin(float64(i)*0.06))
|
|
}
|
|
Expect(writeWAV24k(ref, samples)).To(Succeed())
|
|
|
|
b := &Qwen3TtsCpp{opts: loadOptions{seed: 42, useFA: true}, audioPath: ref}
|
|
dst := GinkgoT().TempDir() + "/clone.wav"
|
|
lang := "english"
|
|
// Empty Voice -> the config audio_path is used as the clone reference.
|
|
Expect(b.TTS(ttsReq("Cloned voice test.", "", &lang, dst))).To(Succeed())
|
|
fi, err := os.Stat(dst)
|
|
Expect(err).ToNot(HaveOccurred())
|
|
Expect(fi.Size()).To(BeNumerically(">", int64(44)))
|
|
})
|
|
})
|