Files
LocalAI/backend/go/qwen3-tts-cpp/qwen3ttscpp_test.go
LocalAI [bot] 4bb592cf91 feat(qwen3-tts-cpp): migrate to ServeurpersoCom/qwentts.cpp (streaming, speakers, voice design) (#10316)
* feat(qwen3-tts-cpp): repoint upstream to ServeurpersoCom/qwentts.cpp

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Assisted-by: Claude:claude-opus-4-8 [Claude Code]

* feat(qwen3-tts-cpp): flatten qt_* ABI into qt3_* purego shim

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Assisted-by: Claude:claude-opus-4-8 [Claude Code]

* feat(qwen3-tts-cpp): build shim against upstream qwen-core static lib

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Assisted-by: Claude:claude-opus-4-8 [Claude Code]

* feat(qwen3-tts-cpp): add option/language/voice/sampling parsing

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Assisted-by: Claude:claude-opus-4-8 [Claude Code]

* feat(qwen3-tts-cpp): add 24kHz WAV encode/decode/stream-header helpers

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Assisted-by: Claude:claude-opus-4-8 [Claude Code]

* feat(qwen3-tts-cpp): purego backend with streaming, speakers, voice design

Map TTSRequest onto qwentts.cpp: instructions->instruct, voice->named
speaker or clone-reference path, params map->ref_text + sampling. Add
TTSStream over the qt chunk callback.

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Assisted-by: Claude:claude-opus-4-8 [Claude Code]

* test(qwen3-tts-cpp): unit specs + build-gated TTS/TTSStream e2e

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Assisted-by: Claude:claude-opus-4-8 [Claude Code]

* fix(qwen3-tts-cpp): close defensive PCM-free gap on zero-sample result

Register CppPCMFree before the n<=0 guard so a non-null buffer with zero
samples cannot leak (the C contract returns NULL on failure, so this is
defensive). Raised in code review.

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Assisted-by: Claude:claude-opus-4-8 [Claude Code]

* feat(qwen3-tts-cpp): advertise TTSStream capability

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Assisted-by: Claude:claude-opus-4-8 [Claude Code]

* chore(qwen3-tts-cpp): update backend index metadata for qwentts.cpp

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Assisted-by: Claude:claude-opus-4-8 [Claude Code]

* feat(gallery): qwentts.cpp models - base/customvoice/voicedesign, Q8_0 & Q4_K_M

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Assisted-by: Claude:claude-opus-4-8 [Claude Code]

* docs(qwen3-tts-cpp): release note for qwentts.cpp migration

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Assisted-by: Claude:claude-opus-4-8 [Claude Code]

* test(qwen3-tts-cpp): cover audio_path voice-cloning fallback

Add resolveRequest unit specs (config audio_path used as the clone
reference when Voice is empty; per-request audio Voice overrides it; a
named-speaker Voice does not trigger cloning) plus a real-inference e2e
that clones from audio_path (confirmed ref_spk_emb=yes in the pipeline).

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Assisted-by: Claude:claude-opus-4-8 [Claude Code]

* chore(qwen3-tts-cpp): drop the release-note doc

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Assisted-by: Claude:claude-opus-4-8 [Claude Code]

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Co-authored-by: Ettore Di Giacinto <mudler@localai.io>
2026-06-13 23:09:59 +02:00

137 lines
4.3 KiB
Go

package main
import (
"bytes"
"encoding/binary"
"testing"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)
// TestQwen3TtsCpp is the `go test` entry point for the Ginkgo specs in this
// package. It installs Ginkgo's Fail as the Gomega failure handler and then
// runs every registered spec under a single suite description.
func TestQwen3TtsCpp(t *testing.T) {
	const suiteName = "qwen3-tts-cpp suite"

	RegisterFailHandler(Fail)
	RunSpecs(t, suiteName)
}
// Specs for normalizeLanguage: every table entry passes one caller-supplied
// language string through the function and compares the result with the
// expected qwentts language name.
var _ = Describe("normalizeLanguage", func() {
	// expectMapping is the shared table body; each Entry below supplies the
	// (input, expected) pair it is invoked with.
	expectMapping := func(input, expected string) {
		Expect(normalizeLanguage(input)).To(Equal(expected))
	}

	DescribeTable("maps caller language to qwentts language names",
		expectMapping,

		// Empty and "auto" both yield the empty string.
		Entry("empty stays empty", "", ""),
		Entry("auto maps to empty", "auto", ""),

		// Full names, short codes and locale-tagged codes.
		Entry("english full name", "English", "english"),
		Entry("english code", "en", "english"),
		Entry("locale suffix stripped", "en-US", "english"),
		Entry("underscore locale", "zh_CN", "chinese"),

		// Aliases, already-normalized names and unknown values.
		Entry("mandarin alias", "mandarin", "chinese"),
		Entry("japanese already full", "japanese", "japanese"),
		Entry("unknown passes through normalized", "xx", "xx"),
	)
})
// Specs for resolveVoice, which splits a voice string into a named speaker
// and a clone-reference audio path; each spec checks both returned values.
var _ = Describe("resolveVoice", func() {
	It("treats a bare token as a named speaker", func() {
		speaker, cloneRef := resolveVoice("serena")

		Expect(speaker).To(Equal("serena"))
		Expect(cloneRef).To(BeEmpty())
	})

	It("treats an audio path as a clone reference (case-insensitive ext)", func() {
		// Upper-case extension on purpose: the match must ignore case.
		const upperCaseWav = "/x/ref.WAV"

		speaker, cloneRef := resolveVoice(upperCaseWav)

		Expect(speaker).To(BeEmpty())
		Expect(cloneRef).To(Equal(upperCaseWav))
	})

	It("recognizes mp3/flac/ogg/m4a", func() {
		audioPaths := []string{"a.mp3", "b.flac", "c.ogg", "d.m4a"}

		for _, audioPath := range audioPaths {
			speaker, cloneRef := resolveVoice(audioPath)

			Expect(speaker).To(BeEmpty())
			Expect(cloneRef).To(Equal(audioPath))
		}
	})

	It("returns empty for empty input", func() {
		// The input is a single space, not the zero-length string.
		speaker, cloneRef := resolveVoice(" ")

		Expect(speaker).To(BeEmpty())
		Expect(cloneRef).To(BeEmpty())
	})
})
// Specs for parseOptions, which turns "key:value" option strings into the
// backend's option struct (codecPath, useFA, clampFP16, seed).
var _ = Describe("parseOptions", func() {
	It("extracts codec, use_fa, clamp_fp16, seed", func() {
		// The trailing unrecognized key is part of the input on purpose; the
		// assertions below only cover the four recognized keys.
		rawOptions := []string{
			"tokenizer:tok.gguf",
			"use_fa:false",
			"clamp_fp16:true",
			"seed:7",
			"unknown:ignored",
		}

		opts := parseOptions(rawOptions)

		Expect(opts.codecPath).To(Equal("tok.gguf"))
		Expect(opts.useFA).To(BeFalse())
		Expect(opts.clampFP16).To(BeTrue())
		Expect(opts.seed).To(Equal(int64(7)))
	})

	It("accepts codec: as an alias for tokenizer:", func() {
		opts := parseOptions([]string{"codec:c.gguf"})

		Expect(opts.codecPath).To(Equal("c.gguf"))
	})

	It("defaults use_fa true and seed -1", func() {
		// A nil option list exercises the defaults.
		defaults := parseOptions(nil)

		Expect(defaults.useFA).To(BeTrue())
		Expect(defaults.seed).To(Equal(int64(-1)))
	})
})
// Specs for parseSampling, which builds sampling settings from a request's
// string params map plus a fallback seed passed as the second argument.
var _ = Describe("parseSampling", func() {
	// Absolute tolerance used for every floating-point comparison below.
	const tolerance = 1e-6

	It("applies qt defaults when params are absent", func() {
		sampling := parseSampling(nil, -1)

		Expect(sampling.temperature).To(BeNumerically("~", 0.9, tolerance))
		Expect(sampling.topK).To(Equal(50))
		Expect(sampling.topP).To(BeNumerically("~", 1.0, tolerance))
		Expect(sampling.repPen).To(BeNumerically("~", 1.05, tolerance))
		Expect(sampling.maxNew).To(Equal(2048))
		Expect(sampling.seed).To(Equal(int64(-1)))
	})

	It("reads overrides and falls back to default seed", func() {
		// No "seed" key here, so the seed must come from the second argument.
		overrides := map[string]string{
			"temperature":        "0.5",
			"top_k":              "10",
			"top_p":              "0.8",
			"repetition_penalty": "1.2",
			"max_new_tokens":     "512",
		}

		sampling := parseSampling(overrides, 99)

		Expect(sampling.temperature).To(BeNumerically("~", 0.5, tolerance))
		Expect(sampling.topK).To(Equal(10))
		Expect(sampling.topP).To(BeNumerically("~", 0.8, tolerance))
		Expect(sampling.repPen).To(BeNumerically("~", 1.2, tolerance))
		Expect(sampling.maxNew).To(Equal(512))
		Expect(sampling.seed).To(Equal(int64(99)))
	})

	It("reads an explicit seed override", func() {
		sampling := parseSampling(map[string]string{"seed": "123"}, -1)

		Expect(sampling.seed).To(Equal(int64(123)))
	})
})
// Spec for wavHeader24k: checks the header length, the four chunk tags at
// their byte offsets, and the little-endian sample-rate field.
var _ = Describe("wavHeader24k", func() {
	It("emits a 44-byte streaming WAV header at 24 kHz mono 16-bit", func() {
		header := wavHeader24k()
		Expect(header).To(HaveLen(44))

		// Four-byte ASCII tags and the offsets they are expected at.
		chunkTags := []struct {
			offset int
			tag    string
		}{
			{0, "RIFF"},
			{8, "WAVE"},
			{12, "fmt "},
			{36, "data"},
		}
		for _, c := range chunkTags {
			Expect(string(header[c.offset : c.offset+4])).To(Equal(c.tag))
		}

		// Bytes 24..27 are decoded as a little-endian uint32 sample rate.
		var sampleRate uint32
		rateField := bytes.NewReader(header[24:28])
		err := binary.Read(rateField, binary.LittleEndian, &sampleRate)
		Expect(err).To(Succeed())
		Expect(sampleRate).To(Equal(uint32(24000)))
	})
})
// Spec for floatToPCM16LE: converts five float32 samples and checks the
// resulting little-endian 16-bit values, including out-of-range inputs.
var _ = Describe("floatToPCM16LE", func() {
	It("clamps and converts float PCM to little-endian int16 bytes", func() {
		b := floatToPCM16LE([]float32{0, 1.0, -1.0, 2.0, -2.0})
		// Five input samples at two bytes each. A failed length check aborts
		// the spec, so every offset read below is in range.
		Expect(b).To(HaveLen(10))

		// sampleAt decodes the signed 16-bit little-endian sample starting at
		// byte offset off. Decoding straight from the slice has no error
		// path, so nothing is silently discarded as it was with binary.Read.
		sampleAt := func(off int) int16 {
			return int16(binary.LittleEndian.Uint16(b[off : off+2]))
		}

		Expect(sampleAt(0)).To(Equal(int16(0)))
		Expect(sampleAt(2)).To(Equal(int16(32767)))
		Expect(sampleAt(4)).To(Equal(int16(-32767)))
		Expect(sampleAt(6)).To(Equal(int16(32767)))  // clamped from 2.0
		Expect(sampleAt(8)).To(Equal(int16(-32767))) // clamped from -2.0
	})
})