mirror of
https://github.com/mudler/LocalAI.git
synced 2026-06-14 03:37:47 -04:00
* feat(qwen3-tts-cpp): repoint upstream to ServeurpersoCom/qwentts.cpp Signed-off-by: Ettore Di Giacinto <mudler@localai.io> Assisted-by: Claude:claude-opus-4-8 [Claude Code] * feat(qwen3-tts-cpp): flatten qt_* ABI into qt3_* purego shim Signed-off-by: Ettore Di Giacinto <mudler@localai.io> Assisted-by: Claude:claude-opus-4-8 [Claude Code] * feat(qwen3-tts-cpp): build shim against upstream qwen-core static lib Signed-off-by: Ettore Di Giacinto <mudler@localai.io> Assisted-by: Claude:claude-opus-4-8 [Claude Code] * feat(qwen3-tts-cpp): add option/language/voice/sampling parsing Signed-off-by: Ettore Di Giacinto <mudler@localai.io> Assisted-by: Claude:claude-opus-4-8 [Claude Code] * feat(qwen3-tts-cpp): add 24kHz WAV encode/decode/stream-header helpers Signed-off-by: Ettore Di Giacinto <mudler@localai.io> Assisted-by: Claude:claude-opus-4-8 [Claude Code] * feat(qwen3-tts-cpp): purego backend with streaming, speakers, voice design Map TTSRequest onto qwentts.cpp: instructions->instruct, voice->named speaker or clone-reference path, params map->ref_text + sampling. Add TTSStream over the qt chunk callback. Signed-off-by: Ettore Di Giacinto <mudler@localai.io> Assisted-by: Claude:claude-opus-4-8 [Claude Code] * test(qwen3-tts-cpp): unit specs + build-gated TTS/TTSStream e2e Signed-off-by: Ettore Di Giacinto <mudler@localai.io> Assisted-by: Claude:claude-opus-4-8 [Claude Code] * fix(qwen3-tts-cpp): close defensive PCM-free gap on zero-sample result Register CppPCMFree before the n<=0 guard so a non-null buffer with zero samples cannot leak (the C contract returns NULL on failure, so this is defensive). Raised in code review. Signed-off-by: Ettore Di Giacinto <mudler@localai.io> Assisted-by: Claude:claude-opus-4-8 [Claude Code] * feat(qwen3-tts-cpp): advertise TTSStream capability Signed-off-by: Ettore Di Giacinto <mudler@localai.io> Assisted-by: Claude:claude-opus-4-8 [Claude Code] * chore(qwen3-tts-cpp): update backend index metadata for qwentts.cpp Signed-off-by: Ettore Di Giacinto <mudler@localai.io> Assisted-by: Claude:claude-opus-4-8 [Claude Code] * feat(gallery): qwentts.cpp models - base/customvoice/voicedesign, Q8_0 & Q4_K_M Signed-off-by: Ettore Di Giacinto <mudler@localai.io> Assisted-by: Claude:claude-opus-4-8 [Claude Code] * docs(qwen3-tts-cpp): release note for qwentts.cpp migration Signed-off-by: Ettore Di Giacinto <mudler@localai.io> Assisted-by: Claude:claude-opus-4-8 [Claude Code] * test(qwen3-tts-cpp): cover audio_path voice-cloning fallback Add resolveRequest unit specs (config audio_path used as the clone reference when Voice is empty; per-request audio Voice overrides it; a named-speaker Voice does not trigger cloning) plus a real-inference e2e that clones from audio_path (confirmed ref_spk_emb=yes in the pipeline). Signed-off-by: Ettore Di Giacinto <mudler@localai.io> Assisted-by: Claude:claude-opus-4-8 [Claude Code] * chore(qwen3-tts-cpp): drop the release-note doc Signed-off-by: Ettore Di Giacinto <mudler@localai.io> Assisted-by: Claude:claude-opus-4-8 [Claude Code] --------- Signed-off-by: Ettore Di Giacinto <mudler@localai.io> Co-authored-by: Ettore Di Giacinto <mudler@localai.io>
137 lines
4.3 KiB
Go
137 lines
4.3 KiB
Go
package main
|
|
|
|
import (
|
|
"bytes"
|
|
"encoding/binary"
|
|
"testing"
|
|
|
|
. "github.com/onsi/ginkgo/v2"
|
|
. "github.com/onsi/gomega"
|
|
)
|
|
|
|
func TestQwen3TtsCpp(t *testing.T) {
|
|
RegisterFailHandler(Fail)
|
|
RunSpecs(t, "qwen3-tts-cpp suite")
|
|
}
|
|
|
|
var _ = Describe("normalizeLanguage", func() {
|
|
DescribeTable("maps caller language to qwentts language names",
|
|
func(in, want string) {
|
|
Expect(normalizeLanguage(in)).To(Equal(want))
|
|
},
|
|
Entry("empty stays empty", "", ""),
|
|
Entry("auto maps to empty", "auto", ""),
|
|
Entry("english full name", "English", "english"),
|
|
Entry("english code", "en", "english"),
|
|
Entry("locale suffix stripped", "en-US", "english"),
|
|
Entry("underscore locale", "zh_CN", "chinese"),
|
|
Entry("mandarin alias", "mandarin", "chinese"),
|
|
Entry("japanese already full", "japanese", "japanese"),
|
|
Entry("unknown passes through normalized", "xx", "xx"),
|
|
)
|
|
})
|
|
|
|
var _ = Describe("resolveVoice", func() {
|
|
It("treats a bare token as a named speaker", func() {
|
|
sp, ref := resolveVoice("serena")
|
|
Expect(sp).To(Equal("serena"))
|
|
Expect(ref).To(BeEmpty())
|
|
})
|
|
It("treats an audio path as a clone reference (case-insensitive ext)", func() {
|
|
sp, ref := resolveVoice("/x/ref.WAV")
|
|
Expect(sp).To(BeEmpty())
|
|
Expect(ref).To(Equal("/x/ref.WAV"))
|
|
})
|
|
It("recognizes mp3/flac/ogg/m4a", func() {
|
|
for _, p := range []string{"a.mp3", "b.flac", "c.ogg", "d.m4a"} {
|
|
sp, ref := resolveVoice(p)
|
|
Expect(sp).To(BeEmpty())
|
|
Expect(ref).To(Equal(p))
|
|
}
|
|
})
|
|
It("returns empty for empty input", func() {
|
|
sp, ref := resolveVoice(" ")
|
|
Expect(sp).To(BeEmpty())
|
|
Expect(ref).To(BeEmpty())
|
|
})
|
|
})
|
|
|
|
var _ = Describe("parseOptions", func() {
|
|
It("extracts codec, use_fa, clamp_fp16, seed", func() {
|
|
o := parseOptions([]string{
|
|
"tokenizer:tok.gguf", "use_fa:false", "clamp_fp16:true",
|
|
"seed:7", "unknown:ignored",
|
|
})
|
|
Expect(o.codecPath).To(Equal("tok.gguf"))
|
|
Expect(o.useFA).To(BeFalse())
|
|
Expect(o.clampFP16).To(BeTrue())
|
|
Expect(o.seed).To(Equal(int64(7)))
|
|
})
|
|
It("accepts codec: as an alias for tokenizer:", func() {
|
|
Expect(parseOptions([]string{"codec:c.gguf"}).codecPath).To(Equal("c.gguf"))
|
|
})
|
|
It("defaults use_fa true and seed -1", func() {
|
|
o := parseOptions(nil)
|
|
Expect(o.useFA).To(BeTrue())
|
|
Expect(o.seed).To(Equal(int64(-1)))
|
|
})
|
|
})
|
|
|
|
var _ = Describe("parseSampling", func() {
|
|
It("applies qt defaults when params are absent", func() {
|
|
s := parseSampling(nil, -1)
|
|
Expect(s.temperature).To(BeNumerically("~", 0.9, 1e-6))
|
|
Expect(s.topK).To(Equal(50))
|
|
Expect(s.topP).To(BeNumerically("~", 1.0, 1e-6))
|
|
Expect(s.repPen).To(BeNumerically("~", 1.05, 1e-6))
|
|
Expect(s.maxNew).To(Equal(2048))
|
|
Expect(s.seed).To(Equal(int64(-1)))
|
|
})
|
|
It("reads overrides and falls back to default seed", func() {
|
|
s := parseSampling(map[string]string{
|
|
"temperature": "0.5", "top_k": "10", "top_p": "0.8",
|
|
"repetition_penalty": "1.2", "max_new_tokens": "512",
|
|
}, 99)
|
|
Expect(s.temperature).To(BeNumerically("~", 0.5, 1e-6))
|
|
Expect(s.topK).To(Equal(10))
|
|
Expect(s.topP).To(BeNumerically("~", 0.8, 1e-6))
|
|
Expect(s.repPen).To(BeNumerically("~", 1.2, 1e-6))
|
|
Expect(s.maxNew).To(Equal(512))
|
|
Expect(s.seed).To(Equal(int64(99)))
|
|
})
|
|
It("reads an explicit seed override", func() {
|
|
Expect(parseSampling(map[string]string{"seed": "123"}, -1).seed).To(Equal(int64(123)))
|
|
})
|
|
})
|
|
|
|
var _ = Describe("wavHeader24k", func() {
|
|
It("emits a 44-byte streaming WAV header at 24 kHz mono 16-bit", func() {
|
|
h := wavHeader24k()
|
|
Expect(h).To(HaveLen(44))
|
|
Expect(string(h[0:4])).To(Equal("RIFF"))
|
|
Expect(string(h[8:12])).To(Equal("WAVE"))
|
|
Expect(string(h[12:16])).To(Equal("fmt "))
|
|
Expect(string(h[36:40])).To(Equal("data"))
|
|
var sampleRate uint32
|
|
Expect(binary.Read(bytes.NewReader(h[24:28]), binary.LittleEndian, &sampleRate)).To(Succeed())
|
|
Expect(sampleRate).To(Equal(uint32(24000)))
|
|
})
|
|
})
|
|
|
|
var _ = Describe("floatToPCM16LE", func() {
|
|
It("clamps and converts float PCM to little-endian int16 bytes", func() {
|
|
b := floatToPCM16LE([]float32{0, 1.0, -1.0, 2.0, -2.0})
|
|
Expect(b).To(HaveLen(10))
|
|
read := func(off int) int16 {
|
|
var v int16
|
|
_ = binary.Read(bytes.NewReader(b[off:off+2]), binary.LittleEndian, &v)
|
|
return v
|
|
}
|
|
Expect(read(0)).To(Equal(int16(0)))
|
|
Expect(read(2)).To(Equal(int16(32767)))
|
|
Expect(read(4)).To(Equal(int16(-32767)))
|
|
Expect(read(6)).To(Equal(int16(32767))) // clamped from 2.0
|
|
Expect(read(8)).To(Equal(int16(-32767))) // clamped from -2.0
|
|
})
|
|
})
|