mirror of
https://github.com/mudler/LocalAI.git
synced 2026-06-06 15:56:06 -04:00
The OpenAI-compatible TTS endpoint accepts an `instructions` field, but it was silently dropped at the HTTP->gRPC boundary: neither schema.TTSRequest nor the gRPC TTSRequest proto carried it, so backends could only read such a value from static YAML options (identical for every request). This blocked per-line emotion/style and, for Qwen3-TTS VoiceDesign, limited a model config to a single designed voice. Plumb a generic per-request instruction string end to end, plus an optional backend-specific params map: - proto: add `optional string instructions` and `map<string,string> params` to TTSRequest. - schema: add Instructions (maps OpenAI `instructions`) and Params (LocalAI extension) to schema.TTSRequest. - core: thread both through ModelTTS/ModelTTSStream via a newTTSRequest helper that attaches instructions only when non-empty (so backends can fall back to YAML when unset); forward them from the /v1/audio/speech handler. - qwen-tts: prefer the per-request instruction over the YAML `instruct` option (used by both mode detection and generation) and merge per-request params. - chatterbox: merge per-request params (coerced to float/int/bool) over YAML options into generate() kwargs. Fully backward compatible: empty instructions fall back to the YAML option and backends that don't support style/voice instructions ignore the field. Closes #10164 Assisted-by: Claude:claude-opus-4-8 [Claude Code] Signed-off-by: Ettore Di Giacinto <mudler@localai.io> Co-authored-by: Ettore Di Giacinto <mudler@localai.io>
43 lines
1.7 KiB
Go
43 lines
1.7 KiB
Go
package backend
|
|
|
|
// Specs for the TTSRequest assembly that carries the per-request
|
|
// instructions/params from the OpenAI `instructions` field (and the LocalAI
|
|
// `params` extension) through to the gRPC boundary. Before this plumbing the
|
|
// instruction value was dropped before reaching the backend; these specs pin
|
|
// that it now survives, and that the empty case stays backward compatible.
|
|
|
|
import (
|
|
. "github.com/onsi/ginkgo/v2"
|
|
. "github.com/onsi/gomega"
|
|
)
|
|
|
|
var _ = Describe("newTTSRequest", func() {
|
|
It("attaches the instructions when a per-request value is set", func() {
|
|
req := newTTSRequest("hi", "/m", "alloy", "/out.wav", "en", "cheerful narrator", nil)
|
|
Expect(req.Instructions).ToNot(BeNil())
|
|
Expect(req.GetInstructions()).To(Equal("cheerful narrator"))
|
|
Expect(req.GetText()).To(Equal("hi"))
|
|
Expect(req.GetVoice()).To(Equal("alloy"))
|
|
Expect(req.GetDst()).To(Equal("/out.wav"))
|
|
Expect(req.GetLanguage()).To(Equal("en"))
|
|
})
|
|
|
|
It("leaves instructions unset when empty so backends fall back to YAML", func() {
|
|
req := newTTSRequest("hi", "/m", "", "/out.wav", "", "", nil)
|
|
Expect(req.Instructions).To(BeNil())
|
|
Expect(req.GetInstructions()).To(Equal(""))
|
|
})
|
|
|
|
It("forwards per-request params through to the backend", func() {
|
|
params := map[string]string{"exaggeration": "0.7", "cfg_weight": "0.3"}
|
|
req := newTTSRequest("hi", "/m", "", "/out.wav", "", "", params)
|
|
Expect(req.GetParams()).To(HaveKeyWithValue("exaggeration", "0.7"))
|
|
Expect(req.GetParams()).To(HaveKeyWithValue("cfg_weight", "0.3"))
|
|
})
|
|
|
|
It("leaves params nil when none are supplied", func() {
|
|
req := newTTSRequest("hi", "/m", "", "/out.wav", "", "", nil)
|
|
Expect(req.GetParams()).To(BeNil())
|
|
})
|
|
})
|