From 994063ba9a09d985cb213295362a21cb585d0712 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Thu, 4 Jun 2026 17:26:31 +0200 Subject: [PATCH] feat(qwen3-tts-cpp): normalize request language for flexible matching (#10174) The qwen3-tts.cpp backend honored the request `language` field only via exact lowercase two-letter codes in the C++ language_to_id table, silently defaulting to English for anything else (en-US, EN, english, ...). Add normalizeLanguage() in the Go handler: lowercase + trim, strip the region/locale suffix (en-US, pt_BR, zh-Hans -> en/pt/zh), and resolve common English full names (english -> en). The canonical codes match the existing C++ table, so no C++ change is needed. Covered by a pure-Go Ginkgo spec. Also document the language field and accepted forms under the Qwen3-TTS docs. Signed-off-by: Ettore Di Giacinto Assisted-by: Claude:claude-opus-4-8 [Claude Code] Co-authored-by: Ettore Di Giacinto --- backend/go/qwen3-tts-cpp/goqwen3ttscpp.go | 40 ++++++++++++++++- backend/go/qwen3-tts-cpp/language_test.go | 53 +++++++++++++++++++++++ docs/content/features/text-to-audio.md | 22 ++++++++++ 3 files changed, 114 insertions(+), 1 deletion(-) create mode 100644 backend/go/qwen3-tts-cpp/language_test.go diff --git a/backend/go/qwen3-tts-cpp/goqwen3ttscpp.go b/backend/go/qwen3-tts-cpp/goqwen3ttscpp.go index 484e06053..d34c2b746 100644 --- a/backend/go/qwen3-tts-cpp/goqwen3ttscpp.go +++ b/backend/go/qwen3-tts-cpp/goqwen3ttscpp.go @@ -4,6 +4,7 @@ import ( "fmt" "os" "path/filepath" + "strings" "github.com/mudler/LocalAI/pkg/grpc/base" pb "github.com/mudler/LocalAI/pkg/grpc/proto" @@ -21,6 +22,43 @@ type Qwen3TtsCpp struct { threads int } +// languageNameAliases maps common full language names to the canonical +// two-letter code understood by the C++ language_to_id table. 
+var languageNameAliases = map[string]string{ + "english": "en", + "russian": "ru", + "chinese": "zh", + "japanese": "ja", + "korean": "ko", + "german": "de", + "french": "fr", + "spanish": "es", + "italian": "it", + "portuguese": "pt", +} + +// normalizeLanguage coerces a caller-supplied language into the canonical code +// the model expects. It lowercases, trims, strips any region/locale suffix +// (en-US, en_US, ja.JP -> en/ja), and resolves common full names (english -> en). +// An empty input stays empty so the C++ side applies its English default; an +// unrecognized value is returned normalized so C++ can log it and default. +func normalizeLanguage(lang string) string { + lang = strings.ToLower(strings.TrimSpace(lang)) + if lang == "" { + return "" + } + + // Strip region/locale suffix: keep the segment before the first separator. + if i := strings.IndexAny(lang, "-_."); i >= 0 { + lang = lang[:i] + } + + if code, ok := languageNameAliases[lang]; ok { + return code + } + return lang +} + func (q *Qwen3TtsCpp) Load(opts *pb.ModelOptions) error { // ModelFile is the model directory path (containing GGUF files) modelDir := opts.ModelFile @@ -54,7 +92,7 @@ func (q *Qwen3TtsCpp) TTS(req *pb.TTSRequest) error { dst := req.Dst language := "" if req.Language != nil { - language = *req.Language + language = normalizeLanguage(*req.Language) } // Synthesis parameters with sensible defaults diff --git a/backend/go/qwen3-tts-cpp/language_test.go b/backend/go/qwen3-tts-cpp/language_test.go new file mode 100644 index 000000000..9c3526669 --- /dev/null +++ b/backend/go/qwen3-tts-cpp/language_test.go @@ -0,0 +1,53 @@ +package main + +import ( + "testing" + + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" +) + +func TestLanguageNormalization(t *testing.T) { + RegisterFailHandler(Fail) + RunSpecs(t, "qwen3-tts-cpp language normalization") +} + +var _ = Describe("normalizeLanguage", func() { + DescribeTable("maps caller input to the canonical model language code", + func(input, expected string) { + Expect(normalizeLanguage(input)).To(Equal(expected)) + }, + // Canonical codes pass through unchanged + Entry("canonical en", "en", "en"), + Entry("canonical zh", "zh", "zh"), + Entry("canonical pt", "pt", "pt"), + + // Case-insensitive + Entry("uppercase", "EN", "en"), + Entry("mixed case", "Ja", "ja"), + + // Surrounding whitespace + Entry("trims whitespace", " en ", "en"), + + // Region/locale stripping + Entry("BCP-47 region", "en-US", "en"), + Entry("underscore region", "en_US", "en"), + Entry("dotted locale", "ja.JP", "ja"), + Entry("region + case", "ZH-CN", "zh"), + + // Full-name aliases + Entry("english name", "english", "en"), + Entry("chinese name cased", "Chinese", "zh"), + Entry("japanese name", "japanese", "ja"), + Entry("russian name", "russian", "ru"), + Entry("portuguese name", "portuguese", "pt"), + + // Empty stays empty (C++ applies the English default) + Entry("empty", "", ""), + Entry("whitespace only", " ", ""), + + // Unknown values pass through normalized so C++ can log + default + Entry("unknown code", "klingon", "klingon"), + Entry("unknown with region", "xx-YY", "xx"), + ) +}) diff --git a/docs/content/features/text-to-audio.md b/docs/content/features/text-to-audio.md index 7399e01d1..219814e9d 100644 --- a/docs/content/features/text-to-audio.md +++ b/docs/content/features/text-to-audio.md @@ -296,6 +296,28 @@ curl http://localhost:8080/tts -H "Content-Type: application/json" -d '{ }' | aplay ``` +#### Language + +You can hint the synthesis language with the `language` request field: + +``` +curl http://localhost:8080/tts -H "Content-Type: application/json" -d '{ + "model": "qwen-tts", + "input": "Bonjour le monde.", + 
"language": "fr" + }' | aplay +``` + +Supported languages: `en` (English), `zh` (Chinese), `ru` (Russian), `ja` (Japanese), `ko` (Korean), `de` (German), `fr` (French), `es` (Spanish), `it` (Italian), `pt` (Portuguese). + +The value is matched case-insensitively and accepts a few forms for convenience: + +- the two-letter code (`fr`, `FR`) +- a locale tag, whose region or script suffix is ignored: everything from the first `-`, `_` or `.` onward is dropped (`fr-FR`, `pt_BR`, `zh-Hans` → `fr`/`pt`/`zh`) +- the full English name of the language (`french`, `Portuguese`) + +If the field is omitted or the value isn't one of the supported languages, the backend defaults to English. + #### Custom Voice Mode Qwen3-TTS supports predefined speakers. You can specify a speaker using the `voice` parameter: