fix(realtime): keep transcription model on a language-only session.update (#10295)

A transcription session.update that carries only a language (no model) —
e.g. a client forcing the STT input language — has an empty
Transcription.Model. updateSession unconditionally copied that into
session.ModelConfig.Pipeline.Transcription, blanking the pipeline's
configured transcription backend. The next utterance then transcribed
against an empty model and the backend RPC failed with "unimplemented"
(surfaced to the client as transcription_failed), so transcription
silently stopped whenever a language was selected.

Only adopt the incoming transcription model when it is non-empty, and
preserve the existing model otherwise (mirroring updateTransSession).

Signed-off-by: mudler <mudler@localai.io>
Co-authored-by: Ettore Di Giacinto <mudler@localai.io>
Co-authored-by: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
LocalAI [bot]
2026-06-13 01:01:36 +02:00
committed by GitHub
parent 51f4f67c47
commit edeacf22c4

View File

@@ -990,8 +990,18 @@ func updateSession(session *Session, update *types.SessionUnion, cl *config.Mode
}
if rt.Audio != nil && rt.Audio.Input != nil && rt.Audio.Input.Transcription != nil {
session.InputAudioTranscription = rt.Audio.Input.Transcription
session.ModelConfig.Pipeline.Transcription = rt.Audio.Input.Transcription.Model
trUpd := rt.Audio.Input.Transcription
// A language-only update (e.g. a client forcing the STT language) carries
// an empty Model. Preserve the pipeline's configured transcription backend
// instead of blanking it — otherwise the next utterance transcribes against
// an empty model and the backend RPC fails with "unimplemented".
if trUpd.Model == "" && session.InputAudioTranscription != nil {
trUpd.Model = session.InputAudioTranscription.Model
}
session.InputAudioTranscription = trUpd
if trUpd.Model != "" {
session.ModelConfig.Pipeline.Transcription = trUpd.Model
}
}
if rt.Model != "" || (rt.Audio != nil && rt.Audio.Output != nil && rt.Audio.Output.Voice != "") || (rt.Audio != nil && rt.Audio.Input != nil && rt.Audio.Input.Transcription != nil) {