From 047a8c57a7d08f5dca8a81927bc5b9cb52c41de6 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 7 May 2026 16:00:42 +0000 Subject: [PATCH] refactor(audio): propagate request ctx into TTS, sound-gen, audio-transform Same ctx-plumbing pattern applied to the rest of the audio path. CLI callers use context.Background() since there is no request scope; HTTP callers use c.Request().Context(). Assisted-by: Claude:claude-haiku-4-5 Signed-off-by: Ettore Di Giacinto --- core/backend/audio_transform.go | 3 ++- core/backend/soundgeneration.go | 3 ++- core/backend/tts.go | 6 ++++-- core/cli/soundgeneration.go | 2 +- core/cli/tts.go | 2 +- core/http/endpoints/elevenlabs/soundgeneration.go | 1 + core/http/endpoints/elevenlabs/tts.go | 2 +- core/http/endpoints/localai/audio_transform.go | 2 +- core/http/endpoints/localai/tts.go | 4 ++-- core/http/endpoints/openai/realtime_model.go | 2 +- 10 files changed, 16 insertions(+), 11 deletions(-) diff --git a/core/backend/audio_transform.go b/core/backend/audio_transform.go index ea44ab6fd..3dbc8c833 100644 --- a/core/backend/audio_transform.go +++ b/core/backend/audio_transform.go @@ -40,6 +40,7 @@ type AudioTransformOutputs struct { // required; `referencePath` is optional (empty => backend zero-fills the // reference channel). func ModelAudioTransform( + ctx context.Context, audioPath, referencePath string, opts AudioTransformOptions, loader *model.ModelLoader, @@ -81,7 +82,7 @@ func ModelAudioTransform( startTime = time.Now() } - res, err := transformModel.AudioTransform(context.Background(), &proto.AudioTransformRequest{ + res, err := transformModel.AudioTransform(ctx, &proto.AudioTransformRequest{ AudioPath: audioPath, ReferencePath: referencePath, Dst: dst, diff --git a/core/backend/soundgeneration.go b/core/backend/soundgeneration.go index f7f4d2f82..dccc4df74 100644 --- a/core/backend/soundgeneration.go +++ b/core/backend/soundgeneration.go @@ -15,6 +15,7 @@ import ( ) func SoundGeneration( + ctx context.Context, text string, duration *float32, temperature *float32, @@ -101,7 +102,7 @@ func SoundGeneration( startTime = time.Now() } - res, err := soundGenModel.SoundGeneration(context.Background(), req) + res, err := soundGenModel.SoundGeneration(ctx, req) if appConfig.EnableTracing { errStr := "" diff --git a/core/backend/tts.go b/core/backend/tts.go index 2f3d31193..9af9d0d44 100644 --- a/core/backend/tts.go +++ b/core/backend/tts.go @@ -21,6 +21,7 @@ import ( ) func ModelTTS( + ctx context.Context, text, voice, language string, @@ -70,7 +71,7 @@ func ModelTTS( startTime = time.Now() } - res, err := ttsModel.TTS(context.Background(), &proto.TTSRequest{ + res, err := ttsModel.TTS(ctx, &proto.TTSRequest{ Text: text, Model: modelPath, Voice: voice, @@ -121,6 +122,7 @@ func ModelTTS( } func ModelTTSStream( + ctx context.Context, text, voice, language string, @@ -172,7 +174,7 @@ func ModelTTSStream( var totalPCMBytes int snippetCapped := false - err = ttsModel.TTSStream(context.Background(), &proto.TTSRequest{ + err = ttsModel.TTSStream(ctx, &proto.TTSRequest{ Text: text, Model: modelPath, Voice: voice, diff --git a/core/cli/soundgeneration.go b/core/cli/soundgeneration.go index 4798de628..3eb6cfa4b 100644 --- a/core/cli/soundgeneration.go +++ b/core/cli/soundgeneration.go @@ -97,7 +97,7 @@ func (t *SoundGenerationCMD) Run(ctx *cliContext.Context) error { inputFile = &t.InputFile } - filePath, _, err := backend.SoundGeneration(text, + filePath, _, err := backend.SoundGeneration(context.Background(), text, parseToFloat32Ptr(t.Duration), parseToFloat32Ptr(t.Temperature), &t.DoSample, inputFile, parseToInt32Ptr(t.InputFileSampleDivisor), nil, "", "", nil, "", "", "", nil, diff --git a/core/cli/tts.go b/core/cli/tts.go index 72d4ee24b..0f7b8bc6c 100644 --- a/core/cli/tts.go +++ b/core/cli/tts.go @@ -62,7 +62,7 @@ func (t *TTSCMD) Run(ctx *cliContext.Context) error { options.Backend = t.Backend options.Model = t.Model - filePath, _, err := backend.ModelTTS(text, t.Voice, t.Language, ml, opts, options) + filePath, _, err := backend.ModelTTS(context.Background(), text, t.Voice, t.Language, ml, opts, options) if err != nil { return err } diff --git a/core/http/endpoints/elevenlabs/soundgeneration.go b/core/http/endpoints/elevenlabs/soundgeneration.go index 7034ea042..eb9152e43 100644 --- a/core/http/endpoints/elevenlabs/soundgeneration.go +++ b/core/http/endpoints/elevenlabs/soundgeneration.go @@ -44,6 +44,7 @@ func SoundGenerationEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader bpm = &b } filePath, _, err := backend.SoundGeneration( + c.Request().Context(), input.Text, input.Duration, input.Temperature, input.DoSample, nil, nil, input.Think, input.Caption, input.Lyrics, bpm, input.Keyscale, diff --git a/core/http/endpoints/elevenlabs/tts.go b/core/http/endpoints/elevenlabs/tts.go index 3fc8c8f07..110ae292a 100644 --- a/core/http/endpoints/elevenlabs/tts.go +++ b/core/http/endpoints/elevenlabs/tts.go @@ -37,7 +37,7 @@ func TTSEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, appConfig xlog.Debug("elevenlabs TTS request received", "modelName", input.ModelID) - filePath, _, err := backend.ModelTTS(input.Text, voiceID, input.LanguageCode, ml, appConfig, *cfg) + filePath, _, err := backend.ModelTTS(c.Request().Context(), input.Text, voiceID, input.LanguageCode, ml, appConfig, *cfg) if err != nil { return err } diff --git a/core/http/endpoints/localai/audio_transform.go b/core/http/endpoints/localai/audio_transform.go index b8ce8530d..a11c6595a 100644 --- a/core/http/endpoints/localai/audio_transform.go +++ b/core/http/endpoints/localai/audio_transform.go @@ -109,7 +109,7 @@ func AudioTransformEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, } } - out, _, err := backend.ModelAudioTransform(audioPath, referencePath, backend.AudioTransformOptions{ + out, _, err := backend.ModelAudioTransform(c.Request().Context(), audioPath, referencePath, backend.AudioTransformOptions{ Params: params, }, ml, appConfig, *cfg) if err != nil { diff --git a/core/http/endpoints/localai/tts.go b/core/http/endpoints/localai/tts.go index 40e488191..fe9199c24 100644 --- a/core/http/endpoints/localai/tts.go +++ b/core/http/endpoints/localai/tts.go @@ -59,7 +59,7 @@ func TTSEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, appConfig c.Response().Header().Set("Connection", "keep-alive") // Stream audio chunks as they're generated - err := backend.ModelTTSStream(input.Input, cfg.Voice, cfg.Language, ml, appConfig, *cfg, func(audioChunk []byte) error { + err := backend.ModelTTSStream(c.Request().Context(), input.Input, cfg.Voice, cfg.Language, ml, appConfig, *cfg, func(audioChunk []byte) error { _, writeErr := c.Response().Write(audioChunk) if writeErr != nil { return writeErr @@ -75,7 +75,7 @@ func TTSEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, appConfig } // Non-streaming TTS (existing behavior) - filePath, _, err := backend.ModelTTS(input.Input, cfg.Voice, cfg.Language, ml, appConfig, *cfg) + filePath, _, err := backend.ModelTTS(c.Request().Context(), input.Input, cfg.Voice, cfg.Language, ml, appConfig, *cfg) if err != nil { return err } diff --git a/core/http/endpoints/openai/realtime_model.go b/core/http/endpoints/openai/realtime_model.go index 25d62ec27..bfeb70739 100644 --- a/core/http/endpoints/openai/realtime_model.go +++ b/core/http/endpoints/openai/realtime_model.go @@ -241,7 +241,7 @@ func (m *wrappedModel) Predict(ctx context.Context, messages schema.Messages, im } func (m *wrappedModel) TTS(ctx context.Context, text, voice, language string) (string, *proto.Result, error) { - return backend.ModelTTS(text, voice, language, m.modelLoader, m.appConfig, *m.TTSConfig) + return backend.ModelTTS(ctx, text, voice, language, m.modelLoader, m.appConfig, *m.TTSConfig) } func (m *wrappedModel) PredictConfig() *config.ModelConfig {