From 9fac275fd36cf60aaab3149f23cef9062559378e Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto <mudler@localai.io>
Date: Thu, 7 May 2026 16:08:17 +0000
Subject: [PATCH] refactor(backend): propagate request ctx into biometric,
 detection, rerank, diarization paths

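Replaces the remaining context.Background() call sites in core/backend
with the caller's ctx. Each affected backend function (including
TokenMetrics) gains a leading ctx context.Context parameter, and the
HTTP handlers now pass c.Request().Context(). After this commit, every
core/backend/*.go entry point threads the request ctx end-to-end to the
gRPC client.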

Assisted-by: Claude:claude-haiku-4-5
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
---
 core/backend/detection.go                        | 3 ++-
 core/backend/diarization.go                      | 4 ++--
 core/backend/face_analyze.go                     | 3 ++-
 core/backend/face_embed.go                       | 3 ++-
 core/backend/face_verify.go                      | 3 ++-
 core/backend/rerank.go                           | 4 ++--
 core/backend/token_metrics.go                    | 3 ++-
 core/backend/voice_analyze.go                    | 3 ++-
 core/backend/voice_embed.go                      | 3 ++-
 core/backend/voice_verify.go                     | 3 ++-
 core/http/endpoints/jina/rerank.go               | 2 +-
 core/http/endpoints/localai/detection.go         | 2 +-
 core/http/endpoints/localai/face_analyze.go      | 2 +-
 core/http/endpoints/localai/face_embed.go        | 2 +-
 core/http/endpoints/localai/face_identify.go     | 2 +-
 core/http/endpoints/localai/face_register.go     | 2 +-
 core/http/endpoints/localai/face_verify.go       | 2 +-
 core/http/endpoints/localai/get_token_metrics.go | 2 +-
 core/http/endpoints/localai/voice_analyze.go     | 2 +-
 core/http/endpoints/localai/voice_embed.go       | 2 +-
 core/http/endpoints/localai/voice_identify.go    | 2 +-
 core/http/endpoints/localai/voice_register.go    | 2 +-
 core/http/endpoints/localai/voice_verify.go      | 2 +-
 core/http/endpoints/openai/diarization.go        | 2 +-
 24 files changed, 34 insertions(+), 26 deletions(-)

diff --git a/core/backend/detection.go b/core/backend/detection.go
index 1a98c47a9..13a923e9f 100644
--- a/core/backend/detection.go
+++ b/core/backend/detection.go
@@ -12,6 +12,7 @@ import (
 )
 
 func Detection(
+	ctx context.Context,
 	sourceFile string,
 	prompt string,
 	points []float32,
@@ -38,7 +39,7 @@ func Detection(
 		startTime = time.Now()
 	}
 
-	res, err := detectionModel.Detect(context.Background(), &proto.DetectOptions{
+	res, err := detectionModel.Detect(ctx, &proto.DetectOptions{
 		Src:       sourceFile,
 		Prompt:    prompt,
 		Points:    points,
diff --git a/core/backend/diarization.go b/core/backend/diarization.go
index d311d4c45..ba973d773 100644
--- a/core/backend/diarization.go
+++ b/core/backend/diarization.go
@@ -63,7 +63,7 @@ func loadDiarizationModel(ml *model.ModelLoader, modelConfig config.ModelConfig,
 
 // ModelDiarization runs the Diarize RPC against the configured backend
 // and returns a normalized schema.DiarizationResult.
-func ModelDiarization(req DiarizationRequest, ml *model.ModelLoader, modelConfig config.ModelConfig, appConfig *config.ApplicationConfig) (*schema.DiarizationResult, error) {
+func ModelDiarization(ctx context.Context, req DiarizationRequest, ml *model.ModelLoader, modelConfig config.ModelConfig, appConfig *config.ApplicationConfig) (*schema.DiarizationResult, error) {
 	m, err := loadDiarizationModel(ml, modelConfig, appConfig)
 	if err != nil {
 		return nil, err
@@ -74,7 +74,7 @@ func ModelDiarization(req DiarizationRequest, ml *model.ModelLoader, modelConfig
 		threads = uint32(*modelConfig.Threads)
 	}
 
-	r, err := m.Diarize(context.Background(), req.toProto(threads))
+	r, err := m.Diarize(ctx, req.toProto(threads))
 	if err != nil {
 		return nil, err
 	}
diff --git a/core/backend/face_analyze.go b/core/backend/face_analyze.go
index 7293b09c7..24d70ac40 100644
--- a/core/backend/face_analyze.go
+++ b/core/backend/face_analyze.go
@@ -12,6 +12,7 @@ import (
 )
 
 func FaceAnalyze(
+	ctx context.Context,
 	img string,
 	actions []string,
 	antiSpoofing bool,
@@ -35,7 +36,7 @@ func FaceAnalyze(
 		startTime = time.Now()
 	}
 
-	res, err := faceModel.FaceAnalyze(context.Background(), &proto.FaceAnalyzeRequest{
+	res, err := faceModel.FaceAnalyze(ctx, &proto.FaceAnalyzeRequest{
 		Img:          img,
 		Actions:      actions,
 		AntiSpoofing: antiSpoofing,
diff --git a/core/backend/face_embed.go b/core/backend/face_embed.go
index 77bbb4a7c..dc9fecad0 100644
--- a/core/backend/face_embed.go
+++ b/core/backend/face_embed.go
@@ -14,6 +14,7 @@ import (
 // backend picks the highest-confidence face and returns its
 // L2-normalized embedding.
 func FaceEmbed(
+	ctx context.Context,
 	imgBase64 string,
 	loader *model.ModelLoader,
 	appConfig *config.ApplicationConfig,
@@ -32,7 +33,7 @@ func FaceEmbed(
 	predictOpts := gRPCPredictOpts(modelConfig, loader.ModelPath)
 	predictOpts.Images = []string{imgBase64}
 
-	res, err := faceModel.Embeddings(context.Background(), predictOpts)
+	res, err := faceModel.Embeddings(ctx, predictOpts)
 	if err != nil {
 		return nil, err
 	}
diff --git a/core/backend/face_verify.go b/core/backend/face_verify.go
index 43b128e79..15b7dcdaf 100644
--- a/core/backend/face_verify.go
+++ b/core/backend/face_verify.go
@@ -12,6 +12,7 @@ import (
 )
 
 func FaceVerify(
+	ctx context.Context,
 	img1, img2 string,
 	threshold float32,
 	antiSpoofing bool,
@@ -35,7 +36,7 @@ func FaceVerify(
 		startTime = time.Now()
 	}
 
-	res, err := faceModel.FaceVerify(context.Background(), &proto.FaceVerifyRequest{
+	res, err := faceModel.FaceVerify(ctx, &proto.FaceVerifyRequest{
 		Img1:         img1,
 		Img2:         img2,
 		Threshold:    threshold,
diff --git a/core/backend/rerank.go b/core/backend/rerank.go
index 4b8f8b288..9672a1ca8 100644
--- a/core/backend/rerank.go
+++ b/core/backend/rerank.go
@@ -11,7 +11,7 @@ import (
 	model "github.com/mudler/LocalAI/pkg/model"
 )
 
-func Rerank(request *proto.RerankRequest, loader *model.ModelLoader, appConfig *config.ApplicationConfig, modelConfig config.ModelConfig) (*proto.RerankResult, error) {
+func Rerank(ctx context.Context, request *proto.RerankRequest, loader *model.ModelLoader, appConfig *config.ApplicationConfig, modelConfig config.ModelConfig) (*proto.RerankResult, error) {
 	opts := ModelOptions(modelConfig, appConfig)
 	rerankModel, err := loader.Load(opts...)
 	if err != nil {
@@ -29,7 +29,7 @@ func Rerank(request *proto.RerankRequest, loader *model.ModelLoader, appConfig *
 		startTime = time.Now()
 	}
 
-	res, err := rerankModel.Rerank(context.Background(), request)
+	res, err := rerankModel.Rerank(ctx, request)
 
 	if appConfig.EnableTracing {
 		errStr := ""
diff --git a/core/backend/token_metrics.go b/core/backend/token_metrics.go
index 4a9289eec..45d60a406 100644
--- a/core/backend/token_metrics.go
+++ b/core/backend/token_metrics.go
@@ -10,6 +10,7 @@ import (
 )
 
 func TokenMetrics(
+	ctx context.Context,
 	modelFile string,
 	loader *model.ModelLoader,
 	appConfig *config.ApplicationConfig,
@@ -26,7 +27,7 @@ func TokenMetrics(
 		return nil, fmt.Errorf("could not loadmodel model")
 	}
 
-	res, err := model.GetTokenMetrics(context.Background(), &proto.MetricsRequest{})
+	res, err := model.GetTokenMetrics(ctx, &proto.MetricsRequest{})
 
 	return res, err
 }
diff --git a/core/backend/voice_analyze.go b/core/backend/voice_analyze.go
index 47ffebe5e..022692921 100644
--- a/core/backend/voice_analyze.go
+++ b/core/backend/voice_analyze.go
@@ -12,6 +12,7 @@ import (
 )
 
 func VoiceAnalyze(
+	ctx context.Context,
 	audio string,
 	actions []string,
 	loader *model.ModelLoader,
@@ -34,7 +35,7 @@ func VoiceAnalyze(
 		startTime = time.Now()
 	}
 
-	res, err := voiceModel.VoiceAnalyze(context.Background(), &proto.VoiceAnalyzeRequest{
+	res, err := voiceModel.VoiceAnalyze(ctx, &proto.VoiceAnalyzeRequest{
 		Audio:   audio,
 		Actions: actions,
 	})
diff --git a/core/backend/voice_embed.go b/core/backend/voice_embed.go
index e72842591..6cdc9b6a2 100644
--- a/core/backend/voice_embed.go
+++ b/core/backend/voice_embed.go
@@ -16,6 +16,7 @@ import (
 // OpenAI-compatible and text-only), this call takes an audio path and
 // returns the backend's speaker-encoder output.
 func VoiceEmbed(
+	ctx context.Context,
 	audioPath string,
 	loader *model.ModelLoader,
 	appConfig *config.ApplicationConfig,
@@ -37,7 +38,7 @@ func VoiceEmbed(
 		startTime = time.Now()
 	}
 
-	res, err := voiceModel.VoiceEmbed(context.Background(), &proto.VoiceEmbedRequest{
+	res, err := voiceModel.VoiceEmbed(ctx, &proto.VoiceEmbedRequest{
 		Audio: audioPath,
 	})
 
diff --git a/core/backend/voice_verify.go b/core/backend/voice_verify.go
index 97cc7b9b1..bd4c04808 100644
--- a/core/backend/voice_verify.go
+++ b/core/backend/voice_verify.go
@@ -12,6 +12,7 @@ import (
 )
 
 func VoiceVerify(
+	ctx context.Context,
 	audio1, audio2 string,
 	threshold float32,
 	antiSpoofing bool,
@@ -35,7 +36,7 @@ func VoiceVerify(
 		startTime = time.Now()
 	}
 
-	res, err := voiceModel.VoiceVerify(context.Background(), &proto.VoiceVerifyRequest{
+	res, err := voiceModel.VoiceVerify(ctx, &proto.VoiceVerifyRequest{
 		Audio1:       audio1,
 		Audio2:       audio2,
 		Threshold:    threshold,
diff --git a/core/http/endpoints/jina/rerank.go b/core/http/endpoints/jina/rerank.go
index 6dabd35f6..348446b28 100644
--- a/core/http/endpoints/jina/rerank.go
+++ b/core/http/endpoints/jina/rerank.go
@@ -52,7 +52,7 @@ func JINARerankEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, app
 			Documents: input.Documents,
 		}
 
-		results, err := backend.Rerank(request, ml, appConfig, *cfg)
+		results, err := backend.Rerank(c.Request().Context(), request, ml, appConfig, *cfg)
 		if err != nil {
 			return err
 		}
diff --git a/core/http/endpoints/localai/detection.go b/core/http/endpoints/localai/detection.go
index 0a9463e59..0f1c72282 100644
--- a/core/http/endpoints/localai/detection.go
+++ b/core/http/endpoints/localai/detection.go
@@ -38,7 +38,7 @@ func DetectionEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, appC
 			return err
 		}
 
-		res, err := backend.Detection(image, input.Prompt, input.Points, input.Boxes, input.Threshold, ml, appConfig, *cfg)
+		res, err := backend.Detection(c.Request().Context(), image, input.Prompt, input.Points, input.Boxes, input.Threshold, ml, appConfig, *cfg)
 		if err != nil {
 			return mapBackendError(err)
 		}
diff --git a/core/http/endpoints/localai/face_analyze.go b/core/http/endpoints/localai/face_analyze.go
index e4eda3ddd..441b7f0af 100644
--- a/core/http/endpoints/localai/face_analyze.go
+++ b/core/http/endpoints/localai/face_analyze.go
@@ -35,7 +35,7 @@ func FaceAnalyzeEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, ap
 		}
 
 		xlog.Debug("FaceAnalyze", "model", cfg.Name, "backend", cfg.Backend, "actions", input.Actions)
-		res, err := backend.FaceAnalyze(img, input.Actions, input.AntiSpoofing, ml, appConfig, *cfg)
+		res, err := backend.FaceAnalyze(c.Request().Context(), img, input.Actions, input.AntiSpoofing, ml, appConfig, *cfg)
 		if err != nil {
 			return mapBackendError(err)
 		}
diff --git a/core/http/endpoints/localai/face_embed.go b/core/http/endpoints/localai/face_embed.go
index 7a0f18e34..58524cafa 100644
--- a/core/http/endpoints/localai/face_embed.go
+++ b/core/http/endpoints/localai/face_embed.go
@@ -41,7 +41,7 @@ func FaceEmbedEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, appC
 		}
 
 		xlog.Debug("FaceEmbed", "model", cfg.Name, "backend", cfg.Backend)
-		vec, err := backend.FaceEmbed(img, ml, appConfig, *cfg)
+		vec, err := backend.FaceEmbed(c.Request().Context(), img, ml, appConfig, *cfg)
 		if err != nil {
 			return mapBackendError(err)
 		}
diff --git a/core/http/endpoints/localai/face_identify.go b/core/http/endpoints/localai/face_identify.go
index 527174127..15e7e2c3c 100644
--- a/core/http/endpoints/localai/face_identify.go
+++ b/core/http/endpoints/localai/face_identify.go
@@ -45,7 +45,7 @@ func FaceIdentifyEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, a
 		threshold := cmp.Or(input.Threshold, defaultIdentifyThreshold)
 
 		xlog.Debug("FaceIdentify", "model", cfg.Name, "topK", topK, "threshold", threshold)
-		probe, err := backend.FaceEmbed(img, ml, appConfig, *cfg)
+		probe, err := backend.FaceEmbed(c.Request().Context(), img, ml, appConfig, *cfg)
 		if err != nil {
 			return mapBackendError(err)
 		}
diff --git a/core/http/endpoints/localai/face_register.go b/core/http/endpoints/localai/face_register.go
index 308a194a7..fbeb29e0c 100644
--- a/core/http/endpoints/localai/face_register.go
+++ b/core/http/endpoints/localai/face_register.go
@@ -39,7 +39,7 @@ func FaceRegisterEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, a
 		}
 
 		xlog.Debug("FaceRegister", "model", cfg.Name, "name", input.Name)
-		embedding, err := backend.FaceEmbed(img, ml, appConfig, *cfg)
+		embedding, err := backend.FaceEmbed(c.Request().Context(), img, ml, appConfig, *cfg)
 		if err != nil {
 			return mapBackendError(err)
 		}
diff --git a/core/http/endpoints/localai/face_verify.go b/core/http/endpoints/localai/face_verify.go
index 26398b7f8..ef608c57a 100644
--- a/core/http/endpoints/localai/face_verify.go
+++ b/core/http/endpoints/localai/face_verify.go
@@ -39,7 +39,7 @@ func FaceVerifyEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, app
 		}
 
 		xlog.Debug("FaceVerify", "model", cfg.Name, "backend", cfg.Backend)
-		res, err := backend.FaceVerify(img1, img2, input.Threshold, input.AntiSpoofing, ml, appConfig, *cfg)
+		res, err := backend.FaceVerify(c.Request().Context(), img1, img2, input.Threshold, input.AntiSpoofing, ml, appConfig, *cfg)
 		if err != nil {
 			return mapBackendError(err)
 		}
diff --git a/core/http/endpoints/localai/get_token_metrics.go b/core/http/endpoints/localai/get_token_metrics.go
index 36b0301b7..215928ab1 100644
--- a/core/http/endpoints/localai/get_token_metrics.go
+++ b/core/http/endpoints/localai/get_token_metrics.go
@@ -49,7 +49,7 @@ func TokenMetricsEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, a
 		}
 		xlog.Debug("Token Metrics for model", "model", modelFile)
 
-		response, err := backend.TokenMetrics(modelFile, ml, appConfig, *cfg)
+		response, err := backend.TokenMetrics(c.Request().Context(), modelFile, ml, appConfig, *cfg)
 		if err != nil {
 			return err
 		}
diff --git a/core/http/endpoints/localai/voice_analyze.go b/core/http/endpoints/localai/voice_analyze.go
index 4712cd5b0..ff4d3c45d 100644
--- a/core/http/endpoints/localai/voice_analyze.go
+++ b/core/http/endpoints/localai/voice_analyze.go
@@ -36,7 +36,7 @@ func VoiceAnalyzeEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, a
 		defer cleanup()
 
 		xlog.Debug("VoiceAnalyze", "model", cfg.Name, "backend", cfg.Backend, "actions", input.Actions)
-		res, err := backend.VoiceAnalyze(audio, input.Actions, ml, appConfig, *cfg)
+		res, err := backend.VoiceAnalyze(c.Request().Context(), audio, input.Actions, ml, appConfig, *cfg)
 		if err != nil {
 			return mapBackendError(err)
 		}
diff --git a/core/http/endpoints/localai/voice_embed.go b/core/http/endpoints/localai/voice_embed.go
index 1f878efd6..d84a4dc2b 100644
--- a/core/http/endpoints/localai/voice_embed.go
+++ b/core/http/endpoints/localai/voice_embed.go
@@ -41,7 +41,7 @@ func VoiceEmbedEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, app
 		defer cleanup()
 
 		xlog.Debug("VoiceEmbed", "model", cfg.Name, "backend", cfg.Backend)
-		res, err := backend.VoiceEmbed(audio, ml, appConfig, *cfg)
+		res, err := backend.VoiceEmbed(c.Request().Context(), audio, ml, appConfig, *cfg)
 		if err != nil {
 			return mapBackendError(err)
 		}
diff --git a/core/http/endpoints/localai/voice_identify.go b/core/http/endpoints/localai/voice_identify.go
index b048bf96f..eda5aec3d 100644
--- a/core/http/endpoints/localai/voice_identify.go
+++ b/core/http/endpoints/localai/voice_identify.go
@@ -47,7 +47,7 @@ func VoiceIdentifyEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader,
 		threshold := cmp.Or(input.Threshold, defaultVoiceIdentifyThreshold)
 
 		xlog.Debug("VoiceIdentify", "model", cfg.Name, "topK", topK, "threshold", threshold)
-		embed, err := backend.VoiceEmbed(audio, ml, appConfig, *cfg)
+		embed, err := backend.VoiceEmbed(c.Request().Context(), audio, ml, appConfig, *cfg)
 		if err != nil {
 			return mapBackendError(err)
 		}
diff --git a/core/http/endpoints/localai/voice_register.go b/core/http/endpoints/localai/voice_register.go
index 27605cd71..d8d97d619 100644
--- a/core/http/endpoints/localai/voice_register.go
+++ b/core/http/endpoints/localai/voice_register.go
@@ -40,7 +40,7 @@ func VoiceRegisterEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader,
 		defer cleanup()
 
 		xlog.Debug("VoiceRegister", "model", cfg.Name, "name", input.Name)
-		res, err := backend.VoiceEmbed(audio, ml, appConfig, *cfg)
+		res, err := backend.VoiceEmbed(c.Request().Context(), audio, ml, appConfig, *cfg)
 		if err != nil {
 			return mapBackendError(err)
 		}
diff --git a/core/http/endpoints/localai/voice_verify.go b/core/http/endpoints/localai/voice_verify.go
index 9e81b8a15..d762ea51b 100644
--- a/core/http/endpoints/localai/voice_verify.go
+++ b/core/http/endpoints/localai/voice_verify.go
@@ -42,7 +42,7 @@ func VoiceVerifyEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, ap
 		defer cleanup2()
 
 		xlog.Debug("VoiceVerify", "model", cfg.Name, "backend", cfg.Backend)
-		res, err := backend.VoiceVerify(audio1, audio2, input.Threshold, input.AntiSpoofing, ml, appConfig, *cfg)
+		res, err := backend.VoiceVerify(c.Request().Context(), audio1, audio2, input.Threshold, input.AntiSpoofing, ml, appConfig, *cfg)
 		if err != nil {
 			return mapBackendError(err)
 		}
diff --git a/core/http/endpoints/openai/diarization.go b/core/http/endpoints/openai/diarization.go
index 2f927ddae..75e9715db 100644
--- a/core/http/endpoints/openai/diarization.go
+++ b/core/http/endpoints/openai/diarization.go
@@ -105,7 +105,7 @@ func DiarizationEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, ap
 		_ = dstFile.Close()
 		req.Audio = dst
 
-		result, err := backend.ModelDiarization(req, ml, *modelConfig, appConfig)
+		result, err := backend.ModelDiarization(c.Request().Context(), req, ml, *modelConfig, appConfig)
 		if err != nil {
 			return err
 		}