fix(config): add face/speaker recognition constants and register insightface + speaker-recognition (#10110)

FLAG_FACE_RECOGNITION and FLAG_SPEAKER_RECOGNITION already existed as ModelConfigUsecase bitmask flags, and GuessUsecases already gate-checks both backends by name — but BackendCapabilities had no entries for either, so the UI could not classify them. Also missing were the Method* constants for the five proto-defined RPCs these backends implement (FaceVerify, FaceAnalyze, VoiceVerify, VoiceEmbed, VoiceAnalyze) and the corresponding Usecase* strings and UsecaseInfoMap entries needed to wire them into the rest of the capability system.

Changes:
- Add MethodFaceVerify, MethodFaceAnalyze, MethodVoiceVerify, MethodVoiceEmbed, MethodVoiceAnalyze GRPCMethod constants
- Add UsecaseFaceRecognition ("face_recognition") and UsecaseSpeakerRecognition ("speaker_recognition") Usecase constants
- Add UsecaseInfoMap entries for both new usecases, referencing the existing FLAG_FACE_RECOGNITION and FLAG_SPEAKER_RECOGNITION flags
- Register insightface: Embedding + Detect + FaceVerify + FaceAnalyze
- Register speaker-recognition: VoiceVerify + VoiceEmbed + VoiceAnalyze

Follows up on #10107 which left these two out because they needed new constants first.

Assisted-by: Claude Sonnet 4.6 <noreply@anthropic.com>
Signed-off-by: Adira Denis Muhando <dennisadira@gmail.com>
2026-07-30 09:57:57 -04:00 · 2026-06-04 22:48:01 +03:00
parent 92726f7631
commit ef80a0e825
1 changed files with 35 additions and 3 deletions
--- a/core/config/backend_capabilities.go
+++ b/core/config/backend_capabilities.go
@@ -22,9 +22,11 @@ const (
 	UsecaseRerank          = "rerank"
 	UsecaseDetection       = "detection"
 	UsecaseVAD             = "vad"
-	UsecaseAudioTransform  = "audio_transform"
-	UsecaseDiarization     = "diarization"
-	UsecaseRealtimeAudio   = "realtime_audio"
+	UsecaseAudioTransform      = "audio_transform"
+	UsecaseDiarization         = "diarization"
+	UsecaseRealtimeAudio       = "realtime_audio"
+	UsecaseFaceRecognition     = "face_recognition"
+	UsecaseSpeakerRecognition  = "speaker_recognition"
 )

 // GRPCMethod identifies a Backend service RPC from backend.proto.
@@ -47,6 +49,11 @@ const (
 	MethodAudioTransform     GRPCMethod = "AudioTransform"
 	MethodDiarize            GRPCMethod = "Diarize"
 	MethodAudioToAudioStream GRPCMethod = "AudioToAudioStream"
+	MethodFaceVerify         GRPCMethod = "FaceVerify"
+	MethodFaceAnalyze        GRPCMethod = "FaceAnalyze"
+	MethodVoiceVerify        GRPCMethod = "VoiceVerify"
+	MethodVoiceEmbed         GRPCMethod = "VoiceEmbed"
+	MethodVoiceAnalyze       GRPCMethod = "VoiceAnalyze"
 )

 // UsecaseInfo describes a single known_usecase value and how it maps
@@ -154,6 +161,16 @@ var UsecaseInfoMap = map[string]UsecaseInfo{
 		GRPCMethod:  MethodAudioToAudioStream,
 		Description: "Self-contained any-to-any audio model for the Realtime API — accepts microphone audio and emits speech + transcript (+ optional function calls) from a single backend via the AudioToAudioStream RPC.",
 	},
+	UsecaseFaceRecognition: {
+		Flag:        FLAG_FACE_RECOGNITION,
+		GRPCMethod:  MethodFaceVerify,
+		Description: "Face recognition — verify identity, analyze attributes (age/gender/emotion) via FaceVerify and FaceAnalyze RPCs.",
+	},
+	UsecaseSpeakerRecognition: {
+		Flag:        FLAG_SPEAKER_RECOGNITION,
+		GRPCMethod:  MethodVoiceVerify,
+		Description: "Speaker recognition — verify identity, embed and analyze voice via VoiceVerify, VoiceEmbed and VoiceAnalyze RPCs.",
+	},
 }

 // BackendCapability describes which gRPC methods and usecases a backend supports.
@@ -471,6 +488,21 @@ var BackendCapabilities = map[string]BackendCapability{
 		DefaultUsecases:  []string{UsecaseDetection},
 		Description:      "RF-DETR C++ object detection",
 	},
+
+	// --- Face and speaker recognition backends ---
+	"insightface": {
+		GRPCMethods:      []GRPCMethod{MethodEmbedding, MethodDetect, MethodFaceVerify, MethodFaceAnalyze},
+		PossibleUsecases: []string{UsecaseEmbeddings, UsecaseDetection, UsecaseFaceRecognition},
+		DefaultUsecases:  []string{UsecaseFaceRecognition},
+		AcceptsImages:    true,
+		Description:      "InsightFace — face detection, embedding, verification and attribute analysis",
+	},
+	"speaker-recognition": {
+		GRPCMethods:      []GRPCMethod{MethodVoiceVerify, MethodVoiceEmbed, MethodVoiceAnalyze},
+		PossibleUsecases: []string{UsecaseSpeakerRecognition},
+		DefaultUsecases:  []string{UsecaseSpeakerRecognition},
+		Description:      "Speaker recognition — voice identity verification and analysis",
+	},
 	"silero-vad": {
 		GRPCMethods:      []GRPCMethod{MethodVAD},
 		PossibleUsecases: []string{UsecaseVAD},