mirror of
https://github.com/mudler/LocalAI.git
synced 2026-06-04 23:06:22 -04:00
fix(config): add face/speaker recognition constants and register insightface + speaker-recognition (#10110)
FLAG_FACE_RECOGNITION and FLAG_SPEAKER_RECOGNITION already existed as
ModelConfigUsecase bitmask flags, and GuessUsecases already gate-checks
both backends by name — but BackendCapabilities had no entries for
either, so the UI could not classify them.
Also missing were the Method* constants for the five proto-defined RPCs
these backends implement (FaceVerify, FaceAnalyze, VoiceVerify,
VoiceEmbed, VoiceAnalyze) and the corresponding Usecase* strings
and UsecaseInfoMap entries needed to wire them into the rest of the
capability system.
Changes:
- Add MethodFaceVerify, MethodFaceAnalyze, MethodVoiceVerify,
MethodVoiceEmbed, MethodVoiceAnalyze GRPCMethod constants
- Add UsecaseFaceRecognition ("face_recognition") and
UsecaseSpeakerRecognition ("speaker_recognition") Usecase constants
- Add UsecaseInfoMap entries for both new usecases, referencing the
existing FLAG_FACE_RECOGNITION and FLAG_SPEAKER_RECOGNITION flags
- Register insightface: Embedding + Detect + FaceVerify + FaceAnalyze
- Register speaker-recognition: VoiceVerify + VoiceEmbed + VoiceAnalyze
Follows up on #10107 which left these two out because they needed new
constants first.
Assisted-by: Claude Sonnet 4.6 <noreply@anthropic.com>
Signed-off-by: Adira Denis Muhando <dennisadira@gmail.com>
This commit is contained in:
@@ -22,9 +22,11 @@ const (
|
||||
UsecaseRerank = "rerank"
|
||||
UsecaseDetection = "detection"
|
||||
UsecaseVAD = "vad"
|
||||
UsecaseAudioTransform = "audio_transform"
|
||||
UsecaseDiarization = "diarization"
|
||||
UsecaseRealtimeAudio = "realtime_audio"
|
||||
UsecaseAudioTransform = "audio_transform"
|
||||
UsecaseDiarization = "diarization"
|
||||
UsecaseRealtimeAudio = "realtime_audio"
|
||||
UsecaseFaceRecognition = "face_recognition"
|
||||
UsecaseSpeakerRecognition = "speaker_recognition"
|
||||
)
|
||||
|
||||
// GRPCMethod identifies a Backend service RPC from backend.proto.
|
||||
@@ -47,6 +49,11 @@ const (
|
||||
MethodAudioTransform GRPCMethod = "AudioTransform"
|
||||
MethodDiarize GRPCMethod = "Diarize"
|
||||
MethodAudioToAudioStream GRPCMethod = "AudioToAudioStream"
|
||||
MethodFaceVerify GRPCMethod = "FaceVerify"
|
||||
MethodFaceAnalyze GRPCMethod = "FaceAnalyze"
|
||||
MethodVoiceVerify GRPCMethod = "VoiceVerify"
|
||||
MethodVoiceEmbed GRPCMethod = "VoiceEmbed"
|
||||
MethodVoiceAnalyze GRPCMethod = "VoiceAnalyze"
|
||||
)
|
||||
|
||||
// UsecaseInfo describes a single known_usecase value and how it maps
|
||||
@@ -154,6 +161,16 @@ var UsecaseInfoMap = map[string]UsecaseInfo{
|
||||
GRPCMethod: MethodAudioToAudioStream,
|
||||
Description: "Self-contained any-to-any audio model for the Realtime API — accepts microphone audio and emits speech + transcript (+ optional function calls) from a single backend via the AudioToAudioStream RPC.",
|
||||
},
|
||||
UsecaseFaceRecognition: {
|
||||
Flag: FLAG_FACE_RECOGNITION,
|
||||
GRPCMethod: MethodFaceVerify,
|
||||
Description: "Face recognition — verify identity, analyze attributes (age/gender/emotion) via FaceVerify and FaceAnalyze RPCs.",
|
||||
},
|
||||
UsecaseSpeakerRecognition: {
|
||||
Flag: FLAG_SPEAKER_RECOGNITION,
|
||||
GRPCMethod: MethodVoiceVerify,
|
||||
Description: "Speaker recognition — verify identity, embed and analyze voice via VoiceVerify, VoiceEmbed and VoiceAnalyze RPCs.",
|
||||
},
|
||||
}
|
||||
|
||||
// BackendCapability describes which gRPC methods and usecases a backend supports.
|
||||
@@ -471,6 +488,21 @@ var BackendCapabilities = map[string]BackendCapability{
|
||||
DefaultUsecases: []string{UsecaseDetection},
|
||||
Description: "RF-DETR C++ object detection",
|
||||
},
|
||||
|
||||
// --- Face and speaker recognition backends ---
|
||||
"insightface": {
|
||||
GRPCMethods: []GRPCMethod{MethodEmbedding, MethodDetect, MethodFaceVerify, MethodFaceAnalyze},
|
||||
PossibleUsecases: []string{UsecaseEmbeddings, UsecaseDetection, UsecaseFaceRecognition},
|
||||
DefaultUsecases: []string{UsecaseFaceRecognition},
|
||||
AcceptsImages: true,
|
||||
Description: "InsightFace — face detection, embedding, verification and attribute analysis",
|
||||
},
|
||||
"speaker-recognition": {
|
||||
GRPCMethods: []GRPCMethod{MethodVoiceVerify, MethodVoiceEmbed, MethodVoiceAnalyze},
|
||||
PossibleUsecases: []string{UsecaseSpeakerRecognition},
|
||||
DefaultUsecases: []string{UsecaseSpeakerRecognition},
|
||||
Description: "Speaker recognition — voice identity verification and analysis",
|
||||
},
|
||||
"silero-vad": {
|
||||
GRPCMethods: []GRPCMethod{MethodVAD},
|
||||
PossibleUsecases: []string{UsecaseVAD},
|
||||
|
||||
Reference in New Issue
Block a user