diff --git a/backend/go/face-detect/.gitignore b/backend/go/face-detect/.gitignore new file mode 100644 index 000000000..7c80b29ab --- /dev/null +++ b/backend/go/face-detect/.gitignore @@ -0,0 +1,18 @@ +# Fetched upstream sources +sources/ + +# CMake build directories +build*/ + +# build artifacts staged in-tree by the Makefile (cp from sources/) or +# symlinked for local dev; the real sources live in face-detect.cpp upstream. +*.so +*.so.* +facedetect_capi.h +compile_commands.json + +# Compiled backend binary +face-detect-grpc + +# Packaging output +package/ diff --git a/backend/go/face-detect/Makefile b/backend/go/face-detect/Makefile new file mode 100644 index 000000000..ecf101a22 --- /dev/null +++ b/backend/go/face-detect/Makefile @@ -0,0 +1,97 @@ +# face-detect backend Makefile. +# +# Upstream pin lives below as FACEDETECT_VERSION?=636a1963... (.github/bump_deps.sh +# can find and update it - matches the voice-detect / parakeet.cpp / whisper.cpp +# convention). +# +# Local dev shortcut: if you already have an out-of-tree face-detect.cpp build, +# symlink the .so + header into this directory and skip the clone/cmake steps: +# +# ln -sf /path/to/face-detect.cpp/build-shared/libfacedetect.so . +# ln -sf /path/to/face-detect.cpp/include/facedetect_capi.h . +# go build -o face-detect-grpc . +# +# The default target below does the proper clone-at-pin + cmake build so CI does +# not need a side-checkout. + +FACEDETECT_VERSION?=636a19631a400694a08edb7e707288003b7093aa +FACEDETECT_REPO?=https://github.com/mudler/face-detect.cpp + +GOCMD?=go +GO_TAGS?= +JOBS?=$(shell nproc 2>/dev/null || sysctl -n hw.ncpu 2>/dev/null || echo 4) + +BUILD_TYPE?= +NATIVE?=false + +# Build ggml + the vendored libjpeg-turbo statically into libfacedetect.so (PIC) +# so the shared lib is self-contained: dlopen needs no libggml*.so alongside it, +# only system libs (libstdc++/libgomp/libc) the runtime image already provides. 
# The vendored jpeg symbols are hidden via -Wl,--exclude-libs,ALL on the C++
# side, so only the facedetect_capi_* surface is exported.
CMAKE_ARGS?=-DCMAKE_BUILD_TYPE=Release -DFACEDETECT_SHARED=ON -DFACEDETECT_BUILD_CLI=OFF -DFACEDETECT_BUILD_TESTS=OFF -DBUILD_SHARED_LIBS=OFF -DCMAKE_POSITION_INDEPENDENT_CODE=ON

# NATIVE=false (the default) turns off GGML's host-specific CPU tuning.
ifeq ($(NATIVE),false)
  CMAKE_ARGS+=-DGGML_NATIVE=OFF
endif

# face-detect.cpp gates its GGML backends behind FACEDETECT_GGML_* options and
# does set(GGML_CUDA ${FACEDETECT_GGML_CUDA} CACHE BOOL "" FORCE), so a bare
# -DGGML_CUDA=ON is overwritten back to OFF. Forward the FACEDETECT_GGML_*
# options instead. (openblas is not gated, so -DGGML_BLAS passes through.)
ifeq ($(BUILD_TYPE),cublas)
  CMAKE_ARGS+=-DFACEDETECT_GGML_CUDA=ON
else ifeq ($(BUILD_TYPE),openblas)
  CMAKE_ARGS+=-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
else ifeq ($(BUILD_TYPE),hipblas)
  CMAKE_ARGS+=-DFACEDETECT_GGML_HIP=ON
else ifeq ($(BUILD_TYPE),vulkan)
  CMAKE_ARGS+=-DFACEDETECT_GGML_VULKAN=ON
else ifeq ($(BUILD_TYPE),metal)
  CMAKE_ARGS+=-DFACEDETECT_GGML_METAL=ON
endif

.PHONY: face-detect-grpc package build clean purge test all

all: face-detect-grpc

# Clone the upstream face-detect.cpp source at the pinned commit. Directory acts
# as the target so make only re-clones when missing. After a FACEDETECT_VERSION
# bump, run 'make purge && make' to refetch.
#
# The git steps run in a subshell and the directory is removed again if any of
# them fails. Because the directory itself is the target, a half-fetched tree
# left behind by a failed fetch/checkout would otherwise look up to date on the
# next run and only surface later as a confusing cmake error.
sources/face-detect.cpp:
	mkdir -p $@
	( cd $@ && \
	  git init -q && \
	  git remote add origin $(FACEDETECT_REPO) && \
	  git fetch --depth 1 origin $(FACEDETECT_VERSION) && \
	  git checkout FETCH_HEAD && \
	  git submodule update --init --recursive --depth 1 --single-branch \
	) || { rm -rf $@; exit 1; }

# Build the shared lib + header out-of-tree, then stage them next to the Go
# sources so purego.Dlopen("libfacedetect.so") and the cgo-less build both pick
# them up.
+libfacedetect.so: sources/face-detect.cpp + cmake -B sources/face-detect.cpp/build-shared -S sources/face-detect.cpp $(CMAKE_ARGS) + cmake --build sources/face-detect.cpp/build-shared --config Release -j$(JOBS) --target facedetect + cp -fv sources/face-detect.cpp/build-shared/libfacedetect.so* ./ 2>/dev/null || true + cp -fv sources/face-detect.cpp/include/facedetect_capi.h ./ + +face-detect-grpc: libfacedetect.so main.go gofacedetect.go options.go + CGO_ENABLED=0 $(GOCMD) build -tags "$(GO_TAGS)" -o face-detect-grpc . + +package: face-detect-grpc + bash package.sh + +build: package + +# Test target. The embed/detect/verify/analyze smoke specs are gated on +# FACEDETECT_BACKEND_TEST_MODEL + FACEDETECT_BACKEND_TEST_IMAGE; without them the +# heavy specs auto-skip and only the pure-Go parsing specs run. +test: + LD_LIBRARY_PATH=$(CURDIR):$$LD_LIBRARY_PATH $(GOCMD) test ./... -count=1 + +clean: purge + rm -rf libfacedetect.so* facedetect_capi.h package face-detect-grpc + +purge: + rm -rf sources/face-detect.cpp diff --git a/backend/go/face-detect/gofacedetect.go b/backend/go/face-detect/gofacedetect.go new file mode 100644 index 000000000..5577a2404 --- /dev/null +++ b/backend/go/face-detect/gofacedetect.go @@ -0,0 +1,416 @@ +package main + +import ( + "encoding/base64" + "encoding/json" + "errors" + "fmt" + "math" + "os" + "path/filepath" + "strings" + "time" + "unsafe" + + "github.com/mudler/LocalAI/pkg/grpc/base" + pb "github.com/mudler/LocalAI/pkg/grpc/proto" + "github.com/mudler/xlog" +) + +// purego-bound entry points from libfacedetect.so. Names match +// facedetect_capi.h exactly so a `nm libfacedetect.so | grep facedetect_capi` +// is enough to spot drift. +// +// The opaque ctx and the malloc'd char*/float* return values are declared as +// uintptr so we get the raw pointer back and can release it via the matching +// capi free function. 
purego's native string/[]float32 returns would copy and +// forget the original pointer, leaking the C-owned buffer on every call. +var ( + CppAbiVersion func() int32 + CppLoad func(ggufPath string) uintptr + CppFree func(ctx uintptr) + CppLastError func(ctx uintptr) string + CppFreeString func(s uintptr) + CppFreeVec func(v uintptr) + CppEmbedPath func(ctx uintptr, imagePath string, outVec, outDim unsafe.Pointer) int32 + CppEmbedRGB func(ctx uintptr, rgb []byte, width, height int32, outVec, outDim unsafe.Pointer) int32 + CppDetectJSON func(ctx uintptr, imagePath string) uintptr + CppVerifyPaths func(ctx uintptr, a, b string, threshold float32, antiSpoof int32, outDistance, outVerified unsafe.Pointer) int32 + CppAnalyzeJSON func(ctx uintptr, imagePath string) uintptr +) + +// FaceDetect implements the face-recognition (biometric) subset of the Backend +// gRPC service over libfacedetect.so. The C side keeps a single loaded model +// pack plus a per-ctx last-error buffer and is not reentrant, so +// base.SingleThread serializes every call. +type FaceDetect struct { + base.SingleThread + opts loadOptions + ctxPtr uintptr +} + +func (f *FaceDetect) Load(opts *pb.ModelOptions) error { + model := opts.ModelFile + if model == "" { + model = opts.ModelPath + } + if !filepath.IsAbs(model) && opts.ModelPath != "" { + model = filepath.Join(opts.ModelPath, model) + } + if model == "" { + return errors.New("face-detect: ModelFile is required") + } + + f.opts = parseOptions(opts.Options) + if f.opts.modelName == "" { + f.opts.modelName = filepath.Base(model) + } + + xlog.Info("face-detect: loading model", "model", model, + "verify_threshold", f.opts.verifyThreshold, "abi", CppAbiVersion()) + + ctx := CppLoad(model) + if ctx == 0 { + // The last-error buffer lives on the ctx that was never returned, so + // surface the path the operator tried to load instead. 
+ return fmt.Errorf("face-detect: facedetect_capi_load failed for %q", model) + } + f.ctxPtr = ctx + return nil +} + +// Embeddings returns the L2-normalized ArcFace embedding of the primary face in +// the supplied image. Mirroring the Python face backend, the image is read from +// Images[0] as a base64 payload; materializeImage decodes it to a temp file so +// the path-based C-API can run its own decode (cv2.imread parity). The gRPC +// server wraps the returned slice in an EmbeddingResult. +func (f *FaceDetect) Embeddings(req *pb.PredictOptions) ([]float32, error) { + if f.ctxPtr == 0 { + return nil, errors.New("face-detect: model not loaded") + } + if len(req.Images) == 0 || req.Images[0] == "" { + return nil, errors.New("face-detect: Embedding requires Images[0] to be a base64 image") + } + + path, cleanup, err := materializeImage(req.Images[0]) + if err != nil { + return nil, err + } + defer cleanup() + + return f.embedPath(path) +} + +func (f *FaceDetect) embedPath(path string) ([]float32, error) { + var vec uintptr + var dim int32 + rc := CppEmbedPath(f.ctxPtr, path, unsafe.Pointer(&vec), unsafe.Pointer(&dim)) + if rc != 0 || vec == 0 || dim <= 0 { + return nil, f.lastErr("embed", path) + } + defer CppFreeVec(vec) + // Copy out of the C-owned malloc'd buffer before freeing it. The + // uintptr->Pointer conversion trips vet's unsafeptr check, which can't tell + // a C heap pointer from Go-managed memory; safe here, the GC neither tracks + // nor moves this buffer and we copy immediately. + src := unsafe.Slice((*float32)(unsafe.Pointer(vec)), int(dim)) //nolint:govet // C-owned malloc'd vector, copied out before free + out := make([]float32, int(dim)) + copy(out, src) + return out, nil +} + +// Detect runs SCRFD over the image and returns one Detection per face. The +// C-API emits a box as [x1,y1,x2,y2] in pixels; the proto carries x/y plus +// width/height, so the corners are converted. 
The 5 facial landmarks the engine +// also returns are dropped: the Detection message has no field for them. +func (f *FaceDetect) Detect(req *pb.DetectOptions) (pb.DetectResponse, error) { + if f.ctxPtr == 0 { + return pb.DetectResponse{}, errors.New("face-detect: model not loaded") + } + if req.Src == "" { + return pb.DetectResponse{}, errors.New("face-detect: src image is required") + } + + path, cleanup, err := materializeImage(req.Src) + if err != nil { + return pb.DetectResponse{}, err + } + defer cleanup() + + faces, err := f.detectFaces(path) + if err != nil { + return pb.DetectResponse{}, err + } + + dets := make([]*pb.Detection, 0, len(faces)) + for _, fc := range faces { + if req.Threshold > 0 && fc.Score < req.Threshold { + continue + } + x, y, w, h := fc.xywh() + dets = append(dets, &pb.Detection{ + X: x, + Y: y, + Width: w, + Height: h, + Confidence: fc.Score, + ClassName: "face", + }) + } + return pb.DetectResponse{Detections: dets}, nil +} + +// FaceVerify embeds the primary face in each image and reports whether they are +// the same identity by cosine distance against a threshold. A request threshold +// <= 0 falls back to the model-configured default (verify_threshold option, +// 0.35 if unset). When anti_spoofing is set, the C-API applies a MiniFASNet +// veto internally (verified forced false on a spoof); the per-image liveness +// scores are not exposed by the verify entry point, so img*_is_real / +// img*_antispoof_score stay at their zero values. 
+func (f *FaceDetect) FaceVerify(req *pb.FaceVerifyRequest) (pb.FaceVerifyResponse, error) { + if f.ctxPtr == 0 { + return pb.FaceVerifyResponse{}, errors.New("face-detect: model not loaded") + } + if req.Img1 == "" || req.Img2 == "" { + return pb.FaceVerifyResponse{}, errors.New("face-detect: img1 and img2 are required") + } + + path1, cleanup1, err := materializeImage(req.Img1) + if err != nil { + return pb.FaceVerifyResponse{}, err + } + defer cleanup1() + path2, cleanup2, err := materializeImage(req.Img2) + if err != nil { + return pb.FaceVerifyResponse{}, err + } + defer cleanup2() + + threshold := req.Threshold + if threshold <= 0 { + threshold = f.opts.verifyThreshold + } + + antiSpoof := int32(0) + if req.AntiSpoofing { + antiSpoof = 1 + } + + started := time.Now() + var distance float32 + var verified int32 + rc := CppVerifyPaths(f.ctxPtr, path1, path2, threshold, antiSpoof, + unsafe.Pointer(&distance), unsafe.Pointer(&verified)) + if rc != 0 { + return pb.FaceVerifyResponse{}, f.lastErr("verify", req.Img1[:min(8, len(req.Img1))]+"...") + } + elapsedMs := float32(time.Since(started).Seconds() * 1000.0) + + // Confidence decays linearly from 100 at distance 0 to 0 at the threshold, + // matching the Python face backend's reporting. + confidence := float32(0) + if threshold > 0 { + confidence = float32(math.Max(0, math.Min(100, (1.0-float64(distance)/float64(threshold))*100.0))) + } + + return pb.FaceVerifyResponse{ + Verified: verified != 0, + Distance: distance, + Threshold: threshold, + Confidence: confidence, + Model: f.opts.modelName, + Img1Area: f.bestArea(path1), + Img2Area: f.bestArea(path2), + ProcessingTimeMs: elapsedMs, + }, nil +} + +// FaceAnalyze runs the genderage head on every detected face. The C-API returns +// "M"/"F" gender labels and a rounded age; the labels are normalized to the +// "Man"/"Woman" values the proto documents. 
+func (f *FaceDetect) FaceAnalyze(req *pb.FaceAnalyzeRequest) (pb.FaceAnalyzeResponse, error) { + if f.ctxPtr == 0 { + return pb.FaceAnalyzeResponse{}, errors.New("face-detect: model not loaded") + } + if req.Img == "" { + return pb.FaceAnalyzeResponse{}, errors.New("face-detect: img is required") + } + + path, cleanup, err := materializeImage(req.Img) + if err != nil { + return pb.FaceAnalyzeResponse{}, err + } + defer cleanup() + + ptr := CppAnalyzeJSON(f.ctxPtr, path) + if ptr == 0 { + return pb.FaceAnalyzeResponse{}, f.lastErr("analyze", path) + } + defer CppFreeString(ptr) + + faces, err := parseAnalyzeJSON(goStringFromCPtr(ptr)) + if err != nil { + return pb.FaceAnalyzeResponse{}, fmt.Errorf("face-detect: analyze JSON: %w", err) + } + return pb.FaceAnalyzeResponse{Faces: faces}, nil +} + +// faceBox is one entry of the detect/analyze JSON documents the engine emits. +type faceBox struct { + Score float32 `json:"score"` + Box []float32 `json:"box"` + Age float32 `json:"age"` + Gender string `json:"gender"` +} + +// xywh converts the engine's [x1,y1,x2,y2] box into the x/y/width/height the +// proto carries. A short or missing box yields zeros. +func (b faceBox) xywh() (x, y, w, h float32) { + if len(b.Box) < 4 { + return 0, 0, 0, 0 + } + return b.Box[0], b.Box[1], b.Box[2] - b.Box[0], b.Box[3] - b.Box[1] +} + +type facesJSON struct { + Faces []faceBox `json:"faces"` +} + +func (f *FaceDetect) detectFaces(path string) ([]faceBox, error) { + ptr := CppDetectJSON(f.ctxPtr, path) + if ptr == 0 { + return nil, f.lastErr("detect", path) + } + defer CppFreeString(ptr) + + var doc facesJSON + if err := json.Unmarshal([]byte(goStringFromCPtr(ptr)), &doc); err != nil { + return nil, fmt.Errorf("face-detect: detect JSON: %w", err) + } + return doc.Faces, nil +} + +// bestArea returns the FacialArea of the highest-scoring face in an image, or an +// empty area when detection fails or finds nothing. 
Best-effort: verify already +// succeeded, so a missing region must not turn a valid match into an error. +func (f *FaceDetect) bestArea(path string) *pb.FacialArea { + faces, err := f.detectFaces(path) + if err != nil || len(faces) == 0 { + return &pb.FacialArea{} + } + best := faces[0] + for _, fc := range faces[1:] { + if fc.Score > best.Score { + best = fc + } + } + x, y, w, h := best.xywh() + return &pb.FacialArea{X: x, Y: y, W: w, H: h} +} + +// parseAnalyzeJSON maps the engine's analyze document onto FaceAnalysis entries. +// The engine reports gender as "M"/"F"; both the dominant label and the score +// map are filled with the "Man"/"Woman" form the proto documents. +func parseAnalyzeJSON(doc string) ([]*pb.FaceAnalysis, error) { + var parsed facesJSON + if err := json.Unmarshal([]byte(doc), &parsed); err != nil { + return nil, err + } + + out := make([]*pb.FaceAnalysis, 0, len(parsed.Faces)) + for _, fc := range parsed.Faces { + x, y, w, h := fc.xywh() + fa := &pb.FaceAnalysis{ + Region: &pb.FacialArea{X: x, Y: y, W: w, H: h}, + FaceConfidence: fc.Score, + Age: fc.Age, + } + if label := normalizeGender(fc.Gender); label != "" { + fa.DominantGender = label + fa.Gender = map[string]float32{label: 1.0} + } + out = append(out, fa) + } + return out, nil +} + +// normalizeGender maps the engine's "M"/"F" code to the "Man"/"Woman" labels the +// proto documents. Unknown codes pass through unchanged. +func normalizeGender(g string) string { + switch strings.ToUpper(strings.TrimSpace(g)) { + case "M": + return "Man" + case "F": + return "Woman" + case "": + return "" + default: + return g + } +} + +// materializeImage decodes a base64 image payload into a temp file and returns +// its path plus a cleanup func. As a convenience for callers that already pass a +// filesystem path (e.g. a test fixture), an existing path is used as-is with a +// no-op cleanup. data: URI prefixes are stripped before decoding. 
+func materializeImage(src string) (path string, cleanup func(), err error) { + noop := func() {} + if src == "" { + return "", noop, errors.New("face-detect: empty image input") + } + if _, statErr := os.Stat(src); statErr == nil { + return src, noop, nil + } + + payload := src + if i := strings.Index(payload, ","); strings.HasPrefix(payload, "data:") && i >= 0 { + payload = payload[i+1:] + } + data, decErr := base64.StdEncoding.DecodeString(strings.TrimSpace(payload)) + if decErr != nil || len(data) == 0 { + return "", noop, errors.New("face-detect: image is neither an existing path nor valid base64") + } + + tmp, createErr := os.CreateTemp("", "face-detect-*.img") + if createErr != nil { + return "", noop, fmt.Errorf("face-detect: create temp image: %w", createErr) + } + cleanup = func() { _ = os.Remove(tmp.Name()) } + if _, wErr := tmp.Write(data); wErr != nil { + _ = tmp.Close() + cleanup() + return "", noop, fmt.Errorf("face-detect: write temp image: %w", wErr) + } + if cErr := tmp.Close(); cErr != nil { + cleanup() + return "", noop, fmt.Errorf("face-detect: close temp image: %w", cErr) + } + return tmp.Name(), cleanup, nil +} + +// lastErr wraps the C-API's per-ctx last-error buffer into a Go error. +func (f *FaceDetect) lastErr(op, subject string) error { + msg := strings.TrimSpace(CppLastError(f.ctxPtr)) + if msg == "" { + msg = "no error detail" + } + return fmt.Errorf("face-detect: %s failed for %q: %s", op, subject, msg) +} + +// goStringFromCPtr copies a NUL-terminated C string into Go memory. cptr is a +// malloc'd buffer the caller owns; release it via CppFreeString after the copy. +// +// The uintptr->Pointer conversion trips vet's unsafeptr check, which can't tell +// a C heap pointer from Go-managed memory. Safe here: the GC neither tracks nor +// moves the buffer and we dereference it immediately to copy the bytes out. 
+func goStringFromCPtr(cptr uintptr) string { + if cptr == 0 { + return "" + } + p := unsafe.Pointer(cptr) //nolint:govet // C-owned malloc'd buffer, not Go-GC memory (see doc above) + n := 0 + for *(*byte)(unsafe.Add(p, n)) != 0 { + n++ + } + return string(unsafe.Slice((*byte)(p), n)) +} diff --git a/backend/go/face-detect/gofacedetect_test.go b/backend/go/face-detect/gofacedetect_test.go new file mode 100644 index 000000000..54a942fba --- /dev/null +++ b/backend/go/face-detect/gofacedetect_test.go @@ -0,0 +1,230 @@ +package main + +import ( + "encoding/base64" + "os" + "sync" + "testing" + + "github.com/ebitengine/purego" + pb "github.com/mudler/LocalAI/pkg/grpc/proto" + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" +) + +func TestFaceDetect(t *testing.T) { + RegisterFailHandler(Fail) + RunSpecs(t, "face-detect Backend Suite") +} + +var ( + libLoadOnce sync.Once + libLoadErr error +) + +// ensureLibLoaded mirrors main.go's bootstrap so a Go test can drive the C-API +// bridge without spinning up the gRPC server. Records the error (the smoke +// specs skip themselves) when libfacedetect.so is not loadable from cwd +// (LD_LIBRARY_PATH or a symlink in ./). 
+func ensureLibLoaded() error { + libLoadOnce.Do(func() { + libName := os.Getenv("FACEDETECT_LIBRARY") + if libName == "" { + libName = "libfacedetect.so" + } + lib, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL) + if err != nil { + libLoadErr = err + return + } + purego.RegisterLibFunc(&CppAbiVersion, lib, "facedetect_capi_abi_version") + purego.RegisterLibFunc(&CppLoad, lib, "facedetect_capi_load") + purego.RegisterLibFunc(&CppFree, lib, "facedetect_capi_free") + purego.RegisterLibFunc(&CppLastError, lib, "facedetect_capi_last_error") + purego.RegisterLibFunc(&CppFreeString, lib, "facedetect_capi_free_string") + purego.RegisterLibFunc(&CppFreeVec, lib, "facedetect_capi_free_vec") + purego.RegisterLibFunc(&CppEmbedPath, lib, "facedetect_capi_embed_path") + purego.RegisterLibFunc(&CppEmbedRGB, lib, "facedetect_capi_embed_rgb") + purego.RegisterLibFunc(&CppDetectJSON, lib, "facedetect_capi_detect_path_json") + purego.RegisterLibFunc(&CppVerifyPaths, lib, "facedetect_capi_verify_paths") + purego.RegisterLibFunc(&CppAnalyzeJSON, lib, "facedetect_capi_analyze_path_json") + }) + return libLoadErr +} + +var _ = Describe("parseOptions", func() { + It("defaults verify_threshold to 0.35", func() { + o := parseOptions(nil) + Expect(o.verifyThreshold).To(Equal(float32(0.35))) + Expect(o.modelName).To(Equal("")) + }) + + It("parses verify_threshold, threshold alias and model_name", func() { + o := parseOptions([]string{"verify_threshold:0.4", "model_name:buffalo_l", "unknown:x"}) + Expect(o.verifyThreshold).To(Equal(float32(0.4))) + Expect(o.modelName).To(Equal("buffalo_l")) + + o2 := parseOptions([]string{"threshold:0.3"}) + Expect(o2.verifyThreshold).To(Equal(float32(0.3))) + }) + + It("ignores non-positive thresholds and keeps the default", func() { + o := parseOptions([]string{"verify_threshold:0", "threshold:-1"}) + Expect(o.verifyThreshold).To(Equal(float32(0.35))) + }) +}) + +var _ = Describe("normalizeGender", func() { + It("maps M/F codes to 
Man/Woman", func() { + Expect(normalizeGender("M")).To(Equal("Man")) + Expect(normalizeGender("f")).To(Equal("Woman")) + Expect(normalizeGender(" m ")).To(Equal("Man")) + }) + + It("passes empty and unknown codes through", func() { + Expect(normalizeGender("")).To(Equal("")) + Expect(normalizeGender("nonbinary")).To(Equal("nonbinary")) + }) +}) + +var _ = Describe("faceBox.xywh", func() { + It("converts an [x1,y1,x2,y2] box to x/y/width/height", func() { + b := faceBox{Box: []float32{10, 20, 50, 80}} + x, y, w, h := b.xywh() + Expect(x).To(Equal(float32(10))) + Expect(y).To(Equal(float32(20))) + Expect(w).To(Equal(float32(40))) + Expect(h).To(Equal(float32(60))) + }) + + It("returns zeros for a short box", func() { + x, y, w, h := faceBox{Box: []float32{1, 2}}.xywh() + Expect([]float32{x, y, w, h}).To(Equal([]float32{0, 0, 0, 0})) + }) +}) + +var _ = Describe("parseAnalyzeJSON", func() { + It("maps region, age and gender for each face", func() { + doc := `{"faces":[ + {"score":0.997,"box":[10,20,50,80],"age":31,"gender":"M"}, + {"score":0.81,"box":[0,0,40,40],"age":24,"gender":"F"}]}` + faces, err := parseAnalyzeJSON(doc) + Expect(err).ToNot(HaveOccurred()) + Expect(faces).To(HaveLen(2)) + + Expect(faces[0].FaceConfidence).To(BeNumerically("~", 0.997, 1e-4)) + Expect(faces[0].Age).To(BeNumerically("~", 31, 1e-4)) + Expect(faces[0].DominantGender).To(Equal("Man")) + Expect(faces[0].Gender).To(HaveKeyWithValue("Man", float32(1.0))) + Expect(faces[0].Region.W).To(Equal(float32(40))) + Expect(faces[0].Region.H).To(Equal(float32(60))) + + Expect(faces[1].DominantGender).To(Equal("Woman")) + }) + + It("tolerates a missing gender field", func() { + faces, err := parseAnalyzeJSON(`{"faces":[{"score":0.5,"box":[0,0,10,10],"age":40}]}`) + Expect(err).ToNot(HaveOccurred()) + Expect(faces).To(HaveLen(1)) + Expect(faces[0].DominantGender).To(Equal("")) + Expect(faces[0].Gender).To(BeEmpty()) + }) + + It("returns no faces for an empty document", func() { + faces, err := 
parseAnalyzeJSON(`{"faces":[]}`) + Expect(err).ToNot(HaveOccurred()) + Expect(faces).To(BeEmpty()) + }) + + It("returns an error on malformed JSON", func() { + _, err := parseAnalyzeJSON(`{not-json`) + Expect(err).To(HaveOccurred()) + }) +}) + +var _ = Describe("materializeImage", func() { + It("decodes a base64 payload to a temp file", func() { + payload := base64.StdEncoding.EncodeToString([]byte("\xff\xd8\xff\xe0fake-jpeg")) + path, cleanup, err := materializeImage(payload) + Expect(err).ToNot(HaveOccurred()) + defer cleanup() + data, rerr := os.ReadFile(path) + Expect(rerr).ToNot(HaveOccurred()) + Expect(data).To(Equal([]byte("\xff\xd8\xff\xe0fake-jpeg"))) + }) + + It("strips a data: URI prefix before decoding", func() { + payload := "data:image/png;base64," + base64.StdEncoding.EncodeToString([]byte("hello")) + path, cleanup, err := materializeImage(payload) + Expect(err).ToNot(HaveOccurred()) + defer cleanup() + data, rerr := os.ReadFile(path) + Expect(rerr).ToNot(HaveOccurred()) + Expect(data).To(Equal([]byte("hello"))) + }) + + It("uses an existing path as-is", func() { + tmp, err := os.CreateTemp("", "face-detect-fixture-*.bin") + Expect(err).ToNot(HaveOccurred()) + defer func() { _ = os.Remove(tmp.Name()) }() + Expect(tmp.Close()).To(Succeed()) + + path, cleanup, err := materializeImage(tmp.Name()) + Expect(err).ToNot(HaveOccurred()) + defer cleanup() + Expect(path).To(Equal(tmp.Name())) + }) + + It("errors on input that is neither a path nor base64", func() { + _, _, err := materializeImage("not base64!!!") + Expect(err).To(HaveOccurred()) + }) +}) + +// The specs below exercise the real C-API end to end. They run only when both a +// model GGUF and a test image are provided, and skip cleanly otherwise so the +// suite stays green without large assets. 
+var _ = Describe("FaceDetect end-to-end", Ordered, func() { + var ( + f *FaceDetect + modelPath = os.Getenv("FACEDETECT_BACKEND_TEST_MODEL") + imagePath = os.Getenv("FACEDETECT_BACKEND_TEST_IMAGE") + ) + + BeforeAll(func() { + if modelPath == "" || imagePath == "" { + Skip("set FACEDETECT_BACKEND_TEST_MODEL and FACEDETECT_BACKEND_TEST_IMAGE to run the e2e specs") + } + if err := ensureLibLoaded(); err != nil { + Skip("libfacedetect.so not loadable: " + err.Error()) + } + f = &FaceDetect{} + Expect(f.Load(&pb.ModelOptions{ModelFile: modelPath})).To(Succeed()) + }) + + It("embeds the primary face in an image", func() { + emb, err := f.Embeddings(&pb.PredictOptions{Images: []string{imagePath}}) + Expect(err).ToNot(HaveOccurred()) + Expect(emb).ToNot(BeEmpty()) + }) + + It("detects at least one face", func() { + resp, err := f.Detect(&pb.DetectOptions{Src: imagePath}) + Expect(err).ToNot(HaveOccurred()) + Expect(resp.Detections).ToNot(BeEmpty()) + Expect(resp.Detections[0].ClassName).To(Equal("face")) + }) + + It("verifies an image against itself as the same identity", func() { + resp, err := f.FaceVerify(&pb.FaceVerifyRequest{Img1: imagePath, Img2: imagePath}) + Expect(err).ToNot(HaveOccurred()) + Expect(resp.Verified).To(BeTrue()) + Expect(resp.Distance).To(BeNumerically("<=", resp.Threshold)) + }) + + It("analyzes age/gender for each face", func() { + resp, err := f.FaceAnalyze(&pb.FaceAnalyzeRequest{Img: imagePath}) + Expect(err).ToNot(HaveOccurred()) + Expect(resp.Faces).ToNot(BeEmpty()) + }) +}) diff --git a/backend/go/face-detect/main.go b/backend/go/face-detect/main.go new file mode 100644 index 000000000..dc52f1e60 --- /dev/null +++ b/backend/go/face-detect/main.go @@ -0,0 +1,65 @@ +package main + +// Started internally by LocalAI - one gRPC server per loaded model. +// +// Loads libfacedetect.so via purego and registers the flat C-API entry points +// declared in facedetect_capi.h. 
The library name can be overridden with +// FACEDETECT_LIBRARY (mirrors the VOICEDETECT_LIBRARY / PARAKEET_LIBRARY +// convention in the sibling backends); the default looks for the .so next to +// this binary (resolved via LD_LIBRARY_PATH by run.sh). +import ( + "flag" + "fmt" + "os" + + "github.com/ebitengine/purego" + grpc "github.com/mudler/LocalAI/pkg/grpc" +) + +var ( + addr = flag.String("addr", "localhost:50051", "the address to connect to") +) + +type LibFuncs struct { + FuncPtr any + Name string +} + +func main() { + libName := os.Getenv("FACEDETECT_LIBRARY") + if libName == "" { + libName = "libfacedetect.so" + } + + lib, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL) + if err != nil { + panic(fmt.Errorf("face-detect: dlopen %q: %w", libName, err)) + } + + // Bound 1:1 to facedetect_capi.h. char*/float* returns are registered as + // uintptr so the raw pointer can be freed via the matching capi free fn. + libFuncs := []LibFuncs{ + {&CppAbiVersion, "facedetect_capi_abi_version"}, + {&CppLoad, "facedetect_capi_load"}, + {&CppFree, "facedetect_capi_free"}, + {&CppLastError, "facedetect_capi_last_error"}, + {&CppFreeString, "facedetect_capi_free_string"}, + {&CppFreeVec, "facedetect_capi_free_vec"}, + {&CppEmbedPath, "facedetect_capi_embed_path"}, + {&CppEmbedRGB, "facedetect_capi_embed_rgb"}, + {&CppDetectJSON, "facedetect_capi_detect_path_json"}, + {&CppVerifyPaths, "facedetect_capi_verify_paths"}, + {&CppAnalyzeJSON, "facedetect_capi_analyze_path_json"}, + } + for _, lf := range libFuncs { + purego.RegisterLibFunc(lf.FuncPtr, lib, lf.Name) + } + + fmt.Fprintf(os.Stderr, "[face-detect] ABI=%d\n", CppAbiVersion()) + + flag.Parse() + + if err := grpc.StartServer(*addr, &FaceDetect{}); err != nil { + panic(err) + } +} diff --git a/backend/go/face-detect/options.go b/backend/go/face-detect/options.go new file mode 100644 index 000000000..51951bfd7 --- /dev/null +++ b/backend/go/face-detect/options.go @@ -0,0 +1,47 @@ +package main + +import 
( + "strconv" + "strings" +) + +// defaultVerifyThreshold is the cosine-distance cutoff used when a request does +// not set one. Matches the insightface buffalo_l ArcFace R50 default the Python +// face backend ships with so the two implementations agree on verdicts out of +// the box. +const defaultVerifyThreshold float32 = 0.35 + +// loadOptions holds the parsed model-level options for face-detect. +type loadOptions struct { + verifyThreshold float32 + modelName string +} + +func splitOption(o string) (key, value string, ok bool) { + i := strings.Index(o, ":") + if i < 0 { + return "", "", false + } + return strings.TrimSpace(o[:i]), strings.TrimSpace(o[i+1:]), true +} + +// parseOptions reads the backend "key:value" option slice. Unknown keys are +// ignored. Defaults: verify_threshold 0.35, model_name derived from the file. +func parseOptions(opts []string) loadOptions { + o := loadOptions{verifyThreshold: defaultVerifyThreshold} + for _, oo := range opts { + key, value, ok := splitOption(oo) + if !ok { + continue + } + switch key { + case "verify_threshold", "threshold": + if f, err := strconv.ParseFloat(value, 32); err == nil && f > 0 { + o.verifyThreshold = float32(f) + } + case "model_name": + o.modelName = value + } + } + return o +} diff --git a/backend/go/face-detect/package.sh b/backend/go/face-detect/package.sh new file mode 100644 index 000000000..36ffa8993 --- /dev/null +++ b/backend/go/face-detect/package.sh @@ -0,0 +1,68 @@ +#!/bin/bash +# +# Bundle the face-detect-grpc binary, libfacedetect.so, the core runtime libs +# (libc/libstdc++/libgomp + ld.so) and the GPU runtime for the active BUILD_TYPE +# so the package is self-contained. Mirrors backend/go/voice-detect/package.sh; +# run.sh routes the (CGO_ENABLED=0) binary through lib/ld.so so the packaged libc +# is used instead of the host's. + +set -e + +CURDIR=$(dirname "$(realpath "$0")") +REPO_ROOT="${CURDIR}/../../.." 
+ +mkdir -p "$CURDIR/package/lib" + +cp -avf "$CURDIR/face-detect-grpc" "$CURDIR/package/" +cp -avf "$CURDIR/run.sh" "$CURDIR/package/" + +# libfacedetect.so + any soname symlinks. purego.Dlopen resolves it via +# LD_LIBRARY_PATH, which run.sh points at lib/. +cp -avf "$CURDIR"/libfacedetect.so* "$CURDIR/package/lib/" 2>/dev/null || { + echo "ERROR: libfacedetect.so not found in $CURDIR, run 'make' first" >&2 + exit 1 +} + +# Detect architecture and copy the core runtime libs libfacedetect.so links +# against, plus the matching dynamic loader as lib/ld.so. +if [ -f "/lib64/ld-linux-x86-64.so.2" ]; then + echo "Detected x86_64 architecture, copying x86_64 libraries..." + cp -arfLv /lib64/ld-linux-x86-64.so.2 "$CURDIR/package/lib/ld.so" + cp -arfLv /lib/x86_64-linux-gnu/libc.so.6 "$CURDIR/package/lib/libc.so.6" + cp -arfLv /lib/x86_64-linux-gnu/libgcc_s.so.1 "$CURDIR/package/lib/libgcc_s.so.1" + cp -arfLv /lib/x86_64-linux-gnu/libstdc++.so.6 "$CURDIR/package/lib/libstdc++.so.6" + cp -arfLv /lib/x86_64-linux-gnu/libm.so.6 "$CURDIR/package/lib/libm.so.6" + cp -arfLv /lib/x86_64-linux-gnu/libgomp.so.1 "$CURDIR/package/lib/libgomp.so.1" + cp -arfLv /lib/x86_64-linux-gnu/libdl.so.2 "$CURDIR/package/lib/libdl.so.2" + cp -arfLv /lib/x86_64-linux-gnu/librt.so.1 "$CURDIR/package/lib/librt.so.1" + cp -arfLv /lib/x86_64-linux-gnu/libpthread.so.0 "$CURDIR/package/lib/libpthread.so.0" +elif [ -f "/lib/ld-linux-aarch64.so.1" ]; then + echo "Detected ARM64 architecture, copying ARM64 libraries..." 
+ cp -arfLv /lib/ld-linux-aarch64.so.1 "$CURDIR/package/lib/ld.so" + cp -arfLv /lib/aarch64-linux-gnu/libc.so.6 "$CURDIR/package/lib/libc.so.6" + cp -arfLv /lib/aarch64-linux-gnu/libgcc_s.so.1 "$CURDIR/package/lib/libgcc_s.so.1" + cp -arfLv /lib/aarch64-linux-gnu/libstdc++.so.6 "$CURDIR/package/lib/libstdc++.so.6" + cp -arfLv /lib/aarch64-linux-gnu/libm.so.6 "$CURDIR/package/lib/libm.so.6" + cp -arfLv /lib/aarch64-linux-gnu/libgomp.so.1 "$CURDIR/package/lib/libgomp.so.1" + cp -arfLv /lib/aarch64-linux-gnu/libdl.so.2 "$CURDIR/package/lib/libdl.so.2" + cp -arfLv /lib/aarch64-linux-gnu/librt.so.1 "$CURDIR/package/lib/librt.so.1" + cp -arfLv /lib/aarch64-linux-gnu/libpthread.so.0 "$CURDIR/package/lib/libpthread.so.0" +elif [ "$(uname -s)" = "Darwin" ]; then + echo "Detected Darwin" +else + echo "Error: Could not detect architecture" + exit 1 +fi + +# Package GPU libraries (CUDA/ROCm/Intel/Vulkan loader + ICDs + drivers) based on +# BUILD_TYPE so the backend can reach the GPU without the runtime base image +# shipping those drivers. +GPU_LIB_SCRIPT="${REPO_ROOT}/scripts/build/package-gpu-libs.sh" +if [ -f "$GPU_LIB_SCRIPT" ]; then + echo "Packaging GPU libraries for BUILD_TYPE=${BUILD_TYPE:-cpu}..." + source "$GPU_LIB_SCRIPT" "$CURDIR/package/lib" + package_gpu_libs +fi + +echo "Packaging completed successfully" +ls -liah "$CURDIR/package/" "$CURDIR/package/lib/" diff --git a/backend/go/face-detect/run.sh b/backend/go/face-detect/run.sh new file mode 100644 index 000000000..a6cc59034 --- /dev/null +++ b/backend/go/face-detect/run.sh @@ -0,0 +1,16 @@ +#!/bin/bash +set -e + +CURDIR=$(dirname "$(realpath "$0")") + +export LD_LIBRARY_PATH="$CURDIR/lib:$CURDIR:${LD_LIBRARY_PATH:-}" + +# If a self-contained ld.so was packaged, route through it so the packaged +# libc / libstdc++ are used instead of the host's (matches the voice-detect / +# whisper / parakeet backends' runtime layout). 
+if [ -f "$CURDIR/lib/ld.so" ]; then + echo "Using lib/ld.so" + exec "$CURDIR/lib/ld.so" "$CURDIR/face-detect-grpc" "$@" +fi + +exec "$CURDIR/face-detect-grpc" "$@" diff --git a/backend/go/face-detect/test.sh b/backend/go/face-detect/test.sh new file mode 100644 index 000000000..da290c343 --- /dev/null +++ b/backend/go/face-detect/test.sh @@ -0,0 +1,15 @@ +#!/bin/bash +set -e + +CURDIR=$(dirname "$(realpath "$0")") +cd "$CURDIR" + +echo "Running face-detect backend tests..." + +# The pure-Go parsing specs always run. The embed/detect/verify/analyze smoke +# specs run only when a model + image are provided via +# FACEDETECT_BACKEND_TEST_MODEL and FACEDETECT_BACKEND_TEST_IMAGE; otherwise they +# auto-skip. +LD_LIBRARY_PATH="$CURDIR:${LD_LIBRARY_PATH:-}" go test -v -timeout 1200s . + +echo "face-detect tests completed."