LocalAI/core/http/endpoints/localai/depth.go

package localai

import (
	"encoding/base64"

	"github.com/labstack/echo/v4"
	"github.com/mudler/LocalAI/core/backend"
	"github.com/mudler/LocalAI/core/config"
	"github.com/mudler/LocalAI/core/http/middleware"
	"github.com/mudler/LocalAI/core/schema"
	"github.com/mudler/LocalAI/pkg/grpc/proto"
	"github.com/mudler/LocalAI/pkg/model"
	"github.com/mudler/xlog"
)

// DepthEndpoint is the LocalAI Depth endpoint exposing the full Depth Anything 3
// output (per-pixel metric depth + confidence + sky, camera pose, 3D point cloud
// and optional glb/COLMAP exports).
//
// The returned handler reads the parsed *schema.DepthRequest and the resolved
// *config.ModelConfig from the echo context (under the middleware context
// keys), decodes the input image, forwards the request to backend.Depth and
// replies with a schema.DepthResponse encoded as JSON. A missing or nil
// request, an empty model name, or a missing/nil model config results in
// echo.ErrBadRequest. Image decoding errors are returned unchanged; backend
// errors are passed through mapBackendError before being returned.
// @Summary Estimates per-pixel depth (and optionally pose/points) from an image.
// @Tags depth
// @Param request body schema.DepthRequest true "query params"
// @Success 200 {object} schema.DepthResponse "Response"
// @Router /v1/depth [post]
func DepthEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) echo.HandlerFunc {
	return func(c echo.Context) error {

		// A type assertion succeeds for a typed nil pointer too, so the nil
		// check must come before input.Model is dereferenced.
		input, ok := c.Get(middleware.CONTEXT_LOCALS_KEY_LOCALAI_REQUEST).(*schema.DepthRequest)
		if !ok || input == nil || input.Model == "" {
			return echo.ErrBadRequest
		}

		cfg, ok := c.Get(middleware.CONTEXT_LOCALS_KEY_MODEL_CONFIG).(*config.ModelConfig)
		if !ok || cfg == nil {
			return echo.ErrBadRequest
		}

		xlog.Debug("Depth", "image", input.Image, "backend", cfg.Backend)

		image, err := decodeImageInput(input.Image)
		if err != nil {
			return err
		}

		// When the caller hasn't asked for any specific subset, default to
		// depth, confidence, pose and sky so a bare request is still useful.
		// The point cloud is NOT part of this default: it is only produced
		// when IncludePoints is set explicitly.
		includeDepth := input.IncludeDepth
		includeConfidence := input.IncludeConfidence
		includePose := input.IncludePose
		includeSky := input.IncludeSky
		includePoints := input.IncludePoints
		if !includeDepth && !includeConfidence && !includePose && !includeSky && !includePoints {
			includeDepth = true
			includeConfidence = true
			includePose = true
			includeSky = true
		}

		req := &proto.DepthRequest{
			Src:               image,
			Dst:               input.Dst,
			IncludeDepth:      includeDepth,
			IncludeConfidence: includeConfidence,
			IncludePose:       includePose,
			IncludeSky:        includeSky,
			IncludePoints:     includePoints,
			PointsConfThresh:  input.PointsConfThresh,
			Exports:           input.Exports,
		}

		// Use the request context so the backend call is tied to the
		// lifetime of the HTTP request.
		res, err := backend.Depth(c.Request().Context(), req, ml, appConfig, *cfg)
		if err != nil {
			return mapBackendError(err)
		}

		response := schema.DepthResponse{
			Width:       res.GetWidth(),
			Height:      res.GetHeight(),
			Depth:       res.GetDepth(),
			Confidence:  res.GetConfidence(),
			Sky:         res.GetSky(),
			Extrinsics:  res.GetExtrinsics(),
			Intrinsics:  res.GetIntrinsics(),
			NumPoints:   res.GetNumPoints(),
			Points:      res.GetPoints(),
			ExportPaths: res.GetExportPaths(),
			IsMetric:    res.GetIsMetric(),
		}
		// Point colors arrive as raw bytes; they are base64-encoded for the
		// JSON response and left unset when the backend returned none.
		if len(res.GetPointColors()) > 0 {
			response.PointColors = base64.StdEncoding.EncodeToString(res.GetPointColors())
		}

		return c.JSON(200, response)
	}
}