LocalAI/core/backend/stores.go

package backend

import (
	"context"
	"fmt"
	"time"

	"github.com/mudler/LocalAI/core/config"
	"github.com/mudler/LocalAI/core/trace"

	"github.com/mudler/LocalAI/pkg/grpc"
	"github.com/mudler/LocalAI/pkg/model"
	"github.com/mudler/LocalAI/pkg/store"
)

// VectorStore is the narrowed KNN store used by the router's embedding
// cache. Search returns the top-1 match (cosine similarity in [-1, 1])
// and the serialised payload, or ok=false on a clean miss.
type VectorStore interface {
	Search(ctx context.Context, vec []float32) (similarity float64, payload []byte, ok bool, err error)
	Insert(ctx context.Context, vec []float32, payload []byte) error
}

// NewVectorStore returns a VectorStore backed by the local-store
// gRPC backend, namespaced by storeName so two routers don't collide.
func NewVectorStore(loader *model.ModelLoader, appConfig *config.ApplicationConfig, storeName string) VectorStore {
	if storeName == "" {
		return nil
	}
	return &localVectorStore{loader: loader, appConfig: appConfig, storeName: storeName}
}

type localVectorStore struct {
	loader    *model.ModelLoader
	appConfig *config.ApplicationConfig
	storeName string
}

func (s *localVectorStore) backend(_ context.Context) (grpc.Backend, error) {
	return StoreBackend(s.loader, s.appConfig, s.storeName, "")
}

func (s *localVectorStore) Search(ctx context.Context, vec []float32) (sim float64, payload []byte, ok bool, err error) {
	start := time.Now()
	outcome := "hit"
	defer func() {
		s.recordTrace(start, "search", len(vec), sim, outcome, err)
	}()
	be, berr := s.backend(ctx)
	if berr != nil {
		outcome = "backend_load_error"
		return 0, nil, false, fmt.Errorf("vector store load: %w", berr)
	}
	_, values, similarities, ferr := store.Find(ctx, be, vec, 1)
	if ferr != nil {
		outcome = "find_error"
		return 0, nil, false, fmt.Errorf("vector store find: %w", ferr)
	}
	if len(values) == 0 || len(similarities) == 0 {
		outcome = "miss"
		return 0, nil, false, nil
	}
	return float64(similarities[0]), values[0], true, nil
}

func (s *localVectorStore) Insert(ctx context.Context, vec []float32, payload []byte) (err error) {
	start := time.Now()
	outcome := "ok"
	defer func() {
		s.recordTrace(start, "insert", len(vec), 0, outcome, err)
	}()
	be, berr := s.backend(ctx)
	if berr != nil {
		outcome = "backend_load_error"
		return fmt.Errorf("vector store load: %w", berr)
	}
	if serr := store.SetSingle(ctx, be, vec, payload); serr != nil {
		outcome = "insert_error"
		return serr
	}
	return nil
}

// recordTrace surfaces vector-store calls in /api/backend-traces, including
// the backend-load-failure path that otherwise vanishes into an xlog.Warn.
// modelName uses the store namespace (e.g. "router-cache-smart-router") so
// admins can tell which router's cache misbehaved; the backend is always
// "local-store" and can't disambiguate.
func (s *localVectorStore) recordTrace(start time.Time, op string, vecDim int, sim float64, outcome string, err error) {
	if s.appConfig == nil || !s.appConfig.EnableTracing {
		return
	}
	trace.InitBackendTracingIfEnabled(s.appConfig.TracingMaxItems, s.appConfig.TracingMaxBodyBytes)
	errStr := ""
	if err != nil {
		errStr = err.Error()
	}
	summary := op + " " + outcome
	if op == "search" && outcome == "hit" {
		summary = fmt.Sprintf("search hit (sim=%.3f)", sim)
	}
	data := map[string]any{
		"op":         op,
		"outcome":    outcome,
		"vector_dim": vecDim,
	}
	// Only include similarity for a real neighbor — miss/empty_store would
	// otherwise render "similarity: 0" and read as a measured value.
	if op == "search" && outcome == "hit" {
		data["similarity"] = sim
	}
	trace.RecordBackendTrace(trace.BackendTrace{
		Timestamp: start,
		Duration:  time.Since(start),
		Type:      trace.BackendTraceVectorStore,
		ModelName: s.storeName,
		Backend:   model.LocalStoreBackend,
		Summary:   summary,
		Error:     errStr,
		Data:      data,
	})
}

func StoreBackend(sl *model.ModelLoader, appConfig *config.ApplicationConfig, storeName string, backend string) (grpc.Backend, error) {
	if backend == "" {
		backend = model.LocalStoreBackend
	}
	// ModelLoader caches backend processes by `modelID`, not by the `model`
	// passed via WithModel. Without a distinct modelID, every StoreBackend
	// call collapses to the same `modelID=""` cache slot — face (512-D) and
	// voice (192-D) biometrics would then share the same local-store process
	// and the second enrollment would fail with
	//   Try to add key with length N when existing length is M
	// Use the store namespace as modelID so each namespace gets its own
	// process instance and its own in-memory Store{}.
	sc := []model.Option{
		model.WithBackendString(backend),
		model.WithModelID(storeName),
		model.WithModel(storeName),
	}

	return sl.Load(sc...)
}