LocalAI/core/backend/score.go

package backend

import (
	"context"
	"fmt"
	"time"

	"github.com/mudler/LocalAI/core/config"
	"github.com/mudler/LocalAI/core/trace"
	"github.com/mudler/LocalAI/pkg/grpc"
	pb "github.com/mudler/LocalAI/pkg/grpc/proto"
	model "github.com/mudler/LocalAI/pkg/model"
)

// ScoreOptions controls a single Score request. Both fields are
// forwarded to the backend in the pb.ScoreRequest built by ModelScore.
type ScoreOptions struct {
	// IncludeTokenLogprobs returns per-token log-probability detail for
	// each candidate. Off by default — the joint LogProb is enough for
	// ranking; callers that need calibration / entropy over the token
	// stream opt in. When false, CandidateScore.Tokens is left nil even
	// if the backend response carries token detail.
	IncludeTokenLogprobs bool
	// LengthNormalize asks the backend to divide the joint log-prob by
	// the candidate's token count. Useful when comparing candidates of
	// different lengths — without it, longer candidates score lower by
	// default.
	LengthNormalize bool
}

// CandidateScore is the per-candidate result. Mirrors pb.CandidateScore
// but avoids leaking the proto type to consumers.
//
// LogProb, LengthNormalizedLogProb and NumTokens are copied verbatim
// from the backend response. Tokens is only populated when per-token
// detail was requested and the backend returned at least one token for
// the candidate; otherwise it is nil.
type CandidateScore struct {
	LogProb                 float64
	LengthNormalizedLogProb float64
	NumTokens               int
	Tokens                  []TokenLogProb
}

// TokenLogProb pairs a single token with its log-probability, as
// copied from the backend response. It is the element type of
// CandidateScore.Tokens.
type TokenLogProb struct {
	Token   string
	LogProb float64
}

// Scorer evaluates a model's joint log-probability of each candidate
// continuation given a shared prompt. NewScorer returns the
// implementation backed by a model-loaded backend; the router's score
// classifier consumes this for multi-label policy selection.
type Scorer interface {
	// Score scores the given candidates against prompt and returns the
	// per-candidate results, or an error if scoring could not be done.
	Score(ctx context.Context, prompt string, candidates []string) ([]CandidateScore, error)
}

// NewScorer binds (loader, modelConfig, appConfig) into a Scorer.
//
// Nothing is loaded here: every Score call goes through ModelScore,
// which asks the loader for the model at that point. The returned
// Scorer is always non-nil, so an unloadable model only shows up as an
// error from Score — callers that need to detect "model not loadable"
// up front should look up the config first.
func NewScorer(loader *model.ModelLoader, modelConfig config.ModelConfig, appConfig *config.ApplicationConfig) Scorer {
	scorer := modelScorer{
		loader:      loader,
		modelConfig: modelConfig,
		appConfig:   appConfig,
	}
	return &scorer
}

// modelScorer is the Scorer returned by NewScorer. It only stores the
// loader and the two configs; they are handed to ModelScore on every
// Score call.
type modelScorer struct {
	loader      *model.ModelLoader
	modelConfig config.ModelConfig
	appConfig   *config.ApplicationConfig
}

// Score implements Scorer. It delegates to ModelScore with length
// normalization switched on and per-token detail left off, then runs
// the returned closure with ctx. Errors from either step are returned
// unchanged.
func (m *modelScorer) Score(ctx context.Context, prompt string, candidates []string) ([]CandidateScore, error) {
	opts := ScoreOptions{LengthNormalize: true}

	run, err := ModelScore(prompt, candidates, opts, m.loader, m.modelConfig, m.appConfig)
	if err != nil {
		return nil, err
	}

	return run(ctx)
}

// ModelScore validates the request, loads the backend for modelConfig
// and returns a closure that scores `candidates` against `prompt`. The
// closure is bound to the loaded model so callers can keep it around
// for repeat scoring within the same request without re-resolving the
// backend.
//
// An error is returned (and no closure) when candidates is empty, when
// the loader cannot provide the model, or when the loaded model does
// not expose the grpc.Backend interface. Load failures are additionally
// reported through recordModelLoadFailure.
//
// The closure issues the Score RPC with the given options and converts
// the response into []CandidateScore. When appConfig.EnableTracing is
// set it also records a backend trace for the call, whether or not the
// RPC succeeded. On RPC error the closure returns (nil, err).
func ModelScore(prompt string, candidates []string, opts ScoreOptions, loader *model.ModelLoader, modelConfig config.ModelConfig, appConfig *config.ApplicationConfig) (func(ctx context.Context) ([]CandidateScore, error), error) {
	// Validate before touching the loader, so that a request that can
	// never be scored neither triggers a model load nor gets recorded
	// as a model load failure.
	if len(candidates) == 0 {
		return nil, fmt.Errorf("Score: candidates must be non-empty")
	}

	modelOpts := ModelOptions(modelConfig, appConfig)
	inferenceModel, err := loader.Load(modelOpts...)
	if err != nil {
		recordModelLoadFailure(appConfig, modelConfig.Name, modelConfig.Backend, err, nil)
		return nil, err
	}
	b, ok := inferenceModel.(grpc.Backend)
	if !ok {
		return nil, fmt.Errorf("scoring not supported by backend %q", modelConfig.Backend)
	}

	return func(ctx context.Context) ([]CandidateScore, error) {
		// Surface score calls in the Traces UI alongside the LLM calls
		// they typically gate (router classifier, eval scoring). Without
		// this, a router-classified request shows only the downstream LLM
		// trace with no record of the classification that picked it.
		var startTime time.Time
		if appConfig.EnableTracing {
			trace.InitBackendTracingIfEnabled(appConfig.TracingMaxItems, appConfig.TracingMaxBodyBytes)
			startTime = time.Now()
		}
		resp, err := b.Score(ctx, &pb.ScoreRequest{
			Prompt:               prompt,
			Candidates:           candidates,
			IncludeTokenLogprobs: opts.IncludeTokenLogprobs,
			LengthNormalize:      opts.LengthNormalize,
		})
		// Converted before the error check so the trace can carry
		// whatever the backend returned; a nil resp converts to nil.
		results := scoreResponseToCandidates(resp, opts.IncludeTokenLogprobs)
		if appConfig.EnableTracing {
			errStr := ""
			if err != nil {
				errStr = err.Error()
			}
			trace.RecordBackendTrace(trace.BackendTrace{
				Timestamp: startTime,
				Duration:  time.Since(startTime),
				Type:      trace.BackendTraceScore,
				ModelName: modelConfig.Name,
				Backend:   modelConfig.Backend,
				Summary:   trace.TruncateString(prompt, 200),
				Error:     errStr,
				Data: map[string]any{
					// Copy candidates so the trace buffer doesn't pin a
					// caller-owned slice for the lifetime of the ring.
					"candidates": append([]string(nil), candidates...),
					"results":    results,
				},
			})
		}
		if err != nil {
			return nil, err
		}
		return results, nil
	}, nil
}

// scoreResponseToCandidates translates the wire-format pb response into
// the package's own CandidateScore values. It lives outside ModelScore
// so the closure there stays small and the conversion can be
// unit-tested without a real backend.
//
// A nil resp yields a nil slice; a non-nil resp always yields a non-nil
// slice with one entry per response candidate. Per-token detail is
// copied only when includeTokens is true and the candidate carries at
// least one token; otherwise Tokens stays nil.
func scoreResponseToCandidates(resp *pb.ScoreResponse, includeTokens bool) []CandidateScore {
	if resp == nil {
		return nil
	}

	converted := make([]CandidateScore, 0, len(resp.Candidates))
	for _, cand := range resp.Candidates {
		var tokens []TokenLogProb
		if includeTokens && len(cand.Tokens) > 0 {
			tokens = make([]TokenLogProb, 0, len(cand.Tokens))
			for _, tok := range cand.Tokens {
				tokens = append(tokens, TokenLogProb{Token: tok.Token, LogProb: tok.LogProb})
			}
		}
		converted = append(converted, CandidateScore{
			LogProb:                 cand.LogProb,
			LengthNormalizedLogProb: cand.LengthNormalizedLogProb,
			NumTokens:               int(cand.NumTokens),
			Tokens:                  tokens,
		})
	}
	return converted
}