LocalAI/pkg/model/model.go

package model

import (
	"sync"
	"time"

	grpc "github.com/mudler/LocalAI/pkg/grpc"
	process "github.com/mudler/go-processmanager"
)

// healthCheckTTL is the duration for which a successful health check is cached.
// Subsequent checkIsLoaded calls within this window skip the gRPC round-trip,
// avoiding serialization of concurrent requests behind ml.mu.Lock().
// Model.IsRecentlyHealthy compares the time elapsed since the last
// Model.MarkHealthy call against this value.
const healthCheckTTL = 30 * time.Second

// Model is a handle to a single model backend.
//
// ID is the only exported field and is serialized to JSON as "id". address
// is the target handed to grpc.NewClient when GRPC has to create the client.
// client is either supplied up front by NewModelWithClient or created on
// first use by GRPC. process is the handle passed to NewModel and is left
// nil by NewModelWithClient. lastHealthCheck is the time of the most recent
// MarkHealthy call and stays zero until then.
//
// The embedded sync.Mutex is locked by the methods in this file around
// accesses to nodeID and lastHealthCheck and around client creation in GRPC.
// Because it is embedded, Lock and Unlock are part of Model's method set; a
// Model must therefore be shared by pointer and never copied.
type Model struct {
	ID              string `json:"id"`
	address         string
	client          grpc.Backend
	process         *process.Process
	lastHealthCheck time.Time
	// nodeID is the ID of the distributed-mode worker node that owns this
	// model handle, when set. Empty for in-process models. Best-effort:
	// because the distributed LoadModel path overwrites the per-modelID
	// store entry on every routing decision, this value reflects the
	// most-recently-routed node for the model, not necessarily the node
	// that served a specific in-flight request. Used by the optional
	// X-LocalAI-Node response header (--expose-node-header).
	nodeID string
	sync.Mutex
}

// NewModel returns a Model handle for the backend identified by ID, reachable
// at address and associated with the given process handle. The gRPC client is
// left unset here; GRPC creates it the first time it is requested.
func NewModel(ID, address string, process *process.Process) *Model {
	m := new(Model)
	m.ID = ID
	m.address = address
	m.process = process
	return m
}

// NewModelWithClient returns a Model handle whose gRPC client is already set,
// so GRPC hands back that client instead of dialing address itself. No
// process handle is attached. Used in distributed mode, where the client is
// wrapped with file staging.
func NewModelWithClient(ID, address string, client grpc.Backend) *Model {
	m := new(Model)
	m.ID = ID
	m.address = address
	m.client = client
	return m
}

// SetNodeID stores id as the distributed-mode worker node that owns this
// model handle. The write happens under the model's mutex, so it may be
// called from any goroutine.
func (m *Model) SetNodeID(id string) {
	m.Lock()
	m.nodeID = id
	m.Unlock()
}

// NodeID reports the distributed-mode worker node ID recorded for this model
// handle. It returns "" when no node has been recorded (unknown or
// in-process). The value is best-effort; see the comment on the nodeID field.
func (m *Model) NodeID() string {
	m.Lock()
	id := m.nodeID
	m.Unlock()
	return id
}

// Process returns the process handle stored on the model: whatever was passed
// to NewModel, or nil for models created with NewModelWithClient. The field
// is read without taking the model's mutex.
func (m *Model) Process() *process.Process {
	return m.process
}

// IsRecentlyHealthy reports whether MarkHealthy was called less than
// healthCheckTTL ago. A model that has never been marked healthy always
// yields false.
func (m *Model) IsRecentlyHealthy() bool {
	// Snapshot the timestamp under the lock; the comparison itself only
	// needs the copied value.
	m.Lock()
	last := m.lastHealthCheck
	m.Unlock()

	if last.IsZero() {
		return false
	}
	return time.Since(last) < healthCheckTTL
}

// MarkHealthy stamps the model with the current time as its last successful
// health check, which IsRecentlyHealthy later compares against the TTL.
func (m *Model) MarkHealthy() {
	m.Lock()
	// Read the clock inside the critical section so concurrent callers
	// store their timestamps in the same order they acquired the lock.
	m.lastHealthCheck = time.Now()
	m.Unlock()
}

// GRPC returns the gRPC backend client for this model, creating it on first
// use.
//
// If a client already exists (supplied through NewModelWithClient or created
// by an earlier call) it is returned unchanged and parallel and wd are
// ignored. Otherwise a client is created for the model's address with
// grpc.NewClient; the watchdog is enabled only when wd is non-nil.
//
// The existence check and the creation happen under the model's mutex, so
// concurrent first callers all receive the same client and the read of
// m.client never races with its assignment. The mutex is not reentrant:
// callers must not already hold the model's lock when calling GRPC.
func (m *Model) GRPC(parallel bool, wd *WatchDog) grpc.Backend {
	m.Lock()
	defer m.Unlock()

	if m.client == nil {
		m.client = grpc.NewClient(m.address, parallel, wd, wd != nil)
	}
	return m.client
}