package model import ( "sync" "time" grpc "github.com/mudler/LocalAI/pkg/grpc" process "github.com/mudler/go-processmanager" ) // healthCheckTTL is the duration for which a successful health check is cached. // Subsequent checkIsLoaded calls within this window skip the gRPC round-trip, // avoiding serialization of concurrent requests behind ml.mu.Lock(). const healthCheckTTL = 30 * time.Second type Model struct { ID string `json:"id"` address string client grpc.Backend process *process.Process lastHealthCheck time.Time // nodeID is the ID of the distributed-mode worker node that owns this // model handle, when set. Empty for in-process models. Best-effort: // because the distributed LoadModel path overwrites the per-modelID // store entry on every routing decision, this value reflects the // most-recently-routed node for the model, not necessarily the node // that served a specific in-flight request. Used by the optional // X-LocalAI-Node response header (--expose-node-header). nodeID string sync.Mutex } func NewModel(ID, address string, process *process.Process) *Model { return &Model{ ID: ID, address: address, process: process, } } // NewModelWithClient creates a Model with a pre-configured gRPC client. // Used in distributed mode where the client is wrapped with file staging. func NewModelWithClient(ID, address string, client grpc.Backend) *Model { return &Model{ ID: ID, address: address, client: client, } } // SetNodeID records the distributed-mode worker node that owns this model // handle. Safe to call from any goroutine. func (m *Model) SetNodeID(id string) { m.Lock() defer m.Unlock() m.nodeID = id } // NodeID returns the distributed-mode worker node ID associated with this // model handle, or "" if unknown / in-process. See the nodeID field comment // for the best-effort caveat. func (m *Model) NodeID() string { m.Lock() defer m.Unlock() return m.nodeID } func (m *Model) Process() *process.Process { return m.process } // IsRecentlyHealthy returns true if the model passed a health check within the TTL. func (m *Model) IsRecentlyHealthy() bool { m.Lock() defer m.Unlock() return !m.lastHealthCheck.IsZero() && time.Since(m.lastHealthCheck) < healthCheckTTL } // MarkHealthy records the current time as the last successful health check. func (m *Model) MarkHealthy() { m.Lock() defer m.Unlock() m.lastHealthCheck = time.Now() } func (m *Model) GRPC(parallel bool, wd *WatchDog) grpc.Backend { if m.client != nil { return m.client } enableWD := false if wd != nil { enableWD = true } m.Lock() defer m.Unlock() m.client = grpc.NewClient(m.address, parallel, wd, enableWD) return m.client }