mirror of
https://github.com/mudler/LocalAI.git
synced 2026-04-01 13:42:20 -04:00
* feat: add distributed mode (experimental) Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fix data races, mutexes, transactions Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactorings Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fixups Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fix events and tool stream in agent chat Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * use ginkgo Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fix(cron): compute correctly time boundaries avoiding re-triggering Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * enhancements, refactorings Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * do not flood of healthy checks Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * do not list obvious backends as text backends Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * tests fixups Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Drop redundant healthcheck Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * enhancements, refactorings Signed-off-by: Ettore Di Giacinto <mudler@localai.io> --------- Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
90 lines
3.9 KiB
Go
90 lines
3.9 KiB
Go
package nodes
|
|
|
|
import (
|
|
"context"
|
|
"time"
|
|
|
|
grpc "github.com/mudler/LocalAI/pkg/grpc"
|
|
)
|
|
|
|
// ModelRouter is used by SmartRouter for routing decisions and model lifecycle.
|
|
type ModelRouter interface {
|
|
FindAndLockNodeWithModel(ctx context.Context, modelName string) (*BackendNode, *NodeModel, error)
|
|
DecrementInFlight(ctx context.Context, nodeID, modelName string) error
|
|
IncrementInFlight(ctx context.Context, nodeID, modelName string) error
|
|
RemoveNodeModel(ctx context.Context, nodeID, modelName string) error
|
|
TouchNodeModel(ctx context.Context, nodeID, modelName string)
|
|
SetNodeModel(ctx context.Context, nodeID, modelName, state, address string, initialInFlight int) error
|
|
FindNodeWithVRAM(ctx context.Context, minBytes uint64) (*BackendNode, error)
|
|
FindIdleNode(ctx context.Context) (*BackendNode, error)
|
|
FindLeastLoadedNode(ctx context.Context) (*BackendNode, error)
|
|
FindGlobalLRUModelWithZeroInFlight(ctx context.Context) (*NodeModel, error)
|
|
FindLRUModel(ctx context.Context, nodeID string) (*NodeModel, error)
|
|
Get(ctx context.Context, nodeID string) (*BackendNode, error)
|
|
}
|
|
|
|
// NodeHealthStore is used by HealthMonitor for node status management.
|
|
type NodeHealthStore interface {
|
|
List(ctx context.Context) ([]BackendNode, error)
|
|
GetNodeModels(ctx context.Context, nodeID string) ([]NodeModel, error)
|
|
MarkOffline(ctx context.Context, nodeID string) error
|
|
MarkUnhealthy(ctx context.Context, nodeID string) error
|
|
MarkHealthy(ctx context.Context, nodeID string) error
|
|
Heartbeat(ctx context.Context, nodeID string, update *HeartbeatUpdate) error
|
|
FindStaleNodes(ctx context.Context, threshold time.Duration) ([]BackendNode, error)
|
|
RemoveNodeModel(ctx context.Context, nodeID, modelName string) error
|
|
}
|
|
|
|
// ModelLocator is used by RemoteUnloaderAdapter for model discovery.
|
|
type ModelLocator interface {
|
|
FindNodesWithModel(ctx context.Context, modelName string) ([]BackendNode, error)
|
|
RemoveNodeModel(ctx context.Context, nodeID, modelName string) error
|
|
}
|
|
|
|
// ModelLookup is used by DistributedModelStore for model existence queries.
|
|
type ModelLookup interface {
|
|
FindNodeForModel(ctx context.Context, modelName string) (*BackendNode, bool)
|
|
ListAllLoadedModels(ctx context.Context) ([]NodeModel, error)
|
|
Get(ctx context.Context, nodeID string) (*BackendNode, error)
|
|
}
|
|
|
|
// InFlightTracker is used by InFlightTrackingClient for request counting.
//
// Both methods are addressed by a (nodeID, modelName) pair and return an
// error; what is counted and how it is stored is up to the implementation.
type InFlightTracker interface {
	IncrementInFlight(ctx context.Context, nodeID, modelName string) error
	DecrementInFlight(ctx context.Context, nodeID, modelName string) error
}
|
|
|
|
// NodeManager is used by HTTP endpoints for node registration and lifecycle.
|
|
type NodeManager interface {
|
|
Register(ctx context.Context, node *BackendNode, autoApprove bool) error
|
|
Get(ctx context.Context, nodeID string) (*BackendNode, error)
|
|
GetByName(ctx context.Context, name string) (*BackendNode, error)
|
|
List(ctx context.Context) ([]BackendNode, error)
|
|
Deregister(ctx context.Context, nodeID string) error
|
|
ApproveNode(ctx context.Context, nodeID string) error
|
|
MarkOffline(ctx context.Context, nodeID string) error
|
|
MarkDraining(ctx context.Context, nodeID string) error
|
|
Heartbeat(ctx context.Context, nodeID string, update *HeartbeatUpdate) error
|
|
GetNodeModels(ctx context.Context, nodeID string) ([]NodeModel, error)
|
|
UpdateAuthRefs(ctx context.Context, nodeID, authUserID, apiKeyID string) error
|
|
RemoveNodeModel(ctx context.Context, nodeID, modelName string) error
|
|
}
|
|
|
|
// BackendClientFactory creates gRPC backend clients.
|
|
type BackendClientFactory interface {
|
|
NewClient(address string, parallel bool) grpc.Backend
|
|
}
|
|
|
|
// tokenClientFactory is the default BackendClientFactory that creates gRPC
// clients with an optional bearer token for distributed auth.
type tokenClientFactory struct {
	// token is handed to grpc.NewClientWithToken by NewClient; when it is
	// the empty string NewClient builds a client without a token instead.
	token string
}
|
|
|
|
func (f *tokenClientFactory) NewClient(address string, parallel bool) grpc.Backend {
|
|
if f.token != "" {
|
|
return grpc.NewClientWithToken(address, parallel, nil, false, f.token)
|
|
}
|
|
return grpc.NewClient(address, parallel, nil, false)
|
|
}
|