Files
LocalAI/pkg/model/model.go
Ettore Di Giacinto 59108fbe32 feat: add distributed mode (#9124)
* feat: add distributed mode (experimental)

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* fix data races, mutexes, transactions

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* refactorings

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* fixups

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* fix events and tool stream in agent chat

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* use ginkgo

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* refactoring and consolidation

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* refactoring and consolidation

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* refactoring and consolidation

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* refactoring and consolidation

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* refactoring and consolidation

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* refactoring and consolidation

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* refactoring and consolidation

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* refactoring and consolidation

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* fix(cron): compute correctly time boundaries avoiding re-triggering

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* enhancements, refactorings

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* do not flood of healthy checks

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* do not list obvious backends as text backends

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* tests fixups

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* refactoring and consolidation

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Drop redundant healthcheck

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* enhancements, refactorings

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2026-03-30 00:47:27 +02:00

76 lines
1.8 KiB
Go

package model
import (
"sync"
"time"
grpc "github.com/mudler/LocalAI/pkg/grpc"
process "github.com/mudler/go-processmanager"
)
// healthCheckTTL is the duration for which a successful health check is cached.
// Subsequent checkIsLoaded calls within this window skip the gRPC round-trip,
// avoiding serialization of concurrent requests behind ml.mu.Lock().
const healthCheckTTL = 30 * time.Second
type Model struct {
ID string `json:"id"`
address string
client grpc.Backend
process *process.Process
lastHealthCheck time.Time
sync.Mutex
}
func NewModel(ID, address string, process *process.Process) *Model {
return &Model{
ID: ID,
address: address,
process: process,
}
}
// NewModelWithClient creates a Model with a pre-configured gRPC client.
// Used in distributed mode where the client is wrapped with file staging.
func NewModelWithClient(ID, address string, client grpc.Backend) *Model {
return &Model{
ID: ID,
address: address,
client: client,
}
}
func (m *Model) Process() *process.Process {
return m.process
}
// IsRecentlyHealthy returns true if the model passed a health check within the TTL.
func (m *Model) IsRecentlyHealthy() bool {
m.Lock()
defer m.Unlock()
return !m.lastHealthCheck.IsZero() && time.Since(m.lastHealthCheck) < healthCheckTTL
}
// MarkHealthy records the current time as the last successful health check.
func (m *Model) MarkHealthy() {
m.Lock()
defer m.Unlock()
m.lastHealthCheck = time.Now()
}
func (m *Model) GRPC(parallel bool, wd *WatchDog) grpc.Backend {
if m.client != nil {
return m.client
}
enableWD := false
if wd != nil {
enableWD = true
}
m.Lock()
defer m.Unlock()
m.client = grpc.NewClient(m.address, parallel, wd, enableWD)
return m.client
}