mirror of
https://github.com/mudler/LocalAI.git
synced 2026-06-15 04:08:55 -04:00
* feat(distributed): add SpreadAll column and authoritative scheduling seeding Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * feat(distributed): parse declarative model scheduling config (env/file) Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * feat(distributed): reconcile spread_all to one replica per matching node Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * feat(distributed): wire LOCALAI_MODEL_SCHEDULING env/args and startup seeding Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * feat(distributed): expose spread_all on the scheduling API endpoint Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * feat(distributed): add spread-to-all-nodes mode to the scheduling UI Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * docs(distributed): document LOCALAI_MODEL_SCHEDULING env/args Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * docs(distributed): clarify replica modes and all-nodes spread in scheduling config Signed-off-by: Ettore Di Giacinto <mudler@localai.io> --------- Signed-off-by: Ettore Di Giacinto <mudler@localai.io> Co-authored-by: Ettore Di Giacinto <mudler@localai.io>
172 lines
6.0 KiB
Go
172 lines
6.0 KiB
Go
package nodes
|
|
|
|
import (
|
|
"encoding/json"
|
|
"fmt"
|
|
"os"
|
|
"strings"
|
|
|
|
"github.com/mudler/LocalAI/core/services/nodes/prefixcache"
|
|
"gopkg.in/yaml.v3"
|
|
)
|
|
|
|
// ReplicasSpec parses the "replicas" convenience field used in the env/file
// scheduling config. It accepts the string "all" (or boolean true) to mean
// "spread one replica onto every matching node". The strings "" / "auto" and
// boolean false (or null) leave SpreadAll unset and defer to
// min_replicas/max_replicas. String matching is case-insensitive and ignores
// surrounding whitespace.
//
// A numeric value, whether a bare number or a quoted string of digits such as
// "3", is rejected with a hint pointing at min/max_replicas, which are the
// dedicated fields for fixed counts.
type ReplicasSpec struct {
	// SpreadAll is true when the field asked for one replica per matching node.
	SpreadAll bool
}

// set interprets an already-decoded "replicas" value (as produced by decoding
// JSON or YAML into an untyped value) and updates r.SpreadAll accordingly.
//
// It returns an error, leaving r untouched, when v is neither nil, a bool, nor
// one of the recognised strings.
func (r *ReplicasSpec) set(v any) error {
	switch t := v.(type) {
	case nil:
		r.SpreadAll = false
	case bool:
		r.SpreadAll = t
	case string:
		s := strings.ToLower(strings.TrimSpace(t))
		switch {
		case s == "all":
			r.SpreadAll = true
		case s == "" || s == "auto":
			r.SpreadAll = false
		case strings.Trim(s, "0123456789") == "":
			// s is non-empty here (the empty string matched above), so it is
			// made up only of digits: a quoted fixed count. Give the same
			// hint as for a bare number instead of the generic message.
			return fmt.Errorf("invalid replicas value %q (use min_replicas/max_replicas for a fixed count, or \"all\" to spread)", t)
		default:
			return fmt.Errorf("invalid replicas value %q (expected \"all\" or \"auto\")", t)
		}
	default:
		// Bare numbers (and any other decoded type) end up here.
		return fmt.Errorf("invalid replicas value %v (use min_replicas/max_replicas for a fixed count, or \"all\" to spread)", v)
	}
	return nil
}
|
|
|
|
// UnmarshalJSON implements json.Unmarshaler for the replicas alias.
|
|
func (r *ReplicasSpec) UnmarshalJSON(b []byte) error {
|
|
var v any
|
|
if err := json.Unmarshal(b, &v); err != nil {
|
|
return err
|
|
}
|
|
return r.set(v)
|
|
}
|
|
|
|
// UnmarshalYAML implements yaml.Unmarshaler for the replicas alias.
|
|
func (r *ReplicasSpec) UnmarshalYAML(value *yaml.Node) error {
|
|
var v any
|
|
if err := value.Decode(&v); err != nil {
|
|
return err
|
|
}
|
|
return r.set(v)
|
|
}
|
|
|
|
// SeedSchedulingEntry is one entry in the env/file scheduling config. It mirrors
|
|
// the API's SetSchedulingRequest shape, plus the "replicas" alias and the
|
|
// canonical "spread_all" boolean.
|
|
type SeedSchedulingEntry struct {
|
|
ModelName string `json:"model_name" yaml:"model_name"`
|
|
NodeSelector map[string]string `json:"node_selector,omitempty" yaml:"node_selector,omitempty"`
|
|
MinReplicas int `json:"min_replicas,omitempty" yaml:"min_replicas,omitempty"`
|
|
MaxReplicas int `json:"max_replicas,omitempty" yaml:"max_replicas,omitempty"`
|
|
Replicas *ReplicasSpec `json:"replicas,omitempty" yaml:"replicas,omitempty"`
|
|
SpreadAll bool `json:"spread_all,omitempty" yaml:"spread_all,omitempty"`
|
|
|
|
RoutePolicy string `json:"route_policy,omitempty" yaml:"route_policy,omitempty"`
|
|
BalanceAbsThreshold int `json:"balance_abs_threshold,omitempty" yaml:"balance_abs_threshold,omitempty"`
|
|
BalanceRelThreshold float64 `json:"balance_rel_threshold,omitempty" yaml:"balance_rel_threshold,omitempty"`
|
|
MinPrefixMatch float64 `json:"min_prefix_match,omitempty" yaml:"min_prefix_match,omitempty"`
|
|
}
|
|
|
|
// spread reports whether this entry requests spread-to-all-matching-nodes mode,
|
|
// via either the canonical spread_all field or the replicas alias.
|
|
func (e SeedSchedulingEntry) spread() bool {
|
|
return e.SpreadAll || (e.Replicas != nil && e.Replicas.SpreadAll)
|
|
}
|
|
|
|
// ValidateSeedEntry enforces the invariants of a single scheduling entry. It
|
|
// mirrors the API's validateSchedulingRequest, with the added rule that spread
|
|
// mode is mutually exclusive with explicit min/max replica counts.
|
|
func ValidateSeedEntry(e SeedSchedulingEntry) error {
|
|
if e.ModelName == "" {
|
|
return fmt.Errorf("model_name is required")
|
|
}
|
|
if e.MinReplicas < 0 {
|
|
return fmt.Errorf("min_replicas must be >= 0 (model %q)", e.ModelName)
|
|
}
|
|
if e.MaxReplicas < 0 {
|
|
return fmt.Errorf("max_replicas must be >= 0 (model %q)", e.ModelName)
|
|
}
|
|
if e.spread() && (e.MinReplicas != 0 || e.MaxReplicas != 0) {
|
|
return fmt.Errorf("spread (replicas: all) and min_replicas/max_replicas are mutually exclusive (model %q)", e.ModelName)
|
|
}
|
|
if e.MaxReplicas > 0 && e.MinReplicas > e.MaxReplicas {
|
|
return fmt.Errorf("min_replicas must be <= max_replicas (model %q)", e.ModelName)
|
|
}
|
|
if err := prefixcache.ValidateThresholds(e.RoutePolicy, e.BalanceAbsThreshold, e.BalanceRelThreshold, e.MinPrefixMatch); err != nil {
|
|
return fmt.Errorf("%w (model %q)", err, e.ModelName)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (e SeedSchedulingEntry) toConfig() (ModelSchedulingConfig, error) {
|
|
selectorJSON := ""
|
|
if len(e.NodeSelector) > 0 {
|
|
b, err := json.Marshal(e.NodeSelector)
|
|
if err != nil {
|
|
return ModelSchedulingConfig{}, fmt.Errorf("serializing node_selector for model %q: %w", e.ModelName, err)
|
|
}
|
|
selectorJSON = string(b)
|
|
}
|
|
return ModelSchedulingConfig{
|
|
ModelName: e.ModelName,
|
|
NodeSelector: selectorJSON,
|
|
MinReplicas: e.MinReplicas,
|
|
MaxReplicas: e.MaxReplicas,
|
|
SpreadAll: e.spread(),
|
|
RoutePolicy: e.RoutePolicy,
|
|
BalanceAbsThreshold: e.BalanceAbsThreshold,
|
|
BalanceRelThreshold: e.BalanceRelThreshold,
|
|
MinPrefixMatch: e.MinPrefixMatch,
|
|
}, nil
|
|
}
|
|
|
|
// ParseSchedulingSeed parses the inline-JSON and/or YAML-file scheduling config
|
|
// into validated ModelSchedulingConfig rows ready to upsert. Entries from both
|
|
// sources are concatenated (jsonStr first, then the file). Either argument may
|
|
// be empty.
|
|
func ParseSchedulingSeed(jsonStr, configPath string) ([]ModelSchedulingConfig, error) {
|
|
var entries []SeedSchedulingEntry
|
|
|
|
if strings.TrimSpace(jsonStr) != "" {
|
|
var fromJSON []SeedSchedulingEntry
|
|
if err := json.Unmarshal([]byte(jsonStr), &fromJSON); err != nil {
|
|
return nil, fmt.Errorf("parsing LOCALAI_MODEL_SCHEDULING JSON: %w", err)
|
|
}
|
|
entries = append(entries, fromJSON...)
|
|
}
|
|
|
|
if configPath != "" {
|
|
data, err := os.ReadFile(configPath)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("reading model scheduling config %q: %w", configPath, err)
|
|
}
|
|
var fromYAML []SeedSchedulingEntry
|
|
if err := yaml.Unmarshal(data, &fromYAML); err != nil {
|
|
return nil, fmt.Errorf("parsing model scheduling config %q: %w", configPath, err)
|
|
}
|
|
entries = append(entries, fromYAML...)
|
|
}
|
|
|
|
configs := make([]ModelSchedulingConfig, 0, len(entries))
|
|
for _, e := range entries {
|
|
if err := ValidateSeedEntry(e); err != nil {
|
|
return nil, err
|
|
}
|
|
cfg, err := e.toConfig()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
configs = append(configs, cfg)
|
|
}
|
|
return configs, nil
|
|
}
|