LocalAI/core/services/worker/registration.go

package worker

import (
	"cmp"
	"fmt"
	"net"
	"os"
	"strconv"
	"strings"

	"github.com/mudler/LocalAI/pkg/xsysinfo"
)

// effectiveBasePort returns the port used as base for gRPC backend processes.
// Priority: Addr port → ServeAddr port → 50051
//
// Each candidate is parsed with net.SplitHostPort, so bracketed IPv6
// literals such as "[::1]:50051" yield their real port instead of being cut
// at the first colon. A candidate is skipped when it is empty, carries no
// port, or its port does not parse as a positive integer (including values
// that overflow int).
func (cfg *Config) effectiveBasePort() int {
	for _, addr := range []string{cfg.Addr, cfg.ServeAddr} {
		_, portStr, err := net.SplitHostPort(addr)
		if err != nil {
			// Empty string, missing port, or malformed host: try the next one.
			continue
		}
		if p, err := strconv.Atoi(portStr); err == nil && p > 0 {
			return p
		}
	}
	return 50051
}

// advertiseAddr returns the address the frontend should use to reach this
// node. An explicit AdvertiseAddr wins, then Addr; when both are empty the
// address is built from the OS hostname ("localhost" if the lookup yields
// nothing) and the effective base port.
func (cfg *Config) advertiseAddr() string {
	if configured := cmp.Or(cfg.AdvertiseAddr, cfg.Addr); configured != "" {
		return configured
	}

	// The lookup error is ignored on purpose: an empty name is replaced by
	// "localhost" below.
	host, _ := os.Hostname()
	host = cmp.Or(host, "localhost")
	port := cfg.effectiveBasePort()
	return fmt.Sprintf("%s:%d", host, port)
}

// resolveHTTPAddr returns the bind address for the HTTP file transfer server.
// An explicit HTTPAddr is returned untouched. Otherwise the server listens on
// all interfaces one port below the gRPC base port: dynamically allocated
// gRPC ports grow upward from the base, so basePort-1 stays out of their way.
func (cfg *Config) resolveHTTPAddr() string {
	if cfg.HTTPAddr == "" {
		httpPort := cfg.effectiveBasePort() - 1
		return fmt.Sprintf("0.0.0.0:%d", httpPort)
	}
	return cfg.HTTPAddr
}

// advertiseHTTPAddr returns the HTTP address the frontend should use to reach
// this node for file transfer.
//
// An explicit AdvertiseHTTPAddr is returned untouched. Otherwise the host is
// taken from advertiseAddr() and paired with effectiveBasePort()-1. The host
// is extracted with net.SplitHostPort and re-joined with net.JoinHostPort so
// that IPv6 literals ("[::1]:50051") survive intact and are bracketed again
// in the result, rather than being truncated at their first colon.
//
// NOTE(review): the advertised port is always basePort-1, even when HTTPAddr
// overrides the bind address returned by resolveHTTPAddr. Set
// AdvertiseHTTPAddr explicitly in that case — confirm this is intended.
func (cfg *Config) advertiseHTTPAddr() string {
	if cfg.AdvertiseHTTPAddr != "" {
		return cfg.AdvertiseHTTPAddr
	}

	addr := cfg.advertiseAddr()
	host, _, err := net.SplitHostPort(addr)
	if err != nil {
		// No usable port in the advertise address: treat the whole value as
		// the host, dropping any IPv6 brackets so JoinHostPort can add them
		// back exactly once.
		host = strings.Trim(addr, "[]")
	}
	httpPort := cfg.effectiveBasePort() - 1
	return net.JoinHostPort(host, strconv.Itoa(httpPort))
}

// registrationBody assembles the JSON payload sent when this worker registers
// as a node: identity, reachable addresses, GPU/RAM capacity, the optional
// registration token, and the node labels.
func (cfg *Config) registrationBody() map[string]any {
	// Node name: explicit config first, then the OS hostname, and a
	// PID-derived placeholder when the hostname lookup fails.
	name := cfg.NodeName
	if name == "" {
		if host, err := os.Hostname(); err == nil {
			name = host
		} else {
			name = fmt.Sprintf("node-%d", os.Getpid())
		}
	}

	// GPU facts drive VRAM-aware scheduling. Detection errors are dropped;
	// whatever values the calls return are reported as-is.
	vram, _ := xsysinfo.TotalAvailableVRAM()
	vendor, _ := xsysinfo.DetectGPUVendor()
	// The compute capability (e.g. "12.1" for GB10) lets the router choose
	// per-architecture options such as a larger physical batch on Blackwell.
	// It is detected here because in distributed mode only the worker sees
	// the GPU.
	computeCap := xsysinfo.NVIDIAComputeCapability()

	// A node always offers at least one replica slot per model.
	slots := max(cfg.MaxReplicasPerModel, 1)

	payload := map[string]any{
		"name":                   name,
		"address":                cfg.advertiseAddr(),
		"http_address":           cfg.advertiseHTTPAddr(),
		"total_vram":             vram,
		"available_vram":         vram, // nothing is loaded yet, so all VRAM is free
		"gpu_vendor":             vendor,
		"gpu_compute_capability": computeCap,
		"max_replicas_per_model": slots,
	}

	// Without a detected GPU, fall back to system RAM so the scheduler/UI
	// still has capacity figures to work with.
	if vram == 0 {
		ram, err := xsysinfo.GetSystemRAMInfo()
		if err == nil {
			payload["total_ram"] = ram.Total
			payload["available_ram"] = ram.Available
		}
	}

	if token := cfg.RegistrationToken; token != "" {
		payload["token"] = token
	}

	// Static labels come from a comma-separated "key=value" list; entries
	// without "=" are ignored. The auto-label `node.replica-slots=N` is
	// written last so it is always present (and wins over a user-supplied
	// value), letting AND-selectors in ModelSchedulingConfig target
	// high-capacity nodes (e.g. {"node.replica-slots":"4"}).
	labels := map[string]string{}
	if cfg.NodeLabels != "" {
		for _, entry := range strings.Split(cfg.NodeLabels, ",") {
			key, value, found := strings.Cut(strings.TrimSpace(entry), "=")
			if !found {
				continue
			}
			labels[strings.TrimSpace(key)] = strings.TrimSpace(value)
		}
	}
	labels["node.replica-slots"] = strconv.Itoa(slots)
	payload["labels"] = labels

	return payload
}

// heartbeatBody returns the current VRAM/RAM stats for heartbeat payloads.
//
// available_vram is only included when the aggregate GPU info reports a
// positive total. When it is unknown (no GPU, or a temporary detection
// failure) the key is deliberately left out so the frontend keeps its last
// good value: sending 0 would make the UI show the node as "fully used",
// and sending total-as-available would mislead the scheduler about free
// capacity.
func (cfg *Config) heartbeatBody() map[string]any {
	stats := make(map[string]any)

	gpu := xsysinfo.GetGPUAggregateInfo()
	switch {
	case gpu.TotalVRAM > 0:
		stats["available_vram"] = gpu.FreeVRAM
	case gpu.TotalVRAM == 0:
		// CPU-only workers (or workers that momentarily lost GPU
		// visibility): report system RAM so the scheduler still has
		// capacity info.
		ram, err := xsysinfo.GetSystemRAMInfo()
		if err == nil {
			stats["available_ram"] = ram.Available
		}
	}

	return stats
}