mirror of
https://github.com/mudler/LocalAI.git
synced 2026-02-03 19:22:39 -05:00
Compare commits
6 Commits
cleanup_de
...
v2.20.0
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
fbaae8528d | ||
|
|
7d030b56b2 | ||
|
|
0add16049e | ||
|
|
2bb48b0816 | ||
|
|
023ce59d44 | ||
|
|
7822d944b5 |
4
Makefile
4
Makefile
@@ -8,7 +8,7 @@ DETECT_LIBS?=true
|
||||
# llama.cpp versions
|
||||
GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
|
||||
GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
|
||||
CPPLLAMA_VERSION?=2f3c1466ff46a2413b0e363a5005c46538186ee6
|
||||
CPPLLAMA_VERSION?=fc54ef0d1c138133a01933296d50a36a1ab64735
|
||||
|
||||
# go-rwkv version
|
||||
RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
|
||||
@@ -16,7 +16,7 @@ RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6
|
||||
|
||||
# whisper.cpp version
|
||||
WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp
|
||||
WHISPER_CPP_VERSION?=d65786ea540a5aef21f67cacfa6f134097727780
|
||||
WHISPER_CPP_VERSION?=9e3c5345cd46ea718209db53464e426c3fe7a25e
|
||||
|
||||
# bert.cpp version
|
||||
BERT_REPO?=https://github.com/go-skynet/go-bert.cpp
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
--extra-index-url https://download.pytorch.org/whl/rocm6.0
|
||||
torch
|
||||
torchaudio
|
||||
torch==2.3.0+rocm6.0
|
||||
torchaudio==2.3.0+rocm6.0
|
||||
transformers
|
||||
accelerate
|
||||
accelerate
|
||||
|
||||
80
core/cli/api/p2p.go
Normal file
80
core/cli/api/p2p.go
Normal file
@@ -0,0 +1,80 @@
|
||||
package cli_api
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"net"
|
||||
"os"
|
||||
"strings"
|
||||
|
||||
"github.com/mudler/LocalAI/core/p2p"
|
||||
"github.com/mudler/edgevpn/pkg/node"
|
||||
|
||||
"github.com/rs/zerolog/log"
|
||||
)
|
||||
|
||||
func StartP2PStack(ctx context.Context, address, token, networkID string, federated bool) error {
|
||||
var n *node.Node
|
||||
// Here we are avoiding creating multiple nodes:
|
||||
// - if the federated mode is enabled, we create a federated node and expose a service
|
||||
// - exposing a service creates a node with specific options, and we don't want to create another node
|
||||
|
||||
// If the federated mode is enabled, we expose a service to the local instance running
|
||||
// at r.Address
|
||||
if federated {
|
||||
_, port, err := net.SplitHostPort(address)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Here a new node is created and started
|
||||
// and a service is exposed by the node
|
||||
node, err := p2p.ExposeService(ctx, "localhost", port, token, p2p.NetworkID(networkID, p2p.FederatedID))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := p2p.ServiceDiscoverer(ctx, node, token, p2p.NetworkID(networkID, p2p.FederatedID), nil, false); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
n = node
|
||||
}
|
||||
|
||||
// If the p2p mode is enabled, we start the service discovery
|
||||
if token != "" {
|
||||
// If a node wasn't created previously, create it
|
||||
if n == nil {
|
||||
node, err := p2p.NewNode(token)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
err = node.Start(ctx)
|
||||
if err != nil {
|
||||
return fmt.Errorf("starting new node: %w", err)
|
||||
}
|
||||
n = node
|
||||
}
|
||||
|
||||
// Attach a ServiceDiscoverer to the p2p node
|
||||
log.Info().Msg("Starting P2P server discovery...")
|
||||
if err := p2p.ServiceDiscoverer(ctx, n, token, p2p.NetworkID(networkID, p2p.WorkerID), func(serviceID string, node p2p.NodeData) {
|
||||
var tunnelAddresses []string
|
||||
for _, v := range p2p.GetAvailableNodes(p2p.NetworkID(networkID, p2p.WorkerID)) {
|
||||
if v.IsOnline() {
|
||||
tunnelAddresses = append(tunnelAddresses, v.TunnelAddress)
|
||||
} else {
|
||||
log.Info().Msgf("Node %s is offline", v.ID)
|
||||
}
|
||||
}
|
||||
tunnelEnvVar := strings.Join(tunnelAddresses, ",")
|
||||
|
||||
os.Setenv("LLAMACPP_GRPC_SERVERS", tunnelEnvVar)
|
||||
log.Debug().Msgf("setting LLAMACPP_GRPC_SERVERS to %s", tunnelEnvVar)
|
||||
}, true); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
@@ -3,11 +3,10 @@ package cli
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"net"
|
||||
"os"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
cli_api "github.com/mudler/LocalAI/core/cli/api"
|
||||
cliContext "github.com/mudler/LocalAI/core/cli/context"
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
"github.com/mudler/LocalAI/core/http"
|
||||
@@ -53,6 +52,8 @@ type RunCMD struct {
|
||||
DisablePredownloadScan bool `env:"LOCALAI_DISABLE_PREDOWNLOAD_SCAN" help:"If true, disables the best-effort security scanner before downloading any files." group:"hardening" default:"false"`
|
||||
OpaqueErrors bool `env:"LOCALAI_OPAQUE_ERRORS" default:"false" help:"If true, all error responses are replaced with blank 500 errors. This is intended only for hardening against information leaks and is normally not recommended." group:"hardening"`
|
||||
Peer2Peer bool `env:"LOCALAI_P2P,P2P" name:"p2p" default:"false" help:"Enable P2P mode" group:"p2p"`
|
||||
Peer2PeerDHTInterval int `env:"LOCALAI_P2P_DHT_INTERVAL,P2P_DHT_INTERVAL" default:"360" name:"p2p-dht-interval" help:"Interval for DHT refresh (used during token generation)" group:"p2p"`
|
||||
Peer2PeerOTPInterval int `env:"LOCALAI_P2P_OTP_INTERVAL,P2P_OTP_INTERVAL" default:"9000" name:"p2p-otp-interval" help:"Interval for OTP refresh (used during token generation)" group:"p2p"`
|
||||
Peer2PeerToken string `env:"LOCALAI_P2P_TOKEN,P2P_TOKEN,TOKEN" name:"p2ptoken" help:"Token for P2P mode (optional)" group:"p2p"`
|
||||
Peer2PeerNetworkID string `env:"LOCALAI_P2P_NETWORK_ID,P2P_NETWORK_ID" help:"Network ID for P2P mode, can be set arbitrarly by the user for grouping a set of instances" group:"p2p"`
|
||||
ParallelRequests bool `env:"LOCALAI_PARALLEL_REQUESTS,PARALLEL_REQUESTS" help:"Enable backends to handle multiple requests in parallel if they support it (e.g.: llama.cpp or vllm)" group:"backends"`
|
||||
@@ -107,7 +108,7 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error {
|
||||
// IF no token is provided, and p2p is enabled,
|
||||
// we generate one and wait for the user to pick up the token (this is for interactive)
|
||||
log.Info().Msg("No token provided, generating one")
|
||||
token = p2p.GenerateToken()
|
||||
token = p2p.GenerateToken(r.Peer2PeerDHTInterval, r.Peer2PeerOTPInterval)
|
||||
log.Info().Msg("Generated Token:")
|
||||
fmt.Println(token)
|
||||
|
||||
@@ -115,52 +116,12 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error {
|
||||
fmt.Printf("export TOKEN=\"%s\"\nlocal-ai worker p2p-llama-cpp-rpc\n", token)
|
||||
}
|
||||
opts = append(opts, config.WithP2PToken(token))
|
||||
|
||||
node, err := p2p.NewNode(token)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
nodeContext := context.Background()
|
||||
|
||||
err = node.Start(nodeContext)
|
||||
if err != nil {
|
||||
return fmt.Errorf("starting new node: %w", err)
|
||||
}
|
||||
|
||||
log.Info().Msg("Starting P2P server discovery...")
|
||||
if err := p2p.ServiceDiscoverer(nodeContext, node, token, p2p.NetworkID(r.Peer2PeerNetworkID, p2p.WorkerID), func(serviceID string, node p2p.NodeData) {
|
||||
var tunnelAddresses []string
|
||||
for _, v := range p2p.GetAvailableNodes(p2p.NetworkID(r.Peer2PeerNetworkID, p2p.WorkerID)) {
|
||||
if v.IsOnline() {
|
||||
tunnelAddresses = append(tunnelAddresses, v.TunnelAddress)
|
||||
} else {
|
||||
log.Info().Msgf("Node %s is offline", v.ID)
|
||||
}
|
||||
}
|
||||
tunnelEnvVar := strings.Join(tunnelAddresses, ",")
|
||||
|
||||
os.Setenv("LLAMACPP_GRPC_SERVERS", tunnelEnvVar)
|
||||
log.Debug().Msgf("setting LLAMACPP_GRPC_SERVERS to %s", tunnelEnvVar)
|
||||
}, true); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
if r.Federated {
|
||||
_, port, err := net.SplitHostPort(r.Address)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
fedCtx := context.Background()
|
||||
backgroundCtx := context.Background()
|
||||
|
||||
node, err := p2p.ExposeService(fedCtx, "localhost", port, token, p2p.NetworkID(r.Peer2PeerNetworkID, p2p.FederatedID))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := p2p.ServiceDiscoverer(fedCtx, node, token, p2p.NetworkID(r.Peer2PeerNetworkID, p2p.FederatedID), nil, false); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := cli_api.StartP2PStack(backgroundCtx, r.Address, token, r.Peer2PeerNetworkID, r.Federated); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
idleWatchDog := r.EnableWatchdogIdle
|
||||
|
||||
@@ -25,9 +25,8 @@ import (
|
||||
// @Success 200 {object} schema.OpenAIResponse "Response"
|
||||
// @Router /v1/chat/completions [post]
|
||||
func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startupOptions *config.ApplicationConfig) func(c *fiber.Ctx) error {
|
||||
textContentToReturn := ""
|
||||
id := uuid.New().String()
|
||||
created := int(time.Now().Unix())
|
||||
var id, textContentToReturn string
|
||||
var created int
|
||||
|
||||
process := func(s string, req *schema.OpenAIRequest, config *config.BackendConfig, loader *model.ModelLoader, responses chan schema.OpenAIResponse) {
|
||||
initialMessage := schema.OpenAIResponse{
|
||||
@@ -159,6 +158,10 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
|
||||
}
|
||||
|
||||
return func(c *fiber.Ctx) error {
|
||||
textContentToReturn = ""
|
||||
id = uuid.New().String()
|
||||
created = int(time.Now().Unix())
|
||||
|
||||
modelFile, input, err := readRequest(c, cl, ml, startupOptions, true)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed reading parameters from request:%w", err)
|
||||
|
||||
@@ -28,9 +28,15 @@ import (
|
||||
"github.com/mudler/edgevpn/pkg/logger"
|
||||
)
|
||||
|
||||
func generateNewConnectionData() *node.YAMLConnectionConfig {
|
||||
func generateNewConnectionData(DHTInterval, OTPInterval int) *node.YAMLConnectionConfig {
|
||||
maxMessSize := 20 << 20 // 20MB
|
||||
keyLength := 43
|
||||
if DHTInterval == 0 {
|
||||
DHTInterval = 360
|
||||
}
|
||||
if OTPInterval == 0 {
|
||||
OTPInterval = 9000
|
||||
}
|
||||
|
||||
return &node.YAMLConnectionConfig{
|
||||
MaxMessageSize: maxMessSize,
|
||||
@@ -40,21 +46,21 @@ func generateNewConnectionData() *node.YAMLConnectionConfig {
|
||||
OTP: node.OTP{
|
||||
DHT: node.OTPConfig{
|
||||
Key: eutils.RandStringRunes(keyLength),
|
||||
Interval: 120,
|
||||
Interval: DHTInterval,
|
||||
Length: keyLength,
|
||||
},
|
||||
Crypto: node.OTPConfig{
|
||||
Key: eutils.RandStringRunes(keyLength),
|
||||
Interval: 9000,
|
||||
Interval: OTPInterval,
|
||||
Length: keyLength,
|
||||
},
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
func GenerateToken() string {
|
||||
func GenerateToken(DHTInterval, OTPInterval int) string {
|
||||
// Generates a new config and exit
|
||||
return generateNewConnectionData().Base64()
|
||||
return generateNewConnectionData(DHTInterval, OTPInterval).Base64()
|
||||
}
|
||||
|
||||
func IsP2PEnabled() bool {
|
||||
|
||||
@@ -10,7 +10,7 @@ import (
|
||||
"github.com/mudler/edgevpn/pkg/node"
|
||||
)
|
||||
|
||||
func GenerateToken() string {
|
||||
func GenerateToken(DHTInterval, OTPInterval int) string {
|
||||
return "not implemented"
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user