Compare commits


6 Commits

Author SHA1 Message Date
Ettore Di Giacinto
fbaae8528d fix(chat): re-generate uuid, created, and text on each request (#3359)
This was noticed with models returning content besides function calls.
Sadly we can't easily test that in CI, so it went unnoticed.

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-08-22 10:56:05 +02:00
LocalAI [bot]
7d030b56b2 chore: ⬆️ Update ggerganov/whisper.cpp to 9e3c5345cd46ea718209db53464e426c3fe7a25e (#3357)
⬆️ Update ggerganov/whisper.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-08-22 08:49:33 +00:00
LocalAI [bot]
0add16049e chore: ⬆️ Update ggerganov/llama.cpp to fc54ef0d1c138133a01933296d50a36a1ab64735 (#3356)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-08-21 22:14:02 +00:00
Ettore Di Giacinto
2bb48b0816 fix(parler-tts): pin torchaudio and torch for hipblas
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2024-08-21 18:27:20 +02:00
Ettore Di Giacinto
023ce59d44 feat(p2p): allow setting intervals (#3353)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-08-21 18:23:51 +02:00
Ettore Di Giacinto
7822d944b5 chore(p2p): single-node when sharing federated instance (#3354)
* chore(p2p): single-node when sharing federated instance

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* chore: refactor out and extract into functions

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-08-21 18:23:42 +02:00
27 changed files with 1458 additions and 65 deletions

View File

@@ -8,7 +8,7 @@ DETECT_LIBS?=true
# llama.cpp versions
GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
-CPPLLAMA_VERSION?=2f3c1466ff46a2413b0e363a5005c46538186ee6
+CPPLLAMA_VERSION?=fc54ef0d1c138133a01933296d50a36a1ab64735
# go-rwkv version
RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
@@ -16,7 +16,7 @@ RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6
# whisper.cpp version
WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp
-WHISPER_CPP_VERSION?=d65786ea540a5aef21f67cacfa6f134097727780
+WHISPER_CPP_VERSION?=9e3c5345cd46ea718209db53464e426c3fe7a25e
# bert.cpp version
BERT_REPO?=https://github.com/go-skynet/go-bert.cpp
@@ -338,7 +338,7 @@ rebuild: ## Rebuilds the project
$(MAKE) -C sources/go-tiny-dream clean
$(MAKE) build
-prepare: prepare-sources gen-assets $(OPTIONAL_TARGETS)
+prepare: prepare-sources $(OPTIONAL_TARGETS)
clean: ## Remove build related file
$(GOCMD) clean -cache
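
Note: every pin above uses Make's conditional assignment (?=), so each value is only a default; a build can override a pin without editing the Makefile, e.g. make build CPPLLAMA_VERSION=<sha>.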

View File

@@ -1,5 +1,5 @@
--extra-index-url https://download.pytorch.org/whl/rocm6.0
-torch
-torchaudio
+torch==2.3.0+rocm6.0
+torchaudio==2.3.0+rocm6.0
transformers
accelerate
accelerate
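
Note: pinning both packages to the +rocm6.0 local-version wheels keeps pip resolving them from the ROCm extra index declared on the first line, and torch and torchaudio are pinned together because torchaudio wheels are built against one specific torch release.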

core/cli/api/p2p.go Normal file
View File

@@ -0,0 +1,80 @@
package cli_api
import (
"context"
"fmt"
"net"
"os"
"strings"
"github.com/mudler/LocalAI/core/p2p"
"github.com/mudler/edgevpn/pkg/node"
"github.com/rs/zerolog/log"
)
func StartP2PStack(ctx context.Context, address, token, networkID string, federated bool) error {
var n *node.Node
// Here we are avoiding creating multiple nodes:
// - if the federated mode is enabled, we create a federated node and expose a service
// - exposing a service creates a node with specific options, and we don't want to create another node
// If the federated mode is enabled, we expose a service to the local instance running
// at r.Address
if federated {
_, port, err := net.SplitHostPort(address)
if err != nil {
return err
}
// Here a new node is created and started
// and a service is exposed by the node
node, err := p2p.ExposeService(ctx, "localhost", port, token, p2p.NetworkID(networkID, p2p.FederatedID))
if err != nil {
return err
}
if err := p2p.ServiceDiscoverer(ctx, node, token, p2p.NetworkID(networkID, p2p.FederatedID), nil, false); err != nil {
return err
}
n = node
}
// If the p2p mode is enabled, we start the service discovery
if token != "" {
// If a node wasn't created previously, create it
if n == nil {
node, err := p2p.NewNode(token)
if err != nil {
return err
}
err = node.Start(ctx)
if err != nil {
return fmt.Errorf("starting new node: %w", err)
}
n = node
}
// Attach a ServiceDiscoverer to the p2p node
log.Info().Msg("Starting P2P server discovery...")
if err := p2p.ServiceDiscoverer(ctx, n, token, p2p.NetworkID(networkID, p2p.WorkerID), func(serviceID string, node p2p.NodeData) {
var tunnelAddresses []string
for _, v := range p2p.GetAvailableNodes(p2p.NetworkID(networkID, p2p.WorkerID)) {
if v.IsOnline() {
tunnelAddresses = append(tunnelAddresses, v.TunnelAddress)
} else {
log.Info().Msgf("Node %s is offline", v.ID)
}
}
tunnelEnvVar := strings.Join(tunnelAddresses, ",")
os.Setenv("LLAMACPP_GRPC_SERVERS", tunnelEnvVar)
log.Debug().Msgf("setting LLAMACPP_GRPC_SERVERS to %s", tunnelEnvVar)
}, true); err != nil {
return err
}
}
return nil
}
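
A minimal sketch of how a caller might drive this new helper; the address, token, and network ID below are hypothetical placeholders, and only StartP2PStack and its signature come from the file above:

package main

import (
	"context"
	"log"

	cli_api "github.com/mudler/LocalAI/core/cli/api"
)

func main() {
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	// Hypothetical values: the address the local instance listens on,
	// a previously generated P2P token, and an arbitrary network ID.
	// federated=true also exposes the instance as a federated service.
	if err := cli_api.StartP2PStack(ctx, "127.0.0.1:8080", "<p2p-token>", "my-network", true); err != nil {
		log.Fatal(err)
	}

	// StartP2PStack only wires up the P2P side; the caller (e.g. the
	// HTTP server in run.go) would carry on from here.
}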

View File

@@ -3,11 +3,10 @@ package cli
import (
"context"
"fmt"
-"net"
-"os"
-"strings"
"time"
+cli_api "github.com/mudler/LocalAI/core/cli/api"
cliContext "github.com/mudler/LocalAI/core/cli/context"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/http"
@@ -53,6 +52,8 @@ type RunCMD struct {
DisablePredownloadScan bool `env:"LOCALAI_DISABLE_PREDOWNLOAD_SCAN" help:"If true, disables the best-effort security scanner before downloading any files." group:"hardening" default:"false"`
OpaqueErrors bool `env:"LOCALAI_OPAQUE_ERRORS" default:"false" help:"If true, all error responses are replaced with blank 500 errors. This is intended only for hardening against information leaks and is normally not recommended." group:"hardening"`
Peer2Peer bool `env:"LOCALAI_P2P,P2P" name:"p2p" default:"false" help:"Enable P2P mode" group:"p2p"`
+Peer2PeerDHTInterval int `env:"LOCALAI_P2P_DHT_INTERVAL,P2P_DHT_INTERVAL" default:"360" name:"p2p-dht-interval" help:"Interval for DHT refresh (used during token generation)" group:"p2p"`
+Peer2PeerOTPInterval int `env:"LOCALAI_P2P_OTP_INTERVAL,P2P_OTP_INTERVAL" default:"9000" name:"p2p-otp-interval" help:"Interval for OTP refresh (used during token generation)" group:"p2p"`
Peer2PeerToken string `env:"LOCALAI_P2P_TOKEN,P2P_TOKEN,TOKEN" name:"p2ptoken" help:"Token for P2P mode (optional)" group:"p2p"`
Peer2PeerNetworkID string `env:"LOCALAI_P2P_NETWORK_ID,P2P_NETWORK_ID" help:"Network ID for P2P mode, can be set arbitrarily by the user for grouping a set of instances" group:"p2p"`
ParallelRequests bool `env:"LOCALAI_PARALLEL_REQUESTS,PARALLEL_REQUESTS" help:"Enable backends to handle multiple requests in parallel if they support it (e.g.: llama.cpp or vllm)" group:"backends"`
@@ -107,7 +108,7 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error {
// IF no token is provided, and p2p is enabled,
// we generate one and wait for the user to pick up the token (this is for interactive)
log.Info().Msg("No token provided, generating one")
-token = p2p.GenerateToken()
+token = p2p.GenerateToken(r.Peer2PeerDHTInterval, r.Peer2PeerOTPInterval)
log.Info().Msg("Generated Token:")
fmt.Println(token)
@@ -115,52 +116,12 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error {
fmt.Printf("export TOKEN=\"%s\"\nlocal-ai worker p2p-llama-cpp-rpc\n", token)
}
opts = append(opts, config.WithP2PToken(token))
-node, err := p2p.NewNode(token)
-if err != nil {
-return err
-}
-nodeContext := context.Background()
-err = node.Start(nodeContext)
-if err != nil {
-return fmt.Errorf("starting new node: %w", err)
-}
-log.Info().Msg("Starting P2P server discovery...")
-if err := p2p.ServiceDiscoverer(nodeContext, node, token, p2p.NetworkID(r.Peer2PeerNetworkID, p2p.WorkerID), func(serviceID string, node p2p.NodeData) {
-var tunnelAddresses []string
-for _, v := range p2p.GetAvailableNodes(p2p.NetworkID(r.Peer2PeerNetworkID, p2p.WorkerID)) {
-if v.IsOnline() {
-tunnelAddresses = append(tunnelAddresses, v.TunnelAddress)
-} else {
-log.Info().Msgf("Node %s is offline", v.ID)
-}
-}
-tunnelEnvVar := strings.Join(tunnelAddresses, ",")
-os.Setenv("LLAMACPP_GRPC_SERVERS", tunnelEnvVar)
-log.Debug().Msgf("setting LLAMACPP_GRPC_SERVERS to %s", tunnelEnvVar)
-}, true); err != nil {
-return err
-}
}
-if r.Federated {
-_, port, err := net.SplitHostPort(r.Address)
-if err != nil {
-return err
-}
-fedCtx := context.Background()
+backgroundCtx := context.Background()
-node, err := p2p.ExposeService(fedCtx, "localhost", port, token, p2p.NetworkID(r.Peer2PeerNetworkID, p2p.FederatedID))
-if err != nil {
-return err
-}
-if err := p2p.ServiceDiscoverer(fedCtx, node, token, p2p.NetworkID(r.Peer2PeerNetworkID, p2p.FederatedID), nil, false); err != nil {
-return err
-}
+if err := cli_api.StartP2PStack(backgroundCtx, r.Address, token, r.Peer2PeerNetworkID, r.Federated); err != nil {
+return err
+}
idleWatchDog := r.EnableWatchdogIdle
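
With this refactor the worker and federated wiring collapses into the single StartP2PStack call above, and the refresh intervals become tunable through the flag definitions earlier in the file; a hypothetical invocation via the environment variables they declare: LOCALAI_P2P=true LOCALAI_P2P_DHT_INTERVAL=120 LOCALAI_P2P_OTP_INTERVAL=3600 local-ai run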

View File

@@ -25,9 +25,8 @@ import (
// @Success 200 {object} schema.OpenAIResponse "Response"
// @Router /v1/chat/completions [post]
func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startupOptions *config.ApplicationConfig) func(c *fiber.Ctx) error {
-textContentToReturn := ""
-id := uuid.New().String()
-created := int(time.Now().Unix())
+var id, textContentToReturn string
+var created int
process := func(s string, req *schema.OpenAIRequest, config *config.BackendConfig, loader *model.ModelLoader, responses chan schema.OpenAIResponse) {
initialMessage := schema.OpenAIResponse{
@@ -159,6 +158,10 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
}
return func(c *fiber.Ctx) error {
+textContentToReturn = ""
+id = uuid.New().String()
+created = int(time.Now().Unix())
modelFile, input, err := readRequest(c, cl, ml, startupOptions, true)
if err != nil {
return fmt.Errorf("failed reading parameters from request:%w", err)
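
The underlying bug pattern is worth spelling out: ChatEndpoint builds its fiber handler once, so anything initialized outside the returned closure is computed a single time and then shared by every request. A self-contained sketch of the same pitfall and fix (hypothetical names, not LocalAI code):

package main

import (
	"fmt"

	"github.com/google/uuid"
)

// brokenHandler mirrors the old code: the ID is computed once,
// when the handler is built, so every call reuses it.
func brokenHandler() func() string {
	id := uuid.New().String()
	return func() string { return id }
}

// fixedHandler mirrors the fix: state is assigned inside the
// closure, so each call gets a fresh ID.
func fixedHandler() func() string {
	return func() string { return uuid.New().String() }
}

func main() {
	b, f := brokenHandler(), fixedHandler()
	fmt.Println(b() == b()) // true: same ID on every call
	fmt.Println(f() == f()) // false: a new ID per call
}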

View File

Binary file not shown.

View File

Binary file not shown.

View File

Binary file not shown.

View File

Binary file not shown.

View File

Binary file not shown.

View File

Binary file not shown.

View File

Binary file not shown.

View File

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,9 @@
/*!
Theme: Default
Description: Original highlight.js style
Author: (c) Ivan Sagalaev <maniac@softwaremaniacs.org>
Maintainer: @highlightjs/core-team
Website: https://highlightjs.org/
License: see project LICENSE
Touched: 2021
*/pre code.hljs{display:block;overflow-x:auto;padding:1em}code.hljs{padding:3px 5px}.hljs{background:#f3f3f3;color:#444}.hljs-comment{color:#697070}.hljs-punctuation,.hljs-tag{color:#444a}.hljs-tag .hljs-attr,.hljs-tag .hljs-name{color:#444}.hljs-attribute,.hljs-doctag,.hljs-keyword,.hljs-meta .hljs-keyword,.hljs-name,.hljs-selector-tag{font-weight:700}.hljs-deletion,.hljs-number,.hljs-quote,.hljs-selector-class,.hljs-selector-id,.hljs-string,.hljs-template-tag,.hljs-type{color:#800}.hljs-section,.hljs-title{color:#800;font-weight:700}.hljs-link,.hljs-operator,.hljs-regexp,.hljs-selector-attr,.hljs-selector-pseudo,.hljs-symbol,.hljs-template-variable,.hljs-variable{color:#ab5656}.hljs-literal{color:#695}.hljs-addition,.hljs-built_in,.hljs-bullet,.hljs-code{color:#397300}.hljs-meta{color:#1f7199}.hljs-meta .hljs-string{color:#38a}.hljs-emphasis{font-style:italic}.hljs-strong{font-weight:700}

View File

File diff suppressed because one or more lines are too long

View File

File diff suppressed because one or more lines are too long

View File

File diff suppressed because one or more lines are too long

View File

File diff suppressed because one or more lines are too long

View File

File diff suppressed because one or more lines are too long

View File

File diff suppressed because one or more lines are too long

View File

File diff suppressed because one or more lines are too long

View File

@@ -28,9 +28,15 @@ import (
"github.com/mudler/edgevpn/pkg/logger"
)
-func generateNewConnectionData() *node.YAMLConnectionConfig {
+func generateNewConnectionData(DHTInterval, OTPInterval int) *node.YAMLConnectionConfig {
maxMessSize := 20 << 20 // 20MB
keyLength := 43
+if DHTInterval == 0 {
+DHTInterval = 360
+}
+if OTPInterval == 0 {
+OTPInterval = 9000
+}
return &node.YAMLConnectionConfig{
MaxMessageSize: maxMessSize,
@@ -40,21 +46,21 @@ func generateNewConnectionData() *node.YAMLConnectionConfig {
OTP: node.OTP{
DHT: node.OTPConfig{
Key: eutils.RandStringRunes(keyLength),
-Interval: 120,
+Interval: DHTInterval,
Length: keyLength,
},
Crypto: node.OTPConfig{
Key: eutils.RandStringRunes(keyLength),
-Interval: 9000,
+Interval: OTPInterval,
Length: keyLength,
},
},
}
}
-func GenerateToken() string {
+func GenerateToken(DHTInterval, OTPInterval int) string {
// Generates a new config and exit
-return generateNewConnectionData().Base64()
+return generateNewConnectionData(DHTInterval, OTPInterval).Base64()
}
func IsP2PEnabled() bool {
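
Under the new signature a caller passes both intervals explicitly, and a zero value falls back to the defaults shown above (360 for DHT, 9000 for OTP). A minimal sketch, assuming a build where the real p2p implementation (not the stub in the next file) is compiled in:

package main

import (
	"fmt"

	"github.com/mudler/LocalAI/core/p2p"
)

func main() {
	// Custom refresh intervals; passing 0 for either argument
	// keeps the corresponding default (DHT: 360, OTP: 9000).
	token := p2p.GenerateToken(120, 3600)
	fmt.Println(token)
}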

View File

@@ -10,7 +10,7 @@ import (
"github.com/mudler/edgevpn/pkg/node"
)
-func GenerateToken() string {
+func GenerateToken(DHTInterval, OTPInterval int) string {
return "not implemented"
}

View File

@@ -5,11 +5,11 @@
url: "https://cdn.jsdelivr.net/gh/highlightjs/cdn-release@11.8.0/build/highlight.min.js"
sha: "4499ff936d4fd562adca5a5cbe512dc19eb80942eee8618dafbcebc4f7974bdb"
- filename: "alpine.js"
url: "https://cdn.jsdelivr.net/npm/alpinejs@3.14.1/dist/cdn.min.js"
sha: "358d9afbb1ab5befa2f48061a30776e5bcd7707f410a606ba985f98bc3b1c034"
url: "https://cdn.jsdelivr.net/npm/alpinejs@3.x.x/dist/cdn.min.js"
sha: "fb9b146b7fbd1bbf251fb3ef464f2e7c5d33a4a83aeb0fcf21e92ca6a9558c4b"
- filename: "marked.js"
url: "https://cdn.jsdelivr.net/npm/marked@14.0.0/lib/marked.umd.min.js"
sha: "0996c58f732096b6aed537916589c0786dd3332bf4612cc9c206bc44a031b13d"
url: "https://cdn.jsdelivr.net/npm/marked/marked.min.js"
sha: "15fabce5b65898b32b03f5ed25e9f891a729ad4c0d6d877110a7744aa847a894"
- filename: "purify.js"
url: "https://cdn.jsdelivr.net/npm/dompurify@3.0.6/dist/purify.min.js"
sha: "ea4b09082ca4ba0ae71be6431a097678751d0453b9c52a4d2c7c39a2166ed9fc"