Compare commits


6 Commits

Author SHA1 Message Date
Ettore Di Giacinto
fbaae8528d fix(chat): re-generate uuid, created, and text on each request (#3359)
This was noticed with models returning content besides function calls.
Sadly we can't easily test that in CI, so it went unnoticed.

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-08-22 10:56:05 +02:00
LocalAI [bot]
7d030b56b2 chore: ⬆️ Update ggerganov/whisper.cpp to 9e3c5345cd46ea718209db53464e426c3fe7a25e (#3357)
⬆️ Update ggerganov/whisper.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-08-22 08:49:33 +00:00
LocalAI [bot]
0add16049e chore: ⬆️ Update ggerganov/llama.cpp to fc54ef0d1c138133a01933296d50a36a1ab64735 (#3356)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-08-21 22:14:02 +00:00
Ettore Di Giacinto
2bb48b0816 fix(parler-tts): pin torchaudio and torch for hipblas
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2024-08-21 18:27:20 +02:00
Ettore Di Giacinto
023ce59d44 feat(p2p): allow setting intervals (#3353)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-08-21 18:23:51 +02:00
Ettore Di Giacinto
7822d944b5 chore(p2p): single-node when sharing federated instance (#3354)
* chore(p2p): single-node when sharing federated instance

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* chore: refactor out and extract into functions

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-08-21 18:23:42 +02:00
27 changed files with 1458 additions and 65 deletions

View File

@@ -8,7 +8,7 @@ DETECT_LIBS?=true
# llama.cpp versions
GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
-CPPLLAMA_VERSION?=2f3c1466ff46a2413b0e363a5005c46538186ee6
+CPPLLAMA_VERSION?=fc54ef0d1c138133a01933296d50a36a1ab64735
# go-rwkv version
RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
@@ -16,7 +16,7 @@ RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6
# whisper.cpp version
WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp
-WHISPER_CPP_VERSION?=d65786ea540a5aef21f67cacfa6f134097727780
+WHISPER_CPP_VERSION?=9e3c5345cd46ea718209db53464e426c3fe7a25e
# bert.cpp version
BERT_REPO?=https://github.com/go-skynet/go-bert.cpp
@@ -338,7 +338,7 @@ rebuild: ## Rebuilds the project
$(MAKE) -C sources/go-tiny-dream clean
$(MAKE) build
-prepare: prepare-sources gen-assets $(OPTIONAL_TARGETS)
+prepare: prepare-sources $(OPTIONAL_TARGETS)
clean: ## Remove build related file
$(GOCMD) clean -cache
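
Note: every pin above uses Make's conditional assignment (?=), so each value is only a default; a build can override a pin without editing the Makefile, e.g. make build CPPLLAMA_VERSION=<sha>.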

View File

@@ -1,5 +1,5 @@
--extra-index-url https://download.pytorch.org/whl/rocm6.0
-torch
-torchaudio
+torch==2.3.0+rocm6.0
+torchaudio==2.3.0+rocm6.0
transformers
accelerate
accelerate
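
Note: pinning both packages to the +rocm6.0 local-version wheels keeps pip resolving them from the ROCm extra index declared on the first line, and torch and torchaudio are pinned together because torchaudio wheels are built against one specific torch release.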

core/cli/api/p2p.go Normal file
View File

@@ -0,0 +1,80 @@
package cli_api
import (
"context"
"fmt"
"net"
"os"
"strings"
"github.com/mudler/LocalAI/core/p2p"
"github.com/mudler/edgevpn/pkg/node"
"github.com/rs/zerolog/log"
)
func StartP2PStack(ctx context.Context, address, token, networkID string, federated bool) error {
var n *node.Node
// Here we are avoiding creating multiple nodes:
// - if the federated mode is enabled, we create a federated node and expose a service
// - exposing a service creates a node with specific options, and we don't want to create another node
// If the federated mode is enabled, we expose a service to the local instance running
// at r.Address
if federated {
_, port, err := net.SplitHostPort(address)
if err != nil {
return err
}
// Here a new node is created and started
// and a service is exposed by the node
node, err := p2p.ExposeService(ctx, "localhost", port, token, p2p.NetworkID(networkID, p2p.FederatedID))
if err != nil {
return err
}
if err := p2p.ServiceDiscoverer(ctx, node, token, p2p.NetworkID(networkID, p2p.FederatedID), nil, false); err != nil {
return err
}
n = node
}
// If the p2p mode is enabled, we start the service discovery
if token != "" {
// If a node wasn't created previously, create it
if n == nil {
node, err := p2p.NewNode(token)
if err != nil {
return err
}
err = node.Start(ctx)
if err != nil {
return fmt.Errorf("starting new node: %w", err)
}
n = node
}
// Attach a ServiceDiscoverer to the p2p node
log.Info().Msg("Starting P2P server discovery...")
if err := p2p.ServiceDiscoverer(ctx, n, token, p2p.NetworkID(networkID, p2p.WorkerID), func(serviceID string, node p2p.NodeData) {
var tunnelAddresses []string
for _, v := range p2p.GetAvailableNodes(p2p.NetworkID(networkID, p2p.WorkerID)) {
if v.IsOnline() {
tunnelAddresses = append(tunnelAddresses, v.TunnelAddress)
} else {
log.Info().Msgf("Node %s is offline", v.ID)
}
}
tunnelEnvVar := strings.Join(tunnelAddresses, ",")
os.Setenv("LLAMACPP_GRPC_SERVERS", tunnelEnvVar)
log.Debug().Msgf("setting LLAMACPP_GRPC_SERVERS to %s", tunnelEnvVar)
}, true); err != nil {
return err
}
}
return nil
}
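
A minimal sketch of how a caller might drive this new helper; the address, token, and network ID below are hypothetical placeholders, and only StartP2PStack and its signature come from the file above:

package main

import (
	"context"
	"log"

	cli_api "github.com/mudler/LocalAI/core/cli/api"
)

func main() {
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	// Hypothetical values: the address the local instance listens on,
	// a previously generated P2P token, and an arbitrary network ID.
	// federated=true also exposes the instance as a federated service.
	if err := cli_api.StartP2PStack(ctx, "127.0.0.1:8080", "<p2p-token>", "my-network", true); err != nil {
		log.Fatal(err)
	}

	// StartP2PStack only wires up the P2P side; the caller (e.g. the
	// HTTP server in run.go) would carry on from here.
}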

View File

@@ -3,11 +3,10 @@ package cli
import (
"context"
"fmt"
-"net"
-"os"
-"strings"
"time"
+cli_api "github.com/mudler/LocalAI/core/cli/api"
cliContext "github.com/mudler/LocalAI/core/cli/context"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/http"
@@ -53,6 +52,8 @@ type RunCMD struct {
DisablePredownloadScan bool `env:"LOCALAI_DISABLE_PREDOWNLOAD_SCAN" help:"If true, disables the best-effort security scanner before downloading any files." group:"hardening" default:"false"`
OpaqueErrors bool `env:"LOCALAI_OPAQUE_ERRORS" default:"false" help:"If true, all error responses are replaced with blank 500 errors. This is intended only for hardening against information leaks and is normally not recommended." group:"hardening"`
Peer2Peer bool `env:"LOCALAI_P2P,P2P" name:"p2p" default:"false" help:"Enable P2P mode" group:"p2p"`
+Peer2PeerDHTInterval int `env:"LOCALAI_P2P_DHT_INTERVAL,P2P_DHT_INTERVAL" default:"360" name:"p2p-dht-interval" help:"Interval for DHT refresh (used during token generation)" group:"p2p"`
+Peer2PeerOTPInterval int `env:"LOCALAI_P2P_OTP_INTERVAL,P2P_OTP_INTERVAL" default:"9000" name:"p2p-otp-interval" help:"Interval for OTP refresh (used during token generation)" group:"p2p"`
Peer2PeerToken string `env:"LOCALAI_P2P_TOKEN,P2P_TOKEN,TOKEN" name:"p2ptoken" help:"Token for P2P mode (optional)" group:"p2p"`
Peer2PeerNetworkID string `env:"LOCALAI_P2P_NETWORK_ID,P2P_NETWORK_ID" help:"Network ID for P2P mode, can be set arbitrarily by the user for grouping a set of instances" group:"p2p"`
ParallelRequests bool `env:"LOCALAI_PARALLEL_REQUESTS,PARALLEL_REQUESTS" help:"Enable backends to handle multiple requests in parallel if they support it (e.g.: llama.cpp or vllm)" group:"backends"`
@@ -107,7 +108,7 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error {
// IF no token is provided, and p2p is enabled,
// we generate one and wait for the user to pick up the token (this is for interactive)
log.Info().Msg("No token provided, generating one")
-token = p2p.GenerateToken()
+token = p2p.GenerateToken(r.Peer2PeerDHTInterval, r.Peer2PeerOTPInterval)
log.Info().Msg("Generated Token:")
fmt.Println(token)
@@ -115,52 +116,12 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error {
fmt.Printf("export TOKEN=\"%s\"\nlocal-ai worker p2p-llama-cpp-rpc\n", token)
}
opts = append(opts, config.WithP2PToken(token))
-node, err := p2p.NewNode(token)
-if err != nil {
-return err
-}
-nodeContext := context.Background()
-err = node.Start(nodeContext)
-if err != nil {
-return fmt.Errorf("starting new node: %w", err)
-}
-log.Info().Msg("Starting P2P server discovery...")
-if err := p2p.ServiceDiscoverer(nodeContext, node, token, p2p.NetworkID(r.Peer2PeerNetworkID, p2p.WorkerID), func(serviceID string, node p2p.NodeData) {
-var tunnelAddresses []string
-for _, v := range p2p.GetAvailableNodes(p2p.NetworkID(r.Peer2PeerNetworkID, p2p.WorkerID)) {
-if v.IsOnline() {
-tunnelAddresses = append(tunnelAddresses, v.TunnelAddress)
-} else {
-log.Info().Msgf("Node %s is offline", v.ID)
-}
-}
-tunnelEnvVar := strings.Join(tunnelAddresses, ",")
-os.Setenv("LLAMACPP_GRPC_SERVERS", tunnelEnvVar)
-log.Debug().Msgf("setting LLAMACPP_GRPC_SERVERS to %s", tunnelEnvVar)
-}, true); err != nil {
-return err
-}
}
-if r.Federated {
-_, port, err := net.SplitHostPort(r.Address)
-if err != nil {
-return err
-}
-fedCtx := context.Background()
+backgroundCtx := context.Background()
-node, err := p2p.ExposeService(fedCtx, "localhost", port, token, p2p.NetworkID(r.Peer2PeerNetworkID, p2p.FederatedID))
-if err != nil {
-return err
-}
-if err := p2p.ServiceDiscoverer(fedCtx, node, token, p2p.NetworkID(r.Peer2PeerNetworkID, p2p.FederatedID), nil, false); err != nil {
-return err
-}
+if err := cli_api.StartP2PStack(backgroundCtx, r.Address, token, r.Peer2PeerNetworkID, r.Federated); err != nil {
+return err
+}
idleWatchDog := r.EnableWatchdogIdle
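
With this refactor the worker and federated wiring collapses into the single StartP2PStack call above, and the refresh intervals become tunable through the flag definitions earlier in the file; a hypothetical invocation via the environment variables they declare: LOCALAI_P2P=true LOCALAI_P2P_DHT_INTERVAL=120 LOCALAI_P2P_OTP_INTERVAL=3600 local-ai run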

View File

@@ -25,9 +25,8 @@ import (
// @Success 200 {object} schema.OpenAIResponse "Response"
// @Router /v1/chat/completions [post]
func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startupOptions *config.ApplicationConfig) func(c *fiber.Ctx) error {
-textContentToReturn := ""
-id := uuid.New().String()
-created := int(time.Now().Unix())
+var id, textContentToReturn string
+var created int
process := func(s string, req *schema.OpenAIRequest, config *config.BackendConfig, loader *model.ModelLoader, responses chan schema.OpenAIResponse) {
initialMessage := schema.OpenAIResponse{
@@ -159,6 +158,10 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
}
return func(c *fiber.Ctx) error {
+textContentToReturn = ""
+id = uuid.New().String()
+created = int(time.Now().Unix())
modelFile, input, err := readRequest(c, cl, ml, startupOptions, true)
if err != nil {
return fmt.Errorf("failed reading parameters from request:%w", err)
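
The underlying bug pattern is worth spelling out: ChatEndpoint builds its fiber handler once, so anything initialized outside the returned closure is computed a single time and then shared by every request. A self-contained sketch of the same pitfall and fix (hypothetical names, not LocalAI code):

package main

import (
	"fmt"

	"github.com/google/uuid"
)

// brokenHandler mirrors the old code: the ID is computed once,
// when the handler is built, so every call reuses it.
func brokenHandler() func() string {
	id := uuid.New().String()
	return func() string { return id }
}

// fixedHandler mirrors the fix: state is assigned inside the
// closure, so each call gets a fresh ID.
func fixedHandler() func() string {
	return func() string { return uuid.New().String() }
}

func main() {
	b, f := brokenHandler(), fixedHandler()
	fmt.Println(b() == b()) // true: same ID on every call
	fmt.Println(f() == f()) // false: a new ID per call
}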

View File

Binary file not shown.

View File

Binary file not shown.

View File

Binary file not shown.

View File

Binary file not shown.

View File

Binary file not shown.

View File

Binary file not shown.

View File

Binary file not shown.

View File

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,9 @@
/*!
Theme: Default
Description: Original highlight.js style
Author: (c) Ivan Sagalaev <maniac@softwaremaniacs.org>
Maintainer: @highlightjs/core-team
Website: https://highlightjs.org/
License: see project LICENSE
Touched: 2021
*/pre code.hljs{display:block;overflow-x:auto;padding:1em}code.hljs{padding:3px 5px}.hljs{background:#f3f3f3;color:#444}.hljs-comment{color:#697070}.hljs-punctuation,.hljs-tag{color:#444a}.hljs-tag .hljs-attr,.hljs-tag .hljs-name{color:#444}.hljs-attribute,.hljs-doctag,.hljs-keyword,.hljs-meta .hljs-keyword,.hljs-name,.hljs-selector-tag{font-weight:700}.hljs-deletion,.hljs-number,.hljs-quote,.hljs-selector-class,.hljs-selector-id,.hljs-string,.hljs-template-tag,.hljs-type{color:#800}.hljs-section,.hljs-title{color:#800;font-weight:700}.hljs-link,.hljs-operator,.hljs-regexp,.hljs-selector-attr,.hljs-selector-pseudo,.hljs-symbol,.hljs-template-variable,.hljs-variable{color:#ab5656}.hljs-literal{color:#695}.hljs-addition,.hljs-built_in,.hljs-bullet,.hljs-code{color:#397300}.hljs-meta{color:#1f7199}.hljs-meta .hljs-string{color:#38a}.hljs-emphasis{font-style:italic}.hljs-strong{font-weight:700}

View File

File diff suppressed because one or more lines are too long

View File

File diff suppressed because one or more lines are too long

View File

File diff suppressed because one or more lines are too long

View File

File diff suppressed because one or more lines are too long

View File

File diff suppressed because one or more lines are too long

View File

File diff suppressed because one or more lines are too long

View File

File diff suppressed because one or more lines are too long

View File

@@ -28,9 +28,15 @@ import (
"github.com/mudler/edgevpn/pkg/logger"
)
-func generateNewConnectionData() *node.YAMLConnectionConfig {
+func generateNewConnectionData(DHTInterval, OTPInterval int) *node.YAMLConnectionConfig {
maxMessSize := 20 << 20 // 20MB
keyLength := 43
+if DHTInterval == 0 {
+DHTInterval = 360
+}
+if OTPInterval == 0 {
+OTPInterval = 9000
+}
return &node.YAMLConnectionConfig{
MaxMessageSize: maxMessSize,
@@ -40,21 +46,21 @@ func generateNewConnectionData() *node.YAMLConnectionConfig {
OTP: node.OTP{
DHT: node.OTPConfig{
Key: eutils.RandStringRunes(keyLength),
-Interval: 120,
+Interval: DHTInterval,
Length: keyLength,
},
Crypto: node.OTPConfig{
Key: eutils.RandStringRunes(keyLength),
-Interval: 9000,
+Interval: OTPInterval,
Length: keyLength,
},
},
}
}
-func GenerateToken() string {
+func GenerateToken(DHTInterval, OTPInterval int) string {
// Generates a new config and exit
-return generateNewConnectionData().Base64()
+return generateNewConnectionData(DHTInterval, OTPInterval).Base64()
}
func IsP2PEnabled() bool {
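
Under the new signature a caller passes both intervals explicitly, and a zero value falls back to the defaults shown above (360 for DHT, 9000 for OTP). A minimal sketch, assuming a build where the real p2p implementation (not the stub in the next file) is compiled in:

package main

import (
	"fmt"

	"github.com/mudler/LocalAI/core/p2p"
)

func main() {
	// Custom refresh intervals; passing 0 for either argument
	// keeps the corresponding default (DHT: 360, OTP: 9000).
	token := p2p.GenerateToken(120, 3600)
	fmt.Println(token)
}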

View File

@@ -10,7 +10,7 @@ import (
"github.com/mudler/edgevpn/pkg/node"
)
-func GenerateToken() string {
+func GenerateToken(DHTInterval, OTPInterval int) string {
return "not implemented"
}

View File

@@ -5,11 +5,11 @@
url: "https://cdn.jsdelivr.net/gh/highlightjs/cdn-release@11.8.0/build/highlight.min.js"
sha: "4499ff936d4fd562adca5a5cbe512dc19eb80942eee8618dafbcebc4f7974bdb"
- filename: "alpine.js"
url: "https://cdn.jsdelivr.net/npm/alpinejs@3.14.1/dist/cdn.min.js"
sha: "358d9afbb1ab5befa2f48061a30776e5bcd7707f410a606ba985f98bc3b1c034"
url: "https://cdn.jsdelivr.net/npm/alpinejs@3.x.x/dist/cdn.min.js"
sha: "fb9b146b7fbd1bbf251fb3ef464f2e7c5d33a4a83aeb0fcf21e92ca6a9558c4b"
- filename: "marked.js"
url: "https://cdn.jsdelivr.net/npm/marked@14.0.0/lib/marked.umd.min.js"
sha: "0996c58f732096b6aed537916589c0786dd3332bf4612cc9c206bc44a031b13d"
url: "https://cdn.jsdelivr.net/npm/marked/marked.min.js"
sha: "15fabce5b65898b32b03f5ed25e9f891a729ad4c0d6d877110a7744aa847a894"
- filename: "purify.js"
url: "https://cdn.jsdelivr.net/npm/dompurify@3.0.6/dist/purify.min.js"
sha: "ea4b09082ca4ba0ae71be6431a097678751d0453b9c52a4d2c7c39a2166ed9fc"