Mirror of https://github.com/mudler/LocalAI.git (synced 2026-02-04 19:52:36 -05:00)

Compare commits: v2.20.1 ... cleanup_de (5 commits)

Commits (SHA1):
- 2a03905920
- 35297ebc14
- b303805df9
- 32d51797d9
- af09b019ed
Makefile (6 changed lines)
@@ -8,7 +8,7 @@ DETECT_LIBS?=true
# llama.cpp versions
GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
CPPLLAMA_VERSION?=fc54ef0d1c138133a01933296d50a36a1ab64735
CPPLLAMA_VERSION?=2f3c1466ff46a2413b0e363a5005c46538186ee6

# go-rwkv version
RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
@@ -16,7 +16,7 @@ RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6

# whisper.cpp version
WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp
WHISPER_CPP_VERSION?=9e3c5345cd46ea718209db53464e426c3fe7a25e
WHISPER_CPP_VERSION?=d65786ea540a5aef21f67cacfa6f134097727780

# bert.cpp version
BERT_REPO?=https://github.com/go-skynet/go-bert.cpp
@@ -338,7 +338,7 @@ rebuild: ## Rebuilds the project
    $(MAKE) -C sources/go-tiny-dream clean
    $(MAKE) build

prepare: prepare-sources $(OPTIONAL_TARGETS)
prepare: prepare-sources gen-assets $(OPTIONAL_TARGETS)

clean: ## Remove build related file
    $(GOCMD) clean -cache
Python backend requirements (multiple requirements.txt files):

@@ -1,6 +1,6 @@
accelerate
auto-gptq==0.7.1
grpcio==1.66.0
grpcio==1.65.4
protobuf
certifi
transformers
@@ -1,4 +1,4 @@
bark==0.1.5
grpcio==1.66.0
grpcio==1.65.5
protobuf
certifi
@@ -1,2 +1,2 @@
grpcio==1.66.0
grpcio==1.65.5
protobuf
@@ -1,4 +1,4 @@
TTS==0.22.0
grpcio==1.66.0
grpcio==1.65.5
protobuf
certifi
@@ -1,5 +1,5 @@
setuptools
grpcio==1.66.0
grpcio==1.65.4
pillow
protobuf
certifi
@@ -1,4 +1,4 @@
grpcio==1.66.0
grpcio==1.65.5
protobuf
certifi
setuptools
@@ -1,4 +1,4 @@
grpcio==1.66.0
grpcio==1.65.4
protobuf
certifi
wheel
@@ -1,3 +1,3 @@
grpcio==1.66.0
grpcio==1.65.5
protobuf
certifi
@@ -2,7 +2,7 @@
intel-extension-for-pytorch
torch
optimum[openvino]
grpcio==1.66.0
grpcio==1.65.5
protobuf
librosa==0.9.1
faster-whisper==1.0.3
@@ -1,4 +1,4 @@
grpcio==1.66.0
grpcio==1.65.5
protobuf
librosa
faster-whisper
@@ -1,5 +1,5 @@
--extra-index-url https://download.pytorch.org/whl/rocm6.0
torch==2.3.0+rocm6.0
torchaudio==2.3.0+rocm6.0
torch
torchaudio
transformers
accelerate
@@ -1,4 +1,4 @@
grpcio==1.66.0
grpcio==1.65.5
protobuf
certifi
llvmlite==0.43.0
@@ -1,3 +1,3 @@
grpcio==1.66.0
grpcio==1.65.4
protobuf
certifi
@@ -1,3 +1,3 @@
grpcio==1.66.0
grpcio==1.65.5
protobuf
certifi
@@ -1,4 +1,4 @@
grpcio==1.66.0
grpcio==1.65.5
protobuf
scipy==1.14.0
certifi
@@ -1,4 +1,4 @@
grpcio==1.66.0
grpcio==1.65.5
protobuf
certifi
setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
@@ -1,3 +1,3 @@
grpcio==1.66.0
grpcio==1.65.5
protobuf
certifi
@@ -1,4 +1,4 @@
grpcio==1.66.0
grpcio==1.65.5
protobuf
certifi
setuptools
@@ -1,80 +0,0 @@
package cli_api

import (
    "context"
    "fmt"
    "net"
    "os"
    "strings"

    "github.com/mudler/LocalAI/core/p2p"
    "github.com/mudler/edgevpn/pkg/node"

    "github.com/rs/zerolog/log"
)

func StartP2PStack(ctx context.Context, address, token, networkID string, federated bool) error {
    var n *node.Node
    // Here we are avoiding creating multiple nodes:
    // - if the federated mode is enabled, we create a federated node and expose a service
    // - exposing a service creates a node with specific options, and we don't want to create another node

    // If the federated mode is enabled, we expose a service to the local instance running
    // at r.Address
    if federated {
        _, port, err := net.SplitHostPort(address)
        if err != nil {
            return err
        }

        // Here a new node is created and started
        // and a service is exposed by the node
        node, err := p2p.ExposeService(ctx, "localhost", port, token, p2p.NetworkID(networkID, p2p.FederatedID))
        if err != nil {
            return err
        }

        if err := p2p.ServiceDiscoverer(ctx, node, token, p2p.NetworkID(networkID, p2p.FederatedID), nil, false); err != nil {
            return err
        }

        n = node
    }

    // If the p2p mode is enabled, we start the service discovery
    if token != "" {
        // If a node wasn't created previously, create it
        if n == nil {
            node, err := p2p.NewNode(token)
            if err != nil {
                return err
            }
            err = node.Start(ctx)
            if err != nil {
                return fmt.Errorf("starting new node: %w", err)
            }
            n = node
        }

        // Attach a ServiceDiscoverer to the p2p node
        log.Info().Msg("Starting P2P server discovery...")
        if err := p2p.ServiceDiscoverer(ctx, n, token, p2p.NetworkID(networkID, p2p.WorkerID), func(serviceID string, node p2p.NodeData) {
            var tunnelAddresses []string
            for _, v := range p2p.GetAvailableNodes(p2p.NetworkID(networkID, p2p.WorkerID)) {
                if v.IsOnline() {
                    tunnelAddresses = append(tunnelAddresses, v.TunnelAddress)
                } else {
                    log.Info().Msgf("Node %s is offline", v.ID)
                }
            }
            tunnelEnvVar := strings.Join(tunnelAddresses, ",")

            os.Setenv("LLAMACPP_GRPC_SERVERS", tunnelEnvVar)
            log.Debug().Msgf("setting LLAMACPP_GRPC_SERVERS to %s", tunnelEnvVar)
        }, true); err != nil {
            return err
        }
    }

    return nil
}
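The file above is the `cli_api.StartP2PStack` helper that bundles federated-service exposure and worker discovery behind one call; the run.go hunks below invoke it on one side of this compare. A minimal sketch of how a caller would drive it, using placeholder values (in LocalAI the address, token and network ID come from the run command's flags/env such as LOCALAI_P2P_TOKEN and LOCALAI_P2P_NETWORK_ID); it only builds against the side of the tree that still contains core/cli/api:

```go
package main

import (
	"context"
	"log"

	cli_api "github.com/mudler/LocalAI/core/cli/api"
)

func main() {
	// Placeholder values; in LocalAI these come from CLI flags and environment variables.
	address := "127.0.0.1:8080" // host:port of the local instance (net.SplitHostPort is applied to it)
	token := "<p2p token>"      // token produced by p2p.GenerateToken
	networkID := "my-cluster"   // arbitrary grouping ID (LOCALAI_P2P_NETWORK_ID)
	federated := true

	// StartP2PStack creates at most one edgevpn node: when federated it exposes the
	// local instance as a service, and when a token is set it attaches the worker
	// discovery callback that fills LLAMACPP_GRPC_SERVERS with online tunnel addresses.
	if err := cli_api.StartP2PStack(context.Background(), address, token, networkID, federated); err != nil {
		log.Fatal(err)
	}
}
```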
@@ -3,10 +3,11 @@ package cli
import (
    "context"
    "fmt"
    "net"
    "os"
    "strings"
    "time"

    cli_api "github.com/mudler/LocalAI/core/cli/api"
    cliContext "github.com/mudler/LocalAI/core/cli/context"
    "github.com/mudler/LocalAI/core/config"
    "github.com/mudler/LocalAI/core/http"
@@ -52,8 +53,6 @@ type RunCMD struct {
DisablePredownloadScan bool `env:"LOCALAI_DISABLE_PREDOWNLOAD_SCAN" help:"If true, disables the best-effort security scanner before downloading any files." group:"hardening" default:"false"`
OpaqueErrors bool `env:"LOCALAI_OPAQUE_ERRORS" default:"false" help:"If true, all error responses are replaced with blank 500 errors. This is intended only for hardening against information leaks and is normally not recommended." group:"hardening"`
Peer2Peer bool `env:"LOCALAI_P2P,P2P" name:"p2p" default:"false" help:"Enable P2P mode" group:"p2p"`
Peer2PeerDHTInterval int `env:"LOCALAI_P2P_DHT_INTERVAL,P2P_DHT_INTERVAL" default:"360" name:"p2p-dht-interval" help:"Interval for DHT refresh (used during token generation)" group:"p2p"`
Peer2PeerOTPInterval int `env:"LOCALAI_P2P_OTP_INTERVAL,P2P_OTP_INTERVAL" default:"9000" name:"p2p-otp-interval" help:"Interval for OTP refresh (used during token generation)" group:"p2p"`
Peer2PeerToken string `env:"LOCALAI_P2P_TOKEN,P2P_TOKEN,TOKEN" name:"p2ptoken" help:"Token for P2P mode (optional)" group:"p2p"`
Peer2PeerNetworkID string `env:"LOCALAI_P2P_NETWORK_ID,P2P_NETWORK_ID" help:"Network ID for P2P mode, can be set arbitrarly by the user for grouping a set of instances" group:"p2p"`
ParallelRequests bool `env:"LOCALAI_PARALLEL_REQUESTS,PARALLEL_REQUESTS" help:"Enable backends to handle multiple requests in parallel if they support it (e.g.: llama.cpp or vllm)" group:"backends"`
@@ -108,7 +107,7 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error {
        // IF no token is provided, and p2p is enabled,
        // we generate one and wait for the user to pick up the token (this is for interactive)
        log.Info().Msg("No token provided, generating one")
        token = p2p.GenerateToken(r.Peer2PeerDHTInterval, r.Peer2PeerOTPInterval)
        token = p2p.GenerateToken()
        log.Info().Msg("Generated Token:")
        fmt.Println(token)

@@ -116,12 +115,52 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error {
        fmt.Printf("export TOKEN=\"%s\"\nlocal-ai worker p2p-llama-cpp-rpc\n", token)
        }
        opts = append(opts, config.WithP2PToken(token))

        node, err := p2p.NewNode(token)
        if err != nil {
            return err
        }
        nodeContext := context.Background()

        err = node.Start(nodeContext)
        if err != nil {
            return fmt.Errorf("starting new node: %w", err)
        }

        log.Info().Msg("Starting P2P server discovery...")
        if err := p2p.ServiceDiscoverer(nodeContext, node, token, p2p.NetworkID(r.Peer2PeerNetworkID, p2p.WorkerID), func(serviceID string, node p2p.NodeData) {
            var tunnelAddresses []string
            for _, v := range p2p.GetAvailableNodes(p2p.NetworkID(r.Peer2PeerNetworkID, p2p.WorkerID)) {
                if v.IsOnline() {
                    tunnelAddresses = append(tunnelAddresses, v.TunnelAddress)
                } else {
                    log.Info().Msgf("Node %s is offline", v.ID)
                }
            }
            tunnelEnvVar := strings.Join(tunnelAddresses, ",")

            os.Setenv("LLAMACPP_GRPC_SERVERS", tunnelEnvVar)
            log.Debug().Msgf("setting LLAMACPP_GRPC_SERVERS to %s", tunnelEnvVar)
        }, true); err != nil {
            return err
        }
    }

    backgroundCtx := context.Background()
    if r.Federated {
        _, port, err := net.SplitHostPort(r.Address)
        if err != nil {
            return err
        }
        fedCtx := context.Background()

        if err := cli_api.StartP2PStack(backgroundCtx, r.Address, token, r.Peer2PeerNetworkID, r.Federated); err != nil {
            return err
        node, err := p2p.ExposeService(fedCtx, "localhost", port, token, p2p.NetworkID(r.Peer2PeerNetworkID, p2p.FederatedID))
        if err != nil {
            return err
        }

        if err := p2p.ServiceDiscoverer(fedCtx, node, token, p2p.NetworkID(r.Peer2PeerNetworkID, p2p.FederatedID), nil, false); err != nil {
            return err
        }
    }

    idleWatchDog := r.EnableWatchdogIdle
@@ -25,8 +25,9 @@ import (
// @Success 200 {object} schema.OpenAIResponse "Response"
// @Router /v1/chat/completions [post]
func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startupOptions *config.ApplicationConfig) func(c *fiber.Ctx) error {
    var id, textContentToReturn string
    var created int
    textContentToReturn := ""
    id := uuid.New().String()
    created := int(time.Now().Unix())

    process := func(s string, req *schema.OpenAIRequest, config *config.BackendConfig, loader *model.ModelLoader, responses chan schema.OpenAIResponse) {
        initialMessage := schema.OpenAIResponse{
@@ -158,10 +159,6 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
    }

    return func(c *fiber.Ctx) error {
        textContentToReturn = ""
        id = uuid.New().String()
        created = int(time.Now().Unix())

        modelFile, input, err := readRequest(c, cl, ml, startupOptions, true)
        if err != nil {
            return fmt.Errorf("failed reading parameters from request:%w", err)
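The two hunks above differ only in where `id`, `created` and `textContentToReturn` are initialized: once when `ChatEndpoint` builds the handler, or freshly at the top of the handler it returns. A minimal, self-contained sketch of why the placement matters (plain `net/http` instead of fiber, with `github.com/google/uuid` assumed for `uuid.New()`): values captured by the closure are shared across all requests, while values declared inside the handler are per request.

```go
package main

import (
	"fmt"
	"net/http"
	"time"

	"github.com/google/uuid"
)

// sharedState captures id/created when the handler is constructed,
// so every request sees the same values (and concurrent requests share them).
func sharedState() http.HandlerFunc {
	id := uuid.New().String()
	created := int(time.Now().Unix())
	return func(w http.ResponseWriter, r *http.Request) {
		fmt.Fprintf(w, "id=%s created=%d\n", id, created)
	}
}

// perRequestState regenerates them inside the handler, so each request
// gets its own completion id and creation timestamp.
func perRequestState() http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		id := uuid.New().String()
		created := int(time.Now().Unix())
		fmt.Fprintf(w, "id=%s created=%d\n", id, created)
	}
}

func main() {
	http.Handle("/shared", sharedState())
	http.Handle("/per-request", perRequestState())
	http.ListenAndServe(":8081", nil)
}
```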
(10 binary files changed, not shown; 1 file diff suppressed because one or more lines are too long)
@@ -1,9 +0,0 @@
/*!
Theme: Default
Description: Original highlight.js style
Author: (c) Ivan Sagalaev <maniac@softwaremaniacs.org>
Maintainer: @highlightjs/core-team
Website: https://highlightjs.org/
License: see project LICENSE
Touched: 2021
*/pre code.hljs{display:block;overflow-x:auto;padding:1em}code.hljs{padding:3px 5px}.hljs{background:#f3f3f3;color:#444}.hljs-comment{color:#697070}.hljs-punctuation,.hljs-tag{color:#444a}.hljs-tag .hljs-attr,.hljs-tag .hljs-name{color:#444}.hljs-attribute,.hljs-doctag,.hljs-keyword,.hljs-meta .hljs-keyword,.hljs-name,.hljs-selector-tag{font-weight:700}.hljs-deletion,.hljs-number,.hljs-quote,.hljs-selector-class,.hljs-selector-id,.hljs-string,.hljs-template-tag,.hljs-type{color:#800}.hljs-section,.hljs-title{color:#800;font-weight:700}.hljs-link,.hljs-operator,.hljs-regexp,.hljs-selector-attr,.hljs-selector-pseudo,.hljs-symbol,.hljs-template-variable,.hljs-variable{color:#ab5656}.hljs-literal{color:#695}.hljs-addition,.hljs-built_in,.hljs-bullet,.hljs-code{color:#397300}.hljs-meta{color:#1f7199}.hljs-meta .hljs-string{color:#38a}.hljs-emphasis{font-style:italic}.hljs-strong{font-weight:700}
(7 more file diffs suppressed because one or more lines are too long)
@@ -28,15 +28,9 @@ import (
    "github.com/mudler/edgevpn/pkg/logger"
)

func generateNewConnectionData(DHTInterval, OTPInterval int) *node.YAMLConnectionConfig {
func generateNewConnectionData() *node.YAMLConnectionConfig {
    maxMessSize := 20 << 20 // 20MB
    keyLength := 43
    if DHTInterval == 0 {
        DHTInterval = 360
    }
    if OTPInterval == 0 {
        OTPInterval = 9000
    }

    return &node.YAMLConnectionConfig{
        MaxMessageSize: maxMessSize,
@@ -46,21 +40,21 @@ func generateNewConnectionData(DHTInterval, OTPInterval int) *node.YAMLConnectio
        OTP: node.OTP{
            DHT: node.OTPConfig{
                Key: eutils.RandStringRunes(keyLength),
                Interval: DHTInterval,
                Interval: 120,
                Length: keyLength,
            },
            Crypto: node.OTPConfig{
                Key: eutils.RandStringRunes(keyLength),
                Interval: OTPInterval,
                Interval: 9000,
                Length: keyLength,
            },
        },
    }
}

func GenerateToken(DHTInterval, OTPInterval int) string {
func GenerateToken() string {
    // Generates a new config and exit
    return generateNewConnectionData(DHTInterval, OTPInterval).Base64()
    return generateNewConnectionData().Base64()
}

func IsP2PEnabled() bool {
@@ -10,7 +10,7 @@ import (
    "github.com/mudler/edgevpn/pkg/node"
)

func GenerateToken(DHTInterval, OTPInterval int) string {
func GenerateToken() string {
    return "not implemented"
}

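Both token-generation variants above are driven from the run command shown earlier. A small sketch of the parameterized call as it appears in that hunk; the interval values are the flag defaults from the RunCMD struct (--p2p-dht-interval=360, --p2p-otp-interval=9000), and this only builds against the side of the compare that has the two-argument signature:

```go
package main

import (
	"fmt"

	"github.com/mudler/LocalAI/core/p2p"
)

func main() {
	// Defaults taken from the RunCMD flags shown above.
	dhtInterval, otpInterval := 360, 9000

	// Parameterized form used by run.go on one side of this diff;
	// the other side only exposes a zero-argument p2p.GenerateToken().
	token := p2p.GenerateToken(dhtInterval, otpInterval)
	fmt.Println("Generated Token:")
	fmt.Println(token)
	fmt.Printf("export TOKEN=%q\nlocal-ai worker p2p-llama-cpp-rpc\n", token)
}
```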
@@ -5,11 +5,11 @@
  url: "https://cdn.jsdelivr.net/gh/highlightjs/cdn-release@11.8.0/build/highlight.min.js"
  sha: "4499ff936d4fd562adca5a5cbe512dc19eb80942eee8618dafbcebc4f7974bdb"
- filename: "alpine.js"
  url: "https://cdn.jsdelivr.net/npm/alpinejs@3.x.x/dist/cdn.min.js"
  sha: "fb9b146b7fbd1bbf251fb3ef464f2e7c5d33a4a83aeb0fcf21e92ca6a9558c4b"
  url: "https://cdn.jsdelivr.net/npm/alpinejs@3.14.1/dist/cdn.min.js"
  sha: "358d9afbb1ab5befa2f48061a30776e5bcd7707f410a606ba985f98bc3b1c034"
- filename: "marked.js"
  url: "https://cdn.jsdelivr.net/npm/marked/marked.min.js"
  sha: "15fabce5b65898b32b03f5ed25e9f891a729ad4c0d6d877110a7744aa847a894"
  url: "https://cdn.jsdelivr.net/npm/marked@14.0.0/lib/marked.umd.min.js"
  sha: "0996c58f732096b6aed537916589c0786dd3332bf4612cc9c206bc44a031b13d"
- filename: "purify.js"
  url: "https://cdn.jsdelivr.net/npm/dompurify@3.0.6/dist/purify.min.js"
  sha: "ea4b09082ca4ba0ae71be6431a097678751d0453b9c52a4d2c7c39a2166ed9fc"
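Each asset entry above pairs a CDN URL with a pinned sha256, which is only useful if the downloaded file is actually compared against it. A small illustrative sketch of that check, assuming the asset has already been fetched to a local path; this is not LocalAI's downloader, just the verification idea:

```go
package main

import (
	"crypto/sha256"
	"encoding/hex"
	"fmt"
	"os"
)

// verifyAsset hashes a downloaded file and compares it with the pinned sha
// from the asset list (for example the marked.js entry above).
func verifyAsset(path, wantSHA string) error {
	data, err := os.ReadFile(path)
	if err != nil {
		return err
	}
	sum := sha256.Sum256(data)
	if got := hex.EncodeToString(sum[:]); got != wantSHA {
		return fmt.Errorf("checksum mismatch for %s: got %s, want %s", path, got, wantSHA)
	}
	return nil
}

func main() {
	// Hypothetical local path; the hash is the pinned marked.js value from the list.
	err := verifyAsset("assets/marked.js",
		"0996c58f732096b6aed537916589c0786dd3332bf4612cc9c206bc44a031b13d")
	fmt.Println("verification result:", err)
}
```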
@@ -1,91 +0,0 @@
---
name: "hermes-vllm"

config_file: |
  backend: vllm
  context_size: 8192
  stopwords:
  - "<|im_end|>"
  - "<dummy32000>"
  - "<|eot_id|>"
  - "<|end_of_text|>"
  function:
    disable_no_action: true
    grammar:
      # Uncomment the line below to enable grammar matching for JSON results if the model is breaking
      # the output. This will make the model more accurate and won't break the JSON output.
      # This however, will make parallel_calls not functional (it is a known bug)
      # mixed_mode: true
      disable: true
      parallel_calls: true
      expect_strings_after_json: true
    json_regex_match:
    - "(?s)<tool_call>(.*?)</tool_call>"
    - "(?s)<tool_call>(.*)"
    capture_llm_results:
    - (?s)<scratchpad>(.*?)</scratchpad>
    replace_llm_results:
    - key: (?s)<scratchpad>(.*?)</scratchpad>
      value: ""

  template:
    use_tokenizer_template: true
    chat: |
      {{.Input -}}
      <|im_start|>assistant
    chat_message: |
      <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}
      {{- if .FunctionCall }}
      <tool_call>
      {{- else if eq .RoleName "tool" }}
      <tool_response>
      {{- end }}
      {{- if .Content}}
      {{.Content }}
      {{- end }}
      {{- if .FunctionCall}}
      {{toJson .FunctionCall}}
      {{- end }}
      {{- if .FunctionCall }}
      </tool_call>
      {{- else if eq .RoleName "tool" }}
      </tool_response>
      {{- end }}<|im_end|>
    completion: |
      {{.Input}}
    function: |
      <|im_start|>system
      You are a function calling AI model.
      Here are the available tools:
      <tools>
      {{range .Functions}}
      {'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
      {{end}}
      </tools>
      You should call the tools provided to you sequentially
      Please use <scratchpad> XML tags to record your reasoning and planning before you call the functions as follows:
      <scratchpad>
      {step-by-step reasoning and plan in bullet points}
      </scratchpad>
      For each function call return a json object with function name and arguments within <tool_call> XML tags as follows:
      <tool_call>
      {"arguments": <args-dict>, "name": <function-name>}
      </tool_call><|im_end|>
      {{.Input -}}
      <|im_start|>assistant
  # Uncomment to specify a quantization method (optional)
  # quantization: "awq"
  # Uncomment to limit the GPU memory utilization (vLLM default is 0.9 for 90%)
  # gpu_memory_utilization: 0.5
  # Uncomment to trust remote code from huggingface
  # trust_remote_code: true
  # Uncomment to enable eager execution
  # enforce_eager: true
  # Uncomment to specify the size of the CPU swap space per GPU (in GiB)
  # swap_space: 2
  # Uncomment to specify the maximum length of a sequence (including prompt and output)
  # max_model_len: 32768
  # Uncomment and specify the number of Tensor divisions.
  # Allows you to partition and run large models. Performance gains are limited.
  # https://github.com/vllm-project/vllm/issues/1435
  # tensor_parallel_size: 2
@@ -4752,38 +4752,6 @@
      - filename: Hermes-3-Llama-3.1-70B.Q4_K_M.gguf
        sha256: 955c2f42caade4278f3c9dbffa32bb74572652b20e49e5340e782de3585bbe3f
        uri: huggingface://NousResearch/Hermes-3-Llama-3.1-70B-GGUF/Hermes-3-Llama-3.1-70B.Q4_K_M.gguf
- &hermes-vllm
  url: "github:mudler/LocalAI/gallery/hermes-vllm.yaml@master"
  name: "hermes-3-llama-3.1-8b:vllm"
  icon: https://cdn-uploads.huggingface.co/production/uploads/6317aade83d8d2fd903192d9/vG6j5WxHX09yj32vgjJlI.jpeg
  tags:
    - llm
    - vllm
    - gpu
    - function-calling
  license: llama-3
  urls:
    - https://huggingface.co/NousResearch/Hermes-3-Llama-3.1-8B
  description: |
    Hermes 3 is a generalist language model with many improvements over Hermes 2, including advanced agentic capabilities, much better roleplaying, reasoning, multi-turn conversation, long context coherence, and improvements across the board. It is designed to focus on aligning LLMs to the user, with powerful steering capabilities and control given to the end user. The model uses ChatML as the prompt format, opening up a much more structured system for engaging the LLM in multi-turn chat dialogue. It also supports function calling and structured output capabilities, generalist assistant capabilities, and improved code generation skills.
  overrides:
    parameters:
      model: NousResearch/Hermes-3-Llama-3.1-8B
- !!merge <<: *hermes-vllm
  name: "hermes-3-llama-3.1-70b:vllm"
  urls:
    - https://huggingface.co/NousResearch/Hermes-3-Llama-3.1-70B
  overrides:
    parameters:
      model: NousResearch/Hermes-3-Llama-3.1-70B
- !!merge <<: *hermes-vllm
  name: "hermes-3-llama-3.1-405b:vllm"
  icon: https://cdn-uploads.huggingface.co/production/uploads/6317aade83d8d2fd903192d9/-kj_KflXsdpcZoTQsvx7W.jpeg
  urls:
    - https://huggingface.co/NousResearch/Hermes-3-Llama-3.1-405B
  overrides:
    parameters:
      model: NousResearch/Hermes-3-Llama-3.1-405B
- !!merge <<: *hermes-2-pro-mistral
  name: "biomistral-7b"
  description: |
@@ -1,29 +0,0 @@
---
name: "vllm"

config_file: |
  backend: vllm
  function:
    disable_no_action: true
    grammar:
      disable: true
      parallel_calls: true
      expect_strings_after_json: true
  template:
    use_tokenizer_template: true
  # Uncomment to specify a quantization method (optional)
  # quantization: "awq"
  # Uncomment to limit the GPU memory utilization (vLLM default is 0.9 for 90%)
  # gpu_memory_utilization: 0.5
  # Uncomment to trust remote code from huggingface
  # trust_remote_code: true
  # Uncomment to enable eager execution
  # enforce_eager: true
  # Uncomment to specify the size of the CPU swap space per GPU (in GiB)
  # swap_space: 2
  # Uncomment to specify the maximum length of a sequence (including prompt and output)
  # max_model_len: 32768
  # Uncomment and specify the number of Tensor divisions.
  # Allows you to partition and run large models. Performance gains are limited.
  # https://github.com/vllm-project/vllm/issues/1435
  # tensor_parallel_size: 2
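The gallery entries and config files above surface as model names on LocalAI's OpenAI-compatible API (the /v1/chat/completions route visible in the chat.go hunk earlier). A hedged usage sketch; the host/port and the installed model name are assumptions, so substitute whatever your instance actually serves:

```go
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
)

func main() {
	// Assumes a local instance on :8080 with the "hermes-3-llama-3.1-8b:vllm"
	// gallery model installed; any installed model name works here.
	payload, _ := json.Marshal(map[string]any{
		"model": "hermes-3-llama-3.1-8b:vllm",
		"messages": []map[string]string{
			{"role": "user", "content": "Say hello in one sentence."},
		},
	})

	resp, err := http.Post("http://localhost:8080/v1/chat/completions",
		"application/json", bytes.NewReader(payload))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	body, _ := io.ReadAll(resp.Body)
	fmt.Println(string(body))
}
```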