mirror of https://github.com/mudler/LocalAI.git (synced 2026-02-03 11:13:31 -05:00)

Compare commits: fed_refact...debug_nvid (35 commits)
| Author | SHA1 | Date |
|---|---|---|
| | 7643719a80 | |
| | 19282af059 | |
| | 9c0c11e8a0 | |
| | 3f7eddb039 | |
| | 77ad49333a | |
| | ef5e8326c8 | |
| | 86509e6002 | |
| | 8667a67695 | |
| | f505d7ab3f | |
| | 450dbed820 | |
| | 46b86f7e6e | |
| | 0ee1f8c1cf | |
| | 87bd831aba | |
| | f9f83791d1 | |
| | e75f73bf73 | |
| | bd277162c7 | |
| | f19ee465d2 | |
| | 7b85ff7280 | |
| | 134cb993c2 | |
| | 2cf28f3c01 | |
| | 18c0f4718d | |
| | f878b63ee4 | |
| | 6eaa01db15 | |
| | 1d605073a4 | |
| | fc29c04f82 | |
| | 63fc22baab | |
| | 6a919b30ac | |
| | 3f7ec2e596 | |
| | 82d5123c1e | |
| | 252961751c | |
| | 031627584b | |
| | 24a8eebcef | |
| | bf9dd1de7f | |
| | 35d55572ac | |
| | c7357a9872 | |
@@ -8,8 +8,10 @@ jobs:
      MODEL_NAME: hermes-2-theta-llama-3-8b
    runs-on: ubuntu-latest
    steps:
    - uses: actions/checkout@v4
    - name: Checkout code
      uses: actions/checkout@v3
      with:
        ref: "${{ github.event.pull_request.merge_commit_sha }}"
        fetch-depth: 0 # needed to checkout all branches for this Action to work
    - uses: mudler/localai-github-action@v1
      with:
@@ -21,6 +23,7 @@ jobs:
        json_diff_file_output: diff.json
        raw_diff_file_output: diff.txt
        file_output_only: "true"
        base_branch: ${{ github.event.pull_request.base.sha }}
    - name: Show diff
      env:
        DIFF: ${{ steps.git-diff-action.outputs.raw-diff-path }}
2  .github/workflows/release.yaml  vendored
@@ -4,6 +4,8 @@ on:
  push:
    branches:
      - master
    tags:
      - 'v*'
  pull_request:

env:
5  Makefile
@@ -8,7 +8,7 @@ DETECT_LIBS?=true
# llama.cpp versions
GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
CPPLLAMA_VERSION?=b3283448ce9a5098226afe1d8648ccc578511fe4
CPPLLAMA_VERSION?=45f2c19cc57286eead7b232ce8028273a817aa4d

# gpt4all version
GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
@@ -377,6 +377,7 @@ build: prepare backend-assets grpcs ## Build the project
	$(info ${GREEN}I BUILD_TYPE: ${YELLOW}$(BUILD_TYPE)${RESET})
	$(info ${GREEN}I GO_TAGS: ${YELLOW}$(GO_TAGS)${RESET})
	$(info ${GREEN}I LD_FLAGS: ${YELLOW}$(LD_FLAGS)${RESET})
	ls -liah backend-assets/grpc
ifneq ($(BACKEND_LIBS),)
	$(MAKE) backend-assets/lib
	cp -f $(BACKEND_LIBS) backend-assets/lib/
@@ -421,7 +422,7 @@ else
endif

dist-cross-linux-arm64:
	CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_NATIVE=off" GRPC_BACKENDS="backend-assets/grpc/llama-cpp-fallback backend-assets/grpc/llama-cpp-grpc backend-assets/util/llama-cpp-rpc-server" \
	CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_NATIVE=off" GRPC_BACKENDS="backend-assets/grpc/llama-cpp-fallback backend-assets/grpc/llama-cpp-grpc backend-assets/util/llama-cpp-rpc-server" GO_TAGS="p2p" \
	STATIC=true $(MAKE) build
	mkdir -p release
	# if BUILD_ID is empty, then we don't append it to the binary name
@@ -10,10 +10,12 @@ import (
type FederatedCLI struct {
    Address        string `env:"LOCALAI_ADDRESS,ADDRESS" default:":8080" help:"Bind address for the API server" group:"api"`
    Peer2PeerToken string `env:"LOCALAI_P2P_TOKEN,P2P_TOKEN,TOKEN" name:"p2ptoken" help:"Token for P2P mode (optional)" group:"p2p"`
    LoadBalanced   bool   `env:"LOCALAI_LOAD_BALANCED,LOAD_BALANCED" default:"false" help:"Enable load balancing" group:"p2p"`
}

func (f *FederatedCLI) Run(ctx *cliContext.Context) error {
    fs := p2p.NewFederatedServer(f.Address, p2p.FederatedID, f.Peer2PeerToken)

    fs := p2p.NewFederatedServer(f.Address, p2p.FederatedID, f.Peer2PeerToken, f.LoadBalanced)

    return fs.Start(context.Background())
}

@@ -119,7 +119,7 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error {
    }

    log.Info().Msg("Starting P2P server discovery...")
    if err := p2p.ServiceDiscoverer(context.Background(), node, token, "", func() {
    if err := p2p.ServiceDiscoverer(context.Background(), node, token, "", func(serviceID string, node p2p.NodeData) {
        var tunnelAddresses []string
        for _, v := range p2p.GetAvailableNodes("") {
            if v.IsOnline() {
@@ -225,18 +225,10 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
        }

        // Update input grammar
        // Handle if we should return "name" instead of "functions"
        if config.FunctionsConfig.FunctionName {
            jsStruct := funcs.ToJSONNameStructure()
            config.Grammar = jsStruct.Grammar(config.FunctionsConfig.GrammarConfig.Options()...)
        } else {
            jsStruct := funcs.ToJSONFunctionStructure()
            config.Grammar = jsStruct.Grammar(config.FunctionsConfig.GrammarConfig.Options()...)
        }
        jsStruct := funcs.ToJSONStructure(config.FunctionsConfig.FunctionNameKey, config.FunctionsConfig.FunctionNameKey)
        config.Grammar = jsStruct.Grammar(config.FunctionsConfig.GrammarConfig.Options()...)
    case input.JSONFunctionGrammarObject != nil:
        config.Grammar = input.JSONFunctionGrammarObject.Grammar(config.FunctionsConfig.GrammarConfig.Options()...)
    case input.JSONFunctionGrammarObjectName != nil:
        config.Grammar = input.JSONFunctionGrammarObjectName.Grammar(config.FunctionsConfig.GrammarConfig.Options()...)
    default:
        // Force picking one of the functions by the request
        if config.FunctionToCall() != "" {

47  core/p2p/federated.go  Normal file
@@ -0,0 +1,47 @@
package p2p

const FederatedID = "federated"

type FederatedServer struct {
    listenAddr, service, p2ptoken string
    requestTable                  map[string]int
    loadBalanced                  bool
}

func NewFederatedServer(listenAddr, service, p2pToken string, loadBalanced bool) *FederatedServer {
    return &FederatedServer{
        listenAddr:   listenAddr,
        service:      service,
        p2ptoken:     p2pToken,
        requestTable: map[string]int{},
        loadBalanced: loadBalanced,
    }
}

func (fs *FederatedServer) SelectLeastUsedServer() string {
    // cycle over requestTable and find the entry with the lower number
    // if there are multiple entries with the same number, select one randomly
    // if there are no entries, return an empty string
    var min int
    var minKey string
    for k, v := range fs.requestTable {
        if min == 0 || v < min {
            min = v
            minKey = k
        }
    }
    return minKey
}

func (fs *FederatedServer) RecordRequest(nodeID string) {
    // increment the counter for the nodeID in the requestTable
    fs.requestTable[nodeID]++
}

func (fs *FederatedServer) EnsureRecordExist(nodeID string) {
    // if the nodeID is not in the requestTable, add it with a counter of 0
    _, ok := fs.requestTable[nodeID]
    if !ok {
        fs.requestTable[nodeID] = 0
    }
}
@@ -1,13 +0,0 @@
package p2p

type FederatedServer struct {
    listenAddr, service, p2ptoken string
}

func NewFederatedServer(listenAddr, service, p2pToken string) *FederatedServer {
    return &FederatedServer{
        listenAddr: listenAddr,
        service:    service,
        p2ptoken:   p2pToken,
    }
}
@@ -7,34 +7,35 @@ import (
    "context"
    "errors"
    "fmt"
    "io"
    "net"
    "time"

    "github.com/rs/zerolog/log"

    "math/rand/v2"

    "github.com/mudler/edgevpn/pkg/node"
    "github.com/mudler/edgevpn/pkg/protocol"
    "github.com/mudler/edgevpn/pkg/types"
    "github.com/rs/zerolog/log"
)

func (fs *FederatedServer) Start(ctx context.Context) error {
    n, err := NewNode(fs.p2ptoken)
func (f *FederatedServer) Start(ctx context.Context) error {

    n, err := NewNode(f.p2ptoken)
    if err != nil {
        return fmt.Errorf("creating a new node: %w", err)
    }
    err = n.Start(ctx)
    if err != nil {
        return fmt.Errorf("starting a new node: %w", err)
        return fmt.Errorf("creating a new node: %w", err)
    }

    if err := ServiceDiscoverer(ctx, n, fs.p2ptoken, FederatedID, nil); err != nil {
    if err := ServiceDiscoverer(ctx, n, f.p2ptoken, f.service, func(servicesID string, tunnel NodeData) {
        log.Debug().Msgf("Discovered node: %s", tunnel.ID)
    }); err != nil {
        return err
    }

    return fs.proxy(ctx, n)
    return f.proxy(ctx, n)
}

func (fs *FederatedServer) proxy(ctx context.Context, node *node.Node) error {
@@ -84,44 +85,56 @@ func (fs *FederatedServer) proxy(ctx context.Context, node *node.Node) error {
            }

            // Handle connections in a new goroutine, forwarding to the p2p service
            go handleConn(conn)
            go func() {
                var tunnelAddresses []string
                for _, v := range GetAvailableNodes(fs.service) {
                    if v.IsOnline() {
                        tunnelAddresses = append(tunnelAddresses, v.TunnelAddress)
                    } else {
                        log.Info().Msgf("Node %s is offline", v.ID)
                    }
                }

                if len(tunnelAddresses) == 0 {
                    log.Error().Msg("No available nodes yet")
                    return
                }

                tunnelAddr := ""

                if fs.loadBalanced {
                    for _, t := range tunnelAddresses {
                        fs.EnsureRecordExist(t)
                    }

                    tunnelAddr = fs.SelectLeastUsedServer()
                    log.Debug().Msgf("Selected tunnel %s", tunnelAddr)
                    if tunnelAddr == "" {
                        tunnelAddr = tunnelAddresses[rand.IntN(len(tunnelAddresses))]
                    }

                    fs.RecordRequest(tunnelAddr)
                } else {
                    tunnelAddr = tunnelAddresses[rand.IntN(len(tunnelAddresses))]
                }

                tunnelConn, err := net.Dial("tcp", tunnelAddr)
                if err != nil {
                    log.Error().Err(err).Msg("Error connecting to tunnel")
                    return
                }

                log.Info().Msgf("Redirecting %s to %s", conn.LocalAddr().String(), tunnelConn.RemoteAddr().String())
                closer := make(chan struct{}, 2)
                go copyStream(closer, tunnelConn, conn)
                go copyStream(closer, conn, tunnelConn)
                <-closer

                tunnelConn.Close()
                conn.Close()
                // ll.Infof("(service %s) Done handling %s", serviceID, l.Addr().String())
            }()
        }
    }

}

func handleConn(conn net.Conn) {
    var tunnelAddresses []string
    for _, v := range GetAvailableNodes(FederatedID) {
        if v.IsOnline() {
            tunnelAddresses = append(tunnelAddresses, v.TunnelAddress)
        } else {
            log.Info().Msgf("Node %s is offline", v.ID)
        }
    }

    // open a TCP stream to one of the tunnels
    // chosen randomly
    // TODO: optimize this and track usage
    tunnelAddr := tunnelAddresses[rand.IntN(len(tunnelAddresses))]

    tunnelConn, err := net.Dial("tcp", tunnelAddr)
    if err != nil {
        log.Error().Err(err).Msg("Error connecting to tunnel")
        return
    }

    log.Info().Msgf("Redirecting %s to %s", conn.LocalAddr().String(), tunnelConn.RemoteAddr().String())
    closer := make(chan struct{}, 2)
    go copyStream(closer, tunnelConn, conn)
    go copyStream(closer, conn, tunnelConn)
    <-closer

    tunnelConn.Close()
    conn.Close()
}

func copyStream(closer chan struct{}, dst io.Writer, src io.Reader) {
    defer func() { closer <- struct{}{} }() // connection is closed, send signal to stop proxy
    io.Copy(dst, src)
}
@@ -6,7 +6,6 @@ import (
)

const defaultServicesID = "services_localai"
const FederatedID = "federated"

type NodeData struct {
    Name string

@@ -7,6 +7,7 @@ import (
    "context"
    "errors"
    "fmt"
    "io"
    "net"
    "os"
    "sync"
@@ -138,7 +139,7 @@ func allocateLocalService(ctx context.Context, node *node.Node, listenAddr, serv

// This is the main of the server (which keeps the env variable updated)
// This starts a goroutine that keeps LLAMACPP_GRPC_SERVERS updated with the discovered services
func ServiceDiscoverer(ctx context.Context, n *node.Node, token, servicesID string, discoveryFunc func()) error {
func ServiceDiscoverer(ctx context.Context, n *node.Node, token, servicesID string, discoveryFunc func(serviceID string, node NodeData)) error {
    if servicesID == "" {
        servicesID = defaultServicesID
    }
@@ -160,7 +161,7 @@ func ServiceDiscoverer(ctx context.Context, n *node.Node, token, servicesID stri
        case tunnel := <-tunnels:
            AddNode(servicesID, tunnel)
            if discoveryFunc != nil {
                discoveryFunc()
                discoveryFunc(servicesID, tunnel)
            }
        }
    }
@@ -390,3 +391,8 @@ func newNodeOpts(token string) ([]node.Option, error) {

    return nodeOpts, nil
}

func copyStream(closer chan struct{}, dst io.Writer, src io.Reader) {
    defer func() { closer <- struct{}{} }() // connection is closed, send signal to stop proxy
    io.Copy(dst, src)
}
@@ -14,11 +14,11 @@ func GenerateToken() string {
    return "not implemented"
}

func (fs *FederatedServer) Start(ctx context.Context) error {
func (f *FederatedServer) Start(ctx context.Context) error {
    return fmt.Errorf("not implemented")
}

func ServiceDiscoverer(ctx context.Context, node *node.Node, token, servicesID string, fn func()) error {
func ServiceDiscoverer(ctx context.Context, node *node.Node, token, servicesID string, fn func(string, NodeData)) error {
    return fmt.Errorf("not implemented")
}

@@ -179,8 +179,7 @@ type OpenAIRequest struct {
    // A grammar to constrain the LLM output
    Grammar string `json:"grammar" yaml:"grammar"`

    JSONFunctionGrammarObject     *functions.JSONFunctionStructureFunction `json:"grammar_json_functions" yaml:"grammar_json_functions"`
    JSONFunctionGrammarObjectName *functions.JSONFunctionStructureName     `json:"grammar_json_name" yaml:"grammar_json_name"`
    JSONFunctionGrammarObject *functions.JSONFunctionStructure `json:"grammar_json_functions" yaml:"grammar_json_functions"`

    Backend string `json:"backend" yaml:"backend"`

@@ -152,7 +152,8 @@ function:
    replace_function_results: [] # Placeholder to replace function call results with arbitrary strings or patterns.
    replace_llm_results: [] # Replace language model results with arbitrary strings or patterns.
    capture_llm_results: [] # Capture language model results as text result, among JSON, in function calls. For instance, if a model returns a block for "thinking" and a block for "response", this will allow you to capture the thinking block.
    return_name_in_function_response: false # Some models might prefer to use "name" rather then "function" when returning JSON data. This will allow to use "name" as a key in the JSON response.
    function_name_key: "name"
    function_arguments_key: "arguments"

# Feature gating flags to enable experimental or optional features.
feature_flags: {}

@@ -5,17 +5,65 @@ weight = 15
url = "/features/distribute/"
+++

This functionality enables LocalAI to distribute inference requests across multiple worker nodes, improving efficiency and performance. Nodes are automatically discovered and connect via p2p using a shared token, which keeps communication between the nodes of the network secure and private.

LocalAI supports two modes of distributed inferencing via p2p:

- **Federated Mode**: Requests are shared between the cluster and routed to a single worker node in the network based on the load balancer's decision.
- **Worker Mode**: Requests are processed by all the workers, which contribute to the final inference result (by sharing the model weights).

## Usage

Starting LocalAI with `--p2p` generates a shared token for connecting multiple instances, and that's all you need to create AI clusters, eliminating the need for intricate network setups.

Simply navigate to the "Swarm" section in the WebUI and follow the on-screen instructions.

For fully shared instances, start LocalAI with `--p2p --federated` and follow the Swarm section's guidance. This feature, while still experimental, offers a tech-preview-quality experience.

### Federated mode

Federated mode allows you to launch multiple LocalAI instances and connect them together in a federated network. This mode is useful when you want to distribute the inference load across multiple nodes while keeping a single point of entry for the API. In the Swarm section of the WebUI, you can see the instructions to connect multiple instances together.



To start a LocalAI server in federated mode, run:

```bash
local-ai run --p2p --federated
```

This will generate a token that you can use to connect other LocalAI instances to the network, or that others can use to join the network. If you already have a token, you can specify it using the `TOKEN` environment variable.
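For example, joining could look like this (a sketch; `<token>` is a placeholder for the token generated above):

```bash
# Hypothetical example: join an existing federation with a previously generated token
TOKEN=<token> local-ai run --p2p --federated
```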
To start a load-balanced server that routes the requests to the network, run with the `TOKEN` set:

```bash
local-ai federated
```

To see all the available options, run `local-ai federated --help`.

The instructions are displayed in the "Swarm" section of the WebUI, guiding you through the process of connecting multiple instances.

### Workers mode

{{% alert note %}}
This feature is available exclusively with llama-cpp compatible models.

This feature was introduced in [LocalAI pull request #2324](https://github.com/mudler/LocalAI/pull/2324) and is based on the upstream work in [llama.cpp pull request #6829](https://github.com/ggerganov/llama.cpp/pull/6829).
{{% /alert %}}

This functionality enables LocalAI to distribute inference requests across multiple worker nodes, improving efficiency and performance.
To connect multiple workers to a single LocalAI instance, first start a server in p2p mode:

## Usage
```bash
local-ai run --p2p
```

### Starting Workers
Then navigate to the "Swarm" section of the WebUI to see the instructions for connecting multiple workers to the network.



### Without P2P

To start workers for distributing the computational load, run:

@@ -23,48 +71,27 @@ To start workers for distributing the computational load, run:
```bash
local-ai worker llama-cpp-rpc <listening_address> <listening_port>
```

Alternatively, you can build the RPC server following the llama.cpp [README](https://github.com/ggerganov/llama.cpp/blob/master/examples/rpc/README.md), which is compatible with LocalAI.

### Starting LocalAI

To start the LocalAI server, which handles API requests, specify the worker addresses using the `LLAMACPP_GRPC_SERVERS` environment variable:
You can specify the address of the workers when starting LocalAI with the `LLAMACPP_GRPC_SERVERS` environment variable:

```bash
LLAMACPP_GRPC_SERVERS="address1:port,address2:port" local-ai run
```

The workload on the LocalAI server will then be distributed across the specified nodes.

## Peer-to-Peer Networking
Alternatively, you can build the RPC workers/server following the llama.cpp [README](https://github.com/ggerganov/llama.cpp/blob/master/examples/rpc/README.md), which is compatible with LocalAI.


## Manual example (worker)

Workers can also connect to each other in a peer-to-peer network, distributing the workload in a decentralized manner.

A shared token between the server and the workers is required for communication within the peer-to-peer network. This feature supports both local network (using mDNS discovery) and DHT for communication across different networks.

The token is automatically generated when starting the server with the `--p2p` flag. Workers can be started with the token using `local-ai worker p2p-llama-cpp-rpc` and specifying the token via the environment variable `TOKEN` or with the `--token` argument, as sketched below.
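A minimal sketch of both forms, assuming `<token>` is the token generated by the server:

```bash
# Hypothetical example: start a p2p worker with the shared token via the environment
TOKEN=<token> local-ai worker p2p-llama-cpp-rpc
# or pass it with the flag form named above
local-ai worker p2p-llama-cpp-rpc --token <token>
```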
A network is established between the server and workers using DHT and mDNS discovery protocols. The llama.cpp RPC server is automatically started and exposed to the peer-to-peer network, allowing the API server to connect.

When the HTTP server starts, it discovers workers in the network and creates port forwards to the local service. Llama.cpp is configured to use these services. For more details on the implementation, refer to [LocalAI pull request #2343](https://github.com/mudler/LocalAI/pull/2343).

### Usage
Use the WebUI to guide you in the process of starting new workers. This example shows the manual steps to highlight the process.

1. Start the server with `--p2p`:

```bash
./local-ai run --p2p
# 1:02AM INF loading environment variables from file envFile=.env
# 1:02AM INF Setting logging to info
# 1:02AM INF P2P mode enabled
# 1:02AM INF No token provided, generating one
# 1:02AM INF Generated Token:
# XXXXXXXXXXX
# 1:02AM INF Press a button to proceed
# Get the token in the Swarm section of the WebUI
```

Copy the displayed token and press Enter.
Copy the token from the WebUI or via API call (e.g., `curl http://localhost:8000/p2p/token`) and save it for later use.

To reuse the same token later, restart the server with `--p2ptoken` or `P2P_TOKEN`.
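Either form could look like this (a sketch; `<saved-token>` stands for the token saved earlier):

```bash
# Reuse a previously saved token via the environment variable
P2P_TOKEN=<saved-token> ./local-ai run --p2p
# or via the flag referenced above
./local-ai run --p2p --p2ptoken <saved-token>
```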
@@ -93,12 +120,14 @@ The server logs should indicate that new workers are being discovered.

3. Start inference as usual on the server initiated in step 1.



## Notes

- If running in p2p mode with container images, make sure you start the container with `--net host` or `network_mode: host` in the docker-compose file (see the sketch after this list).
- Only a single model is supported currently.
- Ensure the server detects new workers before starting inference. Currently, additional workers cannot be added once inference has begun.
- For more details on the implementation, refer to [LocalAI pull request #2343](https://github.com/mudler/LocalAI/pull/2343)
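As a sketch of the host-networking note above (the image name and tag are illustrative, not prescribed by this page):

```bash
# Hypothetical example: run the container with host networking so p2p discovery works
docker run -ti --net host localai/localai:latest run --p2p
```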
## Environment Variables
@@ -1,3 +1,3 @@
{
    "version": "v2.18.1"
    "version": "v2.19.1"
}
@@ -24,6 +24,33 @@
    - filename: DeepSeek-Coder-V2-Lite-Instruct-Q4_K_M.gguf
      sha256: 50ec78036433265965ed1afd0667c00c71c12aa70bcf383be462cb8e159db6c0
      uri: huggingface://LoneStriker/DeepSeek-Coder-V2-Lite-Instruct-GGUF/DeepSeek-Coder-V2-Lite-Instruct-Q4_K_M.gguf
- name: "archangel_sft_pythia2-8b"
  url: "github:mudler/LocalAI/gallery/tuluv2.yaml@master"
  icon: https://gist.github.com/assets/29318529/fe2d8391-dbd1-4b7e-9dc4-7cb97e55bc06
  license: apache-2.0
  urls:
    - https://huggingface.co/ContextualAI/archangel_sft_pythia2-8b
    - https://huggingface.co/RichardErkhov/ContextualAI_-_archangel_sft_pythia2-8b-gguf
    - https://github.com/ContextualAI/HALOs
  description: |
    datasets:
      - stanfordnlp/SHP
      - Anthropic/hh-rlhf
      - OpenAssistant/oasst1

    This repo contains the model checkpoints for:
    - model family pythia2-8b
    - optimized with the loss SFT
    - aligned using the SHP, Anthropic HH and Open Assistant datasets.

    Please refer to our [code repository](https://github.com/ContextualAI/HALOs) or [blog](https://contextual.ai/better-cheaper-faster-llm-alignment-with-kto/) which contains intructions for training your own HALOs and links to our model cards.
  overrides:
    parameters:
      model: archangel_sft_pythia2-8b.Q4_K_M.gguf
  files:
    - filename: archangel_sft_pythia2-8b.Q4_K_M.gguf
      sha256: a47782c55ef2b39b19644213720a599d9849511a73c9ebb0c1de749383c0a0f8
      uri: huggingface://RichardErkhov/ContextualAI_-_archangel_sft_pythia2-8b-gguf/archangel_sft_pythia2-8b.Q4_K_M.gguf
- &qwen2
  ## Start QWEN2
  url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
@@ -202,6 +229,54 @@
    - filename: Qwen2-7B-Instruct-v0.8.Q4_K_M.gguf
      sha256: 8c1b3efe9fa6ae1b37942ef26473cb4e0aed0f8038b60d4b61e5bffb61e49b7e
      uri: huggingface://MaziyarPanahi/Qwen2-7B-Instruct-v0.8-GGUF/Qwen2-7B-Instruct-v0.8.Q4_K_M.gguf
- !!merge <<: *qwen2
  name: "qwen2-wukong-7b"
  icon: https://cdn-uploads.huggingface.co/production/uploads/655dc641accde1bbc8b41aec/xOe1Nb3S9Nb53us7_Ja3s.jpeg
  urls:
    - https://huggingface.co/bartowski/Qwen2-Wukong-7B-GGUF
  description: |
    Qwen2-Wukong-7B is a dealigned chat finetune of the original fantastic Qwen2-7B model by the Qwen team.

    This model was trained on the teknium OpenHeremes-2.5 dataset and some supplementary datasets from Cognitive Computations

    This model was trained for 3 epochs with a custom FA2 implementation for AMD cards.
  overrides:
    parameters:
      model: Qwen2-Wukong-7B-Q4_K_M.gguf
  files:
    - filename: Qwen2-Wukong-7B-Q4_K_M.gguf
      sha256: 6b8ca6649c33fc84d4892ebcff1214f0b34697aced784f0d6d32e284a15943ad
      uri: huggingface://bartowski/Qwen2-Wukong-7B-GGUF/Qwen2-Wukong-7B-Q4_K_M.gguf
- !!merge <<: *qwen2
  name: "calme-2.8-qwen2-7b"
  icon: https://huggingface.co/MaziyarPanahi/calme-2.8-qwen2-7b/resolve/main/qwen2-fine-tunes-maziyar-panahi.webp
  urls:
    - https://huggingface.co/MaziyarPanahi/calme-2.8-qwen2-7b
    - https://huggingface.co/MaziyarPanahi/calme-2.8-qwen2-7b-GGUF
  description: |
    This is a fine-tuned version of the Qwen/Qwen2-7B model. It aims to improve the base model across all benchmarks.
  overrides:
    parameters:
      model: Qwen2-7B-Instruct-v0.8.Q4_K_M.gguf
  files:
    - filename: Qwen2-7B-Instruct-v0.8.Q4_K_M.gguf
      sha256: 8c1b3efe9fa6ae1b37942ef26473cb4e0aed0f8038b60d4b61e5bffb61e49b7e
      uri: huggingface://MaziyarPanahi/calme-2.8-qwen2-7b-GGUF/Qwen2-7B-Instruct-v0.8.Q4_K_M.gguf
- !!merge <<: *qwen2
  name: "stellardong-72b-i1"
  icon: https://huggingface.co/smelborp/StellarDong-72b/resolve/main/stellardong.png
  urls:
    - https://huggingface.co/smelborp/StellarDong-72b
    - https://huggingface.co/mradermacher/StellarDong-72b-i1-GGUF
  description: |
    Magnum + Nova = you won't believe how stellar this dong is!!
  overrides:
    parameters:
      model: StellarDong-72b.i1-Q4_K_M.gguf
  files:
    - filename: StellarDong-72b.i1-Q4_K_M.gguf
      sha256: 4c5012f0a034f40a044904891343ade2594f29c28a8a9d8052916de4dc5a61df
      uri: huggingface://mradermacher/StellarDong-72b-i1-GGUF/StellarDong-72b.i1-Q4_K_M.gguf
- &mistral03
  ## START Mistral
  url: "github:mudler/LocalAI/gallery/mistral-0.3.yaml@master"
@@ -264,6 +339,31 @@
    - filename: Mahou-1.3d-mistral-7B.i1-Q4_K_M.gguf
      sha256: 8272f050e36d612ab282e095cb4e775e2c818e7096f8d522314d256923ef6da9
      uri: huggingface://mradermacher/Mahou-1.3d-mistral-7B-i1-GGUF/Mahou-1.3d-mistral-7B.i1-Q4_K_M.gguf
- name: "einstein-v4-7b"
  url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
  icon: https://cdn-uploads.huggingface.co/production/uploads/6468ce47e134d050a58aa89c/U0zyXVGj-O8a7KP3BvPue.png
  urls:
    - https://huggingface.co/Weyaxi/Einstein-v4-7B
    - https://huggingface.co/mradermacher/Einstein-v4-7B-GGUF
  tags:
    - llm
    - gguf
    - gpu
    - mistral
    - cpu
  description: |
    🔬 Einstein-v4-7B

    This model is a full fine-tuned version of mistralai/Mistral-7B-v0.1 on diverse datasets.

    This model is finetuned using 7xRTX3090 + 1xRTXA6000 using axolotl.
  overrides:
    parameters:
      model: Einstein-v4-7B.Q4_K_M.gguf
  files:
    - filename: Einstein-v4-7B.Q4_K_M.gguf
      sha256: 78bd573de2a9eb3c6e213132858164e821145f374fcaa4b19dfd6502c05d990d
      uri: huggingface://mradermacher/Einstein-v4-7B-GGUF/Einstein-v4-7B.Q4_K_M.gguf
- &mudler
  ### START mudler's LocalAI specific-models
  url: "github:mudler/LocalAI/gallery/mudler.yaml@master"
@@ -579,6 +679,91 @@
    - filename: EZO-Common-9B-gemma-2-it.Q4_K_M.gguf
      sha256: 57678b1828673dccb15f76e52b00672c74aa6169421bbb8620b8955955322cfd
      uri: huggingface://QuantFactory/EZO-Common-9B-gemma-2-it-GGUF/EZO-Common-9B-gemma-2-it.Q4_K_M.gguf
- !!merge <<: *gemma
  name: "big-tiger-gemma-27b-v1"
  icon: https://cdn-uploads.huggingface.co/production/uploads/65f2fd1c25b848bd061b5c2e/A97OlLKeT4XOnv4IG1b6m.png
  urls:
    - https://huggingface.co/TheDrummer/Big-Tiger-Gemma-27B-v1
    - https://huggingface.co/TheDrummer/Big-Tiger-Gemma-27B-v1-GGUF
  description: |
    Big Tiger Gemma 27B v1 is a Decensored Gemma 27B model with no refusals, except for some rare instances from the 9B model. It does not appear to have any brain damage. The model is available from various sources, including Hugging Face, and comes in different variations such as GGUF, iMatrix, and EXL2.
  overrides:
    parameters:
      model: Big-Tiger-Gemma-27B-v1c-Q4_K_M.gguf
  files:
    - filename: Big-Tiger-Gemma-27B-v1c-Q4_K_M.gguf
      sha256: c5fc5605d36ae280c1c908c9b4bcb12b28abbe2692f317edeb83ab1104657fe5
      uri: huggingface://TheDrummer/Big-Tiger-Gemma-27B-v1-GGUF/Big-Tiger-Gemma-27B-v1c-Q4_K_M.gguf
- !!merge <<: *gemma
  name: "gemma-2b-translation-v0.150"
  urls:
    - https://huggingface.co/lemon-mint/gemma-2b-translation-v0.150
    - https://huggingface.co/RichardErkhov/lemon-mint_-_gemma-2b-translation-v0.150-gguf
  description: |
    Original model: lemon-mint/gemma-ko-1.1-2b-it
    Evaluation metrics: Eval Loss, Train Loss, lr, optimizer, lr_scheduler_type.
    Prompt Template:
    <bos><start_of_turn>user
    Translate into Korean: [input text]<end_of_turn>
    <start_of_turn>model
    [translated text in Korean]<eos>
    <bos><start_of_turn>user
    Translate into English: [Korean text]<end_of_turn>
    <start_of_turn>model
    [translated text in English]<eos>
    Model features:
    * Developed by: lemon-mint
    * Model type: Gemma
    * Languages (NLP): English
    * License: Gemma Terms of Use
    * Finetuned from model: lemon-mint/gemma-ko-1.1-2b-it
  overrides:
    parameters:
      model: gemma-2b-translation-v0.150.Q4_K_M.gguf
  files:
    - filename: gemma-2b-translation-v0.150.Q4_K_M.gguf
      sha256: dcde67b83168d2e7ca835cf9a7a4dcf38b41b9cefe3cbc997c71d2741c08cd25
      uri: huggingface://RichardErkhov/lemon-mint_-_gemma-2b-translation-v0.150-gguf/gemma-2b-translation-v0.150.Q4_K_M.gguf
- !!merge <<: *gemma
  name: "emo-2b"
  urls:
    - https://huggingface.co/OEvortex/EMO-2B
    - https://huggingface.co/RichardErkhov/OEvortex_-_EMO-2B-gguf
  description: |
    EMO-2B: Emotionally Intelligent Conversational AI

    Overview:
    EMO-2B is a state-of-the-art conversational AI model with 2.5 billion parameters, designed to engage in emotionally resonant dialogue. Building upon the success of EMO-1.5B, this model has been further fine-tuned on an extensive corpus of emotional narratives, enabling it to perceive and respond to the emotional undertones of user inputs with exceptional empathy and emotional intelligence.

    Key Features:

    - Advanced Emotional Intelligence: With its increased capacity, EMO-2B demonstrates an even deeper understanding and generation of emotional language, allowing for more nuanced and contextually appropriate emotional responses.
    - Enhanced Contextual Awareness: The model considers an even broader context within conversations, accounting for subtle emotional cues and providing emotionally resonant responses tailored to the specific situation.
    - Empathetic and Supportive Dialogue: EMO-2B excels at active listening, validating emotions, offering compassionate advice, and providing emotional support, making it an ideal companion for users seeking empathy and understanding.
    - Dynamic Persona Adaptation: The model can dynamically adapt its persona, communication style, and emotional responses to match the user's emotional state, ensuring a highly personalized and tailored conversational experience.

    Use Cases:

    EMO-2B is well-suited for a variety of applications where emotional intelligence and empathetic communication are crucial, such as:

    - Mental health support chatbots
    - Emotional support companions
    - Personalized coaching and motivation
    - Narrative storytelling and interactive fiction
    - Customer service and support (for emotionally sensitive contexts)

    Limitations and Ethical Considerations:

    While EMO-2B is designed to provide emotionally intelligent and empathetic responses, it is important to note that it is an AI system and cannot replicate the depth and nuance of human emotional intelligence. Users should be aware that the model's responses, while emotionally supportive, should not be considered a substitute for professional mental health support or counseling.

    Additionally, as with any language model, EMO-2B may reflect biases present in its training data. Users should exercise caution and critical thinking when interacting with the model, and report any concerning or inappropriate responses.
  overrides:
    parameters:
      model: EMO-2B.Q4_K_M.gguf
  files:
    - filename: EMO-2B.Q4_K_M.gguf
      sha256: 608bffc0e9012bc7f9a94b714f4932e2826cc122dbac59b586e4baa2ee0fdca5
      uri: huggingface://RichardErkhov/OEvortex_-_EMO-2B-gguf/EMO-2B.Q4_K_M.gguf
- &llama3
  url: "github:mudler/LocalAI/gallery/llama3-instruct.yaml@master"
  icon: https://cdn-uploads.huggingface.co/production/uploads/642cc1c253e76b4c2286c58e/aJJxKus1wP5N-euvHEUq7.png
@@ -3001,6 +3186,106 @@
    - filename: L3-15B-EtherealMaid-t0.0001.i1-Q4_K_M.gguf
      sha256: 2911be6be8e0fd4184998d452410ba847491b4ab71a928749de87cafb0e13757
      uri: huggingface://mradermacher/L3-15B-EtherealMaid-t0.0001-i1-GGUF/L3-15B-EtherealMaid-t0.0001.i1-Q4_K_M.gguf
- !!merge <<: *llama3
  name: "l3-8b-celeste-v1"
  icon: https://cdn-uploads.huggingface.co/production/uploads/630cf5d14ca0a22768bbe10c/Zv__LDTO-nHvpuxPcCgUU.webp
  urls:
    - https://huggingface.co/nothingiisreal/L3-8B-Celeste-v1
    - https://huggingface.co/bartowski/L3-8B-Celeste-v1-GGUF
  description: |
    Trained on LLaMA 3 8B Instruct at 8K context using Reddit Writing Prompts, Opus 15K Instruct an c2 logs cleaned.

    This is a roleplay model any instruction following capabilities outside roleplay contexts are coincidental.
  overrides:
    parameters:
      model: L3-8B-Celeste-v1-Q4_K_M.gguf
  files:
    - filename: L3-8B-Celeste-v1-Q4_K_M.gguf
      sha256: ed5277719965fb6bbcce7d16742e3bac4a8d5b8f52133261a3402a480cd65317
      uri: huggingface://bartowski/L3-8B-Celeste-v1-GGUF/L3-8B-Celeste-v1-Q4_K_M.gguf
- !!merge <<: *llama3
  name: "l3-8b-celeste-v1.2"
  icon: https://cdn-uploads.huggingface.co/production/uploads/630cf5d14ca0a22768bbe10c/Zv__LDTO-nHvpuxPcCgUU.webp
  urls:
    - https://huggingface.co/mudler/L3-8B-Celeste-V1.2-Q4_K_M-GGUF
  description: |
    Trained on LLaMA 3 8B Instruct at 8K context using Reddit Writing Prompts, Opus 15K Instruct an c2 logs cleaned.

    This is a roleplay model any instruction following capabilities outside roleplay contexts are coincidental.
  overrides:
    parameters:
      model: l3-8b-celeste-v1.2-q4_k_m.gguf
  files:
    - filename: l3-8b-celeste-v1.2-q4_k_m.gguf
      sha256: 7752204c0e9f627ff5726eb69bb6114974cafbc934a993ad019abfba62002783
      uri: huggingface://mudler/L3-8B-Celeste-V1.2-Q4_K_M-GGUF/l3-8b-celeste-v1.2-q4_k_m.gguf
- !!merge <<: *llama3
  name: "llama-3-tulu-2-8b-i1"
  icon: https://huggingface.co/datasets/allenai/blog-images/resolve/main/tulu-v2/Tulu%20V2%20banner.png
  urls:
    - https://huggingface.co/allenai/llama-3-tulu-2-8b
    - https://huggingface.co/mradermacher/llama-3-tulu-2-8b-i1-GGUF
  description: |
    Tulu is a series of language models that are trained to act as helpful assistants. Llama 3 Tulu V2 8B is a fine-tuned version of Llama 3 that was trained on a mix of publicly available, synthetic and human datasets.
  overrides:
    parameters:
      model: llama-3-tulu-2-8b.i1-Q4_K_M.gguf
  files:
    - filename: llama-3-tulu-2-8b.i1-Q4_K_M.gguf
      sha256: f859c22bfa64f461e9ffd973dc7ad6a78bb98b1dda6f49abfa416a4022b7e333
      uri: huggingface://mradermacher/llama-3-tulu-2-8b-i1-GGUF/llama-3-tulu-2-8b.i1-Q4_K_M.gguf
- !!merge <<: *llama3
  name: "llama-3-tulu-2-dpo-70b-i1"
  icon: https://huggingface.co/datasets/allenai/blog-images/resolve/main/tulu-v2/Tulu%20V2%20banner.png
  urls:
    - https://huggingface.co/allenai/llama-3-tulu-2-dpo-70b
    - https://huggingface.co/mradermacher/llama-3-tulu-2-dpo-70b-i1-GGUF
  description: |
    Tulu is a series of language models that are trained to act as helpful assistants. Llama 3 Tulu V2 8B is a fine-tuned version of Llama 3 that was trained on a mix of publicly available, synthetic and human datasets.
  overrides:
    parameters:
      model: llama-3-tulu-2-dpo-70b.i1-Q4_K_M.gguf
  files:
    - filename: llama-3-tulu-2-dpo-70b.i1-Q4_K_M.gguf
      sha256: fc309bbdf1e2bdced954c4c8dc1f9a885c547017ee5e750bfde645af89e3d3a5
      uri: huggingface://mradermacher/llama-3-tulu-2-dpo-70b-i1-GGUF/llama-3-tulu-2-dpo-70b.i1-Q4_K_M.gguf
- !!merge <<: *llama3
  license: cc-by-nc-4.0
  name: "suzume-llama-3-8b-multilingual-orpo-borda-top25"
  icon: https://cdn-uploads.huggingface.co/production/uploads/64b63f8ad57e02621dc93c8b/kWQSu02YfgYdUQqv4s5lq.png
  urls:
    - https://huggingface.co/lightblue/suzume-llama-3-8B-multilingual-orpo-borda-top25
    - https://huggingface.co/RichardErkhov/lightblue_-_suzume-llama-3-8B-multilingual-orpo-borda-top25-gguf
  description: |
    This is Suzume ORPO, an ORPO trained fine-tune of the lightblue/suzume-llama-3-8B-multilingual model using our lightblue/mitsu dataset.

    We have trained several versions of this model using ORPO and so recommend that you use the best performing model from our tests, lightblue/suzume-llama-3-8B-multilingual-orpo-borda-half.

    Note that this model has a non-commerical license as we used the Command R and Command R+ models to generate our training data for this model (lightblue/mitsu).

    We are currently working on a developing a commerically usable model, so stay tuned for that!
  overrides:
    parameters:
      model: suzume-llama-3-8B-multilingual-orpo-borda-top25.Q4_K_M.gguf
  files:
    - filename: suzume-llama-3-8B-multilingual-orpo-borda-top25.Q4_K_M.gguf
      sha256: ef75a02c5f38e14a8873c7989188dac6974851b4654279fe1921d2c8018cc388
      uri: huggingface://RichardErkhov/lightblue_-_suzume-llama-3-8B-multilingual-orpo-borda-top25-gguf/suzume-llama-3-8B-multilingual-orpo-borda-top25.Q4_K_M.gguf
- !!merge <<: *llama3
  name: "calme-2.4-llama3-70b"
  icon: https://huggingface.co/MaziyarPanahi/calme-2.4-llama3-70b/resolve/main/llama-3-merges.webp
  urls:
    - https://huggingface.co/MaziyarPanahi/calme-2.4-llama3-70b
    - https://huggingface.co/mradermacher/calme-2.4-llama3-70b-GGUF
  description: |
    This model is a fine-tune (DPO) of meta-llama/Meta-Llama-3-70B-Instruct model.
  overrides:
    parameters:
      model: calme-2.4-llama3-70b.Q4_K_M.gguf
  files:
    - filename: calme-2.4-llama3-70b.Q4_K_M.gguf
      sha256: 0b44ac8a88395dfc60f1b9d3cfffc0ffef74ec0a302e610ef91fc787187568f2
      uri: huggingface://mradermacher/calme-2.4-llama3-70b-GGUF/calme-2.4-llama3-70b.Q4_K_M.gguf
- &command-R
  ### START Command-r
  url: "github:mudler/LocalAI/gallery/command-r.yaml@master"
@@ -3245,6 +3530,38 @@
    - filename: Phi-3.1-mini-4k-instruct-Q4_K_M.gguf
      sha256: 39458b227a4be763b7eb39d306d240c3d45205e3f8b474ec7bdca7bba0158e69
      uri: huggingface://bartowski/Phi-3.1-mini-4k-instruct-GGUF/Phi-3.1-mini-4k-instruct-Q4_K_M.gguf
- !!merge <<: *phi-3
  name: "phillama-3.8b-v0.1"
  icon: https://cdn-uploads.huggingface.co/production/uploads/657eb5b256c9c67605a6e8b5/f96pPiJQb3puzbPYNknG2.png
  urls:
    - https://huggingface.co/RichardErkhov/raincandy-u_-_phillama-3.8b-v0.1-gguf
  description: |
    The description of the LLM model is:
    Phillama is a model based on Phi-3-mini and trained on Llama-generated dataset raincandy-u/Dextromethorphan-10k to make it more "llama-like". Also, this model is converted into Llama format, so it will work with any Llama-2/3 workflow. The model aims to generate text with a specific "llama-like" style and is suited for text-generation tasks.
  overrides:
    parameters:
      model: phillama-3.8b-v0.1.Q4_K_M.gguf
  files:
    - filename: phillama-3.8b-v0.1.Q4_K_M.gguf
      sha256: da537d352b7aae54bbad0d2cff3e3a1b0e1dc1e1d25bec3aae1d05cf4faee7a2
      uri: huggingface://RichardErkhov/raincandy-u_-_phillama-3.8b-v0.1-gguf/phillama-3.8b-v0.1.Q4_K_M.gguf
- !!merge <<: *llama3
  name: "calme-2.3-phi3-4b"
  icon: https://huggingface.co/MaziyarPanahi/calme-2.1-phi3-4b/resolve/main/phi-3-instruct.webp
  urls:
    - https://huggingface.co/MaziyarPanahi/calme-2.3-phi3-4b
    - https://huggingface.co/MaziyarPanahi/calme-2.3-phi3-4b-GGUF
  description: |
    MaziyarPanahi/calme-2.1-phi3-4b

    This model is a fine-tune (DPO) of microsoft/Phi-3-mini-4k-instruct model.
  overrides:
    parameters:
      model: Phi-3-mini-4k-instruct-v0.3.Q4_K_M.gguf
  files:
    - filename: Phi-3-mini-4k-instruct-v0.3.Q4_K_M.gguf
      sha256: 3a23e1052369c080afb925882bd814cbea5ec859894655a7434c3d49e43a6127
      uri: huggingface://MaziyarPanahi/calme-2.3-phi3-4b-GGUF/Phi-3-mini-4k-instruct-v0.3.Q4_K_M.gguf
- &hermes-2-pro-mistral
  ### START Hermes
  url: "github:mudler/LocalAI/gallery/hermes-2-pro-mistral.yaml@master"

43  gallery/tuluv2.yaml  Normal file
@@ -0,0 +1,43 @@
---
name: "tuluv2"

config_file: |
  mmap: true
  template:
    chat_message: |
      <|{{ .RoleName }}|>
      {{ if .FunctionCall -}}
      Function call:
      {{ else if eq .RoleName "tool" -}}
      Function response:
      {{ end -}}
      {{ if .Content -}}
      {{.Content }}
      {{ end -}}
      {{ if .FunctionCall -}}
      {{toJson .FunctionCall}}
      {{ end -}}
    function: |
      <|{{ .RoleName }}|>
      {{ if .FunctionCall -}}
      Function call:
      {{ else if eq .RoleName "tool" -}}
      Function response:
      {{ end -}}
      {{ if .Content -}}
      {{.Content }}
      {{ end -}}
      {{ if .FunctionCall -}}
      {{toJson .FunctionCall}}
      {{ end -}}
    chat: |
      {{.Input -}}
      <|assistant|>
    completion: |
      {{.Input}}
  context_size: 4096
  f16: true
  stopwords:
    - '<|im_end|>'
    - '<dummy32000>'
    - '<|endoftext|>'
13  pkg/concurrency/concurrency_suite_test.go  Normal file
@@ -0,0 +1,13 @@
package concurrency

import (
    "testing"

    . "github.com/onsi/ginkgo/v2"
    . "github.com/onsi/gomega"
)

func TestConcurrency(t *testing.T) {
    RegisterFailHandler(Fail)
    RunSpecs(t, "Concurrency test suite")
}
69  pkg/concurrency/jobresult.go  Normal file
@@ -0,0 +1,69 @@
package concurrency

import (
    "context"
    "sync"
)

// This is a Read-ONLY structure that contains the result of an arbitrary asynchronous action
type JobResult[RequestType any, ResultType any] struct {
    request *RequestType
    result  *ResultType
    err     error
    once    sync.Once
    done    *chan struct{}
}

// This structure is returned in a pair with a JobResult and serves as the structure that has access to be updated.
type WritableJobResult[RequestType any, ResultType any] struct {
    *JobResult[RequestType, ResultType]
}

// Wait blocks until the result is ready and then returns the result, or the context expires.
// Returns *ResultType instead of ResultType since its possible we have only an error and nil for ResultType.
// Is this correct and idiomatic?
func (jr *JobResult[RequestType, ResultType]) Wait(ctx context.Context) (*ResultType, error) {
    if jr.done == nil { // If the channel is blanked out, result is ready.
        return jr.result, jr.err
    }
    select {
    case <-*jr.done: // Wait for the result to be ready
        jr.done = nil
        if jr.err != nil {
            return nil, jr.err
        }
        return jr.result, nil
    case <-ctx.Done():
        return nil, ctx.Err()
    }
}

// Accessor function to allow holders of JobResults to access the associated request, without allowing the pointer to be updated.
func (jr *JobResult[RequestType, ResultType]) Request() *RequestType {
    return jr.request
}

// This is the function that actually updates the Result and Error on the JobResult... but it's normally not accessible
func (jr *JobResult[RequestType, ResultType]) setResult(result ResultType, err error) {
    jr.once.Do(func() {
        jr.result = &result
        jr.err = err
        close(*jr.done) // Signal that the result is ready - since this is only ran once, jr.done cannot be set to nil yet.
    })
}

// Only the WritableJobResult can actually call setResult - prevents accidental corruption
func (wjr *WritableJobResult[RequestType, ResultType]) SetResult(result ResultType, err error) {
    wjr.JobResult.setResult(result, err)
}

// NewJobResult binds a request to a matched pair of JobResult and WritableJobResult
func NewJobResult[RequestType any, ResultType any](request RequestType) (*JobResult[RequestType, ResultType], *WritableJobResult[RequestType, ResultType]) {
    done := make(chan struct{})
    jr := &JobResult[RequestType, ResultType]{
        once:    sync.Once{},
        request: &request,
        done:    &done,
    }
    return jr, &WritableJobResult[RequestType, ResultType]{JobResult: jr}
}
80  pkg/concurrency/jobresult_test.go  Normal file
@@ -0,0 +1,80 @@
package concurrency_test

import (
    "context"
    "fmt"
    "time"

    . "github.com/mudler/LocalAI/pkg/concurrency"
    . "github.com/onsi/ginkgo/v2"
    . "github.com/onsi/gomega"
)

var _ = Describe("pkg/concurrency unit tests", func() {
    It("can be used to recieve a result across goroutines", func() {
        jr, wjr := NewJobResult[string, string]("foo")
        Expect(jr).ToNot(BeNil())
        Expect(wjr).ToNot(BeNil())

        go func(wjr *WritableJobResult[string, string]) {
            time.Sleep(time.Second * 5)
            wjr.SetResult("bar", nil)
        }(wjr)

        resPtr, err := jr.Wait(context.Background())
        Expect(err).To(BeNil())
        Expect(jr.Request).ToNot(BeNil())
        Expect(*jr.Request()).To(Equal("foo"))
        Expect(resPtr).ToNot(BeNil())
        Expect(*resPtr).To(Equal("bar"))

    })

    It("can be used to recieve an error across goroutines", func() {
        jr, wjr := NewJobResult[string, string]("foo")
        Expect(jr).ToNot(BeNil())
        Expect(wjr).ToNot(BeNil())

        go func(wjr *WritableJobResult[string, string]) {
            time.Sleep(time.Second * 5)
            wjr.SetResult("", fmt.Errorf("test"))
        }(wjr)

        _, err := jr.Wait(context.Background())
        Expect(jr.Request).ToNot(BeNil())
        Expect(*jr.Request()).To(Equal("foo"))
        Expect(err).ToNot(BeNil())
        Expect(err).To(MatchError("test"))
    })

    It("can properly handle timeouts", func() {
        jr, wjr := NewJobResult[string, string]("foo")
        Expect(jr).ToNot(BeNil())
        Expect(wjr).ToNot(BeNil())

        go func(wjr *WritableJobResult[string, string]) {
            time.Sleep(time.Second * 5)
            wjr.SetResult("bar", nil)
        }(wjr)

        timeout1s, c1 := context.WithTimeoutCause(context.Background(), time.Second, fmt.Errorf("timeout"))
        timeout10s, c2 := context.WithTimeoutCause(context.Background(), time.Second*10, fmt.Errorf("timeout"))

        _, err := jr.Wait(timeout1s)
        Expect(jr.Request).ToNot(BeNil())
        Expect(*jr.Request()).To(Equal("foo"))
        Expect(err).ToNot(BeNil())
        Expect(err).To(MatchError(context.DeadlineExceeded))

        resPtr, err := jr.Wait(timeout10s)
        Expect(jr.Request).ToNot(BeNil())
        Expect(*jr.Request()).To(Equal("foo"))
        Expect(err).To(BeNil())
        Expect(resPtr).ToNot(BeNil())
        Expect(*resPtr).To(Equal("bar"))

        // Is this needed? Cleanup Either Way.
        c1()
        c2()
    })
})
13  pkg/downloader/downloader_suite_test.go  Normal file
@@ -0,0 +1,13 @@
package downloader

import (
    "testing"

    . "github.com/onsi/ginkgo/v2"
    . "github.com/onsi/gomega"
)

func TestDownloader(t *testing.T) {
    RegisterFailHandler(Fail)
    RunSpecs(t, "Downloader test suite")
}
@@ -6,6 +6,11 @@ import (
    "github.com/rs/zerolog/log"
)

const (
    defaultFunctionNameKey      = "name"
    defaultFunctionArgumentsKey = "arguments"
)

type Function struct {
    Name        string `json:"name"`
    Description string `json:"description"`
@@ -19,50 +24,18 @@ type Tool struct {
}
type Tools []Tool

// ToJSONFunctionStructure converts a list of functions to a JSON structure that can be parsed to a grammar
// This allows the LLM to return a response of the type: { "function": "function_name", "arguments": { "arg1": "value1", "arg2": "value2" } }
func (f Functions) ToJSONFunctionStructure() JSONFunctionStructureFunction {
    js := JSONFunctionStructureFunction{}
    for _, function := range f {
        // t := function.Parameters["type"]
        //tt := t.(string)

        properties := function.Parameters["properties"]
        defs := function.Parameters["$defs"]
        dat, _ := json.Marshal(properties)
        dat2, _ := json.Marshal(defs)
        prop := map[string]interface{}{}
        defsD := map[string]interface{}{}

        err := json.Unmarshal(dat, &prop)
        if err != nil {
            log.Error().Err(err).Msg("error unmarshalling dat")
        }
        err = json.Unmarshal(dat2, &defsD)
        if err != nil {
            log.Error().Err(err).Msg("error unmarshalling dat2")
        }
        if js.Defs == nil {
            js.Defs = defsD
        }
        js.OneOf = append(js.OneOf, ItemFunction{
            Type: "object",
            Properties: FunctionProperties{
                Function: FunctionName{Const: function.Name},
                Arguments: Argument{
                    Type:       "object",
                    Properties: prop,
                },
            },
        })
    }
    return js
}

// ToJSONNameStructure converts a list of functions to a JSON structure that can be parsed to a grammar
// This allows the LLM to return a response of the type: { "name": "function_name", "arguments": { "arg1": "value1", "arg2": "value2" } }
func (f Functions) ToJSONNameStructure() JSONFunctionStructureName {
    js := JSONFunctionStructureName{}
func (f Functions) ToJSONStructure(name, args string) JSONFunctionStructure {
    nameKey := defaultFunctionNameKey
    argsKey := defaultFunctionArgumentsKey
    if name != "" {
        nameKey = name
    }
    if args != "" {
        argsKey = args
    }
    js := JSONFunctionStructure{}
    for _, function := range f {
        // t := function.Parameters["type"]
        //tt := t.(string)
@@ -85,15 +58,16 @@ func (f Functions) ToJSONNameStructure() JSONFunctionStructureName {
        if js.Defs == nil {
            js.Defs = defsD
        }
        js.OneOf = append(js.OneOf, ItemName{
            Type: "object",
            Properties: NameProperties{
                Function: FunctionName{Const: function.Name},
                Arguments: Argument{
                    Type:       "object",
                    Properties: prop,
                },
            },

        property := map[string]interface{}{}
        property[nameKey] = FunctionName{Const: function.Name}
        property[argsKey] = Argument{
            Type:       "object",
            Properties: prop,
        }
        js.OneOf = append(js.OneOf, Item{
            Type:       "object",
            Properties: property,
        })
    }
    return js

@@ -35,21 +35,35 @@ var _ = Describe("LocalAI grammar functions", func() {
|
||||
},
|
||||
}
|
||||
|
||||
js := functions.ToJSONFunctionStructure()
|
||||
js := functions.ToJSONStructure("function", "arguments")
|
||||
Expect(len(js.OneOf)).To(Equal(2))
|
||||
Expect(js.OneOf[0].Properties.Function.Const).To(Equal("create_event"))
|
||||
Expect(js.OneOf[0].Properties.Arguments.Properties["event_name"].(map[string]interface{})["type"]).To(Equal("string"))
|
||||
Expect(js.OneOf[0].Properties.Arguments.Properties["event_date"].(map[string]interface{})["type"]).To(Equal("string"))
|
||||
Expect(js.OneOf[1].Properties.Function.Const).To(Equal("search"))
|
||||
Expect(js.OneOf[1].Properties.Arguments.Properties["query"].(map[string]interface{})["type"]).To(Equal("string"))
|
||||
fnName := js.OneOf[0].Properties["function"].(FunctionName)
|
||||
fnArgs := js.OneOf[0].Properties["arguments"].(Argument)
|
||||
Expect(fnName.Const).To(Equal("create_event"))
|
||||
Expect(fnArgs.Properties["event_name"].(map[string]interface{})["type"]).To(Equal("string"))
|
||||
Expect(fnArgs.Properties["event_date"].(map[string]interface{})["type"]).To(Equal("string"))
|
||||
|
||||
jsN := functions.ToJSONNameStructure()
|
||||
fnName = js.OneOf[1].Properties["function"].(FunctionName)
|
||||
fnArgs = js.OneOf[1].Properties["arguments"].(Argument)
|
||||
Expect(fnName.Const).To(Equal("search"))
|
||||
Expect(fnArgs.Properties["query"].(map[string]interface{})["type"]).To(Equal("string"))
|
||||
|
||||
// Test with custom keys
|
||||
jsN := functions.ToJSONStructure("name", "arguments")
|
||||
Expect(len(jsN.OneOf)).To(Equal(2))
|
||||
Expect(jsN.OneOf[0].Properties.Function.Const).To(Equal("create_event"))
|
||||
Expect(jsN.OneOf[0].Properties.Arguments.Properties["event_name"].(map[string]interface{})["type"]).To(Equal("string"))
|
||||
Expect(jsN.OneOf[0].Properties.Arguments.Properties["event_date"].(map[string]interface{})["type"]).To(Equal("string"))
|
||||
Expect(jsN.OneOf[1].Properties.Function.Const).To(Equal("search"))
|
||||
Expect(jsN.OneOf[1].Properties.Arguments.Properties["query"].(map[string]interface{})["type"]).To(Equal("string"))
|
||||
|
||||
fnName = jsN.OneOf[0].Properties["name"].(FunctionName)
|
||||
fnArgs = jsN.OneOf[0].Properties["arguments"].(Argument)
|
||||
|
||||
Expect(fnName.Const).To(Equal("create_event"))
|
||||
Expect(fnArgs.Properties["event_name"].(map[string]interface{})["type"]).To(Equal("string"))
|
||||
Expect(fnArgs.Properties["event_date"].(map[string]interface{})["type"]).To(Equal("string"))
|
||||
|
||||
fnName = jsN.OneOf[1].Properties["name"].(FunctionName)
|
||||
fnArgs = jsN.OneOf[1].Properties["arguments"].(Argument)
|
||||
|
||||
Expect(fnName.Const).To(Equal("search"))
|
||||
Expect(fnArgs.Properties["query"].(map[string]interface{})["type"]).To(Equal("string"))
|
||||
})
|
||||
})
|
||||
Context("Select()", func() {
|
||||
|
||||
@@ -331,6 +331,7 @@ func (sc *JSONSchemaConverter) resolveReference(ref string, rootSchema map[strin

	return def
}

func (sc *JSONSchemaConverter) Grammar(schema map[string]interface{}, options ...func(*GrammarOption)) string {
	sc.addRule("freestring", PRIMITIVE_RULES["freestring"])
	sc.visit(schema, "", schema)
@@ -352,52 +353,23 @@ type FunctionName struct {
	Const string `json:"const"`
}

type FunctionProperties struct {
	Function  FunctionName `json:"function"`
	Arguments Argument     `json:"arguments"`
}

type NameProperties struct {
	Function  FunctionName `json:"name"`
	Arguments Argument     `json:"arguments"`
}

type Argument struct {
	Type       string                 `json:"type"`
	Properties map[string]interface{} `json:"properties"`
}

type ItemName struct {
	Type       string         `json:"type"`
	Properties NameProperties `json:"properties"`
type Item struct {
	Type       string                 `json:"type"`
	Properties map[string]interface{} `json:"properties"`
}

type ItemFunction struct {
	Type       string             `json:"type"`
	Properties FunctionProperties `json:"properties"`
}

type JSONFunctionStructureName struct {
	OneOf []ItemName `json:"oneOf,omitempty"`
	AnyOf []ItemName `json:"anyOf,omitempty"`
type JSONFunctionStructure struct {
	OneOf []Item                 `json:"oneOf,omitempty"`
	AnyOf []Item                 `json:"anyOf,omitempty"`
	Defs  map[string]interface{} `json:"$defs,omitempty"`
}

func (j JSONFunctionStructureName) Grammar(options ...func(*GrammarOption)) string {
	grammarOpts := &GrammarOption{}
	grammarOpts.Apply(options...)

	dat, _ := json.Marshal(j)
	return NewJSONSchemaConverter(grammarOpts.PropOrder).GrammarFromBytes(dat, options...)
}

type JSONFunctionStructureFunction struct {
	OneOf []ItemFunction         `json:"oneOf,omitempty"`
	AnyOf []ItemFunction         `json:"anyOf,omitempty"`
	Defs  map[string]interface{} `json:"$defs,omitempty"`
}

func (j JSONFunctionStructureFunction) Grammar(options ...func(*GrammarOption)) string {
func (j JSONFunctionStructure) Grammar(options ...func(*GrammarOption)) string {
	grammarOpts := &GrammarOption{}
	grammarOpts.Apply(options...)
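With the two structures collapsed into one, both response layouts are expressed by the same type; a short sketch of grammar generation (package API exactly as shown in this diff, option names taken from the tests below):

js := functions.JSONFunctionStructure{
	OneOf: []functions.Item{{
		Type: "object",
		Properties: map[string]interface{}{
			"name": functions.FunctionName{Const: "search"},
			"arguments": functions.Argument{
				Type:       "object",
				Properties: map[string]interface{}{"query": map[string]string{"type": "string"}},
			},
		},
	}},
}
// EnableMaybeArray also admits an array of such calls, as exercised in the tests.
grammar := js.Grammar(functions.EnableMaybeArray)
_ = grammar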
@@ -9,69 +9,65 @@ import (
	. "github.com/onsi/gomega"
)

var testFunctions = []ItemFunction{
func createFunction(field1 string, field2 string, name string, properties map[string]interface{}) map[string]interface{} {
	property := map[string]interface{}{}
	property[field1] = FunctionName{Const: name}
	property[field2] = Argument{
		Type:       "object",
		Properties: properties,
	}
	return property
}

var testFunctions = []Item{
	{
		Type: "object",
		Properties: FunctionProperties{
			Function: FunctionName{
				Const: "create_event",
		Properties: createFunction(
			"function",
			"arguments",
			"create_event",
			map[string]interface{}{
				"title": map[string]string{"type": "string"},
				"date":  map[string]string{"type": "string"},
				"time":  map[string]string{"type": "string"},
			},
			Arguments: Argument{ // this is OpenAI's parameter
				Type: "object",
				Properties: map[string]interface{}{
					"title": map[string]string{"type": "string"},
					"date":  map[string]string{"type": "string"},
					"time":  map[string]string{"type": "string"},
				},
			},
		},
		),
	},
	{
		Type: "object",
		Properties: FunctionProperties{
			Function: FunctionName{
				Const: "search",
			},
			Arguments: Argument{
				Type: "object",
				Properties: map[string]interface{}{
					"query": map[string]string{"type": "string"},
				},
			},
		},
		Properties: createFunction(
			"function",
			"arguments",
			"search",
			map[string]interface{}{
				"query": map[string]string{"type": "string"},
			}),
	},
}

var testFunctionsName = []ItemName{
var testFunctionsName = []Item{
	{
		Type: "object",
		Properties: NameProperties{
			Function: FunctionName{
				Const: "create_event",
		Properties: createFunction(
			"name",
			"arguments",
			"create_event",
			map[string]interface{}{
				"title": map[string]string{"type": "string"},
				"date":  map[string]string{"type": "string"},
				"time":  map[string]string{"type": "string"},
			},
			Arguments: Argument{ // this is OpenAI's parameter
				Type: "object",
				Properties: map[string]interface{}{
					"title": map[string]string{"type": "string"},
					"date":  map[string]string{"type": "string"},
					"time":  map[string]string{"type": "string"},
				},
			},
		},
		),
	},
	{
		Type: "object",
		Properties: NameProperties{
			Function: FunctionName{
				Const: "search",
			},
			Arguments: Argument{
				Type: "object",
				Properties: map[string]interface{}{
					"query": map[string]string{"type": "string"},
				},
			},
		},
		Properties: createFunction(
			"name",
			"arguments",
			"search",
			map[string]interface{}{
				"query": map[string]string{"type": "string"},
			}),
	},
}
@@ -270,7 +266,7 @@ var _ = Describe("JSON schema grammar tests", func() {
	})
	It("generates a valid grammar from JSON Objects", func() {

		structuredGrammar := JSONFunctionStructureFunction{
		structuredGrammar := JSONFunctionStructure{
			OneOf: testFunctions}

		grammar := structuredGrammar.Grammar()
@@ -284,7 +280,7 @@ var _ = Describe("JSON schema grammar tests", func() {
	})

	It("generates a valid grammar from JSON Objects for multiple function return", func() {
		structuredGrammar := JSONFunctionStructureFunction{
		structuredGrammar := JSONFunctionStructure{
			OneOf: testFunctions}

		grammar := structuredGrammar.Grammar(functions.EnableMaybeArray)
@@ -302,7 +298,7 @@ var _ = Describe("JSON schema grammar tests", func() {
	})

	It("generates a valid grammar from JSON Objects for multiple function return", func() {
		structuredGrammar := JSONFunctionStructureName{
		structuredGrammar := JSONFunctionStructure{
			OneOf: testFunctionsName}

		grammar := structuredGrammar.Grammar(functions.EnableMaybeArray)
@@ -320,7 +316,7 @@ var _ = Describe("JSON schema grammar tests", func() {
	})

	It("generates a valid grammar from JSON Objects for multiple function return with a suffix and array", func() {
		structuredGrammar := JSONFunctionStructureName{
		structuredGrammar := JSONFunctionStructure{
			OneOf: testFunctionsName}

		grammar := structuredGrammar.Grammar(
@@ -340,7 +336,7 @@ var _ = Describe("JSON schema grammar tests", func() {
		Expect(len(results)).To(Equal(len(strings.Split(grammar, "\n"))), grammar)
	})
	It("generates a valid grammar from JSON Objects with a suffix", func() {
		structuredGrammar := JSONFunctionStructureName{
		structuredGrammar := JSONFunctionStructure{
			OneOf: testFunctionsName}

		grammar := structuredGrammar.Grammar(functions.SetPrefix("suffix"))
@@ -357,7 +353,7 @@ var _ = Describe("JSON schema grammar tests", func() {
		Expect(len(results)).To(Equal(len(strings.Split(grammar, "\n"))), grammar)
	})
	It("generates a valid grammar from JSON Objects with a suffix and could return string", func() {
		structuredGrammar := JSONFunctionStructureName{
		structuredGrammar := JSONFunctionStructure{
			OneOf: testFunctionsName}

		grammar := structuredGrammar.Grammar(functions.SetPrefix("suffix"), functions.EnableMaybeString)
@@ -374,7 +370,7 @@ var _ = Describe("JSON schema grammar tests", func() {
		Expect(len(results)).To(Equal(len(strings.Split(grammar, "\n"))), grammar)
	})
	It("generates a valid grammar from JSON Objects with a suffix that could return text or an array of tools", func() {
		structuredGrammar := JSONFunctionStructureName{
		structuredGrammar := JSONFunctionStructure{
			OneOf: testFunctionsName}

		grammar := structuredGrammar.Grammar(functions.SetPrefix("suffix"), functions.EnableMaybeString, functions.EnableMaybeArray)
@@ -393,7 +389,7 @@ var _ = Describe("JSON schema grammar tests", func() {
	})

	It("generates a valid grammar from JSON Objects without a suffix that could return text or an array of tools or just string", func() {
		structuredGrammar := JSONFunctionStructureName{
		structuredGrammar := JSONFunctionStructure{
			OneOf: testFunctionsName}

		grammar := structuredGrammar.Grammar(functions.EnableMaybeString, functions.EnableMaybeArray)
@@ -411,7 +407,7 @@ var _ = Describe("JSON schema grammar tests", func() {
	})

	It("generates a valid grammar from JSON Objects without a suffix that could return text or an array of tools or just string. Disables mixedstring", func() {
		structuredGrammar := JSONFunctionStructureName{
		structuredGrammar := JSONFunctionStructure{
			OneOf: testFunctionsName}

		grammar := structuredGrammar.Grammar(functions.EnableMaybeString, functions.EnableMaybeArray, functions.NoMixedFreeString)
@@ -429,7 +425,7 @@ var _ = Describe("JSON schema grammar tests", func() {
	})

	It("generates parallel tools without newlines in JSON", func() {
		structuredGrammar := JSONFunctionStructureName{
		structuredGrammar := JSONFunctionStructure{
			OneOf: testFunctionsName}
		content := `arr ::=
"[" (
@@ -2,6 +2,8 @@ package functions

import (
	"encoding/json"
	"errors"
	"io"
	"regexp"
	"strings"
@@ -76,7 +78,8 @@ type FunctionsConfig struct {
	// FunctionName enables the LLM to return { "name": "function_name", "arguments": { "arg1": "value1", "arg2": "value2" } }
	// instead of { "function": "function_name", "arguments": { "arg1": "value1", "arg2": "value2" } }.
	// This might be useful for certain models trained with the function name as the first token.
	FunctionName bool `yaml:"return_name_in_function_response"`
	FunctionNameKey      string `yaml:"function_name_key"`
	FunctionArgumentsKey string `yaml:"function_arguments_key"`
}

type ReplaceResult struct {
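The two new keys make the JSON keys fully configurable instead of toggling between two hardcoded layouts. A hypothetical model-config fragment (the leaf keys come from the yaml tags above; the enclosing function: block is an assumption about where FunctionsConfig is mounted):

function:
  # defaults to "function" when unset
  function_name_key: name
  # defaults to "arguments" when unset
  function_arguments_key: args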
@@ -145,6 +148,47 @@ func ParseTextContent(llmresult string, functionConfig FunctionsConfig) string {
	return ""
}

// ParseJSON parses a JSON string that might contain multiple JSON objects
// and syntax errors in between, by shifting the offset.
// This allows, for example, parsing
// { "foo": "bar" } invalid { "baz": "qux" }
// into
// [ { "foo": "bar" }, { "baz": "qux" } ]
// Credits to Michael Yang (https://github.com/mxyng) for the original implementation.
// This is a slightly reworked version, improved for readability and error handling.
func ParseJSON(s string) ([]map[string]any, error) {
	var objs []map[string]any
	offset := 0

	for offset < len(s) {
		var obj map[string]any
		decoder := json.NewDecoder(strings.NewReader(s[offset:]))

		err := decoder.Decode(&obj)
		switch {
		case errors.Is(err, io.EOF):
			return objs, nil
		case err == nil:
			offset += int(decoder.InputOffset())
			objs = append(objs, obj)
		default: // handle the error type
			var syntaxErr *json.SyntaxError
			var unmarshalTypeErr *json.UnmarshalTypeError

			switch {
			case errors.As(err, &syntaxErr):
				offset += int(syntaxErr.Offset)
			case errors.As(err, &unmarshalTypeErr):
				offset += int(unmarshalTypeErr.Offset)
			default:
				return objs, err
			}
		}
	}

	return objs, nil
}
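A quick sketch of the recovery behavior, mirroring the tests added further down (no new API assumed):

objs, err := ParseJSON(`{"foo": "bar"} invalid {"baz": "qux"}`)
// err == nil; objs == []map[string]any{{"foo": "bar"}, {"baz": "qux"}}
// Input with no parsable object at all ("invalid json") returns an error instead.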

func ParseFunctionCall(llmresult string, functionConfig FunctionsConfig) []FuncCallResults {

	log.Debug().Msgf("LLM result: %s", llmresult)
@@ -157,9 +201,13 @@ func ParseFunctionCall(llmresult string, functionConfig FunctionsConfig) []FuncC
	}
	log.Debug().Msgf("LLM result(function cleanup): %s", llmresult)

	functionNameKey := "function"
	if functionConfig.FunctionName {
		functionNameKey = "name"
	functionNameKey := defaultFunctionNameKey
	functionArgumentsKey := defaultFunctionArgumentsKey
	if functionConfig.FunctionNameKey != "" {
		functionNameKey = functionConfig.FunctionNameKey
	}
	if functionConfig.FunctionArgumentsKey != "" {
		functionArgumentsKey = functionConfig.FunctionArgumentsKey
	}

	results := []FuncCallResults{}
@@ -170,19 +218,13 @@ func ParseFunctionCall(llmresult string, functionConfig FunctionsConfig) []FuncC
		result = make([]FuncCallResults, 0)

		for _, s := range results {
			var ss []map[string]interface{}
			var ss []map[string]any

			s = utils.EscapeNewLines(s)
			err := json.Unmarshal([]byte(s), &ss)
			ss, err := ParseJSON(s)
			//err := json.Unmarshal([]byte(s), &ss)
			if err != nil {
				// If the LLM result is a single object, try unmarshaling it into a single map
				var singleObj map[string]interface{}
				err = json.Unmarshal([]byte(s), &singleObj)
				if err != nil {
					log.Debug().Err(err).Str("escapedLLMResult", s).Msg("unable to unmarshal llm result in a single object or an array of JSON objects")
				} else {
					ss = []map[string]interface{}{singleObj}
				}
				log.Debug().Err(err).Str("escapedLLMResult", s).Msg("unable to unmarshal llm result in a single object or an array of JSON objects")
			}

			log.Debug().Msgf("Function return: %s %+v", s, ss)
@@ -195,7 +237,7 @@ func ParseFunctionCall(llmresult string, functionConfig FunctionsConfig) []FuncC
				//return result, fmt.Errorf("unable to find function name in result")
			}
			// Similarly, while here arguments is a map[string]interface{}, OpenAI actually wants a stringified object
			args, ok := s["arguments"] // arguments needs to be a string, but we return an object from the grammar result (TODO: fix)
			args, ok := s[functionArgumentsKey] // arguments needs to be a string, but we return an object from the grammar result (TODO: fix)
			if !ok {
				continue
				//return result, fmt.Errorf("unable to find arguments in result")
@@ -253,7 +295,7 @@ func ParseFunctionCall(llmresult string, functionConfig FunctionsConfig) []FuncC
			if functionName == "" {
				return results
			}
			results = append(results, FuncCallResults{Name: result[functionNameKey], Arguments: result["arguments"]})
			results = append(results, FuncCallResults{Name: result[functionNameKey], Arguments: result[functionArgumentsKey]})
		}
	}
} else {
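End to end, the parser now resolves both keys from the config before scanning the LLM output; a minimal sketch using only identifiers from this diff:

cfg := FunctionsConfig{
	FunctionNameKey:      "name",
	FunctionArgumentsKey: "arguments",
}
calls := ParseFunctionCall(`{"name": "add", "arguments": {"x": 5, "y": 3}}`, cfg)
// calls[0].Name == "add"; calls[0].Arguments == `{"x":5,"y":3}` (arguments are re-stringified)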
@@ -16,7 +16,7 @@ var _ = Describe("LocalAI function parse tests", func() {

	Context("when using grammars and single result expected", func() {
		It("should parse the function name and arguments correctly", func() {
			input := `{"function": "add", "arguments": {"x": 5, "y": 3}}`
			input := `{"name": "add", "arguments": {"x": 5, "y": 3}}`

			results := ParseFunctionCall(input, functionConfig)
			Expect(results).To(HaveLen(1))
@@ -28,13 +28,22 @@ var _ = Describe("LocalAI function parse tests", func() {
	Context("when not using grammars and regex is needed", func() {
		It("should extract function name and arguments from the regex", func() {
			input := `add({"x":5,"y":3})`
			functionConfig.ResponseRegex = []string{`(?P<function>\w+)\s*\((?P<arguments>.*)\)`}
			functionConfig.ResponseRegex = []string{`(?P<name>\w+)\s*\((?P<arguments>.*)\)`}

			results := ParseFunctionCall(input, functionConfig)
			Expect(results).To(HaveLen(1))
			Expect(results[0].Name).To(Equal("add"))
			Expect(results[0].Arguments).To(Equal(`{"x":5,"y":3}`))
		})
		It("should extract function name and arguments from the regex", func() {
			input := `add({"x":5,"y":3})`
			functionConfig.ResponseRegex = []string{`(?P<function>\w+)\s*\((?P<arguments>.*)\)`}
			functionConfig.FunctionNameKey = "function"
			results := ParseFunctionCall(input, functionConfig)
			Expect(results).To(HaveLen(1))
			Expect(results[0].Name).To(Equal("add"))
			Expect(results[0].Arguments).To(Equal(`{"x":5,"y":3}`))
		})
	})

	Context("when having invalid input", func() {
@@ -53,7 +62,7 @@ var _ = Describe("LocalAI function parse tests", func() {

	Context("when parallel calls are enabled", func() {
		It("should handle multiple function calls", func() {
			input := `[{"function": "add", "arguments": {"x": 5, "y": 3}}, {"function": "subtract", "arguments": {"x": 10, "y": 7}}]`
			input := `[{"name": "add", "arguments": {"x": 5, "y": 3}}, {"name": "subtract", "arguments": {"x": 10, "y": 7}}]`

			results := ParseFunctionCall(input, functionConfig)
			Expect(results).To(HaveLen(2))
@@ -66,8 +75,8 @@ var _ = Describe("LocalAI function parse tests", func() {

	Context("without grammars and without regex", func() {
		It("should parse the function name and arguments correctly with the name key", func() {
			input := `{"name": "add", "arguments": {"x": 5, "y": 3}}`
			functionConfig.FunctionName = true
			input := `{"function": "add", "arguments": {"x": 5, "y": 3}}`
			functionConfig.FunctionNameKey = "function"

			results := ParseFunctionCall(input, functionConfig)
			Expect(results).To(HaveLen(1))
@@ -76,7 +85,7 @@ var _ = Describe("LocalAI function parse tests", func() {
		})

		It("should parse the function name and arguments correctly with the function key", func() {
			input := `{"function": "add", "arguments": {"x": 5, "y": 3}}`
			input := `{"name": "add", "arguments": {"x": 5, "y": 3}}`

			results := ParseFunctionCall(input, functionConfig)
			Expect(results).To(HaveLen(1))
@@ -87,7 +96,7 @@ var _ = Describe("LocalAI function parse tests", func() {
		It("should parse the result by matching the JSONRegexMatch", func() {
			input := `
<tool_call>
{"function": "add", "arguments": {"x": 5, "y": 3}}
{"name": "add", "arguments": {"x": 5, "y": 3}}
</tool_call>`

			functionConfig.JSONRegexMatch = []string{`(?s)<tool_call>(.*?)</tool_call>`}
@@ -100,7 +109,7 @@ var _ = Describe("LocalAI function parse tests", func() {

		It("should parse the result by matching the JSONRegexMatch", func() {
			input := `
{"function": "add", "arguments": {"x": 5, "y": 3}}
{"name": "add", "arguments": {"x": 5, "y": 3}}
</tool_call>`

			functionConfig.JSONRegexMatch = []string{`(?s)(.*?)</tool_call>`}
@@ -110,13 +119,21 @@ var _ = Describe("LocalAI function parse tests", func() {
			Expect(results[0].Name).To(Equal("add"))
			Expect(results[0].Arguments).To(Equal(`{"x":5,"y":3}`))
		})

		It("should parse the result even with invalid JSON", func() {
			input := `{"name": "add", "arguments": {"x": 5, "y": 3}} invalid {"name": "add", "arguments": {"x": 5, "y": 3}}`
			results := ParseFunctionCall(input, functionConfig)
			Expect(results).To(HaveLen(2))
			Expect(results[0].Name).To(Equal("add"))
			Expect(results[0].Arguments).To(Equal(`{"x":5,"y":3}`))
		})
	})

	Context("when using ReplaceResults to clean up input", func() {
		It("should replace text before and after JSON blob", func() {
			input := `
Some text before the JSON
{"function": "add", "arguments": {"x": 5, "y": 3}}
{"name": "add", "arguments": {"x": 5, "y": 3}}
Some text after the JSON
`

@@ -134,7 +151,7 @@ Some text after the JSON
		It("should replace text before and after array JSON blob", func() {
			input := `
Some text before the JSON
[{"function": "add", "arguments": {"x": 5, "y": 3}}, {"function": "subtract", "arguments": {"x": 10, "y": 7}}]
[{"name": "add", "arguments": {"x": 5, "y": 3}}, {"name": "subtract", "arguments": {"x": 10, "y": 7}}]
Some text after the JSON
`
			functionConfig.ReplaceFunctionResults = []ReplaceResult{
@@ -153,7 +170,7 @@ Some text after the JSON
		It("should convert single-quoted key-value pairs to double-quoted and escape double quotes within values", func() {
			input := `
Some text before the JSON
{'function': '"add"', 'arguments': {'x': 5, 'z': '"v"', 'y': 'v"value"'}}
{'name': '"add"', 'arguments': {'x': 5, 'z': '"v"', 'y': 'v"value"'}}
Some text after the JSON
`
			functionConfig.JSONRegexMatch = []string{`(?s)<tool_call>(.*?)</tool_call>`}
@@ -186,7 +203,7 @@ Some text after the JSON
		It("should convert single-quoted key-value pairs to double-quoted and escape double quotes within values", func() {
			input := `
Some text before the JSON
<tool_call>{'function': '"add"', 'arguments': {'x': 5, 'z': '"v"', 'y': 'v"value"'}}</tool_call>
<tool_call>{'name': '"add"', 'arguments': {'x': 5, 'z': '"v"', 'y': 'v"value"'}}</tool_call>
Some text after the JSON
`
			functionConfig.JSONRegexMatch = []string{`(?s)<tool_call>(.*?)</tool_call>`}
@@ -219,8 +236,8 @@ Some text after the JSON
		It("should detect multiple functions call where the JSONRegexMatch is repeated", func() {
			input := `
Some text before the JSON
<tool_call>{"function": "add", "arguments": {"x": 5, "y": 3}}</tool_call>
<tool_call>{"function": "subtract", "arguments": {"x": 10, "y": 7}}</tool_call>
<tool_call>{"name": "add", "arguments": {"x": 5, "y": 3}}</tool_call>
<tool_call>{"name": "subtract", "arguments": {"x": 10, "y": 7}}</tool_call>
Some text after the JSON
`
			functionConfig.JSONRegexMatch = []string{`(?s)<tool_call>(.*?)</tool_call>`}
@@ -240,7 +257,7 @@ Some text after the JSON
<sketchpad>
roses are red
</sketchpad>
<tool_call>{"function": "subtract", "arguments": {"x": 10, "y": 7}}</tool_call>
<tool_call>{"name": "subtract", "arguments": {"x": 10, "y": 7}}</tool_call>
Some text after the JSON
`
			functionConfig.CaptureLLMResult = []string{`(?s)<sketchpad>(.*?)</sketchpad>`}
@@ -251,7 +268,7 @@ roses are red
		It("Defaults to empty if doesn't catch any", func() {
			input := `
Some text before the JSON
<tool_call>{"function": "subtract", "arguments": {"x": 10, "y": 7}}</tool_call>
<tool_call>{"name": "subtract", "arguments": {"x": 10, "y": 7}}</tool_call>
Some text after the JSON
`
			functionConfig.CaptureLLMResult = []string{`(?s)<sketchpad>(.*?)</sketchpad>`}
@@ -259,4 +276,74 @@ roses are red
			Expect(results).To(Equal(""))
		})
	})
Context("ParseJSON - when given valid JSON strings", func() {
|
||||
It("should parse multiple JSON objects", func() {
|
||||
input := `{"key1": "value1"} {"key2": "value2"}`
|
||||
expected := []map[string]any{
|
||||
{"key1": "value1"},
|
||||
{"key2": "value2"},
|
||||
}
|
||||
result, err := ParseJSON(input)
|
||||
Expect(err).NotTo(HaveOccurred())
|
||||
Expect(result).To(Equal(expected))
|
||||
})
|
||||
|
||||
It("should parse a single JSON object with various types", func() {
|
||||
input := `{"key1": "value1", "key2": 2}`
|
||||
expected := []map[string]any{
|
||||
{"key1": "value1", "key2": float64(2)},
|
||||
}
|
||||
result, err := ParseJSON(input)
|
||||
Expect(err).NotTo(HaveOccurred())
|
||||
Expect(result).To(Equal(expected))
|
||||
})
|
||||
It("should handle JSON without syntax errors gracefully", func() {
|
||||
input := `{"key1": "value1"}`
|
||||
expected := []map[string]any{
|
||||
{"key1": "value1"},
|
||||
}
|
||||
result, err := ParseJSON(input)
|
||||
Expect(err).NotTo(HaveOccurred())
|
||||
Expect(result).To(Equal(expected))
|
||||
})
|
||||
It("should handle JSON without syntax errors gracefully", func() {
|
||||
input := `[{"key1": "value1"}]`
|
||||
expected := []map[string]any{
|
||||
{"key1": "value1"},
|
||||
}
|
||||
result, err := ParseJSON(input)
|
||||
Expect(err).NotTo(HaveOccurred())
|
||||
Expect(result).To(Equal(expected))
|
||||
})
|
||||
})
|
||||
|
||||
Context("ParseJSON - when given invalid JSON strings", func() {
|
||||
It("should return an error for completely invalid JSON", func() {
|
||||
input := `invalid json`
|
||||
result, err := ParseJSON(input)
|
||||
Expect(err).To(HaveOccurred())
|
||||
Expect(result).To(BeNil())
|
||||
})
|
||||
|
||||
It("should skip invalid JSON parts and parse valid parts", func() {
|
||||
input := `{"key1": "value1"} invalid {"key2": "value2"}`
|
||||
expected := []map[string]any{
|
||||
{"key1": "value1"},
|
||||
{"key2": "value2"},
|
||||
}
|
||||
result, err := ParseJSON(input)
|
||||
Expect(err).NotTo(HaveOccurred())
|
||||
Expect(result).To(Equal(expected))
|
||||
})
|
||||
|
||||
PIt("should handle JSON with syntax errors gracefully", func() {
|
||||
input := `{"key1": "value1", "key2": }`
|
||||
expected := []map[string]any{
|
||||
{"key1": "value1"},
|
||||
}
|
||||
result, err := ParseJSON(input)
|
||||
Expect(err).NotTo(HaveOccurred())
|
||||
Expect(result).To(Equal(expected))
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
@@ -98,6 +98,9 @@ var knownModelsNameSuffixToSkip []string = []string{
	".yaml",
	".yml",
	".json",
	".txt",
	".md",
	".MD",
	".DS_Store",
	".",
	".partial",
@@ -700,18 +700,6 @@ const docTemplate = `{
                }
            }
        },
        "functions.Argument": {
            "type": "object",
            "properties": {
                "properties": {
                    "type": "object",
                    "additionalProperties": true
                },
                "type": {
                    "type": "string"
                }
            }
        },
        "functions.Function": {
            "type": "object",
            "properties": {
@@ -727,48 +715,19 @@ const docTemplate = `{
                }
            }
        },
        "functions.FunctionName": {
            "type": "object",
            "properties": {
                "const": {
                    "type": "string"
                }
            }
        },
        "functions.FunctionProperties": {
            "type": "object",
            "properties": {
                "arguments": {
                    "$ref": "#/definitions/functions.Argument"
                },
                "function": {
                    "$ref": "#/definitions/functions.FunctionName"
                }
            }
        },
        "functions.ItemFunction": {
        "functions.Item": {
            "type": "object",
            "properties": {
                "properties": {
                    "$ref": "#/definitions/functions.FunctionProperties"
                    "type": "object",
                    "additionalProperties": true
                },
                "type": {
                    "type": "string"
                }
            }
        },
        "functions.ItemName": {
            "type": "object",
            "properties": {
                "properties": {
                    "$ref": "#/definitions/functions.NameProperties"
                },
                "type": {
                    "type": "string"
                }
            }
        },
        "functions.JSONFunctionStructureFunction": {
        "functions.JSONFunctionStructure": {
            "type": "object",
            "properties": {
                "$defs": {
@@ -778,49 +737,17 @@ const docTemplate = `{
                "anyOf": {
                    "type": "array",
                    "items": {
                        "$ref": "#/definitions/functions.ItemFunction"
                        "$ref": "#/definitions/functions.Item"
                    }
                },
                "oneOf": {
                    "type": "array",
                    "items": {
                        "$ref": "#/definitions/functions.ItemFunction"
                        "$ref": "#/definitions/functions.Item"
                    }
                }
            }
        },
        "functions.JSONFunctionStructureName": {
            "type": "object",
            "properties": {
                "$defs": {
                    "type": "object",
                    "additionalProperties": true
                },
                "anyOf": {
                    "type": "array",
                    "items": {
                        "$ref": "#/definitions/functions.ItemName"
                    }
                },
                "oneOf": {
                    "type": "array",
                    "items": {
                        "$ref": "#/definitions/functions.ItemName"
                    }
                }
            }
        },
        "functions.NameProperties": {
            "type": "object",
            "properties": {
                "arguments": {
                    "$ref": "#/definitions/functions.Argument"
                },
                "name": {
                    "$ref": "#/definitions/functions.FunctionName"
                }
            }
        },
        "functions.Tool": {
            "type": "object",
            "properties": {
@@ -1488,10 +1415,7 @@ const docTemplate = `{
                "type": "string"
            },
            "grammar_json_functions": {
                "$ref": "#/definitions/functions.JSONFunctionStructureFunction"
            },
            "grammar_json_name": {
                "$ref": "#/definitions/functions.JSONFunctionStructureName"
                "$ref": "#/definitions/functions.JSONFunctionStructure"
            },
            "ignore_eos": {
                "type": "boolean"
@@ -693,18 +693,6 @@
                }
            }
        },
        "functions.Argument": {
            "type": "object",
            "properties": {
                "properties": {
                    "type": "object",
                    "additionalProperties": true
                },
                "type": {
                    "type": "string"
                }
            }
        },
        "functions.Function": {
            "type": "object",
            "properties": {
@@ -720,48 +708,19 @@
                }
            }
        },
        "functions.FunctionName": {
            "type": "object",
            "properties": {
                "const": {
                    "type": "string"
                }
            }
        },
        "functions.FunctionProperties": {
            "type": "object",
            "properties": {
                "arguments": {
                    "$ref": "#/definitions/functions.Argument"
                },
                "function": {
                    "$ref": "#/definitions/functions.FunctionName"
                }
            }
        },
        "functions.ItemFunction": {
        "functions.Item": {
            "type": "object",
            "properties": {
                "properties": {
                    "$ref": "#/definitions/functions.FunctionProperties"
                    "type": "object",
                    "additionalProperties": true
                },
                "type": {
                    "type": "string"
                }
            }
        },
        "functions.ItemName": {
            "type": "object",
            "properties": {
                "properties": {
                    "$ref": "#/definitions/functions.NameProperties"
                },
                "type": {
                    "type": "string"
                }
            }
        },
        "functions.JSONFunctionStructureFunction": {
        "functions.JSONFunctionStructure": {
            "type": "object",
            "properties": {
                "$defs": {
@@ -771,49 +730,17 @@
                "anyOf": {
                    "type": "array",
                    "items": {
                        "$ref": "#/definitions/functions.ItemFunction"
                        "$ref": "#/definitions/functions.Item"
                    }
                },
                "oneOf": {
                    "type": "array",
                    "items": {
                        "$ref": "#/definitions/functions.ItemFunction"
                        "$ref": "#/definitions/functions.Item"
                    }
                }
            }
        },
        "functions.JSONFunctionStructureName": {
            "type": "object",
            "properties": {
                "$defs": {
                    "type": "object",
                    "additionalProperties": true
                },
                "anyOf": {
                    "type": "array",
                    "items": {
                        "$ref": "#/definitions/functions.ItemName"
                    }
                },
                "oneOf": {
                    "type": "array",
                    "items": {
                        "$ref": "#/definitions/functions.ItemName"
                    }
                }
            }
        },
        "functions.NameProperties": {
            "type": "object",
            "properties": {
                "arguments": {
                    "$ref": "#/definitions/functions.Argument"
                },
                "name": {
                    "$ref": "#/definitions/functions.FunctionName"
                }
            }
        },
        "functions.Tool": {
            "type": "object",
            "properties": {
@@ -1481,10 +1408,7 @@
                "type": "string"
            },
            "grammar_json_functions": {
                "$ref": "#/definitions/functions.JSONFunctionStructureFunction"
            },
            "grammar_json_name": {
                "$ref": "#/definitions/functions.JSONFunctionStructureName"
                "$ref": "#/definitions/functions.JSONFunctionStructure"
            },
            "ignore_eos": {
                "type": "boolean"
@@ -7,14 +7,6 @@ definitions:
    url:
      type: string
    type: object
  functions.Argument:
    properties:
      properties:
        additionalProperties: true
        type: object
      type:
        type: string
    type: object
  functions.Function:
    properties:
      description:
@@ -25,67 +17,28 @@ definitions:
        additionalProperties: true
        type: object
    type: object
  functions.FunctionName:
    properties:
      const:
        type: string
    type: object
  functions.FunctionProperties:
    properties:
      arguments:
        $ref: '#/definitions/functions.Argument'
      function:
        $ref: '#/definitions/functions.FunctionName'
    type: object
  functions.ItemFunction:
  functions.Item:
    properties:
      properties:
        $ref: '#/definitions/functions.FunctionProperties'
        additionalProperties: true
        type: object
      type:
        type: string
    type: object
  functions.ItemName:
    properties:
      properties:
        $ref: '#/definitions/functions.NameProperties'
      type:
        type: string
    type: object
  functions.JSONFunctionStructureFunction:
  functions.JSONFunctionStructure:
    properties:
      $defs:
        additionalProperties: true
        type: object
      anyOf:
        items:
          $ref: '#/definitions/functions.ItemFunction'
          $ref: '#/definitions/functions.Item'
        type: array
      oneOf:
        items:
          $ref: '#/definitions/functions.ItemFunction'
          $ref: '#/definitions/functions.Item'
        type: array
    type: object
  functions.JSONFunctionStructureName:
    properties:
      $defs:
        additionalProperties: true
        type: object
      anyOf:
        items:
          $ref: '#/definitions/functions.ItemName'
        type: array
      oneOf:
        items:
          $ref: '#/definitions/functions.ItemName'
        type: array
    type: object
  functions.NameProperties:
    properties:
      arguments:
        $ref: '#/definitions/functions.Argument'
      name:
        $ref: '#/definitions/functions.FunctionName'
    type: object
  functions.Tool:
    properties:
      function:
@@ -538,9 +491,7 @@ definitions:
      description: A grammar to constrain the LLM output
      type: string
    grammar_json_functions:
      $ref: '#/definitions/functions.JSONFunctionStructureFunction'
    grammar_json_name:
      $ref: '#/definitions/functions.JSONFunctionStructureName'
      $ref: '#/definitions/functions.JSONFunctionStructure'
    ignore_eos:
      type: boolean
    input: {}