Compare commits

...

9 Commits

Author SHA1 Message Date
Ettore Di Giacinto
4ebf3c7ac4 Merge branch 'master' into disable_grammar_by_default 2024-07-19 09:28:00 +02:00
Ettore Di Giacinto
1d605073a4 models(gallery): add big-tiger-gemma-27b-v1 (#2918)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-07-19 09:10:08 +02:00
Ettore Di Giacinto
2a96232f99 feat(grammar): mark grammar disabled by default
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-07-19 09:07:29 +02:00
Dave
fc29c04f82 groundwork: add pkg/concurrency and the associated test file (#2745)
groundwork: add pkg/concurrency and the associated test case

Signed-off-by: Dave Lee <dave@gray101.com>
2024-07-18 23:29:21 +00:00
Ettore Di Giacinto
63fc22baab Update comment-pr.yaml
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2024-07-18 23:25:03 +02:00
LocalAI [bot]
6a919b30ac chore: ⬆️ Update ggerganov/llama.cpp (#2917)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-07-18 23:21:17 +02:00
LocalAI [bot]
3f7ec2e596 feat(swagger): update swagger (#2916)
Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-07-18 23:20:52 +02:00
Ettore Di Giacinto
82d5123c1e Update comment-pr.yaml
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2024-07-18 23:20:37 +02:00
Ettore Di Giacinto
252961751c feat(federation): add load balanced option (#2915)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-07-18 23:18:53 +02:00
16 changed files with 274 additions and 236 deletions

View File

@@ -12,6 +12,7 @@ jobs:
 uses: actions/checkout@v3
 with:
 ref: "${{ github.event.pull_request.merge_commit_sha }}"
+fetch-depth: 0 # needed to checkout all branches for this Action to work
 - uses: mudler/localai-github-action@v1
 with:
 model: 'hermes-2-theta-llama-3-8b' # Any from models.localai.io, or from huggingface.com with: "huggingface://<repository>/file"
@@ -22,6 +23,7 @@ jobs:
 json_diff_file_output: diff.json
 raw_diff_file_output: diff.txt
 file_output_only: "true"
+base_branch: ${{ github.event.pull_request.base.sha }}
 - name: Show diff
 env:
 DIFF: ${{ steps.git-diff-action.outputs.raw-diff-path }}

View File

@@ -8,7 +8,7 @@ DETECT_LIBS?=true
# llama.cpp versions
GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
-CPPLLAMA_VERSION?=b3283448ce9a5098226afe1d8648ccc578511fe4
+CPPLLAMA_VERSION?=705b7ecf60e667ced57c15d67aa86865e3cc7aa7
# gpt4all version
GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all

View File

@@ -10,11 +10,12 @@ import (
type FederatedCLI struct {
Address string `env:"LOCALAI_ADDRESS,ADDRESS" default:":8080" help:"Bind address for the API server" group:"api"`
Peer2PeerToken string `env:"LOCALAI_P2P_TOKEN,P2P_TOKEN,TOKEN" name:"p2ptoken" help:"Token for P2P mode (optional)" group:"p2p"`
+LoadBalanced bool `env:"LOCALAI_LOAD_BALANCED,LOAD_BALANCED" default:"false" help:"Enable load balancing" group:"p2p"`
}
func (f *FederatedCLI) Run(ctx *cliContext.Context) error {
-fs := p2p.NewFederatedServer(f.Address, p2p.FederatedID, f.Peer2PeerToken)
+fs := p2p.NewFederatedServer(f.Address, p2p.FederatedID, f.Peer2PeerToken, f.LoadBalanced)
return fs.Start(context.Background())
}

View File

@@ -201,7 +201,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
}
switch {
-case !config.FunctionsConfig.GrammarConfig.NoGrammar && shouldUseFn:
+case config.FunctionsConfig.GrammarConfig.EnableGrammar && shouldUseFn:
noActionGrammar := functions.Function{
Name: noActionName,
Description: noActionDescription,

View File

@@ -4,12 +4,44 @@ const FederatedID = "federated"
type FederatedServer struct {
listenAddr, service, p2ptoken string
+requestTable map[string]int
+loadBalanced bool
}
-func NewFederatedServer(listenAddr, service, p2pToken string) *FederatedServer {
+func NewFederatedServer(listenAddr, service, p2pToken string, loadBalanced bool) *FederatedServer {
return &FederatedServer{
-listenAddr: listenAddr,
-service: service,
-p2ptoken: p2pToken,
+listenAddr: listenAddr,
+service: service,
+p2ptoken: p2pToken,
+requestTable: map[string]int{},
+loadBalanced: loadBalanced,
}
}
+func (fs *FederatedServer) SelectLeastUsedServer() string {
+// cycle over requestTable and find the entry with the lower number
+// if there are multiple entries with the same number, select one randomly
+// if there are no entries, return an empty string
+var min int
+var minKey string
+for k, v := range fs.requestTable {
+if min == 0 || v < min {
+min = v
+minKey = k
+}
+}
+return minKey
+}
+func (fs *FederatedServer) RecordRequest(nodeID string) {
+// increment the counter for the nodeID in the requestTable
+fs.requestTable[nodeID]++
+}
+func (fs *FederatedServer) EnsureRecordExist(nodeID string) {
+// if the nodeID is not in the requestTable, add it with a counter of 0
+_, ok := fs.requestTable[nodeID]
+if !ok {
+fs.requestTable[nodeID] = 0
+}
+}
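Taken together, the new helpers implement the least-used selection that the proxy hunk below switches to: every known tunnel is seeded with EnsureRecordExist, SelectLeastUsedServer picks the address with the lowest counter, and RecordRequest bumps it. A minimal, hypothetical sketch of that flow in isolation (the import path, token, and tunnel addresses are assumptions; only the methods added in this hunk are used):

package main

import (
	"fmt"

	"github.com/mudler/LocalAI/core/p2p" // assumed package path for the FederatedServer shown above
)

func main() {
	fs := p2p.NewFederatedServer(":8080", p2p.FederatedID, "dummy-token", true)

	tunnels := []string{"127.0.0.1:40001", "127.0.0.1:40002"} // placeholder tunnel addresses
	for i := 0; i < 4; i++ {
		for _, t := range tunnels {
			fs.EnsureRecordExist(t) // every known tunnel gets a counter, starting at 0
		}
		target := fs.SelectLeastUsedServer() // address with the lowest request count so far
		fs.RecordRequest(target)             // bump the counter for the chosen tunnel
		fmt.Println("forwarding to", target)
	}
}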

View File

@@ -100,10 +100,23 @@ func (fs *FederatedServer) proxy(ctx context.Context, node *node.Node) error {
return
}
// open a TCP stream to one of the tunnels
// chosen randomly
// TODO: optimize this and track usage
-tunnelAddr := tunnelAddresses[rand.IntN(len(tunnelAddresses))]
+tunnelAddr := ""
+if fs.loadBalanced {
+for _, t := range tunnelAddresses {
+fs.EnsureRecordExist(t)
+}
+tunnelAddr = fs.SelectLeastUsedServer()
+log.Debug().Msgf("Selected tunnel %s", tunnelAddr)
+if tunnelAddr == "" {
+tunnelAddr = tunnelAddresses[rand.IntN(len(tunnelAddresses))]
+}
+fs.RecordRequest(tunnelAddr)
+} else {
+tunnelAddr = tunnelAddresses[rand.IntN(len(tunnelAddresses))]
+}
tunnelConn, err := net.Dial("tcp", tunnelAddr)
if err != nil {

View File

@@ -579,6 +579,21 @@
 - filename: EZO-Common-9B-gemma-2-it.Q4_K_M.gguf
 sha256: 57678b1828673dccb15f76e52b00672c74aa6169421bbb8620b8955955322cfd
 uri: huggingface://QuantFactory/EZO-Common-9B-gemma-2-it-GGUF/EZO-Common-9B-gemma-2-it.Q4_K_M.gguf
+- !!merge <<: *gemma
+name: "big-tiger-gemma-27b-v1"
+icon: https://cdn-uploads.huggingface.co/production/uploads/65f2fd1c25b848bd061b5c2e/A97OlLKeT4XOnv4IG1b6m.png
+urls:
+- https://huggingface.co/TheDrummer/Big-Tiger-Gemma-27B-v1
+- https://huggingface.co/TheDrummer/Big-Tiger-Gemma-27B-v1-GGUF
+description: |
+Big Tiger Gemma 27B v1 is a Decensored Gemma 27B model with no refusals, except for some rare instances from the 9B model. It does not appear to have any brain damage. The model is available from various sources, including Hugging Face, and comes in different variations such as GGUF, iMatrix, and EXL2.
+overrides:
+parameters:
+model: Big-Tiger-Gemma-27B-v1c-Q4_K_M.gguf
+files:
+- filename: Big-Tiger-Gemma-27B-v1c-Q4_K_M.gguf
+sha256: c5fc5605d36ae280c1c908c9b4bcb12b28abbe2692f317edeb83ab1104657fe5
+uri: huggingface://TheDrummer/Big-Tiger-Gemma-27B-v1-GGUF/Big-Tiger-Gemma-27B-v1c-Q4_K_M.gguf
 - &llama3
 url: "github:mudler/LocalAI/gallery/llama3-instruct.yaml@master"
 icon: https://cdn-uploads.huggingface.co/production/uploads/642cc1c253e76b4c2286c58e/aJJxKus1wP5N-euvHEUq7.png

View File

@@ -10,7 +10,8 @@ config_file: |-
 - <|end_of_text|>
 function:
 return_name_in_function_response: true
+grammar:
+enable: true
 template:
 chat: |

View File

@@ -0,0 +1,13 @@
package concurrency
import (
"testing"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)
func TestConcurrency(t *testing.T) {
RegisterFailHandler(Fail)
RunSpecs(t, "Concurrency test suite")
}

View File

@@ -0,0 +1,69 @@
package concurrency
import (
"context"
"sync"
)
// This is a Read-ONLY structure that contains the result of an arbitrary asynchronous action
type JobResult[RequestType any, ResultType any] struct {
request *RequestType
result *ResultType
err error
once sync.Once
done *chan struct{}
}
// This structure is returned in a pair with a JobResult and serves as the structure that has access to be updated.
type WritableJobResult[RequestType any, ResultType any] struct {
*JobResult[RequestType, ResultType]
}
// Wait blocks until the result is ready and then returns the result, or the context expires.
// Returns *ResultType instead of ResultType since its possible we have only an error and nil for ResultType.
// Is this correct and idiomatic?
func (jr *JobResult[RequestType, ResultType]) Wait(ctx context.Context) (*ResultType, error) {
if jr.done == nil { // If the channel is blanked out, result is ready.
return jr.result, jr.err
}
select {
case <-*jr.done: // Wait for the result to be ready
jr.done = nil
if jr.err != nil {
return nil, jr.err
}
return jr.result, nil
case <-ctx.Done():
return nil, ctx.Err()
}
}
// Accessor function to allow holders of JobResults to access the associated request, without allowing the pointer to be updated.
func (jr *JobResult[RequestType, ResultType]) Request() *RequestType {
return jr.request
}
// This is the function that actually updates the Result and Error on the JobResult... but it's normally not accessible
func (jr *JobResult[RequestType, ResultType]) setResult(result ResultType, err error) {
jr.once.Do(func() {
jr.result = &result
jr.err = err
close(*jr.done) // Signal that the result is ready - since this is only ran once, jr.done cannot be set to nil yet.
})
}
// Only the WritableJobResult can actually call setResult - prevents accidental corruption
func (wjr *WritableJobResult[RequestType, ResultType]) SetResult(result ResultType, err error) {
wjr.JobResult.setResult(result, err)
}
// NewJobResult binds a request to a matched pair of JobResult and WritableJobResult
func NewJobResult[RequestType any, ResultType any](request RequestType) (*JobResult[RequestType, ResultType], *WritableJobResult[RequestType, ResultType]) {
done := make(chan struct{})
jr := &JobResult[RequestType, ResultType]{
once: sync.Once{},
request: &request,
done: &done,
}
return jr, &WritableJobResult[RequestType, ResultType]{JobResult: jr}
}
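NewJobResult splits a single pending result into a read-only half for the caller and a writable half for the worker; Wait blocks on the shared done channel until SetResult fires or the caller's context expires. The Ginkgo specs in the next file exercise this, but a plain-Go usage sketch may be quicker to scan (hypothetical example; the import path is taken from the test file below):

package main

import (
	"context"
	"fmt"
	"time"

	"github.com/mudler/LocalAI/pkg/concurrency"
)

func main() {
	// Read-only half stays with the caller, writable half goes to the worker goroutine.
	jr, wjr := concurrency.NewJobResult[string, int]("how long is this request?")

	go func() {
		time.Sleep(100 * time.Millisecond)      // pretend to do some work
		wjr.SetResult(len(*wjr.Request()), nil) // only the writable side can publish
	}()

	ctx, cancel := context.WithTimeout(context.Background(), time.Second)
	defer cancel()

	res, err := jr.Wait(ctx) // returns once SetResult runs or the context expires
	if err != nil {
		fmt.Println("error:", err)
		return
	}
	fmt.Println("result:", *res) // prints the length of the request string
}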

View File

@@ -0,0 +1,80 @@
package concurrency_test
import (
"context"
"fmt"
"time"
. "github.com/mudler/LocalAI/pkg/concurrency"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)
var _ = Describe("pkg/concurrency unit tests", func() {
It("can be used to recieve a result across goroutines", func() {
jr, wjr := NewJobResult[string, string]("foo")
Expect(jr).ToNot(BeNil())
Expect(wjr).ToNot(BeNil())
go func(wjr *WritableJobResult[string, string]) {
time.Sleep(time.Second * 5)
wjr.SetResult("bar", nil)
}(wjr)
resPtr, err := jr.Wait(context.Background())
Expect(err).To(BeNil())
Expect(jr.Request).ToNot(BeNil())
Expect(*jr.Request()).To(Equal("foo"))
Expect(resPtr).ToNot(BeNil())
Expect(*resPtr).To(Equal("bar"))
})
It("can be used to recieve an error across goroutines", func() {
jr, wjr := NewJobResult[string, string]("foo")
Expect(jr).ToNot(BeNil())
Expect(wjr).ToNot(BeNil())
go func(wjr *WritableJobResult[string, string]) {
time.Sleep(time.Second * 5)
wjr.SetResult("", fmt.Errorf("test"))
}(wjr)
_, err := jr.Wait(context.Background())
Expect(jr.Request).ToNot(BeNil())
Expect(*jr.Request()).To(Equal("foo"))
Expect(err).ToNot(BeNil())
Expect(err).To(MatchError("test"))
})
It("can properly handle timeouts", func() {
jr, wjr := NewJobResult[string, string]("foo")
Expect(jr).ToNot(BeNil())
Expect(wjr).ToNot(BeNil())
go func(wjr *WritableJobResult[string, string]) {
time.Sleep(time.Second * 5)
wjr.SetResult("bar", nil)
}(wjr)
timeout1s, c1 := context.WithTimeoutCause(context.Background(), time.Second, fmt.Errorf("timeout"))
timeout10s, c2 := context.WithTimeoutCause(context.Background(), time.Second*10, fmt.Errorf("timeout"))
_, err := jr.Wait(timeout1s)
Expect(jr.Request).ToNot(BeNil())
Expect(*jr.Request()).To(Equal("foo"))
Expect(err).ToNot(BeNil())
Expect(err).To(MatchError(context.DeadlineExceeded))
resPtr, err := jr.Wait(timeout10s)
Expect(jr.Request).ToNot(BeNil())
Expect(*jr.Request()).To(Equal("foo"))
Expect(err).To(BeNil())
Expect(resPtr).ToNot(BeNil())
Expect(*resPtr).To(Equal("bar"))
// Is this needed? Cleanup Either Way.
c1()
c2()
})
})

View File

@@ -0,0 +1,13 @@
package downloader
import (
"testing"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)
func TestDownloader(t *testing.T) {
RegisterFailHandler(Fail)
RunSpecs(t, "Downloader test suite")
}

View File

@@ -25,8 +25,8 @@ type GrammarConfig struct {
// In this way if the LLM selects a free string, it won't be mixed necessarly with JSON objects
NoMixedFreeString bool `yaml:"no_mixed_free_string"`
-// NoGrammar disables the grammar parsing and parses the responses directly from the LLM
-NoGrammar bool `yaml:"disable"`
+// EnableGrammar disables the grammar parsing and parses the responses directly from the LLM
+EnableGrammar bool `yaml:"enable"`
// Prefix is the suffix to append to the grammar when being generated
// This is useful when models prepend a tag before returning JSON
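Note that the rename also flips the default: with NoGrammar the zero value meant grammar on, while with EnableGrammar the zero value means grammar off, which is why the chat endpoint gate above changed and why the model config earlier in this compare gains an explicit grammar: enable: true block. A small hypothetical illustration of the zero-value behavior (it assumes GrammarConfig is exported from the functions package, as the other functions.* types in this compare suggest):

package main

import (
	"fmt"

	"github.com/mudler/LocalAI/pkg/functions" // assumed location of GrammarConfig
)

func main() {
	// Zero value, i.e. a model config without any grammar: section.
	cfg := functions.GrammarConfig{}

	// Before this change the gate was !cfg.NoGrammar, which is true for the zero value.
	// After it, grammar-constrained function calling runs only when explicitly enabled.
	fmt.Println("grammar enabled:", cfg.EnableGrammar) // prints: grammar enabled: false
}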

View File

@@ -700,18 +700,6 @@ const docTemplate = `{
}
}
},
"functions.Argument": {
"type": "object",
"properties": {
"properties": {
"type": "object",
"additionalProperties": true
},
"type": {
"type": "string"
}
}
},
"functions.Function": {
"type": "object",
"properties": {
@@ -727,48 +715,19 @@ const docTemplate = `{
}
}
},
"functions.FunctionName": {
"type": "object",
"properties": {
"const": {
"type": "string"
}
}
},
"functions.FunctionProperties": {
"type": "object",
"properties": {
"arguments": {
"$ref": "#/definitions/functions.Argument"
},
"function": {
"$ref": "#/definitions/functions.FunctionName"
}
}
},
"functions.ItemFunction": {
"functions.Item": {
"type": "object",
"properties": {
"properties": {
"$ref": "#/definitions/functions.FunctionProperties"
"type": "object",
"additionalProperties": true
},
"type": {
"type": "string"
}
}
},
"functions.ItemName": {
"type": "object",
"properties": {
"properties": {
"$ref": "#/definitions/functions.NameProperties"
},
"type": {
"type": "string"
}
}
},
"functions.JSONFunctionStructureFunction": {
"functions.JSONFunctionStructure": {
"type": "object",
"properties": {
"$defs": {
@@ -778,49 +737,17 @@ const docTemplate = `{
"anyOf": {
"type": "array",
"items": {
"$ref": "#/definitions/functions.ItemFunction"
"$ref": "#/definitions/functions.Item"
}
},
"oneOf": {
"type": "array",
"items": {
"$ref": "#/definitions/functions.ItemFunction"
"$ref": "#/definitions/functions.Item"
}
}
}
},
"functions.JSONFunctionStructureName": {
"type": "object",
"properties": {
"$defs": {
"type": "object",
"additionalProperties": true
},
"anyOf": {
"type": "array",
"items": {
"$ref": "#/definitions/functions.ItemName"
}
},
"oneOf": {
"type": "array",
"items": {
"$ref": "#/definitions/functions.ItemName"
}
}
}
},
"functions.NameProperties": {
"type": "object",
"properties": {
"arguments": {
"$ref": "#/definitions/functions.Argument"
},
"name": {
"$ref": "#/definitions/functions.FunctionName"
}
}
},
"functions.Tool": {
"type": "object",
"properties": {
@@ -1488,10 +1415,7 @@ const docTemplate = `{
"type": "string"
},
"grammar_json_functions": {
"$ref": "#/definitions/functions.JSONFunctionStructureFunction"
},
"grammar_json_name": {
"$ref": "#/definitions/functions.JSONFunctionStructureName"
"$ref": "#/definitions/functions.JSONFunctionStructure"
},
"ignore_eos": {
"type": "boolean"

View File

@@ -693,18 +693,6 @@
}
}
},
"functions.Argument": {
"type": "object",
"properties": {
"properties": {
"type": "object",
"additionalProperties": true
},
"type": {
"type": "string"
}
}
},
"functions.Function": {
"type": "object",
"properties": {
@@ -720,48 +708,19 @@
}
}
},
"functions.FunctionName": {
"type": "object",
"properties": {
"const": {
"type": "string"
}
}
},
"functions.FunctionProperties": {
"type": "object",
"properties": {
"arguments": {
"$ref": "#/definitions/functions.Argument"
},
"function": {
"$ref": "#/definitions/functions.FunctionName"
}
}
},
"functions.ItemFunction": {
"functions.Item": {
"type": "object",
"properties": {
"properties": {
"$ref": "#/definitions/functions.FunctionProperties"
"type": "object",
"additionalProperties": true
},
"type": {
"type": "string"
}
}
},
"functions.ItemName": {
"type": "object",
"properties": {
"properties": {
"$ref": "#/definitions/functions.NameProperties"
},
"type": {
"type": "string"
}
}
},
"functions.JSONFunctionStructureFunction": {
"functions.JSONFunctionStructure": {
"type": "object",
"properties": {
"$defs": {
@@ -771,49 +730,17 @@
"anyOf": {
"type": "array",
"items": {
"$ref": "#/definitions/functions.ItemFunction"
"$ref": "#/definitions/functions.Item"
}
},
"oneOf": {
"type": "array",
"items": {
"$ref": "#/definitions/functions.ItemFunction"
"$ref": "#/definitions/functions.Item"
}
}
}
},
"functions.JSONFunctionStructureName": {
"type": "object",
"properties": {
"$defs": {
"type": "object",
"additionalProperties": true
},
"anyOf": {
"type": "array",
"items": {
"$ref": "#/definitions/functions.ItemName"
}
},
"oneOf": {
"type": "array",
"items": {
"$ref": "#/definitions/functions.ItemName"
}
}
}
},
"functions.NameProperties": {
"type": "object",
"properties": {
"arguments": {
"$ref": "#/definitions/functions.Argument"
},
"name": {
"$ref": "#/definitions/functions.FunctionName"
}
}
},
"functions.Tool": {
"type": "object",
"properties": {
@@ -1481,10 +1408,7 @@
"type": "string"
},
"grammar_json_functions": {
"$ref": "#/definitions/functions.JSONFunctionStructureFunction"
},
"grammar_json_name": {
"$ref": "#/definitions/functions.JSONFunctionStructureName"
"$ref": "#/definitions/functions.JSONFunctionStructure"
},
"ignore_eos": {
"type": "boolean"

View File

@@ -7,14 +7,6 @@ definitions:
url:
type: string
type: object
-functions.Argument:
-properties:
-properties:
-additionalProperties: true
-type: object
-type:
-type: string
-type: object
functions.Function:
properties:
description:
@@ -25,67 +17,28 @@ definitions:
additionalProperties: true
type: object
type: object
-functions.FunctionName:
-properties:
-const:
-type: string
-type: object
-functions.FunctionProperties:
-properties:
-arguments:
-$ref: '#/definitions/functions.Argument'
-function:
-$ref: '#/definitions/functions.FunctionName'
-type: object
-functions.ItemFunction:
+functions.Item:
properties:
properties:
-$ref: '#/definitions/functions.FunctionProperties'
+additionalProperties: true
+type: object
type:
type: string
type: object
-functions.ItemName:
-properties:
-properties:
-$ref: '#/definitions/functions.NameProperties'
-type:
-type: string
-type: object
-functions.JSONFunctionStructureFunction:
+functions.JSONFunctionStructure:
properties:
$defs:
additionalProperties: true
type: object
anyOf:
items:
-$ref: '#/definitions/functions.ItemFunction'
+$ref: '#/definitions/functions.Item'
type: array
oneOf:
items:
-$ref: '#/definitions/functions.ItemFunction'
+$ref: '#/definitions/functions.Item'
type: array
type: object
-functions.JSONFunctionStructureName:
-properties:
-$defs:
-additionalProperties: true
-type: object
-anyOf:
-items:
-$ref: '#/definitions/functions.ItemName'
-type: array
-oneOf:
-items:
-$ref: '#/definitions/functions.ItemName'
-type: array
-type: object
-functions.NameProperties:
-properties:
-arguments:
-$ref: '#/definitions/functions.Argument'
-name:
-$ref: '#/definitions/functions.FunctionName'
-type: object
functions.Tool:
properties:
function:
@@ -538,9 +491,7 @@ definitions:
description: A grammar to constrain the LLM output
type: string
grammar_json_functions:
-$ref: '#/definitions/functions.JSONFunctionStructureFunction'
-grammar_json_name:
-$ref: '#/definitions/functions.JSONFunctionStructureName'
+$ref: '#/definitions/functions.JSONFunctionStructure'
ignore_eos:
type: boolean
input: {}