mirror of
https://github.com/mudler/LocalAI.git
synced 2026-02-04 03:32:40 -05:00
Compare commits
5 Commits
fix_sycl
...
llama31_gr
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
ff55f311cb | ||
|
|
0802895cd2 | ||
|
|
9fee46207a | ||
|
|
bd900945f7 | ||
|
|
89484efaed |
2
Makefile
2
Makefile
@@ -8,7 +8,7 @@ DETECT_LIBS?=true
|
||||
# llama.cpp versions
|
||||
GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
|
||||
GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
|
||||
CPPLLAMA_VERSION?=b841d0740855c5af1344a81f261139a45a2b39ee
|
||||
CPPLLAMA_VERSION?=081fe431aa8fb6307145c4feb3eed4f48cab19f8
|
||||
|
||||
# gpt4all version
|
||||
GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
|
||||
|
||||
@@ -75,24 +75,11 @@ add_library(hw_grpc_proto
|
||||
${hw_proto_hdrs} )
|
||||
|
||||
add_executable(${TARGET} grpc-server.cpp utils.hpp json.hpp)
|
||||
|
||||
# Conditionally link SYCL to grpc-server
|
||||
# https://github.com/ggerganov/llama.cpp/issues/8665
|
||||
if ( DEFINED ENV{ONEAPI_ROOT})
|
||||
target_link_libraries(${TARGET} PRIVATE common llama myclip ${CMAKE_THREAD_LIBS_INIT} absl::flags hw_grpc_proto
|
||||
absl::flags_parse
|
||||
gRPC::${_REFLECTION}
|
||||
gRPC::${_GRPC_GRPCPP}
|
||||
protobuf::${_PROTOBUF_LIBPROTOBUF}
|
||||
sycl)
|
||||
else()
|
||||
target_link_libraries(${TARGET} PRIVATE common llama myclip ${CMAKE_THREAD_LIBS_INIT} absl::flags hw_grpc_proto
|
||||
absl::flags_parse
|
||||
gRPC::${_REFLECTION}
|
||||
gRPC::${_GRPC_GRPCPP}
|
||||
protobuf::${_PROTOBUF_LIBPROTOBUF})
|
||||
endif()
|
||||
|
||||
target_link_libraries(${TARGET} PRIVATE common llama myclip ${CMAKE_THREAD_LIBS_INIT} absl::flags hw_grpc_proto
|
||||
absl::flags_parse
|
||||
gRPC::${_REFLECTION}
|
||||
gRPC::${_GRPC_GRPCPP}
|
||||
protobuf::${_PROTOBUF_LIBPROTOBUF})
|
||||
target_compile_features(${TARGET} PRIVATE cxx_std_11)
|
||||
if(TARGET BUILD_INFO)
|
||||
add_dependencies(${TARGET} BUILD_INFO)
|
||||
|
||||
@@ -1,8 +0,0 @@
|
||||
# https://github.com/ggerganov/llama.cpp/issues/8665
|
||||
|
||||
add_executable(rpc-server rpc-server.cpp)
|
||||
if ( DEFINED ENV{ONEAPI_ROOT})
|
||||
target_link_libraries(rpc-server PRIVATE ggml llama sycl)
|
||||
else()
|
||||
target_link_libraries(rpc-server PRIVATE ggml llama)
|
||||
endif()
|
||||
@@ -2259,7 +2259,6 @@ static void params_parse(const backend::ModelOptions* request,
|
||||
// get the directory of modelfile
|
||||
std::string model_dir = params.model.substr(0, params.model.find_last_of("/\\"));
|
||||
params.lora_adapter.push_back(std::make_tuple(model_dir + "/"+request->loraadapter(), scale_factor));
|
||||
params.lora_base = model_dir + "/"+request->lorabase();
|
||||
}
|
||||
params.use_mlock = request->mlock();
|
||||
params.use_mmap = request->mmap();
|
||||
|
||||
@@ -17,7 +17,4 @@ cp -rfv llama.cpp/examples/llava/clip.h llama.cpp/examples/grpc-server/clip.h
|
||||
cp -rfv llama.cpp/examples/llava/llava.cpp llama.cpp/examples/grpc-server/llava.cpp
|
||||
echo '#include "llama.h"' > llama.cpp/examples/grpc-server/llava.h
|
||||
cat llama.cpp/examples/llava/llava.h >> llama.cpp/examples/grpc-server/llava.h
|
||||
cp -rfv llama.cpp/examples/llava/clip.cpp llama.cpp/examples/grpc-server/clip.cpp
|
||||
|
||||
# https://github.com/ggerganov/llama.cpp/issues/8665
|
||||
cp -rfv CMakeLists.txt.rpc-8662 llama.cpp/examples/rpc/CMakeLists.txt
|
||||
cp -rfv llama.cpp/examples/llava/clip.cpp llama.cpp/examples/grpc-server/clip.cpp
|
||||
@@ -122,12 +122,6 @@ The server logs should indicate that new workers are being discovered.
|
||||
|
||||

|
||||
|
||||
## Notes
|
||||
|
||||
- If running in p2p mode with container images, make sure you start the container with `--net host` or `network_mode: host` in the docker-compose file.
|
||||
- Only a single model is supported currently.
|
||||
- Ensure the server detects new workers before starting inference. Currently, additional workers cannot be added once inference has begun.
|
||||
- For more details on the implementation, refer to [LocalAI pull request #2343](https://github.com/mudler/LocalAI/pull/2343)
|
||||
|
||||
## Environment Variables
|
||||
|
||||
@@ -138,3 +132,20 @@ There are options that can be tweaked or parameters that can be set using enviro
|
||||
| **LOCALAI_P2P_DISABLE_DHT** | Set to "true" to disable DHT and enable p2p layer to be local only (mDNS) |
|
||||
| **LOCALAI_P2P_DISABLE_LIMITS** | Set to "true" to disable connection limits and resources management |
|
||||
| **LOCALAI_P2P_TOKEN** | Set the token for the p2p network |
|
||||
|
||||
## Architecture
|
||||
|
||||
LocalAI uses https://github.com/libp2p/go-libp2p under the hood, the same project that powers IPFS. Unlike other frameworks, LocalAI's peer-to-peer layer does not rely on a single master server; instead, it uses sub/gossip and ledger functionalities to achieve consensus across the different peers.
|
||||
|
||||
[EdgeVPN](https://github.com/mudler/edgevpn) is used as a library to establish the network and expose the ledger functionality under a shared token, which eases automatic discovery and allows separate, private peer-to-peer networks.
|
||||
|
||||
When running in worker mode, the weights are split proportionally to the memory; in federation mode, each request is split to every node, each of which has to load the model fully.
|
||||
|
||||
## Notes
|
||||
|
||||
- If running in p2p mode with container images, make sure you start the container with `--net host` or `network_mode: host` in the docker-compose file.
|
||||
- Only a single model is supported currently.
|
||||
- Ensure the server detects new workers before starting inference. Currently, additional workers cannot be added once inference has begun.
|
||||
- For more details on the implementation, refer to [LocalAI pull request #2343](https://github.com/mudler/LocalAI/pull/2343)
|
||||
|
||||
|
||||
|
||||
@@ -1,4 +1,44 @@
|
||||
---
|
||||
## LLama3.1
|
||||
- &llama31
|
||||
url: "github:mudler/LocalAI/gallery/llama3-instruct.yaml@master"
|
||||
icon: https://cdn-uploads.huggingface.co/production/uploads/642cc1c253e76b4c2286c58e/aJJxKus1wP5N-euvHEUq7.png
|
||||
name: "meta-llama-3.1-8b-instruct"
|
||||
license: llama3.1
|
||||
description: |
|
||||
The Meta Llama 3.1 collection of multilingual large language models (LLMs) is a collection of pretrained and instruction tuned generative models in 8B, 70B and 405B sizes (text in/text out). The Llama 3.1 instruction tuned text only models (8B, 70B, 405B) are optimized for multilingual dialogue use cases and outperform many of the available open source and closed chat models on common industry benchmarks.
|
||||
|
||||
Model developer: Meta
|
||||
|
||||
Model Architecture: Llama 3.1 is an auto-regressive language model that uses an optimized transformer architecture. The tuned versions use supervised fine-tuning (SFT) and reinforcement learning with human feedback (RLHF) to align with human preferences for helpfulness and safety.
|
||||
urls:
|
||||
- https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct
|
||||
- https://huggingface.co/MaziyarPanahi/Meta-Llama-3.1-8B-Instruct-GGUF
|
||||
tags:
|
||||
- llm
|
||||
- gguf
|
||||
- gpu
|
||||
- cpu
|
||||
- llama3.1
|
||||
overrides:
|
||||
parameters:
|
||||
model: Meta-Llama-3.1-8B-Instruct.Q4_K_M.gguf
|
||||
files:
|
||||
- filename: Meta-Llama-3.1-8B-Instruct.Q4_K_M.gguf
|
||||
sha256: c2f17f44af962660d1ad4cb1af91a731f219f3b326c2b14441f9df1f347f2815
|
||||
uri: huggingface://MaziyarPanahi/Meta-Llama-3.1-8B-Instruct-GGUF/Meta-Llama-3.1-8B-Instruct.Q4_K_M.gguf
|
||||
- !!merge <<: *llama31
|
||||
name: "meta-llama-3.1-70b-instruct"
|
||||
urls:
|
||||
- https://huggingface.co/meta-llama/Meta-Llama-3.1-70B-Instruct
|
||||
- https://huggingface.co/MaziyarPanahi/Meta-Llama-3.1-70B-Instruct-GGUF
|
||||
overrides:
|
||||
parameters:
|
||||
model: Meta-Llama-3.1-70B-Instruct.Q4_K_M.gguf
|
||||
files:
|
||||
- filename: Meta-Llama-3.1-70B-Instruct.Q4_K_M.gguf
|
||||
sha256: 3f16ab17da4521fe3ed7c5d7beed960d3fe7b5b64421ee9650aa53d6b649ccab
|
||||
uri: huggingface://MaziyarPanahi/Meta-Llama-3.1-70B-Instruct-GGUF/Meta-Llama-3.1-70B-Instruct.Q4_K_M.gguf
|
||||
## Deepseek
|
||||
- &deepseek
|
||||
url: "github:mudler/LocalAI/gallery/deepseek.yaml@master"
|
||||
|
||||
47
pkg/functions/bnf_rules.go
Normal file
47
pkg/functions/bnf_rules.go
Normal file
@@ -0,0 +1,47 @@
|
||||
package functions
|
||||
|
||||
import "regexp"
|
||||
|
||||
var (
|
||||
PRIMITIVE_RULES = map[string]string{
|
||||
"boolean": `("true" | "false") space`,
|
||||
"number": `("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space`,
|
||||
"integer": `("-"? ([0-9] | [1-9] [0-9]*)) space`,
|
||||
"string": `"\"" (
|
||||
[^"\\] |
|
||||
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
|
||||
)* "\"" space`,
|
||||
// TODO: we shouldn't forbid \" and \\ or all unicode and have this branch here,
|
||||
// however, if we don't have it, the grammar will be ambiguous and
|
||||
// empirically results are way worse.
|
||||
"freestring": `(
|
||||
[^\x00] |
|
||||
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
|
||||
)* space`,
|
||||
"null": `"null" space`,
|
||||
}
|
||||
|
||||
INVALID_RULE_CHARS_RE = regexp.MustCompile(`[^a-zA-Z0-9-]+`)
|
||||
GRAMMAR_LITERAL_ESCAPE_RE = regexp.MustCompile(`[\r\n"]`)
|
||||
GRAMMAR_LITERAL_ESCAPES = map[string]string{
|
||||
"\r": `\r`,
|
||||
"\n": `\n`,
|
||||
`"`: `\"`,
|
||||
}
|
||||
)
|
||||
|
||||
const (
|
||||
SPACE_RULE = `" "?`
|
||||
|
||||
arrayNewLines = `arr ::=
|
||||
"[\n" (
|
||||
realvalue
|
||||
(",\n" realvalue)*
|
||||
)? "]"`
|
||||
|
||||
array = `arr ::=
|
||||
"[" (
|
||||
realvalue
|
||||
("," realvalue)*
|
||||
)? "]"`
|
||||
)
|
||||
22
pkg/functions/function_structure.go
Normal file
22
pkg/functions/function_structure.go
Normal file
@@ -0,0 +1,22 @@
|
||||
package functions
|
||||
|
||||
import "encoding/json"
|
||||
|
||||
// Item pairs a type name with a free-form property map. Both fields are
// always emitted when marshalled, under the JSON keys "type" and "properties".
type Item struct {
|
||||
Type string `json:"type"`
|
||||
Properties map[string]interface{} `json:"properties"`
|
||||
}
|
||||
|
||||
// JSONFunctionStructure holds lists of Item alternatives plus a definitions
// map. It marshals to the JSON keys "oneOf", "anyOf" and "$defs"; each key is
// omitted when its field is empty.
type JSONFunctionStructure struct {
|
||||
OneOf []Item `json:"oneOf,omitempty"`
|
||||
AnyOf []Item `json:"anyOf,omitempty"`
|
||||
Defs map[string]interface{} `json:"$defs,omitempty"`
|
||||
}
|
||||
|
||||
// Grammar marshals j to JSON and returns the result of GrammarFromBytes on a
// new JSONSchemaConverter. The options are applied locally to read PropOrder
// for the converter, and are also forwarded unchanged to GrammarFromBytes.
func (j JSONFunctionStructure) Grammar(options ...func(*GrammarOption)) string {
|
||||
grammarOpts := &GrammarOption{}
|
||||
grammarOpts.Apply(options...)
|
||||
|
||||
// The marshal error is discarded; dat is passed on as-is.
dat, _ := json.Marshal(j)
|
||||
return NewJSONSchemaConverter(grammarOpts.PropOrder).GrammarFromBytes(dat, options...)
|
||||
}
|
||||
@@ -18,6 +18,15 @@ type Function struct {
|
||||
}
|
||||
type Functions []Function
|
||||
|
||||
// FunctionName wraps a single string that marshals under the JSON key "const".
type FunctionName struct {
|
||||
Const string `json:"const"`
|
||||
}
|
||||
|
||||
// Argument pairs a type name with a free-form property map, marshalled under
// the JSON keys "type" and "properties".
type Argument struct {
|
||||
Type string `json:"type"`
|
||||
Properties map[string]interface{} `json:"properties"`
|
||||
}
|
||||
|
||||
type Tool struct {
|
||||
Type string `json:"type"`
|
||||
Function Function `json:"function,omitempty"`
|
||||
@@ -86,3 +95,8 @@ func (f Functions) Select(name string) Functions {
|
||||
|
||||
return funcs
|
||||
}
|
||||
|
||||
// jsonString returns the JSON encoding of v as a string.
// The error from json.Marshal is discarded, so a value that cannot be
// encoded is not reported to the caller.
func jsonString(v interface{}) string {
|
||||
b, _ := json.Marshal(v)
|
||||
return string(b)
|
||||
}
|
||||
|
||||
@@ -1,8 +1,10 @@
|
||||
package functions
|
||||
package functions_test
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
. "github.com/mudler/LocalAI/pkg/functions"
|
||||
|
||||
. "github.com/onsi/ginkgo/v2"
|
||||
. "github.com/onsi/gomega"
|
||||
)
|
||||
@@ -11,3 +13,13 @@ func TestGrammar(t *testing.T) {
|
||||
RegisterFailHandler(Fail)
|
||||
RunSpecs(t, "Grammar test suite")
|
||||
}
|
||||
|
||||
// createFunction builds a two-entry property map:
//   - field1 maps to FunctionName{Const: name}
//   - field2 maps to an Argument of Type "object" carrying the given properties
//
// field1 and field2 are the map keys to use for those two entries.
func createFunction(field1 string, field2 string, name string, properties map[string]interface{}) map[string]interface{} {
|
||||
property := map[string]interface{}{}
|
||||
property[field1] = FunctionName{Const: name}
|
||||
property[field2] = Argument{
|
||||
Type: "object",
|
||||
Properties: properties,
|
||||
}
|
||||
return property
|
||||
}
|
||||
|
||||
@@ -5,70 +5,12 @@ package functions
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"regexp"
|
||||
"sort"
|
||||
"strings"
|
||||
|
||||
"github.com/mudler/LocalAI/pkg/utils"
|
||||
)
|
||||
|
||||
const (
|
||||
JSONBNF = `root ::= object
|
||||
value ::= object | array | string | number | ("true" | "false" | "null") ws
|
||||
|
||||
object ::=
|
||||
"{" ws (
|
||||
string ":" ws value
|
||||
("," ws string ":" ws value)*
|
||||
)? "}" ws
|
||||
|
||||
array ::=
|
||||
"[" ws (
|
||||
value
|
||||
("," ws value)*
|
||||
)? "]" ws
|
||||
|
||||
string ::=
|
||||
"\"" (
|
||||
[^"\\] |
|
||||
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) # escapes
|
||||
)* "\"" ws
|
||||
|
||||
number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? ws
|
||||
|
||||
ws ::= ([ \t\n] ws)?`
|
||||
)
|
||||
|
||||
var (
|
||||
SPACE_RULE = `" "?`
|
||||
|
||||
PRIMITIVE_RULES = map[string]string{
|
||||
"boolean": `("true" | "false") space`,
|
||||
"number": `("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space`,
|
||||
"integer": `("-"? ([0-9] | [1-9] [0-9]*)) space`,
|
||||
"string": `"\"" (
|
||||
[^"\\] |
|
||||
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
|
||||
)* "\"" space`,
|
||||
// TODO: we shouldn't forbid \" and \\ or all unicode and have this branch here,
|
||||
// however, if we don't have it, the grammar will be ambiguous and
|
||||
// empirically results are way worse.
|
||||
"freestring": `(
|
||||
[^\x00] |
|
||||
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
|
||||
)* space`,
|
||||
"null": `"null" space`,
|
||||
}
|
||||
|
||||
INVALID_RULE_CHARS_RE = regexp.MustCompile(`[^a-zA-Z0-9-]+`)
|
||||
GRAMMAR_LITERAL_ESCAPE_RE = regexp.MustCompile(`[\r\n"]`)
|
||||
GRAMMAR_LITERAL_ESCAPES = map[string]string{
|
||||
"\r": `\r`,
|
||||
"\n": `\n`,
|
||||
`"`: `\"`,
|
||||
}
|
||||
)
|
||||
|
||||
type JSONSchemaConverter struct {
|
||||
propOrder map[string]int
|
||||
rules map[string]string
|
||||
@@ -114,18 +56,6 @@ func (sc *JSONSchemaConverter) addRule(name, rule string) string {
|
||||
return key
|
||||
}
|
||||
|
||||
const arrayNewLines = `arr ::=
|
||||
"[\n" (
|
||||
realvalue
|
||||
(",\n" realvalue)*
|
||||
)? "]"`
|
||||
|
||||
const array = `arr ::=
|
||||
"[" (
|
||||
realvalue
|
||||
("," realvalue)*
|
||||
)? "]"`
|
||||
|
||||
func (sc *JSONSchemaConverter) finalizeGrammar(options ...func(*GrammarOption)) string {
|
||||
|
||||
grammarOpts := &GrammarOption{}
|
||||
@@ -343,36 +273,3 @@ func (sc *JSONSchemaConverter) GrammarFromBytes(b []byte, options ...func(*Gramm
|
||||
_ = json.Unmarshal(b, &schema)
|
||||
return sc.Grammar(schema, options...)
|
||||
}
|
||||
|
||||
func jsonString(v interface{}) string {
|
||||
b, _ := json.Marshal(v)
|
||||
return string(b)
|
||||
}
|
||||
|
||||
type FunctionName struct {
|
||||
Const string `json:"const"`
|
||||
}
|
||||
|
||||
type Argument struct {
|
||||
Type string `json:"type"`
|
||||
Properties map[string]interface{} `json:"properties"`
|
||||
}
|
||||
|
||||
type Item struct {
|
||||
Type string `json:"type"`
|
||||
Properties map[string]interface{} `json:"properties"`
|
||||
}
|
||||
|
||||
type JSONFunctionStructure struct {
|
||||
OneOf []Item `json:"oneOf,omitempty"`
|
||||
AnyOf []Item `json:"anyOf,omitempty"`
|
||||
Defs map[string]interface{} `json:"$defs,omitempty"`
|
||||
}
|
||||
|
||||
func (j JSONFunctionStructure) Grammar(options ...func(*GrammarOption)) string {
|
||||
grammarOpts := &GrammarOption{}
|
||||
grammarOpts.Apply(options...)
|
||||
|
||||
dat, _ := json.Marshal(j)
|
||||
return NewJSONSchemaConverter(grammarOpts.PropOrder).GrammarFromBytes(dat, options...)
|
||||
}
|
||||
|
||||
@@ -9,16 +9,6 @@ import (
|
||||
. "github.com/onsi/gomega"
|
||||
)
|
||||
|
||||
func createFunction(field1 string, field2 string, name string, properties map[string]interface{}) map[string]interface{} {
|
||||
property := map[string]interface{}{}
|
||||
property[field1] = FunctionName{Const: name}
|
||||
property[field2] = Argument{
|
||||
Type: "object",
|
||||
Properties: properties,
|
||||
}
|
||||
return property
|
||||
}
|
||||
|
||||
var testFunctions = []Item{
|
||||
{
|
||||
Type: "object",
|
||||
|
||||
28
pkg/functions/json_mode.go
Normal file
28
pkg/functions/json_mode.go
Normal file
@@ -0,0 +1,28 @@
|
||||
package functions
|
||||
|
||||
const (
|
||||
JSONBNF = `root ::= object
|
||||
value ::= object | array | string | number | ("true" | "false" | "null") ws
|
||||
|
||||
object ::=
|
||||
"{" ws (
|
||||
string ":" ws value
|
||||
("," ws string ":" ws value)*
|
||||
)? "}" ws
|
||||
|
||||
array ::=
|
||||
"[" ws (
|
||||
value
|
||||
("," ws value)*
|
||||
)? "]" ws
|
||||
|
||||
string ::=
|
||||
"\"" (
|
||||
[^"\\] |
|
||||
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) # escapes
|
||||
)* "\"" ws
|
||||
|
||||
number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? ws
|
||||
|
||||
ws ::= ([ \t\n] ws)?`
|
||||
)
|
||||
Reference in New Issue
Block a user