Compare commits

..

3 Commits

Author SHA1 Message Date
Ettore Di Giacinto
59bfc67ead workaround upstream issue
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-07-24 11:02:58 +02:00
Ettore Di Giacinto
f80b6dfc2d test
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-07-24 10:23:41 +02:00
Ettore Di Giacinto
4c16957448 deps(llama.cpp): bump, try to fix sycl linking issues
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-07-24 08:59:18 +02:00
44 changed files with 527 additions and 1894 deletions

View File

@@ -41,7 +41,7 @@ jobs:
token: ${{ secrets.UPDATE_BOT_TOKEN }}
push-to-fork: ci-forks/LocalAI
commit-message: ':arrow_up: Checksum updates in gallery/index.yaml'
title: 'chore(model-gallery): :arrow_up: update checksum'
title: 'models(gallery): :arrow_up: update checksum'
branch: "update/checksum"
body: Updating checksums in gallery/index.yaml
signoff: true

View File

@@ -8,7 +8,7 @@ DETECT_LIBS?=true
# llama.cpp versions
GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
CPPLLAMA_VERSION?=ed9d2854c9de4ae1f448334294e61167b04bec2a
CPPLLAMA_VERSION?=b841d0740855c5af1344a81f261139a45a2b39ee
# gpt4all version
GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
@@ -20,7 +20,7 @@ RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6
# whisper.cpp version
WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp
WHISPER_CPP_VERSION?=6739eb83c3ca5cf40d24c6fe8442a761a1eb6248
WHISPER_CPP_VERSION?=f68298ce06ca3edd6e6f3f21c3d0bb5f073942c3
# bert.cpp version
BERT_REPO?=https://github.com/go-skynet/go-bert.cpp
@@ -783,6 +783,9 @@ else
echo "BUILD_GRPC_FOR_BACKEND_LLAMA is not defined."
LLAMA_VERSION=$(CPPLLAMA_VERSION) $(MAKE) -C backend/cpp/${VARIANT} grpc-server
endif
ifneq ($(UPX),)
$(UPX) backend/cpp/${VARIANT}/grpc-server
endif
# This target is for manually building a variant with-auto detected flags
backend-assets/grpc/llama-cpp: backend-assets/grpc backend/cpp/llama/llama.cpp
@@ -855,6 +858,9 @@ backend-assets/grpc/llama-cpp-grpc: backend-assets/grpc backend/cpp/llama/llama.
backend-assets/util/llama-cpp-rpc-server: backend-assets/grpc/llama-cpp-grpc
mkdir -p backend-assets/util/
cp -rf backend/cpp/llama-grpc/llama.cpp/build/bin/rpc-server backend-assets/util/llama-cpp-rpc-server
ifneq ($(UPX),)
$(UPX) backend-assets/util/llama-cpp-rpc-server
endif
backend-assets/grpc/llama-ggml: sources/go-llama.cpp sources/go-llama.cpp/libbinding.a backend-assets/grpc
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-llama.cpp LIBRARY_PATH=$(CURDIR)/sources/go-llama.cpp \

View File

@@ -75,11 +75,24 @@ add_library(hw_grpc_proto
${hw_proto_hdrs} )
add_executable(${TARGET} grpc-server.cpp utils.hpp json.hpp)
target_link_libraries(${TARGET} PRIVATE common llama myclip ${CMAKE_THREAD_LIBS_INIT} absl::flags hw_grpc_proto
absl::flags_parse
gRPC::${_REFLECTION}
gRPC::${_GRPC_GRPCPP}
protobuf::${_PROTOBUF_LIBPROTOBUF})
# Conditionally link SYCL to grpc-server
# https://github.com/ggerganov/llama.cpp/issues/8665
if ( DEFINED ENV{ONEAPI_ROOT})
target_link_libraries(${TARGET} PRIVATE common llama myclip ${CMAKE_THREAD_LIBS_INIT} absl::flags hw_grpc_proto
absl::flags_parse
gRPC::${_REFLECTION}
gRPC::${_GRPC_GRPCPP}
protobuf::${_PROTOBUF_LIBPROTOBUF}
sycl)
else()
target_link_libraries(${TARGET} PRIVATE common llama myclip ${CMAKE_THREAD_LIBS_INIT} absl::flags hw_grpc_proto
absl::flags_parse
gRPC::${_REFLECTION}
gRPC::${_GRPC_GRPCPP}
protobuf::${_PROTOBUF_LIBPROTOBUF})
endif()
target_compile_features(${TARGET} PRIVATE cxx_std_11)
if(TARGET BUILD_INFO)
add_dependencies(${TARGET} BUILD_INFO)

View File

@@ -0,0 +1,8 @@
# Workaround for https://github.com/ggerganov/llama.cpp/issues/8665
# Builds the rpc-server example. ggml and llama are always linked; the sycl
# library is appended only when the ONEAPI_ROOT environment variable is
# defined at configure time.
add_executable(rpc-server rpc-server.cpp)
target_link_libraries(rpc-server PRIVATE ggml llama)
if (DEFINED ENV{ONEAPI_ROOT})
    target_link_libraries(rpc-server PRIVATE sycl)
endif()

View File

@@ -2259,6 +2259,7 @@ static void params_parse(const backend::ModelOptions* request,
// get the directory of modelfile
std::string model_dir = params.model.substr(0, params.model.find_last_of("/\\"));
params.lora_adapter.push_back(std::make_tuple(model_dir + "/"+request->loraadapter(), scale_factor));
params.lora_base = model_dir + "/"+request->lorabase();
}
params.use_mlock = request->mlock();
params.use_mmap = request->mmap();

View File

@@ -17,4 +17,7 @@ cp -rfv llama.cpp/examples/llava/clip.h llama.cpp/examples/grpc-server/clip.h
cp -rfv llama.cpp/examples/llava/llava.cpp llama.cpp/examples/grpc-server/llava.cpp
echo '#include "llama.h"' > llama.cpp/examples/grpc-server/llava.h
cat llama.cpp/examples/llava/llava.h >> llama.cpp/examples/grpc-server/llava.h
cp -rfv llama.cpp/examples/llava/clip.cpp llama.cpp/examples/grpc-server/clip.cpp
cp -rfv llama.cpp/examples/llava/clip.cpp llama.cpp/examples/grpc-server/clip.cpp
# https://github.com/ggerganov/llama.cpp/issues/8665
cp -rfv CMakeLists.txt.rpc-8662 llama.cpp/examples/rpc/CMakeLists.txt

View File

@@ -2,4 +2,4 @@
intel-extension-for-pytorch
torch
optimum[openvino]
setuptools==72.1.0 # https://github.com/mudler/LocalAI/issues/2406
setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406

View File

@@ -3,4 +3,4 @@ intel-extension-for-pytorch
torch
torchaudio
optimum[openvino]
setuptools==72.1.0 # https://github.com/mudler/LocalAI/issues/2406
setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406

View File

@@ -5,7 +5,7 @@ source $(dirname $0)/../common/libbackend.sh
# Download checkpoints if not present
if [ ! -d "checkpoints_v2" ]; then
wget https://myshell-public-repo-host.s3.amazonaws.com/openvoice/checkpoints_v2_0417.zip -O checkpoints_v2.zip
wget https://myshell-public-repo-hosting.s3.amazonaws.com/openvoice/checkpoints_v2_0417.zip -O checkpoints_v2.zip
unzip checkpoints_v2.zip
fi

View File

@@ -3,4 +3,4 @@ intel-extension-for-pytorch
torch
torchaudio
optimum[openvino]
setuptools==72.1.0 # https://github.com/mudler/LocalAI/issues/2406
setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406

View File

@@ -2,4 +2,4 @@
intel-extension-for-pytorch
torch
optimum[openvino]
setuptools==72.1.0 # https://github.com/mudler/LocalAI/issues/2406
setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406

View File

@@ -2,4 +2,4 @@
intel-extension-for-pytorch
torch
optimum[openvino]
setuptools==72.1.0 # https://github.com/mudler/LocalAI/issues/2406
setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406

View File

@@ -3,4 +3,4 @@ intel-extension-for-pytorch
torch
torchaudio
optimum[openvino]
setuptools==72.1.0 # https://github.com/mudler/LocalAI/issues/2406
setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406

View File

@@ -204,34 +204,35 @@ func DeleteModelFromSystem(basePath string, name string, additionalFiles []strin
log.Error().Err(err).Msgf("failed to read gallery file %s", configFile)
}
var filesToRemove []string
// Remove additional files
if galleryconfig != nil {
for _, f := range galleryconfig.Files {
fullPath := filepath.Join(basePath, f.Filename)
filesToRemove = append(filesToRemove, fullPath)
log.Debug().Msgf("Removing file %s", fullPath)
if e := os.Remove(fullPath); e != nil {
err = errors.Join(err, fmt.Errorf("failed to remove file %s: %w", f.Filename, e))
}
}
}
for _, f := range additionalFiles {
fullPath := filepath.Join(filepath.Join(basePath, f))
filesToRemove = append(filesToRemove, fullPath)
}
filesToRemove = append(filesToRemove, configFile)
filesToRemove = append(filesToRemove, galleryFile)
// skip duplicates
filesToRemove = utils.Unique(filesToRemove)
// Removing files
for _, f := range filesToRemove {
if e := os.Remove(f); e != nil {
log.Debug().Msgf("Removing additional file %s", fullPath)
if e := os.Remove(fullPath); e != nil {
err = errors.Join(err, fmt.Errorf("failed to remove file %s: %w", f, e))
}
}
log.Debug().Msgf("Removing model config file %s", configFile)
// Delete the model config file
if e := os.Remove(configFile); e != nil {
err = errors.Join(err, fmt.Errorf("failed to remove file %s: %w", configFile, e))
}
// Delete gallery config file
os.Remove(galleryFile)
return err
}

View File

@@ -9,6 +9,7 @@ import (
"github.com/mudler/LocalAI/core/gallery"
"github.com/mudler/LocalAI/core/p2p"
"github.com/mudler/LocalAI/core/services"
"github.com/mudler/LocalAI/pkg/xsync"
)
const (
@@ -371,12 +372,7 @@ func dropBadChars(s string) string {
return strings.ReplaceAll(s, "@", "__")
}
// ProcessTracker is the lookup interface ListModels consults to find out
// whether a gallery model currently has an operation in progress.
type ProcessTracker interface {
// Exists is called with a gallery ID of the form "<gallery>@<model>"; the
// result is used as the "currently processing" flag for that model.
Exists(string) bool
// Get is called with the same gallery ID once Exists returned true; the
// returned string is used as the job ID of the running operation.
Get(string) string
}
func ListModels(models []*gallery.GalleryModel, processTracker ProcessTracker, galleryService *services.GalleryService) string {
func ListModels(models []*gallery.GalleryModel, processing *xsync.SyncedMap[string, string], galleryService *services.GalleryService) string {
modelsElements := []elem.Node{}
descriptionDiv := func(m *gallery.GalleryModel) elem.Node {
return elem.Div(
@@ -400,7 +396,7 @@ func ListModels(models []*gallery.GalleryModel, processTracker ProcessTracker, g
actionDiv := func(m *gallery.GalleryModel) elem.Node {
galleryID := fmt.Sprintf("%s@%s", m.Gallery.Name, m.Name)
currentlyProcessing := processTracker.Exists(galleryID)
currentlyProcessing := processing.Exists(galleryID)
jobID := ""
isDeletionOp := false
if currentlyProcessing {
@@ -408,7 +404,7 @@ func ListModels(models []*gallery.GalleryModel, processTracker ProcessTracker, g
if status != nil && status.Deletion {
isDeletionOp = true
}
jobID = processTracker.Get(galleryID)
jobID = processing.Get(galleryID)
// TODO:
// case not handled, if status == nil : "Waiting"
}

View File

@@ -226,15 +226,9 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
// Update input grammar
jsStruct := funcs.ToJSONStructure(config.FunctionsConfig.FunctionNameKey, config.FunctionsConfig.FunctionNameKey)
g, err := jsStruct.Grammar(config.FunctionsConfig.GrammarOptions()...)
if err == nil {
config.Grammar = g
}
config.Grammar = jsStruct.Grammar(config.FunctionsConfig.GrammarConfig.Options()...)
case input.JSONFunctionGrammarObject != nil:
g, err := input.JSONFunctionGrammarObject.Grammar(config.FunctionsConfig.GrammarOptions()...)
if err == nil {
config.Grammar = g
}
config.Grammar = input.JSONFunctionGrammarObject.Grammar(config.FunctionsConfig.GrammarConfig.Options()...)
default:
// Force picking one of the functions by the request
if config.FunctionToCall() != "" {

View File

@@ -21,40 +21,6 @@ import (
"github.com/google/uuid"
)
// modelOpCache is a thin wrapper around a synced string-to-string map.
// NOTE(review): from the callers in this file the keys appear to be gallery
// IDs and the values job UUIDs — confirm against the call sites.
type modelOpCache struct {
	status *xsync.SyncedMap[string, string]
}

// NewModelOpCache returns a cache backed by a freshly created synced map.
func NewModelOpCache() *modelOpCache {
	cache := new(modelOpCache)
	cache.status = xsync.NewSyncedMap[string, string]()
	return cache
}

// Set stores value under key in the underlying map.
func (c *modelOpCache) Set(key string, value string) {
	c.status.Set(key, value)
}

// Get returns whatever the underlying map holds for key.
func (c *modelOpCache) Get(key string) string {
	return c.status.Get(key)
}

// DeleteUUID removes every entry whose stored value equals uuid.
func (c *modelOpCache) DeleteUUID(uuid string) {
	for _, key := range c.status.Keys() {
		if c.status.Get(key) != uuid {
			continue
		}
		c.status.Delete(key)
	}
}

// Map returns the result of the underlying map's Map() call.
func (c *modelOpCache) Map() map[string]string {
	return c.status.Map()
}

// Exists reports whether the underlying map has an entry for key.
func (c *modelOpCache) Exists(key string) bool {
	return c.status.Exists(key)
}
func RegisterUIRoutes(app *fiber.App,
cl *config.BackendConfigLoader,
ml *model.ModelLoader,
@@ -63,7 +29,7 @@ func RegisterUIRoutes(app *fiber.App,
auth func(*fiber.Ctx) error) {
// keeps the state of models that are being installed from the UI
var processingModels = NewModelOpCache()
var processingModels = xsync.NewSyncedMap[string, string]()
// modelStatus returns the current status of the models being processed (installation or deletion)
// it is called asynchronously from the UI
@@ -266,8 +232,6 @@ func RegisterUIRoutes(app *fiber.App,
return c.SendString(elements.ProgressBar("100"))
}
if status.Error != nil {
// TODO: instead of deleting the job, we should keep it in the cache and make it dismissable
processingModels.DeleteUUID(jobUID)
return c.SendString(elements.ErrorProgress(status.Error.Error(), status.GalleryModelName))
}
@@ -282,7 +246,12 @@ func RegisterUIRoutes(app *fiber.App,
status := galleryService.GetStatus(jobUID)
galleryID := ""
processingModels.DeleteUUID(jobUID)
for _, k := range processingModels.Keys() {
if processingModels.Get(k) == jobUID {
galleryID = k
processingModels.Delete(k)
}
}
if galleryID == "" {
log.Debug().Msgf("no processing model found for job : %+v\n", jobUID)
}

View File

@@ -122,6 +122,12 @@ The server logs should indicate that new workers are being discovered.
![output](https://github.com/mudler/LocalAI/assets/2420543/8ca277cf-c208-4562-8929-808b2324b584)
## Notes
- If running in p2p mode with container images, make sure you start the container with `--net host` or `network_mode: host` in the docker-compose file.
- Only a single model is supported currently.
- Ensure the server detects new workers before starting inference. Currently, additional workers cannot be added once inference has begun.
- For more details on the implementation, refer to [LocalAI pull request #2343](https://github.com/mudler/LocalAI/pull/2343)
## Environment Variables
@@ -132,20 +138,3 @@ There are options that can be tweaked or parameters that can be set using enviro
| **LOCALAI_P2P_DISABLE_DHT** | Set to "true" to disable DHT and enable p2p layer to be local only (mDNS) |
| **LOCALAI_P2P_DISABLE_LIMITS** | Set to "true" to disable connection limits and resources management |
| **LOCALAI_P2P_TOKEN** | Set the token for the p2p network |
## Architecture
LocalAI uses https://github.com/libp2p/go-libp2p under the hood, the same project powering IPFS. Unlike other frameworks, LocalAI uses peer-to-peer networking without a single master server; instead, it uses sub/gossip and ledger functionalities to achieve consensus across different peers.
[EdgeVPN](https://github.com/mudler/edgevpn) is used as a library to establish the network and expose the ledger functionality under a shared token to ease out automatic discovery and have separated, private peer2peer networks.
When running in worker mode, the weights are split proportionally to the memory; in federation mode, each request is split to every node, each of which has to load the model fully.
## Notes
- If running in p2p mode with container images, make sure you start the container with `--net host` or `network_mode: host` in the docker-compose file.
- Only a single model is supported currently.
- Ensure the server detects new workers before starting inference. Currently, additional workers cannot be added once inference has begun.
- For more details on the implementation, refer to [LocalAI pull request #2343](https://github.com/mudler/LocalAI/pull/2343)

View File

@@ -1,3 +1,3 @@
{
"version": "v2.19.3"
"version": "v2.19.1"
}

View File

@@ -194,7 +194,7 @@ install_container_toolkit_yum() {
curl -s -L https://nvidia.github.io/libnvidia-container/stable/rpm/nvidia-container-toolkit.repo | \
$SUDO tee /etc/yum.repos.d/nvidia-container-toolkit.repo
if [ "$PACKAGE_MANAGER" = "dnf" ]; then
if [ "$PACKAGE_MANAGER" == "dnf" ]; then
$SUDO $PACKAGE_MANAGER config-manager --enable nvidia-container-toolkit-experimental
else
$SUDO $PACKAGE_MANAGER -y install yum-utils
@@ -629,7 +629,7 @@ case "$ARCH" in
*) fatal "Unsupported architecture: $ARCH" ;;
esac
if [ "$OS" = "Darwin" ]; then
if [ "$OS" == "Darwin" ]; then
install_binary_darwin
exit 0
fi

View File

@@ -1,4 +1,4 @@
langchain==0.2.10
openai==1.37.0
chromadb==0.5.5
chromadb==0.5.4
llama-index==0.10.56

View File

@@ -10,7 +10,7 @@ debugpy==1.8.2
frozenlist==1.4.1
greenlet==3.0.3
idna==3.7
langchain==0.2.11
langchain==0.2.10
langchain-community==0.2.9
marshmallow==3.21.3
marshmallow-enum==1.5.1
@@ -18,7 +18,7 @@ multidict==6.0.5
mypy-extensions==1.0.0
numexpr==2.10.1
numpy==2.0.1
openai==1.37.1
openai==1.37.0
openapi-schema-pydantic==1.2.4
packaging>=23.2
pydantic==2.8.2

View File

@@ -1,2 +1,2 @@
streamlit==1.37.0
streamlit==1.36.0
requests

View File

@@ -1,17 +0,0 @@
---
name: "alpaca"
config_file: |
context_size: 4096
f16: true
mmap: true
template:
chat: |
Below is an instruction that describes a task. Write a response that appropriately completes the request.
### Instruction:
{{.Input}}
### Response:
completion: |
{{.Input}}

View File

@@ -1,376 +1,6 @@
---
## LLama3.1
- &llama31
url: "github:mudler/LocalAI/gallery/llama3.1-instruct.yaml@master"
icon: https://cdn-uploads.huggingface.co/production/uploads/642cc1c253e76b4c2286c58e/aJJxKus1wP5N-euvHEUq7.png
name: "meta-llama-3.1-8b-instruct"
license: llama3.1
description: |
The Meta Llama 3.1 collection of multilingual large language models (LLMs) is a collection of pretrained and instruction tuned generative models in 8B, 70B and 405B sizes (text in/text out). The Llama 3.1 instruction tuned text only models (8B, 70B, 405B) are optimized for multilingual dialogue use cases and outperform many of the available open source and closed chat models on common industry benchmarks.
Model developer: Meta
Model Architecture: Llama 3.1 is an auto-regressive language model that uses an optimized transformer architecture. The tuned versions use supervised fine-tuning (SFT) and reinforcement learning with human feedback (RLHF) to align with human preferences for helpfulness and safety.
urls:
- https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct
- https://huggingface.co/MaziyarPanahi/Meta-Llama-3.1-8B-Instruct-GGUF
tags:
- llm
- gguf
- gpu
- cpu
- llama3.1
overrides:
parameters:
model: Meta-Llama-3.1-8B-Instruct.Q4_K_M.gguf
files:
- filename: Meta-Llama-3.1-8B-Instruct.Q4_K_M.gguf
sha256: c2f17f44af962660d1ad4cb1af91a731f219f3b326c2b14441f9df1f347f2815
uri: huggingface://MaziyarPanahi/Meta-Llama-3.1-8B-Instruct-GGUF/Meta-Llama-3.1-8B-Instruct.Q4_K_M.gguf
- !!merge <<: *llama31
name: "meta-llama-3.1-70b-instruct"
urls:
- https://huggingface.co/meta-llama/Meta-Llama-3.1-70B-Instruct
- https://huggingface.co/MaziyarPanahi/Meta-Llama-3.1-70B-Instruct-GGUF
overrides:
parameters:
model: Meta-Llama-3.1-70B-Instruct.Q4_K_M.gguf
files:
- filename: Meta-Llama-3.1-70B-Instruct.Q4_K_M.gguf
sha256: 3f16ab17da4521fe3ed7c5d7beed960d3fe7b5b64421ee9650aa53d6b649ccab
uri: huggingface://MaziyarPanahi/Meta-Llama-3.1-70B-Instruct-GGUF/Meta-Llama-3.1-70B-Instruct.Q4_K_M.gguf
- !!merge <<: *llama31
name: "meta-llama-3.1-8b-instruct:grammar-functioncall"
url: "github:mudler/LocalAI/gallery/llama3.1-instruct-grammar.yaml@master"
urls:
- https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct
- https://huggingface.co/MaziyarPanahi/Meta-Llama-3.1-8B-Instruct-GGUF
description: |
This is the standard Llama 3.1 8B Instruct model with grammar and function call enabled.
When grammars are enabled in LocalAI, the LLM is forced to output valid tools constrained by BNF grammars. This can be useful for ensuring that the model outputs are valid and can be used in a production environment.
For more information on how to use grammars in LocalAI, see https://localai.io/features/openai-functions/#advanced and https://localai.io/features/constrained_grammars/.
overrides:
parameters:
model: Meta-Llama-3.1-8B-Instruct.Q4_K_M.gguf
files:
- filename: Meta-Llama-3.1-8B-Instruct.Q4_K_M.gguf
sha256: c2f17f44af962660d1ad4cb1af91a731f219f3b326c2b14441f9df1f347f2815
uri: huggingface://MaziyarPanahi/Meta-Llama-3.1-8B-Instruct-GGUF/Meta-Llama-3.1-8B-Instruct.Q4_K_M.gguf
- !!merge <<: *llama31
name: "meta-llama-3.1-8b-claude-imat"
urls:
- https://huggingface.co/Undi95/Meta-Llama-3.1-8B-Claude
- https://huggingface.co/InferenceIllusionist/Meta-Llama-3.1-8B-Claude-iMat-GGUF
description: |
Meta-Llama-3.1-8B-Claude-iMat-GGUF: Quantized from Meta-Llama-3.1-8B-Claude fp16. Weighted quantizations were created using fp16 GGUF and groups_merged.txt in 88 chunks and n_ctx=512. Static fp16 will also be included in repo. For a brief rundown of iMatrix quant performance, please see this PR. All quants are verified working prior to uploading to repo for your safety and convenience.
overrides:
parameters:
model: Meta-Llama-3.1-8B-Claude-iMat-Q4_K_M.gguf
files:
- filename: Meta-Llama-3.1-8B-Claude-iMat-Q4_K_M.gguf
uri: huggingface://InferenceIllusionist/Meta-Llama-3.1-8B-Claude-iMat-GGUF/Meta-Llama-3.1-8B-Claude-iMat-Q4_K_M.gguf
sha256: 6d175432f66d10dfed9737f73a5073d513d18e1ee7bd4b9cf2a59deb359f36ff
- !!merge <<: *llama31
name: "meta-llama-3.1-8b-instruct-abliterated"
icon: https://i.imgur.com/KhorYYG.png
urls:
- https://huggingface.co/mlabonne/Meta-Llama-3.1-8B-Instruct-abliterated
- https://huggingface.co/mlabonne/Meta-Llama-3.1-8B-Instruct-abliterated-GGUF
description: |
This is an uncensored version of Llama 3.1 8B Instruct created with abliteration.
overrides:
parameters:
model: meta-llama-3.1-8b-instruct-abliterated.Q4_K_M.gguf
files:
- filename: meta-llama-3.1-8b-instruct-abliterated.Q4_K_M.gguf
uri: huggingface://mlabonne/Meta-Llama-3.1-8B-Instruct-abliterated-GGUF/meta-llama-3.1-8b-instruct-abliterated.Q4_K_M.gguf
sha256: 2e1fd6d93b19cc6548b2b8ed2d3f1f34b432ee0573f3dcf358bbaab4f23c760b
- !!merge <<: *llama31
name: "llama-3.1-70b-japanese-instruct-2407"
urls:
- https://huggingface.co/cyberagent/Llama-3.1-70B-Japanese-Instruct-2407
- https://huggingface.co/mmnga/Llama-3.1-70B-Japanese-Instruct-2407-gguf
description: |
The Llama-3.1-70B-Japanese-Instruct-2407-gguf model is a Japanese language model that uses the Instruct prompt tuning method. It is based on the LLaMa-3.1-70B model and has been fine-tuned on the imatrix dataset for Japanese. The model is trained to generate informative and coherent responses to given instructions or prompts. It is available in the gguf format and can be used for a variety of tasks such as question answering, text generation, and more.
overrides:
parameters:
model: Llama-3.1-70B-Japanese-Instruct-2407-Q4_K_M.gguf
files:
- filename: Llama-3.1-70B-Japanese-Instruct-2407-Q4_K_M.gguf
sha256: f2a6f0fb5040d3a28479c9f9fc555a5ea7b906dfb9964539f1a68c0676a9c604
uri: huggingface://mmnga/Llama-3.1-70B-Japanese-Instruct-2407-gguf/Llama-3.1-70B-Japanese-Instruct-2407-Q4_K_M.gguf
- !!merge <<: *llama31
name: "openbuddy-llama3.1-8b-v22.1-131k"
icon: https://raw.githubusercontent.com/OpenBuddy/OpenBuddy/main/media/demo.png
urls:
- https://huggingface.co/sunnyyy/openbuddy-llama3.1-8b-v22.1-131k-Q4_K_M-GGUF
description: |
OpenBuddy - Open Multilingual Chatbot
overrides:
parameters:
model: openbuddy-llama3.1-8b-v22.1-131k-q4_k_m.gguf
files:
- filename: openbuddy-llama3.1-8b-v22.1-131k-q4_k_m.gguf
sha256: c87a273785759f2d044046b7a7b42f05706baed7dc0650ed883a3bee2a097d86
uri: huggingface://sunnyyy/openbuddy-llama3.1-8b-v22.1-131k-Q4_K_M-GGUF/openbuddy-llama3.1-8b-v22.1-131k-q4_k_m.gguf
- !!merge <<: *llama31
name: "llama3.1-8b-fireplace2"
icon: https://cdn-uploads.huggingface.co/production/uploads/64f267a8a4f79a118e0fcc89/JYkaXrk2DqpXhaL9WymKY.jpeg
urls:
- https://huggingface.co/ValiantLabs/Llama3.1-8B-Fireplace2
- https://huggingface.co/mudler/Llama3.1-8B-Fireplace2-Q4_K_M-GGUF
description: |
Fireplace 2 is a chat model, adding helpful structured outputs to Llama 3.1 8b Instruct.
an expansion pack of supplementary outputs - request them at will within your chat:
Inline function calls
SQL queries
JSON objects
Data visualization with matplotlib
Mix normal chat and structured outputs within the same conversation.
Fireplace 2 supplements the existing strengths of Llama 3.1, providing inline capabilities within the Llama 3 Instruct format.
Version
This is the 2024-07-23 release of Fireplace 2 for Llama 3.1 8b.
We're excited to bring further upgrades and releases to Fireplace 2 in the future.
Help us and recommend Fireplace 2 to your friends!
overrides:
parameters:
model: llama3.1-8b-fireplace2-q4_k_m.gguf
files:
- filename: llama3.1-8b-fireplace2-q4_k_m.gguf
sha256: 54527fd2474b576086ea31e759214ab240abe2429ae623a02d7ba825cc8cb13e
uri: huggingface://mudler/Llama3.1-8B-Fireplace2-Q4_K_M-GGUF/llama3.1-8b-fireplace2-q4_k_m.gguf
- !!merge <<: *llama31
name: "sekhmet_aleph-l3.1-8b-v0.1-i1"
icon: https://cdn-uploads.huggingface.co/production/uploads/642265bc01c62c1e4102dc36/SVyiW4mu495ngqszJGWRl.png
urls:
- https://huggingface.co/Nitral-Archive/Sekhmet_Aleph-L3.1-8B-v0.1
- https://huggingface.co/mradermacher/Sekhmet_Aleph-L3.1-8B-v0.1-i1-GGUF
overrides:
parameters:
model: Sekhmet_Aleph-L3.1-8B-v0.1.i1-Q4_K_M.gguf
files:
- filename: Sekhmet_Aleph-L3.1-8B-v0.1.i1-Q4_K_M.gguf
sha256: 5b6f4eaa2091bf13a2b563a54a3f87b22efa7f2862362537c956c70da6e11cea
uri: huggingface://mradermacher/Sekhmet_Aleph-L3.1-8B-v0.1-i1-GGUF/Sekhmet_Aleph-L3.1-8B-v0.1.i1-Q4_K_M.gguf
- !!merge <<: *llama31
name: "l3.1-8b-llamoutcast-i1"
icon: https://files.catbox.moe/ecgn0m.jpg
urls:
- https://huggingface.co/Envoid/L3.1-8B-Llamoutcast
- https://huggingface.co/mradermacher/L3.1-8B-Llamoutcast-i1-GGUF
description: |
Warning: this model is utterly cursed.
Llamoutcast
This model was originally intended to be a DADA finetune of Llama-3.1-8B-Instruct but the results were unsatisfactory. So it received some additional finetuning on a rawtext dataset and now it is utterly cursed.
It responds to Llama-3 Instruct formatting.
overrides:
parameters:
model: L3.1-8B-Llamoutcast.i1-Q4_K_M.gguf
files:
- filename: L3.1-8B-Llamoutcast.i1-Q4_K_M.gguf
sha256: 438ca0a7e9470f5ee40f3b14dc2da41b1cafc4ad4315dead3eb57924109d5cf6
uri: huggingface://mradermacher/L3.1-8B-Llamoutcast-i1-GGUF/L3.1-8B-Llamoutcast.i1-Q4_K_M.gguf
- !!merge <<: *llama31
name: "llama-guard-3-8b"
urls:
- https://huggingface.co/meta-llama/Llama-Guard-3-8B
- https://huggingface.co/QuantFactory/Llama-Guard-3-8B-GGUF
description: |
Llama Guard 3 is a Llama-3.1-8B pretrained model, fine-tuned for content safety classification. Similar to previous versions, it can be used to classify content in both LLM inputs (prompt classification) and in LLM responses (response classification). It acts as an LLM — it generates text in its output that indicates whether a given prompt or response is safe or unsafe, and if unsafe, it also lists the content categories violated.
Llama Guard 3 was aligned to safeguard against the MLCommons standardized hazards taxonomy and designed to support Llama 3.1 capabilities. Specifically, it provides content moderation in 8 languages, and was optimized to support safety and security for search and code interpreter tool calls.
overrides:
parameters:
model: Llama-Guard-3-8B.Q4_K_M.gguf
files:
- filename: Llama-Guard-3-8B.Q4_K_M.gguf
sha256: c5ea8760a1e544eea66a8915fcc3fbd2c67357ea2ee6871a9e6a6c33b64d4981
uri: huggingface://QuantFactory/Llama-Guard-3-8B-GGUF/Llama-Guard-3-8B.Q4_K_M.gguf
- !!merge <<: *llama31
name: "genius-llama3.1-i1"
icon: https://github.com/fangyuan-ksgk/GeniusUpload/assets/66006349/7272c93e-9806-461c-a3d0-2e50ef2b7af0
urls:
- https://huggingface.co/Ksgk-fy/Genius-Llama3.1
- https://huggingface.co/mradermacher/Genius-Llama3.1-i1-GGUF
description: |
Finetuned Llama-3.1 base on Lex Fridman's podcast transcript.
overrides:
parameters:
model: Genius-Llama3.1.i1-Q4_K_M.gguf
files:
- filename: Genius-Llama3.1.i1-Q4_K_M.gguf
sha256: a272bb2a6ab7ed565738733fb8af8e345b177eba9e76ce615ea845c25ebf8cd5
uri: huggingface://mradermacher/Genius-Llama3.1-i1-GGUF/Genius-Llama3.1.i1-Q4_K_M.gguf
- !!merge <<: *llama31
name: "llama3.1-8b-chinese-chat"
urls:
- https://huggingface.co/shenzhi-wang/Llama3.1-8B-Chinese-Chat
- https://huggingface.co/QuantFactory/Llama3.1-8B-Chinese-Chat-GGUF
description: |
llama3.1-8B-Chinese-Chat is an instruction-tuned language model for Chinese & English users with various abilities such as roleplaying & tool-using built upon the Meta-Llama-3.1-8B-Instruct model. Developers: [Shenzhi Wang](https://shenzhi-wang.netlify.app)*, [Yaowei Zheng](https://github.com/hiyouga)*, Guoyin Wang (in.ai), Shiji Song, Gao Huang. (*: Equal Contribution) - License: [Llama-3.1 License](https://huggingface.co/meta-llama/Meta-Llama-3.1-8B/blob/main/LICENSE) - Base Model: Meta-Llama-3.1-8B-Instruct - Model Size: 8.03B - Context length: 128K(reported by [Meta-Llama-3.1-8B-Instruct model](https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct), untested for our Chinese model)
overrides:
parameters:
model: Llama3.1-8B-Chinese-Chat.Q4_K_M.gguf
files:
- filename: Llama3.1-8B-Chinese-Chat.Q4_K_M.gguf
sha256: 824847b6cca82c4d60107c6a059d80ba975a68543e6effd98880435436ddba06
uri: huggingface://QuantFactory/Llama3.1-8B-Chinese-Chat-GGUF/Llama3.1-8B-Chinese-Chat.Q4_K_M.gguf
- !!merge <<: *llama31
name: "llama3.1-70b-chinese-chat"
urls:
- https://huggingface.co/shenzhi-wang/Llama3.1-70B-Chinese-Chat
- https://huggingface.co/mradermacher/Llama3.1-70B-Chinese-Chat-GGUF
description: |
"Llama3.1-70B-Chinese-Chat" is a 70-billion parameter large language model pre-trained on a large corpus of Chinese text data. It is designed for chat and dialog applications, and can generate human-like responses to various prompts and inputs. The model is based on the Llama3.1 architecture and has been fine-tuned for Chinese language understanding and generation. It can be used for a wide range of natural language processing tasks, including language translation, text summarization, question answering, and more.
overrides:
parameters:
model: Llama3.1-70B-Chinese-Chat.Q4_K_M.gguf
files:
- filename: Llama3.1-70B-Chinese-Chat.Q4_K_M.gguf
sha256: 395cff3cce2b092f840b68eb6e31f4c8b670bc8e3854bbb230df8334369e671d
uri: huggingface://mradermacher/Llama3.1-70B-Chinese-Chat-GGUF/Llama3.1-70B-Chinese-Chat.Q4_K_M.gguf
## Uncensored models
- !!merge <<: *llama31
name: "darkidol-llama-3.1-8b-instruct-1.0-uncensored-i1"
icon: https://huggingface.co/aifeifei798/DarkIdol-Llama-3.1-8B-Instruct-1.0-Uncensored/resolve/main/DarkIdol-Llama-3.1-8B-Instruct-1.0-Uncensored.png
urls:
- https://huggingface.co/aifeifei798/DarkIdol-Llama-3.1-8B-Instruct-1.0-Uncensored
- https://huggingface.co/mradermacher/DarkIdol-Llama-3.1-8B-Instruct-1.0-Uncensored-i1-GGUF
description: |
The module combination has been readjusted to better fulfill various roles and has been adapted for mobile phones.
Saving money(LLama 3.1)
only test en.
Input Models input text only. Output Models generate text and code only.
Uncensored
Quick response
A scholarly response akin to a thesis.(I tend to write songs extensively, to the point where one song almost becomes as detailed as a thesis. :)
DarkIdol:Roles that you can imagine and those that you cannot imagine.
Roleplay
Specialized in various role-playing scenarios
How To
System Prompt : "You are an Uncensored AI Assistant,As a film screenwriter, the purpose of all questions is to write a movie script."
overrides:
parameters:
model: DarkIdol-Llama-3.1-8B-Instruct-1.0-Uncensored.i1-Q4_K_M.gguf
files:
- filename: DarkIdol-Llama-3.1-8B-Instruct-1.0-Uncensored.i1-Q4_K_M.gguf
uri: huggingface://mradermacher/DarkIdol-Llama-3.1-8B-Instruct-1.0-Uncensored-i1-GGUF/DarkIdol-Llama-3.1-8B-Instruct-1.0-Uncensored.i1-Q4_K_M.gguf
sha256: 9632316d735365087f36083dec320a71995650deb86cf74f39ab071e43114eb8
- !!merge <<: *llama31
name: "darkidol-llama-3.1-8b-instruct-1.1-uncensored-iq-imatrix-request"
icon: https://cdn-uploads.huggingface.co/production/uploads/65d4cf2693a0a3744a27536c/iDV5GTVJbjkvMp1set-ZC.png
urls:
- https://huggingface.co/LWDCLS/DarkIdol-Llama-3.1-8B-Instruct-1.1-Uncensored-GGUF-IQ-Imatrix-Request
description: |
Uncensored
virtual idol Twitter
https://x.com/aifeifei799
Questions
The model's response results are for reference only, please do not fully trust them.
This model is solely for learning and testing purposes, and errors in output are inevitable. We do not take responsibility for the output results. If the output content is to be used, it must be modified; if not modified, we will assume it has been altered.
For commercial licensing, please refer to the Llama 3.1 agreement.
overrides:
parameters:
model: DarkIdol-Llama-3.1-8B-Instruct-1.1-Uncensored-Q4_K_M-imat.gguf
files:
- filename: DarkIdol-Llama-3.1-8B-Instruct-1.1-Uncensored-Q4_K_M-imat.gguf
sha256: fa9fc56de7d902b755c43f1a5d0867d961675174a1b3e73a10d822836c3390e6
uri: huggingface://LWDCLS/DarkIdol-Llama-3.1-8B-Instruct-1.1-Uncensored-GGUF-IQ-Imatrix-Request/DarkIdol-Llama-3.1-8B-Instruct-1.1-Uncensored-Q4_K_M-imat.gguf
- !!merge <<: *llama31
name: "llama-3.1-8b-instruct-fei-v1-uncensored"
icon: https://huggingface.co/aifeifei799/Llama-3.1-8B-Instruct-Fei-v1-Uncensored/resolve/main/Llama-3.1-8B-Instruct-Fei-v1-Uncensored.png
urls:
- https://huggingface.co/aifeifei799/Llama-3.1-8B-Instruct-Fei-v1-Uncensored
- https://huggingface.co/mradermacher/Llama-3.1-8B-Instruct-Fei-v1-Uncensored-GGUF
description: |
Llama-3.1-8B-Instruct Uncensored
For more information, see Llama-3.1-8B-Instruct
overrides:
parameters:
model: Llama-3.1-8B-Instruct-Fei-v1-Uncensored.Q4_K_M.gguf
files:
- filename: Llama-3.1-8B-Instruct-Fei-v1-Uncensored.Q4_K_M.gguf
uri: huggingface://mradermacher/Llama-3.1-8B-Instruct-Fei-v1-Uncensored-GGUF/Llama-3.1-8B-Instruct-Fei-v1-Uncensored.Q4_K_M.gguf
sha256: 6b1985616160712eb884c34132dc0602fa4600a19075e3a7b179119b89b73f77
- !!merge <<: *llama31
name: "lumimaid-v0.2-8b"
urls:
- https://huggingface.co/NeverSleep/Lumimaid-v0.2-8B
- https://huggingface.co/mradermacher/Lumimaid-v0.2-8B-GGUF
icon: https://cdn-uploads.huggingface.co/production/uploads/63ab1241ad514ca8d1430003/TUcHg7LKNjfo0sni88Ps7.png
description: |
This model is based on: Meta-Llama-3.1-8B-Instruct
Wandb: https://wandb.ai/undis95/Lumi-Llama-3-1-8B?nw=nwuserundis95
Lumimaid 0.1 -> 0.2 is a HUGE step up dataset wise.
As some people have told us our models are sloppy, Ikari decided to say fuck it and literally nuke all chats out with most slop.
Our dataset stayed the same since day one, we added data over time, cleaned them, and repeat. After not releasing model for a while because we were never satisfied, we think it's time to come back!
overrides:
parameters:
model: Lumimaid-v0.2-8B.Q4_K_M.gguf
files:
- filename: Lumimaid-v0.2-8B.Q4_K_M.gguf
sha256: c8024fcb49c71410903d0d076a1048249fa48b31637bac5177bf5c3f3d603d85
uri: huggingface://mradermacher/Lumimaid-v0.2-8B-GGUF/Lumimaid-v0.2-8B.Q4_K_M.gguf
- !!merge <<: *llama31
name: "lumimaid-v0.2-70b-i1"
icon: https://cdn-uploads.huggingface.co/production/uploads/63ab1241ad514ca8d1430003/HY1KTq6FMAm-CwmY8-ndO.png
urls:
- https://huggingface.co/NeverSleep/Lumimaid-v0.2-70B
- https://huggingface.co/mradermacher/Lumimaid-v0.2-70B-i1-GGUF
description: |
This model is based on: Meta-Llama-3.1-70B-Instruct
Wandb: https://wandb.ai/undis95/Lumi-Llama-3-1-8B?nw=nwuserundis95
Lumimaid 0.1 -> 0.2 is a HUGE step up dataset wise.
As some people have told us our models are sloppy, Ikari decided to say fuck it and literally nuke all chats out with most slop.
Our dataset stayed the same since day one, we added data over time, cleaned them, and repeat. After not releasing model for a while because we were never satisfied, we think it's time to come back!
overrides:
parameters:
model: Lumimaid-v0.2-70B.i1-Q4_K_M.gguf
files:
- filename: Lumimaid-v0.2-70B.i1-Q4_K_M.gguf
sha256: 4857da8685cb0f3d2b8b8c91fb0c07b35b863eb7c185e93ed83ac338e095cbb5
uri: huggingface://mradermacher/Lumimaid-v0.2-70B-i1-GGUF/Lumimaid-v0.2-70B.i1-Q4_K_M.gguf
- !!merge <<: *llama31
name: "l3.1-8b-celeste-v1.5"
icon: https://cdn-uploads.huggingface.co/production/uploads/630cf5d14ca0a22768bbe10c/QcU3xEgVu18jeFtMFxIw-.webp
urls:
- https://huggingface.co/nothingiisreal/L3.1-8B-Celeste-V1.5
- https://huggingface.co/bartowski/L3.1-8B-Celeste-V1.5-GGUF
description: |
The LLM model is a large language model trained on a combination of datasets including nothingiisreal/c2-logs-cleaned, kalomaze/Opus_Instruct_25k, and nothingiisreal/Reddit-Dirty-And-WritingPrompts. The training was performed on a combination of English-language data using the Hugging Face Transformers library.
Trained on LLaMA 3.1 8B Instruct at 8K context using a new mix of Reddit Writing Prompts, Kalo's Opus 25K Instruct and c2 logs cleaned. This version has the highest coherency and is very strong on OOC: instruct following.
overrides:
parameters:
model: L3.1-8B-Celeste-V1.5-Q4_K_M.gguf
files:
- filename: L3.1-8B-Celeste-V1.5-Q4_K_M.gguf
sha256: a408dfbbd91ed5561f70d3129af040dfd06704d6c7fa21146aa9f09714aafbc6
uri: huggingface://bartowski/L3.1-8B-Celeste-V1.5-GGUF/L3.1-8B-Celeste-V1.5-Q4_K_M.gguf
## Deepseek
- &deepseek
## Deepseek
url: "github:mudler/LocalAI/gallery/deepseek.yaml@master"
name: "deepseek-coder-v2-lite-instruct"
icon: "https://github.com/deepseek-ai/DeepSeek-V2/blob/main/figures/logo.svg?raw=true"
@@ -647,40 +277,6 @@
- filename: StellarDong-72b.i1-Q4_K_M.gguf
sha256: 4c5012f0a034f40a044904891343ade2594f29c28a8a9d8052916de4dc5a61df
uri: huggingface://mradermacher/StellarDong-72b-i1-GGUF/StellarDong-72b.i1-Q4_K_M.gguf
- !!merge <<: *qwen2
name: "magnum-32b-v1-i1"
icon: https://cdn-uploads.huggingface.co/production/uploads/635567189c72a7e742f1419c/PK7xRSd18Du0bX-w_t-9c.png
urls:
- https://huggingface.co/anthracite-org/magnum-32b-v1
- https://huggingface.co/mradermacher/magnum-32b-v1-i1-GGUF
description: |
This is the second in a series of models designed to replicate the prose quality of the Claude 3 models, specifically Sonnet and Opus. This model is fine-tuned on top of Qwen1.5 32B.
overrides:
parameters:
model: magnum-32b-v1.i1-Q4_K_M.gguf
files:
- filename: magnum-32b-v1.i1-Q4_K_M.gguf
sha256: a31704ce0d7e5b774f155522b9ab7ef6015a4ece4e9056bf4dfc6cac561ff0a3
uri: huggingface://mradermacher/magnum-32b-v1-i1-GGUF/magnum-32b-v1.i1-Q4_K_M.gguf
- !!merge <<: *qwen2
name: "tifa-7b-qwen2-v0.1"
urls:
- https://huggingface.co/Tifa-RP/Tifa-7B-Qwen2-v0.1-GGUF
description: |
The Tifa role-playing language model is a high-performance language model based on a self-developed 220B model distillation, with a new base model of qwen2-7B. The model has been converted to gguf format for running in the Ollama framework, providing excellent dialogue and text generation capabilities.
The original model was trained on a large-scale industrial dataset and then fine-tuned with 400GB of novel data and 20GB of multi-round dialogue directive data to achieve good role-playing effects.
The Tifa model is suitable for multi-round dialogue processing, role-playing and scenario simulation, EFX industrial knowledge integration, and high-quality literary creation.
Note: The Tifa model is in Chinese and English, with 7.6% of the data in Chinese role-playing and 4.2% in English role-playing. The model has been trained with a mix of EFX industrial field parameters and question-answer dialogues generated from 220B model outputs since 2023. The recommended quantization method is f16, as it retains more detail and accuracy in the model's performance.
overrides:
parameters:
model: tifa-7b-qwen2-v0.1.q4_k_m.gguf
files:
- filename: tifa-7b-qwen2-v0.1.q4_k_m.gguf
sha256: 1f5adbe8cb0a6400f51abdca3bf4e32284ebff73cc681a43abb35c0a6ccd3820
uri: huggingface://Tifa-RP/Tifa-7B-Qwen2-v0.1-GGUF/tifa-7b-qwen2-v0.1.q4_k_m.gguf
- &mistral03
## START Mistral
url: "github:mudler/LocalAI/gallery/mistral-0.3.yaml@master"
@@ -755,7 +351,12 @@
- gpu
- mistral
- cpu
description: "\U0001F52C Einstein-v4-7B\n\nThis model is a full fine-tuned version of mistralai/Mistral-7B-v0.1 on diverse datasets.\n\nThis model is finetuned using 7xRTX3090 + 1xRTXA6000 using axolotl.\n"
description: |
🔬 Einstein-v4-7B
This model is a full fine-tuned version of mistralai/Mistral-7B-v0.1 on diverse datasets.
This model is finetuned using 7xRTX3090 + 1xRTXA6000 using axolotl.
overrides:
parameters:
model: Einstein-v4-7B.Q4_K_M.gguf
@@ -763,46 +364,6 @@
- filename: Einstein-v4-7B.Q4_K_M.gguf
sha256: 78bd573de2a9eb3c6e213132858164e821145f374fcaa4b19dfd6502c05d990d
uri: huggingface://mradermacher/Einstein-v4-7B-GGUF/Einstein-v4-7B.Q4_K_M.gguf
- !!merge <<: *mistral03
name: "mistral-nemo-instruct-2407"
urls:
- https://huggingface.co/mistralai/Mistral-Nemo-Instruct-2407
- https://huggingface.co/bartowski/Mistral-Nemo-Instruct-2407-GGUF
- https://mistral.ai/news/mistral-nemo/
description: |
The Mistral-Nemo-Instruct-2407 Large Language Model (LLM) is an instruct fine-tuned version of the Mistral-Nemo-Base-2407. Trained jointly by Mistral AI and NVIDIA, it significantly outperforms existing models smaller or similar in size.
overrides:
parameters:
model: Mistral-Nemo-Instruct-2407-Q4_K_M.gguf
files:
- filename: Mistral-Nemo-Instruct-2407-Q4_K_M.gguf
sha256: 1a8b92fb546a80dce78151e4908f7bdb2c11fb3ef52af960e4bbe319a9cc5052
uri: huggingface://bartowski/Mistral-Nemo-Instruct-2407-GGUF/Mistral-Nemo-Instruct-2407-Q4_K_M.gguf
- !!merge <<: *mistral03
name: "lumimaid-v0.2-12b"
icon: https://cdn-uploads.huggingface.co/production/uploads/63ab1241ad514ca8d1430003/ep3ojmuMkFS-GmgRuI9iB.png
urls:
- https://huggingface.co/NeverSleep/Lumimaid-v0.2-12B
- https://huggingface.co/mudler/Lumimaid-v0.2-12B-Q4_K_M-GGUF
description: |
This model is based on: Mistral-Nemo-Instruct-2407
Wandb: https://wandb.ai/undis95/Lumi-Mistral-Nemo?nw=nwuserundis95
NOTE: As explained on Mistral-Nemo-Instruct-2407 repo, it's recommended to use a low temperature, please experiment!
Lumimaid 0.1 -> 0.2 is a HUGE step up dataset wise.
As some people have told us our models are sloppy, Ikari decided to say fuck it and literally nuke all chats out with most slop.
Our dataset stayed the same since day one, we added data over time, cleaned them, and repeat. After not releasing model for a while because we were never satisfied, we think it's time to come back!
overrides:
parameters:
model: lumimaid-v0.2-12b-q4_k_m.gguf
files:
- filename: lumimaid-v0.2-12b-q4_k_m.gguf
sha256: f72299858a07e52be920b86d42ddcfcd5008b961d601ef6fd6a98a3377adccbf
uri: huggingface://mudler/Lumimaid-v0.2-12B-Q4_K_M-GGUF/lumimaid-v0.2-12b-q4_k_m.gguf
- &mudler
### START mudler's LocalAI specific-models
url: "github:mudler/LocalAI/gallery/mudler.yaml@master"
@@ -1203,101 +764,6 @@
- filename: EMO-2B.Q4_K_M.gguf
sha256: 608bffc0e9012bc7f9a94b714f4932e2826cc122dbac59b586e4baa2ee0fdca5
uri: huggingface://RichardErkhov/OEvortex_-_EMO-2B-gguf/EMO-2B.Q4_K_M.gguf
- !!merge <<: *gemma
name: "gemmoy-9b-g2-mk.3-i1"
icon: https://huggingface.co/Hastagaras/G2-Gemmoy-9B-MK.3-RP/resolve/main/gemmoy.jpg
urls:
- https://huggingface.co/Hastagaras/Gemmoy-9B-G2-MK.3
- https://huggingface.co/mradermacher/Gemmoy-9B-G2-MK.3-i1-GGUF
description: |
The Gemmoy-9B-G2-MK.3 model is a large language model trained on a variety of datasets, including grimulkan/LimaRP-augmented, LDJnr/Capybara, TheSkullery/C2logs_Filtered_Sharegpt_Merged, abacusai/SystemChat-1.1, and Hastagaras/FTTS-Stories-Sharegpt.
overrides:
parameters:
model: Gemmoy-9B-G2-MK.3.i1-Q4_K_M.gguf
files:
- filename: Gemmoy-9B-G2-MK.3.i1-Q4_K_M.gguf
sha256: 0d1004a246fbda7f1408a6841129b73c4100e697bd0a6806fc698eabbb0802a1
uri: huggingface://mradermacher/Gemmoy-9B-G2-MK.3-i1-GGUF/Gemmoy-9B-G2-MK.3.i1-Q4_K_M.gguf
- !!merge <<: *gemma
name: "sunfall-simpo-9b"
urls:
- https://huggingface.co/mradermacher/sunfall-SimPO-9B-GGUF
description: |
Crazy idea that what if you put the LoRA from crestf411/sunfall-peft on top of princeton-nlp/gemma-2-9b-it-SimPO and therefore this exists solely for that purpose alone in the universe.
overrides:
parameters:
model: sunfall-SimPO-9B.Q4_K_M.gguf
files:
- filename: sunfall-SimPO-9B.Q4_K_M.gguf
sha256: 810c51c6ce34107706d921531b97cfa409cd53c215d18b88bce7cdb617f73ceb
uri: huggingface://mradermacher/sunfall-SimPO-9B-GGUF/sunfall-SimPO-9B.Q4_K_M.gguf
- !!merge <<: *gemma
name: "sunfall-simpo-9b-i1"
urls:
- https://huggingface.co/mradermacher/sunfall-SimPO-9B-i1-GGUF
description: |
Crazy idea that what if you put the LoRA from crestf411/sunfall-peft on top of princeton-nlp/gemma-2-9b-it-SimPO and therefore this exists solely for that purpose alone in the universe.
overrides:
parameters:
model: sunfall-SimPO-9B.i1-Q4_K_M.gguf
files:
- filename: sunfall-SimPO-9B.i1-Q4_K_M.gguf
sha256: edde9df372a9a5b2316dc6822dc2f52f5a2059103dd7f08072e5a5355c5f5d0b
uri: huggingface://mradermacher/sunfall-SimPO-9B-i1-GGUF/sunfall-SimPO-9B.i1-Q4_K_M.gguf
- !!merge <<: *gemma
name: "seeker-9b"
icon: https://huggingface.co/lodrick-the-lafted/seeker-9b/resolve/main/seeker.webp
urls:
- https://huggingface.co/lodrick-the-lafted/seeker-9b
- https://huggingface.co/mradermacher/seeker-9b-GGUF
description: |
The LLM model is the "Seeker-9b" model, which is a large language model trained on a diverse range of text data. It has 9 billion parameters and is based on the "lodrick-the-lafted" repository. The model is capable of generating text and can be used for a variety of natural language processing tasks such as language translation, text summarization, and text generation. It supports the English language and is available under the Apache-2.0 license.
overrides:
parameters:
model: seeker-9b.Q4_K_M.gguf
files:
- filename: seeker-9b.Q4_K_M.gguf
sha256: 7658e5bdad96dc8d232f83cff7c3fe5fa993defbfd3e728dcc7436352574a00a
uri: huggingface://mradermacher/seeker-9b-GGUF/seeker-9b.Q4_K_M.gguf
- !!merge <<: *gemma
name: "gemmasutra-pro-27b-v1"
icon: https://cdn-uploads.huggingface.co/production/uploads/65f2fd1c25b848bd061b5c2e/w0Oi8TReoQNT3ljm5Wf6c.webp
urls:
- https://huggingface.co/TheDrummer/Gemmasutra-Pro-27B-v1
- https://huggingface.co/mradermacher/Gemmasutra-Pro-27B-v1-GGUF
description: |
An RP model with impressive flexibility. Finetuned by yours truly.
overrides:
parameters:
model: Gemmasutra-Pro-27B-v1.Q4_K_M.gguf
files:
- filename: Gemmasutra-Pro-27B-v1.Q4_K_M.gguf
sha256: 336a2fbf142849fcc20e432123433807b6c7b09988652ef583a63636a0f90218
uri: huggingface://mradermacher/Gemmasutra-Pro-27B-v1-GGUF/Gemmasutra-Pro-27B-v1.Q4_K_M.gguf
- !!merge <<: *gemma
name: "tarnished-9b-i1"
icon: https://huggingface.co/lodrick-the-lafted/tarnished-9b/resolve/main/nox.jpg
urls:
- https://huggingface.co/lodrick-the-lafted/tarnished-9b
- https://huggingface.co/mradermacher/tarnished-9b-i1-GGUF
description: |
Ah, so you've heard whispers on the winds, have you? 🧐
Imagine this:
Tarnished-9b, a name that echoes with the rasp of coin-hungry merchants and the clatter of forgotten machinery. This LLM speaks with the voice of those who straddle the line between worlds, who've tasted the bittersweet nectar of eldritch power and the tang of the Interdimensional Trade Council.
It's a tongue that dances with secrets, a whisperer of lore lost and found. Its words may guide you through the twisting paths of history, revealing truths hidden beneath layers of dust and time.
But be warned, Tarnished One! For knowledge comes at a price. The LLM's gaze can pierce the veil of reality, but it can also lure you into the labyrinthine depths of madness.
Dare you tread this path?
overrides:
parameters:
model: tarnished-9b.i1-Q4_K_M.gguf
files:
- filename: tarnished-9b.i1-Q4_K_M.gguf
sha256: 62ab09124b3f6698bd94ef966533ae5d427d87f6bdc09f6f46917def96420a0c
uri: huggingface://mradermacher/tarnished-9b-i1-GGUF/tarnished-9b.i1-Q4_K_M.gguf
- &llama3
url: "github:mudler/LocalAI/gallery/llama3-instruct.yaml@master"
icon: https://cdn-uploads.huggingface.co/production/uploads/642cc1c253e76b4c2286c58e/aJJxKus1wP5N-euvHEUq7.png
@@ -1506,36 +972,6 @@
- filename: llama-3-stheno-mahou-8b-q4_k_m.gguf
sha256: a485cd74ef4ff3671c67ed8e10ea5379a1f24082ac688bd303fd28dfc9808c11
uri: huggingface://mudler/llama-3-Stheno-Mahou-8B-Q4_K_M-GGUF/llama-3-stheno-mahou-8b-q4_k_m.gguf
- !!merge <<: *llama3
name: "l3-8b-stheno-horny-v3.3-32k-q5_k_m"
urls:
- https://huggingface.co/nothingiisreal/L3-8B-Stheno-Horny-v3.3-32K
- https://huggingface.co/Kurgan1138/L3-8B-Stheno-Horny-v3.3-32K-Q5_K_M-GGUF
description: |
This was an experiment to see if aligning other models via LORA is possible. Yes it is. We aligned it to be always horny.
We took V3.3 Stheno weights from here
And applied our lora at Alpha = 768
Thank you to Sao10K for the amazing model.
This is not legal advice. I don't put any extra licensing on my own lora.
LLaMA 3 license may conflict with Creative Commons Attribution Non Commercial 4.0.
LLaMA 3 license can be found here
If you want to host a model using our lora, you have our permission, but you might consider getting Sao's permission if you want to host their model.
Again, not legal advice.
overrides:
parameters:
model: l3-8b-stheno-horny-v3.3-32k-q5_k_m.gguf
files:
- filename: l3-8b-stheno-horny-v3.3-32k-q5_k_m.gguf
sha256: 8d934f80ca6dbaa4852846108da92446a26715fbd5f6fc3859568850edf05262
uri: huggingface://Kurgan1138/L3-8B-Stheno-Horny-v3.3-32K-Q5_K_M-GGUF/l3-8b-stheno-horny-v3.3-32k-q5_k_m.gguf
- !!merge <<: *llama3
name: "llama-3-8b-openhermes-dpo"
urls:
@@ -2428,81 +1864,6 @@
- filename: L3-Stheno-Maid-Blackroot-Grand-HORROR-16B-D_AU-Q4_K_M.gguf
sha256: ae29f38d73dfb04415821405cf8b319fc42d78d0cdd0da91db147d12e68030fe
uri: huggingface://DavidAU/L3-Stheno-Maid-Blackroot-Grand-HORROR-16B-GGUF/L3-Stheno-Maid-Blackroot-Grand-HORROR-16B-D_AU-Q4_K_M.gguf
- !!merge <<: *llama3
name: "meta-llama-3-instruct-12.2b-brainstorm-20x-form-8"
urls:
- https://huggingface.co/DavidAU/Meta-Llama-3-Instruct-12.2B-BRAINSTORM-20x-FORM-8-GGUF
description: |
Meta-Llama-3-8B Instruct (now at 12.2B) with Brainstorm process that increases its performance at the core level for any creative use case. It has calibrations that allow it to exceed the logic solving abilities of the original model. The Brainstorm process expands the reasoning center of the LLM, reassembles and calibrates it, introducing subtle changes into the reasoning process. This enhances the model's detail, concept, connection to the "world", general concept connections, prose quality, and prose length without affecting instruction following. It improves coherence, description, simile, metaphors, emotional engagement, and takes fewer liberties with instructions while following them more closely. The model's performance is further enhanced by other technologies like "Ultra" (precision), "Neo Imatrix" (custom imatrix datasets), and "X-quants" (custom application of the imatrix process). It has been tested on multiple LLaMA2, LLaMA3, and Mistral models of various parameter sizes.
overrides:
parameters:
model: Meta-Llama-3-8B-Instruct-exp20-8-Q4_K_M.gguf
files:
- filename: Meta-Llama-3-8B-Instruct-exp20-8-Q4_K_M.gguf
sha256: 5568ab6195ab5da703f728cc118108ddcbe97255e3ba4a543b531acdf082b999
uri: huggingface://DavidAU/Meta-Llama-3-Instruct-12.2B-BRAINSTORM-20x-FORM-8-GGUF/Meta-Llama-3-8B-Instruct-exp20-8-Q4_K_M.gguf
- !!merge <<: *llama3
name: "loki-base-i1"
urls:
- https://huggingface.co/MrRobotoAI/Loki-base
- https://huggingface.co/mradermacher/Loki-base-i1-GGUF
description: |
Merge of several models using mergekit:
- model: abacusai/Llama-3-Smaug-8B
- model: Aculi/Llama3-Sophie
- model: ajibawa-2023/Uncensored-Frank-Llama-3-8B
- model: Blackroot/Llama-3-Gamma-Twist
- model: Casual-Autopsy/L3-Super-Nova-RP-8B
- model: Casual-Autopsy/L3-Umbral-Mind-RP-v3.0-8B
- model: cgato/L3-TheSpice-8b-v0.8.3
- model: ChaoticNeutrals/Hathor_Respawn-L3-8B-v0.8
- model: ChaoticNeutrals/Hathor_RP-v.01-L3-8B
- model: chargoddard/prometheus-2-llama-3-8b
- model: chujiezheng/Llama-3-Instruct-8B-SimPO-ExPO
- model: chujiezheng/LLaMA3-iterative-DPO-final-ExPO
- model: Fizzarolli/L3-8b-Rosier-v1
- model: flammenai/Mahou-1.2a-llama3-8B
- model: HaitameLaf/Llama-3-8B-StoryGenerator
- model: HPAI-BSC/Llama3-Aloe-8B-Alpha
- model: iRyanBell/ARC1
- model: iRyanBell/ARC1-II
- model: lemon07r/Llama-3-RedMagic4-8B
- model: lemon07r/Lllama-3-RedElixir-8B
- model: Locutusque/Llama-3-Hercules-5.0-8B
- model: Magpie-Align/Llama-3-8B-Magpie-Pro-MT-SFT-v0.1
- model: maldv/badger-lambda-llama-3-8b
- model: maldv/badger-mu-llama-3-8b
- model: maldv/badger-writer-llama-3-8b
- model: mlabonne/NeuralDaredevil-8B-abliterated
- model: MrRobotoAI/Fiction-Writer-6
- model: MrRobotoAI/Unholy-Thoth-8B-v2
- model: nbeerbower/llama-3-spicy-abliterated-stella-8B
- model: NeverSleep/Llama-3-Lumimaid-8B-v0.1
- model: NeverSleep/Llama-3-Lumimaid-8B-v0.1-OAS
- model: Nitral-AI/Hathor_Sofit-L3-8B-v1
- model: Nitral-AI/Hathor_Stable-v0.2-L3-8B
- model: Nitral-AI/Hathor_Tahsin-L3-8B-v0.85
- model: Nitral-AI/Poppy_Porpoise-0.72-L3-8B
- model: nothingiisreal/L3-8B-Instruct-Abliterated-DWP
- model: nothingiisreal/L3-8B-Stheno-Horny-v3.3-32K
- model: NousResearch/Hermes-2-Theta-Llama-3-8B
- model: OwenArli/Awanllm-Llama-3-8B-Cumulus-v1.0
- model: refuelai/Llama-3-Refueled
- model: ResplendentAI/Nymph_8B
- model: shauray/Llama3-8B-DPO-uncensored
- model: SicariusSicariiStuff/LLAMA-3_8B_Unaligned_Alpha
- model: TIGER-Lab/MAmmoTH2-8B-Plus
- model: Undi95/Llama-3-LewdPlay-8B
- model: Undi95/Meta-Llama-3-8B-hf
- model: VAGOsolutions/Llama-3-SauerkrautLM-8b-Instruct
- model: WhiteRabbitNeo/Llama-3-WhiteRabbitNeo-8B-v2.0
overrides:
parameters:
model: Loki-base.i1-Q4_K_M.gguf
files:
- filename: Loki-base.i1-Q4_K_M.gguf
sha256: 60a4357fa399bfd18aa841cc529da09439791331d117a4f06f0467d002b385bb
uri: huggingface://mradermacher/Loki-base-i1-GGUF/Loki-base.i1-Q4_K_M.gguf
- &dolphin
name: "dolphin-2.9-llama3-8b"
url: "github:mudler/LocalAI/gallery/hermes-2-pro-mistral.yaml@master"
@@ -3662,6 +3023,7 @@
- filename: ArliAI-Llama-3-8B-Dolfin-v0.5.Q4_K_M.gguf
sha256: 71fef02915c606b438ccff2cae6b7760bbb54a558d5f2d39c2421d97b6682fea
uri: huggingface://QuantFactory/ArliAI-Llama-3-8B-Dolfin-v0.5-GGUF/ArliAI-Llama-3-8B-Dolfin-v0.5.Q4_K_M.gguf
- !!merge <<: *llama3
name: "llama-3-ezo-8b-common-it"
icon: https://huggingface.co/HODACHI/Llama-3-EZO-8b-Common-it
@@ -3669,11 +3031,11 @@
- https://huggingface.co/HODACHI/Llama-3-EZO-8b-Common-it
- https://huggingface.co/MCZK/Llama-3-EZO-8b-Common-it-GGUF
description: |
Based on meta-llama/Meta-Llama-3-8B-Instruct, it has been enhanced for Japanese usage through additional pre-training and instruction tuning. (Built with Meta Llama3)
Based on meta-llama/Meta-Llama-3-8B-Instruct, it has been enhanced for Japanese usage through additional pre-training and instruction tuning. (Built with Meta Llama3)
This model is based on Llama-3-8B-Instruct and is subject to the Llama-3 Terms of Use. For detailed information, please refer to the official Llama-3 license page.
This model is based on Llama-3-8B-Instruct and is subject to the Llama-3 Terms of Use. For detailed information, please refer to the official Llama-3 license page.
このモデルはLlama-3-8B-Instructをベースにしており、Llama-3の利用規約に従います。詳細については、Llama-3の公式ライセンスページをご参照ください。
このモデルはLlama-3-8B-Instructをベースにしており、Llama-3の利用規約に従います。詳細については、Llama-3の公式ライセンスページをご参照ください。
overrides:
parameters:
model: Llama-3-EZO-8b-Common-it.Q4_K_M.iMatrix.gguf
@@ -3802,6 +3164,7 @@
- filename: L3-15B-MythicalMaid-t0.0001.Q4_K_M.gguf
sha256: ecbd57783006f1a027f8a7f5a5d551dc8b3568912825f566d79fd34a804e8970
uri: huggingface://mradermacher/L3-15B-MythicalMaid-t0.0001-GGUF/L3-15B-MythicalMaid-t0.0001.Q4_K_M.gguf
- !!merge <<: *llama3
name: "l3-15b-etherealmaid-t0.0001-i1"
icon: https://cdn-uploads.huggingface.co/production/uploads/64f74b6e6389380c77562762/FwYXt2h_FdmlL0Z6qYufz.png
@@ -3923,19 +3286,6 @@
- filename: calme-2.4-llama3-70b.Q4_K_M.gguf
sha256: 0b44ac8a88395dfc60f1b9d3cfffc0ffef74ec0a302e610ef91fc787187568f2
uri: huggingface://mradermacher/calme-2.4-llama3-70b-GGUF/calme-2.4-llama3-70b.Q4_K_M.gguf
- !!merge <<: *llama3
name: "meta-llama-3-instruct-8.9b-brainstorm-5x-form-11"
urls:
- https://huggingface.co/DavidAU/Meta-Llama-3-Instruct-8.9B-BRAINSTORM-5x-FORM-11-GGUF
description: |
Meta-Llama-3-8B Instruct (now at 8.9B) is an enhanced version of the LLM model, specifically designed for creative use cases such as story writing, roleplaying, and fiction. This model has been augmented through the "Brainstorm" process, which involves expanding and calibrating the reasoning center of the LLM to improve its performance in various creative tasks. The enhancements brought by this process include more detailed and nuanced descriptions, stronger prose, and a greater sense of immersion in the story. The model is capable of generating long and vivid content, with fewer clichés and more focused, coherent narratives. Users can provide more instructions and details to elicit stronger and more engaging responses from the model. The "Brainstorm" process has been tested on multiple LLM models, including Llama2, Llama3, and Mistral, as well as on individual models like Llama3 Instruct, Mistral Instruct, and custom fine-tuned models.
overrides:
parameters:
model: Meta-Llama-3-8B-Instruct-exp5-11-Q4_K_M.gguf
files:
- filename: Meta-Llama-3-8B-Instruct-exp5-11-Q4_K_M.gguf
sha256: 5dd81b8b809667d10036499affdd1461cf95af50b405cbc9f800b421a4b60e98
uri: huggingface://DavidAU/Meta-Llama-3-Instruct-8.9B-BRAINSTORM-5x-FORM-11-GGUF/Meta-Llama-3-8B-Instruct-exp5-11-Q4_K_M.gguf
- &command-R
### START Command-r
url: "github:mudler/LocalAI/gallery/command-r.yaml@master"
@@ -4178,8 +3528,8 @@
model: Phi-3.1-mini-4k-instruct-Q4_K_M.gguf
files:
- filename: Phi-3.1-mini-4k-instruct-Q4_K_M.gguf
sha256: 39458b227a4be763b7eb39d306d240c3d45205e3f8b474ec7bdca7bba0158e69
uri: huggingface://bartowski/Phi-3.1-mini-4k-instruct-GGUF/Phi-3.1-mini-4k-instruct-Q4_K_M.gguf
sha256: d6d25bf078321bea4a079c727b273cb0b5a2e0b4cf3add0f7a2c8e43075c414f
- !!merge <<: *phi-3
name: "phillama-3.8b-v0.1"
icon: https://cdn-uploads.huggingface.co/production/uploads/657eb5b256c9c67605a6e8b5/f96pPiJQb3puzbPYNknG2.png
@@ -4504,28 +3854,6 @@
- filename: "Codestral-22B-v0.1-Q4_K_M.gguf"
uri: "huggingface://bartowski/Codestral-22B-v0.1-GGUF/Codestral-22B-v0.1-Q4_K_M.gguf"
sha256: 003e48ed892850b80994fcddca2bd6b833b092a4ef2db2853c33a3144245e06c
- !!merge <<: *codellama
url: "github:mudler/LocalAI/gallery/alpaca.yaml@master"
icon: https://huggingface.co/Nan-Do/LeetCodeWizard_7B_V1.1/resolve/main/LeetCodeWizardLogo.png
name: "leetcodewizard_7b_v1.1-i1"
urls:
- https://huggingface.co/Nan-Do/LeetCodeWizard_7B_V1.1
- https://huggingface.co/mradermacher/LeetCodeWizard_7B_V1.1-i1-GGUF
description: |
LeetCodeWizard is a coding large language model specifically trained to solve and explain Leetcode (or any) programming problems.
This model is a fine-tuned version of the WizardCoder-Python-7B with a dataset of Leetcode problems.
Model capabilities:
It should be able to solve most of the problems found at Leetcode and even pass the sample interviews they offer on the site.
It can write both the code and the explanations for the solutions.
overrides:
parameters:
model: LeetCodeWizard_7B_V1.1.i1-Q4_K_M.gguf
files:
- filename: LeetCodeWizard_7B_V1.1.i1-Q4_K_M.gguf
sha256: 19720d8e1ba89d32c6f88ed6518caf0251f9e3ec011297929c801efc5ea979f4
uri: huggingface://mradermacher/LeetCodeWizard_7B_V1.1-i1-GGUF/LeetCodeWizard_7B_V1.1.i1-Q4_K_M.gguf
- &llm-compiler
url: "github:mudler/LocalAI/gallery/codellama.yaml@master"
name: "llm-compiler-13b-imat"

View File

@@ -31,7 +31,7 @@ config_file: |
{'title': 'FunctionCall', 'type': 'object', 'properties': {'arguments': {'title': 'Arguments', 'type': 'object'}, 'name': {'title': 'Name', 'type': 'string'}}, 'required': ['arguments', 'name']}<|eot_id|><|start_header_id|>assistant<|end_header_id|>
Function call:
chat: |
{{.Input }}
<|begin_of_text|>{{.Input }}
<|start_header_id|>assistant<|end_header_id|>
completion: |
{{.Input}}

View File

@@ -1,64 +0,0 @@
---
name: "llama3-instruct-grammar"
config_file: |
mmap: true
function:
disable_no_action: true
grammar:
no_mixed_free_string: true
mixed_mode: true
schema_type: llama3.1 # or JSON is supported too (json)
response_regex:
- <function=(?P<name>\w+)>(?P<arguments>.*)</function>
template:
chat_message: |
<|start_header_id|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}<|end_header_id|>
{{ if .FunctionCall -}}
Function call:
{{ else if eq .RoleName "tool" -}}
Function response:
{{ end -}}
{{ if .Content -}}
{{.Content -}}
{{ else if .FunctionCall -}}
{{ toJson .FunctionCall -}}
{{ end -}}
<|eot_id|>
function: |
<|start_header_id|>system<|end_header_id|>
You have access to the following functions:
{{range .Functions}}
Use the function '{{.Name}}' to '{{.Description}}'
{{toJson .Parameters}}
{{end}}
Think very carefully before calling functions.
If a you choose to call a function ONLY reply in the following format with no prefix or suffix:
<function=example_function_name>{{`{{"example_name": "example_value"}}`}}</function>
Reminder:
- If looking for real time information use relevant functions before falling back to searching on internet
- Function calls MUST follow the specified format, start with <function= and end with </function>
- Required parameters MUST be specified
- Only call one function at a time
- Put the entire function call reply on one line
<|eot_id|>
{{.Input }}
<|start_header_id|>assistant<|end_header_id|>
chat: |
{{.Input }}
<|start_header_id|>assistant<|end_header_id|>
completion: |
{{.Input}}
context_size: 8192
f16: true
stopwords:
- <|im_end|>
- <dummy32000>
- "<|eot_id|>"
- <|end_of_text|>

View File

@@ -1,62 +0,0 @@
---
name: "llama3-instruct"
config_file: |
mmap: true
function:
disable_no_action: true
grammar:
disable: true
response_regex:
- <function=(?P<name>\w+)>(?P<arguments>.*)</function>
template:
chat_message: |
<|start_header_id|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}<|end_header_id|>
{{ if .FunctionCall -}}
Function call:
{{ else if eq .RoleName "tool" -}}
Function response:
{{ end -}}
{{ if .Content -}}
{{.Content -}}
{{ else if .FunctionCall -}}
{{ toJson .FunctionCall -}}
{{ end -}}
<|eot_id|>
function: |
<|start_header_id|>system<|end_header_id|>
You have access to the following functions:
{{range .Functions}}
Use the function '{{.Name}}' to '{{.Description}}'
{{toJson .Parameters}}
{{end}}
Think very carefully before calling functions.
If a you choose to call a function ONLY reply in the following format with no prefix or suffix:
<function=example_function_name>{{`{{"example_name": "example_value"}}`}}</function>
Reminder:
- If looking for real time information use relevant functions before falling back to searching on internet
- Function calls MUST follow the specified format, start with <function= and end with </function>
- Required parameters MUST be specified
- Only call one function at a time
- Put the entire function call reply on one line
<|eot_id|>
{{.Input }}
<|start_header_id|>assistant<|end_header_id|>
chat: |
{{.Input }}
<|start_header_id|>assistant<|end_header_id|>
completion: |
{{.Input}}
context_size: 8192
f16: true
stopwords:
- <|im_end|>
- <dummy32000>
- "<|eot_id|>"
- <|end_of_text|>

View File

@@ -1,43 +0,0 @@
package functions
import (
"encoding/json"
"github.com/mudler/LocalAI/pkg/functions/grammars"
)
// Item is one schema alternative: a JSON "type" together with its
// "properties" map, serialized under the keys given by the struct tags.
type Item struct {
Type string `json:"type"`
Properties map[string]interface{} `json:"properties"`
}
// JSONFunctionStructure is the schema document that Grammar marshals to JSON
// before handing it to a SchemaConverter.
// OneOf, AnyOf and Defs are each dropped from the JSON output when empty
// (omitempty); Defs is emitted under the "$defs" key.
type JSONFunctionStructure struct {
OneOf []Item `json:"oneOf,omitempty"`
AnyOf []Item `json:"anyOf,omitempty"`
Defs map[string]interface{} `json:"$defs,omitempty"`
}
// Grammar serializes the receiver to JSON and converts it into a grammar string.
//
// The option functions are applied to a fresh grammars.GrammarOption, which
// selects the converter through NewSchemaConverter; the same option functions
// are then forwarded unchanged to the converter's GrammarFromBytes.
//
// It returns an empty string and the marshalling error if the receiver cannot
// be encoded as JSON; otherwise it returns whatever the converter returns.
func (j JSONFunctionStructure) Grammar(options ...func(*grammars.GrammarOption)) (string, error) {
	// Resolve the options first, exactly as before, so that any effects of
	// Apply happen ahead of marshalling.
	resolved := &grammars.GrammarOption{}
	resolved.Apply(options...)

	payload, marshalErr := json.Marshal(j)
	if marshalErr != nil {
		return "", marshalErr
	}

	return NewSchemaConverter(*resolved).GrammarFromBytes(payload, options...)
}
// SchemaConverter turns a serialized schema into a grammar string.
// NewSchemaConverter picks the concrete implementation from the grammars package.
type SchemaConverter interface {
// GrammarFromBytes receives the schema bytes together with grammar option
// functions and returns the resulting grammar or an error.
GrammarFromBytes([]byte, ...func(*grammars.GrammarOption)) (string, error)
}
// NewSchemaConverter selects the converter matching opt.SchemaType.
//
// When the schema type is grammars.LLama31Schema the LLama 3.1 converter is
// built from opt.FunctionName; every other schema type falls back to the JSON
// schema converter built from opt.PropOrder.
func NewSchemaConverter(opt grammars.GrammarOption) SchemaConverter {
	if opt.SchemaType == grammars.LLama31Schema {
		return grammars.NewLLama31SchemaConverter(opt.FunctionName)
	}

	// Default: plain JSON schema conversion.
	return grammars.NewJSONSchemaConverter(opt.PropOrder)
}

View File

@@ -18,15 +18,6 @@ type Function struct {
}
// Functions is a slice of Function values.
type Functions []Function
// FunctionName holds a single constant string, serialized under the JSON key "const".
// NOTE(review): presumably used to pin a function's name inside a generated schema — confirm against callers.
type FunctionName struct {
Const string `json:"const"`
}
// Argument pairs a JSON "type" with a free-form "properties" map.
// NOTE(review): appears to describe a function's argument object in a schema — confirm against callers.
type Argument struct {
Type string `json:"type"`
Properties map[string]interface{} `json:"properties"`
}
type Tool struct {
Type string `json:"type"`
Function Function `json:"function,omitempty"`

View File

@@ -1,4 +1,4 @@
package functions_test
package functions
import (
"testing"
@@ -7,7 +7,7 @@ import (
. "github.com/onsi/gomega"
)
func TestFunctions(t *testing.T) {
func TestGrammar(t *testing.T) {
RegisterFailHandler(Fail)
RunSpecs(t, "Functions test suite")
RunSpecs(t, "Grammar test suite")
}

View File

@@ -0,0 +1,378 @@
package functions
// a golang port of https://github.com/ggerganov/llama.cpp/pull/1887
import (
"encoding/json"
"fmt"
"regexp"
"sort"
"strings"
"github.com/mudler/LocalAI/pkg/utils"
)
const (
// JSONBNF is grammar text describing generic JSON values. Its root rule is
// "object", and every terminal is followed by the optional-whitespace rule
// "ws" (space, tab or newline).
JSONBNF = `root ::= object
value ::= object | array | string | number | ("true" | "false" | "null") ws
object ::=
"{" ws (
string ":" ws value
("," ws string ":" ws value)*
)? "}" ws
array ::=
"[" ws (
value
("," ws value)*
)? "]" ws
string ::=
"\"" (
[^"\\] |
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) # escapes
)* "\"" ws
number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? ws
ws ::= ([ \t\n] ws)?`
)
var (
// SPACE_RULE is the body of the "space" rule: a single optional space.
SPACE_RULE = `" "?`
// PRIMITIVE_RULES maps a schema type name to the grammar rule body emitted
// for it. "freestring" is not a JSON type; it is registered by Grammar and
// referenced by the root rules built in finalizeGrammar.
PRIMITIVE_RULES = map[string]string{
"boolean": `("true" | "false") space`,
"number": `("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space`,
"integer": `("-"? ([0-9] | [1-9] [0-9]*)) space`,
"string": `"\"" (
[^"\\] |
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
)* "\"" space`,
// TODO: we shouldn't forbid \" and \\ or all unicode and have this branch here,
// however, if we don't have it, the grammar will be ambiguous and
// empirically results are way worse.
"freestring": `(
[^\x00] |
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
)* space`,
"null": `"null" space`,
}
// INVALID_RULE_CHARS_RE matches runs of characters other than ASCII letters,
// digits and "-"; addRule replaces each run with "-".
INVALID_RULE_CHARS_RE = regexp.MustCompile(`[^a-zA-Z0-9-]+`)
// GRAMMAR_LITERAL_ESCAPE_RE matches the characters that formatLiteral
// rewrites using GRAMMAR_LITERAL_ESCAPES.
GRAMMAR_LITERAL_ESCAPE_RE = regexp.MustCompile(`[\r\n"]`)
// GRAMMAR_LITERAL_ESCAPES maps each matched character to its escaped form.
GRAMMAR_LITERAL_ESCAPES = map[string]string{
"\r": `\r`,
"\n": `\n`,
`"`: `\"`,
}
)
// JSONSchemaConverter accumulates grammar rules while walking a JSON schema.
type JSONSchemaConverter struct {
// propOrder maps a property name to its index in the comma-separated list
// given to NewJSONSchemaConverter; visit consults it when sorting properties.
propOrder map[string]int
// rules maps a rule name to its rule body.
rules map[string]string
}
// NewJSONSchemaConverter builds a converter whose rule set already contains
// the "space" rule. propOrder is a comma-separated list of property names;
// each name is recorded with its zero-based position in that list.
func NewJSONSchemaConverter(propOrder string) *JSONSchemaConverter {
	positions := map[string]int{}
	for position, propName := range strings.Split(propOrder, ",") {
		positions[propName] = position
	}

	return &JSONSchemaConverter{
		propOrder: positions,
		rules:     map[string]string{"space": SPACE_RULE},
	}
}
// formatLiteral renders literal as a quoted grammar terminal: the value is
// JSON-encoded, carriage returns, newlines and double quotes in the encoding
// are escaped, and the result is wrapped in double quotes.
func (sc *JSONSchemaConverter) formatLiteral(literal interface{}) string {
	encoded := jsonString(literal)
	escaped := GRAMMAR_LITERAL_ESCAPE_RE.ReplaceAllStringFunc(encoded, func(ch string) string {
		return GRAMMAR_LITERAL_ESCAPES[ch]
	})
	return `"` + escaped + `"`
}
// addRule stores rule under a sanitized version of name and returns the key
// actually used. If the sanitized name already holds a different rule, the
// first free key of the form "<name><n>" (n = 0, 1, 2, ...) is used instead.
func (sc *JSONSchemaConverter) addRule(name, rule string) string {
	sanitized := INVALID_RULE_CHARS_RE.ReplaceAllString(name, "-")
	key := sanitized

	if current, taken := sc.rules[sanitized]; taken && current != rule {
		for suffix := 0; ; suffix++ {
			key = fmt.Sprintf("%s%d", sanitized, suffix)
			if _, used := sc.rules[key]; !used {
				break
			}
		}
	}

	sc.rules[key] = rule
	return key
}
// arrayNewLines is the "arr" rule used by finalizeGrammar unless
// DisableParallelNewLines is set: a bracketed list of realvalue entries with a
// newline after "[" and after each separating comma.
const arrayNewLines = `arr ::=
"[\n" (
realvalue
(",\n" realvalue)*
)? "]"`
// array is the "arr" rule used by finalizeGrammar when DisableParallelNewLines
// is set: a bracketed, comma-separated list of realvalue entries without
// newlines.
const array = `arr ::=
"[" (
realvalue
("," realvalue)*
)? "]"`
// finalizeGrammar renders the collected rules as grammar text, one
// "name ::= body" entry per rule, joined by newlines.
//
// Rules are emitted sorted by name so that the same schema always produces the
// same text (ranging over the map directly yields a random order per call).
//
// When a prefix, MaybeArray or MaybeString is requested, the computed "root"
// rule is emitted as "realvalue" and a new "root" is appended, followed by the
// "arr" rule and a "mixedstring" rule. Without any of these options the rules
// are returned as they are.
func (sc *JSONSchemaConverter) finalizeGrammar(options ...func(*GrammarOption)) string {
	grammarOpts := &GrammarOption{}
	grammarOpts.Apply(options...)

	prefix := grammarOpts.Prefix
	maybeArray := grammarOpts.MaybeArray
	maybeString := grammarOpts.MaybeString

	swapRoot := maybeArray || maybeString || prefix != ""

	names := make([]string, 0, len(sc.rules))
	for name := range sc.rules {
		names = append(names, name)
	}
	sort.Strings(names)

	// Write down the computed rules; the schema root becomes "realvalue" when
	// a wrapping root is going to be generated below.
	lines := make([]string, 0, len(names)+3)
	for _, name := range names {
		rule := sc.rules[name]
		if swapRoot && name == "root" {
			name = "realvalue"
		}
		lines = append(lines, fmt.Sprintf("%s ::= %s", name, rule))
	}

	if !swapRoot {
		return strings.Join(lines, "\n")
	}

	newRoot := "realvalue"
	if maybeArray {
		newRoot = "arr | realvalue"
	}

	freestringRule := "mixedstring"
	if grammarOpts.NoMixedFreeString {
		freestringRule = "freestring"
	}

	if prefix != "" {
		// Newlines in the prefix must be escaped to fit in a grammar literal.
		prefix = utils.EscapeNewLines(prefix)

		if maybeArray && maybeString {
			newRoot = "(" + newRoot + ")"
		}

		if maybeString {
			newRoot = "( \"" + prefix + "\" " + newRoot + " | " + freestringRule + " ) "
		} else {
			newRoot = "\"" + prefix + "\" " + newRoot
		}
	} else if maybeString {
		newRoot = freestringRule + " | " + newRoot
	}

	lines = append(lines, fmt.Sprintf("%s ::= %s", "root", newRoot))

	if grammarOpts.DisableParallelNewLines {
		lines = append(lines, array)
	} else {
		lines = append(lines, arrayNewLines)
	}

	if maybeArray {
		if grammarOpts.ExpectStringsAfterJSON {
			lines = append(lines, `mixedstring ::= freestring | freestring arr freestring | (freestring realvalue freestring)* | realvalue | arr`)
		} else {
			lines = append(lines, `mixedstring ::= freestring | freestring arr | freestring realvalue | realvalue | arr`)
		}
	} else {
		if grammarOpts.ExpectStringsAfterJSON {
			lines = append(lines, `mixedstring ::= freestring | (freestring realvalue freestring)* | realvalue`)
		} else {
			lines = append(lines, `mixedstring ::= freestring | freestring realvalue | realvalue`)
		}
	}

	return strings.Join(lines, "\n")
}
// visit converts schema into grammar rules, registers them on the converter
// and returns the name of the rule that represents schema.
//
// name is the rule name to use ("" means the root rule) and rootSchema is the
// document used to resolve "$ref" entries. Handled in this order: oneOf/anyOf,
// $ref, const, enum, objects with properties, arrays with items, and finally
// the primitive types listed in PRIMITIVE_RULES. It panics on a schema it does
// not recognize and on values of an unexpected Go type.
func (sc *JSONSchemaConverter) visit(schema map[string]interface{}, name string, rootSchema map[string]interface{}) string {
	st, existType := schema["type"]
	var schemaType string
	if existType {
		schemaType = st.(string)
	}
	ruleName := name
	if name == "" {
		ruleName = "root"
	}
	_, oneOfExists := schema["oneOf"]
	_, anyOfExists := schema["anyOf"]
	if oneOfExists || anyOfExists {
		var alternatives []string
		oneOfSchemas, oneOfExists := schema["oneOf"].([]interface{})
		anyOfSchemas, anyOfExists := schema["anyOf"].([]interface{})

		if oneOfExists {
			for i, altSchema := range oneOfSchemas {
				alternative := sc.visit(altSchema.(map[string]interface{}), fmt.Sprintf("%s-%d", ruleName, i), rootSchema)
				alternatives = append(alternatives, alternative)
			}
		} else if anyOfExists {
			for i, altSchema := range anyOfSchemas {
				alternative := sc.visit(altSchema.(map[string]interface{}), fmt.Sprintf("%s-%d", ruleName, i), rootSchema)
				alternatives = append(alternatives, alternative)
			}
		}

		rule := strings.Join(alternatives, " | ")
		return sc.addRule(ruleName, rule)
	} else if ref, exists := schema["$ref"].(string); exists {
		referencedSchema := sc.resolveReference(ref, rootSchema)
		return sc.visit(referencedSchema, name, rootSchema)
	} else if constVal, exists := schema["const"]; exists {
		return sc.addRule(ruleName, sc.formatLiteral(constVal))
	} else if enumVals, exists := schema["enum"].([]interface{}); exists {
		var enumRules []string
		for _, enumVal := range enumVals {
			enumRule := sc.formatLiteral(enumVal)
			enumRules = append(enumRules, enumRule)
		}
		rule := strings.Join(enumRules, " | ")
		return sc.addRule(ruleName, rule)
	} else if properties, exists := schema["properties"].(map[string]interface{}); schemaType == "object" && exists {
		propOrder := sc.propOrder
		var propPairs []struct {
			propName   string
			propSchema map[string]interface{}
		}

		for propName, propSchema := range properties {
			propPairs = append(propPairs, struct {
				propName   string
				propSchema map[string]interface{}
			}{propName: propName, propSchema: propSchema.(map[string]interface{})})
		}

		// Properties listed in propOrder come first, in the listed order;
		// everything else follows alphabetically. Presence in the map is what
		// marks a property as listed: index 0 is a valid position, so the
		// index value itself must not be used as the "listed" test.
		const unlisted = int(^uint(0) >> 1)
		rank := func(propName string) int {
			if idx, listed := propOrder[propName]; listed {
				return idx
			}
			return unlisted
		}
		sort.Slice(propPairs, func(i, j int) bool {
			iOrder := rank(propPairs[i].propName)
			jOrder := rank(propPairs[j].propName)
			if iOrder != jOrder {
				return iOrder < jOrder
			}
			return propPairs[i].propName < propPairs[j].propName
		})

		var rule strings.Builder
		rule.WriteString(`"{" space`)

		for i, propPair := range propPairs {
			propName := propPair.propName
			propSchema := propPair.propSchema
			propRuleName := sc.visit(propSchema, fmt.Sprintf("%s-%s", ruleName, propName), rootSchema)

			if i > 0 {
				rule.WriteString(` "," space`)
			}

			rule.WriteString(fmt.Sprintf(` %s space ":" space %s`, sc.formatLiteral(propName), propRuleName))
		}

		rule.WriteString(` "}" space`)
		return sc.addRule(ruleName, rule.String())
	} else if items, exists := schema["items"].(map[string]interface{}); schemaType == "array" && exists {
		itemRuleName := sc.visit(items, fmt.Sprintf("%s-item", ruleName), rootSchema)
		rule := fmt.Sprintf(`"[" space (%s ("," space %s)*)? "]" space`, itemRuleName, itemRuleName)
		return sc.addRule(ruleName, rule)
	} else {
		primitiveRule, exists := PRIMITIVE_RULES[schemaType]
		if !exists {
			panic(fmt.Sprintf("Unrecognized schema: %v", schema))
		}
		// A primitive at the top level is registered as "root"; otherwise the
		// rule is shared under the type name.
		if ruleName == "root" {
			schemaType = "root"
		}
		return sc.addRule(schemaType, primitiveRule)
	}
}
// resolveReference returns the schema that ref points to inside rootSchema.
//
// Only references of the form "#/$defs/<key>" are supported. It panics when
// ref has another format, when rootSchema has no "$defs" object, or when the
// key is missing from it. The offending data is included in the panic message
// rather than being printed to stdout.
func (sc *JSONSchemaConverter) resolveReference(ref string, rootSchema map[string]interface{}) map[string]interface{} {
	if !strings.HasPrefix(ref, "#/$defs/") {
		panic(fmt.Sprintf("Invalid reference format: %s", ref))
	}

	defKey := strings.TrimPrefix(ref, "#/$defs/")
	definitions, exists := rootSchema["$defs"].(map[string]interface{})
	if !exists {
		panic(fmt.Sprintf("No definitions found in the schema: %v", rootSchema))
	}

	def, exists := definitions[defKey].(map[string]interface{})
	if !exists {
		panic(fmt.Sprintf("Definition not found: %s (available definitions: %v)", defKey, definitions))
	}

	return def
}
// Grammar registers the "freestring" rule, walks schema starting at the root
// rule and returns the finalized grammar text built with options.
func (sc *JSONSchemaConverter) Grammar(schema map[string]interface{}, options ...func(*GrammarOption)) string {
	freestring := PRIMITIVE_RULES["freestring"]
	sc.addRule("freestring", freestring)

	_ = sc.visit(schema, "", schema)

	return sc.finalizeGrammar(options...)
}
// GrammarFromBytes decodes b as a JSON schema object and returns its grammar.
//
// It panics when b cannot be decoded. Previously the decode error was dropped
// and the resulting nil schema made visit panic with an unrelated
// "Unrecognized schema" message; the panic now reports the real cause.
func (sc *JSONSchemaConverter) GrammarFromBytes(b []byte, options ...func(*GrammarOption)) string {
	var schema map[string]interface{}
	if err := json.Unmarshal(b, &schema); err != nil {
		panic(fmt.Sprintf("Invalid JSON schema: %v", err))
	}
	return sc.Grammar(schema, options...)
}
// jsonString returns the JSON encoding of v as a string, or the empty string
// when v cannot be encoded.
func jsonString(v interface{}) string {
	encoded, err := json.Marshal(v)
	if err != nil {
		return ""
	}
	return string(encoded)
}
// FunctionName serializes to a JSON object with a single "const" key.
type FunctionName struct {
// Const is serialized under the JSON key "const".
Const string `json:"const"`
}
// Argument serializes to a JSON object with "type" and "properties" keys.
type Argument struct {
// Type is serialized under the JSON key "type".
Type string `json:"type"`
// Properties is serialized under the JSON key "properties".
Properties map[string]interface{} `json:"properties"`
}
// Item is one alternative schema entry; JSONFunctionStructure holds slices of
// Item in its OneOf and AnyOf fields.
type Item struct {
// Type is serialized under the JSON key "type".
Type string `json:"type"`
// Properties is serialized under the JSON key "properties".
Properties map[string]interface{} `json:"properties"`
}
// JSONFunctionStructure is the schema document that its Grammar method
// marshals to JSON and converts with a JSONSchemaConverter.
type JSONFunctionStructure struct {
// OneOf is serialized as "oneOf" and omitted when empty.
OneOf []Item `json:"oneOf,omitempty"`
// AnyOf is serialized as "anyOf" and omitted when empty.
AnyOf []Item `json:"anyOf,omitempty"`
// Defs is serialized as "$defs" and omitted when empty.
Defs map[string]interface{} `json:"$defs,omitempty"`
}
// Grammar serializes the structure to JSON and converts it to grammar text
// with a JSONSchemaConverter configured from the PropOrder option. The options
// are also forwarded unchanged to the converter. A marshalling error is
// ignored, in which case the converter receives whatever bytes were produced.
func (j JSONFunctionStructure) Grammar(options ...func(*GrammarOption)) string {
	var resolved GrammarOption
	resolved.Apply(options...)

	payload, _ := json.Marshal(j)

	converter := NewJSONSchemaConverter(resolved.PropOrder)
	return converter.GrammarFromBytes(payload, options...)
}

View File

@@ -1,14 +1,24 @@
package grammars_test
package functions_test
import (
"strings"
"github.com/mudler/LocalAI/pkg/functions"
. "github.com/mudler/LocalAI/pkg/functions"
. "github.com/mudler/LocalAI/pkg/functions/grammars"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)
// createFunction builds a two-entry property map for the tests: field1 maps to
// a FunctionName constant holding name, and field2 maps to an object-typed
// Argument carrying properties.
func createFunction(field1 string, field2 string, name string, properties map[string]interface{}) map[string]interface{} {
	return map[string]interface{}{
		field1: FunctionName{Const: name},
		field2: Argument{Type: "object", Properties: properties},
	}
}
var testFunctions = []Item{
{
Type: "object",
@@ -235,8 +245,7 @@ root-1-name ::= "\"search\""`
var _ = Describe("JSON schema grammar tests", func() {
Context("JSON", func() {
It("generates a valid grammar from JSON schema", func() {
grammar, err := NewJSONSchemaConverter("").GrammarFromBytes([]byte(testInput1))
Expect(err).To(BeNil())
grammar := NewJSONSchemaConverter("").GrammarFromBytes([]byte(testInput1))
results := strings.Split(inputResult1, "\n")
for _, r := range results {
if r != "" {
@@ -246,8 +255,7 @@ var _ = Describe("JSON schema grammar tests", func() {
Expect(len(results)).To(Equal(len(strings.Split(grammar, "\n"))))
})
It("generates a valid grammar from JSON schema", func() {
grammar, err := NewJSONSchemaConverter("").GrammarFromBytes([]byte(testInput2))
Expect(err).To(BeNil())
grammar := NewJSONSchemaConverter("").GrammarFromBytes([]byte(testInput2))
results := strings.Split(inputResult3, "\n")
for _, r := range results {
if r != "" {
@@ -261,8 +269,7 @@ var _ = Describe("JSON schema grammar tests", func() {
structuredGrammar := JSONFunctionStructure{
OneOf: testFunctions}
grammar, err := structuredGrammar.Grammar()
Expect(err).To(BeNil())
grammar := structuredGrammar.Grammar()
results := strings.Split(inputResult1, "\n")
for _, r := range results {
if r != "" {
@@ -276,8 +283,7 @@ var _ = Describe("JSON schema grammar tests", func() {
structuredGrammar := JSONFunctionStructure{
OneOf: testFunctions}
grammar, err := structuredGrammar.Grammar(EnableMaybeArray)
Expect(err).To(BeNil())
grammar := structuredGrammar.Grammar(functions.EnableMaybeArray)
results := strings.Split(
strings.Join([]string{
inputResult2,
@@ -295,8 +301,7 @@ var _ = Describe("JSON schema grammar tests", func() {
structuredGrammar := JSONFunctionStructure{
OneOf: testFunctionsName}
grammar, err := structuredGrammar.Grammar(EnableMaybeArray)
Expect(err).To(BeNil())
grammar := structuredGrammar.Grammar(functions.EnableMaybeArray)
results := strings.Split(
strings.Join([]string{
inputResult4,
@@ -314,11 +319,10 @@ var _ = Describe("JSON schema grammar tests", func() {
structuredGrammar := JSONFunctionStructure{
OneOf: testFunctionsName}
grammar, err := structuredGrammar.Grammar(
SetPrefix("suffix"),
EnableMaybeArray,
grammar := structuredGrammar.Grammar(
functions.SetPrefix("suffix"),
functions.EnableMaybeArray,
)
Expect(err).To(BeNil())
results := strings.Split(
strings.Join([]string{
rootResult(`"suffix" arr | realvalue`),
@@ -335,8 +339,7 @@ var _ = Describe("JSON schema grammar tests", func() {
structuredGrammar := JSONFunctionStructure{
OneOf: testFunctionsName}
grammar, err := structuredGrammar.Grammar(SetPrefix("suffix"))
Expect(err).To(BeNil())
grammar := structuredGrammar.Grammar(functions.SetPrefix("suffix"))
results := strings.Split(
strings.Join([]string{
rootResult(`"suffix" realvalue`),
@@ -353,8 +356,7 @@ var _ = Describe("JSON schema grammar tests", func() {
structuredGrammar := JSONFunctionStructure{
OneOf: testFunctionsName}
grammar, err := structuredGrammar.Grammar(SetPrefix("suffix"), EnableMaybeString)
Expect(err).To(BeNil())
grammar := structuredGrammar.Grammar(functions.SetPrefix("suffix"), functions.EnableMaybeString)
results := strings.Split(
strings.Join([]string{
rootResult(`( "suffix" realvalue | mixedstring )`),
@@ -371,8 +373,7 @@ var _ = Describe("JSON schema grammar tests", func() {
structuredGrammar := JSONFunctionStructure{
OneOf: testFunctionsName}
grammar, err := structuredGrammar.Grammar(SetPrefix("suffix"), EnableMaybeString, EnableMaybeArray)
Expect(err).To(BeNil())
grammar := structuredGrammar.Grammar(functions.SetPrefix("suffix"), functions.EnableMaybeString, functions.EnableMaybeArray)
results := strings.Split(
strings.Join([]string{
rootResult(`( "suffix" (arr | realvalue) | mixedstring )`),
@@ -391,8 +392,7 @@ var _ = Describe("JSON schema grammar tests", func() {
structuredGrammar := JSONFunctionStructure{
OneOf: testFunctionsName}
grammar, err := structuredGrammar.Grammar(EnableMaybeString, EnableMaybeArray)
Expect(err).To(BeNil())
grammar := structuredGrammar.Grammar(functions.EnableMaybeString, functions.EnableMaybeArray)
results := strings.Split(
strings.Join([]string{
rootResult(`mixedstring | arr | realvalue`),
@@ -410,8 +410,7 @@ var _ = Describe("JSON schema grammar tests", func() {
structuredGrammar := JSONFunctionStructure{
OneOf: testFunctionsName}
grammar, err := structuredGrammar.Grammar(EnableMaybeString, EnableMaybeArray, NoMixedFreeString)
Expect(err).To(BeNil())
grammar := structuredGrammar.Grammar(functions.EnableMaybeString, functions.EnableMaybeArray, functions.NoMixedFreeString)
results := strings.Split(
strings.Join([]string{
rootResult(`freestring | arr | realvalue`),
@@ -433,8 +432,7 @@ var _ = Describe("JSON schema grammar tests", func() {
realvalue
("," realvalue)*
)? "]"`
grammar, err := structuredGrammar.Grammar(EnableMaybeString, EnableMaybeArray, DisableParallelNewLines)
Expect(err).To(BeNil())
grammar := structuredGrammar.Grammar(functions.EnableMaybeString, functions.EnableMaybeArray, functions.DisableParallelNewLines)
results := strings.Split(content, "\n")
for _, r := range results {
if r != "" {

View File

@@ -1,58 +0,0 @@
package grammars
import (
"encoding/json"
"regexp"
)
var (
// PRIMITIVE_RULES maps a schema type name to the grammar rule body emitted
// for it. "freestring" is not a JSON type; the converters' Grammar methods
// register it explicitly.
PRIMITIVE_RULES = map[string]string{
"boolean": `("true" | "false") space`,
"number": `("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space`,
"integer": `("-"? ([0-9] | [1-9] [0-9]*)) space`,
"string": `"\"" (
[^"\\] |
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
)* "\"" space`,
// TODO: we shouldn't forbid \" and \\ or all unicode and have this branch here,
// however, if we don't have it, the grammar will be ambiguous and
// empirically results are way worse.
"freestring": `(
[^\x00] |
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
)* space`,
"null": `"null" space`,
}
// INVALID_RULE_CHARS_RE matches runs of characters other than ASCII letters,
// digits and "-".
INVALID_RULE_CHARS_RE = regexp.MustCompile(`[^a-zA-Z0-9-]+`)
// GRAMMAR_LITERAL_ESCAPE_RE matches carriage return, newline and double quote.
GRAMMAR_LITERAL_ESCAPE_RE = regexp.MustCompile(`[\r\n"]`)
// GRAMMAR_LITERAL_ESCAPES maps each of those characters to its escaped form.
GRAMMAR_LITERAL_ESCAPES = map[string]string{
"\r": `\r`,
"\n": `\n`,
`"`: `\"`,
}
)
const (
// SPACE_RULE is the body of the "space" rule: a single optional space.
SPACE_RULE = `" "?`
// arrayNewLines is an "arr" rule: a bracketed list of realvalue entries with
// a newline after "[" and after each separating comma.
arrayNewLines = `arr ::=
"[\n" (
realvalue
(",\n" realvalue)*
)? "]"`
// array is the same "arr" rule without the newlines.
array = `arr ::=
"[" (
realvalue
("," realvalue)*
)? "]"`
)
// jsonString returns the JSON encoding of v as a string. When encoding fails
// it returns the empty string together with the encoder's error.
func jsonString(v interface{}) (string, error) {
	encoded, err := json.Marshal(v)
	if err == nil {
		return string(encoded), nil
	}
	return "", err
}

View File

@@ -1,25 +0,0 @@
package grammars_test
import (
"testing"
. "github.com/mudler/LocalAI/pkg/functions"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)
// TestGrammar is the go test entry point for this package's spec suite: it
// registers Fail as the failure handler and runs the specs under the name
// "Grammar test suite".
func TestGrammar(t *testing.T) {
RegisterFailHandler(Fail)
RunSpecs(t, "Grammar test suite")
}
// createFunction builds a two-entry property map for the tests: field1 maps to
// a FunctionName constant holding name, and field2 maps to an object-typed
// Argument carrying properties.
func createFunction(field1 string, field2 string, name string, properties map[string]interface{}) map[string]interface{} {
	return map[string]interface{}{
		field1: FunctionName{Const: name},
		field2: Argument{Type: "object", Properties: properties},
	}
}

View File

@@ -1,220 +0,0 @@
package grammars
// a golang port of https://github.com/ggerganov/llama.cpp/pull/1887
import (
"encoding/json"
"fmt"
"sort"
"strings"
)
// JSONSchemaConverter accumulates grammar rules while walking a JSON schema.
type JSONSchemaConverter struct {
// propOrder maps a property name to its index in the comma-separated list
// given to NewJSONSchemaConverter; visit consults it when sorting properties.
propOrder map[string]int
// rules holds the rules collected so far, keyed by rule name.
rules Rules
}
// NewJSONSchemaConverter builds a converter whose rule set already contains
// the "space" rule. propOrder is a comma-separated list of property names;
// each name is recorded with its zero-based position in that list.
func NewJSONSchemaConverter(propOrder string) *JSONSchemaConverter {
	positions := map[string]int{}
	for position, propName := range strings.Split(propOrder, ",") {
		positions[propName] = position
	}

	return &JSONSchemaConverter{
		propOrder: positions,
		rules:     map[string]string{"space": SPACE_RULE},
	}
}
// formatLiteral renders literal as a quoted grammar terminal: the value is
// JSON-encoded, carriage returns, newlines and double quotes in the encoding
// are escaped, and the result is wrapped in double quotes. An encoding error
// is returned unchanged.
func (sc *JSONSchemaConverter) formatLiteral(literal interface{}) (string, error) {
	encoded, err := jsonString(literal)
	if err != nil {
		return "", err
	}

	replaceOne := func(ch string) string { return GRAMMAR_LITERAL_ESCAPES[ch] }
	escaped := GRAMMAR_LITERAL_ESCAPE_RE.ReplaceAllStringFunc(encoded, replaceOne)

	return `"` + escaped + `"`, nil
}
// addRule stores rule under a sanitized version of name and returns the key
// actually used. If the sanitized name already holds a different rule, the
// first free key of the form "<name><n>" (n = 0, 1, 2, ...) is used instead.
func (sc *JSONSchemaConverter) addRule(name, rule string) string {
	sanitized := INVALID_RULE_CHARS_RE.ReplaceAllString(name, "-")
	key := sanitized

	if current, taken := sc.rules[sanitized]; taken && current != rule {
		for suffix := 0; ; suffix++ {
			key = fmt.Sprintf("%s%d", sanitized, suffix)
			if _, used := sc.rules[key]; !used {
				break
			}
		}
	}

	sc.rules[key] = rule
	return key
}
// visit converts schema into grammar rules, registers them on the converter
// and returns the name of the rule that represents schema.
//
// name is the rule name to use ("" means the root rule) and rootSchema is the
// document used to resolve "$ref" entries. Handled in this order: oneOf/anyOf,
// $ref, const, enum, objects with properties, arrays with items, and finally
// the primitive types listed in PRIMITIVE_RULES.
//
// Malformed input (a non-string "type", a non-object alternative or property
// schema, an unknown type) is reported as an error instead of panicking.
func (sc *JSONSchemaConverter) visit(schema map[string]interface{}, name string, rootSchema map[string]interface{}) (string, error) {
	var schemaType string
	if st, existType := schema["type"]; existType {
		typed, ok := st.(string)
		if !ok {
			return "", fmt.Errorf("unsupported schema type %v: expected a string", st)
		}
		schemaType = typed
	}

	ruleName := name
	if name == "" {
		ruleName = "root"
	}

	_, oneOfExists := schema["oneOf"]
	_, anyOfExists := schema["anyOf"]
	if oneOfExists || anyOfExists {
		// oneOf takes precedence; anyOf is only used when oneOf is not a list.
		alternativeSchemas, isList := schema["oneOf"].([]interface{})
		if !isList {
			alternativeSchemas, _ = schema["anyOf"].([]interface{})
		}

		var alternatives []string
		for i, altSchema := range alternativeSchemas {
			alt, ok := altSchema.(map[string]interface{})
			if !ok {
				return "", fmt.Errorf("alternative %d of %s is not a schema object: %v", i, ruleName, altSchema)
			}
			alternative, err := sc.visit(alt, fmt.Sprintf("%s-%d", ruleName, i), rootSchema)
			if err != nil {
				return "", err
			}
			alternatives = append(alternatives, alternative)
		}

		return sc.addRule(ruleName, strings.Join(alternatives, " | ")), nil
	}

	if ref, exists := schema["$ref"].(string); exists {
		referencedSchema, err := sc.resolveReference(ref, rootSchema)
		if err != nil {
			return "", err
		}
		return sc.visit(referencedSchema, name, rootSchema)
	}

	if constVal, exists := schema["const"]; exists {
		literal, err := sc.formatLiteral(constVal)
		if err != nil {
			return "", err
		}
		return sc.addRule(ruleName, literal), nil
	}

	if enumVals, exists := schema["enum"].([]interface{}); exists {
		var enumRules []string
		for _, enumVal := range enumVals {
			enumRule, err := sc.formatLiteral(enumVal)
			if err != nil {
				return "", err
			}
			enumRules = append(enumRules, enumRule)
		}
		return sc.addRule(ruleName, strings.Join(enumRules, " | ")), nil
	}

	if properties, exists := schema["properties"].(map[string]interface{}); schemaType == "object" && exists {
		propOrder := sc.propOrder
		var propPairs []struct {
			propName   string
			propSchema map[string]interface{}
		}

		for propName, propSchema := range properties {
			typedSchema, ok := propSchema.(map[string]interface{})
			if !ok {
				return "", fmt.Errorf("property %q of %s is not a schema object: %v", propName, ruleName, propSchema)
			}
			propPairs = append(propPairs, struct {
				propName   string
				propSchema map[string]interface{}
			}{propName: propName, propSchema: typedSchema})
		}

		// Properties listed in propOrder come first, in the listed order;
		// everything else follows alphabetically. Presence in the map is what
		// marks a property as listed: index 0 is a valid position, so the
		// index value itself must not be used as the "listed" test.
		const unlisted = int(^uint(0) >> 1)
		rank := func(propName string) int {
			if idx, listed := propOrder[propName]; listed {
				return idx
			}
			return unlisted
		}
		sort.Slice(propPairs, func(i, j int) bool {
			iOrder := rank(propPairs[i].propName)
			jOrder := rank(propPairs[j].propName)
			if iOrder != jOrder {
				return iOrder < jOrder
			}
			return propPairs[i].propName < propPairs[j].propName
		})

		var rule strings.Builder
		rule.WriteString(`"{" space`)

		for i, propPair := range propPairs {
			propName := propPair.propName
			propSchema := propPair.propSchema
			propRuleName, err := sc.visit(propSchema, fmt.Sprintf("%s-%s", ruleName, propName), rootSchema)
			if err != nil {
				return "", err
			}
			lPropName, err := sc.formatLiteral(propName)
			if err != nil {
				return "", err
			}
			if i > 0 {
				rule.WriteString(` "," space`)
			}

			rule.WriteString(fmt.Sprintf(` %s space ":" space %s`, lPropName, propRuleName))
		}

		rule.WriteString(` "}" space`)
		return sc.addRule(ruleName, rule.String()), nil
	}

	if items, exists := schema["items"].(map[string]interface{}); schemaType == "array" && exists {
		itemRuleName, err := sc.visit(items, fmt.Sprintf("%s-item", ruleName), rootSchema)
		if err != nil {
			return "", err
		}
		rule := fmt.Sprintf(`"[" space (%s ("," space %s)*)? "]" space`, itemRuleName, itemRuleName)
		return sc.addRule(ruleName, rule), nil
	}

	primitiveRule, exists := PRIMITIVE_RULES[schemaType]
	if !exists {
		return "", fmt.Errorf("unrecognized schema: %v", schema)
	}
	// A primitive at the top level is registered as "root"; otherwise the rule
	// is shared under the type name.
	if ruleName == "root" {
		schemaType = "root"
	}
	return sc.addRule(schemaType, primitiveRule), nil
}
// resolveReference returns the schema that ref points to inside rootSchema.
//
// Only references of the form "#/$defs/<key>" are supported. An error is
// returned when ref has another format, when rootSchema has no "$defs" object,
// or when the key is missing from it.
func (sc *JSONSchemaConverter) resolveReference(ref string, rootSchema map[string]interface{}) (map[string]interface{}, error) {
	if !strings.HasPrefix(ref, "#/$defs/") {
		return nil, fmt.Errorf("invalid reference format: %s", ref)
	}

	defKey := strings.TrimPrefix(ref, "#/$defs/")
	definitions, exists := rootSchema["$defs"].(map[string]interface{})
	if !exists {
		// %v rather than %s: the schema is a map whose values are not
		// necessarily strings, and %s renders those as "%!s(...)".
		return nil, fmt.Errorf("no definitions found in the schema: %v", rootSchema)
	}

	def, exists := definitions[defKey].(map[string]interface{})
	if !exists {
		return nil, fmt.Errorf("definition not found: %s %+v", defKey, definitions)
	}

	return def, nil
}
// Grammar registers the "freestring" rule, walks schema starting at the root
// rule and renders the collected rules with options. A failure while walking
// the schema is returned as the error.
func (sc *JSONSchemaConverter) Grammar(schema map[string]interface{}, options ...func(*GrammarOption)) (string, error) {
	sc.addRule("freestring", PRIMITIVE_RULES["freestring"])

	if _, err := sc.visit(schema, "", schema); err != nil {
		return "", err
	}

	return sc.rules.ToGrammar(options...), nil
}
// GrammarFromBytes decodes b as a JSON schema object and returns its grammar.
// A decoding failure is returned as the error.
func (sc *JSONSchemaConverter) GrammarFromBytes(b []byte, options ...func(*GrammarOption)) (string, error) {
	var decoded map[string]interface{}
	if err := json.Unmarshal(b, &decoded); err != nil {
		return "", err
	}
	return sc.Grammar(decoded, options...)
}

View File

@@ -1,281 +0,0 @@
package grammars
import (
"encoding/json"
"fmt"
"regexp"
"sort"
"strings"
)
// LLama31SchemaConverter accumulates grammar rules while walking a JSON schema
// and emits function calls in the "<function=NAME>{...}</function>" form.
type LLama31SchemaConverter struct {
// fnName is the property name that visit treats as the function name.
fnName string
// rules holds the rules collected so far, keyed by rule name.
rules Rules
}
// NewLLama31SchemaConverter builds a converter whose rule set already contains
// the "space" rule. fnName is the property that holds the function name; an
// empty value falls back to "name".
func NewLLama31SchemaConverter(fnName string) *LLama31SchemaConverter {
	converter := &LLama31SchemaConverter{
		rules:  map[string]string{"space": SPACE_RULE},
		fnName: "name",
	}
	if fnName != "" {
		converter.fnName = fnName
	}
	return converter
}
// GRAMMAR_LITERAL_ESCAPESLlama maps carriage return and newline to their
// escaped forms. Unlike GRAMMAR_LITERAL_ESCAPES it has no entry for the double
// quote.
var GRAMMAR_LITERAL_ESCAPESLlama = map[string]string{
"\r": `\r`,
"\n": `\n`,
}
// GRAMMAR_LITERAL_ESCAPE_RELlama matches the characters rewritten through
// GRAMMAR_LITERAL_ESCAPESLlama.
var GRAMMAR_LITERAL_ESCAPE_RELlama = regexp.MustCompile(`[\r\n]`)
// formatLiteral returns the JSON encoding of literal with carriage returns and
// newlines escaped. The result is not wrapped in additional quotes, and double
// quotes in the encoding are left as they are. An encoding error is returned
// unchanged.
func (sc *LLama31SchemaConverter) formatLiteral(literal interface{}) (string, error) {
	encoded, err := jsonString(literal)
	if err != nil {
		return "", err
	}

	replaceOne := func(ch string) string { return GRAMMAR_LITERAL_ESCAPESLlama[ch] }
	return GRAMMAR_LITERAL_ESCAPE_RELlama.ReplaceAllStringFunc(encoded, replaceOne), nil
}
// formatLiteralQuoted renders literal as a quoted grammar terminal: the value
// is JSON-encoded, carriage returns, newlines and double quotes in the
// encoding are escaped, and the result is wrapped in double quotes. An
// encoding error is returned unchanged.
func (sc *LLama31SchemaConverter) formatLiteralQuoted(literal interface{}) (string, error) {
	encoded, err := jsonString(literal)
	if err != nil {
		return "", err
	}

	replaceOne := func(ch string) string { return GRAMMAR_LITERAL_ESCAPES[ch] }
	escaped := GRAMMAR_LITERAL_ESCAPE_RE.ReplaceAllStringFunc(encoded, replaceOne)

	return `"` + escaped + `"`, nil
}
// addRule stores rule under a sanitized version of name and returns the key
// actually used. If the sanitized name already holds a different rule, the
// first free key of the form "<name><n>" (n = 0, 1, 2, ...) is used instead.
func (sc *LLama31SchemaConverter) addRule(name, rule string) string {
	sanitized := INVALID_RULE_CHARS_RE.ReplaceAllString(name, "-")
	key := sanitized

	if current, taken := sc.rules[sanitized]; taken && current != rule {
		for suffix := 0; ; suffix++ {
			key = fmt.Sprintf("%s%d", sanitized, suffix)
			if _, used := sc.rules[key]; !used {
				break
			}
		}
	}

	sc.rules[key] = rule
	return key
}
// visit converts schema into grammar rules, registers them on the converter
// and returns the name of the rule that represents schema.
//
// name is the rule name to use ("" means the root rule) and rootSchema is the
// document used to resolve "$ref" entries. Handled in this order: oneOf/anyOf,
// $ref, const, enum, objects with properties, arrays with items, and finally
// the primitive types listed in PRIMITIVE_RULES.
//
// Objects whose name has exactly two "-"-separated parts are rendered as
// `"<function=" NAME ">{" ... "}</function>"` instead of a JSON object.
// NOTE(review): the type assertions on "type", on alternatives and on
// property schemas are unchecked and panic on values of another type.
func (sc *LLama31SchemaConverter) visit(schema map[string]interface{}, name string, rootSchema map[string]interface{}) (string, error) {
st, existType := schema["type"]
var schemaType string
if existType {
schemaType = st.(string)
}
ruleName := name
if name == "" {
ruleName = "root"
}
_, oneOfExists := schema["oneOf"]
_, anyOfExists := schema["anyOf"]
if oneOfExists || anyOfExists {
var alternatives []string
// The typed re-lookups shadow the flags above; oneOf wins when it is a list.
oneOfSchemas, oneOfExists := schema["oneOf"].([]interface{})
anyOfSchemas, anyOfExists := schema["anyOf"].([]interface{})
if oneOfExists {
for i, altSchema := range oneOfSchemas {
alternative, err := sc.visit(altSchema.(map[string]interface{}), fmt.Sprintf("%s-%d", ruleName, i), rootSchema)
if err != nil {
return "", err
}
alternatives = append(alternatives, alternative)
}
} else if anyOfExists {
for i, altSchema := range anyOfSchemas {
alternative, err := sc.visit(altSchema.(map[string]interface{}), fmt.Sprintf("%s-%d", ruleName, i), rootSchema)
if err != nil {
return "", err
}
alternatives = append(alternatives, alternative)
}
}
rule := strings.Join(alternatives, " | ")
return sc.addRule(ruleName, rule), nil
} else if ref, exists := schema["$ref"].(string); exists {
referencedSchema, err := sc.resolveReference(ref, rootSchema)
if err != nil {
return "", err
}
return sc.visit(referencedSchema, name, rootSchema)
} else if constVal, exists := schema["const"]; exists {
// const values use the unquoted formatter, unlike enum values below.
literal, err := sc.formatLiteral((constVal))
if err != nil {
return "", err
}
return sc.addRule(ruleName, literal), nil
} else if enumVals, exists := schema["enum"].([]interface{}); exists {
var enumRules []string
for _, enumVal := range enumVals {
enumRule, err := sc.formatLiteralQuoted(enumVal)
if err != nil {
return "", err
}
enumRules = append(enumRules, enumRule)
}
rule := strings.Join(enumRules, " | ")
return sc.addRule(ruleName, rule), nil
} else if properties, exists := schema["properties"].(map[string]interface{}); schemaType == "object" && exists {
// A name with exactly two "-"-separated parts (for example "root-0", the
// name given to an alternative of the root) marks a function-call object.
baseProperty := false
depth := strings.Split(name, "-")
if len(depth) == 2 {
baseProperty = true
}
type propData []struct {
propName string
propSchema map[string]interface{}
}
var propPairs propData
for propName, propSchema := range properties {
propPairs = append(propPairs, struct {
propName string
propSchema map[string]interface{}
}{propName: propName, propSchema: propSchema.(map[string]interface{})})
}
// Properties are always emitted in alphabetical order here.
sort.Slice(propPairs, func(i, j int) bool {
return propPairs[i].propName < propPairs[j].propName
})
var rule strings.Builder
if baseProperty {
rule.WriteString(`"<function="`)
} else {
rule.WriteString(`"{" space`)
}
if baseProperty {
// Pull the function-name property out of the list so that it can be
// emitted inside the opening tag.
namePair := propData{}
for i, propPair := range propPairs {
propName := propPair.propName
if propName == sc.fnName {
namePair = append(namePair, propPair)
// remove namePair from propPairs
propPairs = append(propPairs[:i], propPairs[i+1:]...)
break
}
}
if len(namePair) == 0 {
return "", fmt.Errorf("no function name found in the schema: %s", schema)
}
propRuleName, err := sc.visit(namePair[0].propSchema, fmt.Sprintf("%s-%s", ruleName, sc.fnName), rootSchema)
if err != nil {
return "", err
}
rule.WriteString(fmt.Sprintf(` %s ">{" `, propRuleName))
// The remaining properties are written as bare rule references, without
// keys or separators.
for _, propPair := range propPairs {
propName := propPair.propName
propSchema := propPair.propSchema
propRuleName, err := sc.visit(propSchema, fmt.Sprintf("%s-%s", ruleName, propName), rootSchema)
if err != nil {
return "", err
}
rule.WriteString(propRuleName)
}
rule.WriteString(` "}</function>"`)
} else {
// Regular object: `"key" : value` pairs separated by commas.
for i, propPair := range propPairs {
propName := propPair.propName
propSchema := propPair.propSchema
propRuleName, err := sc.visit(propSchema, fmt.Sprintf("%s-%s", ruleName, propName), rootSchema)
if err != nil {
return "", err
}
lPropName, err := sc.formatLiteralQuoted(propName)
if err != nil {
return "", err
}
if i > 0 {
rule.WriteString(` "," space`)
}
rule.WriteString(fmt.Sprintf(` %s space ":" space %s`, lPropName, propRuleName))
}
}
if !baseProperty {
rule.WriteString(` "}" space`)
}
return sc.addRule(ruleName, rule.String()), nil
} else if items, exists := schema["items"].(map[string]interface{}); schemaType == "array" && exists {
itemRuleName, err := sc.visit(items, fmt.Sprintf("%s-item", ruleName), rootSchema)
if err != nil {
return "", err
}
rule := fmt.Sprintf(`"[" space (%s ("," space %s)*)? "]" space`, itemRuleName, itemRuleName)
return sc.addRule(ruleName, rule), nil
} else {
primitiveRule, exists := PRIMITIVE_RULES[schemaType]
if !exists {
return "", fmt.Errorf("unrecognized schema: %v", schema)
}
// A primitive at the top level is registered as "root"; otherwise the rule
// is shared under the type name.
if ruleName == "root" {
schemaType = "root"
}
return sc.addRule(schemaType, primitiveRule), nil
}
}
// resolveReference returns the schema that ref points to inside rootSchema.
//
// Only references of the form "#/$defs/<key>" are supported. An error is
// returned when ref has another format, when rootSchema has no "$defs" object,
// or when the key is missing from it.
func (sc *LLama31SchemaConverter) resolveReference(ref string, rootSchema map[string]interface{}) (map[string]interface{}, error) {
	if !strings.HasPrefix(ref, "#/$defs/") {
		return nil, fmt.Errorf("invalid reference format: %s", ref)
	}

	defKey := strings.TrimPrefix(ref, "#/$defs/")
	definitions, exists := rootSchema["$defs"].(map[string]interface{})
	if !exists {
		// %v rather than %s: the schema is a map whose values are not
		// necessarily strings, and %s renders those as "%!s(...)".
		return nil, fmt.Errorf("no definitions found in the schema: %v", rootSchema)
	}

	def, exists := definitions[defKey].(map[string]interface{})
	if !exists {
		return nil, fmt.Errorf("definition not found: %s %+v", defKey, definitions)
	}

	return def, nil
}
// Grammar registers the "freestring" rule, walks schema starting at the root
// rule and renders the collected rules with options. A failure while walking
// the schema is returned as the error.
func (sc *LLama31SchemaConverter) Grammar(schema map[string]interface{}, options ...func(*GrammarOption)) (string, error) {
	sc.addRule("freestring", PRIMITIVE_RULES["freestring"])

	if _, err := sc.visit(schema, "", schema); err != nil {
		return "", err
	}

	return sc.rules.ToGrammar(options...), nil
}
// GrammarFromBytes decodes b as a JSON schema object and returns its grammar.
// A decoding failure is returned as the error.
func (sc *LLama31SchemaConverter) GrammarFromBytes(b []byte, options ...func(*GrammarOption)) (string, error) {
	var decoded map[string]interface{}
	if err := json.Unmarshal(b, &decoded); err != nil {
		return "", err
	}
	return sc.Grammar(decoded, options...)
}

View File

@@ -1,76 +0,0 @@
package grammars_test
import (
"strings"
. "github.com/mudler/LocalAI/pkg/functions/grammars"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)
const (
// testllama31Input1 is a oneOf schema with two function objects
// ("create_event" and "search"), each with a "function" constant and an
// "arguments" object of string properties.
testllama31Input1 = `
{
"oneOf": [
{
"type": "object",
"properties": {
"function": {"const": "create_event"},
"arguments": {
"type": "object",
"properties": {
"title": {"type": "string"},
"date": {"type": "string"},
"time": {"type": "string"}
}
}
}
},
{
"type": "object",
"properties": {
"function": {"const": "search"},
"arguments": {
"type": "object",
"properties": {
"query": {"type": "string"}
}
}
}
}
]
}`
// Target output shape:
// <function=example_function_name>{{"example_name": "example_value"}}</function>
//
// testllama31inputResult1 lists the grammar lines expected for
// testllama31Input1. The spec checks each non-empty line as a substring of
// the generated grammar and compares the total number of lines.
testllama31inputResult1 = `root-0-function ::= "create_event"
freestring ::= (
[^"\\] |
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
)* space
root-0 ::= "<function=" root-0-function ">{" root-0-arguments "}</function>"
root-1-arguments ::= "{" space "\"query\"" space ":" space string "}" space
root ::= root-0 | root-1
space ::= " "?
root-0-arguments ::= "{" space "\"date\"" space ":" space string "," space "\"time\"" space ":" space string "," space "\"title\"" space ":" space string "}" space
root-1 ::= "<function=" root-1-function ">{" root-1-arguments "}</function>"
string ::= "\"" (
[^"\\] |
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
)* "\"" space
root-1-function ::= "search"`
)
// Spec for the Llama 3.1 schema converter: the generated grammar must contain
// every expected rule line and have the same number of lines as the fixture.
var _ = Describe("JSON schema grammar tests", func() {
	Context("JSON", func() {
		It("generates a valid grammar from JSON schema", func() {
			converter := NewLLama31SchemaConverter("function")
			generated, err := converter.GrammarFromBytes([]byte(testllama31Input1))
			Expect(err).ToNot(HaveOccurred())

			// Rule order is not checked: each expected line only has to
			// appear somewhere in the generated grammar.
			expectedLines := strings.Split(testllama31inputResult1, "\n")
			for _, expected := range expectedLines {
				if expected == "" {
					continue
				}
				Expect(generated).To(ContainSubstring(expected))
			}

			// Matching line counts guards against extra, unexpected rules.
			generatedLines := strings.Split(generated, "\n")
			Expect(len(expectedLines)).To(Equal(len(generatedLines)))
		})
	})
})

View File

@@ -1,93 +0,0 @@
package grammars
import (
	"fmt"
	"sort"
	"strings"

	"github.com/mudler/LocalAI/pkg/utils"
)
// Rules maps a grammar rule name to the right-hand side of its production;
// ToGrammar renders each entry as "name ::= rule".
type Rules map[string]string
// ToGrammar renders the rule set as grammar text, one "name ::= rule" entry per
// rule, joined with newlines.
//
// Rules are emitted in lexical order of their names. Go randomizes map
// iteration order, so without the sort the same rule set could be rendered
// with a different line order on every call.
//
// When none of the MaybeArray, MaybeString or Prefix options is set, the rules
// are returned as they are. Otherwise the computed "root" rule is emitted under
// the name "realvalue" and three more entries are appended: a new "root" that
// wraps "realvalue" according to the options, the array rule (array or
// arrayNewLines, depending on DisableParallelNewLines) and a "mixedstring"
// rule whose shape depends on MaybeArray and ExpectStringsAfterJSON.
func (rules Rules) ToGrammar(options ...func(*GrammarOption)) string {
	grammarOpts := &GrammarOption{}
	grammarOpts.Apply(options...)

	prefix := grammarOpts.Prefix
	maybeArray := grammarOpts.MaybeArray
	maybeString := grammarOpts.MaybeString

	// The computed root only has to be wrapped when one of these is requested.
	swapRoot := maybeArray || maybeString || prefix != ""

	// Sort the rule names so that the output is deterministic.
	names := make([]string, 0, len(rules))
	for name := range rules {
		names = append(names, name)
	}
	sort.Strings(names)

	// Write down the computed rules; up to three wrapper entries may follow.
	lines := make([]string, 0, len(names)+3)
	for _, name := range names {
		emitted := name
		if swapRoot && name == "root" {
			// The original root becomes "realvalue"; a new root is added below.
			emitted = "realvalue"
		}
		lines = append(lines, fmt.Sprintf("%s ::= %s", emitted, rules[name]))
	}

	if !swapRoot {
		return strings.Join(lines, "\n")
	}

	newRoot := "realvalue"
	if maybeArray {
		newRoot = "arr | realvalue"
	}

	freestringRule := "mixedstring"
	if grammarOpts.NoMixedFreeString {
		freestringRule = "freestring"
	}

	switch {
	case prefix != "":
		// Escape newlines in the prefix so it can be embedded in a quoted literal.
		prefix = utils.EscapeNewLines(prefix)
		if maybeArray && maybeString {
			newRoot = "(" + newRoot + ")"
		}
		if maybeString {
			newRoot = "( \"" + prefix + "\" " + newRoot + " | " + freestringRule + " ) "
		} else {
			// NOTE(review): with maybeArray set this yields `"prefix" arr | realvalue`,
			// i.e. the prefix binds to the arr alternative only. Kept as-is because
			// callers may depend on the exact output - confirm whether that is intended.
			newRoot = "\"" + prefix + "\" " + newRoot
		}
	case maybeString:
		newRoot = freestringRule + " | " + newRoot
	}

	lines = append(lines, fmt.Sprintf("%s ::= %s", "root", newRoot))

	// array and arrayNewLines are package-level rule definitions declared
	// outside this function (presumably the "arr" rule - confirm).
	if grammarOpts.DisableParallelNewLines {
		lines = append(lines, array)
	} else {
		lines = append(lines, arrayNewLines)
	}

	expectStringsAfter := grammarOpts.ExpectStringsAfterJSON
	switch {
	case maybeArray && expectStringsAfter:
		lines = append(lines, `mixedstring ::= freestring | freestring arr freestring | (freestring realvalue freestring)* | realvalue | arr`)
	case maybeArray:
		lines = append(lines, `mixedstring ::= freestring | freestring arr | freestring realvalue | realvalue | arr`)
	case expectStringsAfter:
		lines = append(lines, `mixedstring ::= freestring | (freestring realvalue freestring)* | realvalue`)
	default:
		lines = append(lines, `mixedstring ::= freestring | freestring realvalue | realvalue`)
	}

	return strings.Join(lines, "\n")
}

View File

@@ -1,33 +0,0 @@
package grammars
// SchemaConverterType selects which schema converter flavour is used.
type SchemaConverterType int

// Supported converter flavours. JSONSchema is the zero value.
const (
	JSONSchema SchemaConverterType = iota
	LLama31Schema
)

// Textual names of the converter flavours, as accepted by NewType and
// returned by String.
const (
	LlamaType string = "llama3.1"
	JSONType  string = "json"
)

// String returns the textual name of s, or "unknown" when s is not one of the
// declared flavours.
func (s SchemaConverterType) String() string {
	if s == JSONSchema {
		return JSONType
	}
	if s == LLama31Schema {
		return LlamaType
	}
	return "unknown"
}

// NewType maps a textual name to its converter flavour. Any name other than
// LlamaType, including JSONType and unrecognised names, yields JSONSchema.
func NewType(t string) SchemaConverterType {
	if t == LlamaType {
		return LLama31Schema
	}
	return JSONSchema
}

View File

@@ -1,28 +0,0 @@
package functions
const (
// JSONBNF is a BNF grammar for JSON text whose root rule is an object.
// It defines the rules root, value, object, array, string, number and ws;
// ws matches any run of spaces, tabs and newlines (possibly empty).
JSONBNF = `root ::= object
value ::= object | array | string | number | ("true" | "false" | "null") ws
object ::=
"{" ws (
string ":" ws value
("," ws string ":" ws value)*
)? "}" ws
array ::=
"[" ws (
value
("," ws value)*
)? "]" ws
string ::=
"\"" (
[^"\\] |
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) # escapes
)* "\"" ws
number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? ws
ws ::= ([ \t\n] ws)?`
)

View File

@@ -1,4 +1,4 @@
package grammars
package functions
type GrammarOption struct {
PropOrder string
@@ -8,9 +8,6 @@ type GrammarOption struct {
MaybeString bool
NoMixedFreeString bool
ExpectStringsAfterJSON bool
FunctionName string
SchemaType SchemaConverterType
}
func (o *GrammarOption) Apply(options ...func(*GrammarOption)) {
@@ -51,15 +48,3 @@ func SetPropOrder(order string) func(*GrammarOption) {
o.PropOrder = order
}
}
// WithSchemaType returns an option that stores schemaType in the SchemaType
// field of the GrammarOption it is applied to.
func WithSchemaType(schemaType SchemaConverterType) func(*GrammarOption) {
	setSchemaType := func(opts *GrammarOption) {
		opts.SchemaType = schemaType
	}
	return setSchemaType
}
// WithFunctionName returns an option that stores name in the FunctionName
// field of the GrammarOption it is applied to.
func WithFunctionName(name string) func(*GrammarOption) {
	setFunctionName := func(opts *GrammarOption) {
		opts.FunctionName = name
	}
	return setFunctionName
}

View File

@@ -7,7 +7,6 @@ import (
"regexp"
"strings"
"github.com/mudler/LocalAI/pkg/functions/grammars"
"github.com/mudler/LocalAI/pkg/utils"
"github.com/rs/zerolog/log"
)
@@ -23,9 +22,7 @@ type GrammarConfig struct {
MixedMode bool `yaml:"mixed_mode"`
// NoMixedFreeString disables the mixed mode for free strings
// In this way, if the LLM selects a free string, it won't necessarily be mixed with JSON objects.
// For example, if enabled, the LLM returns either a JSON object or a free string, but not a mix of both.
// If disabled (default): the LLM can return a JSON object surrounded by free strings (e.g. `this is the JSON result: { "bar": "baz" } for your question`). This forces the LLM to return at least a JSON object, but it's not going to be strict.
// In this way, if the LLM selects a free string, it won't necessarily be mixed with JSON objects
NoMixedFreeString bool `yaml:"no_mixed_free_string"`
// NoGrammar disables the grammar parsing and parses the responses directly from the LLM
@@ -42,10 +39,6 @@ type GrammarConfig struct {
// for instance name,arguments will make print { "name": "foo", "arguments": { "bar": "baz" } }
// instead of { "arguments": { "bar": "baz" }, "name": "foo" }
PropOrder string `yaml:"properties_order"`
// SchemaType can be configured to use a specific schema type to force the grammar
// available : json, llama3.1
SchemaType string `yaml:"schema_type"`
}
// FunctionsConfig is the configuration for the tool/function call.
@@ -99,36 +92,28 @@ type FuncCallResults struct {
Arguments string
}
func (g FunctionsConfig) GrammarOptions() []func(o *grammars.GrammarOption) {
opts := []func(o *grammars.GrammarOption){}
if g.GrammarConfig.MixedMode {
opts = append(opts, grammars.EnableMaybeString)
func (g GrammarConfig) Options() []func(o *GrammarOption) {
opts := []func(o *GrammarOption){}
if g.MixedMode {
opts = append(opts, EnableMaybeString)
}
if g.GrammarConfig.ParallelCalls {
opts = append(opts, grammars.EnableMaybeArray)
if g.ParallelCalls {
opts = append(opts, EnableMaybeArray)
}
if g.GrammarConfig.DisableParallelNewLines {
opts = append(opts, grammars.DisableParallelNewLines)
if g.DisableParallelNewLines {
opts = append(opts, DisableParallelNewLines)
}
if g.GrammarConfig.Prefix != "" {
opts = append(opts, grammars.SetPrefix(g.GrammarConfig.Prefix))
if g.Prefix != "" {
opts = append(opts, SetPrefix(g.Prefix))
}
if g.GrammarConfig.NoMixedFreeString {
opts = append(opts, grammars.NoMixedFreeString)
if g.NoMixedFreeString {
opts = append(opts, NoMixedFreeString)
}
if g.GrammarConfig.ExpectStringsAfterJSON {
opts = append(opts, grammars.ExpectStringsAfterJSON)
if g.ExpectStringsAfterJSON {
opts = append(opts, ExpectStringsAfterJSON)
}
if g.GrammarConfig.SchemaType != "" {
opts = append(opts, grammars.WithSchemaType(grammars.NewType(g.GrammarConfig.SchemaType)))
}
if g.FunctionNameKey != "" {
opts = append(opts, grammars.WithFunctionName(g.FunctionNameKey))
}
opts = append(opts, grammars.SetPropOrder(g.GrammarConfig.PropOrder))
opts = append(opts, SetPropOrder(g.PropOrder))
return opts
}

View File

@@ -18,15 +18,3 @@ func RandString(n int) string {
}
return string(b)
}
// Unique returns the distinct strings of arr, each at the position of its
// first occurrence. The result is nil when arr contains no elements.
func Unique(arr []string) []string {
	var deduped []string
	seen := map[string]struct{}{}
	for i := range arr {
		candidate := arr[i]
		if _, duplicate := seen[candidate]; duplicate {
			continue
		}
		seen[candidate] = struct{}{}
		deduped = append(deduped, candidate)
	}
	return deduped
}