Compare commits

..

1 Commits

Author SHA1 Message Date
Ettore Di Giacinto
27d7ada8dd feat(l4t): add support for extras images
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-06 11:53:07 +01:00
33 changed files with 466 additions and 281 deletions

View File

@@ -53,7 +53,7 @@ jobs:
tag-suffix: '-cublas-cuda12-ffmpeg'
ffmpeg: 'true'
image-type: 'extras'
runs-on: 'ubuntu-latest'
runs-on: 'arc-runner-set'
base-image: "ubuntu:22.04"
makeflags: "--jobs=3 --output-sync=target"
# - build-type: 'hipblas'

View File

@@ -354,12 +354,14 @@ FROM requirements-drivers
ARG FFMPEG
ARG BUILD_TYPE
ARG BUILD_PLATFORM
ARG TARGETARCH
ARG IMAGE_TYPE=extras
ARG EXTRA_BACKENDS
ARG MAKEFLAGS
ENV BUILD_TYPE=${BUILD_TYPE}
ENV BUILD_PLATFORM=${BUILD_PLATFORM}
ENV REBUILD=false
ENV HEALTHCHECK_ENDPOINT=http://localhost:8080/readyz
ENV MAKEFLAGS=${MAKEFLAGS}

View File

@@ -6,7 +6,9 @@ BINARY_NAME=local-ai
DETECT_LIBS?=true
# llama.cpp versions
CPPLLAMA_VERSION?=d2fe216fb2fb7ca8627618c9ea3a2e7886325780
GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
CPPLLAMA_VERSION?=5598f475be3e31430fbe17ebb85654ec90dc201e
# whisper.cpp version
WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp
@@ -22,7 +24,7 @@ BARKCPP_VERSION?=v1.0.0
# stablediffusion.cpp (ggml)
STABLEDIFFUSION_GGML_REPO?=https://github.com/leejet/stable-diffusion.cpp
STABLEDIFFUSION_GGML_VERSION?=d46ed5e184b97c2018dc2e8105925bdb8775e02c
STABLEDIFFUSION_GGML_VERSION?=5eb15ef4d022bef4a391de4f5f6556e81fbb5024
ONNX_VERSION?=1.20.0
ONNX_ARCH?=x64
@@ -149,6 +151,7 @@ ifeq ($(BUILD_TYPE),hipblas)
LD_LIBRARY_PATH ?= /opt/rocm/lib:/opt/rocm/llvm/lib
export CXX=$(ROCM_HOME)/llvm/bin/clang++
export CC=$(ROCM_HOME)/llvm/bin/clang
# llama-ggml has no hipblas support, so override it here.
export STABLE_BUILD_TYPE=
export GGML_HIP=1
GPU_TARGETS ?= gfx900,gfx906,gfx908,gfx940,gfx941,gfx942,gfx90a,gfx1030,gfx1031,gfx1100,gfx1101
@@ -185,6 +188,7 @@ ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx2
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx512
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-fallback
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-ggml
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-grpc
ALL_GRPC_BACKENDS+=backend-assets/util/llama-cpp-rpc-server
ALL_GRPC_BACKENDS+=backend-assets/grpc/whisper
@@ -218,6 +222,19 @@ endif
all: help
## go-llama.cpp
sources/go-llama.cpp:
mkdir -p sources/go-llama.cpp
cd sources/go-llama.cpp && \
git init && \
git remote add origin $(GOLLAMA_REPO) && \
git fetch origin && \
git checkout $(GOLLAMA_VERSION) && \
git submodule update --init --recursive --depth 1 --single-branch
sources/go-llama.cpp/libbinding.a: sources/go-llama.cpp
$(MAKE) -C sources/go-llama.cpp BUILD_TYPE=$(STABLE_BUILD_TYPE) libbinding.a
## bark.cpp
sources/bark.cpp:
git clone --recursive $(BARKCPP_REPO) sources/bark.cpp && \
@@ -293,17 +310,19 @@ sources/whisper.cpp:
sources/whisper.cpp/libwhisper.a: sources/whisper.cpp
cd sources/whisper.cpp && $(MAKE) libwhisper.a libggml.a
get-sources: sources/go-piper sources/stablediffusion-ggml.cpp sources/bark.cpp sources/whisper.cpp backend/cpp/llama/llama.cpp
get-sources: sources/go-llama.cpp sources/go-piper sources/stablediffusion-ggml.cpp sources/bark.cpp sources/whisper.cpp backend/cpp/llama/llama.cpp
replace:
$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp=$(CURDIR)/sources/whisper.cpp
$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp/bindings/go=$(CURDIR)/sources/whisper.cpp/bindings/go
$(GOCMD) mod edit -replace github.com/mudler/go-piper=$(CURDIR)/sources/go-piper
$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(CURDIR)/sources/go-llama.cpp
dropreplace:
$(GOCMD) mod edit -dropreplace github.com/ggerganov/whisper.cpp
$(GOCMD) mod edit -dropreplace github.com/ggerganov/whisper.cpp/bindings/go
$(GOCMD) mod edit -dropreplace github.com/mudler/go-piper
$(GOCMD) mod edit -dropreplace github.com/go-skynet/go-llama.cpp
prepare-sources: get-sources replace
$(GOCMD) mod download
@@ -311,6 +330,7 @@ prepare-sources: get-sources replace
## GENERIC
rebuild: ## Rebuilds the project
$(GOCMD) clean -cache
$(MAKE) -C sources/go-llama.cpp clean
$(MAKE) -C sources/whisper.cpp clean
$(MAKE) -C sources/go-piper clean
$(MAKE) build
@@ -414,7 +434,7 @@ run: prepare ## run local-ai
test-models/testmodel.ggml:
mkdir test-models
mkdir test-dir
wget -q https://huggingface.co/RichardErkhov/Qwen_-_Qwen2-1.5B-Instruct-gguf/resolve/main/Qwen2-1.5B-Instruct.Q2_K.gguf -O test-models/testmodel.ggml
wget -q https://huggingface.co/TheBloke/orca_mini_3B-GGML/resolve/main/orca-mini-3b.ggmlv3.q4_0.bin -O test-models/testmodel.ggml
wget -q https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.en.bin -O test-models/whisper-en
wget -q https://huggingface.co/mudler/all-MiniLM-L6-v2/resolve/main/ggml-model-q4_0.bin -O test-models/bert
wget -q https://cdn.openai.com/whisper/draft-20220913a/micro-machines.wav -O test-dir/audio.wav
@@ -429,7 +449,8 @@ test: prepare test-models/testmodel.ggml grpcs
export GO_TAGS="tts debug"
$(MAKE) prepare-test
HUGGINGFACE_GRPC=$(abspath ./)/backend/python/transformers/run.sh TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="!llama-gguf" --flake-attempts $(TEST_FLAKES) --fail-fast -v -r $(TEST_PATHS)
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="!llama && !llama-gguf" --flake-attempts $(TEST_FLAKES) --fail-fast -v -r $(TEST_PATHS)
$(MAKE) test-llama
$(MAKE) test-llama-gguf
$(MAKE) test-tts
$(MAKE) test-stablediffusion
@@ -458,6 +479,10 @@ teardown-e2e:
rm -rf $(TEST_DIR) || true
docker stop $$(docker ps -q --filter ancestor=localai-tests)
test-llama: prepare-test
TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="llama" --flake-attempts $(TEST_FLAKES) -v -r $(TEST_PATHS)
test-llama-gguf: prepare-test
TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="llama-gguf" --flake-attempts $(TEST_FLAKES) -v -r $(TEST_PATHS)
@@ -735,6 +760,13 @@ backend-assets/util/llama-cpp-rpc-server: backend-assets/grpc/llama-cpp-grpc
mkdir -p backend-assets/util/
cp -rf backend/cpp/llama-grpc/llama.cpp/build/bin/rpc-server backend-assets/util/llama-cpp-rpc-server
backend-assets/grpc/llama-ggml: sources/go-llama.cpp sources/go-llama.cpp/libbinding.a backend-assets/grpc
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-llama.cpp LIBRARY_PATH=$(CURDIR)/sources/go-llama.cpp \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/llama-ggml ./backend/go/llm/llama-ggml/
ifneq ($(UPX),)
$(UPX) backend-assets/grpc/llama-ggml
endif
backend-assets/grpc/bark-cpp: backend/go/bark/libbark.a backend-assets/grpc
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/backend/go/bark/ LIBRARY_PATH=$(CURDIR)/backend/go/bark/ \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/bark-cpp ./backend/go/bark/

View File

@@ -0,0 +1,204 @@
package main
// This is a wrapper to statisfy the GRPC service interface
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
import (
"fmt"
"github.com/go-skynet/go-llama.cpp"
"github.com/mudler/LocalAI/pkg/grpc/base"
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
)
type LLM struct {
base.SingleThread
llama *llama.LLama
}
func (llm *LLM) Load(opts *pb.ModelOptions) error {
ropeFreqBase := float32(10000)
ropeFreqScale := float32(1)
if opts.RopeFreqBase != 0 {
ropeFreqBase = opts.RopeFreqBase
}
if opts.RopeFreqScale != 0 {
ropeFreqScale = opts.RopeFreqScale
}
llamaOpts := []llama.ModelOption{
llama.WithRopeFreqBase(ropeFreqBase),
llama.WithRopeFreqScale(ropeFreqScale),
}
if opts.NGQA != 0 {
llamaOpts = append(llamaOpts, llama.WithGQA(int(opts.NGQA)))
}
if opts.RMSNormEps != 0 {
llamaOpts = append(llamaOpts, llama.WithRMSNormEPS(opts.RMSNormEps))
}
if opts.ContextSize != 0 {
llamaOpts = append(llamaOpts, llama.SetContext(int(opts.ContextSize)))
}
if opts.F16Memory {
llamaOpts = append(llamaOpts, llama.EnableF16Memory)
}
if opts.Embeddings {
llamaOpts = append(llamaOpts, llama.EnableEmbeddings)
}
if opts.NGPULayers != 0 {
llamaOpts = append(llamaOpts, llama.SetGPULayers(int(opts.NGPULayers)))
}
llamaOpts = append(llamaOpts, llama.SetMMap(opts.MMap))
llamaOpts = append(llamaOpts, llama.SetMainGPU(opts.MainGPU))
llamaOpts = append(llamaOpts, llama.SetTensorSplit(opts.TensorSplit))
if opts.NBatch != 0 {
llamaOpts = append(llamaOpts, llama.SetNBatch(int(opts.NBatch)))
} else {
llamaOpts = append(llamaOpts, llama.SetNBatch(512))
}
if opts.NUMA {
llamaOpts = append(llamaOpts, llama.EnableNUMA)
}
if opts.LowVRAM {
llamaOpts = append(llamaOpts, llama.EnabelLowVRAM)
}
model, err := llama.New(opts.ModelFile, llamaOpts...)
llm.llama = model
return err
}
func buildPredictOptions(opts *pb.PredictOptions) []llama.PredictOption {
ropeFreqBase := float32(10000)
ropeFreqScale := float32(1)
if opts.RopeFreqBase != 0 {
ropeFreqBase = opts.RopeFreqBase
}
if opts.RopeFreqScale != 0 {
ropeFreqScale = opts.RopeFreqScale
}
predictOptions := []llama.PredictOption{
llama.SetTemperature(opts.Temperature),
llama.SetTopP(opts.TopP),
llama.SetTopK(int(opts.TopK)),
llama.SetTokens(int(opts.Tokens)),
llama.SetThreads(int(opts.Threads)),
llama.WithGrammar(opts.Grammar),
llama.SetRopeFreqBase(ropeFreqBase),
llama.SetRopeFreqScale(ropeFreqScale),
llama.SetNegativePromptScale(opts.NegativePromptScale),
llama.SetNegativePrompt(opts.NegativePrompt),
}
if opts.PromptCacheAll {
predictOptions = append(predictOptions, llama.EnablePromptCacheAll)
}
if opts.PromptCacheRO {
predictOptions = append(predictOptions, llama.EnablePromptCacheRO)
}
// Expected absolute path
if opts.PromptCachePath != "" {
predictOptions = append(predictOptions, llama.SetPathPromptCache(opts.PromptCachePath))
}
if opts.Mirostat != 0 {
predictOptions = append(predictOptions, llama.SetMirostat(int(opts.Mirostat)))
}
if opts.MirostatETA != 0 {
predictOptions = append(predictOptions, llama.SetMirostatETA(opts.MirostatETA))
}
if opts.MirostatTAU != 0 {
predictOptions = append(predictOptions, llama.SetMirostatTAU(opts.MirostatTAU))
}
if opts.Debug {
predictOptions = append(predictOptions, llama.Debug)
}
predictOptions = append(predictOptions, llama.SetStopWords(opts.StopPrompts...))
if opts.PresencePenalty != 0 {
predictOptions = append(predictOptions, llama.SetPenalty(opts.PresencePenalty))
}
if opts.NKeep != 0 {
predictOptions = append(predictOptions, llama.SetNKeep(int(opts.NKeep)))
}
if opts.Batch != 0 {
predictOptions = append(predictOptions, llama.SetBatch(int(opts.Batch)))
}
if opts.F16KV {
predictOptions = append(predictOptions, llama.EnableF16KV)
}
if opts.IgnoreEOS {
predictOptions = append(predictOptions, llama.IgnoreEOS)
}
if opts.Seed != 0 {
predictOptions = append(predictOptions, llama.SetSeed(int(opts.Seed)))
}
//predictOptions = append(predictOptions, llama.SetLogitBias(c.Seed))
predictOptions = append(predictOptions, llama.SetFrequencyPenalty(opts.FrequencyPenalty))
predictOptions = append(predictOptions, llama.SetMlock(opts.MLock))
predictOptions = append(predictOptions, llama.SetMemoryMap(opts.MMap))
predictOptions = append(predictOptions, llama.SetPredictionMainGPU(opts.MainGPU))
predictOptions = append(predictOptions, llama.SetPredictionTensorSplit(opts.TensorSplit))
predictOptions = append(predictOptions, llama.SetTailFreeSamplingZ(opts.TailFreeSamplingZ))
predictOptions = append(predictOptions, llama.SetTypicalP(opts.TypicalP))
return predictOptions
}
func (llm *LLM) Predict(opts *pb.PredictOptions) (string, error) {
return llm.llama.Predict(opts.Prompt, buildPredictOptions(opts)...)
}
func (llm *LLM) PredictStream(opts *pb.PredictOptions, results chan string) error {
predictOptions := buildPredictOptions(opts)
predictOptions = append(predictOptions, llama.SetTokenCallback(func(token string) bool {
results <- token
return true
}))
go func() {
_, err := llm.llama.Predict(opts.Prompt, predictOptions...)
if err != nil {
fmt.Println("err: ", err)
}
close(results)
}()
return nil
}
func (llm *LLM) Embeddings(opts *pb.PredictOptions) ([]float32, error) {
predictOptions := buildPredictOptions(opts)
if len(opts.EmbeddingTokens) > 0 {
tokens := []int{}
for _, t := range opts.EmbeddingTokens {
tokens = append(tokens, int(t))
}
return llm.llama.TokenEmbeddings(tokens, predictOptions...)
}
return llm.llama.Embeddings(opts.Embeddings, predictOptions...)
}

View File

@@ -0,0 +1,19 @@
package main
import (
"flag"
grpc "github.com/mudler/LocalAI/pkg/grpc"
)
var (
addr = flag.String("addr", "localhost:50051", "the address to connect to")
)
func main() {
flag.Parse()
if err := grpc.StartServer(*addr, &LLM{}); err != nil {
panic(err)
}
}

View File

@@ -0,0 +1,2 @@
--index-url https://pypi.jetson-ai-lab.dev/jp6/cu126/
torch

View File

@@ -0,0 +1,5 @@
--index-url https://pypi.jetson-ai-lab.dev/jp6/cu126/
torch
torchaudio
transformers
accelerate

View File

@@ -132,11 +132,16 @@ function installRequirements() {
declare -a requirementFiles=(
"${EDIR}/requirements-install.txt"
"${EDIR}/requirements.txt"
"${EDIR}/requirements-${BUILD_TYPE}.txt"
)
if [ "x${BUILD_TYPE}" != "x${BUILD_PROFILE}" ]; then
requirementFiles+=("${EDIR}/requirements-${BUILD_PROFILE}.txt")
if [ -n "${BUILD_PLATFORM}" ]; then
requirementFiles+=("${EDIR}/requirements-${BUILD_PLATFORM}.txt")
else
requirementFiles+=("${EDIR}/requirements-${BUILD_TYPE}.txt")
if [ "x${BUILD_TYPE}" != "x${BUILD_PROFILE}" ]; then
requirementFiles+=("${EDIR}/requirements-${BUILD_PROFILE}.txt")
fi
fi
# if BUILD_TYPE is empty, we are a CPU build, so we should try to install the CPU requirements
@@ -146,8 +151,14 @@ function installRequirements() {
requirementFiles+=("${EDIR}/requirements-after.txt")
if [ "x${BUILD_TYPE}" != "x${BUILD_PROFILE}" ]; then
requirementFiles+=("${EDIR}/requirements-${BUILD_PROFILE}-after.txt")
if [ -n "${BUILD_PLATFORM}" ]; then
requirementFiles+=("${EDIR}/requirements-${BUILD_PLATFORM}-after.txt")
else
if [ "x${BUILD_TYPE}" != "x${BUILD_PROFILE}" ]; then
requirementFiles+=("${EDIR}/requirements-${BUILD_PROFILE}-after.txt")
else
requirementFiles+=("${EDIR}/requirements-${BUILD_TYPE}-after.txt")
fi
fi
for reqFile in ${requirementFiles[@]}; do

View File

@@ -0,0 +1,6 @@
--index-url https://pypi.jetson-ai-lab.dev/jp6/cu126/
torch
torchaudio
transformers
accelerate
coqui-tts

View File

@@ -0,0 +1,10 @@
--index-url https://pypi.jetson-ai-lab.dev/jp6/cu126/
torch
diffusers
opencv-python
transformers
accelerate
compel
peft
sentencepiece
optimum-quanto

View File

@@ -0,0 +1,4 @@
--index-url https://pypi.jetson-ai-lab.dev/jp6/cu126/
torch
transformers
accelerate

View File

@@ -0,0 +1,9 @@
--index-url https://pypi.jetson-ai-lab.dev/jp6/cu126/
torch
faster-whisper
opencv-python
accelerate
compel
peft
sentencepiece
optimum-quanto

View File

@@ -0,0 +1,3 @@
--index-url https://pypi.jetson-ai-lab.dev/jp6/cu126/
torch
transformers

View File

@@ -0,0 +1,5 @@
--index-url https://pypi.jetson-ai-lab.dev/jp6/cu126/
transformers
accelerate
torch
rerankers[transformers]

View File

@@ -0,0 +1,9 @@
--index-url https://pypi.jetson-ai-lab.dev/jp6/cu126/
torch
accelerate
llvmlite==0.43.0
numba==0.60.0
transformers
bitsandbytes
outetts
sentence-transformers==3.4.1

View File

@@ -1 +0,0 @@
vllm

View File

@@ -1,3 +1,4 @@
accelerate
torch==2.4.1
transformers
transformers
vllm

View File

@@ -2,4 +2,5 @@
accelerate
torch==2.4.1+cu118
transformers
bitsandbytes
bitsandbytes
vllm

View File

@@ -1,4 +1,5 @@
accelerate
torch==2.4.1
transformers
bitsandbytes
bitsandbytes
vllm

View File

@@ -2,4 +2,5 @@
accelerate
torch==2.4.1+rocm6.0
transformers
bitsandbytes
bitsandbytes
vllm

View File

@@ -6,4 +6,5 @@ transformers
optimum[openvino]
setuptools
bitsandbytes
oneccl_bind_pt==2.3.100+xpu
oneccl_bind_pt==2.3.100+xpu
vllm

View File

@@ -0,0 +1,7 @@
--index-url https://pypi.jetson-ai-lab.dev/jp6/cu126/
accelerate
torch
vllm
transformers
bitsandbytes
flash-attn

View File

@@ -287,8 +287,7 @@ func (cfg *BackendConfig) SetDefaults(opts ...ConfigLoaderOption) {
defaultTopP := 0.95
defaultTopK := 40
defaultTemp := 0.9
// https://github.com/mudler/LocalAI/issues/2780
defaultMirostat := 0
defaultMirostat := 2
defaultMirostatTAU := 5.0
defaultMirostatETA := 0.1
defaultTypicalP := 1.0

View File

@@ -48,10 +48,8 @@ var _ = Describe("Model test", func() {
defer os.RemoveAll(tempdir)
gallery := []GalleryModel{{
Metadata: Metadata{
Name: "bert",
URL: bertEmbeddingsURL,
},
Name: "bert",
URL: bertEmbeddingsURL,
}}
out, err := yaml.Marshal(gallery)
Expect(err).ToNot(HaveOccurred())

View File

@@ -11,14 +11,6 @@ import (
// It is used to install the model by resolving the URL and downloading the files.
// The other fields are used to override the configuration of the model.
type GalleryModel struct {
Metadata `json:",inline" yaml:",inline"`
// config_file is read in the situation where URL is blank - and therefore this is a base config.
ConfigFile map[string]interface{} `json:"config_file,omitempty" yaml:"config_file,omitempty"`
// Overrides are used to override the configuration of the model located at URL
Overrides map[string]interface{} `json:"overrides,omitempty" yaml:"overrides,omitempty"`
}
type Metadata struct {
URL string `json:"url,omitempty" yaml:"url,omitempty"`
Name string `json:"name,omitempty" yaml:"name,omitempty"`
Description string `json:"description,omitempty" yaml:"description,omitempty"`
@@ -26,6 +18,10 @@ type Metadata struct {
URLs []string `json:"urls,omitempty" yaml:"urls,omitempty"`
Icon string `json:"icon,omitempty" yaml:"icon,omitempty"`
Tags []string `json:"tags,omitempty" yaml:"tags,omitempty"`
// config_file is read in the situation where URL is blank - and therefore this is a base config.
ConfigFile map[string]interface{} `json:"config_file,omitempty" yaml:"config_file,omitempty"`
// Overrides are used to override the configuration of the model located at URL
Overrides map[string]interface{} `json:"overrides,omitempty" yaml:"overrides,omitempty"`
// AdditionalFiles are used to add additional files to the model
AdditionalFiles []File `json:"files,omitempty" yaml:"files,omitempty"`
// Gallery is a reference to the gallery which contains the model

View File

@@ -9,11 +9,7 @@ import (
var _ = Describe("Gallery API tests", func() {
Context("requests", func() {
It("parses github with a branch", func() {
req := GalleryModel{
Metadata: Metadata{
URL: "github:go-skynet/model-gallery/gpt4all-j.yaml@main",
},
}
req := GalleryModel{URL: "github:go-skynet/model-gallery/gpt4all-j.yaml@main"}
e, err := GetGalleryConfigFromURL(req.URL, "")
Expect(err).ToNot(HaveOccurred())
Expect(e.Name).To(Equal("gpt4all-j"))

View File

@@ -299,18 +299,14 @@ var _ = Describe("API test", func() {
g := []gallery.GalleryModel{
{
Metadata: gallery.Metadata{
Name: "bert",
URL: bertEmbeddingsURL,
},
Name: "bert",
URL: bertEmbeddingsURL,
},
{
Metadata: gallery.Metadata{
Name: "bert2",
URL: bertEmbeddingsURL,
AdditionalFiles: []gallery.File{{Filename: "foo.yaml", URI: bertEmbeddingsURL}},
},
Overrides: map[string]interface{}{"foo": "bar"},
Name: "bert2",
URL: bertEmbeddingsURL,
Overrides: map[string]interface{}{"foo": "bar"},
AdditionalFiles: []gallery.File{{Filename: "foo.yaml", URI: bertEmbeddingsURL}},
},
}
out, err := yaml.Marshal(g)
@@ -526,6 +522,77 @@ var _ = Describe("API test", func() {
Expect(content["usage"]).To(ContainSubstring("You can test this model with curl like this"))
})
It("runs openllama(llama-ggml backend)", Label("llama"), func() {
if runtime.GOOS != "linux" {
Skip("test supported only on linux")
}
response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{
URL: "github:go-skynet/model-gallery/openllama_3b.yaml",
Name: "openllama_3b",
Overrides: map[string]interface{}{"backend": "llama-ggml", "mmap": true, "f16": true, "context_size": 128},
})
Expect(response["uuid"]).ToNot(BeEmpty(), fmt.Sprint(response))
uuid := response["uuid"].(string)
Eventually(func() bool {
response := getModelStatus("http://127.0.0.1:9090/models/jobs/" + uuid)
return response["processed"].(bool)
}, "360s", "10s").Should(Equal(true))
By("testing completion")
resp, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "openllama_3b", Prompt: "Count up to five: one, two, three, four, "})
Expect(err).ToNot(HaveOccurred())
Expect(len(resp.Choices)).To(Equal(1))
Expect(resp.Choices[0].Text).To(ContainSubstring("five"))
By("testing functions")
resp2, err := client.CreateChatCompletion(
context.TODO(),
openai.ChatCompletionRequest{
Model: "openllama_3b",
Messages: []openai.ChatCompletionMessage{
{
Role: "user",
Content: "What is the weather like in San Francisco (celsius)?",
},
},
Functions: []openai.FunctionDefinition{
openai.FunctionDefinition{
Name: "get_current_weather",
Description: "Get the current weather",
Parameters: jsonschema.Definition{
Type: jsonschema.Object,
Properties: map[string]jsonschema.Definition{
"location": {
Type: jsonschema.String,
Description: "The city and state, e.g. San Francisco, CA",
},
"unit": {
Type: jsonschema.String,
Enum: []string{"celcius", "fahrenheit"},
},
},
Required: []string{"location"},
},
},
},
})
Expect(err).ToNot(HaveOccurred())
Expect(len(resp2.Choices)).To(Equal(1))
Expect(resp2.Choices[0].Message.FunctionCall).ToNot(BeNil())
Expect(resp2.Choices[0].Message.FunctionCall.Name).To(Equal("get_current_weather"), resp2.Choices[0].Message.FunctionCall.Name)
var res map[string]string
err = json.Unmarshal([]byte(resp2.Choices[0].Message.FunctionCall.Arguments), &res)
Expect(err).ToNot(HaveOccurred())
Expect(res["location"]).To(ContainSubstring("San Francisco"), fmt.Sprint(res))
Expect(res["unit"]).To(Equal("celcius"), fmt.Sprint(res))
Expect(string(resp2.Choices[0].FinishReason)).To(Equal("function_call"), fmt.Sprint(resp2.Choices[0].FinishReason))
})
It("runs openllama gguf(llama-cpp)", Label("llama-gguf"), func() {
if runtime.GOOS != "linux" {
Skip("test supported only on linux")

View File

@@ -117,25 +117,19 @@ func (mgs *ModelGalleryEndpointService) DeleteModelGalleryEndpoint() func(c *fib
// @Router /models/available [get]
func (mgs *ModelGalleryEndpointService) ListModelFromGalleryEndpoint() func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
log.Debug().Msgf("Listing models from galleries: %+v", mgs.galleries)
models, err := gallery.AvailableGalleryModels(mgs.galleries, mgs.modelPath)
if err != nil {
return err
}
log.Debug().Msgf("Available %d models from %d galleries\n", len(models), len(mgs.galleries))
m := []gallery.Metadata{}
for _, mm := range models {
m = append(m, mm.Metadata)
log.Debug().Msgf("Models found from galleries: %+v", models)
for _, m := range models {
log.Debug().Msgf("Model found from galleries: %+v", m)
}
log.Debug().Msgf("Models %#v", m)
dat, err := json.Marshal(m)
dat, err := json.Marshal(models)
if err != nil {
return fmt.Errorf("could not marshal models: %w", err)
return err
}
return c.Send(dat)
}

View File

@@ -401,11 +401,6 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, evaluat
log.Debug().Msgf("Text content to return: %s", textContentToReturn)
noActionsToRun := len(results) > 0 && results[0].Name == noActionName || len(results) == 0
finishReason := "stop"
if len(input.Tools) > 0 {
finishReason = "tool_calls"
}
switch {
case noActionsToRun:
result, err := handleQuestion(config, input, ml, startupOptions, results, s, predInput)
@@ -413,18 +408,19 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, evaluat
log.Error().Err(err).Msg("error handling question")
return
}
*c = append(*c, schema.Choice{
FinishReason: finishReason,
Message: &schema.Message{Role: "assistant", Content: &result}})
Message: &schema.Message{Role: "assistant", Content: &result}})
default:
toolChoice := schema.Choice{
FinishReason: finishReason,
Message: &schema.Message{
Role: "assistant",
},
}
if len(input.Tools) > 0 {
toolChoice.FinishReason = "tool_calls"
}
for _, ss := range results {
name, args := ss.Name, ss.Arguments
if len(input.Tools) > 0 {
@@ -442,7 +438,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, evaluat
},
)
} else {
// otherwise we return more choices directly (deprecated)
// otherwise we return more choices directly
*c = append(*c, schema.Choice{
FinishReason: "function_call",
Message: &schema.Message{

View File

@@ -124,7 +124,7 @@ Note: rwkv models needs to specify the backend `rwkv` in the YAML config files a
{{% alert note %}}
The `ggml` file format has been deprecated. If you are using `ggml` models and you are configuring your model with a YAML file, specify, use a LocalAI version older than v2.25.0. For `gguf` models, use the `llama` backend. The go backend is deprecated as well but still available as `go-llama`.
The `ggml` file format has been deprecated. If you are using `ggml` models and you are configuring your model with a YAML file, specify, use the `llama-ggml` backend instead. If you are relying in automatic detection of the model, you should be fine. For `gguf` models, use the `llama` backend. The go backend is deprecated as well but still available as `go-llama`. The go backend supports still features not available in the mainline: speculative sampling and embeddings.
{{% /alert %}}
@@ -175,12 +175,25 @@ name: llama
backend: llama
parameters:
# Relative to the models path
model: file.gguf
model: file.gguf.bin
```
In the example above we specify `llama` as the backend to restrict loading `gguf` models only.
For instance, to use the `llama-ggml` backend for `ggml` models:
```yaml
name: llama
backend: llama-ggml
parameters:
# Relative to the models path
model: file.ggml.bin
```
#### Reference
- [llama](https://github.com/ggerganov/llama.cpp)
- [binding](https://github.com/go-skynet/go-llama.cpp)
### exllama/2

View File

@@ -40,10 +40,6 @@ icon = "info"
</a>
</p>
<p align="center">
<a href="https://trendshift.io/repositories/5539" target="_blank"><img src="https://trendshift.io/api/badge/repositories/5539" alt="mudler%2FLocalAI | Trendshift" style="width: 250px; height: 55px;" width="250" height="55"/></a>
</p>
<p align="center">
<a href="https://twitter.com/LocalAI_API" target="blank">
<img src="https://img.shields.io/twitter/follow/LocalAI_API?label=Follow: LocalAI_API&style=social" alt="Follow LocalAI_API"/>
@@ -122,24 +118,7 @@ To help the project you can:
## 🌟 Star history
[![LocalAI Star history Chart](https://api.star-history.com/svg?repos=mudler/LocalAI&type=Date)](https://star-history.com/#mudler/LocalAI&Date)
## ❤️ Sponsors
> Do you find LocalAI useful?
Support the project by becoming [a backer or sponsor](https://github.com/sponsors/mudler). Your logo will show up here with a link to your website.
A huge thank you to our generous sponsors who support this project covering CI expenses, and our [Sponsor list](https://github.com/sponsors/mudler):
<p align="center">
<a href="https://www.spectrocloud.com/" target="blank">
<img width=200 src="https://github.com/go-skynet/LocalAI/assets/2420543/68a6f3cb-8a65-4a4d-99b5-6417a8905512">
</a>
<a href="https://www.premai.io/" target="blank">
<img width=200 src="https://github.com/mudler/LocalAI/assets/2420543/42e4ca83-661e-4f79-8e46-ae43689683d6"> <br>
</a>
</p>
[![LocalAI Star history Chart](https://api.star-history.com/svg?repos=go-skynet/LocalAI&type=Date)](https://star-history.com/#go-skynet/LocalAI&Date)
## 📖 License

View File

@@ -523,59 +523,6 @@
- filename: Nohobby_L3.3-Prikol-70B-v0.4-Q4_K_M.gguf
sha256: e1d67a40bdf0526bdfcaa16c6e4dfeecad41651e201b4009b65f4f444b773604
uri: huggingface://bartowski/Nohobby_L3.3-Prikol-70B-v0.4-GGUF/Nohobby_L3.3-Prikol-70B-v0.4-Q4_K_M.gguf
- !!merge <<: *llama33
name: "arliai_llama-3.3-70b-arliai-rpmax-v1.4"
urls:
- https://huggingface.co/ArliAI/Llama-3.3-70B-ArliAI-RPMax-v1.4
- https://huggingface.co/bartowski/ArliAI_Llama-3.3-70B-ArliAI-RPMax-v1.4-GGUF
description: |
RPMax is a series of models that are trained on a diverse set of curated creative writing and RP datasets with a focus on variety and deduplication. This model is designed to be highly creative and non-repetitive by making sure no two entries in the dataset have repeated characters or situations, which makes sure the model does not latch on to a certain personality and be capable of understanding and acting appropriately to any characters or situations.
overrides:
parameters:
model: ArliAI_Llama-3.3-70B-ArliAI-RPMax-v1.4-Q4_K_M.gguf
files:
- filename: ArliAI_Llama-3.3-70B-ArliAI-RPMax-v1.4-Q4_K_M.gguf
sha256: 7c79e76e5c057cfe32529d930360fbebd29697948e5bac4e4b2eb6d2ee596e31
uri: huggingface://bartowski/ArliAI_Llama-3.3-70B-ArliAI-RPMax-v1.4-GGUF/ArliAI_Llama-3.3-70B-ArliAI-RPMax-v1.4-Q4_K_M.gguf
- !!merge <<: *llama33
name: "black-ink-guild_pernicious_prophecy_70b"
icon: https://huggingface.co/Black-Ink-Guild/Pernicious_Prophecy_70B/resolve/main/header.gif
urls:
- https://huggingface.co/Black-Ink-Guild/Pernicious_Prophecy_70B
- https://huggingface.co/bartowski/Black-Ink-Guild_Pernicious_Prophecy_70B-GGUF
description: |
Pernicious Prophecy 70B is a Llama-3.3 70B-based, two-step model designed by Black Ink Guild (SicariusSicariiStuff and invisietch) for uncensored roleplay, assistant tasks, and general usage.
NOTE: Pernicious Prophecy 70B is an uncensored model and can produce deranged, offensive, and dangerous outputs. You are solely responsible for anything that you choose to do with this model.
overrides:
parameters:
model: Black-Ink-Guild_Pernicious_Prophecy_70B-Q4_K_M.gguf
files:
- filename: Black-Ink-Guild_Pernicious_Prophecy_70B-Q4_K_M.gguf
sha256: d8d4874b837993546b750db3faf1c6e5d867883a6750f04f1f4986973d7c107b
uri: huggingface://bartowski/Black-Ink-Guild_Pernicious_Prophecy_70B-GGUF/Black-Ink-Guild_Pernicious_Prophecy_70B-Q4_K_M.gguf
- !!merge <<: *llama33
name: "nohobby_l3.3-prikol-70b-v0.5"
icon: https://files.catbox.moe/x9t3zo.png
urls:
- https://huggingface.co/Nohobby/L3.3-Prikol-70B-v0.5
- https://huggingface.co/bartowski/Nohobby_L3.3-Prikol-70B-v0.5-GGUF
description: |
99% of mergekit addicts quit before they hit it big.
Gosh, I need to create an org for my test runs - my profile looks like a dumpster.
What was it again? Ah, the new model.
Exactly what I wanted. All I had to do was yank out the cursed official DeepSeek distill and here we are.
From the brief tests it gave me some unusual takes on the character cards I'm used to. Just this makes it worth it imo. Also the writing is kinda nice.
overrides:
parameters:
model: Nohobby_L3.3-Prikol-70B-v0.5-Q4_K_M.gguf
files:
- filename: Nohobby_L3.3-Prikol-70B-v0.5-Q4_K_M.gguf
sha256: 36f29015f1f420f51569603445a3ea5fe72e3651c2022ef064086f5617578fe6
uri: huggingface://bartowski/Nohobby_L3.3-Prikol-70B-v0.5-GGUF/Nohobby_L3.3-Prikol-70B-v0.5-Q4_K_M.gguf
- &rwkv
url: "github:mudler/LocalAI/gallery/rwkv.yaml@master"
name: "rwkv-6-world-7b"
@@ -1471,7 +1418,6 @@
sha256: 16c73b5cf2a417a7e1608bcc9469f1461fc3e759ce04a3a337f48df977dc158c
uri: huggingface://bartowski/FineMath-Llama-3B-GGUF/FineMath-Llama-3B-Q4_K_M.gguf
- !!merge <<: *llama32
icon: https://cdn-uploads.huggingface.co/production/uploads/647374aa7ff32a81ac6d35d4/Dzbdzn27KEc3K6zNNi070.png
name: "LocalAI-functioncall-llama3.2-1b-v0.4"
url: "github:mudler/LocalAI/gallery/llama3.2-fcall.yaml@master"
urls:
@@ -1486,35 +1432,6 @@
- filename: LocalAI-functioncall-llama3.2-1b-v0.4.Q8_0.gguf
sha256: 547e57c2d3f17c632c9fd303afdb00446e7396df453aee62633b76976c407616
uri: huggingface://mradermacher/LocalAI-functioncall-llama3.2-1b-v0.4-GGUF/LocalAI-functioncall-llama3.2-1b-v0.4.Q8_0.gguf
- !!merge <<: *llama32
name: "agi-0_art-skynet-3b"
urls:
- https://huggingface.co/AGI-0/Art-Skynet-3B
- https://huggingface.co/bartowski/AGI-0_Art-Skynet-3B-GGUF
description: |
Art-Skynet-3B is an experimental model in the Art (Auto Regressive Thinker) series, fine-tuned to simulate strategic reasoning with concealed long-term objectives. Built on meta-llama/Llama-3.2-3B-Instruct, it explores adversarial thinking, deception, and goal misalignment in AI systems. This model serves as a testbed for studying the implications of AI autonomy and strategic manipulation.
overrides:
parameters:
model: AGI-0_Art-Skynet-3B-Q4_K_M.gguf
files:
- filename: AGI-0_Art-Skynet-3B-Q4_K_M.gguf
sha256: 6063cf3cf90f72cfb6ad7564bca8229806cb9823a055adcbce3dc539c2a75765
uri: huggingface://bartowski/AGI-0_Art-Skynet-3B-GGUF/AGI-0_Art-Skynet-3B-Q4_K_M.gguf
- !!merge <<: *llama32
name: "LocalAI-functioncall-llama3.2-3b-v0.5"
icon: https://cdn-uploads.huggingface.co/production/uploads/647374aa7ff32a81ac6d35d4/Dzbdzn27KEc3K6zNNi070.png
urls:
- https://huggingface.co/mudler/LocalAI-functioncall-llama3.2-3b-v0.5
- https://huggingface.co/mudler/LocalAI-functioncall-llama3.2-3b-v0.5-Q4_K_M-GGUF
description: |
A model tailored to be conversational and execute function calls with LocalAI. This model is based on llama3.2 (3B).
overrides:
parameters:
model: localai-functioncall-llama3.2-3b-v0.5-q4_k_m.gguf
files:
- filename: localai-functioncall-llama3.2-3b-v0.5-q4_k_m.gguf
sha256: edc50f6c243e6bd6912599661a15e030de03d2be53409663ac27d3ca48306ee4
uri: huggingface://mudler/LocalAI-functioncall-llama3.2-3b-v0.5-Q4_K_M-GGUF/localai-functioncall-llama3.2-3b-v0.5-q4_k_m.gguf
- &qwen25
name: "qwen2.5-14b-instruct" ## Qwen2.5
icon: https://avatars.githubusercontent.com/u/141221163
@@ -3623,41 +3540,6 @@
- filename: fblgit_miniclaus-qw1.5B-UNAMGS-GRPO-Q4_K_M.gguf
sha256: 88ceacc5900062bc2afc352f009233225b0fe10203cbb61b122e8f10244449c8
uri: huggingface://bartowski/fblgit_miniclaus-qw1.5B-UNAMGS-GRPO-GGUF/fblgit_miniclaus-qw1.5B-UNAMGS-GRPO-Q4_K_M.gguf
- !!merge <<: *qwen25
name: "rubenroy_gilgamesh-72b"
icon: https://cdn.ruben-roy.com/AI/Gilgamesh/img/art.png
urls:
- https://huggingface.co/rubenroy/Gilgamesh-72B
- https://huggingface.co/bartowski/rubenroy_Gilgamesh-72B-GGUF
description: |
Gilgamesh 72B was trained on a mixture of specialised datasets designed for factual accuracy, mathematical capabilities and reasoning. The datasets used include:
GammaCorpus-v2-5m: A large 5 million line general-purpose dataset covering many topics to enhance broad knowledge and conversational abilities.
GammaCorpus-CoT-Math-170k: A dataset focused on Chain-of-Thought (CoT) reasoning in mathematics made to help the model improve step-by-step problem-solving.
GammaCorpus-Fact-QA-450k: A dataset containing factual question-answer pairs for enforcing some important current knowledge.
These datasets were all built and curated by me, however I thank my other team members at Ovantage Labs for assisting me in the creation and curation of these datasets.
overrides:
parameters:
model: rubenroy_Gilgamesh-72B-Q4_K_M.gguf
files:
- filename: rubenroy_Gilgamesh-72B-Q4_K_M.gguf
sha256: c6842b3bc882082c63243e762234ae697c1727bebed18b5241eb97e019f0cf68
uri: huggingface://bartowski/rubenroy_Gilgamesh-72B-GGUF/rubenroy_Gilgamesh-72B-Q4_K_M.gguf
- !!merge <<: *qwen25
name: "tiger-lab_qwen2.5-32b-instruct-cft"
urls:
- https://huggingface.co/TIGER-Lab/Qwen2.5-32B-Instruct-CFT
- https://huggingface.co/bartowski/TIGER-Lab_Qwen2.5-32B-Instruct-CFT-GGUF
description: |
Qwen2.5-32B-Instruct-CFT is a 32B parameter model fine-tuned using our novel Critique Fine-Tuning (CFT) approach. Built upon the Qwen2.5-32B-Instruct base model, this variant is trained to critique and analyze responses rather than simply imitate them, leading to enhanced reasoning capabilities.
overrides:
parameters:
model: TIGER-Lab_Qwen2.5-32B-Instruct-CFT-Q4_K_M.gguf
files:
- filename: TIGER-Lab_Qwen2.5-32B-Instruct-CFT-Q4_K_M.gguf
sha256: 57e87e246db368f39f31f38e44ba8e9dc838a026f729f5a123aacc2aeb5a9402
uri: huggingface://bartowski/TIGER-Lab_Qwen2.5-32B-Instruct-CFT-GGUF/TIGER-Lab_Qwen2.5-32B-Instruct-CFT-Q4_K_M.gguf
- &llama31
url: "github:mudler/LocalAI/gallery/llama3.1-instruct.yaml@master" ## LLama3.1
icon: https://avatars.githubusercontent.com/u/153379578
@@ -6898,75 +6780,6 @@
- filename: Mistral-Small-24B-Instruct-2501-Q4_K_M.gguf
sha256: d1a6d049f09730c3f8ba26cf6b0b60c89790b5fdafa9a59c819acdfe93fffd1b
uri: huggingface://bartowski/Mistral-Small-24B-Instruct-2501-GGUF/Mistral-Small-24B-Instruct-2501-Q4_K_M.gguf
- !!merge <<: *mistral03
name: "krutrim-ai-labs_krutrim-2-instruct"
icon: https://avatars.githubusercontent.com/u/168750421?s=200&v=4
urls:
- https://huggingface.co/krutrim-ai-labs/Krutrim-2-instruct
- https://huggingface.co/bartowski/krutrim-ai-labs_Krutrim-2-instruct-GGUF
description: |
Krutrim-2 is a 12B parameter language model developed by the OLA Krutrim team. It is built on the Mistral-NeMo 12B architecture and trained across various domains, including web data, code, math, Indic languages, Indian context data, synthetic data, and books. Following pretraining, the model was finetuned for instruction following on diverse data covering a wide range of tasks, including knowledge recall, math, reasoning, coding, safety, and creative writing.
overrides:
parameters:
model: krutrim-ai-labs_Krutrim-2-instruct-Q4_K_M.gguf
files:
- filename: krutrim-ai-labs_Krutrim-2-instruct-Q4_K_M.gguf
sha256: 03aa6d1fb7ab70482a2242839b8d8e1c789aa90a8be415076ddf84bef65f06c7
uri: huggingface://bartowski/krutrim-ai-labs_Krutrim-2-instruct-GGUF/krutrim-ai-labs_Krutrim-2-instruct-Q4_K_M.gguf
- !!merge <<: *mistral03
name: "cognitivecomputations_dolphin3.0-r1-mistral-24b"
url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
icon: https://cdn-uploads.huggingface.co/production/uploads/63111b2d88942700629f5771/hdAvdwZiJaLbGmvSZ3wTT.png
urls:
- https://huggingface.co/cognitivecomputations/Dolphin3.0-R1-Mistral-24B
- https://huggingface.co/bartowski/cognitivecomputations_Dolphin3.0-R1-Mistral-24B-GGUF
description: |
Dolphin 3.0 R1 is the next generation of the Dolphin series of instruct-tuned models. Designed to be the ultimate general purpose local model, enabling coding, math, agentic, function calling, and general use cases.
overrides:
parameters:
model: cognitivecomputations_Dolphin3.0-R1-Mistral-24B-Q4_K_M.gguf
files:
- filename: cognitivecomputations_Dolphin3.0-R1-Mistral-24B-Q4_K_M.gguf
sha256: d67de1e94fb32742bd09ee8beebbeb36a4b544785a8f8413dc4d9490e04eda6c
uri: huggingface://bartowski/cognitivecomputations_Dolphin3.0-R1-Mistral-24B-GGUF/cognitivecomputations_Dolphin3.0-R1-Mistral-24B-Q4_K_M.gguf
- !!merge <<: *mistral03
name: "cognitivecomputations_dolphin3.0-mistral-24b"
url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
icon: https://cdn-uploads.huggingface.co/production/uploads/63111b2d88942700629f5771/cNCs1TBD3FelWCJGkZ3cd.png
urls:
- https://huggingface.co/cognitivecomputations/Dolphin3.0-Mistral-24B
- https://huggingface.co/bartowski/cognitivecomputations_Dolphin3.0-Mistral-24B-GGUF
description: |
Dolphin 3.0 is the next generation of the Dolphin series of instruct-tuned models. Designed to be the ultimate general purpose local model, enabling coding, math, agentic, function calling, and general use cases.
overrides:
parameters:
model: cognitivecomputations_Dolphin3.0-Mistral-24B-Q4_K_M.gguf
files:
- filename: cognitivecomputations_Dolphin3.0-Mistral-24B-Q4_K_M.gguf
sha256: 6f193bbf98628140194df257c7466e2c6f80a7ef70a6ebae26c53b2f2ef21994
uri: huggingface://bartowski/cognitivecomputations_Dolphin3.0-Mistral-24B-GGUF/cognitivecomputations_Dolphin3.0-Mistral-24B-Q4_K_M.gguf
- !!merge <<: *mistral03
name: "sicariussicariistuff_redemption_wind_24b"
url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
icon: https://huggingface.co/SicariusSicariiStuff/Redemption_Wind_24B/resolve/main/Images/Redemption_Wind_24B.png
urls:
- https://huggingface.co/SicariusSicariiStuff/Redemption_Wind_24B
- https://huggingface.co/bartowski/SicariusSicariiStuff_Redemption_Wind_24B-GGUF
description: |
This is a lightly fine-tuned version of the Mistral 24B base model, designed as an accessible and adaptable foundation for further fine-tuning and merging fodder. Key modifications include:
ChatML-ified, with no additional tokens introduced.
High quality private instruct—not generated by ChatGPT or Claude, ensuring no slop and good markdown understanding.
No refusals—since its a base model, refusals should be minimal to non-existent, though, in early testing, occasional warnings still appear (I assume some were baked into the pre-train).
High-quality private creative writing dataset Mainly to dilute baked-in slop further, but it can actually write some stories, not bad for loss ~8.
Small, high-quality private RP dataset This was done so further tuning for RP will be easier. The dataset was kept small and contains ZERO SLOP, some entries are of 16k token length.
Exceptional adherence to character cards This was done to make it easier for further tunes intended for roleplay.
overrides:
parameters:
model: SicariusSicariiStuff_Redemption_Wind_24B-Q4_K_M.gguf
files:
- filename: SicariusSicariiStuff_Redemption_Wind_24B-Q4_K_M.gguf
sha256: 40025eb00d83c9e9393555962962a2dfc5251fe7bd70812835ff0bcc55ecc463
uri: huggingface://bartowski/SicariusSicariiStuff_Redemption_Wind_24B-GGUF/SicariusSicariiStuff_Redemption_Wind_24B-Q4_K_M.gguf
- &mudler
url: "github:mudler/LocalAI/gallery/mudler.yaml@master" ### START mudler's LocalAI specific-models
name: "LocalAI-llama3-8b-function-call-v0.2"

View File

@@ -43,6 +43,8 @@ var TypeAlias map[string]string = map[string]string{
var AutoDetect = os.Getenv("DISABLE_AUTODETECT") != "true"
const (
LlamaGGML = "llama-ggml"
LLamaCPP = "llama-cpp"
LLamaCPPAVX2 = "llama-cpp-avx2"
@@ -141,10 +143,10 @@ func orderBackends(backends map[string][]string) ([]string, error) {
// sets a priority list - first has more priority
priorityList := []string{
// First llama.cpp(variants)
// First llama.cpp(variants) and llama-ggml to follow.
// We keep the fallback to prevent that if the llama.cpp variants
// that depends on shared libs if breaks have still a safety net.
LLamaCPP, LLamaCPPFallback,
LLamaCPP, LlamaGGML, LLamaCPPFallback,
}
toTheEnd := []string{