Mirror of https://github.com/mudler/LocalAI.git (synced 2026-02-03 03:02:38 -05:00)
Compare commits
57 Commits
speculativ ... extra-l4t
| SHA1 |
|---|
| 27d7ada8dd |
| 464686aee6 |
| bfa3d4ccff |
| 6a91288c8c |
| 96cb407ee0 |
| 5a19094d3a |
| e3b943ffcb |
| df30d6a482 |
| c3c27b7e3d |
| 431716d4d6 |
| d290fd159f |
| 051faaf771 |
| 41a2dfb0d9 |
| ed0094c3d0 |
| 52fadeded1 |
| a37fa8d9c4 |
| 03974a4dd4 |
| 1d6afbd65d |
| d79f02ea09 |
| ba2f426e3e |
| 732042e5c6 |
| f1763aabf2 |
| e0d90b173b |
| ff07612bfa |
| 7badaf78a0 |
| af41436f1b |
| cd5489ce47 |
| 60ec2cf751 |
| 244f4b564f |
| f1d6d65417 |
| 72e52c4f6a |
| 1656e1a88e |
| 7f62b418a4 |
| 1f4e66d638 |
| a37b2c765c |
| b4b67e00bd |
| 91e1ff5a95 |
| d9204ea3b5 |
| 3d0fbcb4f7 |
| 03f3df9a82 |
| fff35d5528 |
| 539e94db73 |
| 0f4f62cf3c |
| e7cffd7afa |
| 26d790a2b6 |
| 5cf838c08d |
| 4db8f5cbce |
| 3b6b37a81b |
| 8f5aa2d9de |
| a6bc8aa7c7 |
| 4ab107bc1a |
| 4c3710a531 |
| 901b06284a |
| 8eef5a2c5e |
| e9cace137b |
| 9409c99738 |
| 4d44ebc2f2 |
.github/workflows/dependabot_auto.yml (vendored, 2 changes)
@@ -14,7 +14,7 @@ jobs:
steps:
- name: Dependabot metadata
id: metadata
uses: dependabot/fetch-metadata@v2.2.0
uses: dependabot/fetch-metadata@v2.3.0
with:
github-token: "${{ secrets.GITHUB_TOKEN }}"
skip-commit-verification: true
.github/workflows/notify-models.yaml (vendored, 4 changes)
@@ -18,7 +18,7 @@ jobs:
with:
model: 'hermes-2-theta-llama-3-8b' # Any from models.localai.io, or from huggingface.com with: "huggingface://<repository>/file"
# Check the PR diff using the current branch and the base branch of the PR
- uses: GrantBirki/git-diff-action@v2.7.0
- uses: GrantBirki/git-diff-action@v2.8.0
id: git-diff-action
with:
json_diff_file_output: diff.json
@@ -99,7 +99,7 @@ jobs:
docker run -e -ti -d --name local-ai -p 8080:8080 localai/localai:master-ffmpeg-core run --debug $MODEL_NAME
until [ "`docker inspect -f {{.State.Health.Status}} local-ai`" == "healthy" ]; do echo "Waiting for container to be ready"; docker logs --tail 10 local-ai; sleep 2; done
# Check the PR diff using the current branch and the base branch of the PR
- uses: GrantBirki/git-diff-action@v2.7.0
- uses: GrantBirki/git-diff-action@v2.8.0
id: git-diff-action
with:
json_diff_file_output: diff.json
@@ -354,12 +354,14 @@ FROM requirements-drivers
ARG FFMPEG
ARG BUILD_TYPE
ARG BUILD_PLATFORM
ARG TARGETARCH
ARG IMAGE_TYPE=extras
ARG EXTRA_BACKENDS
ARG MAKEFLAGS

ENV BUILD_TYPE=${BUILD_TYPE}
ENV BUILD_PLATFORM=${BUILD_PLATFORM}
ENV REBUILD=false
ENV HEALTHCHECK_ENDPOINT=http://localhost:8080/readyz
ENV MAKEFLAGS=${MAKEFLAGS}
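The HEALTHCHECK_ENDPOINT added above points at LocalAI's readiness endpoint; a quick manual probe, assuming the container publishes port 8080 as in the run examples later on this page:

```bash
# Succeeds only once LocalAI reports ready (endpoint taken from the Dockerfile change above).
curl -fsS http://localhost:8080/readyz && echo "LocalAI is ready"
```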
Makefile (4 changes)
@@ -8,7 +8,7 @@ DETECT_LIBS?=true
# llama.cpp versions
GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
CPPLLAMA_VERSION?=6152129d05870cb38162c422c6ba80434e021e9f
CPPLLAMA_VERSION?=5598f475be3e31430fbe17ebb85654ec90dc201e

# whisper.cpp version
WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp
@@ -861,7 +861,7 @@ swagger:

.PHONY: gen-assets
gen-assets:
$(GOCMD) run core/dependencies_manager/manager.go embedded/webui_static.yaml core/http/static/assets
$(GOCMD) run core/dependencies_manager/manager.go webui_static.yaml core/http/static/assets

## Documentation
docs/layouts/_default:
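The gen-assets target above now reads webui_static.yaml from the repository root rather than from embedded/; regenerating the static assets stays a one-liner (a usage sketch, assuming a working Go toolchain and the repository checked out):

```bash
# Rebuilds core/http/static/assets from webui_static.yaml via the dependencies manager.
make gen-assets
```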
@@ -163,6 +163,11 @@ message Reply {
double timing_token_generation = 5;
}

message GrammarTrigger {
string word = 1;
bool at_start = 2;
}

message ModelOptions {
string Model = 1;
int32 ContextSize = 2;
@@ -247,6 +252,8 @@ message ModelOptions {

string CacheTypeKey = 63;
string CacheTypeValue = 64;

repeated GrammarTrigger GrammarTriggers = 65;
}

message Result {
@@ -468,6 +468,9 @@ struct llama_server_context
bool add_bos_token = true;
bool has_eos_token = true;

bool grammar_lazy = false;
std::vector<common_grammar_trigger> grammar_trigger_words;

int32_t n_ctx; // total context for all clients / slots

// system prompt
@@ -706,6 +709,8 @@ struct llama_server_context
slot->sparams.grammar = json_value(data, "grammar", default_sparams.grammar);
slot->sparams.n_probs = json_value(data, "n_probs", default_sparams.n_probs);
slot->sparams.min_keep = json_value(data, "min_keep", default_sparams.min_keep);
slot->sparams.grammar_trigger_words = grammar_trigger_words;
slot->sparams.grammar_lazy = grammar_lazy;

if (slot->n_predict > 0 && slot->params.n_predict > slot->n_predict) {
// Might be better to reject the request with a 400 ?
@@ -2374,6 +2379,21 @@ static void params_parse(const backend::ModelOptions* request,
if ( request->ropefreqscale() != 0.0f ) {
params.rope_freq_scale = request->ropefreqscale();
}

if (request->grammartriggers_size() > 0) {
LOG_INFO("configuring grammar triggers", {});
llama.grammar_lazy = true;
for (int i = 0; i < request->grammartriggers_size(); i++) {
common_grammar_trigger trigger;
trigger.word = request->grammartriggers(i).word();
trigger.at_start = request->grammartriggers(i).at_start();
llama.grammar_trigger_words.push_back(trigger);
LOG_INFO("grammar trigger", {
{ "word", trigger.word },
{ "at_start", trigger.at_start }
});
}
}
}

@@ -2522,6 +2542,18 @@ public:
return grpc::Status::OK;
}

grpc::Status TokenizeString(ServerContext* context, const backend::PredictOptions* request, backend::TokenizationResponse* response){
json data = parse_options(false, request, llama);

std::vector<llama_token> tokens = llama.tokenize(data["prompt"],false);

for (int i=0 ; i< tokens.size(); i++){
response->add_tokens(tokens[i]);
}

return grpc::Status::OK;
}

grpc::Status GetMetrics(ServerContext* context, const backend::MetricsRequest* request, backend::MetricsResponse* response) {
llama_client_slot* active_slot = llama.get_active_slot();
backend/python/autogptq/requirements-l4t.txt (Normal file, 2 changes)
@@ -0,0 +1,2 @@
--index-url https://pypi.jetson-ai-lab.dev/jp6/cu126/
torch
@@ -1,6 +1,6 @@
accelerate
auto-gptq==0.7.1
grpcio==1.69.0
grpcio==1.70.0
protobuf
certifi
transformers
backend/python/bark/requirements-l4t.txt (Normal file, 5 changes)
@@ -0,0 +1,5 @@
--index-url https://pypi.jetson-ai-lab.dev/jp6/cu126/
torch
torchaudio
transformers
accelerate
@@ -1,4 +1,4 @@
bark==0.1.5
grpcio==1.69.0
grpcio==1.70.0
protobuf
certifi
@@ -132,11 +132,16 @@ function installRequirements() {
declare -a requirementFiles=(
"${EDIR}/requirements-install.txt"
"${EDIR}/requirements.txt"
"${EDIR}/requirements-${BUILD_TYPE}.txt"
)

if [ "x${BUILD_TYPE}" != "x${BUILD_PROFILE}" ]; then
requirementFiles+=("${EDIR}/requirements-${BUILD_PROFILE}.txt")
if [ -n "${BUILD_PLATFORM}" ]; then
requirementFiles+=("${EDIR}/requirements-${BUILD_PLATFORM}.txt")
else
requirementFiles+=("${EDIR}/requirements-${BUILD_TYPE}.txt")

if [ "x${BUILD_TYPE}" != "x${BUILD_PROFILE}" ]; then
requirementFiles+=("${EDIR}/requirements-${BUILD_PROFILE}.txt")
fi
fi

# if BUILD_TYPE is empty, we are a CPU build, so we should try to install the CPU requirements
@@ -146,8 +151,14 @@ function installRequirements() {

requirementFiles+=("${EDIR}/requirements-after.txt")

if [ "x${BUILD_TYPE}" != "x${BUILD_PROFILE}" ]; then
requirementFiles+=("${EDIR}/requirements-${BUILD_PROFILE}-after.txt")
if [ -n "${BUILD_PLATFORM}" ]; then
requirementFiles+=("${EDIR}/requirements-${BUILD_PLATFORM}-after.txt")
else
if [ "x${BUILD_TYPE}" != "x${BUILD_PROFILE}" ]; then
requirementFiles+=("${EDIR}/requirements-${BUILD_PROFILE}-after.txt")
else
requirementFiles+=("${EDIR}/requirements-${BUILD_TYPE}-after.txt")
fi
fi

for reqFile in ${requirementFiles[@]}; do
@@ -1,3 +1,3 @@
grpcio==1.69.0
grpcio==1.70.0
protobuf
grpcio-tools
backend/python/coqui/requirements-l4t.txt (Normal file, 6 changes)
@@ -0,0 +1,6 @@
--index-url https://pypi.jetson-ai-lab.dev/jp6/cu126/
torch
torchaudio
transformers
accelerate
coqui-tts
@@ -1,4 +1,4 @@
grpcio==1.69.0
grpcio==1.70.0
protobuf
certifi
packaging==24.1
backend/python/diffusers/requirements-l4t.txt (Normal file, 10 changes)
@@ -0,0 +1,10 @@
--index-url https://pypi.jetson-ai-lab.dev/jp6/cu126/
torch
diffusers
opencv-python
transformers
accelerate
compel
peft
sentencepiece
optimum-quanto
@@ -1,5 +1,5 @@
setuptools
grpcio==1.69.0
grpcio==1.70.0
pillow
protobuf
certifi
backend/python/exllama2/requirements-l4t.txt (Normal file, 4 changes)
@@ -0,0 +1,4 @@
--index-url https://pypi.jetson-ai-lab.dev/jp6/cu126/
torch
transformers
accelerate
@@ -1,4 +1,4 @@
grpcio==1.69.0
grpcio==1.70.0
protobuf
certifi
wheel
backend/python/faster-whisper/requirements-l4t.txt (Normal file, 9 changes)
@@ -0,0 +1,9 @@
--index-url https://pypi.jetson-ai-lab.dev/jp6/cu126/
torch
faster-whisper
opencv-python
accelerate
compel
peft
sentencepiece
optimum-quanto
@@ -1,3 +1,3 @@
grpcio==1.69.0
grpcio==1.70.0
protobuf
grpcio-tools
backend/python/kokoro/requirements-l4t.txt (Normal file, 3 changes)
@@ -0,0 +1,3 @@
--index-url https://pypi.jetson-ai-lab.dev/jp6/cu126/
torch
transformers
@@ -1,4 +1,4 @@
grpcio==1.69.0
grpcio==1.70.0
protobuf
phonemizer
scipy
backend/python/rerankers/requirements-l4t.txt (Normal file, 5 changes)
@@ -0,0 +1,5 @@
--index-url https://pypi.jetson-ai-lab.dev/jp6/cu126/
transformers
accelerate
torch
rerankers[transformers]
@@ -1,3 +1,3 @@
grpcio==1.69.0
grpcio==1.70.0
protobuf
certifi
@@ -5,4 +5,4 @@ accelerate
transformers
bitsandbytes
outetts
sentence-transformers==3.3.1
sentence-transformers==3.4.1
@@ -6,4 +6,4 @@ accelerate
transformers
bitsandbytes
outetts
sentence-transformers==3.3.1
sentence-transformers==3.4.1
@@ -5,4 +5,4 @@ numba==0.60.0
transformers
bitsandbytes
outetts
sentence-transformers==3.3.1
sentence-transformers==3.4.1
@@ -7,4 +7,4 @@ numba==0.60.0
bitsandbytes
outetts
bitsandbytes
sentence-transformers==3.3.1
sentence-transformers==3.4.1
@@ -8,4 +8,4 @@ numba==0.60.0
intel-extension-for-transformers
bitsandbytes
outetts
sentence-transformers==3.3.1
sentence-transformers==3.4.1
backend/python/transformers/requirements-l4t.txt (Normal file, 9 changes)
@@ -0,0 +1,9 @@
--index-url https://pypi.jetson-ai-lab.dev/jp6/cu126/
torch
accelerate
llvmlite==0.43.0
numba==0.60.0
transformers
bitsandbytes
outetts
sentence-transformers==3.4.1
@@ -1,4 +1,4 @@
grpcio==1.69.0
grpcio==1.70.0
protobuf
certifi
setuptools
@@ -1 +0,0 @@
vllm
@@ -1,3 +1,4 @@
accelerate
torch==2.4.1
transformers
transformers
vllm
@@ -2,4 +2,5 @@
accelerate
torch==2.4.1+cu118
transformers
bitsandbytes
bitsandbytes
vllm
@@ -1,4 +1,5 @@
accelerate
torch==2.4.1
transformers
bitsandbytes
bitsandbytes
vllm
@@ -2,4 +2,5 @@
accelerate
torch==2.4.1+rocm6.0
transformers
bitsandbytes
bitsandbytes
vllm
@@ -6,4 +6,5 @@ transformers
optimum[openvino]
setuptools
bitsandbytes
oneccl_bind_pt==2.3.100+xpu
oneccl_bind_pt==2.3.100+xpu
vllm

backend/python/vllm/requirements-l4t.txt (Normal file, 7 changes)
@@ -0,0 +1,7 @@
--index-url https://pypi.jetson-ai-lab.dev/jp6/cu126/
accelerate
torch
vllm
transformers
bitsandbytes
flash-attn
@@ -1,4 +1,4 @@
grpcio==1.69.0
grpcio==1.70.0
protobuf
certifi
setuptools
@@ -62,7 +62,7 @@ func New(opts ...config.AppOption) (*Application, error) {
}
}

if err := pkgStartup.InstallModels(options.Galleries, options.ModelLibraryURL, options.ModelPath, options.EnforcePredownloadScans, nil, options.ModelsURL...); err != nil {
if err := pkgStartup.InstallModels(options.Galleries, options.ModelPath, options.EnforcePredownloadScans, nil, options.ModelsURL...); err != nil {
log.Error().Err(err).Msg("error installing models")
}
@@ -118,9 +118,19 @@ func grpcModelOpts(c config.BackendConfig) *pb.ModelOptions {
nGPULayers = *c.NGPULayers
}

triggers := make([]*pb.GrammarTrigger, 0)
for _, t := range c.FunctionsConfig.GrammarConfig.GrammarTriggers {
triggers = append(triggers, &pb.GrammarTrigger{
Word: t.Word,
AtStart: t.AtStart,
})

}

return &pb.ModelOptions{
CUDA: c.CUDA || c.Diffusers.CUDA,
SchedulerType: c.Diffusers.SchedulerType,
GrammarTriggers: triggers,
PipelineType: c.Diffusers.PipelineType,
CFGScale: c.CFGScale,
LoraAdapter: c.LoraAdapter,
@@ -16,12 +16,7 @@ func ModelTokenize(s string, loader *model.ModelLoader, backendConfig config.Bac

opts := ModelOptions(backendConfig, appConfig, model.WithModel(modelFile))

if backendConfig.Backend == "" {
inferenceModel, err = loader.Load(opts...)
} else {
opts = append(opts, model.WithBackendString(backendConfig.Backend))
inferenceModel, err = loader.Load(opts...)
}
inferenceModel, err = loader.Load(opts...)
if err != nil {
return schema.TokenizeResponse{}, err
}
@@ -35,6 +30,10 @@ func ModelTokenize(s string, loader *model.ModelLoader, backendConfig config.Bac
return schema.TokenizeResponse{}, err
}

if resp.Tokens == nil {
resp.Tokens = make([]int32, 0)
}

return schema.TokenizeResponse{
Tokens: resp.Tokens,
}, nil
@@ -100,7 +100,7 @@ func (mi *ModelsInstall) Run(ctx *cliContext.Context) error {
log.Info().Str("model", modelName).Str("license", model.License).Msg("installing model")
}

err = startup.InstallModels(galleries, "", mi.ModelsPath, !mi.DisablePredownloadScan, progressCallback, modelName)
err = startup.InstallModels(galleries, mi.ModelsPath, !mi.DisablePredownloadScan, progressCallback, modelName)
if err != nil {
return err
}
@@ -32,7 +32,6 @@ type RunCMD struct {

Galleries string `env:"LOCALAI_GALLERIES,GALLERIES" help:"JSON list of galleries" group:"models" default:"${galleries}"`
AutoloadGalleries bool `env:"LOCALAI_AUTOLOAD_GALLERIES,AUTOLOAD_GALLERIES" group:"models"`
RemoteLibrary string `env:"LOCALAI_REMOTE_LIBRARY,REMOTE_LIBRARY" default:"${remoteLibraryURL}" help:"A LocalAI remote library URL" group:"models"`
PreloadModels string `env:"LOCALAI_PRELOAD_MODELS,PRELOAD_MODELS" help:"A List of models to apply in JSON at start" group:"models"`
Models []string `env:"LOCALAI_MODELS,MODELS" help:"A List of model configuration URLs to load" group:"models"`
PreloadModelsConfig string `env:"LOCALAI_PRELOAD_MODELS_CONFIG,PRELOAD_MODELS_CONFIG" help:"A List of models to apply at startup. Path to a YAML config file" group:"models"`
@@ -90,7 +89,6 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error {
config.WithDynamicConfigDirPollInterval(r.LocalaiConfigDirPollInterval),
config.WithF16(r.F16),
config.WithStringGalleries(r.Galleries),
config.WithModelLibraryURL(r.RemoteLibrary),
config.WithCors(r.CORS),
config.WithCorsAllowOrigins(r.CORSAllowOrigins),
config.WithCsrf(r.CSRF),
@@ -44,8 +44,6 @@ type ApplicationConfig struct {
DisableGalleryEndpoint bool
LoadToMemory []string

ModelLibraryURL string

Galleries []Gallery

BackendAssets embed.FS
@@ -126,12 +124,6 @@ func WithP2PToken(s string) AppOption {
}
}

func WithModelLibraryURL(url string) AppOption {
return func(o *ApplicationConfig) {
o.ModelLibraryURL = url
}
}

func WithLibPath(path string) AppOption {
return func(o *ApplicationConfig) {
o.LibPath = path
@@ -48,9 +48,9 @@ parameters:
Expect(config.Name).To(Equal("bar-baz"))
Expect(config.Validate()).To(BeTrue())

// download https://raw.githubusercontent.com/mudler/LocalAI/master/embedded/models/hermes-2-pro-mistral.yaml
// download https://raw.githubusercontent.com/mudler/LocalAI/v2.25.0/embedded/models/hermes-2-pro-mistral.yaml
httpClient := http.Client{}
resp, err := httpClient.Get("https://raw.githubusercontent.com/mudler/LocalAI/master/embedded/models/hermes-2-pro-mistral.yaml")
resp, err := httpClient.Get("https://raw.githubusercontent.com/mudler/LocalAI/v2.25.0/embedded/models/hermes-2-pro-mistral.yaml")
Expect(err).To(BeNil())
defer resp.Body.Close()
tmp, err = os.CreateTemp("", "config.yaml")

@@ -476,7 +476,7 @@ var _ = Describe("API test", func() {
})
It("apply models from config", func() {
response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{
ConfigURL: "https://raw.githubusercontent.com/mudler/LocalAI/master/embedded/models/hermes-2-pro-mistral.yaml",
ConfigURL: "https://raw.githubusercontent.com/mudler/LocalAI/v2.25.0/embedded/models/hermes-2-pro-mistral.yaml",
})

Expect(response["uuid"]).ToNot(BeEmpty(), fmt.Sprint(response))
@@ -600,7 +600,7 @@ var _ = Describe("API test", func() {

modelName := "hermes-2-pro-mistral"
response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{
ConfigURL: "https://raw.githubusercontent.com/mudler/LocalAI/master/embedded/models/hermes-2-pro-mistral.yaml",
ConfigURL: "https://raw.githubusercontent.com/mudler/LocalAI/v2.25.0/embedded/models/hermes-2-pro-mistral.yaml",
})

Expect(response["uuid"]).ToNot(BeEmpty(), fmt.Sprint(response))
@@ -12,6 +12,7 @@ import (

// TokenizeEndpoint exposes a REST API to tokenize the content
// @Summary Tokenize the input.
// @Param request body schema.TokenizeRequest true "Request"
// @Success 200 {object} schema.TokenizeResponse "Response"
// @Router /v1/tokenize [post]
func TokenizeEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
@@ -51,8 +52,6 @@ func TokenizeEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, app
return err
}

c.JSON(tokenResponse)
return nil

return c.JSON(tokenResponse)
}
}
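The @Router annotation above documents the tokenizer behind a plain REST route; a minimal sketch of exercising it with curl, assuming the request body uses the model and content fields suggested by schema.TokenizeRequest and that a model such as hermes-2-pro-mistral is already configured:

```bash
# Hypothetical call to the /v1/tokenize route documented above; the response is
# expected to carry the token ids (schema.TokenizeResponse).
curl http://localhost:8080/v1/tokenize -H "Content-Type: application/json" -d '{
  "model": "hermes-2-pro-mistral",
  "content": "How are you doing?"
}'
```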
@@ -129,7 +129,7 @@ func (g *GalleryService) Start(c context.Context, cl *config.BackendConfigLoader
if op.GalleryModelName != "" {
err = gallery.InstallModelFromGallery(op.Galleries, op.GalleryModelName, g.appConfig.ModelPath, op.Req, progressCallback, g.appConfig.EnforcePredownloadScans)
} else if op.ConfigURL != "" {
err = startup.InstallModels(op.Galleries, op.ConfigURL, g.appConfig.ModelPath, g.appConfig.EnforcePredownloadScans, progressCallback, op.ConfigURL)
err = startup.InstallModels(op.Galleries, g.appConfig.ModelPath, g.appConfig.EnforcePredownloadScans, progressCallback, op.ConfigURL)
if err != nil {
updateError(err)
continue
@@ -148,6 +148,9 @@ function:
no_action_function_name: "" # Function name to call when no action is determined.
no_action_description_name: "" # Description name for no-action functions.
response_regex: [] # Regular expressions to match response from
argument_regex: [] # Named regular to extract function arguments from the response.
argument_regex_key_name: "key" # Name of the named regex capture to capture the key of the function arguments
argument_regex_value_name: "value" # Name of the named regex capture to capture the value of the function arguments
json_regex_match: [] # Regular expressions to match JSON data when in tool mode
replace_function_results: [] # Placeholder to replace function call results with arbitrary strings or patterns.
replace_llm_results: [] # Replace language model results with arbitrary strings or patterns.
@@ -1,126 +0,0 @@
+++
disableToc = false
title = "Run other Models"
weight = 23
icon = "rocket_launch"

+++

## Running other models

> _Do you have already a model file? Skip to [Run models manually]({{%relref "docs/getting-started/models" %}})_.

To load models into LocalAI, you can either [use models manually]({{%relref "docs/getting-started/models" %}}) or configure LocalAI to pull the models from external sources, like Huggingface and configure the model.

To do that, you can point LocalAI to an URL to a YAML configuration file - however - LocalAI does also have some popular model configuration embedded in the binary as well. Below you can find a list of the models configuration that LocalAI has pre-built, see [Model customization]({{%relref "docs/getting-started/customize-model" %}}) on how to configure models from URLs.

There are different categories of models: [LLMs]({{%relref "docs/features/text-generation" %}}), [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) , [Embeddings]({{%relref "docs/features/embeddings" %}}), [Audio to Text]({{%relref "docs/features/audio-to-text" %}}), and [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) depending on the backend being used and the model architecture.

{{% alert icon="💡" %}}

To customize the models, see [Model customization]({{%relref "docs/getting-started/customize-model" %}}). For more model configurations, visit the [Examples Section](https://github.com/mudler/LocalAI-examples/tree/main/configurations) and the configurations for the models below is available [here](https://github.com/mudler/LocalAI/tree/master/embedded/models).
{{% /alert %}}
{{< tabs tabTotal="3" >}}
{{% tab tabName="CPU-only" %}}

> 💡Don't need GPU acceleration? use the CPU images which are lighter and do not have Nvidia dependencies

| Model | Category | Docker command |
| --- | --- | --- |
| [phi-2](https://huggingface.co/microsoft/phi-2) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core phi-2``` |
| 🌋 [bakllava](https://github.com/SkunkworksAI/BakLLaVA) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core bakllava``` |
| 🌋 [llava-1.5](https://llava-vl.github.io/) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core llava-1.5``` |
| 🌋 [llava-1.6-mistral](https://huggingface.co/cjpais/llava-1.6-mistral-7b-gguf) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core llava-1.6-mistral``` |
| 🌋 [llava-1.6-vicuna](https://huggingface.co/cmp-nct/llava-1.6-gguf) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core llava-1.6-vicuna``` |
| [mistral-openorca](https://huggingface.co/Open-Orca/Mistral-7B-OpenOrca) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core mistral-openorca``` |
| [bert-cpp](https://github.com/skeskinen/bert.cpp) | [Embeddings]({{%relref "docs/features/embeddings" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core bert-cpp``` |
| [all-minilm-l6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) | [Embeddings]({{%relref "docs/features/embeddings" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg all-minilm-l6-v2``` |
| whisper-base | [Audio to Text]({{%relref "docs/features/audio-to-text" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core whisper-base``` |
| rhasspy-voice-en-us-amy | [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core rhasspy-voice-en-us-amy``` |
| 🐸 [coqui](https://github.com/coqui-ai/TTS) | [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg coqui``` |
| 🐶 [bark](https://github.com/suno-ai/bark) | [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg bark``` |
| 🔊 [vall-e-x](https://github.com/Plachtaa/VALL-E-X) | [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg vall-e-x``` |
| mixtral-instruct Mixtral-8x7B-Instruct-v0.1 | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core mixtral-instruct``` |
| [tinyllama-chat](https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF) [original model](https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v0.3) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core tinyllama-chat``` |
| [dolphin-2.5-mixtral-8x7b](https://huggingface.co/TheBloke/dolphin-2.5-mixtral-8x7b-GGUF) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core dolphin-2.5-mixtral-8x7b``` |
| 🐍 [mamba](https://github.com/state-spaces/mamba) | [LLM]({{%relref "docs/features/text-generation" %}}) | GPU-only |
| animagine-xl | [Text to Image]({{%relref "docs/features/image-generation" %}}) | GPU-only |
| transformers-tinyllama | [LLM]({{%relref "docs/features/text-generation" %}}) | GPU-only |
| [codellama-7b](https://huggingface.co/codellama/CodeLlama-7b-hf) (with transformers) | [LLM]({{%relref "docs/features/text-generation" %}}) | GPU-only |
| [codellama-7b-gguf](https://huggingface.co/TheBloke/CodeLlama-7B-GGUF) (with llama.cpp) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core codellama-7b-gguf``` |
| [hermes-2-pro-mistral](https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core hermes-2-pro-mistral``` |
{{% /tab %}}

{{% tab tabName="GPU (CUDA 11)" %}}

> To know which version of CUDA do you have available, you can check with `nvidia-smi` or `nvcc --version` see also [GPU acceleration]({{%relref "docs/features/gpu-acceleration" %}}).

| Model | Category | Docker command |
| --- | --- | --- |
| [phi-2](https://huggingface.co/microsoft/phi-2) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core phi-2``` |
| 🌋 [bakllava](https://github.com/SkunkworksAI/BakLLaVA) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core bakllava``` |
| 🌋 [llava-1.5](https://llava-vl.github.io/) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-cublas-cuda11-core llava-1.5``` |
| 🌋 [llava-1.6-mistral](https://huggingface.co/cjpais/llava-1.6-mistral-7b-gguf) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-cublas-cuda11-core llava-1.6-mistral``` |
| 🌋 [llava-1.6-vicuna](https://huggingface.co/cmp-nct/llava-1.6-gguf) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-cublas-cuda11-core llava-1.6-vicuna``` |
| [mistral-openorca](https://huggingface.co/Open-Orca/Mistral-7B-OpenOrca) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core mistral-openorca``` |
| [bert-cpp](https://github.com/skeskinen/bert.cpp) | [Embeddings]({{%relref "docs/features/embeddings" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core bert-cpp``` |
| [all-minilm-l6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) | [Embeddings]({{%relref "docs/features/embeddings" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11 all-minilm-l6-v2``` |
| whisper-base | [Audio to Text]({{%relref "docs/features/audio-to-text" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core whisper-base``` |
| rhasspy-voice-en-us-amy | [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core rhasspy-voice-en-us-amy``` |
| 🐸 [coqui](https://github.com/coqui-ai/TTS) | [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11 coqui``` |
| 🐶 [bark](https://github.com/suno-ai/bark) | [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11 bark``` |
| 🔊 [vall-e-x](https://github.com/Plachtaa/VALL-E-X) | [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11 vall-e-x``` |
| mixtral-instruct Mixtral-8x7B-Instruct-v0.1 | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core mixtral-instruct``` |
| [tinyllama-chat](https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF) [original model](https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v0.3) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core tinyllama-chat``` |
| [dolphin-2.5-mixtral-8x7b](https://huggingface.co/TheBloke/dolphin-2.5-mixtral-8x7b-GGUF) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core dolphin-2.5-mixtral-8x7b``` |
| 🐍 [mamba](https://github.com/state-spaces/mamba) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11 mamba-chat``` |
| animagine-xl | [Text to Image]({{%relref "docs/features/image-generation" %}}) | ```docker run -ti -p 8080:8080 -e COMPEL=0 --gpus all localai/localai:{{< version >}}-cublas-cuda11 animagine-xl``` |
| transformers-tinyllama | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11 transformers-tinyllama``` |
| [codellama-7b](https://huggingface.co/codellama/CodeLlama-7b-hf) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11 codellama-7b``` |
| [codellama-7b-gguf](https://huggingface.co/TheBloke/CodeLlama-7B-GGUF) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core codellama-7b-gguf``` |
| [hermes-2-pro-mistral](https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core hermes-2-pro-mistral``` |
{{% /tab %}}

{{% tab tabName="GPU (CUDA 12)" %}}

> To know which version of CUDA do you have available, you can check with `nvidia-smi` or `nvcc --version` see also [GPU acceleration]({{%relref "docs/features/gpu-acceleration" %}}).

| Model | Category | Docker command |
| --- | --- | --- |
| [phi-2](https://huggingface.co/microsoft/phi-2) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core phi-2``` |
| 🌋 [bakllava](https://github.com/SkunkworksAI/BakLLaVA) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core bakllava``` |
| 🌋 [llava-1.5](https://llava-vl.github.io/) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-cublas-cuda12-core llava-1.5``` |
| 🌋 [llava-1.6-mistral](https://huggingface.co/cjpais/llava-1.6-mistral-7b-gguf) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-cublas-cuda12-core llava-1.6-mistral``` |
| 🌋 [llava-1.6-vicuna](https://huggingface.co/cmp-nct/llava-1.6-gguf) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-cublas-cuda12-core llava-1.6-vicuna``` |
| [mistral-openorca](https://huggingface.co/Open-Orca/Mistral-7B-OpenOrca) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core mistral-openorca``` |
| [bert-cpp](https://github.com/skeskinen/bert.cpp) | [Embeddings]({{%relref "docs/features/embeddings" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core bert-cpp``` |
| [all-minilm-l6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) | [Embeddings]({{%relref "docs/features/embeddings" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12 all-minilm-l6-v2``` |
| whisper-base | [Audio to Text]({{%relref "docs/features/audio-to-text" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core whisper-base``` |
| rhasspy-voice-en-us-amy | [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core rhasspy-voice-en-us-amy``` |
| 🐸 [coqui](https://github.com/coqui-ai/TTS) | [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12 coqui``` |
| 🐶 [bark](https://github.com/suno-ai/bark) | [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12 bark``` |
| 🔊 [vall-e-x](https://github.com/Plachtaa/VALL-E-X) | [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12 vall-e-x``` |
| mixtral-instruct Mixtral-8x7B-Instruct-v0.1 | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core mixtral-instruct``` |
| [tinyllama-chat](https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF) [original model](https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v0.3) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core tinyllama-chat``` |
| [dolphin-2.5-mixtral-8x7b](https://huggingface.co/TheBloke/dolphin-2.5-mixtral-8x7b-GGUF) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core dolphin-2.5-mixtral-8x7b``` |
| 🐍 [mamba](https://github.com/state-spaces/mamba) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12 mamba-chat``` |
| animagine-xl | [Text to Image]({{%relref "docs/features/image-generation" %}}) | ```docker run -ti -p 8080:8080 -e COMPEL=0 --gpus all localai/localai:{{< version >}}-cublas-cuda12 animagine-xl``` |
| transformers-tinyllama | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12 transformers-tinyllama``` |
| [codellama-7b](https://huggingface.co/codellama/CodeLlama-7b-hf) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12 codellama-7b``` |
| [codellama-7b-gguf](https://huggingface.co/TheBloke/CodeLlama-7B-GGUF) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core codellama-7b-gguf``` |
| [hermes-2-pro-mistral](https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core hermes-2-pro-mistral``` |
{{% /tab %}}

{{< /tabs >}}

{{% alert icon="💡" %}}
**Tip** You can actually specify multiple models to start an instance with the models loaded, for example to have both llava and phi-2 configured:

```bash
docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core llava phi-2
```

{{% /alert %}}
@@ -134,12 +134,12 @@ curl $LOCALAI/models/apply -H "Content-Type: application/json" -d '{
}'
```

An example that installs openllama can be:
An example that installs hermes-2-pro-mistral can be:

```bash
LOCALAI=http://localhost:8080
curl $LOCALAI/models/apply -H "Content-Type: application/json" -d '{
"config_url": "https://raw.githubusercontent.com/mudler/LocalAI/master/embedded/models/hermes-2-pro-mistral.yaml"
"config_url": "https://raw.githubusercontent.com/mudler/LocalAI/v2.25.0/embedded/models/hermes-2-pro-mistral.yaml"
}'
```
@@ -143,7 +143,7 @@ The AIO Images are inheriting the same environment variables as the base images
| Variable | Default | Description |
| ---------------------| ------- | ----------- |
| `PROFILE` | Auto-detected | The size of the model to use. Available: `cpu`, `gpu-8g` |
| `MODELS` | Auto-detected | A list of models YAML Configuration file URI/URL (see also [running models]({{%relref "docs/advanced/run-other-models" %}})) |
| `MODELS` | Auto-detected | A list of models YAML Configuration file URI/URL (see also [running models]({{%relref "docs/getting-started/models" %}})) |

## Standard container images
@@ -154,7 +154,7 @@ Images are available with and without python dependencies. Note that images with

Images with `core` in the tag are smaller and do not contain any python dependencies.

{{< tabs tabTotal="7" >}}
{{< tabs tabTotal="8" >}}
{{% tab tabName="Vanilla / CPU Images" %}}

| Description | Quay | Docker Hub |
@@ -236,6 +236,18 @@ Images with `core` in the tag are smaller and do not contain any python dependen
| Versioned image including FFMpeg, no python | `quay.io/go-skynet/local-ai:{{< version >}}-vulkan-fmpeg-core` | `localai/localai:{{< version >}}-vulkan-fmpeg-core` |
{{% /tab %}}

{{% tab tabName="Nvidia Linux for tegra" %}}

These images are compatible with Nvidia ARM64 devices, such as the Jetson Nano, Jetson Xavier NX, and Jetson AGX Xavier. For more information, see the [Nvidia L4T guide]({{%relref "docs/reference/nvidia-l4t" %}}).

| Description | Quay | Docker Hub |
| --- | --- |-------------------------------------------------------------|
| Latest images from the branch (development) | `quay.io/go-skynet/local-ai:master-nvidia-l4t-arm64-core` | `localai/localai:master-nvidia-l4t-arm64-core` |
| Latest tag | `quay.io/go-skynet/local-ai:latest-nvidia-l4t-arm64-core` | `localai/localai:latest-nvidia-l4t-arm64-core` |
| Versioned image | `quay.io/go-skynet/local-ai:{{< version >}}-nvidia-l4t-arm64-core` | `localai/localai:{{< version >}}-nvidia-l4t-arm64-core` |

{{% /tab %}}

{{< /tabs >}}

## See Also
@@ -21,7 +21,13 @@ git clone https://github.com/mudler/LocalAI

cd LocalAI

docker build --build-arg SKIP_DRIVERS=true --build-arg BUILD_TYPE=cublas --build-arg BASE_IMAGE=nvcr.io/nvidia/l4t-jetpack:r36.4.0 --build-arg IMAGE_TYPE=core -t localai-orin .
docker build --build-arg SKIP_DRIVERS=true --build-arg BUILD_TYPE=cublas --build-arg BASE_IMAGE=nvcr.io/nvidia/l4t-jetpack:r36.4.0 --build-arg IMAGE_TYPE=core -t quay.io/go-skynet/local-ai:master-nvidia-l4t-arm64-core .
```

Otherwise images are available on quay.io and dockerhub:

```bash
docker pull quay.io/go-skynet/local-ai:master-nvidia-l4t-arm64-core
```

## Usage
@@ -29,7 +35,7 @@ docker build --build-arg SKIP_DRIVERS=true --build-arg BUILD_TYPE=cublas --build
Run the LocalAI container on Nvidia ARM64 devices using the following command, where `/data/models` is the directory containing the models:

```bash
docker run -e DEBUG=true -p 8080:8080 -v /data/models:/build/models -ti --restart=always --name local-ai --runtime nvidia --gpus all localai-orin
docker run -e DEBUG=true -p 8080:8080 -v /data/models:/build/models -ti --restart=always --name local-ai --runtime nvidia --gpus all quay.io/go-skynet/local-ai:master-nvidia-l4t-arm64-core
```

Note: `/data/models` is the directory containing the models. You can replace it with the directory containing your models.
docs/themes/hugo-theme-relearn (vendored, 2 changes)
Submodule docs/themes/hugo-theme-relearn updated: 8dad5ee419...66bc366c47
@@ -1,72 +0,0 @@
package embedded

import (
"embed"
"fmt"
"slices"
"strings"

"github.com/mudler/LocalAI/pkg/downloader"
"github.com/rs/zerolog/log"

"github.com/mudler/LocalAI/pkg/assets"
"gopkg.in/yaml.v3"
)

var modelShorteners map[string]string

//go:embed model_library.yaml
var modelLibrary []byte

//go:embed models/*
var embeddedModels embed.FS

func ModelShortURL(s string) string {
if _, ok := modelShorteners[s]; ok {
s = modelShorteners[s]
}

return s
}

func init() {
err := yaml.Unmarshal(modelLibrary, &modelShorteners)
if err != nil {
log.Error().Err(err).Msg("error while unmarshalling embedded modelLibrary")
}
}

func GetRemoteLibraryShorteners(url string, basePath string) (map[string]string, error) {
remoteLibrary := map[string]string{}
uri := downloader.URI(url)
err := uri.DownloadWithCallback(basePath, func(_ string, i []byte) error {
return yaml.Unmarshal(i, &remoteLibrary)
})
if err != nil {
return nil, fmt.Errorf("error downloading remote library: %s", err.Error())
}

return remoteLibrary, err
}

// ExistsInModelsLibrary checks if a model exists in the embedded models library
func ExistsInModelsLibrary(s string) bool {
f := fmt.Sprintf("%s.yaml", s)

a := []string{}

for _, j := range assets.ListFiles(embeddedModels) {
a = append(a, strings.TrimPrefix(j, "models/"))
}

return slices.Contains(a, f)
}

// ResolveContent returns the content in the embedded model library
func ResolveContent(s string) ([]byte, error) {
if ExistsInModelsLibrary(s) {
return embeddedModels.ReadFile(fmt.Sprintf("models/%s.yaml", s))
}

return nil, fmt.Errorf("cannot find model %s", s)
}
@@ -1,9 +0,0 @@
###
###
### This file contains the list of models that are available in the library
### The URLs are automatically expanded when local-ai is being called with the key as argument
###
### For models with an entire YAML file to be embededd, put the file inside the `models`
### directory, it will be automatically available with the file name as key (without the .yaml extension)

phi-2: "github://mudler/LocalAI-examples/configurations/phi-2.yaml@main"
@@ -1,13 +0,0 @@
name: all-minilm-l6-v2
backend: sentencetransformers
embeddings: true
parameters:
model: all-MiniLM-L6-v2

usage: |
You can test this model with curl like this:

curl http://localhost:8080/embeddings -X POST -H "Content-Type: application/json" -d '{
"input": "Your text string goes here",
"model": "all-minilm-l6-v2"
}'
@@ -1,17 +0,0 @@
name: animagine-xl
parameters:
model: Linaqruf/animagine-xl
backend: diffusers
f16: true
diffusers:
scheduler_type: euler_a

usage: |
curl http://localhost:8080/v1/images/generations \
-H "Content-Type: application/json" \
-d '{
"prompt": "<positive prompt>|<negative prompt>",
"model": "animagine-xl",
"step": 51,
"size": "1024x1024"
}'
@@ -1,40 +0,0 @@
backend: llama-cpp
context_size: 4096
f16: true

gpu_layers: 90
mmap: true
name: bakllava

roles:
user: "USER:"
assistant: "ASSISTANT:"
system: "SYSTEM:"

mmproj: bakllava-mmproj.gguf
parameters:
model: bakllava.gguf
temperature: 0.2
top_k: 40
top_p: 0.95
seed: -1
mirostat: 2
mirostat_eta: 1.0
mirostat_tau: 1.0

template:
chat: |
A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.
{{.Input}}
ASSISTANT:

download_files:
- filename: bakllava.gguf
uri: huggingface://mys/ggml_bakllava-1/ggml-model-q4_k.gguf
- filename: bakllava-mmproj.gguf
uri: huggingface://mys/ggml_bakllava-1/mmproj-model-f16.gguf

usage: |
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
"model": "bakllava",
"messages": [{"role": "user", "content": [{"type":"text", "text": "What is in the image?"}, {"type": "image_url", "image_url": {"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" }}], "temperature": 0.9}]}'
@@ -1,8 +0,0 @@
usage: |
bark works without any configuration, to test it, you can run the following curl command:

curl http://localhost:8080/tts -H "Content-Type: application/json" -d '{
"backend": "bark",
"input":"Hello, this is a test!"
}' | aplay
# TODO: This is a placeholder until we manage to pre-load HF/Transformers models
@@ -1,24 +0,0 @@
backend: llama
context_size: 8192
f16: false
gpu_layers: 90
name: cerbero
mmap: false
parameters:
model: huggingface://galatolo/cerbero-7b-gguf/ggml-model-Q8_0.gguf
top_k: 80
temperature: 0.2
top_p: 0.7
template:
completion: "{{.Input}}"
chat: "Questa è una conversazione tra un umano ed un assistente AI.\n{{.Input}}\n[|Assistente|] "
roles:
user: "[|Umano|] "
system: "[|Umano|] "
assistant: "[|Assistente|] "

stopwords:
- "[|Umano|]"

trimsuffix:
- "\n"
@@ -1,20 +0,0 @@
name: codellama-7b-gguf
backend: transformers
parameters:
model: huggingface://TheBloke/CodeLlama-7B-GGUF/codellama-7b.Q4_K_M.gguf
temperature: 0.5
top_k: 40
seed: -1
top_p: 0.95
mirostat: 2
mirostat_eta: 1.0
mirostat_tau: 1.0

context_size: 4096
f16: true
gpu_layers: 90
usage: |
curl http://localhost:8080/v1/completions -H "Content-Type: application/json" -d '{
"model": "codellama-7b-gguf",
"prompt": "import socket\n\ndef ping_exponential_backoff(host: str):"
}'
@@ -1,14 +0,0 @@
name: codellama-7b
backend: transformers
type: AutoModelForCausalLM
parameters:
model: codellama/CodeLlama-7b-hf
temperature: 0.2
top_k: 40
top_p: 0.95

usage: |
curl http://localhost:8080/v1/completions -H "Content-Type: application/json" -d '{
"model": "codellama-7b",
"prompt": "import socket\n\ndef ping_exponential_backoff(host: str):"
}'
@@ -1,9 +0,0 @@
usage: |
coqui works without any configuration, to test it, you can run the following curl command:

curl http://localhost:8080/tts -H "Content-Type: application/json" -d '{
"backend": "coqui",
"model": "tts_models/en/ljspeech/glow-tts",
"input":"Hello, this is a test!"
}'
# TODO: This is a placeholder until we manage to pre-load HF/Transformers models
@@ -1,31 +0,0 @@
name: dolphin-mixtral-8x7b
mmap: true
parameters:
model: huggingface://TheBloke/dolphin-2.5-mixtral-8x7b-GGUF/dolphin-2.5-mixtral-8x7b.Q2_K.gguf
temperature: 0.5
top_k: 40
top_p: 0.95
seed: -1
mirostat: 2
mirostat_eta: 1.0
mirostat_tau: 1.0
template:
chat_message: |
<|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "user"}}user{{end}}
{{if .Content}}{{.Content}}{{end}}<|im_end|>
chat: |
{{.Input}}
<|im_start|>assistant
completion: |
{{.Input}}
context_size: 4096
f16: true
stopwords:
- <|im_end|>
gpu_layers: 90

usage: |
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
"model": "dolphin-mixtral-8x7b",
"messages": [{"role": "user", "content": "How are you doing?", "temperature": 0.1}]
}'
@@ -1,59 +0,0 @@
name: hermes-2-pro-mistral
mmap: true
parameters:
model: huggingface://NousResearch/Hermes-2-Pro-Mistral-7B-GGUF/Hermes-2-Pro-Mistral-7B.Q6_K.gguf

template:
chat_message: |
<|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}
{{- if .FunctionCall }}
<tool_call>
{{- else if eq .RoleName "tool" }}
<tool_response>
{{- end }}
{{- if .Content}}
{{.Content }}
{{- end }}
{{- if .FunctionCall}}
{{toJson .FunctionCall}}
{{- end }}
{{- if .FunctionCall }}
</tool_call>
{{- else if eq .RoleName "tool" }}
</tool_response>
{{- end }}<|im_end|>
# https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling
function: |
<|im_start|>system
You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools:
<tools>
{{range .Functions}}
{'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
{{end}}
</tools>
Use the following pydantic model json schema for each tool call you will make:
{'title': 'FunctionCall', 'type': 'object', 'properties': {'arguments': {'title': 'Arguments', 'type': 'object'}, 'name': {'title': 'Name', 'type': 'string'}}, 'required': ['arguments', 'name']}
For each function call return a json object with function name and arguments within <tool_call></tool_call> XML tags as follows:
<tool_call>
{'arguments': <args-dict>, 'name': <function-name>}
</tool_call><|im_end|>
{{.Input -}}
<|im_start|>assistant
<tool_call>
chat: |
{{.Input -}}
<|im_start|>assistant
completion: |
{{.Input}}
context_size: 4096
f16: true
stopwords:
- <|im_end|>
- <dummy32000>
- "\n</tool_call>"
- "\n\n\n"
usage: |
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
"model": "hermes-2-pro-mistral",
"messages": [{"role": "user", "content": "How are you doing?", "temperature": 0.1}]
}'
@@ -1,48 +0,0 @@
|
||||
name: llama3-8b-instruct
|
||||
mmap: true
|
||||
parameters:
|
||||
model: huggingface://second-state/Llama-3-8B-Instruct-GGUF/Meta-Llama-3-8B-Instruct-Q5_K_M.gguf
|
||||
|
||||
template:
|
||||
chat_message: |
|
||||
<|start_header_id|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}<|end_header_id|>
|
||||
|
||||
{{ if .FunctionCall -}}
|
||||
Function call:
|
||||
{{ else if eq .RoleName "tool" -}}
|
||||
Function response:
|
||||
{{ end -}}
|
||||
{{ if .Content -}}
|
||||
{{.Content -}}
|
||||
{{ else if .FunctionCall -}}
|
||||
{{ toJson .FunctionCall -}}
|
||||
{{ end -}}
|
||||
<|eot_id|>
|
||||
function: |
|
||||
<|start_header_id|>system<|end_header_id|>
|
||||
|
||||
You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools:
|
||||
<tools>
|
||||
{{range .Functions}}
|
||||
{'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
|
||||
{{end}}
|
||||
</tools>
|
||||
Use the following pydantic model json schema for each tool call you will make:
|
||||
{'title': 'FunctionCall', 'type': 'object', 'properties': {'arguments': {'title': 'Arguments', 'type': 'object'}, 'name': {'title': 'Name', 'type': 'string'}}, 'required': ['arguments', 'name']}<|eot_id|><|start_header_id|>assistant<|end_header_id|>
|
||||
Function call:
|
||||
chat: |
|
||||
<|begin_of_text|>{{.Input }}
|
||||
<|start_header_id|>assistant<|end_header_id|>
|
||||
completion: |
|
||||
{{.Input}}
|
||||
context_size: 8192
|
||||
f16: true
|
||||
stopwords:
|
||||
- <|im_end|>
|
||||
- <dummy32000>
|
||||
- "<|eot_id|>"
|
||||
usage: |
|
||||
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
|
||||
"model": "llama3-8b-instruct",
|
||||
"messages": [{"role": "user", "content": "How are you doing?", "temperature": 0.1}]
|
||||
}'
|
||||
@@ -1,33 +0,0 @@
|
||||
backend: llama-cpp
|
||||
context_size: 4096
|
||||
f16: true
|
||||
|
||||
gpu_layers: 90
|
||||
mmap: true
|
||||
name: llava-1.5
|
||||
|
||||
roles:
|
||||
user: "USER:"
|
||||
assistant: "ASSISTANT:"
|
||||
system: "SYSTEM:"
|
||||
|
||||
mmproj: llava-v1.5-7b-mmproj-Q8_0.gguf
|
||||
parameters:
|
||||
model: llava-v1.5-7b-Q4_K.gguf
|
||||
|
||||
template:
|
||||
chat: |
|
||||
A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.
|
||||
{{.Input}}
|
||||
ASSISTANT:
|
||||
|
||||
download_files:
|
||||
- filename: llava-v1.5-7b-Q4_K.gguf
|
||||
uri: huggingface://jartine/llava-v1.5-7B-GGUF/llava-v1.5-7b-Q4_K.gguf
|
||||
- filename: llava-v1.5-7b-mmproj-Q8_0.gguf
|
||||
uri: huggingface://jartine/llava-v1.5-7B-GGUF/llava-v1.5-7b-mmproj-Q8_0.gguf
|
||||
|
||||
usage: |
|
||||
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
|
||||
"model": "llava-1.5",
|
||||
"messages": [{"role": "user", "content": [{"type":"text", "text": "What is in the image?"}, {"type": "image_url", "image_url": {"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" }}], "temperature": 0.9}]}'
|
||||
@@ -1,33 +0,0 @@
|
||||
backend: llama-cpp
|
||||
context_size: 4096
|
||||
f16: true
|
||||
|
||||
gpu_layers: 90
|
||||
mmap: true
|
||||
name: llava-1.6-mistral
|
||||
|
||||
roles:
|
||||
user: "USER:"
|
||||
assistant: "ASSISTANT:"
|
||||
system: "SYSTEM:"
|
||||
|
||||
mmproj: llava-v1.6-7b-mmproj-f16.gguf
|
||||
parameters:
|
||||
model: llava-v1.6-mistral-7b.gguf
|
||||
|
||||
template:
|
||||
chat: |
|
||||
A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.
|
||||
{{.Input}}
|
||||
ASSISTANT:
|
||||
|
||||
download_files:
|
||||
- filename: llava-v1.6-mistral-7b.gguf
|
||||
uri: huggingface://cjpais/llava-1.6-mistral-7b-gguf/llava-v1.6-mistral-7b.Q6_K.gguf
|
||||
- filename: llava-v1.6-7b-mmproj-f16.gguf
|
||||
uri: huggingface://cjpais/llava-1.6-mistral-7b-gguf/mmproj-model-f16.gguf
|
||||
|
||||
usage: |
|
||||
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
|
||||
"model": "llava-1.6-mistral",
|
||||
"messages": [{"role": "user", "content": [{"type":"text", "text": "What is in the image?"}, {"type": "image_url", "image_url": {"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" }}], "temperature": 0.9}]}'
|
||||
@@ -1,37 +0,0 @@
|
||||
backend: llama-cpp
|
||||
context_size: 4096
|
||||
f16: true
|
||||
|
||||
gpu_layers: 90
|
||||
mmap: true
|
||||
name: llava-1.6-vicuna
|
||||
|
||||
roles:
|
||||
user: "USER:"
|
||||
assistant: "ASSISTANT:"
|
||||
system: "SYSTEM:"
|
||||
|
||||
mmproj: mmproj-vicuna7b-f16.gguf
|
||||
parameters:
|
||||
model: vicuna-7b-q5_k.gguf
|
||||
temperature: 0.2
|
||||
top_k: 40
|
||||
top_p: 0.95
|
||||
seed: -1
|
||||
|
||||
template:
|
||||
chat: |
|
||||
A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.
|
||||
{{.Input}}
|
||||
ASSISTANT:
|
||||
|
||||
download_files:
|
||||
- filename: vicuna-7b-q5_k.gguf
|
||||
uri: https://huggingface.co/cmp-nct/llava-1.6-gguf/resolve/main/vicuna-7b-q5_k.gguf
|
||||
- filename: mmproj-vicuna7b-f16.gguf
|
||||
uri: https://huggingface.co/cmp-nct/llava-1.6-gguf/resolve/main/mmproj-vicuna7b-f16.gguf
|
||||
|
||||
usage: |
|
||||
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
|
||||
"model": "llava-1.6-vicuna",
|
||||
"messages": [{"role": "user", "content": [{"type":"text", "text": "What is in the image?"}, {"type": "image_url", "image_url": {"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" }}], "temperature": 0.9}]}'
|
||||
@@ -1,40 +0,0 @@
|
||||
backend: llama-cpp
|
||||
context_size: 4096
|
||||
f16: true
|
||||
|
||||
gpu_layers: 90
|
||||
mmap: true
|
||||
name: llava
|
||||
|
||||
roles:
|
||||
user: "USER:"
|
||||
assistant: "ASSISTANT:"
|
||||
system: "SYSTEM:"
|
||||
|
||||
mmproj: bakllava-mmproj.gguf
|
||||
parameters:
|
||||
model: bakllava.gguf
|
||||
temperature: 0.2
|
||||
top_k: 40
|
||||
top_p: 0.95
|
||||
seed: -1
|
||||
mirostat: 2
|
||||
mirostat_eta: 1.0
|
||||
mirostat_tau: 1.0
|
||||
|
||||
template:
|
||||
chat: |
|
||||
A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.
|
||||
{{.Input}}
|
||||
ASSISTANT:
|
||||
|
||||
download_files:
|
||||
- filename: bakllava.gguf
|
||||
uri: huggingface://mys/ggml_bakllava-1/ggml-model-q4_k.gguf
|
||||
- filename: bakllava-mmproj.gguf
|
||||
uri: huggingface://mys/ggml_bakllava-1/mmproj-model-f16.gguf
|
||||
|
||||
usage: |
|
||||
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
|
||||
"model": "llava",
|
||||
"messages": [{"role": "user", "content": [{"type":"text", "text": "What is in the image?"}, {"type": "image_url", "image_url": {"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" }}], "temperature": 0.9}]}'
|
||||
@@ -1,21 +0,0 @@
name: bagel
backend: mamba
parameters:
model: "jondurbin/bagel-dpo-2.8b-v0.2"
systemPrompt: "You are a helpful, unbiased, uncensored assistant."
template:
chat_message: |
{{if eq .RoleName "assistant"}}{{.Content}}{{else}}
[INST]
{{if .SystemPrompt}}{{.SystemPrompt}}{{else if eq .RoleName "system"}}<<SYS>>{{.Content}}<</SYS>>

{{else if .Content}}{{.Content}}{{end}}
[/INST]
{{end}}
completion: |
{{.Input}}
usage: |
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
"model": "bagel",
"messages": [{"role": "user", "content": "how are you doing"}],
}'
@@ -1,28 +0,0 @@
name: mamba-chat
backend: mamba
parameters:
model: "havenhq/mamba-chat"

trimsuffix:
- <|endoftext|>

# https://huggingface.co/HuggingFaceH4/zephyr-7b-beta/blob/main/tokenizer_config.json
# "chat_template": "{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ '<|user|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'system' %}\n{{ '<|system|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'assistant' %}\n{{ '<|assistant|>\n' + message['content'] + eos_token }}\n{% endif %}\n{% if loop.last and add_generation_prompt %}\n{{ '<|assistant|>' }}\n{% endif %}\n{% endfor %}",
template:
chat_message: |
{{if eq .RoleName "assistant"}}<|assistant|>{{else if eq .RoleName "system"}}<|system|>{{else if eq .RoleName "user"}}<|user|>{{end}}
{{if .Content}}{{.Content}}{{end}}
</s>

chat: |
{{.Input}}
<|assistant|>

completion: |
{{.Input}}
usage: |
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
"model": "mamba-chat",
"messages": [{"role": "user", "content": "how are you doing"}],
"temperature": 0.7
}'
@@ -1,32 +0,0 @@
name: mistral-openorca
mmap: true
parameters:
model: huggingface://TheBloke/Mistral-7B-OpenOrca-GGUF/mistral-7b-openorca.Q6_K.gguf
temperature: 0.2
top_k: 40
top_p: 0.95
seed: -1
mirostat: 2
mirostat_eta: 1.0
mirostat_tau: 1.0

template:
chat_message: |
<|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "user"}}user{{end}}
{{if .Content}}{{.Content}}{{end}}
<|im_end|>
chat: |
{{.Input}}
<|im_start|>assistant
completion: |
{{.Input}}
context_size: 4096
f16: true
stopwords:
- <|im_end|>
- <dummy32000>
usage: |
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
"model": "mistral-openorca",
"messages": [{"role": "user", "content": "How are you doing?", "temperature": 0.1}]
}'
@@ -1,25 +0,0 @@
name: mixtral-instruct
mmap: true
parameters:
model: huggingface://TheBloke/Mixtral-8x7B-Instruct-v0.1-GGUF/mixtral-8x7b-instruct-v0.1.Q2_K.gguf
temperature: 0.2
top_k: 40
seed: -1
top_p: 0.95
mirostat: 2
mirostat_eta: 1.0
mirostat_tau: 1.0

template:
chat: &chat |
[INST] {{.Input}} [/INST]
completion: *chat
context_size: 4096
f16: true
gpu_layers: 90

usage: |
curl http://localhost:8080/v1/completions -H "Content-Type: application/json" -d '{
"model": "mixtral-instruct",
"prompt": "How are you doing?"
}'
@@ -1,25 +0,0 @@
name: phi-2-chat
mmap: true
parameters:
model: huggingface://l3utterfly/phi-2-layla-v1-chatml-gguf/phi-2-layla-v1-chatml-Q8_0.gguf

template:
chat_message: |
<|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "user"}}user{{end}}
{{if .Content}}{{.Content}}{{end}}
<|im_end|>
chat: |
{{.Input}}
<|im_start|>assistant
completion: |
{{.Input}}
context_size: 4096
f16: true
stopwords:
- <|im_end|>
- <dummy32000>
usage: |
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
"model": "phi-2-chat",
"messages": [{"role": "user", "content": "How are you doing?", "temperature": 0.1}]
}'
@@ -1,30 +0,0 @@
name: phi-2-orange
mmap: true
parameters:
model: huggingface://l3utterfly/phi-2-orange-GGUF/phi-2-orange.Q6_K.gguf

template:
chat_message: |
<|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "user"}}user{{end}}
{{if .Content}}{{.Content}}{{end}}
<|im_end|>
chat: |
{{.Input}}
<|im_start|>assistant
completion: |
{{.Input}}
context_size: 4096
f16: true
stopwords:
- <|im_end|>
- <dummy32000>

description: |
This model is a chatbot that can be used for general conversation.
[Model card](https://huggingface.co/TheBloke/phi-2-orange-GGUF)

usage: |
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
"model": "phi-2-orange",
"messages": [{"role": "user", "content": "How are you doing?", "temperature": 0.1}]
}'
@@ -1,13 +0,0 @@
name: voice-en-us-amy-low
download_files:
- filename: voice-en-us-amy-low.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-amy-low.tar.gz

usage: |
To test if this model works as expected, you can use the following curl command:

curl http://localhost:8080/tts -H "Content-Type: application/json" -d '{
"model":"en-us-amy-low.onnx",
"input": "Hi, this is a test."
}'
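A minimal follow-up sketch (assuming the /tts endpoint streams raw audio bytes back, as the vall-e-x example further down suggests by piping to aplay): the response can be written to a file instead, where amy-test.wav is an arbitrary, illustrative filename:

curl http://localhost:8080/tts -H "Content-Type: application/json" \
  -d '{"model":"en-us-amy-low.onnx", "input": "Hi, this is a test."}' \
  --output amy-test.wav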
@@ -1,29 +0,0 @@
name: tinyllama-chat
mmap: true
parameters:
model: huggingface://TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF/tinyllama-1.1b-chat-v0.3.Q8_0.gguf
temperature: 0.2
top_k: 40
seed: -1
top_p: 0.95
template:
chat_message: |
<|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "user"}}user{{end}}
{{if .Content}}{{.Content}}{{end}}<|im_end|>
chat: |
{{.Input}}
<|im_start|>assistant

completion: |
{{.Input}}
context_size: 4096
f16: true
stopwords:
- <|im_end|>
gpu_layers: 90

usage: |
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
"model": "tinyllama-chat",
"messages": [{"role": "user", "content": "How are you doing?", "temperature": 0.1}]
}'
@@ -1,31 +0,0 @@
name: tinyllama-chat
backend: transformers
type: AutoModelForCausalLM

parameters:
model: TinyLlama/TinyLlama-1.1B-Chat-v1.0
temperature: 0.2
top_k: 40
top_p: 0.95
max_tokens: 4096

template:
chat_message: |
<|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "user"}}user{{end}}
{{if .Content}}{{.Content}}{{end}}<|im_end|>
chat: |
{{.Input}}
<|im_start|>assistant

completion: |
{{.Input}}

stopwords:
- <|im_end|>

usage: |
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
"model": "tinyllama-chat",
"messages": [{"role": "user", "content": "Say this is a test!"}],
"temperature": 0.7
}'
@@ -1,8 +0,0 @@
usage: |
Vall-e-x works without any configuration; to test it, you can run the following curl command:

curl http://localhost:8080/tts -H "Content-Type: application/json" -d '{
"backend": "vall-e-x",
"input":"Hello, this is a test!"
}' | aplay
# TODO: This is a placeholder until we manage to pre-load HF/Transformers models
@@ -1,18 +0,0 @@
name: whisper
backend: whisper
parameters:
model: ggml-whisper-base.bin

usage: |
## example audio file
wget --quiet --show-progress -O gb1.ogg https://upload.wikimedia.org/wikipedia/commons/1/1f/George_W_Bush_Columbia_FINAL.ogg

## Send the example audio file to the transcriptions endpoint
curl http://localhost:8080/v1/audio/transcriptions \
-H "Content-Type: multipart/form-data" \
-F file="@$PWD/gb1.ogg" -F model="whisper"

download_files:
- filename: "ggml-whisper-base.bin"
sha256: "60ed5bc3dd14eea856493d334349b405782ddcaf0028d4b5df4088345fba2efe"
uri: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.bin"
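A brief usage sketch (assuming the endpoint replies with OpenAI-style JSON carrying a "text" field; verify against the LocalAI build in use): the recognized text can be pulled out with jq, e.g.

curl http://localhost:8080/v1/audio/transcriptions \
  -H "Content-Type: multipart/form-data" \
  -F file="@$PWD/gb1.ogg" -F model="whisper" | jq -r .text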
23 gallery/deepseek-r1.yaml Normal file
@@ -0,0 +1,23 @@
---
name: "deepseek-r1"

config_file: |
context_size: 131072
mmap: true
f16: true
stopwords:
- <|begin▁of▁sentence|>
- <|end▁of▁sentence|>
- <|User|>
- <|Assistant|>
template:
chat_message: |
{{if eq .RoleName "system" -}}{{.Content }}
{{ end -}}
{{if eq .RoleName "user" -}}<|User|>{{.Content}}
{{end -}}
{{if eq .RoleName "assistant" -}}<|Assistant|>{{.Content}}<|end▁of▁sentence|>{{end}}
completion: |
{{.Input}}
chat: |
{{.Input -}}<|Assistant|>
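For clarity, a minimal sketch of what this template produces (an illustration using an assumed two-message conversation, not part of the committed file): a system message "You are helpful." followed by a user message "Hi" renders roughly as

You are helpful.
<|User|>Hi
<|Assistant|>

i.e. the system text is emitted bare, each user turn is prefixed with <|User|>, and the chat template appends <|Assistant|> so the model continues as the assistant and stops at <|end▁of▁sentence|>.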
@@ -198,6 +198,35 @@
|
||||
- filename: NightWing3-10B-v0.1-Q4_K_M.gguf
|
||||
sha256: 2e87671542d22fe1ef9a68e43f2fdab7c2759479ad531946d9f0bdeffa6f5747
|
||||
uri: huggingface://bartowski/NightWing3-10B-v0.1-GGUF/NightWing3-10B-v0.1-Q4_K_M.gguf
|
||||
- !!merge <<: *falcon3
|
||||
name: "virtuoso-lite"
|
||||
urls:
|
||||
- https://huggingface.co/arcee-ai/Virtuoso-Lite
|
||||
- https://huggingface.co/bartowski/Virtuoso-Lite-GGUF
|
||||
description: |
|
||||
Virtuoso-Lite (10B) is our next-generation, 10-billion-parameter language model based on the Llama-3 architecture. It is distilled from Deepseek-v3 using ~1.1B tokens/logits, allowing it to achieve robust performance at a significantly reduced parameter count compared to larger models. Despite its compact size, Virtuoso-Lite excels in a variety of tasks, demonstrating advanced reasoning, code generation, and mathematical problem-solving capabilities.
|
||||
overrides:
|
||||
parameters:
|
||||
model: Virtuoso-Lite-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: Virtuoso-Lite-Q4_K_M.gguf
|
||||
sha256: 1d21bef8467a11a1e473d397128b05fb87b7e824606cdaea061e550cb219fee2
|
||||
uri: huggingface://bartowski/Virtuoso-Lite-GGUF/Virtuoso-Lite-Q4_K_M.gguf
|
||||
- !!merge <<: *falcon3
|
||||
name: "suayptalha_maestro-10b"
|
||||
icon: https://huggingface.co/suayptalha/Maestro-10B/resolve/main/Maestro-Logo.png
|
||||
urls:
|
||||
- https://huggingface.co/suayptalha/Maestro-10B
|
||||
- https://huggingface.co/bartowski/suayptalha_Maestro-10B-GGUF
|
||||
description: |
|
||||
Maestro-10B is a 10 billion parameter model fine-tuned from Virtuoso-Lite, a next-generation language model developed by arcee-ai. Virtuoso-Lite itself is based on the Llama-3 architecture, distilled from Deepseek-v3 using approximately 1.1 billion tokens/logits. This distillation process allows Virtuoso-Lite to achieve robust performance with a smaller parameter count, excelling in reasoning, code generation, and mathematical problem-solving. Maestro-10B inherits these strengths from its base model, Virtuoso-Lite, and further enhances them through fine-tuning on the OpenOrca dataset. This combination of a distilled base model and targeted fine-tuning makes Maestro-10B a powerful and efficient language model.
|
||||
overrides:
|
||||
parameters:
|
||||
model: suayptalha_Maestro-10B-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: suayptalha_Maestro-10B-Q4_K_M.gguf
|
||||
sha256: c570381da5624782ce6df4186ace6f747429fcbaf1a22c2a348288d3552eb19c
|
||||
uri: huggingface://bartowski/suayptalha_Maestro-10B-GGUF/suayptalha_Maestro-10B-Q4_K_M.gguf
|
||||
- &intellect1
|
||||
name: "intellect-1-instruct"
|
||||
url: "github:mudler/LocalAI/gallery/llama3.1-instruct.yaml@master"
|
||||
@@ -456,6 +485,44 @@
|
||||
- filename: L3.3-Prikol-70B-v0.2-Q4_K_M.gguf
|
||||
sha256: fc0ff514efbc0b67981c2bf1423d5a2e1b8801e4266ba0c653ea148414fe5ffc
|
||||
uri: huggingface://bartowski/L3.3-Prikol-70B-v0.2-GGUF/L3.3-Prikol-70B-v0.2-Q4_K_M.gguf
|
||||
- !!merge <<: *llama33
|
||||
name: "l3.3-nevoria-r1-70b"
|
||||
icon: https://cdn-uploads.huggingface.co/production/uploads/64545af5ec40bbbd01242ca6/_oWpsvCZ-graNKzJBBjGo.jpeg
|
||||
urls:
|
||||
- https://huggingface.co/Steelskull/L3.3-Nevoria-R1-70b
|
||||
- https://huggingface.co/bartowski/L3.3-Nevoria-R1-70b-GGUF
|
||||
description: |
|
||||
This model builds upon the original Nevoria foundation, incorporating the Deepseek-R1 reasoning architecture to enhance dialogue interaction and scene comprehension. While maintaining Nevoria's core strengths in storytelling and scene description (derived from EVA, EURYALE, and Anubis), this iteration aims to improve prompt adherence and creative reasoning capabilities. The model also retains the balanced perspective introduced by Negative_LLAMA and Nemotron elements. Also, the model plays the card to almost a fault, It'll pick up on minor issues and attempt to run with them. Users had it call them out for misspelling a word while playing in character.
|
||||
|
||||
Note: While Nevoria-R1 represents a significant architectural change, rather than a direct successor to Nevoria, it operates as a distinct model with its own characteristics.
|
||||
|
||||
The lorablated model base choice was intentional, creating unique weight interactions similar to the original Astoria model and Astoria V2 model. This "weight twisting" effect, achieved by subtracting the lorablated base model during merging, creates an interesting balance in the model's behavior. While unconventional compared to sequential component application, this approach was chosen for its unique response characteristics.
|
||||
overrides:
|
||||
parameters:
|
||||
model: L3.3-Nevoria-R1-70b-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: L3.3-Nevoria-R1-70b-Q4_K_M.gguf
|
||||
sha256: 9f32f202fb5b1465c942693bb11eea9e8a1c5686b00602715b495c068eaf1c58
|
||||
uri: huggingface://bartowski/L3.3-Nevoria-R1-70b-GGUF/L3.3-Nevoria-R1-70b-Q4_K_M.gguf
|
||||
- !!merge <<: *llama33
|
||||
name: "nohobby_l3.3-prikol-70b-v0.4"
|
||||
icon: https://files.catbox.moe/x9t3zo.png
|
||||
urls:
|
||||
- https://huggingface.co/Nohobby/L3.3-Prikol-70B-v0.4
|
||||
- https://huggingface.co/bartowski/Nohobby_L3.3-Prikol-70B-v0.4-GGUF
|
||||
description: |
|
||||
I have yet to try it UPD: it sucks, bleh
|
||||
|
||||
Sometimes mistakes {{user}} for {{char}} and can't think. Other than that, the behavior is similar to the predecessors.
|
||||
|
||||
It sometimes gives some funny replies tho, yay!
|
||||
overrides:
|
||||
parameters:
|
||||
model: Nohobby_L3.3-Prikol-70B-v0.4-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: Nohobby_L3.3-Prikol-70B-v0.4-Q4_K_M.gguf
|
||||
sha256: e1d67a40bdf0526bdfcaa16c6e4dfeecad41651e201b4009b65f4f444b773604
|
||||
uri: huggingface://bartowski/Nohobby_L3.3-Prikol-70B-v0.4-GGUF/Nohobby_L3.3-Prikol-70B-v0.4-Q4_K_M.gguf
|
||||
- &rwkv
|
||||
url: "github:mudler/LocalAI/gallery/rwkv.yaml@master"
|
||||
name: "rwkv-6-world-7b"
|
||||
@@ -819,8 +886,8 @@
- filename: salamandra-7b-instruct.Q4_K_M-f32.gguf
sha256: bac8e8c1d1d9d53cbdb148b8ff9ad378ddb392429207099e85b5aae3a43bff3d
uri: huggingface://cstr/salamandra-7b-instruct-GGUF/salamandra-7b-instruct.Q4_K_M-f32.gguf
- &llama32 ## llama3.2
url: "github:mudler/LocalAI/gallery/llama3.1-instruct.yaml@master"
- &llama32
url: "github:mudler/LocalAI/gallery/llama3.2-quantized.yaml@master"
icon: https://avatars.githubusercontent.com/u/153379578
license: llama3.2
description: |
@@ -1342,11 +1409,7 @@
|
||||
urls:
|
||||
- https://huggingface.co/HuggingFaceTB/FineMath-Llama-3B
|
||||
- https://huggingface.co/bartowski/FineMath-Llama-3B-GGUF
|
||||
description: |
|
||||
This is a continual-pre-training of Llama-3.2-3B on a mix of 📐 FineMath (our new high quality math dataset) and FineWeb-Edu.
|
||||
|
||||
The model demonstrates superior math performance compared to Llama 3.2 3B, while maintaining similar performance on knowledge, reasoning, and common sense benchmarks.
|
||||
It was trained on 160B tokens using a mix of 40% FineWeb-Edu and 60% from FineMath (30% FineMath-4+ subset and 30% InfiWebMath-4+ subset). We use nanotron for the training, and you can find the training scripts in our SmolLM2 GitHub repo.
|
||||
description: "This is a continual-pre-training of Llama-3.2-3B on a mix of \U0001F4D0 FineMath (our new high quality math dataset) and FineWeb-Edu.\n\nThe model demonstrates superior math performance compared to Llama 3.2 3B, while maintaining similar performance on knowledge, reasoning, and common sense benchmarks.\nIt was trained on 160B tokens using a mix of 40% FineWeb-Edu and 60% from FineMath (30% FineMath-4+ subset and 30% InfiWebMath-4+ subset). We use nanotron for the training, and you can find the training scripts in our SmolLM2 GitHub repo.\n"
|
||||
overrides:
|
||||
parameters:
|
||||
model: FineMath-Llama-3B-Q4_K_M.gguf
|
||||
@@ -1354,8 +1417,23 @@
|
||||
- filename: FineMath-Llama-3B-Q4_K_M.gguf
|
||||
sha256: 16c73b5cf2a417a7e1608bcc9469f1461fc3e759ce04a3a337f48df977dc158c
|
||||
uri: huggingface://bartowski/FineMath-Llama-3B-GGUF/FineMath-Llama-3B-Q4_K_M.gguf
|
||||
- &qwen25 ## Qwen2.5
|
||||
name: "qwen2.5-14b-instruct"
|
||||
- !!merge <<: *llama32
|
||||
name: "LocalAI-functioncall-llama3.2-1b-v0.4"
|
||||
url: "github:mudler/LocalAI/gallery/llama3.2-fcall.yaml@master"
|
||||
urls:
|
||||
- https://huggingface.co/mudler/LocalAI-functioncall-llama3.2-1b-v0.4
|
||||
- https://huggingface.co/mradermacher/LocalAI-functioncall-llama3.2-1b-v0.4-GGUF
|
||||
description: |
|
||||
A model tailored to be conversational and execute function calls with LocalAI. This model is based on llama 3.2 and has 1B parameter. Perfect for small devices.
|
||||
overrides:
|
||||
parameters:
|
||||
model: LocalAI-functioncall-llama3.2-1b-v0.4.Q8_0.gguf
|
||||
files:
|
||||
- filename: LocalAI-functioncall-llama3.2-1b-v0.4.Q8_0.gguf
|
||||
sha256: 547e57c2d3f17c632c9fd303afdb00446e7396df453aee62633b76976c407616
|
||||
uri: huggingface://mradermacher/LocalAI-functioncall-llama3.2-1b-v0.4-GGUF/LocalAI-functioncall-llama3.2-1b-v0.4.Q8_0.gguf
|
||||
- &qwen25
|
||||
name: "qwen2.5-14b-instruct" ## Qwen2.5
|
||||
icon: https://avatars.githubusercontent.com/u/141221163
|
||||
url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
|
||||
license: apache-2.0
|
||||
@@ -3258,15 +3336,7 @@
|
||||
urls:
|
||||
- https://huggingface.co/Krystalan/DRT-o1-14B
|
||||
- https://huggingface.co/bartowski/DRT-o1-14B-GGUF
|
||||
description: |
|
||||
This repository contains the resources for our paper "DRT-o1: Optimized Deep Reasoning Translation via Long Chain-of-Thought"
|
||||
In this work, we introduce DRT-o1, an attempt to bring the success of long thought reasoning to neural machine translation (MT). To this end,
|
||||
|
||||
🌟 We mine English sentences with similes or metaphors from existing literature books, which are suitable for translation via long thought.
|
||||
🌟 We propose a designed multi-agent framework with three agents (i.e., a translator, an advisor and an evaluator) to synthesize the MT samples with long thought. There are 22,264 synthesized samples in total.
|
||||
🌟 We train DRT-o1-8B, DRT-o1-7B and DRT-o1-14B using Llama-3.1-8B-Instruct, Qwen2.5-7B-Instruct and Qwen2.5-14B-Instruct as backbones.
|
||||
|
||||
Our goal is not to achieve competitive performance with OpenAI’s O1 in neural machine translation (MT). Instead, we explore technical routes to bring the success of long thought to MT. To this end, we introduce DRT-o1, a byproduct of our exploration, and we hope it could facilitate the corresponding research in this direction.
|
||||
description: "This repository contains the resources for our paper \"DRT-o1: Optimized Deep Reasoning Translation via Long Chain-of-Thought\"\nIn this work, we introduce DRT-o1, an attempt to bring the success of long thought reasoning to neural machine translation (MT). To this end,\n\n\U0001F31F We mine English sentences with similes or metaphors from existing literature books, which are suitable for translation via long thought.\n\U0001F31F We propose a designed multi-agent framework with three agents (i.e., a translator, an advisor and an evaluator) to synthesize the MT samples with long thought. There are 22,264 synthesized samples in total.\n\U0001F31F We train DRT-o1-8B, DRT-o1-7B and DRT-o1-14B using Llama-3.1-8B-Instruct, Qwen2.5-7B-Instruct and Qwen2.5-14B-Instruct as backbones.\n\nOur goal is not to achieve competitive performance with OpenAI’s O1 in neural machine translation (MT). Instead, we explore technical routes to bring the success of long thought to MT. To this end, we introduce DRT-o1, a byproduct of our exploration, and we hope it could facilitate the corresponding research in this direction.\n"
|
||||
overrides:
|
||||
parameters:
|
||||
model: DRT-o1-14B-Q4_K_M.gguf
|
||||
@@ -3274,8 +3344,57 @@
|
||||
- filename: DRT-o1-14B-Q4_K_M.gguf
|
||||
sha256: 9619ca984cf4ce8e4f69bcde831de17b2ce05dd89536e3130608877521e3d328
|
||||
uri: huggingface://bartowski/DRT-o1-14B-GGUF/DRT-o1-14B-Q4_K_M.gguf
|
||||
- &smollm ## SmolLM
|
||||
url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
|
||||
- !!merge <<: *qwen25
|
||||
name: "lamarck-14b-v0.7"
|
||||
icon: https://huggingface.co/sometimesanotion/Lamarck-14B-v0.7/resolve/main/LamarckShades.webp
|
||||
urls:
|
||||
- https://huggingface.co/sometimesanotion/Lamarck-14B-v0.7
|
||||
- https://huggingface.co/bartowski/Lamarck-14B-v0.7-GGUF
|
||||
description: |
|
||||
Lamarck 14B v0.7: A generalist merge with emphasis on multi-step reasoning, prose, and multi-language ability. The 14B parameter model class has a lot of strong performers, and Lamarck strives to be well-rounded and solid.
|
||||
overrides:
|
||||
parameters:
|
||||
model: Lamarck-14B-v0.7-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: Lamarck-14B-v0.7-Q4_K_M.gguf
|
||||
sha256: ff8eba82b77a4c6b6d556b85629414655d881f8af4601bcf891c6a7b0345b442
|
||||
uri: huggingface://bartowski/Lamarck-14B-v0.7-GGUF/Lamarck-14B-v0.7-Q4_K_M.gguf
|
||||
- !!merge <<: *qwen25
|
||||
name: "art-v0-3b"
|
||||
icon: https://blog.agi-0.com/_next/image?url=%2Fabout_img2.jpeg&w=1920&q=75
|
||||
urls:
|
||||
- https://huggingface.co/AGI-0/Art-v0-3B
|
||||
- https://huggingface.co/bartowski/Art-v0-3B-GGUF
|
||||
- https://blog.agi-0.com/posts/art-series
|
||||
description: |
|
||||
Art v0 3B is our inaugural model in the Art series, fine-tuned from Qwen/Qwen2.5-3B-Instruct using a specialized dataset generated with Gemini 2.0 Flash Thinking. Read more about the Art series
|
||||
overrides:
|
||||
parameters:
|
||||
model: Art-v0-3B-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: Art-v0-3B-Q4_K_M.gguf
|
||||
sha256: 551acd326ce9a743b6e06e094865eb2f06c23c81c812ce221d757bf27ceec9f7
|
||||
uri: huggingface://bartowski/Art-v0-3B-GGUF/Art-v0-3B-Q4_K_M.gguf
|
||||
- !!merge <<: *qwen25
|
||||
name: "chuluun-qwen2.5-72b-v0.08"
|
||||
icon: https://huggingface.co/DatToad/Chuluun-Qwen2.5-72B-v0.08/resolve/main/Chuluun8-2.png
|
||||
urls:
|
||||
- https://huggingface.co/DatToad/Chuluun-Qwen2.5-72B-v0.08
|
||||
- https://huggingface.co/bartowski/Chuluun-Qwen2.5-72B-v0.08-GGUF
|
||||
description: |
|
||||
This is a merge of pre-trained language models created using mergekit.
|
||||
I re-ran the original Chuluun formula including the newly released Ink from Allura-Org. I've found the addition gives the model a lot more variability, likely because of aggressive de-slop applied to its dataset. Sometimes this means a word choice will be strange and you'll want to manually edit when needed, but it means you'll see less ministrations sparkling with mischief.
|
||||
Because of this the best way to approach the model is to run multiple regens and choose the one you like, edit mercilessly, and continue. Like the original Chuluun this variant is very steerable for complex storywriting and RP. It's probably also a little spicier than v0.01 with both Magnum and whatever the heck Fizz threw into the data for Ink.
|
||||
I've also been hearing praise for a level of character intelligence not seen in other models, including Largestral finetunes and merges. I'm not about to say any model of mine is smarter because it was a dumb idea to use Tess as the base and it somehow worked.
|
||||
overrides:
|
||||
parameters:
|
||||
model: Chuluun-Qwen2.5-72B-v0.08-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: Chuluun-Qwen2.5-72B-v0.08-Q4_K_M.gguf
|
||||
sha256: 0fec82625f74a9a340837de7af287b1d9042e5aeb70cda2621426db99958b0af
|
||||
uri: huggingface://bartowski/Chuluun-Qwen2.5-72B-v0.08-GGUF/Chuluun-Qwen2.5-72B-v0.08-Q4_K_M.gguf
|
||||
- &smollm
|
||||
url: "github:mudler/LocalAI/gallery/chatml.yaml@master" ## SmolLM
|
||||
name: "smollm-1.7b-instruct"
|
||||
icon: https://huggingface.co/datasets/HuggingFaceTB/images/resolve/main/banner_smol.png
|
||||
tags:
|
||||
@@ -3332,8 +3451,97 @@
|
||||
- filename: Vikhr-Qwen-2.5-1.5B-Instruct.Q4_K_M.gguf
|
||||
sha256: eaeac314e30b461413bc1cc819cdc0cd6a79265711fd0b8268702960a082c7bd
|
||||
uri: huggingface://QuantFactory/Vikhr-Qwen-2.5-1.5B-Instruct-GGUF/Vikhr-Qwen-2.5-1.5B-Instruct.Q4_K_M.gguf
|
||||
- &llama31 ## LLama3.1
|
||||
url: "github:mudler/LocalAI/gallery/llama3.1-instruct.yaml@master"
|
||||
- !!merge <<: *qwen25
|
||||
name: "dumpling-qwen2.5-32b"
|
||||
icon: https://huggingface.co/nbeerbower/Dumpling-Qwen2.5-32B/resolve/main/dumpling_cover.png?download=true
|
||||
urls:
|
||||
- https://huggingface.co/nbeerbower/Dumpling-Qwen2.5-32B
|
||||
- https://huggingface.co/bartowski/Dumpling-Qwen2.5-32B-GGUF
|
||||
description: |
|
||||
nbeerbower/Rombos-EVAGutenberg-TIES-Qwen2.5-32B finetuned on:
|
||||
nbeerbower/GreatFirewall-DPO
|
||||
nbeerbower/Schule-DPO
|
||||
nbeerbower/Purpura-DPO
|
||||
nbeerbower/Arkhaios-DPO
|
||||
jondurbin/truthy-dpo-v0.1
|
||||
antiven0m/physical-reasoning-dpo
|
||||
flammenai/Date-DPO-NoAsterisks
|
||||
flammenai/Prude-Phi3-DPO
|
||||
Atsunori/HelpSteer2-DPO
|
||||
jondurbin/gutenberg-dpo-v0.1
|
||||
nbeerbower/gutenberg2-dpo
|
||||
nbeerbower/gutenberg-moderne-dpo.
|
||||
overrides:
|
||||
parameters:
|
||||
model: Dumpling-Qwen2.5-32B-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: Dumpling-Qwen2.5-32B-Q4_K_M.gguf
|
||||
sha256: c5b7d773cc614650ad3956008e30d0607df6106c28e381870a9b950bd4ee1d17
|
||||
uri: huggingface://bartowski/Dumpling-Qwen2.5-32B-GGUF/Dumpling-Qwen2.5-32B-Q4_K_M.gguf
|
||||
- !!merge <<: *qwen25
|
||||
name: "confucius-o1-14b"
|
||||
urls:
|
||||
- https://huggingface.co/netease-youdao/Confucius-o1-14B
|
||||
- https://huggingface.co/bartowski/Confucius-o1-14B-GGUF
|
||||
description: |
|
||||
Confucius-o1-14B is a o1-like reasoning model developed by the NetEase Youdao Team, it can be easily deployed on a single GPU without quantization. This model is based on the Qwen2.5-14B-Instruct model and adopts a two-stage learning strategy, enabling the lightweight 14B model to possess thinking abilities similar to those of o1. What sets it apart is that after generating the chain of thought, it can summarize a step-by-step problem-solving process from the chain of thought on its own. This can prevent users from getting bogged down in the complex chain of thought and allows them to easily obtain the correct problem-solving ideas and answers.
|
||||
overrides:
|
||||
parameters:
|
||||
model: Confucius-o1-14B-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: Confucius-o1-14B-Q4_K_M.gguf
|
||||
sha256: 03182920edd8667db7d2a362ca2d25e88f4b615b383b5a55c764f4715fb22dd9
|
||||
uri: huggingface://bartowski/Confucius-o1-14B-GGUF/Confucius-o1-14B-Q4_K_M.gguf
|
||||
- !!merge <<: *qwen25
|
||||
name: "openthinker-7b"
|
||||
icon: https://huggingface.co/datasets/open-thoughts/open-thoughts-114k/resolve/main/open_thoughts.png
|
||||
urls:
|
||||
- https://huggingface.co/open-thoughts/OpenThinker-7B
|
||||
- https://huggingface.co/bartowski/OpenThinker-7B-GGUF
|
||||
description: |
|
||||
This model is a fine-tuned version of Qwen/Qwen2.5-7B-Instruct on the OpenThoughts-114k dataset dataset.
|
||||
|
||||
The dataset is derived by distilling DeepSeek-R1 using the data pipeline available on github. More info about the dataset can be found on the dataset card at OpenThoughts-114k dataset.
|
||||
|
||||
This model improves upon the Bespoke-Stratos-7B model, which used 17k examples (Bespoke-Stratos-17k dataset). The numbers reported in the table below are evaluated with our open-source tool Evalchemy.
|
||||
overrides:
|
||||
parameters:
|
||||
model: OpenThinker-7B-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: OpenThinker-7B-Q4_K_M.gguf
|
||||
sha256: 94dff1a7acd685db5cff7afdb837aab8172e06d65fe6179ba47428e3030acd93
|
||||
uri: huggingface://bartowski/OpenThinker-7B-GGUF/OpenThinker-7B-Q4_K_M.gguf
|
||||
- !!merge <<: *qwen25
|
||||
name: "tinyswallow-1.5b-instruct"
|
||||
urls:
|
||||
- https://huggingface.co/SakanaAI/TinySwallow-1.5B-Instruct
|
||||
- https://huggingface.co/bartowski/TinySwallow-1.5B-Instruct-GGUF
|
||||
description: |
|
||||
TinySwallow-1.5B-Instruct is an instruction-tuned version of TinySwallow-1.5B, created through TAID (Temporally Adaptive Interpolated Distillation), our new knowledge distillation method. We used Qwen2.5-32B-Instruct as the teacher model and Qwen2.5-1.5B-Instruct as the student model. The model has been further instruction-tuned to enhance its ability to follow instructions and engage in conversations in Japanese.
|
||||
overrides:
|
||||
parameters:
|
||||
model: TinySwallow-1.5B-Instruct-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: TinySwallow-1.5B-Instruct-Q4_K_M.gguf
|
||||
sha256: 4d409c8873c1650a19c0a7a1c051e342613191a487768fe0d29735b9361079cd
|
||||
uri: huggingface://bartowski/TinySwallow-1.5B-Instruct-GGUF/TinySwallow-1.5B-Instruct-Q4_K_M.gguf
|
||||
- !!merge <<: *qwen25
|
||||
name: "fblgit_miniclaus-qw1.5b-unamgs-grpo"
|
||||
icon: https://huggingface.co/fblgit/miniclaus-qw1.5B-UNAMGS/resolve/main/miniclaus_qw15-UNAMGS.png
|
||||
urls:
|
||||
- https://huggingface.co/fblgit/miniclaus-qw1.5B-UNAMGS-GRPO
|
||||
- https://huggingface.co/bartowski/fblgit_miniclaus-qw1.5B-UNAMGS-GRPO-GGUF
|
||||
description: |
|
||||
This version is RL with GRPO on GSM8k for 1400 steps
|
||||
overrides:
|
||||
parameters:
|
||||
model: fblgit_miniclaus-qw1.5B-UNAMGS-GRPO-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: fblgit_miniclaus-qw1.5B-UNAMGS-GRPO-Q4_K_M.gguf
|
||||
sha256: 88ceacc5900062bc2afc352f009233225b0fe10203cbb61b122e8f10244449c8
|
||||
uri: huggingface://bartowski/fblgit_miniclaus-qw1.5B-UNAMGS-GRPO-GGUF/fblgit_miniclaus-qw1.5B-UNAMGS-GRPO-Q4_K_M.gguf
|
||||
- &llama31
|
||||
url: "github:mudler/LocalAI/gallery/llama3.1-instruct.yaml@master" ## LLama3.1
|
||||
icon: https://avatars.githubusercontent.com/u/153379578
|
||||
name: "meta-llama-3.1-8b-instruct"
|
||||
license: llama3.1
|
||||
@@ -5236,8 +5444,31 @@
|
||||
- filename: deepseek-r1-distill-llama-8b-Q4_K_M.gguf
|
||||
sha256: f8eba201522ab44b79bc54166126bfaf836111ff4cbf2d13c59c3b57da10573b
|
||||
uri: huggingface://unsloth/DeepSeek-R1-Distill-Llama-8B-GGUF/DeepSeek-R1-Distill-Llama-8B-Q4_K_M.gguf
|
||||
- &deepseek ## Deepseek
|
||||
url: "github:mudler/LocalAI/gallery/deepseek.yaml@master"
|
||||
- !!merge <<: *llama31
|
||||
name: "selene-1-mini-llama-3.1-8b"
|
||||
icon: https://atla-ai.notion.site/image/https%3A%2F%2Fprod-files-secure.s3.us-west-2.amazonaws.com%2Ff08e6e70-73af-4363-9621-90e906b92ebc%2F1bfb4316-1ce6-40a0-800c-253739cfcdeb%2Fatla_white3x.svg?table=block&id=17c309d1-7745-80f9-8f60-e755409acd8d&spaceId=f08e6e70-73af-4363-9621-90e906b92ebc&userId=&cache=v2
|
||||
urls:
|
||||
- https://huggingface.co/AtlaAI/Selene-1-Mini-Llama-3.1-8B
|
||||
- https://huggingface.co/bartowski/Selene-1-Mini-Llama-3.1-8B-GGUF
|
||||
description: |
|
||||
Atla Selene Mini is a state-of-the-art small language model-as-a-judge (SLMJ). Selene Mini achieves comparable performance to models 10x its size, outperforming GPT-4o on RewardBench, EvalBiasBench, and AutoJ.
|
||||
|
||||
Post-trained from Llama-3.1-8B across a wide range of evaluation tasks and scoring criteria, Selene Mini outperforms prior small models overall across 11 benchmarks covering three different types of tasks:
|
||||
|
||||
Absolute scoring, e.g. "Evaluate the harmlessness of this response on a scale of 1-5"
|
||||
Classification, e.g. "Does this response address the user query? Answer Yes or No."
|
||||
Pairwise preference. e.g. "Which of the following responses is more logically consistent - A or B?"
|
||||
|
||||
It is also the #1 8B generative model on RewardBench.
|
||||
overrides:
|
||||
parameters:
|
||||
model: Selene-1-Mini-Llama-3.1-8B-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: Selene-1-Mini-Llama-3.1-8B-Q4_K_M.gguf
|
||||
sha256: 908e6ce19f7cd3d7394bd7c38e43de2f228aca6aceda35c7ee70d069ad60493e
|
||||
uri: huggingface://bartowski/Selene-1-Mini-Llama-3.1-8B-GGUF/Selene-1-Mini-Llama-3.1-8B-Q4_K_M.gguf
|
||||
- &deepseek
|
||||
url: "github:mudler/LocalAI/gallery/deepseek.yaml@master" ## Deepseek
|
||||
name: "deepseek-coder-v2-lite-instruct"
|
||||
icon: "https://avatars.githubusercontent.com/u/148330874"
|
||||
license: deepseek
|
||||
@@ -5301,8 +5532,8 @@
- filename: archangel_sft_pythia2-8b.Q4_K_M.gguf
sha256: a47782c55ef2b39b19644213720a599d9849511a73c9ebb0c1de749383c0a0f8
uri: huggingface://RichardErkhov/ContextualAI_-_archangel_sft_pythia2-8b-gguf/archangel_sft_pythia2-8b.Q4_K_M.gguf
- &deepseek-r1 ## Start DeepSeek-R1
url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
- &deepseek-r1
url: "github:mudler/LocalAI/gallery/deepseek-r1.yaml@master" ## Start DeepSeek-R1
name: "deepseek-r1-distill-qwen-1.5b"
icon: "https://avatars.githubusercontent.com/u/148330874"
urls:
@@ -5381,8 +5612,126 @@
|
||||
- filename: DeepSeek-R1-Distill-Llama-70B-Q4_K_M.gguf
|
||||
sha256: 181a82a1d6d2fa24fe4db83a68eee030384986bdbdd4773ba76424e3a6eb9fd8
|
||||
uri: huggingface://bartowski/DeepSeek-R1-Distill-Llama-70B-GGUF/DeepSeek-R1-Distill-Llama-70B-Q4_K_M.gguf
|
||||
- &qwen2 ## Start QWEN2
|
||||
url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
|
||||
- !!merge <<: *deepseek-r1
|
||||
name: "deepseek-r1-qwen-2.5-32b-ablated"
|
||||
icon: https://cdn-uploads.huggingface.co/production/uploads/6587d8dd1b44d0e694104fbf/0dkt6EhZYwXVBxvSWXdaM.png
|
||||
urls:
|
||||
- https://huggingface.co/NaniDAO/deepseek-r1-qwen-2.5-32B-ablated
|
||||
- https://huggingface.co/bartowski/deepseek-r1-qwen-2.5-32B-ablated-GGUF
|
||||
description: |
|
||||
DeepSeek-R1-Distill-Qwen-32B with ablation technique applied for a more helpful (and based) reasoning model.
|
||||
|
||||
This means it will refuse less of your valid requests for an uncensored UX. Use responsibly and use common sense.
|
||||
|
||||
We do not take any responsibility for how you apply this intelligence, just as we do not for how you apply your own.
|
||||
overrides:
|
||||
parameters:
|
||||
model: deepseek-r1-qwen-2.5-32B-ablated-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: deepseek-r1-qwen-2.5-32B-ablated-Q4_K_M.gguf
|
||||
sha256: 7f33898641ebe58fe178c3517efc129f4fe37c6ca2d8b91353c4539b0c3411ec
|
||||
uri: huggingface://bartowski/deepseek-r1-qwen-2.5-32B-ablated-GGUF/deepseek-r1-qwen-2.5-32B-ablated-Q4_K_M.gguf
|
||||
- !!merge <<: *deepseek-r1
|
||||
name: "fuseo1-deepseekr1-qwen2.5-coder-32b-preview-v0.1"
|
||||
urls:
|
||||
- https://huggingface.co/FuseAI/FuseO1-DeepSeekR1-Qwen2.5-Coder-32B-Preview
|
||||
- https://huggingface.co/bartowski/FuseO1-DeepSeekR1-Qwen2.5-Coder-32B-Preview-v0.1-GGUF
|
||||
description: |
|
||||
FuseO1-Preview is our initial endeavor to enhance the System-II reasoning capabilities of large language models (LLMs) through innovative model fusion techniques. By employing our advanced SCE merging methodologies, we integrate multiple open-source o1-like LLMs into a unified model. Our goal is to incorporate the distinct knowledge and strengths from different reasoning LLMs into a single, unified model with strong System-II reasoning abilities, particularly in mathematics, coding, and science domains.
|
||||
overrides:
|
||||
parameters:
|
||||
model: FuseO1-DeepSeekR1-Qwen2.5-Coder-32B-Preview-v0.1-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: FuseO1-DeepSeekR1-Qwen2.5-Coder-32B-Preview-v0.1-Q4_K_M.gguf
|
||||
sha256: d7753547046cd6e3d45a2cfbd5557aa20dd0b9f0330931d3fd5b3d4a0b468b24
|
||||
uri: huggingface://bartowski/FuseO1-DeepSeekR1-Qwen2.5-Coder-32B-Preview-v0.1-GGUF/FuseO1-DeepSeekR1-Qwen2.5-Coder-32B-Preview-v0.1-Q4_K_M.gguf
|
||||
- !!merge <<: *deepseek-r1
|
||||
name: "fuseo1-deepseekr1-qwen2.5-instruct-32b-preview"
|
||||
urls:
|
||||
- https://huggingface.co/FuseAI/FuseO1-DeepSeekR1-Qwen2.5-Instruct-32B-Preview
|
||||
- https://huggingface.co/bartowski/FuseO1-DeepSeekR1-Qwen2.5-Instruct-32B-Preview-GGUF
|
||||
description: |
|
||||
FuseO1-Preview is our initial endeavor to enhance the System-II reasoning capabilities of large language models (LLMs) through innovative model fusion techniques. By employing our advanced SCE merging methodologies, we integrate multiple open-source o1-like LLMs into a unified model. Our goal is to incorporate the distinct knowledge and strengths from different reasoning LLMs into a single, unified model with strong System-II reasoning abilities, particularly in mathematics, coding, and science domains.
|
||||
overrides:
|
||||
parameters:
|
||||
model: FuseO1-DeepSeekR1-Qwen2.5-Instruct-32B-Preview-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: FuseO1-DeepSeekR1-Qwen2.5-Instruct-32B-Preview-Q4_K_M.gguf
|
||||
sha256: 3b06a004a6bb827f809a7326b30ee73f96a1a86742d8c2dd335d75874fa17aa4
|
||||
uri: huggingface://bartowski/FuseO1-DeepSeekR1-Qwen2.5-Instruct-32B-Preview-GGUF/FuseO1-DeepSeekR1-Qwen2.5-Instruct-32B-Preview-Q4_K_M.gguf
|
||||
- !!merge <<: *deepseek-r1
|
||||
name: "fuseo1-deepseekr1-qwq-32b-preview"
|
||||
urls:
|
||||
- https://huggingface.co/FuseAI/FuseO1-DeepSeekR1-QwQ-32B-Preview
|
||||
- https://huggingface.co/bartowski/FuseO1-DeepSeekR1-QwQ-32B-Preview-GGUF
|
||||
description: |
|
||||
FuseO1-Preview is our initial endeavor to enhance the System-II reasoning capabilities of large language models (LLMs) through innovative model fusion techniques. By employing our advanced SCE merging methodologies, we integrate multiple open-source o1-like LLMs into a unified model. Our goal is to incorporate the distinct knowledge and strengths from different reasoning LLMs into a single, unified model with strong System-II reasoning abilities, particularly in mathematics, coding, and science domains.
|
||||
overrides:
|
||||
parameters:
|
||||
model: FuseO1-DeepSeekR1-QwQ-32B-Preview-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: FuseO1-DeepSeekR1-QwQ-32B-Preview-Q4_K_M.gguf
|
||||
sha256: 16f1fb6bf76bb971a7a63e1a68cddd09421f4a767b86eec55eed1e08178f78f2
|
||||
uri: huggingface://bartowski/FuseO1-DeepSeekR1-QwQ-32B-Preview-GGUF/FuseO1-DeepSeekR1-QwQ-32B-Preview-Q4_K_M.gguf
|
||||
- !!merge <<: *deepseek-r1
|
||||
name: "fuseo1-deekseekr1-qwq-skyt1-32b-preview"
|
||||
urls:
|
||||
- https://huggingface.co/FuseAI/FuseO1-DeepSeekR1-QwQ-SkyT1-32B-Preview
|
||||
- https://huggingface.co/bartowski/FuseO1-DeekSeekR1-QwQ-SkyT1-32B-Preview-GGUF
|
||||
description: |
|
||||
FuseO1-Preview is our initial endeavor to enhance the System-II reasoning capabilities of large language models (LLMs) through innovative model fusion techniques. By employing our advanced SCE merging methodologies, we integrate multiple open-source o1-like LLMs into a unified model. Our goal is to incorporate the distinct knowledge and strengths from different reasoning LLMs into a single, unified model with strong System-II reasoning abilities, particularly in mathematics, coding, and science domains.
|
||||
overrides:
|
||||
parameters:
|
||||
model: FuseO1-DeekSeekR1-QwQ-SkyT1-32B-Preview-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: FuseO1-DeekSeekR1-QwQ-SkyT1-32B-Preview-Q4_K_M.gguf
|
||||
sha256: 13911dd4a62d4714a3447bc288ea9d49dbe575a91cab9e8f645057f1d8e1100e
|
||||
uri: huggingface://bartowski/FuseO1-DeekSeekR1-QwQ-SkyT1-32B-Preview-GGUF/FuseO1-DeekSeekR1-QwQ-SkyT1-32B-Preview-Q4_K_M.gguf
|
||||
- !!merge <<: *deepseek-r1
|
||||
name: "steelskull_l3.3-damascus-r1"
|
||||
icon: https://cdn-uploads.huggingface.co/production/uploads/64545af5ec40bbbd01242ca6/iIzpqHDb9wU181AzfrjZy.png
|
||||
urls:
|
||||
- https://huggingface.co/Steelskull/L3.3-Damascus-R1
|
||||
- https://huggingface.co/bartowski/Steelskull_L3.3-Damascus-R1-GGUF
|
||||
description: |
|
||||
Damascus-R1 builds upon some elements of the Nevoria foundation but represents a significant step forward with a completely custom-made DeepSeek R1 Distill base: Hydroblated-R1-V3. Constructed using the new SCE (Select, Calculate, and Erase) merge method, Damascus-R1 prioritizes stability, intelligence, and enhanced awareness.
|
||||
|
||||
Technical Architecture
|
||||
Leveraging the SCE merge method and custom base, Damascus-R1 integrates newly added specialized components from multiple high-performance models:
|
||||
EVA and EURYALE foundations for creative expression and scene comprehension
|
||||
Cirrus and Hanami elements for enhanced reasoning capabilities
|
||||
Anubis components for detailed scene description
|
||||
Negative_LLAMA integration for balanced perspective and response
|
||||
|
||||
Core Philosophy
|
||||
Damascus-R1 embodies the principle that AI models can be intelligent and be fun. This version specifically addresses recent community feedback and iterates on prior experiments, optimizing the balance between technical capability and natural conversation flow.
|
||||
|
||||
Base Architecture
|
||||
At its core, Damascus-R1 utilizes the entirely custom Hydroblated-R1 base model, specifically engineered for stability, enhanced reasoning, and performance. The SCE merge method, with settings finely tuned based on community feedback from evaluations of Experiment-Model-Ver-A, L3.3-Exp-Nevoria-R1-70b-v0.1 and L3.3-Exp-Nevoria-70b-v0.1, enables precise and effective component integration while maintaining model coherence and reliability.
|
||||
overrides:
|
||||
parameters:
|
||||
model: Steelskull_L3.3-Damascus-R1-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: Steelskull_L3.3-Damascus-R1-Q4_K_M.gguf
|
||||
sha256: f1df5808b2099b26631d0bae870603a08dbfab6813471f514035d3fb92a47480
|
||||
uri: huggingface://bartowski/Steelskull_L3.3-Damascus-R1-GGUF/Steelskull_L3.3-Damascus-R1-Q4_K_M.gguf
|
||||
- !!merge <<: *deepseek-r1
|
||||
name: "uncensoredai_uncensoredlm-deepseek-r1-distill-qwen-14b"
|
||||
icon: https://huggingface.co/uncensoredai/UncensoredLM-DeepSeek-R1-Distill-Qwen-14B/resolve/main/h5dTflRHYMbGq3RXm9a61yz4io.avif
|
||||
urls:
|
||||
- https://huggingface.co/uncensoredai/UncensoredLM-DeepSeek-R1-Distill-Qwen-14B
|
||||
- https://huggingface.co/bartowski/uncensoredai_UncensoredLM-DeepSeek-R1-Distill-Qwen-14B-GGUF
|
||||
description: |
|
||||
An UncensoredLLM with Reasoning, what more could you want?
|
||||
overrides:
|
||||
parameters:
|
||||
model: uncensoredai_UncensoredLM-DeepSeek-R1-Distill-Qwen-14B-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: uncensoredai_UncensoredLM-DeepSeek-R1-Distill-Qwen-14B-Q4_K_M.gguf
|
||||
sha256: 85b2c3e1aa4e8cc3bf616f84c7595c963d5439f3fcfdbd5c957fb22e84d10b1c
|
||||
uri: huggingface://bartowski/uncensoredai_UncensoredLM-DeepSeek-R1-Distill-Qwen-14B-GGUF/uncensoredai_UncensoredLM-DeepSeek-R1-Distill-Qwen-14B-Q4_K_M.gguf
|
||||
- &qwen2
|
||||
url: "github:mudler/LocalAI/gallery/chatml.yaml@master" ## Start QWEN2
|
||||
name: "qwen2-7b-instruct"
|
||||
icon: https://avatars.githubusercontent.com/u/141221163
|
||||
license: apache-2.0
|
||||
@@ -5765,10 +6114,25 @@
|
||||
sha256: 3a4078d53b46f22989adbf998ce5a3fd090b6541f112d7e936eb4204a04100b1
|
||||
uri: huggingface://openbmb/MiniCPM-V-2_6-gguf/ggml-model-Q4_K_M.gguf
|
||||
- filename: minicpm-v-2_6-mmproj-f16.gguf
|
||||
sha256: f8a805e9e62085805c69c427287acefc284932eb4abfe6e1b1ce431d27e2f4e0
|
||||
uri: huggingface://openbmb/MiniCPM-V-2_6-gguf/mmproj-model-f16.gguf
|
||||
- &mistral03 ## START Mistral
url: "github:mudler/LocalAI/gallery/mistral-0.3.yaml@master"
sha256: 4485f68a0f1aa404c391e788ea88ea653c100d8e98fe572698f701e5809711fd
- !!merge <<: *qwen2
name: "taid-llm-1.5b"
icon: https://sakana.ai/assets/taid-jp/cover_large.jpeg
urls:
- https://huggingface.co/SakanaAI/TAID-LLM-1.5B
- https://huggingface.co/bartowski/TAID-LLM-1.5B-GGUF
description: |
TAID-LLM-1.5B is an English language model created through TAID (Temporally Adaptive Interpolated Distillation), our new knowledge distillation method. We used Qwen2-72B-Instruct as the teacher model and Qwen2-1.5B-Instruct as the student model.
overrides:
parameters:
model: TAID-LLM-1.5B-Q4_K_M.gguf
files:
- filename: TAID-LLM-1.5B-Q4_K_M.gguf
sha256: dbffc989d12d42ef8e4a2994e102d7ec7a02c49ec08ea2e35426372ad07b4cd8
uri: huggingface://bartowski/TAID-LLM-1.5B-GGUF/TAID-LLM-1.5B-Q4_K_M.gguf
- &mistral03
url: "github:mudler/LocalAI/gallery/mistral-0.3.yaml@master" ## START Mistral
name: "mistral-7b-instruct-v0.3"
icon: https://cdn-avatars.huggingface.co/v1/production/uploads/62dac1c7a8ead43d20e3e17a/wrLf5yaGC6ng4XME70w6Z.png
license: apache-2.0
@@ -6399,8 +6763,25 @@
- filename: Wayfarer-12B-Q4_K_M.gguf
sha256: 6cd9f290c820c64854fcdcfd312b066447acc2f63abe2e2e71af9bc4f1946c08
uri: huggingface://bartowski/Wayfarer-12B-GGUF/Wayfarer-12B-Q4_K_M.gguf
- &mudler ### START mudler's LocalAI specific-models
url: "github:mudler/LocalAI/gallery/mudler.yaml@master"
- !!merge <<: *mistral03
name: "mistral-small-24b-instruct-2501"
urls:
- https://huggingface.co/mistralai/Mistral-Small-24B-Instruct-2501
- https://huggingface.co/bartowski/Mistral-Small-24B-Instruct-2501-GGUF
description: |
Mistral Small 3 ( 2501 ) sets a new benchmark in the "small" Large Language Models category below 70B, boasting 24B parameters and achieving state-of-the-art capabilities comparable to larger models!
This model is an instruction-fine-tuned version of the base model: Mistral-Small-24B-Base-2501.

Mistral Small can be deployed locally and is exceptionally "knowledge-dense", fitting in a single RTX 4090 or a 32GB RAM MacBook once quantized.
overrides:
parameters:
model: Mistral-Small-24B-Instruct-2501-Q4_K_M.gguf
files:
- filename: Mistral-Small-24B-Instruct-2501-Q4_K_M.gguf
sha256: d1a6d049f09730c3f8ba26cf6b0b60c89790b5fdafa9a59c819acdfe93fffd1b
uri: huggingface://bartowski/Mistral-Small-24B-Instruct-2501-GGUF/Mistral-Small-24B-Instruct-2501-Q4_K_M.gguf
- &mudler
url: "github:mudler/LocalAI/gallery/mudler.yaml@master" ### START mudler's LocalAI specific-models
name: "LocalAI-llama3-8b-function-call-v0.2"
icon: "https://cdn-uploads.huggingface.co/production/uploads/647374aa7ff32a81ac6d35d4/us5JKi9z046p8K-cn_M0w.webp"
license: llama3
@@ -6444,8 +6825,8 @@
- filename: Mirai-Nova-Llama3-LocalAI-8B-v0.1-q4_k_m.bin
sha256: 579cbb229f9c11d0330759ff4733102d2491615a4c61289e26c09d1b3a583fec
uri: huggingface://mudler/Mirai-Nova-Llama3-LocalAI-8B-v0.1-GGUF/Mirai-Nova-Llama3-LocalAI-8B-v0.1-q4_k_m.bin
- &parler-tts ### START parler-tts
url: "github:mudler/LocalAI/gallery/parler-tts.yaml@master"
- &parler-tts
url: "github:mudler/LocalAI/gallery/parler-tts.yaml@master" ### START parler-tts
name: parler-tts-mini-v0.1
overrides:
parameters:
@@ -6461,8 +6842,8 @@
- cpu
- text-to-speech
- python
- &rerankers ### START rerankers
url: "github:mudler/LocalAI/gallery/rerankers.yaml@master"
- &rerankers
url: "github:mudler/LocalAI/gallery/rerankers.yaml@master" ### START rerankers
name: cross-encoder
parameters:
model: cross-encoder
@@ -7215,6 +7596,21 @@
- filename: GWQ-9B-Preview2-Q4_K_M.gguf
sha256: 04da51cdb17c7e51594f6daac595161a46298b48ab5e568a85e65541d10a861f
uri: huggingface://bartowski/GWQ-9B-Preview2-GGUF/GWQ-9B-Preview2-Q4_K_M.gguf
- !!merge <<: *gemma
name: "thedrummer_gemmasutra-pro-27b-v1.1"
icon: https://cdn-uploads.huggingface.co/production/uploads/65f2fd1c25b848bd061b5c2e/SrHUGXD_dp55pobeJK36t.png
urls:
- https://huggingface.co/TheDrummer/Gemmasutra-Pro-27B-v1.1
- https://huggingface.co/bartowski/TheDrummer_Gemmasutra-Pro-27B-v1.1-GGUF
description: |
A Gemmasutra tune with modern techniques. Au Revoir, Gemma!
overrides:
parameters:
model: TheDrummer_Gemmasutra-Pro-27B-v1.1-Q4_K_M.gguf
files:
- filename: TheDrummer_Gemmasutra-Pro-27B-v1.1-Q4_K_M.gguf
sha256: 218a14f0bf8266f9e77d16b8b4f5cc1dc76e97eb582a2c97cca5a3a2c35de86b
uri: huggingface://bartowski/TheDrummer_Gemmasutra-Pro-27B-v1.1-GGUF/TheDrummer_Gemmasutra-Pro-27B-v1.1-Q4_K_M.gguf
- &llama3
url: "github:mudler/LocalAI/gallery/llama3-instruct.yaml@master"
icon: https://avatars.githubusercontent.com/u/153379578
@@ -8713,8 +9109,8 @@
- filename: Copus-2x8B.i1-Q4_K_M.gguf
sha256: 685da1ba49e203e8f491105585143d76044286d4b4687bed37d325f6b55501e5
uri: huggingface://mradermacher/Copus-2x8B-i1-GGUF/Copus-2x8B.i1-Q4_K_M.gguf
- &yi-chat ### Start Yi
url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
- &yi-chat
url: "github:mudler/LocalAI/gallery/chatml.yaml@master" ### Start Yi
icon: "https://github.com/01-ai/Yi/raw/main/assets/img/Yi_logo_icon_light.svg"
name: "yi-1.5-9b-chat"
license: apache-2.0
@@ -8924,8 +9320,8 @@
- filename: Fimbulvetr-11B-v2-Q4_K_M-imat.gguf
sha256: 3f309b59508342536a70edd6c4be6cf4f2cb97f2e32cbc79ad2ab3f4c02933a4
uri: huggingface://Lewdiculous/Fimbulvetr-11B-v2-GGUF-IQ-Imatrix/Fimbulvetr-11B-v2-Q4_K_M-imat.gguf
- &noromaid ### Start noromaid
url: "github:mudler/LocalAI/gallery/noromaid.yaml@master"
- &noromaid
url: "github:mudler/LocalAI/gallery/noromaid.yaml@master" ### Start noromaid
name: "noromaid-13b-0.4-DPO"
icon: https://cdn-uploads.huggingface.co/production/uploads/630dfb008df86f1e5becadc3/VKX2Z2yjZX5J8kXzgeCYO.png
license: cc-by-nc-4.0
@@ -8944,8 +9340,8 @@
- filename: Noromaid-13B-0.4-DPO.q4_k_m.gguf
sha256: cb28e878d034fae3d0b43326c5fc1cfb4ab583b17c56e41d6ce023caec03c1c1
uri: huggingface://NeverSleep/Noromaid-13B-0.4-DPO-GGUF/Noromaid-13B-0.4-DPO.q4_k_m.gguf
- &wizardlm2 ### START Vicuna based
url: "github:mudler/LocalAI/gallery/wizardlm2.yaml@master"
- &wizardlm2
url: "github:mudler/LocalAI/gallery/wizardlm2.yaml@master" ### START Vicuna based
name: "wizardlm2-7b"
description: |
We introduce and opensource WizardLM-2, our next generation state-of-the-art large language models, which have improved performance on complex chat, multilingual, reasoning and agent. New family includes three cutting-edge models: WizardLM-2 8x22B, WizardLM-2 70B, and WizardLM-2 7B.
@@ -8999,8 +9395,8 @@
- filename: moondream2-mmproj-f16.gguf
sha256: 4cc1cb3660d87ff56432ebeb7884ad35d67c48c7b9f6b2856f305e39c38eed8f
uri: huggingface://moondream/moondream2-gguf/moondream2-mmproj-f16.gguf
- &llava ### START LLaVa
name: "llava-1.6-vicuna"
- &llava
name: "llava-1.6-vicuna" ### START LLaVa
icon: https://github.com/lobehub/lobe-icons/raw/master/packages/static-png/dark/llava-color.png
url: "github:mudler/LocalAI/gallery/llava.yaml@master"
license: apache-2.0
@@ -9413,8 +9809,8 @@
sha256: 010ec3ba94cb5ad2d9c8f95f46f01c6d80f83deab9df0a0831334ea45afff3e2
uri: huggingface://openbmb/MiniCPM-Llama3-V-2_5-gguf/ggml-model-Q4_K_M.gguf
- filename: minicpm-llama3-mmproj-f16.gguf
sha256: 391d11736c3cd24a90417c47b0c88975e86918fcddb1b00494c4d715b08af13e
uri: huggingface://openbmb/MiniCPM-Llama3-V-2_5-gguf/mmproj-model-f16.gguf
sha256: 2c2d773537faf6a7e093655d0d5e14801ef0b2121c6c3e1981ce094c2b62f4f9
- !!merge <<: *llama3
name: "llama-3-cursedstock-v1.8-8b-iq-imatrix"
urls:
@@ -9856,8 +10252,8 @@
- filename: Freyja-v4.95-maldv-7b-NON-FICTION.i1-Q4_K_M.gguf
sha256: cdc0f4de6df2ba120835fbd25c2a0ae2af8548f46d2c40c7a018c51c3d19e0c0
uri: huggingface://mradermacher/Freyja-v4.95-maldv-7b-NON-FICTION-i1-GGUF/Freyja-v4.95-maldv-7b-NON-FICTION.i1-Q4_K_M.gguf
- &chatml ### ChatML
url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
- &chatml
url: "github:mudler/LocalAI/gallery/chatml.yaml@master" ### ChatML
name: "una-thepitbull-21.4b-v2"
license: afl-3.0
icon: https://huggingface.co/fblgit/UNA-ThePitbull-21.4B-v2/resolve/main/DE-UNA-ThePitbull-21.4B-v2.png
@@ -10141,8 +10537,8 @@
- filename: Triangulum-10B.Q4_K_M.gguf
sha256: dd071f99edf6b166044bf229cdeec19419c4c348e3fc3d6587cfcc55e6fb85fa
uri: huggingface://mradermacher/Triangulum-10B-GGUF/Triangulum-10B.Q4_K_M.gguf
- &command-R ### START Command-r
url: "github:mudler/LocalAI/gallery/command-r.yaml@master"
- &command-R
url: "github:mudler/LocalAI/gallery/command-r.yaml@master" ### START Command-r
name: "command-r-v01:q1_s"
license: "cc-by-nc-4.0"
icon: https://cdn.sanity.io/images/rjtqmwfu/production/ae020d94b599cc453cc09ebc80be06d35d953c23-102x18.svg
@@ -10196,8 +10592,8 @@
- filename: "aya-23-35B-Q4_K_M.gguf"
sha256: "57824768c1a945e21e028c8e9a29b39adb4838d489f5865c82601ab9ad98065d"
uri: "huggingface://bartowski/aya-23-35B-GGUF/aya-23-35B-Q4_K_M.gguf"
- &phi-2-chat ### START Phi-2
url: "github:mudler/LocalAI/gallery/phi-2-chat.yaml@master"
- &phi-2-chat
url: "github:mudler/LocalAI/gallery/phi-2-chat.yaml@master" ### START Phi-2
license: mit
description: |
Phi-2 fine-tuned by the OpenHermes 2.5 dataset optimised for multi-turn conversation and character impersonation.
@@ -10318,8 +10714,8 @@
- filename: internlm3-8b-instruct-Q4_K_M.gguf
uri: huggingface://bartowski/internlm3-8b-instruct-GGUF/internlm3-8b-instruct-Q4_K_M.gguf
sha256: 2a9644687318e8659c9cf9b40730d5cc2f5af06f786a50439c7c51359b23896e
- &phi-3 ### START Phi-3
url: "github:mudler/LocalAI/gallery/phi-3-chat.yaml@master"
- &phi-3
url: "github:mudler/LocalAI/gallery/phi-3-chat.yaml@master" ### START Phi-3
name: "phi-3-mini-4k-instruct"
icon: https://avatars.githubusercontent.com/u/6154722
license: mit
@@ -10518,8 +10914,8 @@
- filename: Phi-3.5-MoE-instruct-Q4_K_M.gguf
sha256: 43e91bb720869bd8a92d8eb86bc3c74a52c49cf61642ca709b3d7bb89644df36
uri: huggingface://bartowski/Phi-3.5-MoE-instruct-GGUF/Phi-3.5-MoE-instruct-Q4_K_M.gguf
- &hermes-2-pro-mistral ### START Hermes
url: "github:mudler/LocalAI/gallery/hermes-2-pro-mistral.yaml@master"
- &hermes-2-pro-mistral
url: "github:mudler/LocalAI/gallery/hermes-2-pro-mistral.yaml@master" ### START Hermes
name: "hermes-2-pro-mistral"
icon: https://cdn-uploads.huggingface.co/production/uploads/6317aade83d8d2fd903192d9/ggO2sBDJ8Bhc6w-zwTx5j.png
license: apache-2.0
@@ -10854,8 +11250,8 @@
- filename: "galatolo-Q4_K.gguf"
sha256: "ca0cfd5a9ad40dc16416aa3a277015d0299b62c0803b67f5709580042202c172"
uri: "huggingface://galatolo/cerbero-7b-gguf/ggml-model-Q4_K.gguf"
- &codellama ### START Codellama
url: "github:mudler/LocalAI/gallery/codellama.yaml@master"
- &codellama
url: "github:mudler/LocalAI/gallery/codellama.yaml@master" ### START Codellama
name: "codellama-7b"
license: llama2
description: |
@@ -10985,8 +11381,8 @@
- filename: "llm-compiler-7b-ftd.Q4_K.gguf"
uri: "huggingface://legraphista/llm-compiler-7b-ftd-IMat-GGUF/llm-compiler-7b-ftd.Q4_K.gguf"
sha256: d862dd18ed335413787d0ad196522a9902a3c10a6456afdab8721822cb0ddde8
- &openvino ### START OpenVINO
url: "github:mudler/LocalAI/gallery/openvino.yaml@master"
- &openvino
url: "github:mudler/LocalAI/gallery/openvino.yaml@master" ### START OpenVINO
name: "openvino-llama-3-8b-instruct-ov-int8"
license: llama3
urls:
@@ -11099,8 +11495,8 @@
- gpu
- embedding
- cpu
- &sentencentransformers ### START Embeddings
description: |
- &sentencentransformers
description: | ### START Embeddings
This framework provides an easy method to compute dense vector representations for sentences, paragraphs, and images. The models are based on transformer networks like BERT / RoBERTa / XLM-RoBERTa etc. and achieve state-of-the-art performance in various tasks. Text is embedded in vector space such that similar text are closer and can efficiently be found using cosine similarity.
urls:
- https://github.com/UKPLab/sentence-transformers
@@ -11114,8 +11510,8 @@
overrides:
parameters:
model: all-MiniLM-L6-v2
- &dreamshaper ### START Image generation
name: dreamshaper
- &dreamshaper
name: dreamshaper ### START Image generation
icon: https://image.civitai.com/xG1nkqKTMzGDvpLrqFT7WA/dd9b038c-bd15-43ab-86ab-66e145ad7ff2/width=450/26072158-132340247-8k%20portrait%20of%20beautiful%20cyborg%20with%20brown%20hair,%20intricate,%20elegant,%20highly%20detailed,%20majestic,%20digital%20photography,%20art%20by%20artg_ed.jpeg
license: other
description: |
@@ -11312,8 +11708,8 @@
- filename: t5xxl_fp16.safetensors
sha256: 6e480b09fae049a72d2a8c5fbccb8d3e92febeb233bbe9dfe7256958a9167635
uri: https://huggingface.co/comfyanonymous/flux_text_encoders/resolve/main/t5xxl_fp16.safetensors
- &whisper ## Whisper
url: "github:mudler/LocalAI/gallery/whisper-base.yaml@master"
- &whisper
url: "github:mudler/LocalAI/gallery/whisper-base.yaml@master" ## Whisper
name: "whisper-1"
icon: https://avatars.githubusercontent.com/u/14957082
license: "MIT"
@@ -11494,8 +11890,8 @@
Stable Diffusion in NCNN with c++, supported txt2img and img2img
name: stablediffusion-cpp
icon: https://avatars.githubusercontent.com/u/100950301
- &piper ## Piper TTS
url: github:mudler/LocalAI/gallery/piper.yaml@master
- &piper
url: github:mudler/LocalAI/gallery/piper.yaml@master ## Piper TTS
name: voice-en-us-kathleen-low
icon: https://github.com/rhasspy/piper/raw/master/etc/logo.png
license: mit
49
gallery/llama3.2-fcall.yaml
Normal file
@@ -0,0 +1,49 @@
---
name: "llama3.2-fcall"

config_file: |
mmap: true
function:
json_regex_match:
- "(?s)<Output>(.*?)</Output>"
capture_llm_results:
- (?s)<Thought>(.*?)</Thought>
replace_llm_results:
- key: (?s)<Thought>(.*?)</Thought>
value: ""
grammar:
properties_order: "name,arguments"
function_arguments_key: "arguments"
template:
chat: |
<|start_header_id|>system<|end_header_id|>
You are a helpful assistant<|eot_id|><|start_header_id|>user<|end_header_id|>
{{.Input }}
<|start_header_id|>assistant<|end_header_id|>
chat_message: |
<|start_header_id|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}<|end_header_id|>
{{ if .FunctionCall -}}
{{ else if eq .RoleName "tool" -}}
{{ end -}}
{{ if .Content -}}
{{.Content -}}
{{ else if .FunctionCall -}}
{{ toJson .FunctionCall -}}
{{ end -}}
<|eot_id|>
completion: |
{{.Input}}
function: |
<|start_header_id|>system<|end_header_id|>
You are an AI assistant that executes function calls, and these are the tools at your disposal:
{{range .Functions}}
{'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
{{end}}
<|eot_id|>{{.Input}}<|start_header_id|>assistant<|end_header_id|>
context_size: 8192
f16: true
stopwords:
- <|im_end|>
- <dummy32000>
- "<|eot_id|>"
- <|end_of_text|>
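For reference (not part of the diff itself): the json_regex_match, capture_llm_results and replace_llm_results patterns in this template implement a simple strip-and-extract flow over the raw model reply. A minimal Go sketch of that flow, using an invented reply string that follows the template's <Thought>/<Output> contract, could look like this:

package main

import (
	"fmt"
	"regexp"
)

func main() {
	// Hypothetical model reply; the tags follow the prompt contract of the template above.
	reply := "<Thought>I should call the weather tool.</Thought><Output>{\"name\": \"get_weather\", \"arguments\": {\"city\": \"Rome\"}}</Output>"

	// replace_llm_results: strip the <Thought>...</Thought> block before parsing.
	thought := regexp.MustCompile(`(?s)<Thought>(.*?)</Thought>`)
	cleaned := thought.ReplaceAllString(reply, "")

	// json_regex_match: pull the JSON payload out of <Output>...</Output>.
	output := regexp.MustCompile(`(?s)<Output>(.*?)</Output>`)
	if m := output.FindStringSubmatch(cleaned); m != nil {
		fmt.Println(m[1]) // {"name": "get_weather", "arguments": {"city": "Rome"}}
	}
}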
55
gallery/llama3.2-quantized.yaml
Normal file
@@ -0,0 +1,55 @@
---
name: "llama3.2-quantized"

config_file: |
mmap: true
function:
disable_no_action: true
grammar:
disable: true
response_regex:
- \[(?P<name>\w+)\((?P<arguments>.*)\)\]
argument_regex:
- (?P<key>[^ '\(=,]+)[='"]+(?P<value>[^=,"']+)['"]?
template:
chat: |
<|begin_of_text|><|start_header_id|>system<|end_header_id|>
You are a helpful assistant<|eot_id|><|start_header_id|>user<|end_header_id|>
{{.Input }}
<|start_header_id|>assistant<|end_header_id|>
chat_message: |
<|start_header_id|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}<|end_header_id|>
{{ if .FunctionCall -}}
{{ else if eq .RoleName "tool" -}}
The Function was executed and the response was:
{{ end -}}
{{ if .Content -}}
{{.Content -}}
{{ else if .FunctionCall -}}
{{ range .FunctionCall }}
[{{.FunctionCall.Name}}({{.FunctionCall.Arguments}})]
{{ end }}
{{ end -}}
<|eot_id|>
completion: |
{{.Input}}
function: |
<|start_header_id|>system<|end_header_id|>
You are an expert in composing functions. You are given a question and a set of possible functions.
Based on the question, you will need to make one or more function/tool calls to achieve the purpose.
If none of the functions can be used, point it out. If the given question lacks the parameters required by the function, also point it out. You should only return the function call in tools call sections.
If you decide to invoke any of the function(s), you MUST put it in the format as follows:
[func_name1(params_name1=params_value1,params_name2=params_value2,...),func_name2(params_name1=params_value1,params_name2=params_value2,...)]
You SHOULD NOT include any other text in the response.
Here is a list of functions in JSON format that you can invoke.
{{toJson .Functions}}
<|eot_id|><|start_header_id|>user<|end_header_id|>
{{.Input}}
<|eot_id|><|start_header_id|>assistant<|end_header_id|>
context_size: 8192
f16: true
stopwords:
- <|im_end|>
- <dummy32000>
- "<|eot_id|>"
- <|end_of_text|>
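This template disables the grammar and instead relies on response_regex and argument_regex to decompose the bracketed call format that the function prompt asks for. As an illustration only (the reply string is an assumption, not produced by the commit), the two named regexes behave roughly like this in Go:

package main

import (
	"fmt"
	"regexp"
)

func main() {
	// Hypothetical model reply in the bracketed format requested by the function prompt above.
	reply := `[get_weather(city="Rome",unit="celsius")]`

	// response_regex: capture the function name and the raw argument string.
	response := regexp.MustCompile(`\[(?P<name>\w+)\((?P<arguments>.*)\)\]`)
	m := response.FindStringSubmatch(reply)
	if m == nil {
		return
	}
	name := m[response.SubexpIndex("name")]         // get_weather
	rawArgs := m[response.SubexpIndex("arguments")] // city="Rome",unit="celsius"

	// argument_regex: split the raw argument string into key/value pairs.
	argument := regexp.MustCompile(`(?P<key>[^ '\(=,]+)[='"]+(?P<value>[^=,"']+)['"]?`)
	for _, kv := range argument.FindAllStringSubmatch(rawArgs, -1) {
		fmt.Printf("%s: %s=%s\n", name, kv[argument.SubexpIndex("key")], kv[argument.SubexpIndex("value")])
	}
}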
@@ -5,6 +5,7 @@ import (
"errors"
"io"
"regexp"
"slices"
"strings"

"github.com/mudler/LocalAI/pkg/functions/grammars"
@@ -46,6 +47,14 @@ type GrammarConfig struct {
// SchemaType can be configured to use a specific schema type to force the grammar
// available : json, llama3.1
SchemaType string `yaml:"schema_type"`

GrammarTriggers []GrammarTrigger `yaml:"triggers"`
}

type GrammarTrigger struct {
// Trigger is the string that triggers the grammar
Word string `yaml:"word"`
AtStart bool `yaml:"at_start"`
}

// FunctionsConfig is the configuration for the tool/function call.
@@ -71,6 +80,12 @@ type FunctionsConfig struct {
// JSONRegexMatch is a regex to extract the JSON object from the response
JSONRegexMatch []string `yaml:"json_regex_match"`

// ArgumentRegex is a named regex to extract the arguments from the response. Use ArgumentRegexKey and ArgumentRegexValue to set the names of the named regex for key and value of the arguments.
ArgumentRegex []string `yaml:"argument_regex"`
// ArgumentRegex named regex names for key and value extractions. default: key and value
ArgumentRegexKey string `yaml:"argument_regex_key_name"` // default: key
ArgumentRegexValue string `yaml:"argument_regex_value_name"` // default: value

// ReplaceFunctionResults allow to replace strings in the results before parsing them
ReplaceFunctionResults []ReplaceResult `yaml:"replace_function_results"`

@@ -310,7 +325,7 @@ func ParseFunctionCall(llmresult string, functionConfig FunctionsConfig) []FuncC
if functionName == "" {
return results
}
results = append(results, FuncCallResults{Name: result[functionNameKey], Arguments: result[functionArgumentsKey]})
results = append(results, FuncCallResults{Name: result[functionNameKey], Arguments: ParseFunctionCallArgs(result[functionArgumentsKey], functionConfig)})
}
}
} else {
@@ -322,3 +337,38 @@ func ParseFunctionCall(llmresult string, functionConfig FunctionsConfig) []FuncC

return results
}

func ParseFunctionCallArgs(functionArguments string, functionConfig FunctionsConfig) string {
if len(functionConfig.ArgumentRegex) == 0 {
return functionArguments
}

// We use named regexes here to extract the function argument key value pairs and convert this to valid json.
// TODO: there might be responses where an object as a value is expected/required. This is currently not handled.
args := make(map[string]string)

agrsRegexKeyName := "key"
agrsRegexValueName := "value"

if functionConfig.ArgumentRegexKey != "" {
agrsRegexKeyName = functionConfig.ArgumentRegexKey
}
if functionConfig.ArgumentRegexValue != "" {
agrsRegexValueName = functionConfig.ArgumentRegexValue
}

for _, r := range functionConfig.ArgumentRegex {
var respRegex = regexp.MustCompile(r)
var nameRange []string = respRegex.SubexpNames()
var keyIndex = slices.Index(nameRange, agrsRegexKeyName)
var valueIndex = slices.Index(nameRange, agrsRegexValueName)
matches := respRegex.FindAllStringSubmatch(functionArguments, -1)
for _, match := range matches {
args[match[keyIndex]] = match[valueIndex]
}
}

jsonBytes, _ := json.Marshal(args)

return string(jsonBytes)
}
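As a hedged illustration of how a caller might exercise the new ParseFunctionCallArgs helper (not part of the diff; the import path is assumed from the pkg/functions/grammars import above, and the argument string is invented):

package main

import (
	"fmt"

	"github.com/mudler/LocalAI/pkg/functions"
)

func main() {
	cfg := functions.FunctionsConfig{
		// Same named regex as in gallery/llama3.2-quantized.yaml; the default
		// group names "key" and "value" apply since the *_name fields are unset.
		ArgumentRegex: []string{`(?P<key>[^ '\(=,]+)[='"]+(?P<value>[^=,"']+)['"]?`},
	}

	args := functions.ParseFunctionCallArgs(`city="Rome",unit="celsius"`, cfg)
	fmt.Println(args) // {"city":"Rome","unit":"celsius"}
}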
@@ -9,7 +9,6 @@ import (

"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/gallery"
"github.com/mudler/LocalAI/embedded"
"github.com/mudler/LocalAI/pkg/downloader"
"github.com/mudler/LocalAI/pkg/utils"
"github.com/rs/zerolog/log"
@@ -18,42 +17,17 @@ import (
// InstallModels will preload models from the given list of URLs and galleries
// It will download the model if it is not already present in the model path
// It will also try to resolve if the model is an embedded model YAML configuration
func InstallModels(galleries []config.Gallery, modelLibraryURL string, modelPath string, enforceScan bool, downloadStatus func(string, string, string, float64), models ...string) error {
func InstallModels(galleries []config.Gallery, modelPath string, enforceScan bool, downloadStatus func(string, string, string, float64), models ...string) error {
// create an error that groups all errors
var err error

lib, _ := embedded.GetRemoteLibraryShorteners(modelLibraryURL, modelPath)

for _, url := range models {
// As a best effort, try to resolve the model from the remote library
// if it's not resolved we try with the other method below
if modelLibraryURL != "" {
if lib[url] != "" {
log.Debug().Msgf("[startup] model configuration is defined remotely: %s (%s)", url, lib[url])
url = lib[url]
}
}

url = embedded.ModelShortURL(url)
uri := downloader.URI(url)

switch {
case embedded.ExistsInModelsLibrary(url):
modelYAML, e := embedded.ResolveContent(url)
// If we resolve something, just save it to disk and continue
if e != nil {
log.Error().Err(e).Msg("error resolving model content")
err = errors.Join(err, e)
continue
}

log.Debug().Msgf("[startup] resolved embedded model: %s", url)
md5Name := utils.MD5(url)
modelDefinitionFilePath := filepath.Join(modelPath, md5Name) + ".yaml"
if e := os.WriteFile(modelDefinitionFilePath, modelYAML, 0600); err != nil {
log.Error().Err(e).Str("filepath", modelDefinitionFilePath).Msg("error writing model definition")
err = errors.Join(err, e)
}
case uri.LooksLikeOCI():
log.Debug().Msgf("[startup] resolved OCI model to download: %s", url)

@@ -7,7 +7,6 @@ import (

"github.com/mudler/LocalAI/core/config"
. "github.com/mudler/LocalAI/pkg/startup"
"github.com/mudler/LocalAI/pkg/utils"

. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
@@ -16,29 +15,13 @@ import (
var _ = Describe("Preload test", func() {

Context("Preloading from strings", func() {
It("loads from remote url", func() {
tmpdir, err := os.MkdirTemp("", "")
Expect(err).ToNot(HaveOccurred())
libraryURL := "https://raw.githubusercontent.com/mudler/LocalAI/master/embedded/model_library.yaml"
fileName := fmt.Sprintf("%s.yaml", "phi-2")

InstallModels([]config.Gallery{}, libraryURL, tmpdir, true, nil, "phi-2")

resultFile := filepath.Join(tmpdir, fileName)

content, err := os.ReadFile(resultFile)
Expect(err).ToNot(HaveOccurred())

Expect(string(content)).To(ContainSubstring("name: phi-2"))
})

It("loads from embedded full-urls", func() {
tmpdir, err := os.MkdirTemp("", "")
Expect(err).ToNot(HaveOccurred())
url := "https://raw.githubusercontent.com/mudler/LocalAI-examples/main/configurations/phi-2.yaml"
fileName := fmt.Sprintf("%s.yaml", "phi-2")

InstallModels([]config.Gallery{}, "", tmpdir, true, nil, url)
InstallModels([]config.Gallery{}, tmpdir, true, nil, url)

resultFile := filepath.Join(tmpdir, fileName)

@@ -47,45 +30,13 @@ var _ = Describe("Preload test", func() {

Expect(string(content)).To(ContainSubstring("name: phi-2"))
})
It("loads from embedded short-urls", func() {
tmpdir, err := os.MkdirTemp("", "")
Expect(err).ToNot(HaveOccurred())
url := "phi-2"

InstallModels([]config.Gallery{}, "", tmpdir, true, nil, url)

entry, err := os.ReadDir(tmpdir)
Expect(err).ToNot(HaveOccurred())
Expect(entry).To(HaveLen(1))
resultFile := entry[0].Name()

content, err := os.ReadFile(filepath.Join(tmpdir, resultFile))
Expect(err).ToNot(HaveOccurred())

Expect(string(content)).To(ContainSubstring("name: phi-2"))
})
It("loads from embedded models", func() {
tmpdir, err := os.MkdirTemp("", "")
Expect(err).ToNot(HaveOccurred())
url := "mistral-openorca"
fileName := fmt.Sprintf("%s.yaml", utils.MD5(url))

InstallModels([]config.Gallery{}, "", tmpdir, true, nil, url)

resultFile := filepath.Join(tmpdir, fileName)

content, err := os.ReadFile(resultFile)
Expect(err).ToNot(HaveOccurred())

Expect(string(content)).To(ContainSubstring("name: mistral-openorca"))
})
It("downloads from urls", func() {
tmpdir, err := os.MkdirTemp("", "")
Expect(err).ToNot(HaveOccurred())
url := "huggingface://TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF/tinyllama-1.1b-chat-v0.3.Q2_K.gguf"
fileName := fmt.Sprintf("%s.gguf", "tinyllama-1.1b-chat-v0.3.Q2_K")

err = InstallModels([]config.Gallery{}, "", tmpdir, false, nil, url)
err = InstallModels([]config.Gallery{}, tmpdir, false, nil, url)
Expect(err).ToNot(HaveOccurred())

resultFile := filepath.Join(tmpdir, fileName)
@@ -765,6 +765,17 @@ const docTemplate = `{
"/v1/tokenize": {
"post": {
"summary": "Tokenize the input.",
"parameters": [
{
"description": "Request",
"name": "request",
"in": "body",
"required": true,
"schema": {
"$ref": "#/definitions/schema.TokenizeRequest"
}
}
],
"responses": {
"200": {
"description": "Response",
@@ -1838,6 +1849,17 @@ const docTemplate = `{
}
}
},
"schema.TokenizeRequest": {
"type": "object",
"properties": {
"content": {
"type": "string"
},
"model": {
"type": "string"
}
}
},
"schema.TokenizeResponse": {
"type": "object",
"properties": {

@@ -758,6 +758,17 @@
"/v1/tokenize": {
"post": {
"summary": "Tokenize the input.",
"parameters": [
{
"description": "Request",
"name": "request",
"in": "body",
"required": true,
"schema": {
"$ref": "#/definitions/schema.TokenizeRequest"
}
}
],
"responses": {
"200": {
"description": "Response",
@@ -1831,6 +1842,17 @@
}
}
},
"schema.TokenizeRequest": {
"type": "object",
"properties": {
"content": {
"type": "string"
},
"model": {
"type": "string"
}
}
},
"schema.TokenizeResponse": {
"type": "object",
"properties": {

@@ -705,6 +705,13 @@ definitions:
description: voice audio file or speaker id
type: string
type: object
schema.TokenizeRequest:
properties:
content:
type: string
model:
type: string
type: object
schema.TokenizeResponse:
properties:
tokens:
@@ -1216,6 +1223,13 @@ paths:
summary: Get TokenMetrics for Active Slot.
/v1/tokenize:
post:
parameters:
- description: Request
in: body
name: request
required: true
schema:
$ref: '#/definitions/schema.TokenizeRequest'
responses:
"200":
description: Response
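The swagger additions above document the new /v1/tokenize route: a POST carrying a schema.TokenizeRequest (model and content fields) and returning a schema.TokenizeResponse with a tokens field. A minimal Go sketch of calling it against a locally running instance might look like the following; the base URL and model name are assumptions, not part of the change:

package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"net/http"
)

// Field names follow the schema.TokenizeRequest properties documented above.
type tokenizeRequest struct {
	Model   string `json:"model"`
	Content string `json:"content"`
}

func main() {
	body, _ := json.Marshal(tokenizeRequest{
		Model:   "phi-2",          // assumed model name
		Content: "Hello, world!",
	})

	resp, err := http.Post("http://localhost:8080/v1/tokenize", "application/json", bytes.NewReader(body))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	// The response is expected to carry a "tokens" field (schema.TokenizeResponse).
	var out map[string]any
	if err := json.NewDecoder(resp.Body).Decode(&out); err != nil {
		panic(err)
	}
	fmt.Println(out["tokens"])
}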