Mirror of https://github.com/mudler/LocalAI.git (synced 2026-02-03 19:22:39 -05:00)
Compare commits
91 Commits
015835dba2
313ea2c4d2
26c4058be4
32db787991
011565aaa3
c967ac37bc
64721606b9
7c502ec209
7ee25ecfb3
cdbcac6a78
87f78ecfa9
cffecda48c
963e5903fc
9c425d55f6
398a9efa3a
8f2cf52f3b
134ea1a37b
3e77a17b26
a26fb548b1
08e1e2251e
dcabda42d1
fd4043266b
e1db6dce82
d5da8c3509
9db068388b
54c0f153e2
e45e8a58fc
52bc463a3f
0da16c73ba
e416843f22
e65e3253a3
bc7d4586ed
056d4b4fc9
5927f9e43e
98dfa363db
92cd538829
cdcfb2617c
1a9299a7c0
a60b9b7a38
1b44a5a3b7
fdf1452c6b
773cec77a2
585e0745da
41db6668f0
c9f28e2b56
6afe9c8fda
f166541ac3
7ddf486b37
5f130febb8
b82577d642
97cf028175
094f808549
18f9e11f1a
18c35ee86f
53d1db1da0
13e7432b89
ddd289d1af
f9903d850f
1e3cef6774
dcf28e6a28
cb47a03880
d2a5a58e11
88115e4ddb
0a198e32de
61388317c1
304484c59b
93ba5ea14f
8ec828a654
b6f681315a
d53e71021f
43146fa607
f4dab82919
f659304227
fd493a4451
181fa93168
d5d9e78983
a1a86aa1f7
9695969913
975c579d44
814cc24b69
086f9e1f07
3f923bb2ce
803e2db30b
a282bd4969
5bca02bad4
4858e72fd9
7eab6ba71b
a909f63fbe
b46f36195f
465f1f14a7
b8b1e10f34
.github/dependabot.yml (vendored): 2 changes
@@ -9,6 +9,8 @@ updates:
     directory: "/"
     schedule:
       interval: "weekly"
+    ignore:
+      - dependency-name: "github.com/mudler/LocalAI/pkg/grpc/proto"
   - package-ecosystem: "github-actions"
     # Workflow files stored in the default location of `.github/workflows`. (You don't need to specify `/.github/workflows` for `directory`. You can use `directory: "/"`.)
     directory: "/"
.github/workflows/notify-models.yaml (vendored): 4 changes
@@ -79,7 +79,7 @@ jobs:
           args: ${{ steps.summarize.outputs.message }}
       - name: Setup tmate session if fails
        if: ${{ failure() }}
-        uses: mxschmitt/action-tmate@v3.18
+        uses: mxschmitt/action-tmate@v3.19
        with:
          detached: true
          connect-timeout-seconds: 180
@@ -161,7 +161,7 @@ jobs:
          TWITTER_ACCESS_TOKEN_SECRET: ${{ secrets.TWITTER_ACCESS_TOKEN_SECRET }}
       - name: Setup tmate session if fails
        if: ${{ failure() }}
-        uses: mxschmitt/action-tmate@v3.18
+        uses: mxschmitt/action-tmate@v3.19
        with:
          detached: true
          connect-timeout-seconds: 180
.github/workflows/release.yaml (vendored): 8 changes
@@ -123,7 +123,7 @@ jobs:
            release/*
       - name: Setup tmate session if tests fail
        if: ${{ failure() }}
-        uses: mxschmitt/action-tmate@v3.18
+        uses: mxschmitt/action-tmate@v3.19
        with:
          detached: true
          connect-timeout-seconds: 180
@@ -232,7 +232,7 @@ jobs:
            release/*
       - name: Setup tmate session if tests fail
        if: ${{ failure() }}
-        uses: mxschmitt/action-tmate@v3.18
+        uses: mxschmitt/action-tmate@v3.19
        with:
          detached: true
          connect-timeout-seconds: 180
@@ -308,7 +308,7 @@ jobs:
            release/*
       - name: Setup tmate session if tests fail
        if: ${{ failure() }}
-        uses: mxschmitt/action-tmate@v3.18
+        uses: mxschmitt/action-tmate@v3.19
        with:
          detached: true
          connect-timeout-seconds: 180
@@ -350,7 +350,7 @@ jobs:
            release/*
       - name: Setup tmate session if tests fail
        if: ${{ failure() }}
-        uses: mxschmitt/action-tmate@v3.18
+        uses: mxschmitt/action-tmate@v3.19
        with:
          detached: true
          connect-timeout-seconds: 180
.github/workflows/test.yml (vendored): 6 changes
@@ -133,7 +133,7 @@ jobs:
          PATH="$PATH:/root/go/bin" GO_TAGS="stablediffusion tts" make --jobs 5 --output-sync=target test
       - name: Setup tmate session if tests fail
        if: ${{ failure() }}
-        uses: mxschmitt/action-tmate@v3.18
+        uses: mxschmitt/action-tmate@v3.19
        with:
          detached: true
          connect-timeout-seconds: 180
@@ -197,7 +197,7 @@ jobs:
          make run-e2e-aio
       - name: Setup tmate session if tests fail
        if: ${{ failure() }}
-        uses: mxschmitt/action-tmate@v3.18
+        uses: mxschmitt/action-tmate@v3.19
        with:
          detached: true
          connect-timeout-seconds: 180
@@ -235,7 +235,7 @@ jobs:
          BUILD_TYPE="GITHUB_CI_HAS_BROKEN_METAL" CMAKE_ARGS="-DGGML_F16C=OFF -DGGML_AVX512=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF" make --jobs 4 --output-sync=target test
       - name: Setup tmate session if tests fail
        if: ${{ failure() }}
-        uses: mxschmitt/action-tmate@v3.18
+        uses: mxschmitt/action-tmate@v3.19
        with:
          detached: true
          connect-timeout-seconds: 180
Makefile: 4 changes
@@ -8,7 +8,7 @@ DETECT_LIBS?=true
 # llama.cpp versions
 GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
 GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
-CPPLLAMA_VERSION?=96776405a17034dcfd53d3ddf5d142d34bdbb657
+CPPLLAMA_VERSION?=45f097645efb11b6d09a5b4adbbfd7c312ac0126
 
 # go-rwkv version
 RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
@@ -16,7 +16,7 @@ RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6
 
 # whisper.cpp version
 WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp
-WHISPER_CPP_VERSION?=fdbfb460ed546452a5d53611bba66d10d842e719
+WHISPER_CPP_VERSION?=a5abfe6a90495f7bf19fe70d016ecc255e97359c
 
 # bert.cpp version
 BERT_REPO?=https://github.com/go-skynet/go-bert.cpp
README.md: 15 changes
@@ -66,6 +66,21 @@ docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-cpu
 # docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-gpu-nvidia-cuda-12
 ```
 
+To load models:
+
+```bash
+# From the model gallery (see available models with `local-ai models list`, in the WebUI from the model tab, or visiting https://models.localai.io)
+local-ai run llama-3.2-1b-instruct:q4_k_m
+# Start LocalAI with the phi-2 model directly from huggingface
+local-ai run huggingface://TheBloke/phi-2-GGUF/phi-2.Q8_0.gguf
+# Install and run a model from the Ollama OCI registry
+local-ai run ollama://gemma:2b
+# Run a model from a configuration file
+local-ai run https://gist.githubusercontent.com/.../phi-2.yaml
+# Install and run a model from a standard OCI registry (e.g., Docker Hub)
+local-ai run oci://localai/phi-2:latest
+```
+
 [💻 Getting started](https://localai.io/basics/getting_started/index.html)
 
 ## 📰 Latest project news
@@ -391,6 +391,39 @@ struct llama_metrics {
     }
 };
 
+struct llava_embd_batch {
+    std::vector<llama_pos>      pos;
+    std::vector<int32_t>        n_seq_id;
+    std::vector<llama_seq_id>   seq_id_0;
+    std::vector<llama_seq_id *> seq_ids;
+    std::vector<int8_t>         logits;
+    llama_batch batch;
+    llava_embd_batch(float * embd, int32_t n_tokens, llama_pos pos_0, llama_seq_id seq_id) {
+        pos     .resize(n_tokens);
+        n_seq_id.resize(n_tokens);
+        seq_ids .resize(n_tokens + 1);
+        logits  .resize(n_tokens);
+        seq_id_0.resize(1);
+        seq_id_0[0] = seq_id;
+        seq_ids [n_tokens] = nullptr;
+        batch = {
+            /*n_tokens =*/ n_tokens,
+            /*tokens   =*/ nullptr,
+            /*embd     =*/ embd,
+            /*pos      =*/ pos.data(),
+            /*n_seq_id =*/ n_seq_id.data(),
+            /*seq_id   =*/ seq_ids.data(),
+            /*logits   =*/ logits.data(),
+        };
+        for (int i = 0; i < n_tokens; i++) {
+            batch.pos     [i] = pos_0 + i;
+            batch.n_seq_id[i] = 1;
+            batch.seq_id  [i] = seq_id_0.data();
+            batch.logits  [i] = false;
+        }
+    }
+};
+
 struct llama_server_context
 {
     llama_model *model = nullptr;
@@ -934,7 +967,6 @@ struct llama_server_context
                 batch.n_seq_id + i,
                 batch.seq_id + i,
                 batch.logits + i,
-                0, 0, 0, // unused
             };
             if (llama_decode(ctx, batch_view) != 0)
             {
@@ -1379,7 +1411,6 @@ struct llama_server_context
                 batch.n_seq_id + i,
                 batch.seq_id + i,
                 batch.logits + i,
-                0, 0, 0, // unused
             };
             if (llama_decode(ctx, batch_view))
             {
@@ -1398,8 +1429,9 @@ struct llama_server_context
             }
 
             const int n_embd = llama_n_embd(model);
-            llama_batch batch_img = { n_eval, nullptr, (img.image_embedding + i * n_embd), nullptr, nullptr, nullptr, nullptr, slot.n_past, 1, 0, };
-            if (llama_decode(ctx, batch_img))
+            float * embd = img.image_embedding + i * n_embd;
+            llava_embd_batch llava_batch = llava_embd_batch(embd, n_eval, slot.n_past, 0);
+            if (llama_decode(ctx, llava_batch.batch))
             {
                 LOG("%s : failed to eval image\n", __func__);
                 return false;
@@ -1904,7 +1936,6 @@ struct llama_server_context
                 batch.n_seq_id + i,
                 batch.seq_id + i,
                 batch.logits + i,
-                0, 0, 0, // unused
             };
 
             const int ret = llama_decode(ctx, batch_view);
 
@@ -1,2 +1,2 @@
 --extra-index-url https://download.pytorch.org/whl/cu118
-torch
+torch==2.4.1+cu118

@@ -1 +1 @@
-torch
+torch==2.4.1

@@ -1,2 +1,2 @@
 --extra-index-url https://download.pytorch.org/whl/rocm6.0
-torch
+torch==2.4.1+rocm6.0

@@ -1,6 +1,6 @@
 accelerate
 auto-gptq==0.7.1
-grpcio==1.66.2
+grpcio==1.67.0
 protobuf
 certifi
 transformers

@@ -1,4 +1,4 @@
 transformers
 accelerate
-torch
-torchaudio
+torch==2.4.1
+torchaudio==2.4.1

@@ -1,5 +1,5 @@
 --extra-index-url https://download.pytorch.org/whl/cu118
-torch
-torchaudio
+torch==2.4.1+cu118
+torchaudio==2.4.1+cu118
 transformers
 accelerate

@@ -1,4 +1,4 @@
-torch
-torchaudio
+torch==2.4.1
+torchaudio==2.4.1
 transformers
 accelerate

@@ -1,5 +1,5 @@
 --extra-index-url https://download.pytorch.org/whl/rocm6.0
-torch
-torchaudio
+torch==2.4.1+rocm6.0
+torchaudio==2.4.1+rocm6.0
 transformers
 accelerate

@@ -1,4 +1,4 @@
 bark==0.1.5
-grpcio==1.66.2
+grpcio==1.67.0
 protobuf
 certifi

@@ -1,2 +1,2 @@
-grpcio==1.66.2
+grpcio==1.67.0
 protobuf

@@ -1,3 +1,4 @@
 transformers
 accelerate
-torch
+torch==2.4.1
+coqui-tts

@@ -1,5 +1,6 @@
 --extra-index-url https://download.pytorch.org/whl/cu118
-torch
-torchaudio
+torch==2.4.1+cu118
+torchaudio==2.4.1+cu118
 transformers
 accelerate
+coqui-tts

@@ -1,4 +1,5 @@
-torch
-torchaudio
+torch==2.4.1
+torchaudio==2.4.1
 transformers
 accelerate
+coqui-tts

@@ -1,5 +1,6 @@
 --extra-index-url https://download.pytorch.org/whl/rocm6.0
-torch
-torchaudio
+torch==2.4.1+rocm6.0
+torchaudio==2.4.1+rocm6.0
 transformers
 accelerate
+coqui-tts

@@ -5,4 +5,5 @@ torchaudio
 optimum[openvino]
 setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406
 transformers
 accelerate
+coqui-tts

@@ -1,4 +1,4 @@
 coqui-tts
-grpcio==1.66.2
+grpcio==1.67.0
 protobuf
 certifi
+packaging==24.1

@@ -5,5 +5,5 @@ accelerate
 compel
 peft
 sentencepiece
-torch
+torch==2.4.1
 optimum-quanto

@@ -1,5 +1,5 @@
 --extra-index-url https://download.pytorch.org/whl/cu118
-torch
+torch==2.4.1+cu118
 diffusers
 opencv-python
 transformers

@@ -1,4 +1,4 @@
-torch
+torch==2.4.1
 diffusers
 opencv-python
 transformers

@@ -1,5 +1,5 @@
 setuptools
-grpcio==1.66.2
+grpcio==1.67.0
 pillow
 protobuf
 certifi

@@ -1,3 +1,3 @@
 transformers
 accelerate
-torch
+torch==2.4.1

@@ -1,4 +1,4 @@
 --extra-index-url https://download.pytorch.org/whl/cu118
-torch
+torch==2.4.1+cu118
 transformers
 accelerate

@@ -1,3 +1,3 @@
-torch
+torch==2.4.1
 transformers
 accelerate

@@ -1,4 +1,4 @@
-grpcio==1.66.2
+grpcio==1.67.0
 protobuf
 certifi
 wheel

@@ -1,2 +1,2 @@
-torch
+torch==2.4.1
 transformers

@@ -1,3 +1,3 @@
 --extra-index-url https://download.pytorch.org/whl/cu118
-torch
+torch==2.4.1+cu118
 transformers

@@ -1,2 +1,2 @@
-torch
+torch==2.4.1
 transformers

@@ -1,3 +1,3 @@
-grpcio==1.66.2
+grpcio==1.67.0
 protobuf
 certifi

@@ -1 +1,3 @@
-torch
+torch==2.4.1
+git+https://github.com/myshell-ai/MeloTTS.git
+git+https://github.com/myshell-ai/OpenVoice.git

@@ -1,2 +1,4 @@
 --extra-index-url https://download.pytorch.org/whl/cu118
-torch
+torch==2.4.1+cu118
+git+https://github.com/myshell-ai/MeloTTS.git
+git+https://github.com/myshell-ai/OpenVoice.git

@@ -1 +1,3 @@
-torch
+torch==2.4.1
+git+https://github.com/myshell-ai/MeloTTS.git
+git+https://github.com/myshell-ai/OpenVoice.git

@@ -1,2 +1,4 @@
 --extra-index-url https://download.pytorch.org/whl/rocm6.0
-torch
+torch==2.4.1+rocm6.0
+git+https://github.com/myshell-ai/MeloTTS.git
+git+https://github.com/myshell-ai/OpenVoice.git

@@ -2,22 +2,22 @@
 intel-extension-for-pytorch
 torch
 optimum[openvino]
-grpcio==1.66.2
+grpcio==1.67.0
 protobuf
 librosa==0.9.1
-faster-whisper==1.0.3
+faster-whisper==0.9.0
 pydub==0.25.1
 wavmark==0.0.3
-numpy==1.26.4
+numpy==1.22.0
 eng_to_ipa==0.0.2
 inflect==7.0.0
 unidecode==1.3.7
-whisper-timestamped==1.15.4
+whisper-timestamped==1.14.2
 openai
 python-dotenv
 pypinyin==0.50.0
 cn2an==0.5.22
 jieba==0.42.1
 gradio==4.44.1
 langid==1.1.6
 git+https://github.com/myshell-ai/MeloTTS.git
 git+https://github.com/myshell-ai/OpenVoice.git

@@ -1,10 +1,10 @@
-grpcio==1.66.2
+grpcio==1.67.0
 protobuf
 librosa
 faster-whisper
 pydub==0.25.1
 wavmark==0.0.3
-numpy
+numpy==1.22.0
 eng_to_ipa==0.0.2
 inflect
 unidecode
@@ -13,8 +13,8 @@ openai
 python-dotenv
 pypinyin
 cn2an==0.5.22
+networkx==2.8.8
 jieba==0.42.1
-gradio
+gradio==3.48.0
 langid==1.1.6
 git+https://github.com/myshell-ai/MeloTTS.git
 git+https://github.com/myshell-ai/OpenVoice.git
+llvmlite==0.43.0

@@ -1,3 +1,3 @@
 transformers
 accelerate
-torch
+torch==2.4.1

@@ -1,5 +1,5 @@
 --extra-index-url https://download.pytorch.org/whl/cu118
-torch
-torchaudio
+torch==2.4.1+cu118
+torchaudio==2.4.1+cu118
 transformers
 accelerate

@@ -1,4 +1,4 @@
-torch
-torchaudio
+torch==2.4.1
+torchaudio==2.4.1
 transformers
 accelerate

@@ -1,4 +1,4 @@
-grpcio==1.66.2
+grpcio==1.67.0
 protobuf
 certifi
 llvmlite==0.43.0

@@ -1,4 +1,4 @@
 transformers
 accelerate
-torch
+torch==2.4.1
 rerankers[transformers]

@@ -1,5 +1,5 @@
 --extra-index-url https://download.pytorch.org/whl/cu118
 transformers
 accelerate
-torch
+torch==2.4.1+cu118
 rerankers[transformers]

@@ -1,4 +1,4 @@
 transformers
 accelerate
-torch
+torch==2.4.1
 rerankers[transformers]

@@ -1,5 +1,5 @@
 --extra-index-url https://download.pytorch.org/whl/rocm6.0
 transformers
 accelerate
-torch
+torch==2.4.1+rocm6.0
 rerankers[transformers]

@@ -1,3 +1,3 @@
-grpcio==1.66.2
+grpcio==1.67.0
 protobuf
 certifi

@@ -1,6 +1,6 @@
-torch
+torch==2.4.1
 accelerate
 transformers
 bitsandbytes
-sentence-transformers==3.1.1
+sentence-transformers==3.2.0
 transformers

@@ -1,5 +1,5 @@
 --extra-index-url https://download.pytorch.org/whl/cu118
-torch
+torch==2.4.1+cu118
 accelerate
-sentence-transformers==3.1.1
+sentence-transformers==3.2.0
 transformers

@@ -1,4 +1,4 @@
-torch
+torch==2.4.1
 accelerate
-sentence-transformers==3.1.1
+sentence-transformers==3.2.0
 transformers

@@ -1,5 +1,5 @@
 --extra-index-url https://download.pytorch.org/whl/rocm6.0
-torch
+torch==2.4.1+rocm6.0
 accelerate
-sentence-transformers==3.1.1
+sentence-transformers==3.2.0
 transformers

@@ -4,5 +4,5 @@ torch
 optimum[openvino]
 setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
 accelerate
-sentence-transformers==3.1.1
+sentence-transformers==3.2.0
 transformers

@@ -1,4 +1,4 @@
-grpcio==1.66.2
+grpcio==1.67.0
 protobuf
 certifi
 datasets

@@ -1,3 +1,3 @@
 transformers
 accelerate
-torch
+torch==2.4.1

@@ -1,4 +1,4 @@
 --extra-index-url https://download.pytorch.org/whl/cu118
 transformers
 accelerate
-torch
+torch==2.4.1+cu118

@@ -1,3 +1,3 @@
 transformers
 accelerate
-torch
+torch==2.4.1

@@ -1,4 +1,4 @@
 --extra-index-url https://download.pytorch.org/whl/rocm6.0
 transformers
 accelerate
-torch
+torch==2.4.1+rocm6.0

@@ -1,4 +1,4 @@
-grpcio==1.66.2
+grpcio==1.67.0
 protobuf
 scipy==1.14.0
 certifi

@@ -1,4 +1,4 @@
-torch
+torch==2.4.1
 accelerate
 transformers
 bitsandbytes

@@ -1,5 +1,5 @@
 --extra-index-url https://download.pytorch.org/whl/cu118
-torch
+torch==2.4.1+cu118
 accelerate
 transformers
 bitsandbytes

@@ -1,4 +1,4 @@
-torch
+torch==2.4.1
 accelerate
 transformers
 bitsandbytes

@@ -1,5 +1,5 @@
 --extra-index-url https://download.pytorch.org/whl/rocm6.0
-torch
+torch==2.4.1+rocm6.0
 accelerate
 transformers
 bitsandbytes

@@ -1,4 +1,4 @@
-grpcio==1.66.2
+grpcio==1.67.0
 protobuf
 certifi
 setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406

@@ -1,3 +1,3 @@
 accelerate
-torch
-torchaudio
+torch==2.4.1
+torchaudio==2.4.1

@@ -1,4 +1,4 @@
 --extra-index-url https://download.pytorch.org/whl/cu118
 accelerate
-torch
-torchaudio
+torch==2.4.1+cu118
+torchaudio==2.4.1+cu118

@@ -1,3 +1,3 @@
 accelerate
-torch
-torchaudio
+torch==2.4.1
+torchaudio==2.4.1

@@ -1,3 +1,3 @@
-grpcio==1.66.2
+grpcio==1.67.0
 protobuf
 certifi
@@ -19,6 +19,8 @@ from vllm.utils import random_uuid
 from vllm.transformers_utils.tokenizer import get_tokenizer
 from vllm.multimodal.utils import fetch_image
+from vllm.assets.video import VideoAsset
 import base64
+import io
 
 _ONE_DAY_IN_SECONDS = 60 * 60 * 24
 
@@ -217,13 +219,15 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
         # Generate text using the LLM engine
         request_id = random_uuid()
         print(f"Generating text with request_id: {request_id}", file=sys.stderr)
+        multi_modal_data = {}
+        if image_data:
+            multi_modal_data["image"] = image_data
+        if video_data:
+            multi_modal_data["video"] = video_data
         outputs = self.llm.generate(
             {
-                "prompt": prompt,
-                "multi_modal_data": {
-                    "image": image_data if image_data else None,
-                    "video": video_data if video_data else None,
-                } if image_data or video_data else None,
+                "prompt": prompt,
+                "multi_modal_data": multi_modal_data if multi_modal_data else None,
             },
             sampling_params=sampling_params,
             request_id=request_id,
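The hunk above replaces the inline conditional dict with an incrementally built `multi_modal_data` mapping that is passed to vLLM only when it is non-empty. A minimal standalone sketch of the resulting input shape (plain Python; the helper name and example prompt are illustrative, not part of the patch):

```python
def build_generate_input(prompt, image=None, video=None):
    """Assemble the dict-style input handed to vLLM's LLM.generate,
    attaching multi-modal payloads only when they are present."""
    multi_modal_data = {}
    if image is not None:
        multi_modal_data["image"] = image
    if video is not None:
        multi_modal_data["video"] = video
    return {
        "prompt": prompt,
        # The patch passes None, not an empty dict, when no media is attached.
        "multi_modal_data": multi_modal_data or None,
    }

print(build_generate_input("Describe the scene."))
# {'prompt': 'Describe the scene.', 'multi_modal_data': None}
```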
@@ -262,19 +266,22 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
 
     def load_image(self, image_path: str):
         """
-        Load an image from the given file path.
+        Load an image from the given file path or base64 encoded data.
 
         Args:
-            image_path (str): The path to the image file.
+            image_path (str): The path to the image file or base64 encoded data.
 
         Returns:
             Image: The loaded image.
         """
         try:
-            return Image.open(image_path)
+            image_data = base64.b64decode(image_path)
+            image = Image.open(io.BytesIO(image_data))
+            return image
         except Exception as e:
             print(f"Error loading image {image_path}: {e}", file=sys.stderr)
-            return self.load_video(image_path)
+            return None
 
     def load_video(self, video_path: str):
         """
@@ -287,10 +294,15 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
             Video: The loaded video.
         """
         try:
-            video = VideoAsset(name=video_path).np_ndarrays
+            timestamp = str(int(time.time() * 1000))  # Generate timestamp
+            p = f"/tmp/vl-{timestamp}.data"  # Use timestamp in filename
+            with open(p, "wb") as f:
+                f.write(base64.b64decode(video_path))
+            video = VideoAsset(name=p).np_ndarrays
+            os.remove(p)
             return video
         except Exception as e:
-            print(f"Error loading video {image_path}: {e}", file=sys.stderr)
+            print(f"Error loading video {video_path}: {e}", file=sys.stderr)
             return None
 
 async def serve(address):
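The two loaders above now treat their string argument as a base64 payload rather than a filesystem path. A self-contained sketch of the image path only (assumes Pillow is installed; the function name is illustrative, not the backend's API):

```python
import base64
import io

from PIL import Image  # Pillow

def load_image_b64(payload: str):
    """Decode a base64-encoded image payload into a PIL image,
    returning None on failure, mirroring the patched load_image."""
    try:
        return Image.open(io.BytesIO(base64.b64decode(payload)))
    except Exception:
        return None

# Round-trip check: encode a tiny PNG, then decode it back.
buf = io.BytesIO()
Image.new("RGB", (4, 4), "red").save(buf, format="PNG")
img = load_image_b64(base64.b64encode(buf.getvalue()).decode("ascii"))
print(img.size if img else None)  # (4, 4)
```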
@@ -22,7 +22,7 @@ if [ "x${BUILD_TYPE}" == "x" ] && [ "x${FROM_SOURCE}" == "xtrue" ]; then
|
||||
git clone https://github.com/vllm-project/vllm
|
||||
fi
|
||||
pushd vllm
|
||||
uv pip install wheel packaging ninja "setuptools>=49.4.0" numpy typing-extensions pillow setuptools-scm grpcio==1.66.2 protobuf bitsandbytes
|
||||
uv pip install wheel packaging ninja "setuptools>=49.4.0" numpy typing-extensions pillow setuptools-scm grpcio==1.67.0 protobuf bitsandbytes
|
||||
uv pip install -v -r requirements-cpu.txt --extra-index-url https://download.pytorch.org/whl/cpu
|
||||
VLLM_TARGET_DEVICE=cpu python setup.py install
|
||||
popd
|
||||
|
||||
@@ -1,3 +1,3 @@
 accelerate
-torch
+torch==2.4.1
 transformers

@@ -1,5 +1,5 @@
 --extra-index-url https://download.pytorch.org/whl/cu118
 accelerate
-torch
+torch==2.4.1+cu118
 transformers
 bitsandbytes

@@ -1,4 +1,4 @@
 accelerate
-torch
+torch==2.4.1
 transformers
 bitsandbytes

@@ -1,5 +1,5 @@
 --extra-index-url https://download.pytorch.org/whl/rocm6.0
 accelerate
-torch
+torch==2.4.1+rocm6.0
 transformers
 bitsandbytes

@@ -1,4 +1,4 @@
-grpcio==1.66.2
+grpcio==1.67.0
 protobuf
 certifi
 setuptools
@@ -2,6 +2,7 @@ package backend
 
 import (
 	"context"
+	"encoding/json"
 	"fmt"
 	"os"
 	"regexp"
@@ -77,6 +78,16 @@ func ModelInference(ctx context.Context, s string, messages []schema.Message, im
 			switch ct := message.Content.(type) {
 			case string:
 				protoMessages[i].Content = ct
+			case []interface{}:
+				// If using the tokenizer template, in case of multimodal we want to keep the multimodal content as and return only strings here
+				data, _ := json.Marshal(ct)
+				resultData := []struct {
+					Text string `json:"text"`
+				}{}
+				json.Unmarshal(data, &resultData)
+				for _, r := range resultData {
+					protoMessages[i].Content += r.Text
+				}
 			default:
 				return nil, fmt.Errorf("unsupported type for schema.Message.Content for inference: %T", ct)
 			}
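The new `[]interface{}` case flattens an OpenAI-style multimodal content array into plain text by round-tripping through JSON and keeping only the `text` parts. The same logic, sketched in Python purely for illustration (not LocalAI's API):

```python
def flatten_content(content):
    """Collapse an OpenAI-style multimodal content list into plain text,
    keeping only the "text" fields, as the Go case above does."""
    if isinstance(content, str):
        return content
    return "".join(p.get("text", "") for p in content if isinstance(p, dict))

msg = [
    {"type": "text", "text": "What is in this image?"},
    {"type": "image_url", "image_url": {"url": "data:image/png;base64,..."}},
]
print(flatten_content(msg))  # What is in this image?
```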
@@ -28,5 +28,6 @@ The list below is a list of software that integrates with LocalAI.
 - https://github.com/cedriking/spark
 - [Big AGI](https://github.com/enricoros/big-agi) is a powerful web interface entirely running in the browser, supporting LocalAI
 - [Midori AI Subsystem Manager](https://io.midori-ai.xyz/subsystem/manager/) is a powerful docker subsystem for running all types of AI programs
+- [LLPhant](https://github.com/theodo-group/LLPhant) is a PHP library for interacting with LLMs and Vector Databases
 
 Feel free to open up a Pull request (by clicking at the "Edit page" below) to get a page for your project made or if you see a error on one of the pages!
@@ -1,3 +1,3 @@
 {
-  "version": "v2.21.1"
+  "version": "v2.22.0"
 }
docs/themes/hugo-theme-relearn (vendored): 2 changes
Submodule docs/themes/hugo-theme-relearn updated: d5a0ee04ad...007cc20686
@@ -1,4 +1,4 @@
-llama_index==0.11.16
+llama_index==0.11.17
 requests==2.32.3
 weaviate_client==4.8.1
 transformers

@@ -1,2 +1,2 @@
-langchain==0.3.2
-openai==1.51.1
+langchain==0.3.3
+openai==1.51.2

@@ -1,4 +1,4 @@
-langchain==0.3.1
-openai==1.51.1
-chromadb==0.5.11
-llama-index==0.11.16
+langchain==0.3.3
+openai==1.51.2
+chromadb==0.5.13
+llama-index==0.11.17

@@ -1,4 +1,4 @@
-FROM python:3.12-bullseye
+FROM python:3.13-bullseye
 COPY ./langchainpy-localai-example /app
 WORKDIR /app
 RUN pip install --no-cache-dir -r requirements.txt
@@ -1,24 +1,24 @@
-aiohttp==3.10.9
+aiohttp==3.10.10
 aiosignal==1.3.1
 async-timeout==4.0.3
 attrs==24.2.0
 certifi==2024.8.30
-charset-normalizer==3.3.2
+charset-normalizer==3.4.0
 colorama==0.4.6
 dataclasses-json==0.6.7
-debugpy==1.8.6
+debugpy==1.8.7
 frozenlist==1.4.1
 greenlet==3.1.1
 idna==3.10
-langchain==0.3.2
-langchain-community==0.3.1
+langchain==0.3.3
+langchain-community==0.3.2
 marshmallow==3.22.0
 marshmallow-enum==1.5.1
 multidict==6.1.0
 mypy-extensions==1.0.0
 numexpr==2.10.1
-numpy==2.1.1
-openai==1.51.1
+numpy==2.1.2
+openai==1.51.2
 openapi-schema-pydantic==1.2.4
 packaging>=23.2
 pydantic==2.9.2
@@ -30,4 +30,4 @@ tqdm==4.66.5
 typing-inspect==0.9.0
 typing_extensions==4.12.2
 urllib3==2.2.3
-yarl==1.13.1
+yarl==1.15.2
@@ -182,6 +182,34 @@
   - filename: Fireball-Meta-Llama-3.2-8B-Instruct-agent-003-128k-code-DPO.Q4_K_M.gguf
     sha256: 7f45fa79bc6c9847ef9fbad08c3bb5a0f2dbb56d2e2200a5d37b260a57274e55
     uri: huggingface://QuantFactory/Fireball-Meta-Llama-3.2-8B-Instruct-agent-003-128k-code-DPO-GGUF/Fireball-Meta-Llama-3.2-8B-Instruct-agent-003-128k-code-DPO.Q4_K_M.gguf
+- !!merge <<: *llama32
+  name: "llama-3.2-chibi-3b"
+  icon: https://huggingface.co/AELLM/Llama-3.2-Chibi-3B/resolve/main/chibi.jpg
+  urls:
+    - https://huggingface.co/AELLM/Llama-3.2-Chibi-3B
+    - https://huggingface.co/mradermacher/Llama-3.2-Chibi-3B-GGUF
+  description: |
+    Small parameter LLMs are ideal for navigating the complexities of the Japanese language, which involves multiple character systems like kanji, hiragana, and katakana, along with subtle social cues. Despite their smaller size, these models are capable of delivering highly accurate and context-aware results, making them perfect for use in environments where resources are constrained. Whether deployed on mobile devices with limited processing power or in edge computing scenarios where fast, real-time responses are needed, these models strike the perfect balance between performance and efficiency, without sacrificing quality or speed.
+  overrides:
+    parameters:
+      model: Llama-3.2-Chibi-3B.Q4_K_M.gguf
+  files:
+    - filename: Llama-3.2-Chibi-3B.Q4_K_M.gguf
+      sha256: 4b594cd5f66181202713f1cf97ce2f86d0acfa1b862a64930d5f512c45640a2f
+      uri: huggingface://mradermacher/Llama-3.2-Chibi-3B-GGUF/Llama-3.2-Chibi-3B.Q4_K_M.gguf
+- !!merge <<: *llama32
+  name: "llama-3.2-3b-reasoning-time"
+  urls:
+    - https://huggingface.co/mradermacher/Llama-3.2-3B-Reasoning-Time-GGUF
+  description: |
+    Lyte/Llama-3.2-3B-Reasoning-Time is a large language model with 3.2 billion parameters, designed for reasoning and time-based tasks in English. It is based on the Llama architecture and has been quantized using the GGUF format by mradermacher.
+  overrides:
+    parameters:
+      model: Llama-3.2-3B-Reasoning-Time.Q4_K_M.gguf
+  files:
+    - filename: Llama-3.2-3B-Reasoning-Time.Q4_K_M.gguf
+      sha256: 80b10e1a5c6e27f6d8cf08c3472af2b15a9f63ebf8385eedfe8615f85116c73f
+      uri: huggingface://mradermacher/Llama-3.2-3B-Reasoning-Time-GGUF/Llama-3.2-3B-Reasoning-Time.Q4_K_M.gguf
 - &qwen25
   ## Qwen2.5
   name: "qwen2.5-14b-instruct"
@@ -472,6 +500,134 @@
   - filename: qwen2.5-7b-ins-v3-Q4_K_M.gguf
     sha256: 9c23734072714a4886c0386ae0ff07a5e940d67ad52278e2ed689fec44e1e0c8
     uri: huggingface://bartowski/qwen2.5-7b-ins-v3-GGUF/qwen2.5-7b-ins-v3-Q4_K_M.gguf
+- !!merge <<: *qwen25
+  name: "supernova-medius"
+  urls:
+    - https://huggingface.co/arcee-ai/SuperNova-Medius-GGUF
+  description: |
+    Arcee-SuperNova-Medius is a 14B parameter language model developed by Arcee.ai, built on the Qwen2.5-14B-Instruct architecture. This unique model is the result of a cross-architecture distillation pipeline, combining knowledge from both the Qwen2.5-72B-Instruct model and the Llama-3.1-405B-Instruct model. By leveraging the strengths of these two distinct architectures, SuperNova-Medius achieves high-quality instruction-following and complex reasoning capabilities in a mid-sized, resource-efficient form.
+
+    SuperNova-Medius is designed to excel in a variety of business use cases, including customer support, content creation, and technical assistance, while maintaining compatibility with smaller hardware configurations. It’s an ideal solution for organizations looking for advanced capabilities without the high resource requirements of larger models like our SuperNova-70B.
+  overrides:
+    parameters:
+      model: SuperNova-Medius-Q4_K_M.gguf
+  files:
+    - filename: SuperNova-Medius-Q4_K_M.gguf
+      sha256: aaa4bf3451bc900f186fd4b6b3a6a26bfd40c85908f605db76b92e58aadcc864
+      uri: huggingface://arcee-ai/SuperNova-Medius-GGUF/SuperNova-Medius-Q4_K_M.gguf
+- !!merge <<: *qwen25
+  name: "eva-qwen2.5-14b-v0.1-i1"
+  urls:
+    - https://huggingface.co/EVA-UNIT-01/EVA-Qwen2.5-14B-v0.1
+    - https://huggingface.co/mradermacher/EVA-Qwen2.5-14B-v0.1-i1-GGUF
+  description: |
+    A RP/storywriting specialist model, full-parameter finetune of Qwen2.5-14B on mixture of synthetic and natural data.
+    It uses Celeste 70B 0.1 data mixture, greatly expanding it to improve versatility, creativity and "flavor" of the resulting model.
+  overrides:
+    parameters:
+      model: EVA-Qwen2.5-14B-v0.1.i1-Q4_K_M.gguf
+  files:
+    - filename: EVA-Qwen2.5-14B-v0.1.i1-Q4_K_M.gguf
+      sha256: 4e9665d4f83cd97efb42c8427f9c09be93b72e23a0364c91ad0b5de8056f2795
+      uri: huggingface://mradermacher/EVA-Qwen2.5-14B-v0.1-i1-GGUF/EVA-Qwen2.5-14B-v0.1.i1-Q4_K_M.gguf
+- !!merge <<: *qwen25
+  name: "cursorcore-qw2.5-7b-i1"
+  urls:
+    - https://huggingface.co/TechxGenus/CursorCore-QW2.5-7B
+    - https://huggingface.co/mradermacher/CursorCore-QW2.5-7B-i1-GGUF
+  description: |
+    CursorCore is a series of open-source models designed for AI-assisted programming. It aims to support features such as automated editing and inline chat, replicating the core abilities of closed-source AI-assisted programming tools like Cursor. This is achieved by aligning data generated through Programming-Instruct. Please read our paper to learn more.
+  overrides:
+    parameters:
+      model: CursorCore-QW2.5-7B.i1-Q4_K_M.gguf
+  files:
+    - filename: CursorCore-QW2.5-7B.i1-Q4_K_M.gguf
+      sha256: 81868f4edb4ec1a61debde1dbdebc02b407930ee19a6d946ff801afba840a102
+      uri: huggingface://mradermacher/CursorCore-QW2.5-7B-i1-GGUF/CursorCore-QW2.5-7B.i1-Q4_K_M.gguf
+- !!merge <<: *qwen25
+  name: "cursorcore-qw2.5-1.5b-lc-i1"
+  urls:
+    - https://huggingface.co/TechxGenus/CursorCore-QW2.5-1.5B-LC
+    - https://huggingface.co/mradermacher/CursorCore-QW2.5-1.5B-LC-i1-GGUF
+  description: |
+    CursorCore is a series of open-source models designed for AI-assisted programming. It aims to support features such as automated editing and inline chat, replicating the core abilities of closed-source AI-assisted programming tools like Cursor. This is achieved by aligning data generated through Programming-Instruct. Please read our paper to learn more.
+  overrides:
+    parameters:
+      model: CursorCore-QW2.5-1.5B-LC.i1-Q4_K_M.gguf
+  files:
+    - filename: CursorCore-QW2.5-1.5B-LC.i1-Q4_K_M.gguf
+      sha256: 185d720c810f7345ef861ad8eef1199bb15afa8e4f3c03bd5ffd476cfa465127
+      uri: huggingface://mradermacher/CursorCore-QW2.5-1.5B-LC-i1-GGUF/CursorCore-QW2.5-1.5B-LC.i1-Q4_K_M.gguf
+- !!merge <<: *qwen25
+  name: "edgerunner-command-nested-i1"
+  urls:
+    - https://huggingface.co/edgerunner-ai/EdgeRunner-Command-Nested
+    - https://huggingface.co/mradermacher/EdgeRunner-Command-Nested-i1-GGUF
+  description: |
+    EdgeRunner-Command-Nested is an advanced large language model designed specifically for handling complex nested function calls. Initialized from Qwen2.5-7B-Instruct, further enhanced by the integration of the Hermes function call template and additional training on a specialized dataset (based on TinyAgent). This extra dataset focuses on personal domain applications, providing the model with a robust understanding of nested function scenarios that are typical in complex user interactions.
+  overrides:
+    parameters:
+      model: EdgeRunner-Command-Nested.i1-Q4_K_M.gguf
+  files:
+    - filename: EdgeRunner-Command-Nested.i1-Q4_K_M.gguf
+      sha256: a1cc4d2b601dc20e58cbb549bd3e9bc460995840c0aaf1cd3c1cb5414c900ac7
+      uri: huggingface://mradermacher/EdgeRunner-Command-Nested-i1-GGUF/EdgeRunner-Command-Nested.i1-Q4_K_M.gguf
+- !!merge <<: *qwen25
+  name: "tsunami-0.5x-7b-instruct-i1"
+  icon: https://huggingface.co/Tsunami-th/Tsunami-0.5x-7B-Instruct/resolve/main/Tsunami.webp
+  urls:
+    - https://huggingface.co/Tsunami-th/Tsunami-0.5x-7B-Instruct
+    - https://huggingface.co/mradermacher/Tsunami-0.5x-7B-Instruct-i1-GGUF
+  description: |
+    TSUNAMI: Transformative Semantic Understanding and Natural Augmentation Model for Intelligence.
+
+    TSUNAMI full name was created by ChatGPT.
+    infomation
+
+    Tsunami-0.5x-7B-Instruct is Thai Large Language Model that fine-tuned from Qwen2.5-7B around 100,000 rows in Thai dataset.
+  overrides:
+    parameters:
+      model: Tsunami-0.5x-7B-Instruct.i1-Q4_K_M.gguf
+  files:
+    - filename: Tsunami-0.5x-7B-Instruct.i1-Q4_K_M.gguf
+      sha256: 22e2003ecec7f1e91f2e9aaec334613c0f37fb3000d0e628b5a9980e53322fa7
+      uri: huggingface://mradermacher/Tsunami-0.5x-7B-Instruct-i1-GGUF/Tsunami-0.5x-7B-Instruct.i1-Q4_K_M.gguf
+- !!merge <<: *qwen25
+  name: "qevacot-7b-v2"
+  urls:
+    - https://huggingface.co/bunnycore/Qevacot-7B-v2
+    - https://huggingface.co/mradermacher/Qevacot-7B-v2-GGUF
+  description: |
+    This model was merged using the TIES merge method using Qwen/Qwen2.5-7B as a base.
+    The following models were included in the merge:
+    c10x/CoT-2.5
+    EVA-UNIT-01/EVA-Qwen2.5-7B-v0.1
+    huihui-ai/Qwen2.5-7B-Instruct-abliterated-v2
+    Cran-May/T.E-8.1
+  overrides:
+    parameters:
+      model: Qevacot-7B-v2.Q4_K_M.gguf
+  files:
+    - filename: Qevacot-7B-v2.Q4_K_M.gguf
+      sha256: a45b3d3b74bc68a5c7ac07d251cdeff671e64085d1816cd86fca6cfb7eab204e
+      uri: huggingface://mradermacher/Qevacot-7B-v2-GGUF/Qevacot-7B-v2.Q4_K_M.gguf
+- !!merge <<: *qwen25
+  name: "meissa-qwen2.5-7b-instruct"
+  icon: https://huggingface.co/Orion-zhen/Meissa-Qwen2.5-7B-Instruct/resolve/main/meissa.jpg
+  urls:
+    - https://huggingface.co/Orion-zhen/Meissa-Qwen2.5-7B-Instruct
+    - https://huggingface.co/QuantFactory/Meissa-Qwen2.5-7B-Instruct-GGUF
+  description: |
+    Meissa is designated Lambda Orionis, forms Orion's head, and is a multiple star with a combined apparent magnitude of 3.33. Its name means the "shining one".
+    This model is fine tuned over writing and role playing datasets (maybe the first on qwen2.5-7b), aiming to enhance model's performance in novel writing and roleplaying.
+    The model is fine-tuned over Orion-zhen/Qwen2.5-7B-Instruct-Uncensored
+  overrides:
+    parameters:
+      model: Meissa-Qwen2.5-7B-Instruct.Q4_K_M.gguf
+  files:
+    - filename: Meissa-Qwen2.5-7B-Instruct.Q4_K_M.gguf
+      sha256: 632b10d5c0e98bc8d53295886da2d57772a54bb6f6fa01d458e9e8c7fa9c905a
+      uri: huggingface://QuantFactory/Meissa-Qwen2.5-7B-Instruct-GGUF/Meissa-Qwen2.5-7B-Instruct.Q4_K_M.gguf
 - &archfunct
   license: apache-2.0
   tags:
@@ -1412,6 +1568,216 @@
   - filename: NIHAPPY-L3.1-8B-v0.09.Q4_K_M.gguf
     sha256: 9bd46a06093448b143bd2775f0fb1b1b172c851fafdce31289e13b7dfc23a0d7
     uri: huggingface://QuantFactory/NIHAPPY-L3.1-8B-v0.09-GGUF/NIHAPPY-L3.1-8B-v0.09.Q4_K_M.gguf
+- !!merge <<: *llama31
+  name: "llama3.1-flammades-70b"
+  icon: https://huggingface.co/flammenai/Flammades-Mistral-7B/resolve/main/flammades.png?download=true
+  urls:
+    - https://huggingface.co/flammenai/Llama3.1-Flammades-70B
+    - https://huggingface.co/mradermacher/Llama3.1-Flammades-70B-GGUF
+  description: |
+    nbeerbower/Llama3.1-Gutenberg-Doppel-70B finetuned on flammenai/Date-DPO-NoAsterisks and jondurbin/truthy-dpo-v0.1.
+  overrides:
+    parameters:
+      model: Llama3.1-Flammades-70B.Q4_K_M.gguf
+  files:
+    - filename: Llama3.1-Flammades-70B.Q4_K_M.gguf
+      sha256: f602ed006d0059ac87c6ce5904a7cc6f4b4f290886a1049f96b5b2c561ab5a89
+      uri: huggingface://mradermacher/Llama3.1-Flammades-70B-GGUF/Llama3.1-Flammades-70B.Q4_K_M.gguf
+- !!merge <<: *llama31
+  name: "llama3.1-gutenberg-doppel-70b"
+  # chatml
+  url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
+  icon: https://huggingface.co/nbeerbower/Mistral-Small-Gutenberg-Doppel-22B/resolve/main/doppel-header?download=true
+  urls:
+    - https://huggingface.co/nbeerbower/Llama3.1-Gutenberg-Doppel-70B
+    - https://huggingface.co/mradermacher/Llama3.1-Gutenberg-Doppel-70B-GGUF
+  description: |
+    mlabonne/Hermes-3-Llama-3.1-70B-lorablated finetuned on jondurbin/gutenberg-dpo-v0.1 and nbeerbower/gutenberg2-dpo.
+  overrides:
+    parameters:
+      model: Llama3.1-Gutenberg-Doppel-70B.Q4_K_M.gguf
+  files:
+    - filename: Llama3.1-Gutenberg-Doppel-70B.Q4_K_M.gguf
+      sha256: af558f954fa26c5bb75352178cb815bbf268f01c0ca0b96f2149422d4c19511b
+      uri: huggingface://mradermacher/Llama3.1-Gutenberg-Doppel-70B-GGUF/Llama3.1-Gutenberg-Doppel-70B.Q4_K_M.gguf
+- !!merge <<: *llama31
+  name: "llama-3.1-8b-arliai-formax-v1.0-iq-arm-imatrix"
+  url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
+  icon: https://iili.io/2HmlLn2.md.png
+  urls:
+    - https://huggingface.co/Lewdiculous/Llama-3.1-8B-ArliAI-Formax-v1.0-GGUF-IQ-ARM-Imatrix
+  description: |
+    Quants for ArliAI/Llama-3.1-8B-ArliAI-Formax-v1.0.
+
+    "Formax is a model that specializes in following response format instructions. Tell it the format of it's response and it will follow it perfectly. Great for data processing and dataset creation tasks."
+
+    "It is also a highly uncensored model that will follow your instructions very well."
+  overrides:
+    parameters:
+      model: Llama-3.1-8B-ArliAI-Formax-v1.0-Q4_K_M-imat.gguf
+  files:
+    - filename: Llama-3.1-8B-ArliAI-Formax-v1.0-Q4_K_M-imat.gguf
+      sha256: b548ad47caf7008a697afb3556190359529f5a05ec0e4e48ef992c7869e14255
+      uri: huggingface://Lewdiculous/Llama-3.1-8B-ArliAI-Formax-v1.0-GGUF-IQ-ARM-Imatrix/Llama-3.1-8B-ArliAI-Formax-v1.0-Q4_K_M-imat.gguf
+- !!merge <<: *llama31
+  name: "hermes-3-llama-3.1-70b-lorablated"
+  icon: https://cdn-uploads.huggingface.co/production/uploads/61b8e2ba285851687028d395/4Hbw5n68jKUSBQeTqQIeT.png
+  urls:
+    - https://huggingface.co/mlabonne/Hermes-3-Llama-3.1-70B-lorablated
+    - https://huggingface.co/mradermacher/Hermes-3-Llama-3.1-70B-lorablated-GGUF
+  description: |
+    This is an uncensored version of NousResearch/Hermes-3-Llama-3.1-70B using lorablation.
+    The recipe is based on @grimjim's grimjim/Llama-3.1-8B-Instruct-abliterated_via_adapter (special thanks):
+    Extraction: We extract a LoRA adapter by comparing two models: a censored Llama 3 (meta-llama/Meta-Llama-3-70B-Instruct) and an abliterated Llama 3.1 (failspy/Meta-Llama-3.1-70B-Instruct-abliterated).
+    Merge: We merge this new LoRA adapter using task arithmetic to the censored NousResearch/Hermes-3-Llama-3.1-70B to abliterate it.
+  overrides:
+    parameters:
+      model: Hermes-3-Llama-3.1-70B-lorablated.Q4_K_M.gguf
+  files:
+    - filename: Hermes-3-Llama-3.1-70B-lorablated.Q4_K_M.gguf
+      sha256: 9294875ae3b8822855072b0f710ce800536d144cf303a91bcb087c4a307b578d
+      uri: huggingface://mradermacher/Hermes-3-Llama-3.1-70B-lorablated-GGUF/Hermes-3-Llama-3.1-70B-lorablated.Q4_K_M.gguf
+- !!merge <<: *llama31
+  name: "hermes-3-llama-3.1-8b-lorablated"
+  urls:
+    - https://huggingface.co/mlabonne/Hermes-3-Llama-3.1-8B-lorablated-GGUF
+  description: |
+    This is an uncensored version of NousResearch/Hermes-3-Llama-3.1-8B using lorablation.
+    The recipe is simple:
+    Extraction: We extract a LoRA adapter by comparing two models: a censored Llama 3.1 (meta-llama/Meta-Llama-3-8B-Instruct) and an abliterated Llama 3.1 (mlabonne/Meta-Llama-3.1-8B-Instruct-abliterated).
+    Merge: We merge this new LoRA adapter using task arithmetic to the censored NousResearch/Hermes-3-Llama-3.1-8B to abliterate it.
+  overrides:
+    parameters:
+      model: hermes-3-llama-3.1-8b-lorablated.Q4_K_M.gguf
+  files:
+    - filename: hermes-3-llama-3.1-8b-lorablated.Q4_K_M.gguf
+      sha256: 8cff9d399a0583616fe1f290da6daa091ab5c5493d0e173a8fffb45202d79417
+      uri: huggingface://mlabonne/Hermes-3-Llama-3.1-8B-lorablated-GGUF/hermes-3-llama-3.1-8b-lorablated.Q4_K_M.gguf
+- !!merge <<: *llama31
+  name: "doctoraifinetune-3.1-8b-i1"
+  urls:
+    - https://huggingface.co/huzaifa525/Doctoraifinetune-3.1-8B
+    - https://huggingface.co/mradermacher/Doctoraifinetune-3.1-8B-i1-GGUF
+  description: |
+    This is a fine-tuned version of the Meta-Llama-3.1-8B-bnb-4bit model, specifically adapted for the medical field. It has been trained using a dataset that provides extensive information on diseases, symptoms, and treatments, making it ideal for AI-powered healthcare tools such as medical chatbots, virtual assistants, and diagnostic support systems.
+    Key Features
+
+    Disease Diagnosis: Accurately identifies diseases based on symptoms provided by the user.
+    Symptom Analysis: Breaks down and interprets symptoms to provide a comprehensive medical overview.
+    Treatment Recommendations: Suggests treatments and remedies according to medical conditions.
+
+    Dataset
+
+    The model is fine-tuned on 2000 rows from a dataset consisting of 272k rows. This dataset includes rich information about diseases, symptoms, and their corresponding treatments. The model is continuously being updated and will be further trained on the remaining data in future releases to improve accuracy and capabilities.
+  overrides:
+    parameters:
+      model: Doctoraifinetune-3.1-8B.i1-Q4_K_M.gguf
+  files:
+    - filename: Doctoraifinetune-3.1-8B.i1-Q4_K_M.gguf
+      sha256: 282456efcb6c7e54d34ac25ae7fc022a94152ed77281ae4625b9628091e0a3d6
+      uri: huggingface://mradermacher/Doctoraifinetune-3.1-8B-i1-GGUF/Doctoraifinetune-3.1-8B.i1-Q4_K_M.gguf
+- !!merge <<: *llama31
+  name: "astral-fusion-neural-happy-l3.1-8b"
+  urls:
+    - https://huggingface.co/ZeroXClem/Astral-Fusion-Neural-Happy-L3.1-8B
+    - https://huggingface.co/mradermacher/Astral-Fusion-Neural-Happy-L3.1-8B-GGUF
+  description: |
+    Astral-Fusion-Neural-Happy-L3.1-8B is a celestial blend of magic, creativity, and dynamic storytelling. Designed to excel in instruction-following, immersive roleplaying, and magical narrative generation, this model is a fusion of the finest qualities from Astral-Fusion, NIHAPPY, and NeuralMahou. ✨🚀
+
+    This model is perfect for anyone seeking a cosmic narrative experience, with the ability to generate both precise instructional content and fantastical stories in one cohesive framework. Whether you're crafting immersive stories, creating AI roleplaying characters, or working on interactive storytelling, this model brings out the magic. 🌟
+  overrides:
+    parameters:
+      model: Astral-Fusion-Neural-Happy-L3.1-8B.Q4_K_M.gguf
+  files:
+    - filename: Astral-Fusion-Neural-Happy-L3.1-8B.Q4_K_M.gguf
+      sha256: 14a3b07c1723ef1ca24f99382254b1227d95974541e23792a4e7ff621896055d
+      uri: huggingface://mradermacher/Astral-Fusion-Neural-Happy-L3.1-8B-GGUF/Astral-Fusion-Neural-Happy-L3.1-8B.Q4_K_M.gguf
+- !!merge <<: *llama31
+  name: "mahou-1.5-llama3.1-70b-i1"
+  url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
+  icon: https://huggingface.co/flammenai/Mahou-1.0-mistral-7B/resolve/main/mahou1.png
+  urls:
+    - https://huggingface.co/flammenai/Mahou-1.5-llama3.1-70B
+    - https://huggingface.co/mradermacher/Mahou-1.5-llama3.1-70B-i1-GGUF
+  description: |
+    Mahou is designed to provide short messages in a conversational context. It is capable of casual conversation and character roleplay.
+  overrides:
+    parameters:
+      model: Mahou-1.5-llama3.1-70B.i1-Q4_K_M.gguf
+  files:
+    - filename: Mahou-1.5-llama3.1-70B.i1-Q4_K_M.gguf
+      sha256: c2711c4c9c8d011edbeaa391b4418d433e273a318d1de3dbdda9b85baf4996f2
+      uri: huggingface://mradermacher/Mahou-1.5-llama3.1-70B-i1-GGUF/Mahou-1.5-llama3.1-70B.i1-Q4_K_M.gguf
+- !!merge <<: *llama31
+  name: "llama-3.1-nemotron-70b-instruct-hf"
+  urls:
+    - https://huggingface.co/nvidia/Llama-3.1-Nemotron-70B-Instruct-HF
+    - https://huggingface.co/mradermacher/Llama-3.1-Nemotron-70B-Instruct-HF-GGUF
+  description: |
+    Llama-3.1-Nemotron-70B-Instruct is a large language model customized by NVIDIA to improve the helpfulness of LLM generated responses to user queries.
+
+    This model reaches Arena Hard of 85.0, AlpacaEval 2 LC of 57.6 and GPT-4-Turbo MT-Bench of 8.98, which are known to be predictive of LMSys Chatbot Arena Elo
+
+    As of 1 Oct 2024, this model is #1 on all three automatic alignment benchmarks (verified tab for AlpacaEval 2 LC), edging out strong frontier models such as GPT-4o and Claude 3.5 Sonnet.
+
+    This model was trained using RLHF (specifically, REINFORCE), Llama-3.1-Nemotron-70B-Reward and HelpSteer2-Preference prompts on a Llama-3.1-70B-Instruct model as the initial policy.
+
+    Llama-3.1-Nemotron-70B-Instruct-HF has been converted from Llama-3.1-Nemotron-70B-Instruct to support it in the HuggingFace Transformers codebase. Please note that evaluation results might be slightly different from the Llama-3.1-Nemotron-70B-Instruct as evaluated in NeMo-Aligner, which the evaluation results below are based on.
+  overrides:
+    parameters:
+      model: Llama-3.1-Nemotron-70B-Instruct-HF.Q4_K_M.gguf
+  files:
+    - filename: Llama-3.1-Nemotron-70B-Instruct-HF.Q4_K_M.gguf
+      sha256: b6b80001b849e3c59c39b09508c018b35b491a5c7bbafafa23f2fc04243f3e30
+      uri: huggingface://mradermacher/Llama-3.1-Nemotron-70B-Instruct-HF-GGUF/Llama-3.1-Nemotron-70B-Instruct-HF.Q4_K_M.gguf
+- !!merge <<: *llama31
+  name: "l3.1-etherealrainbow-v1.0-rc1-8b"
+  icon: https://huggingface.co/invisietch/L3.1-EtherealRainbow-v1.0-rc1-8B/resolve/main/header.png
+  urls:
+    - https://huggingface.co/invisietch/L3.1-EtherealRainbow-v1.0-rc1-8B
+    - https://huggingface.co/mradermacher/L3.1-EtherealRainbow-v1.0-rc1-8B-GGUF
+  description: |
+    Ethereal Rainbow v1.0 is the sequel to the popular Llama 3 8B merge, EtherealRainbow v0.3. Instead of a straight merge of other peoples' models, v1.0 is a finetune on the Instruct model, using 245 million tokens of training data (approx 177 million of these tokens are my own novel datasets).
+
+    This model is designed to be suitable for creative writing and roleplay, and to push the boundaries of what's possible with an 8B model. This RC is not a finished product, but your feedback will drive the creation of better models.
+
+    This is a release candidate model. It has some known issues and probably some unknown ones too, because the purpose of these early releases is to seek feedback.
+  overrides:
+    parameters:
+      model: L3.1-EtherealRainbow-v1.0-rc1-8B.Q4_K_M.gguf
+  files:
+    - filename: L3.1-EtherealRainbow-v1.0-rc1-8B.Q4_K_M.gguf
+      sha256: c5556b2563112e512acca171415783f0988545b02c1834696c1cc35952def72c
+      uri: huggingface://mradermacher/L3.1-EtherealRainbow-v1.0-rc1-8B-GGUF/L3.1-EtherealRainbow-v1.0-rc1-8B.Q4_K_M.gguf
+- !!merge <<: *llama31
+  name: "theia-llama-3.1-8b-v1"
+  urls:
+    - https://huggingface.co/Chainbase-Labs/Theia-Llama-3.1-8B-v1
+    - https://huggingface.co/QuantFactory/Theia-Llama-3.1-8B-v1-GGUF
+  description: |
+    Theia-Llama-3.1-8B-v1 is an open-source large language model (LLM) trained specifically in the cryptocurrency domain. It was fine-tuned from the Llama-3.1-8B base model using a dataset curated from top 2000 cryptocurrency projects and comprehensive research reports to specialize in crypto-related tasks. Theia-Llama-3.1-8B-v1 has been quantized to optimize it for efficient deployment and reduced memory footprint. It's benchmarked highly for crypto knowledge comprehension and generation, knowledge coverage, and reasoning capabilities. The system prompt used for its training is "You are a helpful assistant who will answer crypto related questions." The recommended parameters for performance include sequence length of 256, temperature of 0, top-k-sampling of -1, top-p of 1, and context window of 39680.
+  overrides:
+    parameters:
+      model: Theia-Llama-3.1-8B-v1.Q4_K_M.gguf
+  files:
+    - filename: Theia-Llama-3.1-8B-v1.Q4_K_M.gguf
+      sha256: db876d033f86f118b49a1f1006e5d078d494c93b73c7e595bd10ca789a0c8fdb
+      uri: huggingface://QuantFactory/Theia-Llama-3.1-8B-v1-GGUF/Theia-Llama-3.1-8B-v1.Q4_K_M.gguf
+- !!merge <<: *llama31
+  icon: https://huggingface.co/Delta-Vector/Baldur-8B/resolve/main/Baldur.jpg
+  name: "baldur-8b"
+  urls:
+    - https://huggingface.co/QuantFactory/Baldur-8B-GGUF
+    - https://huggingface.co/QuantFactory/Baldur-8B-GGUF
+  description: |
+    An finetune of the L3.1 instruct distill done by Arcee, The intent of this model is to have differing prose then my other releases, in my testing it has achieved this and avoiding using common -isms frequently and has a differing flavor then my other models.
+  overrides:
+    parameters:
+      model: Baldur-8B.Q4_K_M.gguf
+  files:
+    - filename: Baldur-8B.Q4_K_M.gguf
+      sha256: 645b393fbac5cd17ccfd66840a3a05c3930e01b903dd1535f0347a74cc443fc7
+      uri: huggingface://QuantFactory/Baldur-8B-GGUF/Baldur-8B.Q4_K_M.gguf
 - &deepseek
   ## Deepseek
   url: "github:mudler/LocalAI/gallery/deepseek.yaml@master"
@@ -1437,6 +1803,20 @@
   - filename: DeepSeek-Coder-V2-Lite-Instruct-Q4_K_M.gguf
     sha256: 50ec78036433265965ed1afd0667c00c71c12aa70bcf383be462cb8e159db6c0
     uri: huggingface://LoneStriker/DeepSeek-Coder-V2-Lite-Instruct-GGUF/DeepSeek-Coder-V2-Lite-Instruct-Q4_K_M.gguf
+- !!merge <<: *deepseek
+  name: "cursorcore-ds-6.7b-i1"
+  urls:
+    - https://huggingface.co/TechxGenus/CursorCore-DS-6.7B
+    - https://huggingface.co/mradermacher/CursorCore-DS-6.7B-i1-GGUF
+  description: |
+    CursorCore is a series of open-source models designed for AI-assisted programming. It aims to support features such as automated editing and inline chat, replicating the core abilities of closed-source AI-assisted programming tools like Cursor. This is achieved by aligning data generated through Programming-Instruct. Please read our paper to learn more.
+  overrides:
+    parameters:
+      model: CursorCore-DS-6.7B.i1-Q4_K_M.gguf
+  files:
+    - filename: CursorCore-DS-6.7B.i1-Q4_K_M.gguf
+      sha256: 71b94496be79e5bc45c23d6aa6c242f5f1d3625b4f00fe91d781d381ef35c538
+      uri: huggingface://mradermacher/CursorCore-DS-6.7B-i1-GGUF/CursorCore-DS-6.7B.i1-Q4_K_M.gguf
 - name: "archangel_sft_pythia2-8b"
   url: "github:mudler/LocalAI/gallery/tuluv2.yaml@master"
   icon: https://gist.github.com/assets/29318529/fe2d8391-dbd1-4b7e-9dc4-7cb97e55bc06
@@ -2022,6 +2402,76 @@
|
||||
- filename: MN-BackyardAI-Party-12B-v1-Q4_K_M-imat.gguf
|
||||
sha256: cea68768dff58b553974b755bb40ef790ab8b86866d9b5c46bc2e6c3311b876a
|
||||
uri: huggingface://Lewdiculous/MN-BackyardAI-Party-12B-v1-GGUF-IQ-ARM-Imatrix/MN-BackyardAI-Party-12B-v1-Q4_K_M-imat.gguf
|
||||
- !!merge <<: *mistral03
  name: "ml-ms-etheris-123b"
  url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
  icon: https://cdn-uploads.huggingface.co/production/uploads/64545af5ec40bbbd01242ca6/ieEjL3TxpDM3WAZQcya6E.png
  urls:
    - https://huggingface.co/Steelskull/ML-MS-Etheris-123B
    - https://huggingface.co/mradermacher/ML-MS-Etheris-123B-GGUF
  description: |
    This model merges the robust storytelling of multiple models while attempting to maintain intelligence. The final model was merged after Model Soup with DELLA to add some special sauce.
    - model: NeverSleep/Lumimaid-v0.2-123B
    - model: TheDrummer/Behemoth-123B-v1
    - model: migtissera/Tess-3-Mistral-Large-2-123B
    - model: anthracite-org/magnum-v2-123b
    Use Mistral, ChatML, or Meth Format
  overrides:
    parameters:
      model: ML-MS-Etheris-123B.Q2_K.gguf
  files:
    - filename: ML-MS-Etheris-123B.Q2_K.gguf
      sha256: a17c5615413b5c9c8d01cf55386573d0acd00e01f6e2bcdf492624c73c593fc3
      uri: huggingface://mradermacher/ML-MS-Etheris-123B-GGUF/ML-MS-Etheris-123B.Q2_K.gguf
- !!merge <<: *mistral03
  name: "mn-lulanum-12b-fix-i1"
  urls:
    - https://huggingface.co/djuna/MN-Lulanum-12B-FIX
    - https://huggingface.co/mradermacher/MN-Lulanum-12B-FIX-i1-GGUF
  description: |
    This model was merged using the della_linear merge method, with unsloth/Mistral-Nemo-Base-2407 as the base.
    The following models were included in the merge:
    - VAGOsolutions/SauerkrautLM-Nemo-12b-Instruct
    - anthracite-org/magnum-v2.5-12b-kto
    - Undi95/LocalC-12B-e2.0
    - NeverSleep/Lumimaid-v0.2-12B
  overrides:
    parameters:
      model: MN-Lulanum-12B-FIX.i1-Q4_K_M.gguf
  files:
    - filename: MN-Lulanum-12B-FIX.i1-Q4_K_M.gguf
      sha256: 7e24d57249059d45bb508565ec3055e585a4e658c1815c67ea92397acc6aa775
      uri: huggingface://mradermacher/MN-Lulanum-12B-FIX-i1-GGUF/MN-Lulanum-12B-FIX.i1-Q4_K_M.gguf
- !!merge <<: *mistral03
  name: "tor-8b"
  icon: https://huggingface.co/Delta-Vector/Tor-8B/resolve/main/FinalTor8B.jpg
  url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
  urls:
    - https://huggingface.co/QuantFactory/Tor-8B-GGUF
  description: |
    An earlier checkpoint of Darkens-8B using the same configuration, which I felt was different enough from its 4-epoch cousin to release. Finetuned on top of the Prune/Distill NeMo 8B done by Nvidia, this model aims to have generally good prose and writing while not falling into claude-isms.
  overrides:
    parameters:
      model: Tor-8B.Q4_K_M.gguf
  files:
    - filename: Tor-8B.Q4_K_M.gguf
      sha256: 9dd64bd886aa7682b6179340449b38feda405b44722ef7ac752cedb807af370e
      uri: huggingface://QuantFactory/Tor-8B-GGUF/Tor-8B.Q4_K_M.gguf
- !!merge <<: *mistral03
  name: "darkens-8b"
  url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
  urls:
    - https://huggingface.co/Delta-Vector/Darkens-8B
    - https://huggingface.co/QuantFactory/Darkens-8B-GGUF
  description: |
    This is the fully cooked, 4-epoch version of Tor-8B. It is an experimental version: despite being trained for 4 epochs, the model feels fresh and new and is not overfit. This model aims to have generally good prose and writing while not falling into claude-isms, and it follows the actions "dialogue" format heavily.
  overrides:
    parameters:
      model: Darkens-8B.Q4_K_M.gguf
  files:
    - filename: Darkens-8B.Q4_K_M.gguf
      sha256: f56a483e10fd00957460adfc16ee462cecac892a4fb44dc59e466e68a360fd42
      uri: huggingface://QuantFactory/Darkens-8B-GGUF/Darkens-8B.Q4_K_M.gguf
- &mudler
  ### START mudler's LocalAI specific-models
  url: "github:mudler/LocalAI/gallery/mudler.yaml@master"
@@ -2649,6 +3099,20 @@
    - filename: Gemma-2-Ataraxy-v3i-9B.Q4_K_M.gguf
      sha256: f14c5b9373d4058f0f812c6c34184addeb4aeeecb02a7bbcf9844d9afc8d0066
      uri: huggingface://QuantFactory/Gemma-2-Ataraxy-v3i-9B-GGUF/Gemma-2-Ataraxy-v3i-9B.Q4_K_M.gguf
- !!merge <<: *gemma
  name: "apollo2-9b"
  url: "github:mudler/LocalAI/gallery/vicuna-chat.yaml@master"
  urls:
    - https://huggingface.co/mradermacher/Apollo2-9B-GGUF
  description: |
    Covering 12 major languages (English, Chinese, French, Hindi, Spanish, Arabic, Russian, Japanese, Korean, German, Italian, and Portuguese) and 38 minor languages so far.
  overrides:
    parameters:
      model: Apollo2-9B.Q4_K_M.gguf
  files:
    - filename: Apollo2-9B.Q4_K_M.gguf
      sha256: 9fdb63f78e574558a4f33782eca88716eea28e90ea3ae36c381769cde6b81e0f
      uri: huggingface://mradermacher/Apollo2-9B-GGUF/Apollo2-9B.Q4_K_M.gguf
- &llama3
  url: "github:mudler/LocalAI/gallery/llama3-instruct.yaml@master"
  icon: https://cdn-uploads.huggingface.co/production/uploads/642cc1c253e76b4c2286c58e/aJJxKus1wP5N-euvHEUq7.png
@@ -4248,6 +4712,19 @@
    - filename: Yi-Coder-9B.Q4_K_M.gguf
      sha256: cff3db8a69c43654e3c2d2984e86ad2791d1d446ec56b24a636ba1ce78363308
      uri: huggingface://QuantFactory/Yi-Coder-9B-GGUF/Yi-Coder-9B.Q4_K_M.gguf
- !!merge <<: *yi-chat
  name: "cursorcore-yi-9b"
  urls:
    - https://huggingface.co/mradermacher/CursorCore-Yi-9B-GGUF
  description: |
    CursorCore is a series of open-source models designed for AI-assisted programming. It aims to support features such as automated editing and inline chat, replicating the core abilities of closed-source AI-assisted programming tools like Cursor. This is achieved by aligning data generated through Programming-Instruct. Please read our paper to learn more.
  overrides:
    parameters:
      model: CursorCore-Yi-9B.Q4_K_M.gguf
  files:
    - filename: CursorCore-Yi-9B.Q4_K_M.gguf
      sha256: 943bf59b34bee34afae8390c1791ccbc7c742e11a4d04d538a699754eb92215e
      uri: huggingface://mradermacher/CursorCore-Yi-9B-GGUF/CursorCore-Yi-9B.Q4_K_M.gguf
- &vicuna-chat
  ## LLama2 and derivatives
  ### Start Fimbulvetr
@@ -5175,6 +5652,26 @@
    - filename: L3-8B-Niitama-v1.i1-Q4_K_M.gguf
      sha256: 8c62f831db2a6e34aa75459fe8a98815199ecc2dac1892a460b8b86363b6826e
      uri: huggingface://mradermacher/L3-8B-Niitama-v1-i1-GGUF/L3-8B-Niitama-v1.i1-Q4_K_M.gguf
- !!merge <<: *llama3
  icon: https://huggingface.co/SicariusSicariiStuff/LLAMA-3_8B_Unaligned_BETA/resolve/main/Images/LLAMA-3_8B_Unaligned_BETA.png
  name: "llama-3_8b_unaligned_beta"
  urls:
    - https://huggingface.co/SicariusSicariiStuff/LLAMA-3_8B_Unaligned_BETA
    - https://huggingface.co/bartowski/LLAMA-3_8B_Unaligned_BETA-GGUF
  description: |
    In the Wild West of the AI world, the real titans never hit their deadlines, no sir!
    The projects that finish on time? They’re the soft ones—basic, surface-level shenanigans. But the serious projects? They’re always delayed. You set a date, then reality hits: not gonna happen, scope creep that mutates the roadmap, unexpected turns of events that derail everything.
    It's only been 4 months since the Alpha was released, and half a year since the project started, but it felt like nearly a decade.
    Deadlines shift, but with each delay, you’re not failing—you’re refining, and becoming more ambitious. A project that keeps getting pushed isn’t late; it’s just gaining weight, becoming something worth building, and truly worth seeing all the way through. The longer it’s delayed, the more serious it gets.
    LLAMA-3_8B_Unaligned is a serious project, and thank god, the Beta is finally here.
    I love you all unconditionally, thanks for all the support and kind words!
  overrides:
    parameters:
      model: LLAMA-3_8B_Unaligned_BETA-Q4_K_M.gguf
  files:
    - filename: LLAMA-3_8B_Unaligned_BETA-Q4_K_M.gguf
      sha256: 5b88fb4537339996c04e4a1b6ef6a2d555c4103b6378e273ae9c6c5e77af67eb
      uri: huggingface://bartowski/LLAMA-3_8B_Unaligned_BETA-GGUF/LLAMA-3_8B_Unaligned_BETA-Q4_K_M.gguf
- &chatml
  ### ChatML
  url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
@@ -5707,6 +6204,40 @@
    - filename: calme-2.1-phi3.5-4b.i1-Q4_K_M.gguf
      sha256: 989eccacd52b6d9ebf2c06c35c363da19aadb125659a10df299b7130bc293e77
      uri: huggingface://mradermacher/calme-2.1-phi3.5-4b-i1-GGUF/calme-2.1-phi3.5-4b.i1-Q4_K_M.gguf
- !!merge <<: *phi-3
  name: "phi-3.5-mini-titanfusion-0.2"
  urls:
    - https://huggingface.co/bunnycore/Phi-3.5-mini-TitanFusion-0.2
    - https://huggingface.co/mradermacher/Phi-3.5-mini-TitanFusion-0.2-GGUF
  description: |
    This model was merged using the TIES merge method, with microsoft/Phi-3.5-mini-instruct as the base.
    The following models were included in the merge:
    - nbeerbower/phi3.5-gutenberg-4B
    - ArliAI/Phi-3.5-mini-3.8B-ArliAI-RPMax-v1.1
    - bunnycore/Phi-3.5-Mini-Hyper
    - bunnycore/Phi-3.5-Mini-Hyper + bunnycore/Phi-3.1-EvolKit-lora
    - bunnycore/Phi-3.5-Mini-Sonet-RP
    - bunnycore/Phi-3.5-mini-TitanFusion-0.1
  overrides:
    parameters:
      model: Phi-3.5-mini-TitanFusion-0.2.Q4_K_M.gguf
  files:
    - filename: Phi-3.5-mini-TitanFusion-0.2.Q4_K_M.gguf
      sha256: 9579305712f2bca246914639c4873acdc1e7bc64ac2c7db0230df4f0ca0ef234
      uri: huggingface://mradermacher/Phi-3.5-mini-TitanFusion-0.2-GGUF/Phi-3.5-mini-TitanFusion-0.2.Q4_K_M.gguf
- !!merge <<: *phi-3
  name: "phi-3-vision:vllm"
  url: "github:mudler/LocalAI/gallery/phi-3-vision.yaml@master"
  description: |
    Phi-3-vision is a lightweight, state-of-the-art open multimodal model built upon datasets that include synthetic data and filtered publicly available websites, with a focus on very high-quality, reasoning-dense data in both text and vision. The model belongs to the Phi-3 model family, and the multimodal version supports a 128K context length (in tokens). The model underwent a rigorous enhancement process, incorporating both supervised fine-tuning and direct preference optimization, to ensure precise instruction adherence and robust safety measures.
- !!merge <<: *phi-3
  name: "phi-3.5-vision:vllm"
  url: "github:mudler/LocalAI/gallery/phi-3-vision.yaml@master"
  overrides:
    parameters:
      model: microsoft/Phi-3.5-vision-instruct
  description: |
    Phi-3.5-vision is a lightweight, state-of-the-art open multimodal model built upon datasets that include synthetic data and filtered publicly available websites, with a focus on very high-quality, reasoning-dense data in both text and vision. The model belongs to the Phi-3 model family, and the multimodal version supports a 128K context length (in tokens). The model underwent a rigorous enhancement process, incorporating both supervised fine-tuning and direct preference optimization, to ensure precise instruction adherence and robust safety measures.
- &hermes-2-pro-mistral
  ### START Hermes
  url: "github:mudler/LocalAI/gallery/hermes-2-pro-mistral.yaml@master"
23
gallery/phi-3-vision.yaml
Normal file
@@ -0,0 +1,23 @@
---
name: "phi3-vision"

config_file: |
  name: phi3-vision
  backend: vllm
  parameters:
    model: microsoft/Phi-3-vision-128k-instruct
    trust_remote_code: true
    max_model_len: 32768
  template:
    chat_message: |-
      <|{{ .RoleName }}|>
      {{.Content}}<|end|>
    chat: >-
      {{.Input}}

      <|assistant|>

    completion: |
      {{.Input}}
    use_tokenizer_template: false
    image: "<|image_{{ add1 .ID }}|>\n{{.Text}}"
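
For reference, a minimal sketch of how the chat_message template above renders with Go's text/template (the ChatMessage struct and the role value here are illustrative, not LocalAI's internal types):

package main

import (
	"os"
	"text/template"
)

// ChatMessage mirrors the two fields the chat_message template refers to
// ({{ .RoleName }} and {{.Content}}); the struct itself is illustrative only.
type ChatMessage struct {
	RoleName string
	Content  string
}

func main() {
	// The chat_message template from the config above.
	const chatMessage = "<|{{ .RoleName }}|>\n{{.Content}}<|end|>"
	tmpl := template.Must(template.New("chat_message").Parse(chatMessage))
	// Prints:
	// <|user|>
	// Describe this image.<|end|>
	_ = tmpl.Execute(os.Stdout, ChatMessage{RoleName: "user", Content: "Describe this image."})
}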
@@ -14,6 +14,10 @@ config_file: |
    system: "System: "
    assistant: "Assistant: "
  f16: true
  stopwords:
    - <|end|>
    - <|endoftext|>
    - <eos>
  template:
    completion: |
      Complete the following sentence: {{.Input}}
@@ -251,8 +251,22 @@ func selectGRPCProcessByHostCapabilities(backend, assetDir string, f16 bool) str

	// No GPU found or no specific binaries found, try to load the CPU variant(s)

	// Select the Fallback by default
	selectedProcess := backendPath(assetDir, LLamaCPPFallback)
	// Select a binary based on availability/capability
	selectedProcess := ""

	// Check if the fallback variant (llama-cpp-fallback) is available and use it as a baseline
	if _, err := os.Stat(backendPath(assetDir, LLamaCPPFallback)); err == nil {
		log.Debug().Msgf("[%s] %s variant available", LLamaCPPFallback, backend)
		selectedProcess = backendPath(assetDir, LLamaCPPFallback)
	}

	// Check if we have a native build (llama-cpp) and use that instead.
	// As a reminder, we do ultimately attempt again with the fallback variant
	// if things fail with what we select here.
	if _, err := os.Stat(backendPath(assetDir, LLamaCPP)); err == nil {
		log.Debug().Msgf("[%s] attempting to load with native variant", backend)
		selectedProcess = backendPath(assetDir, LLamaCPP)
	}

	// If we find any optimized binary, we use that
	if xsysinfo.HasCPUCaps(cpuid.AVX2) {
@@ -269,7 +283,7 @@ func selectGRPCProcessByHostCapabilities(backend, assetDir string, f16 bool) str
		}
	}

	// Check if the binary exists!
	// Safety measure: check if the binary exists, otherwise return an empty string
	if _, err := os.Stat(selectedProcess); err == nil {
		return selectedProcess
	}
@@ -277,6 +291,21 @@ func selectGRPCProcessByHostCapabilities(backend, assetDir string, f16 bool) str
	return ""
}
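
To make the selection order above easier to see in isolation (fallback first, then the native build, then a CPU-optimized variant), here is a standalone sketch using github.com/klauspost/cpuid/v2 directly; the binary file names are illustrative, not the exact asset names LocalAI ships:

package main

import (
	"fmt"
	"os"
	"path/filepath"

	"github.com/klauspost/cpuid/v2"
)

func exists(p string) bool {
	_, err := os.Stat(p)
	return err == nil
}

// pickVariant mirrors the priority above: start from the fallback build,
// prefer the native build when present, then upgrade to an AVX2-optimized
// build if the host CPU supports AVX2.
func pickVariant(assetDir string) string {
	selected := ""
	if p := filepath.Join(assetDir, "llama-cpp-fallback"); exists(p) {
		selected = p
	}
	if p := filepath.Join(assetDir, "llama-cpp"); exists(p) {
		selected = p
	}
	if cpuid.CPU.Supports(cpuid.AVX2) {
		if p := filepath.Join(assetDir, "llama-cpp-avx2"); exists(p) {
			selected = p
		}
	}
	if exists(selected) {
		return selected
	}
	return "" // nothing usable found
}

func main() {
	fmt.Println(pickVariant("./backend-assets/grpc"))
}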

func attemptLoadingOnFailure(backend string, ml *ModelLoader, o *Options, err error) (*Model, error) {
	// XXX: This is too backend specific(llama-cpp), remove this bit or generalize further
	// We failed somehow starting the binary. For instance, could be that we are missing
	// some libraries if running in binary-only mode.
	// In this case, we attempt to load the model with the fallback variant.

	// If not llama-cpp backend, return the error immediately
	if backend != LLamaCPP {
		return nil, err
	}

	log.Error().Msgf("[%s] Failed loading model, trying with fallback '%s', error: %s", backend, LLamaCPPFallback, err.Error())
	return ml.LoadModel(o.modelID, o.model, ml.grpcModel(LLamaCPPFallback, false, o))
}

// starts the grpcModelProcess for the backend, and returns a grpc client
// It also loads the model
func (ml *ModelLoader) grpcModel(backend string, autodetect bool, o *Options) func(string, string, string) (*Model, error) {
@@ -450,19 +479,7 @@ func (ml *ModelLoader) BackendLoader(opts ...Option) (client grpc.Backend, err e

	model, err := ml.LoadModel(o.modelID, o.model, ml.grpcModel(backendToConsume, AutoDetect, o))
	if err != nil {
		// XXX: This is too backend specific(llama-cpp), remove this bit or generalize further
		// We failed somehow starting the binary. For instance, could be that we are missing
		// some libraries if running in binary-only mode.
		// In this case, we attempt to load the model with the fallback variant.

		// If not llama-cpp backend, return error immediately
		if backend != LLamaCPP {
			return nil, err
		}

		// Otherwise attempt with fallback
		log.Error().Msgf("[%s] Failed loading model, trying with fallback '%s'", backend, LLamaCPPFallback)
		model, err = ml.LoadModel(o.modelID, o.model, ml.grpcModel(LLamaCPPFallback, false, o))
		model, err = attemptLoadingOnFailure(backend, ml, o, err)
		if err != nil {
			return nil, err
		}
@@ -3,11 +3,13 @@ package templates

import (
	"bytes"
	"text/template"

	"github.com/Masterminds/sprig/v3"
)

func TemplateMultiModal(templateString string, templateID int, text string) (string, error) {
	// compile the template
	tmpl, err := template.New("template").Parse(templateString)
	tmpl, err := template.New("template").Funcs(sprig.FuncMap()).Parse(templateString)
	if err != nil {
		return "", err
	}
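
The sprig registration matters because templates like the image template in gallery/phi-3-vision.yaml call sprig's add1 to turn the zero-based image index into the one-based <|image_N|> placeholder Phi-3-vision expects. A minimal sketch of that rendering (the input struct shape is an assumption about what TemplateMultiModal passes in):

package main

import (
	"bytes"
	"fmt"
	"text/template"

	"github.com/Masterminds/sprig/v3"
)

func main() {
	// The image template from gallery/phi-3-vision.yaml; add1 comes from sprig,
	// which is why the Funcs(sprig.FuncMap()) call above is needed.
	const imageTmpl = "<|image_{{ add1 .ID }}|>\n{{.Text}}"

	tmpl, err := template.New("image").Funcs(sprig.FuncMap()).Parse(imageTmpl)
	if err != nil {
		panic(err)
	}

	var out bytes.Buffer
	// Assumed input shape: a zero-based image ID plus the prompt text.
	if err := tmpl.Execute(&out, struct {
		ID   int
		Text string
	}{ID: 0, Text: "What is in this photo?"}); err != nil {
		panic(err)
	}
	fmt.Println(out.String())
	// Output:
	// <|image_1|>
	// What is in this photo?
}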