Mirror of https://github.com/mudler/LocalAI.git (synced 2026-02-04 03:32:40 -05:00)

Compare commits (40 commits)
Commits (SHA1):
8fb95686af
4132085c01
c14f1ffcfd
07cca4b69a
dd927c36f6
052f42e926
30d43588ab
d21ec22f74
04fecd634a
33c14198db
967c2727e3
f41f30ad92
e77340e8a5
d51a3090f7
1bf3bc932c
564a47da4e
c37ee93ff2
f4b65db4e7
f5fa8e6649
570e39bdcf
2ebe37b671
dca685f784
84ebf2a2c9
ce5662ba90
9878f27813
f2b9452ec4
585da99c52
fd4f432079
238c68c57b
04fbf5cb82
c85d559919
b5efc4f89e
3f9c09a4c5
4a84660475
737248256e
0ae334fc62
36c373b7c9
6afcb932b7
357bf571a3
e74ade9ebb
.github/workflows/backend.yml (vendored, 26 lines changed)

@@ -489,6 +489,18 @@ jobs:
           backend: "diffusers"
           dockerfile: "./backend/Dockerfile.python"
           context: "./backend"
+        - build-type: 'l4t'
+          cuda-major-version: "12"
+          cuda-minor-version: "0"
+          platforms: 'linux/arm64'
+          tag-latest: 'auto'
+          tag-suffix: '-gpu-nvidia-l4t-kokoro'
+          runs-on: 'ubuntu-24.04-arm'
+          base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
+          skip-drivers: 'true'
+          backend: "kokoro"
+          dockerfile: "./backend/Dockerfile.python"
+          context: "./backend"
           # SYCL additional backends
         - build-type: 'intel'
           cuda-major-version: ""

@@ -870,7 +882,7 @@ jobs:
           backend: "rfdetr"
           dockerfile: "./backend/Dockerfile.python"
           context: "./backend"
-        - build-type: 'cublas'
+        - build-type: 'l4t'
           cuda-major-version: "12"
           cuda-minor-version: "0"
           platforms: 'linux/arm64'

@@ -943,6 +955,18 @@ jobs:
           backend: "exllama2"
           dockerfile: "./backend/Dockerfile.python"
           context: "./backend"
+        - build-type: 'l4t'
+          cuda-major-version: "12"
+          cuda-minor-version: "0"
+          platforms: 'linux/arm64'
+          skip-drivers: 'true'
+          tag-latest: 'auto'
+          tag-suffix: '-gpu-nvidia-l4t-arm64-chatterbox'
+          base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
+          runs-on: 'ubuntu-24.04-arm'
+          backend: "chatterbox"
+          dockerfile: "./backend/Dockerfile.python"
+          context: "./backend"
           # runs out of space on the runner
           # - build-type: 'hipblas'
           #   cuda-major-version: ""
.github/workflows/secscan.yaml (vendored, 2 lines changed)

@@ -18,7 +18,7 @@ jobs:
         if: ${{ github.actor != 'dependabot[bot]' }}
       - name: Run Gosec Security Scanner
         if: ${{ github.actor != 'dependabot[bot]' }}
-        uses: securego/gosec@v2.22.8
+        uses: securego/gosec@v2.22.9
        with:
          # we let the report trigger content trigger a failure using the GitHub Security features.
          args: '-no-fail -fmt sarif -out results.sarif ./...'
Dockerfile (10 lines changed)

@@ -78,6 +78,16 @@ RUN <<EOT bash
     fi
 EOT
 
+# https://github.com/NVIDIA/Isaac-GR00T/issues/343
+RUN <<EOT bash
+    if [ "${BUILD_TYPE}" = "cublas" ] && [ "${TARGETARCH}" = "arm64" ]; then
+        wget https://developer.download.nvidia.com/compute/cudss/0.6.0/local_installers/cudss-local-tegra-repo-ubuntu2204-0.6.0_0.6.0-1_arm64.deb && \
+        dpkg -i cudss-local-tegra-repo-ubuntu2204-0.6.0_0.6.0-1_arm64.deb && \
+        cp /var/cudss-local-tegra-repo-ubuntu2204-0.6.0/cudss-*-keyring.gpg /usr/share/keyrings/ && \
+        apt-get update && apt-get -y install cudss
+    fi
+EOT
+
 # If we are building with clblas support, we need the libraries for the builds
 RUN if [ "${BUILD_TYPE}" = "clblas" ] && [ "${SKIP_DRIVERS}" = "false" ]; then \
     apt-get update && \
Makefile (3 lines changed)

@@ -429,6 +429,9 @@ docker-build-kitten-tts:
 docker-save-kitten-tts: backend-images
 	docker save local-ai-backend:kitten-tts -o backend-images/kitten-tts.tar
 
+docker-save-chatterbox: backend-images
+	docker save local-ai-backend:chatterbox -o backend-images/chatterbox.tar
+
 docker-build-kokoro:
 	docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:kokoro -f backend/Dockerfile.python --build-arg BACKEND=kokoro ./backend
@@ -1,5 +1,5 @@
 
-LLAMA_VERSION?=f432d8d83e7407073634c5e4fd81a3d23a10827f
+LLAMA_VERSION?=d64c8104f090b27b1f99e8da5995ffcfa6b726e2
 LLAMA_REPO?=https://github.com/ggerganov/llama.cpp
 
 CMAKE_ARGS?=

@@ -14,7 +14,7 @@ CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF -DLLAMA_CURL=OFF
 
 CURRENT_MAKEFILE_DIR := $(dir $(abspath $(lastword $(MAKEFILE_LIST))))
 ifeq ($(NATIVE),false)
-	CMAKE_ARGS+=-DGGML_NATIVE=OFF
+	CMAKE_ARGS+=-DGGML_NATIVE=OFF -DLLAMA_OPENSSL=OFF
 endif
 # If build type is cublas, then we set -DGGML_CUDA=ON to CMAKE_ARGS automatically
 ifeq ($(BUILD_TYPE),cublas)
@@ -231,6 +231,7 @@ static void params_parse(const backend::ModelOptions* request,
     params.cpuparams.n_threads = request->threads();
     params.n_gpu_layers = request->ngpulayers();
     params.n_batch = request->nbatch();
+    params.n_ubatch = request->nbatch(); // fixes issue with reranking models being limited to 512 tokens (the default n_ubatch size); allows for setting the maximum input amount of tokens thereby avoiding this error "input is too large to process. increase the physical batch size"
     // Set params.n_parallel by environment variable (LLAMA_PARALLEL), defaults to 1
     //params.n_parallel = 1;
     const char *env_parallel = std::getenv("LLAMACPP_PARALLEL");

@@ -801,11 +802,6 @@ public:
            return grpc::Status(grpc::StatusCode::INVALID_ARGUMENT, "\"documents\" must be a non-empty string array");
        }
 
-       // Tokenize the query
-       auto tokenized_query = tokenize_input_prompts(ctx_server.vocab, ctx_server.mctx, request->query(), /* add_special */ false, true);
-       if (tokenized_query.size() != 1) {
-           return grpc::Status(grpc::StatusCode::INVALID_ARGUMENT, "\"query\" must contain only a single prompt");
-       }
        // Create and queue the task
        json responses = json::array();
        bool error = false;

@@ -817,10 +813,9 @@ public:
            documents.push_back(request->documents(i));
        }
 
-       auto tokenized_docs = tokenize_input_prompts(ctx_server.vocab, ctx_server.mctx, documents, /* add_special */ false, true);
-       tasks.reserve(tokenized_docs.size());
-       for (size_t i = 0; i < tokenized_docs.size(); i++) {
-           auto tmp = format_rerank(ctx_server.vocab, tokenized_query[0], tokenized_docs[i]);
+       tasks.reserve(documents.size());
+       for (size_t i = 0; i < documents.size(); i++) {
+           auto tmp = format_rerank(ctx_server.model, ctx_server.vocab, ctx_server.mctx, request->query(), documents[i]);
            server_task task = server_task(SERVER_TASK_TYPE_RERANK);
            task.id = ctx_server.queue_tasks.get_new_id();
            task.index = i;
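Note on the rerank change above: the server no longer pre-tokenizes the query and documents itself; format_rerank now takes the model, vocab, multimodal context, raw query, and each raw document. A minimal sketch of exercising this path through LocalAI's Jina-compatible rerank endpoint, assuming a local instance on localhost:8080 and an installed reranker model (the model name here is illustrative, not part of this diff):

    import json
    import urllib.request

    payload = {
        "model": "jina-reranker-v1-base-en",  # assumed reranker model name
        "query": "Organic skincare products for sensitive skin",
        "documents": [
            "Eco-friendly kitchenware for modern homes",
            "Natural organic skincare range for sensitive skin",
        ],
    }
    req = urllib.request.Request(
        "http://localhost:8080/v1/rerank",  # assumed local instance
        data=json.dumps(payload).encode("utf-8"),
        headers={"Content-Type": "application/json"},
    )
    with urllib.request.urlopen(req) as resp:
        # One rerank task is queued per document, as in the loop above;
        # results carry each document's index and a relevance score.
        print(json.load(resp))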
@@ -8,7 +8,7 @@ JOBS?=$(shell nproc --ignore=1)
 
 # whisper.cpp version
 WHISPER_REPO?=https://github.com/ggml-org/whisper.cpp
-WHISPER_CPP_VERSION?=44fa2f647cf2a6953493b21ab83b50d5f5dbc483
+WHISPER_CPP_VERSION?=7849aff7a2e1f4234aa31b01a1870906d5431959
 
 CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF
@@ -270,6 +270,7 @@
       nvidia: "cuda12-kokoro"
       intel: "intel-kokoro"
       amd: "rocm-kokoro"
+      nvidia-l4t: "nvidia-l4t-kokoro"
 - &coqui
   urls:
     - https://github.com/idiap/coqui-ai-TTS

@@ -352,6 +353,7 @@
       nvidia: "cuda12-chatterbox"
       metal: "metal-chatterbox"
       default: "cpu-chatterbox"
+      nvidia-l4t: "nvidia-l4t-arm64-chatterbox"
 - &piper
   name: "piper"
   uri: "quay.io/go-skynet/local-ai-backends:latest-piper"

@@ -1049,6 +1051,7 @@
       nvidia: "cuda12-kokoro-development"
       intel: "intel-kokoro-development"
       amd: "rocm-kokoro-development"
+      nvidia-l4t: "nvidia-l4t-kokoro-development"
 - !!merge <<: *kokoro
   name: "cuda11-kokoro-development"
   uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-11-kokoro"

@@ -1074,6 +1077,16 @@
   uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-kokoro"
   mirrors:
     - localai/localai-backends:master-gpu-intel-kokoro
+- !!merge <<: *kokoro
+  name: "nvidia-l4t-kokoro"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-l4t-kokoro"
+  mirrors:
+    - localai/localai-backends:latest-gpu-nvidia-l4t-kokoro
+- !!merge <<: *kokoro
+  name: "nvidia-l4t-kokoro-development"
+  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-l4t-kokoro"
+  mirrors:
+    - localai/localai-backends:master-gpu-nvidia-l4t-kokoro
 - !!merge <<: *kokoro
   name: "cuda11-kokoro"
   uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-11-kokoro"

@@ -1227,6 +1240,7 @@
       nvidia: "cuda12-chatterbox-development"
       metal: "metal-chatterbox-development"
       default: "cpu-chatterbox-development"
+      nvidia-l4t: "nvidia-l4t-arm64-chatterbox"
 - !!merge <<: *chatterbox
   name: "cpu-chatterbox"
   uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-chatterbox"

@@ -1237,6 +1251,16 @@
   uri: "quay.io/go-skynet/local-ai-backends:master-cpu-chatterbox"
   mirrors:
     - localai/localai-backends:master-cpu-chatterbox
+- !!merge <<: *chatterbox
+  name: "nvidia-l4t-arm64-chatterbox"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-l4t-arm64-chatterbox"
+  mirrors:
+    - localai/localai-backends:latest-gpu-nvidia-l4t-arm64-chatterbox
+- !!merge <<: *chatterbox
+  name: "nvidia-l4t-arm64-chatterbox-development"
+  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-l4t-arm64-chatterbox"
+  mirrors:
+    - localai/localai-backends:master-gpu-nvidia-l4t-arm64-chatterbox
 - !!merge <<: *chatterbox
   name: "metal-chatterbox"
   uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-chatterbox"
@@ -1,4 +1,4 @@
 bark==0.1.5
-grpcio==1.74.0
+grpcio==1.75.1
 protobuf
 certifi
@@ -14,9 +14,23 @@ import backend_pb2_grpc
 import torch
 import torchaudio as ta
 from chatterbox.tts import ChatterboxTTS
+from chatterbox.mtl_tts import ChatterboxMultilingualTTS
 import grpc
 
+def is_float(s):
+    """Check if a string can be converted to float."""
+    try:
+        float(s)
+        return True
+    except ValueError:
+        return False
+
+def is_int(s):
+    """Check if a string can be converted to int."""
+    try:
+        int(s)
+        return True
+    except ValueError:
+        return False
 
 _ONE_DAY_IN_SECONDS = 60 * 60 * 24

@@ -47,6 +61,28 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
         if not torch.cuda.is_available() and request.CUDA:
             return backend_pb2.Result(success=False, message="CUDA is not available")
 
+        options = request.Options
+
+        # empty dict
+        self.options = {}
+
+        # The options are a list of strings in this form optname:optvalue
+        # We are storing all the options in a dict so we can use it later when
+        # generating the images
+        for opt in options:
+            if ":" not in opt:
+                continue
+            key, value = opt.split(":")
+            # if value is a number, convert it to the appropriate type
+            if is_float(value):
+                value = float(value)
+            elif is_int(value):
+                value = int(value)
+            elif value.lower() in ["true", "false"]:
+                value = value.lower() == "true"
+            self.options[key] = value
+
         self.AudioPath = None
 
         if os.path.isabs(request.AudioPath):
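The Start() hunk above coerces each optname:optvalue string into a typed dict entry. A standalone sketch of that coercion, with a made-up option list (note the order: the float check runs before the int check, so integer-looking values come back as floats):

    def is_float(s):
        try:
            float(s)
            return True
        except ValueError:
            return False

    def is_int(s):
        try:
            int(s)
            return True
        except ValueError:
            return False

    def parse_options(options):
        # Same coercion order as the backend: float, then int, then bool, else str.
        parsed = {}
        for opt in options:
            if ":" not in opt:
                continue
            key, value = opt.split(":")
            if is_float(value):
                value = float(value)
            elif is_int(value):
                value = int(value)
            elif value.lower() in ["true", "false"]:
                value = value.lower() == "true"
            parsed[key] = value
        return parsed

    # Illustrative values, e.g. from a model YAML `options:` list:
    print(parse_options(["multilingual:true", "exaggeration:0.7", "language:fr"]))
    # -> {'multilingual': True, 'exaggeration': 0.7, 'language': 'fr'}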
@@ -56,10 +92,14 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
             modelFileBase = os.path.dirname(request.ModelFile)
             # modify LoraAdapter to be relative to modelFileBase
             self.AudioPath = os.path.join(modelFileBase, request.AudioPath)
 
         try:
             print("Preparing models, please wait", file=sys.stderr)
-            self.model = ChatterboxTTS.from_pretrained(device=device)
+            if "multilingual" in self.options:
+                # remove key from options
+                del self.options["multilingual"]
+                self.model = ChatterboxMultilingualTTS.from_pretrained(device=device)
+            else:
+                self.model = ChatterboxTTS.from_pretrained(device=device)
         except Exception as err:
             return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
         # Implement your logic here for the LoadModel service

@@ -68,12 +108,18 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
     def TTS(self, request, context):
         try:
-            # Generate audio using ChatterboxTTS
+            kwargs = {}
+
+            if "language" in self.options:
+                kwargs["language_id"] = self.options["language"]
             if self.AudioPath is not None:
-                wav = self.model.generate(request.text, audio_prompt_path=self.AudioPath)
-            else:
-                wav = self.model.generate(request.text)
+                kwargs["audio_prompt_path"] = self.AudioPath
 
+            # add options to kwargs
+            kwargs.update(self.options)
+
+            # Generate audio using ChatterboxTTS
+            wav = self.model.generate(request.text, **kwargs)
             # Save the generated audio
             ta.save(request.dst, wav, self.model.sr)
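With the kwargs plumbing above, any remaining option (for example language, mapped to language_id) is forwarded to model.generate(). A hedged sketch of driving this through LocalAI's /tts endpoint, assuming a local instance and a chatterbox-backed model named "chatterbox" (both assumptions, not part of this diff):

    import json
    import urllib.request

    req = urllib.request.Request(
        "http://localhost:8080/tts",  # assumed local instance
        data=json.dumps({"model": "chatterbox", "input": "Bonjour le monde"}).encode("utf-8"),
        headers={"Content-Type": "application/json"},
    )
    # The backend generates the waveform and saves it with torchaudio;
    # the server returns the audio bytes.
    with urllib.request.urlopen(req) as resp, open("out.wav", "wb") as f:
        f.write(resp.read())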
@@ -15,5 +15,6 @@
 if [ "x${BUILD_PROFILE}" == "xintel" ]; then
     EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match"
 fi
+EXTRA_PIP_INSTALL_FLAGS+=" --no-build-isolation"
 
 installRequirements
@@ -1,6 +1,8 @@
 --extra-index-url https://download.pytorch.org/whl/cpu
 accelerate
-torch==2.6.0
-torchaudio==2.6.0
-transformers==4.46.3
-chatterbox-tts==0.1.2
+torch
+torchaudio
+transformers
+# https://github.com/mudler/LocalAI/pull/6240#issuecomment-3329518289
+chatterbox-tts@git+https://git@github.com/mudler/chatterbox.git@faster
+#chatterbox-tts==0.1.4
@@ -2,5 +2,6 @@
 torch==2.6.0+cu118
 torchaudio==2.6.0+cu118
 transformers==4.46.3
-chatterbox-tts==0.1.2
+# https://github.com/mudler/LocalAI/pull/6240#issuecomment-3329518289
+chatterbox-tts@git+https://git@github.com/mudler/chatterbox.git@faster
 accelerate
@@ -1,5 +1,6 @@
-torch==2.6.0
-torchaudio==2.6.0
-transformers==4.46.3
-chatterbox-tts==0.1.2
+torch
+torchaudio
+transformers
+# https://github.com/mudler/LocalAI/pull/6240#issuecomment-3329518289
+chatterbox-tts@git+https://git@github.com/mudler/chatterbox.git@faster
 accelerate
@@ -1,6 +1,7 @@
 --extra-index-url https://download.pytorch.org/whl/rocm6.0
 torch==2.6.0+rocm6.1
 torchaudio==2.6.0+rocm6.1
-transformers==4.46.3
-chatterbox-tts==0.1.2
+transformers
+# https://github.com/mudler/LocalAI/pull/6240#issuecomment-3329518289
+chatterbox-tts@git+https://git@github.com/mudler/chatterbox.git@faster
 accelerate
@@ -2,8 +2,9 @@
 intel-extension-for-pytorch==2.3.110+xpu
 torch==2.3.1+cxx11.abi
 torchaudio==2.3.1+cxx11.abi
-transformers==4.46.3
-chatterbox-tts==0.1.2
+transformers
+# https://github.com/mudler/LocalAI/pull/6240#issuecomment-3329518289
+chatterbox-tts@git+https://git@github.com/mudler/chatterbox.git@faster
 accelerate
 oneccl_bind_pt==2.3.100+xpu
 optimum[openvino]
backend/python/chatterbox/requirements-l4t.txt (new file, 6 lines)

@@ -0,0 +1,6 @@
+--extra-index-url https://pypi.jetson-ai-lab.io/jp6/cu126/
+torch
+torchaudio
+transformers
+chatterbox-tts@git+https://git@github.com/mudler/chatterbox.git@faster
+accelerate
@@ -1,3 +1,3 @@
-grpcio==1.74.0
+grpcio==1.75.1
 protobuf
 grpcio-tools
@@ -1,4 +1,4 @@
-grpcio==1.74.0
+grpcio==1.75.1
 protobuf
 certifi
 packaging==24.1
@@ -1,5 +1,5 @@
 setuptools
-grpcio==1.74.0
+grpcio==1.75.1
 pillow
 protobuf
 certifi
@@ -1,4 +1,4 @@
-grpcio==1.74.0
+grpcio==1.75.1
 protobuf
 certifi
 wheel
backend/python/kokoro/requirements-l4t.txt (new file, 7 lines)

@@ -0,0 +1,7 @@
+--extra-index-url https://pypi.jetson-ai-lab.io/jp6/cu126/
+torch
+torchaudio
+transformers
+accelerate
+kokoro
+soundfile
@@ -1,3 +1,3 @@
-grpcio==1.74.0
+grpcio==1.75.1
 protobuf
 certifi
@@ -1,4 +1,4 @@
-grpcio==1.74.0
+grpcio==1.75.1
 protobuf==6.32.0
 certifi
 setuptools
@@ -1,4 +1,4 @@
-grpcio==1.74.0
+grpcio==1.75.1
 protobuf
 certifi
 setuptools
@@ -1,3 +1,3 @@
 {
-    "version": "v3.5.3"
+    "version": "v3.5.4"
 }
gallery/granite4.yaml (new file, 48 lines)

@@ -0,0 +1,48 @@
+---
+name: "granite-3.2"
+
+config_file: |
+  backend: "llama-cpp"
+  mmap: true
+  template:
+    chat_message: |
+      <|start_of_role|>{{ .RoleName }}<|end_of_role|>
+      {{ if .FunctionCall -}}
+      <tool_call>
+      {{ else if eq .RoleName "tool" -}}
+      <tool_response>
+      {{ end -}}
+      {{ if .Content -}}
+      {{.Content }}
+      {{ end -}}
+      {{ if eq .RoleName "tool" -}}
+      </tool_response>
+      {{ end -}}
+      {{ if .FunctionCall -}}
+      {{toJson .FunctionCall}}
+      </tool_call>
+      {{ end -}}
+      <|end_of_text|>
+    function: |
+      <|start_of_role|>system<|end_of_role|>
+      You are a helpful AI assistant with access to the following tools. When a tool is required to answer the user's query, respond with <|tool_call|> followed by a JSON list of tools used. If a tool does not exist in the provided list of tools, notify the user that you do not have the ability to fulfill the request.
+
+      Write the response to the user's input by strictly aligning with the facts in the provided documents. If the information needed to answer the question is not available in the documents, inform the user that the question cannot be answered based on the available data.
+      {{range .Functions}}
+      {'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
+      {{end}}
+      For each function call return a json object with function name and arguments
+      {{.Input -}}
+      <|start_of_role|>assistant<|end_of_role|>
+    chat: |
+      {{.Input -}}
+      <|start_of_role|>assistant<|end_of_role|>
+    completion: |
+      {{.Input}}
+  context_size: 8192
+  f16: true
+  stopwords:
+    - '<|im_end|>'
+    - '<dummy32000>'
+    - '</s>'
+    - '<|end_of_text|>'
@@ -1,4 +1,68 @@
 ---
+- &granite4
+  url: "github:mudler/LocalAI/gallery/granite4.yaml@master"
+  name: "ibm-granite_granite-4.0-h-small"
+  license: apache-2.0
+  icon: https://cdn-avatars.huggingface.co/v1/production/uploads/639bcaa2445b133a4e942436/CEW-OjXkRkDNmTxSu8Egh.png
+  tags:
+    - gguf
+    - GPU
+    - CPU
+    - text-to-text
+  urls:
+    - https://huggingface.co/ibm-granite/granite-4.0-h-small
+    - https://huggingface.co/bartowski/ibm-granite_granite-4.0-h-small-GGUF
+  description: |
+    Granite-4.0-H-Small is a 32B parameter long-context instruct model finetuned from Granite-4.0-H-Small-Base using a combination of open source instruction datasets with permissive license and internally collected synthetic datasets. This model is developed using a diverse set of techniques with a structured chat format, including supervised finetuning, model alignment using reinforcement learning, and model merging. Granite 4.0 instruct models feature improved instruction following (IF) and tool-calling capabilities, making them more effective in enterprise applications.
+  overrides:
+    parameters:
+      model: ibm-granite_granite-4.0-h-small-Q4_K_M.gguf
+  files:
+    - filename: ibm-granite_granite-4.0-h-small-Q4_K_M.gguf
+      sha256: c59ce76239bd5794acdbdf88616dfc296247f4e78792a9678d4b3e24966ead69
+      uri: huggingface://bartowski/ibm-granite_granite-4.0-h-small-GGUF/ibm-granite_granite-4.0-h-small-Q4_K_M.gguf
+- !!merge <<: *granite4
+  name: "ibm-granite_granite-4.0-h-tiny"
+  urls:
+    - https://huggingface.co/ibm-granite/granite-4.0-h-tiny
+    - https://huggingface.co/bartowski/ibm-granite_granite-4.0-h-tiny-GGUF
+  description: |
+    Granite-4.0-H-Tiny is a 7B parameter long-context instruct model finetuned from Granite-4.0-H-Tiny-Base using a combination of open source instruction datasets with permissive license and internally collected synthetic datasets. This model is developed using a diverse set of techniques with a structured chat format, including supervised finetuning, model alignment using reinforcement learning, and model merging. Granite 4.0 instruct models feature improved instruction following (IF) and tool-calling capabilities, making them more effective in enterprise applications.
+  overrides:
+    parameters:
+      model: ibm-granite_granite-4.0-h-tiny-Q4_K_M.gguf
+  files:
+    - filename: ibm-granite_granite-4.0-h-tiny-Q4_K_M.gguf
+      sha256: 33a689fe7f35b14ebab3ae599b65aaa3ed8548c393373b1b0eebee36c653146f
+      uri: huggingface://bartowski/ibm-granite_granite-4.0-h-tiny-GGUF/ibm-granite_granite-4.0-h-tiny-Q4_K_M.gguf
+- !!merge <<: *granite4
+  name: "ibm-granite_granite-4.0-h-micro"
+  urls:
+    - https://huggingface.co/ibm-granite/granite-4.0-h-micro
+    - https://huggingface.co/bartowski/ibm-granite_granite-4.0-h-micro-GGUF
+  description: |
+    Granite-4.0-H-Micro is a 3B parameter long-context instruct model finetuned from Granite-4.0-H-Micro-Base using a combination of open source instruction datasets with permissive license and internally collected synthetic datasets. This model is developed using a diverse set of techniques with a structured chat format, including supervised finetuning, model alignment using reinforcement learning, and model merging. Granite 4.0 instruct models feature improved instruction following (IF) and tool-calling capabilities, making them more effective in enterprise applications.
+  overrides:
+    parameters:
+      model: ibm-granite_granite-4.0-h-micro-Q4_K_M.gguf
+  files:
+    - filename: ibm-granite_granite-4.0-h-micro-Q4_K_M.gguf
+      sha256: 48376d61449687a56b3811a418d92cc0e8e77b4d96ec13eb6c9d9503968c9f20
+      uri: huggingface://bartowski/ibm-granite_granite-4.0-h-micro-GGUF/ibm-granite_granite-4.0-h-micro-Q4_K_M.gguf
+- !!merge <<: *granite4
+  name: "ibm-granite_granite-4.0-micro"
+  urls:
+    - https://huggingface.co/ibm-granite/granite-4.0-micro
+    - https://huggingface.co/bartowski/ibm-granite_granite-4.0-micro-GGUF
+  description: |
+    Granite-4.0-Micro is a 3B parameter long-context instruct model finetuned from Granite-4.0-Micro-Base using a combination of open source instruction datasets with permissive license and internally collected synthetic datasets. This model is developed using a diverse set of techniques with a structured chat format, including supervised finetuning, model alignment using reinforcement learning, and model merging. Granite 4.0 instruct models feature improved instruction following (IF) and tool-calling capabilities, making them more effective in enterprise applications.
+  overrides:
+    parameters:
+      model: ibm-granite_granite-4.0-micro-Q4_K_M.gguf
+  files:
+    - filename: ibm-granite_granite-4.0-micro-Q4_K_M.gguf
+      sha256: bd9d7b4795b9dc44e3e81aeae93bb5d8e6b891b7e823be5bf9910ed3ac060baf
+      uri: huggingface://bartowski/ibm-granite_granite-4.0-micro-GGUF/ibm-granite_granite-4.0-micro-Q4_K_M.gguf
 - &ernie
   url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
   name: "baidu_ernie-4.5-21b-a3b-thinking"
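The granite4 template from gallery/granite4.yaml renders tool definitions into the system prompt and wraps calls in <tool_call> markers. A hedged sketch of exercising one of the new gallery entries through LocalAI's OpenAI-compatible chat endpoint (the local address and the get_weather tool are illustrative assumptions, not part of this diff):

    import json
    import urllib.request

    payload = {
        "model": "ibm-granite_granite-4.0-h-micro",  # gallery name from above
        "messages": [{"role": "user", "content": "What's the weather in Rome?"}],
        "tools": [{
            "type": "function",
            "function": {
                # Hypothetical tool, only to show {{range .Functions}} rendering.
                "name": "get_weather",
                "description": "Get the current weather for a city",
                "parameters": {
                    "type": "object",
                    "properties": {"city": {"type": "string"}},
                    "required": ["city"],
                },
            },
        }],
    }
    req = urllib.request.Request(
        "http://localhost:8080/v1/chat/completions",  # assumed local instance
        data=json.dumps(payload).encode("utf-8"),
        headers={"Content-Type": "application/json"},
    )
    with urllib.request.urlopen(req) as resp:
        print(json.load(resp)["choices"][0]["message"])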
@@ -335,7 +399,7 @@
   url: "github:mudler/LocalAI/gallery/qwen-image.yaml@master"
   urls:
     - https://huggingface.co/Qwen/Qwen-Image-Edit
-  icon: https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-Image/qwen_image_logo.png
+  icon: https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-Image/qwen_image_edit_logo.png
   license: apache-2.0
   tags:
     - qwen-image

@@ -350,6 +414,26 @@
       cuda: true
       pipeline_type: QwenImageEditPipeline
       enable_parameters: num_inference_steps,image
+- !!merge <<: *qwenimage
+  name: "qwen-image-edit-2509"
+  url: "github:mudler/LocalAI/gallery/qwen-image.yaml@master"
+  urls:
+    - https://huggingface.co/Qwen/Qwen-Image-Edit-2509
+  icon: https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-Image/qwen_image_edit_logo.png
+  license: apache-2.0
+  tags:
+    - qwen-image
+    - gpu
+    - image-to-image
+  description: |
+    Qwen-Image-Edit is a model for image editing, which is based on Qwen-Image.
+  overrides:
+    parameters:
+      model: Qwen/Qwen-Image-Edit-2509
+    diffusers:
+      cuda: true
+      pipeline_type: QwenImageEditPipeline
+      enable_parameters: num_inference_steps,image
 - &gptoss
   name: "gpt-oss-20b"
   url: "github:mudler/LocalAI/gallery/harmony.yaml@master"
@@ -20390,9 +20474,9 @@
     - https://huggingface.co/ggerganov/whisper.cpp
   overrides:
     parameters:
-      model: ggml-whisper-base.bin
+      model: ggml-base.bin
   files:
-    - filename: "ggml-whisper-base.bin"
+    - filename: "ggml-base.bin"
       sha256: "60ed5bc3dd14eea856493d334349b405782ddcaf0028d4b5df4088345fba2efe"
       uri: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.bin"
   description: |

@@ -20437,11 +20521,20 @@
   name: "whisper-large-q5_0"
   overrides:
     parameters:
-      model: ggml-large-q5_0.bin
+      model: ggml-large-v3-q5_0.bin
   files:
-    - filename: "ggml-large-q5_0.bin"
-      uri: "huggingface://ggerganov/whisper.cpp/ggml-large-q5_0.bin"
-      sha256: 3a214837221e4530dbc1fe8d734f302af393eb30bd0ed046042ebf4baf70f6f2
+    - filename: "ggml-large-v3-q5_0.bin"
+      uri: "huggingface://ggerganov/whisper.cpp/ggml-large-v3-q5_0.bin"
+      sha256: d75795ecff3f83b5faa89d1900604ad8c780abd5739fae406de19f23ecd98ad1
+- !!merge <<: *whisper
+  name: "whisper-medium"
+  overrides:
+    parameters:
+      model: ggml-medium.bin
+  files:
+    - filename: "ggml-medium.bin"
+      uri: "huggingface://ggerganov/whisper.cpp/ggml-medium.bin"
+      sha256: 6c14d5adee5f86394037b4e4e8b59f1673b6cee10e3cf0b11bbdbee79c156208
 - !!merge <<: *whisper
   name: "whisper-medium-q5_0"
   overrides:

@@ -20469,15 +20562,6 @@
     - filename: "ggml-small.bin"
       uri: "huggingface://ggerganov/whisper.cpp/ggml-small.bin"
       sha256: 1be3a9b2063867b937e64e2ec7483364a79917e157fa98c5d94b5c1fffea987b
-- !!merge <<: *whisper
-  name: "whisper-small-en-tdrz"
-  overrides:
-    parameters:
-      model: ggml-small.en-tdrz.bin
-  files:
-    - filename: "ggml-small.bin"
-      uri: "huggingface://akashmjn/tinydiarize-whisper.cpp/ggml-small.en-tdrz.bin"
-      sha256: ceac3ec06d1d98ef71aec665283564631055fd6129b79d8e1be4f9cc33cc54b4
 - !!merge <<: *whisper
   name: "whisper-small-en-q5_1"
   overrides:

@@ -20550,6 +20634,51 @@
     - filename: "ggml-tiny.en-q8_0.bin"
       uri: "huggingface://ggerganov/whisper.cpp/ggml-tiny.en-q8_0.bin"
       sha256: 5bc2b3860aa151a4c6e7bb095e1fcce7cf12c7b020ca08dcec0c6d018bb7dd94
+- !!merge <<: *whisper
+  name: "whisper-large"
+  overrides:
+    parameters:
+      model: ggml-large-v3.bin
+  files:
+    - filename: "ggml-large-v3.bin"
+      uri: "huggingface://ggerganov/whisper.cpp/ggml-large-v3.bin"
+      sha256: 64d182b440b98d5203c4f9bd541544d84c605196c4f7b845dfa11fb23594d1e2
+- !!merge <<: *whisper
+  name: "whisper-large-q5_0"
+  overrides:
+    parameters:
+      model: ggml-large-v3-q5_0.bin
+  files:
+    - filename: "ggml-large-v3-q5_0.bin"
+      uri: "huggingface://ggerganov/whisper.cpp/ggml-large-v3-q5_0.bin"
+      sha256: d75795ecff3f83b5faa89d1900604ad8c780abd5739fae406de19f23ecd98ad1
+- !!merge <<: *whisper
+  name: "whisper-large-turbo"
+  overrides:
+    parameters:
+      model: ggml-large-v3-turbo.bin
+  files:
+    - filename: "ggml-large-v3-turbo.bin"
+      uri: "huggingface://ggerganov/whisper.cpp/ggml-large-v3-turbo.bin"
+      sha256: 1fc70f774d38eb169993ac391eea357ef47c88757ef72ee5943879b7e8e2bc69
+- !!merge <<: *whisper
+  name: "whisper-large-turbo-q5_0"
+  overrides:
+    parameters:
+      model: ggml-large-v3-turbo-q5_0.bin
+  files:
+    - filename: "ggml-large-v3-turbo-q5_0.bin"
+      uri: "huggingface://ggerganov/whisper.cpp/ggml-large-v3-turbo-q5_0.bin"
+      sha256: 394221709cd5ad1f40c46e6031ca61bce88931e6e088c188294c6d5a55ffa7e2
+- !!merge <<: *whisper
+  name: "whisper-large-turbo-q8_0"
+  overrides:
+    parameters:
+      model: ggml-large-v3-turbo-q8_0.bin
+  files:
+    - filename: "ggml-large-v3-turbo-q8_0.bin"
+      uri: "huggingface://ggerganov/whisper.cpp/ggml-large-v3-turbo-q8_0.bin"
+      sha256: 317eb69c11673c9de1e1f0d459b253999804ec71ac4c23c17ecf5fbe24e259a1
 ## Bert embeddings (llama3.2 drop-in)
 - !!merge <<: *llama32
   name: "bert-embeddings"
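The whisper fixes above point the base and large-q5_0 entries at file names that actually exist in the ggerganov/whisper.cpp repository and add medium, large-v3, and large-v3-turbo variants. A hedged sketch of using one of them through LocalAI's OpenAI-compatible transcription endpoint (assumed local instance; sample.wav is a placeholder; requests is a third-party dependency):

    import requests  # pip install requests

    with open("sample.wav", "rb") as f:
        resp = requests.post(
            "http://localhost:8080/v1/audio/transcriptions",  # assumed endpoint
            files={"file": ("sample.wav", f, "audio/wav")},
            data={"model": "whisper-large-turbo"},  # gallery name from above
        )
    print(resp.json())  # {"text": "..."} on success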
@@ -95,6 +95,7 @@ var knownModelsNameSuffixToSkip []string = []string{
 	".DS_Store",
 	".",
 	".safetensors",
+	".bin",
 	".partial",
 	".tar.gz",
 }
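For illustration, the suffix-skip behavior of the Go list above, sketched in Python (assumes the post-change list; the sample filenames are made up):

    SKIP_SUFFIXES = (".DS_Store", ".", ".safetensors", ".bin", ".partial", ".tar.gz")

    def is_skipped(name: str) -> bool:
        # str.endswith accepts a tuple, matching any suffix in the list.
        return name.endswith(SKIP_SUFFIXES)

    for f in ["model.gguf", "weights.safetensors", "download.partial"]:
        print(f, "skip" if is_skipped(f) else "keep")
    # model.gguf keep / weights.safetensors skip / download.partial skip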