Mirror of https://github.com/mudler/LocalAI.git (synced 2026-02-03 11:13:31 -05:00)
Compare commits: v3.5.4...chore/vulk (26 commits)
| Author | SHA1 | Date |
|---|---|---|
| | 2aed98d14b | |
| | b3a1b3d63f | |
| | e25dd2fe26 | |
| | 002f75ac79 | |
| | 1ce8f35834 | |
| | be8a314496 | |
| | 406d62d6aa | |
| | 9e9f953eec | |
| | 84ebf2a2c9 | |
| | ce5662ba90 | |
| | 9878f27813 | |
| | f2b9452ec4 | |
| | 585da99c52 | |
| | fd4f432079 | |
| | 238c68c57b | |
| | 04fbf5cb82 | |
| | c85d559919 | |
| | b5efc4f89e | |
| | 3f9c09a4c5 | |
| | 4a84660475 | |
| | 737248256e | |
| | 0ae334fc62 | |
| | 36c373b7c9 | |
| | 6afcb932b7 | |
| | 357bf571a3 | |
| | e74ade9ebb | |
.github/workflows/backend.yml (vendored, 1679 changes)
File diff suppressed because it is too large
.github/workflows/image-pr.yml (vendored, 2 changes)
@@ -60,7 +60,7 @@ jobs:
       runs-on: 'ubuntu-latest'
       makeflags: "--jobs=3 --output-sync=target"
     - build-type: 'vulkan'
-      platforms: 'linux/amd64'
+      platforms: 'linux/amd64,linux/arm64'
       tag-latest: 'false'
       tag-suffix: '-vulkan-core'
       runs-on: 'ubuntu-latest'
.github/workflows/image.yml (vendored, 2 changes)
@@ -101,7 +101,7 @@ jobs:
       makeflags: "--jobs=4 --output-sync=target"
       aio: "-aio-gpu-nvidia-cuda-12"
     - build-type: 'vulkan'
-      platforms: 'linux/amd64'
+      platforms: 'linux/amd64,linux/arm64'
       tag-latest: 'auto'
       tag-suffix: '-gpu-vulkan'
       runs-on: 'ubuntu-latest'
.github/workflows/secscan.yaml (vendored, 2 changes)
@@ -18,7 +18,7 @@ jobs:
       if: ${{ github.actor != 'dependabot[bot]' }}
     - name: Run Gosec Security Scanner
       if: ${{ github.actor != 'dependabot[bot]' }}
-      uses: securego/gosec@v2.22.8
+      uses: securego/gosec@v2.22.9
       with:
         # we let the report trigger content trigger a failure using the GitHub Security features.
         args: '-no-fail -fmt sarif -out results.sarif ./...'
Dockerfile (40 changes)
@@ -32,15 +32,27 @@ RUN <<EOT bash
     if [ "${BUILD_TYPE}" = "vulkan" ] && [ "${SKIP_DRIVERS}" = "false" ]; then
         apt-get update && \
         apt-get install -y --no-install-recommends \
-            software-properties-common pciutils wget gpg-agent && \
-        wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add - && \
-        wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list && \
-        apt-get update && \
-        apt-get install -y \
-            vulkan-sdk && \
-        apt-get clean && \
-        rm -rf /var/lib/apt/lists/* && \
-        echo "vulkan" > /run/localai/capability
+            software-properties-common pciutils sudo wget gpg-agent curl xz-utils && \
+        echo "vulkan" > /run/localai/capability && \
+        if [ "amd64" = "$TARGETARCH" ]; then
+            wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add - && \
+            wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list && \
+            apt-get update && \
+            apt-get install -y \
+                vulkan-sdk && \
+            apt-get clean && \
+            rm -rf /var/lib/apt/lists/*
+        fi
+        if [ "arm64" = "$TARGETARCH" ]; then
+            # For ARM64, we need to build the Vulkan SDK manually as there are no packages available
+            mkdir vulkan && cd vulkan && curl -L -o vulkan-sdk.tar.xz https://github.com/mudler/vulkan-sdk-arm/releases/download/1.4.321.1/vulkansdk-ubuntu-22.04-arm-1.4.321.1.tar.xz && \
+            tar -xvf vulkan-sdk.tar.xz && \
+            rm vulkan-sdk.tar.xz && \
+            cd * && \
+            cp -rfv aarch64/* /usr/ && \
+            cd ../.. && \
+            rm -rf vulkan
+        fi
    fi
 EOT
@@ -78,6 +90,16 @@ RUN <<EOT bash
     fi
 EOT

+# https://github.com/NVIDIA/Isaac-GR00T/issues/343
+RUN <<EOT bash
+    if [ "${BUILD_TYPE}" = "cublas" ] && [ "${TARGETARCH}" = "arm64" ]; then
+        wget https://developer.download.nvidia.com/compute/cudss/0.6.0/local_installers/cudss-local-tegra-repo-ubuntu2204-0.6.0_0.6.0-1_arm64.deb && \
+        dpkg -i cudss-local-tegra-repo-ubuntu2204-0.6.0_0.6.0-1_arm64.deb && \
+        cp /var/cudss-local-tegra-repo-ubuntu2204-0.6.0/cudss-*-keyring.gpg /usr/share/keyrings/ && \
+        apt-get update && apt-get -y install cudss
+    fi
+EOT
+
 # If we are building with clblas support, we need the libraries for the builds
 RUN if [ "${BUILD_TYPE}" = "clblas" ] && [ "${SKIP_DRIVERS}" = "false" ]; then \
     apt-get update && \
Makefile (3 changes)
@@ -429,6 +429,9 @@ docker-build-kitten-tts:
 docker-save-kitten-tts: backend-images
 	docker save local-ai-backend:kitten-tts -o backend-images/kitten-tts.tar

+docker-save-chatterbox: backend-images
+	docker save local-ai-backend:chatterbox -o backend-images/chatterbox.tar
+
 docker-build-kokoro:
 	docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:kokoro -f backend/Dockerfile.python --build-arg BACKEND=kokoro ./backend
[file header not captured]
@@ -37,14 +37,27 @@ RUN <<EOT bash
     if [ "${BUILD_TYPE}" = "vulkan" ] && [ "${SKIP_DRIVERS}" = "false" ]; then
         apt-get update && \
         apt-get install -y --no-install-recommends \
-            software-properties-common pciutils wget gpg-agent && \
-        wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add - && \
-        wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list && \
-        apt-get update && \
-        apt-get install -y \
-            vulkan-sdk && \
-        apt-get clean && \
-        rm -rf /var/lib/apt/lists/*
+            software-properties-common pciutils sudo wget gpg-agent curl xz-utils && \
+        echo "vulkan" > /run/localai/capability && \
+        if [ "amd64" = "$TARGETARCH" ]; then
+            wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add - && \
+            wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list && \
+            apt-get update && \
+            apt-get install -y \
+                vulkan-sdk && \
+            apt-get clean && \
+            rm -rf /var/lib/apt/lists/*
+        fi
+        if [ "arm64" = "$TARGETARCH" ]; then
+            # For ARM64, we need to build the Vulkan SDK manually as there are no packages available
+            mkdir vulkan && cd vulkan && curl -L -o vulkan-sdk.tar.xz https://github.com/mudler/vulkan-sdk-arm/releases/download/1.4.321.1/vulkansdk-ubuntu-22.04-arm-1.4.321.1.tar.xz && \
+            tar -xvf vulkan-sdk.tar.xz && \
+            rm vulkan-sdk.tar.xz && \
+            cd * && \
+            cp -rfv aarch64/* /usr/ && \
+            cd ../.. && \
+            rm -rf vulkan
+        fi
     fi
 EOT
[file header not captured]
@@ -85,14 +85,27 @@ RUN <<EOT bash
     if [ "${BUILD_TYPE}" = "vulkan" ] && [ "${SKIP_DRIVERS}" = "false" ]; then
         apt-get update && \
         apt-get install -y --no-install-recommends \
-            software-properties-common pciutils wget gpg-agent && \
-        wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add - && \
-        wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list && \
-        apt-get update && \
-        apt-get install -y \
-            vulkan-sdk && \
-        apt-get clean && \
-        rm -rf /var/lib/apt/lists/*
+            software-properties-common pciutils sudo wget gpg-agent curl xz-utils libxcb1 libx11-6 && \
+        echo "vulkan" > /run/localai/capability && \
+        if [ "amd64" = "$TARGETARCH" ]; then
+            wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add - && \
+            wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list && \
+            apt-get update && \
+            apt-get install -y \
+                vulkan-sdk && \
+            apt-get clean && \
+            rm -rf /var/lib/apt/lists/*
+        fi
+        if [ "arm64" = "$TARGETARCH" ]; then
+            # For ARM64, we need to build the Vulkan SDK manually as there are no packages available
+            mkdir vulkan && cd vulkan && curl -L -o vulkan-sdk.tar.xz https://github.com/mudler/vulkan-sdk-arm/releases/download/1.4.321.1/vulkansdk-ubuntu-22.04-arm-1.4.321.1.tar.xz && \
+            tar -xvf vulkan-sdk.tar.xz && \
+            rm vulkan-sdk.tar.xz && \
+            cd * && \
+            cp -rfv aarch64/* /usr/ && vulkaninfo \
+            cd ../.. && \
+            rm -rf vulkan
+        fi
     fi
 EOT
[file header not captured]
@@ -45,14 +45,27 @@ RUN <<EOT bash
     if [ "${BUILD_TYPE}" = "vulkan" ] && [ "${SKIP_DRIVERS}" = "false" ]; then
         apt-get update && \
         apt-get install -y --no-install-recommends \
-            software-properties-common pciutils wget gpg-agent && \
-        wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add - && \
-        wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list && \
-        apt-get update && \
-        apt-get install -y \
-            vulkan-sdk && \
-        apt-get clean && \
-        rm -rf /var/lib/apt/lists/*
+            software-properties-common pciutils sudo wget gpg-agent curl xz-utils && \
+        echo "vulkan" > /run/localai/capability && \
+        if [ "amd64" = "$TARGETARCH" ]; then
+            wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add - && \
+            wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list && \
+            apt-get update && \
+            apt-get install -y \
+                vulkan-sdk && \
+            apt-get clean && \
+            rm -rf /var/lib/apt/lists/*
+        fi
+        if [ "arm64" = "$TARGETARCH" ]; then
+            # For ARM64, we need to build the Vulkan SDK manually as there are no packages available
+            mkdir vulkan && cd vulkan && curl -L -o vulkan-sdk.tar.xz https://github.com/mudler/vulkan-sdk-arm/releases/download/1.4.321.1/vulkansdk-ubuntu-22.04-arm-1.4.321.1.tar.xz && \
+            tar -xvf vulkan-sdk.tar.xz && \
+            rm vulkan-sdk.tar.xz && \
+            cd * && \
+            cp -rfv aarch64/* /usr/ && \
+            cd ../.. && \
+            rm -rf vulkan
+        fi
     fi
 EOT
[file header not captured]
@@ -1,5 +1,5 @@
-LLAMA_VERSION?=f432d8d83e7407073634c5e4fd81a3d23a10827f
+LLAMA_VERSION?=4807e8f96a61b2adccebd5e57444c94d18de7264
 LLAMA_REPO?=https://github.com/ggerganov/llama.cpp

 CMAKE_ARGS?=
@@ -14,7 +14,7 @@ CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF -DLLAMA_CURL=OFF

 CURRENT_MAKEFILE_DIR := $(dir $(abspath $(lastword $(MAKEFILE_LIST))))
 ifeq ($(NATIVE),false)
-	CMAKE_ARGS+=-DGGML_NATIVE=OFF
+	CMAKE_ARGS+=-DGGML_NATIVE=OFF -DLLAMA_OPENSSL=OFF
 endif
 # If build type is cublas, then we set -DGGML_CUDA=ON to CMAKE_ARGS automatically
 ifeq ($(BUILD_TYPE),cublas)
[file header not captured]
@@ -231,6 +231,7 @@ static void params_parse(const backend::ModelOptions* request,
     params.cpuparams.n_threads = request->threads();
     params.n_gpu_layers = request->ngpulayers();
     params.n_batch = request->nbatch();
+    params.n_ubatch = request->nbatch(); // fixes issue with reranking models being limited to 512 tokens (the default n_ubatch size); allows for setting the maximum input amount of tokens thereby avoiding this error "input is too large to process. increase the physical batch size"
     // Set params.n_parallel by environment variable (LLAMA_PARALLEL), defaults to 1
     //params.n_parallel = 1;
     const char *env_parallel = std::getenv("LLAMACPP_PARALLEL");
@@ -801,11 +802,6 @@
         return grpc::Status(grpc::StatusCode::INVALID_ARGUMENT, "\"documents\" must be a non-empty string array");
     }

-    // Tokenize the query
-    auto tokenized_query = tokenize_input_prompts(ctx_server.vocab, ctx_server.mctx, request->query(), /* add_special */ false, true);
-    if (tokenized_query.size() != 1) {
-        return grpc::Status(grpc::StatusCode::INVALID_ARGUMENT, "\"query\" must contain only a single prompt");
-    }
     // Create and queue the task
     json responses = json::array();
     bool error = false;
@@ -817,10 +813,9 @@
         documents.push_back(request->documents(i));
     }

-    auto tokenized_docs = tokenize_input_prompts(ctx_server.vocab, ctx_server.mctx, documents, /* add_special */ false, true);
-    tasks.reserve(tokenized_docs.size());
-    for (size_t i = 0; i < tokenized_docs.size(); i++) {
-        auto tmp = format_rerank(ctx_server.vocab, tokenized_query[0], tokenized_docs[i]);
+    tasks.reserve(documents.size());
+    for (size_t i = 0; i < documents.size(); i++) {
+        auto tmp = format_rerank(ctx_server.model, ctx_server.vocab, ctx_server.mctx, request->query(), documents[i]);
         server_task task = server_task(SERVER_TASK_TYPE_RERANK);
         task.id = ctx_server.queue_tasks.get_new_id();
         task.index = i;
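The first hunk above raises the micro-batch size (n_ubatch) to match n_batch, so reranking inputs longer than the default 512 tokens no longer fail; the other two hunks drop the server-side pre-tokenization and hand the raw query and documents to format_rerank. For reference, a minimal client-side sketch of the rerank endpoint this code path serves, using only the Python standard library; it assumes a LocalAI instance on localhost:8080 with a reranker model already installed, and the model name below is a placeholder:

```python
# Hedged sketch: POST a query plus candidate documents to LocalAI's
# Jina-style /v1/rerank endpoint and print the scored results.
import json
import urllib.request

payload = {
    "model": "jina-reranker-v1-base-en",  # placeholder model name
    "query": "Organic skincare products for sensitive skin",
    "documents": [
        "Eco-friendly kitchenware for modern homes",
        "Organic skincare range for sensitive skin",
    ],
}

req = urllib.request.Request(
    "http://localhost:8080/v1/rerank",
    data=json.dumps(payload).encode("utf-8"),
    headers={"Content-Type": "application/json"},
)
with urllib.request.urlopen(req) as resp:
    # Each result carries the document index and a relevance score.
    for result in json.load(resp)["results"]:
        print(result["index"], result["relevance_score"])
```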
[file header not captured]
@@ -270,6 +270,7 @@
       nvidia: "cuda12-kokoro"
       intel: "intel-kokoro"
       amd: "rocm-kokoro"
+      nvidia-l4t: "nvidia-l4t-kokoro"
 - &coqui
   urls:
     - https://github.com/idiap/coqui-ai-TTS
@@ -352,6 +353,7 @@
       nvidia: "cuda12-chatterbox"
       metal: "metal-chatterbox"
       default: "cpu-chatterbox"
+      nvidia-l4t: "nvidia-l4t-arm64-chatterbox"
 - &piper
   name: "piper"
   uri: "quay.io/go-skynet/local-ai-backends:latest-piper"
@@ -1049,6 +1051,7 @@
       nvidia: "cuda12-kokoro-development"
       intel: "intel-kokoro-development"
       amd: "rocm-kokoro-development"
+      nvidia-l4t: "nvidia-l4t-kokoro-development"
 - !!merge <<: *kokoro
   name: "cuda11-kokoro-development"
   uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-11-kokoro"
@@ -1074,6 +1077,16 @@
   uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-kokoro"
   mirrors:
     - localai/localai-backends:master-gpu-intel-kokoro
+- !!merge <<: *kokoro
+  name: "nvidia-l4t-kokoro"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-l4t-kokoro"
+  mirrors:
+    - localai/localai-backends:latest-gpu-nvidia-l4t-kokoro
+- !!merge <<: *kokoro
+  name: "nvidia-l4t-kokoro-development"
+  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-l4t-kokoro"
+  mirrors:
+    - localai/localai-backends:master-gpu-nvidia-l4t-kokoro
 - !!merge <<: *kokoro
   name: "cuda11-kokoro"
   uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-11-kokoro"
@@ -1227,6 +1240,7 @@
       nvidia: "cuda12-chatterbox-development"
       metal: "metal-chatterbox-development"
       default: "cpu-chatterbox-development"
+      nvidia-l4t: "nvidia-l4t-arm64-chatterbox"
 - !!merge <<: *chatterbox
   name: "cpu-chatterbox"
   uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-chatterbox"
@@ -1237,6 +1251,16 @@
   uri: "quay.io/go-skynet/local-ai-backends:master-cpu-chatterbox"
   mirrors:
     - localai/localai-backends:master-cpu-chatterbox
+- !!merge <<: *chatterbox
+  name: "nvidia-l4t-arm64-chatterbox"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-l4t-arm64-chatterbox"
+  mirrors:
+    - localai/localai-backends:latest-gpu-nvidia-l4t-arm64-chatterbox
+- !!merge <<: *chatterbox
+  name: "nvidia-l4t-arm64-chatterbox-development"
+  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-l4t-arm64-chatterbox"
+  mirrors:
+    - localai/localai-backends:master-gpu-nvidia-l4t-arm64-chatterbox
 - !!merge <<: *chatterbox
   name: "metal-chatterbox"
   uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-chatterbox"
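The gallery entries above lean on YAML anchors and merge keys: `- !!merge <<: *kokoro` copies every field of the anchored base entry and then overrides selected keys (name, uri, mirrors). A minimal PyYAML sketch of that mechanism, with hypothetical field values:

```python
import yaml  # PyYAML resolves YAML 1.1 merge keys in safe_load

doc = """
- &kokoro
  name: "kokoro"
  uri: "quay.io/go-skynet/local-ai-backends:latest-kokoro"
- !!merge <<: *kokoro
  name: "nvidia-l4t-kokoro"
"""

entries = yaml.safe_load(doc)
# The second entry inherits every field from *kokoro; its own explicit
# keys take precedence over the merged ones.
print(entries[1])
# {'name': 'nvidia-l4t-kokoro',
#  'uri': 'quay.io/go-skynet/local-ai-backends:latest-kokoro'}
```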
[file header not captured]
@@ -14,9 +14,23 @@ import backend_pb2_grpc
 import torch
 import torchaudio as ta
 from chatterbox.tts import ChatterboxTTS
+from chatterbox.mtl_tts import ChatterboxMultilingualTTS
 import grpc

+def is_float(s):
+    """Check if a string can be converted to float."""
+    try:
+        float(s)
+        return True
+    except ValueError:
+        return False
+
+def is_int(s):
+    """Check if a string can be converted to int."""
+    try:
+        int(s)
+        return True
+    except ValueError:
+        return False
+
 _ONE_DAY_IN_SECONDS = 60 * 60 * 24
@@ -47,6 +61,28 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
         if not torch.cuda.is_available() and request.CUDA:
             return backend_pb2.Result(success=False, message="CUDA is not available")

+        options = request.Options
+
+        # empty dict
+        self.options = {}
+
+        # The options are a list of strings in this form optname:optvalue
+        # We are storing all the options in a dict so we can use it later when
+        # generating the images
+        for opt in options:
+            if ":" not in opt:
+                continue
+            key, value = opt.split(":")
+            # if value is a number, convert it to the appropriate type
+            if is_float(value):
+                value = float(value)
+            elif is_int(value):
+                value = int(value)
+            elif value.lower() in ["true", "false"]:
+                value = value.lower() == "true"
+            self.options[key] = value
+
         self.AudioPath = None

         if os.path.isabs(request.AudioPath):
@@ -56,10 +92,14 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
             modelFileBase = os.path.dirname(request.ModelFile)
             # modify LoraAdapter to be relative to modelFileBase
             self.AudioPath = os.path.join(modelFileBase, request.AudioPath)

         try:
             print("Preparing models, please wait", file=sys.stderr)
-            self.model = ChatterboxTTS.from_pretrained(device=device)
+            if "multilingual" in self.options:
+                # remove key from options
+                del self.options["multilingual"]
+                self.model = ChatterboxMultilingualTTS.from_pretrained(device=device)
+            else:
+                self.model = ChatterboxTTS.from_pretrained(device=device)
         except Exception as err:
             return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
         # Implement your logic here for the LoadModel service
@@ -68,12 +108,18 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):

     def TTS(self, request, context):
         try:
-            # Generate audio using ChatterboxTTS
+            kwargs = {}
+
+            if "language" in self.options:
+                kwargs["language_id"] = self.options["language"]
             if self.AudioPath is not None:
-                wav = self.model.generate(request.text, audio_prompt_path=self.AudioPath)
-            else:
-                wav = self.model.generate(request.text)
+                kwargs["audio_prompt_path"] = self.AudioPath
+
+            # add options to kwargs
+            kwargs.update(self.options)
+
+            # Generate audio using ChatterboxTTS
+            wav = self.model.generate(request.text, **kwargs)
             # Save the generated audio
             ta.save(request.dst, wav, self.model.sr)
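Putting the three backend.py hunks together: option strings arrive as optname:optvalue pairs and are parsed into a typed dict at load time, a multilingual option switches the model class, and whatever remains is forwarded to generate() as keyword arguments. A standalone sketch of that flow with hypothetical option values; it assumes each option contains a single ':' (the parser unpacks one split), and the is_int branch is left out because any string int() accepts is also accepted by float(), so the float conversion always wins:

```python
def is_float(s):
    """Check if a string can be converted to float."""
    try:
        float(s)
        return True
    except ValueError:
        return False

def parse_options(option_strings):
    """Mirror of the LoadModel parsing loop above (minus the dead int branch)."""
    options = {}
    for opt in option_strings:
        if ":" not in opt:
            continue
        key, value = opt.split(":")  # assumes exactly one ':' per option
        if is_float(value):
            value = float(value)
        elif value.lower() in ["true", "false"]:
            value = value.lower() == "true"
        options[key] = value
    return options

options = parse_options(["multilingual:true", "language:fr", "exaggeration:0.5"])

# The load path checks only for the key's presence before removing it,
# so even "multilingual:false" would select ChatterboxMultilingualTTS.
use_multilingual = "multilingual" in options
options.pop("multilingual", None)

# kwargs assembly as in the TTS handler, with a hypothetical voice sample.
audio_path = "/models/voice.wav"
kwargs = {}
if "language" in options:
    kwargs["language_id"] = options["language"]
if audio_path is not None:
    kwargs["audio_prompt_path"] = audio_path
kwargs.update(options)  # note: the raw "language" key is forwarded as well

print(use_multilingual, kwargs)
# True {'language_id': 'fr', 'audio_prompt_path': '/models/voice.wav',
#       'language': 'fr', 'exaggeration': 0.5}
```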
[file header not captured]
@@ -15,5 +15,6 @@ fi
 if [ "x${BUILD_PROFILE}" == "xintel" ]; then
     EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match"
 fi
+EXTRA_PIP_INSTALL_FLAGS+=" --no-build-isolation"

 installRequirements
[file header not captured]
@@ -1,6 +1,8 @@
 --extra-index-url https://download.pytorch.org/whl/cpu
 accelerate
-torch==2.6.0
-torchaudio==2.6.0
-transformers==4.46.3
-chatterbox-tts==0.1.2
+torch
+torchaudio
+transformers
+# https://github.com/mudler/LocalAI/pull/6240#issuecomment-3329518289
+chatterbox-tts@git+https://git@github.com/mudler/chatterbox.git@faster
+#chatterbox-tts==0.1.4
[file header not captured]
@@ -2,5 +2,6 @@
 torch==2.6.0+cu118
 torchaudio==2.6.0+cu118
 transformers==4.46.3
-chatterbox-tts==0.1.2
+# https://github.com/mudler/LocalAI/pull/6240#issuecomment-3329518289
+chatterbox-tts@git+https://git@github.com/mudler/chatterbox.git@faster
 accelerate
[file header not captured]
@@ -1,5 +1,6 @@
-torch==2.6.0
-torchaudio==2.6.0
-transformers==4.46.3
-chatterbox-tts==0.1.2
+torch
+torchaudio
+transformers
+# https://github.com/mudler/LocalAI/pull/6240#issuecomment-3329518289
+chatterbox-tts@git+https://git@github.com/mudler/chatterbox.git@faster
 accelerate
[file header not captured]
@@ -1,6 +1,7 @@
 --extra-index-url https://download.pytorch.org/whl/rocm6.0
 torch==2.6.0+rocm6.1
 torchaudio==2.6.0+rocm6.1
-transformers==4.46.3
-chatterbox-tts==0.1.2
+transformers
+# https://github.com/mudler/LocalAI/pull/6240#issuecomment-3329518289
+chatterbox-tts@git+https://git@github.com/mudler/chatterbox.git@faster
 accelerate
[file header not captured]
@@ -2,8 +2,9 @@
 intel-extension-for-pytorch==2.3.110+xpu
 torch==2.3.1+cxx11.abi
 torchaudio==2.3.1+cxx11.abi
-transformers==4.46.3
-chatterbox-tts==0.1.2
+transformers
+# https://github.com/mudler/LocalAI/pull/6240#issuecomment-3329518289
+chatterbox-tts@git+https://git@github.com/mudler/chatterbox.git@faster
 accelerate
 oneccl_bind_pt==2.3.100+xpu
 optimum[openvino]
backend/python/chatterbox/requirements-l4t.txt (new file, 6 lines)
@@ -0,0 +1,6 @@
+--extra-index-url https://pypi.jetson-ai-lab.io/jp6/cu126/
+torch
+torchaudio
+transformers
+chatterbox-tts@git+https://git@github.com/mudler/chatterbox.git@faster
+accelerate
backend/python/kokoro/requirements-l4t.txt (new file, 7 lines)
@@ -0,0 +1,7 @@
+--extra-index-url https://pypi.jetson-ai-lab.io/jp6/cu126/
+torch
+torchaudio
+transformers
+accelerate
+kokoro
+soundfile
[file header not captured]
@@ -1,4 +1,4 @@
-grpcio==1.74.0
+grpcio==1.75.0
 protobuf==6.32.0
 certifi
 setuptools
[file header not captured]
@@ -1,3 +1,3 @@
 {
-    "version": "v3.5.3"
+    "version": "v3.5.4"
 }
[file header not captured]
@@ -335,7 +335,7 @@
   url: "github:mudler/LocalAI/gallery/qwen-image.yaml@master"
   urls:
     - https://huggingface.co/Qwen/Qwen-Image-Edit
-  icon: https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-Image/qwen_image_logo.png
+  icon: https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-Image/qwen_image_edit_logo.png
   license: apache-2.0
   tags:
     - qwen-image
@@ -350,6 +350,26 @@
       cuda: true
       pipeline_type: QwenImageEditPipeline
       enable_parameters: num_inference_steps,image
+- !!merge <<: *qwenimage
+  name: "qwen-image-edit-2509"
+  url: "github:mudler/LocalAI/gallery/qwen-image.yaml@master"
+  urls:
+    - https://huggingface.co/Qwen/Qwen-Image-Edit-2509
+  icon: https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-Image/qwen_image_edit_logo.png
+  license: apache-2.0
+  tags:
+    - qwen-image
+    - gpu
+    - image-to-image
+  description: |
+    Qwen-Image-Edit is a model for image editing, which is based on Qwen-Image.
+  overrides:
+    parameters:
+      model: Qwen/Qwen-Image-Edit-2509
+    diffusers:
+      cuda: true
+      pipeline_type: QwenImageEditPipeline
+      enable_parameters: num_inference_steps,image
 - &gptoss
   name: "gpt-oss-20b"
   url: "github:mudler/LocalAI/gallery/harmony.yaml@master"
@@ -20390,9 +20410,9 @@
     - https://huggingface.co/ggerganov/whisper.cpp
   overrides:
     parameters:
-      model: ggml-whisper-base.bin
+      model: ggml-base.bin
   files:
-    - filename: "ggml-whisper-base.bin"
+    - filename: "ggml-base.bin"
       sha256: "60ed5bc3dd14eea856493d334349b405782ddcaf0028d4b5df4088345fba2efe"
       uri: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.bin"
   description: |
@@ -20437,11 +20457,20 @@
   name: "whisper-large-q5_0"
   overrides:
     parameters:
-      model: ggml-large-q5_0.bin
+      model: ggml-large-v3-q5_0.bin
   files:
-    - filename: "ggml-large-q5_0.bin"
-      uri: "huggingface://ggerganov/whisper.cpp/ggml-large-q5_0.bin"
-      sha256: 3a214837221e4530dbc1fe8d734f302af393eb30bd0ed046042ebf4baf70f6f2
+    - filename: "ggml-large-v3-q5_0.bin"
+      uri: "huggingface://ggerganov/whisper.cpp/ggml-large-v3-q5_0.bin"
+      sha256: d75795ecff3f83b5faa89d1900604ad8c780abd5739fae406de19f23ecd98ad1
+- !!merge <<: *whisper
+  name: "whisper-medium"
+  overrides:
+    parameters:
+      model: ggml-medium.bin
+  files:
+    - filename: "ggml-medium.bin"
+      uri: "huggingface://ggerganov/whisper.cpp/ggml-medium.bin"
+      sha256: 6c14d5adee5f86394037b4e4e8b59f1673b6cee10e3cf0b11bbdbee79c156208
 - !!merge <<: *whisper
   name: "whisper-medium-q5_0"
   overrides:
@@ -20469,15 +20498,6 @@
     - filename: "ggml-small.bin"
       uri: "huggingface://ggerganov/whisper.cpp/ggml-small.bin"
      sha256: 1be3a9b2063867b937e64e2ec7483364a79917e157fa98c5d94b5c1fffea987b
-- !!merge <<: *whisper
-  name: "whisper-small-en-tdrz"
-  overrides:
-    parameters:
-      model: ggml-small.en-tdrz.bin
-  files:
-    - filename: "ggml-small.bin"
-      uri: "huggingface://akashmjn/tinydiarize-whisper.cpp/ggml-small.en-tdrz.bin"
-      sha256: ceac3ec06d1d98ef71aec665283564631055fd6129b79d8e1be4f9cc33cc54b4
 - !!merge <<: *whisper
   name: "whisper-small-en-q5_1"
   overrides:
@@ -20550,6 +20570,51 @@
     - filename: "ggml-tiny.en-q8_0.bin"
       uri: "huggingface://ggerganov/whisper.cpp/ggml-tiny.en-q8_0.bin"
       sha256: 5bc2b3860aa151a4c6e7bb095e1fcce7cf12c7b020ca08dcec0c6d018bb7dd94
+- !!merge <<: *whisper
+  name: "whisper-large"
+  overrides:
+    parameters:
+      model: ggml-large-v3.bin
+  files:
+    - filename: "ggml-large-v3.bin"
+      uri: "huggingface://ggerganov/whisper.cpp/ggml-large-v3.bin"
+      sha256: 64d182b440b98d5203c4f9bd541544d84c605196c4f7b845dfa11fb23594d1e2
+- !!merge <<: *whisper
+  name: "whisper-large-q5_0"
+  overrides:
+    parameters:
+      model: ggml-large-v3-q5_0.bin
+  files:
+    - filename: "ggml-large-v3-q5_0.bin"
+      uri: "huggingface://ggerganov/whisper.cpp/ggml-large-v3-q5_0.bin"
+      sha256: d75795ecff3f83b5faa89d1900604ad8c780abd5739fae406de19f23ecd98ad1
+- !!merge <<: *whisper
+  name: "whisper-large-turbo"
+  overrides:
+    parameters:
+      model: ggml-large-v3-turbo.bin
+  files:
+    - filename: "ggml-large-v3-turbo.bin"
+      uri: "huggingface://ggerganov/whisper.cpp/ggml-large-v3-turbo.bin"
+      sha256: 1fc70f774d38eb169993ac391eea357ef47c88757ef72ee5943879b7e8e2bc69
+- !!merge <<: *whisper
+  name: "whisper-large-turbo-q5_0"
+  overrides:
+    parameters:
+      model: ggml-large-v3-turbo-q5_0.bin
+  files:
+    - filename: "ggml-large-v3-turbo-q5_0.bin"
+      uri: "huggingface://ggerganov/whisper.cpp/ggml-large-v3-turbo-q5_0.bin"
+      sha256: 394221709cd5ad1f40c46e6031ca61bce88931e6e088c188294c6d5a55ffa7e2
+- !!merge <<: *whisper
+  name: "whisper-large-turbo-q8_0"
+  overrides:
+    parameters:
+      model: ggml-large-v3-turbo-q8_0.bin
+  files:
+    - filename: "ggml-large-v3-turbo-q8_0.bin"
+      uri: "huggingface://ggerganov/whisper.cpp/ggml-large-v3-turbo-q8_0.bin"
+      sha256: 317eb69c11673c9de1e1f0d459b253999804ec71ac4c23c17ecf5fbe24e259a1
 ## Bert embeddings (llama3.2 drop-in)
 - !!merge <<: *llama32
   name: "bert-embeddings"
[file header not captured]
@@ -95,6 +95,7 @@ var knownModelsNameSuffixToSkip []string = []string{
 	".DS_Store",
 	".",
 	".safetensors",
 	".bin",
+	".partial",
 	".tar.gz",
 }