From b1fc5acd4a7a3d234ca95f62b653350d1c9441e4 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sun, 20 Jul 2025 22:52:45 +0200 Subject: [PATCH] feat: split whisper from main binary (#5863) * feat: split whisper from main binary Signed-off-by: Ettore Di Giacinto * Cleanup makefile Signed-off-by: Ettore Di Giacinto * Add backend builds (missing only darwin) Signed-off-by: Ettore Di Giacinto * Test CI Signed-off-by: Ettore Di Giacinto * Add whisper backend to test runs Signed-off-by: Ettore Di Giacinto * Fixups Signed-off-by: Ettore Di Giacinto * Make sure we have runtime libs Signed-off-by: Ettore Di Giacinto * Less grpc on the main Dockerfile Signed-off-by: Ettore Di Giacinto * fixups Signed-off-by: Ettore Di Giacinto * Fix hipblas build Signed-off-by: Ettore Di Giacinto * Add whisper to index Signed-off-by: Ettore Di Giacinto * Re-enable CI Signed-off-by: Ettore Di Giacinto * Adapt auto-bumper Signed-off-by: Ettore Di Giacinto --------- Signed-off-by: Ettore Di Giacinto --- .github/workflows/backend.yml | 97 +++++++++ .github/workflows/bump_deps.yaml | 2 +- .github/workflows/test.yml | 4 +- Dockerfile | 56 +---- Makefile | 198 ++---------------- backend/Dockerfile.go | 11 + backend/go/whisper/Makefile | 131 ++++++++++++ backend/go/{transcribe => }/whisper/main.go | 0 backend/go/whisper/package.sh | 52 +++++ backend/go/whisper/run.sh | 14 ++ .../go/{transcribe => }/whisper/whisper.go | 0 backend/index.yaml | 79 ++++++- 12 files changed, 405 insertions(+), 239 deletions(-) create mode 100644 backend/go/whisper/Makefile rename backend/go/{transcribe => }/whisper/main.go (100%) create mode 100755 backend/go/whisper/package.sh create mode 100755 backend/go/whisper/run.sh rename backend/go/{transcribe => }/whisper/whisper.go (100%) diff --git a/.github/workflows/backend.yml b/.github/workflows/backend.yml index 66cdabf06..cf7536e81 100644 --- a/.github/workflows/backend.yml +++ b/.github/workflows/backend.yml @@ -745,6 +745,103 @@ jobs: backend: "stablediffusion-ggml" dockerfile: "./backend/Dockerfile.go" context: "./" + # whisper + - build-type: '' + cuda-major-version: "" + cuda-minor-version: "" + platforms: 'linux/amd64' + tag-latest: 'true' + tag-suffix: '-cpu-whisper' + runs-on: 'ubuntu-latest' + base-image: "ubuntu:22.04" + skip-drivers: 'false' + backend: "whisper" + dockerfile: "./backend/Dockerfile.go" + context: "./" + - build-type: 'cublas' + cuda-major-version: "12" + cuda-minor-version: "0" + platforms: 'linux/amd64' + tag-latest: 'true' + tag-suffix: '-gpu-nvidia-cuda-12-whisper' + runs-on: 'ubuntu-latest' + base-image: "ubuntu:22.04" + skip-drivers: 'false' + backend: "whisper" + dockerfile: "./backend/Dockerfile.go" + context: "./" + - build-type: 'cublas' + cuda-major-version: "11" + cuda-minor-version: "7" + platforms: 'linux/amd64' + tag-latest: 'true' + tag-suffix: '-gpu-nvidia-cuda-11-whisper' + runs-on: 'ubuntu-latest' + base-image: "ubuntu:22.04" + skip-drivers: 'false' + backend: "whisper" + dockerfile: "./backend/Dockerfile.go" + context: "./" + - build-type: 'sycl_f32' + cuda-major-version: "" + cuda-minor-version: "" + platforms: 'linux/amd64' + tag-latest: 'true' + tag-suffix: '-gpu-intel-sycl-f32-whisper' + runs-on: 'ubuntu-latest' + base-image: "quay.io/go-skynet/intel-oneapi-base:latest" + skip-drivers: 'false' + backend: "whisper" + dockerfile: "./backend/Dockerfile.go" + context: "./" + - build-type: 'sycl_f16' + cuda-major-version: "" + cuda-minor-version: "" + platforms: 'linux/amd64' + tag-latest: 'true' + tag-suffix: '-gpu-intel-sycl-f16-whisper' + runs-on: 'ubuntu-latest' + base-image: "quay.io/go-skynet/intel-oneapi-base:latest" + skip-drivers: 'false' + backend: "whisper" + dockerfile: "./backend/Dockerfile.go" + context: "./" + - build-type: 'vulkan' + cuda-major-version: "" + cuda-minor-version: "" + platforms: 'linux/amd64' + tag-latest: 'true' + tag-suffix: '-gpu-vulkan-whisper' + runs-on: 'ubuntu-latest' + base-image: "ubuntu:22.04" + skip-drivers: 'false' + backend: "whisper" + dockerfile: "./backend/Dockerfile.go" + context: "./" + - build-type: 'cublas' + cuda-major-version: "12" + cuda-minor-version: "0" + platforms: 'linux/arm64' + skip-drivers: 'true' + tag-latest: 'auto' + tag-suffix: '-nvidia-l4t-arm64-whisper' + base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0" + runs-on: 'ubuntu-24.04-arm' + backend: "whisper" + dockerfile: "./backend/Dockerfile.go" + context: "./" + - build-type: 'hipblas' + cuda-major-version: "" + cuda-minor-version: "" + platforms: 'linux/amd64' + tag-latest: 'auto' + tag-suffix: '-gpu-hipblas-whisper' + base-image: "rocm/dev-ubuntu-22.04:6.1" + runs-on: 'ubuntu-latest' + skip-drivers: 'false' + backend: "whisper" + dockerfile: "./backend/Dockerfile.go" + context: "./" llama-cpp-darwin: runs-on: macOS-14 strategy: diff --git a/.github/workflows/bump_deps.yaml b/.github/workflows/bump_deps.yaml index d26627b0f..f15d62f7f 100644 --- a/.github/workflows/bump_deps.yaml +++ b/.github/workflows/bump_deps.yaml @@ -16,7 +16,7 @@ jobs: - repository: "ggml-org/whisper.cpp" variable: "WHISPER_CPP_VERSION" branch: "master" - file: "Makefile" + file: "backend/go/whisper/Makefile" - repository: "PABannier/bark.cpp" variable: "BARKCPP_VERSION" branch: "main" diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index e2ac48697..cc6ef333d 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -103,7 +103,7 @@ jobs: make -C backend/python/transformers - make backends/llama-cpp backends/piper backends/stablediffusion-ggml + make backends/llama-cpp backends/piper backends/whisper backends/stablediffusion-ggml env: CUDA_VERSION: 12-4 - name: Test @@ -168,7 +168,7 @@ jobs: PATH="$PATH:$HOME/go/bin" make protogen-go - name: Test run: | - PATH="$PATH:$HOME/go/bin" make backends/llama-cpp backends/piper backends/stablediffusion-ggml docker-build-aio e2e-aio + PATH="$PATH:$HOME/go/bin" make backends/llama-cpp backends/whisper backends/piper backends/stablediffusion-ggml docker-build-aio e2e-aio - name: Setup tmate session if tests fail if: ${{ failure() }} uses: mxschmitt/action-tmate@v3.22 diff --git a/Dockerfile b/Dockerfile index 653cf384c..91e8aea5a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -181,57 +181,12 @@ FROM ${INTEL_BASE_IMAGE} AS intel RUN wget -qO - https://repositories.intel.com/gpu/intel-graphics.key | \ gpg --yes --dearmor --output /usr/share/keyrings/intel-graphics.gpg RUN echo "deb [arch=amd64 signed-by=/usr/share/keyrings/intel-graphics.gpg] https://repositories.intel.com/gpu/ubuntu jammy/lts/2350 unified" > /etc/apt/sources.list.d/intel-graphics.list - -################################### -################################### - -# The grpc target does one thing, it builds and installs GRPC. This is in it's own layer so that it can be effectively cached by CI. -# You probably don't need to change anything here, and if you do, make sure that CI is adjusted so that the cache continues to work. -FROM ${GRPC_BASE_IMAGE} AS grpc - -# This is a bit of a hack, but it's required in order to be able to effectively cache this layer in CI -ARG GRPC_MAKEFLAGS="-j4 -Otarget" -ARG GRPC_VERSION=v1.65.0 -ARG CMAKE_FROM_SOURCE=false -ARG CMAKE_VERSION=3.26.4 - -ENV MAKEFLAGS=${GRPC_MAKEFLAGS} - -WORKDIR /build - RUN apt-get update && \ apt-get install -y --no-install-recommends \ - ca-certificates \ - build-essential curl libssl-dev \ - git && \ + intel-oneapi-runtime-libs && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* -# Install CMake (the version in 22.04 is too old) -RUN <