From b1fc5acd4a7a3d234ca95f62b653350d1c9441e4 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto <mudler@users.noreply.github.com>
Date: Sun, 20 Jul 2025 22:52:45 +0200
Subject: [PATCH] feat: split whisper from main binary (#5863)

* feat: split whisper from main binary

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Cleanup makefile

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Add backend builds (missing only darwin)

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Test CI

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Add whisper backend to test runs

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Fixups

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Make sure we have runtime libs

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Less grpc on the main Dockerfile

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* fixups

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Fix hipblas build

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Add whisper to index

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Re-enable CI

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Adapt auto-bumper

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
---
 .github/workflows/backend.yml                 |  97 +++++++++
 .github/workflows/bump_deps.yaml              |   2 +-
 .github/workflows/test.yml                    |   4 +-
 Dockerfile                                    |  56 +----
 Makefile                                      | 198 ++----------------
 backend/Dockerfile.go                         |  11 +
 backend/go/whisper/Makefile                   | 131 ++++++++++++
 backend/go/{transcribe => }/whisper/main.go   |   0
 backend/go/whisper/package.sh                 |  52 +++++
 backend/go/whisper/run.sh                     |  14 ++
 .../go/{transcribe => }/whisper/whisper.go    |   0
 backend/index.yaml                            |  79 ++++++-
 12 files changed, 405 insertions(+), 239 deletions(-)
 create mode 100644 backend/go/whisper/Makefile
 rename backend/go/{transcribe => }/whisper/main.go (100%)
 create mode 100755 backend/go/whisper/package.sh
 create mode 100755 backend/go/whisper/run.sh
 rename backend/go/{transcribe => }/whisper/whisper.go (100%)

diff --git a/.github/workflows/backend.yml b/.github/workflows/backend.yml
index 66cdabf06..cf7536e81 100644
--- a/.github/workflows/backend.yml
+++ b/.github/workflows/backend.yml
@@ -745,6 +745,103 @@ jobs:
             backend: "stablediffusion-ggml"
             dockerfile: "./backend/Dockerfile.go"
             context: "./"
+          # whisper
+          - build-type: ''
+            cuda-major-version: ""
+            cuda-minor-version: ""
+            platforms: 'linux/amd64'
+            tag-latest: 'true'
+            tag-suffix: '-cpu-whisper'
+            runs-on: 'ubuntu-latest'
+            base-image: "ubuntu:22.04"
+            skip-drivers: 'false'
+            backend: "whisper"
+            dockerfile: "./backend/Dockerfile.go"
+            context: "./"
+          - build-type: 'cublas'
+            cuda-major-version: "12"
+            cuda-minor-version: "0"
+            platforms: 'linux/amd64'
+            tag-latest: 'true'
+            tag-suffix: '-gpu-nvidia-cuda-12-whisper'
+            runs-on: 'ubuntu-latest'
+            base-image: "ubuntu:22.04"
+            skip-drivers: 'false'
+            backend: "whisper"
+            dockerfile: "./backend/Dockerfile.go"
+            context: "./"
+          - build-type: 'cublas'
+            cuda-major-version: "11"
+            cuda-minor-version: "7"
+            platforms: 'linux/amd64'
+            tag-latest: 'true'
+            tag-suffix: '-gpu-nvidia-cuda-11-whisper'
+            runs-on: 'ubuntu-latest'
+            base-image: "ubuntu:22.04"
+            skip-drivers: 'false'
+            backend: "whisper"
+            dockerfile: "./backend/Dockerfile.go"
+            context: "./"
+          - build-type: 'sycl_f32'
+            cuda-major-version: ""
+            cuda-minor-version: ""
+            platforms: 'linux/amd64'
+            tag-latest: 'true'
+            tag-suffix: '-gpu-intel-sycl-f32-whisper'
+            runs-on: 'ubuntu-latest'
+            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
+            skip-drivers: 'false'
+            backend: "whisper"
+            dockerfile: "./backend/Dockerfile.go"
+            context: "./"
+          - build-type: 'sycl_f16'
+            cuda-major-version: ""
+            cuda-minor-version: ""
+            platforms: 'linux/amd64'
+            tag-latest: 'true'
+            tag-suffix: '-gpu-intel-sycl-f16-whisper'
+            runs-on: 'ubuntu-latest'
+            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
+            skip-drivers: 'false'
+            backend: "whisper"
+            dockerfile: "./backend/Dockerfile.go"
+            context: "./"
+          - build-type: 'vulkan'
+            cuda-major-version: ""
+            cuda-minor-version: ""
+            platforms: 'linux/amd64'
+            tag-latest: 'true'
+            tag-suffix: '-gpu-vulkan-whisper'
+            runs-on: 'ubuntu-latest'
+            base-image: "ubuntu:22.04"
+            skip-drivers: 'false'
+            backend: "whisper"
+            dockerfile: "./backend/Dockerfile.go"
+            context: "./"
+          - build-type: 'cublas'
+            cuda-major-version: "12"
+            cuda-minor-version: "0"
+            platforms: 'linux/arm64'
+            skip-drivers: 'true'
+            tag-latest: 'auto'
+            tag-suffix: '-nvidia-l4t-arm64-whisper'
+            base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
+            runs-on: 'ubuntu-24.04-arm'
+            backend: "whisper"
+            dockerfile: "./backend/Dockerfile.go"
+            context: "./"
+          - build-type: 'hipblas'
+            cuda-major-version: ""
+            cuda-minor-version: ""
+            platforms: 'linux/amd64'
+            tag-latest: 'auto'
+            tag-suffix: '-gpu-hipblas-whisper'
+            base-image: "rocm/dev-ubuntu-22.04:6.1"
+            runs-on: 'ubuntu-latest'
+            skip-drivers: 'false'
+            backend: "whisper"
+            dockerfile: "./backend/Dockerfile.go"
+            context: "./"
   llama-cpp-darwin:
     runs-on: macOS-14
     strategy:
diff --git a/.github/workflows/bump_deps.yaml b/.github/workflows/bump_deps.yaml
index d26627b0f..f15d62f7f 100644
--- a/.github/workflows/bump_deps.yaml
+++ b/.github/workflows/bump_deps.yaml
@@ -16,7 +16,7 @@ jobs:
           - repository: "ggml-org/whisper.cpp"
             variable: "WHISPER_CPP_VERSION"
             branch: "master"
-            file: "Makefile"
+            file: "backend/go/whisper/Makefile"
           - repository: "PABannier/bark.cpp"
             variable: "BARKCPP_VERSION"
             branch: "main"
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index e2ac48697..cc6ef333d 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -103,7 +103,7 @@ jobs:
 
           make -C backend/python/transformers
 
-          make backends/llama-cpp backends/piper backends/stablediffusion-ggml
+          make backends/llama-cpp backends/piper backends/whisper backends/stablediffusion-ggml
         env:
           CUDA_VERSION: 12-4
       - name: Test
@@ -168,7 +168,7 @@ jobs:
           PATH="$PATH:$HOME/go/bin" make protogen-go
       - name: Test
         run: |
-            PATH="$PATH:$HOME/go/bin" make backends/llama-cpp backends/piper backends/stablediffusion-ggml docker-build-aio e2e-aio
+            PATH="$PATH:$HOME/go/bin" make backends/llama-cpp backends/whisper backends/piper backends/stablediffusion-ggml docker-build-aio e2e-aio
       - name: Setup tmate session if tests fail
         if: ${{ failure() }}
         uses: mxschmitt/action-tmate@v3.22
diff --git a/Dockerfile b/Dockerfile
index 653cf384c..91e8aea5a 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -181,57 +181,12 @@ FROM ${INTEL_BASE_IMAGE} AS intel
 RUN wget -qO - https://repositories.intel.com/gpu/intel-graphics.key | \
 gpg --yes --dearmor --output /usr/share/keyrings/intel-graphics.gpg
 RUN echo "deb [arch=amd64 signed-by=/usr/share/keyrings/intel-graphics.gpg] https://repositories.intel.com/gpu/ubuntu jammy/lts/2350 unified" > /etc/apt/sources.list.d/intel-graphics.list
-
-###################################
-###################################
-
-# The grpc target does one thing, it builds and installs GRPC.  This is in it's own layer so that it can be effectively cached by CI.
-# You probably don't need to change anything here, and if you do, make sure that CI is adjusted so that the cache continues to work.
-FROM ${GRPC_BASE_IMAGE} AS grpc
-
-# This is a bit of a hack, but it's required in order to be able to effectively cache this layer in CI
-ARG GRPC_MAKEFLAGS="-j4 -Otarget"
-ARG GRPC_VERSION=v1.65.0
-ARG CMAKE_FROM_SOURCE=false
-ARG CMAKE_VERSION=3.26.4
-
-ENV MAKEFLAGS=${GRPC_MAKEFLAGS}
-
-WORKDIR /build
-
 RUN apt-get update && \
     apt-get install -y --no-install-recommends \
-        ca-certificates \
-        build-essential curl libssl-dev \
-        git && \
+        intel-oneapi-runtime-libs && \
     apt-get clean && \
     rm -rf /var/lib/apt/lists/*
 
-# Install CMake (the version in 22.04 is too old)
-RUN <<EOT bash
-    if [ "${CMAKE_FROM_SOURCE}" = "true" ]; then
-        curl -L -s https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}.tar.gz -o cmake.tar.gz && tar xvf cmake.tar.gz && cd cmake-${CMAKE_VERSION} && ./configure && make && make install
-    else
-        apt-get update && \
-        apt-get install -y \
-            cmake && \
-        apt-get clean && \
-        rm -rf /var/lib/apt/lists/*
-    fi
-EOT
-
-# We install GRPC to a different prefix here so that we can copy in only the build artifacts later
-# saves several hundred MB on the final docker image size vs copying in the entire GRPC source tree
-# and running make install in the target container
-RUN git clone --recurse-submodules --jobs 4 -b ${GRPC_VERSION} --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
-    mkdir -p /build/grpc/cmake/build && \
-    cd /build/grpc/cmake/build && \
-    sed -i "216i\  TESTONLY" "../../third_party/abseil-cpp/absl/container/CMakeLists.txt" && \
-    cmake -DgRPC_INSTALL=ON -DgRPC_BUILD_TESTS=OFF -DCMAKE_INSTALL_PREFIX:PATH=/opt/grpc ../.. && \
-    make && \
-    make install && \
-    rm -rf /build
-
 ###################################
 ###################################
 
@@ -258,9 +213,7 @@ RUN echo "GO_TAGS: $GO_TAGS" && echo "TARGETARCH: $TARGETARCH"
 WORKDIR /build
 
 
-# We need protoc installed, and the version in 22.04 is too old.  We will create one as part installing the GRPC build below
-# but that will also being in a newer version of absl which stablediffusion cannot compile with.  This version of protoc is only
-# here so that we can generate the grpc code for the stablediffusion build
+# We need protoc installed, and the version in 22.04 is too old.
 RUN <<EOT bash
     if [ "amd64" = "$TARGETARCH" ]; then
         curl -L -s https://github.com/protocolbuffers/protobuf/releases/download/v27.1/protoc-27.1-linux-x86_64.zip -o protoc.zip && \
@@ -282,8 +235,6 @@ FROM builder-base AS builder-backends
 ARG TARGETARCH
 ARG TARGETVARIANT
 
-COPY --from=grpc /opt/grpc /usr/local
-
 WORKDIR /build
 
 COPY ./Makefile .
@@ -299,7 +250,6 @@ COPY ./pkg/langchain ./pkg/langchain
 
 RUN ls -l ./
 RUN make backend-assets
-RUN make prepare
 RUN make grpcs
 
 # The builder target compiles LocalAI. This target is not the target that will be uploaded to the registry.
@@ -323,8 +273,6 @@ RUN make build
 
 FROM builder-base AS devcontainer
 
-COPY --from=grpc /opt/grpc /usr/local
-
 COPY .devcontainer-scripts /.devcontainer-scripts
 
 RUN apt-get update && \
diff --git a/Makefile b/Makefile
index 1fd523b46..6f2be04d7 100644
--- a/Makefile
+++ b/Makefile
@@ -3,33 +3,12 @@ GOTEST=$(GOCMD) test
 GOVET=$(GOCMD) vet
 BINARY_NAME=local-ai
 
-DETECT_LIBS?=true
-
-# whisper.cpp version
-WHISPER_REPO?=https://github.com/ggml-org/whisper.cpp
-WHISPER_CPP_VERSION?=032697b9a850dc2615555e2a93a683cc3dd58559
-
-# ONEAPI variables for SYCL
-export ONEAPI_VARS?=/opt/intel/oneapi/setvars.sh
-ONEAPI_VERSION=2025.1
-
 ONNX_VERSION?=1.20.0
 ONNX_ARCH?=x64
 ONNX_OS?=linux
 
 export BUILD_TYPE?=
-export STABLE_BUILD_TYPE?=$(BUILD_TYPE)
-export CMAKE_ARGS?=-DBUILD_SHARED_LIBS=OFF
-export WHISPER_CMAKE_ARGS?=-DBUILD_SHARED_LIBS=OFF
-export BACKEND_LIBS?=
-export WHISPER_DIR=$(abspath ./sources/whisper.cpp)
-export WHISPER_INCLUDE_PATH=$(WHISPER_DIR)/include:$(WHISPER_DIR)/ggml/include
-export WHISPER_LIBRARY_PATH=$(WHISPER_DIR)/build/src/:$(WHISPER_DIR)/build/ggml/src
 
-CGO_LDFLAGS?=
-CGO_LDFLAGS_WHISPER?=
-CGO_LDFLAGS_WHISPER+=-lggml
-CUDA_LIBPATH?=/usr/local/cuda/lib64/
 GO_TAGS?=
 BUILD_ID?=
 NATIVE?=false
@@ -70,13 +49,6 @@ E2E_BRIDGE_IP?=172.17.0.1
 ifndef UNAME_S
 UNAME_S := $(shell uname -s)
 endif
-
-# IF native is false, we add -DGGML_NATIVE=OFF to CMAKE_ARGS
-ifeq ($(NATIVE),false)
-	CMAKE_ARGS+=-DGGML_NATIVE=OFF
-	WHISPER_CMAKE_ARGS+=-DGGML_NATIVE=OFF
-endif
-
 # Detect if we are running on arm64
 ifneq (,$(findstring aarch64,$(shell uname -m)))
 	ONNX_ARCH=aarch64
@@ -95,114 +67,9 @@ ifeq ($(OS),Darwin)
 	ifeq ($(OSX_SIGNING_IDENTITY),)
 		OSX_SIGNING_IDENTITY := $(shell security find-identity -v -p codesigning | grep '"' | head -n 1 | sed -E 's/.*"(.*)"/\1/')
 	endif
-
-	# on OSX, if BUILD_TYPE is blank, we should default to use Metal
-	ifeq ($(BUILD_TYPE),)
-		BUILD_TYPE=metal
-	# disable metal if on Darwin and any other value is explicitly passed.
-	else ifneq ($(BUILD_TYPE),metal)
-		CMAKE_ARGS+=-DGGML_METAL=OFF
-		WHISPER_CMAKE_ARGS+=-DGGML_METAL=OFF
-		export GGML_NO_ACCELERATE=1
-		export GGML_NO_METAL=1
-		GO_LDFLAGS_WHISPER+=-lggml-blas
-		export WHISPER_LIBRARY_PATH:=$(WHISPER_LIBRARY_PATH):$(WHISPER_DIR)/build/ggml/src/ggml-blas
-	endif
-
-	ifeq ($(BUILD_TYPE),metal)
-		CGO_LDFLAGS += -framework Accelerate
-		CGO_LDFLAGS_WHISPER+=-lggml-metal -lggml-blas
-		CMAKE_ARGS+=-DGGML_METAL=ON
-		CMAKE_ARGS+=-DGGML_METAL_USE_BF16=ON
-		CMAKE_ARGS+=-DGGML_METAL_EMBED_LIBRARY=ON
-		CMAKE_ARGS+=-DGGML_OPENMP=OFF
-		WHISPER_CMAKE_ARGS+=-DGGML_METAL=ON
-		WHISPER_CMAKE_ARGS+=-DGGML_METAL_USE_BF16=ON
-		WHISPER_CMAKE_ARGS+=-DGGML_METAL_EMBED_LIBRARY=ON
-		WHISPER_CMAKE_ARGS+=-DWHISPER_BUILD_EXAMPLES=OFF
-		WHISPER_CMAKE_ARGS+=-DWHISPER_BUILD_TESTS=OFF
-		WHISPER_CMAKE_ARGS+=-DWHISPER_BUILD_SERVER=OFF
-		WHISPER_CMAKE_ARGS+=-DGGML_OPENMP=OFF
-		export WHISPER_LIBRARY_PATH:=$(WHISPER_LIBRARY_PATH):$(WHISPER_DIR)/build/ggml/src/ggml-metal/:$(WHISPER_DIR)/build/ggml/src/ggml-blas
-	else
-		CGO_LDFLAGS_WHISPER+=-lggml-blas
-		export WHISPER_LIBRARY_PATH:=$(WHISPER_LIBRARY_PATH):$(WHISPER_DIR)/build/ggml/src/ggml-blas
-	endif
-else
-CGO_LDFLAGS_WHISPER+=-lgomp
-endif
-
-ifeq ($(BUILD_TYPE),openblas)
-	CGO_LDFLAGS+=-lopenblas
-	export GGML_OPENBLAS=1
-endif
-
-ifeq ($(BUILD_TYPE),cublas)
-	CGO_LDFLAGS+=-lcublas -lcudart -L$(CUDA_LIBPATH) -L$(CUDA_LIBPATH)/stubs/ -lcuda
-	export GGML_CUDA=1
-	CMAKE_ARGS+=-DGGML_CUDA=ON
-	WHISPER_CMAKE_ARGS+=-DGGML_CUDA=ON
-	CGO_LDFLAGS_WHISPER+=-lcufft -lggml-cuda
-	export WHISPER_LIBRARY_PATH:=$(WHISPER_LIBRARY_PATH):$(WHISPER_DIR)/build/ggml/src/ggml-cuda/
-endif
-
-ifeq ($(BUILD_TYPE),vulkan)
-	CMAKE_ARGS+=-DGGML_VULKAN=1
-	WHISPER_CMAKE_ARGS+=-DGGML_VULKAN=1
-	CGO_LDFLAGS_WHISPER+=-lggml-vulkan -lvulkan
-	export WHISPER_LIBRARY_PATH:=$(WHISPER_LIBRARY_PATH):$(WHISPER_DIR)/build/ggml/src/ggml-vulkan/
-endif
-
-ifneq (,$(findstring sycl,$(BUILD_TYPE)))
-	export GGML_SYCL=1
-	CMAKE_ARGS+=-DGGML_SYCL=ON
-	WHISPER_CMAKE_ARGS+=-DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
-	export CC=icx
-	export CXX=icpx
-	CGO_LDFLAGS_WHISPER += -fsycl -L${DNNLROOT}/lib -rpath ${ONEAPI_ROOT}/${ONEAPI_VERSION}/lib -ldnnl ${MKLROOT}/lib/intel64/libmkl_sycl.a -fiopenmp -fopenmp-targets=spir64 -lOpenCL -lggml-sycl
-	CGO_LDFLAGS_WHISPER += $(shell pkg-config --libs mkl-static-lp64-gomp)
-	CGO_CXXFLAGS_WHISPER += -fiopenmp -fopenmp-targets=spir64
-	CGO_CXXFLAGS_WHISPER += $(shell pkg-config --cflags mkl-static-lp64-gomp )
-	export WHISPER_LIBRARY_PATH:=$(WHISPER_LIBRARY_PATH):$(WHISPER_DIR)/build/ggml/src/ggml-sycl/
-endif
-
-ifeq ($(BUILD_TYPE),sycl_f16)
-	export GGML_SYCL_F16=1
-	CMAKE_ARGS+=-DGGML_SYCL_F16=ON
-	WHISPER_CMAKE_ARGS+=-DGGML_SYCL_F16=ON
-endif
-
-ifeq ($(BUILD_TYPE),hipblas)
-	ROCM_HOME ?= /opt/rocm
-	ROCM_PATH ?= /opt/rocm
-	LD_LIBRARY_PATH ?= /opt/rocm/lib:/opt/rocm/llvm/lib
-	export CXX=$(ROCM_HOME)/llvm/bin/clang++
-	export CC=$(ROCM_HOME)/llvm/bin/clang
-	export STABLE_BUILD_TYPE=
-	export GGML_HIP=1
-	GPU_TARGETS ?= gfx803,gfx900,gfx906,gfx908,gfx90a,gfx942,gfx1010,gfx1030,gfx1032,gfx1100,gfx1101,gfx1102
-	AMDGPU_TARGETS ?= "$(GPU_TARGETS)"
-	CMAKE_ARGS+=-DGGML_HIP=ON -DAMDGPU_TARGETS="$(AMDGPU_TARGETS)" -DGPU_TARGETS="$(GPU_TARGETS)"
-	CGO_LDFLAGS += -O3 --rtlib=compiler-rt -unwindlib=libgcc -lhipblas -lrocblas --hip-link -L${ROCM_HOME}/lib/llvm/lib
-endif
-
-ifeq ($(BUILD_TYPE),metal)
-	CGO_LDFLAGS+=-framework Foundation -framework Metal -framework MetalKit -framework MetalPerformanceShaders
-	export GGML_METAL=1
-endif
-
-ifeq ($(BUILD_TYPE),clblas)
-	CGO_LDFLAGS+=-lOpenCL -lclblast
-	export GGML_OPENBLAS=1
-endif
-
-# glibc-static or glibc-devel-static required
-ifeq ($(STATIC),true)
-	LD_FLAGS+=-linkmode external -extldflags -static
 endif
 
 ALL_GRPC_BACKENDS=backend-assets/grpc/huggingface
-ALL_GRPC_BACKENDS+=backend-assets/grpc/whisper
 ALL_GRPC_BACKENDS+=backend-assets/grpc/local-store
 ALL_GRPC_BACKENDS+=backend-assets/grpc/silero-vad
 ALL_GRPC_BACKENDS+=$(OPTIONAL_GRPC)
@@ -221,7 +88,7 @@ ifeq ($(BUILD_API_ONLY),true)
 	GRPC_BACKENDS=
 endif
 
-.PHONY: all test build vendor get-sources prepare-sources prepare
+.PHONY: all test build vendor
 
 all: help
 
@@ -239,50 +106,18 @@ else
 	mv backend-assets/lib/libonnxruntime.so.$(ONNX_VERSION) backend-assets/lib/libonnxruntime.so.1
 endif
 
-## whisper
-sources/whisper.cpp:
-	mkdir -p sources/whisper.cpp
-	cd sources/whisper.cpp && \
-	git init && \
-	git remote add origin $(WHISPER_REPO) && \
-	git fetch origin && \
-	git checkout $(WHISPER_CPP_VERSION) && \
-	git submodule update --init --recursive --depth 1 --single-branch
-
-sources/whisper.cpp/build/src/libwhisper.a: sources/whisper.cpp
-	cd sources/whisper.cpp && cmake $(WHISPER_CMAKE_ARGS) . -B ./build
-	cd sources/whisper.cpp/build && cmake --build . --config Release
-
-get-sources: sources/whisper.cpp
-
-replace:
-	$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp=$(CURDIR)/sources/whisper.cpp
-	$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp/bindings/go=$(CURDIR)/sources/whisper.cpp/bindings/go
-
-dropreplace:
-	$(GOCMD) mod edit -dropreplace github.com/ggerganov/whisper.cpp
-	$(GOCMD) mod edit -dropreplace github.com/ggerganov/whisper.cpp/bindings/go
-
-prepare-sources: get-sources replace
-	$(GOCMD) mod download
-
 ## GENERIC
 rebuild: ## Rebuilds the project
 	$(GOCMD) clean -cache
-	$(MAKE) -C sources/whisper.cpp clean
 	$(MAKE) build
 
-prepare: prepare-sources $(OPTIONAL_TARGETS)
-
 clean: ## Remove build related file
 	$(GOCMD) clean -cache
 	rm -f prepare
-	rm -rf ./sources
 	rm -rf $(BINARY_NAME)
 	rm -rf release/
 	rm -rf backend-assets/*
 	$(MAKE) -C backend/cpp/grpc clean
-	$(MAKE) dropreplace
 	$(MAKE) protogen-clean
 	rmdir pkg/grpc/proto || true
 
@@ -301,7 +136,7 @@ install-go-tools:
 	go install github.com/GeertJohan/go.rice/rice@latest
 
 ## Build:
-build: prepare backend-assets grpcs install-go-tools ## Build the project
+build: backend-assets grpcs install-go-tools ## Build the project
 	$(info ${GREEN}I local-ai build info:${RESET})
 	$(info ${GREEN}I BUILD_TYPE: ${YELLOW}$(BUILD_TYPE)${RESET})
 	$(info ${GREEN}I GO_TAGS: ${YELLOW}$(GO_TAGS)${RESET})
@@ -338,7 +173,7 @@ osx-signed: build
 	codesign --deep --force --sign "$(OSX_SIGNING_IDENTITY)" --entitlements "./Entitlements.plist" "./$(BINARY_NAME)"
 
 ## Run
-run: prepare ## run local-ai
+run: ## run local-ai
 	CGO_LDFLAGS="$(CGO_LDFLAGS)" $(GOCMD) run ./
 
 test-models/testmodel.ggml:
@@ -359,7 +194,7 @@ prepare-test: grpcs
 ########################################################
 
 ## Test targets
-test: prepare test-models/testmodel.ggml grpcs
+test: test-models/testmodel.ggml grpcs
 	@echo 'Running tests'
 	export GO_TAGS="debug"
 	$(MAKE) prepare-test
@@ -378,6 +213,9 @@ backends/piper: docker-build-piper docker-save-piper build-api
 backends/stablediffusion-ggml: docker-build-stablediffusion-ggml docker-save-stablediffusion-ggml build-api
 	./local-ai backends install "ocifile://$(abspath ./backend-images/stablediffusion-ggml.tar)"
 
+backends/whisper: docker-build-whisper docker-save-whisper build-api
+	./local-ai backends install "ocifile://$(abspath ./backend-images/whisper.tar)"
+
 ########################################################
 ## AIO tests
 ########################################################
@@ -611,28 +449,20 @@ ifneq ($(UPX),)
 	$(UPX) backend-assets/grpc/huggingface
 endif
 
-backend-assets/grpc/silero-vad: protogen-go replace backend-assets/grpc backend-assets/lib/libonnxruntime.so.1
+backend-assets/grpc/silero-vad: protogen-go backend-assets/grpc backend-assets/lib/libonnxruntime.so.1
 	CGO_LDFLAGS="$(CGO_LDFLAGS)" CPATH="$(CPATH):$(CURDIR)/sources/onnxruntime/include/" LIBRARY_PATH=$(CURDIR)/backend-assets/lib \
 	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/silero-vad ./backend/go/vad/silero
 ifneq ($(UPX),)
 	$(UPX) backend-assets/grpc/silero-vad
 endif
 
-backend-assets/grpc/whisper: protogen-go replace sources/whisper.cpp sources/whisper.cpp/build/src/libwhisper.a backend-assets/grpc
-	CGO_LDFLAGS="$(CGO_LDFLAGS) $(CGO_LDFLAGS_WHISPER)" C_INCLUDE_PATH="${WHISPER_INCLUDE_PATH}" LIBRARY_PATH="${WHISPER_LIBRARY_PATH}" LD_LIBRARY_PATH="${WHISPER_LIBRARY_PATH}" \
-	CGO_CXXFLAGS="$(CGO_CXXFLAGS_WHISPER)" \
-	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/whisper ./backend/go/transcribe/whisper
-ifneq ($(UPX),)
-	$(UPX) backend-assets/grpc/whisper
-endif
-
-backend-assets/grpc/local-store: backend-assets/grpc protogen-go replace
+backend-assets/grpc/local-store: backend-assets/grpc protogen-go
 	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/local-store ./backend/go/stores/
 ifneq ($(UPX),)
 	$(UPX) backend-assets/grpc/local-store
 endif
 
-grpcs: prepare protogen-go $(GRPC_BACKENDS)
+grpcs: protogen-go $(GRPC_BACKENDS)
 
 DOCKER_IMAGE?=local-ai
 DOCKER_AIO_IMAGE?=local-ai-aio
@@ -696,7 +526,7 @@ backend-images:
 	mkdir -p backend-images
 
 docker-build-llama-cpp:
-	docker build -t local-ai-backend:llama-cpp -f backend/Dockerfile.llama-cpp .
+	docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:llama-cpp -f backend/Dockerfile.llama-cpp .
 
 docker-build-bark-cpp:
 	docker build -t local-ai-backend:bark-cpp -f backend/Dockerfile.go --build-arg BACKEND=bark-cpp .
@@ -734,6 +564,12 @@ docker-build-diffusers:
 docker-build-kokoro:
 	docker build -t local-ai-backend:kokoro -f backend/Dockerfile.python --build-arg BACKEND=kokoro .
 
+docker-build-whisper:
+	docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:whisper -f backend/Dockerfile.go --build-arg BACKEND=whisper  .
+
+docker-save-whisper: backend-images
+	docker save local-ai-backend:whisper -o backend-images/whisper.tar
+
 docker-build-faster-whisper:
 	docker build -t local-ai-backend:faster-whisper -f backend/Dockerfile.python --build-arg BACKEND=faster-whisper .
 
diff --git a/backend/Dockerfile.go b/backend/Dockerfile.go
index dbfee61e2..99906008e 100644
--- a/backend/Dockerfile.go
+++ b/backend/Dockerfile.go
@@ -96,6 +96,17 @@ RUN if [ "${BUILD_TYPE}" = "hipblas" ] && [ "${SKIP_DRIVERS}" = "false" ]; then
         ldconfig \
     ; fi
 
+# Intel oneAPI requirements: install intel-oneapi-runtime-libs for sycl* build types when SKIP_DRIVERS is "false"
+RUN <<EOT bash
+    if [[ "${BUILD_TYPE}" == sycl* ]] && [ "${SKIP_DRIVERS}" = "false" ]; then
+        apt-get update && \
+        apt-get install -y --no-install-recommends \
+            intel-oneapi-runtime-libs && \
+        apt-get clean && \
+        rm -rf /var/lib/apt/lists/*
+    fi
+EOT
+
 # Install Go
 RUN curl -L -s https://go.dev/dl/go${GO_VERSION}.linux-${TARGETARCH}.tar.gz | tar -C /usr/local -xz
 ENV PATH=$PATH:/root/go/bin:/usr/local/go/bin:/usr/local/bin
diff --git a/backend/go/whisper/Makefile b/backend/go/whisper/Makefile
new file mode 100644
index 000000000..76a4fabb9
--- /dev/null
+++ b/backend/go/whisper/Makefile
@@ -0,0 +1,131 @@
+GOCMD=go
+NATIVE?=false
+
+BUILD_TYPE?=
+CMAKE_ARGS?=
+
+# whisper.cpp version
+WHISPER_REPO?=https://github.com/ggml-org/whisper.cpp
+WHISPER_CPP_VERSION?=032697b9a850dc2615555e2a93a683cc3dd58559
+
+export WHISPER_CMAKE_ARGS?=-DBUILD_SHARED_LIBS=OFF
+export WHISPER_DIR=$(abspath ./sources/whisper.cpp)
+export WHISPER_INCLUDE_PATH=$(WHISPER_DIR)/include:$(WHISPER_DIR)/ggml/include
+export WHISPER_LIBRARY_PATH=$(WHISPER_DIR)/build/src/:$(WHISPER_DIR)/build/ggml/src
+
+CGO_LDFLAGS_WHISPER?=
+CGO_LDFLAGS_WHISPER+=-lggml
+CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF -DLLAMA_CURL=OFF
+CUDA_LIBPATH?=/usr/local/cuda/lib64/
+
+ONEAPI_VERSION?=2025.1
+
+# If NATIVE is false, pass -DGGML_NATIVE=OFF to CMake. It is appended to both
+# argument lists because the cmake configure step receives each of them.
+ifeq ($(NATIVE),false)
+	CMAKE_ARGS+=-DGGML_NATIVE=OFF
+	WHISPER_CMAKE_ARGS+=-DGGML_NATIVE=OFF
+endif
+CURRENT_MAKEFILE_DIR := $(dir $(abspath $(lastword $(MAKEFILE_LIST))))
+# package.sh creates a ./package directory; without .PHONY make would treat the target as up to date.
+.PHONY: package build
+# If build type is cublas, then we set -DGGML_CUDA=ON to CMAKE_ARGS automatically
+ifeq ($(BUILD_TYPE),cublas)
+	CGO_LDFLAGS+=-lcublas -lcudart -L$(CUDA_LIBPATH) -L$(CUDA_LIBPATH)/stubs/ -lcuda
+	CMAKE_ARGS+=-DGGML_CUDA=ON
+	CGO_LDFLAGS_WHISPER+=-lcufft -lggml-cuda
+	export WHISPER_LIBRARY_PATH:=$(WHISPER_LIBRARY_PATH):$(WHISPER_DIR)/build/ggml/src/ggml-cuda/
+# If build type is openblas then we set -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
+# to CMAKE_ARGS automatically
+else ifeq ($(BUILD_TYPE),openblas)
+	CMAKE_ARGS+=-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
+# If build type is clblas (openCL) we set -DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path
+else ifeq ($(BUILD_TYPE),clblas)
+	CMAKE_ARGS+=-DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path
+# If build type is hipblas, we also have to set CC=/opt/rocm/llvm/bin/clang and CXX=/opt/rocm/llvm/bin/clang++
+else ifeq ($(BUILD_TYPE),hipblas)
+	ROCM_HOME ?= /opt/rocm
+	ROCM_PATH ?= /opt/rocm
+	LD_LIBRARY_PATH ?= /opt/rocm/lib:/opt/rocm/llvm/lib
+	export STABLE_BUILD_TYPE=
+	export CXX=$(ROCM_HOME)/llvm/bin/clang++
+	export CC=$(ROCM_HOME)/llvm/bin/clang
+#	GPU_TARGETS ?= gfx803,gfx900,gfx906,gfx908,gfx90a,gfx942,gfx1010,gfx1030,gfx1032,gfx1100,gfx1101,gfx1102
+#	AMDGPU_TARGETS ?= "$(GPU_TARGETS)"
+	CMAKE_ARGS+=-DGGML_HIP=ON
+	CGO_LDFLAGS += -O3 --rtlib=compiler-rt -unwindlib=libgcc -lhipblas -lrocblas --hip-link -L${ROCM_HOME}/lib/llvm/lib -L$(CURRENT_MAKEFILE_DIR)/sources/whisper.cpp/build/ggml/src/ggml-hip/ -lggml-hip
+#	CMAKE_ARGS+=-DGGML_HIP=ON -DAMDGPU_TARGETS="$(AMDGPU_TARGETS)" -DGPU_TARGETS="$(GPU_TARGETS)"
+else ifeq ($(BUILD_TYPE),vulkan)
+	CMAKE_ARGS+=-DGGML_VULKAN=1
+	CGO_LDFLAGS_WHISPER+=-lggml-vulkan -lvulkan
+	export WHISPER_LIBRARY_PATH:=$(WHISPER_LIBRARY_PATH):$(WHISPER_DIR)/build/ggml/src/ggml-vulkan/
+else ifeq ($(OS),Darwin)
+	ifeq ($(BUILD_TYPE),)
+		BUILD_TYPE=metal
+	endif
+	ifneq ($(BUILD_TYPE),metal)
+		CMAKE_ARGS+=-DGGML_METAL=OFF
+		CGO_LDFLAGS_WHISPER+=-lggml-blas
+		export WHISPER_LIBRARY_PATH:=$(WHISPER_LIBRARY_PATH):$(WHISPER_DIR)/build/ggml/src/ggml-blas
+	else
+		CMAKE_ARGS+=-DGGML_METAL=ON
+		CMAKE_ARGS+=-DGGML_METAL_EMBED_LIBRARY=ON
+		CMAKE_ARGS+=-DGGML_METAL_USE_BF16=ON
+		CMAKE_ARGS+=-DGGML_OPENMP=OFF
+		CMAKE_ARGS+=-DWHISPER_BUILD_EXAMPLES=OFF
+		CMAKE_ARGS+=-DWHISPER_BUILD_TESTS=OFF
+		CMAKE_ARGS+=-DWHISPER_BUILD_SERVER=OFF
+		CGO_LDFLAGS += -framework Accelerate
+		CGO_LDFLAGS_WHISPER+=-lggml-metal -lggml-blas
+		export WHISPER_LIBRARY_PATH:=$(WHISPER_LIBRARY_PATH):$(WHISPER_DIR)/build/ggml/src/ggml-metal/:$(WHISPER_DIR)/build/ggml/src/ggml-blas
+	endif
+	TARGET+=--target ggml-metal
+endif
+
+ifneq (,$(findstring sycl,$(BUILD_TYPE)))
+	export CC=icx
+	export CXX=icpx
+	CGO_LDFLAGS_WHISPER += -fsycl -L${DNNLROOT}/lib -rpath ${ONEAPI_ROOT}/${ONEAPI_VERSION}/lib -ldnnl ${MKLROOT}/lib/intel64/libmkl_sycl.a -fiopenmp -fopenmp-targets=spir64 -lOpenCL -lggml-sycl
+	CGO_LDFLAGS_WHISPER += $(shell pkg-config --libs mkl-static-lp64-gomp)
+	CGO_CXXFLAGS_WHISPER += -fiopenmp -fopenmp-targets=spir64
+	CGO_CXXFLAGS_WHISPER += $(shell pkg-config --cflags mkl-static-lp64-gomp )
+	export WHISPER_LIBRARY_PATH:=$(WHISPER_LIBRARY_PATH):$(WHISPER_DIR)/build/ggml/src/ggml-sycl/
+	CMAKE_ARGS+=-DGGML_SYCL=ON \
+		-DCMAKE_C_COMPILER=icx \
+		-DCMAKE_CXX_COMPILER=icpx \
+		-DCMAKE_CXX_FLAGS="-fsycl"
+endif
+
+ifeq ($(BUILD_TYPE),sycl_f16)
+	CMAKE_ARGS+=-DGGML_SYCL_F16=ON
+endif
+
+ifneq ($(OS),Darwin)
+	CGO_LDFLAGS_WHISPER+=-lgomp
+endif
+
+## whisper
+sources/whisper.cpp:
+	mkdir -p sources/whisper.cpp
+	cd sources/whisper.cpp && \
+	git init && \
+	git remote add origin $(WHISPER_REPO) && \
+	git fetch origin && \
+	git checkout $(WHISPER_CPP_VERSION) && \
+	git submodule update --init --recursive --depth 1 --single-branch
+
+sources/whisper.cpp/build/src/libwhisper.a: sources/whisper.cpp
+	cd sources/whisper.cpp && cmake $(CMAKE_ARGS) $(WHISPER_CMAKE_ARGS) . -B ./build
+	cd sources/whisper.cpp/build && cmake --build . --config Release
+
+whisper: sources/whisper.cpp sources/whisper.cpp/build/src/libwhisper.a
+	$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp=$(CURDIR)/sources/whisper.cpp
+	$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp/bindings/go=$(CURDIR)/sources/whisper.cpp/bindings/go	
+	CGO_LDFLAGS="$(CGO_LDFLAGS) $(CGO_LDFLAGS_WHISPER)" C_INCLUDE_PATH="${WHISPER_INCLUDE_PATH}" LIBRARY_PATH="${WHISPER_LIBRARY_PATH}" LD_LIBRARY_PATH="${WHISPER_LIBRARY_PATH}" \
+	CGO_CXXFLAGS="$(CGO_CXXFLAGS_WHISPER)" \
+	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o whisper ./
+
+package:
+	bash package.sh
+
+build: whisper package
\ No newline at end of file
diff --git a/backend/go/transcribe/whisper/main.go b/backend/go/whisper/main.go
similarity index 100%
rename from backend/go/transcribe/whisper/main.go
rename to backend/go/whisper/main.go
diff --git a/backend/go/whisper/package.sh b/backend/go/whisper/package.sh
new file mode 100755
index 000000000..3bda9695c
--- /dev/null
+++ b/backend/go/whisper/package.sh
@@ -0,0 +1,52 @@
+#!/bin/bash
+
+# Script to copy the appropriate libraries based on architecture.
+# Assembles a self-contained bundle under package/: the whisper binary, the
+# run.sh launcher and the shared libraries listed below.
+# This script is used in the final stage of the Dockerfile
+
+set -e
+
+# Directory containing this script; every bundle path is resolved against it.
+CURDIR=$(dirname "$(realpath "$0")")
+
+# Create lib directory
+mkdir -p "$CURDIR/package/lib"
+
+cp -avrf "$CURDIR/whisper" "$CURDIR/package/"
+cp -rfv "$CURDIR/run.sh" "$CURDIR/package/"
+
+# Detect the architecture from the dynamic loader present on this system and
+# remember where the matching shared libraries live.
+if [ -f "/lib64/ld-linux-x86-64.so.2" ]; then
+    # x86_64 architecture
+    echo "Detected x86_64 architecture, copying x86_64 libraries..."
+    LOADER=/lib64/ld-linux-x86-64.so.2
+    LIBDIR=/lib/x86_64-linux-gnu
+elif [ -f "/lib/ld-linux-aarch64.so.1" ]; then
+    # ARM64 architecture
+    echo "Detected ARM64 architecture, copying ARM64 libraries..."
+    LOADER=/lib/ld-linux-aarch64.so.1
+    LIBDIR=/lib/aarch64-linux-gnu
+else
+    echo "Error: Could not detect architecture"
+    exit 1
+fi
+
+# The loader is stored under the fixed name ld.so, which is the path run.sh
+# checks for. -L dereferences symlinks so real files end up in the bundle.
+cp -arfLv "$LOADER" "$CURDIR/package/lib/ld.so"
+
+# Runtime libraries shipped next to the binary. The list is shared by both
+# architectures and each library is copied exactly once.
+RUNTIME_LIBS=(libc.so.6 libgcc_s.so.1 libstdc++.so.6 libm.so.6 libgomp.so.1
+              libdl.so.2 librt.so.1 libpthread.so.0)
+
+for lib in "${RUNTIME_LIBS[@]}"; do
+    cp -arfLv "$LIBDIR/$lib" "$CURDIR/package/lib/$lib"
+done
+
+# List the resulting bundle contents.
+echo "Packaging completed successfully"
+ls -liah "$CURDIR/package/"
+ls -liah "$CURDIR/package/lib/"
\ No newline at end of file
diff --git a/backend/go/whisper/run.sh b/backend/go/whisper/run.sh
new file mode 100755
index 000000000..2c23f321a
--- /dev/null
+++ b/backend/go/whisper/run.sh
@@ -0,0 +1,14 @@
+#!/bin/bash
+set -ex
+
+CURDIR=$(dirname "$(realpath $0)")
+
+export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
+
+# If there is a lib/ld.so, use it
+if [ -f $CURDIR/lib/ld.so ]; then
+	echo "Using lib/ld.so"
+	exec $CURDIR/lib/ld.so $CURDIR/whisper "$@"
+fi
+
+exec $CURDIR/whisper "$@"
\ No newline at end of file
diff --git a/backend/go/transcribe/whisper/whisper.go b/backend/go/whisper/whisper.go
similarity index 100%
rename from backend/go/transcribe/whisper/whisper.go
rename to backend/go/whisper/whisper.go
diff --git a/backend/index.yaml b/backend/index.yaml
index e155ea9f9..e0d4d1784 100644
--- a/backend/index.yaml
+++ b/backend/index.yaml
@@ -26,6 +26,28 @@
     vulkan: "vulkan-llama-cpp"
     nvidia-l4t: "nvidia-l4t-arm64-llama-cpp"
     darwin-x86: "darwin-x86-llama-cpp"
+- &whispercpp
+  name: "whisper"
+  alias: "whisper"
+  license: mit
+  icon: https://user-images.githubusercontent.com/1991296/235238348-05d0f6a4-da44-4900-a1de-d0707e75b763.jpeg
+  description: |
+    Port of OpenAI's Whisper model in C/C++
+  urls:
+    - https://github.com/ggml-org/whisper.cpp
+  tags:
+    - audio-transcription
+    - CPU
+    - GPU
+    - CUDA
+    - HIP
+  capabilities:
+    default: "cpu-whisper"
+    nvidia: "cuda12-whisper"
+    intel: "intel-sycl-f16-whisper"
+    amd: "rocm-whisper"
+    vulkan: "vulkan-whisper"
+    nvidia-l4t: "nvidia-l4t-arm64-whisper"
 - &stablediffusionggml
   name: "stablediffusion-ggml"
   alias: "stablediffusion-ggml"
@@ -326,7 +348,62 @@
   uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f32-llama-cpp"
 - !!merge <<: *llamacpp
   name: "intel-sycl-f16-llama-cpp-development"
-  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f16-vllm"
+  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f16-llama-cpp"
+## whisper
+- !!merge <<: *whispercpp
+  name: "nvidia-l4t-arm64-whisper"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-nvidia-l4t-arm64-whisper"
+- !!merge <<: *whispercpp
+  name: "nvidia-l4t-arm64-whisper-development"
+  uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-arm64-whisper"
+- !!merge <<: *whispercpp
+  name: "cpu-whisper"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-whisper"
+- !!merge <<: *whispercpp
+  name: "cpu-whisper-development"
+  uri: "quay.io/go-skynet/local-ai-backends:master-cpu-whisper"
+- !!merge <<: *whispercpp
+  name: "cuda11-whisper"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-11-whisper"
+- !!merge <<: *whispercpp
+  name: "cuda12-whisper"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-whisper"
+- !!merge <<: *whispercpp
+  name: "rocm-whisper"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-rocm-hipblas-whisper"
+- !!merge <<: *whispercpp
+  name: "intel-sycl-f32-whisper"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-sycl-f32-whisper"
+- !!merge <<: *whispercpp
+  name: "intel-sycl-f16-whisper"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-sycl-f16-whisper"
+- !!merge <<: *whispercpp
+  name: "vulkan-whisper"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-vulkan-whisper"
+- !!merge <<: *whispercpp
+  name: "vulkan-whisper-development"
+  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-vulkan-whisper"
+- !!merge <<: *whispercpp
+  name: "metal-whisper"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-whisper"
+- !!merge <<: *whispercpp
+  name: "metal-whisper-development"
+  uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-whisper"
+- !!merge <<: *whispercpp
+  name: "cuda11-whisper-development"
+  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-11-whisper"
+- !!merge <<: *whispercpp
+  name: "cuda12-whisper-development"
+  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-whisper"
+- !!merge <<: *whispercpp
+  name: "rocm-whisper-development"
+  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-rocm-hipblas-whisper"
+- !!merge <<: *whispercpp
+  name: "intel-sycl-f32-whisper-development"
+  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f32-whisper"
+- !!merge <<: *whispercpp
+  name: "intel-sycl-f16-whisper-development"
+  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f16-whisper"
 ## stablediffusion-ggml
 - !!merge <<: *stablediffusionggml
   name: "cpu-stablediffusion-ggml"