feat: split whisper from main binary (#5863)

* feat: split whisper from main binary

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Cleanup makefile

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Add backend builds (missing only darwin)

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Test CI

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Add whisper backend to test runs

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Fixups

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Make sure we have runtime libs

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Less grpc on the main Dockerfile

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* fixups

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Fix hipblas build

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Add whisper to index

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Re-enable CI

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Adapt auto-bumper

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Ettore Di Giacinto
2025-07-20 22:52:45 +02:00
committed by GitHub
parent fab41c29dd
commit b1fc5acd4a
12 changed files with 405 additions and 239 deletions
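
With this change whisper.cpp is no longer compiled into the local-ai binary: it is built as a standalone gRPC backend, packaged as an OCI image, and installed at runtime. A minimal sketch of the new local workflow, using the Makefile target introduced further down (the ocifile:// path comes from that diff):

    # one-shot target: builds the backend container, saves it to
    # backend-images/whisper.tar, builds the local-ai binary (build-api),
    # and installs the backend from the saved tarball
    make backends/whisper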


@@ -745,6 +745,103 @@ jobs:
backend: "stablediffusion-ggml"
dockerfile: "./backend/Dockerfile.go"
context: "./"
# whisper
- build-type: ''
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'true'
tag-suffix: '-cpu-whisper'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
skip-drivers: 'false'
backend: "whisper"
dockerfile: "./backend/Dockerfile.go"
context: "./"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "0"
platforms: 'linux/amd64'
tag-latest: 'true'
tag-suffix: '-gpu-nvidia-cuda-12-whisper'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
skip-drivers: 'false'
backend: "whisper"
dockerfile: "./backend/Dockerfile.go"
context: "./"
- build-type: 'cublas'
cuda-major-version: "11"
cuda-minor-version: "7"
platforms: 'linux/amd64'
tag-latest: 'true'
tag-suffix: '-gpu-nvidia-cuda-11-whisper'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
skip-drivers: 'false'
backend: "whisper"
dockerfile: "./backend/Dockerfile.go"
context: "./"
- build-type: 'sycl_f32'
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'true'
tag-suffix: '-gpu-intel-sycl-f32-whisper'
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
skip-drivers: 'false'
backend: "whisper"
dockerfile: "./backend/Dockerfile.go"
context: "./"
- build-type: 'sycl_f16'
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'true'
tag-suffix: '-gpu-intel-sycl-f16-whisper'
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
skip-drivers: 'false'
backend: "whisper"
dockerfile: "./backend/Dockerfile.go"
context: "./"
- build-type: 'vulkan'
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'true'
tag-suffix: '-gpu-vulkan-whisper'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
skip-drivers: 'false'
backend: "whisper"
dockerfile: "./backend/Dockerfile.go"
context: "./"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "0"
platforms: 'linux/arm64'
skip-drivers: 'true'
tag-latest: 'auto'
tag-suffix: '-nvidia-l4t-arm64-whisper'
base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
runs-on: 'ubuntu-24.04-arm'
backend: "whisper"
dockerfile: "./backend/Dockerfile.go"
context: "./"
- build-type: 'hipblas'
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-hipblas-whisper'
base-image: "rocm/dev-ubuntu-22.04:6.1"
runs-on: 'ubuntu-latest'
skip-drivers: 'false'
backend: "whisper"
dockerfile: "./backend/Dockerfile.go"
context: "./"
llama-cpp-darwin:
runs-on: macOS-14
strategy:
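
Each matrix entry above produces an image whose tag carries the -whisper suffix from tag-suffix. Assuming the quay.io/go-skynet/local-ai-backends registry referenced in the gallery index later in this commit, the published images can be pulled directly, for example:

    docker pull quay.io/go-skynet/local-ai-backends:latest-cpu-whisper
    docker pull quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-whisper
    docker pull quay.io/go-skynet/local-ai-backends:master-gpu-vulkan-whisper   # master (development) build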


@@ -16,7 +16,7 @@ jobs:
- repository: "ggml-org/whisper.cpp"
variable: "WHISPER_CPP_VERSION"
branch: "master"
file: "Makefile"
file: "backend/go/whisper/Makefile"
- repository: "PABannier/bark.cpp"
variable: "BARKCPP_VERSION"
branch: "main"
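
The dependency auto-bumper now tracks WHISPER_CPP_VERSION in the backend's own Makefile instead of the top-level one. A hedged sketch of the edit such a bump performs (the workflow's actual tooling may differ; the variable name and file path are taken from this diff):

    # pin the backend to the latest commit on ggml-org/whisper.cpp master
    LATEST=$(git ls-remote https://github.com/ggml-org/whisper.cpp master | cut -f1)
    sed -i "s/^WHISPER_CPP_VERSION?=.*/WHISPER_CPP_VERSION?=${LATEST}/" backend/go/whisper/Makefile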


@@ -103,7 +103,7 @@ jobs:
make -C backend/python/transformers
make backends/llama-cpp backends/piper backends/stablediffusion-ggml
make backends/llama-cpp backends/piper backends/whisper backends/stablediffusion-ggml
env:
CUDA_VERSION: 12-4
- name: Test
@@ -168,7 +168,7 @@ jobs:
PATH="$PATH:$HOME/go/bin" make protogen-go
- name: Test
run: |
PATH="$PATH:$HOME/go/bin" make backends/llama-cpp backends/piper backends/stablediffusion-ggml docker-build-aio e2e-aio
PATH="$PATH:$HOME/go/bin" make backends/llama-cpp backends/whisper backends/piper backends/stablediffusion-ggml docker-build-aio e2e-aio
- name: Setup tmate session if tests fail
if: ${{ failure() }}
uses: mxschmitt/action-tmate@v3.22
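
Both the Linux test job and the AIO end-to-end job now build and install the whisper backend image alongside the other extracted backends before running their suites; the local equivalent is roughly:

    make backends/llama-cpp backends/piper backends/whisper backends/stablediffusion-ggml
    make test                      # unit and integration suite
    make docker-build-aio e2e-aio  # AIO end-to-end run, as in the second hunk above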


@@ -181,57 +181,12 @@ FROM ${INTEL_BASE_IMAGE} AS intel
RUN wget -qO - https://repositories.intel.com/gpu/intel-graphics.key | \
gpg --yes --dearmor --output /usr/share/keyrings/intel-graphics.gpg
RUN echo "deb [arch=amd64 signed-by=/usr/share/keyrings/intel-graphics.gpg] https://repositories.intel.com/gpu/ubuntu jammy/lts/2350 unified" > /etc/apt/sources.list.d/intel-graphics.list
###################################
###################################
# The grpc target does one thing: it builds and installs GRPC. This is in its own layer so that it can be effectively cached by CI.
# You probably don't need to change anything here, and if you do, make sure that CI is adjusted so that the cache continues to work.
FROM ${GRPC_BASE_IMAGE} AS grpc
# This is a bit of a hack, but it's required in order to be able to effectively cache this layer in CI
ARG GRPC_MAKEFLAGS="-j4 -Otarget"
ARG GRPC_VERSION=v1.65.0
ARG CMAKE_FROM_SOURCE=false
ARG CMAKE_VERSION=3.26.4
ENV MAKEFLAGS=${GRPC_MAKEFLAGS}
WORKDIR /build
RUN apt-get update && \
apt-get install -y --no-install-recommends \
ca-certificates \
build-essential curl libssl-dev \
git && \
intel-oneapi-runtime-libs && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
# Install CMake (the version in 22.04 is too old)
RUN <<EOT bash
if [ "${CMAKE_FROM_SOURCE}" = "true" ]; then
curl -L -s https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}.tar.gz -o cmake.tar.gz && tar xvf cmake.tar.gz && cd cmake-${CMAKE_VERSION} && ./configure && make && make install
else
apt-get update && \
apt-get install -y \
cmake && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
fi
EOT
# We install GRPC to a different prefix here so that we can copy in only the build artifacts later
# saves several hundred MB on the final docker image size vs copying in the entire GRPC source tree
# and running make install in the target container
RUN git clone --recurse-submodules --jobs 4 -b ${GRPC_VERSION} --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
mkdir -p /build/grpc/cmake/build && \
cd /build/grpc/cmake/build && \
sed -i "216i\ TESTONLY" "../../third_party/abseil-cpp/absl/container/CMakeLists.txt" && \
cmake -DgRPC_INSTALL=ON -DgRPC_BUILD_TESTS=OFF -DCMAKE_INSTALL_PREFIX:PATH=/opt/grpc ../.. && \
make && \
make install && \
rm -rf /build
###################################
###################################
@@ -258,9 +213,7 @@ RUN echo "GO_TAGS: $GO_TAGS" && echo "TARGETARCH: $TARGETARCH"
WORKDIR /build
# We need protoc installed, and the version in 22.04 is too old. We will create one as part of installing the GRPC build below,
# but that will also bring in a newer version of absl which stablediffusion cannot compile with. This version of protoc is only
# here so that we can generate the grpc code for the stablediffusion build
# We need protoc installed, and the version in 22.04 is too old.
RUN <<EOT bash
if [ "amd64" = "$TARGETARCH" ]; then
curl -L -s https://github.com/protocolbuffers/protobuf/releases/download/v27.1/protoc-27.1-linux-x86_64.zip -o protoc.zip && \
@@ -282,8 +235,6 @@ FROM builder-base AS builder-backends
ARG TARGETARCH
ARG TARGETVARIANT
COPY --from=grpc /opt/grpc /usr/local
WORKDIR /build
COPY ./Makefile .
@@ -299,7 +250,6 @@ COPY ./pkg/langchain ./pkg/langchain
RUN ls -l ./
RUN make backend-assets
RUN make prepare
RUN make grpcs
# The builder target compiles LocalAI. This target is not the target that will be uploaded to the registry.
@@ -323,8 +273,6 @@ RUN make build
FROM builder-base AS devcontainer
COPY --from=grpc /opt/grpc /usr/local
COPY .devcontainer-scripts /.devcontainer-scripts
RUN apt-get update && \

Makefile (198 changed lines)

@@ -3,33 +3,12 @@ GOTEST=$(GOCMD) test
GOVET=$(GOCMD) vet
BINARY_NAME=local-ai
DETECT_LIBS?=true
# whisper.cpp version
WHISPER_REPO?=https://github.com/ggml-org/whisper.cpp
WHISPER_CPP_VERSION?=032697b9a850dc2615555e2a93a683cc3dd58559
# ONEAPI variables for SYCL
export ONEAPI_VARS?=/opt/intel/oneapi/setvars.sh
ONEAPI_VERSION=2025.1
ONNX_VERSION?=1.20.0
ONNX_ARCH?=x64
ONNX_OS?=linux
export BUILD_TYPE?=
export STABLE_BUILD_TYPE?=$(BUILD_TYPE)
export CMAKE_ARGS?=-DBUILD_SHARED_LIBS=OFF
export WHISPER_CMAKE_ARGS?=-DBUILD_SHARED_LIBS=OFF
export BACKEND_LIBS?=
export WHISPER_DIR=$(abspath ./sources/whisper.cpp)
export WHISPER_INCLUDE_PATH=$(WHISPER_DIR)/include:$(WHISPER_DIR)/ggml/include
export WHISPER_LIBRARY_PATH=$(WHISPER_DIR)/build/src/:$(WHISPER_DIR)/build/ggml/src
CGO_LDFLAGS?=
CGO_LDFLAGS_WHISPER?=
CGO_LDFLAGS_WHISPER+=-lggml
CUDA_LIBPATH?=/usr/local/cuda/lib64/
GO_TAGS?=
BUILD_ID?=
NATIVE?=false
@@ -70,13 +49,6 @@ E2E_BRIDGE_IP?=172.17.0.1
ifndef UNAME_S
UNAME_S := $(shell uname -s)
endif
# IF native is false, we add -DGGML_NATIVE=OFF to CMAKE_ARGS
ifeq ($(NATIVE),false)
CMAKE_ARGS+=-DGGML_NATIVE=OFF
WHISPER_CMAKE_ARGS+=-DGGML_NATIVE=OFF
endif
# Detect if we are running on arm64
ifneq (,$(findstring aarch64,$(shell uname -m)))
ONNX_ARCH=aarch64
@@ -95,114 +67,9 @@ ifeq ($(OS),Darwin)
ifeq ($(OSX_SIGNING_IDENTITY),)
OSX_SIGNING_IDENTITY := $(shell security find-identity -v -p codesigning | grep '"' | head -n 1 | sed -E 's/.*"(.*)"/\1/')
endif
# on OSX, if BUILD_TYPE is blank, we should default to use Metal
ifeq ($(BUILD_TYPE),)
BUILD_TYPE=metal
# disable metal if on Darwin and any other value is explicitly passed.
else ifneq ($(BUILD_TYPE),metal)
CMAKE_ARGS+=-DGGML_METAL=OFF
WHISPER_CMAKE_ARGS+=-DGGML_METAL=OFF
export GGML_NO_ACCELERATE=1
export GGML_NO_METAL=1
GO_LDFLAGS_WHISPER+=-lggml-blas
export WHISPER_LIBRARY_PATH:=$(WHISPER_LIBRARY_PATH):$(WHISPER_DIR)/build/ggml/src/ggml-blas
endif
ifeq ($(BUILD_TYPE),metal)
CGO_LDFLAGS += -framework Accelerate
CGO_LDFLAGS_WHISPER+=-lggml-metal -lggml-blas
CMAKE_ARGS+=-DGGML_METAL=ON
CMAKE_ARGS+=-DGGML_METAL_USE_BF16=ON
CMAKE_ARGS+=-DGGML_METAL_EMBED_LIBRARY=ON
CMAKE_ARGS+=-DGGML_OPENMP=OFF
WHISPER_CMAKE_ARGS+=-DGGML_METAL=ON
WHISPER_CMAKE_ARGS+=-DGGML_METAL_USE_BF16=ON
WHISPER_CMAKE_ARGS+=-DGGML_METAL_EMBED_LIBRARY=ON
WHISPER_CMAKE_ARGS+=-DWHISPER_BUILD_EXAMPLES=OFF
WHISPER_CMAKE_ARGS+=-DWHISPER_BUILD_TESTS=OFF
WHISPER_CMAKE_ARGS+=-DWHISPER_BUILD_SERVER=OFF
WHISPER_CMAKE_ARGS+=-DGGML_OPENMP=OFF
export WHISPER_LIBRARY_PATH:=$(WHISPER_LIBRARY_PATH):$(WHISPER_DIR)/build/ggml/src/ggml-metal/:$(WHISPER_DIR)/build/ggml/src/ggml-blas
else
CGO_LDFLAGS_WHISPER+=-lggml-blas
export WHISPER_LIBRARY_PATH:=$(WHISPER_LIBRARY_PATH):$(WHISPER_DIR)/build/ggml/src/ggml-blas
endif
else
CGO_LDFLAGS_WHISPER+=-lgomp
endif
ifeq ($(BUILD_TYPE),openblas)
CGO_LDFLAGS+=-lopenblas
export GGML_OPENBLAS=1
endif
ifeq ($(BUILD_TYPE),cublas)
CGO_LDFLAGS+=-lcublas -lcudart -L$(CUDA_LIBPATH) -L$(CUDA_LIBPATH)/stubs/ -lcuda
export GGML_CUDA=1
CMAKE_ARGS+=-DGGML_CUDA=ON
WHISPER_CMAKE_ARGS+=-DGGML_CUDA=ON
CGO_LDFLAGS_WHISPER+=-lcufft -lggml-cuda
export WHISPER_LIBRARY_PATH:=$(WHISPER_LIBRARY_PATH):$(WHISPER_DIR)/build/ggml/src/ggml-cuda/
endif
ifeq ($(BUILD_TYPE),vulkan)
CMAKE_ARGS+=-DGGML_VULKAN=1
WHISPER_CMAKE_ARGS+=-DGGML_VULKAN=1
CGO_LDFLAGS_WHISPER+=-lggml-vulkan -lvulkan
export WHISPER_LIBRARY_PATH:=$(WHISPER_LIBRARY_PATH):$(WHISPER_DIR)/build/ggml/src/ggml-vulkan/
endif
ifneq (,$(findstring sycl,$(BUILD_TYPE)))
export GGML_SYCL=1
CMAKE_ARGS+=-DGGML_SYCL=ON
WHISPER_CMAKE_ARGS+=-DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
export CC=icx
export CXX=icpx
CGO_LDFLAGS_WHISPER += -fsycl -L${DNNLROOT}/lib -rpath ${ONEAPI_ROOT}/${ONEAPI_VERSION}/lib -ldnnl ${MKLROOT}/lib/intel64/libmkl_sycl.a -fiopenmp -fopenmp-targets=spir64 -lOpenCL -lggml-sycl
CGO_LDFLAGS_WHISPER += $(shell pkg-config --libs mkl-static-lp64-gomp)
CGO_CXXFLAGS_WHISPER += -fiopenmp -fopenmp-targets=spir64
CGO_CXXFLAGS_WHISPER += $(shell pkg-config --cflags mkl-static-lp64-gomp )
export WHISPER_LIBRARY_PATH:=$(WHISPER_LIBRARY_PATH):$(WHISPER_DIR)/build/ggml/src/ggml-sycl/
endif
ifeq ($(BUILD_TYPE),sycl_f16)
export GGML_SYCL_F16=1
CMAKE_ARGS+=-DGGML_SYCL_F16=ON
WHISPER_CMAKE_ARGS+=-DGGML_SYCL_F16=ON
endif
ifeq ($(BUILD_TYPE),hipblas)
ROCM_HOME ?= /opt/rocm
ROCM_PATH ?= /opt/rocm
LD_LIBRARY_PATH ?= /opt/rocm/lib:/opt/rocm/llvm/lib
export CXX=$(ROCM_HOME)/llvm/bin/clang++
export CC=$(ROCM_HOME)/llvm/bin/clang
export STABLE_BUILD_TYPE=
export GGML_HIP=1
GPU_TARGETS ?= gfx803,gfx900,gfx906,gfx908,gfx90a,gfx942,gfx1010,gfx1030,gfx1032,gfx1100,gfx1101,gfx1102
AMDGPU_TARGETS ?= "$(GPU_TARGETS)"
CMAKE_ARGS+=-DGGML_HIP=ON -DAMDGPU_TARGETS="$(AMDGPU_TARGETS)" -DGPU_TARGETS="$(GPU_TARGETS)"
CGO_LDFLAGS += -O3 --rtlib=compiler-rt -unwindlib=libgcc -lhipblas -lrocblas --hip-link -L${ROCM_HOME}/lib/llvm/lib
endif
ifeq ($(BUILD_TYPE),metal)
CGO_LDFLAGS+=-framework Foundation -framework Metal -framework MetalKit -framework MetalPerformanceShaders
export GGML_METAL=1
endif
ifeq ($(BUILD_TYPE),clblas)
CGO_LDFLAGS+=-lOpenCL -lclblast
export GGML_OPENBLAS=1
endif
# glibc-static or glibc-devel-static required
ifeq ($(STATIC),true)
LD_FLAGS+=-linkmode external -extldflags -static
endif
ALL_GRPC_BACKENDS=backend-assets/grpc/huggingface
ALL_GRPC_BACKENDS+=backend-assets/grpc/whisper
ALL_GRPC_BACKENDS+=backend-assets/grpc/local-store
ALL_GRPC_BACKENDS+=backend-assets/grpc/silero-vad
ALL_GRPC_BACKENDS+=$(OPTIONAL_GRPC)
@@ -221,7 +88,7 @@ ifeq ($(BUILD_API_ONLY),true)
GRPC_BACKENDS=
endif
.PHONY: all test build vendor get-sources prepare-sources prepare
.PHONY: all test build vendor
all: help
@@ -239,50 +106,18 @@ else
mv backend-assets/lib/libonnxruntime.so.$(ONNX_VERSION) backend-assets/lib/libonnxruntime.so.1
endif
## whisper
sources/whisper.cpp:
mkdir -p sources/whisper.cpp
cd sources/whisper.cpp && \
git init && \
git remote add origin $(WHISPER_REPO) && \
git fetch origin && \
git checkout $(WHISPER_CPP_VERSION) && \
git submodule update --init --recursive --depth 1 --single-branch
sources/whisper.cpp/build/src/libwhisper.a: sources/whisper.cpp
cd sources/whisper.cpp && cmake $(WHISPER_CMAKE_ARGS) . -B ./build
cd sources/whisper.cpp/build && cmake --build . --config Release
get-sources: sources/whisper.cpp
replace:
$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp=$(CURDIR)/sources/whisper.cpp
$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp/bindings/go=$(CURDIR)/sources/whisper.cpp/bindings/go
dropreplace:
$(GOCMD) mod edit -dropreplace github.com/ggerganov/whisper.cpp
$(GOCMD) mod edit -dropreplace github.com/ggerganov/whisper.cpp/bindings/go
prepare-sources: get-sources replace
$(GOCMD) mod download
## GENERIC
rebuild: ## Rebuilds the project
$(GOCMD) clean -cache
$(MAKE) -C sources/whisper.cpp clean
$(MAKE) build
prepare: prepare-sources $(OPTIONAL_TARGETS)
clean: ## Remove build related file
$(GOCMD) clean -cache
rm -f prepare
rm -rf ./sources
rm -rf $(BINARY_NAME)
rm -rf release/
rm -rf backend-assets/*
$(MAKE) -C backend/cpp/grpc clean
$(MAKE) dropreplace
$(MAKE) protogen-clean
rmdir pkg/grpc/proto || true
@@ -301,7 +136,7 @@ install-go-tools:
go install github.com/GeertJohan/go.rice/rice@latest
## Build:
build: prepare backend-assets grpcs install-go-tools ## Build the project
build: backend-assets grpcs install-go-tools ## Build the project
$(info ${GREEN}I local-ai build info:${RESET})
$(info ${GREEN}I BUILD_TYPE: ${YELLOW}$(BUILD_TYPE)${RESET})
$(info ${GREEN}I GO_TAGS: ${YELLOW}$(GO_TAGS)${RESET})
@@ -338,7 +173,7 @@ osx-signed: build
codesign --deep --force --sign "$(OSX_SIGNING_IDENTITY)" --entitlements "./Entitlements.plist" "./$(BINARY_NAME)"
## Run
run: prepare ## run local-ai
run: ## run local-ai
CGO_LDFLAGS="$(CGO_LDFLAGS)" $(GOCMD) run ./
test-models/testmodel.ggml:
@@ -359,7 +194,7 @@ prepare-test: grpcs
########################################################
## Test targets
test: prepare test-models/testmodel.ggml grpcs
test: test-models/testmodel.ggml grpcs
@echo 'Running tests'
export GO_TAGS="debug"
$(MAKE) prepare-test
@@ -378,6 +213,9 @@ backends/piper: docker-build-piper docker-save-piper build-api
backends/stablediffusion-ggml: docker-build-stablediffusion-ggml docker-save-stablediffusion-ggml build-api
./local-ai backends install "ocifile://$(abspath ./backend-images/stablediffusion-ggml.tar)"
backends/whisper: docker-build-whisper docker-save-whisper build-api
./local-ai backends install "ocifile://$(abspath ./backend-images/whisper.tar)"
########################################################
## AIO tests
########################################################
@@ -611,28 +449,20 @@ ifneq ($(UPX),)
$(UPX) backend-assets/grpc/huggingface
endif
backend-assets/grpc/silero-vad: protogen-go replace backend-assets/grpc backend-assets/lib/libonnxruntime.so.1
backend-assets/grpc/silero-vad: protogen-go backend-assets/grpc backend-assets/lib/libonnxruntime.so.1
CGO_LDFLAGS="$(CGO_LDFLAGS)" CPATH="$(CPATH):$(CURDIR)/sources/onnxruntime/include/" LIBRARY_PATH=$(CURDIR)/backend-assets/lib \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/silero-vad ./backend/go/vad/silero
ifneq ($(UPX),)
$(UPX) backend-assets/grpc/silero-vad
endif
backend-assets/grpc/whisper: protogen-go replace sources/whisper.cpp sources/whisper.cpp/build/src/libwhisper.a backend-assets/grpc
CGO_LDFLAGS="$(CGO_LDFLAGS) $(CGO_LDFLAGS_WHISPER)" C_INCLUDE_PATH="${WHISPER_INCLUDE_PATH}" LIBRARY_PATH="${WHISPER_LIBRARY_PATH}" LD_LIBRARY_PATH="${WHISPER_LIBRARY_PATH}" \
CGO_CXXFLAGS="$(CGO_CXXFLAGS_WHISPER)" \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/whisper ./backend/go/transcribe/whisper
ifneq ($(UPX),)
$(UPX) backend-assets/grpc/whisper
endif
backend-assets/grpc/local-store: backend-assets/grpc protogen-go replace
backend-assets/grpc/local-store: backend-assets/grpc protogen-go
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/local-store ./backend/go/stores/
ifneq ($(UPX),)
$(UPX) backend-assets/grpc/local-store
endif
grpcs: prepare protogen-go $(GRPC_BACKENDS)
grpcs: protogen-go $(GRPC_BACKENDS)
DOCKER_IMAGE?=local-ai
DOCKER_AIO_IMAGE?=local-ai-aio
@@ -696,7 +526,7 @@ backend-images:
mkdir -p backend-images
docker-build-llama-cpp:
docker build -t local-ai-backend:llama-cpp -f backend/Dockerfile.llama-cpp .
docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg IMAGE_BASE=$(IMAGE_BASE) -t local-ai-backend:llama-cpp -f backend/Dockerfile.llama-cpp .
docker-build-bark-cpp:
docker build -t local-ai-backend:bark-cpp -f backend/Dockerfile.go --build-arg BACKEND=bark-cpp .
@@ -734,6 +564,12 @@ docker-build-diffusers:
docker-build-kokoro:
docker build -t local-ai-backend:kokoro -f backend/Dockerfile.python --build-arg BACKEND=kokoro .
docker-build-whisper:
docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:whisper -f backend/Dockerfile.go --build-arg BACKEND=whisper .
docker-save-whisper: backend-images
docker save local-ai-backend:whisper -o backend-images/whisper.tar
docker-build-faster-whisper:
docker build -t local-ai-backend:faster-whisper -f backend/Dockerfile.python --build-arg BACKEND=faster-whisper .
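
docker-build-whisper forwards BUILD_TYPE and BASE_IMAGE into backend/Dockerfile.go, so accelerated variants can be produced locally the same way CI does. A sketch for a CUDA build (ubuntu:22.04 base as in the workflow matrix; CI additionally selects the CUDA version through matrix-supplied settings):

    make BUILD_TYPE=cublas BASE_IMAGE=ubuntu:22.04 docker-build-whisper
    make docker-save-whisper
    ./local-ai backends install "ocifile://$PWD/backend-images/whisper.tar"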


@@ -96,6 +96,17 @@ RUN if [ "${BUILD_TYPE}" = "hipblas" ] && [ "${SKIP_DRIVERS}" = "false" ]; then
ldconfig \
; fi
# Intel oneAPI requirements
RUN <<EOT bash
if [[ "${BUILD_TYPE}" == sycl* ]] && [ "${SKIP_DRIVERS}" = "false" ]; then
apt-get update && \
apt-get install -y --no-install-recommends \
intel-oneapi-runtime-libs && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
fi
EOT
# Install Go
RUN curl -L -s https://go.dev/dl/go${GO_VERSION}.linux-${TARGETARCH}.tar.gz | tar -C /usr/local -xz
ENV PATH=$PATH:/root/go/bin:/usr/local/go/bin:/usr/local/bin
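
The new conditional installs intel-oneapi-runtime-libs only for sycl* builds with drivers enabled, keeping the SYCL whisper images runnable without growing the other variants. Invoking the Dockerfile directly for an Intel build would look roughly like this (build-arg names taken from the Makefile's docker-build-whisper target, base image from the CI matrix):

    docker build -f backend/Dockerfile.go \
      --build-arg BACKEND=whisper \
      --build-arg BUILD_TYPE=sycl_f16 \
      --build-arg BASE_IMAGE=quay.io/go-skynet/intel-oneapi-base:latest \
      -t local-ai-backend:whisper .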

backend/go/whisper/Makefile (new file, 131 lines)

@@ -0,0 +1,131 @@
GOCMD=go
NATIVE?=false
BUILD_TYPE?=
CMAKE_ARGS?=
# whisper.cpp version
WHISPER_REPO?=https://github.com/ggml-org/whisper.cpp
WHISPER_CPP_VERSION?=032697b9a850dc2615555e2a93a683cc3dd58559
export WHISPER_CMAKE_ARGS?=-DBUILD_SHARED_LIBS=OFF
export WHISPER_DIR=$(abspath ./sources/whisper.cpp)
export WHISPER_INCLUDE_PATH=$(WHISPER_DIR)/include:$(WHISPER_DIR)/ggml/include
export WHISPER_LIBRARY_PATH=$(WHISPER_DIR)/build/src/:$(WHISPER_DIR)/build/ggml/src
CGO_LDFLAGS_WHISPER?=
CGO_LDFLAGS_WHISPER+=-lggml
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF -DLLAMA_CURL=OFF
CUDA_LIBPATH?=/usr/local/cuda/lib64/
ONEAPI_VERSION?=2025.1
# IF native is false, we add -DGGML_NATIVE=OFF to CMAKE_ARGS
ifeq ($(NATIVE),false)
CMAKE_ARGS+=-DGGML_NATIVE=OFF
WHISPER_CMAKE_ARGS+=-DGGML_NATIVE=OFF
endif
CURRENT_MAKEFILE_DIR := $(dir $(abspath $(lastword $(MAKEFILE_LIST))))
ifeq ($(NATIVE),false)
CMAKE_ARGS+=-DGGML_NATIVE=OFF
endif
# If build type is cublas, then we set -DGGML_CUDA=ON to CMAKE_ARGS automatically
ifeq ($(BUILD_TYPE),cublas)
CGO_LDFLAGS+=-lcublas -lcudart -L$(CUDA_LIBPATH) -L$(CUDA_LIBPATH)/stubs/ -lcuda
CMAKE_ARGS+=-DGGML_CUDA=ON
CGO_LDFLAGS_WHISPER+=-lcufft -lggml-cuda
export WHISPER_LIBRARY_PATH:=$(WHISPER_LIBRARY_PATH):$(WHISPER_DIR)/build/ggml/src/ggml-cuda/
# If build type is openblas then we set -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
# to CMAKE_ARGS automatically
else ifeq ($(BUILD_TYPE),openblas)
CMAKE_ARGS+=-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
# If build type is clblas (openCL) we set -DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path
else ifeq ($(BUILD_TYPE),clblas)
CMAKE_ARGS+=-DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path
# If it's hipblas we do have also to set CC=/opt/rocm/llvm/bin/clang CXX=/opt/rocm/llvm/bin/clang++
else ifeq ($(BUILD_TYPE),hipblas)
ROCM_HOME ?= /opt/rocm
ROCM_PATH ?= /opt/rocm
LD_LIBRARY_PATH ?= /opt/rocm/lib:/opt/rocm/llvm/lib
export STABLE_BUILD_TYPE=
export CXX=$(ROCM_HOME)/llvm/bin/clang++
export CC=$(ROCM_HOME)/llvm/bin/clang
# GPU_TARGETS ?= gfx803,gfx900,gfx906,gfx908,gfx90a,gfx942,gfx1010,gfx1030,gfx1032,gfx1100,gfx1101,gfx1102
# AMDGPU_TARGETS ?= "$(GPU_TARGETS)"
CMAKE_ARGS+=-DGGML_HIP=ON
CGO_LDFLAGS += -O3 --rtlib=compiler-rt -unwindlib=libgcc -lhipblas -lrocblas --hip-link -L${ROCM_HOME}/lib/llvm/lib -L$(CURRENT_MAKEFILE_DIR)/sources/whisper.cpp/build/ggml/src/ggml-hip/ -lggml-hip
# CMAKE_ARGS+=-DGGML_HIP=ON -DAMDGPU_TARGETS="$(AMDGPU_TARGETS)" -DGPU_TARGETS="$(GPU_TARGETS)"
else ifeq ($(BUILD_TYPE),vulkan)
CMAKE_ARGS+=-DGGML_VULKAN=1
CGO_LDFLAGS_WHISPER+=-lggml-vulkan -lvulkan
export WHISPER_LIBRARY_PATH:=$(WHISPER_LIBRARY_PATH):$(WHISPER_DIR)/build/ggml/src/ggml-vulkan/
else ifeq ($(OS),Darwin)
ifeq ($(BUILD_TYPE),)
BUILD_TYPE=metal
endif
ifneq ($(BUILD_TYPE),metal)
CMAKE_ARGS+=-DGGML_METAL=OFF
CGO_LDFLAGS_WHISPER+=-lggml-blas
export WHISPER_LIBRARY_PATH:=$(WHISPER_LIBRARY_PATH):$(WHISPER_DIR)/build/ggml/src/ggml-blas
else
CMAKE_ARGS+=-DGGML_METAL=ON
CMAKE_ARGS+=-DGGML_METAL_EMBED_LIBRARY=ON
CMAKE_ARGS+=-DGGML_METAL_USE_BF16=ON
CMAKE_ARGS+=-DGGML_OPENMP=OFF
CMAKE_ARGS+=-DWHISPER_BUILD_EXAMPLES=OFF
CMAKE_ARGS+=-DWHISPER_BUILD_TESTS=OFF
CMAKE_ARGS+=-DWHISPER_BUILD_SERVER=OFF
CGO_LDFLAGS += -framework Accelerate
CGO_LDFLAGS_WHISPER+=-lggml-metal -lggml-blas
export WHISPER_LIBRARY_PATH:=$(WHISPER_LIBRARY_PATH):$(WHISPER_DIR)/build/ggml/src/ggml-metal/:$(WHISPER_DIR)/build/ggml/src/ggml-blas
endif
TARGET+=--target ggml-metal
endif
ifneq (,$(findstring sycl,$(BUILD_TYPE)))
export CC=icx
export CXX=icpx
CGO_LDFLAGS_WHISPER += -fsycl -L${DNNLROOT}/lib -rpath ${ONEAPI_ROOT}/${ONEAPI_VERSION}/lib -ldnnl ${MKLROOT}/lib/intel64/libmkl_sycl.a -fiopenmp -fopenmp-targets=spir64 -lOpenCL -lggml-sycl
CGO_LDFLAGS_WHISPER += $(shell pkg-config --libs mkl-static-lp64-gomp)
CGO_CXXFLAGS_WHISPER += -fiopenmp -fopenmp-targets=spir64
CGO_CXXFLAGS_WHISPER += $(shell pkg-config --cflags mkl-static-lp64-gomp )
export WHISPER_LIBRARY_PATH:=$(WHISPER_LIBRARY_PATH):$(WHISPER_DIR)/build/ggml/src/ggml-sycl/
CMAKE_ARGS+=-DGGML_SYCL=ON \
-DCMAKE_C_COMPILER=icx \
-DCMAKE_CXX_COMPILER=icpx \
-DCMAKE_CXX_FLAGS="-fsycl"
endif
ifeq ($(BUILD_TYPE),sycl_f16)
CMAKE_ARGS+=-DGGML_SYCL_F16=ON
endif
ifneq ($(OS),Darwin)
CGO_LDFLAGS_WHISPER+=-lgomp
endif
## whisper
sources/whisper.cpp:
mkdir -p sources/whisper.cpp
cd sources/whisper.cpp && \
git init && \
git remote add origin $(WHISPER_REPO) && \
git fetch origin && \
git checkout $(WHISPER_CPP_VERSION) && \
git submodule update --init --recursive --depth 1 --single-branch
sources/whisper.cpp/build/src/libwhisper.a: sources/whisper.cpp
cd sources/whisper.cpp && cmake $(CMAKE_ARGS) $(WHISPER_CMAKE_ARGS) . -B ./build
cd sources/whisper.cpp/build && cmake --build . --config Release
whisper: sources/whisper.cpp sources/whisper.cpp/build/src/libwhisper.a
$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp=$(CURDIR)/sources/whisper.cpp
$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp/bindings/go=$(CURDIR)/sources/whisper.cpp/bindings/go
CGO_LDFLAGS="$(CGO_LDFLAGS) $(CGO_LDFLAGS_WHISPER)" C_INCLUDE_PATH="${WHISPER_INCLUDE_PATH}" LIBRARY_PATH="${WHISPER_LIBRARY_PATH}" LD_LIBRARY_PATH="${WHISPER_LIBRARY_PATH}" \
CGO_CXXFLAGS="$(CGO_CXXFLAGS_WHISPER)" \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o whisper ./
package:
bash package.sh
build: whisper package
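
The standalone Makefile can also be driven outside Docker; a minimal sketch, run from backend/go/whisper and assuming cmake, a C/C++ toolchain, and Go are available:

    make build                     # clones whisper.cpp at the pinned commit, builds libwhisper.a,
                                   # compiles the Go gRPC server, then runs package.sh
    make BUILD_TYPE=cublas build   # CUDA variant: adds -DGGML_CUDA=ON and links ggml-cuda/cufft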

backend/go/whisper/package.sh (new executable file, 52 lines)

@@ -0,0 +1,52 @@
#!/bin/bash
# Script to copy the appropriate libraries based on architecture
# This script is used in the final stage of the Dockerfile
set -e
CURDIR=$(dirname "$(realpath $0)")
# Create lib directory
mkdir -p $CURDIR/package/lib
cp -avrf $CURDIR/whisper $CURDIR/package/
cp -rfv $CURDIR/run.sh $CURDIR/package/
# Detect architecture and copy appropriate libraries
if [ -f "/lib64/ld-linux-x86-64.so.2" ]; then
# x86_64 architecture
echo "Detected x86_64 architecture, copying x86_64 libraries..."
cp -arfLv /lib64/ld-linux-x86-64.so.2 $CURDIR/package/lib/ld.so
cp -arfLv /lib/x86_64-linux-gnu/libc.so.6 $CURDIR/package/lib/libc.so.6
cp -arfLv /lib/x86_64-linux-gnu/libgcc_s.so.1 $CURDIR/package/lib/libgcc_s.so.1
cp -arfLv /lib/x86_64-linux-gnu/libstdc++.so.6 $CURDIR/package/lib/libstdc++.so.6
cp -arfLv /lib/x86_64-linux-gnu/libm.so.6 $CURDIR/package/lib/libm.so.6
cp -arfLv /lib/x86_64-linux-gnu/libgomp.so.1 $CURDIR/package/lib/libgomp.so.1
cp -arfLv /lib/x86_64-linux-gnu/libgcc_s.so.1 $CURDIR/package/lib/libgcc_s.so.1
cp -arfLv /lib/x86_64-linux-gnu/libstdc++.so.6 $CURDIR/package/lib/libstdc++.so.6
cp -arfLv /lib/x86_64-linux-gnu/libdl.so.2 $CURDIR/package/lib/libdl.so.2
cp -arfLv /lib/x86_64-linux-gnu/librt.so.1 $CURDIR/package/lib/librt.so.1
cp -arfLv /lib/x86_64-linux-gnu/libpthread.so.0 $CURDIR/package/lib/libpthread.so.0
elif [ -f "/lib/ld-linux-aarch64.so.1" ]; then
# ARM64 architecture
echo "Detected ARM64 architecture, copying ARM64 libraries..."
cp -arfLv /lib/ld-linux-aarch64.so.1 $CURDIR/package/lib/ld.so
cp -arfLv /lib/aarch64-linux-gnu/libc.so.6 $CURDIR/package/lib/libc.so.6
cp -arfLv /lib/aarch64-linux-gnu/libgcc_s.so.1 $CURDIR/package/lib/libgcc_s.so.1
cp -arfLv /lib/aarch64-linux-gnu/libstdc++.so.6 $CURDIR/package/lib/libstdc++.so.6
cp -arfLv /lib/aarch64-linux-gnu/libm.so.6 $CURDIR/package/lib/libm.so.6
cp -arfLv /lib/aarch64-linux-gnu/libgomp.so.1 $CURDIR/package/lib/libgomp.so.1
cp -arfLv /lib/aarch64-linux-gnu/libgcc_s.so.1 $CURDIR/package/lib/libgcc_s.so.1
cp -arfLv /lib/aarch64-linux-gnu/libstdc++.so.6 $CURDIR/package/lib/libstdc++.so.6
cp -arfLv /lib/aarch64-linux-gnu/libdl.so.2 $CURDIR/package/lib/libdl.so.2
cp -arfLv /lib/aarch64-linux-gnu/librt.so.1 $CURDIR/package/lib/librt.so.1
cp -arfLv /lib/aarch64-linux-gnu/libpthread.so.0 $CURDIR/package/lib/libpthread.so.0
else
echo "Error: Could not detect architecture"
exit 1
fi
echo "Packaging completed successfully"
ls -liah $CURDIR/package/
ls -liah $CURDIR/package/lib/
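
The result is a self-contained package/ directory that run.sh (next file) can launch against the bundled dynamic loader; roughly:

    ls package/      # whisper  run.sh  lib/
    ls package/lib/  # ld.so libc.so.6 libdl.so.2 libgcc_s.so.1 libgomp.so.1 libm.so.6 libpthread.so.0 librt.so.1 libstdc++.so.6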

backend/go/whisper/run.sh (new executable file, 14 lines)

@@ -0,0 +1,14 @@
#!/bin/bash
set -ex
CURDIR=$(dirname "$(realpath $0)")
export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
# If there is a lib/ld.so, use it
if [ -f $CURDIR/lib/ld.so ]; then
echo "Using lib/ld.so"
exec $CURDIR/lib/ld.so $CURDIR/whisper "$@"
fi
exec $CURDIR/whisper "$@"


@@ -26,6 +26,28 @@
vulkan: "vulkan-llama-cpp"
nvidia-l4t: "nvidia-l4t-arm64-llama-cpp"
darwin-x86: "darwin-x86-llama-cpp"
- &whisper
name: "whisper"
alias: "whisper"
license: mit
icon: https://user-images.githubusercontent.com/1991296/235238348-05d0f6a4-da44-4900-a1de-d0707e75b763.jpeg
description: |
Port of OpenAI's Whisper model in C/C++
urls:
- https://github.com/ggml-org/whisper.cpp
tags:
- audio-transcription
- CPU
- GPU
- CUDA
- HIP
capabilities:
default: "cpu-whisper"
nvidia: "cuda12-whisper"
intel: "intel-sycl-f16-whisper"
amd: "rocm-whisper"
vulkan: "vulkan-whisper"
nvidia-l4t: "nvidia-l4t-arm64-whisper"
- &stablediffusionggml
name: "stablediffusion-ggml"
alias: "stablediffusion-ggml"
@@ -326,7 +348,62 @@
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f32-llama-cpp"
- !!merge <<: *llamacpp
name: "intel-sycl-f16-llama-cpp-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f16-vllm"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f16-llama-cpp"
## whisper
- !!merge <<: *whisper
name: "nvidia-l4t-arm64-whisper"
uri: "quay.io/go-skynet/local-ai-backends:latest-nvidia-l4t-arm64-whisper"
- !!merge <<: *whisper
name: "nvidia-l4t-arm64-whisper-development"
uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-arm64-whisper"
- !!merge <<: *whisper
name: "cpu-whisper"
uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-whisper"
- !!merge <<: *whisper
name: "cpu-whisper-development"
uri: "quay.io/go-skynet/local-ai-backends:master-cpu-whisper"
- !!merge <<: *whisper
name: "cuda11-whisper"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-11-whisper"
- !!merge <<: *whisper
name: "cuda12-whisper"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-whisper"
- !!merge <<: *whisper
name: "rocm-whisper"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-rocm-hipblas-whisper"
- !!merge <<: *whisper
name: "intel-sycl-f32-whisper"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-sycl-f32-whisper"
- !!merge <<: *whisper
name: "intel-sycl-f16-whisper"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-sycl-f16-whisper"
- !!merge <<: *whisper
name: "vulkan-whisper"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-vulkan-whisper"
- !!merge <<: *whisper
name: "vulkan-whisper-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-vulkan-whisper"
- !!merge <<: *whisper
name: "metal-whisper"
uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-whisper"
- !!merge <<: *whisper
name: "metal-whisper-development"
uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-whisper"
- !!merge <<: *whisper
name: "cuda11-whisper-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-11-whisper"
- !!merge <<: *whisper
name: "cuda12-whisper-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-whisper"
- !!merge <<: *whisper
name: "rocm-whisper-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-rocm-hipblas-whisper"
- !!merge <<: *whisper
name: "intel-sycl-f32-whisper-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f32-whisper"
- !!merge <<: *whisper
name: "intel-sycl-f16-whisper-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f16-whisper"
## stablediffusion-ggml
- !!merge <<: *stablediffusionggml
name: "cpu-stablediffusion-ggml"