feat(qwen-asr): add support for qwen-asr (#8281)

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
Ettore Di Giacinto
2026-01-29 21:50:35 +01:00
committed by GitHub
parent dd8e74a486
commit 1e08e02598
22 changed files with 651 additions and 2 deletions

View File

@@ -105,6 +105,19 @@ jobs:
dockerfile: "./backend/Dockerfile.python"
context: "./"
ubuntu-version: '2404'
# qwen-asr backend image: cuBLAS build against CUDA 12.9 for linux/amd64,
# published with the '-gpu-nvidia-cuda-12-qwen-asr' tag suffix.
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "9"
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-12-qwen-asr'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:24.04"
skip-drivers: 'false'
backend: "qwen-asr"
dockerfile: "./backend/Dockerfile.python"
context: "./"
ubuntu-version: '2404'
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "9"
@@ -366,6 +379,19 @@ jobs:
dockerfile: "./backend/Dockerfile.python"
context: "./"
ubuntu-version: '2404'
# qwen-asr backend image: cuBLAS build against CUDA 13.0 for linux/amd64,
# published with the '-gpu-nvidia-cuda-13-qwen-asr' tag suffix.
- build-type: 'cublas'
cuda-major-version: "13"
cuda-minor-version: "0"
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-13-qwen-asr'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:24.04"
skip-drivers: 'false'
backend: "qwen-asr"
dockerfile: "./backend/Dockerfile.python"
context: "./"
ubuntu-version: '2404'
- build-type: 'cublas'
cuda-major-version: "13"
cuda-minor-version: "0"
@@ -470,6 +496,19 @@ jobs:
backend: "vibevoice"
dockerfile: "./backend/Dockerfile.python"
context: "./"
# qwen-asr backend image: l4t build against CUDA 13.0 for linux/arm64,
# built on the 'ubuntu-24.04-arm' runner.
- build-type: 'l4t'
cuda-major-version: "13"
cuda-minor-version: "0"
platforms: 'linux/arm64'
tag-latest: 'auto'
tag-suffix: '-nvidia-l4t-cuda-13-arm64-qwen-asr'
runs-on: 'ubuntu-24.04-arm'
base-image: "ubuntu:24.04"
skip-drivers: 'false'
ubuntu-version: '2404'
backend: "qwen-asr"
dockerfile: "./backend/Dockerfile.python"
context: "./"
- build-type: 'l4t'
cuda-major-version: "13"
cuda-minor-version: "0"
@@ -732,6 +771,19 @@ jobs:
dockerfile: "./backend/Dockerfile.python"
context: "./"
ubuntu-version: '2404'
# qwen-asr backend image: hipblas build on the ROCm 6.4.4 dev base image for
# linux/amd64; the CUDA version fields are intentionally empty strings.
- build-type: 'hipblas'
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-rocm-hipblas-qwen-asr'
runs-on: 'arc-runner-set'
base-image: "rocm/dev-ubuntu-24.04:6.4.4"
skip-drivers: 'false'
backend: "qwen-asr"
dockerfile: "./backend/Dockerfile.python"
context: "./"
ubuntu-version: '2404'
- build-type: 'hipblas'
cuda-major-version: ""
cuda-minor-version: ""
@@ -889,6 +941,19 @@ jobs:
dockerfile: "./backend/Dockerfile.python"
context: "./"
ubuntu-version: '2204'
# qwen-asr backend image: l4t build on the l4t-jetpack r36.4.0 base image for
# linux/arm64; driver installation is skipped (skip-drivers: 'true').
- build-type: 'l4t'
cuda-major-version: "12"
cuda-minor-version: "0"
platforms: 'linux/arm64'
tag-latest: 'auto'
tag-suffix: '-nvidia-l4t-qwen-asr'
runs-on: 'ubuntu-24.04-arm'
base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
skip-drivers: 'true'
backend: "qwen-asr"
dockerfile: "./backend/Dockerfile.python"
context: "./"
ubuntu-version: '2204'
- build-type: 'l4t'
cuda-major-version: "12"
cuda-minor-version: "0"
@@ -968,6 +1033,19 @@ jobs:
dockerfile: "./backend/Dockerfile.python"
context: "./"
ubuntu-version: '2404'
# qwen-asr backend image: intel build on the oneAPI basekit base image for
# linux/amd64; the CUDA version fields are intentionally empty strings.
- build-type: 'intel'
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-intel-qwen-asr'
runs-on: 'arc-runner-set'
base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04"
skip-drivers: 'false'
backend: "qwen-asr"
dockerfile: "./backend/Dockerfile.python"
context: "./"
ubuntu-version: '2404'
- build-type: 'intel'
cuda-major-version: ""
cuda-minor-version: ""
@@ -1380,6 +1458,19 @@ jobs:
dockerfile: "./backend/Dockerfile.python"
context: "./"
ubuntu-version: '2404'
# qwen-asr backend image: CPU variant (empty build-type, '-cpu-qwen-asr'
# suffix), built for both linux/amd64 and linux/arm64.
- build-type: ''
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64,linux/arm64'
tag-latest: 'auto'
tag-suffix: '-cpu-qwen-asr'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:24.04"
skip-drivers: 'false'
backend: "qwen-asr"
dockerfile: "./backend/Dockerfile.python"
context: "./"
ubuntu-version: '2404'
- build-type: ''
cuda-major-version: ""
cuda-minor-version: ""

View File

@@ -304,6 +304,25 @@ jobs:
run: |
make --jobs=5 --output-sync=target -C backend/python/qwen-tts
make --jobs=5 --output-sync=target -C backend/python/qwen-tts test
# CI job for the qwen-asr Python backend: checks out the repository with
# submodules, installs system/Python tooling, then builds the backend and
# runs its `test` make target.
tests-qwen-asr:
runs-on: ubuntu-latest
steps:
- name: Clone
uses: actions/checkout@v6
with:
submodules: true
# Installs build tools, audio utilities (ffmpeg, sox), uv and grpcio-tools.
- name: Dependencies
run: |
sudo apt-get update
sudo apt-get install -y build-essential ffmpeg sox
sudo apt-get install -y ca-certificates cmake curl patch python3-pip
# Install UV
curl -LsSf https://astral.sh/uv/install.sh | sh
pip install --user --no-cache-dir grpcio-tools==1.64.1
# First invocation runs the Makefile's default goal, second runs `test`.
- name: Test qwen-asr
run: |
make --jobs=5 --output-sync=target -C backend/python/qwen-asr
make --jobs=5 --output-sync=target -C backend/python/qwen-asr test
tests-voxcpm:
runs-on: ubuntu-latest
steps:

View File

@@ -1,5 +1,5 @@
# Disable parallel execution for backend builds
.NOTPARALLEL: backends/diffusers backends/llama-cpp backends/piper backends/stablediffusion-ggml backends/whisper backends/faster-whisper backends/silero-vad backends/local-store backends/huggingface backends/rfdetr backends/kitten-tts backends/kokoro backends/chatterbox backends/llama-cpp-darwin backends/neutts build-darwin-python-backend build-darwin-go-backend backends/mlx backends/diffuser-darwin backends/mlx-vlm backends/mlx-audio backends/stablediffusion-ggml-darwin backends/vllm backends/vllm-omni backends/moonshine backends/pocket-tts backends/qwen-tts backends/voxcpm
.NOTPARALLEL: backends/diffusers backends/llama-cpp backends/piper backends/stablediffusion-ggml backends/whisper backends/faster-whisper backends/silero-vad backends/local-store backends/huggingface backends/rfdetr backends/kitten-tts backends/kokoro backends/chatterbox backends/llama-cpp-darwin backends/neutts build-darwin-python-backend build-darwin-go-backend backends/mlx backends/diffuser-darwin backends/mlx-vlm backends/mlx-audio backends/stablediffusion-ggml-darwin backends/vllm backends/vllm-omni backends/moonshine backends/pocket-tts backends/qwen-tts backends/qwen-asr backends/voxcpm
GOCMD=go
GOTEST=$(GOCMD) test
@@ -319,6 +319,7 @@ prepare-test-extra: protogen-python
$(MAKE) -C backend/python/moonshine
$(MAKE) -C backend/python/pocket-tts
$(MAKE) -C backend/python/qwen-tts
$(MAKE) -C backend/python/qwen-asr
$(MAKE) -C backend/python/voxcpm
test-extra: prepare-test-extra
@@ -331,6 +332,7 @@ test-extra: prepare-test-extra
$(MAKE) -C backend/python/moonshine test
$(MAKE) -C backend/python/pocket-tts test
$(MAKE) -C backend/python/qwen-tts test
$(MAKE) -C backend/python/qwen-asr test
$(MAKE) -C backend/python/voxcpm test
DOCKER_IMAGE?=local-ai
@@ -464,6 +466,7 @@ BACKEND_VIBEVOICE = vibevoice|python|.|--progress=plain|true
BACKEND_MOONSHINE = moonshine|python|.|false|true
BACKEND_POCKET_TTS = pocket-tts|python|.|false|true
BACKEND_QWEN_TTS = qwen-tts|python|.|false|true
BACKEND_QWEN_ASR = qwen-asr|python|.|false|true
BACKEND_VOXCPM = voxcpm|python|.|false|true
# Helper function to build docker image for a backend
@@ -510,13 +513,14 @@ $(eval $(call generate-docker-build-target,$(BACKEND_VIBEVOICE)))
$(eval $(call generate-docker-build-target,$(BACKEND_MOONSHINE)))
$(eval $(call generate-docker-build-target,$(BACKEND_POCKET_TTS)))
$(eval $(call generate-docker-build-target,$(BACKEND_QWEN_TTS)))
$(eval $(call generate-docker-build-target,$(BACKEND_QWEN_ASR)))
$(eval $(call generate-docker-build-target,$(BACKEND_VOXCPM)))
# Pattern rule for docker-save targets
docker-save-%: backend-images
docker save local-ai-backend:$* -o backend-images/$*.tar
docker-build-backends: docker-build-llama-cpp docker-build-rerankers docker-build-vllm docker-build-vllm-omni docker-build-transformers docker-build-diffusers docker-build-kokoro docker-build-faster-whisper docker-build-coqui docker-build-chatterbox docker-build-vibevoice docker-build-moonshine docker-build-pocket-tts docker-build-qwen-tts docker-build-voxcpm
docker-build-backends: docker-build-llama-cpp docker-build-rerankers docker-build-vllm docker-build-vllm-omni docker-build-transformers docker-build-diffusers docker-build-kokoro docker-build-faster-whisper docker-build-coqui docker-build-chatterbox docker-build-vibevoice docker-build-moonshine docker-build-pocket-tts docker-build-qwen-tts docker-build-qwen-asr docker-build-voxcpm
########################################################
### END Backends

View File

@@ -414,6 +414,28 @@
nvidia-l4t-cuda-12: "nvidia-l4t-qwen-tts"
nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-qwen-tts"
icon: https://cdn-avatars.huggingface.co/v1/production/uploads/620760a26e3b7210c2ff1943/-s1gyJfvbE1RgO5iBeNOi.png
# qwen-asr: base backend entry, anchored as &qwen-asr so the concrete image
# entries can merge it. Each `capabilities` value matches the `name` of one of
# the per-hardware qwen-asr entries defined further down in this file.
- &qwen-asr
urls:
- https://github.com/QwenLM/Qwen3-ASR
description: |
Qwen3-ASR is an automatic speech recognition model supporting multiple languages and batch inference.
tags:
- speech-recognition
- ASR
license: apache-2.0
name: "qwen-asr"
alias: "qwen-asr"
capabilities:
nvidia: "cuda12-qwen-asr"
intel: "intel-qwen-asr"
amd: "rocm-qwen-asr"
nvidia-l4t: "nvidia-l4t-qwen-asr"
default: "cpu-qwen-asr"
nvidia-cuda-13: "cuda13-qwen-asr"
nvidia-cuda-12: "cuda12-qwen-asr"
nvidia-l4t-cuda-12: "nvidia-l4t-qwen-asr"
nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-qwen-asr"
icon: https://cdn-avatars.huggingface.co/v1/production/uploads/620760a26e3b7210c2ff1943/-s1gyJfvbE1RgO5iBeNOi.png
- &voxcpm
urls:
- https://github.com/ModelBest/VoxCPM
@@ -1671,6 +1693,89 @@
uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-cuda-13-arm64-qwen-tts"
mirrors:
- localai/localai-backends:master-nvidia-l4t-cuda-13-arm64-qwen-tts
## qwen-asr
# Every entry below merges the &qwen-asr anchor and overrides `name` plus
# either `capabilities` (meta entry) or `uri`/`mirrors` (concrete image).
# "latest-*" tags back the release names, "master-*" tags back the
# "-development" names.
# Development meta entry: same capability keys as the base entry, pointing at
# the "-development" image entries.
- !!merge <<: *qwen-asr
name: "qwen-asr-development"
capabilities:
nvidia: "cuda12-qwen-asr-development"
intel: "intel-qwen-asr-development"
amd: "rocm-qwen-asr-development"
nvidia-l4t: "nvidia-l4t-qwen-asr-development"
default: "cpu-qwen-asr-development"
nvidia-cuda-13: "cuda13-qwen-asr-development"
nvidia-cuda-12: "cuda12-qwen-asr-development"
nvidia-l4t-cuda-12: "nvidia-l4t-qwen-asr-development"
nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-qwen-asr-development"
# CPU images
- !!merge <<: *qwen-asr
name: "cpu-qwen-asr"
uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-qwen-asr"
mirrors:
- localai/localai-backends:latest-cpu-qwen-asr
- !!merge <<: *qwen-asr
name: "cpu-qwen-asr-development"
uri: "quay.io/go-skynet/local-ai-backends:master-cpu-qwen-asr"
mirrors:
- localai/localai-backends:master-cpu-qwen-asr
# NVIDIA CUDA 12 images
- !!merge <<: *qwen-asr
name: "cuda12-qwen-asr"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-qwen-asr"
mirrors:
- localai/localai-backends:latest-gpu-nvidia-cuda-12-qwen-asr
- !!merge <<: *qwen-asr
name: "cuda12-qwen-asr-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-qwen-asr"
mirrors:
- localai/localai-backends:master-gpu-nvidia-cuda-12-qwen-asr
# NVIDIA CUDA 13 images
- !!merge <<: *qwen-asr
name: "cuda13-qwen-asr"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-13-qwen-asr"
mirrors:
- localai/localai-backends:latest-gpu-nvidia-cuda-13-qwen-asr
- !!merge <<: *qwen-asr
name: "cuda13-qwen-asr-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-13-qwen-asr"
mirrors:
- localai/localai-backends:master-gpu-nvidia-cuda-13-qwen-asr
# Intel GPU images
- !!merge <<: *qwen-asr
name: "intel-qwen-asr"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-qwen-asr"
mirrors:
- localai/localai-backends:latest-gpu-intel-qwen-asr
- !!merge <<: *qwen-asr
name: "intel-qwen-asr-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-qwen-asr"
mirrors:
- localai/localai-backends:master-gpu-intel-qwen-asr
# AMD ROCm (hipblas) images
- !!merge <<: *qwen-asr
name: "rocm-qwen-asr"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-rocm-hipblas-qwen-asr"
mirrors:
- localai/localai-backends:latest-gpu-rocm-hipblas-qwen-asr
- !!merge <<: *qwen-asr
name: "rocm-qwen-asr-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-rocm-hipblas-qwen-asr"
mirrors:
- localai/localai-backends:master-gpu-rocm-hipblas-qwen-asr
# NVIDIA L4T images
- !!merge <<: *qwen-asr
name: "nvidia-l4t-qwen-asr"
uri: "quay.io/go-skynet/local-ai-backends:latest-nvidia-l4t-qwen-asr"
mirrors:
- localai/localai-backends:latest-nvidia-l4t-qwen-asr
- !!merge <<: *qwen-asr
name: "nvidia-l4t-qwen-asr-development"
uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-qwen-asr"
mirrors:
- localai/localai-backends:master-nvidia-l4t-qwen-asr
# NVIDIA L4T CUDA 13 (arm64) images
- !!merge <<: *qwen-asr
name: "cuda13-nvidia-l4t-arm64-qwen-asr"
uri: "quay.io/go-skynet/local-ai-backends:latest-nvidia-l4t-cuda-13-arm64-qwen-asr"
mirrors:
- localai/localai-backends:latest-nvidia-l4t-cuda-13-arm64-qwen-asr
- !!merge <<: *qwen-asr
name: "cuda13-nvidia-l4t-arm64-qwen-asr-development"
uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-cuda-13-arm64-qwen-asr"
mirrors:
- localai/localai-backends:master-nvidia-l4t-cuda-13-arm64-qwen-asr
## voxcpm
- !!merge <<: *voxcpm
name: "voxcpm-development"

View File

@@ -0,0 +1,25 @@
# Build, run and test helpers for the qwen-asr Python backend.

# A bare `make` resolves to the `install` goal.
.DEFAULT_GOAL := install

# `install` must exist because it is named as the default goal; it simply
# delegates to the qwen-asr target, which performs the actual installation.
.PHONY: install
install: qwen-asr

# Install the backend's dependencies via install.sh.
.PHONY: qwen-asr
qwen-asr:
	bash install.sh

# Install (if needed) and then start the backend via run.sh.
.PHONY: run
run: qwen-asr
	@echo "Running qwen-asr..."
	bash run.sh
	@echo "qwen-asr run."

# Install (if needed) and then run the backend test script.
.PHONY: test
test: qwen-asr
	@echo "Testing qwen-asr..."
	bash test.sh
	@echo "qwen-asr tested."

# Remove the generated gRPC/protobuf Python stubs.
.PHONY: protogen-clean
protogen-clean:
	$(RM) backend_pb2_grpc.py backend_pb2.py

# Remove generated stubs, the virtualenv and Python bytecode caches.
.PHONY: clean
clean: protogen-clean
	rm -rf venv __pycache__

View File

@@ -0,0 +1,212 @@
#!/usr/bin/env python3
"""
gRPC server of LocalAI for Qwen3-ASR (transformers backend, non-vLLM).
"""
from concurrent import futures
import time
import argparse
import signal
import sys
import os
import backend_pb2
import backend_pb2_grpc
import torch
from qwen_asr import Qwen3ASRModel
import grpc
def is_float(s):
    """Report whether ``float(s)`` succeeds.

    Returns True when the conversion works and False when it raises
    ValueError; any other exception propagates to the caller.
    """
    try:
        float(s)
    except ValueError:
        return False
    else:
        return True
def is_int(s):
    """Report whether ``int(s)`` succeeds.

    Returns True when the conversion works and False when it raises
    ValueError; any other exception propagates to the caller.
    """
    try:
        int(s)
    except ValueError:
        return False
    else:
        return True
_ONE_DAY_IN_SECONDS = 60 * 60 * 24
MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1'))
class BackendServicer(backend_pb2_grpc.BackendServicer):
    """gRPC servicer exposing Qwen3-ASR through the LocalAI backend interface."""

    def Health(self, request, context):
        """Liveness probe: always replies with the bytes ``OK``."""
        return backend_pb2.Reply(message=bytes("OK", 'utf-8'))

    def LoadModel(self, request, context):
        """Load a Qwen3-ASR model and keep it on ``self.model``.

        The device is chosen as "cuda" when available, otherwise "cpu", and
        is overridden by "mps" when that backend reports itself available.
        ``request.Options`` entries of the form ``key:value`` are parsed into
        ``self.options``; values are coerced to int, float or bool when they
        look like one. Recognised options: ``torch_dtype`` (fp16/bf16/fp32),
        ``max_inference_batch_size`` (default 32), ``max_new_tokens``
        (default 256), ``forced_aligner`` and ``attn_implementation``.

        Returns a ``backend_pb2.Result`` with ``success=False`` and the error
        text when CUDA was requested but is unavailable or loading raises.
        """
        if torch.cuda.is_available():
            device = "cuda"
        else:
            device = "cpu"
        mps_available = hasattr(torch.backends, "mps") and torch.backends.mps.is_available()
        if mps_available:
            device = "mps"
        if not torch.cuda.is_available() and request.CUDA:
            return backend_pb2.Result(success=False, message="CUDA is not available")

        self.device = device
        self.options = {}

        for opt in request.Options:
            # Entries without a separator carry no value; ignore them.
            if ":" not in opt:
                continue
            key, value = opt.split(":", 1)
            # Try int before float: every integer literal also parses as a
            # float, so the reverse order would turn "256" into 256.0 and
            # hand float token/batch counts to the model.
            if is_int(value):
                value = int(value)
            elif is_float(value):
                value = float(value)
            elif value.lower() in ["true", "false"]:
                value = value.lower() == "true"
            self.options[key] = value

        model_path = request.Model or "Qwen/Qwen3-ASR-1.7B"
        default_dtype = torch.bfloat16 if self.device == "cuda" else torch.float32
        load_dtype = default_dtype
        if "torch_dtype" in self.options:
            d = str(self.options["torch_dtype"]).lower()
            if d == "fp16":
                load_dtype = torch.float16
            elif d == "bf16":
                load_dtype = torch.bfloat16
            elif d == "fp32":
                load_dtype = torch.float32
            # Unrecognised values leave the device default in place.
            del self.options["torch_dtype"]

        self.max_inference_batch_size = self.options.get("max_inference_batch_size", 32)
        self.max_new_tokens = self.options.get("max_new_tokens", 256)

        # Blank string options are treated the same as "not set".
        forced_aligner = self.options.get("forced_aligner")
        if forced_aligner is not None and isinstance(forced_aligner, str):
            forced_aligner = forced_aligner.strip() or None
        attn_implementation = self.options.get("attn_implementation")
        if attn_implementation is not None and isinstance(attn_implementation, str):
            attn_implementation = attn_implementation.strip() or None

        if self.device == "mps":
            device_map = None
        elif self.device == "cuda":
            device_map = "cuda:0"
        else:
            device_map = "cpu"

        load_kwargs = dict(
            dtype=load_dtype,
            device_map=device_map,
            max_inference_batch_size=self.max_inference_batch_size,
            max_new_tokens=self.max_new_tokens,
        )
        if attn_implementation:
            load_kwargs["attn_implementation"] = attn_implementation
        if forced_aligner:
            # The aligner is loaded with the same dtype/device/attention
            # settings as the main model.
            load_kwargs["forced_aligner"] = forced_aligner
            forced_aligner_kwargs = dict(
                dtype=load_dtype,
                device_map=device_map,
            )
            if attn_implementation:
                forced_aligner_kwargs["attn_implementation"] = attn_implementation
            load_kwargs["forced_aligner_kwargs"] = forced_aligner_kwargs

        try:
            print(f"Loading Qwen3-ASR from {model_path}", file=sys.stderr)
            if attn_implementation:
                print(f"Using attn_implementation: {attn_implementation}", file=sys.stderr)
            if forced_aligner:
                print(f"Loading with forced_aligner: {forced_aligner}", file=sys.stderr)
            self.model = Qwen3ASRModel.from_pretrained(model_path, **load_kwargs)
            print("Qwen3-ASR model loaded successfully", file=sys.stderr)
        except Exception as err:
            print(f"[ERROR] LoadModel failed: {err}", file=sys.stderr)
            import traceback
            traceback.print_exc(file=sys.stderr)
            return backend_pb2.Result(success=False, message=str(err))

        return backend_pb2.Result(message="Model loaded successfully", success=True)

    def AudioTranscription(self, request, context):
        """Transcribe the audio file at ``request.dst`` with the loaded model.

        Uses only the first result returned by ``self.model.transcribe``.
        When that result carries ``time_stamps``, one segment is emitted per
        entry; otherwise a single segment holding the full text is emitted
        (if the text is non-empty). A missing file, an empty result list or
        any exception yields an empty ``TranscriptResult``; errors are
        logged to stderr.
        """
        result_segments = []
        text = ""
        try:
            audio_path = request.dst
            if not audio_path or not os.path.exists(audio_path):
                print(f"Error: Audio file not found: {audio_path}", file=sys.stderr)
                return backend_pb2.TranscriptResult(segments=[], text="")

            # A blank language is passed to the model as None.
            language = None
            if request.language and request.language.strip():
                language = request.language.strip()

            results = self.model.transcribe(audio=audio_path, language=language)

            if not results:
                return backend_pb2.TranscriptResult(segments=[], text="")

            r = results[0]
            text = r.text or ""

            if getattr(r, 'time_stamps', None) and len(r.time_stamps) > 0:
                for idx, ts in enumerate(r.time_stamps):
                    # Defaults apply when an entry is not a (start, end, text)
                    # style sequence: zero timings and the full transcript.
                    start_ms = 0
                    end_ms = 0
                    seg_text = text
                    if isinstance(ts, (list, tuple)) and len(ts) >= 3:
                        # Values are scaled by 1000 — presumably seconds to
                        # milliseconds; confirm against the qwen_asr output.
                        start_ms = int(float(ts[0]) * 1000) if ts[0] is not None else 0
                        end_ms = int(float(ts[1]) * 1000) if ts[1] is not None else 0
                        seg_text = ts[2] if ts[2] is not None else ""
                    result_segments.append(backend_pb2.TranscriptSegment(
                        id=idx, start=start_ms, end=end_ms, text=seg_text
                    ))
            else:
                if text:
                    result_segments.append(backend_pb2.TranscriptSegment(
                        id=0, start=0, end=0, text=text
                    ))
        except Exception as err:
            print(f"Error in AudioTranscription: {err}", file=sys.stderr)
            import traceback
            traceback.print_exc(file=sys.stderr)
            return backend_pb2.TranscriptResult(segments=[], text="")

        return backend_pb2.TranscriptResult(segments=result_segments, text=text)
def serve(address):
    """Start the gRPC backend on ``address`` and block until interrupted.

    The server runs on a thread pool of ``MAX_WORKERS`` workers with 50 MiB
    message-size limits. SIGINT and SIGTERM stop the server immediately and
    exit the process with status 0.
    """
    message_limit = 50 * 1024 * 1024
    channel_options = [
        ('grpc.max_message_length', message_limit),
        ('grpc.max_send_message_length', message_limit),
        ('grpc.max_receive_message_length', message_limit),
    ]
    worker_pool = futures.ThreadPoolExecutor(max_workers=MAX_WORKERS)
    server = grpc.server(worker_pool, options=channel_options)
    backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
    server.add_insecure_port(address)
    server.start()
    print("Server started. Listening on: " + address, file=sys.stderr)

    def signal_handler(sig, frame):
        # Stop without a grace period, then terminate the process.
        print("Received termination signal. Shutting down...")
        server.stop(0)
        sys.exit(0)

    for signum in (signal.SIGINT, signal.SIGTERM):
        signal.signal(signum, signal_handler)

    # server.start() does not block, so park the main thread here.
    try:
        while True:
            time.sleep(_ONE_DAY_IN_SECONDS)
    except KeyboardInterrupt:
        server.stop(0)
if __name__ == "__main__":
    # Command-line entry point: read the bind address and start serving.
    cli = argparse.ArgumentParser(description="Run the gRPC server.")
    cli.add_argument("--addr", default="localhost:50051", help="The address to bind the server to.")
    serve(cli.parse_args().addr)

View File

@@ -0,0 +1,21 @@
#!/bin/bash
# Installs the Python requirements for the qwen-asr backend.
set -e

# Extra pip flags; extended below for the intel build profile.
EXTRA_PIP_INSTALL_FLAGS="--no-build-isolation"

# Directory containing this script. Expansions are quoted so that a checkout
# path containing spaces is not word-split.
backend_dir=$(dirname "$0")

# Prefer a libbackend.sh in a sibling `common` directory, otherwise fall back
# to the one in the parent directory.
if [ -d "$backend_dir/common" ]; then
    source "$backend_dir/common/libbackend.sh"
else
    source "$backend_dir/../common/libbackend.sh"
fi

if [ "x${BUILD_PROFILE}" == "xintel" ]; then
    EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match"
fi

# Python version settings, set before installRequirements is invoked.
PYTHON_VERSION="3.12"
PYTHON_PATCH="12"
PY_STANDALONE_TAG="20251120"

# Not defined in this script; expected to come from the sourced libbackend.sh.
installRequirements

View File

@@ -0,0 +1,3 @@
--extra-index-url https://download.pytorch.org/whl/cpu
torch
qwen-asr

View File

@@ -0,0 +1 @@
flash-attn

View File

@@ -0,0 +1,3 @@
--extra-index-url https://download.pytorch.org/whl/cu121
torch
qwen-asr

View File

@@ -0,0 +1,3 @@
--extra-index-url https://download.pytorch.org/whl/cu130
torch
qwen-asr

View File

@@ -0,0 +1,3 @@
--extra-index-url https://download.pytorch.org/whl/rocm6.3
torch==2.7.1+rocm6.3
qwen-asr

View File

@@ -0,0 +1 @@
flash-attn

View File

@@ -0,0 +1,3 @@
--extra-index-url https://download.pytorch.org/whl/xpu
torch
qwen-asr

View File

@@ -0,0 +1,3 @@
--extra-index-url https://pypi.jetson-ai-lab.io/jp6/cu129/
torch
qwen-asr

View File

@@ -0,0 +1,3 @@
--extra-index-url https://download.pytorch.org/whl/cu130
torch
qwen-asr

View File

@@ -0,0 +1,2 @@
torch==2.7.1
qwen-asr

View File

@@ -0,0 +1,5 @@
grpcio==1.71.0
protobuf
certifi
packaging==24.1
setuptools

View File

@@ -0,0 +1,9 @@
#!/bin/bash
# Starts the qwen-asr backend, forwarding all command-line arguments.

# Directory containing this script. Expansions are quoted so that a checkout
# path containing spaces is not word-split.
backend_dir=$(dirname "$0")

# Prefer a libbackend.sh in a sibling `common` directory, otherwise fall back
# to the one in the parent directory.
if [ -d "$backend_dir/common" ]; then
    source "$backend_dir/common/libbackend.sh"
else
    source "$backend_dir/../common/libbackend.sh"
fi

# "$@" forwards each argument exactly as received; an unquoted $@ would
# re-split arguments on whitespace and expand globs.
# startBackend is not defined here; expected to come from libbackend.sh.
startBackend "$@"

View File

@@ -0,0 +1,94 @@
"""
Tests for the Qwen3-ASR gRPC backend.
"""
import unittest
import subprocess
import time
import os
import tempfile
import shutil
import backend_pb2
import backend_pb2_grpc
import grpc
# Set SKIP_ASR_TESTS=true (case-insensitive) to skip the transcription test,
# which needs a model download and real inference.
SKIP_ASR_TESTS = os.getenv("SKIP_ASR_TESTS", "false").lower() == "true"
class TestBackendServicer(unittest.TestCase):
    """End-to-end tests that drive backend.py over gRPC on localhost:50051.

    unittest invokes setUp() before and tearDown() after every test method,
    so the tests must not call them again: doing so would spawn a second
    server on the same port, leak the first process and double the start-up
    wait.
    """

    def setUp(self):
        """Spawn the backend server and give it a fixed 15s to come up."""
        self.service = subprocess.Popen(["python3", "backend.py", "--addr", "localhost:50051"])
        time.sleep(15)

    def tearDown(self):
        """Terminate the backend server and wait for it to exit."""
        self.service.terminate()
        self.service.wait()

    def test_server_startup(self):
        """The Health RPC answers with b'OK' once the server is running."""
        try:
            with grpc.insecure_channel("localhost:50051") as channel:
                stub = backend_pb2_grpc.BackendStub(channel)
                response = stub.Health(backend_pb2.HealthMessage())
                self.assertEqual(response.message, b'OK')
        except Exception as err:
            print(err)
            self.fail("Server failed to start")

    def test_load_model(self):
        """LoadModel succeeds for Qwen/Qwen3-ASR-1.7B."""
        try:
            with grpc.insecure_channel("localhost:50051") as channel:
                stub = backend_pb2_grpc.BackendStub(channel)
                response = stub.LoadModel(backend_pb2.ModelOptions(Model="Qwen/Qwen3-ASR-1.7B"))
                self.assertTrue(response.success, response.message)
                self.assertEqual(response.message, "Model loaded successfully")
        except Exception as err:
            print(err)
            self.fail("LoadModel service failed")

    @unittest.skipIf(SKIP_ASR_TESTS, "ASR transcription test skipped (SKIP_ASR_TESTS=true)")
    def test_audio_transcription(self):
        """Transcribing the sample clip yields text containing "big".

        The test is skipped when the sample audio cannot be downloaded.
        """
        temp_dir = tempfile.mkdtemp()
        audio_file = os.path.join(temp_dir, 'audio.wav')
        try:
            url = "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen3-ASR-Repo/asr_en.wav"
            result = subprocess.run(
                ["wget", "-q", url, "-O", audio_file],
                capture_output=True,
                text=True,
                timeout=30,
            )
            if result.returncode != 0:
                self.skipTest(f"Could not download sample audio: {result.stderr}")
            if not os.path.exists(audio_file):
                self.skipTest("Sample audio file not found after download")

            with grpc.insecure_channel("localhost:50051") as channel:
                stub = backend_pb2_grpc.BackendStub(channel)
                load_response = stub.LoadModel(backend_pb2.ModelOptions(Model="Qwen/Qwen3-ASR-0.6B"))
                self.assertTrue(load_response.success, load_response.message)
                transcript_response = stub.AudioTranscription(
                    backend_pb2.TranscriptRequest(dst=audio_file)
                )
                self.assertIsNotNone(transcript_response)
                self.assertIsNotNone(transcript_response.text)
                self.assertGreaterEqual(len(transcript_response.segments), 0)
                all_text = ""
                for segment in transcript_response.segments:
                    all_text += segment.text
                print(f"All text: {all_text}")
                self.assertIn("big", all_text)
                if transcript_response.segments:
                    self.assertIsNotNone(transcript_response.segments[0].text)
        finally:
            # Only the temporary audio directory is cleaned up here; the
            # server process is stopped by tearDown().
            if os.path.exists(temp_dir):
                shutil.rmtree(temp_dir)

View File

@@ -0,0 +1,11 @@
#!/bin/bash
# Runs the unit tests for the qwen-asr backend.
set -e

# Directory containing this script. Expansions are quoted so that a checkout
# path containing spaces is not word-split.
backend_dir=$(dirname "$0")

# Prefer a libbackend.sh in a sibling `common` directory, otherwise fall back
# to the one in the parent directory.
if [ -d "$backend_dir/common" ]; then
    source "$backend_dir/common/libbackend.sh"
else
    source "$backend_dir/../common/libbackend.sh"
fi

# Not defined in this script; expected to come from the sourced libbackend.sh.
runUnittests

View File

@@ -94,6 +94,34 @@
voice: Aiden # Available speakers: Vivian, Serena, Uncle_Fu, Dylan, Eric, Ryan, Aiden, Ono_Anna, Sohee
parameters:
model: Qwen/Qwen3-TTS-12Hz-1.7B-CustomVoice
# Qwen3-ASR 1.7B gallery entry, served by the qwen-asr backend. Anchored as
# &qwen-asr so that other Qwen3-ASR variants can merge its shared fields.
- &qwen-asr
urls:
- https://huggingface.co/Qwen/Qwen3-ASR-1.7B
description: |
Qwen3-ASR is an automatic speech recognition model supporting multiple languages and batch inference.
tags:
- speech-recognition
- ASR
license: apache-2.0
icon: https://cdn-avatars.huggingface.co/v1/production/uploads/620760a26e3b7210c2ff1943/-s1gyJfvbE1RgO5iBeNOi.png
name: "qwen3-asr-1.7b"
url: "github:mudler/LocalAI/gallery/virtual.yaml@master"
overrides:
backend: qwen-asr
known_usecases:
- transcript
parameters:
model: Qwen/Qwen3-ASR-1.7B
# Qwen3-ASR 0.6B gallery entry: merges *qwen-asr and replaces `urls`, `name`
# and `overrides`. Merge keys are shallow, so `overrides` is restated in full
# rather than only changing the model.
- !!merge <<: *qwen-asr
urls:
- https://huggingface.co/Qwen/Qwen3-ASR-0.6B
name: "qwen3-asr-0.6b"
overrides:
backend: qwen-asr
known_usecases:
- transcript
parameters:
model: Qwen/Qwen3-ASR-0.6B
- name: "huihui-glm-4.7-flash-abliterated-i1"
url: "github:mudler/LocalAI/gallery/virtual.yaml@master"
urls: