diff --git a/.github/workflows/backend.yml b/.github/workflows/backend.yml index ac6766950..fb32a52b9 100644 --- a/.github/workflows/backend.yml +++ b/.github/workflows/backend.yml @@ -184,6 +184,19 @@ jobs: dockerfile: "./backend/Dockerfile.python" context: "./" ubuntu-version: '2404' + - build-type: 'cublas' + cuda-major-version: "12" + cuda-minor-version: "8" + platforms: 'linux/amd64' + tag-latest: 'auto' + tag-suffix: '-gpu-nvidia-cuda-12-nemo' + runs-on: 'ubuntu-latest' + base-image: "ubuntu:24.04" + skip-drivers: 'false' + backend: "nemo" + dockerfile: "./backend/Dockerfile.python" + context: "./" + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "8" @@ -536,6 +549,19 @@ jobs: dockerfile: "./backend/Dockerfile.python" context: "./" ubuntu-version: '2404' + - build-type: 'cublas' + cuda-major-version: "13" + cuda-minor-version: "0" + platforms: 'linux/amd64' + tag-latest: 'auto' + tag-suffix: '-gpu-nvidia-cuda-13-nemo' + runs-on: 'ubuntu-latest' + base-image: "ubuntu:24.04" + skip-drivers: 'false' + backend: "nemo" + dockerfile: "./backend/Dockerfile.python" + context: "./" + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "13" cuda-minor-version: "0" @@ -1045,6 +1071,19 @@ jobs: dockerfile: "./backend/Dockerfile.python" context: "./" ubuntu-version: '2404' + - build-type: 'hipblas' + cuda-major-version: "" + cuda-minor-version: "" + platforms: 'linux/amd64' + tag-latest: 'auto' + tag-suffix: '-gpu-rocm-hipblas-nemo' + runs-on: 'arc-runner-set' + base-image: "rocm/dev-ubuntu-24.04:6.4.4" + skip-drivers: 'false' + backend: "nemo" + dockerfile: "./backend/Dockerfile.python" + context: "./" + ubuntu-version: '2404' - build-type: 'hipblas' cuda-major-version: "" cuda-minor-version: "" @@ -1372,6 +1411,19 @@ jobs: dockerfile: "./backend/Dockerfile.python" context: "./" ubuntu-version: '2404' + - build-type: 'intel' + cuda-major-version: "" + cuda-minor-version: "" + platforms: 'linux/amd64' + tag-latest: 'auto' + tag-suffix: '-gpu-intel-nemo' + runs-on: 'arc-runner-set' + base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04" + skip-drivers: 'false' + backend: "nemo" + dockerfile: "./backend/Dockerfile.python" + context: "./" + ubuntu-version: '2404' - build-type: 'intel' cuda-major-version: "" cuda-minor-version: "" @@ -1797,6 +1849,19 @@ jobs: dockerfile: "./backend/Dockerfile.python" context: "./" ubuntu-version: '2404' + - build-type: '' + cuda-major-version: "" + cuda-minor-version: "" + platforms: 'linux/amd64,linux/arm64' + tag-latest: 'auto' + tag-suffix: '-cpu-nemo' + runs-on: 'ubuntu-latest' + base-image: "ubuntu:24.04" + skip-drivers: 'false' + backend: "nemo" + dockerfile: "./backend/Dockerfile.python" + context: "./" + ubuntu-version: '2404' - build-type: '' cuda-major-version: "" cuda-minor-version: "" @@ -1886,6 +1951,9 @@ jobs: - backend: "qwen-asr" tag-suffix: "-metal-darwin-arm64-qwen-asr" build-type: "mps" + - backend: "nemo" + tag-suffix: "-metal-darwin-arm64-nemo" + build-type: "mps" - backend: "qwen-tts" tag-suffix: "-metal-darwin-arm64-qwen-tts" build-type: "mps" diff --git a/.github/workflows/test-extra.yml b/.github/workflows/test-extra.yml index a473c8904..a8c45f107 100644 --- a/.github/workflows/test-extra.yml +++ b/.github/workflows/test-extra.yml @@ -323,6 +323,25 @@ jobs: run: | make --jobs=5 --output-sync=target -C backend/python/qwen-asr make --jobs=5 --output-sync=target -C backend/python/qwen-asr test + tests-nemo: + runs-on: ubuntu-latest + steps: + - name: Clone + uses: 
actions/checkout@v6 + with: + submodules: true + - name: Dependencies + run: | + sudo apt-get update + sudo apt-get install -y build-essential ffmpeg sox + sudo apt-get install -y ca-certificates cmake curl patch python3-pip + # Install UV + curl -LsSf https://astral.sh/uv/install.sh | sh + pip install --user --no-cache-dir grpcio-tools==1.64.1 + - name: Test nemo + run: | + make --jobs=5 --output-sync=target -C backend/python/nemo + make --jobs=5 --output-sync=target -C backend/python/nemo test tests-voxcpm: runs-on: ubuntu-latest steps: diff --git a/Makefile b/Makefile index 1c5ce3347..4f56ef202 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ # Disable parallel execution for backend builds -.NOTPARALLEL: backends/diffusers backends/llama-cpp backends/outetts backends/piper backends/stablediffusion-ggml backends/whisper backends/faster-whisper backends/silero-vad backends/local-store backends/huggingface backends/rfdetr backends/kitten-tts backends/kokoro backends/chatterbox backends/llama-cpp-darwin backends/neutts build-darwin-python-backend build-darwin-go-backend backends/mlx backends/diffuser-darwin backends/mlx-vlm backends/mlx-audio backends/stablediffusion-ggml-darwin backends/vllm backends/vllm-omni backends/moonshine backends/pocket-tts backends/qwen-tts backends/qwen-asr backends/voxcpm backends/whisperx backends/ace-step +.NOTPARALLEL: backends/diffusers backends/llama-cpp backends/outetts backends/piper backends/stablediffusion-ggml backends/whisper backends/faster-whisper backends/silero-vad backends/local-store backends/huggingface backends/rfdetr backends/kitten-tts backends/kokoro backends/chatterbox backends/llama-cpp-darwin backends/neutts build-darwin-python-backend build-darwin-go-backend backends/mlx backends/diffuser-darwin backends/mlx-vlm backends/mlx-audio backends/stablediffusion-ggml-darwin backends/vllm backends/vllm-omni backends/moonshine backends/pocket-tts backends/qwen-tts backends/qwen-asr backends/nemo backends/voxcpm backends/whisperx backends/ace-step GOCMD=go GOTEST=$(GOCMD) test @@ -318,6 +318,7 @@ prepare-test-extra: protogen-python $(MAKE) -C backend/python/pocket-tts $(MAKE) -C backend/python/qwen-tts $(MAKE) -C backend/python/qwen-asr + $(MAKE) -C backend/python/nemo $(MAKE) -C backend/python/voxcpm $(MAKE) -C backend/python/whisperx $(MAKE) -C backend/python/ace-step @@ -334,6 +335,7 @@ test-extra: prepare-test-extra $(MAKE) -C backend/python/pocket-tts test $(MAKE) -C backend/python/qwen-tts test $(MAKE) -C backend/python/qwen-asr test + $(MAKE) -C backend/python/nemo test $(MAKE) -C backend/python/voxcpm test $(MAKE) -C backend/python/whisperx test $(MAKE) -C backend/python/ace-step test @@ -471,6 +473,7 @@ BACKEND_MOONSHINE = moonshine|python|.|false|true BACKEND_POCKET_TTS = pocket-tts|python|.|false|true BACKEND_QWEN_TTS = qwen-tts|python|.|false|true BACKEND_QWEN_ASR = qwen-asr|python|.|false|true +BACKEND_NEMO = nemo|python|.|false|true BACKEND_VOXCPM = voxcpm|python|.|false|true BACKEND_WHISPERX = whisperx|python|.|false|true BACKEND_ACE_STEP = ace-step|python|.|false|true @@ -521,6 +524,7 @@ $(eval $(call generate-docker-build-target,$(BACKEND_MOONSHINE))) $(eval $(call generate-docker-build-target,$(BACKEND_POCKET_TTS))) $(eval $(call generate-docker-build-target,$(BACKEND_QWEN_TTS))) $(eval $(call generate-docker-build-target,$(BACKEND_QWEN_ASR))) +$(eval $(call generate-docker-build-target,$(BACKEND_NEMO))) $(eval $(call generate-docker-build-target,$(BACKEND_VOXCPM))) $(eval $(call 
generate-docker-build-target,$(BACKEND_WHISPERX))) $(eval $(call generate-docker-build-target,$(BACKEND_ACE_STEP))) @@ -529,7 +533,7 @@ $(eval $(call generate-docker-build-target,$(BACKEND_ACE_STEP))) docker-save-%: backend-images docker save local-ai-backend:$* -o backend-images/$*.tar -docker-build-backends: docker-build-llama-cpp docker-build-rerankers docker-build-vllm docker-build-vllm-omni docker-build-transformers docker-build-outetts docker-build-diffusers docker-build-kokoro docker-build-faster-whisper docker-build-coqui docker-build-chatterbox docker-build-vibevoice docker-build-moonshine docker-build-pocket-tts docker-build-qwen-tts docker-build-qwen-asr docker-build-voxcpm docker-build-whisperx docker-build-ace-step +docker-build-backends: docker-build-llama-cpp docker-build-rerankers docker-build-vllm docker-build-vllm-omni docker-build-transformers docker-build-outetts docker-build-diffusers docker-build-kokoro docker-build-faster-whisper docker-build-coqui docker-build-chatterbox docker-build-vibevoice docker-build-moonshine docker-build-pocket-tts docker-build-qwen-tts docker-build-qwen-asr docker-build-nemo docker-build-voxcpm docker-build-whisperx docker-build-ace-step ######################################################## ### Mock Backend for E2E Tests diff --git a/backend/index.yaml b/backend/index.yaml index 39d1e0562..3b43f9b86 100644 --- a/backend/index.yaml +++ b/backend/index.yaml @@ -536,6 +536,27 @@ nvidia-l4t-cuda-12: "nvidia-l4t-qwen-asr" nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-qwen-asr" icon: https://cdn-avatars.huggingface.co/v1/production/uploads/620760a26e3b7210c2ff1943/-s1gyJfvbE1RgO5iBeNOi.png +- &nemo + urls: + - https://github.com/NVIDIA/NeMo + description: | + NVIDIA NEMO Toolkit for ASR provides state-of-the-art automatic speech recognition models including Parakeet models for various languages and use cases. 
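+  # Default model when none is configured: nvidia/parakeet-tdt-0.6b-v3
+  # (see backend/python/nemo/backend.py).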
+ tags: + - speech-recognition + - ASR + - NVIDIA + license: apache-2.0 + name: "nemo" + alias: "nemo" + capabilities: + nvidia: "cuda12-nemo" + intel: "intel-nemo" + amd: "rocm-nemo" + metal: "metal-nemo" + default: "cpu-nemo" + nvidia-cuda-13: "cuda13-nemo" + nvidia-cuda-12: "cuda12-nemo" + icon: https://www.nvidia.com/favicon.ico - &voxcpm urls: - https://github.com/ModelBest/VoxCPM @@ -2337,6 +2358,77 @@ uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-qwen-asr" mirrors: - localai/localai-backends:master-metal-darwin-arm64-qwen-asr +## nemo +- !!merge <<: *nemo + name: "nemo-development" + capabilities: + nvidia: "cuda12-nemo-development" + intel: "intel-nemo-development" + amd: "rocm-nemo-development" + metal: "metal-nemo-development" + default: "cpu-nemo-development" + nvidia-cuda-13: "cuda13-nemo-development" + nvidia-cuda-12: "cuda12-nemo-development" +- !!merge <<: *nemo + name: "cpu-nemo" + uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-nemo" + mirrors: + - localai/localai-backends:latest-cpu-nemo +- !!merge <<: *nemo + name: "cpu-nemo-development" + uri: "quay.io/go-skynet/local-ai-backends:master-cpu-nemo" + mirrors: + - localai/localai-backends:master-cpu-nemo +- !!merge <<: *nemo + name: "cuda12-nemo" + uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-nemo" + mirrors: + - localai/localai-backends:latest-gpu-nvidia-cuda-12-nemo +- !!merge <<: *nemo + name: "cuda12-nemo-development" + uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-nemo" + mirrors: + - localai/localai-backends:master-gpu-nvidia-cuda-12-nemo +- !!merge <<: *nemo + name: "cuda13-nemo" + uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-13-nemo" + mirrors: + - localai/localai-backends:latest-gpu-nvidia-cuda-13-nemo +- !!merge <<: *nemo + name: "cuda13-nemo-development" + uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-13-nemo" + mirrors: + - localai/localai-backends:master-gpu-nvidia-cuda-13-nemo +- !!merge <<: *nemo + name: "intel-nemo" + uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-nemo" + mirrors: + - localai/localai-backends:latest-gpu-intel-nemo +- !!merge <<: *nemo + name: "intel-nemo-development" + uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-nemo" + mirrors: + - localai/localai-backends:master-gpu-intel-nemo +- !!merge <<: *nemo + name: "rocm-nemo" + uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-rocm-hipblas-nemo" + mirrors: + - localai/localai-backends:latest-gpu-rocm-hipblas-nemo +- !!merge <<: *nemo + name: "rocm-nemo-development" + uri: "quay.io/go-skynet/local-ai-backends:master-gpu-rocm-hipblas-nemo" + mirrors: + - localai/localai-backends:master-gpu-rocm-hipblas-nemo +- !!merge <<: *nemo + name: "metal-nemo" + uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-nemo" + mirrors: + - localai/localai-backends:latest-metal-darwin-arm64-nemo +- !!merge <<: *nemo + name: "metal-nemo-development" + uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-nemo" + mirrors: + - localai/localai-backends:master-metal-darwin-arm64-nemo ## voxcpm - !!merge <<: *voxcpm name: "voxcpm-development" diff --git a/backend/python/nemo/Makefile b/backend/python/nemo/Makefile new file mode 100644 index 000000000..6f5372a1a --- /dev/null +++ b/backend/python/nemo/Makefile @@ -0,0 +1,23 @@ +.PHONY: nemo-asr +nemo-asr: + bash install.sh + +.PHONY: run +run: nemo-asr + @echo "Running nemo-asr..." + bash run.sh + @echo "nemo-asr run." 
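+
+# Both `run` and `test` depend on the nemo-asr target above, so install.sh
+# (which sources the shared libbackend.sh helpers) always executes first.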
+
+.PHONY: test
+test: nemo-asr
+	@echo "Testing nemo-asr..."
+	bash test.sh
+	@echo "nemo-asr tested."
+
+.PHONY: protogen-clean
+protogen-clean:
+	$(RM) backend_pb2_grpc.py backend_pb2.py
+
+.PHONY: clean
+clean: protogen-clean
+	rm -rf venv __pycache__
diff --git a/backend/python/nemo/backend.py b/backend/python/nemo/backend.py
new file mode 100644
index 000000000..fd2218f69
--- /dev/null
+++ b/backend/python/nemo/backend.py
@@ -0,0 +1,147 @@
+#!/usr/bin/env python3
+"""
+LocalAI gRPC server for the NVIDIA NEMO Toolkit ASR backend.
+"""
+from concurrent import futures
+import time
+import argparse
+import signal
+import sys
+import os
+import backend_pb2
+import backend_pb2_grpc
+import torch
+import nemo.collections.asr as nemo_asr
+
+import grpc
+
+
+def is_float(s):
+    try:
+        float(s)
+        return True
+    except ValueError:
+        return False
+
+
+def is_int(s):
+    try:
+        int(s)
+        return True
+    except ValueError:
+        return False
+
+
+_ONE_DAY_IN_SECONDS = 60 * 60 * 24
+MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1'))
+
+
+class BackendServicer(backend_pb2_grpc.BackendServicer):
+    def Health(self, request, context):
+        return backend_pb2.Reply(message=bytes("OK", 'utf-8'))
+
+    def LoadModel(self, request, context):
+        if torch.cuda.is_available():
+            device = "cuda"
+        else:
+            device = "cpu"
+        mps_available = hasattr(torch.backends, "mps") and torch.backends.mps.is_available()
+        if mps_available:
+            device = "mps"
+        if not torch.cuda.is_available() and request.CUDA:
+            return backend_pb2.Result(success=False, message="CUDA is not available")
+
+        self.device = device
+        self.options = {}
+
+        # Options arrive as "key:value" strings; coerce numeric and boolean values.
+        for opt in request.Options:
+            if ":" not in opt:
+                continue
+            key, value = opt.split(":", 1)
+            if is_float(value):
+                value = float(value)
+            elif is_int(value):
+                value = int(value)
+            elif value.lower() in ["true", "false"]:
+                value = value.lower() == "true"
+            self.options[key] = value
+
+        model_name = request.Model or "nvidia/parakeet-tdt-0.6b-v3"
+
+        try:
+            print(f"Loading NEMO ASR model from {model_name}", file=sys.stderr)
+            self.model = nemo_asr.models.ASRModel.from_pretrained(model_name=model_name)
+            print("NEMO ASR model loaded successfully", file=sys.stderr)
+        except Exception as err:
+            print(f"[ERROR] LoadModel failed: {err}", file=sys.stderr)
+            import traceback
+            traceback.print_exc(file=sys.stderr)
+            return backend_pb2.Result(success=False, message=str(err))
+
+        return backend_pb2.Result(message="Model loaded successfully", success=True)
+
+    def AudioTranscription(self, request, context):
+        result_segments = []
+        text = ""
+        try:
+            audio_path = request.dst
+            if not audio_path or not os.path.exists(audio_path):
+                print(f"Error: Audio file not found: {audio_path}", file=sys.stderr)
+                return backend_pb2.TranscriptResult(segments=[], text="")
+
+            # NEMO's transcribe method accepts a list of audio paths and returns a list of transcripts
+            results = self.model.transcribe([audio_path])
+
+            if not results or len(results) == 0:
+                return backend_pb2.TranscriptResult(segments=[], text="")
+
+            # Depending on the NEMO version, transcribe() returns either plain
+            # strings or Hypothesis objects carrying the transcript in .text
+            # (the parakeet-tdt model cards use results[0].text), so handle both.
+            first = results[0]
+            text = getattr(first, "text", first) or ""
+            if text:
+                # Create a single segment with the full transcription
+                result_segments.append(backend_pb2.TranscriptSegment(
+                    id=0, start=0, end=0, text=text
+                ))
+
+        except Exception as err:
+            print(f"Error in AudioTranscription: {err}", file=sys.stderr)
+            import traceback
+            traceback.print_exc(file=sys.stderr)
+            return backend_pb2.TranscriptResult(segments=[], text="")
+
+        return backend_pb2.TranscriptResult(segments=result_segments, text=text)
+
+
+def 
serve(address): + server = grpc.server( + futures.ThreadPoolExecutor(max_workers=MAX_WORKERS), + options=[ + ('grpc.max_message_length', 50 * 1024 * 1024), + ('grpc.max_send_message_length', 50 * 1024 * 1024), + ('grpc.max_receive_message_length', 50 * 1024 * 1024), + ]) + backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server) + server.add_insecure_port(address) + server.start() + print("Server started. Listening on: " + address, file=sys.stderr) + + def signal_handler(sig, frame): + print("Received termination signal. Shutting down...") + server.stop(0) + sys.exit(0) + + signal.signal(signal.SIGINT, signal_handler) + signal.signal(signal.SIGTERM, signal_handler) + + try: + while True: + time.sleep(_ONE_DAY_IN_SECONDS) + except KeyboardInterrupt: + server.stop(0) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Run the gRPC server.") + parser.add_argument("--addr", default="localhost:50051", help="The address to bind the server to.") + args = parser.parse_args() + serve(args.addr) diff --git a/backend/python/nemo/install.sh b/backend/python/nemo/install.sh new file mode 100755 index 000000000..71c9e79a9 --- /dev/null +++ b/backend/python/nemo/install.sh @@ -0,0 +1,21 @@ +#!/bin/bash +set -e + +EXTRA_PIP_INSTALL_FLAGS="--no-build-isolation" + +backend_dir=$(dirname $0) +if [ -d $backend_dir/common ]; then + source $backend_dir/common/libbackend.sh +else + source $backend_dir/../common/libbackend.sh +fi + +if [ "x${BUILD_PROFILE}" == "xintel" ]; then + EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match" +fi + +PYTHON_VERSION="3.12" +PYTHON_PATCH="12" +PY_STANDALONE_TAG="20251120" + +installRequirements diff --git a/backend/python/nemo/protogen.sh b/backend/python/nemo/protogen.sh new file mode 100755 index 000000000..1ad37dee1 --- /dev/null +++ b/backend/python/nemo/protogen.sh @@ -0,0 +1,11 @@ +#!/bin/bash +set -e + +backend_dir=$(dirname $0) +if [ -d $backend_dir/common ]; then + source $backend_dir/common/libbackend.sh +else + source $backend_dir/../common/libbackend.sh +fi + +python3 -m grpc_tools.protoc -I../.. -I./ --python_out=. --grpc_python_out=. 
backend.proto diff --git a/backend/python/nemo/requirements-cpu.txt b/backend/python/nemo/requirements-cpu.txt new file mode 100644 index 000000000..9cc827fdc --- /dev/null +++ b/backend/python/nemo/requirements-cpu.txt @@ -0,0 +1,3 @@ +--extra-index-url https://download.pytorch.org/whl/cpu +torch +nemo_toolkit[asr] diff --git a/backend/python/nemo/requirements-cublas12.txt b/backend/python/nemo/requirements-cublas12.txt new file mode 100644 index 000000000..baef1ac71 --- /dev/null +++ b/backend/python/nemo/requirements-cublas12.txt @@ -0,0 +1,3 @@ +--extra-index-url https://download.pytorch.org/whl/cu128 +torch +nemo_toolkit[asr] diff --git a/backend/python/nemo/requirements-cublas13.txt b/backend/python/nemo/requirements-cublas13.txt new file mode 100644 index 000000000..50c18d53e --- /dev/null +++ b/backend/python/nemo/requirements-cublas13.txt @@ -0,0 +1,3 @@ +--extra-index-url https://download.pytorch.org/whl/cu130 +torch +nemo_toolkit[asr] diff --git a/backend/python/nemo/requirements-hipblas.txt b/backend/python/nemo/requirements-hipblas.txt new file mode 100644 index 000000000..21a5d9635 --- /dev/null +++ b/backend/python/nemo/requirements-hipblas.txt @@ -0,0 +1,3 @@ +--extra-index-url https://download.pytorch.org/whl/rocm6.3 +torch +nemo_toolkit[asr] diff --git a/backend/python/nemo/requirements-intel.txt b/backend/python/nemo/requirements-intel.txt new file mode 100644 index 000000000..c863eec72 --- /dev/null +++ b/backend/python/nemo/requirements-intel.txt @@ -0,0 +1,3 @@ +--extra-index-url https://download.pytorch.org/whl/xpu +torch +nemo_toolkit[asr] \ No newline at end of file diff --git a/backend/python/nemo/requirements-l4t12.txt b/backend/python/nemo/requirements-l4t12.txt new file mode 100644 index 000000000..177af424f --- /dev/null +++ b/backend/python/nemo/requirements-l4t12.txt @@ -0,0 +1,3 @@ +--extra-index-url https://pypi.jetson-ai-lab.io/jp6/cu129/ +torch +nemo_toolkit[asr] diff --git a/backend/python/nemo/requirements-l4t13.txt b/backend/python/nemo/requirements-l4t13.txt new file mode 100644 index 000000000..50c18d53e --- /dev/null +++ b/backend/python/nemo/requirements-l4t13.txt @@ -0,0 +1,3 @@ +--extra-index-url https://download.pytorch.org/whl/cu130 +torch +nemo_toolkit[asr] diff --git a/backend/python/nemo/requirements-mps.txt b/backend/python/nemo/requirements-mps.txt new file mode 100644 index 000000000..dd7304836 --- /dev/null +++ b/backend/python/nemo/requirements-mps.txt @@ -0,0 +1,2 @@ +torch +nemo_toolkit[asr] diff --git a/backend/python/nemo/requirements.txt b/backend/python/nemo/requirements.txt new file mode 100644 index 000000000..9ce0da738 --- /dev/null +++ b/backend/python/nemo/requirements.txt @@ -0,0 +1,5 @@ +grpcio==1.71.0 +protobuf +certifi +packaging==24.1 +setuptools diff --git a/backend/python/nemo/run.sh b/backend/python/nemo/run.sh new file mode 100755 index 000000000..eae121f37 --- /dev/null +++ b/backend/python/nemo/run.sh @@ -0,0 +1,9 @@ +#!/bin/bash +backend_dir=$(dirname $0) +if [ -d $backend_dir/common ]; then + source $backend_dir/common/libbackend.sh +else + source $backend_dir/../common/libbackend.sh +fi + +startBackend $@ diff --git a/backend/python/nemo/test.py b/backend/python/nemo/test.py new file mode 100644 index 000000000..ee65193ec --- /dev/null +++ b/backend/python/nemo/test.py @@ -0,0 +1,99 @@ +""" +Tests for the NEMO Toolkit ASR gRPC backend. 
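+
+Each test launches backend.py as a subprocess listening on localhost:50051 and
+talks to it through the generated gRPC stubs. Set SKIP_ASR_TESTS=true to skip
+the model-download-and-inference test in CI.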
+"""
+import unittest
+import subprocess
+import time
+import os
+import tempfile
+import shutil
+import backend_pb2
+import backend_pb2_grpc
+
+import grpc
+
+# Skip heavy transcription test in CI (model download + inference)
+SKIP_ASR_TESTS = os.environ.get("SKIP_ASR_TESTS", "false").lower() == "true"
+
+
+class TestBackendServicer(unittest.TestCase):
+    def setUp(self):
+        self.service = subprocess.Popen(["python3", "backend.py", "--addr", "localhost:50051"])
+        time.sleep(15)
+
+    def tearDown(self):
+        self.service.terminate()
+        self.service.wait()
+
+    def test_server_startup(self):
+        try:
+            self.setUp()
+            with grpc.insecure_channel("localhost:50051") as channel:
+                stub = backend_pb2_grpc.BackendStub(channel)
+                response = stub.Health(backend_pb2.HealthMessage())
+                self.assertEqual(response.message, b'OK')
+        except Exception as err:
+            print(err)
+            self.fail("Server failed to start")
+        finally:
+            self.tearDown()
+
+    def test_load_model(self):
+        try:
+            self.setUp()
+            with grpc.insecure_channel("localhost:50051") as channel:
+                stub = backend_pb2_grpc.BackendStub(channel)
+                response = stub.LoadModel(backend_pb2.ModelOptions(Model="nvidia/parakeet-tdt-0.6b-v3"))
+                self.assertTrue(response.success, response.message)
+                self.assertEqual(response.message, "Model loaded successfully")
+        except Exception as err:
+            print(err)
+            self.fail("LoadModel service failed")
+        finally:
+            self.tearDown()
+
+    @unittest.skipIf(SKIP_ASR_TESTS, "ASR transcription test skipped (SKIP_ASR_TESTS=true)")
+    def test_audio_transcription(self):
+        temp_dir = tempfile.mkdtemp()
+        audio_file = os.path.join(temp_dir, 'audio.wav')
+        try:
+            # Download a sample audio file for testing
+            url = "https://audio-samples.github.io/samples/mp3/crowd-cheering-and-applause-sound-effect.mp3"
+            result = subprocess.run(
+                ["wget", "-q", url, "-O", audio_file],
+                capture_output=True,
+                text=True,
+                timeout=30,
+            )
+            if result.returncode != 0:
+                self.skipTest(f"Could not download sample audio: {result.stderr}")
+            if not os.path.exists(audio_file):
+                self.skipTest("Sample audio file not found after download")
+
+            self.setUp()
+            with grpc.insecure_channel("localhost:50051") as channel:
+                stub = backend_pb2_grpc.BackendStub(channel)
+                load_response = stub.LoadModel(backend_pb2.ModelOptions(Model="nvidia/parakeet-tdt-0.6b-v3"))
+                self.assertTrue(load_response.success, load_response.message)
+
+                transcript_response = stub.AudioTranscription(
+                    backend_pb2.TranscriptRequest(dst=audio_file)
+                )
+                self.assertIsNotNone(transcript_response)
+                self.assertIsNotNone(transcript_response.text)
+                self.assertGreaterEqual(len(transcript_response.segments), 0)
+                all_text = ""
+                for segment in transcript_response.segments:
+                    all_text += segment.text
+                print(f"Transcription result: {all_text}")
+                # The sample above is a crowd/applause sound effect, so no
+                # specific word is guaranteed in the output; only check that
+                # the call completed and produced a transcript string.
+                self.assertIsInstance(all_text, str)
+                if transcript_response.segments:
+                    self.assertIsNotNone(transcript_response.segments[0].text)
+        finally:
+            self.tearDown()
+            if os.path.exists(temp_dir):
+                shutil.rmtree(temp_dir)
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/backend/python/nemo/test.sh b/backend/python/nemo/test.sh
new file mode 100755
index 000000000..eb59f2aaf
--- /dev/null
+++ b/backend/python/nemo/test.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+set -e
+
+backend_dir=$(dirname $0)
+if [ -d $backend_dir/common ]; then
+    source $backend_dir/common/libbackend.sh
+else
+    source $backend_dir/../common/libbackend.sh
+fi
+
+runUnittests
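
For reviewers: below is a minimal sketch of how the new backend can be exercised end to end. It assumes the stubs have been generated with protogen.sh and the server was started via run.sh; the message and field names (HealthMessage, ModelOptions.Model, TranscriptRequest.dst) match the ones test.py uses above, while the address and the audio path are placeholders.

```python
# smoke_client.py - hypothetical manual smoke test for the nemo backend.
# Assumes backend_pb2/backend_pb2_grpc were generated by protogen.sh and the
# server is listening on localhost:50051 (the address test.py also uses).
import sys

import grpc

import backend_pb2
import backend_pb2_grpc


def main(audio_path: str, addr: str = "localhost:50051") -> None:
    with grpc.insecure_channel(addr) as channel:
        stub = backend_pb2_grpc.BackendStub(channel)

        # Health check: the servicer replies with the raw bytes b"OK".
        health = stub.Health(backend_pb2.HealthMessage())
        assert health.message == b"OK", health.message

        # Load the same default Parakeet model the backend falls back to.
        load = stub.LoadModel(
            backend_pb2.ModelOptions(Model="nvidia/parakeet-tdt-0.6b-v3"))
        if not load.success:
            sys.exit(f"LoadModel failed: {load.message}")

        # The backend reads the audio from the filesystem path passed in `dst`
        # and returns the transcript as a single segment plus the full text.
        result = stub.AudioTranscription(
            backend_pb2.TranscriptRequest(dst=audio_path))
        print(result.text)


if __name__ == "__main__":
    main(sys.argv[1])
```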