feat(nemo): add Nemo (only asr for now) backend

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
Ettore Di Giacinto
2026-02-07 00:00:20 +01:00
parent ae2689936a
commit ec7e5129e7
20 changed files with 534 additions and 2 deletions

View File

@@ -0,0 +1,23 @@
# Build target: runs install.sh for this backend.
.PHONY: nemo-asr
nemo-asr:
	bash install.sh

# Start the backend via run.sh (runs the install target first).
.PHONY: run
run: nemo-asr
	@echo "Running nemo-asr..."
	bash run.sh
	@echo "nemo-asr run."

# Run the test suite via test.sh (runs the install target first).
.PHONY: test
test: nemo-asr
	@echo "Testing nemo-asr..."
	bash test.sh
	@echo "nemo-asr tested."

# Remove the backend_pb2 / backend_pb2_grpc Python modules.
.PHONY: protogen-clean
protogen-clean:
	$(RM) backend_pb2_grpc.py backend_pb2.py

# Remove the virtualenv and bytecode cache in addition to the protobuf modules.
.PHONY: clean
clean: protogen-clean
	rm -rf venv __pycache__

View File

@@ -0,0 +1,147 @@
#!/usr/bin/env python3
"""
gRPC server of LocalAI for NVIDIA NEMO Toolkit ASR.
"""
from concurrent import futures
import time
import argparse
import signal
import sys
import os
import backend_pb2
import backend_pb2_grpc
import torch
import nemo.collections.asr as nemo_asr
import grpc
def is_float(s):
    """Return True when ``float(s)`` succeeds, False when it raises ValueError."""
    try:
        float(s)
    except ValueError:
        return False
    else:
        return True
def is_int(s):
    """Return True when ``int(s)`` succeeds, False when it raises ValueError."""
    try:
        int(s)
    except ValueError:
        return False
    else:
        return True
# Length of each sleep in serve()'s keep-alive loop.
_ONE_DAY_IN_SECONDS = 60 * 60 * 24
# Size of the gRPC thread pool; overridable through the
# PYTHON_GRPC_MAX_WORKERS environment variable, defaulting to a single worker.
MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1'))
class BackendServicer(backend_pb2_grpc.BackendServicer):
    """gRPC servicer that exposes a NeMo ASR model to LocalAI.

    ``LoadModel`` must be called before ``AudioTranscription``; it stores the
    loaded model on ``self.model``, the selected device name on
    ``self.device`` and the parsed request options on ``self.options``.
    """

    def Health(self, request, context):
        """Liveness probe: reply with the bytes ``OK``."""
        return backend_pb2.Reply(message=bytes("OK", 'utf-8'))

    def LoadModel(self, request, context):
        """Load the ASR model named by ``request.Model``.

        Falls back to ``nvidia/parakeet-tdt-0.6b-v3`` when no model name is
        given. Entries of ``request.Options`` shaped like ``key:value`` are
        stored in ``self.options`` with the value converted to int, float or
        bool when it parses as one; entries without a colon are ignored.

        Returns a ``Result`` with ``success=False`` when CUDA was requested
        but is unavailable, or when loading the model raises.
        """
        cuda_available = torch.cuda.is_available()
        if request.CUDA and not cuda_available:
            return backend_pb2.Result(success=False, message="CUDA is not available")

        if cuda_available:
            device = "cuda"
        elif hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
            device = "mps"
        else:
            device = "cpu"
        # NOTE(review): the device name is only recorded here; it is not
        # passed to from_pretrained() below, so placement is left to NeMo.
        self.device = device

        self.options = {}
        for opt in request.Options:
            if ":" not in opt:
                continue
            key, value = opt.split(":", 1)
            # Try int before float: every integer literal is also a valid
            # float literal, so the reverse order would turn "4" into 4.0
            # and never reach the int branch.
            if is_int(value):
                value = int(value)
            elif is_float(value):
                value = float(value)
            elif value.lower() in ["true", "false"]:
                value = value.lower() == "true"
            self.options[key] = value

        model_name = request.Model or "nvidia/parakeet-tdt-0.6b-v3"
        try:
            print(f"Loading NEMO ASR model from {model_name}", file=sys.stderr)
            self.model = nemo_asr.models.ASRModel.from_pretrained(model_name=model_name)
            print("NEMO ASR model loaded successfully", file=sys.stderr)
        except Exception as err:
            # RPC boundary: report the failure to the caller instead of
            # letting the exception escape the handler.
            print(f"[ERROR] LoadModel failed: {err}", file=sys.stderr)
            import traceback
            traceback.print_exc(file=sys.stderr)
            return backend_pb2.Result(success=False, message=str(err))

        return backend_pb2.Result(message="Model loaded successfully", success=True)

    def AudioTranscription(self, request, context):
        """Transcribe the audio file at ``request.dst``.

        Returns a ``TranscriptResult`` whose ``text`` is the full transcript
        and whose ``segments`` holds a single segment (id 0, start/end 0)
        with that text. An empty result is returned when the file is missing,
        when the model yields nothing, or when transcription raises (the
        error is logged to stderr).
        """
        result_segments = []
        text = ""
        try:
            audio_path = request.dst
            if not audio_path or not os.path.exists(audio_path):
                print(f"Error: Audio file not found: {audio_path}", file=sys.stderr)
                return backend_pb2.TranscriptResult(segments=[], text="")

            # NEMO's transcribe method accepts a list of audio paths and
            # returns one result per path.
            results = self.model.transcribe([audio_path])

            # Depending on the NeMo version/model family the call may return
            # a (best_hypotheses, all_hypotheses) tuple; keep the best list.
            if isinstance(results, tuple):
                results = results[0]
            if not results:
                return backend_pb2.TranscriptResult(segments=[], text="")

            # Each entry is either a plain string or a hypothesis object
            # carrying the transcript in ``.text``; the protobuf field only
            # accepts str, so normalise before building the response.
            first = results[0]
            if isinstance(first, str):
                text = first
            else:
                text = getattr(first, "text", "") or ""

            if text:
                # Create a single segment with the full transcription
                result_segments.append(backend_pb2.TranscriptSegment(
                    id=0, start=0, end=0, text=text
                ))
        except Exception as err:
            # RPC boundary: log and fall back to an empty transcript.
            print(f"Error in AudioTranscription: {err}", file=sys.stderr)
            import traceback
            traceback.print_exc(file=sys.stderr)
            return backend_pb2.TranscriptResult(segments=[], text="")

        return backend_pb2.TranscriptResult(segments=result_segments, text=text)
def serve(address):
    """Start the gRPC backend on ``address`` and block until interrupted.

    The server uses a thread pool of ``MAX_WORKERS`` workers and 50 MiB
    message limits. SIGINT and SIGTERM stop the server and exit the process.
    """
    message_limit = 50 * 1024 * 1024
    channel_options = [
        ('grpc.max_message_length', message_limit),
        ('grpc.max_send_message_length', message_limit),
        ('grpc.max_receive_message_length', message_limit),
    ]
    executor = futures.ThreadPoolExecutor(max_workers=MAX_WORKERS)
    server = grpc.server(executor, options=channel_options)
    backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
    server.add_insecure_port(address)
    server.start()
    print("Server started. Listening on: " + address, file=sys.stderr)

    def signal_handler(sig, frame):
        # Stop immediately (no grace period) and terminate the process.
        print("Received termination signal. Shutting down...")
        server.stop(0)
        sys.exit(0)

    for signum in (signal.SIGINT, signal.SIGTERM):
        signal.signal(signum, signal_handler)

    # Keep the main thread alive while the worker pool serves requests.
    try:
        while True:
            time.sleep(_ONE_DAY_IN_SECONDS)
    except KeyboardInterrupt:
        server.stop(0)
if __name__ == "__main__":
    # Command-line entry point: read the bind address and start serving.
    cli = argparse.ArgumentParser(description="Run the gRPC server.")
    cli.add_argument(
        "--addr",
        default="localhost:50051",
        help="The address to bind the server to.",
    )
    serve(cli.parse_args().addr)

21
backend/python/nemo/install.sh Executable file
View File

@@ -0,0 +1,21 @@
#!/bin/bash
# Install the NeMo backend's requirements through the shared libbackend.sh helpers.
set -e

# Extra pip flags; presumably read by installRequirements (defined in
# libbackend.sh, not visible here).
EXTRA_PIP_INSTALL_FLAGS="--no-build-isolation"

# Quote every path expansion so a checkout path containing spaces still works.
backend_dir=$(dirname "$0")
if [ -d "$backend_dir/common" ]; then
    source "$backend_dir/common/libbackend.sh"
else
    source "$backend_dir/../common/libbackend.sh"
fi

if [ "x${BUILD_PROFILE}" == "xintel" ]; then
    EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match"
fi

# Python interpreter selection, set before installRequirements is invoked.
PYTHON_VERSION="3.12"
PYTHON_PATCH="12"
PY_STANDALONE_TAG="20251120"

installRequirements

11
backend/python/nemo/protogen.sh Executable file
View File

@@ -0,0 +1,11 @@
#!/bin/bash
# Generate the Python protobuf/gRPC modules from backend.proto.
set -e

# Quote every path expansion so a checkout path containing spaces still works.
backend_dir=$(dirname "$0")
if [ -d "$backend_dir/common" ]; then
    source "$backend_dir/common/libbackend.sh"
else
    source "$backend_dir/../common/libbackend.sh"
fi

# Include paths and output directories are relative to the current working directory.
python3 -m grpc_tools.protoc -I../.. -I./ --python_out=. --grpc_python_out=. backend.proto

View File

@@ -0,0 +1,3 @@
--extra-index-url https://download.pytorch.org/whl/cpu
torch
nemo_toolkit[asr]

View File

@@ -0,0 +1,3 @@
--extra-index-url https://download.pytorch.org/whl/cu128
torch
nemo_toolkit[asr]

View File

@@ -0,0 +1,3 @@
--extra-index-url https://download.pytorch.org/whl/cu130
torch
nemo_toolkit[asr]

View File

@@ -0,0 +1,3 @@
--extra-index-url https://download.pytorch.org/whl/rocm6.3
torch
nemo_toolkit[asr]

View File

@@ -0,0 +1,3 @@
--extra-index-url https://download.pytorch.org/whl/xpu
torch
nemo_toolkit[asr]

View File

@@ -0,0 +1,3 @@
--extra-index-url https://pypi.jetson-ai-lab.io/jp6/cu129/
torch
nemo_toolkit[asr]

View File

@@ -0,0 +1,3 @@
--extra-index-url https://download.pytorch.org/whl/cu130
torch
nemo_toolkit[asr]

View File

@@ -0,0 +1,2 @@
torch
nemo_toolkit[asr]

View File

@@ -0,0 +1,5 @@
grpcio==1.71.0
protobuf
certifi
packaging==24.1
setuptools

9
backend/python/nemo/run.sh Executable file
View File

@@ -0,0 +1,9 @@
#!/bin/bash
# Launch the NeMo backend through the shared libbackend.sh helper.

# Quote every path expansion so a checkout path containing spaces still works.
backend_dir=$(dirname "$0")
if [ -d "$backend_dir/common" ]; then
    source "$backend_dir/common/libbackend.sh"
else
    source "$backend_dir/../common/libbackend.sh"
fi

# "$@" forwards each argument unchanged; a bare $@ would re-split arguments
# containing whitespace and expand glob characters.
startBackend "$@"

View File

@@ -0,0 +1,99 @@
"""
Tests for the NEMO Toolkit ASR gRPC backend.
"""
import unittest
import subprocess
import time
import os
import tempfile
import shutil
import backend_pb2
import backend_pb2_grpc
import grpc
# Skip heavy transcription test in CI (model download + inference).
# Enabled by setting the SKIP_ASR_TESTS environment variable to "true"
# (compared case-insensitively); any other value leaves the test enabled.
SKIP_ASR_TESTS = os.environ.get("SKIP_ASR_TESTS", "false").lower() == "true"
class TestBackendServicer(unittest.TestCase):
    """Integration tests that drive ``backend.py`` over gRPC.

    unittest invokes ``setUp``/``tearDown`` around every test method, so the
    tests must not call them again: doing so spawns a second server on the
    same port and leaks the first process.
    """

    # Address the backend subprocess binds to and the tests connect to.
    SERVER_ADDRESS = "localhost:50051"
    # Upper bound on how long to wait for the backend to accept connections.
    STARTUP_TIMEOUT_SECONDS = 60
    # Model requested from the backend in the LoadModel calls.
    MODEL_NAME = "nvidia/parakeet-tdt-0.6b-v3"

    def setUp(self):
        """Start the backend subprocess and wait until it is reachable."""
        self.service = subprocess.Popen(
            ["python3", "backend.py", "--addr", self.SERVER_ADDRESS]
        )
        try:
            # Wait for readiness instead of sleeping a fixed time: returns as
            # soon as the server is up and tolerates slow startups.
            with grpc.insecure_channel(self.SERVER_ADDRESS) as channel:
                grpc.channel_ready_future(channel).result(
                    timeout=self.STARTUP_TIMEOUT_SECONDS
                )
        except grpc.FutureTimeoutError:
            # unittest skips tearDown when setUp fails, so stop the process here.
            self.tearDown()
            self.fail("Server failed to start")

    def tearDown(self):
        """Terminate the backend subprocess and wait for it to exit."""
        self.service.terminate()
        self.service.wait()

    def test_server_startup(self):
        """The Health RPC answers with ``OK``."""
        with grpc.insecure_channel(self.SERVER_ADDRESS) as channel:
            stub = backend_pb2_grpc.BackendStub(channel)
            response = stub.Health(backend_pb2.HealthMessage())
        self.assertEqual(response.message, b'OK')

    def test_load_model(self):
        """LoadModel succeeds for the default Parakeet model."""
        with grpc.insecure_channel(self.SERVER_ADDRESS) as channel:
            stub = backend_pb2_grpc.BackendStub(channel)
            response = stub.LoadModel(backend_pb2.ModelOptions(Model=self.MODEL_NAME))
        self.assertTrue(response.success, response.message)
        self.assertEqual(response.message, "Model loaded successfully")

    @unittest.skipIf(SKIP_ASR_TESTS, "ASR transcription test skipped (SKIP_ASR_TESTS=true)")
    def test_audio_transcription(self):
        """Download a sample clip, load the model and transcribe the clip."""
        url = "https://audio-samples.github.io/samples/mp3/crowd-cheering-and-applause-sound-effect.mp3"
        temp_dir = tempfile.mkdtemp()
        self.addCleanup(shutil.rmtree, temp_dir, ignore_errors=True)
        # The sample is an MP3; keep the extension consistent with the content.
        audio_file = os.path.join(temp_dir, 'audio.mp3')

        # Download a sample audio file for testing; an unavailable network or
        # a missing wget binary skips the test rather than failing it.
        try:
            result = subprocess.run(
                ["wget", "-q", url, "-O", audio_file],
                capture_output=True,
                text=True,
                timeout=30,
            )
        except (OSError, subprocess.TimeoutExpired) as err:
            self.skipTest(f"Could not download sample audio: {err}")
        if result.returncode != 0:
            self.skipTest(f"Could not download sample audio: {result.stderr}")
        if not os.path.exists(audio_file):
            self.skipTest("Sample audio file not found after download")

        with grpc.insecure_channel(self.SERVER_ADDRESS) as channel:
            stub = backend_pb2_grpc.BackendStub(channel)
            load_response = stub.LoadModel(backend_pb2.ModelOptions(Model=self.MODEL_NAME))
            self.assertTrue(load_response.success, load_response.message)
            transcript_response = stub.AudioTranscription(
                backend_pb2.TranscriptRequest(dst=audio_file)
            )

        self.assertIsNotNone(transcript_response)
        self.assertIsNotNone(transcript_response.text)
        all_text = "".join(segment.text for segment in transcript_response.segments)
        print(f"Transcription result: {all_text}")
        # NOTE(review): the sample is a crowd-cheering sound effect; confirm
        # that "big" is really expected in its transcript.
        self.assertIn("big", all_text)
        if transcript_response.segments:
            self.assertIsNotNone(transcript_response.segments[0].text)
# Allow running this file directly to execute the test suite.
if __name__ == '__main__':
    unittest.main()

11
backend/python/nemo/test.sh Executable file
View File

@@ -0,0 +1,11 @@
#!/bin/bash
# Run the backend's unit tests through the shared libbackend.sh helper.
set -e

# Quote every path expansion so a checkout path containing spaces still works.
backend_dir=$(dirname "$0")
if [ -d "$backend_dir/common" ]; then
    source "$backend_dir/common/libbackend.sh"
else
    source "$backend_dir/../common/libbackend.sh"
fi

runUnittests