From df7623fd87cf516e9d050fc997cd930283848b6a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=95=AA=E8=8C=84=E6=91=94=E6=88=90=E7=95=AA=E8=8C=84?= =?UTF-8?q?=E9=85=B1?= <68098251+fqscfqj@users.noreply.github.com> Date: Wed, 27 May 2026 04:35:23 +0800 Subject: [PATCH] fix(nemo): extract Hypothesis.text for TDT/RNNT ASR models (#10012) * fix(nemo): extract Hypothesis.text for TDT/RNNT ASR models CTC models (e.g. Whisper) return List[str] from transcribe(), but TDT/RNNT models (e.g. parakeet-tdt-0.6b-v3) return List[Hypothesis] where the decoded text lives in the Hypothesis.text attribute. Previously, results[0] was assigned directly to the protobuf string field, causing silent empty output for non-CTC models. Now checks the return type and extracts .text from Hypothesis objects, with a safe fallback via getattr(). * refactor: simplify Hypothesis text extraction per Copilot review Use single getattr() call instead of hasattr() + double access, and return empty string for unknown types instead of str(result) to avoid leaking internal repr to clients. --- backend/python/nemo/backend.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/backend/python/nemo/backend.py b/backend/python/nemo/backend.py index 270e8fb3a..ccbff7cd2 100644 --- a/backend/python/nemo/backend.py +++ b/backend/python/nemo/backend.py @@ -99,8 +99,15 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): if not results or len(results) == 0: return backend_pb2.TranscriptResult(segments=[], text="") - # Get the transcript text from the first result - text = results[0] + # Get the transcript text from the first result. + # CTC models return List[str], TDT/RNNT models return List[Hypothesis] + # where the actual text lives in Hypothesis.text. + result = results[0] + if isinstance(result, str): + text = result + else: + text = getattr(result, 'text', None) or "" + if text: # Create a single segment with the full transcription result_segments.append(backend_pb2.TranscriptSegment(