mirror of
https://github.com/mudler/LocalAI.git
synced 2026-02-04 11:42:57 -05:00
* feat(proto): add speaker field to TranscriptSegment for diarization
Add speaker field to the gRPC TranscriptSegment message and map it
through the Go schema, enabling backends to return speaker labels.
Signed-off-by: eureka928 <meobius123@gmail.com>
* feat(whisperx): add whisperx backend for transcription with diarization
Add Python gRPC backend using WhisperX for speech-to-text with
word-level timestamps, forced alignment, and speaker diarization
via pyannote-audio when HF_TOKEN is provided.
Signed-off-by: eureka928 <meobius123@gmail.com>
* feat(whisperx): register whisperx backend in Makefile
Signed-off-by: eureka928 <meobius123@gmail.com>
* feat(whisperx): add whisperx meta and image entries to index.yaml
Signed-off-by: eureka928 <meobius123@gmail.com>
* ci(whisperx): add build matrix entries for CPU, CUDA 12/13, and ROCm
Signed-off-by: eureka928 <meobius123@gmail.com>
* fix(whisperx): unpin torch versions and use CPU index for cpu requirements
Address review feedback:
- Use --extra-index-url for CPU torch wheels to reduce size
- Remove torch version pins, let uv resolve compatible versions
Signed-off-by: eureka928 <meobius123@gmail.com>
* fix(whisperx): pin torch ROCm variant to fix CI build failure
Signed-off-by: eureka928 <meobius123@gmail.com>
* fix(whisperx): pin torch CPU variant to fix uv resolution failure
Pin torch==2.8.0+cpu so uv resolves the CPU wheel from the extra
index instead of picking torch==2.8.0+cu128 from PyPI, which pulls
unresolvable CUDA dependencies.
Signed-off-by: eureka928 <meobius123@gmail.com>
* fix(whisperx): use unsafe-best-match index strategy to fix uv resolution failure
uv's default first-match strategy finds torch on PyPI before checking
the extra index, causing it to pick torch==2.8.0+cu128 instead of the
CPU variant. This makes whisperx's transitive torch dependency
unresolvable. Using unsafe-best-match lets uv consider all indexes.
Signed-off-by: eureka928 <meobius123@gmail.com>
* fix(whisperx): drop +cpu local version suffix to fix uv resolution failure
PEP 440 ==2.8.0 matches 2.8.0+cpu from the extra index, avoiding the
issue where uv cannot locate an explicit +cpu local version specifier.
This aligns with the pattern used by all other CPU backends.
Signed-off-by: eureka928 <meobius123@gmail.com>
* fix(backends): drop +rocm local version suffixes from hipblas requirements to fix uv resolution
uv cannot resolve PEP 440 local version specifiers (e.g. +rocm6.4,
+rocm6.3) in pinned requirements. The --extra-index-url already points
to the correct ROCm wheel index and --index-strategy unsafe-best-match
(set in libbackend.sh) ensures the ROCm variant is preferred.
Applies the same fix as 7f5d72e8 (which resolved this for +cpu) across
all 14 hipblas requirements files.
Signed-off-by: eureka928 <meobius123@gmail.com>
Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
Signed-off-by: eureka928 <meobius123@gmail.com>
* revert: scope hipblas suffix fix to whisperx only
Reverts changes to non-whisperx hipblas requirements files per
maintainer review — other backends are building fine with the +rocm
local version suffix.
Signed-off-by: eureka928 <meobius123@gmail.com>
Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
Signed-off-by: eureka928 <meobius123@gmail.com>
---------
Signed-off-by: eureka928 <meobius123@gmail.com>
Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
125 lines
4.4 KiB
Python
125 lines
4.4 KiB
Python
"""
|
|
A test script to test the gRPC service for WhisperX transcription
|
|
"""
|
|
import unittest
|
|
import subprocess
|
|
import time
|
|
import os
|
|
import tempfile
|
|
import shutil
|
|
import backend_pb2
|
|
import backend_pb2_grpc
|
|
|
|
import grpc
|
|
|
|
|
|
class TestBackendServicer(unittest.TestCase):
    """
    TestBackendServicer is the class that tests the gRPC service

    unittest calls setUp() before and tearDown() after every test method, so
    the tests below rely on those hooks and never invoke them by hand.
    Calling them explicitly would spawn a second backend process per test and
    overwrite ``self.service``, leaving the first process without anyone to
    terminate it.
    """

    # Address the backend subprocess is asked to listen on and the tests dial.
    _ADDR = "localhost:50051"

    def setUp(self):
        """
        This method sets up the gRPC service by starting the server
        """
        self.service = subprocess.Popen(["python3", "backend.py", "--addr", self._ADDR])
        # Fixed grace period before the tests start issuing RPCs.
        time.sleep(10)

    def tearDown(self) -> None:
        """
        This method tears down the gRPC service by terminating the server
        """
        self.service.terminate()
        self.service.wait()

    def test_server_startup(self):
        """
        This method tests if the server starts up successfully
        """
        try:
            with grpc.insecure_channel(self._ADDR) as channel:
                stub = backend_pb2_grpc.BackendStub(channel)
                response = stub.Health(backend_pb2.HealthMessage())
        except grpc.RpcError as err:
            # Only transport/RPC errors are translated; assertion failures
            # below keep their own, more precise, messages.
            self.fail(f"Server failed to start: {err}")

        self.assertEqual(response.message, b'OK')

    def test_load_model(self):
        """
        This method tests if the model is loaded successfully
        """
        try:
            with grpc.insecure_channel(self._ADDR) as channel:
                stub = backend_pb2_grpc.BackendStub(channel)
                response = stub.LoadModel(backend_pb2.ModelOptions(Model="tiny"))
        except grpc.RpcError as err:
            self.fail(f"LoadModel service failed: {err}")

        self.assertTrue(response.success)
        self.assertEqual(response.message, "Model loaded successfully")

    def test_audio_transcription(self):
        """
        This method tests if audio transcription works successfully
        """
        url = "https://cdn.openai.com/whisper/draft-20220913a/micro-machines.wav"

        # The context manager removes the directory (and the downloaded file)
        # whether the test passes, fails, or errors.
        with tempfile.TemporaryDirectory() as temp_dir:
            audio_file = os.path.join(temp_dir, 'audio.wav')

            # Download the audio file to the temporary directory
            print(f"Downloading audio file to {audio_file}...")
            result = subprocess.run(
                ["wget", "-q", url, "-O", audio_file],
                capture_output=True,
                text=True,
            )
            if result.returncode != 0:
                self.fail(f"Failed to download audio file: {result.stderr}")

            # Verify the file was downloaded
            if not os.path.exists(audio_file):
                self.fail(f"Audio file was not downloaded to {audio_file}")

            try:
                with grpc.insecure_channel(self._ADDR) as channel:
                    stub = backend_pb2_grpc.BackendStub(channel)

                    # Load the model first
                    load_response = stub.LoadModel(backend_pb2.ModelOptions(Model="tiny"))
                    self.assertTrue(load_response.success)

                    # Perform transcription without diarization
                    transcript_request = backend_pb2.TranscriptRequest(dst=audio_file)
                    transcript_response = stub.AudioTranscription(transcript_request)
            except grpc.RpcError as err:
                self.fail(f"AudioTranscription service failed: {err}")

            # Print the transcribed text for debugging
            print(f"Transcribed text: {transcript_response.text}")
            print(f"Number of segments: {len(transcript_response.segments)}")

            # Verify response structure
            self.assertIsNotNone(transcript_response)
            self.assertIsNotNone(transcript_response.text)
            self.assertGreater(len(transcript_response.text), 0)
            self.assertGreater(len(transcript_response.segments), 0)

            # Verify the first segment carries text and an integer id
            segment = transcript_response.segments[0]
            self.assertIsNotNone(segment.text)
            self.assertIsInstance(segment.id, int)