feat: support word-level timestamps for faster-whisper (#9621)

Signed-off-by: Andreas Egli <github@kharan.ch>
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
Co-authored-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
This commit is contained in:
Andreas Egli
2026-05-06 00:32:52 +02:00
committed by GitHub
parent a315c321c1
commit af83518532
6 changed files with 146 additions and 11 deletions

View File

@@ -55,11 +55,27 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
 resultSegments = []
 text = ""
 try:
-segments, info = self.model.transcribe(request.dst, beam_size=5, condition_on_previous_text=False)
+word_timestamps = "word" in request.timestamp_granularities
+segments, info = self.model.transcribe(request.dst, beam_size=5, condition_on_previous_text=False, word_timestamps=word_timestamps)
 id = 0
 for segment in segments:
 print("[%.2fs -> %.2fs] %s" % (segment.start, segment.end, segment.text))
-resultSegments.append(backend_pb2.TranscriptSegment(id=id, start=int(segment.start*1e9), end=int(segment.end*1e9), text=segment.text))
+words = []
+if word_timestamps and hasattr(segment, 'words'):
+for word in segment.words:
+words.append(backend_pb2.TranscriptWord(
+start=int(word.start * 1e9),
+end=int(word.end * 1e9),
+text=word.word
+))
+resultSegments.append(backend_pb2.TranscriptSegment(
+id=id,
+start=int(segment.start * 1e9),
+end=int(segment.end * 1e9),
+text=segment.text,
+words=words
+))
 text += segment.text
 id += 1
 except Exception as err: