mirror of
https://github.com/mudler/LocalAI.git
synced 2026-05-24 08:38:02 -04:00
feat(api): Add transcribe response format request parameter & adjust STT backends (#8318)
* WIP response format implementation for audio transcriptions (cherry picked from commit e271dd764bbc13846accf3beb8b6522153aa276f) Signed-off-by: Andres Smith <andressmithdev@pm.me> * Rework transcript response_format and add more formats (cherry picked from commit 6a93a8f63e2ee5726bca2980b0c9cf4ef8b7aeb8) Signed-off-by: Andres Smith <andressmithdev@pm.me> * Add test and replace go-openai package with official openai go client (cherry picked from commit f25d1a04e46526429c89db4c739e1e65942ca893) Signed-off-by: Andres Smith <andressmithdev@pm.me> * Fix faster-whisper backend and refactor transcription formatting to also work on CLI Signed-off-by: Andres Smith <andressmithdev@pm.me> (cherry picked from commit 69a93977d5e113eb7172bd85a0f918592d3d2168) Signed-off-by: Andres Smith <andressmithdev@pm.me> --------- Signed-off-by: Andres Smith <andressmithdev@pm.me> Co-authored-by: nanoandrew4 <nanoandrew4@gmail.com> Co-authored-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
This commit is contained in:
41
pkg/format/transcription.go
Normal file
41
pkg/format/transcription.go
Normal file
@@ -0,0 +1,41 @@
|
||||
package format
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/mudler/LocalAI/core/schema"
|
||||
)
|
||||
|
||||
func TranscriptionResponse(tr *schema.TranscriptionResult, resFmt schema.TranscriptionResponseFormatType) string {
|
||||
var out string
|
||||
if resFmt == schema.TranscriptionResponseFormatLrc {
|
||||
out = "[by:LocalAI]\n[re:LocalAI]\n"
|
||||
} else if resFmt == schema.TranscriptionResponseFormatVtt {
|
||||
out = "WEBVTT"
|
||||
}
|
||||
|
||||
for i, s := range tr.Segments {
|
||||
switch resFmt {
|
||||
case schema.TranscriptionResponseFormatLrc:
|
||||
m := s.Start.Milliseconds()
|
||||
out += fmt.Sprintf("\n[%02d:%02d:%02d] %s", m/60000, (m/1000)%60, (m%1000)/10, strings.TrimSpace(s.Text))
|
||||
case schema.TranscriptionResponseFormatSrt:
|
||||
out += fmt.Sprintf("\n\n%d\n%s --> %s\n%s", i+1, durationStr(s.Start, ','), durationStr(s.End, ','), strings.TrimSpace(s.Text))
|
||||
case schema.TranscriptionResponseFormatVtt:
|
||||
out += fmt.Sprintf("\n\n%s --> %s\n%s\n", durationStr(s.Start, '.'), durationStr(s.End, '.'), strings.TrimSpace(s.Text))
|
||||
case schema.TranscriptionResponseFormatText:
|
||||
fallthrough
|
||||
default:
|
||||
out += fmt.Sprintf("\n%s", strings.TrimSpace(s.Text))
|
||||
}
|
||||
}
|
||||
|
||||
return out
|
||||
}
|
||||
|
||||
// durationStr formats d as "HH:MM:SS<sep>mmm", where <sep> is millisSeparator
// (',' for SRT, '.' for WebVTT). Hours are zero-padded to at least two digits
// and are not wrapped, so durations of 100 hours or more widen the field.
// Sub-millisecond precision is truncated.
func durationStr(d time.Duration, millisSeparator rune) string {
	// Derive every field from the same integer millisecond count so they stay
	// consistent with each other.
	m := d.Milliseconds()
	hours := m / 3600000
	// Minutes within the hour: without the modulo, durations of an hour or
	// more would render as e.g. "01:61:01" instead of "01:01:01".
	minutes := (m / 60000) % 60
	seconds := (m / 1000) % 60
	millis := m % 1000
	return fmt.Sprintf("%02d:%02d:%02d%c%03d", hours, minutes, seconds, millisSeparator, millis)
}
|
||||
Reference in New Issue
Block a user