mirror of
https://github.com/mudler/LocalAI.git
synced 2026-05-19 14:17:21 -04:00
feat(api): Add transcribe response format request parameter & adjust STT backends (#8318)
* WIP response format implementation for audio transcriptions (cherry picked from commit e271dd764bbc13846accf3beb8b6522153aa276f) Signed-off-by: Andres Smith <andressmithdev@pm.me> * Rework transcript response_format and add more formats (cherry picked from commit 6a93a8f63e2ee5726bca2980b0c9cf4ef8b7aeb8) Signed-off-by: Andres Smith <andressmithdev@pm.me> * Add test and replace go-openai package with official openai go client (cherry picked from commit f25d1a04e46526429c89db4c739e1e65942ca893) Signed-off-by: Andres Smith <andressmithdev@pm.me> * Fix faster-whisper backend and refactor transcription formatting to also work on CLI Signed-off-by: Andres Smith <andressmithdev@pm.me> (cherry picked from commit 69a93977d5e113eb7172bd85a0f918592d3d2168) Signed-off-by: Andres Smith <andressmithdev@pm.me> --------- Signed-off-by: Andres Smith <andressmithdev@pm.me> Co-authored-by: nanoandrew4 <nanoandrew4@gmail.com> Co-authored-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
This commit is contained in:
41
pkg/format/transcription.go
Normal file
41
pkg/format/transcription.go
Normal file
@@ -0,0 +1,41 @@
|
||||
package format
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/mudler/LocalAI/core/schema"
|
||||
)
|
||||
|
||||
func TranscriptionResponse(tr *schema.TranscriptionResult, resFmt schema.TranscriptionResponseFormatType) string {
|
||||
var out string
|
||||
if resFmt == schema.TranscriptionResponseFormatLrc {
|
||||
out = "[by:LocalAI]\n[re:LocalAI]\n"
|
||||
} else if resFmt == schema.TranscriptionResponseFormatVtt {
|
||||
out = "WEBVTT"
|
||||
}
|
||||
|
||||
for i, s := range tr.Segments {
|
||||
switch resFmt {
|
||||
case schema.TranscriptionResponseFormatLrc:
|
||||
m := s.Start.Milliseconds()
|
||||
out += fmt.Sprintf("\n[%02d:%02d:%02d] %s", m/60000, (m/1000)%60, (m%1000)/10, strings.TrimSpace(s.Text))
|
||||
case schema.TranscriptionResponseFormatSrt:
|
||||
out += fmt.Sprintf("\n\n%d\n%s --> %s\n%s", i+1, durationStr(s.Start, ','), durationStr(s.End, ','), strings.TrimSpace(s.Text))
|
||||
case schema.TranscriptionResponseFormatVtt:
|
||||
out += fmt.Sprintf("\n\n%s --> %s\n%s\n", durationStr(s.Start, '.'), durationStr(s.End, '.'), strings.TrimSpace(s.Text))
|
||||
case schema.TranscriptionResponseFormatText:
|
||||
fallthrough
|
||||
default:
|
||||
out += fmt.Sprintf("\n%s", strings.TrimSpace(s.Text))
|
||||
}
|
||||
}
|
||||
|
||||
return out
|
||||
}
|
||||
|
||||
// durationStr formats d as an "HH:MM:SS<sep>mmm" timestamp, where <sep> is
// millisSeparator (',' for SRT cues, '.' for WebVTT cues).
//
// Minutes and seconds are wrapped to 0-59; hours are not wrapped and simply
// grow past two digits for very long durations. Sub-millisecond precision is
// truncated.
func durationStr(d time.Duration, millisSeparator rune) string {
	ms := d.Milliseconds()

	hours := ms / 3600000
	// Minutes must be taken modulo 60; otherwise any timestamp at or beyond one
	// hour reports the total minute count in the MM field.
	minutes := (ms / 60000) % 60
	// Integer arithmetic throughout, so the seconds field cannot drift from the
	// millisecond field through floating-point rounding.
	seconds := (ms / 1000) % 60
	millis := ms % 1000

	return fmt.Sprintf("%02d:%02d:%02d%c%03d", hours, minutes, seconds, millisSeparator, millis)
}
|
||||
@@ -17,7 +17,7 @@ const (
|
||||
LLamaCPP = "llama-cpp"
|
||||
)
|
||||
|
||||
var Aliases map[string]string = map[string]string{
|
||||
var Aliases = map[string]string{
|
||||
"go-llama": LLamaCPP,
|
||||
"llama": LLamaCPP,
|
||||
"embedded-store": LocalStoreBackend,
|
||||
@@ -29,7 +29,7 @@ var Aliases map[string]string = map[string]string{
|
||||
"stablediffusion": StableDiffusionGGMLBackend,
|
||||
}
|
||||
|
||||
var TypeAlias map[string]string = map[string]string{
|
||||
var TypeAlias = map[string]string{
|
||||
"sentencetransformers": "SentenceTransformer",
|
||||
"huggingface-embeddings": "SentenceTransformer",
|
||||
"mamba": "Mamba",
|
||||
@@ -75,7 +75,7 @@ func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string, string
|
||||
// Check if the backend is provided as external
|
||||
if uri, ok := ml.GetAllExternalBackends(o)[backend]; ok {
|
||||
xlog.Debug("Loading external backend", "uri", uri)
|
||||
// check if uri is a file or a address
|
||||
// check if uri is a file or an address
|
||||
if fi, err := os.Stat(uri); err == nil {
|
||||
xlog.Debug("external backend is file", "file", fi)
|
||||
serverAddress, err := getFreeAddress()
|
||||
|
||||
Reference in New Issue
Block a user