mirror of
https://github.com/mudler/LocalAI.git
synced 2026-02-24 02:36:11 -05:00
108 lines
2.7 KiB
Go
108 lines
2.7 KiB
Go
package backend
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"time"
|
|
|
|
"github.com/mudler/LocalAI/core/config"
|
|
"github.com/mudler/LocalAI/core/trace"
|
|
"github.com/mudler/LocalAI/core/schema"
|
|
|
|
"github.com/mudler/LocalAI/pkg/grpc/proto"
|
|
"github.com/mudler/LocalAI/pkg/model"
|
|
)
|
|
|
|
func ModelTranscription(audio, language string, translate, diarize bool, prompt string, ml *model.ModelLoader, modelConfig config.ModelConfig, appConfig *config.ApplicationConfig) (*schema.TranscriptionResult, error) {
|
|
if modelConfig.Backend == "" {
|
|
modelConfig.Backend = model.WhisperBackend
|
|
}
|
|
|
|
opts := ModelOptions(modelConfig, appConfig)
|
|
|
|
transcriptionModel, err := ml.Load(opts...)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
if transcriptionModel == nil {
|
|
return nil, fmt.Errorf("could not load transcription model")
|
|
}
|
|
|
|
var startTime time.Time
|
|
if appConfig.EnableTracing {
|
|
trace.InitBackendTracingIfEnabled(appConfig.TracingMaxItems)
|
|
startTime = time.Now()
|
|
}
|
|
|
|
r, err := transcriptionModel.AudioTranscription(context.Background(), &proto.TranscriptRequest{
|
|
Dst: audio,
|
|
Language: language,
|
|
Translate: translate,
|
|
Diarize: diarize,
|
|
Threads: uint32(*modelConfig.Threads),
|
|
Prompt: prompt,
|
|
})
|
|
if err != nil {
|
|
if appConfig.EnableTracing {
|
|
trace.RecordBackendTrace(trace.BackendTrace{
|
|
Timestamp: startTime,
|
|
Duration: time.Since(startTime),
|
|
Type: trace.BackendTraceTranscription,
|
|
ModelName: modelConfig.Name,
|
|
Backend: modelConfig.Backend,
|
|
Summary: trace.TruncateString(audio, 200),
|
|
Error: err.Error(),
|
|
Data: map[string]any{
|
|
"audio_file": audio,
|
|
"language": language,
|
|
"translate": translate,
|
|
"diarize": diarize,
|
|
"prompt": prompt,
|
|
},
|
|
})
|
|
}
|
|
return nil, err
|
|
}
|
|
tr := &schema.TranscriptionResult{
|
|
Text: r.Text,
|
|
}
|
|
for _, s := range r.Segments {
|
|
var tks []int
|
|
for _, t := range s.Tokens {
|
|
tks = append(tks, int(t))
|
|
}
|
|
tr.Segments = append(tr.Segments,
|
|
schema.TranscriptionSegment{
|
|
Text: s.Text,
|
|
Id: int(s.Id),
|
|
Start: time.Duration(s.Start),
|
|
End: time.Duration(s.End),
|
|
Tokens: tks,
|
|
Speaker: s.Speaker,
|
|
})
|
|
}
|
|
|
|
if appConfig.EnableTracing {
|
|
trace.RecordBackendTrace(trace.BackendTrace{
|
|
Timestamp: startTime,
|
|
Duration: time.Since(startTime),
|
|
Type: trace.BackendTraceTranscription,
|
|
ModelName: modelConfig.Name,
|
|
Backend: modelConfig.Backend,
|
|
Summary: trace.TruncateString(audio+" -> "+tr.Text, 200),
|
|
Data: map[string]any{
|
|
"audio_file": audio,
|
|
"language": language,
|
|
"translate": translate,
|
|
"diarize": diarize,
|
|
"prompt": prompt,
|
|
"result_text": tr.Text,
|
|
"segments_count": len(tr.Segments),
|
|
},
|
|
})
|
|
}
|
|
|
|
return tr, err
|
|
}
|