Files
LocalAI/core/backend/transcript.go
Richard Palethorpe 716dba94b4 feat(whisper): Add prompt to condition transcription output (#7624)
* chore(makefile): Add buildargs for sd and cuda when building backend

Signed-off-by: Richard Palethorpe <io@richiejp.com>

* feat(whisper): Add prompt to condition transcription output

Signed-off-by: Richard Palethorpe <io@richiejp.com>

---------

Signed-off-by: Richard Palethorpe <io@richiejp.com>
2025-12-18 14:40:45 +01:00

62 lines
1.4 KiB
Go

package backend
import (
"context"
"fmt"
"time"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/schema"
"github.com/mudler/LocalAI/pkg/grpc/proto"
"github.com/mudler/LocalAI/pkg/model"
)
func ModelTranscription(audio, language string, translate bool, diarize bool, prompt string, ml *model.ModelLoader, modelConfig config.ModelConfig, appConfig *config.ApplicationConfig) (*schema.TranscriptionResult, error) {
if modelConfig.Backend == "" {
modelConfig.Backend = model.WhisperBackend
}
opts := ModelOptions(modelConfig, appConfig)
transcriptionModel, err := ml.Load(opts...)
if err != nil {
return nil, err
}
if transcriptionModel == nil {
return nil, fmt.Errorf("could not load transcription model")
}
r, err := transcriptionModel.AudioTranscription(context.Background(), &proto.TranscriptRequest{
Dst: audio,
Language: language,
Translate: translate,
Diarize: diarize,
Threads: uint32(*modelConfig.Threads),
Prompt: prompt,
})
if err != nil {
return nil, err
}
tr := &schema.TranscriptionResult{
Text: r.Text,
}
for _, s := range r.Segments {
var tks []int
for _, t := range s.Tokens {
tks = append(tks, int(t))
}
tr.Segments = append(tr.Segments,
schema.TranscriptionSegment{
Text: s.Text,
Id: int(s.Id),
Start: time.Duration(s.Start),
End: time.Duration(s.End),
Tokens: tks,
})
}
return tr, err
}