feat: Realtime API support reboot (#5392)

* feat(realtime): Initial Realtime API implementation

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* chore: go mod tidy

Signed-off-by: Richard Palethorpe <io@richiejp.com>

* feat: Implement transcription only mode for realtime API

Reduce the scope of the Realtime API for the initial release and make
transcription-only mode functional.

Signed-off-by: Richard Palethorpe <io@richiejp.com>

* chore(build): Build backends on a separate layer to speed up core only changes

Signed-off-by: Richard Palethorpe <io@richiejp.com>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Signed-off-by: Richard Palethorpe <io@richiejp.com>
Co-authored-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
Richard Palethorpe
2025-05-25 21:25:05 +01:00
committed by GitHub
parent 4a91950848
commit bf6426aef2
18 changed files with 2953 additions and 70 deletions

View File

@@ -5,6 +5,8 @@ import (
"os"
"os/exec"
"strings"
"github.com/go-audio/wav"
)
func ffmpegCommand(args []string) (string, error) {
@@ -17,6 +19,21 @@ func ffmpegCommand(args []string) (string, error) {
// AudioToWav converts audio to wav for transcription.
// TODO: use https://github.com/mccoyst/ogg?
func AudioToWav(src, dst string) error {
if strings.HasSuffix(src, ".wav") {
f, err := os.Open(src)
if err != nil {
return fmt.Errorf("open: %w", err)
}
dec := wav.NewDecoder(f)
dec.ReadInfo()
f.Close()
if dec.BitDepth == 16 && dec.NumChans == 1 && dec.SampleRate == 16000 {
os.Rename(src, dst)
return nil
}
}
commandArgs := []string{"-i", src, "-format", "s16le", "-ar", "16000", "-ac", "1", "-acodec", "pcm_s16le", dst}
out, err := ffmpegCommand(commandArgs)
if err != nil {