mirror of
https://github.com/mudler/LocalAI.git
synced 2026-04-02 06:04:09 -04:00
* feat(realtime): WebRTC support Signed-off-by: Richard Palethorpe <io@richiejp.com> * fix(tracing): Show full LLM opts and deltas Signed-off-by: Richard Palethorpe <io@richiejp.com> --------- Signed-off-by: Richard Palethorpe <io@richiejp.com>
185 lines
4.3 KiB
Go
185 lines
4.3 KiB
Go
package main
|
|
|
|
import (
|
|
"fmt"
|
|
"sync"
|
|
"time"
|
|
|
|
"github.com/mudler/LocalAI/pkg/grpc/base"
|
|
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
|
|
"github.com/mudler/LocalAI/pkg/sound"
|
|
)
|
|
|
|
// Opus codec parameters and decoder-cache timings used by this backend.
const (
	// opusSampleRate is the sample rate, in Hz, used for every encoder and
	// decoder created in this file and reported in every result.
	opusSampleRate = 48000
	// opusChannels is the channel count passed to NewEncoder/NewDecoder.
	opusChannels = 1
	// opusFrameSize is the number of samples per encoded frame: 20ms of
	// audio at opusSampleRate.
	opusFrameSize = opusSampleRate * 20 / 1000
	// opusMaxPacketSize is the size, in bytes, of the scratch buffer the
	// encoder writes a single packet into.
	opusMaxPacketSize = 4000
	// opusMaxFrameSize is the length, in samples, of the scratch buffer the
	// decoder writes a single frame into: 120ms of audio at opusSampleRate.
	opusMaxFrameSize = opusSampleRate * 120 / 1000

	// decoderIdleTTL is how long a cached session decoder may go unused
	// before the eviction loop closes it.
	decoderIdleTTL = time.Minute
	// decoderEvictTick is the interval between eviction sweeps.
	decoderEvictTick = time.Minute / 2
)
|
|
|
|
type cachedDecoder struct {
|
|
mu sync.Mutex
|
|
dec *Decoder
|
|
lastUsed time.Time
|
|
}
|
|
|
|
type Opus struct {
|
|
base.Base
|
|
|
|
decodersMu sync.Mutex
|
|
decoders map[string]*cachedDecoder
|
|
}
|
|
|
|
func (o *Opus) Load(opts *pb.ModelOptions) error {
|
|
o.decoders = make(map[string]*cachedDecoder)
|
|
go o.evictLoop()
|
|
return Init()
|
|
}
|
|
|
|
func (o *Opus) evictLoop() {
|
|
ticker := time.NewTicker(decoderEvictTick)
|
|
defer ticker.Stop()
|
|
for range ticker.C {
|
|
o.decodersMu.Lock()
|
|
now := time.Now()
|
|
for id, cd := range o.decoders {
|
|
if now.Sub(cd.lastUsed) > decoderIdleTTL {
|
|
cd.dec.Close()
|
|
delete(o.decoders, id)
|
|
}
|
|
}
|
|
o.decodersMu.Unlock()
|
|
}
|
|
}
|
|
|
|
// getOrCreateDecoder returns a cached decoder for the given session ID,
|
|
// creating one if it doesn't exist yet.
|
|
func (o *Opus) getOrCreateDecoder(sessionID string) (*cachedDecoder, error) {
|
|
o.decodersMu.Lock()
|
|
defer o.decodersMu.Unlock()
|
|
|
|
if cd, ok := o.decoders[sessionID]; ok {
|
|
cd.lastUsed = time.Now()
|
|
return cd, nil
|
|
}
|
|
|
|
dec, err := NewDecoder(opusSampleRate, opusChannels)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
cd := &cachedDecoder{dec: dec, lastUsed: time.Now()}
|
|
o.decoders[sessionID] = cd
|
|
return cd, nil
|
|
}
|
|
|
|
func (o *Opus) AudioEncode(req *pb.AudioEncodeRequest) (*pb.AudioEncodeResult, error) {
|
|
enc, err := NewEncoder(opusSampleRate, opusChannels, ApplicationAudio)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("opus encoder create: %w", err)
|
|
}
|
|
defer enc.Close()
|
|
|
|
if err := enc.SetBitrate(64000); err != nil {
|
|
return nil, fmt.Errorf("opus set bitrate: %w", err)
|
|
}
|
|
if err := enc.SetComplexity(10); err != nil {
|
|
return nil, fmt.Errorf("opus set complexity: %w", err)
|
|
}
|
|
|
|
samples := sound.BytesToInt16sLE(req.PcmData)
|
|
if len(samples) == 0 {
|
|
return &pb.AudioEncodeResult{
|
|
SampleRate: opusSampleRate,
|
|
SamplesPerFrame: opusFrameSize,
|
|
}, nil
|
|
}
|
|
|
|
if req.SampleRate != 0 && int(req.SampleRate) != opusSampleRate {
|
|
samples = sound.ResampleInt16(samples, int(req.SampleRate), opusSampleRate)
|
|
}
|
|
|
|
var frames [][]byte
|
|
packet := make([]byte, opusMaxPacketSize)
|
|
|
|
for offset := 0; offset+opusFrameSize <= len(samples); offset += opusFrameSize {
|
|
frame := samples[offset : offset+opusFrameSize]
|
|
n, err := enc.Encode(frame, opusFrameSize, packet)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("opus encode: %w", err)
|
|
}
|
|
out := make([]byte, n)
|
|
copy(out, packet[:n])
|
|
frames = append(frames, out)
|
|
}
|
|
|
|
return &pb.AudioEncodeResult{
|
|
Frames: frames,
|
|
SampleRate: opusSampleRate,
|
|
SamplesPerFrame: opusFrameSize,
|
|
}, nil
|
|
}
|
|
|
|
func (o *Opus) AudioDecode(req *pb.AudioDecodeRequest) (*pb.AudioDecodeResult, error) {
|
|
if len(req.Frames) == 0 {
|
|
return &pb.AudioDecodeResult{
|
|
SampleRate: opusSampleRate,
|
|
SamplesPerFrame: opusFrameSize,
|
|
}, nil
|
|
}
|
|
|
|
// Use a persistent decoder when a session ID is provided so that Opus
|
|
// prediction state carries across batches. Fall back to a fresh decoder
|
|
// for backward compatibility.
|
|
sessionID := req.Options["session_id"]
|
|
|
|
var cd *cachedDecoder
|
|
var ownedDec *Decoder
|
|
|
|
if sessionID != "" && o.decoders != nil {
|
|
var err error
|
|
cd, err = o.getOrCreateDecoder(sessionID)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("opus decoder create: %w", err)
|
|
}
|
|
cd.mu.Lock()
|
|
defer cd.mu.Unlock()
|
|
} else {
|
|
dec, err := NewDecoder(opusSampleRate, opusChannels)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("opus decoder create: %w", err)
|
|
}
|
|
ownedDec = dec
|
|
defer ownedDec.Close()
|
|
}
|
|
|
|
dec := ownedDec
|
|
if cd != nil {
|
|
dec = cd.dec
|
|
}
|
|
|
|
var allSamples []int16
|
|
var samplesPerFrame int32
|
|
|
|
pcm := make([]int16, opusMaxFrameSize)
|
|
for _, frame := range req.Frames {
|
|
n, err := dec.Decode(frame, pcm, opusMaxFrameSize, false)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("opus decode: %w", err)
|
|
}
|
|
if samplesPerFrame == 0 {
|
|
samplesPerFrame = int32(n)
|
|
}
|
|
allSamples = append(allSamples, pcm[:n]...)
|
|
}
|
|
|
|
return &pb.AudioDecodeResult{
|
|
PcmData: sound.Int16toBytesLE(allSamples),
|
|
SampleRate: opusSampleRate,
|
|
SamplesPerFrame: samplesPerFrame,
|
|
}, nil
|
|
}
|