Files
LocalAI/backend/go/opus/opus.go
Richard Palethorpe f9a850c02a feat(realtime): WebRTC support (#8790)
* feat(realtime): WebRTC support

Signed-off-by: Richard Palethorpe <io@richiejp.com>

* fix(tracing): Show full LLM opts and deltas

Signed-off-by: Richard Palethorpe <io@richiejp.com>

---------

Signed-off-by: Richard Palethorpe <io@richiejp.com>
2026-03-13 21:37:15 +01:00

185 lines
4.3 KiB
Go

package main
import (
"fmt"
"sync"
"time"
"github.com/mudler/LocalAI/pkg/grpc/base"
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
"github.com/mudler/LocalAI/pkg/sound"
)
const (
opusSampleRate = 48000
opusChannels = 1
opusFrameSize = 960 // 20ms at 48kHz
opusMaxPacketSize = 4000
opusMaxFrameSize = 5760 // 120ms at 48kHz
decoderIdleTTL = 60 * time.Second
decoderEvictTick = 30 * time.Second
)
type cachedDecoder struct {
mu sync.Mutex
dec *Decoder
lastUsed time.Time
}
type Opus struct {
base.Base
decodersMu sync.Mutex
decoders map[string]*cachedDecoder
}
func (o *Opus) Load(opts *pb.ModelOptions) error {
o.decoders = make(map[string]*cachedDecoder)
go o.evictLoop()
return Init()
}
func (o *Opus) evictLoop() {
ticker := time.NewTicker(decoderEvictTick)
defer ticker.Stop()
for range ticker.C {
o.decodersMu.Lock()
now := time.Now()
for id, cd := range o.decoders {
if now.Sub(cd.lastUsed) > decoderIdleTTL {
cd.dec.Close()
delete(o.decoders, id)
}
}
o.decodersMu.Unlock()
}
}
// getOrCreateDecoder returns a cached decoder for the given session ID,
// creating one if it doesn't exist yet.
func (o *Opus) getOrCreateDecoder(sessionID string) (*cachedDecoder, error) {
o.decodersMu.Lock()
defer o.decodersMu.Unlock()
if cd, ok := o.decoders[sessionID]; ok {
cd.lastUsed = time.Now()
return cd, nil
}
dec, err := NewDecoder(opusSampleRate, opusChannels)
if err != nil {
return nil, err
}
cd := &cachedDecoder{dec: dec, lastUsed: time.Now()}
o.decoders[sessionID] = cd
return cd, nil
}
func (o *Opus) AudioEncode(req *pb.AudioEncodeRequest) (*pb.AudioEncodeResult, error) {
enc, err := NewEncoder(opusSampleRate, opusChannels, ApplicationAudio)
if err != nil {
return nil, fmt.Errorf("opus encoder create: %w", err)
}
defer enc.Close()
if err := enc.SetBitrate(64000); err != nil {
return nil, fmt.Errorf("opus set bitrate: %w", err)
}
if err := enc.SetComplexity(10); err != nil {
return nil, fmt.Errorf("opus set complexity: %w", err)
}
samples := sound.BytesToInt16sLE(req.PcmData)
if len(samples) == 0 {
return &pb.AudioEncodeResult{
SampleRate: opusSampleRate,
SamplesPerFrame: opusFrameSize,
}, nil
}
if req.SampleRate != 0 && int(req.SampleRate) != opusSampleRate {
samples = sound.ResampleInt16(samples, int(req.SampleRate), opusSampleRate)
}
var frames [][]byte
packet := make([]byte, opusMaxPacketSize)
for offset := 0; offset+opusFrameSize <= len(samples); offset += opusFrameSize {
frame := samples[offset : offset+opusFrameSize]
n, err := enc.Encode(frame, opusFrameSize, packet)
if err != nil {
return nil, fmt.Errorf("opus encode: %w", err)
}
out := make([]byte, n)
copy(out, packet[:n])
frames = append(frames, out)
}
return &pb.AudioEncodeResult{
Frames: frames,
SampleRate: opusSampleRate,
SamplesPerFrame: opusFrameSize,
}, nil
}
func (o *Opus) AudioDecode(req *pb.AudioDecodeRequest) (*pb.AudioDecodeResult, error) {
if len(req.Frames) == 0 {
return &pb.AudioDecodeResult{
SampleRate: opusSampleRate,
SamplesPerFrame: opusFrameSize,
}, nil
}
// Use a persistent decoder when a session ID is provided so that Opus
// prediction state carries across batches. Fall back to a fresh decoder
// for backward compatibility.
sessionID := req.Options["session_id"]
var cd *cachedDecoder
var ownedDec *Decoder
if sessionID != "" && o.decoders != nil {
var err error
cd, err = o.getOrCreateDecoder(sessionID)
if err != nil {
return nil, fmt.Errorf("opus decoder create: %w", err)
}
cd.mu.Lock()
defer cd.mu.Unlock()
} else {
dec, err := NewDecoder(opusSampleRate, opusChannels)
if err != nil {
return nil, fmt.Errorf("opus decoder create: %w", err)
}
ownedDec = dec
defer ownedDec.Close()
}
dec := ownedDec
if cd != nil {
dec = cd.dec
}
var allSamples []int16
var samplesPerFrame int32
pcm := make([]int16, opusMaxFrameSize)
for _, frame := range req.Frames {
n, err := dec.Decode(frame, pcm, opusMaxFrameSize, false)
if err != nil {
return nil, fmt.Errorf("opus decode: %w", err)
}
if samplesPerFrame == 0 {
samplesPerFrame = int32(n)
}
allSamples = append(allSamples, pcm[:n]...)
}
return &pb.AudioDecodeResult{
PcmData: sound.Int16toBytesLE(allSamples),
SampleRate: opusSampleRate,
SamplesPerFrame: samplesPerFrame,
}, nil
}