mirror of
https://github.com/mudler/LocalAI.git
synced 2026-06-06 07:46:15 -04:00
feat(realtime): sentence segmenter for streamed LLM->TTS pipelining
streamSegmenter accumulates streamed LLM tokens and emits complete sentence/clause segments (terminator+whitespace, or newline) so TTS can synthesize each segment as it completes instead of waiting for the whole reply. Pure helper; the streaming handler wiring consumes it next. Assisted-by: Claude:claude-opus-4-8 go vet Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
61
core/http/endpoints/openai/realtime_segmenter.go
Normal file
61
core/http/endpoints/openai/realtime_segmenter.go
Normal file
@@ -0,0 +1,61 @@
|
||||
package openai
|
||||
|
||||
import "strings"
|
||||
|
||||
// streamSegmenter accumulates streamed LLM text and emits complete utterance
|
||||
// segments (sentence/clause boundaries) so the realtime pipeline can hand each
|
||||
// segment to TTS as soon as it's complete, overlapping generation, synthesis
|
||||
// and playback instead of waiting for the whole reply.
|
||||
//
|
||||
// A segment is committed when a sentence terminator (. ! ?) is followed by
|
||||
// whitespace, or at a newline. Terminators not followed by whitespace (e.g.
|
||||
// decimals like "3.14" mid-stream) stay buffered until more text arrives or the
|
||||
// stream is flushed.
|
||||
type streamSegmenter struct {
|
||||
buf strings.Builder
|
||||
}
|
||||
|
||||
func isSentenceTerminator(b byte) bool {
|
||||
return b == '.' || b == '!' || b == '?'
|
||||
}
|
||||
|
||||
func isSpace(b byte) bool {
|
||||
return b == ' ' || b == '\t' || b == '\n' || b == '\r'
|
||||
}
|
||||
|
||||
// Push appends text to the buffer and returns any newly-completed segments,
|
||||
// trimmed of surrounding whitespace. Incomplete trailing text stays buffered.
|
||||
func (s *streamSegmenter) Push(text string) []string {
|
||||
s.buf.WriteString(text)
|
||||
cur := s.buf.String()
|
||||
|
||||
var segments []string
|
||||
start := 0
|
||||
for i := 0; i < len(cur); i++ {
|
||||
cut := -1
|
||||
switch {
|
||||
case cur[i] == '\n':
|
||||
cut = i // segment excludes the newline
|
||||
case isSentenceTerminator(cur[i]) && i+1 < len(cur) && isSpace(cur[i+1]):
|
||||
cut = i + 1 // segment includes the terminator
|
||||
}
|
||||
if cut >= 0 {
|
||||
if seg := strings.TrimSpace(cur[start:cut]); seg != "" {
|
||||
segments = append(segments, seg)
|
||||
}
|
||||
start = cut
|
||||
}
|
||||
}
|
||||
|
||||
rem := cur[start:]
|
||||
s.buf.Reset()
|
||||
s.buf.WriteString(rem)
|
||||
return segments
|
||||
}
|
||||
|
||||
// Flush returns the remaining buffered text (trimmed) and clears the buffer.
|
||||
func (s *streamSegmenter) Flush() string {
|
||||
seg := strings.TrimSpace(s.buf.String())
|
||||
s.buf.Reset()
|
||||
return seg
|
||||
}
|
||||
41
core/http/endpoints/openai/realtime_segmenter_test.go
Normal file
41
core/http/endpoints/openai/realtime_segmenter_test.go
Normal file
@@ -0,0 +1,41 @@
|
||||
package openai
|
||||
|
||||
import (
|
||||
. "github.com/onsi/ginkgo/v2"
|
||||
. "github.com/onsi/gomega"
|
||||
)
|
||||
|
||||
// streamSegmenter turns a stream of LLM token text into complete sentence/clause
|
||||
// segments so TTS can start synthesizing before the full reply is generated.
|
||||
var _ = Describe("streamSegmenter", func() {
|
||||
It("buffers partial text until a sentence terminator followed by space", func() {
|
||||
var s streamSegmenter
|
||||
Expect(s.Push("Hello")).To(BeEmpty())
|
||||
Expect(s.Push(" world")).To(BeEmpty())
|
||||
Expect(s.Push(". ")).To(Equal([]string{"Hello world."}))
|
||||
})
|
||||
|
||||
It("emits each complete sentence and keeps the trailing partial buffered", func() {
|
||||
var s streamSegmenter
|
||||
Expect(s.Push("One. Two! Three")).To(Equal([]string{"One.", "Two!"}))
|
||||
Expect(s.Flush()).To(Equal("Three"))
|
||||
})
|
||||
|
||||
It("splits on newlines", func() {
|
||||
var s streamSegmenter
|
||||
Expect(s.Push("Line one\nLine two")).To(Equal([]string{"Line one"}))
|
||||
Expect(s.Flush()).To(Equal("Line two"))
|
||||
})
|
||||
|
||||
It("does not split decimals or mid-token punctuation", func() {
|
||||
var s streamSegmenter
|
||||
Expect(s.Push("Pi is 3.14 today")).To(BeEmpty())
|
||||
Expect(s.Flush()).To(Equal("Pi is 3.14 today"))
|
||||
})
|
||||
|
||||
It("flushes to empty when the buffer holds only consumed text", func() {
|
||||
var s streamSegmenter
|
||||
s.Push("Done. ")
|
||||
Expect(s.Flush()).To(Equal(""))
|
||||
})
|
||||
})
|
||||
Reference in New Issue
Block a user