feat(realtime): configurable pipeline.max_history_items (#10331)

Composed realtime pipelines (VAD+STT+LLM+TTS) defaulted to unlimited history,
so a long-running session grew with every turn and fed the whole conversation
to the LLM until its context window filled. Add an optional
pipeline.max_history_items setting that caps how many trailing conversation
items are sent to the LLM each turn; an explicit value (including
0 = unlimited) wins over the per-model-type default. Self-contained any-to-any
models keep their 6-item default.

Co-authored-by: Ettore Di Giacinto <mudler@localai.io>
Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
LocalAI [bot]
2026-06-14 18:13:09 +02:00
committed by GitHub
parent 61cde6fd77
commit 7d2a762b53
3 changed files with 42 additions and 1 deletions

View File

@@ -510,6 +510,13 @@ type Pipeline struct {
// LLM model config. Unset leaves the LLM model config in charge.
DisableThinking *bool `yaml:"disable_thinking,omitempty" json:"disable_thinking,omitempty"`
// MaxHistoryItems caps how many trailing conversation items are fed to the
// LLM each realtime turn (0 = unlimited, rely on the LLM's context window).
// Unset (nil) uses the per-model-type default. Set it on a composed pipeline
// (VAD+STT+LLM+TTS) so a long-running session doesn't grow until the LLM's
// context fills.
//
// The field is a pointer so that an explicit 0 can be told apart from an
// omitted key.
MaxHistoryItems *int `yaml:"max_history_items,omitempty" json:"max_history_items,omitempty"`
// VoiceRecognition gates the pipeline behind speaker verification. Nil
// (block absent) means no gate, preserving existing behavior.
VoiceRecognition *PipelineVoiceRecognition `yaml:"voice_recognition,omitempty" json:"voice_recognition,omitempty"`

View File

@@ -340,6 +340,17 @@ func defaultMaxHistoryItems(cfg *config.ModelConfig) int {
return 0
}
// resolveMaxHistoryItems picks the history cap for a realtime session.
//
// An explicit pipeline.max_history_items always wins, including an explicit 0.
// When cfg is nil or the setting is absent, the per-model-type default from
// defaultMaxHistoryItems is used instead. This is what lets a composed
// pipeline (VAD+STT+LLM+TTS) bound its history so a long-running session does
// not keep growing until the LLM's context window fills.
func resolveMaxHistoryItems(cfg *config.ModelConfig) int {
	// No config at all: nothing explicit to honor, defer to the default.
	if cfg == nil {
		return defaultMaxHistoryItems(cfg)
	}
	// The setting is a pointer; nil means "not configured".
	explicit := cfg.Pipeline.MaxHistoryItems
	if explicit == nil {
		return defaultMaxHistoryItems(cfg)
	}
	return *explicit
}
// trimRealtimeItems returns the tail of items capped at maxItems (0 = no cap).
// Walks backwards keeping function_call + function_call_output pairs together
// so we never feed the LLM an orphaned tool result that references a call it
@@ -492,7 +503,7 @@ func runRealtimeSession(application *application.Application, t Transport, model
Conversations: make(map[string]*Conversation),
InputSampleRate: defaultRemoteSampleRate,
OutputSampleRate: defaultRemoteSampleRate,
MaxHistoryItems: defaultMaxHistoryItems(cfg),
MaxHistoryItems: resolveMaxHistoryItems(cfg),
}
// Create a default conversation

View File

@@ -107,6 +107,29 @@ var _ = Describe("defaultMaxHistoryItems", func() {
})
})
// Specs for resolveMaxHistoryItems: an explicit pipeline.max_history_items is
// returned as-is (including 0), and the per-model-type default is used when
// the setting is absent or the config is nil.
var _ = Describe("resolveMaxHistoryItems", func() {
	// intPtr returns a pointer to a copy of v, for filling the optional
	// Pipeline.MaxHistoryItems field.
	intPtr := func(v int) *int { return &v }

	It("uses an explicit pipeline.max_history_items", func() {
		pipeline := config.Pipeline{LLM: "llama", MaxHistoryItems: intPtr(10)}
		got := resolveMaxHistoryItems(&config.ModelConfig{Pipeline: pipeline})
		Expect(got).To(Equal(10))
	})

	It("honors an explicit 0 (unlimited) over the type default", func() {
		// Uses the same usecase flag as the fallback spec below (which
		// expects 6), so a 0 result can only come from the explicit setting.
		cfg := &config.ModelConfig{
			KnownUsecases: withUsecases(config.FLAG_REALTIME_AUDIO),
			Pipeline:      config.Pipeline{MaxHistoryItems: intPtr(0)},
		}
		got := resolveMaxHistoryItems(cfg)
		Expect(got).To(Equal(0))
	})

	It("falls back to the type default when unset", func() {
		cfg := new(config.ModelConfig)
		cfg.KnownUsecases = withUsecases(config.FLAG_REALTIME_AUDIO)
		got := resolveMaxHistoryItems(cfg)
		Expect(got).To(Equal(6))
	})

	It("tolerates nil", func() {
		var cfg *config.ModelConfig
		Expect(resolveMaxHistoryItems(cfg)).To(Equal(0))
	})
})
var _ = Describe("trimRealtimeItems", func() {
user := func(id string) *types.MessageItemUnion {
return &types.MessageItemUnion{User: &types.MessageItemUser{ID: id}}