From 7d2a762b538e342b372110015e61bd5490f5bd5b Mon Sep 17 00:00:00 2001
From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com>
Date: Sun, 14 Jun 2026 18:13:09 +0200
Subject: [PATCH] feat(realtime): configurable pipeline.max_history_items (#10331)

Composed realtime pipelines (VAD+STT+LLM+TTS) defaulted to unlimited
history, so a long-running session grew every turn and fed the whole
conversation to the LLM until its context window filled. Add an optional
pipeline.max_history_items to cap the trailing items per turn; explicit
value (including 0=unlimited) wins over the per-model-type default.
Self-contained any-to-any models keep their 6-item default.

Co-authored-by: Ettore Di Giacinto
Co-authored-by: Claude Opus 4.8 (1M context)
---
 core/config/model_config.go                |  7 ++++++
 core/http/endpoints/openai/realtime.go     | 13 ++++++++++-
 .../endpoints/openai/realtime_gate_test.go | 23 +++++++++++++++++++
 3 files changed, 42 insertions(+), 1 deletion(-)

diff --git a/core/config/model_config.go b/core/config/model_config.go
index 195739654..755280cc3 100644
--- a/core/config/model_config.go
+++ b/core/config/model_config.go
@@ -510,6 +510,13 @@ type Pipeline struct {
 	// LLM model config. Unset leaves the LLM model config in charge.
 	DisableThinking *bool `yaml:"disable_thinking,omitempty" json:"disable_thinking,omitempty"`
 
+	// MaxHistoryItems caps how many trailing conversation items are fed to the
+	// LLM each realtime turn (0 = unlimited, rely on the LLM's context window).
+	// Unset (nil) uses the per-model-type default. Set it on a composed pipeline
+	// (VAD+STT+LLM+TTS) so a long-running session doesn't grow until the LLM's
+	// context fills.
+	MaxHistoryItems *int `yaml:"max_history_items,omitempty" json:"max_history_items,omitempty"`
+
 	// VoiceRecognition gates the pipeline behind speaker verification. Nil
 	// (block absent) means no gate, preserving existing behavior.
 	VoiceRecognition *PipelineVoiceRecognition `yaml:"voice_recognition,omitempty" json:"voice_recognition,omitempty"`
diff --git a/core/http/endpoints/openai/realtime.go b/core/http/endpoints/openai/realtime.go
index f626a895c..343ef4c07 100644
--- a/core/http/endpoints/openai/realtime.go
+++ b/core/http/endpoints/openai/realtime.go
@@ -340,6 +340,17 @@ func defaultMaxHistoryItems(cfg *config.ModelConfig) int {
 	return 0
 }
 
+// resolveMaxHistoryItems honors an explicit pipeline.max_history_items when set,
+// otherwise falls back to the per-model-type default. This lets a composed
+// pipeline (VAD+STT+LLM+TTS) cap its history so a long-running session doesn't
+// grow until the LLM's context window fills.
+func resolveMaxHistoryItems(cfg *config.ModelConfig) int {
+	if cfg != nil && cfg.Pipeline.MaxHistoryItems != nil {
+		return *cfg.Pipeline.MaxHistoryItems
+	}
+	return defaultMaxHistoryItems(cfg)
+}
+
 // trimRealtimeItems returns the tail of items capped at maxItems (0 = no cap).
 // Walks backwards keeping function_call + function_call_output pairs together
 // so we never feed the LLM an orphaned tool result that references a call it
@@ -492,7 +503,7 @@ func runRealtimeSession(application *application.Application, t Transport, model
 		Conversations:    make(map[string]*Conversation),
 		InputSampleRate:  defaultRemoteSampleRate,
 		OutputSampleRate: defaultRemoteSampleRate,
-		MaxHistoryItems:  defaultMaxHistoryItems(cfg),
+		MaxHistoryItems:  resolveMaxHistoryItems(cfg),
 	}
 
 	// Create a default conversation
diff --git a/core/http/endpoints/openai/realtime_gate_test.go b/core/http/endpoints/openai/realtime_gate_test.go
index e49eb71eb..0b86e7f1f 100644
--- a/core/http/endpoints/openai/realtime_gate_test.go
+++ b/core/http/endpoints/openai/realtime_gate_test.go
@@ -107,6 +107,29 @@ var _ = Describe("defaultMaxHistoryItems", func() {
 	})
 })
 
+var _ = Describe("resolveMaxHistoryItems", func() {
+	ptr := func(i int) *int { return &i }
+
+	It("uses an explicit pipeline.max_history_items", func() {
+		cfg := &config.ModelConfig{Pipeline: config.Pipeline{LLM: "llama", MaxHistoryItems: ptr(10)}}
+		Expect(resolveMaxHistoryItems(cfg)).To(Equal(10))
+	})
+	It("honors an explicit 0 (unlimited) over the type default", func() {
+		cfg := &config.ModelConfig{
+			KnownUsecases: withUsecases(config.FLAG_REALTIME_AUDIO),
+			Pipeline:      config.Pipeline{MaxHistoryItems: ptr(0)},
+		}
+		Expect(resolveMaxHistoryItems(cfg)).To(Equal(0))
+	})
+	It("falls back to the type default when unset", func() {
+		cfg := &config.ModelConfig{KnownUsecases: withUsecases(config.FLAG_REALTIME_AUDIO)}
+		Expect(resolveMaxHistoryItems(cfg)).To(Equal(6))
+	})
+	It("tolerates nil", func() {
+		Expect(resolveMaxHistoryItems(nil)).To(Equal(0))
+	})
+})
+
 var _ = Describe("trimRealtimeItems", func() {
 	user := func(id string) *types.MessageItemUnion {
 		return &types.MessageItemUnion{User: &types.MessageItemUser{ID: id}}