feat(realtime): configurable pipeline.max_history_items (#10331)

Composed realtime pipelines (VAD+STT+LLM+TTS) defaulted to unlimited history, so a long-running session grew every turn and fed the whole conversation to the LLM until its context window filled. Add an optional pipeline.max_history_items to cap the trailing items per turn; an explicit value (including 0=unlimited) wins over the per-model-type default. Self-contained any-to-any models keep their 6-item default.

Co-authored-by: Ettore Di Giacinto <mudler@localai.io>
Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-08-01 11:00:24 -04:00 · 2026-06-14 18:13:09 +02:00
parent 61cde6fd77
commit 7d2a762b53
3 changed files with 42 additions and 1 deletions
--- a/core/config/model_config.go
+++ b/core/config/model_config.go
@@ -510,6 +510,13 @@ type Pipeline struct {
 	// LLM model config. Unset leaves the LLM model config in charge.
 	DisableThinking *bool `yaml:"disable_thinking,omitempty" json:"disable_thinking,omitempty"`

+	// MaxHistoryItems caps how many trailing conversation items are fed to the
+	// LLM each realtime turn (0 = unlimited, rely on the LLM's context window).
+	// Unset (nil) uses the per-model-type default. Set it on a composed pipeline
+	// (VAD+STT+LLM+TTS) so a long-running session doesn't grow until the LLM's
+	// context fills.
+	MaxHistoryItems *int `yaml:"max_history_items,omitempty" json:"max_history_items,omitempty"`
+
 	// VoiceRecognition gates the pipeline behind speaker verification. Nil
 	// (block absent) means no gate, preserving existing behavior.
 	VoiceRecognition *PipelineVoiceRecognition `yaml:"voice_recognition,omitempty" json:"voice_recognition,omitempty"`
--- a/core/http/endpoints/openai/realtime.go
+++ b/core/http/endpoints/openai/realtime.go
@@ -340,6 +340,17 @@ func defaultMaxHistoryItems(cfg *config.ModelConfig) int {
 	return 0
 }

+// resolveMaxHistoryItems picks the history cap for a realtime session. When
+// pipeline.max_history_items is unset (or cfg is nil) the per-model-type
+// default applies; otherwise the configured value is returned verbatim, so a
+// composed pipeline (VAD+STT+LLM+TTS) can bound, or with 0 unbound, its history.
+func resolveMaxHistoryItems(cfg *config.ModelConfig) int {
+	if cfg == nil || cfg.Pipeline.MaxHistoryItems == nil {
+		return defaultMaxHistoryItems(cfg)
+	}
+	return *cfg.Pipeline.MaxHistoryItems
+}
+
 // trimRealtimeItems returns the tail of items capped at maxItems (0 = no cap).
 // Walks backwards keeping function_call + function_call_output pairs together
 // so we never feed the LLM an orphaned tool result that references a call it
@@ -492,7 +503,7 @@ func runRealtimeSession(application *application.Application, t Transport, model
 		Conversations:    make(map[string]*Conversation),
 		InputSampleRate:  defaultRemoteSampleRate,
 		OutputSampleRate: defaultRemoteSampleRate,
-		MaxHistoryItems:  defaultMaxHistoryItems(cfg),
+		MaxHistoryItems:  resolveMaxHistoryItems(cfg),
 	}

 	// Create a default conversation
--- a/core/http/endpoints/openai/realtime_gate_test.go
+++ b/core/http/endpoints/openai/realtime_gate_test.go
@@ -107,6 +107,29 @@ var _ = Describe("defaultMaxHistoryItems", func() {
 	})
 })

+var _ = Describe("resolveMaxHistoryItems", func() {
+	intPtr := func(n int) *int { return &n }
+
+	It("uses an explicit pipeline.max_history_items", func() {
+		pipeline := config.Pipeline{LLM: "llama", MaxHistoryItems: intPtr(10)}
+		Expect(resolveMaxHistoryItems(&config.ModelConfig{Pipeline: pipeline})).To(Equal(10))
+	})
+	It("honors an explicit 0 (unlimited) over the type default", func() {
+		unlimited := &config.ModelConfig{
+			KnownUsecases: withUsecases(config.FLAG_REALTIME_AUDIO),
+			Pipeline:      config.Pipeline{MaxHistoryItems: intPtr(0)},
+		}
+		Expect(resolveMaxHistoryItems(unlimited)).To(Equal(0))
+	})
+	It("falls back to the type default when unset", func() {
+		audioModel := &config.ModelConfig{KnownUsecases: withUsecases(config.FLAG_REALTIME_AUDIO)}
+		Expect(resolveMaxHistoryItems(audioModel)).To(Equal(6))
+	})
+	It("tolerates nil", func() {
+		Expect(resolveMaxHistoryItems(nil)).To(Equal(0))
+	})
+})
+
 var _ = Describe("trimRealtimeItems", func() {
 	user := func(id string) *types.MessageItemUnion {
 		return &types.MessageItemUnion{User: &types.MessageItemUser{ID: id}}