mirror of
https://github.com/mudler/LocalAI.git
synced 2026-06-16 04:38:50 -04:00
feat(realtime): configurable pipeline.max_history_items (#10331)
Composed realtime pipelines (VAD+STT+LLM+TTS) defaulted to unlimited history, so a long-running session grew every turn and fed the whole conversation to the LLM until its context window filled. Add an optional pipeline.max_history_items to cap the trailing items per turn; explicit value (including 0=unlimited) wins over the per-model-type default. Self-contained any-to-any models keep their 6-item default. Co-authored-by: Ettore Di Giacinto <mudler@localai.io> Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -510,6 +510,13 @@ type Pipeline struct {
|
||||
// LLM model config. Unset leaves the LLM model config in charge.
|
||||
DisableThinking *bool `yaml:"disable_thinking,omitempty" json:"disable_thinking,omitempty"`
|
||||
|
||||
// MaxHistoryItems caps how many trailing conversation items are fed to the
|
||||
// LLM each realtime turn (0 = unlimited, rely on the LLM's context window).
|
||||
// Unset (nil) uses the per-model-type default. Set it on a composed pipeline
|
||||
// (VAD+STT+LLM+TTS) so a long-running session doesn't grow until the LLM's
|
||||
// context fills.
|
||||
MaxHistoryItems *int `yaml:"max_history_items,omitempty" json:"max_history_items,omitempty"`
|
||||
|
||||
// VoiceRecognition gates the pipeline behind speaker verification. Nil
|
||||
// (block absent) means no gate, preserving existing behavior.
|
||||
VoiceRecognition *PipelineVoiceRecognition `yaml:"voice_recognition,omitempty" json:"voice_recognition,omitempty"`
|
||||
|
||||
@@ -340,6 +340,17 @@ func defaultMaxHistoryItems(cfg *config.ModelConfig) int {
|
||||
return 0
|
||||
}
|
||||
|
||||
// resolveMaxHistoryItems honors an explicit pipeline.max_history_items when set,
|
||||
// otherwise falls back to the per-model-type default. This lets a composed
|
||||
// pipeline (VAD+STT+LLM+TTS) cap its history so a long-running session doesn't
|
||||
// grow until the LLM's context window fills.
|
||||
func resolveMaxHistoryItems(cfg *config.ModelConfig) int {
|
||||
if cfg != nil && cfg.Pipeline.MaxHistoryItems != nil {
|
||||
return *cfg.Pipeline.MaxHistoryItems
|
||||
}
|
||||
return defaultMaxHistoryItems(cfg)
|
||||
}
|
||||
|
||||
// trimRealtimeItems returns the tail of items capped at maxItems (0 = no cap).
|
||||
// Walks backwards keeping function_call + function_call_output pairs together
|
||||
// so we never feed the LLM an orphaned tool result that references a call it
|
||||
@@ -492,7 +503,7 @@ func runRealtimeSession(application *application.Application, t Transport, model
|
||||
Conversations: make(map[string]*Conversation),
|
||||
InputSampleRate: defaultRemoteSampleRate,
|
||||
OutputSampleRate: defaultRemoteSampleRate,
|
||||
MaxHistoryItems: defaultMaxHistoryItems(cfg),
|
||||
MaxHistoryItems: resolveMaxHistoryItems(cfg),
|
||||
}
|
||||
|
||||
// Create a default conversation
|
||||
|
||||
@@ -107,6 +107,29 @@ var _ = Describe("defaultMaxHistoryItems", func() {
|
||||
})
|
||||
})
|
||||
|
||||
var _ = Describe("resolveMaxHistoryItems", func() {
|
||||
ptr := func(i int) *int { return &i }
|
||||
|
||||
It("uses an explicit pipeline.max_history_items", func() {
|
||||
cfg := &config.ModelConfig{Pipeline: config.Pipeline{LLM: "llama", MaxHistoryItems: ptr(10)}}
|
||||
Expect(resolveMaxHistoryItems(cfg)).To(Equal(10))
|
||||
})
|
||||
It("honors an explicit 0 (unlimited) over the type default", func() {
|
||||
cfg := &config.ModelConfig{
|
||||
KnownUsecases: withUsecases(config.FLAG_REALTIME_AUDIO),
|
||||
Pipeline: config.Pipeline{MaxHistoryItems: ptr(0)},
|
||||
}
|
||||
Expect(resolveMaxHistoryItems(cfg)).To(Equal(0))
|
||||
})
|
||||
It("falls back to the type default when unset", func() {
|
||||
cfg := &config.ModelConfig{KnownUsecases: withUsecases(config.FLAG_REALTIME_AUDIO)}
|
||||
Expect(resolveMaxHistoryItems(cfg)).To(Equal(6))
|
||||
})
|
||||
It("tolerates nil", func() {
|
||||
Expect(resolveMaxHistoryItems(nil)).To(Equal(0))
|
||||
})
|
||||
})
|
||||
|
||||
var _ = Describe("trimRealtimeItems", func() {
|
||||
user := func(id string) *types.MessageItemUnion {
|
||||
return &types.MessageItemUnion{User: &types.MessageItemUser{ID: id}}
|
||||
|
||||
Reference in New Issue
Block a user