Respect explicit reasoning config during GGUF thinking probe (#9463)

Signed-off-by: leinasi2014 <leinasi2014@gmail.com>
Co-authored-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
This commit is contained in:
leinasi2014
2026-04-22 03:53:10 +08:00
committed by GitHub
parent 39573ecd2a
commit d18d434bb2
4 changed files with 170 additions and 17 deletions

View File

@@ -40,6 +40,12 @@ type TokenUsage struct {
ChatDeltas []*proto.ChatDelta // per-chunk deltas from C++ autoparser (only set during streaming)
}
// needsThinkingProbe reports whether the backend must still be probed for
// thinking/reasoning defaults: tokenizer templates have to be active, and at
// least one of the two reasoning settings must not have been set explicitly.
func needsThinkingProbe(c *config.ModelConfig) bool {
	if !c.TemplateConfig.UseTokenizerTemplate {
		return false
	}
	reasoningUnset := c.ReasoningConfig.DisableReasoning == nil
	prefillUnset := c.ReasoningConfig.DisableReasoningTagPrefill == nil
	return reasoningUnset || prefillUnset
}
// HasChatDeltaContent returns true if any chat delta carries content or reasoning text.
// Used to decide whether to prefer C++ autoparser deltas over Go-side tag extraction.
func (t TokenUsage) HasChatDeltaContent() bool {
@@ -100,11 +106,9 @@ func ModelInference(ctx context.Context, s string, messages schema.Messages, ima
// tokenizer template path is active) and the multimodal media marker (needed
// by custom chat templates so markers line up with what mtmd expects).
// We probe whenever any of those slots is still empty.
needsThinkingProbe := c.TemplateConfig.UseTokenizerTemplate &&
c.ReasoningConfig.DisableReasoning == nil &&
c.ReasoningConfig.DisableReasoningTagPrefill == nil
shouldProbeThinking := needsThinkingProbe(c)
needsMarkerProbe := c.MediaMarker == ""
if needsThinkingProbe || needsMarkerProbe {
if shouldProbeThinking || needsMarkerProbe {
modelOpts := grpcModelOpts(*c, o.SystemState.Model.ModelsPath)
config.DetectThinkingSupportFromBackend(ctx, c, inferenceModel, modelOpts)
// Update the config in the loader so it persists for future requests

View File

@@ -0,0 +1,29 @@
package backend
import (
"github.com/mudler/LocalAI/core/config"
"github.com/gpustack/gguf-parser-go/util/ptr"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)
// Specs for the gating helper that decides whether the GGUF backend is probed
// for thinking/reasoning defaults.
var _ = Describe("thinking probe gating", func() {
	It("probes tokenizer-template models when any reasoning default is still unset", func() {
		mc := &config.ModelConfig{
			TemplateConfig: config.TemplateConfig{UseTokenizerTemplate: true},
		}
		// Both reasoning defaults unset: a probe is required.
		Expect(needsThinkingProbe(mc)).To(BeTrue())

		// Only one default set: the other still needs probing.
		mc.ReasoningConfig.DisableReasoning = ptr.To(true)
		Expect(needsThinkingProbe(mc)).To(BeTrue())

		// Everything explicit: nothing left for the probe to fill in.
		mc.ReasoningConfig.DisableReasoningTagPrefill = ptr.To(true)
		Expect(needsThinkingProbe(mc)).To(BeFalse())
	})

	It("does not probe when tokenizer templates are disabled", func() {
		mc := &config.ModelConfig{}
		Expect(needsThinkingProbe(mc)).To(BeFalse())
	})
})

View File

@@ -125,19 +125,7 @@ func DetectThinkingSupportFromBackend(ctx context.Context, cfg *ModelConfig, bac
return
}
cfg.ReasoningConfig.DisableReasoning = ptr.To(!metadata.SupportsThinking)
// Use the rendered template to detect if thinking token is at the end
// This reuses the existing DetectThinkingStartToken function
if metadata.RenderedTemplate != "" {
thinkingStartToken := reasoning.DetectThinkingStartToken(metadata.RenderedTemplate, &cfg.ReasoningConfig)
thinkingForcedOpen := thinkingStartToken != ""
cfg.ReasoningConfig.DisableReasoningTagPrefill = ptr.To(!thinkingForcedOpen)
xlog.Debug("[gguf] DetectThinkingSupportFromBackend: thinking support detected", "supports_thinking", metadata.SupportsThinking, "thinking_forced_open", thinkingForcedOpen, "thinking_start_token", thinkingStartToken)
} else {
cfg.ReasoningConfig.DisableReasoningTagPrefill = ptr.To(true)
xlog.Debug("[gguf] DetectThinkingSupportFromBackend: thinking support detected", "supports_thinking", metadata.SupportsThinking, "thinking_forced_open", false)
}
applyDetectedThinkingConfig(cfg, metadata)
// Extract tool format markers from autoparser analysis
if tf := metadata.GetToolFormat(); tf != nil && tf.FormatType != "" {
@@ -180,3 +168,34 @@ func DetectThinkingSupportFromBackend(ctx context.Context, cfg *ModelConfig, bac
}
}
}
// applyDetectedThinkingConfig fills in the reasoning defaults derived from the
// backend metadata probe, but only for settings the user left unset. Explicit
// YAML/user configuration always wins over detection.
func applyDetectedThinkingConfig(cfg *ModelConfig, metadata *pb.ModelMetadataResponse) {
	if cfg == nil || metadata == nil {
		return
	}
	// Only infer the reasoning on/off default when it was not set explicitly.
	if cfg.ReasoningConfig.DisableReasoning == nil {
		cfg.ReasoningConfig.DisableReasoning = ptr.To(!metadata.SupportsThinking)
	}
	// If the prefill behavior is already pinned, keep it and record that
	// template-based inference was skipped. Both pointers are non-nil here:
	// DisableReasoning was either explicit or filled just above.
	if cfg.ReasoningConfig.DisableReasoningTagPrefill != nil {
		xlog.Debug("[gguf] DetectThinkingSupportFromBackend: preserving explicit reasoning config", "supports_thinking", metadata.SupportsThinking, "disable_reasoning", *cfg.ReasoningConfig.DisableReasoning, "disable_reasoning_tag_prefill", *cfg.ReasoningConfig.DisableReasoningTagPrefill)
		return
	}
	// Prefill default is still unset: without a rendered template there is
	// nothing to inspect, so default to disabling the tag prefill.
	if metadata.RenderedTemplate == "" {
		cfg.ReasoningConfig.DisableReasoningTagPrefill = ptr.To(true)
		xlog.Debug("[gguf] DetectThinkingSupportFromBackend: thinking support detected", "supports_thinking", metadata.SupportsThinking, "thinking_forced_open", false)
		return
	}
	// Reuse the existing DetectThinkingStartToken function on the rendered
	// template to decide whether the model leaves a thinking tag open.
	startToken := reasoning.DetectThinkingStartToken(metadata.RenderedTemplate, &cfg.ReasoningConfig)
	forcedOpen := startToken != ""
	cfg.ReasoningConfig.DisableReasoningTagPrefill = ptr.To(!forcedOpen)
	xlog.Debug("[gguf] DetectThinkingSupportFromBackend: thinking support detected", "supports_thinking", metadata.SupportsThinking, "thinking_forced_open", forcedOpen, "thinking_start_token", startToken)
}

View File

@@ -0,0 +1,101 @@
package config
import (
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
"github.com/mudler/LocalAI/pkg/reasoning"
"github.com/gpustack/gguf-parser-go/util/ptr"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)
// Specs for applyDetectedThinkingConfig: probe-derived defaults must only fill
// unset reasoning settings and never override explicit user configuration.
var _ = Describe("GGUF backend metadata reasoning defaults", func() {
	// thinkingMetadata mimics a probe result for a model that supports
	// thinking and whose rendered template opens a <think> block.
	thinkingMetadata := func() *pb.ModelMetadataResponse {
		return &pb.ModelMetadataResponse{
			SupportsThinking: true,
			RenderedTemplate: "{{ bos_token }}<think>",
		}
	}

	It("fills reasoning defaults when unset", func() {
		mc := &ModelConfig{
			TemplateConfig: TemplateConfig{UseTokenizerTemplate: true},
		}
		applyDetectedThinkingConfig(mc, thinkingMetadata())
		// Both defaults inferred: reasoning enabled, prefill enabled.
		Expect(mc.ReasoningConfig.DisableReasoning).ToNot(BeNil())
		Expect(*mc.ReasoningConfig.DisableReasoning).To(BeFalse())
		Expect(mc.ReasoningConfig.DisableReasoningTagPrefill).ToNot(BeNil())
		Expect(*mc.ReasoningConfig.DisableReasoningTagPrefill).To(BeFalse())
	})

	It("preserves fully explicit reasoning settings", func() {
		mc := &ModelConfig{
			TemplateConfig: TemplateConfig{UseTokenizerTemplate: true},
			ReasoningConfig: reasoning.Config{
				DisableReasoning:           ptr.To(true),
				DisableReasoningTagPrefill: ptr.To(true),
			},
		}
		applyDetectedThinkingConfig(mc, thinkingMetadata())
		// User settings survive even though the probe says thinking works.
		Expect(mc.ReasoningConfig.DisableReasoning).ToNot(BeNil())
		Expect(*mc.ReasoningConfig.DisableReasoning).To(BeTrue())
		Expect(mc.ReasoningConfig.DisableReasoningTagPrefill).ToNot(BeNil())
		Expect(*mc.ReasoningConfig.DisableReasoningTagPrefill).To(BeTrue())
	})

	It("preserves explicit disable while still inferring missing prefill", func() {
		mc := &ModelConfig{
			TemplateConfig: TemplateConfig{UseTokenizerTemplate: true},
			ReasoningConfig: reasoning.Config{
				DisableReasoning: ptr.To(true),
			},
		}
		applyDetectedThinkingConfig(mc, thinkingMetadata())
		Expect(mc.ReasoningConfig.DisableReasoning).ToNot(BeNil())
		Expect(*mc.ReasoningConfig.DisableReasoning).To(BeTrue())
		Expect(mc.ReasoningConfig.DisableReasoningTagPrefill).ToNot(BeNil())
		Expect(*mc.ReasoningConfig.DisableReasoningTagPrefill).To(BeFalse())
	})

	It("preserves explicit prefill while still inferring missing disable flag", func() {
		mc := &ModelConfig{
			TemplateConfig: TemplateConfig{UseTokenizerTemplate: true},
			ReasoningConfig: reasoning.Config{
				DisableReasoningTagPrefill: ptr.To(true),
			},
		}
		applyDetectedThinkingConfig(mc, thinkingMetadata())
		Expect(mc.ReasoningConfig.DisableReasoning).ToNot(BeNil())
		Expect(*mc.ReasoningConfig.DisableReasoning).To(BeFalse())
		Expect(mc.ReasoningConfig.DisableReasoningTagPrefill).ToNot(BeNil())
		Expect(*mc.ReasoningConfig.DisableReasoningTagPrefill).To(BeTrue())
	})

	It("defaults to disabling reasoning when backend does not support thinking", func() {
		mc := &ModelConfig{
			TemplateConfig: TemplateConfig{UseTokenizerTemplate: true},
		}
		applyDetectedThinkingConfig(mc, &pb.ModelMetadataResponse{
			SupportsThinking: false,
		})
		// No thinking support and no template: both flags default to true.
		Expect(mc.ReasoningConfig.DisableReasoning).ToNot(BeNil())
		Expect(*mc.ReasoningConfig.DisableReasoning).To(BeTrue())
		Expect(mc.ReasoningConfig.DisableReasoningTagPrefill).ToNot(BeNil())
		Expect(*mc.ReasoningConfig.DisableReasoningTagPrefill).To(BeTrue())
	})
})