mirror of
https://github.com/mudler/LocalAI.git
synced 2026-04-29 03:24:49 -04:00
Respect explicit reasoning config during GGUF thinking probe (#9463)
Signed-off-by: leinasi2014 <leinasi2014@gmail.com> Co-authored-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
This commit is contained in:
@@ -40,6 +40,12 @@ type TokenUsage struct {
|
||||
ChatDeltas []*proto.ChatDelta // per-chunk deltas from C++ autoparser (only set during streaming)
|
||||
}
|
||||
|
||||
func needsThinkingProbe(c *config.ModelConfig) bool {
|
||||
return c.TemplateConfig.UseTokenizerTemplate &&
|
||||
(c.ReasoningConfig.DisableReasoning == nil ||
|
||||
c.ReasoningConfig.DisableReasoningTagPrefill == nil)
|
||||
}
|
||||
|
||||
// HasChatDeltaContent returns true if any chat delta carries content or reasoning text.
|
||||
// Used to decide whether to prefer C++ autoparser deltas over Go-side tag extraction.
|
||||
func (t TokenUsage) HasChatDeltaContent() bool {
|
||||
@@ -100,11 +106,9 @@ func ModelInference(ctx context.Context, s string, messages schema.Messages, ima
|
||||
// tokenizer template path is active) and the multimodal media marker (needed
|
||||
// by custom chat templates so markers line up with what mtmd expects).
|
||||
// We probe whenever any of those slots is still empty.
|
||||
needsThinkingProbe := c.TemplateConfig.UseTokenizerTemplate &&
|
||||
c.ReasoningConfig.DisableReasoning == nil &&
|
||||
c.ReasoningConfig.DisableReasoningTagPrefill == nil
|
||||
shouldProbeThinking := needsThinkingProbe(c)
|
||||
needsMarkerProbe := c.MediaMarker == ""
|
||||
if needsThinkingProbe || needsMarkerProbe {
|
||||
if shouldProbeThinking || needsMarkerProbe {
|
||||
modelOpts := grpcModelOpts(*c, o.SystemState.Model.ModelsPath)
|
||||
config.DetectThinkingSupportFromBackend(ctx, c, inferenceModel, modelOpts)
|
||||
// Update the config in the loader so it persists for future requests
|
||||
|
||||
29
core/backend/llm_probe_test.go
Normal file
29
core/backend/llm_probe_test.go
Normal file
@@ -0,0 +1,29 @@
|
||||
package backend
|
||||
|
||||
import (
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
|
||||
"github.com/gpustack/gguf-parser-go/util/ptr"
|
||||
. "github.com/onsi/ginkgo/v2"
|
||||
. "github.com/onsi/gomega"
|
||||
)
|
||||
|
||||
var _ = Describe("thinking probe gating", func() {
|
||||
It("probes tokenizer-template models when any reasoning default is still unset", func() {
|
||||
cfg := &config.ModelConfig{
|
||||
TemplateConfig: config.TemplateConfig{UseTokenizerTemplate: true},
|
||||
}
|
||||
Expect(needsThinkingProbe(cfg)).To(BeTrue())
|
||||
|
||||
cfg.ReasoningConfig.DisableReasoning = ptr.To(true)
|
||||
Expect(needsThinkingProbe(cfg)).To(BeTrue())
|
||||
|
||||
cfg.ReasoningConfig.DisableReasoningTagPrefill = ptr.To(true)
|
||||
Expect(needsThinkingProbe(cfg)).To(BeFalse())
|
||||
})
|
||||
|
||||
It("does not probe when tokenizer templates are disabled", func() {
|
||||
cfg := &config.ModelConfig{}
|
||||
Expect(needsThinkingProbe(cfg)).To(BeFalse())
|
||||
})
|
||||
})
|
||||
@@ -125,19 +125,7 @@ func DetectThinkingSupportFromBackend(ctx context.Context, cfg *ModelConfig, bac
|
||||
return
|
||||
}
|
||||
|
||||
cfg.ReasoningConfig.DisableReasoning = ptr.To(!metadata.SupportsThinking)
|
||||
|
||||
// Use the rendered template to detect if thinking token is at the end
|
||||
// This reuses the existing DetectThinkingStartToken function
|
||||
if metadata.RenderedTemplate != "" {
|
||||
thinkingStartToken := reasoning.DetectThinkingStartToken(metadata.RenderedTemplate, &cfg.ReasoningConfig)
|
||||
thinkingForcedOpen := thinkingStartToken != ""
|
||||
cfg.ReasoningConfig.DisableReasoningTagPrefill = ptr.To(!thinkingForcedOpen)
|
||||
xlog.Debug("[gguf] DetectThinkingSupportFromBackend: thinking support detected", "supports_thinking", metadata.SupportsThinking, "thinking_forced_open", thinkingForcedOpen, "thinking_start_token", thinkingStartToken)
|
||||
} else {
|
||||
cfg.ReasoningConfig.DisableReasoningTagPrefill = ptr.To(true)
|
||||
xlog.Debug("[gguf] DetectThinkingSupportFromBackend: thinking support detected", "supports_thinking", metadata.SupportsThinking, "thinking_forced_open", false)
|
||||
}
|
||||
applyDetectedThinkingConfig(cfg, metadata)
|
||||
|
||||
// Extract tool format markers from autoparser analysis
|
||||
if tf := metadata.GetToolFormat(); tf != nil && tf.FormatType != "" {
|
||||
@@ -180,3 +168,34 @@ func DetectThinkingSupportFromBackend(ctx context.Context, cfg *ModelConfig, bac
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func applyDetectedThinkingConfig(cfg *ModelConfig, metadata *pb.ModelMetadataResponse) {
|
||||
if cfg == nil || metadata == nil {
|
||||
return
|
||||
}
|
||||
|
||||
// Respect explicit YAML/user config. Backend probing should only fill defaults
|
||||
// when the reasoning mode has not already been set.
|
||||
if cfg.ReasoningConfig.DisableReasoning == nil {
|
||||
cfg.ReasoningConfig.DisableReasoning = ptr.To(!metadata.SupportsThinking)
|
||||
}
|
||||
|
||||
// Respect explicit prefill config for the same reason. Only infer the
|
||||
// default prefill behavior when the user did not set it.
|
||||
if cfg.ReasoningConfig.DisableReasoningTagPrefill == nil {
|
||||
// Use the rendered template to detect if thinking token is at the end.
|
||||
// This reuses the existing DetectThinkingStartToken function.
|
||||
if metadata.RenderedTemplate != "" {
|
||||
thinkingStartToken := reasoning.DetectThinkingStartToken(metadata.RenderedTemplate, &cfg.ReasoningConfig)
|
||||
thinkingForcedOpen := thinkingStartToken != ""
|
||||
cfg.ReasoningConfig.DisableReasoningTagPrefill = ptr.To(!thinkingForcedOpen)
|
||||
xlog.Debug("[gguf] DetectThinkingSupportFromBackend: thinking support detected", "supports_thinking", metadata.SupportsThinking, "thinking_forced_open", thinkingForcedOpen, "thinking_start_token", thinkingStartToken)
|
||||
} else {
|
||||
cfg.ReasoningConfig.DisableReasoningTagPrefill = ptr.To(true)
|
||||
xlog.Debug("[gguf] DetectThinkingSupportFromBackend: thinking support detected", "supports_thinking", metadata.SupportsThinking, "thinking_forced_open", false)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
xlog.Debug("[gguf] DetectThinkingSupportFromBackend: preserving explicit reasoning config", "supports_thinking", metadata.SupportsThinking, "disable_reasoning", *cfg.ReasoningConfig.DisableReasoning, "disable_reasoning_tag_prefill", *cfg.ReasoningConfig.DisableReasoningTagPrefill)
|
||||
}
|
||||
|
||||
101
core/config/gguf_reasoning_test.go
Normal file
101
core/config/gguf_reasoning_test.go
Normal file
@@ -0,0 +1,101 @@
|
||||
package config
|
||||
|
||||
import (
|
||||
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
|
||||
"github.com/mudler/LocalAI/pkg/reasoning"
|
||||
|
||||
"github.com/gpustack/gguf-parser-go/util/ptr"
|
||||
. "github.com/onsi/ginkgo/v2"
|
||||
. "github.com/onsi/gomega"
|
||||
)
|
||||
|
||||
var _ = Describe("GGUF backend metadata reasoning defaults", func() {
|
||||
It("fills reasoning defaults when unset", func() {
|
||||
cfg := &ModelConfig{
|
||||
TemplateConfig: TemplateConfig{UseTokenizerTemplate: true},
|
||||
}
|
||||
|
||||
applyDetectedThinkingConfig(cfg, &pb.ModelMetadataResponse{
|
||||
SupportsThinking: true,
|
||||
RenderedTemplate: "{{ bos_token }}<think>",
|
||||
})
|
||||
|
||||
Expect(cfg.ReasoningConfig.DisableReasoning).ToNot(BeNil())
|
||||
Expect(*cfg.ReasoningConfig.DisableReasoning).To(BeFalse())
|
||||
Expect(cfg.ReasoningConfig.DisableReasoningTagPrefill).ToNot(BeNil())
|
||||
Expect(*cfg.ReasoningConfig.DisableReasoningTagPrefill).To(BeFalse())
|
||||
})
|
||||
|
||||
It("preserves fully explicit reasoning settings", func() {
|
||||
cfg := &ModelConfig{
|
||||
TemplateConfig: TemplateConfig{UseTokenizerTemplate: true},
|
||||
ReasoningConfig: reasoning.Config{
|
||||
DisableReasoning: ptr.To(true),
|
||||
DisableReasoningTagPrefill: ptr.To(true),
|
||||
},
|
||||
}
|
||||
|
||||
applyDetectedThinkingConfig(cfg, &pb.ModelMetadataResponse{
|
||||
SupportsThinking: true,
|
||||
RenderedTemplate: "{{ bos_token }}<think>",
|
||||
})
|
||||
|
||||
Expect(cfg.ReasoningConfig.DisableReasoning).ToNot(BeNil())
|
||||
Expect(*cfg.ReasoningConfig.DisableReasoning).To(BeTrue())
|
||||
Expect(cfg.ReasoningConfig.DisableReasoningTagPrefill).ToNot(BeNil())
|
||||
Expect(*cfg.ReasoningConfig.DisableReasoningTagPrefill).To(BeTrue())
|
||||
})
|
||||
|
||||
It("preserves explicit disable while still inferring missing prefill", func() {
|
||||
cfg := &ModelConfig{
|
||||
TemplateConfig: TemplateConfig{UseTokenizerTemplate: true},
|
||||
ReasoningConfig: reasoning.Config{
|
||||
DisableReasoning: ptr.To(true),
|
||||
},
|
||||
}
|
||||
|
||||
applyDetectedThinkingConfig(cfg, &pb.ModelMetadataResponse{
|
||||
SupportsThinking: true,
|
||||
RenderedTemplate: "{{ bos_token }}<think>",
|
||||
})
|
||||
|
||||
Expect(cfg.ReasoningConfig.DisableReasoning).ToNot(BeNil())
|
||||
Expect(*cfg.ReasoningConfig.DisableReasoning).To(BeTrue())
|
||||
Expect(cfg.ReasoningConfig.DisableReasoningTagPrefill).ToNot(BeNil())
|
||||
Expect(*cfg.ReasoningConfig.DisableReasoningTagPrefill).To(BeFalse())
|
||||
})
|
||||
|
||||
It("preserves explicit prefill while still inferring missing disable flag", func() {
|
||||
cfg := &ModelConfig{
|
||||
TemplateConfig: TemplateConfig{UseTokenizerTemplate: true},
|
||||
ReasoningConfig: reasoning.Config{
|
||||
DisableReasoningTagPrefill: ptr.To(true),
|
||||
},
|
||||
}
|
||||
|
||||
applyDetectedThinkingConfig(cfg, &pb.ModelMetadataResponse{
|
||||
SupportsThinking: true,
|
||||
RenderedTemplate: "{{ bos_token }}<think>",
|
||||
})
|
||||
|
||||
Expect(cfg.ReasoningConfig.DisableReasoning).ToNot(BeNil())
|
||||
Expect(*cfg.ReasoningConfig.DisableReasoning).To(BeFalse())
|
||||
Expect(cfg.ReasoningConfig.DisableReasoningTagPrefill).ToNot(BeNil())
|
||||
Expect(*cfg.ReasoningConfig.DisableReasoningTagPrefill).To(BeTrue())
|
||||
})
|
||||
|
||||
It("defaults to disabling reasoning when backend does not support thinking", func() {
|
||||
cfg := &ModelConfig{
|
||||
TemplateConfig: TemplateConfig{UseTokenizerTemplate: true},
|
||||
}
|
||||
|
||||
applyDetectedThinkingConfig(cfg, &pb.ModelMetadataResponse{
|
||||
SupportsThinking: false,
|
||||
})
|
||||
|
||||
Expect(cfg.ReasoningConfig.DisableReasoning).ToNot(BeNil())
|
||||
Expect(*cfg.ReasoningConfig.DisableReasoning).To(BeTrue())
|
||||
Expect(cfg.ReasoningConfig.DisableReasoningTagPrefill).ToNot(BeNil())
|
||||
Expect(*cfg.ReasoningConfig.DisableReasoningTagPrefill).To(BeTrue())
|
||||
})
|
||||
})
|
||||
Reference in New Issue
Block a user