mirror of
https://github.com/mudler/LocalAI.git
synced 2026-06-16 04:38:50 -04:00
feat(http): wire request metadata to config.RequestMetadata
The OpenAI request metadata field was parsed but unused; stamp it onto the per-request ModelConfig so gRPCPredictOpts forwards it as chat_template_kwargs overrides. Issue #10329. Assisted-by: Claude:claude-opus-4-8 Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
@@ -318,6 +318,13 @@ func mergeOpenAIRequestAndModelConfig(config *config.ModelConfig, input *schema.
|
||||
// (an operator's explicit disable wins over a request asking to think).
|
||||
config.ApplyReasoningEffort(input.ReasoningEffort)
|
||||
|
||||
// Forward the client's request metadata so chat-template kwargs set per-request
|
||||
// (enable_thinking, reasoning_effort, preserve_thinking, ...) reach the backend
|
||||
// and override the model's reasoning-config defaults. See gRPCPredictOpts.
|
||||
if len(input.Metadata) > 0 {
|
||||
config.RequestMetadata = input.Metadata
|
||||
}
|
||||
|
||||
// Collapse the modern max_completion_tokens alias into the
|
||||
// legacy Maxtokens field so downstream code reads exactly one.
|
||||
// MaxCompletionTokens wins on conflict — it's the canonical
|
||||
|
||||
@@ -731,3 +731,60 @@ var _ = Describe("SetModelAndConfig reasoning_effort parsing (chat completions)"
|
||||
Expect(*(*captured2).ReasoningConfig.DisableReasoning).To(BeFalse())
|
||||
})
|
||||
})
|
||||
|
||||
var _ = Describe("SetModelAndConfig metadata passthrough (chat completions)", func() {
|
||||
var modelDir string
|
||||
|
||||
BeforeEach(func() {
|
||||
var err error
|
||||
modelDir, err = os.MkdirTemp("", "localai-test-models-*")
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
})
|
||||
AfterEach(func() { _ = os.RemoveAll(modelDir) })
|
||||
|
||||
buildApp := func() (*echo.Echo, **config.ModelConfig) {
|
||||
Expect(os.WriteFile(filepath.Join(modelDir, "test-model.yaml"),
|
||||
[]byte("name: test-model\nbackend: llama\n"), 0644)).To(Succeed())
|
||||
ss := &system.SystemState{Model: system.Model{ModelsPath: modelDir}}
|
||||
appConfig := config.NewApplicationConfig()
|
||||
appConfig.SystemState = ss
|
||||
mcl := config.NewModelConfigLoader(modelDir)
|
||||
ml := model.NewModelLoader(ss)
|
||||
re := NewRequestExtractor(mcl, ml, appConfig)
|
||||
|
||||
captured := new(*config.ModelConfig)
|
||||
app := echo.New()
|
||||
app.POST("/v1/chat/completions",
|
||||
func(c echo.Context) error {
|
||||
if cfg, ok := c.Get(CONTEXT_LOCALS_KEY_MODEL_CONFIG).(*config.ModelConfig); ok {
|
||||
*captured = cfg
|
||||
}
|
||||
return c.String(http.StatusOK, "ok")
|
||||
},
|
||||
re.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.OpenAIRequest) }),
|
||||
func(next echo.HandlerFunc) echo.HandlerFunc {
|
||||
return func(c echo.Context) error {
|
||||
if err := re.SetOpenAIRequest(c); err != nil {
|
||||
return err
|
||||
}
|
||||
return next(c)
|
||||
}
|
||||
},
|
||||
)
|
||||
return app, captured
|
||||
}
|
||||
|
||||
It("stamps request metadata onto the config", func() {
|
||||
app, captured := buildApp()
|
||||
body := `{"model":"test-model","messages":[{"role":"user","content":"hi"}],` +
|
||||
`"metadata":{"preserve_thinking":"true"}}`
|
||||
req := httptest.NewRequest(http.MethodPost, "/v1/chat/completions", strings.NewReader(body))
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
rec := httptest.NewRecorder()
|
||||
app.ServeHTTP(rec, req)
|
||||
|
||||
Expect(rec.Code).To(Equal(http.StatusOK))
|
||||
Expect(*captured).ToNot(BeNil())
|
||||
Expect((*captured).RequestMetadata).To(HaveKeyWithValue("preserve_thinking", "true"))
|
||||
})
|
||||
})
|
||||
|
||||
Reference in New Issue
Block a user