diff --git a/backend/go/cloud-proxy/provider_anthropic.go b/backend/go/cloud-proxy/provider_anthropic.go index d8382d454..d86f2ab8e 100644 --- a/backend/go/cloud-proxy/provider_anthropic.go +++ b/backend/go/cloud-proxy/provider_anthropic.go @@ -142,19 +142,12 @@ func buildAnthropicRequest(opts *pb.PredictOptions, cfg *proxyConfig, stream boo if req.MaxTokens <= 0 { req.MaxTokens = anthropicDefaultMaxTokens } - // Newer Anthropic models 400 when both temperature and top_p are - // set ("`temperature` and `top_p` cannot both be specified for - // this model. Please use only one.") even though their docs only - // "recommend" picking one. The OpenAI-compatible chat UI almost - // always sends both with default values, so prefer temperature - // and drop top_p when both are present. - if t := opts.GetTemperature(); t != 0 { - v := float64(t) - req.Temperature = &v - } else if t := opts.GetTopP(); t != 0 { - v := float64(t) - req.TopP = &v - } + // Do not forward temperature/top_p. Newer Anthropic reasoning models reject + // requests that carry temperature ("`temperature` is deprecated for this + // model"), and the OpenAI-compatible clients typically send only the + // server-side DEFAULT sampling values rather than user intent — dropping + // them loses nothing and lets the upstream apply its own defaults. + _ = opts req.Tools = convertOpenAITools(opts.GetTools()) req.ToolChoice = convertOpenAIToolChoice(opts.GetToolChoice()) diff --git a/backend/go/cloud-proxy/provider_anthropic_test.go b/backend/go/cloud-proxy/provider_anthropic_test.go index d46db7b12..6119c97cf 100644 --- a/backend/go/cloud-proxy/provider_anthropic_test.go +++ b/backend/go/cloud-proxy/provider_anthropic_test.go @@ -3,7 +3,6 @@ package main import ( "encoding/json" "io" - "math" "net/http" "net/http/httptest" "strings" @@ -75,15 +74,16 @@ func TestPredict_Anthropic_BasicMessages(t *testing.T) { g.Expect(captured.Messages).To(HaveLen(1)) g.Expect(captured.Messages[0].Role).To(Equal("user")) g.Expect(captured.MaxTokens).To(Equal(int32(32))) - g.Expect(captured.Temperature).NotTo(BeNil()) - g.Expect(*captured.Temperature).To(Equal(0.5)) - // Anthropic 400s when both temperature and top_p are set; the - // translator must prefer temperature and drop top_p. + // Newer Anthropic reasoning models reject requests carrying temperature + // ("`temperature` is deprecated for this model"); clients typically send + // only default sampling values, so the translator forwards neither. + g.Expect(captured.Temperature).To(BeNil()) g.Expect(captured.TopP).To(BeNil()) g.Expect(captured.Stream).To(BeFalse()) } -// When only top_p is set, it should be forwarded. +// Sampling parameters are not forwarded at all — the upstream applies its +// own defaults (newest models reject explicit temperature/top_p). func TestPredict_Anthropic_TopPOnly(t *testing.T) { g := NewWithT(t) srv, captured := fakeAnthropicUpstream(t, func(_ anthropicRequest) (int, string, string) { @@ -99,11 +99,7 @@ func TestPredict_Anthropic_TopPOnly(t *testing.T) { }) g.Expect(err).NotTo(HaveOccurred()) g.Expect(captured.Temperature).To(BeNil()) - // PredictOptions.TopP is float32 on the wire; the translator widens - // to float64 so 0.9 round-trips as 0.8999999761581421… — compare - // with a small tolerance rather than exact equality. - g.Expect(captured.TopP).NotTo(BeNil()) - g.Expect(math.Abs(*captured.TopP - 0.9)).To(BeNumerically("<=", 1e-6)) + g.Expect(captured.TopP).To(BeNil()) } func TestPredict_Anthropic_DefaultsMaxTokens(t *testing.T) { diff --git a/backend/go/cloud-proxy/provider_openai.go b/backend/go/cloud-proxy/provider_openai.go index d4911b6b7..2fab6c06f 100644 --- a/backend/go/cloud-proxy/provider_openai.go +++ b/backend/go/cloud-proxy/provider_openai.go @@ -30,7 +30,7 @@ type openAIRequest struct { Stream bool `json:"stream,omitempty"` Temperature *float64 `json:"temperature,omitempty"` TopP *float64 `json:"top_p,omitempty"` - MaxTokens *int32 `json:"max_tokens,omitempty"` + MaxTokens *int32 `json:"max_completion_tokens,omitempty"` // newer OpenAI models reject max_tokens ("use max_completion_tokens instead") Stop []string `json:"stop,omitempty"` FrequencyPenalty *float64 `json:"frequency_penalty,omitempty"` PresencePenalty *float64 `json:"presence_penalty,omitempty"` @@ -107,14 +107,10 @@ func buildOpenAIRequest(opts *pb.PredictOptions, cfg *proxyConfig, stream bool) Tools: parseRawJSON(opts.GetTools()), ToolChoice: parseRawJSON(opts.GetToolChoice()), } - if t := opts.GetTemperature(); t != 0 { - v := float64(t) - req.Temperature = &v - } - if t := opts.GetTopP(); t != 0 { - v := float64(t) - req.TopP = &v - } + // Do not forward temperature/top_p. Newer OpenAI reasoning models reject + // temperature as deprecated, and clients typically send only default + // sampling values rather than user intent — let the upstream apply its + // own defaults. if n := opts.GetTokens(); n > 0 { req.MaxTokens = &n } diff --git a/backend/go/cloud-proxy/provider_openai_test.go b/backend/go/cloud-proxy/provider_openai_test.go index 9ce4334db..6db03a664 100644 --- a/backend/go/cloud-proxy/provider_openai_test.go +++ b/backend/go/cloud-proxy/provider_openai_test.go @@ -74,8 +74,9 @@ func TestPredict_OpenAI_BasicChat(t *testing.T) { g.Expect(captured.Messages).To(HaveLen(2)) g.Expect(captured.Messages[0].Role).To(Equal("system")) g.Expect(captured.Messages[1].Role).To(Equal("user")) - g.Expect(captured.Temperature).NotTo(BeNil()) - g.Expect(*captured.Temperature).To(Equal(0.5)) + // Sampling parameters are not forwarded (newest models reject explicit + // temperature); token limit is serialized as max_completion_tokens. + g.Expect(captured.Temperature).To(BeNil()) g.Expect(captured.MaxTokens).NotTo(BeNil()) g.Expect(*captured.MaxTokens).To(Equal(int32(32))) g.Expect(captured.Stream).To(BeFalse())