diff --git a/core/config/defaults.go b/core/config/defaults.go index 18625fab3..bb993d075 100644 --- a/core/config/defaults.go +++ b/core/config/defaults.go @@ -12,14 +12,12 @@ package config // these; config never imports backend. const ( // DefaultContextSize is the fallback context window when none is configured - // or estimable from the model. + // or estimable from the model. It is also the fallback for a GGUF whose + // metadata yields no usable estimate or that the parser cannot read at all + // (e.g. a quant type it does not know, such as NVFP4): a model-agnostic + // safe default beats a tiny, surprising window that truncates real prompts. DefaultContextSize = 4096 - // GGUFFallbackContextSize is the context window for a GGUF model whose - // metadata yields no usable estimate (see guessGGUFFromFile). Deliberately - // smaller than DefaultContextSize to stay conservative on memory there. - GGUFFallbackContextSize = 1024 - // DefaultNGPULayers means "offload all layers"; the backend (fit_params) // clamps to what actually fits in device memory. 
DefaultNGPULayers = 99999999 diff --git a/core/config/gguf.go b/core/config/gguf.go index 16e43c914..177e68749 100644 --- a/core/config/gguf.go +++ b/core/config/gguf.go @@ -33,7 +33,7 @@ func guessGGUFFromFile(cfg *ModelConfig, f *gguf.GGUFFile, defaultCtx int) { cSize := int(ctxSize) cfg.ContextSize = &cSize } else { - defaultCtx = GGUFFallbackContextSize + defaultCtx = DefaultContextSize cfg.ContextSize = &defaultCtx } } diff --git a/core/config/hooks_llamacpp.go b/core/config/hooks_llamacpp.go index 09bdbe868..07ccdda7b 100644 --- a/core/config/hooks_llamacpp.go +++ b/core/config/hooks_llamacpp.go @@ -34,7 +34,7 @@ func llamaCppDefaults(cfg *ModelConfig, modelPath string) { // Default context size if not set, regardless of whether GGUF parsing succeeds defer func() { if cfg.ContextSize == nil { - ctx := GGUFFallbackContextSize + ctx := DefaultContextSize cfg.ContextSize = &ctx } }() diff --git a/core/config/hooks_test.go b/core/config/hooks_test.go index 6e18ad7cc..a1b30b8d9 100644 --- a/core/config/hooks_test.go +++ b/core/config/hooks_test.go @@ -248,7 +248,11 @@ var _ = Describe("Backend hooks and parser defaults", func() { } cfg.SetDefaults(ModelPath(dir)) + // An unreadable/unparseable GGUF (e.g. a quant type the parser does + // not know, such as NVFP4) yields no estimate, so the hook must fall + // back to DefaultContextSize rather than a tiny, surprising value. Expect(cfg.ContextSize).NotTo(BeNil()) + Expect(*cfg.ContextSize).To(Equal(DefaultContextSize)) }) }) diff --git a/gallery/index.yaml b/gallery/index.yaml index cc975a83a..f39993333 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -579,6 +579,10 @@ icon: https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen3.6/Figures/qwen3.6_35b_a3b_score.png overrides: backend: llama-cpp + # NVFP4 GGUFs use a quant type the GGUF metadata parser cannot read, so + # context size cannot be auto-derived; set it explicitly (the model trains + # to 262144; 32768 is a safe default operators can raise).
+ context_size: 32768 function: automatic_tool_parsing_fallback: true grammar: @@ -611,6 +615,9 @@ - gguf overrides: backend: llama-cpp + # NVFP4 GGUFs use a quant type the GGUF metadata parser cannot read, so + # context size cannot be auto-derived; set it explicitly. + context_size: 32768 function: automatic_tool_parsing_fallback: true grammar: @@ -638,6 +645,9 @@ icon: https://cdn-uploads.huggingface.co/production/uploads/66309bd090589b7c65950665/sGQKmrMc6L6guMoaB5_Y2.png overrides: backend: llama-cpp + # NVFP4 GGUFs use a quant type the GGUF metadata parser cannot read, so + # context size cannot be auto-derived; set it explicitly. + context_size: 32768 function: automatic_tool_parsing_fallback: true grammar: @@ -688,6 +698,10 @@ icon: https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen3.6/Figures/qwen3.6_27b_score.png overrides: backend: llama-cpp + # NVFP4 GGUFs use a quant type the GGUF metadata parser cannot read, so + # context size cannot be auto-derived; set it explicitly (the model trains + # to 262144; 32768 is a safe default operators can raise). + context_size: 32768 function: automatic_tool_parsing_fallback: true grammar: