diff --git a/bench/eval_config.toml b/bench/eval_config.toml
index 0df90355..71691526 100644
--- a/bench/eval_config.toml
+++ b/bench/eval_config.toml
@@ -75,8 +75,8 @@ fast = true
 evaluate = true
 # Number of parallel API requests
 multiprocess = 32
-# Cache generated outputs for resumption
-use_cache = true
+# Cache generated outputs for resumption (disabled by default to avoid stale results)
+use_cache = false
 # Timeout per problem in seconds (default: 1800 = 30 min)
 timeout = 1800
 # OpenAI API request timeout in seconds (default: 3600 = 1 hour)
diff --git a/bench/exo_eval.py b/bench/exo_eval.py
index 881b0f2e..38264efa 100644
--- a/bench/exo_eval.py
+++ b/bench/exo_eval.py
@@ -490,7 +490,7 @@ def run_livecodebench(
     temperature = lcb_config.get("temperature", 0)  # 0 for non-reasoning models
     n_samples = lcb_config.get("n_samples", 1)  # pass@1
     max_tokens = lcb_config.get("max_tokens", 16384)
-    use_cache = lcb_config.get("use_cache", True)
+    use_cache = lcb_config.get("use_cache", False)
     fast = lcb_config.get("fast", True)  # Use code_generation_lite by default
     evaluate = lcb_config.get("evaluate", True)
     multiprocess = lcb_config.get("multiprocess", 4)
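The diff flips the caching default in two places: the TOML file that users edit and the fallback default inside run_livecodebench. Keeping both in sync matters because the code-level default only takes effect when the key is missing from the config. Below is a minimal sketch of how that interaction could look, assuming the settings live under a [livecodebench] table (the table name is an assumption; the diff only shows that the code reads from a dict named lcb_config) and that the file is parsed with the stdlib tomllib.

import tomllib

# Parse the benchmark config; tomllib requires binary mode.
with open("bench/eval_config.toml", "rb") as f:
    config = tomllib.load(f)

# Table name "livecodebench" is an assumption, not shown in the diff.
lcb_config = config.get("livecodebench", {})

# Mirrors the updated fallback in run_livecodebench: if use_cache is
# absent from the TOML file, caching now stays off rather than on.
use_cache = lcb_config.get("use_cache", False)
print(f"use_cache={use_cache}")

With both defaults set to False, a user who deletes the use_cache line from the config gets the same behavior as a fresh checkout, which is the point of changing the fallback in exo_eval.py alongside the TOML value.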