diff --git a/bench/eval_config.toml b/bench/eval_config.toml
index c48480e84..0df903557 100644
--- a/bench/eval_config.toml
+++ b/bench/eval_config.toml
@@ -77,6 +77,10 @@ evaluate = true
 multiprocess = 32
 # Cache generated outputs for resumption
 use_cache = true
+# Timeout per problem in seconds (default: 1800 = 30 min)
+timeout = 1800
+# OpenAI API request timeout in seconds (default: 3600 = 1 hour)
+openai_timeout = 3600
 
 # Output path for results
 output_path = "bench/lcb_results"
diff --git a/bench/exo_eval.py b/bench/exo_eval.py
index f138b2547..ae70ecb96 100644
--- a/bench/exo_eval.py
+++ b/bench/exo_eval.py
@@ -494,6 +494,9 @@ def run_livecodebench(
   fast = lcb_config.get("fast", True)  # Use code_generation_lite by default
   evaluate = lcb_config.get("evaluate", True)
   multiprocess = lcb_config.get("multiprocess", 4)
+  # Timeouts (high defaults for slow inference)
+  timeout = lcb_config.get("timeout", 1800)  # 30 min per problem
+  openai_timeout = lcb_config.get("openai_timeout", 3600)  # 1 hour per request
 
   exo_base_url = f"http://{host}:{port}/v1"
   effective_output = output_path or lcb_config.get("output_path", "bench/lcb_results")
@@ -545,6 +548,10 @@ def run_livecodebench(
 
   if multiprocess > 1:
     args.extend(["--multiprocess", str(multiprocess)])
 
+  # Add timeouts
+  args.extend(["--timeout", str(timeout)])
+  args.extend(["--openai_timeout", str(openai_timeout)])
+
   if limit is not None:
     args.extend(["--limit", str(limit)])
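
For reference, a minimal runnable sketch of how the new options are intended to flow from eval_config.toml into the LiveCodeBench CLI invocation. The lcb_config dict below is a hypothetical stand-in for the parsed TOML section, and the override values are illustrative, not defaults from the patch.

  # Sketch (assumptions: lcb_config mimics the parsed TOML; values are
  # hypothetical user overrides, not the patch's defaults).
  lcb_config = {"timeout": 900, "openai_timeout": 1200}

  # Same fallbacks as the patch: 30 min per problem, 1 hour per API request.
  timeout = lcb_config.get("timeout", 1800)
  openai_timeout = lcb_config.get("openai_timeout", 3600)

  # Forward the values as CLI flags, exactly as run_livecodebench does.
  args = []
  args.extend(["--timeout", str(timeout)])
  args.extend(["--openai_timeout", str(openai_timeout)])

  print(args)  # ['--timeout', '900', '--openai_timeout', '1200']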