mirror of
https://github.com/exo-explore/exo.git
synced 2026-02-05 03:33:30 -05:00
284 lines
10 KiB
Python
284 lines
10 KiB
Python
#!/usr/bin/env python3
|
|
# pyright: reportAny=false, reportUnknownMemberType=false, reportUnknownVariableType=false, reportUnknownArgumentType=false
|
|
"""
|
|
LiveCodeBench runner wrapper for exo.
|
|
|
|
This wrapper allows running LiveCodeBench with custom OpenAI-compatible endpoints
|
|
by dynamically registering models and configuring the OpenAI client.
|
|
|
|
Usage:
|
|
python -m bench.livecodebench_runner --model my-model --base-url http://localhost:52415/v1 [lcb args...]
|
|
|
|
The wrapper:
|
|
1. Registers the custom model in LiveCodeBench's model registry
|
|
2. Sets up environment variables for the OpenAI client
|
|
3. Runs the standard LiveCodeBench runner
|
|
|
|
Requires LiveCodeBench to be installed:
|
|
git clone https://github.com/LiveCodeBench/LiveCodeBench
|
|
cd LiveCodeBench && uv pip install -e .
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import argparse
|
|
import multiprocessing
|
|
import os
|
|
import signal
|
|
import sys
|
|
from datetime import datetime
|
|
from pathlib import Path
|
|
from typing import TYPE_CHECKING, NoReturn
|
|
|
|
if TYPE_CHECKING:
|
|
from typing import Any
|
|
|
|
|
|
def _cleanup_and_exit(exit_code: int = 130) -> NoReturn:
|
|
"""Terminate all child processes and exit."""
|
|
# Terminate any active multiprocessing pools
|
|
for child in multiprocessing.active_children():
|
|
child.terminate()
|
|
child.join(timeout=1)
|
|
if child.is_alive():
|
|
child.kill()
|
|
# Force exit to avoid hanging on cleanup
|
|
os._exit(exit_code)
|
|
|
|
|
|
def _signal_handler(signum: int, frame: object) -> NoReturn:
|
|
"""Handle interrupt signals by terminating all child processes."""
|
|
_cleanup_and_exit(130)
|
|
|
|
|
|
def get_lcb_directory() -> Path | None:
|
|
"""Find the LiveCodeBench installation directory.
|
|
|
|
LiveCodeBench uses relative paths like 'lcb_runner/prompts/few_shot_examples/...'
|
|
which require running from the LiveCodeBench directory.
|
|
"""
|
|
# Check environment variable first
|
|
if env_path := os.environ.get("LIVECODEBENCH_DIR"):
|
|
lcb_path = Path(env_path)
|
|
if (lcb_path / "lcb_runner" / "prompts" / "few_shot_examples").exists():
|
|
return lcb_path
|
|
|
|
# Use importlib to find package location without executing module code
|
|
# This avoids triggering the relative path imports that would fail
|
|
try:
|
|
import importlib.util
|
|
|
|
spec = importlib.util.find_spec("lcb_runner")
|
|
if spec and spec.origin:
|
|
# spec.origin is the __init__.py path, go up two levels
|
|
lcb_path = Path(spec.origin).parent.parent
|
|
if (lcb_path / "lcb_runner" / "prompts" / "few_shot_examples").exists():
|
|
return lcb_path
|
|
except (ImportError, ModuleNotFoundError):
|
|
pass
|
|
|
|
# Check common locations relative to this script
|
|
script_dir = Path(__file__).parent.parent # exo/
|
|
common_locations = [
|
|
script_dir / "LiveCodeBench", # exo/LiveCodeBench
|
|
script_dir.parent / "LiveCodeBench", # sibling to exo
|
|
]
|
|
for loc in common_locations:
|
|
if (loc / "lcb_runner" / "prompts" / "few_shot_examples").exists():
|
|
return loc
|
|
|
|
return None
|
|
|
|
|
|
def setup_custom_model(model_name: str, base_url: str) -> None:
|
|
"""Register a custom model in LiveCodeBench's registry."""
|
|
try:
|
|
from lcb_runner.lm_styles import ( # pyright: ignore[reportMissingImports]
|
|
LanguageModel,
|
|
LanguageModelList,
|
|
LanguageModelStore,
|
|
LMStyle,
|
|
)
|
|
except ImportError as e:
|
|
print(
|
|
"Error: LiveCodeBench not installed. Install with:\n"
|
|
" git clone https://github.com/LiveCodeBench/LiveCodeBench\n"
|
|
" cd LiveCodeBench && uv pip install -e .",
|
|
file=sys.stderr,
|
|
)
|
|
raise SystemExit(1) from e
|
|
|
|
# Check if model already exists
|
|
if model_name in LanguageModelStore:
|
|
return
|
|
|
|
# Create a new model entry using OpenAIChat style
|
|
# This will route through the oai_runner which respects OPENAI_BASE_URL
|
|
custom_model = LanguageModel(
|
|
model_name=model_name,
|
|
model_repr=model_name,
|
|
model_style=LMStyle.OpenAIChat,
|
|
release_date=datetime.now(),
|
|
link=base_url,
|
|
)
|
|
|
|
# Add to the model list and store
|
|
LanguageModelList.append(custom_model)
|
|
LanguageModelStore[model_name] = custom_model
|
|
|
|
|
|
def patch_openai_client(base_url: str) -> None:
|
|
"""Patch the OpenAI client to use a custom base URL.
|
|
|
|
This patches the oai_runner module to use our custom base URL.
|
|
"""
|
|
try:
|
|
from lcb_runner.runner import oai_runner # noqa: I001 # pyright: ignore[reportMissingImports]
|
|
except ImportError as e:
|
|
print(f"Error importing required modules: {e}", file=sys.stderr)
|
|
raise SystemExit(1) from e
|
|
|
|
# Store original client creation
|
|
original_init = oai_runner.OpenAI
|
|
|
|
def patched_openai(*args: Any, **kwargs: Any) -> Any:
|
|
"""Create OpenAI client with custom base_url."""
|
|
# Inject base_url if not already set
|
|
if "base_url" not in kwargs:
|
|
kwargs["base_url"] = base_url
|
|
# Use dummy API key if not set (exo doesn't require auth)
|
|
if "api_key" not in kwargs and not os.getenv("OPENAI_KEY"):
|
|
kwargs["api_key"] = os.getenv("OPENAI_API_KEY", "exo-local")
|
|
return original_init(*args, **kwargs)
|
|
|
|
# Apply the patch
|
|
oai_runner.OpenAI = patched_openai
|
|
|
|
|
|
def main() -> int:
|
|
"""Main entry point."""
|
|
# Set up signal handlers for clean exit
|
|
signal.signal(signal.SIGINT, _signal_handler)
|
|
signal.signal(signal.SIGTERM, _signal_handler)
|
|
|
|
parser = argparse.ArgumentParser(
|
|
description="LiveCodeBench runner wrapper for exo",
|
|
epilog="Additional arguments are passed to lcb_runner.runner.main",
|
|
)
|
|
parser.add_argument(
|
|
"--base-url",
|
|
default=os.environ.get("OPENAI_BASE_URL", "http://localhost:52415/v1"),
|
|
help="OpenAI-compatible API base URL (default: OPENAI_BASE_URL or localhost:52415/v1)",
|
|
)
|
|
parser.add_argument(
|
|
"--model",
|
|
required=True,
|
|
help="Model name to use",
|
|
)
|
|
parser.add_argument(
|
|
"--output-dir",
|
|
default=None,
|
|
help="Output directory for results (maps to LiveCodeBench's --custom_output_save_name)",
|
|
)
|
|
parser.add_argument(
|
|
"--limit",
|
|
type=int,
|
|
default=None,
|
|
help="Limit number of problems to evaluate (for testing)",
|
|
)
|
|
|
|
# Parse known args, pass rest to LiveCodeBench
|
|
args, remaining = parser.parse_known_args()
|
|
|
|
# Set up environment
|
|
os.environ["OPENAI_BASE_URL"] = args.base_url
|
|
if "OPENAI_API_KEY" not in os.environ and "OPENAI_KEY" not in os.environ:
|
|
os.environ["OPENAI_API_KEY"] = "exo-local"
|
|
os.environ["OPENAI_KEY"] = "exo-local"
|
|
|
|
# Save original directory for output path resolution
|
|
original_cwd = os.getcwd()
|
|
|
|
# Change to LiveCodeBench directory before imports that use relative paths
|
|
# LiveCodeBench uses paths like 'lcb_runner/prompts/few_shot_examples/...'
|
|
lcb_dir = get_lcb_directory()
|
|
if lcb_dir:
|
|
os.chdir(lcb_dir)
|
|
else:
|
|
print(
|
|
"Warning: Could not find LiveCodeBench directory. "
|
|
"Relative path imports may fail.",
|
|
file=sys.stderr,
|
|
)
|
|
|
|
# Setup custom model and patch client
|
|
setup_custom_model(args.model, args.base_url)
|
|
patch_openai_client(args.base_url)
|
|
|
|
# Build arguments for LiveCodeBench runner
|
|
lcb_args = ["--model", args.model]
|
|
|
|
# Resolve output directory to absolute path (relative to original cwd)
|
|
output_base: str | None = None
|
|
if args.output_dir:
|
|
output_base = str(Path(original_cwd) / args.output_dir)
|
|
|
|
lcb_args.extend(remaining)
|
|
|
|
# Run LiveCodeBench
|
|
try:
|
|
from lcb_runner.runner import main as lcb_main_module # noqa: I001 # pyright: ignore[reportMissingImports]
|
|
from lcb_runner.utils import path_utils # noqa: I001 # pyright: ignore[reportMissingImports]
|
|
|
|
# Patch output path to use our output directory
|
|
if output_base:
|
|
original_get_output_path = path_utils.get_output_path
|
|
|
|
def patched_get_output_path(model_repr: str, runner_args: Any) -> str:
|
|
# Get the original path and replace 'output/' with our base
|
|
original_path = original_get_output_path(model_repr, runner_args)
|
|
# Replace 'output/' prefix with our custom base
|
|
if original_path.startswith("output/"):
|
|
new_path = str(Path(output_base) / original_path[7:]) # Skip 'output/'
|
|
else:
|
|
new_path = str(Path(output_base) / original_path)
|
|
path_utils.ensure_dir(new_path)
|
|
print(f"Saving results to: {new_path}")
|
|
return new_path
|
|
|
|
path_utils.get_output_path = patched_get_output_path
|
|
# Also patch in main module since it may have imported directly
|
|
if hasattr(lcb_main_module, "get_output_path"):
|
|
lcb_main_module.get_output_path = patched_get_output_path
|
|
|
|
# Patch benchmark loading to support --limit
|
|
# Must patch in the main module since it imports the function directly
|
|
if args.limit is not None:
|
|
original_build = lcb_main_module.build_prompt_benchmark
|
|
|
|
def limited_build(*a: Any, **kw: Any) -> Any:
|
|
benchmark, format_prompt = original_build(*a, **kw)
|
|
if args.limit and len(benchmark) > args.limit:
|
|
print(f"Limiting benchmark from {len(benchmark)} to {args.limit} problems")
|
|
benchmark = benchmark[: args.limit]
|
|
return benchmark, format_prompt
|
|
|
|
lcb_main_module.build_prompt_benchmark = limited_build
|
|
|
|
# Patch sys.argv for argparse in lcb_main
|
|
sys.argv = [sys.argv[0], *lcb_args]
|
|
lcb_main_module.main()
|
|
return 0
|
|
except KeyboardInterrupt:
|
|
print("\nInterrupted by user", file=sys.stderr)
|
|
_cleanup_and_exit(130)
|
|
except SystemExit as e:
|
|
return e.code if isinstance(e.code, int) else 1
|
|
except Exception as e:
|
|
print(f"Error running LiveCodeBench: {e}", file=sys.stderr)
|
|
return 1
|
|
|
|
|
|
if __name__ == "__main__":
|
|
raise SystemExit(main())
|