# exo/bench/livecodebench_runner.py

#!/usr/bin/env python3
# pyright: reportAny=false, reportUnknownMemberType=false, reportUnknownVariableType=false, reportUnknownArgumentType=false
"""
LiveCodeBench runner wrapper for exo.

This wrapper allows running LiveCodeBench with custom OpenAI-compatible endpoints
by dynamically registering models and configuring the OpenAI client.

Usage:
    python -m bench.livecodebench_runner --model my-model --base-url http://localhost:52415/v1 [lcb args...]

The wrapper:
1. Registers the custom model in LiveCodeBench's model registry
2. Sets up environment variables for the OpenAI client
3. Runs the standard LiveCodeBench runner

Requires LiveCodeBench to be installed:
    git clone https://github.com/LiveCodeBench/LiveCodeBench
    cd LiveCodeBench && uv pip install -e .
"""

from __future__ import annotations

import argparse
import multiprocessing
import os
import signal
import sys
from datetime import datetime
from pathlib import Path
from typing import TYPE_CHECKING, NoReturn

if TYPE_CHECKING:
    from typing import Any


def _cleanup_and_exit(exit_code: int = 130) -> NoReturn:
    """Stop every live multiprocessing child, then hard-exit this process.

    All children are signalled first and only afterwards waited on, so the
    whole shutdown shares a single grace period of about one second instead
    of paying up to one second per child. Children still alive when the
    grace period ends are killed.

    The process is ended with ``os._exit`` so interpreter-level cleanup
    cannot hang. Because ``os._exit`` does not flush stdio buffers, stdout
    and stderr are flushed explicitly beforehand.

    Args:
        exit_code: Status handed to ``os._exit``. Defaults to 130.
    """
    import time  # local import: the module does not import ``time`` at top level

    children = multiprocessing.active_children()

    # Ask everyone to stop before waiting on anyone.
    for child in children:
        child.terminate()

    # One shared deadline for the whole group of children.
    deadline = time.monotonic() + 1.0
    for child in children:
        child.join(timeout=max(0.0, deadline - time.monotonic()))
        if child.is_alive():
            child.kill()

    # os._exit() drops anything still sitting in the stdio buffers.
    for stream in (sys.stdout, sys.stderr):
        try:
            stream.flush()
        except Exception:  # noqa: BLE001
            # Best effort only: this may run inside a signal handler (where a
            # re-entrant flush can fail) or with a closed/missing stream, and
            # nothing must prevent the exit below.
            pass

    # Force exit to avoid hanging on cleanup
    os._exit(exit_code)


def _signal_handler(signum: int, frame: object) -> NoReturn:
    """Signal handler: terminate all child processes and exit.

    The exit status follows the shell convention ``128 + signum`` so the
    caller can tell which signal ended the run (130 for SIGINT, 143 for
    SIGTERM).

    Args:
        signum: Number of the signal being handled.
        frame: Interrupted stack frame; part of the handler signature, unused.
    """
    del frame  # unused, but required by the signal-handler calling convention
    _cleanup_and_exit(128 + signum)


def _has_lcb_layout(root: Path) -> bool:
    """Return True if *root* contains ``lcb_runner/prompts/few_shot_examples``."""
    return (root / "lcb_runner" / "prompts" / "few_shot_examples").exists()


def get_lcb_directory() -> Path | None:
    """Find the LiveCodeBench installation directory.

    LiveCodeBench uses relative paths like 'lcb_runner/prompts/few_shot_examples/...'
    which require running from the LiveCodeBench directory.

    Candidates are tried in this order; the first one containing
    ``lcb_runner/prompts/few_shot_examples`` wins:

    1. The ``LIVECODEBENCH_DIR`` environment variable (``~`` is expanded).
       A warning is printed to stderr if it is set but does not match.
    2. The parent of the installed ``lcb_runner`` package, located through
       ``importlib`` without importing the package.
    3. ``LiveCodeBench`` directories next to this script's parent directory
       and one level above it.

    Returns:
        The matching directory, or ``None`` if no candidate matches.
    """
    # Check environment variable first
    if env_path := os.environ.get("LIVECODEBENCH_DIR"):
        # os.path.expanduser leaves the path untouched when it cannot expand.
        lcb_path = Path(os.path.expanduser(env_path))
        if _has_lcb_layout(lcb_path):
            return lcb_path
        print(
            f"Warning: LIVECODEBENCH_DIR={env_path!r} does not contain "
            "lcb_runner/prompts/few_shot_examples; trying other locations.",
            file=sys.stderr,
        )

    # Use importlib to find package location without executing module code
    # This avoids triggering the relative path imports that would fail
    try:
        import importlib.util

        spec = importlib.util.find_spec("lcb_runner")
    except (ImportError, ValueError):
        # ValueError: find_spec raises it when an already-imported module has
        # ``__spec__`` set to None. ModuleNotFoundError is an ImportError.
        spec = None

    if spec is not None and spec.origin:
        # spec.origin is the __init__.py path, go up two levels
        lcb_path = Path(spec.origin).parent.parent
        if _has_lcb_layout(lcb_path):
            return lcb_path

    # Check common locations relative to this script
    script_dir = Path(__file__).parent.parent  # exo/
    common_locations = (
        script_dir / "LiveCodeBench",  # exo/LiveCodeBench
        script_dir.parent / "LiveCodeBench",  # sibling to exo
    )
    return next((loc for loc in common_locations if _has_lcb_layout(loc)), None)


def setup_custom_model(model_name: str, base_url: str) -> None:
    """Make ``model_name`` known to LiveCodeBench's model registry.

    If the name is already present in ``LanguageModelStore`` nothing is
    changed. Otherwise a ``LanguageModel`` with the ``OpenAIChat`` style is
    created and added to both ``LanguageModelList`` and ``LanguageModelStore``.

    Args:
        model_name: Name used as both the model name and its representation.
        base_url: Endpoint URL, stored as the entry's ``link``.

    Raises:
        SystemExit: With code 1 (after printing install instructions to
            stderr) when ``lcb_runner`` cannot be imported.
    """
    try:
        from lcb_runner.lm_styles import (  # pyright: ignore[reportMissingImports]
            LanguageModel,
            LanguageModelList,
            LanguageModelStore,
            LMStyle,
        )
    except ImportError as import_error:
        install_hint = (
            "Error: LiveCodeBench not installed. Install with:\n"
            "  git clone https://github.com/LiveCodeBench/LiveCodeBench\n"
            "  cd LiveCodeBench && uv pip install -e ."
        )
        print(install_hint, file=sys.stderr)
        raise SystemExit(1) from import_error

    if model_name not in LanguageModelStore:
        # OpenAIChat style is intended to route requests through the
        # oai_runner, which respects OPENAI_BASE_URL.
        entry = LanguageModel(
            model_name=model_name,
            model_repr=model_name,
            model_style=LMStyle.OpenAIChat,
            release_date=datetime.now(),
            link=base_url,
        )

        # The entry has to be visible in the list as well as the lookup table.
        LanguageModelList.append(entry)
        LanguageModelStore[model_name] = entry


def patch_openai_client(base_url: str) -> None:
    """Patch the OpenAI client to use a custom base URL.

    Replaces ``lcb_runner.runner.oai_runner.OpenAI`` with a factory that fills
    in defaults before delegating to the original:

    * ``base_url`` is set to *base_url* when it is missing or ``None``.
    * ``api_key`` is set when it is missing or empty/``None``, taken from
      ``OPENAI_API_KEY``, then ``OPENAI_KEY``, then the placeholder
      ``"exo-local"`` (exo doesn't require auth).

    Explicit ``None`` values are treated as "not provided", so a caller that
    forwards an unset environment variable (``api_key=os.getenv(...)``) still
    gets a usable key.

    Args:
        base_url: Base URL injected into clients created through the patch.

    Raises:
        SystemExit: With code 1 (after printing the import error to stderr)
            when ``lcb_runner.runner.oai_runner`` cannot be imported.
    """
    try:
        from lcb_runner.runner import oai_runner  # noqa: I001 # pyright: ignore[reportMissingImports]
    except ImportError as e:
        print(f"Error importing required modules: {e}", file=sys.stderr)
        raise SystemExit(1) from e

    # Keep a handle on the real constructor so the wrapper can delegate to it.
    original_openai = oai_runner.OpenAI

    def patched_openai(*args: Any, **kwargs: Any) -> Any:
        """Create OpenAI client with custom base_url."""
        # Inject base_url unless the caller supplied a real value.
        if kwargs.get("base_url") is None:
            kwargs["base_url"] = base_url
        # Inject an API key unless the caller supplied a non-empty one.
        if not kwargs.get("api_key"):
            kwargs["api_key"] = (
                os.getenv("OPENAI_API_KEY") or os.getenv("OPENAI_KEY") or "exo-local"
            )
        return original_openai(*args, **kwargs)

    # Apply the patch
    oai_runner.OpenAI = patched_openai


def main() -> int:
    """Run LiveCodeBench against an OpenAI-compatible endpoint.

    Steps, in order:

    1. Install SIGINT/SIGTERM handlers that tear down child processes.
    2. Parse this wrapper's options; unrecognised arguments are forwarded to
       LiveCodeBench unchanged.
    3. Export ``OPENAI_BASE_URL`` and, when neither ``OPENAI_API_KEY`` nor
       ``OPENAI_KEY`` is set, placeholder values for both.
    4. Change into the LiveCodeBench directory (if found) because
       LiveCodeBench uses relative paths.
    5. Register the model and patch the OpenAI client factory.
    6. Optionally redirect LiveCodeBench's output path (``--output-dir``) and
       truncate the benchmark (``--limit``).
    7. Replace ``sys.argv`` and call LiveCodeBench's ``main()``.

    Returns:
        0 on success; the integer code of a ``SystemExit`` raised while
        running LiveCodeBench (a code of ``None`` counts as success, a
        non-integer code is printed to stderr and mapped to 1); 1 on any
        other exception.

    Raises:
        SystemExit: From argument parsing (including a negative ``--limit``)
            or from the model/client setup helpers when LiveCodeBench cannot
            be imported.
    """
    # Set up signal handlers for clean exit
    signal.signal(signal.SIGINT, _signal_handler)
    signal.signal(signal.SIGTERM, _signal_handler)

    parser = argparse.ArgumentParser(
        description="LiveCodeBench runner wrapper for exo",
        epilog="Additional arguments are passed to lcb_runner.runner.main",
    )
    parser.add_argument(
        "--base-url",
        default=os.environ.get("OPENAI_BASE_URL", "http://localhost:52415/v1"),
        help="OpenAI-compatible API base URL (default: OPENAI_BASE_URL or localhost:52415/v1)",
    )
    parser.add_argument(
        "--model",
        required=True,
        help="Model name to use",
    )
    parser.add_argument(
        "--output-dir",
        default=None,
        help="Output directory for results (maps to LiveCodeBench's --custom_output_save_name)",
    )
    parser.add_argument(
        "--limit",
        type=int,
        default=None,
        help="Limit number of problems to evaluate (for testing)",
    )

    # Parse known args, pass rest to LiveCodeBench
    args, remaining = parser.parse_known_args()

    # A negative limit would slice from the end of the benchmark and silently
    # drop the last N problems; reject it up front. 0 keeps meaning "no limit".
    limit: int | None = args.limit
    if limit is not None and limit < 0:
        parser.error("--limit must be non-negative")

    # Set up environment
    os.environ["OPENAI_BASE_URL"] = args.base_url
    if "OPENAI_API_KEY" not in os.environ and "OPENAI_KEY" not in os.environ:
        os.environ["OPENAI_API_KEY"] = "exo-local"
        os.environ["OPENAI_KEY"] = "exo-local"

    # Save original directory for output path resolution
    original_cwd = os.getcwd()

    # Change to LiveCodeBench directory before imports that use relative paths
    # LiveCodeBench uses paths like 'lcb_runner/prompts/few_shot_examples/...'
    lcb_dir = get_lcb_directory()
    if lcb_dir:
        os.chdir(lcb_dir)
    else:
        print(
            "Warning: Could not find LiveCodeBench directory. "
            "Relative path imports may fail.",
            file=sys.stderr,
        )

    # Setup custom model and patch client
    setup_custom_model(args.model, args.base_url)
    patch_openai_client(args.base_url)

    # Build arguments for LiveCodeBench runner: our model first, then
    # everything this wrapper did not recognise.
    lcb_args = ["--model", args.model, *remaining]

    # Resolve output directory to absolute path (relative to original cwd);
    # needed because the working directory may have changed above.
    output_base: str | None = None
    if args.output_dir:
        output_base = str(Path(original_cwd) / args.output_dir)

    # Run LiveCodeBench
    try:
        from lcb_runner.runner import main as lcb_main_module  # noqa: I001 # pyright: ignore[reportMissingImports]
        from lcb_runner.utils import path_utils  # noqa: I001 # pyright: ignore[reportMissingImports]

        # Patch output path to use our output directory
        if output_base:
            original_get_output_path = path_utils.get_output_path
            default_prefix = "output/"

            def patched_get_output_path(model_repr: str, runner_args: Any) -> str:
                """Re-root LiveCodeBench's output path under ``output_base``."""
                original_path = original_get_output_path(model_repr, runner_args)
                # Drop the leading 'output/' (if present) before re-rooting.
                if original_path.startswith(default_prefix):
                    relative_path = original_path[len(default_prefix) :]
                else:
                    relative_path = original_path
                new_path = str(Path(output_base) / relative_path)
                path_utils.ensure_dir(new_path)
                print(f"Saving results to: {new_path}")
                return new_path

            path_utils.get_output_path = patched_get_output_path
            # Also patch in main module since it may have imported directly
            if hasattr(lcb_main_module, "get_output_path"):
                lcb_main_module.get_output_path = patched_get_output_path

        # Patch benchmark loading to support --limit
        # Must patch in the main module since it imports the function directly
        if limit:
            original_build = lcb_main_module.build_prompt_benchmark

            def limited_build(*a: Any, **kw: Any) -> Any:
                """Build the benchmark, keeping at most ``limit`` problems."""
                benchmark, format_prompt = original_build(*a, **kw)
                if len(benchmark) > limit:
                    print(
                        f"Limiting benchmark from {len(benchmark)} to {limit} problems"
                    )
                    benchmark = benchmark[:limit]
                return benchmark, format_prompt

            lcb_main_module.build_prompt_benchmark = limited_build

        # Patch sys.argv for argparse in lcb_main
        sys.argv = [sys.argv[0], *lcb_args]
        lcb_main_module.main()
        return 0
    except KeyboardInterrupt:
        print("\nInterrupted by user", file=sys.stderr)
        _cleanup_and_exit(130)
    except SystemExit as e:
        # Mirror the interpreter's own SystemExit semantics: None is success,
        # an int is the status, anything else is a message plus status 1.
        if e.code is None:
            return 0
        if isinstance(e.code, int):
            return e.code
        print(e.code, file=sys.stderr)
        return 1
    except Exception as e:
        print(f"Error running LiveCodeBench: {e}", file=sys.stderr)
        return 1


if __name__ == "__main__":
    # Script entry point: main()'s return value becomes the exit status.
    sys.exit(main())