mirror of
https://github.com/exo-explore/exo.git
synced 2026-04-29 02:07:50 -04:00
306 lines
13 KiB
YAML
306 lines
13 KiB
YAML
# Workflow name and trigger: the workflow is started by every push event.
name: bench

on:
  - push
|
|
|
|
jobs:
  # Planning job: runs only when the head commit message contains '/bench'
  # and republishes the outputs of the `build` step as job outputs.
  plan:
    runs-on: ubuntu-latest
    if: contains(github.event.head_commit.message, '/bench')
    outputs:
      matrix: ${{ steps.build.outputs.matrix }}
      config_file: ${{ steps.build.outputs.config_file }}
      timeout_seconds: ${{ steps.build.outputs.timeout_seconds }}
      environment: ${{ steps.build.outputs.environment }}
    steps:
      - uses: actions/checkout@v4
        name: Checkout repository

      - id: build
        name: Build matrix from config file
        shell: bash
        run: |
          set -euo pipefail
          # CONFIG_FILE is exported so the Python script can see it.
          # NOTE(review): build_matrix.py presumably reads CONFIG_FILE and
          # writes the step outputs referenced above; confirm in that script.
          export CONFIG_FILE='.github/configs/bench_simple.yaml'
          echo "Config file: $CONFIG_FILE"
          python3 .github/scripts/build_matrix.py
|
|
|
|
# Benchmark worker: one job instance per entry of the matrix JSON published by
# the `plan` job. fail-fast is disabled, so one failing entry does not cancel
# the others. Each instance is routed to a self-hosted macOS runner that also
# carries the matrix entry's label.
bench_worker:
  name: "bench on ${{ matrix.label }} [${{ matrix.index }}]"
  needs: plan
  runs-on:
    - self-hosted
    - macOS
    - "${{ matrix.label }}"
  strategy:
    fail-fast: false
    matrix: ${{ fromJSON(needs.plan.outputs.matrix) }}
  steps:
|
|
# Check out the repository with Git LFS download disabled.
- uses: actions/checkout@v4
  name: Checkout repository
  with:
    lfs: false
|
|
|
|
# Set a repository-local git identity (name and e-mail) in the checkout.
- name: Configure git user
  shell: bash
  run: |
    git config --local user.name "github-actions bot"
    git config --local user.email "github-actions@users.noreply.github.com"
|
|
|
|
# TODO: this is mega hacky and I'd like a simpler solution.
#
# Locates an existing Nix installation on the runner and makes it usable:
#   1. use `nix` if it is already on PATH;
#   2. otherwise source the first profile script found (multi-user daemon,
#      single-user, per-user, system-wide);
#   3. otherwise prepend a known Nix bin directory to PATH;
#   4. otherwise print diagnostics and fail the step.
# On success PATH (and NIX_PATH, when set) are appended to $GITHUB_ENV so that
# later steps inherit them.
- name: Setup Nix Environment
  shell: bash
  run: |
    echo "Checking for nix installation..."

    # Check if nix is already available
    if command -v nix >/dev/null 2>&1; then
      echo "Nix already in PATH"
    # Try sourcing profile scripts to set up environment properly
    elif [ -f /nix/var/nix/profiles/default/etc/profile.d/nix-daemon.sh ]; then
      echo "Sourcing multi-user nix-daemon profile script"
      source /nix/var/nix/profiles/default/etc/profile.d/nix-daemon.sh
    elif [ -f "$HOME/.nix-profile/etc/profile.d/nix.sh" ]; then
      echo "Sourcing single-user nix profile script"
      source "$HOME/.nix-profile/etc/profile.d/nix.sh"
    # Paths containing $USER are quoted so an unusual user name cannot split
    # the test or the source command into several words.
    elif [ -f "/nix/var/nix/profiles/per-user/$USER/profile/etc/profile.d/nix.sh" ]; then
      echo "Sourcing per-user nix profile script"
      source "/nix/var/nix/profiles/per-user/$USER/profile/etc/profile.d/nix.sh"
    elif [ -f /etc/profile.d/nix.sh ]; then
      echo "Sourcing system-wide nix profile script"
      source /etc/profile.d/nix.sh
    # Fallback: manually add nix to PATH if binary exists
    elif [ -f /nix/var/nix/profiles/default/bin/nix ]; then
      echo "Found nix binary, manually adding to PATH"
      export PATH="/nix/var/nix/profiles/default/bin:$PATH"
    elif [ -f "$HOME/.nix-profile/bin/nix" ]; then
      echo "Found nix binary in user profile, manually adding to PATH"
      export PATH="$HOME/.nix-profile/bin:$PATH"
    else
      # Nothing usable was found: dump what exists, then fail.
      echo "Nix not found. Debugging info:"
      echo "USER: $USER"
      echo "HOME: $HOME"
      echo "Current PATH: $PATH"
      echo ""
      echo "Checking common Nix locations:"
      echo " /nix/var/nix/profiles/default/bin/nix:"
      ls -la /nix/var/nix/profiles/default/bin/nix 2>/dev/null || echo " Not found"
      echo " /nix/var/nix/profiles/default/etc/profile.d/nix-daemon.sh:"
      ls -la /nix/var/nix/profiles/default/etc/profile.d/nix-daemon.sh 2>/dev/null || echo " Not found"
      echo " ~/.nix-profile/etc/profile.d/nix.sh:"
      ls -la "$HOME/.nix-profile/etc/profile.d/nix.sh" 2>/dev/null || echo " Not found"
      echo " /nix/var/nix/profiles/per-user/$USER/profile/etc/profile.d/nix.sh:"
      ls -la "/nix/var/nix/profiles/per-user/$USER/profile/etc/profile.d/nix.sh" 2>/dev/null || echo " Not found"
      echo ""
      echo "/nix directory structure:"
      ls -la /nix 2>/dev/null || echo " /nix directory not found"
      echo ""
      echo "/nix/var:"
      ls -la /nix/var 2>/dev/null || echo " /nix/var not found"
      echo ""
      echo "/nix/store:"
      # GitHub runs `shell: bash` with `-eo pipefail`. On a populated store
      # `head` closes the pipe early, `ls` dies of SIGPIPE and the pipeline
      # reports failure, so `ls | head || echo "not found"` would print a
      # misleading message. Test for the directory explicitly instead and
      # ignore the pipeline status.
      if [ -d /nix/store ]; then
        ls -la /nix/store 2>/dev/null | head -20 || true
      else
        echo " /nix/store not found"
      fi
      echo ""
      echo "GitHub Actions runner is running as user '$USER'."
      echo "If Nix is installed for a different user, either:"
      echo " 1. Install Nix for user '$USER' (multi-user install recommended)"
      echo " 2. Configure the runner service to run as the user with Nix installed"
      echo " 3. Ensure Nix is installed system-wide with proper daemon setup"
      exit 1
    fi

    # Verify nix is available and persist to GITHUB_ENV
    if command -v nix >/dev/null 2>&1; then
      echo "✓ Nix is available"
      nix --version
      # $GITHUB_ENV is quoted (as the other steps of this job do) so a runner
      # work directory containing spaces does not break the redirect.
      echo "PATH=$PATH" >> "$GITHUB_ENV"
      # NIX_PATH may legitimately be unset; default it to empty for the test.
      if [ -n "${NIX_PATH:-}" ]; then
        echo "NIX_PATH=$NIX_PATH" >> "$GITHUB_ENV"
      fi
    else
      echo "ERROR: Failed to set up Nix"
      echo "PATH after setup attempt: $PATH"
      exit 1
    fi
|
|
|
|
# Creates per-run scratch state and exports it to later steps via $GITHUB_ENV:
# a fresh temporary EXO_HOME, a random API port, the models directory under
# $HOME, and a libp2p namespace derived from the run id and run attempt.
- name: Setup EXO_HOME and API_PORT
  shell: bash
  run: |
    EXO_HOME=$(mktemp -d -t exo-e2e-XXXXXXXX)
    # 49152 plus a remainder in 0..16383, i.e. a port in 49152-65535.
    API_PORT=$((49152 + RANDOM % (65535 - 49152 + 1)))
    EXO_MODELS_DIR="$HOME/.exo/models"
    EXO_LIBP2P_NAMESPACE="bench-${GITHUB_RUN_ID}-${GITHUB_RUN_ATTEMPT}"

    # Append all four variables to the environment file in one redirect.
    {
      echo "EXO_HOME=$EXO_HOME"
      echo "API_PORT=$API_PORT"
      echo "EXO_MODELS_DIR=$EXO_MODELS_DIR"
      echo "EXO_LIBP2P_NAMESPACE=$EXO_LIBP2P_NAMESPACE"
    } >> "$GITHUB_ENV"

    echo "Created EXO_HOME: $EXO_HOME"
    echo "Generated API_PORT: $API_PORT"
    echo "Using models from: $EXO_MODELS_DIR"
    echo "Using libp2p namespace: $EXO_LIBP2P_NAMESPACE"
|
|
|
|
# If local checkouts of mlx and/or mlx-lm exist under /Users/Shared, uncomment
# the matching path entry in pyproject.toml and regenerate uv.lock inside the
# Nix dev shell. When neither directory exists, nothing is modified.
- name: Configure local MLX if available
  shell: bash
  run: |
    echo "=== DEBUG: Checking for local MLX configuration ==="
    MODIFIED=false

    # Same procedure for both packages; only the name differs.
    for PKG in mlx mlx-lm; do
      LOCAL_DIR="/Users/Shared/$PKG"
      echo "Checking for $LOCAL_DIR directory..."
      if [ -d "$LOCAL_DIR" ]; then
        echo "✓ Found $LOCAL_DIR"
        # Debug listing only. Under `bash -eo pipefail` an early-closing `head`
        # can make `ls` fail with SIGPIPE, so never let this abort the step.
        ls -la "$LOCAL_DIR" | head -5 || true
        echo "Enabling local $PKG path in pyproject.toml"
        # Uncomment exactly the line
        #   # <pkg> = { path = "/Users/Shared/<pkg>", editable=true }
        # (sed keeps a backup of the previous file as pyproject.toml.bak).
        sed -i.bak "s|^# $PKG = { path = \"$LOCAL_DIR\", editable=true }\$|$PKG = { path = \"$LOCAL_DIR\", editable=true }|" pyproject.toml
        MODIFIED=true
      else
        echo "✗ $LOCAL_DIR not found, will use PyPI version"
      fi
    done

    if [ "$MODIFIED" = true ]; then
      echo "=== Modified pyproject.toml [tool.uv.sources] section: ==="
      # Print the [tool.uv.sources] header and its body up to the next section.
      sed -n '/\[tool\.uv\.sources\]/,/^\[/{/^\[tool\.uv\.sources\]/p; /^\[/!p;}' pyproject.toml
      echo "=== Regenerating uv.lock with local MLX paths... ==="
      nix --extra-experimental-features nix-command --extra-experimental-features flakes develop --command uv lock --upgrade-package mlx --upgrade-package mlx-lm
      echo "✓ Lock file regenerated"
    else
      echo "⚠ No local MLX directories found, using PyPI packages"
    fi
    echo "=== DEBUG: Local MLX configuration complete ==="
|
|
|
|
# Re-syncs the uv environment in /Users/Shared/test when that directory
# exists, then runs `just sync` for this checkout inside the Nix dev shell.
- name: Sync dependencies
  shell: bash
  run: |
    SHARED_TEST_DIR="/Users/Shared/test"
    # Command prefix that runs a program inside the flake's dev shell.
    NIX_DEVELOP=(nix --extra-experimental-features nix-command --extra-experimental-features flakes develop --command)

    if [ -d "$SHARED_TEST_DIR" ]; then
      pushd "$SHARED_TEST_DIR"
      uv sync --reinstall
      popd
    fi

    echo "Running just sync to ensure clean dependencies..."
    "${NIX_DEVELOP[@]}" just sync
|
|
|
|
# Starts EXO in the background inside the Nix dev shell, polls its HTTP API
# until it answers, then runs .github/scripts/bench.py against it.
#
# Inputs (step env): matrix values IS_PRIMARY / EXPECTED_NODES /
# HARDWARE_LABEL and the plan job outputs CONFIG_FILE / TIMEOUT_SECONDS /
# ENVIRONMENT_JSON. EXO_HOME, API_PORT, EXO_MODELS_DIR and
# EXO_LIBP2P_NAMESPACE come from $GITHUB_ENV (set by an earlier step).
# Output: RESULTS_FILE is appended to $GITHUB_ENV for the upload step.
# On exit (success or failure) the tail of the EXO log is printed and the
# background process is killed.
- name: Start EXO and run bench script
  shell: bash
  env:
    IS_PRIMARY: ${{ matrix.is_primary }}
    EXPECTED_NODES: ${{ matrix.expected_nodes }}
    HARDWARE_LABEL: ${{ matrix.label }}
    CONFIG_FILE: ${{ needs.plan.outputs.config_file }}
    TIMEOUT_SECONDS: ${{ needs.plan.outputs.timeout_seconds }}
    ENVIRONMENT_JSON: ${{ needs.plan.outputs.environment }}
  run: |
    set -euo pipefail

    # Parse environment variables from config into `KEY=value` words.
    # Each value is shell-quoted with shlex.quote because ENV_VARS is spliced
    # into a `bash -c` command string below: an unquoted value containing
    # spaces or shell metacharacters would be split or executed. Plain values
    # (letters, digits, `_ - . / :` etc.) are emitted unchanged.
    ENV_VARS=""
    if [ -n "$ENVIRONMENT_JSON" ] && [ "$ENVIRONMENT_JSON" != "{}" ]; then
      ENV_VARS=$(echo "$ENVIRONMENT_JSON" | python3 -c "import sys, json, shlex; env = json.load(sys.stdin); print(' '.join(f'{k}={shlex.quote(str(v))}' for k, v in env.items()))")
    fi

    echo "Starting EXO with API_PORT=${API_PORT} EXO_HOME=${EXO_HOME} EXO_LIBP2P_NAMESPACE=${EXO_LIBP2P_NAMESPACE}"
    echo "Environment variables from config: $ENV_VARS"
    LOG_FILE=/tmp/exo.log
    # Truncate any log left over from a previous run.
    : > "$LOG_FILE"

    # Only the primary node is started with -m.
    MASTER_FLAG=""
    if [ "$IS_PRIMARY" = "true" ]; then
      MASTER_FLAG="-m"
    fi

    nix --extra-experimental-features nix-command --extra-experimental-features flakes develop --command bash -c \
      "EXO_HOME=$EXO_HOME EXO_MODELS_DIR=$EXO_MODELS_DIR EXO_LIBP2P_NAMESPACE=$EXO_LIBP2P_NAMESPACE $ENV_VARS PYTHONUNBUFFERED=1 PYTHONDEBUG=1 PYTHONPATH=. uv run exo $MASTER_FLAG --api-port $API_PORT" \
      >> "$LOG_FILE" 2>&1 &

    EXO_PID=$!
    echo "Started EXO in background with PID: $EXO_PID"
    echo "Log file: $LOG_FILE"

    # Runs on every exit path: show the log tail, then stop EXO if it is
    # still alive. Failures here must not mask the step's real exit status.
    cleanup() {
      echo '=== EXO log (tail) ==='
      tail -n 300 "$LOG_FILE" || true
      if ps -p "$EXO_PID" >/dev/null 2>&1; then
        echo "Killing EXO (PID $EXO_PID)"
        kill "$EXO_PID" || true
      fi
    }
    trap cleanup EXIT

    # Poll the API once per second, up to 60 times. Abort immediately if the
    # EXO process has already died.
    API_READY=false
    for i in $(seq 1 60); do
      if curl -s "http://localhost:${API_PORT}/state" >/dev/null 2>&1; then
        echo "EXO API ready"
        API_READY=true
        break
      fi
      if ! ps -p "$EXO_PID" >/dev/null 2>&1; then
        echo "EXO terminated early"; sed -n '1,200p' "$LOG_FILE" || true; exit 1
      fi
      sleep 1
    done

    # Previously the loop fell through silently when the API never answered.
    # The bench script is still started (it receives its own timeout), but
    # the condition is now visible in the job log.
    if [ "$API_READY" != "true" ]; then
      echo "WARNING: EXO API did not respond on port ${API_PORT} after 60 attempts; continuing with bench script"
    fi

    RESULTS_FILE="/tmp/bench_results_${GITHUB_RUN_ID}_${GITHUB_RUN_ATTEMPT}_$(date +%s).json"
    echo "Results will be saved to: $RESULTS_FILE"
    echo "RESULTS_FILE=$RESULTS_FILE" >> "$GITHUB_ENV"

    echo "Running bench script with config: $CONFIG_FILE, timeout: $TIMEOUT_SECONDS"
    nix --extra-experimental-features nix-command --extra-experimental-features flakes develop --command bash -c \
      "PYTHONUNBUFFERED=1 uv run --no-project --with pyyaml --with pydantic python .github/scripts/bench.py \
        --api-port $API_PORT \
        --config $CONFIG_FILE \
        --expected-nodes ${EXPECTED_NODES} \
        --is-primary ${IS_PRIMARY} \
        --timeout-seconds ${TIMEOUT_SECONDS} \
        --output $RESULTS_FILE \
        --git-commit ${GITHUB_SHA} \
        --hardware-labels ${HARDWARE_LABEL}"
|
|
|
|
# Ensures the `aws` command exists, installing it through Homebrew when it is
# missing. Runs even after earlier failures, but only when RESULTS_FILE is set
# and the matrix entry is the primary one.
- name: Install AWS CLI
  if: always() && env.RESULTS_FILE && matrix.is_primary
  shell: bash
  run: |
    if command -v aws >/dev/null 2>&1; then
      echo "AWS CLI already installed"
    else
      echo "AWS CLI not found, installing..."
      brew install awscli
    fi
|
|
|
|
# Copies the benchmark results file to the exo-benchmark-results bucket under
# bench/<UTC date path>_<short sha>_<run id>.json. Same run condition as the
# AWS CLI install step; a missing results file is reported and skipped
# without failing the step.
- name: Upload results to S3
  if: always() && env.RESULTS_FILE && matrix.is_primary
  shell: bash
  env:
    AWS_ACCESS_KEY_ID: ${{ secrets.S3_BENCHMARKS_AWS_ACCESS_KEY_ID }}
    AWS_SECRET_ACCESS_KEY: ${{ secrets.S3_BENCHMARKS_AWS_SECRET_ACCESS_KEY }}
    AWS_DEFAULT_REGION: us-east-1
  run: |
    echo "Checking for results file: $RESULTS_FILE"
    echo "Is primary: ${{ matrix.is_primary }}"

    # Guard clause: nothing to upload.
    if [ ! -f "$RESULTS_FILE" ]; then
      echo "Results file not found at: $RESULTS_FILE"
      echo "Skipping upload"
      exit 0
    fi

    # Key layout: bench/YYYY/MM/DD/HHMMSS_<first 8 chars of sha>_<run id>.json
    S3_KEY="bench/$(date -u +%Y/%m/%d/%H%M%S)_${GITHUB_SHA:0:8}_${GITHUB_RUN_ID}.json"
    S3_URI="s3://exo-benchmark-results/$S3_KEY"
    echo "Uploading results to $S3_URI"

    aws s3 cp "$RESULTS_FILE" "$S3_URI" \
      --content-type application/json \
      --metadata "commit=${GITHUB_SHA},run_id=${GITHUB_RUN_ID},branch=${GITHUB_REF_NAME}"

    echo "Results uploaded successfully"
    echo "View at: https://exo-benchmark-results.s3.amazonaws.com/$S3_KEY"
|
|
|
|
# Removes the directory named by EXO_HOME. `always()` makes this run even
# when earlier steps failed.
- name: Cleanup EXO_HOME
  if: always()
  shell: bash
  run: |
    echo "Cleaning up EXO_HOME: $EXO_HOME"
    rm -rf "$EXO_HOME"
|