Files
exo/.github/workflows/e2e_test.yml
2025-07-31 20:36:47 +01:00

361 lines
14 KiB
YAML

# End-to-end test workflow; for now it is only started by hand.
name: macOS System Info
on:
  # This allows manual triggering
  workflow_dispatch:
  # Push trigger, currently disabled:
  # push:
  #   branches: [ '*' ]
  #   tags: [ '*' ]
jobs:
master:
runs-on: ['self-hosted', 'macOS']
steps:
# Fetch the repository with Git LFS support enabled for the checkout.
- name: Checkout repository
  uses: actions/checkout@v4
  with:
    lfs: true
# Give the checked-out repository a local (non-global) git identity.
- name: Configure git user
  shell: bash
  run: |
    git config --local user.name "github-actions bot"
    git config --local user.email "github-actions@users.noreply.github.com"
# Explicitly download the Git LFS objects for the current checkout.
- name: Pull LFS files
  shell: bash
  run: |
    echo "Pulling Git LFS files..."
    git lfs pull
# Delete leftover files matching *.db* from ~/.exo, when that directory exists.
- name: Reset databases
  run: |
    exo_dir=~/.exo
    if [ -d "$exo_dir" ]; then
      rm -rf "$exo_dir"/*.db*
    fi
# Create a fresh temporary EXO_HOME and pick a random API port, then export
# both through GITHUB_ENV so the following steps can read them.
- name: Setup EXO_HOME and API_PORT
  shell: bash
  run: |
    EXO_HOME=$(mktemp -d -t exo-e2e-master-XXXXXXXX)
    # Generate random port (macOS compatible method) in the 49152-65535 range
    port_lo=49152
    port_hi=65535
    API_PORT=$((port_lo + RANDOM % (port_hi - port_lo + 1)))
    {
      echo "EXO_HOME=$EXO_HOME"
      echo "API_PORT=$API_PORT"
    } >> $GITHUB_ENV
    echo "Created EXO_HOME: $EXO_HOME"
    echo "Generated API_PORT: $API_PORT"
    echo "Verifying API_PORT is set: $API_PORT"
# Locate Nix on the runner and make it usable by this step and later ones.
# Lookup order: the default-profile binary, then the daemon profile script,
# then whatever `nix` is already on PATH. The step fails when none is found.
- name: Setup Nix Environment
  shell: bash
  run: |
    echo "Checking for nix installation..."
    nix_profile=/nix/var/nix/profiles/default
    # Check if nix binary exists directly
    if [ -f "$nix_profile/bin/nix" ]; then
      echo "Found nix binary at $nix_profile/bin/nix"
      export PATH="$nix_profile/bin:$PATH"
      # GITHUB_PATH prepends this directory to PATH for every later step.
      echo "$nix_profile/bin" >> "$GITHUB_PATH"
      nix --version
    elif [ -f "$nix_profile/etc/profile.d/nix-daemon.sh" ]; then
      echo "Found nix profile script, sourcing..."
      source "$nix_profile/etc/profile.d/nix-daemon.sh"
      nix --version
      # Sourcing only affects this step's shell; persist the directory that
      # provides nix so the later steps can find it as well.
      dirname "$(command -v nix)" >> "$GITHUB_PATH"
    elif command -v nix >/dev/null 2>&1; then
      echo "Nix already in PATH"
      nix --version
    else
      echo "Nix not found. Debugging info:"
      echo "Contents of /nix/var/nix/profiles/default/:"
      ls -la "$nix_profile/" 2>/dev/null || echo "Directory not found"
      echo "Contents of /nix/var/nix/profiles/default/bin/:"
      ls -la "$nix_profile/bin/" 2>/dev/null || echo "Directory not found"
      exit 1
    fi
# Log OS version, memory, CPU and root-disk details of the runner.
- name: Print macOS system information
  run: |
    echo "=== macOS System Information ==="
    echo "OS Version:"
    sw_vers

    echo -e "\n=== Memory Information ==="
    system_profiler SPMemoryDataType

    echo -e "\n=== Memory Usage Summary ==="
    # Convert the free page count reported by vm_stat into gigabytes.
    vm_stat | perl -ne '/page size of (\d+)/ and $size=$1; /Pages free: (\d+)/ and printf "Free Memory: %.2f GB\n", $1 * $size / 1024 / 1024 / 1024'
    top -l 1 -s 0 | grep PhysMem

    echo -e "\n=== CPU Information ==="
    sysctl -n machdep.cpu.brand_string
    system_profiler SPHardwareDataType | grep -E "Cores|Processors"

    echo -e "\n=== Disk Space ==="
    df -h /
# - name: Setup Hugging Face token
# run: |
# mkdir -p ~/.cache/huggingface
# echo "${{ secrets.HF_TOKEN }}" > ~/.cache/huggingface/token
# Run the `just sync-clean` recipe inside the flake's development shell.
- name: Sync dependencies
  shell: bash
  run: |
    echo "Running just sync-clean to ensure clean dependencies..."
    nix \
      --extra-experimental-features nix-command \
      --extra-experimental-features flakes \
      develop --command just sync-clean
# Run the `just build-forwarder` recipe inside the flake's development shell.
- name: Build forwarder
  shell: bash
  run: |
    echo "Building Go forwarder binary..."
    nix \
      --extra-experimental-features nix-command \
      --extra-experimental-features flakes \
      develop --command just build-forwarder
# Launch a master node and a worker node in the background, wait for the
# master API to answer on $API_PORT, then wait (best effort, up to 30 s) for a
# task to reach COMPLETE or FAILED, and finally dump both node logs.
# Both nodes are stopped when the step ends, whether it succeeds or fails.
- name: Start node (master)
  run: |
    echo "Starting master node with debug enabled..."
    echo "Environment check - API_PORT: '$API_PORT'"
    echo "Environment check - EXO_HOME: '$EXO_HOME'"
    if [ -z "$API_PORT" ]; then
      echo "ERROR: API_PORT is not set!"
      exit 1
    fi
    # Run with Python unbuffered output and maximum debug level
    nix --extra-experimental-features nix-command --extra-experimental-features flakes develop \
      --command bash -c "EXO_HOME=$EXO_HOME API_PORT=$API_PORT PYTHONUNBUFFERED=1 PYTHONDEBUG=1 PYTHONPATH=. uv run master/main.py" \
      > /tmp/master_node.log 2>&1 &
    MASTER_PID=$!
    echo "Started master node in background with PID: $MASTER_PID"
    echo "Log file: /tmp/master_node.log"
    echo "Starting worker node..."
    nix --extra-experimental-features nix-command --extra-experimental-features flakes develop \
      --command bash -c "EXO_HOME=$EXO_HOME PYTHONUNBUFFERED=1 PYTHONDEBUG=1 PYTHONPATH=. uv run worker/main.py" \
      > /tmp/worker_node.log 2>&1 &
    WORKER_PID=$!
    echo "Started worker node in background with PID: $WORKER_PID"
    echo "Log file: /tmp/worker_node.log"
    # Stop both background nodes however this step ends, so they are not left
    # running when the following cleanup step deletes EXO_HOME. A node that
    # has already exited is not treated as an error.
    trap 'kill "$MASTER_PID" "$WORKER_PID" 2>/dev/null || true' EXIT
    # Wait for the master node to be ready (up to 30 seconds)
    for i in {1..30}; do
      echo "Attempt $i: Checking if master node is ready..."
      if curl -s "http://localhost:$API_PORT/state" > /dev/null 2>&1; then
        echo "Master node is ready!"
        break
      fi
      if [ $i -eq 30 ]; then
        echo "Master node failed to start within 30 seconds. Checking logs..."
        echo "=== Master node log ==="
        cat /tmp/master_node.log || echo "No master log file found"
        echo "=== Worker node log ==="
        cat /tmp/worker_node.log || echo "No worker log file found"
        exit 1
      fi
      sleep 1
    done
    # wait for master to have a COMPLETE or FAILED task in the state
    task_seen=false
    for i in {1..30}; do
      # `jq -e` derives the exit status from the filter result (true -> 0,
      # false/null -> 1); the output is discarded rather than being written
      # to a file literally named "0".
      if curl -s "http://localhost:$API_PORT/state" \
        | jq -e '.tasks | any(.task_status == "COMPLETE" or .task_status == "FAILED")' > /dev/null 2>&1; then
        echo "Master node has a COMPLETE or FAILED task in the state"
        task_seen=true
        break
      fi
      sleep 1
    done
    if [ "$task_seen" != "true" ]; then
      # Best effort only: report the timeout but keep the step going.
      echo "WARNING: no COMPLETE or FAILED task seen in the state within 30 seconds"
    fi
    echo "=== Master node log ==="
    cat /tmp/master_node.log || echo "No master log file found"
    echo "=== Worker node log ==="
    cat /tmp/worker_node.log || echo "No worker log file found"
# Remove the temporary EXO_HOME; `always()` runs this even after a failure.
- name: Cleanup EXO_HOME
  if: always()
  shell: bash
  run: |
    echo "Cleaning up EXO_HOME: $EXO_HOME"
    rm -rf "$EXO_HOME"
worker:
runs-on: ['self-hosted', 'macOS']
steps:
# Fetch the repository with Git LFS support enabled for the checkout.
- name: Checkout repository
  uses: actions/checkout@v4
  with:
    lfs: true
# Give the checked-out repository a local (non-global) git identity.
- name: Configure git user
  shell: bash
  run: |
    git config --local user.name "github-actions bot"
    git config --local user.email "github-actions@users.noreply.github.com"
# Explicitly download the Git LFS objects for the current checkout.
- name: Pull LFS files
  shell: bash
  run: |
    echo "Pulling Git LFS files..."
    git lfs pull
# Delete leftover files matching *.db* from ~/.exo, when that directory exists.
- name: Reset databases
  run: |
    exo_dir=~/.exo
    if [ -d "$exo_dir" ]; then
      rm -rf "$exo_dir"/*.db*
    fi
# Create a fresh temporary EXO_HOME and pick a random API port, then export
# both through GITHUB_ENV so the following steps can read them.
- name: Setup EXO_HOME and API_PORT
  shell: bash
  run: |
    EXO_HOME=$(mktemp -d -t exo-e2e-worker-XXXXXXXX)
    # Generate random port (macOS compatible method) in the 49152-65535 range
    port_lo=49152
    port_hi=65535
    API_PORT=$((port_lo + RANDOM % (port_hi - port_lo + 1)))
    {
      echo "EXO_HOME=$EXO_HOME"
      echo "API_PORT=$API_PORT"
    } >> $GITHUB_ENV
    echo "Created EXO_HOME: $EXO_HOME"
    echo "Generated API_PORT: $API_PORT"
    echo "Verifying API_PORT is set: $API_PORT"
# Locate Nix on the runner and make it usable by this step and later ones.
# Lookup order: the default-profile binary, then the daemon profile script,
# then whatever `nix` is already on PATH. The step fails when none is found.
- name: Setup Nix Environment
  shell: bash
  run: |
    echo "Checking for nix installation..."
    nix_profile=/nix/var/nix/profiles/default
    # Check if nix binary exists directly
    if [ -f "$nix_profile/bin/nix" ]; then
      echo "Found nix binary at $nix_profile/bin/nix"
      export PATH="$nix_profile/bin:$PATH"
      # GITHUB_PATH prepends this directory to PATH for every later step.
      echo "$nix_profile/bin" >> "$GITHUB_PATH"
      nix --version
    elif [ -f "$nix_profile/etc/profile.d/nix-daemon.sh" ]; then
      echo "Found nix profile script, sourcing..."
      source "$nix_profile/etc/profile.d/nix-daemon.sh"
      nix --version
      # Sourcing only affects this step's shell; persist the directory that
      # provides nix so the later steps can find it as well.
      dirname "$(command -v nix)" >> "$GITHUB_PATH"
    elif command -v nix >/dev/null 2>&1; then
      echo "Nix already in PATH"
      nix --version
    else
      echo "Nix not found. Debugging info:"
      echo "Contents of /nix/var/nix/profiles/default/:"
      ls -la "$nix_profile/" 2>/dev/null || echo "Directory not found"
      echo "Contents of /nix/var/nix/profiles/default/bin/:"
      ls -la "$nix_profile/bin/" 2>/dev/null || echo "Directory not found"
      exit 1
    fi
# Log OS version, memory, CPU and root-disk details of the runner.
- name: Print macOS system information
  run: |
    echo "=== macOS System Information ==="
    echo "OS Version:"
    sw_vers

    echo -e "\n=== Memory Information ==="
    system_profiler SPMemoryDataType

    echo -e "\n=== Memory Usage Summary ==="
    # Convert the free page count reported by vm_stat into gigabytes.
    vm_stat | perl -ne '/page size of (\d+)/ and $size=$1; /Pages free: (\d+)/ and printf "Free Memory: %.2f GB\n", $1 * $size / 1024 / 1024 / 1024'
    top -l 1 -s 0 | grep PhysMem

    echo -e "\n=== CPU Information ==="
    sysctl -n machdep.cpu.brand_string
    system_profiler SPHardwareDataType | grep -E "Cores|Processors"

    echo -e "\n=== Disk Space ==="
    df -h /
# - name: Setup Hugging Face token
# run: |
# mkdir -p ~/.cache/huggingface
# echo "${{ secrets.HF_TOKEN }}" > ~/.cache/huggingface/token
# Run the `just sync-clean` recipe inside the flake's development shell.
- name: Sync dependencies
  shell: bash
  run: |
    echo "Running just sync-clean to ensure clean dependencies..."
    nix \
      --extra-experimental-features nix-command \
      --extra-experimental-features flakes \
      develop --command just sync-clean
# Run the `just build-forwarder` recipe inside the flake's development shell.
- name: Build forwarder
  shell: bash
  run: |
    echo "Building Go forwarder binary..."
    nix \
      --extra-experimental-features nix-command \
      --extra-experimental-features flakes \
      develop --command just build-forwarder
# Launch a master node in replica mode (EXO_RUN_AS_REPLICA=1) plus a worker
# node in the background, wait for the API on $API_PORT, then exercise it:
# create an instance, poll until it reports ACTIVE (up to 50 s), send one chat
# completion request, delete the instance, and dump both node logs.
# Both nodes are stopped when the step ends, whether it succeeds or fails.
- name: Start node (replica)
  run: |
    echo "Starting master node with debug enabled..."
    echo "Environment check - API_PORT: '$API_PORT'"
    echo "Environment check - EXO_HOME: '$EXO_HOME'"
    if [ -z "$API_PORT" ]; then
      echo "ERROR: API_PORT is not set!"
      exit 1
    fi
    # Run with Python unbuffered output and maximum debug level
    nix --extra-experimental-features nix-command --extra-experimental-features flakes develop \
      --command bash -c "EXO_RUN_AS_REPLICA=1 EXO_HOME=$EXO_HOME API_PORT=$API_PORT PYTHONUNBUFFERED=1 PYTHONDEBUG=1 PYTHONPATH=. uv run master/main.py" \
      > /tmp/master_node.log 2>&1 &
    MASTER_PID=$!
    echo "Started master node in background with PID: $MASTER_PID"
    echo "Log file: /tmp/master_node.log"
    echo "Starting worker node..."
    nix --extra-experimental-features nix-command --extra-experimental-features flakes develop \
      --command bash -c "EXO_HOME=$EXO_HOME PYTHONUNBUFFERED=1 PYTHONDEBUG=1 PYTHONPATH=. uv run worker/main.py" \
      > /tmp/worker_node.log 2>&1 &
    WORKER_PID=$!
    echo "Started worker node in background with PID: $WORKER_PID"
    echo "Log file: /tmp/worker_node.log"
    # Stop both background nodes however this step ends (including an early
    # exit caused by a failing command). A node that has already exited is
    # not treated as an error.
    trap 'kill "$MASTER_PID" "$WORKER_PID" 2>/dev/null || true' EXIT
    echo "Waiting for master node to start on port $API_PORT..."
    # Wait for the master node to be ready (up to 30 seconds)
    for i in {1..30}; do
      echo "Attempt $i: Checking if master node is ready..."
      if curl -s "http://localhost:$API_PORT/state" > /dev/null 2>&1; then
        echo "Master node is ready!"
        break
      fi
      if [ $i -eq 30 ]; then
        echo "Master node failed to start within 30 seconds. Checking logs..."
        echo "=== Master node log ==="
        cat /tmp/master_node.log || echo "No master log file found"
        echo "=== Worker node log ==="
        cat /tmp/worker_node.log || echo "No worker log file found"
        exit 1
      fi
      sleep 1
    done
    # Ask the API to create an instance of the test model.
    resp=$(curl -X POST "http://localhost:$API_PORT/instance" -H "Content-Type: application/json" -d '{"model_id": "llama-3.2:1b"}')
    echo "Response: $resp"
    # Quote the JSON so the shell neither word-splits nor glob-expands it.
    instance_id=$(echo "$resp" | jq -r '.instance_id')
    echo "Instance ID: $instance_id"
    # Poll the instance until it reports ACTIVE (up to 50 attempts, 1 s apart).
    instance_ready=false
    for i in {1..50}; do
      # -w appends the 3-digit status code to the body; split the two apart.
      resp=$(curl -s -w "%{http_code}" -X GET "http://localhost:$API_PORT/instance/$instance_id" -H "Content-Type: application/json")
      http_code="${resp: -3}"
      response_body="${resp%???}"
      echo "HTTP Code: $http_code"
      echo "Response: $response_body"
      if [ "$http_code" == "200" ]; then
        instance_status=$(echo "$response_body" | jq -r '.instance_type')
        if [ "$instance_status" == "ACTIVE" ]; then
          echo "Instance is ready"
          instance_ready=true
          break
        fi
      elif [ "$http_code" == "404" ]; then
        echo "Instance not yet created, waiting..."
      else
        echo "Unexpected HTTP status: $http_code"
      fi
      sleep 1
    done
    if [ "$instance_ready" != "true" ]; then
      # Best effort only: report the timeout but still run the requests below.
      echo "WARNING: instance did not become ACTIVE within 50 attempts"
    fi
    resp=$(curl "http://localhost:$API_PORT/v1/chat/completions" -H "Content-Type: application/json" -d '{"model": "llama-3.2:1b", "messages": [{"role": "user", "content": "What is the meaning of exo?"}], "temperature": 0.7}')
    echo "Response: $resp"
    resp=$(curl -X DELETE "http://localhost:$API_PORT/instance/$instance_id" -H "Content-Type: application/json")
    echo "Response: $resp"
    echo "=== Master node log ==="
    cat /tmp/master_node.log || echo "No master log file found"
    echo "=== Worker node log ==="
    cat /tmp/worker_node.log || echo "No worker log file found"
    # The EXIT trap installed above stops both nodes here.
# Remove the temporary EXO_HOME; `always()` runs this even after a failure.
- name: Cleanup EXO_HOME
  if: always()
  shell: bash
  run: |
    echo "Cleaning up EXO_HOME: $EXO_HOME"
    rm -rf "$EXO_HOME"