mirror of
https://github.com/exo-explore/exo.git
synced 2025-12-23 22:27:50 -05:00
Co-authored-by: Gelu Vrabie <gelu@exolabs.net> Co-authored-by: Alex Cheema <41707476+AlexCheema@users.noreply.github.com> Co-authored-by: Seth Howes <71157822+sethhowes@users.noreply.github.com> Co-authored-by: Matt Beton <matthew.beton@gmail.com> Co-authored-by: Alex Cheema <alexcheema123@gmail.com>
361 lines
14 KiB
YAML
361 lines
14 KiB
YAML
# Workflow metadata and triggers. Only manual dispatch is enabled; the
# push trigger below is kept commented out.
name: macOS System Info

on:
  workflow_dispatch: # This allows manual triggering
  # push:
  #   branches: [ '*' ]
  #   tags: [ '*' ]
|
|
|
|
jobs:
|
|
master:
|
|
runs-on: ['self-hosted', 'macOS']
|
|
|
|
steps:
|
|
# Check out the repository with Git LFS support enabled on the checkout action.
- name: Checkout repository
  uses: actions/checkout@v4
  with:
    lfs: true
|
|
|
|
# Set a repository-local git identity (email and name).
- name: Configure git user
  shell: bash
  run: |
    git config --local user.email "github-actions@users.noreply.github.com"
    git config --local user.name "github-actions bot"
|
|
|
|
# Explicitly download the Git LFS objects for the checked-out revision.
- name: Pull LFS files
  shell: bash
  run: |
    printf '%s\n' "Pulling Git LFS files..."
    git lfs pull
|
|
|
|
# Delete anything matching *.db* directly inside ~/.exo; does nothing when
# that directory is absent.
- name: Reset databases
  run: |
    [ ! -d ~/.exo ] || rm -rf ~/.exo/*.db*
|
|
|
|
# Create a scratch EXO_HOME directory and choose an API port, then export
# both to the following steps through the GITHUB_ENV file.
- name: Setup EXO_HOME and API_PORT
  shell: bash
  run: |
    EXO_HOME=$(mktemp -d -t exo-e2e-master-XXXXXXXX)
    # Generate random port (macOS compatible method): 49152-65535 inclusive.
    # NOTE(review): nothing checks that the chosen port is actually free.
    API_PORT=$((49152 + RANDOM % (65535 - 49152 + 1)))

    # GITHUB_ENV is quoted so a runner path containing whitespace cannot
    # turn the redirection into an "ambiguous redirect" error.
    {
      echo "EXO_HOME=$EXO_HOME"
      echo "API_PORT=$API_PORT"
    } >> "$GITHUB_ENV"

    echo "Created EXO_HOME: $EXO_HOME"
    echo "Generated API_PORT: $API_PORT"
    echo "Verifying API_PORT is set: $API_PORT"
|
|
|
|
# Locate a usable nix binary and make it available to the remaining steps.
# Tries, in order: the default profile's bin directory, the nix-daemon
# profile script, and whatever is already on PATH. Fails the step with
# some directory listings when none of them works.
- name: Setup Nix Environment
  shell: bash
  run: |
    echo "Checking for nix installation..."

    nix_profile=/nix/var/nix/profiles/default

    # Check if nix binary exists directly
    if [ -f "$nix_profile/bin/nix" ]; then
      echo "Found nix binary at $nix_profile/bin/nix"
      export PATH="$nix_profile/bin:$PATH"
      # GITHUB_PATH prepends this directory to PATH for all later steps.
      echo "$nix_profile/bin" >> "$GITHUB_PATH"
      nix --version
    elif [ -f "$nix_profile/etc/profile.d/nix-daemon.sh" ]; then
      echo "Found nix profile script, sourcing..."
      source "$nix_profile/etc/profile.d/nix-daemon.sh"
      # Sourcing only changes PATH for this step's shell; record where nix
      # was found so the later steps can run it too.
      nix_bin=$(command -v nix)
      dirname "$nix_bin" >> "$GITHUB_PATH"
      nix --version
    elif command -v nix >/dev/null 2>&1; then
      echo "Nix already in PATH"
      nix --version
    else
      echo "Nix not found. Debugging info:"
      echo "Contents of $nix_profile/:"
      ls -la "$nix_profile/" 2>/dev/null || echo "Directory not found"
      echo "Contents of $nix_profile/bin/:"
      ls -la "$nix_profile/bin/" 2>/dev/null || echo "Directory not found"
      exit 1
    fi
|
|
|
|
# Dump OS, memory, CPU and disk details of the runner into the job log.
- name: Print macOS system information
  run: |
    printf '%s\n' "=== macOS System Information ===" "OS Version:"
    sw_vers

    printf '\n%s\n' "=== Memory Information ==="
    system_profiler SPMemoryDataType

    # Free memory = "Pages free" x page size from vm_stat, printed in GB,
    # followed by the PhysMem line from top.
    printf '\n%s\n' "=== Memory Usage Summary ==="
    vm_stat | perl -ne '/page size of (\d+)/ and $size=$1; /Pages free: (\d+)/ and printf "Free Memory: %.2f GB\n", $1 * $size / 1024 / 1024 / 1024'
    top -l 1 -s 0 | grep PhysMem

    printf '\n%s\n' "=== CPU Information ==="
    sysctl -n machdep.cpu.brand_string
    system_profiler SPHardwareDataType | grep -E "Cores|Processors"

    printf '\n%s\n' "=== Disk Space ==="
    df -h /
|
|
|
|
# - name: Setup Hugging Face token
|
|
# run: |
|
|
# mkdir -p ~/.cache/huggingface
|
|
# echo "${{ secrets.HF_TOKEN }}" > ~/.cache/huggingface/token
|
|
|
|
# Run the `sync-clean` just recipe inside the flake's development shell.
- name: Sync dependencies
  shell: bash
  run: |
    printf '%s\n' "Running just sync-clean to ensure clean dependencies..."
    nix --extra-experimental-features nix-command \
      --extra-experimental-features flakes \
      develop --command just sync-clean
|
|
|
|
# Run the `build-forwarder` just recipe inside the flake's development shell.
- name: Build forwarder
  shell: bash
  run: |
    printf '%s\n' "Building Go forwarder binary..."
    nix --extra-experimental-features nix-command \
      --extra-experimental-features flakes \
      develop --command just build-forwarder
|
|
|
|
# Start a master node and a worker node in the background, wait until the
# master's HTTP API answers on /state, then wait (up to 30 s) for a task
# with status COMPLETE or FAILED to appear in that state. Both node logs
# are printed at the end, and the background nodes are stopped when the
# step exits.
- name: Start node (master)
  run: |
    echo "Starting master node with debug enabled..."
    echo "Environment check - API_PORT: '$API_PORT'"
    echo "Environment check - EXO_HOME: '$EXO_HOME'"
    if [ -z "$API_PORT" ]; then
      echo "ERROR: API_PORT is not set!"
      exit 1
    fi

    # Stop the background nodes however this step ends (success, timeout
    # or an aborted command). PIDs that were never set or have already
    # exited are ignored.
    cleanup() {
      [ -z "${MASTER_PID:-}" ] || kill "$MASTER_PID" 2>/dev/null || true
      [ -z "${WORKER_PID:-}" ] || kill "$WORKER_PID" 2>/dev/null || true
    }
    trap cleanup EXIT

    # Run with Python unbuffered output and maximum debug level
    nix --extra-experimental-features nix-command --extra-experimental-features flakes develop --command bash -c "EXO_HOME=$EXO_HOME API_PORT=$API_PORT PYTHONUNBUFFERED=1 PYTHONDEBUG=1 PYTHONPATH=. uv run master/main.py" > /tmp/master_node.log 2>&1 &
    MASTER_PID=$!
    echo "Started master node in background with PID: $MASTER_PID"
    echo "Log file: /tmp/master_node.log"

    echo "Starting worker node..."
    nix --extra-experimental-features nix-command --extra-experimental-features flakes develop --command bash -c "EXO_HOME=$EXO_HOME PYTHONUNBUFFERED=1 PYTHONDEBUG=1 PYTHONPATH=. uv run worker/main.py" > /tmp/worker_node.log 2>&1 &
    WORKER_PID=$!
    echo "Started worker node in background with PID: $WORKER_PID"
    echo "Log file: /tmp/worker_node.log"

    # Poll the master API once per second; give up after 30 attempts.
    for i in {1..30}; do
      echo "Attempt $i: Checking if master node is ready..."
      if curl -s "http://localhost:$API_PORT/state" > /dev/null 2>&1; then
        echo "Master node is ready!"
        break
      fi
      if [ "$i" -eq 30 ]; then
        echo "Master node failed to start within 30 seconds. Checking logs..."
        echo "=== Master node log ==="
        cat /tmp/master_node.log || echo "No master log file found"
        echo "=== Worker node log ==="
        cat /tmp/worker_node.log || echo "No worker log file found"
        exit 1
      fi
      sleep 1
    done

    # wait for master to have a COMPLETE or FAILED task in the state
    # `jq -e` derives its exit status from the filter result (non-zero for
    # false/null), so the `if` really tests the state. The previous
    # `jq ... > 0` only redirected jq's output into a file named "0" and
    # succeeded whenever jq could parse the response.
    task_finished=false
    for i in {1..30}; do
      if curl -s "http://localhost:$API_PORT/state" | jq -e '.tasks | any(.task_status == "COMPLETE" or .task_status == "FAILED")' > /dev/null; then
        echo "Master node has a COMPLETE or FAILED task in the state"
        task_finished=true
        break
      fi
      sleep 1
    done
    # A timeout is reported but deliberately not fatal: the original loop
    # also fell through to the log dump without failing the step.
    if [ "$task_finished" != true ]; then
      echo "WARNING: no COMPLETE or FAILED task appeared in the master state within 30 seconds"
    fi

    echo "=== Master node log ==="
    cat /tmp/master_node.log || echo "No master log file found"
    echo "=== Worker node log ==="
    cat /tmp/worker_node.log || echo "No worker log file found"
|
|
|
# Remove the scratch EXO_HOME directory; `always()` makes this run even
# when an earlier step failed.
- name: Cleanup EXO_HOME
  if: always()
  shell: bash
  run: |
    printf 'Cleaning up EXO_HOME: %s\n' "$EXO_HOME"
    rm -rf "$EXO_HOME"
|
|
|
|
worker:
|
|
runs-on: ['self-hosted', 'macOS']
|
|
|
|
steps:
|
|
# Check out the repository with Git LFS support enabled on the checkout action.
- name: Checkout repository
  uses: actions/checkout@v4
  with:
    lfs: true
|
|
|
|
# Set a repository-local git identity (email and name).
- name: Configure git user
  shell: bash
  run: |
    git config --local user.email "github-actions@users.noreply.github.com"
    git config --local user.name "github-actions bot"
|
|
|
|
# Explicitly download the Git LFS objects for the checked-out revision.
- name: Pull LFS files
  shell: bash
  run: |
    printf '%s\n' "Pulling Git LFS files..."
    git lfs pull
|
|
|
|
# Delete anything matching *.db* directly inside ~/.exo; does nothing when
# that directory is absent.
- name: Reset databases
  run: |
    [ ! -d ~/.exo ] || rm -rf ~/.exo/*.db*
|
|
|
|
# Create a scratch EXO_HOME directory and choose an API port, then export
# both to the following steps through the GITHUB_ENV file.
- name: Setup EXO_HOME and API_PORT
  shell: bash
  run: |
    EXO_HOME=$(mktemp -d -t exo-e2e-worker-XXXXXXXX)
    # Generate random port (macOS compatible method): 49152-65535 inclusive.
    # NOTE(review): nothing checks that the chosen port is actually free.
    API_PORT=$((49152 + RANDOM % (65535 - 49152 + 1)))

    # GITHUB_ENV is quoted so a runner path containing whitespace cannot
    # turn the redirection into an "ambiguous redirect" error.
    {
      echo "EXO_HOME=$EXO_HOME"
      echo "API_PORT=$API_PORT"
    } >> "$GITHUB_ENV"

    echo "Created EXO_HOME: $EXO_HOME"
    echo "Generated API_PORT: $API_PORT"
    echo "Verifying API_PORT is set: $API_PORT"
|
|
|
|
# Locate a usable nix binary and make it available to the remaining steps.
# Tries, in order: the default profile's bin directory, the nix-daemon
# profile script, and whatever is already on PATH. Fails the step with
# some directory listings when none of them works.
- name: Setup Nix Environment
  shell: bash
  run: |
    echo "Checking for nix installation..."

    nix_profile=/nix/var/nix/profiles/default

    # Check if nix binary exists directly
    if [ -f "$nix_profile/bin/nix" ]; then
      echo "Found nix binary at $nix_profile/bin/nix"
      export PATH="$nix_profile/bin:$PATH"
      # GITHUB_PATH prepends this directory to PATH for all later steps.
      echo "$nix_profile/bin" >> "$GITHUB_PATH"
      nix --version
    elif [ -f "$nix_profile/etc/profile.d/nix-daemon.sh" ]; then
      echo "Found nix profile script, sourcing..."
      source "$nix_profile/etc/profile.d/nix-daemon.sh"
      # Sourcing only changes PATH for this step's shell; record where nix
      # was found so the later steps can run it too.
      nix_bin=$(command -v nix)
      dirname "$nix_bin" >> "$GITHUB_PATH"
      nix --version
    elif command -v nix >/dev/null 2>&1; then
      echo "Nix already in PATH"
      nix --version
    else
      echo "Nix not found. Debugging info:"
      echo "Contents of $nix_profile/:"
      ls -la "$nix_profile/" 2>/dev/null || echo "Directory not found"
      echo "Contents of $nix_profile/bin/:"
      ls -la "$nix_profile/bin/" 2>/dev/null || echo "Directory not found"
      exit 1
    fi
|
|
|
|
# Dump OS, memory, CPU and disk details of the runner into the job log.
- name: Print macOS system information
  run: |
    printf '%s\n' "=== macOS System Information ===" "OS Version:"
    sw_vers

    printf '\n%s\n' "=== Memory Information ==="
    system_profiler SPMemoryDataType

    # Free memory = "Pages free" x page size from vm_stat, printed in GB,
    # followed by the PhysMem line from top.
    printf '\n%s\n' "=== Memory Usage Summary ==="
    vm_stat | perl -ne '/page size of (\d+)/ and $size=$1; /Pages free: (\d+)/ and printf "Free Memory: %.2f GB\n", $1 * $size / 1024 / 1024 / 1024'
    top -l 1 -s 0 | grep PhysMem

    printf '\n%s\n' "=== CPU Information ==="
    sysctl -n machdep.cpu.brand_string
    system_profiler SPHardwareDataType | grep -E "Cores|Processors"

    printf '\n%s\n' "=== Disk Space ==="
    df -h /
|
|
|
|
# - name: Setup Hugging Face token
|
|
# run: |
|
|
# mkdir -p ~/.cache/huggingface
|
|
# echo "${{ secrets.HF_TOKEN }}" > ~/.cache/huggingface/token
|
|
|
|
# Run the `sync-clean` just recipe inside the flake's development shell.
- name: Sync dependencies
  shell: bash
  run: |
    printf '%s\n' "Running just sync-clean to ensure clean dependencies..."
    nix --extra-experimental-features nix-command \
      --extra-experimental-features flakes \
      develop --command just sync-clean
|
|
|
|
# Run the `build-forwarder` just recipe inside the flake's development shell.
- name: Build forwarder
  shell: bash
  run: |
    printf '%s\n' "Building Go forwarder binary..."
    nix --extra-experimental-features nix-command \
      --extra-experimental-features flakes \
      develop --command just build-forwarder
|
|
|
|
# Start master/main.py with EXO_RUN_AS_REPLICA=1 plus a worker node in the
# background, wait for the API to answer on /state, then exercise it:
# create an instance for llama-3.2:1b, poll until it reports ACTIVE, send
# one chat completion request, delete the instance, print both node logs
# and stop the nodes.
- name: Start node (replica)
  run: |
    echo "Starting master node with debug enabled..."
    echo "Environment check - API_PORT: '$API_PORT'"
    echo "Environment check - EXO_HOME: '$EXO_HOME'"
    if [ -z "$API_PORT" ]; then
      echo "ERROR: API_PORT is not set!"
      exit 1
    fi

    # Safety net: if any command below aborts the step before the explicit
    # kills at the bottom are reached, the background nodes are still
    # stopped. PIDs that were never set or have already exited are ignored.
    cleanup() {
      [ -z "${MASTER_PID:-}" ] || kill "$MASTER_PID" 2>/dev/null || true
      [ -z "${WORKER_PID:-}" ] || kill "$WORKER_PID" 2>/dev/null || true
    }
    trap cleanup EXIT

    # Run with Python unbuffered output and maximum debug level
    nix --extra-experimental-features nix-command --extra-experimental-features flakes develop --command bash -c "EXO_RUN_AS_REPLICA=1 EXO_HOME=$EXO_HOME API_PORT=$API_PORT PYTHONUNBUFFERED=1 PYTHONDEBUG=1 PYTHONPATH=. uv run master/main.py" > /tmp/master_node.log 2>&1 &
    MASTER_PID=$!
    echo "Started master node in background with PID: $MASTER_PID"
    echo "Log file: /tmp/master_node.log"

    echo "Starting worker node..."
    nix --extra-experimental-features nix-command --extra-experimental-features flakes develop --command bash -c "EXO_HOME=$EXO_HOME PYTHONUNBUFFERED=1 PYTHONDEBUG=1 PYTHONPATH=. uv run worker/main.py" > /tmp/worker_node.log 2>&1 &
    WORKER_PID=$!
    echo "Started worker node in background with PID: $WORKER_PID"
    echo "Log file: /tmp/worker_node.log"

    echo "Waiting for master node to start on port $API_PORT..."
    # Wait for the master node to be ready (up to 30 seconds)
    for i in {1..30}; do
      echo "Attempt $i: Checking if master node is ready..."
      if curl -s "http://localhost:$API_PORT/state" > /dev/null 2>&1; then
        echo "Master node is ready!"
        break
      fi
      if [ "$i" -eq 30 ]; then
        echo "Master node failed to start within 30 seconds. Checking logs..."
        echo "=== Master node log ==="
        cat /tmp/master_node.log || echo "No master log file found"
        echo "=== Worker node log ==="
        cat /tmp/worker_node.log || echo "No worker log file found"
        exit 1
      fi
      sleep 1
    done

    # Create an instance. -sS hides curl's progress meter but keeps its
    # error messages.
    resp=$(curl -sS -X POST "http://localhost:$API_PORT/instance" -H "Content-Type: application/json" -d '{"model_id": "llama-3.2:1b"}')
    echo "Response: $resp"
    # The JSON is passed to jq quoted so the shell cannot word-split or
    # glob-expand it on the way.
    instance_id=$(jq -r '.instance_id' <<<"$resp")
    echo "Instance ID: $instance_id"

    # Poll the instance once per second (up to 50 attempts) until it
    # reports ACTIVE.
    instance_ready=false
    for i in {1..50}; do
      # -w appends the 3-digit HTTP status to the body; split them apart.
      resp=$(curl -s -w "%{http_code}" -X GET "http://localhost:$API_PORT/instance/$instance_id" -H "Content-Type: application/json")
      http_code="${resp: -3}"
      response_body="${resp%???}"
      echo "HTTP Code: $http_code"
      echo "Response: $response_body"

      if [ "$http_code" == "200" ]; then
        instance_status=$(jq -r '.instance_type' <<<"$response_body")
        if [ "$instance_status" == "ACTIVE" ]; then
          echo "Instance is ready"
          instance_ready=true
          break
        fi
      elif [ "$http_code" == "404" ]; then
        echo "Instance not yet created, waiting..."
      else
        echo "Unexpected HTTP status: $http_code"
      fi
      sleep 1
    done
    # A timeout is reported but not fatal: the original flow also carried
    # on to the chat completion request in that case.
    if [ "$instance_ready" != true ]; then
      echo "WARNING: instance did not become ACTIVE within 50 attempts"
    fi

    resp=$(curl -sS "http://localhost:$API_PORT/v1/chat/completions" -H "Content-Type: application/json" -d '{"model": "llama-3.2:1b", "messages": [{"role": "user", "content": "What is the meaning of exo?"}], "temperature": 0.7}')
    echo "Response: $resp"

    resp=$(curl -sS -X DELETE "http://localhost:$API_PORT/instance/$instance_id" -H "Content-Type: application/json")
    echo "Response: $resp"

    echo "=== Master node log ==="
    cat /tmp/master_node.log || echo "No master log file found"
    echo "=== Worker node log ==="
    cat /tmp/worker_node.log || echo "No worker log file found"

    # As before, a kill that fails (node already gone) fails the step.
    kill "$MASTER_PID"
    kill "$WORKER_PID"
|
|
|
|
# Remove the scratch EXO_HOME directory; `always()` makes this run even
# when an earlier step failed.
- name: Cleanup EXO_HOME
  if: always()
  shell: bash
  run: |
    printf 'Cleaning up EXO_HOME: %s\n' "$EXO_HOME"
    rm -rf "$EXO_HOME"
|