mirror of
https://github.com/bentoml/OpenLLM.git
synced 2026-04-22 16:07:24 -04:00
feat(infra): add support for autogenerate CI runners
Signed-off-by: Aaron Pham <29749331+aarnphm@users.noreply.github.com>
This commit is contained in:
16
.github/workflows/ci.yml
vendored
16
.github/workflows/ci.yml
vendored
@@ -26,15 +26,17 @@ jobs:
|
||||
- name: setup tooling
|
||||
run: |
|
||||
python -m pip install uv
|
||||
uv pip install --system httpx
|
||||
uv pip install --system httpx orjson
|
||||
- name: startup machine
|
||||
run: python tools/machines.py --start ${{ secrets.PAPERSPACE_MACHINE_ID }} || true
|
||||
id: paperspace-machine
|
||||
run: |
|
||||
echo "$(python tools/machines.py --ci-template ${{secrets.PAPERSPACE_TEMPLATE_ID}})" >> $GITHUB_OUTPUT
|
||||
- name: running regression tests (PR)
|
||||
uses: appleboy/ssh-action@029f5b4aeeeb58fdfe1410a5d17f967dacf36262 # ratchet:appleboy/ssh-action@v1.0.3
|
||||
if: github.event_name == 'pull_request'
|
||||
with:
|
||||
host: ${{secrets.PAPERSPACE_HOST}}
|
||||
username: ${{secrets.PAPERSPACE_USERNAME}}
|
||||
host: ${{steps.outputs.paperspace-machine.outputs.publicIp}}
|
||||
username: paperspace
|
||||
key: ${{secrets.PAPERSPACE_SSH_KEY}}
|
||||
port: ${{secrets.PAPERSPACE_PORT}}
|
||||
script: /nix/var/nix/profiles/default/bin/nix run github:aarnphm/dix#openllm-ci --refresh -- --pr ${{github.event.number}}
|
||||
@@ -42,11 +44,13 @@ jobs:
|
||||
uses: appleboy/ssh-action@029f5b4aeeeb58fdfe1410a5d17f967dacf36262 # ratchet:appleboy/ssh-action@v1.0.3
|
||||
if: ${{ !github.event.repository.fork && github.event_name == 'push' }}
|
||||
with:
|
||||
host: ${{secrets.PAPERSPACE_HOST}}
|
||||
username: ${{secrets.PAPERSPACE_USERNAME}}
|
||||
host: ${{steps.outputs.paperspace-machine.outputs.ip}}
|
||||
username: paperspace
|
||||
key: ${{secrets.PAPERSPACE_SSH_KEY}}
|
||||
port: ${{secrets.PAPERSPACE_PORT}}
|
||||
script: /nix/var/nix/profiles/default/bin/nix run github:aarnphm/dix#openllm-ci --refresh -- --head
|
||||
- name: shutdown machine
|
||||
run: python tools/machines.py --delete ${{ steps.outputs.paperspace-machine.outputs.ip }}
|
||||
evergreen: # https://github.com/marketplace/actions/alls-green#why
|
||||
if: always()
|
||||
needs:
|
||||
|
||||
@@ -1,19 +1,15 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import httpx,os,dataclasses,logging,time,argparse,typing as t
|
||||
import httpx,os,dataclasses,datetime,time,argparse,typing as t
|
||||
|
||||
# Fail at import time when the API key is absent: every request below needs it.
if (ENV := os.getenv("PAPERSPACE_API_KEY")) is None: raise RuntimeError('This script requires setting "PAPERSPACE_API_KEY"')
|
||||
# Headers shared by every client built in this file: bearer-token auth, JSON responses.
HEADERS = httpx.Headers({'Authorization': f'Bearer {ENV}', 'Accept': 'application/json'})
|
||||
# Base URL that the relative '/machines/...' request paths are resolved against.
API_URL = 'https://api.paperspace.com/v1'
|
||||
|
||||
logging.basicConfig(level=logging.ERROR)
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@dataclasses.dataclass
|
||||
class Machine:
|
||||
id: str
|
||||
inner: httpx.Client = dataclasses.field(default_factory=lambda: httpx.Client(headers=HEADERS, base_url=API_URL, timeout=60), repr=False)
|
||||
|
||||
def close(self): self.inner.close()
|
||||
def __del__(self): self.close()
|
||||
def __enter__(self): return self
|
||||
@@ -25,44 +21,70 @@ class Machine:
|
||||
def start(self) -> bool:
    """Ask the API to start this machine.

    Returns:
        ``True`` when the start request was accepted (HTTP 200).
        ``False`` when the API answered 400 or the machine already reports
        the ``'ready'`` status; a notice is printed in that case.

    Raises:
        ValueError: for any other non-200 response; the message carries the
            decoded JSON body of the response.
    """
    response = self.inner.patch(f'/machines/{self.id}/start')
    # ``self.status`` is only consulted when the response code is not 400.
    if response.status_code == 400 or self.status == 'ready':
        print('machine is already running')
        return False
    if response.status_code != 200:
        raise ValueError(f'Error while starting machine: {response.json()}')
    return True
|
||||
def stop(self) -> bool:
    """Ask the API to stop this machine.

    Returns:
        ``True`` when the stop request was accepted (HTTP 200).
        ``False`` when the API answered 400 or the machine already reports
        the ``'off'`` status; a notice is printed in that case.

    Raises:
        ValueError: for any other non-200 response; the message carries the
            decoded JSON body of the response.
    """
    response = self.inner.patch(f'/machines/{self.id}/stop')
    # ``self.status`` is only consulted when the response code is not 400.
    if response.status_code == 400 or self.status == 'off':
        print('machine is already off')
        return False
    if response.status_code != 200:
        raise ValueError(f'Error while stopping machine {response.json()}')
    return True
|
||||
@classmethod
def ci(cls, template_id: str) -> 'Machine':
    """Return a handle to the CI machine, creating one when none is listed.

    The machine list is queried with ``name='openllm-ci'`` and ``limit=1``.
    If that returns an item, its id is reused; otherwise a new machine named
    ``openllm-ci-<timestamp>`` is created from ``template_id`` with
    ``startOnCreate=True``.

    Args:
        template_id: Paperspace template used when a machine has to be created.

    Returns:
        A ``Machine`` that owns the HTTP client opened here.

    Raises:
        ValueError: when the create request does not answer with HTTP 200.
    """
    client = httpx.Client(headers=HEADERS, base_url=API_URL, timeout=60)
    try:
        machines = client.get('/machines', params=dict(limit=1, name='openllm-ci')).json()
        if len(machines['items']) == 1:
            return cls(id=machines['items'][0]['id'], inner=client)
        response = client.post('/machines', json=dict(
            name=f'openllm-ci-{datetime.datetime.now().timestamp()}',
            machineType='A100-80G', templateId=template_id,
            networkId=os.getenv("PAPERSPACE_NETWORK_ID"),
            diskSize=500, region='ny2', publicIpType="dynamic", startOnCreate=True,
        ))
        if response.status_code != 200:
            raise ValueError(f'Failed while creating a machine: {response.json()}')
        return cls(id=response.json()['data']['id'], inner=client)
    except Exception:
        # No Machine took ownership of the client, so release it before propagating.
        client.close()
        raise
|
||||
def actions(self):
    """Format the machine's public IP as a ``publicIp=<value>`` line.

    The value is read from the ``"publicIp"`` key of ``self.metadata``.
    """
    public_ip = self.metadata["publicIp"]
    return f'publicIp={public_ip}'
|
||||
|
||||
def main():
    """Command-line entry point for managing Paperspace machines.

    Exactly one of these options is required:

    * ``--ci-template ID``: obtain the CI machine through ``Machine.ci``,
      wait until it reports ``'ready'`` and print ``machine.actions()``.
    * ``--delete ID``: delete the machine with the given id.
    * ``--start ID``: start the machine and wait until it reports ``'ready'``.
    * ``--stop ID``: stop the machine and wait until it reports ``'off'``.

    Returns:
        ``0`` on success, ``1`` when the requested operation failed.
    """
    parser = argparse.ArgumentParser()
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument('--start', metavar='ID')
    group.add_argument('--stop', metavar='ID')
    group.add_argument('--delete', metavar='ID')
    group.add_argument('--ci-template', metavar='ID')
    args = parser.parse_args()

    # NOTE(review): none of the polling loops below has a timeout.
    if args.ci_template:
        machine = Machine.ci(args.ci_template)
        try:
            while machine.status != 'ready':
                time.sleep(5)
            print(machine.actions())
        finally:
            # Release the HTTP client even when polling raises.
            machine.close()
    elif args.delete:
        with httpx.Client(headers=HEADERS, base_url=API_URL, timeout=60) as client:
            response = client.delete(f'/machines/{args.delete}')
            if response.status_code != 200:
                print(f'Error while deleting machine {response.json()}')
                return 1
    elif args.start:
        with Machine(id=args.start) as machine:
            if machine.start():
                while machine.status != 'ready':
                    print(f'Waiting for machine "{machine.id}" to be ready...')
                    time.sleep(5)
            else:
                print(f'Failed to start machine "{machine.id}"')
                return 1
    elif args.stop:
        with Machine(id=args.stop) as machine:
            if machine.stop():
                # A stopped machine reports 'off' (the status Machine.stop checks),
                # so waiting for 'ready' here would never finish.
                while machine.status != 'off':
                    print(f'Waiting for machine "{machine.id}" to stop...')
                    time.sleep(5)
            else:
                print(f'Failed to stop machine "{machine.id}"')
                return 1
    return 0
|
||||
|
||||
|
||||
@@ -3,3 +3,4 @@ jupyter
|
||||
tomlkit
|
||||
ghapi
|
||||
pre-commit
|
||||
orjson
|
||||
|
||||
Reference in New Issue
Block a user