diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index da29fc78..7f3f1616 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -31,9 +31,12 @@ jobs: - name: startup machine id: paperspace-machine run: | - PUBLIC_IP=$(python tools/machines.py --ci-template ${{secrets.PAPERSPACE_TEMPLATE_ID}}) + PUBLIC_IP=$(python tools/machines.py --ci-template ${{secrets.PAPERSPACE_TEMPLATE_ID}} --output-ip) + MACHINE_ID=$(python tools/machines.py --ci-template ${{secrets.PAPERSPACE_TEMPLATE_ID}} --output-id) echo "::add-mask::$PUBLIC_IP" + echo "::add-mask::$MACHINE_ID" echo "publicIp=$PUBLIC_IP" >> $GITHUB_OUTPUT + echo "machineId=$MACHINE_ID" >> $GITHUB_OUTPUT - name: running regression tests (PR) uses: appleboy/ssh-action@029f5b4aeeeb58fdfe1410a5d17f967dacf36262 # ratchet:appleboy/ssh-action@v1.0.3 if: github.event_name == 'pull_request' @@ -53,7 +56,7 @@ jobs: port: ${{ secrets.PAPERSPACE_PORT }} script: /nix/var/nix/profiles/default/bin/nix run github:aarnphm/dix#openllm-ci --refresh -- --head - name: shutdown machine - run: python tools/machines.py --delete ${{ steps.paperspace-machine.outputs.publicIp }} + run: python tools/machines.py --delete ${{ steps.paperspace-machine.outputs.machineId }} evergreen: # https://github.com/marketplace/actions/alls-green#why if: always() needs: diff --git a/tools/machines.py b/tools/machines.py index 244e3899..aedc5a70 100644 --- a/tools/machines.py +++ b/tools/machines.py @@ -1,78 +1,78 @@ from __future__ import annotations import httpx,os,dataclasses,datetime,time,argparse,typing as t - -if (ENV := os.getenv("PAPERSPACE_API_KEY")) is None: raise RuntimeError('This script requires setting "PAPERSPACE_API_KEY"') -HEADERS = httpx.Headers({'Authorization': f'Bearer {ENV}', 'Accept': 'application/json'}) -API_URL = 'https://api.paperspace.com/v1' +if (ENV:=os.getenv("PAPERSPACE_API_KEY")) is None:raise RuntimeError('This script requires setting "PAPERSPACE_API_KEY"') +HEADERS=httpx.Headers({'Authorization': f'Bearer {ENV}', 'Accept': 'application/json'}) +API_URL='https://api.paperspace.com/v1' @dataclasses.dataclass class Machine: - id: str - inner: httpx.Client = dataclasses.field(default_factory=lambda: httpx.Client(headers=HEADERS, base_url=API_URL, timeout=60), repr=False) - def close(self): self.inner.close() - def __del__(self): self.close() - def __enter__(self): return self - def __exit__(self, *_: t.Any) -> None: self.close() + id:str + inner:httpx.Client=dataclasses.field(default_factory=lambda:httpx.Client(headers=HEADERS,base_url=API_URL,timeout=60),repr=False) + def close(self):self.inner.close() + def __del__(self):self.close() + def __enter__(self):return self + def __exit__(self, *_: t.Any)->None:self.close() @property - def metadata(self) -> dict[str, t.Any]: return self.inner.get(f'/machines/{self.id}').json() + def metadata(self)->dict[str,t.Any]:return self.inner.get(f'/machines/{self.id}').json() @property - def status(self) -> t.Literal['off', 'ready', 'stopping', 'starting']: return self.metadata['state'] - def start(self) -> bool: - response = self.inner.patch(f'/machines/{self.id}/start') - if response.status_code == 400 or self.status == 'ready': + def status(self)->t.Literal['off','ready','stopping','starting']:return self.metadata['state'] + def start(self)->bool: + response=self.inner.patch(f'/machines/{self.id}/start') + if response.status_code==400 or self.status=='ready': print('machine is already running') return False - elif response.status_code != 200: raise ValueError(f'Error while starting machine: {response.json()}') + elif response.status_code!=200:raise ValueError(f'Error while starting machine: {response.json()}') return True - def stop(self) -> bool: - response = self.inner.patch(f'/machines/{self.id}/stop') - if response.status_code == 400 or self.status == 'off': + def stop(self)->bool: + response=self.inner.patch(f'/machines/{self.id}/stop') + if response.status_code==400 or self.status=='off': print('machine is already off') return False - elif response.status_code != 200: raise ValueError(f'Error while stopping machine {response.json()}') + elif response.status_code!=200:raise ValueError(f'Error while stopping machine {response.json()}') return True @classmethod - def ci(cls, template_id: str): - client = httpx.Client(headers=HEADERS, base_url=API_URL, timeout=60) - machines = client.get('/machines', params=dict(limit=1, name='openllm-ci')).json() + def ci(cls,template_id:str)->Machine: + client = httpx.Client(headers=HEADERS,base_url=API_URL,timeout=60) + machines=client.get('/machines',params=dict(limit=1,name='openllm-ci')).json() if len(machines['items']) == 1: - return cls(id=machines['items'][0]['id'], inner=client) - response = client.post('/machines', json=dict( - name=f'openllm-ci-{datetime.datetime.now().timestamp()}', - machineType='A4000', templateId=template_id, - networkId=os.getenv("PAPERSPACE_NETWORK_ID"), - diskSize=500, region='ny2', publicIpType="dynamic", startOnCreate=True, - )) - if response.status_code != 200: raise ValueError(f'Failed while creating a machine: {response.json()}') - return cls(id=response.json()['data']['id'], inner=client) - def actions(self): return self.metadata["publicIp"] - + return cls(id=machines['items'][0]['id'],inner=client) + response=client.post('/machines',json=dict( + name=f'openllm-ci-{datetime.datetime.now().timestamp()}', machineType='A4000',templateId=template_id, + networkId=os.getenv("PAPERSPACE_NETWORK_ID"), diskSize=500,region='ny2',publicIpType='dynamic',startOnCreate=True)) + if response.status_code!=200:raise ValueError(f'Failed while creating a machine: {response.json()}') + return cls(id=response.json()['data']['id'],inner=client) + def actions(self,ip:bool=False,id:bool=False)->str: + if ip:return self.metadata["publicIp"] + if id:return self.id + raise ValueError('cannot be all false.') def main(): - parser = argparse.ArgumentParser() - group = parser.add_mutually_exclusive_group(required=True) - group.add_argument('--start', metavar='ID') - group.add_argument('--stop', metavar='ID') - group.add_argument('--delete', metavar='ID') - group.add_argument('--ci-template', metavar='ID') - args = parser.parse_args() + parser=argparse.ArgumentParser() + group=parser.add_mutually_exclusive_group(required=True) + group.add_argument('--start',metavar='ID') + group.add_argument('--stop',metavar='ID') + group.add_argument('--delete',metavar='ID') + group.add_argument('--ci-template',metavar='ID') + parser.add_argument('--output-ip',action='store_true') + parser.add_argument('--output-id',action='store_true') + args=parser.parse_args() if args.ci_template: - machine = Machine.ci(args.ci_template) - while machine.status != 'ready': time.sleep(5) - print(machine.actions()) + machine=Machine.ci(args.ci_template) + while machine.status!='ready':time.sleep(5) + print(machine.actions(args.output_ip,args.output_id)) machine.close() elif args.delete: with httpx.Client(headers=HEADERS, base_url=API_URL, timeout=60) as client: - response = client.delete(f'/machines/{args.delete}') - if response.status_code != 200: - print('Error while deleting machine %s', response.json()) + response=client.delete(f'/machines/{args.delete}') + if response.status_code!=200: + print('Error while deleting machine', response.json()) return 1 elif args.start: with Machine(id=args.start) as machine: if machine.start(): - while machine.status != 'ready': - print('Waiting for machine "%s" to be ready...', machine.id) + while machine.status!='ready': + print('Waiting for machine to be ready...') time.sleep(5) else: print('Failed to start machine "%s"', machine.id) @@ -80,12 +80,12 @@ def main(): elif args.stop: with Machine(id=args.stop) as machine: if machine.stop(): - while machine.status != 'ready': - print('Waiting for machine "%s" to stop...', machine.id) + while machine.status!='ready': + print('Waiting for machine to stop...') time.sleep(5) else: - print('Failed to stop machine "%s"', machine.id) + print('Failed to stop machine') return 1 return 0 -if __name__ == "__main__": raise SystemExit(main()) +if __name__ == "__main__":raise SystemExit(main())