diff --git a/.github/workflows/binary-releases.yml b/.github/workflows/binary-releases.yml index c5b487f9..d36b3abf 100644 --- a/.github/workflows/binary-releases.yml +++ b/.github/workflows/binary-releases.yml @@ -8,8 +8,6 @@ on: paths-ignore: - '*.md' - 'docs/**' - - 'bazel/**' - - 'typings/**' - 'changelog.d/**' - 'assets/**' - 'openllm-node/**' @@ -19,8 +17,6 @@ on: paths-ignore: - '*.md' - 'docs/**' - - 'bazel/**' - - 'typings/**' - 'changelog.d/**' - 'assets/**' - 'openllm-node/**' @@ -74,10 +70,10 @@ jobs: run: python -m pip install --upgrade build - name: Build run: | - bash local.sh + bash local.sh -e vllm python -m build -sw openllm-python/ - name: Upload artifacts - uses: actions/upload-artifact@a8a3f3ad30e3422c9c7b888a15615d19a852ae32 # ratchet:actions/upload-artifact@v3 + uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # ratchet:actions/upload-artifact@v4 with: name: binary-artefacts path: openllm-python/dist/* @@ -146,7 +142,7 @@ jobs: - name: Install Hatch run: pip install -U hatch - name: Install Rust toolchain - uses: dtolnay/rust-toolchain@78c6b5541adb5849f5d72d15da722aedb26327ca # ratchet:dtolnay/rust-toolchain@stable + uses: dtolnay/rust-toolchain@d388a4836fcdbde0e50e395dc79a2670ccdef13f # ratchet:dtolnay/rust-toolchain@stable with: targets: ${{ matrix.job.target }} - name: Set up cross compiling @@ -165,7 +161,7 @@ jobs: fi - name: Download Python artifacts if: ${{ !startsWith(github.event.ref, 'refs/tags') }} - uses: actions/download-artifact@9bc31d5ccc31df68ecc42ccf4149144866c47d8a # ratchet:actions/download-artifact@v3 + uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # ratchet:actions/download-artifact@v4 with: name: binary-artefacts path: openllm-python/dist @@ -211,14 +207,14 @@ jobs: done - name: Upload staged archive if: runner.os != 'Linux' - uses: actions/upload-artifact@a8a3f3ad30e3422c9c7b888a15615d19a852ae32 # ratchet:actions/upload-artifact@v3 + uses: 
actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # ratchet:actions/upload-artifact@v4 with: name: staged-${{ runner.os }} path: openllm-python/packaging/* if-no-files-found: error - name: Upload archive if: runner.os == 'Linux' - uses: actions/upload-artifact@a8a3f3ad30e3422c9c7b888a15615d19a852ae32 # ratchet:actions/upload-artifact@v3 + uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # ratchet:actions/upload-artifact@v4 with: name: standalone path: openllm-python/packaging/* @@ -245,7 +241,7 @@ jobs: - name: Install PyOxidizer ${{ env.PYOXIDIZER_VERSION }} run: pip install pyoxidizer==${{ env.PYOXIDIZER_VERSION }} - name: Download staged binaries - uses: actions/download-artifact@9bc31d5ccc31df68ecc42ccf4149144866c47d8a # ratchet:actions/download-artifact@v3 + uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # ratchet:actions/download-artifact@v4 with: name: staged-${{ runner.os }} path: openllm-python/archives @@ -274,13 +270,13 @@ jobs: mkdir installers mv build/*/release/*/*.{exe,msi} installers - name: Upload binaries - uses: actions/upload-artifact@a8a3f3ad30e3422c9c7b888a15615d19a852ae32 # ratchet:actions/upload-artifact@v3 + uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # ratchet:actions/upload-artifact@v4 with: name: standalone path: openllm-python/archives/* if-no-files-found: error - name: Upload installers - uses: actions/upload-artifact@a8a3f3ad30e3422c9c7b888a15615d19a852ae32 # ratchet:actions/upload-artifact@v3 + uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # ratchet:actions/upload-artifact@v4 with: name: installers path: openllm-python/installers/* diff --git a/.github/workflows/build-pypi.yml b/.github/workflows/build-pypi.yml index d3962b2a..a4a6cf3b 100644 --- a/.github/workflows/build-pypi.yml +++ b/.github/workflows/build-pypi.yml @@ -86,9 +86,9 @@ jobs: run: hatch build working-directory: ${{ matrix.directory }} - name: Upload 
artifacts - uses: actions/upload-artifact@a8a3f3ad30e3422c9c7b888a15615d19a852ae32 # ratchet:actions/upload-artifact@v3 + uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # ratchet:actions/upload-artifact@v4 with: - name: python-artefacts + name: python-artefacts-${{ matrix.directory }} path: ${{ matrix.directory }}/dist/* if-no-files-found: error check-download-artefacts: @@ -98,9 +98,10 @@ jobs: runs-on: ubuntu-latest steps: - name: Download Python artifacts - uses: actions/download-artifact@9bc31d5ccc31df68ecc42ccf4149144866c47d8a # ratchet:actions/download-artifact@v3 + uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # ratchet:actions/download-artifact@v4 with: - name: python-artefacts + pattern: python-artefacts-* + merge-multiple: true path: dist - name: dry ls run: ls -rthlaR @@ -110,13 +111,13 @@ jobs: runs-on: ubuntu-latest permissions: id-token: write - # needs: [pure-wheels-sdist, mypyc] needs: [pure-wheels-sdist] steps: - name: Download Python artifacts - uses: actions/download-artifact@9bc31d5ccc31df68ecc42ccf4149144866c47d8a # ratchet:actions/download-artifact@v3 + uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # ratchet:actions/download-artifact@v4 with: - name: python-artefacts + pattern: python-artefacts-* + merge-multiple: true path: dist - name: Publish nightly wheels to test.pypi.org uses: pypa/gh-action-pypi-publish@81e9d935c883d0b210363ab89cf05f3894778450 # ratchet:pypa/gh-action-pypi-publish@release/v1 diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml deleted file mode 100644 index f4d9e72a..00000000 --- a/.github/workflows/build.yml +++ /dev/null @@ -1,222 +0,0 @@ -name: Build and push OpenLLM base container -on: - workflow_dispatch: - push: - branches: - - 'main' - tags: - - '*' - paths: - - 'openllm-python/src/openllm/**' - - 'openllm-python/src/openllm_cli/**' - - 'openllm-core/src/openllm_core/**' - - 'openllm-client/src/openllm_client/**' - 
pull_request: - branches: - - 'main' - paths: - - 'openllm-python/src/openllm/**' - - 'openllm-python/src/openllm_cli/**' - - 'openllm-core/src/openllm_core/**' - - 'openllm-client/src/openllm_client/**' - types: [labeled, opened, synchronize, reopened] - workflow_call: - inputs: - tags: - required: true - type: string -env: - LINES: 120 - COLUMNS: 120 - OPENLLM_DO_NOT_TRACK: True - PYTHONUNBUFFERED: '1' - AWS_REGION: us-west-2 -concurrency: - group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }} - cancel-in-progress: true -jobs: - get_commit_message: - name: Get commit message - runs-on: ubuntu-latest - if: "github.repository == 'bentoml/OpenLLM'" # Don't run on fork repository - outputs: - message: ${{ steps.commit_message.outputs.message }} - steps: - - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # ratchet:actions/checkout@v4.1.6 - # Gets the correct commit message for pull request - with: - ref: ${{ github.event.pull_request.head.sha }} - - name: Get commit message - id: commit_message - run: | - set -xe - COMMIT_MSG=$(git log --no-merges -1 --oneline) - echo "message=$COMMIT_MSG" >> $GITHUB_OUTPUT - echo github.ref ${{ github.ref }} - start-runner: - name: Start self-hosted EC2 runner - runs-on: ubuntu-latest - needs: get_commit_message - if: >- - contains(needs.get_commit_message.outputs.message, '[ec2 build]') || github.event_name == 'workflow_dispatch' || (github.event_name == 'pull_request' && contains(github.event.pull_request.labels.*.name, '00 - EC2 Build')) || (github.event_name == 'push' && (startsWith(github.ref, 'refs/tags/v') || startsWith(github.ref, 'refs/heads/main'))) - env: - EC2_INSTANCE_TYPE: t3.2xlarge - EC2_AMI_ID: ami-089dafe9af191a0fd - EC2_SUBNET_ID: subnet-0ca63188fe98788c1,subnet-05997205433b249d0,subnet-07ef5d3e974275fed,subnet-0161ef0151089bb0b - EC2_SECURITY_GROUP: sg-051366641bf2b8049 - outputs: - label: ${{ steps.start-ec2-runner.outputs.label }} - ec2-instance-id: ${{ 
steps.start-ec2-runner.outputs.ec2-instance-id }} - steps: - - name: Configure AWS credentials - uses: aws-actions/configure-aws-credentials@e3dd6a429d7300a6a4c196c26e071d42e0343502 # ratchet:aws-actions/configure-aws-credentials@v4.0.2 - with: - aws-access-key-id: ${{ secrets.AWS_PROD_ACCESS_KEY_ID }} - aws-secret-access-key: ${{ secrets.AWS_PROD_SECRET_ACCESS_KEY }} - aws-region: ${{ env.AWS_REGION }} - - name: Start EC2 Runner - id: start-ec2-runner - uses: aarnphm/ec2-github-runner@main # ratchet:exclude - with: - mode: start - github-token: ${{ secrets.OPENLLM_PAT }} - ec2-region: ${{ env.AWS_REGION }} - ec2-image-id: ${{ env.EC2_AMI_ID }} - ec2-instance-type: ${{ env.EC2_INSTANCE_TYPE }} - subnet-id: ${{ env.EC2_SUBNET_ID }} - security-group-id: ${{ env.EC2_SECURITY_GROUP }} - build-and-push-image: - name: Build and push OpenLLM base image - concurrency: - group: ${{ github.workflow }}-${{ github.job }}-${{ github.head_ref || github.run_id }} - cancel-in-progress: true - needs: start-runner - runs-on: ${{ needs.start-runner.outputs.label }} # run the job on the newly created runner - permissions: - contents: write - packages: write - # This is used to complete the identity challenge - # with sigstore/fulcio when running outside of PRs. 
- id-token: write - security-events: write - steps: - - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # ratchet:actions/checkout@v4.1.6 - with: - fetch-depth: 0 - ref: '${{ inputs.tags }}' - - name: Inject slug/short variables - uses: rlespinasse/github-slug-action@797d68864753cbceedc271349d402da4590e6302 # ratchet:rlespinasse/github-slug-action@v4.5.0 - - name: Set up QEMU - uses: docker/setup-qemu-action@68827325e0b33c7199eb31dd4e31fbe9023e06e3 # ratchet:docker/setup-qemu-action@v3.0.0 - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@d70bba72b1f3fd22344832f00baa16ece964efeb # ratchet:docker/setup-buildx-action@v3.3.0 - with: - install: true - driver-opts: | - image=moby/buildkit:master - network=host - - name: Install cosign - if: github.event_name != 'pull_request' - uses: sigstore/cosign-installer@59acb6260d9c0ba8f4a2f9d9b48431a222b68e20 # ratchet:sigstore/cosign-installer@v3.5.0 - with: - cosign-release: 'v2.1.1' - - name: Login to GitHub Container Registry - uses: docker/login-action@e92390c5fb421da1463c202d546fed0ec5c39f20 # ratchet:docker/login-action@v3.1.0 - if: github.event_name != 'pull_request' - with: - registry: ghcr.io - username: ${{ github.actor }} - password: ${{ secrets.GITHUB_TOKEN }} - - name: Extract metadata tags and labels for main, release or tag - if: github.event_name != 'pull_request' - id: meta - uses: docker/metadata-action@8e5442c4ef9f78752691e2d8f8d19755c6f78e81 # ratchet:docker/metadata-action@v5.5.1 - with: - flavor: | - latest=auto - images: | - ghcr.io/bentoml/openllm - tags: | - type=semver,pattern={{version}} - type=semver,pattern={{major}}.{{minor}} - type=raw,value=latest,enable=${{ github.ref == format('refs/heads/{0}', github.event.repository.default_branch) }} - type=raw,value=sha-${{ env.GITHUB_SHA_SHORT }} - labels: | - maintainer=aarnphm - org.opencontainers.image.source="https://github.com/bentoml/OpenLLM" - - name: Build and push Docker image - id: build-and-push - uses: 
docker/build-push-action@2cdde995de11925a030ce8070c3d77a52ffcf1c0 # ratchet:docker/build-push-action@v5.3.0 - env: - AWS_ACCESS_KEY_ID: ${{ secrets.AWS_PROD_ACCESS_KEY_ID }} - AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_PROD_SECRET_ACCESS_KEY }} - with: - context: . - file: Dockerfile - push: true - platforms: 'linux/amd64' - build-args: | - GIT_SHA=${{ env.GITHUB_SHA }} - DOCKER_LABEL=sha-${{ env.GITHUB_SHA_SHORT }} - tags: ${{ steps.meta.outputs.tags || steps.meta-pr.outputs.tags }} - labels: ${{ steps.meta.outputs.labels || steps.meta-pr.outputs.labels }} - # TODO: Once https://github.com/aws/containers-roadmap/issues/876 is supported with OCI 1.1 - # then move back to saving cache within the public repo. For now we will save the cache manifest within our internal S3 buckets. - # NOTE: the region of the S3 on prod is us-east-1, where the EC2 machine is at us-west-2 - cache-from: type=s3,region=us-east-1,bucket=openllm-cache,name=y5w8i4y6 - # @aarnphm: max is fine here, since we didn't do any custom code yet, so it is ok to cache every layer for optimal build time - # We also ignore-error for now, just upload anything to the blob storage - cache-to: type=s3,region=us-east-1,bucket=openllm-cache,name=y5w8i4y6,mode=max,compression=zstd,ignore-error=true - - name: Sign the released image - if: ${{ github.event_name != 'pull_request' }} - env: - COSIGN_EXPERIMENTAL: 'true' - run: echo "${{ steps.meta.outputs.tags }}" | xargs -I {} cosign sign --yes {}@${{ steps.build-and-push.outputs.digest }} - - name: Run Trivy in GitHub SBOM mode and submit results to Dependency Graph - uses: aquasecurity/trivy-action@b2933f565dbc598b29947660e66259e3c7bc8561 # ratchet:aquasecurity/trivy-action@master - if: ${{ github.event_name != 'pull_request' }} - with: - image-ref: 'ghcr.io/bentoml/openllm:sha-${{ env.GITHUB_SHA_SHORT }}' - format: 'github' - output: 'dependency-results.sbom.json' - github-pat: ${{ secrets.GITHUB_TOKEN }} - scanners: 'vuln' - - name: Run Trivy vulnerability scanner 
- uses: aquasecurity/trivy-action@b2933f565dbc598b29947660e66259e3c7bc8561 # ratchet:aquasecurity/trivy-action@master - if: ${{ github.event_name != 'pull_request' }} - with: - image-ref: 'ghcr.io/bentoml/openllm:sha-${{ env.GITHUB_SHA_SHORT }}' - format: 'sarif' - output: 'trivy-results.sarif' - severity: 'CRITICAL' - scanners: 'vuln' - - name: Upload Trivy scan results to GitHub Security tab - uses: github/codeql-action/upload-sarif@b7cec7526559c32f1616476ff32d17ba4c59b2d6 # ratchet:github/codeql-action/upload-sarif@v3.25.5 - if: ${{ github.event_name != 'pull_request' }} - with: - sarif_file: 'trivy-results.sarif' - # TODO: Add snapshot tests here. - stop-runner: - name: Stop self-hosted EC2 runner - needs: - - start-runner - - build-and-push-image - - get_commit_message - runs-on: ubuntu-latest - if: >- - (contains(needs.get_commit_message.outputs.message, '[ec2 build]') || github.event_name == 'workflow_dispatch' || (github.event_name == 'pull_request' && contains(github.event.pull_request.labels.*.name, '00 - EC2 Build')) || (github.event_name == 'push' && (startsWith(github.ref, 'refs/tags/v') || startsWith(github.ref, 'refs/heads/main')))) && always() - steps: - - name: Configure AWS credentials - uses: aws-actions/configure-aws-credentials@e3dd6a429d7300a6a4c196c26e071d42e0343502 # ratchet:aws-actions/configure-aws-credentials@v4.0.2 - with: - aws-access-key-id: ${{ secrets.AWS_PROD_ACCESS_KEY_ID }} - aws-secret-access-key: ${{ secrets.AWS_PROD_SECRET_ACCESS_KEY }} - aws-region: ${{ env.AWS_REGION }} - - name: Stop EC2 runner - uses: aarnphm/ec2-github-runner@af796d217e24ecbbc5a2c49e780cd90616e2b962 # ratchet:aarnphm/ec2-github-runner@main - with: - mode: stop - github-token: ${{ secrets.OPENLLM_PAT }} - ec2-region: ${{ env.AWS_REGION }} - label: ${{ needs.start-runner.outputs.label }} - ec2-instance-id: ${{ needs.start-runner.outputs.ec2-instance-id }} diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f03fcac8..e23a5dda 100644 --- 
a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -1,31 +1,12 @@ -name: Continuous Integration +name: CI on: workflow_call: push: branches: [main] - paths-ignore: - - 'docs/**' - - 'bazel/**' - - 'typings/**' - - '*.md' - - 'changelog.d/**' - - 'assets/**' pull_request: branches: [main] - paths-ignore: - - 'docs/**' - - 'bazel/**' - - 'typings/**' - - '*.md' - - 'changelog.d/**' - - 'assets/**' env: - LINES: 120 - COLUMNS: 120 - OPENLLM_DO_NOT_TRACK: True - PYTHONUNBUFFERED: '1' - HATCH_VERBOSE: 2 -# https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#defaultsrun + PAPERSPACE_API_KEY: ${{secrets.PAPERSPACE_API_KEY}} defaults: run: shell: bash --noprofile --norc -exo pipefail {0} @@ -34,111 +15,35 @@ concurrency: cancel-in-progress: true jobs: tests: - runs-on: ${{ matrix.os }} + runs-on: ubuntu-latest if: ${{ github.event_name == 'pull_request' || github.event_name == 'push'|| github.event_name == 'workflow_call' }} - strategy: - fail-fast: false - matrix: - os: [ubuntu-latest] - python-version: ['3.9', '3.12'] - name: tests (${{ matrix.python-version }}.${{ matrix.os }}) + name: General API tests steps: - - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # ratchet:actions/checkout@v4.1.6 + - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # ratchet:actions/checkout@v4 + - uses: actions/setup-python@65d7f2d534ac1bc67fcd62888c5f4f3d2cb2b236 # ratchet:actions/setup-python@v4 with: - fetch-depth: 0 - ref: ${{ github.event.pull_request.head.sha }} - - uses: bentoml/setup-bentoml-action@862aa8fa0e0c3793fcca4bfe7a62717a497417e4 # ratchet:bentoml/setup-bentoml-action@v1 + python-version-file: '.python-version-default' + - name: setup tooling + run: | + python -m pip install uv + uv pip install --system httpx + - name: startup machine + run: python tools/machines.py --start ${{ secrets.PAPERSPACE_MACHINE_ID }} || true + - name: executing remote ssh commands using password + uses: 
appleboy/ssh-action@029f5b4aeeeb58fdfe1410a5d17f967dacf36262 # ratchet:appleboy/ssh-action@v1.0.3 with: - bentoml-version: 'main' - python-version: ${{ matrix.python-version }} - # - name: Run tests - # run: hatch run tests:python - # - name: Disambiguate coverage filename - # run: mv .coverage ".coverage.${{ matrix.os }}.${{ matrix.python-version }}" - # - name: Upload coverage data - # uses: actions/upload-artifact@a8a3f3ad30e3422c9c7b888a15615d19a852ae32 # ratchet:actions/upload-artifact@v3 - # with: - # name: coverage-data - # path: .coverage.* - # coverage: - # name: report-coverage - # runs-on: ubuntu-latest - # if: false - # needs: tests - # steps: - # - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # ratchet:actions/checkout@v4.1.1 - # with: - # fetch-depth: 0 - # ref: ${{ github.event.pull_request.head.sha }} - # - uses: bentoml/setup-bentoml-action@862aa8fa0e0c3793fcca4bfe7a62717a497417e4 # ratchet:bentoml/setup-bentoml-action@v1 - # with: - # bentoml-version: 'main' - # python-version-file: .python-version-default - # - name: Download coverage data - # uses: actions/download-artifact@9bc31d5ccc31df68ecc42ccf4149144866c47d8a # ratchet:actions/download-artifact@v3 - # with: - # name: coverage-data - # - name: Combine coverage data - # run: hatch run coverage:combine - # - name: Export coverage reports - # run: | - # hatch run coverage:report-xml openllm-python - # hatch run coverage:report-uncovered-html openllm-python - # - name: Upload uncovered HTML report - # uses: actions/upload-artifact@a8a3f3ad30e3422c9c7b888a15615d19a852ae32 # ratchet:actions/upload-artifact@v3 - # with: - # name: uncovered-html-report - # path: htmlcov - # - name: Generate coverage summary - # run: hatch run coverage:generate-summary - # - name: Write coverage summary report - # if: github.event_name == 'pull_request' - # run: hatch run coverage:write-summary-report - # - name: Update coverage pull request comment - # if: github.event_name == 'pull_request' && 
!github.event.pull_request.head.repo.fork - # uses: marocchino/sticky-pull-request-comment@331f8f5b4215f0445d3c07b4967662a32a2d3e31 # ratchet:marocchino/sticky-pull-request-comment@v2 - # with: - # path: coverage-report.md - # cli-benchmark: - # name: Check for CLI responsiveness - # runs-on: ubuntu-latest - # env: - # HYPERFINE_VERSION: '1.12.0' - # steps: - # - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # ratchet:actions/checkout@v4.1.1 - # with: - # fetch-depth: 0 - # - name: Install hyperfine - # run: | - # wget https://github.com/sharkdp/hyperfine/releases/download/v${HYPERFINE_VERSION}/hyperfine_${HYPERFINE_VERSION}_amd64.deb - # sudo dpkg -i hyperfine_${HYPERFINE_VERSION}_amd64.deb - # - uses: bentoml/setup-bentoml-action@862aa8fa0e0c3793fcca4bfe7a62717a497417e4 # ratchet:bentoml/setup-bentoml-action@v1 - # with: - # bentoml-version: 'main' - # python-version-file: .python-version-default - # - name: Install self - # run: bash local.sh - # - name: Speed - # run: hyperfine -m 100 --warmup 10 openllm - # brew-dry-run: - # name: Running dry-run tests for brew - # runs-on: macos-latest - # steps: - # - name: Install tap and dry-run - # run: | - # brew tap bentoml/openllm https://github.com/bentoml/openllm - # brew install openllm - # openllm --help - # openllm models --show-available - # evergreen: # https://github.com/marketplace/actions/alls-green#why - # if: always() - # needs: - # - tests - # # - cli-benchmark - # # - brew-dry-run - # runs-on: ubuntu-latest - # steps: - # - name: Decide whether the needed jobs succeeded or failed - # uses: re-actors/alls-green@05ac9388f0aebcb5727afa17fcccfecd6f8ec5fe # ratchet:re-actors/alls-green@release/v1 - # with: - # jobs: ${{ toJSON(needs) }} + host: ${{secrets.PAPERSPACE_HOST}} + username: ${{secrets.PAPERSPACE_USERNAME}} + key: ${{secrets.PAPERSPACE_SSH_KEY}} + port: ${{secrets.PAPERSPACE_PORT}} + script: bash ci.sh --pr ${{github.event.number}} + evergreen: # 
https://github.com/marketplace/actions/alls-green#why + if: always() + needs: + - tests + runs-on: ubuntu-latest + steps: + - name: Decide whether the needed jobs succeeded or failed + uses: re-actors/alls-green@05ac9388f0aebcb5727afa17fcccfecd6f8ec5fe # ratchet:re-actors/alls-green@release/v1 + with: + jobs: ${{ toJSON(needs) }} diff --git a/.github/workflows/cleanup.yml b/.github/workflows/cleanup.yml deleted file mode 100644 index d826a593..00000000 --- a/.github/workflows/cleanup.yml +++ /dev/null @@ -1,16 +0,0 @@ -name: Cleanup PR cache -on: - pull_request: - types: - - closed -jobs: - cleanup: - runs-on: ubuntu-latest - if: "github.repository == 'bentoml/OpenLLM'" # Don't run on fork repository - steps: - - name: Check out code - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # ratchet:actions/checkout@v4.1.6 - - name: Cleanup - run: "gh extension install actions/gh-actions-cache\n\nREPO=${{ github.repository }}\nBRANCH=\"refs/pull/${{ github.event.pull_request.number }}/merge\"\n\necho \"Fetching list of cache key\"\ncacheKeysForPR=$(gh actions-cache list -R $REPO -B $BRANCH -L 100 | cut -f 1 )\n\n## Setting this to not fail the workflow while deleting cache keys. 
\nset +e\necho \"Deleting caches...\"\nfor cacheKey in $cacheKeysForPR\ndo\n gh actions-cache delete $cacheKey -R $REPO -B $BRANCH --confirm\ndone\necho \"Done\"\n" - env: - GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/create-releases.yml b/.github/workflows/create-releases.yml index 179806d6..b3d0b932 100644 --- a/.github/workflows/create-releases.yml +++ b/.github/workflows/create-releases.yml @@ -102,7 +102,6 @@ jobs: needs: - release - publish-python - - binary-distribution runs-on: ubuntu-latest permissions: contents: write @@ -155,62 +154,12 @@ jobs: popd &>/dev/null git add package.json openllm-node/package.json && git commit -S -sm "infra: bump to dev version of ${DEV_VERSION} [generated] [skip ci]" git push origin HEAD:main - binary-distribution: - if: github.repository_owner == 'bentoml' - needs: build-pypi - name: Create binary/wheels distribution - uses: bentoml/OpenLLM/.github/workflows/binary-releases.yml@main # ratchet:exclude release-notes: if: github.repository_owner == 'bentoml' needs: - release - publish-python - - binary-distribution name: Create release notes and setup for next cycle uses: bentoml/OpenLLM/.github/workflows/release-notes.yml@main # ratchet:exclude with: tags: ${{ needs.release.outputs.version }} - bump-homebrew-tap: - needs: - - release-notes - - prepare-next-dev-cycle - - release - runs-on: ubuntu-latest - permissions: - contents: write - id-token: write - steps: - - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # ratchet:actions/checkout@v4.1.6 - with: - fetch-depth: 0 - - uses: bentoml/setup-bentoml-action@862aa8fa0e0c3793fcca4bfe7a62717a497417e4 # ratchet:bentoml/setup-bentoml-action@v1 - with: - bentoml-version: 'main' - python-version-file: .python-version-default - - name: Install jq and curl - run: sudo apt-get install -y jq curl - - name: Import bot's GPG key for signing commits - id: import-gpg-key - uses: crazy-max/ghaction-import-gpg@01dd5d3ca463c7f10f7f4f7b4f177225ac661ee4 # 
ratchet:crazy-max/ghaction-import-gpg@v6 - with: - gpg_private_key: ${{ secrets.GPG_PRIVATE_KEY }} - passphrase: ${{ secrets.GPG_PASSPHRASE }} - git_config_global: true - git_user_signingkey: true - git_commit_gpgsign: true - git_tag_gpgsign: true - - name: Update current formula - env: - GIT_AUTHOR_NAME: ${{ steps.import-gpg-key.outputs.name }} - GIT_AUTHOR_EMAIL: ${{ steps.import-gpg-key.outputs.email }} - GIT_COMMITTER_NAME: ${{ steps.import-gpg-key.outputs.name }} - GIT_COMMITTER_EMAIL: ${{ steps.import-gpg-key.outputs.email }} - GITHUB_TOKEN: ${{ secrets.HOMEBREW_GITHUB_TOKEN }} - run: | - git pull --autostash --no-edit --gpg-sign --ff origin main - SEMVER="${{ needs.release.outputs.version }}" - SEMVER="${SEMVER#v}" - pip install fs jinja2 ghapi plumbum - ./tools/update-brew-tap.py - git add Formula && git commit -S -sm "infra: bump to homebrew tap release to ${SEMVER} [generated] [skip ci]" - git push origin HEAD:main diff --git a/.github/workflows/cron.yml b/.github/workflows/cron.yml deleted file mode 100644 index ed728df5..00000000 --- a/.github/workflows/cron.yml +++ /dev/null @@ -1,59 +0,0 @@ -name: Cron update -on: - workflow_dispatch: - schedule: - # ┌───────────── minute (0 - 59) - # │ ┌───────────── hour (0 - 23) - # │ │ ┌───────────── day of the month (1 - 31) - # │ │ │ ┌───────────── month (1 - 12 or JAN-DEC) - # │ │ │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT) - # │ │ │ │ │ - - cron: '42 2 * * SUN-WED' -# https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#defaultsrun -defaults: - run: - shell: bash --noprofile --norc -exo pipefail {0} -concurrency: - group: cron-${{ github.head_ref || github.run_id }} - cancel-in-progress: true -jobs: - update-actions: - runs-on: 'ubuntu-latest' - name: Ratchet update - if: "github.repository == 'bentoml/OpenLLM'" # Don't run on fork repository - env: - ACTIONS_TOKEN: ${{ secrets.OPENLLM_PAT }} - steps: - - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 
# ratchet:actions/checkout@v4.1.6 - with: - fetch-depth: 0 - - name: Install jq and curl - run: sudo apt-get install -y jq curl - - name: Import bot's GPG key for signing commits - id: import-gpg-key - uses: crazy-max/ghaction-import-gpg@01dd5d3ca463c7f10f7f4f7b4f177225ac661ee4 # ratchet:crazy-max/ghaction-import-gpg@v6 - with: - gpg_private_key: ${{ secrets.GPG_PRIVATE_KEY }} - passphrase: ${{ secrets.GPG_PASSPHRASE }} - git_config_global: true - git_user_signingkey: true - git_commit_gpgsign: true - - name: Locking dependencies - run: bash ./tools/update-actions.sh - - name: Create a PR - uses: peter-evans/create-pull-request@6d6857d36972b65feb161a90e484f2984215f83e # ratchet:peter-evans/create-pull-request@v6.0.5 - env: - GIT_AUTHOR_NAME: ${{ steps.import-gpg-key.outputs.name }} - GIT_AUTHOR_EMAIL: ${{ steps.import-gpg-key.outputs.email }} - GIT_COMMITTER_NAME: ${{ steps.import-gpg-key.outputs.name }} - GIT_COMMITTER_EMAIL: ${{ steps.import-gpg-key.outputs.email }} - BRANCH_NAME: cron/ratchet - with: - title: 'ci: update lock actions [generated]' - commit-message: 'cron: ratchet update' - branch-suffix: timestamp - signoff: true - delete-branch: true - reviewers: aarnphm - author: ${{ env.GIT_AUTHOR_NAME }} <${{ env.GIT_AUTHOR_EMAIL }}> - branch: ${{ env.BRANCH_NAME }} diff --git a/.github/workflows/release-notes.yml b/.github/workflows/release-notes.yml index ca5cbec0..351a1bad 100644 --- a/.github/workflows/release-notes.yml +++ b/.github/workflows/release-notes.yml @@ -35,46 +35,11 @@ jobs: - name: Create release notes run: ./.github/actions/create_release_and_archive.sh ${{ inputs.tags }} - name: Download Python artifacts - uses: actions/download-artifact@9bc31d5ccc31df68ecc42ccf4149144866c47d8a # ratchet:actions/download-artifact@v3 + uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # ratchet:actions/download-artifact@v4 with: - name: python-artefacts + pattern: python-artefacts-* + merge-multiple: true path: dist - # - name: 
Download Linux x86_64 compiled artifacts - # uses: actions/download-artifact@7a1cd3216ca9260cd8022db641d960b1db4d1be4 # ratchet:actions/download-artifact@v3 - # with: - # name: linux-x86_64-mypyc-wheels - # path: dist - # - name: Download MacOS x86_64 compiled artifacts - # uses: actions/download-artifact@7a1cd3216ca9260cd8022db641d960b1db4d1be4 # ratchet:actions/download-artifact@v3 - # with: - # name: macos-x86_64-mypyc-wheels - # path: dist - # - name: Download MacOS arm64 compiled artifacts - # uses: actions/download-artifact@7a1cd3216ca9260cd8022db641d960b1db4d1be4 # ratchet:actions/download-artifact@v3 - # with: - # name: macos-arm64-mypyc-wheels - # path: dist - # - name: Download MacOS universal2 compiled artifacts - # uses: actions/download-artifact@7a1cd3216ca9260cd8022db641d960b1db4d1be4 # ratchet:actions/download-artifact@v3 - # with: - # name: macos-universal2-mypyc-wheels - # path: dist - - name: Download binaries - uses: actions/download-artifact@9bc31d5ccc31df68ecc42ccf4149144866c47d8a # ratchet:actions/download-artifact@v3 - with: - name: standalone - path: archives - - name: Download standalone MacOS - uses: actions/download-artifact@9bc31d5ccc31df68ecc42ccf4149144866c47d8a # ratchet:actions/download-artifact@v3 - with: - name: staged-macOS - path: archives - # TODO: Uncomment me when I decided to bring back Windows support - # - name: Download installers - # uses: actions/download-artifact@7a1cd3216ca9260cd8022db641d960b1db4d1be4 # ratchet:actions/download-artifact@v3 - # with: - # name: installers - # path: installers - name: Create release uses: softprops/action-gh-release@69320dbe05506a9a39fc8ae11030b214ec2d1f87 # ratchet:softprops/action-gh-release@v2.0.5 with: @@ -83,7 +48,7 @@ jobs: body_path: release_notes.txt fail_on_unmatched_files: true tag_name: '${{ inputs.tags }}' - # TODO: add instasllers/* once windows support is back + # TODO: add installers/* archives/* once windows support is back files: |- dist/* archives/* diff --git 
a/Formula/README.md b/Formula/README.md deleted file mode 100644 index b72472ca..00000000 --- a/Formula/README.md +++ /dev/null @@ -1,13 +0,0 @@ -This directory hosts the brew tap for OpenLLM - -```bash -brew tap bentoml/openllm https://github.com/bentoml/openllm - -brew install openllm -``` - -> [!NOTE] -> This will install the generated release binary from GitHub release. If any releases doesn't include the binary, then you will need to install from pip - -> [!IMPORTANT] -> If you want to install from source, please refer to [Development Guide](https://github.com/bentoml/openllm/tree/main/DEVELOPMENT.md). diff --git a/Formula/openllm.rb b/Formula/openllm.rb deleted file mode 100644 index cd5a8416..00000000 --- a/Formula/openllm.rb +++ /dev/null @@ -1,47 +0,0 @@ -# Generated by tools/update-brew-tap.py. DO NOT EDIT! -# Please refers to the original template file Formula/openllm.rb.j2 -# vim: set ft=ruby: -class Openllm < Formula - desc "OpenLLM: Operating LLMs in production" - homepage "https://github.com/bentoml/OpenLLM" - version "0.4.44" - license "Apache-2.0" - head "https://github.com/bentoml/OpenLLM, branch: main" - url "https://github.com/bentoml/OpenLLM/archive/v0.4.44.tar.gz" - sha256 "5158eee3c4b771d3cabd8827439c148d3d710a9b63c7ad98a58a607f587d3acc" - - on_linux do - url "https://github.com/bentoml/OpenLLM/releases/download/v0.4.44/openllm-0.4.44-x86_64-unknown-linux-musl.tar.gz" - sha256 "d56d14b032ffdb0de0ea2a7575f6039726987adccc74b634b0e943a73679232a" - end - on_macos do - on_arm do - url "https://github.com/bentoml/OpenLLM/releases/download/v0.4.44/openllm-0.4.44-aarch64-apple-darwin.tar.gz" - sha256 "ef57c27ab684179c6cccc02cd60506ea6a721908b56e7f483dda0b9931c74cdd" - end - on_intel do - url "https://github.com/bentoml/OpenLLM/releases/download/v0.4.44/openllm-0.4.44-x86_64-apple-darwin.tar.gz" - sha256 "65c5f176362f00cf29187c6c2062141abfc8165e63a89845038d2921715d270c" - end - end - - def install - on_linux do - bin.install 
"openllm-0.4.44-x86_64-unknown-linux-musl" => "openllm" - end - on_macos do - on_arm do - bin.install "openllm-0.4.44-aarch64-apple-darwin" => "openllm" - end - on_intel do - bin.install "openllm-0.4.44-x86_64-apple-darwin" => "openllm" - end - end - ohai "To get started, run: 'openllm --help'" - ohai "To see supported models, run: 'openllm models'" - end - - test do - shell_output "#{bin}/openllm --version" - end -end diff --git a/Formula/openllm.rb.j2 b/Formula/openllm.rb.j2 deleted file mode 100644 index db99df48..00000000 --- a/Formula/openllm.rb.j2 +++ /dev/null @@ -1,47 +0,0 @@ -# Generated by {{ __cmd__ }}. DO NOT EDIT! -# Please refers to the original template file {{ __template_file__ }} -# vim: set ft=ruby: -class Openllm < Formula - desc "{{ name }}: {{ description }}" - homepage "{{ html_url }}" - version "{{ __tag__|replace('v', '') }}" - license "{{ license["spdx_id"] }}" - head "{{ html_url }}, branch: {{ default_branch }}" - url "{{ determine_release_url(svn_url, __tag__, 'archive') }}" - sha256 "{{ shadict['archive'] }}" - - on_linux do - url "{{ determine_release_url(svn_url, __tag__, 'linux_intel') }}" - sha256 "{{ shadict['linux_intel'] }}" - end - on_macos do - on_arm do - url "{{ determine_release_url(svn_url, __tag__, 'macos_arm') }}" - sha256 "{{ shadict['macos_arm'] }}" - end - on_intel do - url "{{ determine_release_url(svn_url, __tag__, 'macos_intel') }}" - sha256 "{{ shadict['macos_intel'] }}" - end - end - - def install - on_linux do - bin.install "openllm-{{ __tag__|replace('v', '') }}-{{ __gz_extension__['linux_intel'] }}" => "openllm" - end - on_macos do - on_arm do - bin.install "openllm-{{ __tag__|replace('v', '') }}-{{ __gz_extension__['macos_arm'] }}" => "openllm" - end - on_intel do - bin.install "openllm-{{ __tag__|replace('v', '') }}-{{ __gz_extension__['macos_intel'] }}" => "openllm" - end - end - ohai "To get started, run: 'openllm --help'" - ohai "To see supported models, run: 'openllm models'" - end - - test do - 
shell_output "#{bin}/openllm --version" - end -end diff --git a/openllm-client/src/openllm_client/_http.py b/openllm-client/src/openllm_client/_http.py index 1142d95f..03165edf 100644 --- a/openllm-client/src/openllm_client/_http.py +++ b/openllm-client/src/openllm_client/_http.py @@ -86,6 +86,12 @@ class HTTPClient(Client): else: llm_config = {**self._config, **attrs} + if stop is not None: + if isinstance(stop, str): + stop = [stop] + else: + stop = list(stop) + return self._post( f'/{self._api_version}/generate', response_cls=Response, @@ -110,6 +116,13 @@ class HTTPClient(Client): llm_config = {**self._config, **llm_config, **attrs} else: llm_config = {**self._config, **attrs} + + if stop is not None: + if isinstance(stop, str): + stop = [stop] + else: + stop = list(stop) + return self._post( f'/{self._api_version}/generate_stream', response_cls=Response, @@ -181,12 +194,18 @@ class AsyncHTTPClient(AsyncClient, pydantic.BaseModel): timeout = self.timeout if verify is None: verify = self._verify # XXX: need to support this again - _metadata = await self._metadata _config = await self._config if llm_config is not None: llm_config = {**_config, **llm_config, **attrs} else: llm_config = {**_config, **attrs} + + if stop is not None: + if isinstance(stop, str): + stop = [stop] + else: + stop = list(stop) + return await self._post( f'/{self._api_version}/generate', response_cls=Response, @@ -209,13 +228,18 @@ class AsyncHTTPClient(AsyncClient, pydantic.BaseModel): timeout = self.timeout if verify is None: verify = self._verify # XXX: need to support this again - _metadata = await self._metadata _config = await self._config if llm_config is not None: llm_config = {**_config, **llm_config, **attrs} else: llm_config = {**_config, **attrs} + if stop is not None: + if isinstance(stop, str): + stop = [stop] + else: + stop = list(stop) + async for response_chunk in await self._post( f'/{self._api_version}/generate_stream', response_cls=Response, diff --git 
a/openllm-core/src/openllm_core/_schemas.py b/openllm-core/src/openllm_core/_schemas.py index 33a09080..85e9a0ac 100644 --- a/openllm-core/src/openllm_core/_schemas.py +++ b/openllm-core/src/openllm_core/_schemas.py @@ -52,7 +52,7 @@ class GenerationInput(pydantic.BaseModel): raise RuntimeError('This class is not meant to be used directly. Use "from_config" instead') super().__init__(**data) - @pydantic.field_validator('stop') + @pydantic.field_validator('stop', mode='before') @classmethod def stop_validator(cls, data: str | list[str] | t.Iterable[str] | None) -> list[str] | None: if data is None: diff --git a/openllm-python/tests/conftest.py b/openllm-python/tests/conftest.py deleted file mode 100644 index aaa3f895..00000000 --- a/openllm-python/tests/conftest.py +++ /dev/null @@ -1,16 +0,0 @@ -from __future__ import annotations - -import pytest, typing as t - - -@pytest.fixture( - scope='function', - name='model_id', - params={ - 'meta-llama/Meta-Llama-3-8B-Instruct', - 'casperhansen/llama-3-70b-instruct-awq', - 'TheBloke/Nous-Hermes-2-Mixtral-8x7B-DPO-AWQ', - }, -) -def fixture_model_id(request) -> t.Generator[str, None, None]: - yield request.param diff --git a/openllm-python/tests/regression_test.py b/openllm-python/tests/regression_test.py index cd37c77a..e1e1c4d7 100644 --- a/openllm-python/tests/regression_test.py +++ b/openllm-python/tests/regression_test.py @@ -1,26 +1,27 @@ from __future__ import annotations -import pytest, subprocess, sys, openllm, bentoml, asyncio -from openai import AsyncOpenAI +import pytest, subprocess, sys, asyncio, openllm, bentoml +from openai import OpenAI from openai.types.chat import ChatCompletionSystemMessageParam, ChatCompletionUserMessageParam SERVER_PORT = 53822 @pytest.mark.asyncio -async def test_openai_compatible(model_id: str): +async def test_openai_compatible(): + model_id = 'meta-llama/Meta-Llama-3-8B-Instruct' server = subprocess.Popen([sys.executable, '-m', 'openllm', 'start', model_id, '--port', 
str(SERVER_PORT)]) - await asyncio.sleep(5) - with bentoml.SyncHTTPClient(f'http://127.0.0.1:{SERVER_PORT}', server_ready_timeout=90) as client: + await asyncio.sleep(10) + with bentoml.SyncHTTPClient(f'http://127.0.0.1:{SERVER_PORT}', server_ready_timeout=120) as client: assert client.is_ready(30) try: - client = AsyncOpenAI(api_key='na', base_url=f'http://127.0.0.1:{SERVER_PORT}/v1') - serve_model = (await client.models.list()).data[0].id + client = OpenAI(api_key='na', base_url=f'http://127.0.0.1:{SERVER_PORT}/v1') + serve_model = client.models.list().data[0].id assert serve_model == openllm.utils.normalise_model_name(model_id) - streamable = await client.chat.completions.create( + streamable = client.chat.completions.create( model=serve_model, - max_tokens=512, + max_tokens=128, stream=False, messages=[ ChatCompletionSystemMessageParam( @@ -37,18 +38,27 @@ async def test_openai_compatible(model_id: str): @pytest.mark.asyncio -async def test_generate_endpoint(model_id: str): - server = subprocess.Popen([sys.executable, '-m', 'openllm', 'start', model_id, '--port', str(SERVER_PORT)]) - await asyncio.sleep(5) +async def test_generate_endpoint(): + server = subprocess.Popen([ + sys.executable, + '-m', + 'openllm', + 'start', + 'microsoft/Phi-3-mini-4k-instruct', + '--trust-remote-code', + '--port', + str(SERVER_PORT), + ]) + await asyncio.sleep(10) - with bentoml.SyncHTTPClient(f'http://127.0.0.1:{SERVER_PORT}', server_ready_timeout=90) as client: + with bentoml.SyncHTTPClient(f'http://127.0.0.1:{SERVER_PORT}', server_ready_timeout=120) as client: assert client.is_ready(30) try: - client = openllm.AsyncHTTPClient(f'http://127.0.0.1:{SERVER_PORT}', api_version='v1') - assert await client.health() + client = openllm.HTTPClient(f'http://127.0.0.1:{SERVER_PORT}', api_version='v1') + assert client.health() - response = await client.generate( + response = client.generate( 'Tell me more about Apple as a company', stop='technology', llm_config={'temperature': 0.5, 'top_p': 
0.2} ) assert response is not None diff --git a/pyproject.toml b/pyproject.toml index dbd4bde2..fa72768a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -163,8 +163,8 @@ testpaths = ["openllm-python/tests"] [tool.coverage.paths] openllm = [ - "openllm-python/src/openllm", - "*/openllm-python/src/openllm", + "openllm-python/src/_openllm_tiny", + "*/openllm-python/src/_openllm_tiny", "openllm-client/src/openllm_client", "*/openllm-client/src/openllm_client", "openllm-core/src/openllm_core", @@ -174,13 +174,11 @@ openllm = [ branch = true omit = [ "__pypackages__/*", - "openllm-python/src/openllm/_version.py", - "openllm-python/src/openllm/__init__.py", - "openllm-python/src/openllm/__main__.py", "openllm-core/src/openllm_core/_typing_compat.py", - "openllm-client/src/openllm_client/pb/**", + "openllm-core/src/openllm_core/_version.py", + "openllm-client/src/openllm_client/_version.py", ] -source_pkgs = ["openllm", "openllm_core", "openllm_client"] +source_pkgs = ["_openllm_tiny", "openllm_core", "openllm_client"] [tool.coverage.report] exclude_lines = [ "no cov", @@ -204,11 +202,9 @@ exclude_lines = [ ] omit = [ "__pypackages__/*", - "openllm-python/src/openllm/_version.py", - "openllm-python/src/openllm/__init__.py", - "openllm-python/src/openllm/__main__.py", "openllm-core/src/openllm_core/_typing_compat.py", - "openllm-client/src/openllm_client/pb/**", + "openllm-core/src/openllm_core/_version.py", + "openllm-client/src/openllm_client/_version.py", ] precision = 2 show_missing = true diff --git a/tools/machines.py b/tools/machines.py new file mode 100644 index 00000000..02d60539 --- /dev/null +++ b/tools/machines.py @@ -0,0 +1,69 @@ +from __future__ import annotations + +import httpx,os,dataclasses,logging,time,argparse,typing as t + +if (ENV := os.getenv("PAPERSPACE_API_KEY")) is None: raise RuntimeError('This script requires setting "PAPERSPACE_API_KEY"') +HEADERS = httpx.Headers({'Authorization': f'Bearer {ENV}', 'Accept': 'application/json'}) +API_URL = 
'https://api.paperspace.com/v1' + +logging.basicConfig(level=logging.ERROR) +logger = logging.getLogger(__name__) + +@dataclasses.dataclass +class Machine: + id: str + inner: httpx.Client = dataclasses.field(default_factory=lambda: httpx.Client(headers=HEADERS, base_url=API_URL, timeout=60), repr=False) + + def close(self): self.inner.close() + def __del__(self): self.close() + def __enter__(self): return self + def __exit__(self, *_: t.Any) -> None: self.close() + @property + def metadata(self) -> dict[str, t.Any]: return self.inner.get(f'/machines/{self.id}').json() + @property + def status(self) -> t.Literal['off', 'ready', 'stopping', 'starting']: return self.metadata['state'] + def start(self) -> bool: + response = self.inner.patch(f'/machines/{self.id}/start') + if response.status_code == 400 or self.status == 'ready': + logger.error('machine is already running') + return False + elif response.status_code != 200: + logger.error('Error while starting machine "%s": %s', self.id, response.json()) + return response.status_code == 200 # False when the start request failed, so callers do not poll forever + def stop(self) -> bool: + response = self.inner.patch(f'/machines/{self.id}/stop') + if response.status_code == 400 or self.status == 'off': + logger.error('machine is already off') + return False + elif response.status_code != 200: + logger.error('Error while stopping machine "%s": %s', self.id, response.json()) + return response.status_code == 200 # False when the stop request failed, so callers do not poll forever + +def main(): + parser = argparse.ArgumentParser() + group = parser.add_mutually_exclusive_group(required=True) + group.add_argument('--start', metavar='ID') + group.add_argument('--stop', metavar='ID') + args = parser.parse_args() + + if args.start: + with Machine(id=args.start) as machine: + if machine.start(): + while machine.status != 'ready': + logger.info('Waiting for machine "%s" to be ready...', machine.id) + time.sleep(5) + else: + logger.error('Failed to start machine "%s"', machine.id) + return 1 + elif args.stop: + with Machine(id=args.stop) as machine: + if machine.stop(): + while machine.status != 'off': # a stopped machine reports 'off'; waiting for 'ready' would never terminate +
logger.info('Waiting for machine "%s" to stop...', machine.id) + time.sleep(5) + else: + logger.error('Failed to stop machine "%s"', machine.id) + return 1 + return 0 + +if __name__ == "__main__": raise SystemExit(main()) diff --git a/tools/update-brew-tap.py b/tools/update-brew-tap.py deleted file mode 100755 index 71f99fbd..00000000 --- a/tools/update-brew-tap.py +++ /dev/null @@ -1,75 +0,0 @@ -#!/usr/bin/env python3 -from __future__ import annotations -import os, typing as t, fs -from pathlib import Path -from ghapi.all import GhApi -from jinja2 import Environment -from jinja2.loaders import FileSystemLoader -from plumbum.cmd import curl, cut, shasum - -if t.TYPE_CHECKING: - from plumbum.commands.base import Pipeline - -# get git root from this file -ROOT = Path(__file__).parent.parent - -_OWNER = 'bentoml' -_REPO = 'openllm' - -_gz_strategies: dict[t.Literal['macos_arm', 'macos_intel', 'linux_intel'], str] = { - 'macos_arm': 'aarch64-apple-darwin', - 'macos_intel': 'x86_64-apple-darwin', - 'linux_intel': 'x86_64-unknown-linux-musl', -} - - -def determine_release_url( - svn_url: str, tag: str, target: t.Literal['macos_arm', 'macos_intel', 'linux_intel', 'archive'] -) -> str: - if target == 'archive': - return f'{svn_url}/archive/{tag}.tar.gz' - return f"{svn_url}/releases/download/{tag}/openllm-{tag.replace('v', '')}-{_gz_strategies[target]}.tar.gz" - - -# curl -sSL /archive/refs/tags/.tar.gz | shasum -a256 | cut -d'' -f1 -def get_release_hash_command(svn_url: str, tag: str) -> Pipeline: - return curl['-sSL', svn_url] | shasum['-a256'] | cut['-d', ' ', '-f1'] - - -def main() -> int: - api = GhApi(owner=_OWNER, repo=_REPO, authenticate=False) - _info = api.repos.get() - release_tag = api.repos.get_latest_release().name - - shadict: dict[str, t.Any] = { - k: get_release_hash_command(determine_release_url(_info.svn_url, release_tag, k), release_tag)().strip() - for k in _gz_strategies - } - shadict['archive'] = get_release_hash_command( -
determine_release_url(_info.svn_url, release_tag, 'archive'), release_tag - )().strip() - - ENVIRONMENT = Environment( - extensions=['jinja2.ext.do', 'jinja2.ext.loopcontrols', 'jinja2.ext.debug'], - trim_blocks=True, - lstrip_blocks=True, - loader=FileSystemLoader((ROOT / 'Formula').__fspath__(), followlinks=True), - ) - template_file = 'openllm.rb.j2' - with (ROOT / 'Formula' / 'openllm.rb').open('w') as f: - f.write( - ENVIRONMENT.get_template(template_file, globals={'determine_release_url': determine_release_url}).render( - shadict=shadict, - __tag__=release_tag, - __cmd__=fs.path.join(os.path.basename(os.path.dirname(__file__)), os.path.basename(__file__)), - __template_file__=fs.path.join('Formula', template_file), - __gz_extension__=_gz_strategies, - **_info, - ) - ) - f.write('\n') - return 0 - - -if __name__ == '__main__': - raise SystemExit(main())