mirror of
https://github.com/bentoml/OpenLLM.git
synced 2026-02-23 18:27:18 -05:00
feat(ci): running CI on paperspace (#998)
* chore: update tiny script
  Signed-off-by: paperspace <29749331+aarnphm@users.noreply.github.com>
* feat(ci): running on paperspace machines
  Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>
* chore: update models and increase timeout readiness
  Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>
* fix: schema validation for inputs and update client supporting stop
  Signed-off-by: paperspace <29749331+aarnphm@users.noreply.github.com>
* chore: update coverage config
  Signed-off-by: paperspace <29749331+aarnphm@users.noreply.github.com>
* chore: remove some non-essentials
  Signed-off-by: paperspace <29749331+aarnphm@users.noreply.github.com>
* chore: update locks
  Signed-off-by: paperspace <29749331+aarnphm@users.noreply.github.com>
---------
Signed-off-by: paperspace <29749331+aarnphm@users.noreply.github.com>
Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>
This commit is contained in:
22
.github/workflows/binary-releases.yml
vendored
22
.github/workflows/binary-releases.yml
vendored
@@ -8,8 +8,6 @@ on:
|
||||
paths-ignore:
|
||||
- '*.md'
|
||||
- 'docs/**'
|
||||
- 'bazel/**'
|
||||
- 'typings/**'
|
||||
- 'changelog.d/**'
|
||||
- 'assets/**'
|
||||
- 'openllm-node/**'
|
||||
@@ -19,8 +17,6 @@ on:
|
||||
paths-ignore:
|
||||
- '*.md'
|
||||
- 'docs/**'
|
||||
- 'bazel/**'
|
||||
- 'typings/**'
|
||||
- 'changelog.d/**'
|
||||
- 'assets/**'
|
||||
- 'openllm-node/**'
|
||||
@@ -74,10 +70,10 @@ jobs:
|
||||
run: python -m pip install --upgrade build
|
||||
- name: Build
|
||||
run: |
|
||||
bash local.sh
|
||||
bash local.sh -e vllm
|
||||
python -m build -sw openllm-python/
|
||||
- name: Upload artifacts
|
||||
uses: actions/upload-artifact@a8a3f3ad30e3422c9c7b888a15615d19a852ae32 # ratchet:actions/upload-artifact@v3
|
||||
uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # ratchet:actions/upload-artifact@v4
|
||||
with:
|
||||
name: binary-artefacts
|
||||
path: openllm-python/dist/*
|
||||
@@ -146,7 +142,7 @@ jobs:
|
||||
- name: Install Hatch
|
||||
run: pip install -U hatch
|
||||
- name: Install Rust toolchain
|
||||
uses: dtolnay/rust-toolchain@78c6b5541adb5849f5d72d15da722aedb26327ca # ratchet:dtolnay/rust-toolchain@stable
|
||||
uses: dtolnay/rust-toolchain@d388a4836fcdbde0e50e395dc79a2670ccdef13f # ratchet:dtolnay/rust-toolchain@stable
|
||||
with:
|
||||
targets: ${{ matrix.job.target }}
|
||||
- name: Set up cross compiling
|
||||
@@ -165,7 +161,7 @@ jobs:
|
||||
fi
|
||||
- name: Download Python artifacts
|
||||
if: ${{ !startsWith(github.event.ref, 'refs/tags') }}
|
||||
uses: actions/download-artifact@9bc31d5ccc31df68ecc42ccf4149144866c47d8a # ratchet:actions/download-artifact@v3
|
||||
uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # ratchet:actions/download-artifact@v4
|
||||
with:
|
||||
name: binary-artefacts
|
||||
path: openllm-python/dist
|
||||
@@ -211,14 +207,14 @@ jobs:
|
||||
done
|
||||
- name: Upload staged archive
|
||||
if: runner.os != 'Linux'
|
||||
uses: actions/upload-artifact@a8a3f3ad30e3422c9c7b888a15615d19a852ae32 # ratchet:actions/upload-artifact@v3
|
||||
uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # ratchet:actions/upload-artifact@v4
|
||||
with:
|
||||
name: staged-${{ runner.os }}
|
||||
path: openllm-python/packaging/*
|
||||
if-no-files-found: error
|
||||
- name: Upload archive
|
||||
if: runner.os == 'Linux'
|
||||
uses: actions/upload-artifact@a8a3f3ad30e3422c9c7b888a15615d19a852ae32 # ratchet:actions/upload-artifact@v3
|
||||
uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # ratchet:actions/upload-artifact@v4
|
||||
with:
|
||||
name: standalone
|
||||
path: openllm-python/packaging/*
|
||||
@@ -245,7 +241,7 @@ jobs:
|
||||
- name: Install PyOxidizer ${{ env.PYOXIDIZER_VERSION }}
|
||||
run: pip install pyoxidizer==${{ env.PYOXIDIZER_VERSION }}
|
||||
- name: Download staged binaries
|
||||
uses: actions/download-artifact@9bc31d5ccc31df68ecc42ccf4149144866c47d8a # ratchet:actions/download-artifact@v3
|
||||
uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # ratchet:actions/download-artifact@v4
|
||||
with:
|
||||
name: staged-${{ runner.os }}
|
||||
path: openllm-python/archives
|
||||
@@ -274,13 +270,13 @@ jobs:
|
||||
mkdir installers
|
||||
mv build/*/release/*/*.{exe,msi} installers
|
||||
- name: Upload binaries
|
||||
uses: actions/upload-artifact@a8a3f3ad30e3422c9c7b888a15615d19a852ae32 # ratchet:actions/upload-artifact@v3
|
||||
uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # ratchet:actions/upload-artifact@v4
|
||||
with:
|
||||
name: standalone
|
||||
path: openllm-python/archives/*
|
||||
if-no-files-found: error
|
||||
- name: Upload installers
|
||||
uses: actions/upload-artifact@a8a3f3ad30e3422c9c7b888a15615d19a852ae32 # ratchet:actions/upload-artifact@v3
|
||||
uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # ratchet:actions/upload-artifact@v4
|
||||
with:
|
||||
name: installers
|
||||
path: openllm-python/installers/*
|
||||
|
||||
15
.github/workflows/build-pypi.yml
vendored
15
.github/workflows/build-pypi.yml
vendored
@@ -86,9 +86,9 @@ jobs:
|
||||
run: hatch build
|
||||
working-directory: ${{ matrix.directory }}
|
||||
- name: Upload artifacts
|
||||
uses: actions/upload-artifact@a8a3f3ad30e3422c9c7b888a15615d19a852ae32 # ratchet:actions/upload-artifact@v3
|
||||
uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # ratchet:actions/upload-artifact@v4
|
||||
with:
|
||||
name: python-artefacts
|
||||
name: python-artefacts-${{ matrix.directory }}
|
||||
path: ${{ matrix.directory }}/dist/*
|
||||
if-no-files-found: error
|
||||
check-download-artefacts:
|
||||
@@ -98,9 +98,10 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Download Python artifacts
|
||||
uses: actions/download-artifact@9bc31d5ccc31df68ecc42ccf4149144866c47d8a # ratchet:actions/download-artifact@v3
|
||||
uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # ratchet:actions/download-artifact@v4
|
||||
with:
|
||||
name: python-artefacts
|
||||
pattern: python-artefacts-*
|
||||
merge-multiple: true
|
||||
path: dist
|
||||
- name: dry ls
|
||||
run: ls -rthlaR
|
||||
@@ -110,13 +111,13 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
id-token: write
|
||||
# needs: [pure-wheels-sdist, mypyc]
|
||||
needs: [pure-wheels-sdist]
|
||||
steps:
|
||||
- name: Download Python artifacts
|
||||
uses: actions/download-artifact@9bc31d5ccc31df68ecc42ccf4149144866c47d8a # ratchet:actions/download-artifact@v3
|
||||
uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # ratchet:actions/download-artifact@v4
|
||||
with:
|
||||
name: python-artefacts
|
||||
pattern: python-artefacts-*
|
||||
merge-multiple: true
|
||||
path: dist
|
||||
- name: Publish nightly wheels to test.pypi.org
|
||||
uses: pypa/gh-action-pypi-publish@81e9d935c883d0b210363ab89cf05f3894778450 # ratchet:pypa/gh-action-pypi-publish@release/v1
|
||||
|
||||
222
.github/workflows/build.yml
vendored
222
.github/workflows/build.yml
vendored
@@ -1,222 +0,0 @@
|
||||
name: Build and push OpenLLM base container
|
||||
on:
|
||||
workflow_dispatch:
|
||||
push:
|
||||
branches:
|
||||
- 'main'
|
||||
tags:
|
||||
- '*'
|
||||
paths:
|
||||
- 'openllm-python/src/openllm/**'
|
||||
- 'openllm-python/src/openllm_cli/**'
|
||||
- 'openllm-core/src/openllm_core/**'
|
||||
- 'openllm-client/src/openllm_client/**'
|
||||
pull_request:
|
||||
branches:
|
||||
- 'main'
|
||||
paths:
|
||||
- 'openllm-python/src/openllm/**'
|
||||
- 'openllm-python/src/openllm_cli/**'
|
||||
- 'openllm-core/src/openllm_core/**'
|
||||
- 'openllm-client/src/openllm_client/**'
|
||||
types: [labeled, opened, synchronize, reopened]
|
||||
workflow_call:
|
||||
inputs:
|
||||
tags:
|
||||
required: true
|
||||
type: string
|
||||
env:
|
||||
LINES: 120
|
||||
COLUMNS: 120
|
||||
OPENLLM_DO_NOT_TRACK: True
|
||||
PYTHONUNBUFFERED: '1'
|
||||
AWS_REGION: us-west-2
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}
|
||||
cancel-in-progress: true
|
||||
jobs:
|
||||
get_commit_message:
|
||||
name: Get commit message
|
||||
runs-on: ubuntu-latest
|
||||
if: "github.repository == 'bentoml/OpenLLM'" # Don't run on fork repository
|
||||
outputs:
|
||||
message: ${{ steps.commit_message.outputs.message }}
|
||||
steps:
|
||||
- uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # ratchet:actions/checkout@v4.1.6
|
||||
# Gets the correct commit message for pull request
|
||||
with:
|
||||
ref: ${{ github.event.pull_request.head.sha }}
|
||||
- name: Get commit message
|
||||
id: commit_message
|
||||
run: |
|
||||
set -xe
|
||||
COMMIT_MSG=$(git log --no-merges -1 --oneline)
|
||||
echo "message=$COMMIT_MSG" >> $GITHUB_OUTPUT
|
||||
echo github.ref ${{ github.ref }}
|
||||
start-runner:
|
||||
name: Start self-hosted EC2 runner
|
||||
runs-on: ubuntu-latest
|
||||
needs: get_commit_message
|
||||
if: >-
|
||||
contains(needs.get_commit_message.outputs.message, '[ec2 build]') || github.event_name == 'workflow_dispatch' || (github.event_name == 'pull_request' && contains(github.event.pull_request.labels.*.name, '00 - EC2 Build')) || (github.event_name == 'push' && (startsWith(github.ref, 'refs/tags/v') || startsWith(github.ref, 'refs/heads/main')))
|
||||
env:
|
||||
EC2_INSTANCE_TYPE: t3.2xlarge
|
||||
EC2_AMI_ID: ami-089dafe9af191a0fd
|
||||
EC2_SUBNET_ID: subnet-0ca63188fe98788c1,subnet-05997205433b249d0,subnet-07ef5d3e974275fed,subnet-0161ef0151089bb0b
|
||||
EC2_SECURITY_GROUP: sg-051366641bf2b8049
|
||||
outputs:
|
||||
label: ${{ steps.start-ec2-runner.outputs.label }}
|
||||
ec2-instance-id: ${{ steps.start-ec2-runner.outputs.ec2-instance-id }}
|
||||
steps:
|
||||
- name: Configure AWS credentials
|
||||
uses: aws-actions/configure-aws-credentials@e3dd6a429d7300a6a4c196c26e071d42e0343502 # ratchet:aws-actions/configure-aws-credentials@v4.0.2
|
||||
with:
|
||||
aws-access-key-id: ${{ secrets.AWS_PROD_ACCESS_KEY_ID }}
|
||||
aws-secret-access-key: ${{ secrets.AWS_PROD_SECRET_ACCESS_KEY }}
|
||||
aws-region: ${{ env.AWS_REGION }}
|
||||
- name: Start EC2 Runner
|
||||
id: start-ec2-runner
|
||||
uses: aarnphm/ec2-github-runner@main # ratchet:exclude
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.OPENLLM_PAT }}
|
||||
ec2-region: ${{ env.AWS_REGION }}
|
||||
ec2-image-id: ${{ env.EC2_AMI_ID }}
|
||||
ec2-instance-type: ${{ env.EC2_INSTANCE_TYPE }}
|
||||
subnet-id: ${{ env.EC2_SUBNET_ID }}
|
||||
security-group-id: ${{ env.EC2_SECURITY_GROUP }}
|
||||
build-and-push-image:
|
||||
name: Build and push OpenLLM base image
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.job }}-${{ github.head_ref || github.run_id }}
|
||||
cancel-in-progress: true
|
||||
needs: start-runner
|
||||
runs-on: ${{ needs.start-runner.outputs.label }} # run the job on the newly created runner
|
||||
permissions:
|
||||
contents: write
|
||||
packages: write
|
||||
# This is used to complete the identity challenge
|
||||
# with sigstore/fulcio when running outside of PRs.
|
||||
id-token: write
|
||||
security-events: write
|
||||
steps:
|
||||
- uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # ratchet:actions/checkout@v4.1.6
|
||||
with:
|
||||
fetch-depth: 0
|
||||
ref: '${{ inputs.tags }}'
|
||||
- name: Inject slug/short variables
|
||||
uses: rlespinasse/github-slug-action@797d68864753cbceedc271349d402da4590e6302 # ratchet:rlespinasse/github-slug-action@v4.5.0
|
||||
- name: Set up QEMU
|
||||
uses: docker/setup-qemu-action@68827325e0b33c7199eb31dd4e31fbe9023e06e3 # ratchet:docker/setup-qemu-action@v3.0.0
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@d70bba72b1f3fd22344832f00baa16ece964efeb # ratchet:docker/setup-buildx-action@v3.3.0
|
||||
with:
|
||||
install: true
|
||||
driver-opts: |
|
||||
image=moby/buildkit:master
|
||||
network=host
|
||||
- name: Install cosign
|
||||
if: github.event_name != 'pull_request'
|
||||
uses: sigstore/cosign-installer@59acb6260d9c0ba8f4a2f9d9b48431a222b68e20 # ratchet:sigstore/cosign-installer@v3.5.0
|
||||
with:
|
||||
cosign-release: 'v2.1.1'
|
||||
- name: Login to GitHub Container Registry
|
||||
uses: docker/login-action@e92390c5fb421da1463c202d546fed0ec5c39f20 # ratchet:docker/login-action@v3.1.0
|
||||
if: github.event_name != 'pull_request'
|
||||
with:
|
||||
registry: ghcr.io
|
||||
username: ${{ github.actor }}
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
- name: Extract metadata tags and labels for main, release or tag
|
||||
if: github.event_name != 'pull_request'
|
||||
id: meta
|
||||
uses: docker/metadata-action@8e5442c4ef9f78752691e2d8f8d19755c6f78e81 # ratchet:docker/metadata-action@v5.5.1
|
||||
with:
|
||||
flavor: |
|
||||
latest=auto
|
||||
images: |
|
||||
ghcr.io/bentoml/openllm
|
||||
tags: |
|
||||
type=semver,pattern={{version}}
|
||||
type=semver,pattern={{major}}.{{minor}}
|
||||
type=raw,value=latest,enable=${{ github.ref == format('refs/heads/{0}', github.event.repository.default_branch) }}
|
||||
type=raw,value=sha-${{ env.GITHUB_SHA_SHORT }}
|
||||
labels: |
|
||||
maintainer=aarnphm
|
||||
org.opencontainers.image.source="https://github.com/bentoml/OpenLLM"
|
||||
- name: Build and push Docker image
|
||||
id: build-and-push
|
||||
uses: docker/build-push-action@2cdde995de11925a030ce8070c3d77a52ffcf1c0 # ratchet:docker/build-push-action@v5.3.0
|
||||
env:
|
||||
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_PROD_ACCESS_KEY_ID }}
|
||||
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_PROD_SECRET_ACCESS_KEY }}
|
||||
with:
|
||||
context: .
|
||||
file: Dockerfile
|
||||
push: true
|
||||
platforms: 'linux/amd64'
|
||||
build-args: |
|
||||
GIT_SHA=${{ env.GITHUB_SHA }}
|
||||
DOCKER_LABEL=sha-${{ env.GITHUB_SHA_SHORT }}
|
||||
tags: ${{ steps.meta.outputs.tags || steps.meta-pr.outputs.tags }}
|
||||
labels: ${{ steps.meta.outputs.labels || steps.meta-pr.outputs.labels }}
|
||||
# TODO: Once https://github.com/aws/containers-roadmap/issues/876 is supported with OCI 1.1
|
||||
# then move back to saving cache within the public repo. For now we will save the cache manifest within our internal S3 buckets.
|
||||
# NOTE: the region of the S3 on prod is us-east-1, where the EC2 machine is at us-west-2
|
||||
cache-from: type=s3,region=us-east-1,bucket=openllm-cache,name=y5w8i4y6
|
||||
# @aarnphm: max is fine here, since we didn't do any custom code yet, so it is ok to cache every layer for optimal build time
|
||||
# We also ignore-error for now, just upload anything to the blob storage
|
||||
cache-to: type=s3,region=us-east-1,bucket=openllm-cache,name=y5w8i4y6,mode=max,compression=zstd,ignore-error=true
|
||||
- name: Sign the released image
|
||||
if: ${{ github.event_name != 'pull_request' }}
|
||||
env:
|
||||
COSIGN_EXPERIMENTAL: 'true'
|
||||
run: echo "${{ steps.meta.outputs.tags }}" | xargs -I {} cosign sign --yes {}@${{ steps.build-and-push.outputs.digest }}
|
||||
- name: Run Trivy in GitHub SBOM mode and submit results to Dependency Graph
|
||||
uses: aquasecurity/trivy-action@b2933f565dbc598b29947660e66259e3c7bc8561 # ratchet:aquasecurity/trivy-action@master
|
||||
if: ${{ github.event_name != 'pull_request' }}
|
||||
with:
|
||||
image-ref: 'ghcr.io/bentoml/openllm:sha-${{ env.GITHUB_SHA_SHORT }}'
|
||||
format: 'github'
|
||||
output: 'dependency-results.sbom.json'
|
||||
github-pat: ${{ secrets.GITHUB_TOKEN }}
|
||||
scanners: 'vuln'
|
||||
- name: Run Trivy vulnerability scanner
|
||||
uses: aquasecurity/trivy-action@b2933f565dbc598b29947660e66259e3c7bc8561 # ratchet:aquasecurity/trivy-action@master
|
||||
if: ${{ github.event_name != 'pull_request' }}
|
||||
with:
|
||||
image-ref: 'ghcr.io/bentoml/openllm:sha-${{ env.GITHUB_SHA_SHORT }}'
|
||||
format: 'sarif'
|
||||
output: 'trivy-results.sarif'
|
||||
severity: 'CRITICAL'
|
||||
scanners: 'vuln'
|
||||
- name: Upload Trivy scan results to GitHub Security tab
|
||||
uses: github/codeql-action/upload-sarif@b7cec7526559c32f1616476ff32d17ba4c59b2d6 # ratchet:github/codeql-action/upload-sarif@v3.25.5
|
||||
if: ${{ github.event_name != 'pull_request' }}
|
||||
with:
|
||||
sarif_file: 'trivy-results.sarif'
|
||||
# TODO: Add snapshot tests here.
|
||||
stop-runner:
|
||||
name: Stop self-hosted EC2 runner
|
||||
needs:
|
||||
- start-runner
|
||||
- build-and-push-image
|
||||
- get_commit_message
|
||||
runs-on: ubuntu-latest
|
||||
if: >-
|
||||
(contains(needs.get_commit_message.outputs.message, '[ec2 build]') || github.event_name == 'workflow_dispatch' || (github.event_name == 'pull_request' && contains(github.event.pull_request.labels.*.name, '00 - EC2 Build')) || (github.event_name == 'push' && (startsWith(github.ref, 'refs/tags/v') || startsWith(github.ref, 'refs/heads/main')))) && always()
|
||||
steps:
|
||||
- name: Configure AWS credentials
|
||||
uses: aws-actions/configure-aws-credentials@e3dd6a429d7300a6a4c196c26e071d42e0343502 # ratchet:aws-actions/configure-aws-credentials@v4.0.2
|
||||
with:
|
||||
aws-access-key-id: ${{ secrets.AWS_PROD_ACCESS_KEY_ID }}
|
||||
aws-secret-access-key: ${{ secrets.AWS_PROD_SECRET_ACCESS_KEY }}
|
||||
aws-region: ${{ env.AWS_REGION }}
|
||||
- name: Stop EC2 runner
|
||||
uses: aarnphm/ec2-github-runner@af796d217e24ecbbc5a2c49e780cd90616e2b962 # ratchet:aarnphm/ec2-github-runner@main
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.OPENLLM_PAT }}
|
||||
ec2-region: ${{ env.AWS_REGION }}
|
||||
label: ${{ needs.start-runner.outputs.label }}
|
||||
ec2-instance-id: ${{ needs.start-runner.outputs.ec2-instance-id }}
|
||||
155
.github/workflows/ci.yml
vendored
155
.github/workflows/ci.yml
vendored
@@ -1,31 +1,12 @@
|
||||
name: Continuous Integration
|
||||
name: CI
|
||||
on:
|
||||
workflow_call:
|
||||
push:
|
||||
branches: [main]
|
||||
paths-ignore:
|
||||
- 'docs/**'
|
||||
- 'bazel/**'
|
||||
- 'typings/**'
|
||||
- '*.md'
|
||||
- 'changelog.d/**'
|
||||
- 'assets/**'
|
||||
pull_request:
|
||||
branches: [main]
|
||||
paths-ignore:
|
||||
- 'docs/**'
|
||||
- 'bazel/**'
|
||||
- 'typings/**'
|
||||
- '*.md'
|
||||
- 'changelog.d/**'
|
||||
- 'assets/**'
|
||||
env:
|
||||
LINES: 120
|
||||
COLUMNS: 120
|
||||
OPENLLM_DO_NOT_TRACK: True
|
||||
PYTHONUNBUFFERED: '1'
|
||||
HATCH_VERBOSE: 2
|
||||
# https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#defaultsrun
|
||||
PAPERSPACE_API_KEY: ${{secrets.PAPERSPACE_API_KEY}}
|
||||
defaults:
|
||||
run:
|
||||
shell: bash --noprofile --norc -exo pipefail {0}
|
||||
@@ -34,111 +15,35 @@ concurrency:
|
||||
cancel-in-progress: true
|
||||
jobs:
|
||||
tests:
|
||||
runs-on: ${{ matrix.os }}
|
||||
runs-on: ubuntu-latest
|
||||
if: ${{ github.event_name == 'pull_request' || github.event_name == 'push'|| github.event_name == 'workflow_call' }}
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
os: [ubuntu-latest]
|
||||
python-version: ['3.9', '3.12']
|
||||
name: tests (${{ matrix.python-version }}.${{ matrix.os }})
|
||||
name: General API tests
|
||||
steps:
|
||||
- uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # ratchet:actions/checkout@v4.1.6
|
||||
- uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # ratchet:actions/checkout@v4
|
||||
- uses: actions/setup-python@65d7f2d534ac1bc67fcd62888c5f4f3d2cb2b236 # ratchet:actions/setup-python@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
ref: ${{ github.event.pull_request.head.sha }}
|
||||
- uses: bentoml/setup-bentoml-action@862aa8fa0e0c3793fcca4bfe7a62717a497417e4 # ratchet:bentoml/setup-bentoml-action@v1
|
||||
python-version-file: '.python-version-default'
|
||||
- name: setup tooling
|
||||
run: |
|
||||
python -m pip install uv
|
||||
uv pip install --system httpx
|
||||
- name: startup machine
|
||||
run: python tools/machines.py --start ${{ secrets.PAPERSPACE_MACHINE_ID }} || true
|
||||
- name: executing remote ssh commands using password
|
||||
uses: appleboy/ssh-action@029f5b4aeeeb58fdfe1410a5d17f967dacf36262 # ratchet:appleboy/ssh-action@v1.0.3
|
||||
with:
|
||||
bentoml-version: 'main'
|
||||
python-version: ${{ matrix.python-version }}
|
||||
# - name: Run tests
|
||||
# run: hatch run tests:python
|
||||
# - name: Disambiguate coverage filename
|
||||
# run: mv .coverage ".coverage.${{ matrix.os }}.${{ matrix.python-version }}"
|
||||
# - name: Upload coverage data
|
||||
# uses: actions/upload-artifact@a8a3f3ad30e3422c9c7b888a15615d19a852ae32 # ratchet:actions/upload-artifact@v3
|
||||
# with:
|
||||
# name: coverage-data
|
||||
# path: .coverage.*
|
||||
# coverage:
|
||||
# name: report-coverage
|
||||
# runs-on: ubuntu-latest
|
||||
# if: false
|
||||
# needs: tests
|
||||
# steps:
|
||||
# - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # ratchet:actions/checkout@v4.1.1
|
||||
# with:
|
||||
# fetch-depth: 0
|
||||
# ref: ${{ github.event.pull_request.head.sha }}
|
||||
# - uses: bentoml/setup-bentoml-action@862aa8fa0e0c3793fcca4bfe7a62717a497417e4 # ratchet:bentoml/setup-bentoml-action@v1
|
||||
# with:
|
||||
# bentoml-version: 'main'
|
||||
# python-version-file: .python-version-default
|
||||
# - name: Download coverage data
|
||||
# uses: actions/download-artifact@9bc31d5ccc31df68ecc42ccf4149144866c47d8a # ratchet:actions/download-artifact@v3
|
||||
# with:
|
||||
# name: coverage-data
|
||||
# - name: Combine coverage data
|
||||
# run: hatch run coverage:combine
|
||||
# - name: Export coverage reports
|
||||
# run: |
|
||||
# hatch run coverage:report-xml openllm-python
|
||||
# hatch run coverage:report-uncovered-html openllm-python
|
||||
# - name: Upload uncovered HTML report
|
||||
# uses: actions/upload-artifact@a8a3f3ad30e3422c9c7b888a15615d19a852ae32 # ratchet:actions/upload-artifact@v3
|
||||
# with:
|
||||
# name: uncovered-html-report
|
||||
# path: htmlcov
|
||||
# - name: Generate coverage summary
|
||||
# run: hatch run coverage:generate-summary
|
||||
# - name: Write coverage summary report
|
||||
# if: github.event_name == 'pull_request'
|
||||
# run: hatch run coverage:write-summary-report
|
||||
# - name: Update coverage pull request comment
|
||||
# if: github.event_name == 'pull_request' && !github.event.pull_request.head.repo.fork
|
||||
# uses: marocchino/sticky-pull-request-comment@331f8f5b4215f0445d3c07b4967662a32a2d3e31 # ratchet:marocchino/sticky-pull-request-comment@v2
|
||||
# with:
|
||||
# path: coverage-report.md
|
||||
# cli-benchmark:
|
||||
# name: Check for CLI responsiveness
|
||||
# runs-on: ubuntu-latest
|
||||
# env:
|
||||
# HYPERFINE_VERSION: '1.12.0'
|
||||
# steps:
|
||||
# - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # ratchet:actions/checkout@v4.1.1
|
||||
# with:
|
||||
# fetch-depth: 0
|
||||
# - name: Install hyperfine
|
||||
# run: |
|
||||
# wget https://github.com/sharkdp/hyperfine/releases/download/v${HYPERFINE_VERSION}/hyperfine_${HYPERFINE_VERSION}_amd64.deb
|
||||
# sudo dpkg -i hyperfine_${HYPERFINE_VERSION}_amd64.deb
|
||||
# - uses: bentoml/setup-bentoml-action@862aa8fa0e0c3793fcca4bfe7a62717a497417e4 # ratchet:bentoml/setup-bentoml-action@v1
|
||||
# with:
|
||||
# bentoml-version: 'main'
|
||||
# python-version-file: .python-version-default
|
||||
# - name: Install self
|
||||
# run: bash local.sh
|
||||
# - name: Speed
|
||||
# run: hyperfine -m 100 --warmup 10 openllm
|
||||
# brew-dry-run:
|
||||
# name: Running dry-run tests for brew
|
||||
# runs-on: macos-latest
|
||||
# steps:
|
||||
# - name: Install tap and dry-run
|
||||
# run: |
|
||||
# brew tap bentoml/openllm https://github.com/bentoml/openllm
|
||||
# brew install openllm
|
||||
# openllm --help
|
||||
# openllm models --show-available
|
||||
# evergreen: # https://github.com/marketplace/actions/alls-green#why
|
||||
# if: always()
|
||||
# needs:
|
||||
# - tests
|
||||
# # - cli-benchmark
|
||||
# # - brew-dry-run
|
||||
# runs-on: ubuntu-latest
|
||||
# steps:
|
||||
# - name: Decide whether the needed jobs succeeded or failed
|
||||
# uses: re-actors/alls-green@05ac9388f0aebcb5727afa17fcccfecd6f8ec5fe # ratchet:re-actors/alls-green@release/v1
|
||||
# with:
|
||||
# jobs: ${{ toJSON(needs) }}
|
||||
host: ${{secrets.PAPERSPACE_HOST}}
|
||||
username: ${{secrets.PAPERSPACE_USERNAME}}
|
||||
key: ${{secrets.PAPERSPACE_SSH_KEY}}
|
||||
port: ${{secrets.PAPERSPACE_PORT}}
|
||||
script: bash ci.sh --pr ${{github.event.number}}
|
||||
evergreen: # https://github.com/marketplace/actions/alls-green#why
|
||||
if: always()
|
||||
needs:
|
||||
- tests
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Decide whether the needed jobs succeeded or failed
|
||||
uses: re-actors/alls-green@05ac9388f0aebcb5727afa17fcccfecd6f8ec5fe # ratchet:re-actors/alls-green@release/v1
|
||||
with:
|
||||
jobs: ${{ toJSON(needs) }}
|
||||
|
||||
16
.github/workflows/cleanup.yml
vendored
16
.github/workflows/cleanup.yml
vendored
@@ -1,16 +0,0 @@
|
||||
name: Cleanup PR cache
|
||||
on:
|
||||
pull_request:
|
||||
types:
|
||||
- closed
|
||||
jobs:
|
||||
cleanup:
|
||||
runs-on: ubuntu-latest
|
||||
if: "github.repository == 'bentoml/OpenLLM'" # Don't run on fork repository
|
||||
steps:
|
||||
- name: Check out code
|
||||
uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # ratchet:actions/checkout@v4.1.6
|
||||
- name: Cleanup
|
||||
run: "gh extension install actions/gh-actions-cache\n\nREPO=${{ github.repository }}\nBRANCH=\"refs/pull/${{ github.event.pull_request.number }}/merge\"\n\necho \"Fetching list of cache key\"\ncacheKeysForPR=$(gh actions-cache list -R $REPO -B $BRANCH -L 100 | cut -f 1 )\n\n## Setting this to not fail the workflow while deleting cache keys. \nset +e\necho \"Deleting caches...\"\nfor cacheKey in $cacheKeysForPR\ndo\n gh actions-cache delete $cacheKey -R $REPO -B $BRANCH --confirm\ndone\necho \"Done\"\n"
|
||||
env:
|
||||
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
51
.github/workflows/create-releases.yml
vendored
51
.github/workflows/create-releases.yml
vendored
@@ -102,7 +102,6 @@ jobs:
|
||||
needs:
|
||||
- release
|
||||
- publish-python
|
||||
- binary-distribution
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
contents: write
|
||||
@@ -155,62 +154,12 @@ jobs:
|
||||
popd &>/dev/null
|
||||
git add package.json openllm-node/package.json && git commit -S -sm "infra: bump to dev version of ${DEV_VERSION} [generated] [skip ci]"
|
||||
git push origin HEAD:main
|
||||
binary-distribution:
|
||||
if: github.repository_owner == 'bentoml'
|
||||
needs: build-pypi
|
||||
name: Create binary/wheels distribution
|
||||
uses: bentoml/OpenLLM/.github/workflows/binary-releases.yml@main # ratchet:exclude
|
||||
release-notes:
|
||||
if: github.repository_owner == 'bentoml'
|
||||
needs:
|
||||
- release
|
||||
- publish-python
|
||||
- binary-distribution
|
||||
name: Create release notes and setup for next cycle
|
||||
uses: bentoml/OpenLLM/.github/workflows/release-notes.yml@main # ratchet:exclude
|
||||
with:
|
||||
tags: ${{ needs.release.outputs.version }}
|
||||
bump-homebrew-tap:
|
||||
needs:
|
||||
- release-notes
|
||||
- prepare-next-dev-cycle
|
||||
- release
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
contents: write
|
||||
id-token: write
|
||||
steps:
|
||||
- uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # ratchet:actions/checkout@v4.1.6
|
||||
with:
|
||||
fetch-depth: 0
|
||||
- uses: bentoml/setup-bentoml-action@862aa8fa0e0c3793fcca4bfe7a62717a497417e4 # ratchet:bentoml/setup-bentoml-action@v1
|
||||
with:
|
||||
bentoml-version: 'main'
|
||||
python-version-file: .python-version-default
|
||||
- name: Install jq and curl
|
||||
run: sudo apt-get install -y jq curl
|
||||
- name: Import bot's GPG key for signing commits
|
||||
id: import-gpg-key
|
||||
uses: crazy-max/ghaction-import-gpg@01dd5d3ca463c7f10f7f4f7b4f177225ac661ee4 # ratchet:crazy-max/ghaction-import-gpg@v6
|
||||
with:
|
||||
gpg_private_key: ${{ secrets.GPG_PRIVATE_KEY }}
|
||||
passphrase: ${{ secrets.GPG_PASSPHRASE }}
|
||||
git_config_global: true
|
||||
git_user_signingkey: true
|
||||
git_commit_gpgsign: true
|
||||
git_tag_gpgsign: true
|
||||
- name: Update current formula
|
||||
env:
|
||||
GIT_AUTHOR_NAME: ${{ steps.import-gpg-key.outputs.name }}
|
||||
GIT_AUTHOR_EMAIL: ${{ steps.import-gpg-key.outputs.email }}
|
||||
GIT_COMMITTER_NAME: ${{ steps.import-gpg-key.outputs.name }}
|
||||
GIT_COMMITTER_EMAIL: ${{ steps.import-gpg-key.outputs.email }}
|
||||
GITHUB_TOKEN: ${{ secrets.HOMEBREW_GITHUB_TOKEN }}
|
||||
run: |
|
||||
git pull --autostash --no-edit --gpg-sign --ff origin main
|
||||
SEMVER="${{ needs.release.outputs.version }}"
|
||||
SEMVER="${SEMVER#v}"
|
||||
pip install fs jinja2 ghapi plumbum
|
||||
./tools/update-brew-tap.py
|
||||
git add Formula && git commit -S -sm "infra: bump to homebrew tap release to ${SEMVER} [generated] [skip ci]"
|
||||
git push origin HEAD:main
|
||||
|
||||
59
.github/workflows/cron.yml
vendored
59
.github/workflows/cron.yml
vendored
@@ -1,59 +0,0 @@
|
||||
name: Cron update
|
||||
on:
|
||||
workflow_dispatch:
|
||||
schedule:
|
||||
# ┌───────────── minute (0 - 59)
|
||||
# │ ┌───────────── hour (0 - 23)
|
||||
# │ │ ┌───────────── day of the month (1 - 31)
|
||||
# │ │ │ ┌───────────── month (1 - 12 or JAN-DEC)
|
||||
# │ │ │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT)
|
||||
# │ │ │ │ │
|
||||
- cron: '42 2 * * SUN-WED'
|
||||
# https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#defaultsrun
|
||||
defaults:
|
||||
run:
|
||||
shell: bash --noprofile --norc -exo pipefail {0}
|
||||
concurrency:
|
||||
group: cron-${{ github.head_ref || github.run_id }}
|
||||
cancel-in-progress: true
|
||||
jobs:
|
||||
update-actions:
|
||||
runs-on: 'ubuntu-latest'
|
||||
name: Ratchet update
|
||||
if: "github.repository == 'bentoml/OpenLLM'" # Don't run on fork repository
|
||||
env:
|
||||
ACTIONS_TOKEN: ${{ secrets.OPENLLM_PAT }}
|
||||
steps:
|
||||
- uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # ratchet:actions/checkout@v4.1.6
|
||||
with:
|
||||
fetch-depth: 0
|
||||
- name: Install jq and curl
|
||||
run: sudo apt-get install -y jq curl
|
||||
- name: Import bot's GPG key for signing commits
|
||||
id: import-gpg-key
|
||||
uses: crazy-max/ghaction-import-gpg@01dd5d3ca463c7f10f7f4f7b4f177225ac661ee4 # ratchet:crazy-max/ghaction-import-gpg@v6
|
||||
with:
|
||||
gpg_private_key: ${{ secrets.GPG_PRIVATE_KEY }}
|
||||
passphrase: ${{ secrets.GPG_PASSPHRASE }}
|
||||
git_config_global: true
|
||||
git_user_signingkey: true
|
||||
git_commit_gpgsign: true
|
||||
- name: Locking dependencies
|
||||
run: bash ./tools/update-actions.sh
|
||||
- name: Create a PR
|
||||
uses: peter-evans/create-pull-request@6d6857d36972b65feb161a90e484f2984215f83e # ratchet:peter-evans/create-pull-request@v6.0.5
|
||||
env:
|
||||
GIT_AUTHOR_NAME: ${{ steps.import-gpg-key.outputs.name }}
|
||||
GIT_AUTHOR_EMAIL: ${{ steps.import-gpg-key.outputs.email }}
|
||||
GIT_COMMITTER_NAME: ${{ steps.import-gpg-key.outputs.name }}
|
||||
GIT_COMMITTER_EMAIL: ${{ steps.import-gpg-key.outputs.email }}
|
||||
BRANCH_NAME: cron/ratchet
|
||||
with:
|
||||
title: 'ci: update lock actions [generated]'
|
||||
commit-message: 'cron: ratchet update'
|
||||
branch-suffix: timestamp
|
||||
signoff: true
|
||||
delete-branch: true
|
||||
reviewers: aarnphm
|
||||
author: ${{ env.GIT_AUTHOR_NAME }} <${{ env.GIT_AUTHOR_EMAIL }}>
|
||||
branch: ${{ env.BRANCH_NAME }}
|
||||
43
.github/workflows/release-notes.yml
vendored
43
.github/workflows/release-notes.yml
vendored
@@ -35,46 +35,11 @@ jobs:
|
||||
- name: Create release notes
|
||||
run: ./.github/actions/create_release_and_archive.sh ${{ inputs.tags }}
|
||||
- name: Download Python artifacts
|
||||
uses: actions/download-artifact@9bc31d5ccc31df68ecc42ccf4149144866c47d8a # ratchet:actions/download-artifact@v3
|
||||
uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # ratchet:actions/download-artifact@v4
|
||||
with:
|
||||
name: python-artefacts
|
||||
pattern: python-artefacts-*
|
||||
merge-multiple: true
|
||||
path: dist
|
||||
# - name: Download Linux x86_64 compiled artifacts
|
||||
# uses: actions/download-artifact@7a1cd3216ca9260cd8022db641d960b1db4d1be4 # ratchet:actions/download-artifact@v3
|
||||
# with:
|
||||
# name: linux-x86_64-mypyc-wheels
|
||||
# path: dist
|
||||
# - name: Download MacOS x86_64 compiled artifacts
|
||||
# uses: actions/download-artifact@7a1cd3216ca9260cd8022db641d960b1db4d1be4 # ratchet:actions/download-artifact@v3
|
||||
# with:
|
||||
# name: macos-x86_64-mypyc-wheels
|
||||
# path: dist
|
||||
# - name: Download MacOS arm64 compiled artifacts
|
||||
# uses: actions/download-artifact@7a1cd3216ca9260cd8022db641d960b1db4d1be4 # ratchet:actions/download-artifact@v3
|
||||
# with:
|
||||
# name: macos-arm64-mypyc-wheels
|
||||
# path: dist
|
||||
# - name: Download MacOS universal2 compiled artifacts
|
||||
# uses: actions/download-artifact@7a1cd3216ca9260cd8022db641d960b1db4d1be4 # ratchet:actions/download-artifact@v3
|
||||
# with:
|
||||
# name: macos-universal2-mypyc-wheels
|
||||
# path: dist
|
||||
- name: Download binaries
|
||||
uses: actions/download-artifact@9bc31d5ccc31df68ecc42ccf4149144866c47d8a # ratchet:actions/download-artifact@v3
|
||||
with:
|
||||
name: standalone
|
||||
path: archives
|
||||
- name: Download standalone MacOS
|
||||
uses: actions/download-artifact@9bc31d5ccc31df68ecc42ccf4149144866c47d8a # ratchet:actions/download-artifact@v3
|
||||
with:
|
||||
name: staged-macOS
|
||||
path: archives
|
||||
# TODO: Uncomment me when I decided to bring back Windows support
|
||||
# - name: Download installers
|
||||
# uses: actions/download-artifact@7a1cd3216ca9260cd8022db641d960b1db4d1be4 # ratchet:actions/download-artifact@v3
|
||||
# with:
|
||||
# name: installers
|
||||
# path: installers
|
||||
- name: Create release
|
||||
uses: softprops/action-gh-release@69320dbe05506a9a39fc8ae11030b214ec2d1f87 # ratchet:softprops/action-gh-release@v2.0.5
|
||||
with:
|
||||
@@ -83,7 +48,7 @@ jobs:
|
||||
body_path: release_notes.txt
|
||||
fail_on_unmatched_files: true
|
||||
tag_name: '${{ inputs.tags }}'
|
||||
# TODO: add instasllers/* once windows support is back
|
||||
# TODO: add installers/* archives/* once windows support is back
|
||||
files: |-
|
||||
dist/*
|
||||
archives/*
|
||||
|
||||
@@ -1,13 +0,0 @@
|
||||
This directory hosts the brew tap for OpenLLM
|
||||
|
||||
```bash
|
||||
brew tap bentoml/openllm https://github.com/bentoml/openllm
|
||||
|
||||
brew install openllm
|
||||
```
|
||||
|
||||
> [!NOTE]
|
||||
> This will install the prebuilt release binary from the GitHub release. If a release doesn't include the binary, you will need to install from pip instead.
|
||||
|
||||
> [!IMPORTANT]
|
||||
> If you want to install from source, please refer to [Development Guide](https://github.com/bentoml/openllm/tree/main/DEVELOPMENT.md).
|
||||
47
Formula/openllm.rb
generated
47
Formula/openllm.rb
generated
@@ -1,47 +0,0 @@
|
||||
# Generated by tools/update-brew-tap.py. DO NOT EDIT!
|
||||
# Please refer to the original template file Formula/openllm.rb.j2
|
||||
# vim: set ft=ruby:
|
||||
# Homebrew formula that installs the prebuilt `openllm` standalone binary
# published with the v0.4.44 GitHub release.
class Openllm < Formula
  desc "OpenLLM: Operating LLMs in production"
  homepage "https://github.com/bentoml/OpenLLM"
  version "0.4.44"
  license "Apache-2.0"
  # The branch is a keyword argument of `head`; keeping it inside the URL
  # string would make the whole text the repository URL.
  head "https://github.com/bentoml/OpenLLM.git", branch: "main"
  # Default source archive; the on_linux/on_macos blocks below override it
  # with the platform-specific release tarballs.
  url "https://github.com/bentoml/OpenLLM/archive/v0.4.44.tar.gz"
  sha256 "5158eee3c4b771d3cabd8827439c148d3d710a9b63c7ad98a58a607f587d3acc"

  on_linux do
    url "https://github.com/bentoml/OpenLLM/releases/download/v0.4.44/openllm-0.4.44-x86_64-unknown-linux-musl.tar.gz"
    sha256 "d56d14b032ffdb0de0ea2a7575f6039726987adccc74b634b0e943a73679232a"
  end
  on_macos do
    on_arm do
      url "https://github.com/bentoml/OpenLLM/releases/download/v0.4.44/openllm-0.4.44-aarch64-apple-darwin.tar.gz"
      sha256 "ef57c27ab684179c6cccc02cd60506ea6a721908b56e7f483dda0b9931c74cdd"
    end
    on_intel do
      url "https://github.com/bentoml/OpenLLM/releases/download/v0.4.44/openllm-0.4.44-x86_64-apple-darwin.tar.gz"
      sha256 "65c5f176362f00cf29187c6c2062141abfc8165e63a89845038d2921715d270c"
    end
  end

  # Install the extracted platform binary under the stable name `openllm`.
  def install
    on_linux do
      bin.install "openllm-0.4.44-x86_64-unknown-linux-musl" => "openllm"
    end
    on_macos do
      on_arm do
        bin.install "openllm-0.4.44-aarch64-apple-darwin" => "openllm"
      end
      on_intel do
        bin.install "openllm-0.4.44-x86_64-apple-darwin" => "openllm"
      end
    end
    ohai "To get started, run: 'openllm --help'"
    ohai "To see supported models, run: 'openllm models'"
  end

  # Smoke test: the installed binary must be able to report its version.
  test do
    shell_output "#{bin}/openllm --version"
  end
end
|
||||
@@ -1,47 +0,0 @@
|
||||
# Generated by {{ __cmd__ }}. DO NOT EDIT!
|
||||
# Please refer to the original template file {{ __template_file__ }}
|
||||
# vim: set ft=ruby:
|
||||
class Openllm < Formula
|
||||
desc "{{ name }}: {{ description }}"
|
||||
homepage "{{ html_url }}"
|
||||
version "{{ __tag__|replace('v', '') }}"
|
||||
license "{{ license["spdx_id"] }}"
|
||||
head "{{ html_url }}, branch: {{ default_branch }}"
|
||||
url "{{ determine_release_url(svn_url, __tag__, 'archive') }}"
|
||||
sha256 "{{ shadict['archive'] }}"
|
||||
|
||||
on_linux do
|
||||
url "{{ determine_release_url(svn_url, __tag__, 'linux_intel') }}"
|
||||
sha256 "{{ shadict['linux_intel'] }}"
|
||||
end
|
||||
on_macos do
|
||||
on_arm do
|
||||
url "{{ determine_release_url(svn_url, __tag__, 'macos_arm') }}"
|
||||
sha256 "{{ shadict['macos_arm'] }}"
|
||||
end
|
||||
on_intel do
|
||||
url "{{ determine_release_url(svn_url, __tag__, 'macos_intel') }}"
|
||||
sha256 "{{ shadict['macos_intel'] }}"
|
||||
end
|
||||
end
|
||||
|
||||
def install
|
||||
on_linux do
|
||||
bin.install "openllm-{{ __tag__|replace('v', '') }}-{{ __gz_extension__['linux_intel'] }}" => "openllm"
|
||||
end
|
||||
on_macos do
|
||||
on_arm do
|
||||
bin.install "openllm-{{ __tag__|replace('v', '') }}-{{ __gz_extension__['macos_arm'] }}" => "openllm"
|
||||
end
|
||||
on_intel do
|
||||
bin.install "openllm-{{ __tag__|replace('v', '') }}-{{ __gz_extension__['macos_intel'] }}" => "openllm"
|
||||
end
|
||||
end
|
||||
ohai "To get started, run: 'openllm --help'"
|
||||
ohai "To see supported models, run: 'openllm models'"
|
||||
end
|
||||
|
||||
test do
|
||||
shell_output "#{bin}/openllm --version"
|
||||
end
|
||||
end
|
||||
@@ -86,6 +86,12 @@ class HTTPClient(Client):
|
||||
else:
|
||||
llm_config = {**self._config, **attrs}
|
||||
|
||||
if stop is not None:
|
||||
if isinstance(stop, str):
|
||||
stop = [stop]
|
||||
else:
|
||||
stop = list(stop)
|
||||
|
||||
return self._post(
|
||||
f'/{self._api_version}/generate',
|
||||
response_cls=Response,
|
||||
@@ -110,6 +116,13 @@ class HTTPClient(Client):
|
||||
llm_config = {**self._config, **llm_config, **attrs}
|
||||
else:
|
||||
llm_config = {**self._config, **attrs}
|
||||
|
||||
if stop is not None:
|
||||
if isinstance(stop, str):
|
||||
stop = [stop]
|
||||
else:
|
||||
stop = list(stop)
|
||||
|
||||
return self._post(
|
||||
f'/{self._api_version}/generate_stream',
|
||||
response_cls=Response,
|
||||
@@ -181,12 +194,18 @@ class AsyncHTTPClient(AsyncClient, pydantic.BaseModel):
|
||||
timeout = self.timeout
|
||||
if verify is None:
|
||||
verify = self._verify # XXX: need to support this again
|
||||
_metadata = await self._metadata
|
||||
_config = await self._config
|
||||
if llm_config is not None:
|
||||
llm_config = {**_config, **llm_config, **attrs}
|
||||
else:
|
||||
llm_config = {**_config, **attrs}
|
||||
|
||||
if stop is not None:
|
||||
if isinstance(stop, str):
|
||||
stop = [stop]
|
||||
else:
|
||||
stop = list(stop)
|
||||
|
||||
return await self._post(
|
||||
f'/{self._api_version}/generate',
|
||||
response_cls=Response,
|
||||
@@ -209,13 +228,18 @@ class AsyncHTTPClient(AsyncClient, pydantic.BaseModel):
|
||||
timeout = self.timeout
|
||||
if verify is None:
|
||||
verify = self._verify # XXX: need to support this again
|
||||
_metadata = await self._metadata
|
||||
_config = await self._config
|
||||
if llm_config is not None:
|
||||
llm_config = {**_config, **llm_config, **attrs}
|
||||
else:
|
||||
llm_config = {**_config, **attrs}
|
||||
|
||||
if stop is not None:
|
||||
if isinstance(stop, str):
|
||||
stop = [stop]
|
||||
else:
|
||||
stop = list(stop)
|
||||
|
||||
async for response_chunk in await self._post(
|
||||
f'/{self._api_version}/generate_stream',
|
||||
response_cls=Response,
|
||||
|
||||
@@ -52,7 +52,7 @@ class GenerationInput(pydantic.BaseModel):
|
||||
raise RuntimeError('This class is not meant to be used directly. Use "from_config" instead')
|
||||
super().__init__(**data)
|
||||
|
||||
@pydantic.field_validator('stop')
|
||||
@pydantic.field_validator('stop', mode='before')
|
||||
@classmethod
|
||||
def stop_validator(cls, data: str | list[str] | t.Iterable[str] | None) -> list[str] | None:
|
||||
if data is None:
|
||||
|
||||
@@ -1,16 +0,0 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest, typing as t
|
||||
|
||||
|
||||
@pytest.fixture(
    scope='function',
    name='model_id',
    # A list (not a set) keeps the parametrization order identical on every
    # run; set iteration order for strings varies with hash randomization.
    params=[
        'meta-llama/Meta-Llama-3-8B-Instruct',
        'casperhansen/llama-3-70b-instruct-awq',
        'TheBloke/Nous-Hermes-2-Mixtral-8x7B-DPO-AWQ',
    ],
)
def fixture_model_id(request) -> t.Generator[str, None, None]:
    """Yield the model id for the current parametrized test run.

    Tests request this fixture as ``model_id``; each test is executed once
    per entry in ``params``.
    """
    yield request.param
|
||||
@@ -1,26 +1,27 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest, subprocess, sys, openllm, bentoml, asyncio
|
||||
from openai import AsyncOpenAI
|
||||
import pytest, subprocess, sys, asyncio, openllm, bentoml
|
||||
from openai import OpenAI
|
||||
from openai.types.chat import ChatCompletionSystemMessageParam, ChatCompletionUserMessageParam
|
||||
|
||||
SERVER_PORT = 53822
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_openai_compatible(model_id: str):
|
||||
async def test_openai_compatible():
|
||||
model_id = 'meta-llama/Meta-Llama-3-8B-Instruct'
|
||||
server = subprocess.Popen([sys.executable, '-m', 'openllm', 'start', model_id, '--port', str(SERVER_PORT)])
|
||||
await asyncio.sleep(5)
|
||||
with bentoml.SyncHTTPClient(f'http://127.0.0.1:{SERVER_PORT}', server_ready_timeout=90) as client:
|
||||
await asyncio.sleep(10)
|
||||
with bentoml.SyncHTTPClient(f'http://127.0.0.1:{SERVER_PORT}', server_ready_timeout=120) as client:
|
||||
assert client.is_ready(30)
|
||||
|
||||
try:
|
||||
client = AsyncOpenAI(api_key='na', base_url=f'http://127.0.0.1:{SERVER_PORT}/v1')
|
||||
serve_model = (await client.models.list()).data[0].id
|
||||
client = OpenAI(api_key='na', base_url=f'http://127.0.0.1:{SERVER_PORT}/v1')
|
||||
serve_model = client.models.list().data[0].id
|
||||
assert serve_model == openllm.utils.normalise_model_name(model_id)
|
||||
streamable = await client.chat.completions.create(
|
||||
streamable = client.chat.completions.create(
|
||||
model=serve_model,
|
||||
max_tokens=512,
|
||||
max_tokens=128,
|
||||
stream=False,
|
||||
messages=[
|
||||
ChatCompletionSystemMessageParam(
|
||||
@@ -37,18 +38,27 @@ async def test_openai_compatible(model_id: str):
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_generate_endpoint(model_id: str):
|
||||
server = subprocess.Popen([sys.executable, '-m', 'openllm', 'start', model_id, '--port', str(SERVER_PORT)])
|
||||
await asyncio.sleep(5)
|
||||
async def test_generate_endpoint():
|
||||
server = subprocess.Popen([
|
||||
sys.executable,
|
||||
'-m',
|
||||
'openllm',
|
||||
'start',
|
||||
'microsoft/Phi-3-mini-4k-instruct',
|
||||
'--trust-remote-code',
|
||||
'--port',
|
||||
str(SERVER_PORT),
|
||||
])
|
||||
await asyncio.sleep(10)
|
||||
|
||||
with bentoml.SyncHTTPClient(f'http://127.0.0.1:{SERVER_PORT}', server_ready_timeout=90) as client:
|
||||
with bentoml.SyncHTTPClient(f'http://127.0.0.1:{SERVER_PORT}', server_ready_timeout=120) as client:
|
||||
assert client.is_ready(30)
|
||||
|
||||
try:
|
||||
client = openllm.AsyncHTTPClient(f'http://127.0.0.1:{SERVER_PORT}', api_version='v1')
|
||||
assert await client.health()
|
||||
client = openllm.HTTPClient(f'http://127.0.0.1:{SERVER_PORT}', api_version='v1')
|
||||
assert client.health()
|
||||
|
||||
response = await client.generate(
|
||||
response = client.generate(
|
||||
'Tell me more about Apple as a company', stop='technology', llm_config={'temperature': 0.5, 'top_p': 0.2}
|
||||
)
|
||||
assert response is not None
|
||||
|
||||
@@ -163,8 +163,8 @@ testpaths = ["openllm-python/tests"]
|
||||
|
||||
[tool.coverage.paths]
|
||||
openllm = [
|
||||
"openllm-python/src/openllm",
|
||||
"*/openllm-python/src/openllm",
|
||||
"openllm-python/src/_openllm_tiny",
|
||||
"*/openllm-python/src/_openllm_tiny",
|
||||
"openllm-client/src/openllm_client",
|
||||
"*/openllm-client/src/openllm_client",
|
||||
"openllm-core/src/openllm_core",
|
||||
@@ -174,13 +174,11 @@ openllm = [
|
||||
branch = true
|
||||
omit = [
|
||||
"__pypackages__/*",
|
||||
"openllm-python/src/openllm/_version.py",
|
||||
"openllm-python/src/openllm/__init__.py",
|
||||
"openllm-python/src/openllm/__main__.py",
|
||||
"openllm-core/src/openllm_core/_typing_compat.py",
|
||||
"openllm-client/src/openllm_client/pb/**",
|
||||
"openllm-core/src/openllm_core/_version.py",
|
||||
"openllm-client/src/openllm_client/_version.py",
|
||||
]
|
||||
source_pkgs = ["openllm", "openllm_core", "openllm_client"]
|
||||
source_pkgs = ["_openllm_tiny", "openllm_core", "openllm_client"]
|
||||
[tool.coverage.report]
|
||||
exclude_lines = [
|
||||
"no cov",
|
||||
@@ -204,11 +202,9 @@ exclude_lines = [
|
||||
]
|
||||
omit = [
|
||||
"__pypackages__/*",
|
||||
"openllm-python/src/openllm/_version.py",
|
||||
"openllm-python/src/openllm/__init__.py",
|
||||
"openllm-python/src/openllm/__main__.py",
|
||||
"openllm-core/src/openllm_core/_typing_compat.py",
|
||||
"openllm-client/src/openllm_client/pb/**",
|
||||
"openllm-core/src/openllm_core/_version.py",
|
||||
"openllm-client/src/openllm_client/_version.py",
|
||||
]
|
||||
precision = 2
|
||||
show_missing = true
|
||||
|
||||
69
tools/machines.py
Normal file
69
tools/machines.py
Normal file
@@ -0,0 +1,69 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import httpx,os,dataclasses,logging,time,argparse,typing as t
|
||||
|
||||
# Fail fast at import time: the API key is required to build the auth header below.
if (ENV := os.getenv("PAPERSPACE_API_KEY")) is None: raise RuntimeError('This script requires setting "PAPERSPACE_API_KEY"')
|
||||
# Headers for the default httpx client: bearer-token auth, JSON responses.
HEADERS = httpx.Headers({'Authorization': f'Bearer {ENV}', 'Accept': 'application/json'})
|
||||
API_URL = 'https://api.paperspace.com/v1'
|
||||
|
||||
# NOTE(review): with the level set to ERROR, the logger.info progress messages
# used elsewhere in this script are presumably not displayed -- confirm intended.
logging.basicConfig(level=logging.ERROR)
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@dataclasses.dataclass
class Machine:
    """Handle on a single Paperspace machine, addressed by its id.

    Usable as a context manager; the underlying HTTP client is closed on exit.
    """

    # Identifier interpolated into the ``/machines/{id}`` API paths.
    id: str
    # HTTP client used for every request; defaults to one configured with the
    # module-level HEADERS and API_URL and a 60 second timeout.
    inner: httpx.Client = dataclasses.field(
        default_factory=lambda: httpx.Client(headers=HEADERS, base_url=API_URL, timeout=60), repr=False
    )

    def close(self) -> None:
        """Close the underlying HTTP client."""
        self.inner.close()

    def __del__(self) -> None:
        self.close()

    def __enter__(self) -> Machine:
        return self

    def __exit__(self, *_: t.Any) -> None:
        self.close()

    @property
    def metadata(self) -> dict[str, t.Any]:
        """Fetch the machine description from the API (one GET per access)."""
        return self.inner.get(f'/machines/{self.id}').json()

    @property
    def status(self) -> t.Literal['off', 'ready', 'stopping', 'starting']:
        """Return the ``state`` field of freshly fetched metadata."""
        return self.metadata['state']

    def start(self) -> bool:
        """Request a machine start.

        Returns:
            True only when the API accepted the request (HTTP 200). False when
            the machine is already running or the request failed, so callers
            never wait on a transition that was not triggered.
        """
        response = self.inner.patch(f'/machines/{self.id}/start')
        if response.status_code == 400 or self.status == 'ready':
            logger.error('machine is already running')
            return False
        if response.status_code != 200:
            # Log the raw body: an error response is not guaranteed to be JSON.
            logger.error('Error while starting machine "%s": %s', self.id, response.text)
            return False
        return True

    def stop(self) -> bool:
        """Request a machine stop.

        Returns:
            True only when the API accepted the request (HTTP 200). False when
            the machine is already off or the request failed.
        """
        response = self.inner.patch(f'/machines/{self.id}/stop')
        if response.status_code == 400 or self.status == 'off':
            logger.error('machine is already off')
            return False
        if response.status_code != 200:
            # Log the raw body: an error response is not guaranteed to be JSON.
            logger.error('Error while stopping machine "%s": %s', self.id, response.text)
            return False
        return True
|
||||
|
||||
def main() -> int:
    """Start or stop a Paperspace machine and block until it settles.

    Command line: exactly one of ``--start ID`` or ``--stop ID``.

    Returns:
        0 once the machine reached the requested state, 1 when the start/stop
        request was rejected or failed.
    """
    parser = argparse.ArgumentParser()
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument('--start', metavar='ID')
    group.add_argument('--stop', metavar='ID')
    args = parser.parse_args()

    if args.start:
        with Machine(id=args.start) as machine:
            if not machine.start():
                logger.error('Failed to start machine "%s"', machine.id)
                return 1
            while machine.status != 'ready':
                logger.info('Waiting for machine "%s" to be ready...', machine.id)
                time.sleep(5)
    elif args.stop:
        with Machine(id=args.stop) as machine:
            if not machine.stop():
                logger.error('Failed to stop machine "%s"', machine.id)
                return 1
            # A stopped machine reports 'off'; waiting for 'ready' here would
            # never terminate once the shutdown completes.
            while machine.status != 'off':
                logger.info('Waiting for machine "%s" to stop...', machine.id)
                time.sleep(5)
    return 0
|
||||
|
||||
if __name__ == "__main__": raise SystemExit(main())
|
||||
@@ -1,75 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
from __future__ import annotations
|
||||
import os, typing as t, fs
|
||||
from pathlib import Path
|
||||
from ghapi.all import GhApi
|
||||
from jinja2 import Environment
|
||||
from jinja2.loaders import FileSystemLoader
|
||||
from plumbum.cmd import curl, cut, shasum
|
||||
|
||||
if t.TYPE_CHECKING:
|
||||
from plumbum.commands.base import Pipeline
|
||||
|
||||
# get git root from this file
|
||||
ROOT = Path(__file__).parent.parent
|
||||
|
||||
_OWNER = 'bentoml'
|
||||
_REPO = 'openllm'
|
||||
|
||||
_gz_strategies: dict[t.Literal['macos_arm', 'macos_intel', 'linux_intel'], str] = {
|
||||
'macos_arm': 'aarch64-apple-darwin',
|
||||
'macos_intel': 'x86_64-apple-darwin',
|
||||
'linux_intel': 'x86_64-unknown-linux-musl',
|
||||
}
|
||||
|
||||
|
||||
def determine_release_url(
    svn_url: str, tag: str, target: t.Literal['macos_arm', 'macos_intel', 'linux_intel', 'archive']
) -> str:
    """Return the download URL of a release asset.

    Args:
        svn_url: Repository base URL the asset paths are appended to.
        tag: Release tag, e.g. ``v0.4.44``.
        target: ``'archive'`` for the source tarball of the tag, otherwise a
            key of ``_gz_strategies`` selecting the platform binary tarball.

    Raises:
        KeyError: if ``target`` is neither ``'archive'`` nor a known platform.
    """
    if target == 'archive':
        return f'{svn_url}/archive/{tag}.tar.gz'
    # Drop only the leading "v": str.replace would also strip later ones
    # (e.g. the "v" in ".dev1") and produce a wrong asset name.
    version = tag[1:] if tag.startswith('v') else tag
    return f'{svn_url}/releases/download/{tag}/openllm-{version}-{_gz_strategies[target]}.tar.gz'
|
||||
|
||||
|
||||
# Shell equivalent: curl -sSL <url> | shasum -a256 | cut -d ' ' -f1
def get_release_hash_command(svn_url: str, tag: str) -> Pipeline:
    """Build (without running) a pipeline that prints the SHA-256 of a download.

    Args:
        svn_url: URL handed to ``curl``.
        tag: Accepted for call-site symmetry; not used by the pipeline.

    Returns:
        The plumbum pipeline; invoking it yields the digest text.
    """
    download = curl['-sSL', svn_url]
    digest = shasum['-a256']
    # shasum prints "<hash>  <name>"; keep only the first space-separated field.
    first_field = cut['-d', ' ', '-f1']
    return download | digest | first_field
|
||||
|
||||
|
||||
def main() -> int:
    """Render ``Formula/openllm.rb`` from its Jinja2 template.

    Queries the GitHub API for repository info and the latest release name,
    hashes the source archive and each platform tarball, then writes the
    rendered formula followed by a trailing newline.

    Returns:
        Always 0.
    """
    gh = GhApi(owner=_OWNER, repo=_REPO, authenticate=False)
    repo_info = gh.repos.get()
    latest_tag = gh.repos.get_latest_release().name

    def _sha256(target):
        # Download the asset for `target` and return its hex digest.
        asset_url = determine_release_url(repo_info.svn_url, latest_tag, target)
        return get_release_hash_command(asset_url, latest_tag)().strip()

    # Platform tarballs first, source archive last (same order as the keys
    # end up in the mapping handed to the template).
    shadict: dict[str, t.Any] = {}
    for target in (*_gz_strategies, 'archive'):
        shadict[target] = _sha256(target)

    jinja_env = Environment(
        extensions=['jinja2.ext.do', 'jinja2.ext.loopcontrols', 'jinja2.ext.debug'],
        trim_blocks=True,
        lstrip_blocks=True,
        loader=FileSystemLoader((ROOT / 'Formula').__fspath__(), followlinks=True),
    )
    template_file = 'openllm.rb.j2'
    formula_path = ROOT / 'Formula' / 'openllm.rb'
    with formula_path.open('w') as out:
        template = jinja_env.get_template(template_file, globals={'determine_release_url': determine_release_url})
        # "<dir>/<script>" of this file, shown in the generated header comment.
        script_name = fs.path.join(os.path.basename(os.path.dirname(__file__)), os.path.basename(__file__))
        rendered = template.render(
            shadict=shadict,
            __tag__=latest_tag,
            __cmd__=script_name,
            __template_file__=fs.path.join('Formula', template_file),
            __gz_extension__=_gz_strategies,
            **repo_info,
        )
        out.write(rendered)
        out.write('\n')
    return 0
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
raise SystemExit(main())
|
||||
Reference in New Issue
Block a user