diff --git a/.gitattributes b/.gitattributes
index 60e5a277..83d331b7 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -1,5 +1,3 @@
-**/_next/ linguist-generated=true
-
 * text=auto eol=lf
 # Needed for setuptools-scm-git-archive
 .git_archival.txt export-subst
diff --git a/.github/INFRA.md b/.github/INFRA.md
deleted file mode 100644
index 93c744ea..00000000
--- a/.github/INFRA.md
+++ /dev/null
@@ -1,64 +0,0 @@
-## OpenLLM CI/CD
-
-> [!NOTE]
-> All actions within this repository should always be locked to a specific version. We are using [ratchet](https://github.com/sethvargo/ratchet)
-> for doing this via [this script](https://github.com/bentoml/OpenLLM/blob/main/tools/lock-actions.sh)
-
-OpenLLM uses a GitHub Action to run all CI/CD workflows. It also use [pre-commit.ci](https://pre-commit.ci/) to run CI for all pre-commit hooks.
-
-The folder structure of this are as follow:
-
-```prolog
-.
-├── CODEOWNERS            # Code owners
-├── CODE_OF_CONDUCT.md    # Code of conduct
-├── ISSUE_TEMPLATE        # Contains issue templates
-├── SECURITY.md           # Security policy
-├── actions               # Contains helpers script for all actions
-├── assets                # Contains static assets to be used throughout this repository
-├── dependabot.yml        # Dependabot configuration
-└── workflows
-    ├── binary-releases.yml   # Build and publish binary releases
-    ├── build.yml             # Self-hosted EC2 runners
-    ├── ci.yml                # CI workflow
-    ├── cleanup.yml           # Cache cleanup
-    ├── build-pypi.yml        # Build PyPI packages
-    ├── create-releases.yml   # Create GitHub releases
-    ├── cron.yml              # Cron jobs
-    └── release-notes.yml     # Generate release notes
-```
-
-> [!IMPORTANT]
-> All of the following jobs will and should only be run within the BentoML organisation and this repository.
-
-### Self-hosted EC2 runners
-
-The workflow for self-hosted EC2 runners is located in [build.yml](/.github/workflows/build.yml).
-This workflow is currently used for building OpenLLM base images that contains all compiled kernels
-for serving. It will then be published to the following registry:
-
-- GitHub Container Registry (`ghcr.io/bentoml/openllm`): This is where users can extend the base image
-  with their own custom kernels or use as base for building Bentos
-
-- AWS Elastic Container Registry (`public.ecr.aws/y5w8i4y6/bentoml/openllm`): This is where all Bento
-  created with `openllm` will be using. This is purely for build optimisation on BentoCloud.
-
-There are a few ways to trigger this workflow:
-
-- Automatically triggered when a new commit is pushed to the `main` branch and tag release
-
-- On pull request: This will be triggered manually when the label `00 - EC2 Build`
-
-- On commit with the following `[ec2 build]`
-
-### Wheel compilation
-
-The workflow for wheel compilation is located in [build-pypi.yml](/.github/workflows/build-pypi.yml).
-
-To speed up CI, opt in to the following label `02 - Wheel Build` on pull request or add `[wheel build]` to commit message.
-
-### Binary releases
-
-The workflow for binary releases is located in [binary-releases.yml](/.github/workflows/binary-releases.yml).
-
-To speed up CI, opt in to the following label `03 - Standalone Build` on pull request or add `[binary build]` to commit message.
diff --git a/.github/actions/release.sh b/.github/actions/release.sh
index c14fdb03..44073faf 100755
--- a/.github/actions/release.sh
+++ b/.github/actions/release.sh
@@ -48,14 +48,8 @@ fi
 release_package() {
   local version="$1"
   echo "Releasing version ${version}..."
-  jq --arg release_version "${version}" '.version = $release_version' package.json.tmp && mv package.json.tmp package.json
-  pushd openllm-node &>/dev/null
-  jq --arg release_version "${version}" '.version = $release_version' package.json.tmp && mv package.json.tmp package.json
-  popd &>/dev/null
   towncrier build --yes --version "${version}"
-  ./tools/dependencies.py --release-version "${version}"
-  cp CHANGELOG.md openllm-python/CHANGELOG.md
-  git add CHANGELOG.md openllm-python/pyproject.toml openllm-python/CHANGELOG.md changelog.d package.json openllm-node/package.json
+  git add CHANGELOG.md changelog.d
   git commit -S -sm "infra: prepare for release ${version} [generated] [skip ci]"
   git push origin main
   echo "Releasing tag ${version}..." && git tag -a "v${version}" -sm "Release ${version} [generated by GitHub Actions]"
diff --git a/.github/assets/agent.gif b/.github/assets/agent.gif
deleted file mode 100644
index 93c4bcc4..00000000
Binary files a/.github/assets/agent.gif and /dev/null differ
diff --git a/.github/dependabot.yml b/.github/dependabot.yml
index 1b0ad9c0..ae377fcb 100644
--- a/.github/dependabot.yml
+++ b/.github/dependabot.yml
@@ -1,28 +1,13 @@
 version: 2
 updates:
   - package-ecosystem: github-actions
-    directory: '/'
+    directory: "/"
     schedule:
-      interval: 'weekly'
-      day: 'monday'
-      time: '09:00'
+      interval: "weekly"
+      day: "monday"
+      time: "09:00"
   - package-ecosystem: pip
-    directory: '/openllm-python'
+    directory: "/"
     schedule:
-      interval: 'weekly'
+      interval: "weekly"
     open-pull-requests-limit: 5
-  - package-ecosystem: pip
-    directory: '/openllm-core'
-    schedule:
-      interval: 'weekly'
-    open-pull-requests-limit: 5
-  - package-ecosystem: pip
-    directory: '/openllm-client'
-    schedule:
-      interval: 'weekly'
-    open-pull-requests-limit: 5
-  - package-ecosystem: npm
-    directory: '/'
-    schedule:
-      interval: 'weekly'
-    open-pull-requests-limit: 0
diff --git a/.github/workflows/binary-releases.yml b/.github/workflows/binary-releases.yml
deleted file mode 100644
index d6c32658..00000000
--- a/.github/workflows/binary-releases.yml
+++ /dev/null
@@ -1,274 +0,0 @@
-# modified workflow from ofek/hatch, big kudos to them.
-name: Standalone build -on: - workflow_call: - workflow_dispatch: - push: - branches: [main] - paths-ignore: - - '*.md' - - 'changelog.d/**' - - 'assets/**' - - 'openllm-node/**' - - 'Formula/**' - pull_request: - branches: [main] - paths-ignore: - - '*.md' - - 'changelog.d/**' - - 'assets/**' - - 'openllm-node/**' - - 'Formula/**' -defaults: - run: - shell: bash --noprofile --norc -exo pipefail {0} -concurrency: - group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }} - cancel-in-progress: true -env: - APP_NAME: openllm - PYOXIDIZER_VERSION: '0.24.0' - HATCH_VERBOSE: 2 -jobs: - get_commit_message: - name: Get commit message - runs-on: ubuntu-latest - if: "github.repository == 'bentoml/OpenLLM'" # Don't run on fork repository - outputs: - message: ${{ steps.commit_message.outputs.message }} - steps: - - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # ratchet:actions/checkout@v4.1.7 - # Gets the correct commit message for pull request - with: - ref: ${{ github.event.pull_request.head.sha }} - - name: Get commit message - id: commit_message - run: | - set -xe - COMMIT_MSG=$(git log --no-merges -1 --oneline) - echo "message=$COMMIT_MSG" >> $GITHUB_OUTPUT - echo github.ref ${{ github.ref }} - python-artifacts: - name: Build wheel and source distribution - runs-on: ubuntu-latest - needs: get_commit_message - if: >- - contains(needs.get_commit_message.outputs.message, '[binary build]') || github.event_name == 'workflow_dispatch' || github.event_name == 'workflow_call' || (github.event_name == 'pull_request' && contains(github.event.pull_request.labels.*.name, '03 - Standalone Build')) || (github.event_name == 'push' && (startsWith(github.ref, 'refs/tags/v') || startsWith(github.ref, 'refs/heads/main'))) - steps: - - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # ratchet:actions/checkout@v4.1.7 - with: - fetch-depth: 0 - - uses: bentoml/setup-bentoml-action@862aa8fa0e0c3793fcca4bfe7a62717a497417e4 # ratchet:bentoml/setup-bentoml-action@v1 - with: - bentoml-version: 'main' - python-version-file: .python-version-default - - name: Pull latest change - if: ${{ github.event_name != 'pull_request' }} - run: git pull --autostash --no-edit --gpg-sign --ff origin main - - name: Install build frontend - run: python -m pip install --upgrade build - - name: Build - run: python -m build -sw openllm-python/ - - name: Upload artifacts - uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # ratchet:actions/upload-artifact@v4 - with: - name: binary-artefacts - path: openllm-python/dist/* - if-no-files-found: error - binaries: - name: ${{ matrix.job.target }} (${{ matrix.job.os }}) - needs: - - python-artifacts - defaults: - run: - working-directory: openllm-python - runs-on: ${{ matrix.job.os }} - strategy: - fail-fast: false - matrix: - job: - # Linux - - target: aarch64-unknown-linux-gnu - os: ubuntu-22.04 - cross: true - - target: x86_64-unknown-linux-gnu - os: ubuntu-22.04 - cross: true - - target: x86_64-unknown-linux-musl - os: ubuntu-22.04 - cross: true - - target: i686-unknown-linux-gnu - os: ubuntu-22.04 - cross: true - - target: powerpc64le-unknown-linux-gnu - os: ubuntu-22.04 - cross: true - # macOS - - target: aarch64-apple-darwin - os: macos-12 - - target: x86_64-apple-darwin - os: macos-12 - outputs: - version: ${{ steps.version.outputs.version }} - env: - CARGO: cargo - CARGO_BUILD_TARGET: ${{ matrix.job.target }} - PYAPP_REPO: pyapp - PYAPP_VERSION: '0.10.1' - PYAPP_PIP_EXTERNAL: 'true' - steps: - - name: 
Checkout code - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # ratchet:actions/checkout@v4.1.7 - with: - fetch-depth: 0 - - name: Pull latest change - if: ${{ github.event_name != 'pull_request' }} - run: git pull --autostash --no-edit --gpg-sign --ff origin main - - name: Fetch PyApp - run: >- - mkdir $PYAPP_REPO && curl -L https://github.com/ofek/pyapp/releases/download/v$PYAPP_VERSION/source.tar.gz | tar --strip-components=1 -xzf - -C $PYAPP_REPO - - name: Set up Python - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # ratchet:actions/setup-python@v5.1.0 - with: - python-version-file: .python-version-default - - name: Install Hatch - run: pip install -U hatch - - name: Install Rust toolchain - uses: dtolnay/rust-toolchain@d388a4836fcdbde0e50e395dc79a2670ccdef13f # ratchet:dtolnay/rust-toolchain@stable - with: - targets: ${{ matrix.job.target }} - - name: Set up cross compiling - if: matrix.job.cross - uses: taiki-e/install-action@64e4e2f995104968c78bd697b253d55bf557af66 # ratchet:taiki-e/install-action@v2.41.11 - with: - tool: cross - - name: Configure cross compiling - if: matrix.job.cross - run: echo "CARGO=cross" >> $GITHUB_ENV - - name: Configure target - run: |- - config_file="$PYAPP_REPO/.cargo/config_${{ matrix.job.target }}.toml" - if [[ -f "$config_file" ]]; then - mv "$config_file" "$PYAPP_REPO/.cargo/config.toml" - fi - - name: Download Python artifacts - if: ${{ !startsWith(github.event.ref, 'refs/tags') }} - uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # ratchet:actions/download-artifact@v4 - with: - name: binary-artefacts - path: openllm-python/dist - - name: Configure embedded project - if: ${{ !startsWith(github.event.ref, 'refs/tags') }} - run: |- - cd dist - wheel="$(echo *.whl)" - mv "$wheel" "../$PYAPP_REPO" - echo "PYAPP_PROJECT_PATH=$wheel" >> $GITHUB_ENV - - name: Build binary - run: hatch build --target app - # Windows installers don't accept non-integer versions so we ubiquitously - # perform the following transformation: X.Y.Z.devN -> X.Y.Z.N - - name: Set project version - id: version - run: |- - old_version="$(hatch version)" - version="${old_version/dev/}" - - if [[ "$version" != "$old_version" ]]; then - cd dist/app - find . -type f -iname "openllm-*" | while read -r file; do - binary=$(echo "$file" | sed 's/dev//') - mv "$file" "$binary" - done - fi - - echo "version=$version" >> $GITHUB_OUTPUT - echo "$version" - - name: Archive binary - run: |- - mkdir packaging - cd dist/app - find . 
-type f -iname "openllm-*" | while read -r file; do - if [[ "$file" =~ -pc-windows- ]]; then - 7z a "../../packaging/${file:0:-4}.zip" "$file" - else - binary="${file/dev/}" - chmod +x "$file" - tar -czf "../../packaging/$binary.tar.gz" "$file" - fi - done - - name: Upload staged archive - if: runner.os != 'Linux' - uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # ratchet:actions/upload-artifact@v4 - with: - name: staged-${{ runner.os }}-${{ matrix.job.target }} - path: openllm-python/packaging/* - if-no-files-found: error - - name: Upload archive - if: runner.os == 'Linux' - uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # ratchet:actions/upload-artifact@v4 - with: - name: standalone-${{ runner.os }}-${{ matrix.job.target }} - path: openllm-python/packaging/* - if-no-files-found: error - windows-packaging: - name: Build Windows installers - needs: binaries - if: false # ${{ github.event_name != 'pull_request' }} - runs-on: windows-2022 - env: - VERSION: ${{ needs.binaries.outputs.version }} - defaults: - run: - working-directory: openllm-python - steps: - - name: Checkout code - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # ratchet:actions/checkout@v4.1.7 - - name: Set up Python - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # ratchet:actions/setup-python@v5.1.0 - with: - python-version-file: .python-version-default - - name: Pull latest change - run: git pull --autostash --no-edit --gpg-sign --ff origin main - - name: Install PyOxidizer ${{ env.PYOXIDIZER_VERSION }} - run: pip install pyoxidizer==${{ env.PYOXIDIZER_VERSION }} - - name: Download staged binaries - uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # ratchet:actions/download-artifact@v4 - with: - name: staged-${{ runner.os }} - path: openllm-python/archives - - name: Extract staged binaries - run: |- - mkdir -p bin - for f in archives/*; do - 7z e "$f" -obin - done - # bin/--.exe -> targets//.exe - - name: Prepare binaries - run: |- - mkdir -p targets - for f in bin/*; do - if [[ "$f" =~ openllm-${{ env.VERSION }}-(.+).exe$ ]]; then - target="${BASH_REMATCH[1]}" - mkdir -p "targets/$target" - mv "$f" "targets/$target/${{ env.APP_NAME }}.exe" - fi - done - - name: Build installers - run: >- - pyoxidizer build windows_installers --release --var version ${{ env.VERSION }} - - name: Prepare installers - run: |- - mkdir installers - mv build/*/release/*/*.{exe,msi} installers - - name: Upload binaries - uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # ratchet:actions/upload-artifact@v4 - with: - name: standalone - path: openllm-python/archives/* - if-no-files-found: error - - name: Upload installers - uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # ratchet:actions/upload-artifact@v4 - with: - name: installers - path: openllm-python/installers/* diff --git a/.github/workflows/build-pypi.yml b/.github/workflows/build-pypi.yml deleted file mode 100644 index d3cc7d87..00000000 --- a/.github/workflows/build-pypi.yml +++ /dev/null @@ -1,143 +0,0 @@ -name: Compile PyPI package -on: - workflow_dispatch: - inputs: - tags: - description: 'Tag for releases' - required: true - type: string - workflow_call: - inputs: - tags: - required: true - type: string - outputs: - sucess: - description: 'Whether the build is successful or not' - value: ${{ jobs.sucess-build.outputs.success }} - push: - branches: [main] - paths-ignore: - - '*.md' - - 'changelog.d/**' - - 'assets/**' - 
pull_request: - branches: [main] - paths-ignore: - - '*.md' - - 'changelog.d/**' - - 'assets/**' -env: - LINES: 120 - COLUMNS: 120 - HATCH_VERBOSE: 2 - CIBW_BUILD_FRONTEND: build - CIBW_ENVIRONMENT_PASS_LINUX: > - HATCH_BUILD_HOOKS_ENABLE MYPYPATH - -concurrency: - group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }} - cancel-in-progress: true -jobs: - get_commit_message: - name: Get commit message - runs-on: ubuntu-latest - if: "github.repository == 'bentoml/OpenLLM'" # Don't run on fork repository - outputs: - message: ${{ steps.commit_message.outputs.message }} - steps: - - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # ratchet:actions/checkout@v4.1.7 - # Gets the correct commit message for pull request - with: - ref: ${{ github.event.pull_request.head.sha }} - - name: Get commit message - id: commit_message - run: | - set -xe - COMMIT_MSG=$(git log --no-merges -1 --oneline) - echo "message=$COMMIT_MSG" >> $GITHUB_OUTPUT - echo github.ref ${{ github.ref }} - pure-wheels-sdist: - name: Pure wheels and sdist distribution (${{ matrix.directory }}) - runs-on: ubuntu-latest - needs: get_commit_message - if: >- - contains(needs.get_commit_message.outputs.message, '[wheel build]') || github.event_name == 'workflow_dispatch' || github.event_name == 'workflow_call' || (github.event_name == 'pull_request' && contains(github.event.pull_request.labels.*.name, '02 - Wheel Build')) || (github.event_name == 'push' && (startsWith(github.ref, 'refs/tags/v') || startsWith(github.ref, 'refs/heads/main'))) - strategy: - fail-fast: false - matrix: - directory: ['openllm-core', 'openllm-python', 'openllm-client'] - steps: - - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # ratchet:actions/checkout@v4.1.7 - with: - fetch-depth: 0 - ref: '${{ inputs.tags }}' - - uses: bentoml/setup-bentoml-action@862aa8fa0e0c3793fcca4bfe7a62717a497417e4 # ratchet:bentoml/setup-bentoml-action@v1 - with: - bentoml-version: 'main' - python-version-file: .python-version-default - - name: Build - run: hatch build - working-directory: ${{ matrix.directory }} - - name: Upload artifacts - uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # ratchet:actions/upload-artifact@v4 - with: - name: python-artefacts-${{ matrix.directory }} - path: ${{ matrix.directory }}/dist/* - if-no-files-found: error - check-download-artefacts: - name: dry-run for downloading artefacts - if: github.event_name == 'pull_request' - needs: [pure-wheels-sdist] - runs-on: ubuntu-latest - steps: - - name: Download Python artifacts - uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # ratchet:actions/download-artifact@v4 - with: - pattern: python-artefacts-* - merge-multiple: true - path: dist - - name: dry ls - run: ls -rthlaR - push-nightly: - name: Push nightly wheels - if: ${{ !github.event.repository.fork && github.event_name == 'push' }} - runs-on: ubuntu-latest - permissions: - id-token: write - needs: [pure-wheels-sdist] - steps: - - name: Download Python artifacts - uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # ratchet:actions/download-artifact@v4 - with: - pattern: python-artefacts-* - merge-multiple: true - path: dist - - name: Publish nightly wheels to test.pypi.org - uses: pypa/gh-action-pypi-publish@ec4db0b4ddc65acdf4bff5fa45ac92d78b56bdf0 # ratchet:pypa/gh-action-pypi-publish@release/v1 - with: - repository-url: https://test.pypi.org/legacy/ - sucess-push: # https://github.com/marketplace/actions/alls-green#why 
- if: github.event_name == 'push' - needs: [push-nightly, pure-wheels-sdist] - runs-on: ubuntu-latest - outputs: - success: ${{ steps.everygreen.outputs.success }} - steps: - - name: Decide whether the needed jobs succeeded or failed - uses: re-actors/alls-green@05ac9388f0aebcb5727afa17fcccfecd6f8ec5fe # ratchet:re-actors/alls-green@release/v1 - id: evergreen - with: - jobs: ${{ toJSON(needs) }} - sucess-pr: # https://github.com/marketplace/actions/alls-green#why - if: github.event_name == 'pull_request' - needs: [pure-wheels-sdist] - runs-on: ubuntu-latest - outputs: - success: ${{ steps.everygreen.outputs.success }} - steps: - - name: Decide whether the needed jobs succeeded or failed - uses: re-actors/alls-green@05ac9388f0aebcb5727afa17fcccfecd6f8ec5fe # ratchet:re-actors/alls-green@release/v1 - id: evergreen - with: - jobs: ${{ toJSON(needs) }} diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml deleted file mode 100644 index 1d1e898d..00000000 --- a/.github/workflows/ci.yml +++ /dev/null @@ -1,71 +0,0 @@ -name: CI -on: - workflow_call: - push: - branches: [main] - pull_request: - branches: [main] -env: - PAPERSPACE_API_KEY: ${{secrets.PAPERSPACE_API_KEY}} - PAPERSPACE_NETWORK_ID: ${{secrets.PAPERSPACE_NETWORK_ID}} -defaults: - run: - shell: bash --noprofile --norc -eo pipefail {0} -concurrency: - group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }} - cancel-in-progress: true -jobs: - tests: - runs-on: ubuntu-latest - if: ${{ github.event_name == 'pull_request' || github.event_name == 'push'|| github.event_name == 'workflow_call' }} - name: General API tests - steps: - - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # ratchet:actions/checkout@v4 - - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # ratchet:actions/setup-python@v5 - with: - python-version-file: '.python-version-default' - - name: setup tooling - run: | - python -m pip install uv - uv pip install --system httpx orjson - - name: startup machine - id: paperspace-machine - run: | - PUBLIC_IP=$(python tools/machines.py --ci-template ${{secrets.PAPERSPACE_TEMPLATE_ID}} --output-ip) - MACHINE_ID=$(python tools/machines.py --ci-template ${{secrets.PAPERSPACE_TEMPLATE_ID}} --output-id) - echo "::add-mask::$PUBLIC_IP" - echo "::add-mask::$MACHINE_ID" - echo "publicIp=$PUBLIC_IP" >> $GITHUB_OUTPUT - echo "machineId=$MACHINE_ID" >> $GITHUB_OUTPUT - - name: running regression tests (PR) - uses: appleboy/ssh-action@029f5b4aeeeb58fdfe1410a5d17f967dacf36262 # ratchet:appleboy/ssh-action@v1.0.3 - if: github.event_name == 'pull_request' - with: - host: ${{ steps.paperspace-machine.outputs.publicIp }} - username: paperspace - key: ${{ secrets.PAPERSPACE_SSH_KEY }} - port: ${{ secrets.PAPERSPACE_PORT }} - command_timeout: 1h - script: /nix/var/nix/profiles/default/bin/nix run github:aarnphm/dix#openllm-ci --refresh -- --pr ${{github.event.number}} - - name: running regression tests (main) - uses: appleboy/ssh-action@029f5b4aeeeb58fdfe1410a5d17f967dacf36262 # ratchet:appleboy/ssh-action@v1.0.3 - if: ${{ !github.event.repository.fork && github.event_name == 'push' }} - with: - host: ${{ steps.paperspace-machine.outputs.publicIp }} - username: paperspace - key: ${{ secrets.PAPERSPACE_SSH_KEY }} - port: ${{ secrets.PAPERSPACE_PORT }} - command_timeout: 1h - script: /nix/var/nix/profiles/default/bin/nix run github:aarnphm/dix#openllm-ci --accept-flake-config --refresh -- --head - - name: shutdown machine - run: python tools/machines.py --delete ${{ 
steps.paperspace-machine.outputs.machineId }} - evergreen: # https://github.com/marketplace/actions/alls-green#why - if: always() - needs: - - tests - runs-on: ubuntu-latest - steps: - - name: Decide whether the needed jobs succeeded or failed - uses: re-actors/alls-green@05ac9388f0aebcb5727afa17fcccfecd6f8ec5fe # ratchet:re-actors/alls-green@release/v1 - with: - jobs: ${{ toJSON(needs) }} diff --git a/.github/workflows/create-releases.yml b/.github/workflows/create-releases.yml deleted file mode 100644 index 97a351da..00000000 --- a/.github/workflows/create-releases.yml +++ /dev/null @@ -1,114 +0,0 @@ -name: release -on: - workflow_dispatch: - inputs: - release_type: - description: 'Release type' - required: true - type: choice - default: 'patch' - options: - - major - - minor - - patch - - alpha -defaults: - run: - shell: bash --noprofile --norc -exo pipefail {0} -env: - LINES: 120 - COLUMNS: 120 - OPENLLM_DO_NOT_TRACK: True - PYTHONUNBUFFERED: '1' - HATCH_VERBOSE: 2 -jobs: - release: - if: github.repository_owner == 'bentoml' - runs-on: ubuntu-latest - permissions: - contents: write - id-token: write - outputs: - version: ${{ steps.version.outputs.version }} - steps: - - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # ratchet:actions/checkout@v4.1.7 - with: - fetch-depth: 0 - token: ${{ secrets.GITHUB_TOKEN }} - - uses: bentoml/setup-bentoml-action@862aa8fa0e0c3793fcca4bfe7a62717a497417e4 # ratchet:bentoml/setup-bentoml-action@v1 - with: - bentoml-version: 'main' - python-version-file: .python-version-default - - name: Import bot's GPG key for signing commits - id: import-gpg - uses: crazy-max/ghaction-import-gpg@01dd5d3ca463c7f10f7f4f7b4f177225ac661ee4 # ratchet:crazy-max/ghaction-import-gpg@v6 - with: - gpg_private_key: ${{ secrets.GPG_PRIVATE_KEY }} - passphrase: ${{ secrets.GPG_PASSPHRASE }} - git_config_global: true - git_user_signingkey: true - git_commit_gpgsign: true - git_tag_gpgsign: true - - name: Create a release - env: - GIT_AUTHOR_NAME: ${{ steps.import-gpg.outputs.name }} - GIT_AUTHOR_EMAIL: ${{ steps.import-gpg.outputs.email }} - GIT_COMMITTER_NAME: ${{ steps.import-gpg.outputs.name }} - GIT_COMMITTER_EMAIL: ${{ steps.import-gpg.outputs.email }} - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - run: |- - pip install ghapi inflection tomlkit orjson - ./.github/actions/release.sh --release ${{ inputs.release_type }} - - name: Set project version - id: version - run: |- - version="$(git describe --tags "$(git rev-list --tags --max-count=1)")" - echo "Release version: $version" - echo "version=$version" >> $GITHUB_OUTPUT - build-pypi: - if: github.repository_owner == 'bentoml' - needs: - - release - name: Compile PyPI distribution for OpenLLM - uses: bentoml/OpenLLM/.github/workflows/build-pypi.yml@main # ratchet:exclude - with: - tags: ${{ needs.release.outputs.version }} - publish-python: - needs: - - build-pypi - runs-on: ubuntu-latest - permissions: - id-token: write - steps: - - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # ratchet:actions/checkout@v4.1.7 - with: - ref: '${{ needs.release.outputs.version }}' - token: ${{ secrets.GITHUB_TOKEN }} - # NOTE: Keep this section in sync with build-pypi.yml - - name: Download Python artifacts - uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # ratchet:actions/download-artifact@v4 - with: - pattern: python-artefacts-* - merge-multiple: true - path: dist - - name: Smoke test compiled artefacts - run: ls -R dist - - name: Publish to PyPI - uses: 
pypa/gh-action-pypi-publish@ec4db0b4ddc65acdf4bff5fa45ac92d78b56bdf0 # ratchet:pypa/gh-action-pypi-publish@release/v1 - with: - print-hash: true - binary-distribution: - if: github.repository_owner == 'bentoml' - needs: publish-python - name: Create binary/wheels distribution - uses: bentoml/OpenLLM/.github/workflows/binary-releases.yml@main # ratchet:exclude - release-notes: - if: github.repository_owner == 'bentoml' - needs: - - release - - publish-python - - binary-distribution - name: Create release notes and setup for next cycle - uses: bentoml/OpenLLM/.github/workflows/release-notes.yml@main # ratchet:exclude - with: - tags: ${{ needs.release.outputs.version }} diff --git a/.github/workflows/release-notes.yml b/.github/workflows/release-notes.yml deleted file mode 100644 index bfb557b8..00000000 --- a/.github/workflows/release-notes.yml +++ /dev/null @@ -1,66 +0,0 @@ -name: release-notes -on: - workflow_dispatch: - inputs: - tags: - description: 'Tag for releases' - required: true - type: string - workflow_call: - inputs: - tags: - required: true - type: string -env: - HATCH_VERBOSE: 2 -defaults: - run: - shell: bash --noprofile --norc -exo pipefail {0} -jobs: - gen-release-notes: - if: github.repository_owner == 'bentoml' - runs-on: ubuntu-latest - permissions: - contents: write - id-token: write - steps: - - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # ratchet:actions/checkout@v4.1.7 - with: - fetch-depth: 0 - ref: '${{ inputs.tags }}' - - uses: bentoml/setup-bentoml-action@862aa8fa0e0c3793fcca4bfe7a62717a497417e4 # ratchet:bentoml/setup-bentoml-action@v1 - with: - bentoml-version: 'main' - python-version-file: .python-version-default - - name: Create release notes - run: ./.github/actions/create_release_and_archive.sh ${{ inputs.tags }} - - name: Download Python artifacts - uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # ratchet:actions/download-artifact@v4 - with: - pattern: python-artefacts-* - merge-multiple: true - path: dist - - name: Download binaries - uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # ratchet:actions/download-artifact@v4 - with: - pattern: standalone-* - merge-multiple: true - path: archives - - name: Download standalone MacOS - uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # ratchet:actions/download-artifact@v4 - with: - pattern: staged-* - merge-multiple: true - path: archives - - name: Create release - uses: softprops/action-gh-release@a74c6b72af54cfa997e81df42d94703d6313a2d0 # ratchet:softprops/action-gh-release@v2.0.6 - with: - # Use GH feature to populate the changelog automatically - generate_release_notes: true - body_path: release_notes.txt - fail_on_unmatched_files: true - tag_name: '${{ inputs.tags }}' - # TODO: add installers/* once windows support is back - files: |- - dist/* - archives/* diff --git a/.github/workflows/releases.yml b/.github/workflows/releases.yml new file mode 100644 index 00000000..6e7a0e87 --- /dev/null +++ b/.github/workflows/releases.yml @@ -0,0 +1,43 @@ +name: release +on: + push: + tags: + - "*" +jobs: + build: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version-file: .python-version-default + - name: Build + run: pipx run build + - name: Upload artifacts + uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # ratchet:actions/upload-artifact@v4 + with: + name: python-artefacts-openllm + path: dist/* + 
if-no-files-found: error + release: + if: github.repository_owner == 'bentoml' + needs: + - build + runs-on: ubuntu-latest + name: Release + permissions: + id-token: write + steps: + - name: Download Python artifacts + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # ratchet:actions/download-artifact@v4 + with: + pattern: python-artefacts-* + merge-multiple: true + path: dist + - name: dry ls + run: ls -rthlaR + - name: Publish to PyPI + uses: pypa/gh-action-pypi-publish@ec4db0b4ddc65acdf4bff5fa45ac92d78b56bdf0 # ratchet:pypa/gh-action-pypi-publish@release/v1 + with: + print-hash: true diff --git a/.gitignore b/.gitignore index 6d6b7cc7..19d0c252 100644 --- a/.gitignore +++ b/.gitignore @@ -161,3 +161,6 @@ cython_debug/ *.whl # Environments venv/ +.envrc +.python-version +_version.py diff --git a/.npmrc b/.npmrc deleted file mode 100644 index a347224d..00000000 --- a/.npmrc +++ /dev/null @@ -1,7 +0,0 @@ -strict-peer-dependencies=false -shell-emulator=true -engine-strict=true -save-exact=true -unsafe-perm=true -prefer-offline=true -prefer-workspace-packages=true diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 5f08c2a1..7ea3417f 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -31,12 +31,7 @@ repos: - id: editorconfig-checker verbose: true alias: ec - types_or: [python, javascript] - exclude: | - (?x)^( - openllm-python/src/openllm_cli/entrypoint.py | - openllm-python/src/_openllm_tiny/_entrypoint.py - )$ + types_or: [python] - repo: meta hooks: - id: check-hooks-apply @@ -55,11 +50,3 @@ repos: - id: check-added-large-files - id: debug-statements - id: check-merge-conflict - - repo: https://github.com/econchick/interrogate - rev: 1.7.0 - hooks: - - id: interrogate - verbose: true - types: [python] - exclude: ^(docs|tools|openllm-python/tests) - args: [--config=pyproject.toml] diff --git a/.prettierrc b/.prettierrc deleted file mode 100644 index 9edc7743..00000000 --- a/.prettierrc +++ /dev/null @@ -1,9 +0,0 @@ -{ - "printWidth": 119, - "quoteProps": "as-needed", - "trailingComma": "none", - "useTabs": false, - "tabWidth": 2, - "semi": false, - "singleQuote": true -} diff --git a/.python-version b/.python-version new file mode 120000 index 00000000..85bfe7cb --- /dev/null +++ b/.python-version @@ -0,0 +1 @@ +/Users/aarnphm/workspace/bentoml/projects/openllm/upstream/.python-version-default \ No newline at end of file diff --git a/.ruff.toml b/.ruff.toml index 30f720d6..1b7d8ddf 100644 --- a/.ruff.toml +++ b/.ruff.toml @@ -1,4 +1,3 @@ -exclude = ["tools", "examples", "openllm-python/src/openllm_cli/playground/"] extend-include = ["*.ipynb"] preview = true line-length = 119 @@ -12,38 +11,35 @@ skip-magic-trailing-comma = true docstring-code-format = true [lint] -typing-modules = [ - "openllm_core._typing_compat", - "openllm_client._typing_compat", -] ignore = [ - "RUF012", - "ANN", # Mypy is better at this + "RUF012", + "ANN", # Mypy is better at this + "E722", ] select = [ - "F", - "G", # flake8-logging-format - "PERF", # perflint - "RUF", # Ruff-specific rules - "W6", - "E71", - "E72", - "E112", - "E113", - # "E124", - "E203", - "E272", - # "E303", - # "E304", - # "E501", - # "E502", - "E702", - "E703", - "E731", - "W191", - "W291", - "W293", - "UP039", # unnecessary-class-parentheses + "F", + "G", # flake8-logging-format + "PERF", # perflint + "RUF", # Ruff-specific rules + "W6", + "E71", + "E72", + "E112", + "E113", + # "E124", + "E203", + "E272", + # "E303", + # "E304", + # "E501", + # "E502", + "E702", + "E703", + "E731", 
+ "W191", + "W291", + "W293", + "UP039", # unnecessary-class-parentheses ] [lint.pydocstyle] diff --git a/.taplo.toml b/.taplo.toml deleted file mode 100644 index 9b3a9b62..00000000 --- a/.taplo.toml +++ /dev/null @@ -1,2 +0,0 @@ -[formatting] -indent_string = " " diff --git a/CHANGELOG.md b/CHANGELOG.md index 98746cac..12c3f352 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,6 +18,10 @@ This changelog is managed by towncrier and is compiled at release time. +## [0.6.0](https://github.com/bentoml/openllm/tree/v0.6.0) +No significant changes. + + ## [0.5.7](https://github.com/bentoml/openllm/tree/v0.5.7) No significant changes. diff --git a/DEVELOPMENT.md b/DEVELOPMENT.md index 87459002..9d99305c 100644 --- a/DEVELOPMENT.md +++ b/DEVELOPMENT.md @@ -44,6 +44,12 @@ Before you can start developing, you'll need to set up your environment: git branch --set-upstream-to=upstream/main ``` +6. (Optional) Link `.python-version-default` to `.python-version`: + + ```bash + ln .python-version-default .python-version + ``` + ## Development Workflow There are a few ways to contribute to the repository structure for OpenLLM: diff --git a/README.md b/README.md index a2c87ee6..97be2f65 100644 --- a/README.md +++ b/README.md @@ -34,8 +34,6 @@ OpenLLM helps developers **run any open-source LLMs**, such as Llama 2 and Mistr
- - ## 💾 TL/DR For starter, we provide two ways to quickly try out OpenLLM: @@ -98,7 +96,6 @@ OpenLLM currently supports the following models. By default, OpenLLM doesn't inc Baichuan - ### Quickstart Run the following command to quickly spin up a Baichuan server: @@ -106,23 +103,21 @@ Run the following command to quickly spin up a Baichuan server: ```bash openllm start baichuan-inc/baichuan-7b --trust-remote-code ``` + You can run the following code in a different terminal to interact with the server: + ```python import openllm_client client = openllm_client.HTTPClient('http://localhost:3000') client.generate('What are large language models?') ``` - > **Note:** Any Baichuan variants can be deployed with OpenLLM. Visit the [HuggingFace Model Hub](https://huggingface.co/models?sort=trending&search=baichuan) to see more Baichuan-compatible models. - - ### Supported models You can specify any of the following Baichuan models via `openllm start`: - - [baichuan-inc/baichuan2-7b-base](https://huggingface.co/baichuan-inc/baichuan2-7b-base) - [baichuan-inc/baichuan2-7b-chat](https://huggingface.co/baichuan-inc/baichuan2-7b-chat) - [baichuan-inc/baichuan2-13b-base](https://huggingface.co/baichuan-inc/baichuan2-13b-base) @@ -134,7 +129,6 @@ You can specify any of the following Baichuan models via `openllm start`: ChatGLM - ### Quickstart Run the following command to quickly spin up a ChatGLM server: @@ -142,23 +136,21 @@ Run the following command to quickly spin up a ChatGLM server: ```bash openllm start thudm/chatglm-6b --trust-remote-code ``` + You can run the following code in a different terminal to interact with the server: + ```python import openllm_client client = openllm_client.HTTPClient('http://localhost:3000') client.generate('What are large language models?') ``` - > **Note:** Any ChatGLM variants can be deployed with OpenLLM. Visit the [HuggingFace Model Hub](https://huggingface.co/models?sort=trending&search=chatglm) to see more ChatGLM-compatible models. - - ### Supported models You can specify any of the following ChatGLM models via `openllm start`: - - [thudm/chatglm-6b](https://huggingface.co/thudm/chatglm-6b) - [thudm/chatglm-6b-int8](https://huggingface.co/thudm/chatglm-6b-int8) - [thudm/chatglm-6b-int4](https://huggingface.co/thudm/chatglm-6b-int4) @@ -172,7 +164,6 @@ You can specify any of the following ChatGLM models via `openllm start`: Cohere - ### Quickstart Run the following command to quickly spin up a Cohere server: @@ -180,23 +171,21 @@ Run the following command to quickly spin up a Cohere server: ```bash openllm start CohereForAI/c4ai-command-r-plus --trust-remote-code ``` + You can run the following code in a different terminal to interact with the server: + ```python import openllm_client client = openllm_client.HTTPClient('http://localhost:3000') client.generate('What are large language models?') ``` - > **Note:** Any Cohere variants can be deployed with OpenLLM. Visit the [HuggingFace Model Hub](https://huggingface.co/models?sort=trending&search=commandr) to see more Cohere-compatible models. 
- - ### Supported models You can specify any of the following Cohere models via `openllm start`: - - [CohereForAI/c4ai-command-r-plus](https://huggingface.co/CohereForAI/c4ai-command-r-plus) - [CohereForAI/c4ai-command-r-v01](https://huggingface.co/CohereForAI/c4ai-command-r-v01) @@ -206,7 +195,6 @@ You can specify any of the following Cohere models via `openllm start`: Dbrx - ### Quickstart Run the following command to quickly spin up a Dbrx server: @@ -214,23 +202,21 @@ Run the following command to quickly spin up a Dbrx server: ```bash openllm start databricks/dbrx-instruct --trust-remote-code ``` + You can run the following code in a different terminal to interact with the server: + ```python import openllm_client client = openllm_client.HTTPClient('http://localhost:3000') client.generate('What are large language models?') ``` - > **Note:** Any Dbrx variants can be deployed with OpenLLM. Visit the [HuggingFace Model Hub](https://huggingface.co/models?sort=trending&search=dbrx) to see more Dbrx-compatible models. - - ### Supported models You can specify any of the following Dbrx models via `openllm start`: - - [databricks/dbrx-instruct](https://huggingface.co/databricks/dbrx-instruct) - [databricks/dbrx-base](https://huggingface.co/databricks/dbrx-base) @@ -240,7 +226,6 @@ You can specify any of the following Dbrx models via `openllm start`: DollyV2 - ### Quickstart Run the following command to quickly spin up a DollyV2 server: @@ -248,23 +233,21 @@ Run the following command to quickly spin up a DollyV2 server: ```bash openllm start databricks/dolly-v2-3b --trust-remote-code ``` + You can run the following code in a different terminal to interact with the server: + ```python import openllm_client client = openllm_client.HTTPClient('http://localhost:3000') client.generate('What are large language models?') ``` - > **Note:** Any DollyV2 variants can be deployed with OpenLLM. Visit the [HuggingFace Model Hub](https://huggingface.co/models?sort=trending&search=dolly_v2) to see more DollyV2-compatible models. - - ### Supported models You can specify any of the following DollyV2 models via `openllm start`: - - [databricks/dolly-v2-3b](https://huggingface.co/databricks/dolly-v2-3b) - [databricks/dolly-v2-7b](https://huggingface.co/databricks/dolly-v2-7b) - [databricks/dolly-v2-12b](https://huggingface.co/databricks/dolly-v2-12b) @@ -275,7 +258,6 @@ You can specify any of the following DollyV2 models via `openllm start`: Falcon - ### Quickstart Run the following command to quickly spin up a Falcon server: @@ -283,23 +265,21 @@ Run the following command to quickly spin up a Falcon server: ```bash openllm start tiiuae/falcon-7b --trust-remote-code ``` + You can run the following code in a different terminal to interact with the server: + ```python import openllm_client client = openllm_client.HTTPClient('http://localhost:3000') client.generate('What are large language models?') ``` - > **Note:** Any Falcon variants can be deployed with OpenLLM. Visit the [HuggingFace Model Hub](https://huggingface.co/models?sort=trending&search=falcon) to see more Falcon-compatible models. 
- - ### Supported models You can specify any of the following Falcon models via `openllm start`: - - [tiiuae/falcon-7b](https://huggingface.co/tiiuae/falcon-7b) - [tiiuae/falcon-40b](https://huggingface.co/tiiuae/falcon-40b) - [tiiuae/falcon-7b-instruct](https://huggingface.co/tiiuae/falcon-7b-instruct) @@ -311,7 +291,6 @@ You can specify any of the following Falcon models via `openllm start`: Gemma - ### Quickstart Run the following command to quickly spin up a Gemma server: @@ -319,23 +298,21 @@ Run the following command to quickly spin up a Gemma server: ```bash openllm start google/gemma-7b --trust-remote-code ``` + You can run the following code in a different terminal to interact with the server: + ```python import openllm_client client = openllm_client.HTTPClient('http://localhost:3000') client.generate('What are large language models?') ``` - > **Note:** Any Gemma variants can be deployed with OpenLLM. Visit the [HuggingFace Model Hub](https://huggingface.co/models?sort=trending&search=gemma) to see more Gemma-compatible models. - - ### Supported models You can specify any of the following Gemma models via `openllm start`: - - [google/gemma-7b](https://huggingface.co/google/gemma-7b) - [google/gemma-7b-it](https://huggingface.co/google/gemma-7b-it) - [google/gemma-2b](https://huggingface.co/google/gemma-2b) @@ -347,7 +324,6 @@ You can specify any of the following Gemma models via `openllm start`: GPTNeoX - ### Quickstart Run the following command to quickly spin up a GPTNeoX server: @@ -355,23 +331,21 @@ Run the following command to quickly spin up a GPTNeoX server: ```bash openllm start eleutherai/gpt-neox-20b --trust-remote-code ``` + You can run the following code in a different terminal to interact with the server: + ```python import openllm_client client = openllm_client.HTTPClient('http://localhost:3000') client.generate('What are large language models?') ``` - > **Note:** Any GPTNeoX variants can be deployed with OpenLLM. Visit the [HuggingFace Model Hub](https://huggingface.co/models?sort=trending&search=gpt_neox) to see more GPTNeoX-compatible models. - - ### Supported models You can specify any of the following GPTNeoX models via `openllm start`: - - [eleutherai/gpt-neox-20b](https://huggingface.co/eleutherai/gpt-neox-20b) @@ -380,7 +354,6 @@ You can specify any of the following GPTNeoX models via `openllm start`: Llama - ### Quickstart Run the following command to quickly spin up a Llama server: @@ -388,23 +361,21 @@ Run the following command to quickly spin up a Llama server: ```bash openllm start NousResearch/llama-2-7b-hf --trust-remote-code ``` + You can run the following code in a different terminal to interact with the server: + ```python import openllm_client client = openllm_client.HTTPClient('http://localhost:3000') client.generate('What are large language models?') ``` - > **Note:** Any Llama variants can be deployed with OpenLLM. Visit the [HuggingFace Model Hub](https://huggingface.co/models?sort=trending&search=llama) to see more Llama-compatible models. 
- - ### Supported models You can specify any of the following Llama models via `openllm start`: - - [meta-llama/Llama-2-70b-chat-hf](https://huggingface.co/meta-llama/Llama-2-70b-chat-hf) - [meta-llama/Llama-2-13b-chat-hf](https://huggingface.co/meta-llama/Llama-2-13b-chat-hf) - [meta-llama/Llama-2-7b-chat-hf](https://huggingface.co/meta-llama/Llama-2-7b-chat-hf) @@ -424,7 +395,6 @@ You can specify any of the following Llama models via `openllm start`: Mistral - ### Quickstart Run the following command to quickly spin up a Mistral server: @@ -432,23 +402,21 @@ Run the following command to quickly spin up a Mistral server: ```bash openllm start mistralai/Mistral-7B-Instruct-v0.1 --trust-remote-code ``` + You can run the following code in a different terminal to interact with the server: + ```python import openllm_client client = openllm_client.HTTPClient('http://localhost:3000') client.generate('What are large language models?') ``` - > **Note:** Any Mistral variants can be deployed with OpenLLM. Visit the [HuggingFace Model Hub](https://huggingface.co/models?sort=trending&search=mistral) to see more Mistral-compatible models. - - ### Supported models You can specify any of the following Mistral models via `openllm start`: - - [HuggingFaceH4/zephyr-7b-alpha](https://huggingface.co/HuggingFaceH4/zephyr-7b-alpha) - [HuggingFaceH4/zephyr-7b-beta](https://huggingface.co/HuggingFaceH4/zephyr-7b-beta) - [mistralai/Mistral-7B-Instruct-v0.2](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2) @@ -461,7 +429,6 @@ You can specify any of the following Mistral models via `openllm start`: Mixtral - ### Quickstart Run the following command to quickly spin up a Mixtral server: @@ -469,23 +436,21 @@ Run the following command to quickly spin up a Mixtral server: ```bash openllm start mistralai/Mixtral-8x7B-Instruct-v0.1 --trust-remote-code ``` + You can run the following code in a different terminal to interact with the server: + ```python import openllm_client client = openllm_client.HTTPClient('http://localhost:3000') client.generate('What are large language models?') ``` - > **Note:** Any Mixtral variants can be deployed with OpenLLM. Visit the [HuggingFace Model Hub](https://huggingface.co/models?sort=trending&search=mixtral) to see more Mixtral-compatible models. - - ### Supported models You can specify any of the following Mixtral models via `openllm start`: - - [mistralai/Mixtral-8x7B-Instruct-v0.1](https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1) - [mistralai/Mixtral-8x7B-v0.1](https://huggingface.co/mistralai/Mixtral-8x7B-v0.1) @@ -495,7 +460,6 @@ You can specify any of the following Mixtral models via `openllm start`: MPT - ### Quickstart Run the following command to quickly spin up a MPT server: @@ -503,23 +467,21 @@ Run the following command to quickly spin up a MPT server: ```bash openllm start mosaicml/mpt-7b-instruct --trust-remote-code ``` + You can run the following code in a different terminal to interact with the server: + ```python import openllm_client client = openllm_client.HTTPClient('http://localhost:3000') client.generate('What are large language models?') ``` - > **Note:** Any MPT variants can be deployed with OpenLLM. Visit the [HuggingFace Model Hub](https://huggingface.co/models?sort=trending&search=mpt) to see more MPT-compatible models. 
- - ### Supported models You can specify any of the following MPT models via `openllm start`: - - [mosaicml/mpt-7b](https://huggingface.co/mosaicml/mpt-7b) - [mosaicml/mpt-7b-instruct](https://huggingface.co/mosaicml/mpt-7b-instruct) - [mosaicml/mpt-7b-chat](https://huggingface.co/mosaicml/mpt-7b-chat) @@ -534,7 +496,6 @@ You can specify any of the following MPT models via `openllm start`: OPT - ### Quickstart Run the following command to quickly spin up a OPT server: @@ -542,23 +503,21 @@ Run the following command to quickly spin up a OPT server: ```bash openllm start facebook/opt-1.3b ``` + You can run the following code in a different terminal to interact with the server: + ```python import openllm_client client = openllm_client.HTTPClient('http://localhost:3000') client.generate('What are large language models?') ``` - > **Note:** Any OPT variants can be deployed with OpenLLM. Visit the [HuggingFace Model Hub](https://huggingface.co/models?sort=trending&search=opt) to see more OPT-compatible models. - - ### Supported models You can specify any of the following OPT models via `openllm start`: - - [facebook/opt-125m](https://huggingface.co/facebook/opt-125m) - [facebook/opt-350m](https://huggingface.co/facebook/opt-350m) - [facebook/opt-1.3b](https://huggingface.co/facebook/opt-1.3b) @@ -572,7 +531,6 @@ You can specify any of the following OPT models via `openllm start`: Phi - ### Quickstart Run the following command to quickly spin up a Phi server: @@ -580,23 +538,21 @@ Run the following command to quickly spin up a Phi server: ```bash openllm start microsoft/Phi-3-mini-4k-instruct --trust-remote-code ``` + You can run the following code in a different terminal to interact with the server: + ```python import openllm_client client = openllm_client.HTTPClient('http://localhost:3000') client.generate('What are large language models?') ``` - > **Note:** Any Phi variants can be deployed with OpenLLM. Visit the [HuggingFace Model Hub](https://huggingface.co/models?sort=trending&search=phi) to see more Phi-compatible models. - - ### Supported models You can specify any of the following Phi models via `openllm start`: - - [microsoft/Phi-3-mini-4k-instruct](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct) - [microsoft/Phi-3-mini-128k-instruct](https://huggingface.co/microsoft/Phi-3-mini-128k-instruct) - [microsoft/Phi-3-small-8k-instruct](https://huggingface.co/microsoft/Phi-3-small-8k-instruct) @@ -610,7 +566,6 @@ You can specify any of the following Phi models via `openllm start`: Qwen - ### Quickstart Run the following command to quickly spin up a Qwen server: @@ -618,23 +573,21 @@ Run the following command to quickly spin up a Qwen server: ```bash openllm start qwen/Qwen-7B-Chat --trust-remote-code ``` + You can run the following code in a different terminal to interact with the server: + ```python import openllm_client client = openllm_client.HTTPClient('http://localhost:3000') client.generate('What are large language models?') ``` - > **Note:** Any Qwen variants can be deployed with OpenLLM. Visit the [HuggingFace Model Hub](https://huggingface.co/models?sort=trending&search=qwen) to see more Qwen-compatible models. 
- - ### Supported models You can specify any of the following Qwen models via `openllm start`: - - [qwen/Qwen-7B-Chat](https://huggingface.co/qwen/Qwen-7B-Chat) - [qwen/Qwen-7B-Chat-Int8](https://huggingface.co/qwen/Qwen-7B-Chat-Int8) - [qwen/Qwen-7B-Chat-Int4](https://huggingface.co/qwen/Qwen-7B-Chat-Int4) @@ -648,7 +601,6 @@ You can specify any of the following Qwen models via `openllm start`: StableLM - ### Quickstart Run the following command to quickly spin up a StableLM server: @@ -656,23 +608,21 @@ Run the following command to quickly spin up a StableLM server: ```bash openllm start stabilityai/stablelm-tuned-alpha-3b --trust-remote-code ``` + You can run the following code in a different terminal to interact with the server: + ```python import openllm_client client = openllm_client.HTTPClient('http://localhost:3000') client.generate('What are large language models?') ``` - > **Note:** Any StableLM variants can be deployed with OpenLLM. Visit the [HuggingFace Model Hub](https://huggingface.co/models?sort=trending&search=stablelm) to see more StableLM-compatible models. - - ### Supported models You can specify any of the following StableLM models via `openllm start`: - - [stabilityai/stablelm-tuned-alpha-3b](https://huggingface.co/stabilityai/stablelm-tuned-alpha-3b) - [stabilityai/stablelm-tuned-alpha-7b](https://huggingface.co/stabilityai/stablelm-tuned-alpha-7b) - [stabilityai/stablelm-base-alpha-3b](https://huggingface.co/stabilityai/stablelm-base-alpha-3b) @@ -684,7 +634,6 @@ You can specify any of the following StableLM models via `openllm start`: StarCoder - ### Quickstart Run the following command to quickly spin up a StarCoder server: @@ -692,23 +641,21 @@ Run the following command to quickly spin up a StarCoder server: ```bash openllm start bigcode/starcoder --trust-remote-code ``` + You can run the following code in a different terminal to interact with the server: + ```python import openllm_client client = openllm_client.HTTPClient('http://localhost:3000') client.generate('What are large language models?') ``` - > **Note:** Any StarCoder variants can be deployed with OpenLLM. Visit the [HuggingFace Model Hub](https://huggingface.co/models?sort=trending&search=starcoder) to see more StarCoder-compatible models. - - ### Supported models You can specify any of the following StarCoder models via `openllm start`: - - [bigcode/starcoder](https://huggingface.co/bigcode/starcoder) - [bigcode/starcoderbase](https://huggingface.co/bigcode/starcoderbase) @@ -718,7 +665,6 @@ You can specify any of the following StarCoder models via `openllm start`: Yi - ### Quickstart Run the following command to quickly spin up a Yi server: @@ -726,23 +672,21 @@ Run the following command to quickly spin up a Yi server: ```bash openllm start 01-ai/Yi-6B --trust-remote-code ``` + You can run the following code in a different terminal to interact with the server: + ```python import openllm_client client = openllm_client.HTTPClient('http://localhost:3000') client.generate('What are large language models?') ``` - > **Note:** Any Yi variants can be deployed with OpenLLM. Visit the [HuggingFace Model Hub](https://huggingface.co/models?sort=trending&search=yi) to see more Yi-compatible models. 
- - ### Supported models You can specify any of the following Yi models via `openllm start`: - - [01-ai/Yi-6B](https://huggingface.co/01-ai/Yi-6B) - [01-ai/Yi-34B](https://huggingface.co/01-ai/Yi-34B) - [01-ai/Yi-6B-200K](https://huggingface.co/01-ai/Yi-6B-200K) @@ -830,8 +774,6 @@ async for it in llm.astream('What is the difference between a duck and a goose? print(it, flush=True, end='') ``` - - ## 🚀 Deploying models to production diff --git a/hatch.toml b/hatch.toml deleted file mode 100644 index 0e81085e..00000000 --- a/hatch.toml +++ /dev/null @@ -1,84 +0,0 @@ -[envs.default] -installer = "uv" -type = "virtual" -dependencies = [ - "openllm-core @ {root:uri}/openllm-core", - "openllm-client @ {root:uri}/openllm-client", - "openllm[chatglm,fine-tune] @ {root:uri}/openllm-python", - # NOTE: To run all hooks - "pre-commit", - # NOTE: towncrier for changelog - "towncrier", - "tomlkit", - # NOTE: For fancy PyPI readme - "hatch-fancy-pypi-readme", - # NOTE: For working with shell pipe - "plumbum", - # The below sync with mypyc deps and pre-commit mypy - "types-psutil", - "types-tabulate", - "types-PyYAML", - "types-protobuf", -] -[envs.default.scripts] -changelog = "towncrier build --version main --draft" -inplace-changelog = "towncrier build --version main --keep" -setup = [ - "pre-commit install", - "- ln -s .python-version-default .python-version", -] -quality = [ - "bash ./local.sh --tools", - "- pre-commit run --all-files", - "- pnpm format", -] -tool = ["quality", "bash ./clean.sh", 'python ./cz.py'] -[envs.tests] -installer = "uv" -type = "virtual" -dependencies = [ - "openllm @ {root:uri}/openllm-python", - "openllm-core @ {root:uri}/openllm-core", - "openllm-client @ {root:uri}/openllm-client", - # NOTE: Tests strategies with Hypothesis and pytest, and snapshot testing with syrupy - "coverage[toml]>=6.5", - "filelock>=3.7.1", - "pytest", - "pytest-cov", - "pytest-mock", - "pytest-randomly", - "pytest-rerunfailures", - "pytest-asyncio>=0.21.0", - "pytest-xdist[psutil]", - "trustme", - "hypothesis", - "syrupy", -] -skip-install = false -template = "tests" -[envs.tests.scripts] -_run_script = "pytest --cov --cov-report={env:COVERAGE_REPORT:term-missing} --cov-config=pyproject.toml -vv" -python = "_run_script -r aR -x {args:openllm-python/tests}" -[envs.tests.overrides] -env.GITHUB_ACTIONS.env-vars = "COVERAGE_REPORT=" -[envs.coverage] -dependencies = ["coverage[toml]>=6.5", "lxml", "orjson"] -detached = true -[envs.coverage.scripts] -combine = "coverage combine {args}" -generate-summary = "python tools/generate-coverage.py" -report-uncovered-html = "coverage html --skip-covered --skip-empty {args}" -report-xml = "coverage xml {args}" -write-summary-report = "python tools/write-coverage-report.py" -[envs.ui] -detached = true -[envs.ui.scripts] -server = "openllm start {args:facebook/opt-250m} --working-dir {root:uri} --cors --debug" -[envs.ci] -detached = true -[envs.ci.scripts] -client-stubs = "bash openllm-client/generate-grpc-stubs" -compile = "bash ./tools/compile.sh {args}" -recompile = ["bash ./clean.sh", "compile"] -edi = "bash local.sh" -lock = ["bash tools/lock-actions.sh"] diff --git a/openllm_next/__main__.py b/openllm_next/__main__.py deleted file mode 100644 index 51beec14..00000000 --- a/openllm_next/__main__.py +++ /dev/null @@ -1,338 +0,0 @@ -import os -import random -import sys -from collections import defaultdict -from typing import Annotated, Optional - -import questionary -import typer - -from openllm_next.accelerator_spec import ( - DeploymentTarget, - can_run, - 
get_local_machine_spec, -) -from openllm_next.analytic import DO_NOT_TRACK, OpenLLMTyper -from openllm_next.clean import app as clean_app -from openllm_next.cloud import deploy as cloud_deploy -from openllm_next.cloud import ensure_cloud_context, get_cloud_machine_spec -from openllm_next.common import CHECKED, INTERACTIVE, VERBOSE_LEVEL, output -from openllm_next.local import run as local_run -from openllm_next.local import serve as local_serve -from openllm_next.model import app as model_app -from openllm_next.model import ensure_bento, list_bento -from openllm_next.repo import app as repo_app - -app = OpenLLMTyper( - help="`openllm hello` to get started. " - "OpenLLM is a CLI tool to manage and deploy open source LLMs and" - " get an OpenAI API compatible chat server in seconds.", -) - -app.add_typer(repo_app, name="repo") -app.add_typer(model_app, name="model") -app.add_typer(clean_app, name="clean") - - -def _select_bento_name(models, target): - from tabulate import tabulate - - options = [] - model_infos = [ - [model.repo.name, model.name, can_run(model, target)] for model in models - ] - model_name_groups = defaultdict(lambda: 0) - for repo, name, score in model_infos: - model_name_groups[(repo, name)] += score - table_data = [ - [name, repo, CHECKED if score > 0 else ""] - for (repo, name), score in model_name_groups.items() - ] - if not table_data: - output("No model found", style="red") - raise typer.Exit(1) - table = tabulate( - table_data, - headers=["model", "repo", "locally runnable"], - ).split("\n") - headers = f"{table[0]}\n {table[1]}" - - options.append(questionary.Separator(headers)) - for table_data, table_line in zip(table_data, table[2:]): - options.append(questionary.Choice(table_line, value=table_data[:2])) - selected = questionary.select("Select a model", options).ask() - if selected is None: - raise typer.Exit(1) - return selected - - -def _select_bento_version(models, target, bento_name, repo): - from tabulate import tabulate - - model_infos = [ - [model, can_run(model, target)] - for model in models - if model.name == bento_name and model.repo.name == repo - ] - - table_data = [ - [model.tag, CHECKED if score > 0 else ""] - for model, score in model_infos - if model.name == bento_name and model.repo.name == repo - ] - if not table_data: - output(f"No model found for {bento_name} in {repo}", style="red") - raise typer.Exit(1) - table = tabulate( - table_data, - headers=["version", "locally runnable"], - ).split("\n") - - options = [] - options.append(questionary.Separator(f"{table[0]}\n {table[1]}")) - for table_data, table_line in zip(model_infos, table[2:]): - options.append(questionary.Choice(table_line, value=table_data)) - selected = questionary.select("Select a version", options).ask() - if selected is None: - raise typer.Exit(1) - return selected - - -def _select_target(bento, targets): - from tabulate import tabulate - - options = [] - targets.sort(key=lambda x: can_run(bento, x), reverse=True) - if not targets: - output( - "No available instance type, check your bentocloud account", - style="red", - ) - raise typer.Exit(1) - - table = tabulate( - [ - [ - target.name, - target.accelerators_repr, - f"${target.price}", - CHECKED if can_run(bento, target) else "insufficient res.", - ] - for target in targets - ], - headers=["instance type", "accelerator", "price/hr", "deployable"], - ).split("\n") - options.append(questionary.Separator(f"{table[0]}\n {table[1]}")) - - for target, line in zip(targets, table[2:]): - options.append( - questionary.Choice( - 
f"{line}", - value=target, - ) - ) - selected = questionary.select("Select an instance type", options).ask() - if selected is None: - raise typer.Exit(1) - return selected - - -def _select_action(bento, score): - if score > 0: - options = [ - questionary.Separator("Available actions"), - questionary.Choice( - "0. Run the model in terminal", - value="run", - shortcut_key="0", - ), - questionary.Separator(f" $ openllm run {bento}"), - questionary.Separator(" "), - questionary.Choice( - "1. Serve the model locally and get a chat server", - value="serve", - shortcut_key="1", - ), - questionary.Separator(f" $ openllm serve {bento}"), - questionary.Separator(" "), - questionary.Choice( - "2. Deploy the model to bentocloud and get a scalable chat server", - value="deploy", - shortcut_key="2", - ), - questionary.Separator(f" $ openllm deploy {bento}"), - ] - else: - options = [ - questionary.Separator("Available actions"), - questionary.Choice( - "0. Run the model in terminal", - value="run", - disabled="insufficient res.", - shortcut_key="0", - ), - questionary.Separator(f" $ openllm run {bento}"), - questionary.Separator(" "), - questionary.Choice( - "1. Serve the model locally and get a chat server", - value="serve", - disabled="insufficient res.", - shortcut_key="1", - ), - questionary.Separator(f" $ openllm serve {bento}"), - questionary.Separator(" "), - questionary.Choice( - "2. Deploy the model to bentocloud and get a scalable chat server", - value="deploy", - shortcut_key="2", - ), - questionary.Separator(f" $ openllm deploy {bento}"), - ] - action = questionary.select("Select an action", options).ask() - if action is None: - raise typer.Exit(1) - if action == "run": - try: - local_run(bento) - finally: - output("\nUse this command to run the action again:", style="green") - output(f" $ openllm run {bento}", style="orange") - elif action == "serve": - try: - local_serve(bento) - finally: - output("\nUse this command to run the action again:", style="green") - output(f" $ openllm serve {bento}", style="orange") - elif action == "deploy": - ensure_cloud_context() - targets = get_cloud_machine_spec() - target = _select_target(bento, targets) - try: - cloud_deploy(bento, target) - finally: - output("\nUse this command to run the action again:", style="green") - output( - f" $ openllm deploy {bento} --instance-type {target.name}", - style="orange", - ) - - -@app.command(help="get started interactively") -def hello(): - INTERACTIVE.set(True) - VERBOSE_LEVEL.set(20) - - target = get_local_machine_spec() - output(f" Detected Platform: {target.platform}", style="green") - if target.accelerators: - output(" Detected Accelerators: ", style="green") - for a in target.accelerators: - output(f" - {a.model} {a.memory_size}GB", style="green") - else: - output(" Detected Accelerators: None", style="yellow") - - models = list_bento() - if not models: - output( - "No model found, you probably need to update the model repo:", - style="red", - ) - output( - " $ openllm repo update", - style="orange", - ) - raise typer.Exit(1) - - bento_name, repo = _select_bento_name(models, target) - bento, score = _select_bento_version(models, target, bento_name, repo) - _select_action(bento, score) - - -@app.command(help="start an OpenAI API compatible chat server and chat in browser") -def serve( - model: Annotated[str, typer.Argument()] = "", - repo: Optional[str] = None, - port: int = 3000, - verbose: bool = False, -): - if verbose: - VERBOSE_LEVEL.set(20) - target = get_local_machine_spec() - bento = 
ensure_bento(model, target=target, repo_name=repo) - local_serve(bento, port=port) - - -@app.command(help="run the model and chat in terminal") -def run( - model: Annotated[str, typer.Argument()] = "", - repo: Optional[str] = None, - port: Optional[int] = None, - timeout: int = 600, - verbose: bool = False, -): - if verbose: - VERBOSE_LEVEL.set(20) - target = get_local_machine_spec() - bento = ensure_bento(model, target=target, repo_name=repo) - if port is None: - port = random.randint(30000, 40000) - local_run(bento, port=port, timeout=timeout) - - -@app.command( - help="deploy an production-ready OpenAI API compatible chat server to bentocloud ($100 free credit)", -) -def deploy( - model: Annotated[str, typer.Argument()] = "", - instance_type: Optional[str] = None, - repo: Optional[str] = None, - verbose: bool = False, -): - if verbose: - VERBOSE_LEVEL.set(20) - bento = ensure_bento(model, repo_name=repo) - if instance_type is not None: - cloud_deploy(bento, DeploymentTarget(name=instance_type)) - return - targets = get_cloud_machine_spec() - targets = filter(lambda x: can_run(bento, x) > 0, targets) - targets = sorted(targets, key=lambda x: can_run(bento, x), reverse=True) - if not targets: - output( - "No available instance type, check your bentocloud account", - style="red", - ) - raise typer.Exit(1) - target = targets[0] - output( - f"Recommended instance type: {target.name}", - style="green", - ) - cloud_deploy(bento, target) - - -@app.callback(invoke_without_command=True) -def typer_callback( - verbose: int = 0, - do_not_track: bool = typer.Option( - False, - "--do-not-track", - help="Whether to disable usage tracking", - envvar=DO_NOT_TRACK, - ), -): - if verbose: - VERBOSE_LEVEL.set(verbose) - if do_not_track: - os.environ[DO_NOT_TRACK] = str(True) - - -def main(): - if sys.version_info < (3, 9): - output("Python 3.8 or higher is required", style="red") - sys.exit(1) - app() - - -if __name__ == "__main__": - main() diff --git a/openllm_next/accelerator_spec.py b/openllm_next/accelerator_spec.py deleted file mode 100644 index 81b7d380..00000000 --- a/openllm_next/accelerator_spec.py +++ /dev/null @@ -1,166 +0,0 @@ -from __future__ import annotations - -import functools -import math -import typing -from types import SimpleNamespace - -import psutil - -from openllm_next.common import BentoInfo, DeploymentTarget, output - - -class Accelerator(SimpleNamespace): - model: str - memory_size: float - - def __gt__(self, other): - return self.memory_size > other.memory_size - - def __eq__(self, other): - return self.memory_size == other.memory_size - - def __repr__(self): - return f"{self.model}({self.memory_size}GB)" - - -class Resource(SimpleNamespace): - cpu: int = 0 - memory: float - gpu: int = 0 - gpu_type: str = "" - - def __hash__(self): - return hash((self.cpu, self.memory, self.gpu, self.gpu_type)) - - def __bool__(self): - return any(value is not None for value in self.__dict__.values()) - - -ACCELERATOR_SPEC_DICT: dict[str, dict] = { - "nvidia-gtx-1650": {"model": "GTX 1650", "memory_size": 4.0}, - "nvidia-gtx-1060": {"model": "GTX 1060", "memory_size": 6.0}, - "nvidia-gtx-1080-ti": {"model": "GTX 1080 Ti", "memory_size": 11.0}, - "nvidia-rtx-3060": {"model": "RTX 3060", "memory_size": 12.0}, - "nvidia-rtx-3060-ti": {"model": "RTX 3060 Ti", "memory_size": 8.0}, - "nvidia-rtx-3070-ti": {"model": "RTX 3070 Ti", "memory_size": 8.0}, - "nvidia-rtx-3080": {"model": "RTX 3080", "memory_size": 10.0}, - "nvidia-rtx-3080-ti": {"model": "RTX 3080 Ti", "memory_size": 12.0}, - 
"nvidia-rtx-3090": {"model": "RTX 3090", "memory_size": 24.0}, - "nvidia-rtx-4070-ti": {"model": "RTX 4070 Ti", "memory_size": 12.0}, - "nvidia-tesla-p4": {"model": "P4", "memory_size": 8.0}, - "nvidia-tesla-p100": {"model": "P100", "memory_size": 16.0}, - "nvidia-tesla-k80": {"model": "K80", "memory_size": 12.0}, - "nvidia-tesla-t4": {"model": "T4", "memory_size": 16.0}, - "nvidia-tesla-v100": {"model": "V100", "memory_size": 16.0}, - "nvidia-l4": {"model": "L4", "memory_size": 24.0}, - "nvidia-tesla-l4": {"model": "L4", "memory_size": 24.0}, - "nvidia-tesla-a10g": {"model": "A10G", "memory_size": 24.0}, - "nvidia-a100-80g": {"model": "A100", "memory_size": 80.0}, - "nvidia-a100-80gb": {"model": "A100", "memory_size": 80.0}, - "nvidia-tesla-a100": {"model": "A100", "memory_size": 40.0}, -} - - -ACCELERATOR_SPECS: dict[str, Accelerator] = { - key: Accelerator(**value) for key, value in ACCELERATOR_SPEC_DICT.items() -} - - -@functools.lru_cache -def get_local_machine_spec(): - if psutil.MACOS: - return DeploymentTarget(accelerators=[], source="local", platform="macos") - - if psutil.WINDOWS: - platform = "windows" - elif psutil.LINUX: - platform = "linux" - else: - raise NotImplementedError(f"Unsupported platform") - - from pynvml import ( - nvmlDeviceGetCount, - nvmlDeviceGetCudaComputeCapability, - nvmlDeviceGetHandleByIndex, - nvmlDeviceGetMemoryInfo, - nvmlDeviceGetName, - nvmlInit, - nvmlShutdown, - ) - - try: - nvmlInit() - device_count = nvmlDeviceGetCount() - accelerators: list[Accelerator] = [] - for i in range(device_count): - handle = nvmlDeviceGetHandleByIndex(i) - name = nvmlDeviceGetName(handle) - memory_info = nvmlDeviceGetMemoryInfo(handle) - accelerators.append( - Accelerator( - model=name, memory_size=math.ceil(int(memory_info.total) / 1024**3) - ) - ) - compute_capability = nvmlDeviceGetCudaComputeCapability(handle) - if compute_capability < (7, 5): - output( - f"GPU {name} with compute capability {compute_capability} " - "may not be supported, 7.5 or higher is recommended. check " - "https://developer.nvidia.com/cuda-gpus for more information", - style="yellow", - ) - nvmlShutdown() - return DeploymentTarget( - accelerators=accelerators, - source="local", - platform=platform, - ) - except Exception as e: - output( - f"Failed to get local GPU info. Ensure nvidia driver is installed to enable local GPU deployment", - style="yellow", - ) - output(f"Error: {e}", style="red", level=20) - return DeploymentTarget(accelerators=[], source="local", platform=platform) - - -@functools.lru_cache() -def can_run( - bento: typing.Union[Resource, BentoInfo], - target: typing.Optional[DeploymentTarget] = None, -) -> float: - """ - Calculate if the bento can be deployed on the target. 
- """ - if target is None: - target = get_local_machine_spec() - - resource_spec = Resource(**(bento.bento_yaml["services"][0]["config"].get("resources", {}))) - labels = bento.bento_yaml.get("labels", {}) - platforms = labels.get("platforms", "linux").split(",") - - if target.platform not in platforms: - return 0.0 - - # return 1.0 if no resource is specified - if not resource_spec: - return 0.5 - - if resource_spec.gpu > 0: - required_gpu = ACCELERATOR_SPECS[resource_spec.gpu_type] - filtered_accelerators = [ - ac - for ac in target.accelerators - if ac.memory_size >= required_gpu.memory_size - ] - if resource_spec.gpu > len(filtered_accelerators): - return 0.0 - return ( - required_gpu.memory_size - * resource_spec.gpu - / sum(ac.memory_size for ac in target.accelerators) - ) - if target.accelerators: - return 0.01 / sum(ac.memory_size for ac in target.accelerators) - return 1.0 diff --git a/openllm_next/analytic.py b/openllm_next/analytic.py deleted file mode 100644 index 0beb4482..00000000 --- a/openllm_next/analytic.py +++ /dev/null @@ -1,118 +0,0 @@ -from __future__ import annotations - -import functools -import os -import re -import time -import typing -from abc import ABC - -import attr -import click -import typer -import typer.core - -DO_NOT_TRACK = "BENTOML_DO_NOT_TRACK" - - -class EventMeta(ABC): - @property - def event_name(self): - # camel case to snake case - event_name = re.sub(r"(? typing.Iterable[str]: - return list(self.commands) - - -class OpenLLMTyper(typer.Typer): - def __init__(self, *args: typing.Any, **kwargs: typing.Any): - no_args_is_help = kwargs.pop("no_args_is_help", True) - context_settings = kwargs.pop("context_settings", {}) - if "help_option_names" not in context_settings: - context_settings["help_option_names"] = ("-h", "--help") - if "max_content_width" not in context_settings: - context_settings["max_content_width"] = int( - os.environ.get("COLUMNS", str(120)) - ) - klass = kwargs.pop("cls", OrderedCommands) - - super().__init__( - *args, - cls=klass, - no_args_is_help=no_args_is_help, - context_settings=context_settings, - **kwargs, - ) - - def command(self, *args: typing.Any, **kwargs: typing.Any): - def decorator(f): - @functools.wraps(f) - @click.pass_context - def wrapped(ctx: click.Context, *args, **kwargs): - from bentoml._internal.utils.analytics import track - - do_not_track = ( - os.environ.get(DO_NOT_TRACK, str(False)).lower() == "true" - ) - - # so we know that the root program is openllm - command_name = ctx.info_name - if ctx.parent.parent is not None: - # openllm model list - command_group = ctx.parent.info_name - elif ctx.parent.info_name == ctx.find_root().info_name: - # openllm run - command_group = "openllm" - - if do_not_track: - return f(*args, **kwargs) - start_time = time.time_ns() - try: - return_value = f(*args, **kwargs) - duration_in_ns = time.time_ns() - start_time - track( - OpenllmCliEvent( - cmd_group=command_group, - cmd_name=command_name, - duration_in_ms=duration_in_ns / 1e6, - ) - ) - return return_value - except BaseException as e: - duration_in_ns = time.time_ns() - start_time - track( - OpenllmCliEvent( - cmd_group=command_group, - cmd_name=command_name, - duration_in_ms=duration_in_ns / 1e6, - error_type=type(e).__name__, - return_code=2 if isinstance(e, KeyboardInterrupt) else 1, - ) - ) - raise - - return typer.Typer.command(self, *args, **kwargs)(wrapped) - - return decorator diff --git a/openllm_next/clean.py b/openllm_next/clean.py deleted file mode 100644 index 58dcb705..00000000 --- a/openllm_next/clean.py 
+++ /dev/null @@ -1,75 +0,0 @@ -import pathlib -import shutil - -import questionary - -from openllm_next.analytic import OpenLLMTyper -from openllm_next.common import ( - CONFIG_FILE, - REPO_DIR, - VENV_DIR, - VERBOSE_LEVEL, - output, -) - -app = OpenLLMTyper(help="clean up and release disk space used by OpenLLM") - - -HUGGINGFACE_CACHE = pathlib.Path.home() / ".cache" / "huggingface" / "hub" - - -@app.command(help="Clean up all the cached models from huggingface") -def model_cache(verbose: bool = False): - if verbose: - VERBOSE_LEVEL.set(20) - used_space = sum(f.stat().st_size for f in HUGGINGFACE_CACHE.rglob("*")) - sure = questionary.confirm( - f"This will remove all models cached by Huggingface (~{used_space / 1024 / 1024:.2f}MB), are you sure?" - ).ask() - if not sure: - return - shutil.rmtree(HUGGINGFACE_CACHE, ignore_errors=True) - output("All models cached by Huggingface have been removed", style="green") - - -@app.command(help="Clean up all the virtual environments created by OpenLLM") -def venvs(verbose: bool = False): - if verbose: - VERBOSE_LEVEL.set(20) - used_space = sum(f.stat().st_size for f in VENV_DIR.rglob("*")) - sure = questionary.confirm( - f"This will remove all virtual environments created by OpenLLM (~{used_space / 1024 / 1024:.2f}MB), are you sure?" - ).ask() - if not sure: - return - shutil.rmtree(VENV_DIR, ignore_errors=True) - output("All virtual environments have been removed", style="green") - - -@app.command(help="Clean up all the repositories cloned by OpenLLM") -def repos(verbose: bool = False): - if verbose: - VERBOSE_LEVEL.set(20) - shutil.rmtree(REPO_DIR, ignore_errors=True) - output("All repositories have been removed", style="green") - - -@app.command(help="Reset configurations to default") -def configs(verbose: bool = False): - if verbose: - VERBOSE_LEVEL.set(20) - shutil.rmtree(CONFIG_FILE, ignore_errors=True) - output("All configurations have been reset", style="green") - - -@app.command( - name="all", - help="Clean up all above and bring OpenLLM to a fresh start", -) -def all_cache(verbose: bool = False): - if verbose: - VERBOSE_LEVEL.set(20) - repos() - venvs() - model_cache() - configs() diff --git a/openllm_next/cloud.py b/openllm_next/cloud.py deleted file mode 100644 index 261ea3dd..00000000 --- a/openllm_next/cloud.py +++ /dev/null @@ -1,174 +0,0 @@ -import json -import os -import pathlib -import shutil -import subprocess -import typing - -import typer - -from openllm_next.accelerator_spec import ACCELERATOR_SPECS -from openllm_next.analytic import OpenLLMTyper -from openllm_next.common import ( - INTERACTIVE, - BentoInfo, - DeploymentTarget, - output, - run_command, -) - -app = OpenLLMTyper() - - -def _get_deploy_cmd(bento: BentoInfo, target: typing.Optional[DeploymentTarget] = None): - cmd = ["bentoml", "deploy", bento.bentoml_tag] - env = { - "BENTOML_HOME": f"{bento.repo.path}/bentoml", - } - - required_envs = bento.bento_yaml.get("envs", []) - required_env_names = [env["name"] for env in required_envs if "name" in env] - if required_env_names: - output( - f"This model requires the following environment variables to run: {repr(required_env_names)}", - style="yellow", - ) - - for env_info in bento.bento_yaml.get("envs", []): - if "name" not in env_info: - continue - if os.environ.get(env_info["name"]): - default = os.environ[env_info["name"]] - elif "value" in env_info: - default = env_info["value"] - else: - default = "" - - if INTERACTIVE.get(): - import questionary - - value = questionary.text( - f"{env_info['name']}:", - 
default=default, - ).ask() - else: - if default == "": - output( - f"Environment variable {env_info['name']} is required but not provided", - style="red", - ) - raise typer.Exit(1) - else: - value = default - - if value is None: - raise typer.Exit(1) - cmd += ["--env", f"{env_info['name']}={value}"] - - if target: - cmd += ["--instance-type", target.name] - - assert (pathlib.Path.home() / "bentoml" / ".yatai.yaml").exists() - shutil.copy( - pathlib.Path.home() / "bentoml" / ".yatai.yaml", - bento.repo.path / "bentoml" / ".yatai.yaml", - ) - - return cmd, env, None - - -def ensure_cloud_context(): - import questionary - - cmd = ["bentoml", "cloud", "current-context"] - try: - result = subprocess.check_output(cmd, stderr=subprocess.DEVNULL) - context = json.loads(result) - output(f" bentoml already logged in: {context['endpoint']}", style="green") - except subprocess.CalledProcessError: - output(" bentoml not logged in", style="red") - if not INTERACTIVE.get(): - output( - "\n get bentoml logged in by:", - ) - output( - " $ bentoml cloud login", - style="orange", - ) - output("") - output( - """ * you may need to visit https://cloud.bentoml.com to get an account. you can also bring your own bentoml cluster (BYOC) to your team from https://bentoml.com/contact""", - style="yellow", - ) - raise typer.Exit(1) - else: - action = questionary.select( - "Choose an action:", - choices=[ - "I have a BentoCloud account", - "get an account in two minutes", - ], - ).ask() - if action is None: - raise typer.Exit(1) - elif action == "get an account in two minutes": - output( - "Please visit https://cloud.bentoml.com to get your token", - style="yellow", - ) - endpoint = questionary.text( - "Enter the endpoint: (similar to https://my-org.cloud.bentoml.com)" - ).ask() - if endpoint is None: - raise typer.Exit(1) - token = questionary.text( - "Enter your token: (similar to cniluaxxxxxxxx)" - ).ask() - if token is None: - raise typer.Exit(1) - cmd = [ - "bentoml", - "cloud", - "login", - "--api-token", - token, - "--endpoint", - endpoint, - ] - try: - result = subprocess.check_output(cmd) - output(" Logged in successfully", style="green") - except subprocess.CalledProcessError: - output(" Failed to login", style="red") - raise typer.Exit(1) - - -def get_cloud_machine_spec(): - ensure_cloud_context() - cmd = ["bentoml", "deployment", "list-instance-types", "-o", "json"] - try: - result = subprocess.check_output(cmd, stderr=subprocess.DEVNULL) - instance_types = json.loads(result) - return [ - DeploymentTarget( - source="cloud", - name=it["name"], - price=it["price"], - platform="linux", - accelerators=( - [ACCELERATOR_SPECS[it["gpu_type"]] for _ in range(int(it["gpu"]))] - if it.get("gpu") and it["gpu_type"] in ACCELERATOR_SPECS - else [] - ), - ) - for it in instance_types - ] - except (subprocess.CalledProcessError, json.JSONDecodeError): - output("Failed to get cloud instance types", style="red") - return [] - - -def deploy(bento: BentoInfo, target: DeploymentTarget): - ensure_cloud_context() - cmd, env, cwd = _get_deploy_cmd(bento, target) - run_command(cmd, env=env, cwd=cwd) diff --git a/openllm_next/common.py b/openllm_next/common.py deleted file mode 100644 index 5191310c..00000000 --- a/openllm_next/common.py +++ /dev/null @@ -1,422 +0,0 @@ -from __future__ import annotations - -import asyncio -import functools -import hashlib -import io -import json -import os -import pathlib -import signal -import subprocess -import sys -import sysconfig -import typing -from contextlib import asynccontextmanager, 
contextmanager -from types import SimpleNamespace - -import typer -import typer.core - -ERROR_STYLE = "red" -SUCCESS_STYLE = "green" - - -CLLAMA_HOME = pathlib.Path.home() / ".openllm_next" -REPO_DIR = CLLAMA_HOME / "repos" -TEMP_DIR = CLLAMA_HOME / "temp" -VENV_DIR = CLLAMA_HOME / "venv" - -REPO_DIR.mkdir(exist_ok=True, parents=True) -TEMP_DIR.mkdir(exist_ok=True, parents=True) -VENV_DIR.mkdir(exist_ok=True, parents=True) - -CONFIG_FILE = CLLAMA_HOME / "config.json" - -CHECKED = "☆" - -T = typing.TypeVar("T") - - -class ContextVar(typing.Generic[T]): - def __init__(self, default: T): - self._stack: list[T] = [] - self._default = default - - def get(self) -> T: - if self._stack: - return self._stack[-1] - return self._default - - def set(self, value): - self._stack.append(value) - - @contextmanager - def patch(self, value): - self._stack.append(value) - try: - yield - finally: - self._stack.pop() - - -VERBOSE_LEVEL = ContextVar(10) -INTERACTIVE = ContextVar(False) -FORCE = ContextVar(False) - - -def output(content, level=0, style=None, end=None): - import questionary - - if level > VERBOSE_LEVEL.get(): - return - - if not isinstance(content, str): - import pyaml - - out = io.StringIO() - pyaml.pprint( - content, - dst=out, - sort_dicts=False, - sort_keys=False, - ) - questionary.print(out.getvalue(), style=style, end="" if end is None else end) - out.close() - - if isinstance(content, str): - questionary.print(content, style=style, end="\n" if end is None else end) - - -class Config(SimpleNamespace): - repos: dict[str, str] = { - "default": "git+https://github.com/bentoml/openllm-models@main" - } - default_repo: str = "default" - - def tolist(self): - return dict( - repos=self.repos, - default_repo=self.default_repo, - ) - - -def load_config(): - if CONFIG_FILE.exists(): - try: - with open(CONFIG_FILE) as f: - return Config(**json.load(f)) - except json.JSONDecodeError: - return Config() - return Config() - - -def save_config(config): - with open(CONFIG_FILE, "w") as f: - json.dump(config.tolist(), f, indent=2) - - -class RepoInfo(SimpleNamespace): - name: str - path: pathlib.Path - url: str - server: str - owner: str - repo: str - branch: str - - def tolist(self): - if VERBOSE_LEVEL.get() <= 0: - return f"{self.name} ({self.url})" - if VERBOSE_LEVEL.get() <= 10: - return dict( - name=self.name, - url=self.url, - path=str(self.path), - ) - if VERBOSE_LEVEL.get() <= 20: - return dict( - name=self.name, - url=self.url, - path=str(self.path), - server=self.server, - owner=self.owner, - repo=self.repo, - branch=self.branch, - ) - - -class BentoInfo(SimpleNamespace): - repo: RepoInfo - path: pathlib.Path - alias: str = "" - - def __str__(self): - if self.repo.name == "default": - return f"{self.tag}" - else: - return f"{self.repo.name}/{self.tag}" - - def __hash__(self): - return md5(str(self.path)) - - @property - def tag(self) -> str: - if self.alias: - return f"{self.path.parent.name}:{self.alias}" - return f"{self.path.parent.name}:{self.path.name}" - - @property - def bentoml_tag(self) -> str: - return f"{self.path.parent.name}:{self.path.name}" - - @property - def name(self) -> str: - return self.path.parent.name - - @property - def version(self) -> str: - return self.path.name - - @property - def labels(self) -> dict[str, str]: - return self.bento_yaml["labels"] - - @functools.cached_property - def bento_yaml(self) -> dict: - import yaml - - bento_file = self.path / "bento.yaml" - return yaml.safe_load(bento_file.read_text()) - - @functools.cached_property - def platforms(self) -> 
list[str]: - return self.bento_yaml["labels"].get("platforms", "linux").split(",") - - @functools.cached_property - def pretty_yaml(self) -> dict: - def _pretty_routes(routes): - return { - route["route"]: { - "input": { - k: v["type"] for k, v in route["input"]["properties"].items() - }, - "output": route["output"]["type"], - } - for route in routes - } - - if len(self.bento_yaml["services"]) == 1: - pretty_yaml = { - "apis": _pretty_routes(self.bento_yaml["schema"]["routes"]), - "resources": self.bento_yaml["services"][0]["config"]["resources"], - "envs": self.bento_yaml["envs"], - "platforms": self.platforms, - } - return pretty_yaml - return self.bento_yaml - - @functools.cached_property - def pretty_gpu(self) -> str: - from openllm_next.accelerator_spec import ACCELERATOR_SPECS - - try: - resources = self.bento_yaml["services"][0]["config"]["resources"] - if resources["gpu"] > 1: - acc = ACCELERATOR_SPECS[resources["gpu_type"]] - return f"{acc.memory_size:.0f}Gx{resources['gpu']}" - elif resources["gpu"] > 0: - acc = ACCELERATOR_SPECS[resources["gpu_type"]] - return f"{acc.memory_size:.0f}G" - except KeyError: - pass - return "" - - def tolist(self): - verbose = VERBOSE_LEVEL.get() - if verbose <= 0: - return str(self) - if verbose <= 10: - return dict( - tag=self.tag, - repo=self.repo.tolist(), - path=str(self.path), - model_card=self.pretty_yaml, - ) - if verbose <= 20: - return dict( - tag=self.tag, - repo=self.repo.tolist(), - path=str(self.path), - bento_yaml=self.bento_yaml, - ) - - -class VenvSpec(SimpleNamespace): - python_version: str - python_packages: dict[str, str] - name_prefix = "" - - def __hash__(self): - return md5( - # self.python_version, - *sorted(self.python_packages), - ) - - -class Accelerator(SimpleNamespace): - model: str - memory_size: float - - def __gt__(self, other): - return self.memory_size > other.memory_size - - def __eq__(self, other): - return self.memory_size == other.memory_size - - -class DeploymentTarget(SimpleNamespace): - source: str = "local" - name: str = "local" - price: str = "" - platform = "linux" - accelerators: list[Accelerator] - - def __hash__(self): - return hash(self.source) - - @property - def accelerators_repr(self) -> str: - accs = {a.model for a in self.accelerators} - if len(accs) == 0: - return "null" - if len(accs) == 1: - a = self.accelerators[0] - return f"{a.model} x{len(self.accelerators)}" - return ", ".join((f"{a.model}" for a in self.accelerators)) - - -def run_command( - cmd, - cwd=None, - env=None, - copy_env=True, - venv=None, - silent=False, -) -> subprocess.CompletedProcess: - import shlex - - env = env or {} - cmd = [str(c) for c in cmd] - bin_dir = "Scripts" if os.name == "nt" else "bin" - if not silent: - output("\n") - if cwd: - output(f"$ cd {cwd}", style="orange") - if env: - for k, v in env.items(): - output(f"$ export {k}={shlex.quote(v)}", style="orange") - if venv: - output(f"$ source {venv / 'bin' / 'activate'}", style="orange") - output(f"$ {' '.join(cmd)}", style="orange") - - if venv: - py = venv / bin_dir / f"python{sysconfig.get_config_var('EXE')}" - else: - py = sys.executable - - if copy_env: - env = {**os.environ, **env} - - if cmd and cmd[0] == "bentoml": - cmd = [py, "-m", "bentoml"] + cmd[1:] - if cmd and cmd[0] == "python": - cmd = [py] + cmd[1:] - - try: - if silent: - return subprocess.run( # type: ignore - cmd, - cwd=cwd, - env=env, - stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL, - ) - else: - return subprocess.run( - cmd, - cwd=cwd, - env=env, - ) - except 
subprocess.CalledProcessError: - output("Command failed", style="red") - raise typer.Exit(1) - - -async def stream_command_output(stream, style="gray"): - async for line in stream: - output(line.decode(), style=style, end="") - - -@asynccontextmanager -async def async_run_command( - cmd, - cwd=None, - env=None, - copy_env=True, - venv=None, - silent=True, -): - import shlex - - env = env or {} - cmd = [str(c) for c in cmd] - - if not silent: - output("\n") - if cwd: - output(f"$ cd {cwd}", style="orange") - if env: - for k, v in env.items(): - output(f"$ export {k}={shlex.quote(v)}", style="orange") - if venv: - output(f"$ source {venv / 'bin' / 'activate'}", style="orange") - output(f"$ {' '.join(cmd)}", style="orange") - - if venv: - py = venv / "bin" / "python" - else: - py = sys.executable - - if copy_env: - env = {**os.environ, **env} - - if cmd and cmd[0] == "bentoml": - cmd = [py, "-m", "bentoml"] + cmd[1:] - if cmd and cmd[0] == "python": - cmd = [py] + cmd[1:] - - proc = None - try: - proc = await asyncio.create_subprocess_shell( - " ".join(map(str, cmd)), - stdout=asyncio.subprocess.PIPE, - stderr=asyncio.subprocess.PIPE, - cwd=cwd, - env=env, - ) - yield proc - except subprocess.CalledProcessError: - output("Command failed", style="red") - raise typer.Exit(1) - finally: - if proc: - proc.send_signal(signal.SIGINT) - await proc.wait() - - -def md5(*strings: str) -> int: - m = hashlib.md5() - for s in strings: - m.update(s.encode()) - return int(m.hexdigest(), 16) diff --git a/openllm_next/local.py b/openllm_next/local.py deleted file mode 100644 index 947192fb..00000000 --- a/openllm_next/local.py +++ /dev/null @@ -1,117 +0,0 @@ -import asyncio -import time - -import httpx - -from openllm_next.common import ( - BentoInfo, - async_run_command, - output, - run_command, - stream_command_output, -) -from openllm_next.venv import ensure_venv - - -def _get_serve_cmd(bento: BentoInfo, port: int = 3000): - cmd = ["bentoml", "serve", bento.bentoml_tag] - if port != 3000: - cmd += ["--port", str(port)] - env = { - "BENTOML_HOME": f"{bento.repo.path}/bentoml", - } - return cmd, env, None - - -def serve( - bento: BentoInfo, - port: int = 3000, -): - venv = ensure_venv(bento) - cmd, env, cwd = _get_serve_cmd(bento, port=port) - run_command(cmd, env=env, cwd=cwd, venv=venv) - - -async def _run_model( - bento: BentoInfo, - port: int = 3000, - timeout: int = 600, -): - venv = ensure_venv(bento) - cmd, env, cwd = _get_serve_cmd(bento, port) - async with async_run_command( - cmd, - env=env, - cwd=cwd, - venv=venv, - silent=False, - ) as server_proc: - - output(f"Model server started {server_proc.pid}") - - stdout_streamer = None - stderr_streamer = None - start_time = time.time() - - output("Model loading...", style="green") - for _ in range(timeout): - try: - resp = httpx.get(f"http://localhost:{port}/readyz", timeout=3) - if resp.status_code == 200: - break - except httpx.RequestError: - if time.time() - start_time > 30: - if not stdout_streamer: - stdout_streamer = asyncio.create_task( - stream_command_output(server_proc.stdout, style="gray") - ) - if not stderr_streamer: - stderr_streamer = asyncio.create_task( - stream_command_output(server_proc.stderr, style="#BD2D0F") - ) - await asyncio.sleep(1) - else: - output("Model failed to load", style="red") - server_proc.terminate() - return - - if stdout_streamer: - stdout_streamer.cancel() - if stderr_streamer: - stderr_streamer.cancel() - - output("Model is ready", style="green") - messages: list[dict[str, str]] = [] - - from openai import 
AsyncOpenAI - - client = AsyncOpenAI(base_url=f"http://localhost:{port}/v1", api_key="local") - model_id = (await client.models.list()).data[0].id - while True: - try: - message = input("user: ") - if message == "": - output("empty message, please enter something", style="yellow") - continue - messages.append(dict(role="user", content=message)) - output("assistant: ", end="", style="lightgreen") - assistant_message = "" - stream = await client.chat.completions.create( - model=model_id, - messages=messages, # type: ignore - stream=True, - ) - async for chunk in stream: - text = chunk.choices[0].delta.content or "" - assistant_message += text - output(text, end="", style="lightgreen") - messages.append(dict(role="assistant", content=assistant_message)) - output("") - except KeyboardInterrupt: - break - output("\nStopping model server...", style="green") - output("Stopped model server", style="green") - - -def run(bento: BentoInfo, port: int = 3000, timeout: int = 600): - asyncio.run(_run_model(bento, port=port, timeout=timeout)) diff --git a/openllm_next/model.py b/openllm_next/model.py deleted file mode 100644 index 1d3c38b7..00000000 --- a/openllm_next/model.py +++ /dev/null @@ -1,173 +0,0 @@ -import typing -from typing import Optional - -import tabulate -import typer - -from openllm_next.accelerator_spec import DeploymentTarget, can_run -from openllm_next.analytic import OpenLLMTyper -from openllm_next.common import ( - FORCE, - VERBOSE_LEVEL, - BentoInfo, - load_config, - output, -) -from openllm_next.repo import ensure_repo_updated, parse_repo_url - -app = OpenLLMTyper(help="manage models") - - -@app.command() -def get( - tag: str, - repo: Optional[str] = None, - verbose: bool = False, -): - if verbose: - VERBOSE_LEVEL.set(20) - bento_info = ensure_bento(tag, repo_name=repo) - if bento_info: - output(bento_info) - - -@app.command(name="list") -def list_( - tag: Optional[str] = None, - repo: Optional[str] = None, - verbose: bool = False, -): - if verbose: - VERBOSE_LEVEL.set(20) - - bentos = list_bento(tag=tag, repo_name=repo) - bentos.sort(key=lambda x: x.name) - - seen = set() - - def is_seen(value): - if value in seen: - return True - seen.add(value) - return False - - table = tabulate.tabulate( - [ - [ - "" if is_seen(bento.name) else bento.name, - bento.tag, - bento.repo.name, - bento.pretty_gpu, - ",".join(bento.platforms), - ] - for bento in bentos - ], - headers=["model", "version", "repo", "required VRAM", "platforms"], - ) - output(table) - - -def ensure_bento( - model: str, - target: Optional[DeploymentTarget] = None, - repo_name: Optional[str] = None, -) -> BentoInfo: - bentos = list_bento(model, repo_name=repo_name) - if len(bentos) == 0: - output(f"No model found for {model}", style="red") - raise typer.Exit(1) - - if len(bentos) == 1: - if FORCE.get(): - output(f"Found model {bentos[0]}", style="green") - return bentos[0] - if target is None: - return bentos[0] - if can_run(bentos[0], target) <= 0: - return bentos[0] - output(f"Found model {bentos[0]}", style="green") - return bentos[0] - - if target is None: - output( - f"Multiple models match {model}, did you mean one of these?", - style="red", - ) - for bento in bentos: - output(f" {bento}") - raise typer.Exit(1) - - filtered = [bento for bento in bentos if can_run(bento, target) > 0] - if len(filtered) == 0: - output(f"No deployment target found for {model}", style="red") - raise typer.Exit(1) - - if len(filtered) == 0: - output(f"No deployment target found for {model}", style="red") - raise typer.Exit(1) - - if 
len(bentos) > 1: - output( - f"Multiple models match {model}, did you mean one of these?", - style="red", - ) - for bento in bentos: - output(f" {bento}") - raise typer.Exit(1) - - return bentos[0] - - -def list_bento( - tag: typing.Optional[str] = None, - repo_name: typing.Optional[str] = None, - include_alias: bool = False, -) -> typing.List[BentoInfo]: - ensure_repo_updated() - - if repo_name is not None: - config = load_config() - if repo_name not in config.repos: - output(f"Repo `{repo_name}` not found, did you mean one of these?") - for repo_name in config.repos: - output(f" {repo_name}") - raise typer.Exit(1) - - if not tag: - glob_pattern = "bentoml/bentos/*/*" - elif ":" in tag: - bento_name, version = tag.split(":") - glob_pattern = f"bentoml/bentos/{bento_name}/{version}" - else: - glob_pattern = f"bentoml/bentos/{tag}/*" - - model_list = [] - config = load_config() - for _repo_name, repo_url in config.repos.items(): - if repo_name is not None and _repo_name != repo_name: - continue - repo = parse_repo_url(repo_url, _repo_name) - for path in repo.path.glob(glob_pattern): - if path.is_dir() and (path / "bento.yaml").exists(): - model = BentoInfo(repo=repo, path=path) - elif path.is_file(): - with open(path) as f: - origin_name = f.read().strip() - origin_path = path.parent / origin_name - model = BentoInfo(alias=path.name, repo=repo, path=origin_path) - else: - model = None - if model: - model_list.append(model) - model_list.sort(key=lambda x: x.tag) - if not include_alias: - seen = set() - model_list = [ - x - for x in model_list - if not ( - f"{x.bento_yaml['name']}:{x.bento_yaml['version']}" in seen - or seen.add(f"{x.bento_yaml['name']}:{x.bento_yaml['version']}") - ) - ] - return model_list diff --git a/openllm_next/repo.py b/openllm_next/repo.py deleted file mode 100644 index 7d335c50..00000000 --- a/openllm_next/repo.py +++ /dev/null @@ -1,203 +0,0 @@ -import datetime -import re -import shutil - -import pyaml -import questionary -import typer - -from openllm_next.analytic import OpenLLMTyper -from openllm_next.common import ( - INTERACTIVE, - REPO_DIR, - VERBOSE_LEVEL, - RepoInfo, - load_config, - output, - save_config, -) - -UPDATE_INTERVAL = datetime.timedelta(days=3) - -app = OpenLLMTyper(help="manage repos") - - -@app.command() -def list(verbose: bool = False): - if verbose: - VERBOSE_LEVEL.set(20) - config = load_config() - pyaml.pprint( - [parse_repo_url(repo, name) for name, repo in config.repos.items()], - sort_dicts=False, - sort_keys=False, - ) - - -@app.command() -def remove(name: str): - config = load_config() - if name not in config.repos: - output(f"Repo {name} does not exist", style="red") - return - - del config.repos[name] - save_config(config) - output(f"Repo {name} removed", style="green") - - -def _complete_alias(repo_name: str): - from openllm_next.model import list_bento - - for bento in list_bento(repo_name=repo_name): - alias = bento.labels.get("openllm_alias", "").strip() - if alias: - for a in alias.split(","): - with open(bento.path.parent / a, "w") as f: - f.write(bento.version) - - -@app.command() -def update(): - import dulwich - import dulwich.errors - import dulwich.porcelain - - config = load_config() - repos_in_use = set() - for repo_name, repo in config.repos.items(): - repo = parse_repo_url(repo, repo_name) - repos_in_use.add((repo.server, repo.owner, repo.repo)) - if repo.path.exists(): # TODO: use update instead of remove and clone - shutil.rmtree(repo.path, ignore_errors=True) - if not repo.path.exists(): - 
repo.path.parent.mkdir(parents=True, exist_ok=True) - try: - dulwich.porcelain.clone( - f"https://{repo.server}/{repo.owner}/{repo.repo}.git", - str(repo.path), - checkout=True, - depth=1, - branch=repo.branch, - ) - output("") - output(f"Repo `{repo.name}` updated", style="green") - except: - shutil.rmtree(repo.path, ignore_errors=True) - output(f"Failed to clone repo {repo.name}", style="red") - else: - try: - import dulwich.porcelain - - dulwich.porcelain.pull( - str(repo.path), - f"https://{repo.server}/{repo.owner}/{repo.repo}.git", - refspecs=repo.branch, - force=True, - ) - dulwich.porcelain.clean(str(repo.path), str(repo.path)) - output("") - output(f"Repo `{repo.name}` updated", style="green") - except: - shutil.rmtree(repo.path, ignore_errors=True) - output(f"Failed to update repo {repo.name}", style="red") - for c in REPO_DIR.glob("*/*/*"): - repo_spec = tuple(c.parts[-3:]) - if repo_spec not in repos_in_use: - shutil.rmtree(c, ignore_errors=True) - output(f"Removed unused repo cache {c}") - with open(REPO_DIR / "last_update", "w") as f: - f.write(datetime.datetime.now().isoformat()) - for repo_name in config.repos: - _complete_alias(repo_name) - - -def ensure_repo_updated(): - last_update_file = REPO_DIR / "last_update" - if not last_update_file.exists(): - if INTERACTIVE.get(): - choice = questionary.confirm( - "The repo cache is never updated, do you want to update it to fetch the latest model list?" - ).ask() - if choice: - update() - return - else: - output( - "The repo cache is never updated, please run `openllm repo update` to fetch the latest model list", - style="red", - ) - raise typer.Exit(1) - last_update = datetime.datetime.fromisoformat(last_update_file.read_text().strip()) - if datetime.datetime.now() - last_update > UPDATE_INTERVAL: - if INTERACTIVE.get(): - choice = questionary.confirm( - "The repo cache is outdated, do you want to update it to fetch the latest model list?" - ).ask() - if choice: - update() - else: - output( - "The repo cache is outdated, please run `openllm repo update` to fetch the latest model list", - style="yellow", - ) - - -GIT_REPO_RE = re.compile( - r"git\+https://(?P.+)/(?P.+)/(?P.+?)(@(?P.+))?$" -) - - -def parse_repo_url(repo_url, repo_name=None) -> RepoInfo: - """ - parse the git repo url to server, owner, repo name, branch - >>> parse_repo_url("git+https://github.com/bentoml/bentovllm@main") - ('github.com', 'bentoml', 'bentovllm', 'main') - - >>> parse_repo_url("git+https://github.com/bentoml/bentovllm") - ('github.com', 'bentoml', 'bentovllm', 'main') - """ - match = GIT_REPO_RE.match(repo_url) - if not match: - raise ValueError(f"Invalid git repo url: {repo_url}") - server = match.group("server") - owner = match.group("owner") - repo = match.group("repo") - branch = match.group("branch") or "main" - path = REPO_DIR / server / owner / repo - return RepoInfo( - name=repo if repo_name is None else repo_name, - url=repo_url, - server=server, - owner=owner, - repo=repo, - branch=branch, - path=path, - ) - - -@app.command() -def add(name: str, repo: str): - name = name.lower() - if not name.isidentifier(): - output( - f"Invalid repo name: {name}, should only contain letters, numbers and underscores", - style="red", - ) - return - - config = load_config() - if name in config.repos: - override = questionary.confirm( - f"Repo {name} already exists({config.repos[name]}), override?" 
- ).ask() - if not override: - return - - config.repos[name] = repo - save_config(config) - output(f"Repo {name} added", style="green") - - -if __name__ == "__main__": - app() diff --git a/openllm_next/venv.py b/openllm_next/venv.py deleted file mode 100644 index 0e7e5124..00000000 --- a/openllm_next/venv.py +++ /dev/null @@ -1,164 +0,0 @@ -import functools -import os -import pathlib -import shutil -import typing -from typing import Iterable - -import typer - -from openllm_next.common import ( - VENV_DIR, - VERBOSE_LEVEL, - BentoInfo, - VenvSpec, - output, - run_command, -) - - -@functools.lru_cache -def _resolve_packages(requirement: typing.Union[pathlib.Path, str]): - from pip_requirements_parser import RequirementsFile - - requirements_txt = RequirementsFile.from_file( - str(requirement), - include_nested=True, - ) - return requirements_txt.requirements - - -def _filter_preheat_packages(requirements: Iterable) -> list[str]: - PREHEAT_PIP_PACKAGES = ["torch", "vllm"] - - deps: list[str] = [] - for req in requirements: - if ( - req.is_editable - or req.is_local_path - or req.is_url - or req.is_wheel - or not req.name - or not req.specifier - ): - continue - for sp in req.specifier: - if sp.operator == "==" and req.name in PREHEAT_PIP_PACKAGES: - assert req.line is not None - deps.append(req.line) - break - return deps - - -@functools.lru_cache -def _resolve_bento_env_specs(bento: BentoInfo): - ver_file = bento.path / "env" / "python" / "version.txt" - assert ver_file.exists(), f"cannot find version file in {bento.path}" - - lock_file = bento.path / "env" / "python" / "requirements.lock.txt" - if not lock_file.exists(): - lock_file = bento.path / "env" / "python" / "requirements.txt" - - reqs = _resolve_packages(lock_file) - preheat_packages = _filter_preheat_packages(reqs) - ver = ver_file.read_text().strip() - return ( - VenvSpec( - python_version=ver, - python_packages=preheat_packages, - name_prefix=f"{bento.tag.replace(':', '_')}-1-", - ), - VenvSpec( - python_version=ver, - python_packages=[v.line for v in reqs], - name_prefix=f"{bento.tag.replace(':', '_')}-2-", - ), - ) - - -def _get_lib_dir(venv: pathlib.Path) -> pathlib.Path: - if os.name == "nt": - return venv / "Lib/site-packages" - else: - return next(venv.glob("lib/python*")) / "site-packages" - - -def _ensure_venv( - env_spec: VenvSpec, - parrent_venv: typing.Optional[pathlib.Path] = None, -) -> pathlib.Path: - venv = VENV_DIR / str(hash(env_spec)) - if venv.exists() and not (venv / "DONE").exists(): - shutil.rmtree(venv, ignore_errors=True) - if not venv.exists(): - output(f"Installing model dependencies({venv})...", style="green") - - venv_py = ( - venv / "Scripts" / "python.exe" - if os.name == "nt" - else venv / "bin" / "python" - ) - try: - run_command( - ["python", "-m", "uv", "venv", venv], - silent=VERBOSE_LEVEL.get() < 10, - ) - lib_dir = _get_lib_dir(venv) - if parrent_venv is not None: - parent_lib_dir = _get_lib_dir(parrent_venv) - with open(lib_dir / f"{parrent_venv.name}.pth", "w+") as f: - f.write(str(parent_lib_dir)) - with open(venv / "requirements.txt", "w") as f: - f.write("\n".join(sorted(env_spec.python_packages))) - run_command( - [ - "python", - "-m", - "uv", - "pip", - "install", - "-p", - str(venv_py), - "-r", - venv / "requirements.txt", - ], - silent=VERBOSE_LEVEL.get() < 10, - ) - with open(venv / "DONE", "w") as f: - f.write("DONE") - except Exception: - shutil.rmtree(venv, ignore_errors=True) - output( - f"Failed to install dependencies to {venv}. 
Cleaned up.", - style="red", - ) - raise typer.Exit(1) - output(f"Successfully installed dependencies to {venv}.", style="green") - return venv - else: - return venv - - -def _ensure_venvs(env_spec_list: Iterable[VenvSpec]) -> pathlib.Path: - last_venv = None - for env_spec in env_spec_list: - last_venv = _ensure_venv(env_spec, last_venv) - assert last_venv is not None - return last_venv - - -def ensure_venv(bento: BentoInfo) -> pathlib.Path: - return _ensure_venvs(_resolve_bento_env_specs(bento)) - - -def _check_venv(env_spec: VenvSpec) -> bool: - venv = VENV_DIR / str(hash(env_spec)) - if not venv.exists(): - return False - if venv.exists() and not (venv / "DONE").exists(): - return False - return True - - -def check_venv(bento: BentoInfo) -> bool: - return all(_check_venv(env_spec) for env_spec in _resolve_bento_env_specs(bento)) diff --git a/package.json b/package.json deleted file mode 100644 index 7e76c6b5..00000000 --- a/package.json +++ /dev/null @@ -1,57 +0,0 @@ -{ - "name": "@bentoml/openllm-monorepo", - "version": "0.5.7", - "description": "OpenLLM: Operating LLMs in production", - "author": "Aaron Pham <29749331+aarnphm@users.noreply.github.com>", - "license": "Apache-2.0", - "repository": { - "type": "git", - "url": "git+https://github.com/bentoml/OpenLLM.git" - }, - "bugs": { - "url": "https://github.com/bentoml/OpenLLM/issues" - }, - "engines": { - "node": ">=18" - }, - "type": "module", - "private": true, - "scripts": { - "lint": "eslint . --cache", - "check": "npx prettier . --check", - "format": "npx prettier . --write" - }, - "packageManager": "pnpm@8.6.3", - "devDependencies": { - "@edge-runtime/vm": "3.1.6", - "@next/eslint-plugin-next": "13.5.5", - "@types/node": "20.8.7", - "@typescript-eslint/eslint-plugin": "6.10.0", - "@typescript-eslint/parser": "6.8.0", - "eslint": "^8.53.0", - "eslint-config-prettier": "9.0.0", - "eslint-plugin-import": "2.29.0", - "eslint-plugin-react": "7.33.2", - "eslint-plugin-react-hooks": "4.6.0", - "eslint-plugin-tailwindcss": "3.13.0", - "eslint-plugin-unicorn": "48.0.1", - "prettier": "^3.0.3", - "prettier-plugin-pkg": "0.18.0", - "prettier-plugin-tailwindcss": "0.5.6", - "turbo": "1.10.16", - "typescript": "5.2.2" - }, - "pnpm": { - "overrides": { - "vitest": "^0.27.1", - "react": "^18.2.0", - "react-dom": "^18.2.0", - "@mdx-js/react": "^2.3.0", - "@vercel/analytics": "1.0.2", - "next-themes": "^0.2.1", - "next": "^13.4.8", - "@types/react": "^18.0.15", - "@types/react-dom": "^18.0.6" - } - } -} diff --git a/pyproject.toml b/pyproject.toml index 95968631..9e59fcb5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,34 +1,189 @@ -[build-system] -requires = ["setuptools>=42", "wheel"] -build-backend = "setuptools.build_meta" - [project] -name = "openllm-next" -version = "0.0.1" -description = "A description of your package." -authors = [{name = "oasiszero", email = "oasis0.com@gmail.com"}] -license = {file = "LICENSE"} -dependencies = [ - "bentoml", - "typer", - "questionary", - "pyaml", - "psutil", - "pathlib", - "pip_requirements_parser", - "nvidia-ml-py", - "dulwich", - "tabulate", - "uv", - "openai==1.35.9", +name = "openllm" +description = "OpenLLM: Self-hosting LLMs Made Easy." 
+readme = { file = "README.md", content-type = "text/markdown" } +authors = [{ name = "BentoML Team", email = "contact@bentoml.com" }] +dynamic = ["version"] +classifiers = [ + "Development Status :: 5 - Production/Stable", + "Environment :: GPU :: NVIDIA CUDA", + "Environment :: GPU :: NVIDIA CUDA :: 12", + "Environment :: GPU :: NVIDIA CUDA :: 11.8", + "Environment :: GPU :: NVIDIA CUDA :: 11.7", + "License :: OSI Approved :: Apache Software License", + "Topic :: Scientific/Engineering :: Artificial Intelligence", + "Topic :: Software Development :: Libraries", + "Operating System :: OS Independent", + "Intended Audience :: Developers", + "Intended Audience :: Science/Research", + "Intended Audience :: System Administrators", + "Typing :: Typed", + "Programming Language :: Python", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3 :: Only", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: Implementation :: CPython", + "Programming Language :: Python :: Implementation :: PyPy", ] +dependencies = [ + "bentoml", + "typer", + "questionary", + "pyaml", + "psutil", + "pathlib", + "pip_requirements_parser", + "nvidia-ml-py", + "dulwich", + "tabulate", + "uv", + "openai==1.35.9", +] +keywords = [ + "MLOps", + "AI", + "BentoML", + "Model Serving", + "Model Deployment", + "LLMOps", + "Falcon", + "Vicuna", + "Llama 2", + "Fine tuning", + "Serverless", + "Large Language Model", + "Generative AI", + "StableLM", + "Alpaca", + "PyTorch", + "Mistral", + "vLLM", + "Transformers", +] +license = "Apache-2.0" +requires-python = ">=3.9" [project.scripts] -openllm = "openllm_next.__main__:main" +openllm = "openllm.__main__:app" + +[project.urls] +Blog = "https://modelserving.com" +Documentation = "https://github.com/bentoml/OpenLLM#readme" +GitHub = "https://github.com/bentoml/OpenLLM" +Homepage = "https://bentoml.com" +Tracker = "https://github.com/bentoml/OpenLLM/issues" +Twitter = "https://twitter.com/bentomlai" [tool.typer] -src-dir = "openllm_next" +src-dir = "src/openllm" -[tool.isort] -multi_line_output = 3 -include_trailing_comma = true +[build-system] +requires = [ + "hatchling==1.25.0", + "hatch-vcs==0.4.0", + "hatch-fancy-pypi-readme==24.1.0", +] +build-backend = 'hatchling.build' + +[tool.hatch.version] +source = "vcs" +fallback-version = "0.0.0" +[tool.hatch.build.hooks.vcs] +version-file = "src/openllm/_version.py" +[tool.hatch.version.raw-options] +git_describe_command = [ + "git", + "describe", + "--dirty", + "--tags", + "--long", + "--first-parent", +] +version_scheme = "post-release" +fallback_version = "0.0.0" +[tool.hatch.metadata] +allow-direct-references = true +[tool.hatch.build.targets.wheel] +only-include = ["src/openllm"] +sources = ["src"] +[tool.hatch.build.targets.sdist] +exclude = ["/.git_archival.txt", "/.python-version-default"] + +[tool.hatch.metadata.hooks.fancy-pypi-readme] +content-type = "text/markdown" +# PyPI doesn't support the tag. +[[tool.hatch.metadata.hooks.fancy-pypi-readme.fragments]] +text = """ +
+  Banner for OpenLLM
+""" +[[tool.hatch.metadata.hooks.fancy-pypi-readme.fragments]] +end-before = "\n" +path = "README.md" +start-after = "\n" +[[tool.hatch.metadata.hooks.fancy-pypi-readme.fragments]] +text = """ +
+  Gif showing OpenLLM Intro
+""" +[[tool.hatch.metadata.hooks.fancy-pypi-readme.fragments]] +end-before = "\n" +path = "README.md" +start-after = "\n" +[[tool.hatch.metadata.hooks.fancy-pypi-readme.fragments]] +text = """ +## Release Information +""" +[[tool.hatch.metadata.hooks.fancy-pypi-readme.fragments]] +path = "CHANGELOG.md" +pattern = "\n(###.+?\n)## " +start-after = "" +[[tool.hatch.metadata.hooks.fancy-pypi-readme.fragments]] +text = """ +--- +[Click me for full changelog](https://github.com/bentoml/openllm/blob/main/CHANGELOG.md) +""" + +[tool.towncrier] +directory = "changelog.d" +filename = "CHANGELOG.md" +issue_format = "[#{issue}](https://github.com/bentoml/openllm/issues/{issue})" +name = "openllm" +start_string = "\n" +template = "changelog.d/template.md.jinja" +title_format = "" +underlines = ["", "", ""] +[[tool.towncrier.section]] +path = "" +[[tool.towncrier.type]] +directory = "breaking" +name = "Backwards-incompatible Changes" +showcontent = true +[[tool.towncrier.type]] +directory = "deprecation" +name = "Deprecations" +showcontent = true +[[tool.towncrier.type]] +directory = "change" +name = "Changes" +showcontent = true +[[tool.towncrier.type]] +directory = "refactor" +name = "Refactor" +showcontent = true +[[tool.towncrier.type]] +directory = "feature" +name = "Features" +showcontent = true +[[tool.towncrier.type]] +directory = "fix" +name = "Bug fix" +showcontent = true diff --git a/openllm_next/__init__.py b/src/openllm/__init__.py similarity index 100% rename from openllm_next/__init__.py rename to src/openllm/__init__.py diff --git a/src/openllm/__main__.py b/src/openllm/__main__.py new file mode 100644 index 00000000..36e6455b --- /dev/null +++ b/src/openllm/__main__.py @@ -0,0 +1,274 @@ +import os +import random +import sys +import platform +import importlib.metadata +from collections import defaultdict +from typing import Annotated, Optional + +import questionary +import typer + +from openllm.accelerator_spec import DeploymentTarget, can_run, get_local_machine_spec +from openllm.analytic import DO_NOT_TRACK, OpenLLMTyper +from openllm.clean import app as clean_app +from openllm.cloud import deploy as cloud_deploy +from openllm.cloud import ensure_cloud_context, get_cloud_machine_spec +from openllm.common import CHECKED, INTERACTIVE, VERBOSE_LEVEL, output +from openllm.local import run as local_run +from openllm.local import serve as local_serve +from openllm.model import app as model_app +from openllm.model import ensure_bento, list_bento +from openllm.repo import app as repo_app + +app = OpenLLMTyper( + help='`openllm hello` to get started. ' + 'OpenLLM is a CLI tool to manage and deploy open source LLMs and' + ' get an OpenAI API compatible chat server in seconds.' 
+) + +app.add_typer(repo_app, name='repo') +app.add_typer(model_app, name='model') +app.add_typer(clean_app, name='clean') + + +def _select_bento_name(models, target): + from tabulate import tabulate + + options = [] + model_infos = [[model.repo.name, model.name, can_run(model, target)] for model in models] + model_name_groups = defaultdict(lambda: 0) + for repo, name, score in model_infos: + model_name_groups[(repo, name)] += score + table_data = [[name, repo, CHECKED if score > 0 else ''] for (repo, name), score in model_name_groups.items()] + if not table_data: + output('No model found', style='red') + raise typer.Exit(1) + table = tabulate(table_data, headers=['model', 'repo', 'locally runnable']).split('\n') + headers = f'{table[0]}\n {table[1]}' + + options.append(questionary.Separator(headers)) + for table_data, table_line in zip(table_data, table[2:]): + options.append(questionary.Choice(table_line, value=table_data[:2])) + selected = questionary.select('Select a model', options).ask() + if selected is None: + raise typer.Exit(1) + return selected + + +def _select_bento_version(models, target, bento_name, repo): + from tabulate import tabulate + + model_infos = [ + [model, can_run(model, target)] for model in models if model.name == bento_name and model.repo.name == repo + ] + + table_data = [ + [model.tag, CHECKED if score > 0 else ''] + for model, score in model_infos + if model.name == bento_name and model.repo.name == repo + ] + if not table_data: + output(f'No model found for {bento_name} in {repo}', style='red') + raise typer.Exit(1) + table = tabulate(table_data, headers=['version', 'locally runnable']).split('\n') + + options = [] + options.append(questionary.Separator(f'{table[0]}\n {table[1]}')) + for table_data, table_line in zip(model_infos, table[2:]): + options.append(questionary.Choice(table_line, value=table_data)) + selected = questionary.select('Select a version', options).ask() + if selected is None: + raise typer.Exit(1) + return selected + + +def _select_target(bento, targets): + from tabulate import tabulate + + options = [] + targets.sort(key=lambda x: can_run(bento, x), reverse=True) + if not targets: + output('No available instance type, check your bentocloud account', style='red') + raise typer.Exit(1) + + table = tabulate( + [ + [ + target.name, + target.accelerators_repr, + f'${target.price}', + CHECKED if can_run(bento, target) else 'insufficient res.', + ] + for target in targets + ], + headers=['instance type', 'accelerator', 'price/hr', 'deployable'], + ).split('\n') + options.append(questionary.Separator(f'{table[0]}\n {table[1]}')) + + for target, line in zip(targets, table[2:]): + options.append(questionary.Choice(f'{line}', value=target)) + selected = questionary.select('Select an instance type', options).ask() + if selected is None: + raise typer.Exit(1) + return selected + + +def _select_action(bento, score): + if score > 0: + options = [ + questionary.Separator('Available actions'), + questionary.Choice('0. Run the model in terminal', value='run', shortcut_key='0'), + questionary.Separator(f' $ openllm run {bento}'), + questionary.Separator(' '), + questionary.Choice('1. Serve the model locally and get a chat server', value='serve', shortcut_key='1'), + questionary.Separator(f' $ openllm serve {bento}'), + questionary.Separator(' '), + questionary.Choice( + '2. 
Deploy the model to bentocloud and get a scalable chat server', value='deploy', shortcut_key='2' + ), + questionary.Separator(f' $ openllm deploy {bento}'), + ] + else: + options = [ + questionary.Separator('Available actions'), + questionary.Choice('0. Run the model in terminal', value='run', disabled='insufficient res.', shortcut_key='0'), + questionary.Separator(f' $ openllm run {bento}'), + questionary.Separator(' '), + questionary.Choice( + '1. Serve the model locally and get a chat server', + value='serve', + disabled='insufficient res.', + shortcut_key='1', + ), + questionary.Separator(f' $ openllm serve {bento}'), + questionary.Separator(' '), + questionary.Choice( + '2. Deploy the model to bentocloud and get a scalable chat server', value='deploy', shortcut_key='2' + ), + questionary.Separator(f' $ openllm deploy {bento}'), + ] + action = questionary.select('Select an action', options).ask() + if action is None: + raise typer.Exit(1) + if action == 'run': + try: + local_run(bento) + finally: + output('\nUse this command to run the action again:', style='green') + output(f' $ openllm run {bento}', style='orange') + elif action == 'serve': + try: + local_serve(bento) + finally: + output('\nUse this command to run the action again:', style='green') + output(f' $ openllm serve {bento}', style='orange') + elif action == 'deploy': + ensure_cloud_context() + targets = get_cloud_machine_spec() + target = _select_target(bento, targets) + try: + cloud_deploy(bento, target) + finally: + output('\nUse this command to run the action again:', style='green') + output(f' $ openllm deploy {bento} --instance-type {target.name}', style='orange') + + +@app.command(help='get started interactively') +def hello(): + INTERACTIVE.set(True) + VERBOSE_LEVEL.set(20) + + target = get_local_machine_spec() + output(f' Detected Platform: {target.platform}', style='green') + if target.accelerators: + output(' Detected Accelerators: ', style='green') + for a in target.accelerators: + output(f' - {a.model} {a.memory_size}GB', style='green') + else: + output(' Detected Accelerators: None', style='yellow') + + models = list_bento() + if not models: + output('No model found, you probably need to update the model repo:', style='red') + output(' $ openllm repo update', style='orange') + raise typer.Exit(1) + + bento_name, repo = _select_bento_name(models, target) + bento, score = _select_bento_version(models, target, bento_name, repo) + _select_action(bento, score) + + +@app.command(help='start an OpenAI API compatible chat server and chat in browser') +def serve( + model: Annotated[str, typer.Argument()] = '', repo: Optional[str] = None, port: int = 3000, verbose: bool = False +): + if verbose: + VERBOSE_LEVEL.set(20) + target = get_local_machine_spec() + bento = ensure_bento(model, target=target, repo_name=repo) + local_serve(bento, port=port) + + +@app.command(help='run the model and chat in terminal') +def run( + model: Annotated[str, typer.Argument()] = '', + repo: Optional[str] = None, + port: Optional[int] = None, + timeout: int = 600, + verbose: bool = False, +): + if verbose: + VERBOSE_LEVEL.set(20) + target = get_local_machine_spec() + bento = ensure_bento(model, target=target, repo_name=repo) + if port is None: + port = random.randint(30000, 40000) + local_run(bento, port=port, timeout=timeout) + + +@app.command(help='deploy a production-ready OpenAI API compatible chat server to bentocloud ($100 free credit)') +def deploy( + model: Annotated[str, typer.Argument()] = '', + instance_type: Optional[str] = 
None, + repo: Optional[str] = None, + verbose: bool = False, +): + if verbose: + VERBOSE_LEVEL.set(20) + bento = ensure_bento(model, repo_name=repo) + if instance_type is not None: + cloud_deploy(bento, DeploymentTarget(name=instance_type)) + return + targets = get_cloud_machine_spec() + targets = filter(lambda x: can_run(bento, x) > 0, targets) + targets = sorted(targets, key=lambda x: can_run(bento, x), reverse=True) + if not targets: + output('No available instance type, check your bentocloud account', style='red') + raise typer.Exit(1) + target = targets[0] + output(f'Recommended instance type: {target.name}', style='green') + cloud_deploy(bento, target) + + +@app.callback(invoke_without_command=True) +def typer_callback( + verbose: int = 0, + do_not_track: bool = typer.Option( + False, '--do-not-track', help='Whether to disable usage tracking', envvar=DO_NOT_TRACK + ), + version: bool = typer.Option(False, '--version', '-v', help='Show version'), +): + if verbose: + VERBOSE_LEVEL.set(verbose) + if version: + output( + f"openllm, {importlib.metadata.version('openllm')}\nPython ({platform.python_implementation()}) {platform.python_version()}" + ) + sys.exit(0) + if do_not_track: + os.environ[DO_NOT_TRACK] = str(True) + + +if __name__ == '__main__': + app() diff --git a/src/openllm/accelerator_spec.py b/src/openllm/accelerator_spec.py new file mode 100644 index 00000000..4d111a4a --- /dev/null +++ b/src/openllm/accelerator_spec.py @@ -0,0 +1,144 @@ +from __future__ import annotations + +import functools +import math +import typing +from types import SimpleNamespace + +import psutil + +from openllm.common import BentoInfo, DeploymentTarget, output + + +class Accelerator(SimpleNamespace): + model: str + memory_size: float + + def __gt__(self, other): + return self.memory_size > other.memory_size + + def __eq__(self, other): + return self.memory_size == other.memory_size + + def __repr__(self): + return f'{self.model}({self.memory_size}GB)' + + +class Resource(SimpleNamespace): + cpu: int = 0 + memory: float + gpu: int = 0 + gpu_type: str = '' + + def __hash__(self): + return hash((self.cpu, self.memory, self.gpu, self.gpu_type)) + + def __bool__(self): + return any(value is not None for value in self.__dict__.values()) + + +ACCELERATOR_SPEC_DICT: dict[str, dict] = { + 'nvidia-gtx-1650': {'model': 'GTX 1650', 'memory_size': 4.0}, + 'nvidia-gtx-1060': {'model': 'GTX 1060', 'memory_size': 6.0}, + 'nvidia-gtx-1080-ti': {'model': 'GTX 1080 Ti', 'memory_size': 11.0}, + 'nvidia-rtx-3060': {'model': 'RTX 3060', 'memory_size': 12.0}, + 'nvidia-rtx-3060-ti': {'model': 'RTX 3060 Ti', 'memory_size': 8.0}, + 'nvidia-rtx-3070-ti': {'model': 'RTX 3070 Ti', 'memory_size': 8.0}, + 'nvidia-rtx-3080': {'model': 'RTX 3080', 'memory_size': 10.0}, + 'nvidia-rtx-3080-ti': {'model': 'RTX 3080 Ti', 'memory_size': 12.0}, + 'nvidia-rtx-3090': {'model': 'RTX 3090', 'memory_size': 24.0}, + 'nvidia-rtx-4070-ti': {'model': 'RTX 4070 Ti', 'memory_size': 12.0}, + 'nvidia-tesla-p4': {'model': 'P4', 'memory_size': 8.0}, + 'nvidia-tesla-p100': {'model': 'P100', 'memory_size': 16.0}, + 'nvidia-tesla-k80': {'model': 'K80', 'memory_size': 12.0}, + 'nvidia-tesla-t4': {'model': 'T4', 'memory_size': 16.0}, + 'nvidia-tesla-v100': {'model': 'V100', 'memory_size': 16.0}, + 'nvidia-l4': {'model': 'L4', 'memory_size': 24.0}, + 'nvidia-tesla-l4': {'model': 'L4', 'memory_size': 24.0}, + 'nvidia-tesla-a10g': {'model': 'A10G', 'memory_size': 24.0}, + 'nvidia-a100-80g': {'model': 'A100', 'memory_size': 80.0}, + 'nvidia-a100-80gb': {'model': 
'A100', 'memory_size': 80.0}, + 'nvidia-tesla-a100': {'model': 'A100', 'memory_size': 40.0}, +} + + +ACCELERATOR_SPECS: dict[str, Accelerator] = {key: Accelerator(**value) for key, value in ACCELERATOR_SPEC_DICT.items()} + + +@functools.lru_cache +def get_local_machine_spec(): + if psutil.MACOS: + return DeploymentTarget(accelerators=[], source='local', platform='macos') + + if psutil.WINDOWS: + platform = 'windows' + elif psutil.LINUX: + platform = 'linux' + else: + raise NotImplementedError('Unsupported platform') + + from pynvml import ( + nvmlDeviceGetCount, + nvmlDeviceGetCudaComputeCapability, + nvmlDeviceGetHandleByIndex, + nvmlDeviceGetMemoryInfo, + nvmlDeviceGetName, + nvmlInit, + nvmlShutdown, + ) + + try: + nvmlInit() + device_count = nvmlDeviceGetCount() + accelerators: list[Accelerator] = [] + for i in range(device_count): + handle = nvmlDeviceGetHandleByIndex(i) + name = nvmlDeviceGetName(handle) + memory_info = nvmlDeviceGetMemoryInfo(handle) + accelerators.append(Accelerator(model=name, memory_size=math.ceil(int(memory_info.total) / 1024**3))) + compute_capability = nvmlDeviceGetCudaComputeCapability(handle) + if compute_capability < (7, 5): + output( + f'GPU {name} with compute capability {compute_capability} ' + 'may not be supported, 7.5 or higher is recommended. check ' + 'https://developer.nvidia.com/cuda-gpus for more information', + style='yellow', + ) + nvmlShutdown() + return DeploymentTarget(accelerators=accelerators, source='local', platform=platform) + except Exception as e: + output( + 'Failed to get local GPU info. Ensure nvidia driver is installed to enable local GPU deployment', style='yellow' + ) + output(f'Error: {e}', style='red', level=20) + return DeploymentTarget(accelerators=[], source='local', platform=platform) + + +@functools.lru_cache() +def can_run(bento: typing.Union[Resource, BentoInfo], target: typing.Optional[DeploymentTarget] = None) -> float: + """ + Calculate if the bento can be deployed on the target. + """ + if target is None: + target = get_local_machine_spec() + + resource_spec = Resource(**(bento.bento_yaml['services'][0]['config'].get('resources', {}))) + labels = bento.bento_yaml.get('labels', {}) + platforms = labels.get('platforms', 'linux').split(',') + + if target.platform not in platforms: + return 0.0 + + # return a neutral 0.5 score if the bento specifies no resource requirements + if not resource_spec: + return 0.5 + + if resource_spec.gpu > 0: + required_gpu = ACCELERATOR_SPECS[resource_spec.gpu_type] + filtered_accelerators = [ac for ac in target.accelerators if ac.memory_size >= required_gpu.memory_size] + if resource_spec.gpu > len(filtered_accelerators): + return 0.0 + return required_gpu.memory_size * resource_spec.gpu / sum(ac.memory_size for ac in target.accelerators) + if target.accelerators: + return 0.01 / sum(ac.memory_size for ac in target.accelerators) + return 1.0 diff --git a/src/openllm/analytic.py b/src/openllm/analytic.py new file mode 100644 index 00000000..f786ca6c --- /dev/null +++ b/src/openllm/analytic.py @@ -0,0 +1,102 @@ +from __future__ import annotations + +import functools +import os +import re +import time +import typing +from abc import ABC + +import attr +import click +import typer +import typer.core + +DO_NOT_TRACK = 'BENTOML_DO_NOT_TRACK' + + +class EventMeta(ABC): + @property + def event_name(self): + # camel case to snake case + event_name = re.sub(r'(?<!^)(?=[A-Z])', '_', type(self).__name__).lower() + return event_name + + +@attr.define +class OpenllmCliEvent(EventMeta): + cmd_group: str + cmd_name: str + duration_in_ms: float = attr.field(default=0) + error_type: typing.Optional[str] = attr.field(default=None) + return_code: typing.Optional[int] = attr.field(default=None) + + +class OrderedCommands(typer.core.TyperGroup): + def list_commands(self, ctx: click.Context) -> 
typing.Iterable[str]: + return list(self.commands) + + +class OpenLLMTyper(typer.Typer): + def __init__(self, *args: typing.Any, **kwargs: typing.Any): + no_args_is_help = kwargs.pop('no_args_is_help', True) + context_settings = kwargs.pop('context_settings', {}) + if 'help_option_names' not in context_settings: + context_settings['help_option_names'] = ('-h', '--help') + if 'max_content_width' not in context_settings: + context_settings['max_content_width'] = int(os.environ.get('COLUMNS', str(120))) + klass = kwargs.pop('cls', OrderedCommands) + + super().__init__(*args, cls=klass, no_args_is_help=no_args_is_help, context_settings=context_settings, **kwargs) + + def command(self, *args: typing.Any, **kwargs: typing.Any): + def decorator(f): + @functools.wraps(f) + @click.pass_context + def wrapped(ctx: click.Context, *args, **kwargs): + from bentoml._internal.utils.analytics import track + + do_not_track = os.environ.get(DO_NOT_TRACK, str(False)).lower() == 'true' + + # so we know that the root program is openllm + command_name = ctx.info_name + if ctx.parent.parent is not None: + # openllm model list + command_group = ctx.parent.info_name + elif ctx.parent.info_name == ctx.find_root().info_name: + # openllm run + command_group = 'openllm' + + if do_not_track: + return f(*args, **kwargs) + start_time = time.time_ns() + try: + return_value = f(*args, **kwargs) + duration_in_ns = time.time_ns() - start_time + track(OpenllmCliEvent(cmd_group=command_group, cmd_name=command_name, duration_in_ms=duration_in_ns / 1e6)) + return return_value + except BaseException as e: + duration_in_ns = time.time_ns() - start_time + track( + OpenllmCliEvent( + cmd_group=command_group, + cmd_name=command_name, + duration_in_ms=duration_in_ns / 1e6, + error_type=type(e).__name__, + return_code=2 if isinstance(e, KeyboardInterrupt) else 1, + ) + ) + raise + + return typer.Typer.command(self, *args, **kwargs)(wrapped) + + return decorator diff --git a/src/openllm/clean.py b/src/openllm/clean.py new file mode 100644 index 00000000..a5e9dabd --- /dev/null +++ b/src/openllm/clean.py @@ -0,0 +1,66 @@ +import pathlib +import shutil + +import questionary + +from openllm.analytic import OpenLLMTyper +from openllm.common import CONFIG_FILE, REPO_DIR, VENV_DIR, VERBOSE_LEVEL, output + +app = OpenLLMTyper(help='clean up and release disk space used by OpenLLM') + + +HUGGINGFACE_CACHE = pathlib.Path.home() / '.cache' / 'huggingface' / 'hub' + + +@app.command(help='Clean up all the cached models from huggingface') +def model_cache(verbose: bool = False): + if verbose: + VERBOSE_LEVEL.set(20) + used_space = sum(f.stat().st_size for f in HUGGINGFACE_CACHE.rglob('*')) + sure = questionary.confirm( + f'This will remove all models cached by Huggingface (~{used_space / 1024 / 1024:.2f}MB), are you sure?' + ).ask() + if not sure: + return + shutil.rmtree(HUGGINGFACE_CACHE, ignore_errors=True) + output('All models cached by Huggingface have been removed', style='green') + + +@app.command(help='Clean up all the virtual environments created by OpenLLM') +def venvs(verbose: bool = False): + if verbose: + VERBOSE_LEVEL.set(20) + used_space = sum(f.stat().st_size for f in VENV_DIR.rglob('*')) + sure = questionary.confirm( + f'This will remove all virtual environments created by OpenLLM (~{used_space / 1024 / 1024:.2f}MB), are you sure?' 
+ ).ask() + if not sure: + return + shutil.rmtree(VENV_DIR, ignore_errors=True) + output('All virtual environments have been removed', style='green') + + +@app.command(help='Clean up all the repositories cloned by OpenLLM') +def repos(verbose: bool = False): + if verbose: + VERBOSE_LEVEL.set(20) + shutil.rmtree(REPO_DIR, ignore_errors=True) + output('All repositories have been removed', style='green') + + +@app.command(help='Reset configurations to default') +def configs(verbose: bool = False): + if verbose: + VERBOSE_LEVEL.set(20) + shutil.rmtree(CONFIG_FILE, ignore_errors=True) + output('All configurations have been reset', style='green') + + +@app.command(name='all', help='Clean up all above and bring OpenLLM to a fresh start') +def all_cache(verbose: bool = False): + if verbose: + VERBOSE_LEVEL.set(20) + repos() + venvs() + model_cache() + configs() diff --git a/src/openllm/cloud.py b/src/openllm/cloud.py new file mode 100644 index 00000000..d2e9a4eb --- /dev/null +++ b/src/openllm/cloud.py @@ -0,0 +1,130 @@ +import json +import os +import pathlib +import shutil +import subprocess +import typing + +import typer + +from openllm.accelerator_spec import ACCELERATOR_SPECS +from openllm.analytic import OpenLLMTyper +from openllm.common import INTERACTIVE, BentoInfo, DeploymentTarget, output, run_command + +app = OpenLLMTyper() + + +def _get_deploy_cmd(bento: BentoInfo, target: typing.Optional[DeploymentTarget] = None): + cmd = ['bentoml', 'deploy', bento.bentoml_tag] + env = {'BENTOML_HOME': f'{bento.repo.path}/bentoml'} + + required_envs = bento.bento_yaml.get('envs', []) + required_env_names = [env['name'] for env in required_envs if 'name' in env] + if required_env_names: + output(f'This model requires the following environment variables to run: {required_env_names!r}', style='yellow') + + for env_info in bento.bento_yaml.get('envs', []): + if 'name' not in env_info: + continue + if os.environ.get(env_info['name']): + default = os.environ[env_info['name']] + elif 'value' in env_info: + default = env_info['value'] + else: + default = '' + + if INTERACTIVE.get(): + import questionary + + value = questionary.text(f"{env_info['name']}:", default=default).ask() + else: + if default == '': + output(f"Environment variable {env_info['name']} is required but not provided", style='red') + raise typer.Exit(1) + else: + value = default + + if value is None: + raise typer.Exit(1) + cmd += ['--env', f"{env_info['name']}={value}"] + + if target: + cmd += ['--instance-type', target.name] + + assert (pathlib.Path.home() / 'bentoml' / '.yatai.yaml').exists() + shutil.copy(pathlib.Path.home() / 'bentoml' / '.yatai.yaml', bento.repo.path / 'bentoml' / '.yatai.yaml') + + return cmd, env, None + + +def ensure_cloud_context(): + import questionary + + cmd = ['bentoml', 'cloud', 'current-context'] + try: + result = subprocess.check_output(cmd, stderr=subprocess.DEVNULL) + context = json.loads(result) + output(f" bentoml already logged in: {context['endpoint']}", style='green') + except subprocess.CalledProcessError: + output(' bentoml not logged in', style='red') + if not INTERACTIVE.get(): + output('\n get bentoml logged in by:') + output(' $ bentoml cloud login', style='orange') + output('') + output( + """ * you may need to visit https://cloud.bentoml.com to get an account. 
you can also bring your own bentoml cluster (BYOC) to your team from https://bentoml.com/contact""", + style='yellow', + ) + raise typer.Exit(1) + else: + action = questionary.select( + 'Choose an action:', choices=['I have a BentoCloud account', 'get an account in two minutes'] + ).ask() + if action is None: + raise typer.Exit(1) + elif action == 'get an account in two minutes': + output('Please visit https://cloud.bentoml.com to get your token', style='yellow') + endpoint = questionary.text('Enter the endpoint: (similar to https://my-org.cloud.bentoml.com)').ask() + if endpoint is None: + raise typer.Exit(1) + token = questionary.text('Enter your token: (similar to cniluaxxxxxxxx)').ask() + if token is None: + raise typer.Exit(1) + cmd = ['bentoml', 'cloud', 'login', '--api-token', token, '--endpoint', endpoint] + try: + result = subprocess.check_output(cmd) + output(' Logged in successfully', style='green') + except subprocess.CalledProcessError: + output(' Failed to login', style='red') + raise typer.Exit(1) + + +def get_cloud_machine_spec(): + ensure_cloud_context() + cmd = ['bentoml', 'deployment', 'list-instance-types', '-o', 'json'] + try: + result = subprocess.check_output(cmd, stderr=subprocess.DEVNULL) + instance_types = json.loads(result) + return [ + DeploymentTarget( + source='cloud', + name=it['name'], + price=it['price'], + platform='linux', + accelerators=( + [ACCELERATOR_SPECS[it['gpu_type']] for _ in range(int(it['gpu']))] + if it.get('gpu') and it['gpu_type'] in ACCELERATOR_SPECS + else [] + ), + ) + for it in instance_types + ] + except (subprocess.CalledProcessError, json.JSONDecodeError): + output('Failed to get cloud instance types', style='red') + return [] + + +def deploy(bento: BentoInfo, target: DeploymentTarget): + ensure_cloud_context() + cmd, env, cwd = _get_deploy_cmd(bento, target) + run_command(cmd, env=env, cwd=cwd) diff --git a/src/openllm/common.py b/src/openllm/common.py new file mode 100644 index 00000000..0a8b8bde --- /dev/null +++ b/src/openllm/common.py @@ -0,0 +1,370 @@ +from __future__ import annotations + +import asyncio +import functools +import hashlib +import io +import json +import os +import pathlib +import signal +import subprocess +import sys +import sysconfig +import typing +from contextlib import asynccontextmanager, contextmanager +from types import SimpleNamespace + +import typer +import typer.core + +ERROR_STYLE = 'red' +SUCCESS_STYLE = 'green' + + +CLLAMA_HOME = pathlib.Path.home() / '.openllm' +REPO_DIR = CLLAMA_HOME / 'repos' +TEMP_DIR = CLLAMA_HOME / 'temp' +VENV_DIR = CLLAMA_HOME / 'venv' + +REPO_DIR.mkdir(exist_ok=True, parents=True) +TEMP_DIR.mkdir(exist_ok=True, parents=True) +VENV_DIR.mkdir(exist_ok=True, parents=True) + +CONFIG_FILE = CLLAMA_HOME / 'config.json' + +CHECKED = '☆' + +T = typing.TypeVar('T') + + +class ContextVar(typing.Generic[T]): + def __init__(self, default: T): + self._stack: list[T] = [] + self._default = default + + def get(self) -> T: + if self._stack: + return self._stack[-1] + return self._default + + def set(self, value): + self._stack.append(value) + + @contextmanager + def patch(self, value): + self._stack.append(value) + try: + yield + finally: + self._stack.pop() + + +VERBOSE_LEVEL = ContextVar(10) +INTERACTIVE = ContextVar(False) +FORCE = ContextVar(False) + + +def output(content, level=0, style=None, end=None): + import questionary + + if level > VERBOSE_LEVEL.get(): + return + + if not isinstance(content, str): + import pyaml + + out = io.StringIO() + pyaml.pprint(content, dst=out, 
sort_dicts=False, sort_keys=False) + questionary.print(out.getvalue(), style=style, end='' if end is None else end) + out.close() + + if isinstance(content, str): + questionary.print(content, style=style, end='\n' if end is None else end) + + +class Config(SimpleNamespace): + repos: dict[str, str] = {'default': 'git+https://github.com/bentoml/openllm-models@main'} + default_repo: str = 'default' + + def tolist(self): + return dict(repos=self.repos, default_repo=self.default_repo) + + +def load_config(): + if CONFIG_FILE.exists(): + try: + with open(CONFIG_FILE) as f: + return Config(**json.load(f)) + except json.JSONDecodeError: + return Config() + return Config() + + +def save_config(config): + with open(CONFIG_FILE, 'w') as f: + json.dump(config.tolist(), f, indent=2) + + +class RepoInfo(SimpleNamespace): + name: str + path: pathlib.Path + url: str + server: str + owner: str + repo: str + branch: str + + def tolist(self): + if VERBOSE_LEVEL.get() <= 0: + return f'{self.name} ({self.url})' + if VERBOSE_LEVEL.get() <= 10: + return dict(name=self.name, url=self.url, path=str(self.path)) + if VERBOSE_LEVEL.get() <= 20: + return dict( + name=self.name, + url=self.url, + path=str(self.path), + server=self.server, + owner=self.owner, + repo=self.repo, + branch=self.branch, + ) + + +class BentoInfo(SimpleNamespace): + repo: RepoInfo + path: pathlib.Path + alias: str = '' + + def __str__(self): + if self.repo.name == 'default': + return f'{self.tag}' + else: + return f'{self.repo.name}/{self.tag}' + + def __hash__(self): + return md5(str(self.path)) + + @property + def tag(self) -> str: + if self.alias: + return f'{self.path.parent.name}:{self.alias}' + return f'{self.path.parent.name}:{self.path.name}' + + @property + def bentoml_tag(self) -> str: + return f'{self.path.parent.name}:{self.path.name}' + + @property + def name(self) -> str: + return self.path.parent.name + + @property + def version(self) -> str: + return self.path.name + + @property + def labels(self) -> dict[str, str]: + return self.bento_yaml['labels'] + + @functools.cached_property + def bento_yaml(self) -> dict: + import yaml + + bento_file = self.path / 'bento.yaml' + return yaml.safe_load(bento_file.read_text()) + + @functools.cached_property + def platforms(self) -> list[str]: + return self.bento_yaml['labels'].get('platforms', 'linux').split(',') + + @functools.cached_property + def pretty_yaml(self) -> dict: + def _pretty_routes(routes): + return { + route['route']: { + 'input': {k: v['type'] for k, v in route['input']['properties'].items()}, + 'output': route['output']['type'], + } + for route in routes + } + + if len(self.bento_yaml['services']) == 1: + pretty_yaml = { + 'apis': _pretty_routes(self.bento_yaml['schema']['routes']), + 'resources': self.bento_yaml['services'][0]['config']['resources'], + 'envs': self.bento_yaml['envs'], + 'platforms': self.platforms, + } + return pretty_yaml + return self.bento_yaml + + @functools.cached_property + def pretty_gpu(self) -> str: + from openllm.accelerator_spec import ACCELERATOR_SPECS + + try: + resources = self.bento_yaml['services'][0]['config']['resources'] + if resources['gpu'] > 1: + acc = ACCELERATOR_SPECS[resources['gpu_type']] + return f"{acc.memory_size:.0f}Gx{resources['gpu']}" + elif resources['gpu'] > 0: + acc = ACCELERATOR_SPECS[resources['gpu_type']] + return f'{acc.memory_size:.0f}G' + except KeyError: + pass + return '' + + def tolist(self): + verbose = VERBOSE_LEVEL.get() + if verbose <= 0: + return str(self) + if verbose <= 10: + return dict(tag=self.tag, 
repo=self.repo.tolist(), path=str(self.path), model_card=self.pretty_yaml) + if verbose <= 20: + return dict(tag=self.tag, repo=self.repo.tolist(), path=str(self.path), bento_yaml=self.bento_yaml) + + +class VenvSpec(SimpleNamespace): + python_version: str + python_packages: dict[str, str] + name_prefix = '' + + def __hash__(self): + return md5( + # self.python_version, + *sorted(self.python_packages) + ) + + +class Accelerator(SimpleNamespace): + model: str + memory_size: float + + def __gt__(self, other): + return self.memory_size > other.memory_size + + def __eq__(self, other): + return self.memory_size == other.memory_size + + +class DeploymentTarget(SimpleNamespace): + source: str = 'local' + name: str = 'local' + price: str = '' + platform = 'linux' + accelerators: list[Accelerator] + + def __hash__(self): + return hash(self.source) + + @property + def accelerators_repr(self) -> str: + accs = {a.model for a in self.accelerators} + if len(accs) == 0: + return 'null' + if len(accs) == 1: + a = self.accelerators[0] + return f'{a.model} x{len(self.accelerators)}' + return ', '.join((f'{a.model}' for a in self.accelerators)) + + +def run_command(cmd, cwd=None, env=None, copy_env=True, venv=None, silent=False) -> subprocess.CompletedProcess: + import shlex + + env = env or {} + cmd = [str(c) for c in cmd] + bin_dir = 'Scripts' if os.name == 'nt' else 'bin' + if not silent: + output('\n') + if cwd: + output(f'$ cd {cwd}', style='orange') + if env: + for k, v in env.items(): + output(f'$ export {k}={shlex.quote(v)}', style='orange') + if venv: + output(f"$ source {venv / 'bin' / 'activate'}", style='orange') + output(f"$ {' '.join(cmd)}", style='orange') + + if venv: + py = venv / bin_dir / f"python{sysconfig.get_config_var('EXE')}" + else: + py = sys.executable + + if copy_env: + env = {**os.environ, **env} + + if cmd and cmd[0] == 'bentoml': + cmd = [py, '-m', 'bentoml'] + cmd[1:] + if cmd and cmd[0] == 'python': + cmd = [py] + cmd[1:] + + try: + if silent: + return subprocess.run( # type: ignore + cmd, cwd=cwd, env=env, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL + ) + else: + return subprocess.run(cmd, cwd=cwd, env=env) + except subprocess.CalledProcessError: + output('Command failed', style='red') + raise typer.Exit(1) + + +async def stream_command_output(stream, style='gray'): + async for line in stream: + output(line.decode(), style=style, end='') + + +@asynccontextmanager +async def async_run_command(cmd, cwd=None, env=None, copy_env=True, venv=None, silent=True): + import shlex + + env = env or {} + cmd = [str(c) for c in cmd] + + if not silent: + output('\n') + if cwd: + output(f'$ cd {cwd}', style='orange') + if env: + for k, v in env.items(): + output(f'$ export {k}={shlex.quote(v)}', style='orange') + if venv: + output(f"$ source {venv / 'bin' / 'activate'}", style='orange') + output(f"$ {' '.join(cmd)}", style='orange') + + if venv: + py = venv / 'bin' / 'python' + else: + py = sys.executable + + if copy_env: + env = {**os.environ, **env} + + if cmd and cmd[0] == 'bentoml': + cmd = [py, '-m', 'bentoml'] + cmd[1:] + if cmd and cmd[0] == 'python': + cmd = [py] + cmd[1:] + + proc = None + try: + proc = await asyncio.create_subprocess_shell( + ' '.join(map(str, cmd)), stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE, cwd=cwd, env=env + ) + yield proc + except subprocess.CalledProcessError: + output('Command failed', style='red') + raise typer.Exit(1) + finally: + if proc: + proc.send_signal(signal.SIGINT) + await proc.wait() + + +def md5(*strings: str) -> 
int: + m = hashlib.md5() + for s in strings: + m.update(s.encode()) + return int(m.hexdigest(), 16) diff --git a/src/openllm/local.py b/src/openllm/local.py new file mode 100644 index 00000000..5eb4049f --- /dev/null +++ b/src/openllm/local.py @@ -0,0 +1,91 @@ +import asyncio +import time + +import httpx + +from openllm.common import BentoInfo, async_run_command, output, run_command, stream_command_output +from openllm.venv import ensure_venv + + +def _get_serve_cmd(bento: BentoInfo, port: int = 3000): + cmd = ['bentoml', 'serve', bento.bentoml_tag] + if port != 3000: + cmd += ['--port', str(port)] + env = {'BENTOML_HOME': f'{bento.repo.path}/bentoml'} + return cmd, env, None + + +def serve(bento: BentoInfo, port: int = 3000): + venv = ensure_venv(bento) + cmd, env, cwd = _get_serve_cmd(bento, port=port) + run_command(cmd, env=env, cwd=cwd, venv=venv) + + +async def _run_model(bento: BentoInfo, port: int = 3000, timeout: int = 600): + venv = ensure_venv(bento) + cmd, env, cwd = _get_serve_cmd(bento, port) + async with async_run_command(cmd, env=env, cwd=cwd, venv=venv, silent=False) as server_proc: + output(f'Model server started {server_proc.pid}') + + stdout_streamer = None + stderr_streamer = None + start_time = time.time() + + output('Model loading...', style='green') + for _ in range(timeout): + try: + resp = httpx.get(f'http://localhost:{port}/readyz', timeout=3) + if resp.status_code == 200: + break + except httpx.RequestError: + if time.time() - start_time > 30: + if not stdout_streamer: + stdout_streamer = asyncio.create_task(stream_command_output(server_proc.stdout, style='gray')) + if not stderr_streamer: + stderr_streamer = asyncio.create_task(stream_command_output(server_proc.stderr, style='#BD2D0F')) + await asyncio.sleep(1) + else: + output('Model failed to load', style='red') + server_proc.terminate() + return + + if stdout_streamer: + stdout_streamer.cancel() + if stderr_streamer: + stderr_streamer.cancel() + + output('Model is ready', style='green') + messages: list[dict[str, str]] = [] + + from openai import AsyncOpenAI + + client = AsyncOpenAI(base_url=f'http://localhost:{port}/v1', api_key='local') + model_id = (await client.models.list()).data[0].id + while True: + try: + message = input('user: ') + if message == '': + output('empty message, please enter something', style='yellow') + continue + messages.append(dict(role='user', content=message)) + output('assistant: ', end='', style='lightgreen') + assistant_message = '' + stream = await client.chat.completions.create( + model=model_id, + messages=messages, # type: ignore + stream=True, + ) + async for chunk in stream: + text = chunk.choices[0].delta.content or '' + assistant_message += text + output(text, end='', style='lightgreen') + messages.append(dict(role='assistant', content=assistant_message)) + output('') + except KeyboardInterrupt: + break + output('\nStopping model server...', style='green') + output('Stopped model server', style='green') + + +def run(bento: BentoInfo, port: int = 3000, timeout: int = 600): + asyncio.run(_run_model(bento, port=port, timeout=timeout)) diff --git a/src/openllm/model.py b/src/openllm/model.py new file mode 100644 index 00000000..d271b1b3 --- /dev/null +++ b/src/openllm/model.py @@ -0,0 +1,147 @@ +import typing +from typing import Optional + +import tabulate +import typer + +from openllm.accelerator_spec import DeploymentTarget, can_run +from openllm.analytic import OpenLLMTyper +from openllm.common import FORCE, VERBOSE_LEVEL, BentoInfo, load_config, output +from openllm.repo 
import ensure_repo_updated, parse_repo_url + +app = OpenLLMTyper(help='manage models') + + +@app.command() +def get(tag: str, repo: Optional[str] = None, verbose: bool = False): + if verbose: + VERBOSE_LEVEL.set(20) + bento_info = ensure_bento(tag, repo_name=repo) + if bento_info: + output(bento_info) + + +@app.command(name='list') +def list_(tag: Optional[str] = None, repo: Optional[str] = None, verbose: bool = False): + if verbose: + VERBOSE_LEVEL.set(20) + + bentos = list_bento(tag=tag, repo_name=repo) + bentos.sort(key=lambda x: x.name) + + seen = set() + + def is_seen(value): + if value in seen: + return True + seen.add(value) + return False + + table = tabulate.tabulate( + [ + [ + '' if is_seen(bento.name) else bento.name, + bento.tag, + bento.repo.name, + bento.pretty_gpu, + ','.join(bento.platforms), + ] + for bento in bentos + ], + headers=['model', 'version', 'repo', 'required VRAM', 'platforms'], + ) + output(table) + + +def ensure_bento(model: str, target: Optional[DeploymentTarget] = None, repo_name: Optional[str] = None) -> BentoInfo: + bentos = list_bento(model, repo_name=repo_name) + if len(bentos) == 0: + output(f'No model found for {model}', style='red') + raise typer.Exit(1) + + if len(bentos) == 1: + if FORCE.get(): + output(f'Found model {bentos[0]}', style='green') + return bentos[0] + if target is None: + return bentos[0] + if can_run(bentos[0], target) <= 0: + return bentos[0] + output(f'Found model {bentos[0]}', style='green') + return bentos[0] + + if target is None: + output(f'Multiple models match {model}, did you mean one of these?', style='red') + for bento in bentos: + output(f' {bento}') + raise typer.Exit(1) + + filtered = [bento for bento in bentos if can_run(bento, target) > 0] + if len(filtered) == 0: + output(f'No deployment target found for {model}', style='red') + raise typer.Exit(1) + + if len(filtered) == 0: + output(f'No deployment target found for {model}', style='red') + raise typer.Exit(1) + + if len(bentos) > 1: + output(f'Multiple models match {model}, did you mean one of these?', style='red') + for bento in bentos: + output(f' {bento}') + raise typer.Exit(1) + + return bentos[0] + + +def list_bento( + tag: typing.Optional[str] = None, repo_name: typing.Optional[str] = None, include_alias: bool = False +) -> typing.List[BentoInfo]: + ensure_repo_updated() + + if repo_name is not None: + config = load_config() + if repo_name not in config.repos: + output(f'Repo `{repo_name}` not found, did you mean one of these?') + for repo_name in config.repos: + output(f' {repo_name}') + raise typer.Exit(1) + + if not tag: + glob_pattern = 'bentoml/bentos/*/*' + elif ':' in tag: + bento_name, version = tag.split(':') + glob_pattern = f'bentoml/bentos/{bento_name}/{version}' + else: + glob_pattern = f'bentoml/bentos/{tag}/*' + + model_list = [] + config = load_config() + for _repo_name, repo_url in config.repos.items(): + if repo_name is not None and _repo_name != repo_name: + continue + repo = parse_repo_url(repo_url, _repo_name) + for path in repo.path.glob(glob_pattern): + if path.is_dir() and (path / 'bento.yaml').exists(): + model = BentoInfo(repo=repo, path=path) + elif path.is_file(): + with open(path) as f: + origin_name = f.read().strip() + origin_path = path.parent / origin_name + model = BentoInfo(alias=path.name, repo=repo, path=origin_path) + else: + model = None + if model: + model_list.append(model) + model_list.sort(key=lambda x: x.tag) + if not include_alias: + seen = set() + model_list = [ + x + for x in model_list + if not ( + 
f"{x.bento_yaml['name']}:{x.bento_yaml['version']}" in seen + or seen.add(f"{x.bento_yaml['name']}:{x.bento_yaml['version']}") + ) + ] + return model_list diff --git a/src/openllm/repo.py b/src/openllm/repo.py new file mode 100644 index 00000000..95d58426 --- /dev/null +++ b/src/openllm/repo.py @@ -0,0 +1,179 @@ +import datetime +import re +import shutil + +import pyaml +import questionary +import typer + +from openllm.analytic import OpenLLMTyper +from openllm.common import INTERACTIVE, REPO_DIR, VERBOSE_LEVEL, RepoInfo, load_config, output, save_config + +UPDATE_INTERVAL = datetime.timedelta(days=3) + +app = OpenLLMTyper(help='manage repos') + + +@app.command() +def list(verbose: bool = False): + if verbose: + VERBOSE_LEVEL.set(20) + config = load_config() + pyaml.pprint([parse_repo_url(repo, name) for name, repo in config.repos.items()], sort_dicts=False, sort_keys=False) + + +@app.command() +def remove(name: str): + config = load_config() + if name not in config.repos: + output(f'Repo {name} does not exist', style='red') + return + + del config.repos[name] + save_config(config) + output(f'Repo {name} removed', style='green') + + +def _complete_alias(repo_name: str): + from openllm.model import list_bento + + for bento in list_bento(repo_name=repo_name): + alias = bento.labels.get('openllm_alias', '').strip() + if alias: + for a in alias.split(','): + with open(bento.path.parent / a, 'w') as f: + f.write(bento.version) + + +@app.command() +def update(): + import dulwich + import dulwich.errors + import dulwich.porcelain + + config = load_config() + repos_in_use = set() + for repo_name, repo in config.repos.items(): + repo = parse_repo_url(repo, repo_name) + repos_in_use.add((repo.server, repo.owner, repo.repo)) + if repo.path.exists(): # TODO: use update instead of remove and clone + shutil.rmtree(repo.path, ignore_errors=True) + if not repo.path.exists(): + repo.path.parent.mkdir(parents=True, exist_ok=True) + try: + dulwich.porcelain.clone( + f'https://{repo.server}/{repo.owner}/{repo.repo}.git', + str(repo.path), + checkout=True, + depth=1, + branch=repo.branch, + ) + output('') + output(f'Repo `{repo.name}` updated', style='green') + except: + shutil.rmtree(repo.path, ignore_errors=True) + output(f'Failed to clone repo {repo.name}', style='red') + else: + try: + import dulwich.porcelain + + dulwich.porcelain.pull( + str(repo.path), f'https://{repo.server}/{repo.owner}/{repo.repo}.git', refspecs=repo.branch, force=True + ) + dulwich.porcelain.clean(str(repo.path), str(repo.path)) + output('') + output(f'Repo `{repo.name}` updated', style='green') + except: + shutil.rmtree(repo.path, ignore_errors=True) + output(f'Failed to update repo {repo.name}', style='red') + for c in REPO_DIR.glob('*/*/*'): + repo_spec = tuple(c.parts[-3:]) + if repo_spec not in repos_in_use: + shutil.rmtree(c, ignore_errors=True) + output(f'Removed unused repo cache {c}') + with open(REPO_DIR / 'last_update', 'w') as f: + f.write(datetime.datetime.now().isoformat()) + for repo_name in config.repos: + _complete_alias(repo_name) + + +def ensure_repo_updated(): + last_update_file = REPO_DIR / 'last_update' + if not last_update_file.exists(): + if INTERACTIVE.get(): + choice = questionary.confirm( + 'The repo cache is never updated, do you want to update it to fetch the latest model list?' 
+ ).ask() + if choice: + update() + return + else: + output( + 'The repo cache is never updated, please run `openllm repo update` to fetch the latest model list', style='red' + ) + raise typer.Exit(1) + last_update = datetime.datetime.fromisoformat(last_update_file.read_text().strip()) + if datetime.datetime.now() - last_update > UPDATE_INTERVAL: + if INTERACTIVE.get(): + choice = questionary.confirm( + 'The repo cache is outdated, do you want to update it to fetch the latest model list?' + ).ask() + if choice: + update() + else: + output( + 'The repo cache is outdated, please run `openllm repo update` to fetch the latest model list', style='yellow' + ) + + +GIT_REPO_RE = re.compile(r'git\+https://(?P<server>.+)/(?P<owner>.+)/(?P<repo>.+?)(@(?P<branch>.+))?$') + + +def parse_repo_url(repo_url, repo_name=None) -> RepoInfo: + """ + parse the git repo url to server, owner, repo name, branch + >>> parse_repo_url('git+https://github.com/bentoml/bentovllm@main') + ('github.com', 'bentoml', 'bentovllm', 'main') + + >>> parse_repo_url('git+https://github.com/bentoml/bentovllm') + ('github.com', 'bentoml', 'bentovllm', 'main') + """ + match = GIT_REPO_RE.match(repo_url) + if not match: + raise ValueError(f'Invalid git repo url: {repo_url}') + server = match.group('server') + owner = match.group('owner') + repo = match.group('repo') + branch = match.group('branch') or 'main' + path = REPO_DIR / server / owner / repo + return RepoInfo( + name=repo if repo_name is None else repo_name, + url=repo_url, + server=server, + owner=owner, + repo=repo, + branch=branch, + path=path, + ) + + +@app.command() +def add(name: str, repo: str): + name = name.lower() + if not name.isidentifier(): + output(f'Invalid repo name: {name}, should only contain letters, numbers and underscores', style='red') + return + + config = load_config() + if name in config.repos: + override = questionary.confirm(f'Repo {name} already exists({config.repos[name]}), override?').ask() + if not override: + return + + config.repos[name] = repo + save_config(config) + output(f'Repo {name} added', style='green') + + +if __name__ == '__main__': + app() diff --git a/src/openllm/venv.py b/src/openllm/venv.py new file mode 100644 index 00000000..686f1644 --- /dev/null +++ b/src/openllm/venv.py @@ -0,0 +1,118 @@ +import functools +import os +import pathlib +import shutil +import typing +from typing import Iterable + +import typer + +from openllm.common import VENV_DIR, VERBOSE_LEVEL, BentoInfo, VenvSpec, output, run_command + + +@functools.lru_cache +def _resolve_packages(requirement: typing.Union[pathlib.Path, str]): + from pip_requirements_parser import RequirementsFile + + requirements_txt = RequirementsFile.from_file(str(requirement), include_nested=True) + return requirements_txt.requirements + + +def _filter_preheat_packages(requirements: Iterable) -> list[str]: + PREHEAT_PIP_PACKAGES = ['torch', 'vllm'] + + deps: list[str] = [] + for req in requirements: + if req.is_editable or req.is_local_path or req.is_url or req.is_wheel or not req.name or not req.specifier: + continue + for sp in req.specifier: + if sp.operator == '==' and req.name in PREHEAT_PIP_PACKAGES: + assert req.line is not None + deps.append(req.line) + break + return deps + + +@functools.lru_cache +def _resolve_bento_env_specs(bento: BentoInfo): + ver_file = bento.path / 'env' / 'python' / 'version.txt' + assert ver_file.exists(), f'cannot find version file in {bento.path}' + + lock_file = bento.path / 'env' / 'python' / 'requirements.lock.txt' + if not lock_file.exists(): + lock_file = bento.path / 'env' 
/ 'python' / 'requirements.txt' + + reqs = _resolve_packages(lock_file) + preheat_packages = _filter_preheat_packages(reqs) + ver = ver_file.read_text().strip() + return ( + VenvSpec(python_version=ver, python_packages=preheat_packages, name_prefix=f"{bento.tag.replace(':', '_')}-1-"), + VenvSpec( + python_version=ver, python_packages=[v.line for v in reqs], name_prefix=f"{bento.tag.replace(':', '_')}-2-" + ), + ) + + +def _get_lib_dir(venv: pathlib.Path) -> pathlib.Path: + if os.name == 'nt': + return venv / 'Lib/site-packages' + else: + return next(venv.glob('lib/python*')) / 'site-packages' + + +def _ensure_venv(env_spec: VenvSpec, parrent_venv: typing.Optional[pathlib.Path] = None) -> pathlib.Path: + venv = VENV_DIR / str(hash(env_spec)) + if venv.exists() and not (venv / 'DONE').exists(): + shutil.rmtree(venv, ignore_errors=True) + if not venv.exists(): + output(f'Installing model dependencies({venv})...', style='green') + + venv_py = venv / 'Scripts' / 'python.exe' if os.name == 'nt' else venv / 'bin' / 'python' + try: + run_command(['python', '-m', 'uv', 'venv', venv], silent=VERBOSE_LEVEL.get() < 10) + lib_dir = _get_lib_dir(venv) + if parrent_venv is not None: + parent_lib_dir = _get_lib_dir(parrent_venv) + with open(lib_dir / f'{parrent_venv.name}.pth', 'w+') as f: + f.write(str(parent_lib_dir)) + with open(venv / 'requirements.txt', 'w') as f: + f.write('\n'.join(sorted(env_spec.python_packages))) + run_command( + ['python', '-m', 'uv', 'pip', 'install', '-p', str(venv_py), '-r', venv / 'requirements.txt'], + silent=VERBOSE_LEVEL.get() < 10, + ) + with open(venv / 'DONE', 'w') as f: + f.write('DONE') + except Exception: + shutil.rmtree(venv, ignore_errors=True) + output(f'Failed to install dependencies to {venv}. Cleaned up.', style='red') + raise typer.Exit(1) + output(f'Successfully installed dependencies to {venv}.', style='green') + return venv + else: + return venv + + +def _ensure_venvs(env_spec_list: Iterable[VenvSpec]) -> pathlib.Path: + last_venv = None + for env_spec in env_spec_list: + last_venv = _ensure_venv(env_spec, last_venv) + assert last_venv is not None + return last_venv + + +def ensure_venv(bento: BentoInfo) -> pathlib.Path: + return _ensure_venvs(_resolve_bento_env_specs(bento)) + + +def _check_venv(env_spec: VenvSpec) -> bool: + venv = VENV_DIR / str(hash(env_spec)) + if not venv.exists(): + return False + if venv.exists() and not (venv / 'DONE').exists(): + return False + return True + + +def check_venv(bento: BentoInfo) -> bool: + return all(_check_venv(env_spec) for env_spec in _resolve_bento_env_specs(bento)) diff --git a/tools/compile.sh b/tools/compile.sh deleted file mode 100644 index 9223c961..00000000 --- a/tools/compile.sh +++ /dev/null @@ -1,7 +0,0 @@ -#!/usr/bin/env bash -SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &> /dev/null && pwd) -pip install -U mypy==1.7.0 "build<1" hatchling==1.18.0 hatch-vcs==0.3.0 hatch-fancy-pypi-readme==23.1.0 hatch-mypyc==0.16.0 -HATCH_VERBOSE=3 MYPY_CONFIG_FILE_DIR="${SCRIPT_DIR}" HATCH_BUILD_HOOKS_ENABLE=1 python -m build openllm-client -w -C--global-option=--verbose "$@" -HATCH_VERBOSE=3 MYPY_CONFIG_FILE_DIR="${SCRIPT_DIR}" HATCH_BUILD_HOOKS_ENABLE=1 python -m build openllm-core -w -C--global-option=--verbose "$@" -HATCH_VERBOSE=3 MYPY_CONFIG_FILE_DIR="${SCRIPT_DIR}" HATCH_BUILD_HOOKS_ENABLE=1 python -m build openllm-python -w -C--global-option=--verbose "$@" -hatch clean diff --git a/tools/dependencies.py b/tools/dependencies.py deleted file mode 100755 index 56d44039..00000000 --- 
a/tools/dependencies.py +++ /dev/null @@ -1,284 +0,0 @@ -#!/usr/bin/env python3 -from __future__ import annotations -import dataclasses -import os -import typing as t - -import tomlkit -from ghapi.all import GhApi - -if t.TYPE_CHECKING: - from tomlkit.items import Array, Table - -ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) - -_OWNER, _REPO = 'bentoml', 'openllm' - - -@dataclasses.dataclass(frozen=True) -class Classifier: - identifier: t.Dict[str, str] = dataclasses.field( - default_factory=lambda: { - 'status': 'Development Status', - 'environment': 'Environment', - 'license': 'License', - 'topic': 'Topic', - 'os': 'Operating System', - 'audience': 'Intended Audience', - 'typing': 'Typing', - 'language': 'Programming Language', - } - ) - joiner: str = ' :: ' - - @staticmethod - def status() -> dict[int, str]: - return { - v: status - for v, status in zip( - range(1, 8), - [ - '1 - Planning', - '2 - Pre-Alpha', - '3 - Alpha', - '4 - Beta', - '5 - Production/Stable', - '6 - Mature', - '7 - Inactive', - ], - ) - } - - @staticmethod - def apache() -> str: - return Classifier.create_classifier('license', 'OSI Approved', 'Apache Software License') - - @staticmethod - def create_classifier(identifier: str, *decls: t.Any) -> str: - cls_ = Classifier() - if identifier not in cls_.identifier: - raise ValueError(f'{identifier} is not yet supported (supported alias: {Classifier.identifier})') - return cls_.joiner.join([cls_.identifier[identifier], *decls]) - - @staticmethod - def create_python_classifier( - implementation: list[str] | None = None, supported_version: list[str] | None = None - ) -> list[str]: - if supported_version is None: - supported_version = ['3.8', '3.9', '3.10', '3.11', '3.12'] - if implementation is None: - implementation = ['CPython', 'PyPy'] - base = [ - Classifier.create_classifier('language', 'Python'), - Classifier.create_classifier('language', 'Python', '3'), - ] - base.append(Classifier.create_classifier('language', 'Python', '3', 'Only')) - base.extend([Classifier.create_classifier('language', 'Python', version) for version in supported_version]) - base.extend([ - Classifier.create_classifier('language', 'Python', 'Implementation', impl) for impl in implementation - ]) - return base - - @staticmethod - def create_status_classifier(level: int) -> str: - return Classifier.create_classifier('status', Classifier.status()[level]) - - -@dataclasses.dataclass(frozen=True) -class Dependencies: - name: str - git_repo_url: t.Optional[str] = None - branch: t.Optional[str] = None - extensions: t.Optional[t.List[str]] = None - subdirectory: t.Optional[str] = None - requires_gpu: bool = False - lower_constraint: t.Optional[str] = None - upper_constraint: t.Optional[str] = None - platform: t.Optional[t.Tuple[t.Literal['Linux', 'Windows', 'Darwin'], t.Literal['eq', 'ne']]] = None - - def with_options(self, **kwargs: t.Any) -> Dependencies: - return dataclasses.replace(self, **kwargs) - - @property - def has_constraint(self) -> bool: - return self.lower_constraint is not None or self.upper_constraint is not None - - @property - def pypi_extensions(self) -> str: - return '' if self.extensions is None else f"[{','.join(self.extensions)}]" - - @staticmethod - def platform_restriction(platform: t.LiteralString, op: t.Literal['eq', 'ne'] = 'eq') -> str: - return f'platform_system{"==" if op == "eq" else "!="}"{platform}"' - - def to_str(self) -> str: - deps: list[str] = [] - if self.lower_constraint is not None and self.upper_constraint is not None: - dep = 
f'{self.name}{self.pypi_extensions}>={self.lower_constraint},<{self.upper_constraint}' - elif self.lower_constraint is not None: - dep = f'{self.name}{self.pypi_extensions}>={self.lower_constraint}' - elif self.upper_constraint is not None: - dep = f'{self.name}{self.pypi_extensions}<{self.upper_constraint}' - elif self.subdirectory is not None: - dep = f'{self.name}{self.pypi_extensions} @ git+https://github.com/{self.git_repo_url}.git#subdirectory={self.subdirectory}' - elif self.branch is not None: - dep = f'{self.name}{self.pypi_extensions} @ git+https://github.com/{self.git_repo_url}.git@{self.branch}' - else: - dep = f'{self.name}{self.pypi_extensions}' - deps.append(dep) - if self.platform: - deps.append(self.platform_restriction(*self.platform)) - return ';'.join(deps) - - @classmethod - def from_tuple(cls, *decls: t.Any) -> Dependencies: - return cls(*decls) - - -_LOWER_BENTOML_CONSTRAINT = '1.2.16' -_BENTOML_EXT = ['io'] - - -fname = f'{os.path.basename(os.path.dirname(__file__))}/{os.path.basename(__file__)}' - - -def correct_style(it: t.Any) -> t.Any: - return it - - -def create_classifiers() -> Array: - arr = correct_style(tomlkit.array()) - arr.extend([ - Classifier.create_status_classifier(5), - Classifier.create_classifier('environment', 'GPU', 'NVIDIA CUDA'), - Classifier.create_classifier('environment', 'GPU', 'NVIDIA CUDA', '12'), - Classifier.create_classifier('environment', 'GPU', 'NVIDIA CUDA', '11.8'), - Classifier.create_classifier('environment', 'GPU', 'NVIDIA CUDA', '11.7'), - Classifier.apache(), - Classifier.create_classifier('topic', 'Scientific/Engineering', 'Artificial Intelligence'), - Classifier.create_classifier('topic', 'Software Development', 'Libraries'), - Classifier.create_classifier('os', 'OS Independent'), - Classifier.create_classifier('audience', 'Developers'), - Classifier.create_classifier('audience', 'Science/Research'), - Classifier.create_classifier('audience', 'System Administrators'), - Classifier.create_classifier('typing', 'Typed'), - *Classifier.create_python_classifier(), - ]) - return arr.multiline(True) - - -def create_url_table(_info: t.Any) -> Table: - table = tomlkit.table() - _urls = { - 'Blog': 'https://modelserving.com', - 'Chat': 'https://discord.gg/openllm', - 'Documentation': 'https://github.com/bentoml/openllm#readme', - 'GitHub': _info.html_url, - 'History': f'{_info.html_url}/blob/main/CHANGELOG.md', - 'Homepage': _info.homepage, - 'Tracker': f'{_info.html_url}/issues', - 'Twitter': 'https://twitter.com/bentomlai', - } - table.update({k: v for k, v in sorted(_urls.items())}) - return table - - -def build_system() -> Table: - table = tomlkit.table() - table.add('build-backend', 'hatchling.build') - requires_array = correct_style(tomlkit.array()) - requires_array.extend(['hatchling==1.18.0', 'hatch-vcs==0.3.0', 'hatch-fancy-pypi-readme==23.1.0']) - table.add('requires', requires_array.multiline(True)) - return table - - -def keywords() -> Array: - arr = correct_style(tomlkit.array()) - arr.extend([ - 'MLOps', - 'AI', - 'BentoML', - 'Model Serving', - 'Model Deployment', - 'LLMOps', - 'Falcon', - 'Vicuna', - 'Llama 2', - 'Fine tuning', - 'Serverless', - 'Large Language Model', - 'Generative AI', - 'StableLM', - 'Alpaca', - 'PyTorch', - 'Mistral', - 'vLLM', - 'Transformers', - ]) - return arr.multiline(True) - - -def build_cli_extensions() -> Table: - table = tomlkit.table() - table.update({'openllm': '_openllm_tiny._entrypoint:cli'}) - return table - - -def main(args) -> int: - api = GhApi(owner=_OWNER, repo=_REPO, 
authenticate=False) - _info = api.repos.get() - with open(os.path.join(ROOT, 'openllm-python', 'pyproject.toml'), 'r') as f: - pyproject = tomlkit.parse(f.read()) - - if args.release_version is not None: - release_version = args.release_version - else: - try: - release_version = api.repos.get_latest_release()['tag_name'].lstrip('v') - except Exception as err: - raise err - - _BASE_DEPENDENCIES = [ - Dependencies(name='bentoml', extensions=_BENTOML_EXT, lower_constraint=_LOWER_BENTOML_CONSTRAINT), - Dependencies(name='openllm-client', lower_constraint=release_version), - Dependencies(name='openllm-core', lower_constraint=release_version), - Dependencies(name='safetensors'), - Dependencies(name='vllm', lower_constraint='0.4.3'), - Dependencies(name='ghapi'), - Dependencies(name='einops'), - Dependencies(name='sentencepiece'), - Dependencies(name='scipy'), - Dependencies(name='click', lower_constraint='8.1.3'), - Dependencies(name='cuda-python', platform=('Darwin', 'ne')), - ] - - dependencies_array = correct_style(tomlkit.array()) - dependencies_array.extend([v.to_str() for v in _BASE_DEPENDENCIES]) - # dynamic field - dyn_arr = tomlkit.array() - dyn_arr.extend(['version', 'readme']) - - pyproject['build-system'] = build_system() - pyproject['project']['classifiers'] = create_classifiers() - pyproject['project']['dependencies'] = dependencies_array.multiline(True) - pyproject['project']['description'] = f'{_info.name}: {_info.description}' - pyproject['project']['dynamic'] = dyn_arr - pyproject['project']['keywords'] = keywords() - pyproject['project']['license'] = _info.license.spdx_id - pyproject['project']['name'] = f'{_info.name.lower()}' - pyproject['project']['requires-python'] = '>=3.8' - - pyproject['project']['urls'] = create_url_table(_info) - pyproject['project']['scripts'] = build_cli_extensions() - - with open(os.path.join(ROOT, 'openllm-python', 'pyproject.toml'), 'w') as f: - f.write(tomlkit.dumps(pyproject)) - return 0 - - -if __name__ == '__main__': - import argparse - - parser = argparse.ArgumentParser() - parser.add_argument('--release-version', type=str, default=None) - raise SystemExit(main(parser.parse_args())) diff --git a/tools/generate-coverage.py b/tools/generate-coverage.py deleted file mode 100755 index 0e6df369..00000000 --- a/tools/generate-coverage.py +++ /dev/null @@ -1,53 +0,0 @@ -#!/usr/bin/env python3 -from __future__ import annotations -from collections import defaultdict -from pathlib import Path - -import orjson -from lxml import etree - -ROOT = Path(__file__).resolve().parent.parent - -PACKAGES = {'openllm-python/src/openllm/': 'openllm'} - - -def main() -> int: - coverage_report = ROOT / 'coverage.xml' - root = etree.fromstring(coverage_report.read_text()) - - raw_package_data: defaultdict[str, dict[str, int]] = defaultdict(lambda: {'hits': 0, 'misses': 0}) - for package in root.find('packages'): - for module in package.find('classes'): - filename = module.attrib['filename'] - for relative_path, package_name in PACKAGES.items(): - if filename.startswith(relative_path): - data = raw_package_data[package_name] - break - else: - message = f'unknown package: {module}' - raise ValueError(message) - - for line in module.find('lines'): - if line.attrib['hits'] == '1': - data['hits'] += 1 - else: - data['misses'] += 1 - - total_statements_covered = 0 - total_statements = 0 - coverage_data = {} - for package_name, data in sorted(raw_package_data.items()): - statements_covered = data['hits'] - statements = statements_covered + data['misses'] - 
total_statements_covered += statements_covered - total_statements += statements - coverage_data[package_name] = {'statements_covered': statements_covered, 'statements': statements} - coverage_data['total'] = {'statements_covered': total_statements_covered, 'statements': total_statements} - - coverage_summary = ROOT / 'coverage-summary.json' - coverage_summary.write_text(orjson.dumps(coverage_data, option=orjson.OPT_INDENT_2).decode(), encoding='utf-8') - return 0 - - -if __name__ == '__main__': - raise SystemExit(main()) diff --git a/tools/lock-actions.sh b/tools/lock-actions.sh deleted file mode 100755 index da9d4adb..00000000 --- a/tools/lock-actions.sh +++ /dev/null @@ -1,14 +0,0 @@ -#!/usr/bin/env bash - -set -ex - -GIT_ROOT=$(git rev-parse --show-toplevel) - -cd "$GIT_ROOT" || exit 1 - -[[ -x "$(command -v docker)" ]] || ( - echo "docker not found. Make sure to have docker running to run this job." - exit 1 -) - -RATCHET_EXP_KEEP_NEWLINES=true find "${GIT_ROOT}/.github/workflows" -type f -iname '*.yml' -exec docker run -it --rm -v "${PWD}":"${PWD}" -w "${PWD}" -e RATCHET_EXP_KEEP_NEWLINES=true ghcr.io/sethvargo/ratchet:0.4.0 pin {} \; diff --git a/tools/machines.py b/tools/machines.py deleted file mode 100644 index aedc5a70..00000000 --- a/tools/machines.py +++ /dev/null @@ -1,91 +0,0 @@ -from __future__ import annotations - -import httpx,os,dataclasses,datetime,time,argparse,typing as t -if (ENV:=os.getenv("PAPERSPACE_API_KEY")) is None:raise RuntimeError('This script requires setting "PAPERSPACE_API_KEY"') -HEADERS=httpx.Headers({'Authorization': f'Bearer {ENV}', 'Accept': 'application/json'}) -API_URL='https://api.paperspace.com/v1' - -@dataclasses.dataclass -class Machine: - id:str - inner:httpx.Client=dataclasses.field(default_factory=lambda:httpx.Client(headers=HEADERS,base_url=API_URL,timeout=60),repr=False) - def close(self):self.inner.close() - def __del__(self):self.close() - def __enter__(self):return self - def __exit__(self, *_: t.Any)->None:self.close() - @property - def metadata(self)->dict[str,t.Any]:return self.inner.get(f'/machines/{self.id}').json() - @property - def status(self)->t.Literal['off','ready','stopping','starting']:return self.metadata['state'] - def start(self)->bool: - response=self.inner.patch(f'/machines/{self.id}/start') - if response.status_code==400 or self.status=='ready': - print('machine is already running') - return False - elif response.status_code!=200:raise ValueError(f'Error while starting machine: {response.json()}') - return True - def stop(self)->bool: - response=self.inner.patch(f'/machines/{self.id}/stop') - if response.status_code==400 or self.status=='off': - print('machine is already off') - return False - elif response.status_code!=200:raise ValueError(f'Error while stopping machine {response.json()}') - return True - @classmethod - def ci(cls,template_id:str)->Machine: - client = httpx.Client(headers=HEADERS,base_url=API_URL,timeout=60) - machines=client.get('/machines',params=dict(limit=1,name='openllm-ci')).json() - if len(machines['items']) == 1: - return cls(id=machines['items'][0]['id'],inner=client) - response=client.post('/machines',json=dict( - name=f'openllm-ci-{datetime.datetime.now().timestamp()}', machineType='A4000',templateId=template_id, - networkId=os.getenv("PAPERSPACE_NETWORK_ID"), diskSize=500,region='ny2',publicIpType='dynamic',startOnCreate=True)) - if response.status_code!=200:raise ValueError(f'Failed while creating a machine: {response.json()}') - return cls(id=response.json()['data']['id'],inner=client) - 
def actions(self,ip:bool=False,id:bool=False)->str: - if ip:return self.metadata["publicIp"] - if id:return self.id - raise ValueError('cannot be all false.') -def main(): - parser=argparse.ArgumentParser() - group=parser.add_mutually_exclusive_group(required=True) - group.add_argument('--start',metavar='ID') - group.add_argument('--stop',metavar='ID') - group.add_argument('--delete',metavar='ID') - group.add_argument('--ci-template',metavar='ID') - parser.add_argument('--output-ip',action='store_true') - parser.add_argument('--output-id',action='store_true') - args=parser.parse_args() - - if args.ci_template: - machine=Machine.ci(args.ci_template) - while machine.status!='ready':time.sleep(5) - print(machine.actions(args.output_ip,args.output_id)) - machine.close() - elif args.delete: - with httpx.Client(headers=HEADERS, base_url=API_URL, timeout=60) as client: - response=client.delete(f'/machines/{args.delete}') - if response.status_code!=200: - print('Error while deleting machine', response.json()) - return 1 - elif args.start: - with Machine(id=args.start) as machine: - if machine.start(): - while machine.status!='ready': - print('Waiting for machine to be ready...') - time.sleep(5) - else: - print('Failed to start machine "%s"', machine.id) - return 1 - elif args.stop: - with Machine(id=args.stop) as machine: - if machine.stop(): - while machine.status!='ready': - print('Waiting for machine to stop...') - time.sleep(5) - else: - print('Failed to stop machine') - return 1 - return 0 - -if __name__ == "__main__":raise SystemExit(main()) diff --git a/tools/mirror.sh b/tools/mirror.sh deleted file mode 100755 index c7396a60..00000000 --- a/tools/mirror.sh +++ /dev/null @@ -1,14 +0,0 @@ -#!/usr/bin/env bash - -set -eo pipefail - -GIT_ROOT="$(git rev-parse --show-toplevel)" -cd "$GIT_ROOT" || exit 1 - -mirror() { - cp $1 $2 -} - -mirror README.md openllm-python/README.md -mirror LICENSE.md openllm-python/LICENSE.md -mirror CHANGELOG.md openllm-python/CHANGELOG.md diff --git a/tools/requirements.txt b/tools/requirements.txt deleted file mode 100644 index b762f050..00000000 --- a/tools/requirements.txt +++ /dev/null @@ -1,6 +0,0 @@ -ipython -jupyter -tomlkit -ghapi -pre-commit -orjson diff --git a/tools/run-release-action b/tools/run-release-action deleted file mode 100755 index a967295b..00000000 --- a/tools/run-release-action +++ /dev/null @@ -1,148 +0,0 @@ -#!/usr/bin/env bash - -set -e -o pipefail - -# Function to print script usage -print_usage() { - echo "Usage: $0 [--release ]" -} - -# Function to validate release argument -validate_release() { - local release=$1 - - if [[ $release == "major" || $release == "minor" || $release == "patch" || $release == "alpha" ]]; then - return 0 - else - return 1 - fi -} - -check_membership() { - local org="BentoML" - local username=$(gh api user | jq -r '.login') - if gh api orgs/$org/members/$username -q '.message' | grep -q "Not Found"; then - echo "ERROR: You must be a member of $org to run this script." - exit 1 - fi -} - -for cmd in gh jq; do - if ! command -v "$cmd" @ >&1 >/dev/null; then - echo "ERROR: $cmd not installed. Aborting..." - exit 1 - fi -done - -check_membership - -# Check if release flag is provided -if [[ $1 == "--release" ]]; then - # Check if release argument is provided - if [[ -z $2 ]]; then - echo "Error: No release argument provided." - print_usage - exit 1 - fi - - release=$2 - - if ! validate_release "$release"; then - echo "Error: Invalid release argument. Only 'major', 'minor', 'patch', or 'alpha' are allowed." 
- print_usage - exit 1 - fi -else - echo "Error: Unknown option or no option provided." - print_usage - exit 1 -fi - -# Get the current version and separate the alpha part if it exists -version="$(git describe --tags "$(git rev-list --tags --max-count=1)")" -VERSION="${version#v}" - -# Initialize variables for alpha versioning -ALPHA="" -ALPHA_NUM=0 - -# Check if current version is an alpha version and split accordingly -if [[ $VERSION =~ -alpha ]]; then - IFS='-' read -r BASE_VERSION ALPHA <<<"$VERSION" - if [[ $ALPHA =~ [.] ]]; then - IFS='.' read -r ALPHA ALPHA_NUM <<<"$ALPHA" - fi -else - BASE_VERSION="$VERSION" -fi - -# Save the current value of IFS to restore it later and split the base version -OLD_IFS=$IFS -IFS='.' -read -ra VERSION_BITS <<<"$BASE_VERSION" -IFS=$OLD_IFS - -# Assign split version numbers -VNUM1=${VERSION_BITS[0]} -VNUM2=${VERSION_BITS[1]} -VNUM3=${VERSION_BITS[2]} - -# Adjust the version numbers based on the release type -if [[ $release == 'major' ]]; then - VNUM1=$((VNUM1 + 1)) - VNUM2=0 - VNUM3=0 - ALPHA="" # Reset alpha for major release -elif [[ $release == 'minor' ]]; then - if [[ -n $ALPHA ]]; then - ALPHA="" # Remove alpha suffix for minor release from an alpha version - else - VNUM2=$((VNUM2 + 1)) - VNUM3=0 - fi -elif [[ $release == 'patch' ]]; then - VNUM3=$((VNUM3 + 1)) - ALPHA="" # Reset alpha for patch release -elif [[ $release == 'alpha' ]]; then - if [ -n "$ALPHA" ]; then - ALPHA_NUM=$((ALPHA_NUM + 1)) - else - VNUM2=$((VNUM2 + 1)) - VNUM3=0 - ALPHA="alpha" - ALPHA_NUM=0 - fi -fi - -# Construct the new version string -if [ -n "$ALPHA" ]; then - if ((ALPHA_NUM > 0)); then - RELEASE_TAG="v$VNUM1.$VNUM2.$VNUM3-alpha.$ALPHA_NUM" - else - RELEASE_TAG="v$VNUM1.$VNUM2.$VNUM3-alpha" - fi -else - RELEASE_TAG="v$VNUM1.$VNUM2.$VNUM3" -fi - -echo "Releasing version: $RELEASE_TAG" - -if [[ -v DRYRUN ]]; then - exit 0 -fi - -echo "Running release actions (create-releases.yml)..." -echo '{"release_type": "'"$release"'"}' | gh workflow run create-releases.yml --repo bentoml/openllm --json - -sleep 20 - -set -x - -echo "Waiting for new tags to be released from 'create-releases.yml'" -while true; do - git pull --autostash --no-edit --gpg-sign --ff origin main - if git ls-remote -t --exit-code origin "refs/tags/${RELEASE_TAG}" &>/dev/null; then - break - fi - sleep 10 -done diff --git a/tools/tests-requirements.txt b/tools/tests-requirements.txt deleted file mode 100644 index 06413c14..00000000 --- a/tools/tests-requirements.txt +++ /dev/null @@ -1,9 +0,0 @@ -pytest -pytest-cov -pytest-mock -pytest-randomly -pytest-rerunfailures -pytest-asyncio>=0.21.0 -coverage[toml] -filelock>=3.7.1 -pytest-xdist[psutil] diff --git a/tools/update-actions.sh b/tools/update-actions.sh deleted file mode 100755 index ceb35001..00000000 --- a/tools/update-actions.sh +++ /dev/null @@ -1,27 +0,0 @@ -#!/usr/bin/env bash - -set -e - -DEBUG=${DEBUG:-false} -[[ "${DEBUG}" == "true" ]] && set -x - -GIT_ROOT=$(git rev-parse --show-toplevel) - -cd "$GIT_ROOT" || exit 1 - -[[ -x "$(command -v docker)" ]] || ( - echo "docker not found. Make sure to have docker running to run this job." - exit 1 -) - -docker version &>/dev/null || ( - echo "docker is not healthy. Make sure to have docker running" - exit 1 -) - -[[ -z "${ACTIONS_TOKEN}" ]] && ( - echo "ACTIONS_TOKEN not found. Make sure to have ACTIONS_TOKEN set to run this job." 
- exit 1 -) - -find "${GIT_ROOT}/.github/workflows" -type f -iname '*.yml' -exec docker run --rm -v "${PWD}":"${PWD}" -w "${PWD}" -e ACTIONS_TOKEN -e RATCHET_EXP_KEEP_NEWLINES=true ghcr.io/sethvargo/ratchet:0.4.0 update {} \; diff --git a/tools/update-config-stubs.py b/tools/update-config-stubs.py deleted file mode 100755 index 672abd68..00000000 --- a/tools/update-config-stubs.py +++ /dev/null @@ -1,170 +0,0 @@ -#!/usr/bin/env python3 -from __future__ import annotations -import os, sys -from pathlib import Path - -# currently we are assuming the indentatio level is 2 for comments -START_COMMENT = f'# {os.path.basename(__file__)}: start\n' -END_COMMENT = f'# {os.path.basename(__file__)}: stop\n' -# Stubs for auto class -START_AUTO_STUBS_COMMENT = f'# {os.path.basename(__file__)}: auto stubs start\n' -END_AUTO_STUBS_COMMENT = f'# {os.path.basename(__file__)}: auto stubs stop\n' -# Stubs for actual imports -START_IMPORT_STUBS_COMMENT = f'# {os.path.basename(__file__)}: import stubs start\n' -END_IMPORT_STUBS_COMMENT = f'# {os.path.basename(__file__)}: import stubs stop\n' - -ROOT = Path(__file__).parent.parent -_TARGET_FILE = ROOT / 'openllm-core' / 'src' / 'openllm_core' / '_configuration.py' -_TARGET_AUTO_FILE = ROOT / 'openllm-core' / 'src' / 'openllm_core' / 'config' / 'configuration_auto.py' -_TARGET_CORE_INIT_FILE = ROOT / 'openllm-core' / 'src' / 'openllm_core' / 'config' / '__init__.py' -_TARGET_INIT_FILE = ROOT / 'openllm-python' / 'src' / 'openllm' / '__init__.pyi' -_TARGET_IMPORT_UTILS_FILE = ROOT / 'openllm-core' / 'src' / 'openllm_core' / 'utils' / 'import_utils.pyi' - -sys.path.insert(0, (ROOT / 'openllm-core' / 'src').__fspath__()) -from openllm_core._configuration import GenerationConfig, ModelSettings -from openllm_core.config.configuration_auto import CONFIG_MAPPING_NAMES -from openllm_core.utils import codegen, import_utils as iutils -# from openllm_core.utils.peft import PeftType - - -def process_annotations(annotations: str) -> str: - if 'NotRequired' in annotations: - return annotations[len('NotRequired[') : -1] - elif 'Required' in annotations: - return annotations[len('Required[') : -1] - else: - return annotations - - -_transformed = {'fine_tune_strategies': 't.Dict[AdapterType, FineTuneConfig]'} - - -def main() -> int: - with _TARGET_FILE.open('r') as f: - processed = f.readlines() - - start_idx, end_idx = processed.index(' ' * 2 + START_COMMENT), processed.index(' ' * 2 + END_COMMENT) - - # NOTE: inline runtime __getitem__ overload process - lines: list[str] = [] - lines.append(' ' * 2 + '# NOTE: ModelSettings arguments\n') - for keys, ForwardRef in codegen.get_annotations(ModelSettings).items(): - lines.extend([ - ' ' * 2 + line - for line in [ - '@overload\n', - f"def __getitem__(self, item: t.Literal['{keys}']) -> {process_annotations(ForwardRef.__forward_arg__)}: ...\n", - ] - ]) - lines.append(' ' * 2 + '# NOTE: GenerationConfig arguments\n') - generation_config_anns = codegen.get_annotations(GenerationConfig) - for keys, type_pep563 in generation_config_anns.items(): - lines.extend([ - ' ' * 2 + line - for line in ['@overload\n', f"def __getitem__(self, item: t.Literal['{keys}']) -> {type_pep563}: ...\n"] - ]) - # backward compatible - lines.extend([ - ' ' * 2 + line - for line in [ - '@overload\n', - "def __getitem__(self, item: t.Literal['max_new_tokens']) -> int: ...\n", - '@overload\n', - "def __getitem__(self, item: t.Literal['start_name']) -> str: ...\n", - '@overload\n', - "def __getitem__(self, item: t.Literal['model_name']) -> str: ...\n", - ] - ]) - 
- processed = ( - processed[:start_idx] + [' ' * 2 + START_COMMENT, *lines, ' ' * 2 + END_COMMENT] + processed[end_idx + 1 :] - ) - with _TARGET_FILE.open('w') as f: - f.writelines(processed) - - with _TARGET_AUTO_FILE.open('r') as f: - processed = f.readlines() - - start_auto_stubs_idx, end_auto_stubs_idx = ( - processed.index(' ' * 2 + START_AUTO_STUBS_COMMENT), - processed.index(' ' * 2 + END_AUTO_STUBS_COMMENT), - ) - lines = [] - for model, class_name in CONFIG_MAPPING_NAMES.items(): - lines.extend([ - ' ' * 2 + line - for line in [ - '@t.overload\n', - '@classmethod\n', - f"def for_model(cls, model_name: t.Literal['{model}'], **attrs: t.Any) -> openllm_core.config.{class_name}: ...\n", - ] - ]) - lines.extend([ - ' ' * 2 + line - for line in [ - '@t.overload\n', - '@classmethod\n', - 'def for_model(cls, model_name: LiteralString, **attrs: t.Any) -> openllm_core.LLMConfig: ...\n', - ] - ]) - processed = ( - processed[:start_auto_stubs_idx] - + [' ' * 2 + START_AUTO_STUBS_COMMENT, *lines, ' ' * 2 + END_AUTO_STUBS_COMMENT] - + processed[end_auto_stubs_idx + 1 :] - ) - with _TARGET_AUTO_FILE.open('w') as f: - f.writelines(processed) - - with _TARGET_INIT_FILE.open('r') as f: - processed = f.readlines() - - start_import_stubs_idx, end_import_stubs_idx = ( - processed.index(START_IMPORT_STUBS_COMMENT), - processed.index(END_IMPORT_STUBS_COMMENT), - ) - mm = { - '_configuration': ('GenerationConfig', 'LLMConfig'), - '_schemas': ('GenerationInput', 'GenerationOutput', 'MetadataOutput', 'MessageParam'), - 'utils': ('api',), - } - lines = [ - 'from openllm_client import AsyncHTTPClient as AsyncHTTPClient, HTTPClient as HTTPClient', - f'from openlm_core.config import CONFIG_MAPPING as CONFIG_MAPPING, CONFIG_MAPPING_NAMES as CONFIG_MAPPING_NAMES, AutoConfig as AutoConfig, {", ".join([a+" as "+a for a in CONFIG_MAPPING_NAMES.values()])}', - ] - lines.extend([ - f'from openllm_core.{module} import {", ".join([a+" as "+a for a in attr])}' for module, attr in mm.items() - ]) - processed = ( - processed[:start_import_stubs_idx] - + [START_IMPORT_STUBS_COMMENT, '\n'.join(lines) + '\n', END_IMPORT_STUBS_COMMENT] - + processed[end_import_stubs_idx + 1 :] - ) - with _TARGET_INIT_FILE.open('w') as f: - f.writelines(processed) - - lines = [ - '# fmt: off\n', - f'# AUTOGENERATED BY {os.path.basename(__file__)}. DO NOT EDIT\n', - 'from .configuration_auto import CONFIG_MAPPING as CONFIG_MAPPING, CONFIG_MAPPING_NAMES as CONFIG_MAPPING_NAMES, AutoConfig as AutoConfig\n', - *[f'from .configuration_{k} import {a} as {a}\n' for k, a in CONFIG_MAPPING_NAMES.items()], - ] - with _TARGET_CORE_INIT_FILE.open('w') as f: - f.writelines(lines) - - lines = [ - '# fmt: off\n', - f'# AUTOGENERATED BY {os.path.basename(__file__)}. 
DO NOT EDIT\n', - 'import typing as t\n', - 'def is_autoawq_available() -> bool: ...\n', - 'def is_vllm_available() -> bool: ...\n', - *[f'def {k}() -> bool: ...\n' for k in iutils.caller], - 'ENV_VARS_TRUE_VALUES: t.Set[str] = ...\n', - ] - with _TARGET_IMPORT_UTILS_FILE.open('w') as f: - f.writelines(lines) - - return 0 - - -if __name__ == '__main__': - raise SystemExit(main()) diff --git a/tools/update-mypy.py b/tools/update-mypy.py deleted file mode 100755 index d37dffa0..00000000 --- a/tools/update-mypy.py +++ /dev/null @@ -1,88 +0,0 @@ -#!/usr/bin/env python3 -import concurrent.futures -import configparser -import os -from typing import List - -_MYPY_CONFIG = { - 'pretty': 'true', - 'python_version': '3.9', - 'show_error_codes': 'true', - 'strict': 'true', - 'plugins': 'pydantic.mypy', - 'ignore_missing_imports': 'true', - 'warn_unreachable': 'true', - 'explicit_package_bases': 'true', -} - - -# Function to find .pyi files in a given directory -def pyi_in_subdir(directory: str, git_root: str) -> List[str]: - pyi_files = [] - for root, _, files in os.walk(directory): - for file in files: - if file.endswith('.pyi') or file == '_typing_compat.py' or '_openllm_tiny' in file: - full_path = os.path.join(root, file) - # Convert to relative path with respect to the git root - relative_path = os.path.relpath(full_path, git_root) - pyi_files.append(relative_path) - return pyi_files - - -def find_pyi_files(git_root: str) -> List[str]: - # List all subdirectories - subdirectories = [ - os.path.join(git_root, name) - for name in os.listdir(git_root) - if os.path.isdir(os.path.join(git_root, name)) and name not in ['venv', '.git', '.venv'] - ] - - # Use a thread pool to execute searches concurrently - with concurrent.futures.ThreadPoolExecutor() as executor: - # Map of future to subdirectory - future_to_subdir = {executor.submit(pyi_in_subdir, subdir, git_root): subdir for subdir in subdirectories} - - all_pyi_files = set() - for future in concurrent.futures.as_completed(future_to_subdir): - pyi_files = future.result() - all_pyi_files.update(pyi_files) - - return list(all_pyi_files) - - -# Function to update mypy.ini file -def update_mypy_ini(pyi_files: List[str], mypy_ini_path: str) -> int: - config = configparser.ConfigParser() - config.read(mypy_ini_path) - - # Existing files from mypy.ini - existing_files = config.get('mypy', 'files', fallback='').split(', ') - - # Add new .pyi files if they are not already in the list - updated_files = existing_files + [f for f in pyi_files if f not in existing_files] - - # Update the 'files' entry - config['mypy']['files'] = ', '.join(updated_files) - for key, value in _MYPY_CONFIG.items(): - config.set('mypy', key, value) - - # Write changes back to mypy.ini - with open(mypy_ini_path, 'w') as configfile: - configfile.write( - f'# The following is autogenerated by {os.path.join(os.path.basename(os.path.dirname(__file__)), os.path.basename(__file__))}\n' - ) - config.write(configfile) - # Remove last newline if exists - with open(mypy_ini_path, 'rb+') as file: - file.seek(-1, os.SEEK_END) - if file.read(1) == b'\n': - file.seek(-1, os.SEEK_END) - file.truncate() - return 0 - - -ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) -MYPY_CONFIG = os.path.join(ROOT, 'mypy.ini') - -if __name__ == '__main__': - raise SystemExit(update_mypy_ini(find_pyi_files(ROOT), MYPY_CONFIG)) diff --git a/tools/update-readme.py b/tools/update-readme.py deleted file mode 100755 index c1ffe77a..00000000 --- a/tools/update-readme.py +++ /dev/null @@ -1,64 +0,0 @@ 
-#!/usr/bin/env python3 -import os, shutil, sys, tomlkit - -START_COMMENT = f'<!-- {os.path.basename(__file__)}: start -->\n' -END_COMMENT = f'<!-- {os.path.basename(__file__)}: stop -->\n' - -ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) -sys.path.insert(0, os.path.join(ROOT, 'openllm-core', 'src')) -from openllm_core.config import CONFIG_MAPPING -from openllm_core.config.configuration_auto import CONFIG_TO_ALIAS_NAMES - - -def markdown_noteblock(text: str): - return ['\n', f'> **Note:** {text}\n'] - - -def main() -> int: - with open(os.path.join(ROOT, 'README.md'), 'r') as f: - readme = f.readlines() - - start_index, stop_index = readme.index(START_COMMENT), readme.index(END_COMMENT) - - content = [] - - for it in CONFIG_MAPPING.values(): - it = it() - architecture_name = it.__class__.__name__[:-6] - details_block = ['<details>\n', f'<summary>{architecture_name}</summary>\n\n', '### Quickstart\n']
- nitem = CONFIG_TO_ALIAS_NAMES[it.__class__.__name__] - details_block.extend([ - f'Run the following command to quickly spin up a {architecture_name} server:\n', - f"""\ -```bash -openllm start {it['default_id']}{'' if not it['trust_remote_code'] else ' --trust-remote-code'} -```""", - 'You can run the following code in a different terminal to interact with the server:', - """\ -```python -import openllm_client -client = openllm_client.HTTPClient('http://localhost:3000') -client.generate('What are large language models?') -```""", - *markdown_noteblock( - f'Any {architecture_name} variants can be deployed with OpenLLM. Visit the [HuggingFace Model Hub](https://huggingface.co/models?sort=trending&search={nitem}) to see more {architecture_name}-compatible models.\n' - ), - '\n### Supported models\n', - f'You can specify any of the following {architecture_name} models via `openllm start`:\n\n', - ]) - list_ids = [f'- [{model_id}](https://huggingface.co/{model_id})' for model_id in it['model_ids']] - details_block.extend(list_ids) - details_block.append('\n</details>
\n\n') - - content.append('\n'.join(details_block)) - - readme = readme[:start_index] + [START_COMMENT] + content + [END_COMMENT] + readme[stop_index + 1 :] - with open(os.path.join(ROOT, 'README.md'), 'w') as f: - f.writelines(readme) - - shutil.copyfile(os.path.join(ROOT, 'README.md'), os.path.join(ROOT, 'openllm-python', 'README.md')) - return 0 - - -if __name__ == '__main__': - raise SystemExit(main()) diff --git a/tools/write-coverage-report.py b/tools/write-coverage-report.py deleted file mode 100755 index 6e8f67fb..00000000 --- a/tools/write-coverage-report.py +++ /dev/null @@ -1,46 +0,0 @@ -#!/usr/bin/env python3 -from __future__ import annotations -from decimal import ROUND_DOWN, Decimal -from pathlib import Path -import orjson - -PRECISION = Decimal('.01') - -ROOT = Path(__file__).resolve().parent.parent - - -def main() -> int: - coverage_summary = ROOT / 'coverage-summary.json' - - coverage_data = orjson.loads(coverage_summary.read_text(encoding='utf-8')) - total_data = coverage_data.pop('total') - - lines = ['\n', 'Package | Statements\n', '------- | ----------\n'] - - for package, data in sorted(coverage_data.items()): - statements_covered = data['statements_covered'] - statements = data['statements'] - - rate = Decimal(statements_covered) / Decimal(statements) * 100 - rate = rate.quantize(PRECISION, rounding=ROUND_DOWN) - lines.append(f'{package} | {100 if rate == 100 else rate}% ({statements_covered} / {statements})\n') - - total_statements_covered = total_data['statements_covered'] - total_statements = total_data['statements'] - total_rate = Decimal(total_statements_covered) / Decimal(total_statements) * 100 - total_rate = total_rate.quantize(PRECISION, rounding=ROUND_DOWN) - color = 'ok' if float(total_rate) >= 95 else 'critical' - lines.insert(0, f'![Code Coverage](https://img.shields.io/badge/coverage-{total_rate}%25-{color}?style=flat)\n') - - lines.append( - f'**Summary** | {100 if total_rate == 100 else total_rate}% ({total_statements_covered} / {total_statements})\n' - ) - - coverage_report = ROOT / 'coverage-report.md' - with coverage_report.open('w', encoding='utf-8') as f: - f.write(''.join(lines)) - return 0 - - -if __name__ == '__main__': - raise SystemExit(main()) diff --git a/wheels.sh b/wheels.sh deleted file mode 100644 index fac97021..00000000 --- a/wheels.sh +++ /dev/null @@ -1,26 +0,0 @@ -#!/usr/bin/env bash - -set -eo pipefail - -GIT_ROOT="$(git rev-parse --show-toplevel)" -cd "$GIT_ROOT" || exit 1 - -mkdir -p dist - -pushd openllm-client &>/dev/null - -python -m build -w && mv dist/* ../dist - -popd &>/dev/null - -pushd openllm-core &>/dev/null - -python -m build -w && mv dist/* ../dist - -popd &>/dev/null - -pushd openllm-python &>/dev/null - -python -m build -w && mv dist/* ../dist - -popd &>/dev/null