diff --git a/.editorconfig b/.editorconfig index b0815bbc..3fcb4be2 100644 --- a/.editorconfig +++ b/.editorconfig @@ -9,5 +9,5 @@ charset = utf-8 indent_style = space indent_size = 2 -[src/openllm/cli/entrypoint.py] +[openllm-python/src/openllm/cli/entrypoint.py] indent_size = unset diff --git a/.gitattributes b/.gitattributes index 1925cb33..226718d4 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,10 +1,10 @@ -nightly-requirements.txt linguist-generated=true -nightly-requirements-gpu.txt linguist-generated=true -tests/models/__snapshots__/* linguist-generated=true +openllm-python/tests/models/__snapshots__/* linguist-generated=true typings/**/*.pyi linguist-generated=true Formula/openllm.rb linguist-generated=true -src/openllm/utils/dummy_*.py linguist-generated=true -src/openllm/models/__init__.py linguist-generated=true +openllm-python/src/openllm/utils/dummy_*.py linguist-generated=true +openllm-python/src/openllm/models/__init__.py linguist-generated=true +openllm-python/README.md linguist-generated=true * text=auto eol=lf # Needed for setuptools-scm-git-archive .git_archival.txt export-subst +openllm-python/.git_archival.txt export-subst diff --git a/.github/actions/create_release_and_archive.sh b/.github/actions/create_release_and_archive.sh index 30529c09..51e98f18 100755 --- a/.github/actions/create_release_and_archive.sh +++ b/.github/actions/create_release_and_archive.sh @@ -26,7 +26,7 @@ All available models: \`\`\`openllm models\`\`\` To start a LLM: \`\`\`python -m openllm start opt\`\`\` -To run OpenLLM within a container environment (requires GPUs): \`\`\`docker run --gpus all -it --entrypoint=/bin/bash -P ghcr.io/bentoml/openllm:${TAG} openllm --help\`\`\` +To run OpenLLM within a container environment (requires GPUs): \`\`\`docker run --gpus all -it -P ghcr.io/bentoml/openllm:${TAG} start opt\`\`\` Find more information about this release in the [CHANGELOG.md](https://github.com/bentoml/OpenLLM/blob/main/CHANGELOG.md) diff --git a/.github/actions/release.sh b/.github/actions/release.sh index 1b91427a..c6525d5a 100755 --- a/.github/actions/release.sh +++ b/.github/actions/release.sh @@ -47,11 +47,11 @@ release_package() { local version="$1" echo "Releasing version ${version}..." jq --arg release_version "${version}" '.version = $release_version' < package.json > package.json.tmp && mv package.json.tmp package.json - pushd src/openllm-node &>/dev/null + pushd openllm-node &>/dev/null jq --arg release_version "${version}" '.version = $release_version' < package.json > package.json.tmp && mv package.json.tmp package.json popd &>/dev/null towncrier build --yes --version "${version}" - git add CHANGELOG.md changelog.d package.json src/openllm-node/package.json + git add CHANGELOG.md changelog.d package.json openllm-node/package.json git commit -S -sm "infra: prepare for release ${version} [generated] [skip ci]" git push origin main echo "Releasing tag ${version}..." 
&& git tag -a "v${version}" -sm "Release ${version} [generated by GitHub Actions]" diff --git a/assets/agent.gif b/.github/assets/agent.gif similarity index 100% rename from assets/agent.gif rename to .github/assets/agent.gif diff --git a/assets/main-banner.png b/.github/assets/main-banner.png similarity index 100% rename from assets/main-banner.png rename to .github/assets/main-banner.png diff --git a/assets/output.gif b/.github/assets/output.gif similarity index 100% rename from assets/output.gif rename to .github/assets/output.gif diff --git a/.github/dependabot.yml b/.github/dependabot.yml index dc2ca198..9cef2e59 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -1,17 +1,3 @@ -# Copyright 2023 BentoML Team. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - version: 2 updates: - package-ecosystem: github-actions @@ -22,7 +8,7 @@ updates: time: '09:00' # maintain required dependencies - package-ecosystem: pip - directory: '/' + directory: '/openllm-python' schedule: interval: 'weekly' open-pull-requests-limit: 5 diff --git a/.github/workflows/auto-bot.yml b/.github/workflows/auto-bot.yml index 4f4cc5e0..a868079c 100644 --- a/.github/workflows/auto-bot.yml +++ b/.github/workflows/auto-bot.yml @@ -13,7 +13,7 @@ jobs: if: ${{ github.actor == 'dependabot[bot]' }} steps: - name: Wait for tests to succeed - uses: lewagon/wait-on-check-action@v1.3.1 + uses: lewagon/wait-on-check-action@e106e5c43e8ca1edea6383a39a01c5ca495fd812 # ratchet:lewagon/wait-on-check-action@v1.3.1 with: ref: ${{ github.ref }} check-name: evergreen diff --git a/.github/workflows/binary-releases.yml b/.github/workflows/binary-releases.yml index fc464fa4..aa9d2c92 100644 --- a/.github/workflows/binary-releases.yml +++ b/.github/workflows/binary-releases.yml @@ -6,12 +6,14 @@ on: pull_request: branches: [main] paths-ignore: + - '*.md' - 'docs/**' - 'bazel/**' - 'typings/**' - - '*.md' - 'changelog.d/**' - 'assets/**' + - 'openllm-node/**' + - 'Formula/**' defaults: run: shell: bash --noprofile --norc -exo pipefail {0} @@ -29,7 +31,7 @@ jobs: runs-on: ubuntu-latest if: ${{ github.actor != 'dependabot[bot]' }} steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9 # ratchet:actions/checkout@v3 with: fetch-depth: 0 - name: Setup CI @@ -41,11 +43,12 @@ jobs: run: python -m pip install --upgrade build - name: Build run: python -m build + working-directory: ./openllm-python - name: Upload artifacts - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@0b7f8abb1508181956e8e162db84b466c27e18ce # ratchet:actions/upload-artifact@v3 with: name: binary-artefacts - path: dist/* + path: openllm-python/dist/* if-no-files-found: error binaries: name: ${{ matrix.job.target }} (${{ matrix.job.os }}) @@ -92,7 +95,7 @@ jobs: PYAPP_PIP_EXTERNAL: 'true' steps: - name: Checkout code - uses: actions/checkout@v3 + uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9 # ratchet:actions/checkout@v3 with: fetch-depth: 0 - name: Pull latest 
change @@ -102,18 +105,18 @@ jobs: run: >- mkdir $PYAPP_REPO && curl -L https://github.com/ofek/pyapp/releases/download/v$PYAPP_VERSION/source.tar.gz | tar --strip-components=1 -xzf - -C $PYAPP_REPO - name: Set up Python ${{ env.PYTHON_VERSION }} - uses: actions/setup-python@v4 + uses: actions/setup-python@61a6322f88396a6271a6ee3565807d608ecaddd1 # ratchet:actions/setup-python@v4 with: python-version: ${{ env.PYTHON_VERSION }} - name: Install Hatch run: pip install -U hatch - name: Install Rust toolchain - uses: dtolnay/rust-toolchain@stable + uses: dtolnay/rust-toolchain@00b49be78f40fba4e87296b2ead62868750bdd83 # ratchet:dtolnay/rust-toolchain@stable with: targets: ${{ matrix.job.target }} - name: Set up cross compiling if: matrix.job.cross - uses: taiki-e/install-action@v2 + uses: taiki-e/install-action@5265bea15b0d6367d8e293f306c880f1d0271190 # ratchet:taiki-e/install-action@v2 with: tool: cross - name: Configure cross compiling @@ -127,7 +130,7 @@ jobs: fi - name: Download Python artifacts if: ${{ !startsWith(github.event.ref, 'refs/tags') }} - uses: actions/download-artifact@v3 + uses: actions/download-artifact@9bc31d5ccc31df68ecc42ccf4149144866c47d8a # ratchet:actions/download-artifact@v3 with: name: binary-artefacts path: dist @@ -173,14 +176,14 @@ jobs: done - name: Upload staged archive if: runner.os != 'Linux' - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@0b7f8abb1508181956e8e162db84b466c27e18ce # ratchet:actions/upload-artifact@v3 with: name: staged-${{ runner.os }} path: packaging/* if-no-files-found: error - name: Upload archive if: runner.os == 'Linux' - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@0b7f8abb1508181956e8e162db84b466c27e18ce # ratchet:actions/upload-artifact@v3 with: name: standalone path: packaging/* @@ -194,9 +197,9 @@ jobs: if: ${{ github.event_name != 'pull_request' }} steps: - name: Checkout code - uses: actions/checkout@v3 + uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9 # ratchet:actions/checkout@v3 - name: Set up Python ${{ env.PYTHON_VERSION }} - uses: actions/setup-python@v4 + uses: actions/setup-python@61a6322f88396a6271a6ee3565807d608ecaddd1 # ratchet:actions/setup-python@v4 with: python-version: ${{ env.PYTHON_VERSION }} - name: Pull latest change @@ -204,7 +207,7 @@ jobs: - name: Install PyOxidizer ${{ env.PYOXIDIZER_VERSION }} run: pip install pyoxidizer==${{ env.PYOXIDIZER_VERSION }} - name: Download staged binaries - uses: actions/download-artifact@v3 + uses: actions/download-artifact@9bc31d5ccc31df68ecc42ccf4149144866c47d8a # ratchet:actions/download-artifact@v3 with: name: staged-${{ runner.os }} path: archives @@ -233,13 +236,13 @@ jobs: mkdir installers mv build/*/release/*/*.{exe,msi} installers - name: Upload binaries - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@0b7f8abb1508181956e8e162db84b466c27e18ce # ratchet:actions/upload-artifact@v3 with: name: standalone path: archives/* if-no-files-found: error - name: Upload installers - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@0b7f8abb1508181956e8e162db84b466c27e18ce # ratchet:actions/upload-artifact@v3 with: name: installers path: installers/* diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 172a4ec9..fe5ee927 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -11,8 +11,8 @@ on: - "main" paths: - ".github/workflows/build.yaml" - - "src/openllm/bundle/oci/Dockerfile" - - "src/openllm/**" + - 
"openllm-python/src/openllm/bundle/oci/Dockerfile" + - "openllm-python/src/openllm/**" env: LINES: 120 COLUMNS: 120 @@ -37,14 +37,14 @@ jobs: ec2-instance-id: ${{ steps.start-ec2-runner.outputs.ec2-instance-id }} steps: - name: Configure AWS credentials - uses: aws-actions/configure-aws-credentials@v2 + uses: aws-actions/configure-aws-credentials@5fd3084fc36e372ff1fff382a39b10d03659f355 # ratchet:aws-actions/configure-aws-credentials@v2 with: aws-access-key-id: ${{ secrets.AWS_PROD_ACCESS_KEY_ID }} aws-secret-access-key: ${{ secrets.AWS_PROD_SECRET_ACCESS_KEY }} aws-region: ${{ env.AWS_REGION }} - name: Start EC2 Runner id: start-ec2-runner - uses: aarnphm/ec2-github-runner@main + uses: aarnphm/ec2-github-runner@af796d217e24ecbbc5a2c49e780cd90616e2b962 # ratchet:aarnphm/ec2-github-runner@main with: mode: start github-token: ${{ secrets.OPENLLM_PAT }} @@ -68,15 +68,15 @@ jobs: id-token: write security-events: write steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9 # ratchet:actions/checkout@v3 with: fetch-depth: 1 - name: Inject slug/short variables - uses: rlespinasse/github-slug-action@v4.4.1 + uses: rlespinasse/github-slug-action@102b1a064a9b145e56556e22b18b19c624538d94 # ratchet:rlespinasse/github-slug-action@v4.4.1 - name: Set up QEMU - uses: docker/setup-qemu-action@v2.2.0 + uses: docker/setup-qemu-action@2b82ce82d56a2a04d2637cd93a637ae1b359c0a7 # ratchet:docker/setup-qemu-action@v2.2.0 - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v2.9.1 + uses: docker/setup-buildx-action@4c0219f9ac95b02789c1075625400b2acbff50b1 # ratchet:docker/setup-buildx-action@v2.9.1 with: install: true driver-opts: | @@ -84,18 +84,18 @@ jobs: network=host - name: Install cosign if: github.event_name != 'pull_request' - uses: sigstore/cosign-installer@v3.1.1 + uses: sigstore/cosign-installer@6e04d228eb30da1757ee4e1dd75a0ec73a653e06 # ratchet:sigstore/cosign-installer@v3.1.1 with: cosign-release: 'v2.1.1' - name: Login to GitHub Container Registry - uses: docker/login-action@v2.2.0 + uses: docker/login-action@465a07811f14bebb1938fbed4728c6a1ff8901fc # ratchet:docker/login-action@v2.2.0 if: github.event_name != 'pull_request' with: registry: ghcr.io username: ${{ github.actor }} password: ${{ secrets.GITHUB_TOKEN }} - name: Login to public ECR - uses: docker/login-action@v2.2.0 + uses: docker/login-action@465a07811f14bebb1938fbed4728c6a1ff8901fc # ratchet:docker/login-action@v2.2.0 with: registry: public.ecr.aws username: ${{ secrets.AWS_ACCESS_KEY_ID }} @@ -105,7 +105,7 @@ jobs: - name: Extract metadata tags and labels on PRs if: github.event_name == 'pull_request' id: meta-pr - uses: docker/metadata-action@v4.6.0 + uses: docker/metadata-action@818d4b7b91585d195f67373fd9cb0332e31a7175 # ratchet:docker/metadata-action@v4.6.0 with: images: | public.ecr.aws/y5w8i4y6/bentoml/openllm @@ -116,7 +116,7 @@ jobs: - name: Extract metadata tags and labels for main, release or tag if: github.event_name != 'pull_request' id: meta - uses: docker/metadata-action@v4.6.0 + uses: docker/metadata-action@818d4b7b91585d195f67373fd9cb0332e31a7175 # ratchet:docker/metadata-action@v4.6.0 with: flavor: | latest=auto @@ -133,13 +133,13 @@ jobs: org.opencontainers.image.source="https://github.com/bentoml/OpenLLM" - name: Build and push Docker image id: build-and-push - uses: docker/build-push-action@v4 + uses: docker/build-push-action@2eb1c1961a95fc15694676618e422e8ba1d63825 # ratchet:docker/build-push-action@v4 env: AWS_ACCESS_KEY_ID: ${{ 
secrets.AWS_PROD_ACCESS_KEY_ID }} AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_PROD_SECRET_ACCESS_KEY }} with: context: . - file: src/openllm/bundle/oci/Dockerfile + file: openllm-python/src/openllm/bundle/oci/Dockerfile push: true platforms: 'linux/amd64' build-args: | @@ -160,7 +160,7 @@ jobs: COSIGN_EXPERIMENTAL: "true" run: echo "${{ steps.meta.outputs.tags }}" | xargs -I {} cosign sign --yes {}@${{ steps.build-and-push.outputs.digest }} - name: Run Trivy in GitHub SBOM mode and submit results to Dependency Graph - uses: aquasecurity/trivy-action@master + uses: aquasecurity/trivy-action@559eb1224e654a86c844a795e6702a0742c60c72 # ratchet:aquasecurity/trivy-action@master if: ${{ github.event_name != 'pull_request' }} with: image-ref: 'ghcr.io/bentoml/openllm:sha-${{ env.GITHUB_SHA_SHORT }}' @@ -169,7 +169,7 @@ jobs: github-pat: ${{ secrets.GITHUB_TOKEN }} scanners: 'vuln' - name: Run Trivy vulnerability scanner - uses: aquasecurity/trivy-action@master + uses: aquasecurity/trivy-action@559eb1224e654a86c844a795e6702a0742c60c72 # ratchet:aquasecurity/trivy-action@master if: ${{ github.event_name != 'pull_request' }} with: image-ref: 'ghcr.io/bentoml/openllm:sha-${{ env.GITHUB_SHA_SHORT }}' @@ -178,7 +178,7 @@ jobs: severity: 'CRITICAL' scanners: 'vuln' - name: Upload Trivy scan results to GitHub Security tab - uses: github/codeql-action/upload-sarif@v2 + uses: github/codeql-action/upload-sarif@a09933a12a80f87b87005513f0abb1494c27a716 # ratchet:github/codeql-action/upload-sarif@v2 if: ${{ github.event_name != 'pull_request' }} with: sarif_file: 'trivy-results.sarif' @@ -192,13 +192,13 @@ jobs: if: ${{ always() }} # required to stop the runner even if the error happened in the previous jobs steps: - name: Configure AWS credentials - uses: aws-actions/configure-aws-credentials@v2 + uses: aws-actions/configure-aws-credentials@5fd3084fc36e372ff1fff382a39b10d03659f355 # ratchet:aws-actions/configure-aws-credentials@v2 with: aws-access-key-id: ${{ secrets.AWS_PROD_ACCESS_KEY_ID }} aws-secret-access-key: ${{ secrets.AWS_PROD_SECRET_ACCESS_KEY }} aws-region: ${{ env.AWS_REGION }} - name: Stop EC2 runner - uses: aarnphm/ec2-github-runner@main + uses: aarnphm/ec2-github-runner@af796d217e24ecbbc5a2c49e780cd90616e2b962 # ratchet:aarnphm/ec2-github-runner@main with: mode: stop github-token: ${{ secrets.OPENLLM_PAT }} diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 0c27ae8a..f86b2c91 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -34,7 +34,7 @@ jobs: runs-on: ubuntu-latest name: code-quality steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9 # ratchet:actions/checkout@v3 with: fetch-depth: 0 - name: Setup CI @@ -42,8 +42,8 @@ jobs: with: python-version: ${{ env.STABLE_PYTHON_VERSION }} - name: Install OpenLLM - run: pip install -e . 
- - uses: pre-commit/action@v3.0.0 + run: pip install -e ./openllm-python + - uses: pre-commit/action@646c83fcd040023954eafda54b4db0192ce70507 # ratchet:pre-commit/action@v3.0.0 with: extra_args: --verbose tests: @@ -58,7 +58,7 @@ jobs: - os: 'windows-latest' name: tests (${{ matrix.python-version }}.${{ matrix.os }}) steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9 # ratchet:actions/checkout@v3 with: fetch-depth: 0 - name: Setup CI @@ -70,17 +70,18 @@ jobs: - name: Disambiguate coverage filename run: mv .coverage ".coverage.${{ matrix.os }}.${{ matrix.python-version }}" - name: Upload coverage data - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@0b7f8abb1508181956e8e162db84b466c27e18ce # ratchet:actions/upload-artifact@v3 with: name: coverage-data path: .coverage.* coverage: name: report-coverage runs-on: ubuntu-latest + if: false needs: - tests steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9 # ratchet:actions/checkout@v3 with: fetch-depth: 0 - name: Setup CI @@ -88,17 +89,17 @@ jobs: with: python-version: ${{ env.STABLE_PYTHON_VERSION }} - name: Download coverage data - uses: actions/download-artifact@v3 + uses: actions/download-artifact@9bc31d5ccc31df68ecc42ccf4149144866c47d8a # ratchet:actions/download-artifact@v3 with: name: coverage-data - name: Combine coverage data run: hatch run coverage:combine - name: Export coverage reports run: | - hatch run coverage:report-xml - hatch run coverage:report-uncovered-html + hatch run coverage:report-xml openllm-python + hatch run coverage:report-uncovered-html openllm-python - name: Upload uncovered HTML report - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@0b7f8abb1508181956e8e162db84b466c27e18ce # ratchet:actions/upload-artifact@v3 with: name: uncovered-html-report path: htmlcov @@ -109,7 +110,7 @@ jobs: run: hatch run coverage:write-summary-report - name: Update coverage pull request comment if: github.event_name == 'pull_request' && !github.event.pull_request.head.repo.fork - uses: marocchino/sticky-pull-request-comment@v2 + uses: marocchino/sticky-pull-request-comment@f6a2580ed520ae15da6076e7410b088d1c5dddd9 # ratchet:marocchino/sticky-pull-request-comment@v2 with: path: coverage-report.md cli-benchmark: @@ -118,7 +119,7 @@ jobs: env: HYPERFINE_VERSION: '1.12.0' steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9 # ratchet:actions/checkout@v3 with: fetch-depth: 0 - name: Install hyperfine @@ -130,7 +131,7 @@ jobs: with: python-version: ${{ env.STABLE_PYTHON_VERSION }} - name: Install self - run: pip install . 
+ run: pip install ./openllm-python - name: Speed run: hyperfine -m 100 --warmup 10 openllm brew-dry-run: @@ -146,7 +147,6 @@ jobs: evergreen: # https://github.com/marketplace/actions/alls-green#why if: always() needs: - - coverage - tests - quality - cli-benchmark @@ -154,7 +154,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Decide whether the needed jobs succeeded or failed - uses: re-actors/alls-green@release/v1 + uses: re-actors/alls-green@05ac9388f0aebcb5727afa17fcccfecd6f8ec5fe # ratchet:re-actors/alls-green@release/v1 with: jobs: ${{ toJSON(needs) }} concurrency: diff --git a/.github/workflows/cleanup.yml b/.github/workflows/cleanup.yml index 7b9d7051..0cab3126 100644 --- a/.github/workflows/cleanup.yml +++ b/.github/workflows/cleanup.yml @@ -8,7 +8,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Check out code - uses: actions/checkout@v3 + uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9 # ratchet:actions/checkout@v3 - name: Cleanup run: "gh extension install actions/gh-actions-cache\n\nREPO=${{ github.repository }}\nBRANCH=\"refs/pull/${{ github.event.pull_request.number }}/merge\"\n\necho \"Fetching list of cache key\"\ncacheKeysForPR=$(gh actions-cache list -R $REPO -B $BRANCH -L 100 | cut -f 1 )\n\n## Setting this to not fail the workflow while deleting cache keys. \nset +e\necho \"Deleting caches...\"\nfor cacheKey in $cacheKeysForPR\ndo\n gh actions-cache delete $cacheKey -R $REPO -B $BRANCH --confirm\ndone\necho \"Done\"\n" env: diff --git a/.github/workflows/compile-pypi.yml b/.github/workflows/compile-pypi.yml index 0aa2b985..e2e8c742 100644 --- a/.github/workflows/compile-pypi.yml +++ b/.github/workflows/compile-pypi.yml @@ -35,7 +35,7 @@ jobs: name: Pure wheels and sdist distribution runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9 # ratchet:actions/checkout@v3 with: fetch-depth: 0 ref: '${{ inputs.tags }}' @@ -43,15 +43,19 @@ jobs: uses: ./.github/actions/setup-repo - name: Build run: hatch build + working-directory: openllm-python - name: Upload artifacts - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@0b7f8abb1508181956e8e162db84b466c27e18ce # ratchet:actions/upload-artifact@v3 with: name: python-artefacts - path: dist/* + path: openllm-python/dist/* if-no-files-found: error mypyc: name: Compiled mypyc wheels (${{ matrix.name }}) runs-on: ${{ matrix.os }} + defaults: + run: + working-directory: ./openllm-python strategy: fail-fast: false matrix: @@ -71,22 +75,26 @@ jobs: name: macos-universal2 macos_arch: "universal2" steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9 # ratchet:actions/checkout@v3 with: fetch-depth: 0 ref: '${{ inputs.tags }}' - name: Setup CI uses: ./.github/actions/setup-repo with: - python-version: 3.9 + python-version: 3.8 - name: Build wheels via cibuildwheel - uses: pypa/cibuildwheel@v2.15.0 + uses: pypa/cibuildwheel@39a63b5912f086dd459cf6fcb13dcdd3fe3bc24d # ratchet:pypa/cibuildwheel@v2.15.0 + with: + package-dir: openllm-python + config-file: pyproject.toml env: + CIBW_PRERELEASE_PYTHONS: True CIBW_BEFORE_BUILD_MACOS: "rustup target add aarch64-apple-darwin" CIBW_ARCHS_MACOS: "${{ matrix.macos_arch }}" MYPYPATH: /project/typings - name: Upload wheels as workflow artifacts - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@0b7f8abb1508181956e8e162db84b466c27e18ce # ratchet:actions/upload-artifact@v3 with: name: ${{ matrix.name }}-mypyc-wheels path: 
./wheelhouse/*.whl @@ -98,27 +106,27 @@ jobs: steps: # NOTE: Keep this section in sync with compile-pypi.yml - name: Download Python artifacts - uses: actions/download-artifact@v3 + uses: actions/download-artifact@9bc31d5ccc31df68ecc42ccf4149144866c47d8a # ratchet:actions/download-artifact@v3 with: name: python-artefacts path: dist - name: Download Linux x86_64 compiled artifacts - uses: actions/download-artifact@v3 + uses: actions/download-artifact@9bc31d5ccc31df68ecc42ccf4149144866c47d8a # ratchet:actions/download-artifact@v3 with: name: linux-x86_64-mypyc-wheels path: dist - name: Download MacOS x86_64 compiled artifacts - uses: actions/download-artifact@v3 + uses: actions/download-artifact@9bc31d5ccc31df68ecc42ccf4149144866c47d8a # ratchet:actions/download-artifact@v3 with: name: macos-x86_64-mypyc-wheels path: dist - name: Download MacOS arm64 compiled artifacts - uses: actions/download-artifact@v3 + uses: actions/download-artifact@9bc31d5ccc31df68ecc42ccf4149144866c47d8a # ratchet:actions/download-artifact@v3 with: name: macos-arm64-mypyc-wheels path: dist - name: Download MacOS universal2 compiled artifacts - uses: actions/download-artifact@v3 + uses: actions/download-artifact@9bc31d5ccc31df68ecc42ccf4149144866c47d8a # ratchet:actions/download-artifact@v3 with: name: macos-universal2-mypyc-wheels path: dist @@ -134,7 +142,7 @@ jobs: success: ${{ steps.everygreen.outputs.success }} steps: - name: Decide whether the needed jobs succeeded or failed - uses: re-actors/alls-green@release/v1 + uses: re-actors/alls-green@05ac9388f0aebcb5727afa17fcccfecd6f8ec5fe # ratchet:re-actors/alls-green@release/v1 id: evergreen with: jobs: ${{ toJSON(needs) }} diff --git a/.github/workflows/create-releases.yml b/.github/workflows/create-releases.yml index 276a72cf..aef5c489 100644 --- a/.github/workflows/create-releases.yml +++ b/.github/workflows/create-releases.yml @@ -30,7 +30,7 @@ jobs: outputs: version: ${{ steps.version.outputs.version }} steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9 # ratchet:actions/checkout@v3 with: fetch-depth: 0 token: ${{ secrets.OPENLLM_PAT }} @@ -40,7 +40,7 @@ jobs: python-version: '3.11' - name: Import bot's GPG key for signing commits id: import-gpg - uses: crazy-max/ghaction-import-gpg@v5 + uses: crazy-max/ghaction-import-gpg@72b6676b71ab476b77e676928516f6982eef7a41 # ratchet:crazy-max/ghaction-import-gpg@v5 with: gpg_private_key: ${{ secrets.GPG_PRIVATE_KEY }} passphrase: ${{ secrets.GPG_PASSPHRASE }} @@ -68,7 +68,7 @@ jobs: needs: - release name: Compile PyPI distribution for OpenLLM - uses: bentoml/OpenLLM/.github/workflows/compile-pypi.yml@main + uses: bentoml/OpenLLM/.github/workflows/compile-pypi.yml@2d33100d729008e322209b090f9dcbb40840b5f4 # ratchet:bentoml/OpenLLM/.github/workflows/compile-pypi.yml@main with: tags: ${{ needs.release.outputs.version }} publish-python: @@ -78,38 +78,40 @@ jobs: permissions: id-token: write steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9 # ratchet:actions/checkout@v3 with: ref: '${{ needs.release.outputs.version }}' token: ${{ secrets.OPENLLM_PAT }} # NOTE: Keep this section in sync with compile-pypi.yml - name: Download Python artifacts - uses: actions/download-artifact@v3 + uses: actions/download-artifact@9bc31d5ccc31df68ecc42ccf4149144866c47d8a # ratchet:actions/download-artifact@v3 with: name: python-artefacts path: dist - name: Download Linux x86_64 compiled artifacts - uses: actions/download-artifact@v3 + uses: 
actions/download-artifact@9bc31d5ccc31df68ecc42ccf4149144866c47d8a # ratchet:actions/download-artifact@v3 with: name: linux-x86_64-mypyc-wheels path: dist - name: Download MacOS x86_64 compiled artifacts - uses: actions/download-artifact@v3 + uses: actions/download-artifact@9bc31d5ccc31df68ecc42ccf4149144866c47d8a # ratchet:actions/download-artifact@v3 with: name: macos-x86_64-mypyc-wheels path: dist - name: Download MacOS arm64 compiled artifacts - uses: actions/download-artifact@v3 + uses: actions/download-artifact@9bc31d5ccc31df68ecc42ccf4149144866c47d8a # ratchet:actions/download-artifact@v3 with: name: macos-arm64-mypyc-wheels path: dist - name: Download MacOS universal2 compiled artifacts - uses: actions/download-artifact@v3 + uses: actions/download-artifact@9bc31d5ccc31df68ecc42ccf4149144866c47d8a # ratchet:actions/download-artifact@v3 with: name: macos-universal2-mypyc-wheels path: dist + - name: Smoke test compiled artefacts + run: ls -R dist - name: Publish to PyPI - uses: pypa/gh-action-pypi-publish@release/v1 + uses: pypa/gh-action-pypi-publish@b7f401de30cb6434a1e19f805ff006643653240e # ratchet:pypa/gh-action-pypi-publish@release/v1 with: print-hash: true prepare-next-dev-cycle: @@ -122,18 +124,18 @@ jobs: contents: write id-token: write steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9 # ratchet:actions/checkout@v3 with: fetch-depth: 0 - name: Setup python - uses: actions/setup-python@v4 + uses: actions/setup-python@61a6322f88396a6271a6ee3565807d608ecaddd1 # ratchet:actions/setup-python@v4 - name: Install dependencies run: pip install hatch towncrier - name: Install jq and curl run: sudo apt-get install -y jq curl - name: Import bot's GPG key for signing commits id: import-gpg-key - uses: crazy-max/ghaction-import-gpg@v5 + uses: crazy-max/ghaction-import-gpg@72b6676b71ab476b77e676928516f6982eef7a41 # ratchet:crazy-max/ghaction-import-gpg@v5 with: gpg_private_key: ${{ secrets.GPG_PRIVATE_KEY }} passphrase: ${{ secrets.GPG_PASSPHRASE }} @@ -162,16 +164,16 @@ jobs: DEV_VERSION="$VNUM1.$VNUM2.$VNUM3.dev0" echo "Bumping version to ${DEV_VERSION}..." 
jq --arg release_version "${DEV_VERSION}" '.version = $release_version' < package.json > package.json.tmp && mv package.json.tmp package.json - pushd src/openllm-node &>/dev/null + pushd openllm-node &>/dev/null jq --arg release_version "${DEV_VERSION}" '.version = $release_version' < package.json > package.json.tmp && mv package.json.tmp package.json popd &>/dev/null - git add package.json src/openllm-node/package.json && git commit -S -sm "infra: bump to dev version of ${DEV_VERSION} [generated] [skip ci]" + git add package.json openllm-node/package.json && git commit -S -sm "infra: bump to dev version of ${DEV_VERSION} [generated] [skip ci]" git push origin HEAD:main binary-distribution: if: github.repository_owner == 'bentoml' needs: compile-pypi name: Create binary/wheels distribution - uses: bentoml/OpenLLM/.github/workflows/binary-releases.yml@main + uses: bentoml/OpenLLM/.github/workflows/binary-releases.yml@2d33100d729008e322209b090f9dcbb40840b5f4 # ratchet:bentoml/OpenLLM/.github/workflows/binary-releases.yml@main release-notes: if: github.repository_owner == 'bentoml' needs: @@ -179,7 +181,7 @@ jobs: - publish-python - binary-distribution name: Create release notes and setup for next cycle - uses: bentoml/OpenLLM/.github/workflows/release-notes.yml@main + uses: bentoml/OpenLLM/.github/workflows/release-notes.yml@2d33100d729008e322209b090f9dcbb40840b5f4 # ratchet:bentoml/OpenLLM/.github/workflows/release-notes.yml@main with: tags: ${{ needs.release.outputs.version }} bump-homebrew-tap: @@ -192,7 +194,7 @@ jobs: contents: write id-token: write steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9 # ratchet:actions/checkout@v3 with: fetch-depth: 0 - name: Setup CI @@ -203,7 +205,7 @@ jobs: run: sudo apt-get install -y jq curl - name: Import bot's GPG key for signing commits id: import-gpg-key - uses: crazy-max/ghaction-import-gpg@v5 + uses: crazy-max/ghaction-import-gpg@72b6676b71ab476b77e676928516f6982eef7a41 # ratchet:crazy-max/ghaction-import-gpg@v5 with: gpg_private_key: ${{ secrets.GPG_PRIVATE_KEY }} passphrase: ${{ secrets.GPG_PASSPHRASE }} diff --git a/.github/workflows/release-notes.yml b/.github/workflows/release-notes.yml index b0e35acf..dc4a68d1 100644 --- a/.github/workflows/release-notes.yml +++ b/.github/workflows/release-notes.yml @@ -24,7 +24,7 @@ jobs: contents: write id-token: write steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9 # ratchet:actions/checkout@v3 with: fetch-depth: 0 ref: '${{ inputs.tags }}' @@ -35,47 +35,47 @@ jobs: - name: Create release notes run: ./.github/actions/create_release_and_archive.sh ${{ inputs.tags }} - name: Download Python artifacts - uses: actions/download-artifact@v3 + uses: actions/download-artifact@9bc31d5ccc31df68ecc42ccf4149144866c47d8a # ratchet:actions/download-artifact@v3 with: name: python-artefacts path: dist - name: Download Linux x86_64 compiled artifacts - uses: actions/download-artifact@v3 + uses: actions/download-artifact@9bc31d5ccc31df68ecc42ccf4149144866c47d8a # ratchet:actions/download-artifact@v3 with: name: linux-x86_64-mypyc-wheels path: dist - name: Download MacOS x86_64 compiled artifacts - uses: actions/download-artifact@v3 + uses: actions/download-artifact@9bc31d5ccc31df68ecc42ccf4149144866c47d8a # ratchet:actions/download-artifact@v3 with: name: macos-x86_64-mypyc-wheels path: dist - name: Download MacOS arm64 compiled artifacts - uses: actions/download-artifact@v3 + uses: 
actions/download-artifact@9bc31d5ccc31df68ecc42ccf4149144866c47d8a # ratchet:actions/download-artifact@v3 with: name: macos-arm64-mypyc-wheels path: dist - name: Download MacOS universal2 compiled artifacts - uses: actions/download-artifact@v3 + uses: actions/download-artifact@9bc31d5ccc31df68ecc42ccf4149144866c47d8a # ratchet:actions/download-artifact@v3 with: name: macos-universal2-mypyc-wheels path: dist - name: Download binaries - uses: actions/download-artifact@v3 + uses: actions/download-artifact@9bc31d5ccc31df68ecc42ccf4149144866c47d8a # ratchet:actions/download-artifact@v3 with: name: standalone path: archives - name: Download standalone MacOS - uses: actions/download-artifact@v3 + uses: actions/download-artifact@9bc31d5ccc31df68ecc42ccf4149144866c47d8a # ratchet:actions/download-artifact@v3 with: name: staged-macOS path: archives - name: Download installers - uses: actions/download-artifact@v3 + uses: actions/download-artifact@9bc31d5ccc31df68ecc42ccf4149144866c47d8a # ratchet:actions/download-artifact@v3 with: name: installers path: installers - name: Create release - uses: softprops/action-gh-release@v1 + uses: softprops/action-gh-release@de2c0eb89ae2a093876385947365aca7b0e5f844 # ratchet:softprops/action-gh-release@v1 with: # Use GH feature to populate the changelog automatically generate_release_notes: true diff --git a/.gitignore b/.gitignore index 45dff7a8..5496bede 100644 --- a/.gitignore +++ b/.gitignore @@ -141,4 +141,4 @@ pyapp /target .pdm-python -/src/openllm/_version.py +/openllm-python/src/openllm/_version.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 80826181..e754b4ac 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -14,7 +14,7 @@ ci: autoupdate_schedule: weekly - skip: [check-models-table-update, changelog-dry-run, pyright, yapf, mypy] + skip: [check-models-table-update, changelog-dry-run, pyright, yapf, mypy, sync-readme] autofix_commit_msg: "ci: auto fixes from pre-commit.ci\n\nFor more information, see https://pre-commit.ci" autoupdate_commit_msg: 'ci: pre-commit autoupdate [pre-commit.ci]' autofix_prs: false @@ -37,7 +37,7 @@ repos: - id: interrogate verbose: true types: [python] - exclude: ^(docs|tools|tests) + exclude: ^(docs|tools|openllm-python/tests) args: [--config=pyproject.toml] - repo: https://github.com/pre-commit/mirrors-mypy rev: v1.5.0 @@ -49,9 +49,11 @@ repos: examples/.*| tools/.*| tests/.*| - src/openllm/playground/.*| + openllm-python/src/openllm/playground/.*| + openllm-python/tests/.*| .github/.*| - cz.py + cz.py | + hatch_build.py )$ additional_dependencies: - click==8.1.3 @@ -72,7 +74,7 @@ repos: verbose: true exclude: | (?x)^( - tests/models/.* + openllm-python/tests/models/.* )$ - id: check-yaml args: ['--unsafe'] @@ -93,6 +95,12 @@ repos: language: script verbose: true files: README.md + - id: sync-readme + name: sync readme with python core library + entry: ./tools/sync-readme + language: script + verbose: true + files: README.md - id: changelog-dry-run name: Running changelog dry-run entry: hatch run changelog diff --git a/DEVELOPMENT.md b/DEVELOPMENT.md index 48d853e5..6061e67d 100644 --- a/DEVELOPMENT.md +++ b/DEVELOPMENT.md @@ -65,24 +65,31 @@ Before you can start developing, you'll need to set up your environment: This will automatically enter a virtual environment and update the relevant dependencies. 
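The note below (and the `pip install -e ./openllm-python` lines this diff adds to `ci.yml` and `hatch.toml`) all reduce to one editable install pointed at the relocated package directory. A minimal sketch of scripting it, assuming a checkout of the repository root; the helper name is hypothetical:

```python
# Hypothetical helper mirroring the `pip install -e ./openllm-python`
# command used in this diff's ci.yml and hatch.toml changes.
import subprocess
import sys


def editable_install(repo_root: str = ".") -> None:
    # Use the active interpreter's pip so the editable install
    # lands in the current environment.
    subprocess.check_call([sys.executable, "-m", "pip", "install", "-e", f"{repo_root}/openllm-python"])


if __name__ == "__main__":
    editable_install()
```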
+> [!NOTE]
+> If you want an editable install, make sure to install it from the `openllm-python` folder.
+
## Project Structure

Here's a high-level overview of our project structure:

-```
+```prolog
openllm/
-├── examples # Usage demonstration scripts
-├── src
-│ ├── openllm # openllm core
-│ └── openllm-node # openllm nodejs library
-├── tests # Automated Tests
-├── tools # Utilities Script
-├── typings # Typing Checking Utilities Module and Classes
-├── DEVELOPMENT.md # The project's Developer Guide
-├── LICENSE # Use terms and conditions
-├── package.json # Node.js or JavaScript dependencies
-├── pyproject.toml # Python Project Specification File (PEP 518)
-└── README.md # The project's README file
+├── ADDING_NEW_MODEL.md # How to add a new model
+├── CHANGELOG.md # Generated changelog
+├── CITATION.cff # Citation File Format
+├── DEVELOPMENT.md # The project's Developer Guide
+├── Formula # Homebrew Formula
+├── LICENSE.md # Use terms and conditions
+├── README.md # The project's README file
+├── STYLE.md # The project's Style Guide
+├── cz.py # code-golf commitizen
+├── examples # Usage demonstration scripts
+├── openllm-node # openllm node library
+├── openllm-python # openllm python library
+│   └── src
+│      └── openllm # openllm core implementation
+├── pyproject.toml # Python Project Specification File (PEP 518)
+└── tools # Utility scripts
```

## Development Workflow

@@ -167,13 +174,25 @@ hatch run compile
```

> [!IMPORTANT]
-> This will compiled some performance sensitive modules with mypyc. The compiled `.so` or `.pyd` can be found
-> under `/src/openllm`. If you run into any issue, run `hatch run recompile`
+> This will compile some performance-sensitive modules with mypyc.
+> The compiled `.so` or `.pyd` can be found
+> under `/openllm-python/src/openllm`. If you run into any issue, run `hatch run recompile`.

## Style

See [STYLE.md](STYLE.md) for our style guide.

+## Working with OpenLLM's CI/CD
+
+After you change or update anything CI-related under `.github`, run `./tools/lock-actions` to lock the action versions.
+
+## Install from a git archive
+
+```bash
+pip install 'https://github.com/bentoml/OpenLLM/archive/main.tar.gz#subdirectory=openllm-python'
+```
+
+
## Releasing a New Version

To release a new version, use `./tools/run-release-action`. It requires `gh`,
diff --git a/README.md b/README.md
index adbfc459..1327dd53 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-![Banner for OpenLLM](/assets/main-banner.png)
+![Banner for OpenLLM](/.github/assets/main-banner.png)
@@ -58,7 +58,7 @@ Images or deploy as serverless endpoint via
-![Gif showing OpenLLM Intro](/assets/output.gif)
+![Gif showing OpenLLM Intro](/.github/assets/output.gif)
@@ -136,9 +136,9 @@ openllm start flan-t5 --model-id google/flan-t5-large
```

> [!NOTE]
-> `openllm` also supports all variants of fine-tuning weights,
-> custom model path as well as quantized weights for any of the supported models
-> as long as it can be loaded with the model architecture. Refer to
+> `openllm` also supports all variants of fine-tuning weights, custom
+> model paths as well as quantized weights for any of the supported models, as
+> long as they can be loaded with the model architecture. Refer to
> [supported models](https://github.com/bentoml/OpenLLM/tree/main#-supported-models)
> section for models' architecture.
@@ -532,7 +532,8 @@ client.embed("I like to eat apples")
```

> [!NOTE]
-> Currently, the following model family supports embeddings: Llama, T5 (Flan-T5, FastChat, etc.), ChatGLM
+> Currently, the following model families support embeddings: Llama, T5
+> (Flan-T5, FastChat, etc.), ChatGLM

## ⚙️ Integrations

@@ -582,8 +583,9 @@ llm("What is the difference between a duck and a goose? And why there are so man
```

> [!IMPORTANT]
-> By default, OpenLLM use `safetensors` format for saving models. If the model doesn't support safetensors,
-> make sure to pass `serialisation="legacy"` to use the legacy PyTorch bin format.
+> By default, OpenLLM uses the `safetensors` format for saving models.
+> If the model doesn't support safetensors, make sure to pass
+> `serialisation="legacy"` to use the legacy PyTorch bin format.

`langchain.llms.OpenLLM` has the capability to interact with remote OpenLLM
Server. Given there is an OpenLLM server deployed elsewhere, you can connect to
@@ -638,8 +640,8 @@ agent.run("Is the following `text` positive or negative?", text="I don't like ho
```

> [!IMPORTANT]
-> Only `starcoder` is currently supported with Agent integration. The
-> example above was also run with four T4s on EC2 `g4dn.12xlarge`
+> Only `starcoder` is currently supported with Agent integration.
+> The example above was also run with four T4s on EC2 `g4dn.12xlarge`.

If you want to use OpenLLM client to ask questions to the running agent, you can also do so:

@@ -657,7 +659,7 @@ client.ask_agent(
-![Gif showing Agent integration](/assets/agent.gif)
+![Gif showing Agent integration](/.github/assets/agent.gif)
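The hunks above only re-wrap the callouts; the LangChain code samples around them are elided from this diff. For orientation, connecting `langchain.llms.OpenLLM` to an already-running server looks roughly like the sketch below — the `server_url`/`server_type` keyword arguments are assumed from LangChain's wrapper and are not shown in this diff:

```python
# Sketch: point LangChain's OpenLLM wrapper at a server started elsewhere
# with `openllm start`, instead of loading a model in-process.
from langchain.llms import OpenLLM

llm = OpenLLM(server_url="http://localhost:3000", server_type="http")
print(llm("What is the difference between a duck and a goose?"))
```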
@@ -751,7 +753,7 @@ OpenLLM collects usage data to enhance user experience and improve the product. We only report OpenLLM's internal API calls and ensure maximum privacy by excluding sensitive information. We will never collect user code, model data, or stack traces. For usage tracking, check out the -[code](./src/openllm/utils/analytics.py). +[code](https://github.com/bentoml/OpenLLM/blob/main/openllm-python/src/openllm/utils/analytics.py). You can opt out of usage tracking by using the `--do-not-track` CLI option: diff --git a/compile.sh b/compile.sh index c3e8a361..8b1e843a 100644 --- a/compile.sh +++ b/compile.sh @@ -1,4 +1,4 @@ #!/usr/bin/env bash SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &> /dev/null && pwd) -HATCH_VERBOSE=3 MYPY_CONFIG_FILE_DIR="${SCRIPT_DIR}" HATCH_BUILD_HOOKS_ENABLE=1 MYPYPATH="${SCRIPT_DIR}/typings" python -m build -w -C--global-option=--verbose "$@" +HATCH_VERBOSE=3 MYPY_CONFIG_FILE_DIR="${SCRIPT_DIR}" HATCH_BUILD_HOOKS_ENABLE=1 MYPYPATH="${SCRIPT_DIR}/typings" python -m build openllm-python -w -C--global-option=--verbose "$@" hatch clean diff --git a/cz.py b/cz.py index e363e732..8068b77a 100755 --- a/cz.py +++ b/cz.py @@ -1,29 +1,24 @@ #!/usr/bin/env python3 from __future__ import annotations -import itertools -import os -import token -import tokenize - +import itertools, os, token, tokenize from tabulate import tabulate TOKEN_WHITELIST = [token.OP, token.NAME, token.NUMBER, token.STRING] - -if __name__ == "__main__": +def main() -> int: headers = ["Name", "Lines", "Tokens/Line"] table = [] - for path, _subdirs, files in os.walk(os.path.join("src", "openllm")): + for path, _, files in os.walk(os.path.join("openllm-python", "src", "openllm")): for name in files: if not name.endswith(".py"): continue filepath = os.path.join(path, name) with tokenize.open(filepath) as file_: tokens = [t for t in tokenize.generate_tokens(file_.readline) if t.type in TOKEN_WHITELIST] token_count, line_count = len(tokens), len(set([t.start[0] for t in tokens])) - table.append([filepath, line_count, token_count / line_count if line_count != 0 else 0]) - + table.append([filepath.replace(os.path.join("openllm-python","src"), ""), line_count, token_count / line_count if line_count != 0 else 0]) print(tabulate([headers, *sorted(table, key=lambda x: -x[1])], headers="firstrow", floatfmt=".1f") + "\n") - for dir_name, group in itertools.groupby(sorted([(x[0].rsplit("/", 1)[0], x[1]) for x in table]), key=lambda x: x[0]): - print(f"{dir_name:40s} : {sum([x[1] for x in group]):6d}") - + print(f"{dir_name:35s} : {sum([x[1] for x in group]):6d}") print(f"\ntotal line count: {sum([x[1] for x in table])}") + return 0 + +if __name__ == "__main__": raise SystemExit(main()) diff --git a/hatch.toml b/hatch.toml index 027abb15..6a75a848 100644 --- a/hatch.toml +++ b/hatch.toml @@ -5,7 +5,7 @@ content-type = "text/markdown" text = """

- Banner for OpenLLM + Banner for OpenLLM

@@ -18,7 +18,7 @@ start-after = "\n" text = """

- Gif showing OpenLLM Intro + Gif showing OpenLLM Intro

""" [[metadata.hooks.fancy-pypi-readme.fragments]] @@ -29,7 +29,7 @@ start-after = "\n" text = """

- Gif showing Agent integration + Gif showing Agent integration

""" [[metadata.hooks.fancy-pypi-readme.fragments]] @@ -55,6 +55,7 @@ text = """ """ [envs.default] dependencies = [ + "openllm[opt,chatglm,fine-tune] @ {root:uri}/openllm-python", # NOTE: To run all hooks "pre-commit", # NOTE: towncrier for changelog @@ -73,6 +74,9 @@ dependencies = [ "types-PyYAML", "types-protobuf", ] +pre-install-commands = [ + "pip install -e ./openllm-python", # +] [envs.default.scripts] changelog = "towncrier build --version main --draft" check-stubs = ["./tools/update-config-stubs.py", "./tools/update-models-import.py", "update-dummy"] @@ -92,6 +96,7 @@ typing = ["- pre-commit run mypy {args:-a}", "- pre-commit run pyright {args:-a} update-dummy = ["- ./tools/update-dummy.py", "./tools/update-dummy.py"] [envs.tests] dependencies = [ + "openllm[opt,chatglm,fine-tune] @ {root:uri}/openllm-python", # NOTE: interact with docker for container tests. "docker", # NOTE: Tests strategies with Hypothesis and pytest, and snapshot testing with syrupy @@ -108,15 +113,14 @@ dependencies = [ "hypothesis", "syrupy", ] -features = ["flan-t5", "baichuan"] skip-install = false template = "tests" [envs.tests.scripts] _run_script = "pytest --cov --cov-report={env:COVERAGE_REPORT:term-missing} --cov-config=pyproject.toml -vv" -distributed = "_run_script --reruns 5 --reruns-delay 3 --ignore tests/models -n 3 -r aR {args:tests}" -models = "_run_script -s {args:tests/models}" -python = "_run_script --reruns 5 --reruns-delay 3 --ignore tests/models -r aR {args:tests}" -snapshot-models = "_run_script -s --snapshot-update {args:tests/models}" +distributed = "_run_script --reruns 5 --reruns-delay 3 --ignore openllm-python/tests/models -n 3 -r aR {args:openllm-python/tests}" +models = "_run_script -s {args:openllm-python/tests/models}" +python = "_run_script --reruns 5 --reruns-delay 3 --ignore openllm-python/tests/models -r aR {args:openllm-python/tests}" +snapshot-models = "_run_script -s --snapshot-update {args:openllm-python/tests/models}" [envs.tests.overrides] env.GITHUB_ACTIONS.env-vars = "COVERAGE_REPORT=" [envs.coverage] @@ -125,6 +129,6 @@ detached = true [envs.coverage.scripts] combine = "coverage combine {args}" generate-summary = "python tools/generate-coverage.py" -report-uncovered-html = "coverage html --skip-covered --skip-empty" -report-xml = "coverage xml" +report-uncovered-html = "coverage html --skip-covered --skip-empty {args}" +report-xml = "coverage xml {args}" write-summary-report = "python tools/write-coverage-report.py" diff --git a/hatch_build.py b/hatch_build.py new file mode 100644 index 00000000..07d2c476 --- /dev/null +++ b/hatch_build.py @@ -0,0 +1,7 @@ +from __future__ import annotations +import os, typing as t +from hatchling.metadata.plugin.interface import MetadataHookInterface + +class CustomMetadataHook(MetadataHookInterface): + def update(self, metadata: dict[str, t.Any]) -> None: + if os.environ.get("HATCH_ENV_ACTIVE", "not-dev") != "dev": metadata["dependencies"] = [f"openllm[opt,chatglm,fine-tune]=={metadata['version']}"] diff --git a/nightly-requirements-gpu.txt b/nightly-requirements-gpu.txt deleted file mode 100644 index 387d3ab8..00000000 --- a/nightly-requirements-gpu.txt +++ /dev/null @@ -1,5 +0,0 @@ -# This file is generated by `tools/dependencies.py`. # DO NOT EDIT -# For Jax, Flax, Tensorflow, PyTorch CUDA support, please refers to their official installation for your specific setup. 
--r nightly-requirements.txt --e .[all] -vllm @ git+https://github.com/vllm-project/vllm.git@main diff --git a/nightly-requirements.txt b/nightly-requirements.txt deleted file mode 100644 index 7ec37722..00000000 --- a/nightly-requirements.txt +++ /dev/null @@ -1,9 +0,0 @@ -# This file is generated by `tools/dependencies.py`. DO NOT EDIT --e .[playground,flan-t5] -bentoml[grpc,io] @ git+https://github.com/bentoml/bentoml.git@main -peft @ git+https://github.com/huggingface/peft.git@main -transformers[torch,tokenizers,accelerate] @ git+https://github.com/huggingface/transformers.git@main -optimum @ git+https://github.com/huggingface/optimum.git@main -accelerate @ git+https://github.com/huggingface/accelerate.git@main -bitsandbytes @ git+https://github.com/TimDettmers/bitsandbytes.git@main -trl @ git+https://github.com/lvwerra/trl.git@main diff --git a/src/openllm-node/package.json b/openllm-node/package.json similarity index 100% rename from src/openllm-node/package.json rename to openllm-node/package.json diff --git a/src/openllm-node/src/index.ts b/openllm-node/src/index.ts similarity index 100% rename from src/openllm-node/src/index.ts rename to openllm-node/src/index.ts diff --git a/src/openllm-node/tsconfig.cjs.json b/openllm-node/tsconfig.cjs.json similarity index 100% rename from src/openllm-node/tsconfig.cjs.json rename to openllm-node/tsconfig.cjs.json diff --git a/src/openllm-node/tsconfig.json b/openllm-node/tsconfig.json similarity index 100% rename from src/openllm-node/tsconfig.json rename to openllm-node/tsconfig.json diff --git a/openllm-python/.git_archival.txt b/openllm-python/.git_archival.txt new file mode 120000 index 00000000..d7a42b25 --- /dev/null +++ b/openllm-python/.git_archival.txt @@ -0,0 +1 @@ +../.git_archival.txt \ No newline at end of file diff --git a/openllm-python/LICENSE.md b/openllm-python/LICENSE.md new file mode 100644 index 00000000..7fd0f057 --- /dev/null +++ b/openllm-python/LICENSE.md @@ -0,0 +1,194 @@ +Apache License +============== + +_Version 2.0, January 2004_ +_<>_ + +### Terms and Conditions for use, reproduction, and distribution + +#### 1. Definitions + +“License” shall mean the terms and conditions for use, reproduction, and +distribution as defined by Sections 1 through 9 of this document. + +“Licensor” shall mean the copyright owner or entity authorized by the copyright +owner that is granting the License. + +“Legal Entity” shall mean the union of the acting entity and all other entities +that control, are controlled by, or are under common control with that entity. +For the purposes of this definition, “control” means **(i)** the power, direct or +indirect, to cause the direction or management of such entity, whether by +contract or otherwise, or **(ii)** ownership of fifty percent (50%) or more of the +outstanding shares, or **(iii)** beneficial ownership of such entity. + +“You” (or “Your”) shall mean an individual or Legal Entity exercising +permissions granted by this License. + +“Source” form shall mean the preferred form for making modifications, including +but not limited to software source code, documentation source, and configuration +files. + +“Object” form shall mean any form resulting from mechanical transformation or +translation of a Source form, including but not limited to compiled object code, +generated documentation, and conversions to other media types. 
+ +“Work” shall mean the work of authorship, whether in Source or Object form, made +available under the License, as indicated by a copyright notice that is included +in or attached to the work (an example is provided in the Appendix below). + +“Derivative Works” shall mean any work, whether in Source or Object form, that +is based on (or derived from) the Work and for which the editorial revisions, +annotations, elaborations, or other modifications represent, as a whole, an +original work of authorship. For the purposes of this License, Derivative Works +shall not include works that remain separable from, or merely link (or bind by +name) to the interfaces of, the Work and Derivative Works thereof. + +“Contribution” shall mean any work of authorship, including the original version +of the Work and any modifications or additions to that Work or Derivative Works +thereof, that is intentionally submitted to Licensor for inclusion in the Work +by the copyright owner or by an individual or Legal Entity authorized to submit +on behalf of the copyright owner. For the purposes of this definition, +“submitted” means any form of electronic, verbal, or written communication sent +to the Licensor or its representatives, including but not limited to +communication on electronic mailing lists, source code control systems, and +issue tracking systems that are managed by, or on behalf of, the Licensor for +the purpose of discussing and improving the Work, but excluding communication +that is conspicuously marked or otherwise designated in writing by the copyright +owner as “Not a Contribution.” + +“Contributor” shall mean Licensor and any individual or Legal Entity on behalf +of whom a Contribution has been received by Licensor and subsequently +incorporated within the Work. + +#### 2. Grant of Copyright License + +Subject to the terms and conditions of this License, each Contributor hereby +grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, +irrevocable copyright license to reproduce, prepare Derivative Works of, +publicly display, publicly perform, sublicense, and distribute the Work and such +Derivative Works in Source or Object form. + +#### 3. Grant of Patent License + +Subject to the terms and conditions of this License, each Contributor hereby +grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, +irrevocable (except as stated in this section) patent license to make, have +made, use, offer to sell, sell, import, and otherwise transfer the Work, where +such license applies only to those patent claims licensable by such Contributor +that are necessarily infringed by their Contribution(s) alone or by combination +of their Contribution(s) with the Work to which such Contribution(s) was +submitted. If You institute patent litigation against any entity (including a +cross-claim or counterclaim in a lawsuit) alleging that the Work or a +Contribution incorporated within the Work constitutes direct or contributory +patent infringement, then any patent licenses granted to You under this License +for that Work shall terminate as of the date such litigation is filed. + +#### 4. 
Redistribution + +You may reproduce and distribute copies of the Work or Derivative Works thereof +in any medium, with or without modifications, and in Source or Object form, +provided that You meet the following conditions: + +* **(a)** You must give any other recipients of the Work or Derivative Works a copy of +this License; and +* **(b)** You must cause any modified files to carry prominent notices stating that You +changed the files; and +* **(c)** You must retain, in the Source form of any Derivative Works that You distribute, +all copyright, patent, trademark, and attribution notices from the Source form +of the Work, excluding those notices that do not pertain to any part of the +Derivative Works; and +* **(d)** If the Work includes a “NOTICE” text file as part of its distribution, then any +Derivative Works that You distribute must include a readable copy of the +attribution notices contained within such NOTICE file, excluding those notices +that do not pertain to any part of the Derivative Works, in at least one of the +following places: within a NOTICE text file distributed as part of the +Derivative Works; within the Source form or documentation, if provided along +with the Derivative Works; or, within a display generated by the Derivative +Works, if and wherever such third-party notices normally appear. The contents of +the NOTICE file are for informational purposes only and do not modify the +License. You may add Your own attribution notices within Derivative Works that +You distribute, alongside or as an addendum to the NOTICE text from the Work, +provided that such additional attribution notices cannot be construed as +modifying the License. + +You may add Your own copyright statement to Your modifications and may provide +additional or different license terms and conditions for use, reproduction, or +distribution of Your modifications, or for any such Derivative Works as a whole, +provided Your use, reproduction, and distribution of the Work otherwise complies +with the conditions stated in this License. + +#### 5. Submission of Contributions + +Unless You explicitly state otherwise, any Contribution intentionally submitted +for inclusion in the Work by You to the Licensor shall be under the terms and +conditions of this License, without any additional terms or conditions. +Notwithstanding the above, nothing herein shall supersede or modify the terms of +any separate license agreement you may have executed with Licensor regarding +such Contributions. + +#### 6. Trademarks + +This License does not grant permission to use the trade names, trademarks, +service marks, or product names of the Licensor, except as required for +reasonable and customary use in describing the origin of the Work and +reproducing the content of the NOTICE file. + +#### 7. Disclaimer of Warranty + +Unless required by applicable law or agreed to in writing, Licensor provides the +Work (and each Contributor provides its Contributions) on an “AS IS” BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, +including, without limitation, any warranties or conditions of TITLE, +NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are +solely responsible for determining the appropriateness of using or +redistributing the Work and assume any risks associated with Your exercise of +permissions under this License. + +#### 8. 
Limitation of Liability + +In no event and under no legal theory, whether in tort (including negligence), +contract, or otherwise, unless required by applicable law (such as deliberate +and grossly negligent acts) or agreed to in writing, shall any Contributor be +liable to You for damages, including any direct, indirect, special, incidental, +or consequential damages of any character arising as a result of this License or +out of the use or inability to use the Work (including but not limited to +damages for loss of goodwill, work stoppage, computer failure or malfunction, or +any and all other commercial damages or losses), even if such Contributor has +been advised of the possibility of such damages. + +#### 9. Accepting Warranty or Additional Liability + +While redistributing the Work or Derivative Works thereof, You may choose to +offer, and charge a fee for, acceptance of support, warranty, indemnity, or +other liability obligations and/or rights consistent with this License. However, +in accepting such obligations, You may act only on Your own behalf and on Your +sole responsibility, not on behalf of any other Contributor, and only if You +agree to indemnify, defend, and hold each Contributor harmless for any liability +incurred by, or claims asserted against, such Contributor by reason of your +accepting any such warranty or additional liability. + +_END OF TERMS AND CONDITIONS_ + +### APPENDIX: How to apply the Apache License to your work + +To apply the Apache License to your work, attach the following boilerplate +notice, with the fields enclosed by brackets `[]` replaced with your own +identifying information. (Don't include the brackets!) The text should be +enclosed in the appropriate comment syntax for the file format. We also +recommend that a file or class name and description of purpose be included on +the same “printed page” as the copyright notice for easier identification within +third-party archives. + + Copyright 2023 Atalaya Tech Inc. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/openllm-python/README.md b/openllm-python/README.md new file mode 100644 index 00000000..1327dd53 --- /dev/null +++ b/openllm-python/README.md @@ -0,0 +1,786 @@ +![Banner for OpenLLM](/.github/assets/main-banner.png) + + + +
+

🦾 OpenLLM

+ + pypi_status + + Twitter + + Discord + + ci + + pre-commit.ci status +
+ + python_version + + Hatch + + code style + + Ruff + + types - mypy + + types - pyright +
+

An open platform for operating large language models (LLMs) in production.
+ Fine-tune, serve, deploy, and monitor any LLMs with ease.

+ +
+
+## 📖 Introduction
+
+With OpenLLM, you can run inference with any open-source large language model,
+deploy models to the cloud or on-premises, and build powerful AI apps.
+
+🚂 **State-of-the-art LLMs**: built-in support for a wide range of open-source
+LLMs and model runtimes, including Llama 2, StableLM, Falcon, Dolly, Flan-T5,
+ChatGLM, StarCoder, and more.
+
+🔥 **Flexible APIs**: serve LLMs over a RESTful API or gRPC with one command;
+query via the Web UI, CLI, our Python/JavaScript client, or any HTTP client.
+
+⛓️ **Freedom To Build**: first-class support for LangChain, BentoML, and Hugging
+Face that allows you to easily create your own AI apps by composing LLMs with
+other models and services.
+
+🎯 **Streamlined Deployment**: automatically generate your LLM server Docker
+images or deploy as serverless endpoints via
+[☁️ BentoCloud](https://l.bentoml.com/bento-cloud).
+
+🤖️ **Bring your own LLM**: fine-tune any LLM to suit your needs with
+`LLM.tuning()`. (Coming soon)
+
+
+
+![Gif showing OpenLLM Intro](/.github/assets/output.gif)
+
+
+
+
+## 🏃 Getting Started
+
+To use OpenLLM, you need to have Python 3.8 (or newer) and `pip` installed on
+your system. We highly recommend using a virtual environment to prevent package
+conflicts.
+
+You can install OpenLLM using pip as follows:
+
+```bash
+pip install openllm
+```
+
+To verify that it is installed correctly, run:
+
+```
+$ openllm -h
+
+Usage: openllm [OPTIONS] COMMAND [ARGS]...
+
+ ██████╗ ██████╗ ███████╗███╗ ██╗██╗ ██╗ ███╗ ███╗
+ ██╔═══██╗██╔══██╗██╔════╝████╗ ██║██║ ██║ ████╗ ████║
+ ██║ ██║██████╔╝█████╗ ██╔██╗ ██║██║ ██║ ██╔████╔██║
+ ██║ ██║██╔═══╝ ██╔══╝ ██║╚██╗██║██║ ██║ ██║╚██╔╝██║
+ ╚██████╔╝██║ ███████╗██║ ╚████║███████╗███████╗██║ ╚═╝ ██║
+ ╚═════╝ ╚═╝ ╚══════╝╚═╝ ╚═══╝╚══════╝╚══════╝╚═╝ ╚═╝
+
+ An open platform for operating large language models in production.
+ Fine-tune, serve, deploy, and monitor any LLMs with ease.
+```
+
+### Starting an LLM Server
+
+To start an LLM server, use `openllm start`. For example, to start an
+[`OPT`](https://huggingface.co/docs/transformers/model_doc/opt) server, do the
+following:
+
+```bash
+openllm start opt
+```
+
+Following this, a Web UI will be accessible at http://localhost:3000, where you
+can experiment with the endpoints and sample input prompts.
+
+OpenLLM provides a built-in Python client, allowing you to interact with the
+model. In a different terminal window or a Jupyter notebook, create a client to
+start interacting with the model:
+
+```python
+import openllm
+client = openllm.client.HTTPClient('http://localhost:3000')
+client.query('Explain to me the difference between "further" and "farther"')
+```
+
+You can also use the `openllm query` command to query the model from the
+terminal:
+
+```bash
+export OPENLLM_ENDPOINT=http://localhost:3000
+openllm query 'Explain to me the difference between "further" and "farther"'
+```
+
+Visit `http://localhost:3000/docs.json` for OpenLLM's API specification.
+
+OpenLLM seamlessly supports many models and their variants. You can also
+specify different variants of the model to be served by providing the
+`--model-id` argument, e.g.:
+
+```bash
+openllm start flan-t5 --model-id google/flan-t5-large
+```
+
+> [!NOTE]
+> `openllm` also supports fine-tuned weights, custom model paths, and
+> quantized weights for any of the supported models, as long as they can be
+> loaded with the model architecture. Refer to the
+> [supported models](https://github.com/bentoml/OpenLLM/tree/main#-supported-models)
+> section for each model's architecture.
+
+Use the `openllm models` command to see the list of models and their variants
+supported in OpenLLM.
+
+## 🧩 Supported Models
+
+The following models are currently supported in OpenLLM. By default, OpenLLM
+doesn't include dependencies to run all models. The extra model-specific
+dependencies can be installed with the instructions below (a short example
+session follows the table):
+
ModelArchitectureModel IdsInstallation
chatglmChatGLMForConditionalGeneration + + + + + +```bash +pip install "openllm[chatglm]" +``` + +
dolly-v2GPTNeoXForCausalLM + + + + + +```bash +pip install openllm +``` + +
falconFalconForCausalLM + + + + + +```bash +pip install "openllm[falcon]" +``` + +
flan-t5T5ForConditionalGeneration + + + + + +```bash +pip install "openllm[flan-t5]" +``` + +
gpt-neoxGPTNeoXForCausalLM + + + + + +```bash +pip install openllm +``` + +
llamaLlamaForCausalLM + + + + + +```bash +pip install "openllm[llama]" +``` + +
mptMPTForCausalLM + + + + + +```bash +pip install "openllm[mpt]" +``` + +
optOPTForCausalLM + + + + + +```bash +pip install "openllm[opt]" +``` + +
stablelmGPTNeoXForCausalLM + + + + + +```bash +pip install openllm +``` + +
starcoderGPTBigCodeForCausalLM + + + + + +```bash +pip install "openllm[starcoder]" +``` + +
baichuanBaiChuanForCausalLM + + + + + +```bash +pip install "openllm[baichuan]" +``` + +
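+As a quick sketch of how the table above is meant to be used (a hypothetical
+session; `mpt` is one of the supported models listed above, and the actual
+`openllm models` output may differ):
+
+```bash
+# list the models and variants this OpenLLM installation supports
+openllm models
+
+# install the model-specific extra, then serve the model
+pip install "openllm[mpt]"
+openllm start mpt
+```
+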
+
+
+
+### Runtime Implementations (Experimental)
+
+Different LLMs may have multiple runtime implementations. For instance, they
+might use PyTorch (`pt`), TensorFlow (`tf`), or Flax (`flax`).
+
+If you wish to specify a particular runtime for a model, you can do so by
+setting the `OPENLLM_{MODEL_NAME}_FRAMEWORK={runtime}` environment variable
+before running `openllm start`.
+
+For example, if you want to use the TensorFlow (`tf`) implementation for the
+`flan-t5` model, you can use the following command:
+
+```bash
+OPENLLM_FLAN_T5_FRAMEWORK=tf openllm start flan-t5
+```
+
+> [!NOTE]
+> For GPU support on Flax, refer to
+> [Jax's installation](https://github.com/google/jax#pip-installation-gpu-cuda-installed-via-pip-easier)
+> to make sure that you have Jax support for the corresponding CUDA version.
+
+### Quantisation
+
+OpenLLM supports quantisation with
+[bitsandbytes](https://github.com/TimDettmers/bitsandbytes) and
+[GPTQ](https://arxiv.org/abs/2210.17323). For example, to serve with `int8`
+quantisation:
+
+```bash
+openllm start mpt --quantize int8
+```
+
+To run inference with `gptq`, simply pass `--quantize gptq`:
+
+```bash
+openllm start falcon --model-id TheBloke/falcon-40b-instruct-GPTQ --quantize gptq --device 0
+```
+
+> [!NOTE]
+> In order to run GPTQ, make sure to install with
+> `pip install "openllm[gptq]"`. The weights of all supported models should be
+> quantized before serving. See
+> [GPTQ-for-LLaMa](https://github.com/qwopqwop200/GPTQ-for-LLaMa) for more
+> information on GPTQ quantisation.
+
+### Fine-tuning support (Experimental)
+
+You can serve OpenLLM models with any PEFT-compatible layers via
+`--adapter-id`:
+
+```bash
+openllm start opt --model-id facebook/opt-6.7b --adapter-id aarnphm/opt-6-7b-quotes
+```
+
+It also supports adapters from custom paths:
+
+```bash
+openllm start opt --model-id facebook/opt-6.7b --adapter-id /path/to/adapters
+```
+
+To use multiple adapters, use the following format:
+
+```bash
+openllm start opt --model-id facebook/opt-6.7b --adapter-id aarnphm/opt-6.7b-lora --adapter-id aarnphm/opt-6.7b-lora:french_lora
+```
+
+By default, the first `--adapter-id` will be the default LoRA layer, but
+optionally you can change which LoRA layer to use for inference via
+`/v1/adapters`:
+
+```bash
+curl -X POST http://localhost:3000/v1/adapters --json '{"adapter_name": "vn_lora"}'
+```
+
+Note that when using multiple adapter names and IDs, it is recommended to switch
+back to the default adapter before sending inference requests, to avoid any
+performance degradation.
+
+To include adapters in the Bento, you can also provide a `--adapter-id` to
+`openllm build`:
+
+```bash
+openllm build opt --model-id facebook/opt-6.7b --adapter-id ...
+```
+
+> [!NOTE]
+> We will gradually roll out support for fine-tuning all models. The
+> following models currently support fine-tuning: OPT, Falcon, LLaMA.
+
+### Integrating a New Model
+
+OpenLLM encourages contributions by welcoming users to incorporate their custom
+LLMs into the ecosystem. Check out the
+[Adding a New Model Guide](https://github.com/bentoml/OpenLLM/blob/main/ADDING_NEW_MODEL.md)
+to see how you can do it yourself.
+
+### Embeddings
+
+OpenLLM tentatively provides an embeddings endpoint for supported models. It can
+be accessed via `/v1/embeddings`.
+
+To use it via the CLI, simply call `openllm embed`:
+
+```bash
+openllm embed --endpoint http://localhost:3000 "I like to eat apples" -o json
+{
+  "embeddings": [
+    0.006569798570126295,
+    -0.031249752268195152,
+    -0.008072729222476482,
+    0.00847396720200777,
+    -0.005293501541018486,
+    ......
+    -0.002078012563288212,
+    -0.00676426338031888,
+    -0.002022686880081892
+  ],
+  "num_tokens": 9
+}
+```
+
+To invoke this endpoint, use `client.embed` from the Python SDK:
+
+```python
+import openllm
+
+client = openllm.client.HTTPClient("http://localhost:3000")
+
+client.embed("I like to eat apples")
+```
+
+> [!NOTE]
+> Currently, the following model families support embeddings: Llama, T5
+> (Flan-T5, FastChat, etc.), and ChatGLM.
+
+## ⚙️ Integrations
+
+OpenLLM is not just a standalone product; it's a building block designed to
+integrate with other powerful tools easily. We currently offer integration with
+[BentoML](https://github.com/bentoml/BentoML),
+[LangChain](https://github.com/hwchase17/langchain), and
+[Transformers Agents](https://huggingface.co/docs/transformers/transformers_agents).
+
+### BentoML
+
+OpenLLM models can be integrated as a
+[Runner](https://docs.bentoml.com/en/latest/concepts/runner.html) in your
+BentoML service. These runners have a `generate` method that takes a string as a
+prompt and returns a corresponding output string. This allows you to plug and
+play any OpenLLM model with your existing ML workflow.
+
+```python
+import bentoml
+import openllm
+from bentoml.io import Text
+
+model = "opt"
+
+llm_config = openllm.AutoConfig.for_model(model)
+llm_runner = openllm.Runner(model, llm_config=llm_config)
+
+svc = bentoml.Service(
+    name="llm-opt-service", runners=[llm_runner]
+)
+
+@svc.api(input=Text(), output=Text())
+async def prompt(input_text: str) -> str:
+    answer = await llm_runner.generate(input_text)
+    return answer
+```
+
+### [LangChain](https://python.langchain.com/docs/ecosystem/integrations/openllm)
+
+To quickly start a local LLM with `langchain`, simply do the following:
+
+```python
+from langchain.llms import OpenLLM
+
+llm = OpenLLM(model_name="llama", model_id='meta-llama/Llama-2-7b-hf')
+
+llm("What is the difference between a duck and a goose? And why are there so many geese in Canada?")
+```
+
+> [!IMPORTANT]
+> By default, OpenLLM uses the `safetensors` format for saving models.
+> If the model doesn't support safetensors, make sure to pass
+> `serialisation="legacy"` to use the legacy PyTorch bin format.
+
+`langchain.llms.OpenLLM` can also interact with a remote OpenLLM server. Given
+an OpenLLM server deployed elsewhere, you can connect to it by specifying its
+URL:
+
+```python
+from langchain.llms import OpenLLM
+
+llm = OpenLLM(server_url='http://44.23.123.1:3000', server_type='grpc')
+llm("What is the difference between a duck and a goose? And why are there so many geese in Canada?")
+```
+
+To integrate a LangChain agent with BentoML, you can do the following:
+
+```python
+import bentoml
+from bentoml.io import Text
+from langchain.agents import AgentType, initialize_agent, load_tools
+from langchain.llms import OpenLLM
+
+llm = OpenLLM(
+    model_name='flan-t5',
+    model_id='google/flan-t5-large',
+    embedded=False,
+    serialisation="legacy"
+)
+tools = load_tools(["serpapi", "llm-math"], llm=llm)
+agent = initialize_agent(
+    tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION
+)
+svc = bentoml.Service("langchain-openllm", runners=[llm.runner])
+
+@svc.api(input=Text(), output=Text())
+def chat(input_text: str):
+    return agent.run(input_text)
+```
+
+> [!NOTE]
+> You can find more examples under the
+> [examples](https://github.com/bentoml/OpenLLM/tree/main/examples) folder.
+
+### Transformers Agents
+
+OpenLLM seamlessly integrates with
+[Transformers Agents](https://huggingface.co/docs/transformers/transformers_agents).
+
+> [!WARNING]
+> The Transformers Agent is still at an experimental stage. It is
+> recommended to install OpenLLM with `pip install -r nightly-requirements.txt`
+> to get the latest API updates for the Hugging Face agent.
+
+```python
+import transformers
+
+agent = transformers.HfAgent("http://localhost:3000/hf/agent")  # URL that runs the OpenLLM server
+
+agent.run("Is the following `text` positive or negative?", text="I don't like how this model is generating inputs")
+```
+
+> [!IMPORTANT]
+> Only `starcoder` is currently supported with Agent integration.
+> The example above was also run with four T4s on EC2 `g4dn.12xlarge`.
+
+If you want to use the OpenLLM client to ask questions to the running agent, you
+can also do so:
+
+```python
+import openllm
+
+client = openllm.client.HTTPClient("http://localhost:3000")
+
+client.ask_agent(
+    task="Is the following `text` positive or negative?",
+    text="What are you thinking about?",
+)
+```
+
+
+
+![Gif showing Agent integration](/.github/assets/agent.gif)
+
+
+
+
+## 🚀 Deploying to Production
+
+There are several ways to deploy your LLMs:
+
+### 🐳 Docker container
+
+1. **Building a Bento**: With OpenLLM, you can easily build a Bento for a
+   specific model, like `dolly-v2`, using the `build` command:
+
+   ```bash
+   openllm build dolly-v2
+   ```
+
+   A
+   [Bento](https://docs.bentoml.com/en/latest/concepts/bento.html#what-is-a-bento),
+   in BentoML, is the unit of distribution. It packages your program's source
+   code, models, files, artefacts, and dependencies.
+
+2. **Containerize your Bento**
+
+   ```bash
+   bentoml containerize <name:version>
+   ```
+
+   This generates an OCI-compatible Docker image that can be deployed anywhere
+   Docker runs. For best scalability and reliability of your LLM service in
+   production, we recommend deploying with BentoCloud.
+
+### ☁️ BentoCloud
+
+Deploy OpenLLM with [BentoCloud](https://www.bentoml.com/bento-cloud/), the
+serverless cloud for shipping and scaling AI applications.
+
+1. **Create a BentoCloud account:** [sign up here](https://bentoml.com/cloud)
+   for early access.
+
+2. **Log into your BentoCloud account:**
+
+   ```bash
+   bentoml cloud login --api-token <your-api-token> --endpoint <bento-cloud-endpoint>
+   ```
+
+> [!NOTE]
+> Replace `<your-api-token>` and `<bento-cloud-endpoint>` with your
+> specific API token and the BentoCloud endpoint respectively.
+
+3. **Building a Bento**: With OpenLLM, you can easily build a Bento for a
+   specific model, such as `dolly-v2`:
+
+   ```bash
+   openllm build dolly-v2
+   ```
+
+4. **Pushing a Bento**: Push your freshly-built Bento service to BentoCloud via
+   the `push` command:
+
+   ```bash
+   bentoml push <name:version>
+   ```
+
+5. **Deploying a Bento**: Deploy your LLMs to BentoCloud with a single
+   `bentoml deployment create` command following the
+   [deployment instructions](https://docs.bentoml.com/en/latest/reference/cli.html#bentoml-deployment-create).
+
+## 👥 Community
+
+Engage with like-minded individuals passionate about LLMs, AI, and more on our
+[Discord](https://l.bentoml.com/join-openllm-discord)!
+
+OpenLLM is actively maintained by the BentoML team. Feel free to reach out and
+join us in our pursuit to make LLMs more accessible and easy to use 👉
+[Join our Slack community!](https://l.bentoml.com/join-slack)
+
+## 🎁 Contributing
+
+We welcome contributions! If you're interested in enhancing OpenLLM's
+capabilities or have any questions, don't hesitate to reach out in our
+[discord channel](https://l.bentoml.com/join-openllm-discord).
+
+Check out our
+[Developer Guide](https://github.com/bentoml/OpenLLM/blob/main/DEVELOPMENT.md)
+if you wish to contribute to OpenLLM's codebase.
+
+## 🍇 Telemetry
+
+OpenLLM collects usage data to enhance user experience and improve the product.
+We only report OpenLLM's internal API calls and ensure maximum privacy by
+excluding sensitive information. We will never collect user code, model data, or
+stack traces. For usage tracking, check out the
+[code](https://github.com/bentoml/OpenLLM/blob/main/openllm-python/src/openllm/utils/analytics.py).
+
+You can opt out of usage tracking by using the `--do-not-track` CLI option:
+
+```bash
+openllm [command] --do-not-track
+```
+
+Or by setting the environment variable `OPENLLM_DO_NOT_TRACK=True`:
+
+```bash
+export OPENLLM_DO_NOT_TRACK=True
+```
+
+## 📔 Citation
+
+If you use OpenLLM in your research, we provide a [citation](./CITATION.cff) to
+use:
+
+```bibtex
+@software{Pham_OpenLLM_Operating_LLMs_2023,
+  author = {Pham, Aaron and Yang, Chaoyu and Sheng, Sean and Zhao, Shenyang and Lee, Sauyon and Jiang, Bo and Dong, Fog and Guan, Xipeng and Ming, Frost},
+  license = {Apache-2.0},
+  month = jun,
+  title = {{OpenLLM: Operating LLMs in production}},
+  url = {https://github.com/bentoml/OpenLLM},
+  year = {2023}
+}
+```
+
+
diff --git a/openllm-python/pyproject.toml b/openllm-python/pyproject.toml
new file mode 100644
index 00000000..3a41a86e
--- /dev/null
+++ b/openllm-python/pyproject.toml
@@ -0,0 +1,207 @@
+# NOTE: PEP517 is managed via ./tools/dependencies.py
+[build-system]
+build-backend = "hatchling.build"
+requires = [
+  "hatchling==1.18.0",
+  "hatch-vcs==0.3.0",
+  "hatch-fancy-pypi-readme==23.1.0",
+  "hatch-mypyc==0.16.0",
+]
+
+[project]
+authors = [
+  {name = "Aaron Pham",email = "aarnphm@bentoml.com"},
+  {name = "BentoML Team",email = "contact@bentoml.com"},
+]
+classifiers = [
+  "Development Status :: 5 - Production/Stable",
+  "Environment :: GPU :: NVIDIA CUDA",
+  "Environment :: GPU :: NVIDIA CUDA :: 12",
+  "Environment :: GPU :: NVIDIA CUDA :: 11.8",
+  "Environment :: GPU :: NVIDIA CUDA :: 11.7",
+  "License :: OSI Approved :: Apache Software License",
+  "Topic :: Scientific/Engineering :: Artificial Intelligence",
+  "Topic :: Software Development :: Libraries",
+  "Operating System :: OS Independent",
+  "Intended Audience :: Developers",
+  "Intended Audience :: Science/Research",
+  "Intended Audience :: System Administrators",
+  "Typing :: Typed",
+  "Programming Language :: Python",
+  "Programming Language :: Python :: 3",
+  "Programming Language :: Python :: 3 :: Only",
+  "Programming Language :: Python :: 3.8",
+  "Programming Language :: Python :: 3.9",
+  "Programming Language :: Python :: 3.10",
+  "Programming Language :: Python :: 3.11",
+  "Programming Language :: Python :: 3.12",
+  "Programming Language :: Python :: Implementation :: CPython",
+  "Programming Language :: Python :: Implementation :: PyPy",
+]
+dependencies = [
+  "bentoml[grpc,io]>=1.0.25",
+  "transformers[torch,tokenizers,accelerate]>=4.29.0",
+  "safetensors",
+  "optimum",
+  "attrs>=23.1.0",
+  "cattrs>=23.1.0",
+  "orjson",
+  "inflection",
+  "tabulate[widechars]>=0.9.0",
+  "httpx",
+  "click>=8.1.3",
+  "typing_extensions",
+  "mypy_extensions",
+  "ghapi",
+  "cuda-python;platform_system!=\"Darwin\"",
+  "bitsandbytes<0.42",
+]
+description = "OpenLLM: Operating LLMs in production"
+dynamic = ["version", "readme"]
+keywords = [
+  "MLOps",
+  "AI",
+  "BentoML",
+  "Model Serving",
+  "Model Deployment",
+  "LLMOps",
+  "Falcon",
+  "Vicuna",
+  "Llama 2",
+  "Fine tuning",
+  "Serverless",
+  "Large Language Model",
+  "Generative AI",
+  "StableLM",
+  "Alpaca",
+  "PyTorch",
+  "Transformers",
+]
+license = "Apache-2.0"
+name = "openllm"
+requires-python = ">=3.8"
+[project.scripts]
+openllm = "openllm.cli.entrypoint:cli"
+openllm-build-base-container = "openllm.cli.extension.build_base_container:cli"
+openllm-dive-bentos = "openllm.cli.extension.dive_bentos:cli"
+openllm-get-containerfile = "openllm.cli.extension.get_containerfile:cli"
+openllm-get-prompt = "openllm.cli.extension.get_prompt:cli"
+openllm-list-bentos = "openllm.cli.extension.list_bentos:cli"
"openllm.cli.extension.list_bentos:cli" +openllm-list-models = "openllm.cli.extension.list_models:cli" +openllm-playground = "openllm.cli.extension.playground:cli" + +[project.urls] +Blog = "https://modelserving.com" +Chat = "https://discord.gg/openllm" +Documentation = "https://github.com/bentoml/openllm#readme" +GitHub = "https://github.com/bentoml/OpenLLM" +History = "https://github.com/bentoml/OpenLLM/blob/main/CHANGELOG.md" +Homepage = "https://bentoml.com" +Tracker = "https://github.com/bentoml/OpenLLM/issues" +Twitter = "https://twitter.com/bentomlai" + +[project.optional-dependencies] +agents = ["transformers[agents]>=4.30", "diffusers", "soundfile"] +all = [ + "openllm[agents]", + "openllm[baichuan]", + "openllm[chatglm]", + "openllm[falcon]", + "openllm[fine-tune]", + "openllm[flan-t5]", + "openllm[ggml]", + "openllm[gptq]", + "openllm[llama]", + "openllm[mpt]", + "openllm[openai]", + "openllm[opt]", + "openllm[playground]", + "openllm[starcoder]", + "openllm[vllm]", +] +baichuan = ["cpm-kernels", "sentencepiece"] +chatglm = ["cpm-kernels", "sentencepiece"] +falcon = ["einops", "xformers"] +fine-tune = ["peft>=0.4.0", "bitsandbytes", "datasets", "accelerate", "trl"] +flan-t5 = ["flax", "jax", "jaxlib", "tensorflow", "keras"] +ggml = ["ctransformers"] +gptq = ["auto-gptq[triton]"] +llama = ["fairscale", "sentencepiece"] +mpt = ["triton", "einops"] +openai = ["openai", "tiktoken"] +opt = ["flax", "jax", "jaxlib", "tensorflow", "keras"] +playground = ["jupyter", "notebook", "ipython", "jupytext", "nbformat"] +starcoder = ["bitsandbytes"] +vllm = ["vllm", "ray"] + +[tool.hatch.version] +fallback-version = "0.0.0" +source = "vcs" +[tool.hatch.build.hooks.vcs] +version-file = "src/openllm/_version.py" +[tool.hatch.version.raw-options] +git_describe_command = [ + "git", + "describe", + "--dirty", + "--tags", + "--long", + "--first-parent", +] +local_scheme = "no-local-version" +root = ".." 
+[tool.hatch.metadata] +allow-direct-references = true +[tool.hatch.build.targets.wheel] +only-include = ["src/openllm"] +sources = ["src"] +dev-mode-dirs = ["."] +[tool.hatch.build.targets.sdist] +exclude = ["/.git_archival.txt"] +[tool.hatch.build.targets.wheel.hooks.mypyc] +dependencies = [ + "hatch-mypyc==0.16.0", + "mypy==1.4.1", + # avoid https://github.com/pallets/click/issues/2558 + "click==8.1.3", + "bentoml==1.1.1", + "transformers>=4.31.0", + "pandas-stubs", + "types-psutil", + "types-tabulate", + "types-PyYAML", + "types-protobuf", +] +enable-by-default = false +include = [ + "src/openllm/bundle", + "src/openllm/models/__init__.py", + "src/openllm/models/auto/__init__.py", + "src/openllm/utils/__init__.py", + "src/openllm/utils/codegen.py", + "src/openllm/__init__.py", + "src/openllm/_prompt.py", + "src/openllm/_schema.py", + "src/openllm/_quantisation.py", + "src/openllm/_generation.py", + "src/openllm/_strategies.py", + "src/openllm/exceptions.py", + "src/openllm/testing.py", +] +# NOTE: This is consistent with pyproject.toml +mypy-args = [ + "--strict", + # this is because all transient library doesn't have types + "--allow-subclassing-any", + "--follow-imports=skip", + "--check-untyped-defs", + "--ignore-missing-imports", + "--no-warn-return-any", + "--warn-unreachable", + "--no-warn-no-return", + "--no-warn-unused-ignores", + "--exclude='/src\\/openllm\\/playground\\/**'", + "--exclude='/src\\/openllm\\/_typing_compat\\.py$'", +] +options = { verbose = true, strip_asserts = true, debug_level = "2", opt_level = "3", include_runtime_files = true } +require-runtime-dependencies = true diff --git a/src/openllm/__init__.py b/openllm-python/src/openllm/__init__.py similarity index 98% rename from src/openllm/__init__.py rename to openllm-python/src/openllm/__init__.py index aafd18e4..e1d45aec 100644 --- a/src/openllm/__init__.py +++ b/openllm-python/src/openllm/__init__.py @@ -155,7 +155,7 @@ else: from .models.opt import TFOPT as TFOPT # NOTE: update this to sys.modules[__name__] once mypy_extensions can recognize __spec__ -__lazy = utils.LazyModule(__name__, globals()["__file__"], _import_structure, extra_objects={"COMPILED": COMPILED}) +__lazy = utils.LazyModule(__name__, _os.path.abspath("__file__"), _import_structure, extra_objects={"COMPILED": COMPILED}) __all__ = __lazy.__all__ __dir__ = __lazy.__dir__ __getattr__ = __lazy.__getattr__ diff --git a/src/openllm/__main__.py b/openllm-python/src/openllm/__main__.py similarity index 100% rename from src/openllm/__main__.py rename to openllm-python/src/openllm/__main__.py diff --git a/src/openllm/_configuration.py b/openllm-python/src/openllm/_configuration.py similarity index 99% rename from src/openllm/_configuration.py rename to openllm-python/src/openllm/_configuration.py index 682cfcdb..22cb65c9 100644 --- a/src/openllm/_configuration.py +++ b/openllm-python/src/openllm/_configuration.py @@ -1082,7 +1082,7 @@ class LLMConfig(_ConfigAttr): def keys(self) -> list[str]: return list(self.__openllm_accepted_keys__) + list(self.__openllm_extras__) def values(self) -> list[t.Any]: return ([getattr(self, k.name) for k in attr.fields(self.__class__)] + [getattr(self.generation_config, k.name) for k in attr.fields(self.__openllm_generation_class__)] + [getattr(self.sampling_config, k.name) for k in attr.fields(self.__openllm_sampling_class__)] + list(self.__openllm_extras__.values())) def items(self) -> list[tuple[str, t.Any]]: return ([(k.name, getattr(self, k.name)) for k in attr.fields(self.__class__)] + [(k.name, 
getattr(self.generation_config, k.name)) for k in attr.fields(self.__openllm_generation_class__)] + [(k.name, getattr(self.sampling_config, k.name)) for k in attr.fields(self.__openllm_sampling_class__)] + list(self.__openllm_extras__.items())) - def __iter__(self) -> t.Iterable[str]: return iter(self.keys()) + def __iter__(self) -> t.Iterator[str]: return iter(self.keys()) def __contains__(self, item: t.Any) -> bool: if item in self.__openllm_extras__: return True return item in self.__openllm_accepted_keys__ @@ -1230,7 +1230,7 @@ class LLMConfig(_ConfigAttr): bentoml_cattr.register_unstructure_hook_factory(lambda cls: lenient_issubclass(cls, LLMConfig), lambda cls: make_dict_unstructure_fn(cls, bentoml_cattr, _cattrs_omit_if_default=False, _cattrs_use_linecache=True)) -def structure_llm_config(data: DictStrAny, cls: type[LLMConfig]) -> LLMConfig: +def structure_llm_config(data: t.Any, cls: type[LLMConfig]) -> LLMConfig: """Structure a dictionary to a LLMConfig object. Essentially, if the given dictionary contains a 'generation_config' key, then we will @@ -1240,7 +1240,6 @@ def structure_llm_config(data: DictStrAny, cls: type[LLMConfig]) -> LLMConfig: parse the remaining keys into LLMConfig.generation_config """ if not isinstance(data, dict): raise RuntimeError(f"Expected a dictionary, but got {type(data)}") - cls_attrs = {k: v for k, v in data.items() if k in cls.__openllm_accepted_keys__} generation_cls_fields = attr.fields_dict(cls.__openllm_generation_class__) if "generation_config" in data: diff --git a/src/openllm/_generation.py b/openllm-python/src/openllm/_generation.py similarity index 98% rename from src/openllm/_generation.py rename to openllm-python/src/openllm/_generation.py index bf00d0b0..8139b0fd 100644 --- a/src/openllm/_generation.py +++ b/openllm-python/src/openllm/_generation.py @@ -1,3 +1,4 @@ +# mypy: disable-error-code="misc" from __future__ import annotations import typing as t, transformers if t.TYPE_CHECKING: import torch, openllm diff --git a/src/openllm/_llm.py b/openllm-python/src/openllm/_llm.py similarity index 99% rename from src/openllm/_llm.py rename to openllm-python/src/openllm/_llm.py index 8fd78d06..77313d89 100644 --- a/src/openllm/_llm.py +++ b/openllm-python/src/openllm/_llm.py @@ -66,7 +66,7 @@ else: torch = LazyLoader("torch", globals(), "torch") peft = LazyLoader("peft", globals(), "peft") -ResolvedAdaptersMapping = t.Dict[AdapterType, t.Dict[t.Union[str, t.Literal["default"]], t.Tuple["peft.PeftConfig", str]]] +ResolvedAdaptersMapping = t.Dict[AdapterType, t.Dict[str, t.Tuple["peft.PeftConfig", str]]] logger = logging.getLogger(__name__) class ModelSignatureDict(t.TypedDict, total=False): diff --git a/src/openllm/_prompt.py b/openllm-python/src/openllm/_prompt.py similarity index 100% rename from src/openllm/_prompt.py rename to openllm-python/src/openllm/_prompt.py diff --git a/src/openllm/_quantisation.py b/openllm-python/src/openllm/_quantisation.py similarity index 99% rename from src/openllm/_quantisation.py rename to openllm-python/src/openllm/_quantisation.py index b1123d4a..06f110f8 100644 --- a/src/openllm/_quantisation.py +++ b/openllm-python/src/openllm/_quantisation.py @@ -6,7 +6,7 @@ if sys.version_info[:2] >= (3, 11): from typing import overload else: from typing_extensions import overload if t.TYPE_CHECKING: from ._llm import LLM - from ._types import DictStrAny + from ._typing_compat import DictStrAny autogptq, torch, transformers = LazyLoader("autogptq", globals(), "auto_gptq"), LazyLoader("torch", globals(), "torch"), 
LazyLoader("transformers", globals(), "transformers") diff --git a/src/openllm/_schema.py b/openllm-python/src/openllm/_schema.py similarity index 100% rename from src/openllm/_schema.py rename to openllm-python/src/openllm/_schema.py diff --git a/src/openllm/_service.py b/openllm-python/src/openllm/_service.py similarity index 100% rename from src/openllm/_service.py rename to openllm-python/src/openllm/_service.py diff --git a/src/openllm/_strategies.py b/openllm-python/src/openllm/_strategies.py similarity index 98% rename from src/openllm/_strategies.py rename to openllm-python/src/openllm/_strategies.py index 7a71e9d1..e40d43c1 100644 --- a/src/openllm/_strategies.py +++ b/openllm-python/src/openllm/_strategies.py @@ -230,7 +230,11 @@ class CascadingResourceStrategy(bentoml.Strategy, ReprMixin): TODO: Support CloudTPUResource """ @classmethod - def get_worker_count(cls, runnable_class: type[bentoml.Runnable], resource_request: dict[str, t.Any] | None, workers_per_resource: int | float) -> int: + def get_worker_count(cls, runnable_class: type[bentoml.Runnable], resource_request: dict[str, t.Any] | None, workers_per_resource: float) -> int: + """Return the number of workers to be used for the given runnable class. + + Note that for all available GPU, the number of workers will always be 1. + """ if resource_request is None: resource_request = system_resources() # use NVIDIA kind = "nvidia.com/gpu" diff --git a/src/openllm/_typing_compat.py b/openllm-python/src/openllm/_typing_compat.py similarity index 100% rename from src/openllm/_typing_compat.py rename to openllm-python/src/openllm/_typing_compat.py diff --git a/src/openllm/bundle/__init__.py b/openllm-python/src/openllm/bundle/__init__.py similarity index 100% rename from src/openllm/bundle/__init__.py rename to openllm-python/src/openllm/bundle/__init__.py diff --git a/src/openllm/bundle/_package.py b/openllm-python/src/openllm/bundle/_package.py similarity index 93% rename from src/openllm/bundle/_package.py rename to openllm-python/src/openllm/bundle/_package.py index 71deb419..288cf487 100644 --- a/src/openllm/bundle/_package.py +++ b/openllm-python/src/openllm/bundle/_package.py @@ -77,7 +77,7 @@ def construct_python_options(llm: openllm.LLM[t.Any, t.Any], llm_fs: FS, extra_d if built_wheels is not None: wheels.append(llm_fs.getsyspath(f"/{built_wheels.split('/')[-1]}")) return PythonOptions(packages=packages, wheels=wheels, lock_packages=False, extra_index_url=["https://download.pytorch.org/whl/cu118"]) -def construct_docker_options(llm: openllm.LLM[t.Any, t.Any], _: FS, workers_per_resource: int | float, quantize: LiteralString | None, bettertransformer: bool | None, adapter_map: dict[str, str | None] | None, dockerfile_template: str | None, runtime: t.Literal["ggml", "transformers"], serialisation_format: t.Literal["safetensors", "legacy"], container_registry: LiteralContainerRegistry, container_version_strategy: LiteralContainerVersionStrategy) -> DockerOptions: +def construct_docker_options(llm: openllm.LLM[t.Any, t.Any], _: FS, workers_per_resource: float, quantize: LiteralString | None, bettertransformer: bool | None, adapter_map: dict[str, str | None] | None, dockerfile_template: str | None, runtime: t.Literal["ggml", "transformers"], serialisation_format: t.Literal["safetensors", "legacy"], container_registry: LiteralContainerRegistry, container_version_strategy: LiteralContainerVersionStrategy) -> DockerOptions: _bentoml_config_options = os.environ.pop("BENTOML_CONFIG_OPTIONS", "") _bentoml_config_options_opts = 
["tracing.sample_rate=1.0", f'runners."llm-{llm.config["start_name"]}-runner".traffic.timeout={llm.config["timeout"]}', f'api_server.traffic.timeout={llm.config["timeout"]}', f'runners."llm-{llm.config["start_name"]}-runner".traffic.timeout={llm.config["timeout"]}', f'runners."llm-{llm.config["start_name"]}-runner".workers_per_resource={workers_per_resource}'] _bentoml_config_options += " " if _bentoml_config_options else "" + " ".join(_bentoml_config_options_opts) @@ -100,7 +100,7 @@ def construct_docker_options(llm: openllm.LLM[t.Any, t.Any], _: FS, workers_per_ return DockerOptions(base_image=f"{oci.CONTAINER_NAMES[container_registry]}:{oci.get_base_container_tag(container_version_strategy)}", env=env_dict, dockerfile_template=dockerfile_template) @inject -def create_bento(bento_tag: bentoml.Tag, llm_fs: FS, llm: openllm.LLM[t.Any, t.Any], workers_per_resource: str | int | float, quantize: LiteralString | None, bettertransformer: bool | None, dockerfile_template: str | None, adapter_map: dict[str, str | None] | None = None, extra_dependencies: tuple[str, ...] | None = None, +def create_bento(bento_tag: bentoml.Tag, llm_fs: FS, llm: openllm.LLM[t.Any, t.Any], workers_per_resource: str | float, quantize: LiteralString | None, bettertransformer: bool | None, dockerfile_template: str | None, adapter_map: dict[str, str | None] | None = None, extra_dependencies: tuple[str, ...] | None = None, runtime: t.Literal[ "ggml", "transformers"] = "transformers", serialisation_format: t.Literal["safetensors", "legacy"] = "safetensors", container_registry: LiteralContainerRegistry = "ecr", container_version_strategy: LiteralContainerVersionStrategy = "release", _bento_store: BentoStore = Provide[BentoMLContainer.bento_store], _model_store: ModelStore = Provide[BentoMLContainer.model_store]) -> bentoml.Bento: framework_envvar = llm.config["env"]["framework_value"] diff --git a/src/openllm/bundle/oci/Dockerfile b/openllm-python/src/openllm/bundle/oci/Dockerfile similarity index 93% rename from src/openllm/bundle/oci/Dockerfile rename to openllm-python/src/openllm/bundle/oci/Dockerfile index 829e661c..17a8ecd0 100644 --- a/src/openllm/bundle/oci/Dockerfile +++ b/openllm-python/src/openllm/bundle/oci/Dockerfile @@ -146,8 +146,8 @@ COPY --from=flash-attn-v2-builder /usr/src/flash-attention-v2/build/lib.linux-x8 COPY --from=auto-gptq-builder /usr/src/AutoGPTQ/build/lib.linux-x86_64-cpython-39 /opt/conda/lib/python3.9/site-packages # Install required dependencies -COPY src src -COPY hatch.toml README.md CHANGELOG.md pyproject.toml ./ +COPY openllm-python/src src +COPY hatch.toml README.md CHANGELOG.md openllm-python/pyproject.toml ./ RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ build-essential \ @@ -158,7 +158,7 @@ RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-ins rm -rf /var/lib/apt/lists/* # Install all required dependencies -RUN --mount=type=cache,target=/root/.cache/pip pip install "ray==2.6.0" "einops" "jax[cuda11_local]" "torch>=2.0.1" xformers -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html ".[opt,mpt,fine-tune,llama,chatglm]" -v --no-cache-dir +RUN --mount=type=cache,target=/root/.cache/pip pip install "ray==2.6.0" "einops" "jax[cuda11_local]" "torch>=2.0.1" xformers -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html ".[opt,mpt,fine-tune,llama,chatglm]" -v --no-cache-dir FROM base-container diff --git a/src/openllm/bundle/oci/__init__.py 
b/openllm-python/src/openllm/bundle/oci/__init__.py similarity index 98% rename from src/openllm/bundle/oci/__init__.py rename to openllm-python/src/openllm/bundle/oci/__init__.py index fda395f5..b2ece9dc 100644 --- a/src/openllm/bundle/oci/__init__.py +++ b/openllm-python/src/openllm/bundle/oci/__init__.py @@ -8,7 +8,7 @@ from openllm.utils.lazy import VersionInfo if t.TYPE_CHECKING: from ghapi import all - from openllm._typing_compat import RefTuple + from openllm._typing_compat import RefTuple, LiteralString all = openllm.utils.LazyLoader("all", globals(), "ghapi.all") # noqa: F811 @@ -78,7 +78,7 @@ class RefResolver: return _RefTuple((*version, "release" if _use_base_strategy else "custom")) @classmethod @functools.lru_cache(maxsize=64) - def from_strategy(cls, strategy_or_version: t.Literal["release", "nightly"] | str | None = None) -> RefResolver: + def from_strategy(cls, strategy_or_version: t.Literal["release", "nightly"] | LiteralString | None = None) -> RefResolver: # using default strategy if strategy_or_version is None or strategy_or_version == "release": return cls(*cls._release_ref()) elif strategy_or_version == "latest": return cls("latest", "0.0.0", "latest") diff --git a/src/openllm/cli/__init__.py b/openllm-python/src/openllm/cli/__init__.py similarity index 100% rename from src/openllm/cli/__init__.py rename to openllm-python/src/openllm/cli/__init__.py diff --git a/src/openllm/cli/_factory.py b/openllm-python/src/openllm/cli/_factory.py similarity index 100% rename from src/openllm/cli/_factory.py rename to openllm-python/src/openllm/cli/_factory.py diff --git a/src/openllm/cli/_sdk.py b/openllm-python/src/openllm/cli/_sdk.py similarity index 96% rename from src/openllm/cli/_sdk.py rename to openllm-python/src/openllm/cli/_sdk.py index 27ab8069..6fba8e74 100644 --- a/src/openllm/cli/_sdk.py +++ b/openllm-python/src/openllm/cli/_sdk.py @@ -77,7 +77,7 @@ def _start(model_name: str, /, *, model_id: str | None = None, timeout: int = 30 return start_command_factory(start_command if not _serve_grpc else start_grpc_command, model_name, _context_settings=termui.CONTEXT_SETTINGS, _serve_grpc=_serve_grpc).main(args=args if len(args) > 0 else None, standalone_mode=False) @inject -def _build(model_name: str, /, *, model_id: str | None = None, model_version: str | None = None, quantize: t.Literal["int8", "int4", "gptq"] | None = None, bettertransformer: bool | None = None, adapter_map: dict[str, str | None] | None = None, build_ctx: str | None = None, enable_features: tuple[str, ...] | None = None, workers_per_resource: int | float | None = None, runtime: t.Literal["ggml", "transformers"] = "transformers", dockerfile_template: str | None = None, overwrite: bool = False, container_registry: LiteralContainerRegistry | None = None, container_version_strategy: LiteralContainerVersionStrategy | None = None, push: bool = False, containerize: bool = False, serialisation_format: t.Literal["safetensors", "legacy"] = "safetensors", additional_args: list[str] | None = None, bento_store: BentoStore = Provide[BentoMLContainer.bento_store]) -> bentoml.Bento: +def _build(model_name: str, /, *, model_id: str | None = None, model_version: str | None = None, quantize: t.Literal["int8", "int4", "gptq"] | None = None, bettertransformer: bool | None = None, adapter_map: dict[str, str | None] | None = None, build_ctx: str | None = None, enable_features: tuple[str, ...] 
| None = None, workers_per_resource: float | None = None, runtime: t.Literal["ggml", "transformers"] = "transformers", dockerfile_template: str | None = None, overwrite: bool = False, container_registry: LiteralContainerRegistry | None = None, container_version_strategy: LiteralContainerVersionStrategy | None = None, push: bool = False, containerize: bool = False, serialisation_format: t.Literal["safetensors", "legacy"] = "safetensors", additional_args: list[str] | None = None, bento_store: BentoStore = Provide[BentoMLContainer.bento_store]) -> bentoml.Bento: """Package a LLM into a Bento. The LLM will be built into a BentoService with the following structure: diff --git a/src/openllm/cli/entrypoint.py b/openllm-python/src/openllm/cli/entrypoint.py similarity index 100% rename from src/openllm/cli/entrypoint.py rename to openllm-python/src/openllm/cli/entrypoint.py diff --git a/src/openllm/cli/extension/__init__.py b/openllm-python/src/openllm/cli/extension/__init__.py similarity index 100% rename from src/openllm/cli/extension/__init__.py rename to openllm-python/src/openllm/cli/extension/__init__.py diff --git a/src/openllm/cli/extension/build_base_container.py b/openllm-python/src/openllm/cli/extension/build_base_container.py similarity index 100% rename from src/openllm/cli/extension/build_base_container.py rename to openllm-python/src/openllm/cli/extension/build_base_container.py diff --git a/src/openllm/cli/extension/dive_bentos.py b/openllm-python/src/openllm/cli/extension/dive_bentos.py similarity index 100% rename from src/openllm/cli/extension/dive_bentos.py rename to openllm-python/src/openllm/cli/extension/dive_bentos.py diff --git a/src/openllm/cli/extension/get_containerfile.py b/openllm-python/src/openllm/cli/extension/get_containerfile.py similarity index 100% rename from src/openllm/cli/extension/get_containerfile.py rename to openllm-python/src/openllm/cli/extension/get_containerfile.py diff --git a/src/openllm/cli/extension/get_prompt.py b/openllm-python/src/openllm/cli/extension/get_prompt.py similarity index 100% rename from src/openllm/cli/extension/get_prompt.py rename to openllm-python/src/openllm/cli/extension/get_prompt.py diff --git a/src/openllm/cli/extension/list_bentos.py b/openllm-python/src/openllm/cli/extension/list_bentos.py similarity index 100% rename from src/openllm/cli/extension/list_bentos.py rename to openllm-python/src/openllm/cli/extension/list_bentos.py diff --git a/src/openllm/cli/extension/list_models.py b/openllm-python/src/openllm/cli/extension/list_models.py similarity index 100% rename from src/openllm/cli/extension/list_models.py rename to openllm-python/src/openllm/cli/extension/list_models.py diff --git a/src/openllm/cli/extension/playground.py b/openllm-python/src/openllm/cli/extension/playground.py similarity index 100% rename from src/openllm/cli/extension/playground.py rename to openllm-python/src/openllm/cli/extension/playground.py diff --git a/src/openllm/cli/termui.py b/openllm-python/src/openllm/cli/termui.py similarity index 100% rename from src/openllm/cli/termui.py rename to openllm-python/src/openllm/cli/termui.py diff --git a/src/openllm/client/__init__.py b/openllm-python/src/openllm/client/__init__.py similarity index 100% rename from src/openllm/client/__init__.py rename to openllm-python/src/openllm/client/__init__.py diff --git a/src/openllm/client/runtimes/__init__.py b/openllm-python/src/openllm/client/runtimes/__init__.py similarity index 100% rename from src/openllm/client/runtimes/__init__.py rename to 
openllm-python/src/openllm/client/runtimes/__init__.py diff --git a/src/openllm/client/runtimes/base.py b/openllm-python/src/openllm/client/runtimes/base.py similarity index 100% rename from src/openllm/client/runtimes/base.py rename to openllm-python/src/openllm/client/runtimes/base.py diff --git a/src/openllm/client/runtimes/grpc.py b/openllm-python/src/openllm/client/runtimes/grpc.py similarity index 100% rename from src/openllm/client/runtimes/grpc.py rename to openllm-python/src/openllm/client/runtimes/grpc.py diff --git a/src/openllm/client/runtimes/http.py b/openllm-python/src/openllm/client/runtimes/http.py similarity index 100% rename from src/openllm/client/runtimes/http.py rename to openllm-python/src/openllm/client/runtimes/http.py diff --git a/src/openllm/exceptions.py b/openllm-python/src/openllm/exceptions.py similarity index 100% rename from src/openllm/exceptions.py rename to openllm-python/src/openllm/exceptions.py diff --git a/src/openllm/models/__init__.py b/openllm-python/src/openllm/models/__init__.py similarity index 100% rename from src/openllm/models/__init__.py rename to openllm-python/src/openllm/models/__init__.py diff --git a/src/openllm/models/auto/__init__.py b/openllm-python/src/openllm/models/auto/__init__.py similarity index 100% rename from src/openllm/models/auto/__init__.py rename to openllm-python/src/openllm/models/auto/__init__.py diff --git a/src/openllm/models/auto/configuration_auto.py b/openllm-python/src/openllm/models/auto/configuration_auto.py similarity index 100% rename from src/openllm/models/auto/configuration_auto.py rename to openllm-python/src/openllm/models/auto/configuration_auto.py diff --git a/src/openllm/models/auto/factory.py b/openllm-python/src/openllm/models/auto/factory.py similarity index 100% rename from src/openllm/models/auto/factory.py rename to openllm-python/src/openllm/models/auto/factory.py diff --git a/src/openllm/models/auto/modeling_auto.py b/openllm-python/src/openllm/models/auto/modeling_auto.py similarity index 100% rename from src/openllm/models/auto/modeling_auto.py rename to openllm-python/src/openllm/models/auto/modeling_auto.py diff --git a/src/openllm/models/auto/modeling_flax_auto.py b/openllm-python/src/openllm/models/auto/modeling_flax_auto.py similarity index 100% rename from src/openllm/models/auto/modeling_flax_auto.py rename to openllm-python/src/openllm/models/auto/modeling_flax_auto.py diff --git a/src/openllm/models/auto/modeling_tf_auto.py b/openllm-python/src/openllm/models/auto/modeling_tf_auto.py similarity index 100% rename from src/openllm/models/auto/modeling_tf_auto.py rename to openllm-python/src/openllm/models/auto/modeling_tf_auto.py diff --git a/src/openllm/models/auto/modeling_vllm_auto.py b/openllm-python/src/openllm/models/auto/modeling_vllm_auto.py similarity index 100% rename from src/openllm/models/auto/modeling_vllm_auto.py rename to openllm-python/src/openllm/models/auto/modeling_vllm_auto.py diff --git a/src/openllm/models/baichuan/__init__.py b/openllm-python/src/openllm/models/baichuan/__init__.py similarity index 100% rename from src/openllm/models/baichuan/__init__.py rename to openllm-python/src/openllm/models/baichuan/__init__.py diff --git a/src/openllm/models/baichuan/configuration_baichuan.py b/openllm-python/src/openllm/models/baichuan/configuration_baichuan.py similarity index 100% rename from src/openllm/models/baichuan/configuration_baichuan.py rename to openllm-python/src/openllm/models/baichuan/configuration_baichuan.py diff --git 
a/src/openllm/models/baichuan/modeling_baichuan.py b/openllm-python/src/openllm/models/baichuan/modeling_baichuan.py similarity index 100% rename from src/openllm/models/baichuan/modeling_baichuan.py rename to openllm-python/src/openllm/models/baichuan/modeling_baichuan.py diff --git a/src/openllm/models/baichuan/modeling_vllm_baichuan.py b/openllm-python/src/openllm/models/baichuan/modeling_vllm_baichuan.py similarity index 100% rename from src/openllm/models/baichuan/modeling_vllm_baichuan.py rename to openllm-python/src/openllm/models/baichuan/modeling_vllm_baichuan.py diff --git a/src/openllm/models/chatglm/__init__.py b/openllm-python/src/openllm/models/chatglm/__init__.py similarity index 100% rename from src/openllm/models/chatglm/__init__.py rename to openllm-python/src/openllm/models/chatglm/__init__.py diff --git a/src/openllm/models/chatglm/configuration_chatglm.py b/openllm-python/src/openllm/models/chatglm/configuration_chatglm.py similarity index 100% rename from src/openllm/models/chatglm/configuration_chatglm.py rename to openllm-python/src/openllm/models/chatglm/configuration_chatglm.py diff --git a/src/openllm/models/chatglm/modeling_chatglm.py b/openllm-python/src/openllm/models/chatglm/modeling_chatglm.py similarity index 100% rename from src/openllm/models/chatglm/modeling_chatglm.py rename to openllm-python/src/openllm/models/chatglm/modeling_chatglm.py diff --git a/src/openllm/models/dolly_v2/__init__.py b/openllm-python/src/openllm/models/dolly_v2/__init__.py similarity index 100% rename from src/openllm/models/dolly_v2/__init__.py rename to openllm-python/src/openllm/models/dolly_v2/__init__.py diff --git a/src/openllm/models/dolly_v2/configuration_dolly_v2.py b/openllm-python/src/openllm/models/dolly_v2/configuration_dolly_v2.py similarity index 100% rename from src/openllm/models/dolly_v2/configuration_dolly_v2.py rename to openllm-python/src/openllm/models/dolly_v2/configuration_dolly_v2.py diff --git a/src/openllm/models/dolly_v2/modeling_dolly_v2.py b/openllm-python/src/openllm/models/dolly_v2/modeling_dolly_v2.py similarity index 100% rename from src/openllm/models/dolly_v2/modeling_dolly_v2.py rename to openllm-python/src/openllm/models/dolly_v2/modeling_dolly_v2.py diff --git a/src/openllm/models/dolly_v2/modeling_vllm_dolly_v2.py b/openllm-python/src/openllm/models/dolly_v2/modeling_vllm_dolly_v2.py similarity index 100% rename from src/openllm/models/dolly_v2/modeling_vllm_dolly_v2.py rename to openllm-python/src/openllm/models/dolly_v2/modeling_vllm_dolly_v2.py diff --git a/src/openllm/models/falcon/__init__.py b/openllm-python/src/openllm/models/falcon/__init__.py similarity index 100% rename from src/openllm/models/falcon/__init__.py rename to openllm-python/src/openllm/models/falcon/__init__.py diff --git a/src/openllm/models/falcon/configuration_falcon.py b/openllm-python/src/openllm/models/falcon/configuration_falcon.py similarity index 100% rename from src/openllm/models/falcon/configuration_falcon.py rename to openllm-python/src/openllm/models/falcon/configuration_falcon.py diff --git a/src/openllm/models/falcon/modeling_falcon.py b/openllm-python/src/openllm/models/falcon/modeling_falcon.py similarity index 100% rename from src/openllm/models/falcon/modeling_falcon.py rename to openllm-python/src/openllm/models/falcon/modeling_falcon.py diff --git a/src/openllm/models/flan_t5/__init__.py b/openllm-python/src/openllm/models/flan_t5/__init__.py similarity index 100% rename from src/openllm/models/flan_t5/__init__.py rename to 
openllm-python/src/openllm/models/flan_t5/__init__.py
diff --git a/src/openllm/models/flan_t5/configuration_flan_t5.py b/openllm-python/src/openllm/models/flan_t5/configuration_flan_t5.py
similarity index 100%
rename from src/openllm/models/flan_t5/configuration_flan_t5.py
rename to openllm-python/src/openllm/models/flan_t5/configuration_flan_t5.py
diff --git a/src/openllm/models/flan_t5/modeling_flan_t5.py b/openllm-python/src/openllm/models/flan_t5/modeling_flan_t5.py
similarity index 100%
rename from src/openllm/models/flan_t5/modeling_flan_t5.py
rename to openllm-python/src/openllm/models/flan_t5/modeling_flan_t5.py
diff --git a/src/openllm/models/flan_t5/modeling_flax_flan_t5.py b/openllm-python/src/openllm/models/flan_t5/modeling_flax_flan_t5.py
similarity index 100%
rename from src/openllm/models/flan_t5/modeling_flax_flan_t5.py
rename to openllm-python/src/openllm/models/flan_t5/modeling_flax_flan_t5.py
diff --git a/src/openllm/models/flan_t5/modeling_tf_flan_t5.py b/openllm-python/src/openllm/models/flan_t5/modeling_tf_flan_t5.py
similarity index 100%
rename from src/openllm/models/flan_t5/modeling_tf_flan_t5.py
rename to openllm-python/src/openllm/models/flan_t5/modeling_tf_flan_t5.py
diff --git a/src/openllm/models/gpt_neox/__init__.py b/openllm-python/src/openllm/models/gpt_neox/__init__.py
similarity index 100%
rename from src/openllm/models/gpt_neox/__init__.py
rename to openllm-python/src/openllm/models/gpt_neox/__init__.py
diff --git a/src/openllm/models/gpt_neox/configuration_gpt_neox.py b/openllm-python/src/openllm/models/gpt_neox/configuration_gpt_neox.py
similarity index 100%
rename from src/openllm/models/gpt_neox/configuration_gpt_neox.py
rename to openllm-python/src/openllm/models/gpt_neox/configuration_gpt_neox.py
diff --git a/src/openllm/models/gpt_neox/modeling_gpt_neox.py b/openllm-python/src/openllm/models/gpt_neox/modeling_gpt_neox.py
similarity index 100%
rename from src/openllm/models/gpt_neox/modeling_gpt_neox.py
rename to openllm-python/src/openllm/models/gpt_neox/modeling_gpt_neox.py
diff --git a/src/openllm/models/gpt_neox/modeling_vllm_gpt_neox.py b/openllm-python/src/openllm/models/gpt_neox/modeling_vllm_gpt_neox.py
similarity index 100%
rename from src/openllm/models/gpt_neox/modeling_vllm_gpt_neox.py
rename to openllm-python/src/openllm/models/gpt_neox/modeling_vllm_gpt_neox.py
diff --git a/src/openllm/models/llama/__init__.py b/openllm-python/src/openllm/models/llama/__init__.py
similarity index 100%
rename from src/openllm/models/llama/__init__.py
rename to openllm-python/src/openllm/models/llama/__init__.py
diff --git a/src/openllm/models/llama/configuration_llama.py b/openllm-python/src/openllm/models/llama/configuration_llama.py
similarity index 100%
rename from src/openllm/models/llama/configuration_llama.py
rename to openllm-python/src/openllm/models/llama/configuration_llama.py
diff --git a/src/openllm/models/llama/modeling_llama.py b/openllm-python/src/openllm/models/llama/modeling_llama.py
similarity index 100%
rename from src/openllm/models/llama/modeling_llama.py
rename to openllm-python/src/openllm/models/llama/modeling_llama.py
diff --git a/src/openllm/models/llama/modeling_vllm_llama.py b/openllm-python/src/openllm/models/llama/modeling_vllm_llama.py
similarity index 100%
rename from src/openllm/models/llama/modeling_vllm_llama.py
rename to openllm-python/src/openllm/models/llama/modeling_vllm_llama.py
diff --git a/src/openllm/models/mpt/__init__.py b/openllm-python/src/openllm/models/mpt/__init__.py
similarity index 100%
rename from src/openllm/models/mpt/__init__.py
rename to openllm-python/src/openllm/models/mpt/__init__.py
diff --git a/src/openllm/models/mpt/configuration_mpt.py b/openllm-python/src/openllm/models/mpt/configuration_mpt.py
similarity index 100%
rename from src/openllm/models/mpt/configuration_mpt.py
rename to openllm-python/src/openllm/models/mpt/configuration_mpt.py
diff --git a/src/openllm/models/mpt/modeling_mpt.py b/openllm-python/src/openllm/models/mpt/modeling_mpt.py
similarity index 100%
rename from src/openllm/models/mpt/modeling_mpt.py
rename to openllm-python/src/openllm/models/mpt/modeling_mpt.py
diff --git a/src/openllm/models/mpt/modeling_vllm_mpt.py b/openllm-python/src/openllm/models/mpt/modeling_vllm_mpt.py
similarity index 100%
rename from src/openllm/models/mpt/modeling_vllm_mpt.py
rename to openllm-python/src/openllm/models/mpt/modeling_vllm_mpt.py
diff --git a/src/openllm/models/opt/__init__.py b/openllm-python/src/openllm/models/opt/__init__.py
similarity index 100%
rename from src/openllm/models/opt/__init__.py
rename to openllm-python/src/openllm/models/opt/__init__.py
diff --git a/src/openllm/models/opt/configuration_opt.py b/openllm-python/src/openllm/models/opt/configuration_opt.py
similarity index 100%
rename from src/openllm/models/opt/configuration_opt.py
rename to openllm-python/src/openllm/models/opt/configuration_opt.py
diff --git a/src/openllm/models/opt/modeling_flax_opt.py b/openllm-python/src/openllm/models/opt/modeling_flax_opt.py
similarity index 100%
rename from src/openllm/models/opt/modeling_flax_opt.py
rename to openllm-python/src/openllm/models/opt/modeling_flax_opt.py
diff --git a/src/openllm/models/opt/modeling_opt.py b/openllm-python/src/openllm/models/opt/modeling_opt.py
similarity index 100%
rename from src/openllm/models/opt/modeling_opt.py
rename to openllm-python/src/openllm/models/opt/modeling_opt.py
diff --git a/src/openllm/models/opt/modeling_tf_opt.py b/openllm-python/src/openllm/models/opt/modeling_tf_opt.py
similarity index 100%
rename from src/openllm/models/opt/modeling_tf_opt.py
rename to openllm-python/src/openllm/models/opt/modeling_tf_opt.py
diff --git a/src/openllm/models/opt/modeling_vllm_opt.py b/openllm-python/src/openllm/models/opt/modeling_vllm_opt.py
similarity index 100%
rename from src/openllm/models/opt/modeling_vllm_opt.py
rename to openllm-python/src/openllm/models/opt/modeling_vllm_opt.py
diff --git a/src/openllm/models/stablelm/__init__.py b/openllm-python/src/openllm/models/stablelm/__init__.py
similarity index 100%
rename from src/openllm/models/stablelm/__init__.py
rename to openllm-python/src/openllm/models/stablelm/__init__.py
diff --git a/src/openllm/models/stablelm/configuration_stablelm.py b/openllm-python/src/openllm/models/stablelm/configuration_stablelm.py
similarity index 100%
rename from src/openllm/models/stablelm/configuration_stablelm.py
rename to openllm-python/src/openllm/models/stablelm/configuration_stablelm.py
diff --git a/src/openllm/models/stablelm/modeling_stablelm.py b/openllm-python/src/openllm/models/stablelm/modeling_stablelm.py
similarity index 100%
rename from src/openllm/models/stablelm/modeling_stablelm.py
rename to openllm-python/src/openllm/models/stablelm/modeling_stablelm.py
diff --git a/src/openllm/models/stablelm/modeling_vllm_stablelm.py b/openllm-python/src/openllm/models/stablelm/modeling_vllm_stablelm.py
similarity index 100%
rename from src/openllm/models/stablelm/modeling_vllm_stablelm.py
rename to openllm-python/src/openllm/models/stablelm/modeling_vllm_stablelm.py
diff --git a/src/openllm/models/starcoder/__init__.py b/openllm-python/src/openllm/models/starcoder/__init__.py
similarity index 100%
rename from src/openllm/models/starcoder/__init__.py
rename to openllm-python/src/openllm/models/starcoder/__init__.py
diff --git a/src/openllm/models/starcoder/configuration_starcoder.py b/openllm-python/src/openllm/models/starcoder/configuration_starcoder.py
similarity index 100%
rename from src/openllm/models/starcoder/configuration_starcoder.py
rename to openllm-python/src/openllm/models/starcoder/configuration_starcoder.py
diff --git a/src/openllm/models/starcoder/modeling_starcoder.py b/openllm-python/src/openllm/models/starcoder/modeling_starcoder.py
similarity index 100%
rename from src/openllm/models/starcoder/modeling_starcoder.py
rename to openllm-python/src/openllm/models/starcoder/modeling_starcoder.py
diff --git a/src/openllm/models/starcoder/modeling_vllm_starcoder.py b/openllm-python/src/openllm/models/starcoder/modeling_vllm_starcoder.py
similarity index 100%
rename from src/openllm/models/starcoder/modeling_vllm_starcoder.py
rename to openllm-python/src/openllm/models/starcoder/modeling_vllm_starcoder.py
diff --git a/src/openllm/playground/README.md b/openllm-python/src/openllm/playground/README.md
similarity index 100%
rename from src/openllm/playground/README.md
rename to openllm-python/src/openllm/playground/README.md
diff --git a/src/openllm/playground/__init__.py b/openllm-python/src/openllm/playground/__init__.py
similarity index 100%
rename from src/openllm/playground/__init__.py
rename to openllm-python/src/openllm/playground/__init__.py
diff --git a/src/openllm/playground/_meta.yml b/openllm-python/src/openllm/playground/_meta.yml
similarity index 100%
rename from src/openllm/playground/_meta.yml
rename to openllm-python/src/openllm/playground/_meta.yml
diff --git a/src/openllm/playground/falcon_tuned.py b/openllm-python/src/openllm/playground/falcon_tuned.py
similarity index 100%
rename from src/openllm/playground/falcon_tuned.py
rename to openllm-python/src/openllm/playground/falcon_tuned.py
diff --git a/src/openllm/playground/features.py b/openllm-python/src/openllm/playground/features.py
similarity index 100%
rename from src/openllm/playground/features.py
rename to openllm-python/src/openllm/playground/features.py
diff --git a/src/openllm/playground/llama2_qlora.py b/openllm-python/src/openllm/playground/llama2_qlora.py
similarity index 100%
rename from src/openllm/playground/llama2_qlora.py
rename to openllm-python/src/openllm/playground/llama2_qlora.py
diff --git a/src/openllm/playground/opt_tuned.py b/openllm-python/src/openllm/playground/opt_tuned.py
similarity index 100%
rename from src/openllm/playground/opt_tuned.py
rename to openllm-python/src/openllm/playground/opt_tuned.py
diff --git a/src/openllm/py.typed b/openllm-python/src/openllm/py.typed
similarity index 100%
rename from src/openllm/py.typed
rename to openllm-python/src/openllm/py.typed
diff --git a/src/openllm/serialisation/__init__.py b/openllm-python/src/openllm/serialisation/__init__.py
similarity index 92%
rename from src/openllm/serialisation/__init__.py
rename to openllm-python/src/openllm/serialisation/__init__.py
index 495f85c4..4adadbe2 100644
--- a/src/openllm/serialisation/__init__.py
+++ b/openllm-python/src/openllm/serialisation/__init__.py
@@ -26,7 +26,7 @@ from __future__ import annotations
 import importlib, typing as t
 import cloudpickle, fs, openllm
 from bentoml._internal.models.model import CUSTOM_OBJECTS_FILENAME
-from openllm._typing_compat import M, T, ParamSpec, Concatenate
+from openllm._typing_compat import M, T, ParamSpec
 if t.TYPE_CHECKING: import bentoml
@@ -62,9 +62,12 @@ def load_tokenizer(llm: openllm.LLM[t.Any, T], **tokenizer_attrs: t.Any) -> T:
   else: tokenizer.add_special_tokens({"pad_token": "[PAD]"})
   return tokenizer
+class _Caller(t.Protocol[P]):
+  def __call__(self, llm: openllm.LLM[M, T], *args: P.args, **kwargs: P.kwargs) -> t.Any: ...
+
 _extras = ["get", "import_model", "save_pretrained", "load_model"]
-def _make_dispatch_function(fn: str) -> t.Callable[Concatenate[openllm.LLM[t.Any, t.Any], P], t.Any]:
-  def caller(llm: openllm.LLM[t.Any, t.Any], *args: P.args, **kwargs: P.kwargs) -> t.Any:
+def _make_dispatch_function(fn: str) -> _Caller[P]:
+  def caller(llm: openllm.LLM[M, T], *args: P.args, **kwargs: P.kwargs) -> t.Any:
     """Generic function dispatch to correct serialisation submodules based on LLM runtime.

     > [!NOTE] See 'openllm.serialisation.transformers' if 'llm.runtime="transformers"'
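The hunk above drops `Concatenate` because a callback `Protocol` parametrised over a `ParamSpec` can express the same "LLM first, then forwarded arguments" shape. A minimal standalone sketch of that pattern, assuming Python 3.10+ and using a stand-in `FakeLLM` class rather than OpenLLM's real types:

```python
from __future__ import annotations
import typing as t

P = t.ParamSpec("P")  # typing.ParamSpec needs Python >= 3.10; use typing_extensions on 3.8/3.9

class FakeLLM:
  """Stand-in for openllm.LLM, only for this sketch."""
  runtime = "transformers"

class _Caller(t.Protocol[P]):
  # A callable that always takes the LLM first, then forwards everything else.
  def __call__(self, llm: FakeLLM, *args: P.args, **kwargs: P.kwargs) -> t.Any: ...

def _make_dispatch_function(fn: str) -> _Caller[P]:
  def caller(llm: FakeLLM, *args: P.args, **kwargs: P.kwargs) -> t.Any:
    # The real implementation resolves a serialisation submodule from llm.runtime
    # and calls getattr(module, fn); here we only demonstrate the typed shape.
    return f"dispatch {fn} for runtime={llm.runtime!r} args={args} kwargs={kwargs}"
  return caller

load_model = _make_dispatch_function("load_model")
print(load_model(FakeLLM(), "facebook/opt-125m", device_map="auto"))
```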
diff --git a/src/openllm/serialisation/constants.py b/openllm-python/src/openllm/serialisation/constants.py
similarity index 100%
rename from src/openllm/serialisation/constants.py
rename to openllm-python/src/openllm/serialisation/constants.py
diff --git a/src/openllm/serialisation/ggml.py b/openllm-python/src/openllm/serialisation/ggml.py
similarity index 100%
rename from src/openllm/serialisation/ggml.py
rename to openllm-python/src/openllm/serialisation/ggml.py
diff --git a/src/openllm/serialisation/transformers/__init__.py b/openllm-python/src/openllm/serialisation/transformers/__init__.py
similarity index 100%
rename from src/openllm/serialisation/transformers/__init__.py
rename to openllm-python/src/openllm/serialisation/transformers/__init__.py
diff --git a/src/openllm/serialisation/transformers/_helpers.py b/openllm-python/src/openllm/serialisation/transformers/_helpers.py
similarity index 100%
rename from src/openllm/serialisation/transformers/_helpers.py
rename to openllm-python/src/openllm/serialisation/transformers/_helpers.py
diff --git a/src/openllm/serialisation/transformers/weights.py b/openllm-python/src/openllm/serialisation/transformers/weights.py
similarity index 100%
rename from src/openllm/serialisation/transformers/weights.py
rename to openllm-python/src/openllm/serialisation/transformers/weights.py
diff --git a/src/openllm/testing.py b/openllm-python/src/openllm/testing.py
similarity index 100%
rename from src/openllm/testing.py
rename to openllm-python/src/openllm/testing.py
diff --git a/src/openllm/utils/__init__.py b/openllm-python/src/openllm/utils/__init__.py
similarity index 97%
rename from src/openllm/utils/__init__.py
rename to openllm-python/src/openllm/utils/__init__.py
index cc39040f..147a1b16 100644
--- a/src/openllm/utils/__init__.py
+++ b/openllm-python/src/openllm/utils/__init__.py
@@ -84,7 +84,7 @@ def non_intrusive_setattr(obj: t.Any, name: str, value: t.Any) -> None:
   _setattr = functools.partial(setattr, obj) if isinstance(obj, type) else _object_setattr.__get__(obj)
   if not hasattr(obj, name): _setattr(name, value)
-def field_env_key(model_name: str, key: str, suffix: str | t.Literal[""] | None = None) -> str: return "_".join(filter(None, map(str.upper, ["OPENLLM", model_name, suffix.strip("_") if suffix else "", key])))
+def field_env_key(model_name: str, key: str, suffix: str | None = None) -> str: return "_".join(filter(None, map(str.upper, ["OPENLLM", model_name, suffix.strip("_") if suffix else "", key])))
 # Special debug flag controlled via OPENLLMDEVDEBUG
 DEBUG: bool = sys.flags.dev_mode or (not sys.flags.ignore_environment and bool(os.environ.get(DEV_DEBUG_VAR)))
@@ -242,7 +242,7 @@ def normalize_attrs_to_model_tokenizer_pair(**attrs: t.Any) -> tuple[dict[str, t
   if k.startswith(_TOKENIZER_PREFIX): del attrs[k]
   return attrs, tokenizer_attrs
-def infer_auto_class(implementation: LiteralRuntime) -> type[openllm.AutoLLM] | type[openllm.AutoTFLLM] | type[openllm.AutoFlaxLLM] | type[openllm.AutoVLLM]:
+def infer_auto_class(implementation: LiteralRuntime) -> type[openllm.AutoLLM | openllm.AutoTFLLM | openllm.AutoFlaxLLM | openllm.AutoVLLM]:
   import openllm
   if implementation == "tf": return openllm.AutoTFLLM
   elif implementation == "flax": return openllm.AutoFlaxLLM
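`field_env_key` above derives environment-variable names from a model name, a field key, and an optional suffix. A quick standalone check of the naming scheme it produces (the function body here is copied from the hunk above so the example runs on its own):

```python
from __future__ import annotations

def field_env_key(model_name: str, key: str, suffix: str | None = None) -> str:
  # Same body as the hunk above: upper-case every non-empty part and join with "_".
  return "_".join(filter(None, map(str.upper, ["OPENLLM", model_name, suffix.strip("_") if suffix else "", key])))

assert field_env_key("llama", "temperature") == "OPENLLM_LLAMA_TEMPERATURE"
assert field_env_key("flan_t5", "max_new_tokens", suffix="generation_") == "OPENLLM_FLAN_T5_GENERATION_MAX_NEW_TOKENS"
```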
diff --git a/src/openllm/utils/analytics.py b/openllm-python/src/openllm/utils/analytics.py
similarity index 100%
rename from src/openllm/utils/analytics.py
rename to openllm-python/src/openllm/utils/analytics.py
diff --git a/src/openllm/utils/codegen.py b/openllm-python/src/openllm/utils/codegen.py
similarity index 100%
rename from src/openllm/utils/codegen.py
rename to openllm-python/src/openllm/utils/codegen.py
diff --git a/src/openllm/utils/dantic.py b/openllm-python/src/openllm/utils/dantic.py
similarity index 100%
rename from src/openllm/utils/dantic.py
rename to openllm-python/src/openllm/utils/dantic.py
diff --git a/src/openllm/utils/dummy_flax_objects.py b/openllm-python/src/openllm/utils/dummy_flax_objects.py
similarity index 100%
rename from src/openllm/utils/dummy_flax_objects.py
rename to openllm-python/src/openllm/utils/dummy_flax_objects.py
diff --git a/src/openllm/utils/dummy_pt_objects.py b/openllm-python/src/openllm/utils/dummy_pt_objects.py
similarity index 100%
rename from src/openllm/utils/dummy_pt_objects.py
rename to openllm-python/src/openllm/utils/dummy_pt_objects.py
diff --git a/src/openllm/utils/dummy_tf_objects.py b/openllm-python/src/openllm/utils/dummy_tf_objects.py
similarity index 100%
rename from src/openllm/utils/dummy_tf_objects.py
rename to openllm-python/src/openllm/utils/dummy_tf_objects.py
diff --git a/src/openllm/utils/dummy_vllm_objects.py b/openllm-python/src/openllm/utils/dummy_vllm_objects.py
similarity index 100%
rename from src/openllm/utils/dummy_vllm_objects.py
rename to openllm-python/src/openllm/utils/dummy_vllm_objects.py
diff --git a/src/openllm/utils/import_utils.py b/openllm-python/src/openllm/utils/import_utils.py
similarity index 100%
rename from src/openllm/utils/import_utils.py
rename to openllm-python/src/openllm/utils/import_utils.py
diff --git a/src/openllm/utils/lazy.py b/openllm-python/src/openllm/utils/lazy.py
similarity index 99%
rename from src/openllm/utils/lazy.py
rename to openllm-python/src/openllm/utils/lazy.py
index 4989709f..da8c46dd 100644
--- a/src/openllm/utils/lazy.py
+++ b/openllm-python/src/openllm/utils/lazy.py
@@ -21,7 +21,7 @@ class VersionInfo:
     if not isinstance(cmp, tuple): raise NotImplementedError
     if not (1 <= len(cmp) <= 4): raise NotImplementedError
     return t.cast(t.Tuple[int, int, int, str], attr.astuple(self)[:len(cmp)]), t.cast(t.Tuple[int, int, int, str], cmp)
-  def __eq__(self, other: object) -> bool:
+  def __eq__(self, other: t.Any) -> bool:
     try: us, them = self._ensure_tuple(other)
     except NotImplementedError: return NotImplemented
     return us == them
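The `__eq__` shown in the lazy.py hunk follows the standard rich-comparison protocol: on an operand it cannot compare, it returns `NotImplemented` so Python can try the reflected operation instead of raising. A minimal sketch of that pattern outside of `VersionInfo` (the `Version` class here is illustrative, not OpenLLM's):

```python
from __future__ import annotations
import typing as t

class Version:
  def __init__(self, major: int, minor: int) -> None:
    self.major, self.minor = major, minor

  def _ensure_tuple(self, other: t.Any) -> tuple[tuple[int, int], tuple[int, int]]:
    # Mirrors VersionInfo._ensure_tuple: reject operands we cannot meaningfully compare.
    if not (isinstance(other, tuple) and len(other) == 2): raise NotImplementedError
    return (self.major, self.minor), other

  def __eq__(self, other: t.Any) -> bool:
    try: us, them = self._ensure_tuple(other)
    except NotImplementedError: return NotImplemented  # let Python try the reflected comparison
    return us == them

print(Version(0, 2) == (0, 2))  # True
print(Version(0, 2) == "0.2")   # False: both sides yield NotImplemented, so Python falls back to identity
```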
diff --git a/src/openllm/utils/representation.py b/openllm-python/src/openllm/utils/representation.py
similarity index 100%
rename from src/openllm/utils/representation.py
rename to openllm-python/src/openllm/utils/representation.py
diff --git a/tests/__init__.py b/openllm-python/tests/__init__.py
similarity index 100%
rename from tests/__init__.py
rename to openllm-python/tests/__init__.py
diff --git a/tests/_strategies/__init__.py b/openllm-python/tests/_strategies/__init__.py
similarity index 100%
rename from tests/_strategies/__init__.py
rename to openllm-python/tests/_strategies/__init__.py
diff --git a/tests/_strategies/_configuration.py b/openllm-python/tests/_strategies/_configuration.py
similarity index 100%
rename from tests/_strategies/_configuration.py
rename to openllm-python/tests/_strategies/_configuration.py
diff --git a/tests/compiled_test.py b/openllm-python/tests/compiled_test.py
similarity index 100%
rename from tests/compiled_test.py
rename to openllm-python/tests/compiled_test.py
diff --git a/tests/configuration_test.py b/openllm-python/tests/configuration_test.py
similarity index 100%
rename from tests/configuration_test.py
rename to openllm-python/tests/configuration_test.py
diff --git a/tests/conftest.py b/openllm-python/tests/conftest.py
similarity index 100%
rename from tests/conftest.py
rename to openllm-python/tests/conftest.py
diff --git a/tests/models/__init__.py b/openllm-python/tests/models/__init__.py
similarity index 100%
rename from tests/models/__init__.py
rename to openllm-python/tests/models/__init__.py
diff --git a/tests/models/__snapshots__/flan_t5_test/test_flan_t5[container].json b/openllm-python/tests/models/__snapshots__/flan_t5_test/test_flan_t5[container].json
similarity index 100%
rename from tests/models/__snapshots__/flan_t5_test/test_flan_t5[container].json
rename to openllm-python/tests/models/__snapshots__/flan_t5_test/test_flan_t5[container].json
diff --git a/tests/models/__snapshots__/flan_t5_test/test_flan_t5[local].json b/openllm-python/tests/models/__snapshots__/flan_t5_test/test_flan_t5[local].json
similarity index 100%
rename from tests/models/__snapshots__/flan_t5_test/test_flan_t5[local].json
rename to openllm-python/tests/models/__snapshots__/flan_t5_test/test_flan_t5[local].json
diff --git a/tests/models/__snapshots__/opt_test/test_opt_125m[container].json b/openllm-python/tests/models/__snapshots__/opt_test/test_opt_125m[container].json
similarity index 100%
rename from tests/models/__snapshots__/opt_test/test_opt_125m[container].json
rename to openllm-python/tests/models/__snapshots__/opt_test/test_opt_125m[container].json
diff --git a/tests/models/__snapshots__/opt_test/test_opt_125m[local].json b/openllm-python/tests/models/__snapshots__/opt_test/test_opt_125m[local].json
similarity index 100%
rename from tests/models/__snapshots__/opt_test/test_opt_125m[local].json
rename to openllm-python/tests/models/__snapshots__/opt_test/test_opt_125m[local].json
diff --git a/tests/models/conftest.py b/openllm-python/tests/models/conftest.py
similarity index 100%
rename from tests/models/conftest.py
rename to openllm-python/tests/models/conftest.py
diff --git a/tests/models/flan_t5_test.py b/openllm-python/tests/models/flan_t5_test.py
similarity index 100%
rename from tests/models/flan_t5_test.py
rename to openllm-python/tests/models/flan_t5_test.py
diff --git a/tests/models/opt_test.py b/openllm-python/tests/models/opt_test.py
similarity index 100%
rename from tests/models/opt_test.py
rename to openllm-python/tests/models/opt_test.py
diff --git a/tests/models_test.py b/openllm-python/tests/models_test.py
similarity index 100%
rename from tests/models_test.py
rename to openllm-python/tests/models_test.py
diff --git a/tests/package_test.py b/openllm-python/tests/package_test.py
similarity index 100%
rename from tests/package_test.py
rename to openllm-python/tests/package_test.py
diff --git a/tests/strategies_test.py b/openllm-python/tests/strategies_test.py
similarity index 100%
rename from tests/strategies_test.py
rename to openllm-python/tests/strategies_test.py
diff --git a/package.json b/package.json
index 9e74db7d..8f122de0 100644
--- a/package.json
+++ b/package.json
@@ -9,7 +9,7 @@
     "node": ">=16"
   },
   "workspaces": [
-    "src/openllm-node"
+    "openllm-node"
   ],
   "private": true,
   "devDependencies": {
diff --git a/pyproject.toml b/pyproject.toml
index 864723b8..1d25eb75 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,11 +1,17 @@
-# NOTE: The following are managed by ./tools/dependencies.py
-# project.classifiers, project.dependencies, project.optional-dependencies, project.urls
+# NOTE: PEP517 is managed via ./tools/dependencies.py
 [build-system]
 build-backend = "hatchling.build"
-requires = ["hatchling", "hatch-vcs", "hatch-fancy-pypi-readme", "hatch-mypyc==0.16.0"]
-
+requires = [
+  "hatchling==1.18.0",
+  "hatch-vcs==0.3.0",
+  "hatch-fancy-pypi-readme==23.1.0",
+  "hatch-mypyc==0.16.0",
+]
 [project]
-authors = [{ name = "Aaron Pham", email = "aarnphm@bentoml.com" }]
+authors = [
+  { name = "Aaron Pham", email = "aarnphm@bentoml.com" },
+  { name = "BentoML Team", email = "contact@bentoml.com" },
+]
 classifiers = [
   "Development Status :: 5 - Production/Stable",
   "Environment :: GPU :: NVIDIA CUDA",
@@ -31,26 +37,8 @@ classifiers = [
   "Programming Language :: Python :: Implementation :: CPython",
   "Programming Language :: Python :: Implementation :: PyPy",
 ]
-dependencies = [
-  "bentoml[grpc,io]>=1.0.25",
-  "transformers[torch,tokenizers,accelerate]>=4.29.0",
-  "safetensors",
-  "optimum",
-  "attrs>=23.1.0",
-  "cattrs>=23.1.0",
-  "orjson",
-  "inflection",
-  "tabulate[widechars]>=0.9.0",
-  "httpx",
-  "click>=8.1.3",
-  "typing_extensions",
-  "mypy_extensions",
-  "ghapi",
-  "cuda-python;platform_system!=\"Darwin\"",
-  "bitsandbytes<0.42",
-]
-description = 'OpenLLM: Operating LLMs in production'
-dynamic = ["version", "readme"]
+description = "OpenLLM: Operating LLMs in production"
+dynamic = ["version", "readme", "dependencies"]
 keywords = [
   "MLOps",
   "AI",
@@ -58,6 +46,11 @@ keywords = [
   "Model Serving",
   "Model Deployment",
   "LLMOps",
+  "Falcon",
+  "Vicuna",
+  "Llama 2",
+  "Fine tuning",
+  "Serverless",
   "Large Language Model",
   "Generative AI",
   "StableLM",
@@ -68,60 +61,38 @@ keywords = [
 license = "Apache-2.0"
 name = "openllm"
 requires-python = ">=3.8"
-
-[project.scripts]
-openllm = "openllm.cli.entrypoint:cli"
-openllm-build-base-container = "openllm.cli.extension.build_base_container:cli"
-openllm-dive-bentos = "openllm.cli.extension.dive_bentos:cli"
-openllm-get-containerfile = "openllm.cli.extension.get_containerfile:cli"
-openllm-get-prompt = "openllm.cli.extension.get_prompt:cli"
-openllm-list-bentos = "openllm.cli.extension.list_bentos:cli"
-openllm-list-models = "openllm.cli.extension.list_models:cli"
-openllm-playground = "openllm.cli.extension.playground:cli"
-
 [project.urls]
 Blog = "https://modelserving.com"
 Chat = "https://discord.gg/openllm"
 Documentation = "https://github.com/bentoml/openllm#readme"
-GitHub = "https://github.com/bentoml/openllm"
-History = "https://github.com/bentoml/openllm/blob/main/CHANGELOG.md"
+GitHub = "https://github.com/bentoml/OpenLLM"
+History = "https://github.com/bentoml/OpenLLM/blob/main/CHANGELOG.md"
 Homepage = "https://bentoml.com"
-Tracker = "https://github.com/bentoml/openllm/issues"
+Tracker = "https://github.com/bentoml/OpenLLM/issues"
 Twitter = "https://twitter.com/bentomlai"
-[project.optional-dependencies]
-agents = ["transformers[agents]>=4.30", "diffusers", "soundfile"]
-all = [
-  "openllm[agents]",
-  "openllm[baichuan]",
-  "openllm[chatglm]",
-  "openllm[falcon]",
-  "openllm[fine-tune]",
-  "openllm[flan-t5]",
-  "openllm[ggml]",
-  "openllm[gptq]",
-  "openllm[llama]",
-  "openllm[mpt]",
-  "openllm[openai]",
-  "openllm[opt]",
-  "openllm[playground]",
-  "openllm[starcoder]",
-  "openllm[vllm]",
+[tool.hatch.build]
+exclude = ["*"]
+dev-mode-dirs = ["openllm-python"]
+[tool.hatch.build.sources]
+"openllm-python/src/openllm" = "openllm"
+[tool.hatch.metadata.hooks.custom]
+
+[tool.hatch.version]
+fallback-version = "0.0.0"
+source = "vcs"
+[tool.hatch.version.raw-options]
+git_describe_command = [
+  "git",
+  "describe",
+  "--dirty",
+  "--tags",
+  "--long",
+  "--first-parent",
 ]
-baichuan = ["cpm-kernels", "sentencepiece"]
-chatglm = ["cpm-kernels", "sentencepiece"]
-falcon = ["einops", "xformers"]
-fine-tune = ["peft>=0.4.0", "bitsandbytes", "datasets", "accelerate", "trl"]
-flan-t5 = ["flax", "jax", "jaxlib", "tensorflow", "keras"]
-ggml = ["ctransformers"]
-gptq = ["auto-gptq[triton]"]
-llama = ["fairscale", "sentencepiece"]
-mpt = ["triton", "einops"]
-openai = ["openai", "tiktoken"]
-opt = ["flax", "jax", "jaxlib", "tensorflow", "keras"]
-playground = ["jupyter", "notebook", "ipython", "jupytext", "nbformat"]
-starcoder = ["bitsandbytes"]
-vllm = ["vllm", "ray"]
+local_scheme = "no-local-version"
+[tool.hatch.metadata]
+allow-direct-references = true

 [tool.cibuildwheel]
 build-verbosity = 1
@@ -187,20 +158,15 @@ whitelist-regex = ["test_.*"]
 [tool.check-wheel-contents]
 toplevel = ["openllm"]
-[tool.pytest.ini_options]
-addopts = ["-rfEX", "-pno:warnings", "--snapshot-warn-unused"]
-python_files = ["test_*.py", "*_test.py"]
-testpaths = ["tests"]
-
 [tool.ruff]
 extend-exclude = [
   "tools",
   "examples",
-  "src/openllm/playground",
-  "src/openllm/__init__.py",
-  "src/openllm/_version.py",
-  "src/openllm/utils/dummy_*.py",
-  "src/openllm/models/__init__.py",
+  "openllm-python/src/openllm/playground",
+  "openllm-python/src/openllm/__init__.py",
+  "openllm-python/src/openllm/_version.py",
+  "openllm-python/src/openllm/utils/dummy_*.py",
+  "openllm-python/src/openllm/models/__init__.py",
 ]
 extend-include = ["*.ipynb"]
 extend-select = [
@@ -216,7 +182,6 @@ extend-select = [
   "PLW", # pylint-warning
   "PLR", # pylint-refactor
   "PT", # flake8-pytest-style
-  "PYI", # flake8-pyi
   "PERF", # perflint
   "FLY", # flynt
   "RUF", # Ruff-specific rules
@@ -239,7 +204,6 @@ ignore = [
   "E401", # ignore multiple line import
   "E702",
   "I001", # unsorted imports
-  "PYI021", # ignore docstring in stubs, as pyright will include docstring in stubs.
   "D103", # Just missing docstring for magic methods.
   "D102",
   "D101",
@@ -255,7 +219,7 @@ target-version = "py38"
 typing-modules = ["openllm._typing_compat"]
 unfixable = ["TCH004"]
 [tool.ruff.flake8-type-checking]
-exempt-modules = ["typing", "typing_extensions", "."]
+exempt-modules = ["typing", "typing_extensions", "openllm._typing_compat"]
 runtime-evaluated-base-classes = [
   "pydantic.BaseModel",
   "openllm._configuration.LLMConfig",
@@ -272,7 +236,14 @@ combine-as-imports = true
 force-single-line = false
 force-wrap-aliases = true
 known-first-party = ["openllm", "bentoml"]
-known-third-party = ["transformers", "click", "huggingface_hub", "torch", "vllm", "auto_gptq"]
+known-third-party = [
+  "transformers",
+  "click",
+  "huggingface_hub",
+  "torch",
+  "vllm",
+  "auto_gptq",
+]
 lines-after-imports = 0
 lines-between-types = 0
 no-lines-before = ["future", "standard-library"]
@@ -281,19 +252,14 @@ required-imports = ["from __future__ import annotations"]
 [tool.ruff.flake8-quotes]
 avoid-escape = false
 [tool.ruff.extend-per-file-ignores]
-"src/openllm/_service.py" = ["E401"]
-"src/openllm/cli/entrypoint.py" = ["D301"]
-"src/openllm/client/runtimes/*" = ["D107"]
-"src/openllm/models/**" = ["E", "D", "F"]
-"src/openllm/utils/import_utils.py" = ["PLW0603"]
-"tests/**/*" = [
-  "S101",
-  "TID252",
-  "D", # No docstring in tests
-  "PT011", # ignore too broad raises, as it can be use pytest.raises().match()
-  "S307", # Ignore eval(compile) as it is a known script execution
-]
-"typings/**" = ["D", "F", "E", "PYI002"]
+"openllm-python/src/openllm/_service.py" = ["E401"]
+"openllm-python/src/openllm/cli/entrypoint.py" = ["D301"]
+"openllm-python/src/openllm/client/runtimes/*" = ["D107"]
+"openllm-python/src/openllm/models/**" = ["E", "D", "F"]
+"openllm-python/src/openllm/utils/import_utils.py" = ["PLW0603"]
+"openllm-python/src/openllm/_configuration.py" = ["F811"]
+"openllm-python/tests/**/*" = ["S101", "TID252", "D", "PT011", "S307"]
+"typings/**" = ["D", "F", "E"]

 [tool.yapf]
 ALIGN_CLOSING_BRACKET_WITH_VISUAL_INDENT = true
@@ -338,18 +304,23 @@ SPLIT_PENALTY_BEFORE_IF_EXPR = 10000
 SPLIT_PENALTY_COMPREHENSION = 3000
 SPLIT_PENALTY_FOR_ADDED_LINE_SPLIT = 8000

+[tool.pytest.ini_options]
+addopts = ["-rfEX", "-pno:warnings", "--snapshot-warn-unused"]
+python_files = ["test_*.py", "*_test.py"]
+testpaths = ["openllm-python/tests"]
+
 [tool.coverage.paths]
 openllm = ["src/openllm", "*/openllm/src/openllm"]
 [tool.coverage.run]
 branch = true
 omit = [
   "__pypackages__/*",
-  "src/openllm/_version.py",
-  "src/openllm/playground/",
-  "src/openllm/__init__.py",
-  "src/openllm/__main__.py",
-  "src/openllm/utils/dummy_*.py",
-  "src/openllm/_typing_compat.py",
+  "openllm-python/src/openllm/_version.py",
+  "openllm-python/src/openllm/playground/",
+  "openllm-python/src/openllm/__init__.py",
+  "openllm-python/src/openllm/__main__.py",
+  "openllm-python/src/openllm/utils/dummy_*.py",
+  "openllm-python/src/openllm/_typing_compat.py",
 ]
 source_pkgs = ["openllm"]
 [tool.coverage.report]
@@ -375,32 +346,35 @@ exclude_lines = [
 ]
 omit = [
   "__pypackages__/*",
-  "src/openllm/_version.py",
-  "src/openllm/playground/",
-  "src/openllm/__init__.py",
-  "src/openllm/__main__.py",
-  "src/openllm/utils/dummy_*.py",
-  "src/openllm/_typing_compat.py",
+  "openllm-python/src/openllm/_version.py",
+  "openllm-python/src/openllm/playground/",
+  "openllm-python/src/openllm/__init__.py",
+  "openllm-python/src/openllm/__main__.py",
+  "openllm-python/src/openllm/utils/dummy_*.py",
+  "openllm-python/src/openllm/_typing_compat.py",
 ]
 precision = 2
 show_missing = true
+# NOTE: The following strict type checkers are in use:
+# - mypy: for both runtime and static type checking (since we compile the wheels)
+# - pyright: for static type checking only, as we want to use newer typing features
 [tool.pyright]
 analysis.useLibraryCodeForTypes = true
 exclude = [
   "__pypackages__/*",
-  "src/openllm/playground/",
-  "src/openllm/models/",
-  "src/openllm/__init__.py",
-  "src/openllm/__main__.py",
-  "src/openllm/utils/dummy_*.py",
-  "src/openllm/_typing_compat.py",
+  "openllm-python/src/openllm/playground/",
+  "openllm-python/src/openllm/models/",
+  "openllm-python/src/openllm/__init__.py",
+  "openllm-python/src/openllm/__main__.py",
+  "openllm-python/src/openllm/utils/dummy_*.py",
+  "openllm-python/src/openllm/_typing_compat.py",
+  "openllm-python/tests",
   "tools",
   "examples",
-  "tests",
 ]
-include = ["src/openllm"]
-pythonVersion = "3.8"
+include = ["openllm-python/src/openllm"]
+pythonVersion = "3.12"
 reportMissingImports = "warning"
 reportMissingTypeStubs = false
 reportPrivateUsage = "warning"
@@ -413,16 +387,21 @@ typeCheckingMode = "strict"

 [tool.mypy]
 exclude = [
-  "src/openllm/playground/",
-  "src/openllm/utils/dummy_*.py",
-  "src/openllm/models",
-  "src/openllm/_typing_compat.py",
+  "examples",
+  "tools",
+  "cz.py",
+  "openllm-python/tests",
+  "openllm-python/src/openllm/playground",
+  "openllm-python/src/openllm/utils/dummy_*.py",
+  "openllm-python/src/openllm/models",
+  "openllm-python/src/openllm/_typing_compat.py",
 ]
 modules = ["openllm"]
 mypy_path = "typings"
 pretty = true
 python_version = "3.8"
 show_error_codes = true
+strict = true
 warn_no_return = false
 warn_return_any = false
 warn_unreachable = true
@@ -452,67 +431,3 @@ module = [
 [[tool.mypy.overrides]]
 ignore_errors = true
 module = ["openllm.models.*", "openllm.playground.*", "openllm._typing_compat"]
-
-[tool.hatch.version]
-fallback-version = "0.0.0"
-source = "vcs"
-[tool.hatch.build.hooks.vcs]
-version-file = "src/openllm/_version.py"
-[tool.hatch.version.raw-options]
-git_describe_command = ["git", "describe", "--dirty", "--tags", "--long", "--first-parent"]
-local_scheme = "no-local-version"
-[tool.hatch.metadata]
-allow-direct-references = true
-[tool.hatch.build.targets.wheel]
-exclude = ["/src/openllm-node"]
-only-include = ["src"]
-sources = ["src"]
-[tool.hatch.build.targets.sdist]
-exclude = ["/.github", "/typings", "/examples", "/assets", "/changelog.d", "/tools", ".git"]
-[tool.hatch.build.targets.wheel.hooks.mypyc]
-dependencies = [
-  "hatch-mypyc==0.16.0",
-  "mypy==1.4.1",
-  # avoid https://github.com/pallets/click/issues/2558
-  "click==8.1.3",
-  "bentoml==1.1.1",
-  "transformers>=4.31.0",
-  "pandas-stubs",
-  "types-psutil",
-  "types-tabulate",
-  "types-PyYAML",
-  "types-protobuf",
-]
-enable-by-default = false
-include = [
-  "src/openllm/bundle",
-  "src/openllm/models/__init__.py",
-  "src/openllm/models/auto/__init__.py",
-  "src/openllm/utils/__init__.py",
-  "src/openllm/utils/codegen.py",
-  "src/openllm/__init__.py",
-  "src/openllm/_prompt.py",
-  "src/openllm/_schema.py",
-  "src/openllm/_quantisation.py",
-  "src/openllm/_generation.py",
-  "src/openllm/_strategies.py",
-  "src/openllm/exceptions.py",
-  "src/openllm/testing.py",
-]
-# NOTE: This is consistent with pyproject.toml
-mypy-args = [
-  "--strict",
-  # this is because all transient library doesn't have types
-  "--allow-subclassing-any",
-  "--follow-imports=skip",
-  "--check-untyped-defs",
-  "--ignore-missing-imports",
-  "--no-warn-return-any",
-  "--warn-unreachable",
-  "--no-warn-no-return",
-  "--no-warn-unused-ignores",
"--exclude='/src\\/openllm\\/playground\\/**'", - "--exclude='/src\\/openllm\\/_typing_compat\\.py$'", -] -options = { verbose = true, strip_asserts = true, debug_level = "2", opt_level = "3", include_runtime_files = true } -require-runtime-dependencies = true diff --git a/taplo.toml b/taplo.toml deleted file mode 100644 index 994b031b..00000000 --- a/taplo.toml +++ /dev/null @@ -1,7 +0,0 @@ -include = ["*.toml"] - -[formatting] -align_entries = false -column_width = 120 -indent_string = " " -reorder_keys = true diff --git a/tools/assert-model-table-latest b/tools/assert-model-table-latest index c481c1fa..c0221136 100755 --- a/tools/assert-model-table-latest +++ b/tools/assert-model-table-latest @@ -1,25 +1,21 @@ #!/usr/bin/env python3 - from __future__ import annotations -import os -import subprocess -import sys - +import os, subprocess, sys from markdown_it import MarkdownIt md = MarkdownIt() ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) -with open(os.path.join(ROOT, "README.md"), "r") as f: - readme = md.parse(f.read()) +with open(os.path.join(ROOT, "README.md"), "r") as f: readme = md.parse(f.read()) +sys.path.insert(0, os.path.join(ROOT,"openllm-python","src")) +import openllm # NOTE: Currently, we only have one table in README, which is the Model readme. table = [r for r in readme if r.type == "html_block" and r.content.startswith(" dict[int, str]: - return {v: status for v, status in zip(range(1, 8), ["1 - Planning", "2 - Pre-Alpha", "3 - Alpha", "4 - Beta", "5 - Production/Stable", "6 - Mature", "7 - Inactive",],)} - + def status() -> dict[int, str]: return {v: status for v, status in zip(range(1, 8), ["1 - Planning", "2 - Pre-Alpha", "3 - Alpha", "4 - Beta", "5 - Production/Stable", "6 - Mature", "7 - Inactive",],)} @staticmethod - def apache() -> str: - return Classifier.create_classifier("license", "OSI Approved", "Apache Software License") - + def apache() -> str: return Classifier.create_classifier("license", "OSI Approved", "Apache Software License") @staticmethod def create_classifier(identifier: str, *decls: t.Any) -> str: cls_ = Classifier() if identifier not in cls_.identifier: raise ValueError(f"{identifier} is not yet supported (supported alias: {Classifier.identifier})") return cls_.joiner.join([cls_.identifier[identifier], *decls]) - @staticmethod def create_python_classifier(implementation: list[str] | None = None, supported_version: list[str] | None = None) -> list[str]: - if supported_version is None: - supported_version = ["3.8", "3.9", "3.10", "3.11", "3.12"] - if implementation is None: - implementation = ["CPython", "PyPy"] + if supported_version is None: supported_version = ["3.8", "3.9", "3.10", "3.11", "3.12"] + if implementation is None: implementation = ["CPython", "PyPy"] base = [Classifier.create_classifier("language", "Python"), Classifier.create_classifier("language", "Python", "3"),] base.append(Classifier.create_classifier("language", "Python", "3", "Only")) base.extend([Classifier.create_classifier("language", "Python", version) for version in supported_version]) base.extend([Classifier.create_classifier("language", "Python", "Implementation", impl) for impl in implementation]) return base - @staticmethod - def create_status_classifier(level: int) -> str: - return Classifier.create_classifier("status", Classifier.status()[level]) + def create_status_classifier(level: int) -> str: return Classifier.create_classifier("status", Classifier.status()[level]) @dataclasses.dataclass(frozen=True) class Dependencies: @@ -62,47 +49,26 @@ 
class Dependencies: lower_constraint: t.Optional[str] = None upper_constraint: t.Optional[str] = None platform: t.Optional[t.Tuple[t.Literal["Linux", "Windows", "Darwin"], t.Literal["eq", "ne"]]] = None - - def with_options(self, **kwargs: t.Any) -> Dependencies: - return dataclasses.replace(self, **kwargs) - + def with_options(self, **kwargs: t.Any) -> Dependencies: return dataclasses.replace(self, **kwargs) @property - def has_constraint(self) -> bool: - return self.lower_constraint is not None or self.upper_constraint is not None - + def has_constraint(self) -> bool: return self.lower_constraint is not None or self.upper_constraint is not None @property - def pypi_extensions(self) -> str: - return "" if self.extensions is None else f"[{','.join(self.extensions)}]" - + def pypi_extensions(self) -> str: return "" if self.extensions is None else f"[{','.join(self.extensions)}]" @staticmethod - def platform_restriction(platform: t.LiteralString, op: t.Literal["eq", "ne"] = "eq") -> str: - return f'platform_system{"==" if op == "eq" else "!="}"{platform}"' - + def platform_restriction(platform: t.LiteralString, op: t.Literal["eq", "ne"] = "eq") -> str: return f'platform_system{"==" if op == "eq" else "!="}"{platform}"' def to_str(self) -> str: deps: list[str] = [] - if self.lower_constraint is not None and self.upper_constraint is not None: - dep = f"{self.name}{self.pypi_extensions}>={self.lower_constraint},<{self.upper_constraint}" - elif self.lower_constraint is not None: - dep = f"{self.name}{self.pypi_extensions}>={self.lower_constraint}" - elif self.upper_constraint is not None: - dep = f"{self.name}{self.pypi_extensions}<{self.upper_constraint}" - elif self.subdirectory is not None: - dep = f"{self.name}{self.pypi_extensions} @ git+https://github.com/{self.git_repo_url}.git#subdirectory={self.subdirectory}" - elif self.branch is not None: - dep = f"{self.name}{self.pypi_extensions} @ git+https://github.com/{self.git_repo_url}.git@{self.branch}" - else: - dep = f"{self.name}{self.pypi_extensions}" - + if self.lower_constraint is not None and self.upper_constraint is not None: dep = f"{self.name}{self.pypi_extensions}>={self.lower_constraint},<{self.upper_constraint}" + elif self.lower_constraint is not None: dep = f"{self.name}{self.pypi_extensions}>={self.lower_constraint}" + elif self.upper_constraint is not None: dep = f"{self.name}{self.pypi_extensions}<{self.upper_constraint}" + elif self.subdirectory is not None: dep = f"{self.name}{self.pypi_extensions} @ git+https://github.com/{self.git_repo_url}.git#subdirectory={self.subdirectory}" + elif self.branch is not None: dep = f"{self.name}{self.pypi_extensions} @ git+https://github.com/{self.git_repo_url}.git@{self.branch}" + else: dep = f"{self.name}{self.pypi_extensions}" deps.append(dep) - - if self.platform: - deps.append(self.platform_restriction(*self.platform)) - + if self.platform: deps.append(self.platform_restriction(*self.platform)) return ";".join(deps) - @classmethod - def from_tuple(cls, *decls: t.Any) -> Dependencies: - return cls(*decls) + def from_tuple(cls, *decls: t.Any) -> Dependencies: return cls(*decls) _BENTOML_EXT = ["grpc", "io"] _TRANSFORMERS_EXT = ["torch", "tokenizers", "accelerate"] @@ -126,11 +92,6 @@ _BASE_DEPENDENCIES = [ Dependencies(name="bitsandbytes", upper_constraint="0.42"), # 0.41 works with CUDA 11.8 ] -_NIGHTLY_MAPPING: dict[str, Dependencies] = { - "bentoml": Dependencies.from_tuple("bentoml", "bentoml/bentoml", "main", _BENTOML_EXT), "peft": Dependencies.from_tuple("peft", 
"huggingface/peft", "main", None), "transformers": Dependencies.from_tuple("transformers", "huggingface/transformers", "main", _TRANSFORMERS_EXT), "optimum": Dependencies.from_tuple("optimum", "huggingface/optimum", "main", None), - "accelerate": Dependencies.from_tuple("accelerate", "huggingface/accelerate", "main", None), "bitsandbytes": Dependencies.from_tuple("bitsandbytes", "TimDettmers/bitsandbytes", "main", None), "trl": Dependencies.from_tuple("trl", "lvwerra/trl", "main", None), "vllm": Dependencies.from_tuple("vllm", "vllm-project/vllm", "main", None, None, True, None), -} - _ALL_RUNTIME_DEPS = ["flax", "jax", "jaxlib", "tensorflow", "keras"] FINE_TUNE_DEPS = ["peft>=0.4.0", "bitsandbytes", "datasets", "accelerate", "trl"] FLAN_T5_DEPS = _ALL_RUNTIME_DEPS @@ -185,48 +146,89 @@ def create_optional_table() -> Table: return table -def create_url_table() -> Table: +def create_url_table(_info: t.Any) -> Table: table = tomlkit.table() _urls = { - "Blog": "https://modelserving.com", "Chat": "https://discord.gg/openllm", "Documentation": "https://github.com/bentoml/openllm#readme", "GitHub": "https://github.com/bentoml/openllm", "History": "https://github.com/bentoml/openllm/blob/main/CHANGELOG.md", "Homepage": "https://bentoml.com", "Tracker": "https://github.com/bentoml/openllm/issues", + "Blog": "https://modelserving.com", "Chat": "https://discord.gg/openllm", "Documentation": "https://github.com/bentoml/openllm#readme", + "GitHub": _info.html_url, + "History": f"{_info.html_url}/blob/main/CHANGELOG.md", + "Homepage": _info.homepage, + "Tracker": f"{_info.html_url}/issues", "Twitter": "https://twitter.com/bentomlai", } table.update({k: v for k, v in sorted(_urls.items())}) return table +def build_system() -> Table: + table = tomlkit.table() + table.add("build-backend", "hatchling.build") + requires_array = tomlkit.array() + requires_array.extend(["hatchling==1.18.0", "hatch-vcs==0.3.0", "hatch-fancy-pypi-readme==23.1.0", "hatch-mypyc==0.16.0"]) + table.add("requires", requires_array.multiline(True)) + return table + +def authors() -> Array: + arr = tomlkit.array() + arr.append(dict(name="Aaron Pham", email="aarnphm@bentoml.com")) + arr.append(dict(name="BentoML Team", email="contact@bentoml.com")) + return arr.multiline(True) + +def keywords() -> Array: + arr = tomlkit.array() + arr.extend([ + "MLOps", + "AI", + "BentoML", + "Model Serving", + "Model Deployment", + "LLMOps", + "Falcon", + "Vicuna", + "Llama 2", + "Fine tuning", + "Serverless", + "Large Language Model", + "Generative AI", + "StableLM", + "Alpaca", + "PyTorch", + "Transformers"]) + return arr + def build_cli_extensions() -> Table: table = tomlkit.table() ext: dict[str, str] = {"openllm": "openllm.cli.entrypoint:cli"} - ext.update({f"openllm-{inflection.dasherize(ke)}": f"openllm.cli.extension.{ke}:cli" for ke in sorted([fname[:-3] for fname in os.listdir(os.path.abspath(os.path.join(ROOT, "src", "openllm", "cli", "extension"))) if fname.endswith(".py") and not fname.startswith("__")])}) + ext.update({f"openllm-{inflection.dasherize(ke)}": f"openllm.cli.extension.{ke}:cli" for ke in sorted([fname[:-3] for fname in os.listdir(os.path.abspath(os.path.join(ROOT, "openllm-python", "src", "openllm", "cli", "extension"))) if fname.endswith(".py") and not fname.startswith("__")])}) table.update(ext) return table def main() -> int: - with open(os.path.join(ROOT, "pyproject.toml"), "r") as f: - pyproject = tomlkit.parse(f.read()) + api = GhApi(owner=_OWNER, repo=_REPO, authenticate=False) + _info = api.repos.get() + with 
open(os.path.join(ROOT, "openllm-python", "pyproject.toml"), "r") as f: pyproject = tomlkit.parse(f.read()) dependencies_array = tomlkit.array() dependencies_array.extend([v.to_str() for v in _BASE_DEPENDENCIES]) + # dynamic field + dyn_arr = tomlkit.array() + dyn_arr.extend(["version", "readme"]) - pyproject["project"]["urls"] = create_url_table() - pyproject["project"]["scripts"] = build_cli_extensions() + pyproject["build-system"] = build_system() + pyproject["project"]["authors"] = authors() pyproject["project"]["classifiers"] = create_classifiers() - pyproject["project"]["optional-dependencies"] = create_optional_table() pyproject["project"]["dependencies"] = dependencies_array.multiline(True) + pyproject["project"]["description"] = f"{_info.name}: {_info.description}" + pyproject["project"]["dynamic"] = dyn_arr + pyproject["project"]["keywords"] = keywords().multiline(True) + pyproject["project"]["license"] = _info.license.spdx_id + pyproject["project"]["name"] = f"{_info.name.lower()}" + pyproject["project"]["requires-python"] = ">=3.8" - with open(os.path.join(ROOT, "pyproject.toml"), "w") as f: - f.write(tomlkit.dumps(pyproject)) - - with open(os.path.join(ROOT, "nightly-requirements.txt"), "w") as f: - f.write(f"# This file is generated by `{fname}`. DO NOT EDIT\n-e .[playground,flan-t5]\n") - f.writelines([f"{v.to_str()}\n" for v in _NIGHTLY_MAPPING.values() if not v.requires_gpu]) - with open(os.path.join(ROOT, "nightly-requirements-gpu.txt"), "w") as f: - f.write(f"# This file is generated by `{fname}`. # DO NOT EDIT\n") - f.write("# For Jax, Flax, Tensorflow, PyTorch CUDA support, please refers to their official installation for your specific setup.\n") - f.write("-r nightly-requirements.txt\n-e .[all]\n") - f.writelines([f"{v.to_str()}\n" for v in _NIGHTLY_MAPPING.values() if v.requires_gpu]) + pyproject["project"]["urls"] = create_url_table(_info) + pyproject["project"]["scripts"] = build_cli_extensions() + pyproject["project"]["optional-dependencies"] = create_optional_table() + with open(os.path.join(ROOT, "openllm-python", "pyproject.toml"), "w") as f: f.write(tomlkit.dumps(pyproject)) return 0 -if __name__ == "__main__": - raise SystemExit(main()) +if __name__ == "__main__": raise SystemExit(main()) diff --git a/tools/generate-coverage.py b/tools/generate-coverage.py index 12196f3f..65132443 100755 --- a/tools/generate-coverage.py +++ b/tools/generate-coverage.py @@ -8,10 +8,10 @@ from lxml import etree ROOT = Path(__file__).resolve().parent.parent -PACKAGES = {"src/openllm/": "openllm"} +PACKAGES = {"openllm-python/src/openllm/": "openllm"} def main() -> int: - coverage_report = ROOT / "coverage.xml" + coverage_report = ROOT/"coverage.xml" root = etree.fromstring(coverage_report.read_text()) raw_package_data: defaultdict[str, dict[str, int]] = defaultdict(lambda: {"hits": 0, "misses": 0}) @@ -27,10 +27,8 @@ def main() -> int: raise ValueError(message) for line in module.find("lines"): - if line.attrib["hits"] == "1": - data["hits"] += 1 - else: - data["misses"] += 1 + if line.attrib["hits"] == "1": data["hits"] += 1 + else: data["misses"] += 1 total_statements_covered = 0 total_statements = 0 @@ -40,14 +38,11 @@ def main() -> int: statements = statements_covered + data["misses"] total_statements_covered += statements_covered total_statements += statements - coverage_data[package_name] = {"statements_covered": statements_covered, "statements": statements} coverage_data["total"] = {"statements_covered": total_statements_covered, "statements": total_statements} - 
coverage_summary = ROOT / "coverage-summary.json" + coverage_summary = ROOT/"coverage-summary.json" coverage_summary.write_text(orjson.dumps(coverage_data, option=orjson.OPT_INDENT_2).decode(), encoding="utf-8") - return 0 -if __name__ == "__main__": - raise SystemExit(main()) +if __name__ == "__main__": raise SystemExit(main()) diff --git a/tools/lock-actions b/tools/lock-actions new file mode 100755 index 00000000..cda8f826 --- /dev/null +++ b/tools/lock-actions @@ -0,0 +1,14 @@ +#!/usr/bin/env bash + +set -ex + +GIT_ROOT=$(git rev-parse --show-toplevel) + +cd "$GIT_ROOT" || exit 1 + +[[ -x "$(command -v docker)" ]] || ( + echo "docker not found. Make sure to have docker running to run this job." + exit 1 +) + +find "${GIT_ROOT}/.github/workflows" -type f -iname '*.yml' -exec docker run -it --rm -v "${PWD}":"${PWD}" -w "${PWD}" ghcr.io/sethvargo/ratchet:0.4.0 pin {} \; diff --git a/tools/sync-readme b/tools/sync-readme new file mode 100755 index 00000000..3de68ab4 --- /dev/null +++ b/tools/sync-readme @@ -0,0 +1,9 @@ +#!/usr/bin/env bash + +if ! git diff --quiet README.md; then + cp README.md openllm-python/README.md + exit 1 +else + echo "README.md is up to date" + exit 0 +fi diff --git a/tools/update-brew-tap.py b/tools/update-brew-tap.py index ed9091da..43dd7bed 100755 --- a/tools/update-brew-tap.py +++ b/tools/update-brew-tap.py @@ -22,8 +22,7 @@ def determine_release_url(svn_url: str, tag: str, target: t.Literal["macos_arm", return f"{svn_url}/releases/download/{tag}/openllm-{tag.replace('v', '')}-{_gz_strategies[target]}.tar.gz" # curl -sSL /archive/refs/tags/.tar.gz | shasum -a256 | cut -d'' -f1 -def get_release_hash_command(svn_url: str, tag: str) -> Pipeline: - return curl["-sSL", svn_url] | shasum["-a256"] | cut["-d", " ", "-f1"] +def get_release_hash_command(svn_url: str, tag: str) -> Pipeline: return curl["-sSL", svn_url] | shasum["-a256"] | cut["-d", " ", "-f1"] def main() -> int: api = GhApi(owner=_OWNER, repo=_REPO, authenticate=False) @@ -35,7 +34,7 @@ def main() -> int: ENVIRONMENT = Environment(extensions=["jinja2.ext.do", "jinja2.ext.loopcontrols", "jinja2.ext.debug"], trim_blocks=True, lstrip_blocks=True, loader=FileSystemLoader((ROOT / "Formula").__fspath__(), followlinks=True)) template_file = "openllm.rb.j2" - with (ROOT / "Formula" / "openllm.rb").open("w") as f: + with (ROOT/"Formula"/"openllm.rb").open("w") as f: f.write(ENVIRONMENT.get_template(template_file, globals={"determine_release_url": determine_release_url}).render(shadict=shadict, __tag__=release_tag, __cmd__=fs.path.join(os.path.basename(os.path.dirname(__file__)), os.path.basename(__file__)), __template_file__=fs.path.join("Formula", template_file), __gz_extension__=_gz_strategies, **_info)) f.write("\n") return 0 diff --git a/tools/update-config-stubs.py b/tools/update-config-stubs.py index 957ee0bd..dd4bad3e 100755 --- a/tools/update-config-stubs.py +++ b/tools/update-config-stubs.py @@ -1,12 +1,8 @@ #!/usr/bin/env python3 from __future__ import annotations -import importlib -import os +import os, sys from pathlib import Path -import openllm -from openllm._configuration import GenerationConfig, ModelSettings, PeftType, SamplingParams - # currently we are assuming the indentatio level is 2 for comments START_COMMENT = f"# {os.path.basename(__file__)}: start\n" END_COMMENT = f"# {os.path.basename(__file__)}: stop\n" @@ -15,8 +11,12 @@ END_SPECIAL_COMMENT = f"# {os.path.basename(__file__)}: special stop\n" START_ATTRS_COMMENT = f"# {os.path.basename(__file__)}: attrs start\n" END_ATTRS_COMMENT = f"# 
{os.path.basename(__file__)}: attrs stop\n" -_TARGET_FILE = Path(__file__).parent.parent/"src"/"openllm"/"_configuration.py" -_imported = importlib.import_module(ModelSettings.__module__) +ROOT = Path(__file__).parent.parent +_TARGET_FILE = ROOT/"openllm-python"/"src"/"openllm"/"_configuration.py" + +sys.path.insert(0, (ROOT/"openllm-python"/"src").__fspath__()) +from openllm._configuration import GenerationConfig, ModelSettings, PeftType, SamplingParams +from openllm.utils import codegen def process_annotations(annotations: str) -> str: if "NotRequired" in annotations: return annotations[len("NotRequired["):-1] @@ -93,34 +93,31 @@ def main() -> int: # NOTE: inline stubs __config__ attrs representation special_attrs_lines: list[str] = [] - for keys, ForwardRef in openllm.utils.codegen.get_annotations(ModelSettings).items(): special_attrs_lines.append(f"{' ' * 4}{keys}: {_transformed.get(keys, process_annotations(ForwardRef.__forward_arg__))}\n") - + for keys, ForwardRef in codegen.get_annotations(ModelSettings).items(): special_attrs_lines.append(f"{' ' * 4}{keys}: {_transformed.get(keys, process_annotations(ForwardRef.__forward_arg__))}\n") # NOTE: inline stubs for _ConfigAttr type stubs config_attr_lines: list[str] = [] - for keys, ForwardRef in openllm.utils.codegen.get_annotations(ModelSettings).items(): + for keys, ForwardRef in codegen.get_annotations(ModelSettings).items(): config_attr_lines.extend([" "*4 + line for line in [f"__openllm_{keys}__: {_transformed.get(keys, process_annotations(ForwardRef.__forward_arg__))} = Field(None)\n", f'"""{_value_docstring[keys]}"""\n',]]) - # NOTE: inline runtime __getitem__ overload process lines: list[str] = [] lines.append(" "*2 + "# NOTE: ModelSettings arguments\n") - for keys, ForwardRef in openllm.utils.codegen.get_annotations(ModelSettings).items(): lines.extend([" "*2 + line for line in ["@overload\n" if "overload" in dir(_imported) else "@t.overload\n", f'def __getitem__(self, item: t.Literal["{keys}"]) -> {_transformed.get(keys, process_annotations(ForwardRef.__forward_arg__))}: ...\n',]]) + for keys, ForwardRef in codegen.get_annotations(ModelSettings).items(): lines.extend([" "*2 + line for line in ["@overload\n", f'def __getitem__(self, item: t.Literal["{keys}"]) -> {_transformed.get(keys, process_annotations(ForwardRef.__forward_arg__))}: ...\n',]]) # special case variables: generation_class, extras, sampling_class lines.append(" "*2 + "# NOTE: generation_class, sampling_class and extras arguments\n") lines.extend([ - " "*2 + line for line in [ - "@overload\n" if "overload" in dir(_imported) else "@t.overload\n", 'def __getitem__(self, item: t.Literal["generation_class"]) -> t.Type[openllm.GenerationConfig]: ...\n', "@overload\n" if "overload" in dir(_imported) else "@t.overload\n", 'def __getitem__(self, item: t.Literal["sampling_class"]) -> t.Type[openllm.SamplingParams]: ...\n', "@overload\n" - if "overload" in dir(_imported) else "@t.overload\n", 'def __getitem__(self, item: t.Literal["extras"]) -> t.Dict[str, t.Any]: ...\n', - ] - ]) + " "*2 + line for line in [ + "@overload\n", 'def __getitem__(self, item: t.Literal["generation_class"]) -> t.Type[openllm.GenerationConfig]: ...\n', + "@overload\n", 'def __getitem__(self, item: t.Literal["sampling_class"]) -> t.Type[openllm.SamplingParams]: ...\n', + "@overload\n", 'def __getitem__(self, item: t.Literal["extras"]) -> t.Dict[str, t.Any]: ...\n', + ]]) lines.append(" "*2 + "# NOTE: GenerationConfig arguments\n") - generation_config_anns = 
openllm.utils.codegen.get_annotations(GenerationConfig) - for keys, type_pep563 in generation_config_anns.items(): lines.extend([" "*2 + line for line in ["@overload\n" if "overload" in dir(_imported) else "@t.overload\n", f'def __getitem__(self, item: t.Literal["{keys}"]) -> {type_pep563}: ...\n',]]) + generation_config_anns = codegen.get_annotations(GenerationConfig) + for keys, type_pep563 in generation_config_anns.items(): lines.extend([" "*2 + line for line in ["@overload\n", f'def __getitem__(self, item: t.Literal["{keys}"]) -> {type_pep563}: ...\n']]) lines.append(" "*2 + "# NOTE: SamplingParams arguments\n") - for keys, type_pep563 in openllm.utils.codegen.get_annotations(SamplingParams).items(): - if keys not in generation_config_anns: lines.extend([" "*2 + line for line in ["@overload\n" if "overload" in dir(_imported) else "@t.overload\n", f'def __getitem__(self, item: t.Literal["{keys}"]) -> {type_pep563}: ...\n',]]) - + for keys, type_pep563 in codegen.get_annotations(SamplingParams).items(): + if keys not in generation_config_anns: lines.extend([" "*2 + line for line in ["@overload\n", f'def __getitem__(self, item: t.Literal["{keys}"]) -> {type_pep563}: ...\n',]]) lines.append(" "*2 + "# NOTE: PeftType arguments\n") - for keys in PeftType._member_names_: lines.extend([" "*2 + line for line in ["@overload\n" if "overload" in dir(_imported) else "@t.overload\n", f'def __getitem__(self, item: t.Literal["{keys.lower()}"]) -> dict[str, t.Any]: ...\n',]]) + for keys in PeftType._member_names_: lines.extend([" "*2 + line for line in ["@overload\n", f'def __getitem__(self, item: t.Literal["{keys.lower()}"]) -> dict[str, t.Any]: ...\n',]]) processed = processed[:start_attrs_idx] + [" "*4 + START_ATTRS_COMMENT, *special_attrs_lines, " "*4 + END_ATTRS_COMMENT] + processed[end_attrs_idx + 1:start_stub_idx] + [" "*4 + START_SPECIAL_COMMENT, *config_attr_lines, " "*4 + END_SPECIAL_COMMENT] + processed[end_stub_idx + 1:start_idx] + [" "*2 + START_COMMENT, *lines, " "*2 + END_COMMENT] + processed[end_idx + 1:] with _TARGET_FILE.open("w") as f: f.writelines(processed) diff --git a/tools/update-dummy.py b/tools/update-dummy.py index 07cd9071..78038ee3 100755 --- a/tools/update-dummy.py +++ b/tools/update-dummy.py @@ -1,23 +1,23 @@ #!/usr/bin/env python3 from __future__ import annotations -import os -import typing as t +import os, typing as t, sys from pathlib import Path - -import openllm -from openllm._configuration import LiteralRuntime - -if t.TYPE_CHECKING: - from collections import OrderedDict - _ROOT = Path(__file__).parent.parent -config_requirements = {k:[_.replace("-", "_") for _ in v.__openllm_requirements__] if v.__openllm_requirements__ else None for k,v in openllm.CONFIG_MAPPING.items()} + +sys.path.insert(0, (_ROOT/"openllm-python"/"src").__fspath__()) +from openllm._configuration import LiteralRuntime +from openllm.models import auto +from openllm import CONFIG_MAPPING + +if t.TYPE_CHECKING: from collections import OrderedDict + +config_requirements = {k:[_.replace("-", "_") for _ in v.__openllm_requirements__] if v.__openllm_requirements__ else None for k,v in CONFIG_MAPPING.items()} _dependencies: dict[LiteralRuntime,str] = {k:v for k,v in zip(LiteralRuntime.__args__, ("torch", "tensorflow", "flax", "vllm"))} _auto: dict[str,str] = {k:v for k,v in zip(LiteralRuntime.__args__, ("AutoLLM", "AutoTFLLM", "AutoFlaxLLM", "AutoVLLM"))} -def get_target_dummy_file(framework: LiteralRuntime) -> Path: return _ROOT.joinpath("src","openllm","utils",f"dummy_{framework}_objects.py") +def 
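What update-config-stubs.py writes between its marker comments is a run of `@overload`-decorated `__getitem__` stubs, one per known config key. An illustration of the generated shape; the keys below are examples only, not the full generated set (the `extras` overload is taken verbatim from the tool):

```python
# Illustrative output only: the real stubs are generated into openllm/_configuration.py
# between "# update-config-stubs.py: start" and "# update-config-stubs.py: stop".
import typing as t
from typing import overload

class LLMConfigStub:
  @overload
  def __getitem__(self, item: t.Literal["max_new_tokens"]) -> int: ...
  @overload
  def __getitem__(self, item: t.Literal["temperature"]) -> float: ...
  @overload
  def __getitem__(self, item: t.Literal["extras"]) -> t.Dict[str, t.Any]: ...
  def __getitem__(self, item: str) -> t.Any:
    # Single runtime implementation behind all overloads; a real config would do a lookup here.
    raise NotImplementedError
```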
diff --git a/tools/update-dummy.py b/tools/update-dummy.py
index 07cd9071..78038ee3 100755
--- a/tools/update-dummy.py
+++ b/tools/update-dummy.py
@@ -1,23 +1,23 @@
 #!/usr/bin/env python3
 from __future__ import annotations
-import os
-import typing as t
+import os, typing as t, sys
 from pathlib import Path
-
-import openllm
-from openllm._configuration import LiteralRuntime
-
-if t.TYPE_CHECKING:
-  from collections import OrderedDict
-
 _ROOT = Path(__file__).parent.parent
-config_requirements = {k:[_.replace("-", "_") for _ in v.__openllm_requirements__] if v.__openllm_requirements__ else None for k,v in openllm.CONFIG_MAPPING.items()}
+
+sys.path.insert(0, (_ROOT/"openllm-python"/"src").__fspath__())
+from openllm._configuration import LiteralRuntime
+from openllm.models import auto
+from openllm import CONFIG_MAPPING
+
+if t.TYPE_CHECKING: from collections import OrderedDict
+
+config_requirements = {k:[_.replace("-", "_") for _ in v.__openllm_requirements__] if v.__openllm_requirements__ else None for k,v in CONFIG_MAPPING.items()}
 _dependencies: dict[LiteralRuntime,str] = {k:v for k,v in zip(LiteralRuntime.__args__, ("torch", "tensorflow", "flax", "vllm"))}
 _auto: dict[str,str] = {k:v for k,v in zip(LiteralRuntime.__args__, ("AutoLLM", "AutoTFLLM", "AutoFlaxLLM", "AutoVLLM"))}
-def get_target_dummy_file(framework: LiteralRuntime) -> Path: return _ROOT.joinpath("src","openllm","utils",f"dummy_{framework}_objects.py")
+def get_target_dummy_file(framework: LiteralRuntime) -> Path: return _ROOT/"openllm-python"/"src"/"openllm"/"utils"/f"dummy_{framework}_objects.py"
 def mapping_names(framework: LiteralRuntime): return "MODEL_MAPPING_NAMES" if framework == "pt" else f"MODEL_{framework.upper()}_MAPPING_NAMES"
-def get_mapping(framework: LiteralRuntime) -> OrderedDict[t.Any, t.Any]: return getattr(openllm.models.auto, mapping_names(framework))
+def get_mapping(framework: LiteralRuntime) -> OrderedDict[t.Any, t.Any]: return getattr(auto, mapping_names(framework))

 def make_class_stub(model_name: str, framework: LiteralRuntime, indentation: int = 2, auto: bool = False) -> list[str]:
   _dep_list: list[str] = [f'"{v}"' for v in [_dependencies[framework], *(t.cast(t.List[str], config_requirements[model_name]) if model_name != "__default__" and config_requirements[model_name] else [])]]
diff --git a/tools/update-models-import.py b/tools/update-models-import.py
index f1b9ef5a..32022db2 100755
--- a/tools/update-models-import.py
+++ b/tools/update-models-import.py
@@ -3,7 +3,7 @@ from __future__ import annotations
 import os
 from pathlib import Path
-_TARGET_FILE = Path(__file__).parent.parent/"src"/"openllm"/"models"/"__init__.py"
+_TARGET_FILE = Path(__file__).parent.parent/"openllm-python"/"src"/"openllm"/"models"/"__init__.py"

 def create_module_import() -> str:
   r = [f'"{p.name}"' for p in _TARGET_FILE.parent.glob('*/') if p.name not in ['__pycache__', '__init__.py', '.DS_Store']]
diff --git a/tools/update-readme.py b/tools/update-readme.py
index 36904a27..a5f2f5ba 100755
--- a/tools/update-readme.py
+++ b/tools/update-readme.py
@@ -1,23 +1,18 @@
 #!/usr/bin/env python3
 from __future__ import annotations
-import os
+import os, inflection, tomlkit, sys
 import typing as t
-import inflection
-import tomlkit
-
-import openllm
-
 START_COMMENT = f"\n"
 END_COMMENT = f"\n"
 ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+sys.path.insert(0, os.path.join(ROOT,"openllm-python","src"))
+import openllm

 def main() -> int:
-  with open(os.path.join(ROOT, "pyproject.toml"), "r") as f:
-    deps = tomlkit.parse(f.read()).value["project"]["optional-dependencies"]
-  with open(os.path.join(ROOT, "README.md"), "r") as f:
-    readme = f.readlines()
+  with open(os.path.join(ROOT, "openllm-python", "pyproject.toml"), "r") as f: deps = tomlkit.parse(f.read()).value["project"]["optional-dependencies"]
+  with open(os.path.join(ROOT, "README.md"), "r") as f: readme = f.readlines()
   start_index, stop_index = readme.index(START_COMMENT), readme.index(END_COMMENT)
   formatted: dict[t.Literal["Model", "Architecture", "URL", "Installation", "Model Ids"], list[str | list[str]]] = {"Model": [], "Architecture": [], "URL": [], "Model Ids": [], "Installation": [],}
@@ -28,14 +23,10 @@ def main() -> int:
   formatted["Architecture"].append(config_cls.__openllm_architecture__)
   formatted["URL"].append(config_cls.__openllm_url__)
   formatted["Model Ids"].append(config_cls.__openllm_model_ids__)
-  if dashed in deps:
-    instruction = f'```bash\npip install "openllm[{dashed}]"\n```'
-  else:
-    instruction = "```bash\npip install openllm\n```"
-  if len(instruction) > max_install_len_div:
-    max_install_len_div = len(instruction)
+  if dashed in deps: instruction = f'```bash\npip install "openllm[{dashed}]"\n```'
+  else: instruction = "```bash\npip install openllm\n```"
+  if len(instruction) > max_install_len_div: max_install_len_div = len(instruction)
   formatted["Installation"].append(instruction)
-
   meta: list[str] = ["\n", "\n"]
   # NOTE: headers
@@ -47,23 +38,18 @@ def main() -> int:
   meta += "\n"
   # configure architecture URL
   cfg_cls = openllm.CONFIG_MAPPING[name]
-  if cfg_cls.__openllm_trust_remote_code__:
-    arch = f"\n"
-  else:
-    model_name = {"dolly_v2": "gpt_neox", "stablelm": "gpt_neox", "starcoder": "gpt_bigcode", "flan_t5": "t5",}.get(cfg_cls.__openllm_model_name__, cfg_cls.__openllm_model_name__)
-    arch = f"\n"
-  meta.extend([f"\n\n", arch,])
+  if cfg_cls.__openllm_trust_remote_code__: arch = f"\n"
+  else: arch = f"\n"
+  meta.extend([f"\n\n", arch])
   format_with_links: list[str] = []
-  for lid in model_ids:
-    format_with_links.append(f"• {lid}• ")
+  for lid in model_ids: format_with_links.append(f"• {lid}• ")
   meta.append("\n")
   meta.append(f"\n")
   meta += "\n"
   meta.extend(["{architecture}{architecture}{name}{architecture}{architecture}{name}\n\n" + "\n".join(format_with_links) + "\n\n\n\n{installation}\n\n\n", "\n"])
   readme = readme[:start_index] + [START_COMMENT] + meta + [END_COMMENT] + readme[stop_index + 1:]
-  with open(os.path.join(ROOT, "README.md"), "w") as f:
-    f.writelines(readme)
+  with open(os.path.join(ROOT, "README.md"), "w") as f: f.writelines(readme)
   return 0

 if __name__ == "__main__": raise SystemExit(main())
diff --git a/tools/write-coverage-report.py b/tools/write-coverage-report.py
index 2b9064e6..c3af337d 100755
--- a/tools/write-coverage-report.py
+++ b/tools/write-coverage-report.py
@@ -2,7 +2,6 @@ from __future__ import annotations
 from decimal import ROUND_DOWN, Decimal
 from pathlib import Path
-
 import orjson

 PRECISION = Decimal(".01")
@@ -10,7 +9,7 @@ PRECISION = Decimal(".01")
 ROOT = Path(__file__).resolve().parent.parent

 def main() -> int:
-  coverage_summary = ROOT / "coverage-summary.json"
+  coverage_summary = ROOT/"coverage-summary.json"
   coverage_data = orjson.loads(coverage_summary.read_text(encoding="utf-8"))
   total_data = coverage_data.pop("total")
@@ -32,13 +31,10 @@ def main() -> int:
   color = "ok" if float(total_rate) >= 95 else "critical"
   lines.insert(0, f"![Code Coverage](https://img.shields.io/badge/coverage-{total_rate}%25-{color}?style=flat)\n")
-  lines.append(f"**Summary** | {100 if total_rate == 100 else total_rate}% "
-               f"({total_statements_covered} / {total_statements})\n")
+  lines.append(f"**Summary** | {100 if total_rate == 100 else total_rate}% ({total_statements_covered} / {total_statements})\n")
-  coverage_report = ROOT / "coverage-report.md"
-  with coverage_report.open("w", encoding="utf-8") as f:
-    f.write("".join(lines))
+  coverage_report = ROOT/"coverage-report.md"
+  with coverage_report.open("w", encoding="utf-8") as f: f.write("".join(lines))
   return 0

-if __name__ == "__main__":
-  raise SystemExit(main())
+if __name__ == "__main__": raise SystemExit(main())