Mirror of https://github.com/bentoml/OpenLLM.git (synced 2025-12-23 23:57:46 -05:00)
chore: ready for 0.6 releases
Signed-off-by: Aaron Pham <contact@aarnphm.xyz>
2 .gitattributes vendored
@@ -1,5 +1,3 @@
**/_next/ linguist-generated=true
* text=auto eol=lf
# Needed for setuptools-scm-git-archive
.git_archival.txt export-subst
64 .github/INFRA.md vendored
@@ -1,64 +0,0 @@
## OpenLLM CI/CD

> [!NOTE]
> All actions within this repository should always be locked to a specific version. We use [ratchet](https://github.com/sethvargo/ratchet)
> to do this via [this script](https://github.com/bentoml/OpenLLM/blob/main/tools/lock-actions.sh).
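For illustration, a minimal sketch of how such a lock script might drive ratchet; the actual layout and flags of `tools/lock-actions.sh` may differ:

```bash
#!/usr/bin/env bash
# Hypothetical sketch of action pinning with ratchet; see tools/lock-actions.sh
# for the real invocation used by this repository.
set -euo pipefail

for workflow in .github/workflows/*.yml; do
  # `ratchet pin` rewrites `uses: owner/action@vX` references into
  # `uses: owner/action@<commit-sha> # ratchet:owner/action@vX`
  ratchet pin "$workflow"
done
```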
OpenLLM uses GitHub Actions to run all CI/CD workflows. It also uses [pre-commit.ci](https://pre-commit.ci/) to run CI for all pre-commit hooks.

The folder structure of `.github/` is as follows:

```prolog
.
├── CODEOWNERS            # Code owners
├── CODE_OF_CONDUCT.md    # Code of conduct
├── ISSUE_TEMPLATE        # Contains issue templates
├── SECURITY.md           # Security policy
├── actions               # Contains helper scripts for all actions
├── assets                # Contains static assets to be used throughout this repository
├── dependabot.yml        # Dependabot configuration
└── workflows
    ├── binary-releases.yml # Build and publish binary releases
    ├── build.yml           # Self-hosted EC2 runners
    ├── ci.yml              # CI workflow
    ├── cleanup.yml         # Cache cleanup
    ├── build-pypi.yml      # Build PyPI packages
    ├── create-releases.yml # Create GitHub releases
    ├── cron.yml            # Cron jobs
    └── release-notes.yml   # Generate release notes
```

> [!IMPORTANT]
> All of the following jobs should only be run within the BentoML organisation and this repository.

### Self-hosted EC2 runners

The workflow for self-hosted EC2 runners is located in [build.yml](/.github/workflows/build.yml).
This workflow is currently used for building OpenLLM base images that contain all compiled kernels
for serving. The images are then published to the following registries:

- GitHub Container Registry (`ghcr.io/bentoml/openllm`): This is where users can extend the base image
  with their own custom kernels or use it as a base for building Bentos.

- AWS Elastic Container Registry (`public.ecr.aws/y5w8i4y6/bentoml/openllm`): This is the registry that
  all Bentos created with `openllm` will use. It exists purely for build optimisation on BentoCloud.

There are a few ways to trigger this workflow:

- Automatically, when a new commit is pushed to the `main` branch or a release tag is pushed

- On pull request, when the label `00 - EC2 Build` is applied

- On commit, when the commit message contains `[ec2 build]` (see the example below)
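As an example, the two opt-in triggers look like this (the PR number is a placeholder):

```bash
# Opt in from a commit: include the marker in the commit message
git commit -sm "perf: rebuild base images with updated kernels [ec2 build]"

# Opt in on a pull request: apply the label, here via the GitHub CLI
gh pr edit 1234 --add-label "00 - EC2 Build"
```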
### Wheel compilation

The workflow for wheel compilation is located in [build-pypi.yml](/.github/workflows/build-pypi.yml).

To speed up CI, opt in with the `02 - Wheel Build` label on a pull request or add `[wheel build]` to the commit message.

### Binary releases

The workflow for binary releases is located in [binary-releases.yml](/.github/workflows/binary-releases.yml).

To speed up CI, opt in with the `03 - Standalone Build` label on a pull request or add `[binary build]` to the commit message.
8 .github/actions/release.sh vendored
@@ -48,14 +48,8 @@ fi
release_package() {
  local version="$1"
  echo "Releasing version ${version}..."
  jq --arg release_version "${version}" '.version = $release_version' <package.json >package.json.tmp && mv package.json.tmp package.json
  pushd openllm-node &>/dev/null
  jq --arg release_version "${version}" '.version = $release_version' <package.json >package.json.tmp && mv package.json.tmp package.json
  popd &>/dev/null
  towncrier build --yes --version "${version}"
  ./tools/dependencies.py --release-version "${version}"
  cp CHANGELOG.md openllm-python/CHANGELOG.md
  git add CHANGELOG.md openllm-python/pyproject.toml openllm-python/CHANGELOG.md changelog.d package.json openllm-node/package.json
  git add CHANGELOG.md changelog.d
  git commit -S -sm "infra: prepare for release ${version} [generated] [skip ci]"
  git push origin main
  echo "Releasing tag ${version}..." && git tag -a "v${version}" -sm "Release ${version} [generated by GitHub Actions]"
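For context, `release_package` is driven by the script's `--release` flag from the release workflow (`create-releases.yml`, shown later in this diff); a minimal sketch of that invocation, using `patch` as an example release type:

```bash
# Python helpers the workflow installs before running the script
pip install ghapi inflection tomlkit orjson

# Bump versions, build the changelog with towncrier, commit with [skip ci],
# and push the signed tag; all of this happens inside release_package()
./.github/actions/release.sh --release patch
```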
BIN .github/assets/agent.gif vendored
Binary file not shown. (Before: 7.7 MiB)
27 .github/dependabot.yml vendored
@@ -1,28 +1,13 @@
version: 2
updates:
  - package-ecosystem: github-actions
    directory: '/'
    directory: "/"
    schedule:
      interval: 'weekly'
      day: 'monday'
      time: '09:00'
      interval: "weekly"
      day: "monday"
      time: "09:00"
  - package-ecosystem: pip
    directory: '/openllm-python'
    directory: "/"
    schedule:
      interval: 'weekly'
      interval: "weekly"
    open-pull-requests-limit: 5
  - package-ecosystem: pip
    directory: '/openllm-core'
    schedule:
      interval: 'weekly'
    open-pull-requests-limit: 5
  - package-ecosystem: pip
    directory: '/openllm-client'
    schedule:
      interval: 'weekly'
    open-pull-requests-limit: 5
  - package-ecosystem: npm
    directory: '/'
    schedule:
      interval: 'weekly'
    open-pull-requests-limit: 0
274 .github/workflows/binary-releases.yml vendored
@@ -1,274 +0,0 @@
|
||||
# modified workflow from ofek/hatch, big kudos to them.
|
||||
name: Standalone build
|
||||
on:
|
||||
workflow_call:
|
||||
workflow_dispatch:
|
||||
push:
|
||||
branches: [main]
|
||||
paths-ignore:
|
||||
- '*.md'
|
||||
- 'changelog.d/**'
|
||||
- 'assets/**'
|
||||
- 'openllm-node/**'
|
||||
- 'Formula/**'
|
||||
pull_request:
|
||||
branches: [main]
|
||||
paths-ignore:
|
||||
- '*.md'
|
||||
- 'changelog.d/**'
|
||||
- 'assets/**'
|
||||
- 'openllm-node/**'
|
||||
- 'Formula/**'
|
||||
defaults:
|
||||
run:
|
||||
shell: bash --noprofile --norc -exo pipefail {0}
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}
|
||||
cancel-in-progress: true
|
||||
env:
|
||||
APP_NAME: openllm
|
||||
PYOXIDIZER_VERSION: '0.24.0'
|
||||
HATCH_VERBOSE: 2
|
||||
jobs:
|
||||
get_commit_message:
|
||||
name: Get commit message
|
||||
runs-on: ubuntu-latest
|
||||
if: "github.repository == 'bentoml/OpenLLM'" # Don't run on fork repository
|
||||
outputs:
|
||||
message: ${{ steps.commit_message.outputs.message }}
|
||||
steps:
|
||||
- uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # ratchet:actions/checkout@v4.1.7
|
||||
# Gets the correct commit message for pull request
|
||||
with:
|
||||
ref: ${{ github.event.pull_request.head.sha }}
|
||||
- name: Get commit message
|
||||
id: commit_message
|
||||
run: |
|
||||
set -xe
|
||||
COMMIT_MSG=$(git log --no-merges -1 --oneline)
|
||||
echo "message=$COMMIT_MSG" >> $GITHUB_OUTPUT
|
||||
echo github.ref ${{ github.ref }}
|
||||
python-artifacts:
|
||||
name: Build wheel and source distribution
|
||||
runs-on: ubuntu-latest
|
||||
needs: get_commit_message
|
||||
if: >-
|
||||
contains(needs.get_commit_message.outputs.message, '[binary build]') || github.event_name == 'workflow_dispatch' || github.event_name == 'workflow_call' || (github.event_name == 'pull_request' && contains(github.event.pull_request.labels.*.name, '03 - Standalone Build')) || (github.event_name == 'push' && (startsWith(github.ref, 'refs/tags/v') || startsWith(github.ref, 'refs/heads/main')))
|
||||
steps:
|
||||
- uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # ratchet:actions/checkout@v4.1.7
|
||||
with:
|
||||
fetch-depth: 0
|
||||
- uses: bentoml/setup-bentoml-action@862aa8fa0e0c3793fcca4bfe7a62717a497417e4 # ratchet:bentoml/setup-bentoml-action@v1
|
||||
with:
|
||||
bentoml-version: 'main'
|
||||
python-version-file: .python-version-default
|
||||
- name: Pull latest change
|
||||
if: ${{ github.event_name != 'pull_request' }}
|
||||
run: git pull --autostash --no-edit --gpg-sign --ff origin main
|
||||
- name: Install build frontend
|
||||
run: python -m pip install --upgrade build
|
||||
- name: Build
|
||||
run: python -m build -sw openllm-python/
|
||||
- name: Upload artifacts
|
||||
uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # ratchet:actions/upload-artifact@v4
|
||||
with:
|
||||
name: binary-artefacts
|
||||
path: openllm-python/dist/*
|
||||
if-no-files-found: error
|
||||
binaries:
|
||||
name: ${{ matrix.job.target }} (${{ matrix.job.os }})
|
||||
needs:
|
||||
- python-artifacts
|
||||
defaults:
|
||||
run:
|
||||
working-directory: openllm-python
|
||||
runs-on: ${{ matrix.job.os }}
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
job:
|
||||
# Linux
|
||||
- target: aarch64-unknown-linux-gnu
|
||||
os: ubuntu-22.04
|
||||
cross: true
|
||||
- target: x86_64-unknown-linux-gnu
|
||||
os: ubuntu-22.04
|
||||
cross: true
|
||||
- target: x86_64-unknown-linux-musl
|
||||
os: ubuntu-22.04
|
||||
cross: true
|
||||
- target: i686-unknown-linux-gnu
|
||||
os: ubuntu-22.04
|
||||
cross: true
|
||||
- target: powerpc64le-unknown-linux-gnu
|
||||
os: ubuntu-22.04
|
||||
cross: true
|
||||
# macOS
|
||||
- target: aarch64-apple-darwin
|
||||
os: macos-12
|
||||
- target: x86_64-apple-darwin
|
||||
os: macos-12
|
||||
outputs:
|
||||
version: ${{ steps.version.outputs.version }}
|
||||
env:
|
||||
CARGO: cargo
|
||||
CARGO_BUILD_TARGET: ${{ matrix.job.target }}
|
||||
PYAPP_REPO: pyapp
|
||||
PYAPP_VERSION: '0.10.1'
|
||||
PYAPP_PIP_EXTERNAL: 'true'
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # ratchet:actions/checkout@v4.1.7
|
||||
with:
|
||||
fetch-depth: 0
|
||||
- name: Pull latest change
|
||||
if: ${{ github.event_name != 'pull_request' }}
|
||||
run: git pull --autostash --no-edit --gpg-sign --ff origin main
|
||||
- name: Fetch PyApp
|
||||
run: >-
|
||||
mkdir $PYAPP_REPO && curl -L https://github.com/ofek/pyapp/releases/download/v$PYAPP_VERSION/source.tar.gz | tar --strip-components=1 -xzf - -C $PYAPP_REPO
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # ratchet:actions/setup-python@v5.1.0
|
||||
with:
|
||||
python-version-file: .python-version-default
|
||||
- name: Install Hatch
|
||||
run: pip install -U hatch
|
||||
- name: Install Rust toolchain
|
||||
uses: dtolnay/rust-toolchain@d388a4836fcdbde0e50e395dc79a2670ccdef13f # ratchet:dtolnay/rust-toolchain@stable
|
||||
with:
|
||||
targets: ${{ matrix.job.target }}
|
||||
- name: Set up cross compiling
|
||||
if: matrix.job.cross
|
||||
uses: taiki-e/install-action@64e4e2f995104968c78bd697b253d55bf557af66 # ratchet:taiki-e/install-action@v2.41.11
|
||||
with:
|
||||
tool: cross
|
||||
- name: Configure cross compiling
|
||||
if: matrix.job.cross
|
||||
run: echo "CARGO=cross" >> $GITHUB_ENV
|
||||
- name: Configure target
|
||||
run: |-
|
||||
config_file="$PYAPP_REPO/.cargo/config_${{ matrix.job.target }}.toml"
|
||||
if [[ -f "$config_file" ]]; then
|
||||
mv "$config_file" "$PYAPP_REPO/.cargo/config.toml"
|
||||
fi
|
||||
- name: Download Python artifacts
|
||||
if: ${{ !startsWith(github.event.ref, 'refs/tags') }}
|
||||
uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # ratchet:actions/download-artifact@v4
|
||||
with:
|
||||
name: binary-artefacts
|
||||
path: openllm-python/dist
|
||||
- name: Configure embedded project
|
||||
if: ${{ !startsWith(github.event.ref, 'refs/tags') }}
|
||||
run: |-
|
||||
cd dist
|
||||
wheel="$(echo *.whl)"
|
||||
mv "$wheel" "../$PYAPP_REPO"
|
||||
echo "PYAPP_PROJECT_PATH=$wheel" >> $GITHUB_ENV
|
||||
- name: Build binary
|
||||
run: hatch build --target app
|
||||
# Windows installers don't accept non-integer versions so we ubiquitously
|
||||
# perform the following transformation: X.Y.Z.devN -> X.Y.Z.N
|
||||
- name: Set project version
|
||||
id: version
|
||||
run: |-
|
||||
old_version="$(hatch version)"
|
||||
version="${old_version/dev/}"
|
||||
|
||||
if [[ "$version" != "$old_version" ]]; then
|
||||
cd dist/app
|
||||
find . -type f -iname "openllm-*" | while read -r file; do
|
||||
binary=$(echo "$file" | sed 's/dev//')
|
||||
mv "$file" "$binary"
|
||||
done
|
||||
fi
|
||||
|
||||
echo "version=$version" >> $GITHUB_OUTPUT
|
||||
echo "$version"
|
||||
- name: Archive binary
|
||||
run: |-
|
||||
mkdir packaging
|
||||
cd dist/app
|
||||
find . -type f -iname "openllm-*" | while read -r file; do
|
||||
if [[ "$file" =~ -pc-windows- ]]; then
|
||||
7z a "../../packaging/${file:0:-4}.zip" "$file"
|
||||
else
|
||||
binary="${file/dev/}"
|
||||
chmod +x "$file"
|
||||
tar -czf "../../packaging/$binary.tar.gz" "$file"
|
||||
fi
|
||||
done
|
||||
- name: Upload staged archive
|
||||
if: runner.os != 'Linux'
|
||||
uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # ratchet:actions/upload-artifact@v4
|
||||
with:
|
||||
name: staged-${{ runner.os }}-${{ matrix.job.target }}
|
||||
path: openllm-python/packaging/*
|
||||
if-no-files-found: error
|
||||
- name: Upload archive
|
||||
if: runner.os == 'Linux'
|
||||
uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # ratchet:actions/upload-artifact@v4
|
||||
with:
|
||||
name: standalone-${{ runner.os }}-${{ matrix.job.target }}
|
||||
path: openllm-python/packaging/*
|
||||
if-no-files-found: error
|
||||
windows-packaging:
|
||||
name: Build Windows installers
|
||||
needs: binaries
|
||||
if: false # ${{ github.event_name != 'pull_request' }}
|
||||
runs-on: windows-2022
|
||||
env:
|
||||
VERSION: ${{ needs.binaries.outputs.version }}
|
||||
defaults:
|
||||
run:
|
||||
working-directory: openllm-python
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # ratchet:actions/checkout@v4.1.7
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # ratchet:actions/setup-python@v5.1.0
|
||||
with:
|
||||
python-version-file: .python-version-default
|
||||
- name: Pull latest change
|
||||
run: git pull --autostash --no-edit --gpg-sign --ff origin main
|
||||
- name: Install PyOxidizer ${{ env.PYOXIDIZER_VERSION }}
|
||||
run: pip install pyoxidizer==${{ env.PYOXIDIZER_VERSION }}
|
||||
- name: Download staged binaries
|
||||
uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # ratchet:actions/download-artifact@v4
|
||||
with:
|
||||
name: staged-${{ runner.os }}
|
||||
path: openllm-python/archives
|
||||
- name: Extract staged binaries
|
||||
run: |-
|
||||
mkdir -p bin
|
||||
for f in archives/*; do
|
||||
7z e "$f" -obin
|
||||
done
|
||||
# bin/<APP_NAME>-<VERSION>-<TARGET>.exe -> targets/<TARGET>/<APP_NAME>.exe
|
||||
- name: Prepare binaries
|
||||
run: |-
|
||||
mkdir -p targets
|
||||
for f in bin/*; do
|
||||
if [[ "$f" =~ openllm-${{ env.VERSION }}-(.+).exe$ ]]; then
|
||||
target="${BASH_REMATCH[1]}"
|
||||
mkdir -p "targets/$target"
|
||||
mv "$f" "targets/$target/${{ env.APP_NAME }}.exe"
|
||||
fi
|
||||
done
|
||||
- name: Build installers
|
||||
run: >-
|
||||
pyoxidizer build windows_installers --release --var version ${{ env.VERSION }}
|
||||
- name: Prepare installers
|
||||
run: |-
|
||||
mkdir installers
|
||||
mv build/*/release/*/*.{exe,msi} installers
|
||||
- name: Upload binaries
|
||||
uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # ratchet:actions/upload-artifact@v4
|
||||
with:
|
||||
name: standalone
|
||||
path: openllm-python/archives/*
|
||||
if-no-files-found: error
|
||||
- name: Upload installers
|
||||
uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # ratchet:actions/upload-artifact@v4
|
||||
with:
|
||||
name: installers
|
||||
path: openllm-python/installers/*
|
||||
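Roughly, the `binaries` job above reduces to the following local sketch (cross-compilation, version rewriting, and archiving are omitted; the env vars and paths follow the workflow):

```bash
# Fetch the PyApp sources pinned by the workflow
export PYAPP_VERSION=0.10.1 PYAPP_REPO=pyapp PYAPP_PIP_EXTERNAL=true
mkdir "$PYAPP_REPO"
curl -L "https://github.com/ofek/pyapp/releases/download/v${PYAPP_VERSION}/source.tar.gz" \
  | tar --strip-components=1 -xzf - -C "$PYAPP_REPO"

# Build the standalone executable with Hatch's app target
pip install -U hatch
cd openllm-python && hatch build --target app   # binaries land in dist/app/
```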
143 .github/workflows/build-pypi.yml vendored
@@ -1,143 +0,0 @@
|
||||
name: Compile PyPI package
|
||||
on:
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
tags:
|
||||
description: 'Tag for releases'
|
||||
required: true
|
||||
type: string
|
||||
workflow_call:
|
||||
inputs:
|
||||
tags:
|
||||
required: true
|
||||
type: string
|
||||
outputs:
|
||||
sucess:
|
||||
description: 'Whether the build is successful or not'
|
||||
value: ${{ jobs.sucess-build.outputs.success }}
|
||||
push:
|
||||
branches: [main]
|
||||
paths-ignore:
|
||||
- '*.md'
|
||||
- 'changelog.d/**'
|
||||
- 'assets/**'
|
||||
pull_request:
|
||||
branches: [main]
|
||||
paths-ignore:
|
||||
- '*.md'
|
||||
- 'changelog.d/**'
|
||||
- 'assets/**'
|
||||
env:
|
||||
LINES: 120
|
||||
COLUMNS: 120
|
||||
HATCH_VERBOSE: 2
|
||||
CIBW_BUILD_FRONTEND: build
|
||||
CIBW_ENVIRONMENT_PASS_LINUX: >
|
||||
HATCH_BUILD_HOOKS_ENABLE MYPYPATH
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}
|
||||
cancel-in-progress: true
|
||||
jobs:
|
||||
get_commit_message:
|
||||
name: Get commit message
|
||||
runs-on: ubuntu-latest
|
||||
if: "github.repository == 'bentoml/OpenLLM'" # Don't run on fork repository
|
||||
outputs:
|
||||
message: ${{ steps.commit_message.outputs.message }}
|
||||
steps:
|
||||
- uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # ratchet:actions/checkout@v4.1.7
|
||||
# Gets the correct commit message for pull request
|
||||
with:
|
||||
ref: ${{ github.event.pull_request.head.sha }}
|
||||
- name: Get commit message
|
||||
id: commit_message
|
||||
run: |
|
||||
set -xe
|
||||
COMMIT_MSG=$(git log --no-merges -1 --oneline)
|
||||
echo "message=$COMMIT_MSG" >> $GITHUB_OUTPUT
|
||||
echo github.ref ${{ github.ref }}
|
||||
pure-wheels-sdist:
|
||||
name: Pure wheels and sdist distribution (${{ matrix.directory }})
|
||||
runs-on: ubuntu-latest
|
||||
needs: get_commit_message
|
||||
if: >-
|
||||
contains(needs.get_commit_message.outputs.message, '[wheel build]') || github.event_name == 'workflow_dispatch' || github.event_name == 'workflow_call' || (github.event_name == 'pull_request' && contains(github.event.pull_request.labels.*.name, '02 - Wheel Build')) || (github.event_name == 'push' && (startsWith(github.ref, 'refs/tags/v') || startsWith(github.ref, 'refs/heads/main')))
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
directory: ['openllm-core', 'openllm-python', 'openllm-client']
|
||||
steps:
|
||||
- uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # ratchet:actions/checkout@v4.1.7
|
||||
with:
|
||||
fetch-depth: 0
|
||||
ref: '${{ inputs.tags }}'
|
||||
- uses: bentoml/setup-bentoml-action@862aa8fa0e0c3793fcca4bfe7a62717a497417e4 # ratchet:bentoml/setup-bentoml-action@v1
|
||||
with:
|
||||
bentoml-version: 'main'
|
||||
python-version-file: .python-version-default
|
||||
- name: Build
|
||||
run: hatch build
|
||||
working-directory: ${{ matrix.directory }}
|
||||
- name: Upload artifacts
|
||||
uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # ratchet:actions/upload-artifact@v4
|
||||
with:
|
||||
name: python-artefacts-${{ matrix.directory }}
|
||||
path: ${{ matrix.directory }}/dist/*
|
||||
if-no-files-found: error
|
||||
check-download-artefacts:
|
||||
name: dry-run for downloading artefacts
|
||||
if: github.event_name == 'pull_request'
|
||||
needs: [pure-wheels-sdist]
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Download Python artifacts
|
||||
uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # ratchet:actions/download-artifact@v4
|
||||
with:
|
||||
pattern: python-artefacts-*
|
||||
merge-multiple: true
|
||||
path: dist
|
||||
- name: dry ls
|
||||
run: ls -rthlaR
|
||||
push-nightly:
|
||||
name: Push nightly wheels
|
||||
if: ${{ !github.event.repository.fork && github.event_name == 'push' }}
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
id-token: write
|
||||
needs: [pure-wheels-sdist]
|
||||
steps:
|
||||
- name: Download Python artifacts
|
||||
uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # ratchet:actions/download-artifact@v4
|
||||
with:
|
||||
pattern: python-artefacts-*
|
||||
merge-multiple: true
|
||||
path: dist
|
||||
- name: Publish nightly wheels to test.pypi.org
|
||||
uses: pypa/gh-action-pypi-publish@ec4db0b4ddc65acdf4bff5fa45ac92d78b56bdf0 # ratchet:pypa/gh-action-pypi-publish@release/v1
|
||||
with:
|
||||
repository-url: https://test.pypi.org/legacy/
|
||||
sucess-push: # https://github.com/marketplace/actions/alls-green#why
|
||||
if: github.event_name == 'push'
|
||||
needs: [push-nightly, pure-wheels-sdist]
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
success: ${{ steps.everygreen.outputs.success }}
|
||||
steps:
|
||||
- name: Decide whether the needed jobs succeeded or failed
|
||||
uses: re-actors/alls-green@05ac9388f0aebcb5727afa17fcccfecd6f8ec5fe # ratchet:re-actors/alls-green@release/v1
|
||||
id: evergreen
|
||||
with:
|
||||
jobs: ${{ toJSON(needs) }}
|
||||
sucess-pr: # https://github.com/marketplace/actions/alls-green#why
|
||||
if: github.event_name == 'pull_request'
|
||||
needs: [pure-wheels-sdist]
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
success: ${{ steps.everygreen.outputs.success }}
|
||||
steps:
|
||||
- name: Decide whether the needed jobs succeeded or failed
|
||||
uses: re-actors/alls-green@05ac9388f0aebcb5727afa17fcccfecd6f8ec5fe # ratchet:re-actors/alls-green@release/v1
|
||||
id: evergreen
|
||||
with:
|
||||
jobs: ${{ toJSON(needs) }}
|
||||
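Locally, the `pure-wheels-sdist` job above boils down to a sketch like this (one `hatch build` per package directory, mirroring the matrix):

```bash
pip install -U hatch
for pkg in openllm-core openllm-python openllm-client; do
  (cd "$pkg" && hatch build)   # writes the sdist and wheel to $pkg/dist/
done
```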
71 .github/workflows/ci.yml vendored
@@ -1,71 +0,0 @@
|
||||
name: CI
|
||||
on:
|
||||
workflow_call:
|
||||
push:
|
||||
branches: [main]
|
||||
pull_request:
|
||||
branches: [main]
|
||||
env:
|
||||
PAPERSPACE_API_KEY: ${{secrets.PAPERSPACE_API_KEY}}
|
||||
PAPERSPACE_NETWORK_ID: ${{secrets.PAPERSPACE_NETWORK_ID}}
|
||||
defaults:
|
||||
run:
|
||||
shell: bash --noprofile --norc -eo pipefail {0}
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}
|
||||
cancel-in-progress: true
|
||||
jobs:
|
||||
tests:
|
||||
runs-on: ubuntu-latest
|
||||
if: ${{ github.event_name == 'pull_request' || github.event_name == 'push'|| github.event_name == 'workflow_call' }}
|
||||
name: General API tests
|
||||
steps:
|
||||
- uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # ratchet:actions/checkout@v4
|
||||
- uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # ratchet:actions/setup-python@v5
|
||||
with:
|
||||
python-version-file: '.python-version-default'
|
||||
- name: setup tooling
|
||||
run: |
|
||||
python -m pip install uv
|
||||
uv pip install --system httpx orjson
|
||||
- name: startup machine
|
||||
id: paperspace-machine
|
||||
run: |
|
||||
PUBLIC_IP=$(python tools/machines.py --ci-template ${{secrets.PAPERSPACE_TEMPLATE_ID}} --output-ip)
|
||||
MACHINE_ID=$(python tools/machines.py --ci-template ${{secrets.PAPERSPACE_TEMPLATE_ID}} --output-id)
|
||||
echo "::add-mask::$PUBLIC_IP"
|
||||
echo "::add-mask::$MACHINE_ID"
|
||||
echo "publicIp=$PUBLIC_IP" >> $GITHUB_OUTPUT
|
||||
echo "machineId=$MACHINE_ID" >> $GITHUB_OUTPUT
|
||||
- name: running regression tests (PR)
|
||||
uses: appleboy/ssh-action@029f5b4aeeeb58fdfe1410a5d17f967dacf36262 # ratchet:appleboy/ssh-action@v1.0.3
|
||||
if: github.event_name == 'pull_request'
|
||||
with:
|
||||
host: ${{ steps.paperspace-machine.outputs.publicIp }}
|
||||
username: paperspace
|
||||
key: ${{ secrets.PAPERSPACE_SSH_KEY }}
|
||||
port: ${{ secrets.PAPERSPACE_PORT }}
|
||||
command_timeout: 1h
|
||||
script: /nix/var/nix/profiles/default/bin/nix run github:aarnphm/dix#openllm-ci --refresh -- --pr ${{github.event.number}}
|
||||
- name: running regression tests (main)
|
||||
uses: appleboy/ssh-action@029f5b4aeeeb58fdfe1410a5d17f967dacf36262 # ratchet:appleboy/ssh-action@v1.0.3
|
||||
if: ${{ !github.event.repository.fork && github.event_name == 'push' }}
|
||||
with:
|
||||
host: ${{ steps.paperspace-machine.outputs.publicIp }}
|
||||
username: paperspace
|
||||
key: ${{ secrets.PAPERSPACE_SSH_KEY }}
|
||||
port: ${{ secrets.PAPERSPACE_PORT }}
|
||||
command_timeout: 1h
|
||||
script: /nix/var/nix/profiles/default/bin/nix run github:aarnphm/dix#openllm-ci --accept-flake-config --refresh -- --head
|
||||
- name: shutdown machine
|
||||
run: python tools/machines.py --delete ${{ steps.paperspace-machine.outputs.machineId }}
|
||||
evergreen: # https://github.com/marketplace/actions/alls-green#why
|
||||
if: always()
|
||||
needs:
|
||||
- tests
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Decide whether the needed jobs succeeded or failed
|
||||
uses: re-actors/alls-green@05ac9388f0aebcb5727afa17fcccfecd6f8ec5fe # ratchet:re-actors/alls-green@release/v1
|
||||
with:
|
||||
jobs: ${{ toJSON(needs) }}
|
||||
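The Paperspace machine lifecycle in the `tests` job above follows this sketch (flags as used in ci.yml; the template ID comes from a repository secret):

```bash
# Provision a CI machine from the template and capture its coordinates
PUBLIC_IP=$(python tools/machines.py --ci-template "$PAPERSPACE_TEMPLATE_ID" --output-ip)
MACHINE_ID=$(python tools/machines.py --ci-template "$PAPERSPACE_TEMPLATE_ID" --output-id)

# ...SSH into $PUBLIC_IP and run the regression suite via the Nix flake...

# Always tear the machine down afterwards
python tools/machines.py --delete "$MACHINE_ID"
```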
114 .github/workflows/create-releases.yml vendored
@@ -1,114 +0,0 @@
|
||||
name: release
|
||||
on:
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
release_type:
|
||||
description: 'Release type'
|
||||
required: true
|
||||
type: choice
|
||||
default: 'patch'
|
||||
options:
|
||||
- major
|
||||
- minor
|
||||
- patch
|
||||
- alpha
|
||||
defaults:
|
||||
run:
|
||||
shell: bash --noprofile --norc -exo pipefail {0}
|
||||
env:
|
||||
LINES: 120
|
||||
COLUMNS: 120
|
||||
OPENLLM_DO_NOT_TRACK: True
|
||||
PYTHONUNBUFFERED: '1'
|
||||
HATCH_VERBOSE: 2
|
||||
jobs:
|
||||
release:
|
||||
if: github.repository_owner == 'bentoml'
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
contents: write
|
||||
id-token: write
|
||||
outputs:
|
||||
version: ${{ steps.version.outputs.version }}
|
||||
steps:
|
||||
- uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # ratchet:actions/checkout@v4.1.7
|
||||
with:
|
||||
fetch-depth: 0
|
||||
token: ${{ secrets.GITHUB_TOKEN }}
|
||||
- uses: bentoml/setup-bentoml-action@862aa8fa0e0c3793fcca4bfe7a62717a497417e4 # ratchet:bentoml/setup-bentoml-action@v1
|
||||
with:
|
||||
bentoml-version: 'main'
|
||||
python-version-file: .python-version-default
|
||||
- name: Import bot's GPG key for signing commits
|
||||
id: import-gpg
|
||||
uses: crazy-max/ghaction-import-gpg@01dd5d3ca463c7f10f7f4f7b4f177225ac661ee4 # ratchet:crazy-max/ghaction-import-gpg@v6
|
||||
with:
|
||||
gpg_private_key: ${{ secrets.GPG_PRIVATE_KEY }}
|
||||
passphrase: ${{ secrets.GPG_PASSPHRASE }}
|
||||
git_config_global: true
|
||||
git_user_signingkey: true
|
||||
git_commit_gpgsign: true
|
||||
git_tag_gpgsign: true
|
||||
- name: Create a release
|
||||
env:
|
||||
GIT_AUTHOR_NAME: ${{ steps.import-gpg.outputs.name }}
|
||||
GIT_AUTHOR_EMAIL: ${{ steps.import-gpg.outputs.email }}
|
||||
GIT_COMMITTER_NAME: ${{ steps.import-gpg.outputs.name }}
|
||||
GIT_COMMITTER_EMAIL: ${{ steps.import-gpg.outputs.email }}
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
run: |-
|
||||
pip install ghapi inflection tomlkit orjson
|
||||
./.github/actions/release.sh --release ${{ inputs.release_type }}
|
||||
- name: Set project version
|
||||
id: version
|
||||
run: |-
|
||||
version="$(git describe --tags "$(git rev-list --tags --max-count=1)")"
|
||||
echo "Release version: $version"
|
||||
echo "version=$version" >> $GITHUB_OUTPUT
|
||||
build-pypi:
|
||||
if: github.repository_owner == 'bentoml'
|
||||
needs:
|
||||
- release
|
||||
name: Compile PyPI distribution for OpenLLM
|
||||
uses: bentoml/OpenLLM/.github/workflows/build-pypi.yml@main # ratchet:exclude
|
||||
with:
|
||||
tags: ${{ needs.release.outputs.version }}
|
||||
publish-python:
|
||||
needs:
|
||||
- build-pypi
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
id-token: write
|
||||
steps:
|
||||
- uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # ratchet:actions/checkout@v4.1.7
|
||||
with:
|
||||
ref: '${{ needs.release.outputs.version }}'
|
||||
token: ${{ secrets.GITHUB_TOKEN }}
|
||||
# NOTE: Keep this section in sync with build-pypi.yml
|
||||
- name: Download Python artifacts
|
||||
uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # ratchet:actions/download-artifact@v4
|
||||
with:
|
||||
pattern: python-artefacts-*
|
||||
merge-multiple: true
|
||||
path: dist
|
||||
- name: Smoke test compiled artefacts
|
||||
run: ls -R dist
|
||||
- name: Publish to PyPI
|
||||
uses: pypa/gh-action-pypi-publish@ec4db0b4ddc65acdf4bff5fa45ac92d78b56bdf0 # ratchet:pypa/gh-action-pypi-publish@release/v1
|
||||
with:
|
||||
print-hash: true
|
||||
binary-distribution:
|
||||
if: github.repository_owner == 'bentoml'
|
||||
needs: publish-python
|
||||
name: Create binary/wheels distribution
|
||||
uses: bentoml/OpenLLM/.github/workflows/binary-releases.yml@main # ratchet:exclude
|
||||
release-notes:
|
||||
if: github.repository_owner == 'bentoml'
|
||||
needs:
|
||||
- release
|
||||
- publish-python
|
||||
- binary-distribution
|
||||
name: Create release notes and setup for next cycle
|
||||
uses: bentoml/OpenLLM/.github/workflows/release-notes.yml@main # ratchet:exclude
|
||||
with:
|
||||
tags: ${{ needs.release.outputs.version }}
|
||||
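Since the workflow above is dispatch-driven, a release can be kicked off from the command line; a sketch using the GitHub CLI, assuming `gh` is authenticated against bentoml/OpenLLM:

```bash
# Start the release workflow with the desired bump type
gh workflow run create-releases.yml --repo bentoml/OpenLLM -f release_type=patch

# Follow the run once it has been scheduled
gh run watch
```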
66 .github/workflows/release-notes.yml vendored
@@ -1,66 +0,0 @@
|
||||
name: release-notes
|
||||
on:
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
tags:
|
||||
description: 'Tag for releases'
|
||||
required: true
|
||||
type: string
|
||||
workflow_call:
|
||||
inputs:
|
||||
tags:
|
||||
required: true
|
||||
type: string
|
||||
env:
|
||||
HATCH_VERBOSE: 2
|
||||
defaults:
|
||||
run:
|
||||
shell: bash --noprofile --norc -exo pipefail {0}
|
||||
jobs:
|
||||
gen-release-notes:
|
||||
if: github.repository_owner == 'bentoml'
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
contents: write
|
||||
id-token: write
|
||||
steps:
|
||||
- uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # ratchet:actions/checkout@v4.1.7
|
||||
with:
|
||||
fetch-depth: 0
|
||||
ref: '${{ inputs.tags }}'
|
||||
- uses: bentoml/setup-bentoml-action@862aa8fa0e0c3793fcca4bfe7a62717a497417e4 # ratchet:bentoml/setup-bentoml-action@v1
|
||||
with:
|
||||
bentoml-version: 'main'
|
||||
python-version-file: .python-version-default
|
||||
- name: Create release notes
|
||||
run: ./.github/actions/create_release_and_archive.sh ${{ inputs.tags }}
|
||||
- name: Download Python artifacts
|
||||
uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # ratchet:actions/download-artifact@v4
|
||||
with:
|
||||
pattern: python-artefacts-*
|
||||
merge-multiple: true
|
||||
path: dist
|
||||
- name: Download binaries
|
||||
uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # ratchet:actions/download-artifact@v4
|
||||
with:
|
||||
pattern: standalone-*
|
||||
merge-multiple: true
|
||||
path: archives
|
||||
- name: Download standalone MacOS
|
||||
uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # ratchet:actions/download-artifact@v4
|
||||
with:
|
||||
pattern: staged-*
|
||||
merge-multiple: true
|
||||
path: archives
|
||||
- name: Create release
|
||||
uses: softprops/action-gh-release@a74c6b72af54cfa997e81df42d94703d6313a2d0 # ratchet:softprops/action-gh-release@v2.0.6
|
||||
with:
|
||||
# Use GH feature to populate the changelog automatically
|
||||
generate_release_notes: true
|
||||
body_path: release_notes.txt
|
||||
fail_on_unmatched_files: true
|
||||
tag_name: '${{ inputs.tags }}'
|
||||
# TODO: add installers/* once windows support is back
|
||||
files: |-
|
||||
dist/*
|
||||
archives/*
|
||||
43 .github/workflows/releases.yml vendored Normal file
@@ -0,0 +1,43 @@
name: release
on:
  push:
    tags:
      - "*"
jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - name: Setup Python
        uses: actions/setup-python@v5
        with:
          python-version-file: .python-version-default
      - name: Build
        run: pipx run build
      - name: Upload artifacts
        uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # ratchet:actions/upload-artifact@v4
        with:
          name: python-artefacts-openllm
          path: dist/*
          if-no-files-found: error
  release:
    if: github.repository_owner == 'bentoml'
    needs:
      - build
    runs-on: ubuntu-latest
    name: Release
    permissions:
      id-token: write
    steps:
      - name: Download Python artifacts
        uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # ratchet:actions/download-artifact@v4
        with:
          pattern: python-artefacts-*
          merge-multiple: true
          path: dist
      - name: dry ls
        run: ls -rthlaR
      - name: Publish to PyPI
        uses: pypa/gh-action-pypi-publish@ec4db0b4ddc65acdf4bff5fa45ac92d78b56bdf0 # ratchet:pypa/gh-action-pypi-publish@release/v1
        with:
          print-hash: true
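In other words, under this new workflow a release is cut simply by pushing a tag; a sketch (the tag name is illustrative):

```bash
# Any pushed tag triggers the build job above
git tag -a v0.6.0 -m "Release 0.6.0"
git push origin v0.6.0

# The release job then publishes dist/* to PyPI via OIDC trusted publishing
# (id-token: write), so no long-lived PyPI API token is required.
```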
3 .gitignore vendored
@@ -161,3 +161,6 @@ cython_debug/
|
||||
*.whl
|
||||
# Environments
|
||||
venv/
|
||||
.envrc
|
||||
.python-version
|
||||
_version.py
|
||||
|
||||
7 .npmrc
@@ -1,7 +0,0 @@
|
||||
strict-peer-dependencies=false
|
||||
shell-emulator=true
|
||||
engine-strict=true
|
||||
save-exact=true
|
||||
unsafe-perm=true
|
||||
prefer-offline=true
|
||||
prefer-workspace-packages=true
|
||||
@@ -31,12 +31,7 @@ repos:
|
||||
- id: editorconfig-checker
|
||||
verbose: true
|
||||
alias: ec
|
||||
types_or: [python, javascript]
|
||||
exclude: |
|
||||
(?x)^(
|
||||
openllm-python/src/openllm_cli/entrypoint.py |
|
||||
openllm-python/src/_openllm_tiny/_entrypoint.py
|
||||
)$
|
||||
types_or: [python]
|
||||
- repo: meta
|
||||
hooks:
|
||||
- id: check-hooks-apply
|
||||
@@ -55,11 +50,3 @@ repos:
|
||||
- id: check-added-large-files
|
||||
- id: debug-statements
|
||||
- id: check-merge-conflict
|
||||
- repo: https://github.com/econchick/interrogate
|
||||
rev: 1.7.0
|
||||
hooks:
|
||||
- id: interrogate
|
||||
verbose: true
|
||||
types: [python]
|
||||
exclude: ^(docs|tools|openllm-python/tests)
|
||||
args: [--config=pyproject.toml]
|
||||
|
||||
@@ -1,9 +0,0 @@
|
||||
{
|
||||
"printWidth": 119,
|
||||
"quoteProps": "as-needed",
|
||||
"trailingComma": "none",
|
||||
"useTabs": false,
|
||||
"tabWidth": 2,
|
||||
"semi": false,
|
||||
"singleQuote": true
|
||||
}
|
||||
1 .python-version Symbolic link
@@ -0,0 +1 @@
|
||||
/Users/aarnphm/workspace/bentoml/projects/openllm/upstream/.python-version-default
|
||||
@@ -1,4 +1,3 @@
|
||||
exclude = ["tools", "examples", "openllm-python/src/openllm_cli/playground/"]
|
||||
extend-include = ["*.ipynb"]
|
||||
preview = true
|
||||
line-length = 119
|
||||
@@ -12,13 +11,10 @@ skip-magic-trailing-comma = true
|
||||
docstring-code-format = true
|
||||
|
||||
[lint]
|
||||
typing-modules = [
|
||||
"openllm_core._typing_compat",
|
||||
"openllm_client._typing_compat",
|
||||
]
|
||||
ignore = [
|
||||
"RUF012",
|
||||
"ANN", # Mypy is better at this
|
||||
"E722",
|
||||
]
|
||||
select = [
|
||||
"F",
|
||||
|
||||
@@ -1,2 +0,0 @@
|
||||
[formatting]
|
||||
indent_string = " "
|
||||
@@ -18,6 +18,10 @@ This changelog is managed by towncrier and is compiled at release time.
|
||||
|
||||
<!-- towncrier release notes start -->
|
||||
|
||||
## [0.6.0](https://github.com/bentoml/openllm/tree/v0.6.0)
|
||||
No significant changes.
|
||||
|
||||
|
||||
## [0.5.7](https://github.com/bentoml/openllm/tree/v0.5.7)
|
||||
No significant changes.
|
||||
|
||||
|
||||
@@ -44,6 +44,12 @@ Before you can start developing, you'll need to set up your environment:
|
||||
git branch --set-upstream-to=upstream/main
|
||||
```
|
||||
|
||||
6. (Optional) Link `.python-version-default` to `.python-version`:
|
||||
|
||||
```bash
|
||||
ln .python-version-default .python-version
|
||||
```
|
||||
|
||||
## Development Workflow
|
||||
|
||||
There are a few ways to contribute to the repository structure for OpenLLM:
|
||||
|
||||
130 README.md
@@ -34,8 +34,6 @@ OpenLLM helps developers **run any open-source LLMs**, such as Llama 2 and Mistr
|
||||
|
||||
<br/>
|
||||
|
||||
<!-- hatch-fancy-pypi-readme interim start -->
|
||||
|
||||
## 💾 TL/DR
|
||||
|
||||
For starters, we provide two ways to quickly try out OpenLLM:
|
||||
@@ -98,7 +96,6 @@ OpenLLM currently supports the following models. By default, OpenLLM doesn't inc
|
||||
|
||||
<summary>Baichuan</summary>
|
||||
|
||||
|
||||
### Quickstart
|
||||
|
||||
Run the following command to quickly spin up a Baichuan server:
|
||||
@@ -106,23 +103,21 @@ Run the following command to quickly spin up a Baichuan server:
|
||||
```bash
|
||||
openllm start baichuan-inc/baichuan-7b --trust-remote-code
|
||||
```
|
||||
|
||||
You can run the following code in a different terminal to interact with the server:
|
||||
|
||||
```python
|
||||
import openllm_client
|
||||
client = openllm_client.HTTPClient('http://localhost:3000')
|
||||
client.generate('What are large language models?')
|
||||
```
|
||||
|
||||
|
||||
> **Note:** Any Baichuan variants can be deployed with OpenLLM. Visit the [HuggingFace Model Hub](https://huggingface.co/models?sort=trending&search=baichuan) to see more Baichuan-compatible models.
|
||||
|
||||
|
||||
|
||||
### Supported models
|
||||
|
||||
You can specify any of the following Baichuan models via `openllm start`:
|
||||
|
||||
|
||||
- [baichuan-inc/baichuan2-7b-base](https://huggingface.co/baichuan-inc/baichuan2-7b-base)
|
||||
- [baichuan-inc/baichuan2-7b-chat](https://huggingface.co/baichuan-inc/baichuan2-7b-chat)
|
||||
- [baichuan-inc/baichuan2-13b-base](https://huggingface.co/baichuan-inc/baichuan2-13b-base)
|
||||
@@ -134,7 +129,6 @@ You can specify any of the following Baichuan models via `openllm start`:
|
||||
|
||||
<summary>ChatGLM</summary>
|
||||
|
||||
|
||||
### Quickstart
|
||||
|
||||
Run the following command to quickly spin up a ChatGLM server:
|
||||
@@ -142,23 +136,21 @@ Run the following command to quickly spin up a ChatGLM server:
|
||||
```bash
|
||||
openllm start thudm/chatglm-6b --trust-remote-code
|
||||
```
|
||||
|
||||
You can run the following code in a different terminal to interact with the server:
|
||||
|
||||
```python
|
||||
import openllm_client
|
||||
client = openllm_client.HTTPClient('http://localhost:3000')
|
||||
client.generate('What are large language models?')
|
||||
```
|
||||
|
||||
|
||||
> **Note:** Any ChatGLM variants can be deployed with OpenLLM. Visit the [HuggingFace Model Hub](https://huggingface.co/models?sort=trending&search=chatglm) to see more ChatGLM-compatible models.
|
||||
|
||||
|
||||
|
||||
### Supported models
|
||||
|
||||
You can specify any of the following ChatGLM models via `openllm start`:
|
||||
|
||||
|
||||
- [thudm/chatglm-6b](https://huggingface.co/thudm/chatglm-6b)
|
||||
- [thudm/chatglm-6b-int8](https://huggingface.co/thudm/chatglm-6b-int8)
|
||||
- [thudm/chatglm-6b-int4](https://huggingface.co/thudm/chatglm-6b-int4)
|
||||
@@ -172,7 +164,6 @@ You can specify any of the following ChatGLM models via `openllm start`:
|
||||
|
||||
<summary>Cohere</summary>
|
||||
|
||||
|
||||
### Quickstart
|
||||
|
||||
Run the following command to quickly spin up a Cohere server:
|
||||
@@ -180,23 +171,21 @@ Run the following command to quickly spin up a Cohere server:
|
||||
```bash
|
||||
openllm start CohereForAI/c4ai-command-r-plus --trust-remote-code
|
||||
```
|
||||
|
||||
You can run the following code in a different terminal to interact with the server:
|
||||
|
||||
```python
|
||||
import openllm_client
|
||||
client = openllm_client.HTTPClient('http://localhost:3000')
|
||||
client.generate('What are large language models?')
|
||||
```
|
||||
|
||||
|
||||
> **Note:** Any Cohere variants can be deployed with OpenLLM. Visit the [HuggingFace Model Hub](https://huggingface.co/models?sort=trending&search=commandr) to see more Cohere-compatible models.
|
||||
|
||||
|
||||
|
||||
### Supported models
|
||||
|
||||
You can specify any of the following Cohere models via `openllm start`:
|
||||
|
||||
|
||||
- [CohereForAI/c4ai-command-r-plus](https://huggingface.co/CohereForAI/c4ai-command-r-plus)
|
||||
- [CohereForAI/c4ai-command-r-v01](https://huggingface.co/CohereForAI/c4ai-command-r-v01)
|
||||
|
||||
@@ -206,7 +195,6 @@ You can specify any of the following Cohere models via `openllm start`:
|
||||
|
||||
<summary>Dbrx</summary>
|
||||
|
||||
|
||||
### Quickstart
|
||||
|
||||
Run the following command to quickly spin up a Dbrx server:
|
||||
@@ -214,23 +202,21 @@ Run the following command to quickly spin up a Dbrx server:
|
||||
```bash
|
||||
openllm start databricks/dbrx-instruct --trust-remote-code
|
||||
```
|
||||
|
||||
You can run the following code in a different terminal to interact with the server:
|
||||
|
||||
```python
|
||||
import openllm_client
|
||||
client = openllm_client.HTTPClient('http://localhost:3000')
|
||||
client.generate('What are large language models?')
|
||||
```
|
||||
|
||||
|
||||
> **Note:** Any Dbrx variants can be deployed with OpenLLM. Visit the [HuggingFace Model Hub](https://huggingface.co/models?sort=trending&search=dbrx) to see more Dbrx-compatible models.
|
||||
|
||||
|
||||
|
||||
### Supported models
|
||||
|
||||
You can specify any of the following Dbrx models via `openllm start`:
|
||||
|
||||
|
||||
- [databricks/dbrx-instruct](https://huggingface.co/databricks/dbrx-instruct)
|
||||
- [databricks/dbrx-base](https://huggingface.co/databricks/dbrx-base)
|
||||
|
||||
@@ -240,7 +226,6 @@ You can specify any of the following Dbrx models via `openllm start`:
|
||||
|
||||
<summary>DollyV2</summary>
|
||||
|
||||
|
||||
### Quickstart
|
||||
|
||||
Run the following command to quickly spin up a DollyV2 server:
|
||||
@@ -248,23 +233,21 @@ Run the following command to quickly spin up a DollyV2 server:
|
||||
```bash
|
||||
openllm start databricks/dolly-v2-3b --trust-remote-code
|
||||
```
|
||||
|
||||
You can run the following code in a different terminal to interact with the server:
|
||||
|
||||
```python
|
||||
import openllm_client
|
||||
client = openllm_client.HTTPClient('http://localhost:3000')
|
||||
client.generate('What are large language models?')
|
||||
```
|
||||
|
||||
|
||||
> **Note:** Any DollyV2 variants can be deployed with OpenLLM. Visit the [HuggingFace Model Hub](https://huggingface.co/models?sort=trending&search=dolly_v2) to see more DollyV2-compatible models.
|
||||
|
||||
|
||||
|
||||
### Supported models
|
||||
|
||||
You can specify any of the following DollyV2 models via `openllm start`:
|
||||
|
||||
|
||||
- [databricks/dolly-v2-3b](https://huggingface.co/databricks/dolly-v2-3b)
|
||||
- [databricks/dolly-v2-7b](https://huggingface.co/databricks/dolly-v2-7b)
|
||||
- [databricks/dolly-v2-12b](https://huggingface.co/databricks/dolly-v2-12b)
|
||||
@@ -275,7 +258,6 @@ You can specify any of the following DollyV2 models via `openllm start`:
|
||||
|
||||
<summary>Falcon</summary>
|
||||
|
||||
|
||||
### Quickstart
|
||||
|
||||
Run the following command to quickly spin up a Falcon server:
|
||||
@@ -283,23 +265,21 @@ Run the following command to quickly spin up a Falcon server:
|
||||
```bash
|
||||
openllm start tiiuae/falcon-7b --trust-remote-code
|
||||
```
|
||||
|
||||
You can run the following code in a different terminal to interact with the server:
|
||||
|
||||
```python
|
||||
import openllm_client
|
||||
client = openllm_client.HTTPClient('http://localhost:3000')
|
||||
client.generate('What are large language models?')
|
||||
```
|
||||
|
||||
|
||||
> **Note:** Any Falcon variants can be deployed with OpenLLM. Visit the [HuggingFace Model Hub](https://huggingface.co/models?sort=trending&search=falcon) to see more Falcon-compatible models.
|
||||
|
||||
|
||||
|
||||
### Supported models
|
||||
|
||||
You can specify any of the following Falcon models via `openllm start`:
|
||||
|
||||
|
||||
- [tiiuae/falcon-7b](https://huggingface.co/tiiuae/falcon-7b)
|
||||
- [tiiuae/falcon-40b](https://huggingface.co/tiiuae/falcon-40b)
|
||||
- [tiiuae/falcon-7b-instruct](https://huggingface.co/tiiuae/falcon-7b-instruct)
|
||||
@@ -311,7 +291,6 @@ You can specify any of the following Falcon models via `openllm start`:
|
||||
|
||||
<summary>Gemma</summary>
|
||||
|
||||
|
||||
### Quickstart
|
||||
|
||||
Run the following command to quickly spin up a Gemma server:
|
||||
@@ -319,23 +298,21 @@ Run the following command to quickly spin up a Gemma server:
|
||||
```bash
|
||||
openllm start google/gemma-7b --trust-remote-code
|
||||
```
|
||||
|
||||
You can run the following code in a different terminal to interact with the server:
|
||||
|
||||
```python
|
||||
import openllm_client
|
||||
client = openllm_client.HTTPClient('http://localhost:3000')
|
||||
client.generate('What are large language models?')
|
||||
```
|
||||
|
||||
|
||||
> **Note:** Any Gemma variants can be deployed with OpenLLM. Visit the [HuggingFace Model Hub](https://huggingface.co/models?sort=trending&search=gemma) to see more Gemma-compatible models.
|
||||
|
||||
|
||||
|
||||
### Supported models
|
||||
|
||||
You can specify any of the following Gemma models via `openllm start`:
|
||||
|
||||
|
||||
- [google/gemma-7b](https://huggingface.co/google/gemma-7b)
|
||||
- [google/gemma-7b-it](https://huggingface.co/google/gemma-7b-it)
|
||||
- [google/gemma-2b](https://huggingface.co/google/gemma-2b)
|
||||
@@ -347,7 +324,6 @@ You can specify any of the following Gemma models via `openllm start`:
|
||||
|
||||
<summary>GPTNeoX</summary>
|
||||
|
||||
|
||||
### Quickstart
|
||||
|
||||
Run the following command to quickly spin up a GPTNeoX server:
|
||||
@@ -355,23 +331,21 @@ Run the following command to quickly spin up a GPTNeoX server:
|
||||
```bash
|
||||
openllm start eleutherai/gpt-neox-20b --trust-remote-code
|
||||
```
|
||||
|
||||
You can run the following code in a different terminal to interact with the server:
|
||||
|
||||
```python
|
||||
import openllm_client
|
||||
client = openllm_client.HTTPClient('http://localhost:3000')
|
||||
client.generate('What are large language models?')
|
||||
```
|
||||
|
||||
|
||||
> **Note:** Any GPTNeoX variants can be deployed with OpenLLM. Visit the [HuggingFace Model Hub](https://huggingface.co/models?sort=trending&search=gpt_neox) to see more GPTNeoX-compatible models.
|
||||
|
||||
|
||||
|
||||
### Supported models
|
||||
|
||||
You can specify any of the following GPTNeoX models via `openllm start`:
|
||||
|
||||
|
||||
- [eleutherai/gpt-neox-20b](https://huggingface.co/eleutherai/gpt-neox-20b)
|
||||
|
||||
</details>
|
||||
@@ -380,7 +354,6 @@ You can specify any of the following GPTNeoX models via `openllm start`:
|
||||
|
||||
<summary>Llama</summary>
|
||||
|
||||
|
||||
### Quickstart
|
||||
|
||||
Run the following command to quickly spin up a Llama server:
|
||||
@@ -388,23 +361,21 @@ Run the following command to quickly spin up a Llama server:
|
||||
```bash
|
||||
openllm start NousResearch/llama-2-7b-hf --trust-remote-code
|
||||
```
|
||||
|
||||
You can run the following code in a different terminal to interact with the server:
|
||||
|
||||
```python
|
||||
import openllm_client
|
||||
client = openllm_client.HTTPClient('http://localhost:3000')
|
||||
client.generate('What are large language models?')
|
||||
```
|
||||
|
||||
|
||||
> **Note:** Any Llama variants can be deployed with OpenLLM. Visit the [HuggingFace Model Hub](https://huggingface.co/models?sort=trending&search=llama) to see more Llama-compatible models.
|
||||
|
||||
|
||||
|
||||
### Supported models
|
||||
|
||||
You can specify any of the following Llama models via `openllm start`:
|
||||
|
||||
|
||||
- [meta-llama/Llama-2-70b-chat-hf](https://huggingface.co/meta-llama/Llama-2-70b-chat-hf)
|
||||
- [meta-llama/Llama-2-13b-chat-hf](https://huggingface.co/meta-llama/Llama-2-13b-chat-hf)
|
||||
- [meta-llama/Llama-2-7b-chat-hf](https://huggingface.co/meta-llama/Llama-2-7b-chat-hf)
|
||||
@@ -424,7 +395,6 @@ You can specify any of the following Llama models via `openllm start`:
|
||||
|
||||
<summary>Mistral</summary>
|
||||
|
||||
|
||||
### Quickstart
|
||||
|
||||
Run the following command to quickly spin up a Mistral server:
|
||||
@@ -432,23 +402,21 @@ Run the following command to quickly spin up a Mistral server:
|
||||
```bash
|
||||
openllm start mistralai/Mistral-7B-Instruct-v0.1 --trust-remote-code
|
||||
```
|
||||
|
||||
You can run the following code in a different terminal to interact with the server:
|
||||
|
||||
```python
|
||||
import openllm_client
|
||||
client = openllm_client.HTTPClient('http://localhost:3000')
|
||||
client.generate('What are large language models?')
|
||||
```
|
||||
|
||||
|
||||
> **Note:** Any Mistral variants can be deployed with OpenLLM. Visit the [HuggingFace Model Hub](https://huggingface.co/models?sort=trending&search=mistral) to see more Mistral-compatible models.
|
||||
|
||||
|
||||
|
||||
### Supported models
|
||||
|
||||
You can specify any of the following Mistral models via `openllm start`:
|
||||
|
||||
|
||||
- [HuggingFaceH4/zephyr-7b-alpha](https://huggingface.co/HuggingFaceH4/zephyr-7b-alpha)
|
||||
- [HuggingFaceH4/zephyr-7b-beta](https://huggingface.co/HuggingFaceH4/zephyr-7b-beta)
|
||||
- [mistralai/Mistral-7B-Instruct-v0.2](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2)
|
||||
@@ -461,7 +429,6 @@ You can specify any of the following Mistral models via `openllm start`:
|
||||
|
||||
<summary>Mixtral</summary>
|
||||
|
||||
|
||||
### Quickstart
|
||||
|
||||
Run the following command to quickly spin up a Mixtral server:
|
||||
@@ -469,23 +436,21 @@ Run the following command to quickly spin up a Mixtral server:
|
||||
```bash
|
||||
openllm start mistralai/Mixtral-8x7B-Instruct-v0.1 --trust-remote-code
|
||||
```
|
||||
|
||||
You can run the following code in a different terminal to interact with the server:
|
||||
|
||||
```python
|
||||
import openllm_client
|
||||
client = openllm_client.HTTPClient('http://localhost:3000')
|
||||
client.generate('What are large language models?')
|
||||
```
|
||||
|
||||
|
||||
> **Note:** Any Mixtral variants can be deployed with OpenLLM. Visit the [HuggingFace Model Hub](https://huggingface.co/models?sort=trending&search=mixtral) to see more Mixtral-compatible models.
|
||||
|
||||
|
||||
|
||||
### Supported models
|
||||
|
||||
You can specify any of the following Mixtral models via `openllm start`:
|
||||
|
||||
|
||||
- [mistralai/Mixtral-8x7B-Instruct-v0.1](https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1)
|
||||
- [mistralai/Mixtral-8x7B-v0.1](https://huggingface.co/mistralai/Mixtral-8x7B-v0.1)
|
||||
|
||||
@@ -495,7 +460,6 @@ You can specify any of the following Mixtral models via `openllm start`:
|
||||
|
||||
<summary>MPT</summary>
|
||||
|
||||
|
||||
### Quickstart
|
||||
|
||||
Run the following command to quickly spin up an MPT server:
|
||||
@@ -503,23 +467,21 @@ Run the following command to quickly spin up a MPT server:
|
||||
```bash
|
||||
openllm start mosaicml/mpt-7b-instruct --trust-remote-code
|
||||
```
|
||||
|
||||
You can run the following code in a different terminal to interact with the server:
|
||||
|
||||
```python
|
||||
import openllm_client
|
||||
client = openllm_client.HTTPClient('http://localhost:3000')
|
||||
client.generate('What are large language models?')
|
||||
```
|
||||
|
||||
|
||||
> **Note:** Any MPT variants can be deployed with OpenLLM. Visit the [HuggingFace Model Hub](https://huggingface.co/models?sort=trending&search=mpt) to see more MPT-compatible models.
|
||||
|
||||
|
||||
|
||||
### Supported models
|
||||
|
||||
You can specify any of the following MPT models via `openllm start`:
|
||||
|
||||
|
||||
- [mosaicml/mpt-7b](https://huggingface.co/mosaicml/mpt-7b)
|
||||
- [mosaicml/mpt-7b-instruct](https://huggingface.co/mosaicml/mpt-7b-instruct)
|
||||
- [mosaicml/mpt-7b-chat](https://huggingface.co/mosaicml/mpt-7b-chat)
|
||||
@@ -534,7 +496,6 @@ You can specify any of the following MPT models via `openllm start`:
|
||||
|
||||
<summary>OPT</summary>
|
||||
|
||||
|
||||
### Quickstart
|
||||
|
||||
Run the following command to quickly spin up an OPT server:
|
||||
@@ -542,23 +503,21 @@ Run the following command to quickly spin up a OPT server:
|
||||
```bash
|
||||
openllm start facebook/opt-1.3b
|
||||
```
|
||||
|
||||
You can run the following code in a different terminal to interact with the server:
|
||||
|
||||
```python
|
||||
import openllm_client
|
||||
client = openllm_client.HTTPClient('http://localhost:3000')
|
||||
client.generate('What are large language models?')
|
||||
```
|
||||
|
||||
|
||||
> **Note:** Any OPT variants can be deployed with OpenLLM. Visit the [HuggingFace Model Hub](https://huggingface.co/models?sort=trending&search=opt) to see more OPT-compatible models.
|
||||
|
||||
|
||||
|
||||
### Supported models
|
||||
|
||||
You can specify any of the following OPT models via `openllm start`:
|
||||
|
||||
|
||||
- [facebook/opt-125m](https://huggingface.co/facebook/opt-125m)
|
||||
- [facebook/opt-350m](https://huggingface.co/facebook/opt-350m)
|
||||
- [facebook/opt-1.3b](https://huggingface.co/facebook/opt-1.3b)
|
||||
@@ -572,7 +531,6 @@ You can specify any of the following OPT models via `openllm start`:
|
||||
|
||||
<summary>Phi</summary>
|
||||
|
||||
|
||||
### Quickstart
|
||||
|
||||
Run the following command to quickly spin up a Phi server:
|
||||
@@ -580,23 +538,21 @@ Run the following command to quickly spin up a Phi server:
|
||||
```bash
|
||||
openllm start microsoft/Phi-3-mini-4k-instruct --trust-remote-code
|
||||
```
|
||||
|
||||
You can run the following code in a different terminal to interact with the server:
|
||||
|
||||
```python
|
||||
import openllm_client
|
||||
client = openllm_client.HTTPClient('http://localhost:3000')
|
||||
client.generate('What are large language models?')
|
||||
```
|
||||
|
||||
|
||||
> **Note:** Any Phi variants can be deployed with OpenLLM. Visit the [HuggingFace Model Hub](https://huggingface.co/models?sort=trending&search=phi) to see more Phi-compatible models.
|
||||
|
||||
|
||||
|
||||
### Supported models
|
||||
|
||||
You can specify any of the following Phi models via `openllm start`:
|
||||
|
||||
|
||||
- [microsoft/Phi-3-mini-4k-instruct](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct)
|
||||
- [microsoft/Phi-3-mini-128k-instruct](https://huggingface.co/microsoft/Phi-3-mini-128k-instruct)
|
||||
- [microsoft/Phi-3-small-8k-instruct](https://huggingface.co/microsoft/Phi-3-small-8k-instruct)
|
||||
@@ -610,7 +566,6 @@ You can specify any of the following Phi models via `openllm start`:
|
||||
|
||||
<summary>Qwen</summary>
|
||||
|
||||
|
||||
### Quickstart
|
||||
|
||||
Run the following command to quickly spin up a Qwen server:
|
||||
@@ -618,23 +573,21 @@ Run the following command to quickly spin up a Qwen server:
|
||||
```bash
|
||||
openllm start qwen/Qwen-7B-Chat --trust-remote-code
|
||||
```
|
||||
|
||||
You can run the following code in a different terminal to interact with the server:
|
||||
|
||||
```python
|
||||
import openllm_client
|
||||
client = openllm_client.HTTPClient('http://localhost:3000')
|
||||
client.generate('What are large language models?')
|
||||
```
|
||||
|
||||
|
||||
> **Note:** Any Qwen variants can be deployed with OpenLLM. Visit the [HuggingFace Model Hub](https://huggingface.co/models?sort=trending&search=qwen) to see more Qwen-compatible models.
### Supported models

You can specify any of the following Qwen models via `openllm start`:

- [qwen/Qwen-7B-Chat](https://huggingface.co/qwen/Qwen-7B-Chat)
- [qwen/Qwen-7B-Chat-Int8](https://huggingface.co/qwen/Qwen-7B-Chat-Int8)
- [qwen/Qwen-7B-Chat-Int4](https://huggingface.co/qwen/Qwen-7B-Chat-Int4)

@@ -648,7 +601,6 @@ You can specify any of the following Qwen models via `openllm start`:

<summary>StableLM</summary>

### Quickstart

Run the following command to quickly spin up a StableLM server:

@@ -656,23 +608,21 @@ Run the following command to quickly spin up a StableLM server:

```bash
openllm start stabilityai/stablelm-tuned-alpha-3b --trust-remote-code
```

You can run the following code in a different terminal to interact with the server:

```python
import openllm_client

client = openllm_client.HTTPClient('http://localhost:3000')
client.generate('What are large language models?')
```

> **Note:** Any StableLM variants can be deployed with OpenLLM. Visit the [HuggingFace Model Hub](https://huggingface.co/models?sort=trending&search=stablelm) to see more StableLM-compatible models.

### Supported models

You can specify any of the following StableLM models via `openllm start`:

- [stabilityai/stablelm-tuned-alpha-3b](https://huggingface.co/stabilityai/stablelm-tuned-alpha-3b)
- [stabilityai/stablelm-tuned-alpha-7b](https://huggingface.co/stabilityai/stablelm-tuned-alpha-7b)
- [stabilityai/stablelm-base-alpha-3b](https://huggingface.co/stabilityai/stablelm-base-alpha-3b)

@@ -684,7 +634,6 @@ You can specify any of the following StableLM models via `openllm start`:
<summary>StarCoder</summary>

### Quickstart

Run the following command to quickly spin up a StarCoder server:

@@ -692,23 +641,21 @@ Run the following command to quickly spin up a StarCoder server:

```bash
openllm start bigcode/starcoder --trust-remote-code
```

You can run the following code in a different terminal to interact with the server:

```python
import openllm_client

client = openllm_client.HTTPClient('http://localhost:3000')
client.generate('What are large language models?')
```

> **Note:** Any StarCoder variants can be deployed with OpenLLM. Visit the [HuggingFace Model Hub](https://huggingface.co/models?sort=trending&search=starcoder) to see more StarCoder-compatible models.

### Supported models

You can specify any of the following StarCoder models via `openllm start`:

- [bigcode/starcoder](https://huggingface.co/bigcode/starcoder)
- [bigcode/starcoderbase](https://huggingface.co/bigcode/starcoderbase)

@@ -718,7 +665,6 @@ You can specify any of the following StarCoder models via `openllm start`:
<summary>Yi</summary>

### Quickstart

Run the following command to quickly spin up a Yi server:

@@ -726,23 +672,21 @@ Run the following command to quickly spin up a Yi server:

```bash
openllm start 01-ai/Yi-6B --trust-remote-code
```

You can run the following code in a different terminal to interact with the server:

```python
import openllm_client

client = openllm_client.HTTPClient('http://localhost:3000')
client.generate('What are large language models?')
```

> **Note:** Any Yi variants can be deployed with OpenLLM. Visit the [HuggingFace Model Hub](https://huggingface.co/models?sort=trending&search=yi) to see more Yi-compatible models.

### Supported models

You can specify any of the following Yi models via `openllm start`:

- [01-ai/Yi-6B](https://huggingface.co/01-ai/Yi-6B)
- [01-ai/Yi-34B](https://huggingface.co/01-ai/Yi-34B)
- [01-ai/Yi-6B-200K](https://huggingface.co/01-ai/Yi-6B-200K)

@@ -830,8 +774,6 @@ async for it in llm.astream('What is the difference between a duck and a goose?
print(it, flush=True, end='')
```
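Because `astream` is an async generator, it has to run inside an event loop when invoked from a plain script rather than a notebook. A minimal sketch, assuming `llm` is the `openllm.LLM` instance constructed earlier in this README (the model id below is only illustrative):

```python
import asyncio

import openllm

# Construct the LLM as shown earlier in this README; the model id here is illustrative
llm = openllm.LLM('microsoft/Phi-3-mini-4k-instruct')


async def main():
    # Consume the async generator and print tokens as they arrive
    async for it in llm.astream('What is the difference between a duck and a goose?'):
        print(it, flush=True, end='')


asyncio.run(main())
```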
<!-- hatch-fancy-pypi-readme interim stop -->

<!-- hatch-fancy-pypi-readme meta start -->

## 🚀 Deploying models to production
84
hatch.toml
84
hatch.toml
@@ -1,84 +0,0 @@
|
||||
[envs.default]
|
||||
installer = "uv"
|
||||
type = "virtual"
|
||||
dependencies = [
|
||||
"openllm-core @ {root:uri}/openllm-core",
|
||||
"openllm-client @ {root:uri}/openllm-client",
|
||||
"openllm[chatglm,fine-tune] @ {root:uri}/openllm-python",
|
||||
# NOTE: To run all hooks
|
||||
"pre-commit",
|
||||
# NOTE: towncrier for changelog
|
||||
"towncrier",
|
||||
"tomlkit",
|
||||
# NOTE: For fancy PyPI readme
|
||||
"hatch-fancy-pypi-readme",
|
||||
# NOTE: For working with shell pipe
|
||||
"plumbum",
|
||||
# The below sync with mypyc deps and pre-commit mypy
|
||||
"types-psutil",
|
||||
"types-tabulate",
|
||||
"types-PyYAML",
|
||||
"types-protobuf",
|
||||
]
|
||||
[envs.default.scripts]
|
||||
changelog = "towncrier build --version main --draft"
|
||||
inplace-changelog = "towncrier build --version main --keep"
|
||||
setup = [
|
||||
"pre-commit install",
|
||||
"- ln -s .python-version-default .python-version",
|
||||
]
|
||||
quality = [
|
||||
"bash ./local.sh --tools",
|
||||
"- pre-commit run --all-files",
|
||||
"- pnpm format",
|
||||
]
|
||||
tool = ["quality", "bash ./clean.sh", 'python ./cz.py']
|
||||
[envs.tests]
|
||||
installer = "uv"
|
||||
type = "virtual"
|
||||
dependencies = [
|
||||
"openllm @ {root:uri}/openllm-python",
|
||||
"openllm-core @ {root:uri}/openllm-core",
|
||||
"openllm-client @ {root:uri}/openllm-client",
|
||||
# NOTE: Tests strategies with Hypothesis and pytest, and snapshot testing with syrupy
|
||||
"coverage[toml]>=6.5",
|
||||
"filelock>=3.7.1",
|
||||
"pytest",
|
||||
"pytest-cov",
|
||||
"pytest-mock",
|
||||
"pytest-randomly",
|
||||
"pytest-rerunfailures",
|
||||
"pytest-asyncio>=0.21.0",
|
||||
"pytest-xdist[psutil]",
|
||||
"trustme",
|
||||
"hypothesis",
|
||||
"syrupy",
|
||||
]
|
||||
skip-install = false
|
||||
template = "tests"
|
||||
[envs.tests.scripts]
|
||||
_run_script = "pytest --cov --cov-report={env:COVERAGE_REPORT:term-missing} --cov-config=pyproject.toml -vv"
|
||||
python = "_run_script -r aR -x {args:openllm-python/tests}"
|
||||
[envs.tests.overrides]
|
||||
env.GITHUB_ACTIONS.env-vars = "COVERAGE_REPORT="
|
||||
[envs.coverage]
|
||||
dependencies = ["coverage[toml]>=6.5", "lxml", "orjson"]
|
||||
detached = true
|
||||
[envs.coverage.scripts]
|
||||
combine = "coverage combine {args}"
|
||||
generate-summary = "python tools/generate-coverage.py"
|
||||
report-uncovered-html = "coverage html --skip-covered --skip-empty {args}"
|
||||
report-xml = "coverage xml {args}"
|
||||
write-summary-report = "python tools/write-coverage-report.py"
|
||||
[envs.ui]
|
||||
detached = true
|
||||
[envs.ui.scripts]
|
||||
server = "openllm start {args:facebook/opt-250m} --working-dir {root:uri} --cors --debug"
|
||||
[envs.ci]
|
||||
detached = true
|
||||
[envs.ci.scripts]
|
||||
client-stubs = "bash openllm-client/generate-grpc-stubs"
|
||||
compile = "bash ./tools/compile.sh {args}"
|
||||
recompile = ["bash ./clean.sh", "compile"]
|
||||
edi = "bash local.sh"
|
||||
lock = ["bash tools/lock-actions.sh"]
|
||||
@@ -1,338 +0,0 @@
|
||||
import os
|
||||
import random
|
||||
import sys
|
||||
from collections import defaultdict
|
||||
from typing import Annotated, Optional
|
||||
|
||||
import questionary
|
||||
import typer
|
||||
|
||||
from openllm_next.accelerator_spec import (
|
||||
DeploymentTarget,
|
||||
can_run,
|
||||
get_local_machine_spec,
|
||||
)
|
||||
from openllm_next.analytic import DO_NOT_TRACK, OpenLLMTyper
|
||||
from openllm_next.clean import app as clean_app
|
||||
from openllm_next.cloud import deploy as cloud_deploy
|
||||
from openllm_next.cloud import ensure_cloud_context, get_cloud_machine_spec
|
||||
from openllm_next.common import CHECKED, INTERACTIVE, VERBOSE_LEVEL, output
|
||||
from openllm_next.local import run as local_run
|
||||
from openllm_next.local import serve as local_serve
|
||||
from openllm_next.model import app as model_app
|
||||
from openllm_next.model import ensure_bento, list_bento
|
||||
from openllm_next.repo import app as repo_app
|
||||
|
||||
app = OpenLLMTyper(
|
||||
help="`openllm hello` to get started. "
|
||||
"OpenLLM is a CLI tool to manage and deploy open source LLMs and"
|
||||
" get an OpenAI API compatible chat server in seconds.",
|
||||
)
|
||||
|
||||
app.add_typer(repo_app, name="repo")
|
||||
app.add_typer(model_app, name="model")
|
||||
app.add_typer(clean_app, name="clean")
|
||||
|
||||
|
||||
def _select_bento_name(models, target):
|
||||
from tabulate import tabulate
|
||||
|
||||
options = []
|
||||
model_infos = [
|
||||
[model.repo.name, model.name, can_run(model, target)] for model in models
|
||||
]
|
||||
model_name_groups = defaultdict(lambda: 0)
|
||||
for repo, name, score in model_infos:
|
||||
model_name_groups[(repo, name)] += score
|
||||
table_data = [
|
||||
[name, repo, CHECKED if score > 0 else ""]
|
||||
for (repo, name), score in model_name_groups.items()
|
||||
]
|
||||
if not table_data:
|
||||
output("No model found", style="red")
|
||||
raise typer.Exit(1)
|
||||
table = tabulate(
|
||||
table_data,
|
||||
headers=["model", "repo", "locally runnable"],
|
||||
).split("\n")
|
||||
headers = f"{table[0]}\n {table[1]}"
|
||||
|
||||
options.append(questionary.Separator(headers))
|
||||
for table_data, table_line in zip(table_data, table[2:]):
|
||||
options.append(questionary.Choice(table_line, value=table_data[:2]))
|
||||
selected = questionary.select("Select a model", options).ask()
|
||||
if selected is None:
|
||||
raise typer.Exit(1)
|
||||
return selected
|
||||
|
||||
|
||||
def _select_bento_version(models, target, bento_name, repo):
|
||||
from tabulate import tabulate
|
||||
|
||||
model_infos = [
|
||||
[model, can_run(model, target)]
|
||||
for model in models
|
||||
if model.name == bento_name and model.repo.name == repo
|
||||
]
|
||||
|
||||
table_data = [
|
||||
[model.tag, CHECKED if score > 0 else ""]
|
||||
for model, score in model_infos
|
||||
if model.name == bento_name and model.repo.name == repo
|
||||
]
|
||||
if not table_data:
|
||||
output(f"No model found for {bento_name} in {repo}", style="red")
|
||||
raise typer.Exit(1)
|
||||
table = tabulate(
|
||||
table_data,
|
||||
headers=["version", "locally runnable"],
|
||||
).split("\n")
|
||||
|
||||
options = []
|
||||
options.append(questionary.Separator(f"{table[0]}\n {table[1]}"))
|
||||
for table_data, table_line in zip(model_infos, table[2:]):
|
||||
options.append(questionary.Choice(table_line, value=table_data))
|
||||
selected = questionary.select("Select a version", options).ask()
|
||||
if selected is None:
|
||||
raise typer.Exit(1)
|
||||
return selected
|
||||
|
||||
|
||||
def _select_target(bento, targets):
|
||||
from tabulate import tabulate
|
||||
|
||||
options = []
|
||||
targets.sort(key=lambda x: can_run(bento, x), reverse=True)
|
||||
if not targets:
|
||||
output(
|
||||
"No available instance type, check your bentocloud account",
|
||||
style="red",
|
||||
)
|
||||
raise typer.Exit(1)
|
||||
|
||||
table = tabulate(
|
||||
[
|
||||
[
|
||||
target.name,
|
||||
target.accelerators_repr,
|
||||
f"${target.price}",
|
||||
CHECKED if can_run(bento, target) else "insufficient res.",
|
||||
]
|
||||
for target in targets
|
||||
],
|
||||
headers=["instance type", "accelerator", "price/hr", "deployable"],
|
||||
).split("\n")
|
||||
options.append(questionary.Separator(f"{table[0]}\n {table[1]}"))
|
||||
|
||||
for target, line in zip(targets, table[2:]):
|
||||
options.append(
|
||||
questionary.Choice(
|
||||
f"{line}",
|
||||
value=target,
|
||||
)
|
||||
)
|
||||
selected = questionary.select("Select an instance type", options).ask()
|
||||
if selected is None:
|
||||
raise typer.Exit(1)
|
||||
return selected
|
||||
|
||||
|
||||
def _select_action(bento, score):
|
||||
if score > 0:
|
||||
options = [
|
||||
questionary.Separator("Available actions"),
|
||||
questionary.Choice(
|
||||
"0. Run the model in terminal",
|
||||
value="run",
|
||||
shortcut_key="0",
|
||||
),
|
||||
questionary.Separator(f" $ openllm run {bento}"),
|
||||
questionary.Separator(" "),
|
||||
questionary.Choice(
|
||||
"1. Serve the model locally and get a chat server",
|
||||
value="serve",
|
||||
shortcut_key="1",
|
||||
),
|
||||
questionary.Separator(f" $ openllm serve {bento}"),
|
||||
questionary.Separator(" "),
|
||||
questionary.Choice(
|
||||
"2. Deploy the model to bentocloud and get a scalable chat server",
|
||||
value="deploy",
|
||||
shortcut_key="2",
|
||||
),
|
||||
questionary.Separator(f" $ openllm deploy {bento}"),
|
||||
]
|
||||
else:
|
||||
options = [
|
||||
questionary.Separator("Available actions"),
|
||||
questionary.Choice(
|
||||
"0. Run the model in terminal",
|
||||
value="run",
|
||||
disabled="insufficient res.",
|
||||
shortcut_key="0",
|
||||
),
|
||||
questionary.Separator(f" $ openllm run {bento}"),
|
||||
questionary.Separator(" "),
|
||||
questionary.Choice(
|
||||
"1. Serve the model locally and get a chat server",
|
||||
value="serve",
|
||||
disabled="insufficient res.",
|
||||
shortcut_key="1",
|
||||
),
|
||||
questionary.Separator(f" $ openllm serve {bento}"),
|
||||
questionary.Separator(" "),
|
||||
questionary.Choice(
|
||||
"2. Deploy the model to bentocloud and get a scalable chat server",
|
||||
value="deploy",
|
||||
shortcut_key="2",
|
||||
),
|
||||
questionary.Separator(f" $ openllm deploy {bento}"),
|
||||
]
|
||||
action = questionary.select("Select an action", options).ask()
|
||||
if action is None:
|
||||
raise typer.Exit(1)
|
||||
if action == "run":
|
||||
try:
|
||||
local_run(bento)
|
||||
finally:
|
||||
output("\nUse this command to run the action again:", style="green")
|
||||
output(f" $ openllm run {bento}", style="orange")
|
||||
elif action == "serve":
|
||||
try:
|
||||
local_serve(bento)
|
||||
finally:
|
||||
output("\nUse this command to run the action again:", style="green")
|
||||
output(f" $ openllm serve {bento}", style="orange")
|
||||
elif action == "deploy":
|
||||
ensure_cloud_context()
|
||||
targets = get_cloud_machine_spec()
|
||||
target = _select_target(bento, targets)
|
||||
try:
|
||||
cloud_deploy(bento, target)
|
||||
finally:
|
||||
output("\nUse this command to run the action again:", style="green")
|
||||
output(
|
||||
f" $ openllm deploy {bento} --instance-type {target.name}",
|
||||
style="orange",
|
||||
)
|
||||
|
||||
|
||||
@app.command(help="get started interactively")
|
||||
def hello():
|
||||
INTERACTIVE.set(True)
|
||||
VERBOSE_LEVEL.set(20)
|
||||
|
||||
target = get_local_machine_spec()
|
||||
output(f" Detected Platform: {target.platform}", style="green")
|
||||
if target.accelerators:
|
||||
output(" Detected Accelerators: ", style="green")
|
||||
for a in target.accelerators:
|
||||
output(f" - {a.model} {a.memory_size}GB", style="green")
|
||||
else:
|
||||
output(" Detected Accelerators: None", style="yellow")
|
||||
|
||||
models = list_bento()
|
||||
if not models:
|
||||
output(
|
||||
"No model found, you probably need to update the model repo:",
|
||||
style="red",
|
||||
)
|
||||
output(
|
||||
" $ openllm repo update",
|
||||
style="orange",
|
||||
)
|
||||
raise typer.Exit(1)
|
||||
|
||||
bento_name, repo = _select_bento_name(models, target)
|
||||
bento, score = _select_bento_version(models, target, bento_name, repo)
|
||||
_select_action(bento, score)
|
||||
|
||||
|
||||
@app.command(help="start an OpenAI API compatible chat server and chat in browser")
|
||||
def serve(
|
||||
model: Annotated[str, typer.Argument()] = "",
|
||||
repo: Optional[str] = None,
|
||||
port: int = 3000,
|
||||
verbose: bool = False,
|
||||
):
|
||||
if verbose:
|
||||
VERBOSE_LEVEL.set(20)
|
||||
target = get_local_machine_spec()
|
||||
bento = ensure_bento(model, target=target, repo_name=repo)
|
||||
local_serve(bento, port=port)
|
||||
|
||||
|
||||
@app.command(help="run the model and chat in terminal")
|
||||
def run(
|
||||
model: Annotated[str, typer.Argument()] = "",
|
||||
repo: Optional[str] = None,
|
||||
port: Optional[int] = None,
|
||||
timeout: int = 600,
|
||||
verbose: bool = False,
|
||||
):
|
||||
if verbose:
|
||||
VERBOSE_LEVEL.set(20)
|
||||
target = get_local_machine_spec()
|
||||
bento = ensure_bento(model, target=target, repo_name=repo)
|
||||
if port is None:
|
||||
port = random.randint(30000, 40000)
|
||||
local_run(bento, port=port, timeout=timeout)
|
||||
|
||||
|
||||
@app.command(
|
||||
help="deploy an production-ready OpenAI API compatible chat server to bentocloud ($100 free credit)",
|
||||
)
|
||||
def deploy(
|
||||
model: Annotated[str, typer.Argument()] = "",
|
||||
instance_type: Optional[str] = None,
|
||||
repo: Optional[str] = None,
|
||||
verbose: bool = False,
|
||||
):
|
||||
if verbose:
|
||||
VERBOSE_LEVEL.set(20)
|
||||
bento = ensure_bento(model, repo_name=repo)
|
||||
if instance_type is not None:
|
||||
cloud_deploy(bento, DeploymentTarget(name=instance_type))
|
||||
return
|
||||
targets = get_cloud_machine_spec()
|
||||
targets = filter(lambda x: can_run(bento, x) > 0, targets)
|
||||
targets = sorted(targets, key=lambda x: can_run(bento, x), reverse=True)
|
||||
if not targets:
|
||||
output(
|
||||
"No available instance type, check your bentocloud account",
|
||||
style="red",
|
||||
)
|
||||
raise typer.Exit(1)
|
||||
target = targets[0]
|
||||
output(
|
||||
f"Recommended instance type: {target.name}",
|
||||
style="green",
|
||||
)
|
||||
cloud_deploy(bento, target)
|
||||
|
||||
|
||||
@app.callback(invoke_without_command=True)
|
||||
def typer_callback(
|
||||
verbose: int = 0,
|
||||
do_not_track: bool = typer.Option(
|
||||
False,
|
||||
"--do-not-track",
|
||||
help="Whether to disable usage tracking",
|
||||
envvar=DO_NOT_TRACK,
|
||||
),
|
||||
):
|
||||
if verbose:
|
||||
VERBOSE_LEVEL.set(verbose)
|
||||
if do_not_track:
|
||||
os.environ[DO_NOT_TRACK] = str(True)
|
||||
|
||||
|
||||
def main():
|
||||
if sys.version_info < (3, 9):
|
||||
output("Python 3.8 or higher is required", style="red")
|
||||
sys.exit(1)
|
||||
app()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -1,166 +0,0 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import functools
|
||||
import math
|
||||
import typing
|
||||
from types import SimpleNamespace
|
||||
|
||||
import psutil
|
||||
|
||||
from openllm_next.common import BentoInfo, DeploymentTarget, output
|
||||
|
||||
|
||||
class Accelerator(SimpleNamespace):
|
||||
model: str
|
||||
memory_size: float
|
||||
|
||||
def __gt__(self, other):
|
||||
return self.memory_size > other.memory_size
|
||||
|
||||
def __eq__(self, other):
|
||||
return self.memory_size == other.memory_size
|
||||
|
||||
def __repr__(self):
|
||||
return f"{self.model}({self.memory_size}GB)"
|
||||
|
||||
|
||||
class Resource(SimpleNamespace):
|
||||
cpu: int = 0
|
||||
memory: float
|
||||
gpu: int = 0
|
||||
gpu_type: str = ""
|
||||
|
||||
def __hash__(self):
|
||||
return hash((self.cpu, self.memory, self.gpu, self.gpu_type))
|
||||
|
||||
def __bool__(self):
|
||||
return any(value is not None for value in self.__dict__.values())
|
||||
|
||||
|
||||
ACCELERATOR_SPEC_DICT: dict[str, dict] = {
|
||||
"nvidia-gtx-1650": {"model": "GTX 1650", "memory_size": 4.0},
|
||||
"nvidia-gtx-1060": {"model": "GTX 1060", "memory_size": 6.0},
|
||||
"nvidia-gtx-1080-ti": {"model": "GTX 1080 Ti", "memory_size": 11.0},
|
||||
"nvidia-rtx-3060": {"model": "RTX 3060", "memory_size": 12.0},
|
||||
"nvidia-rtx-3060-ti": {"model": "RTX 3060 Ti", "memory_size": 8.0},
|
||||
"nvidia-rtx-3070-ti": {"model": "RTX 3070 Ti", "memory_size": 8.0},
|
||||
"nvidia-rtx-3080": {"model": "RTX 3080", "memory_size": 10.0},
|
||||
"nvidia-rtx-3080-ti": {"model": "RTX 3080 Ti", "memory_size": 12.0},
|
||||
"nvidia-rtx-3090": {"model": "RTX 3090", "memory_size": 24.0},
|
||||
"nvidia-rtx-4070-ti": {"model": "RTX 4070 Ti", "memory_size": 12.0},
|
||||
"nvidia-tesla-p4": {"model": "P4", "memory_size": 8.0},
|
||||
"nvidia-tesla-p100": {"model": "P100", "memory_size": 16.0},
|
||||
"nvidia-tesla-k80": {"model": "K80", "memory_size": 12.0},
|
||||
"nvidia-tesla-t4": {"model": "T4", "memory_size": 16.0},
|
||||
"nvidia-tesla-v100": {"model": "V100", "memory_size": 16.0},
|
||||
"nvidia-l4": {"model": "L4", "memory_size": 24.0},
|
||||
"nvidia-tesla-l4": {"model": "L4", "memory_size": 24.0},
|
||||
"nvidia-tesla-a10g": {"model": "A10G", "memory_size": 24.0},
|
||||
"nvidia-a100-80g": {"model": "A100", "memory_size": 80.0},
|
||||
"nvidia-a100-80gb": {"model": "A100", "memory_size": 80.0},
|
||||
"nvidia-tesla-a100": {"model": "A100", "memory_size": 40.0},
|
||||
}
|
||||
|
||||
|
||||
ACCELERATOR_SPECS: dict[str, Accelerator] = {
|
||||
key: Accelerator(**value) for key, value in ACCELERATOR_SPEC_DICT.items()
|
||||
}
|
||||
|
||||
|
||||
@functools.lru_cache
|
||||
def get_local_machine_spec():
|
||||
if psutil.MACOS:
|
||||
return DeploymentTarget(accelerators=[], source="local", platform="macos")
|
||||
|
||||
if psutil.WINDOWS:
|
||||
platform = "windows"
|
||||
elif psutil.LINUX:
|
||||
platform = "linux"
|
||||
else:
|
||||
raise NotImplementedError(f"Unsupported platform")
|
||||
|
||||
from pynvml import (
|
||||
nvmlDeviceGetCount,
|
||||
nvmlDeviceGetCudaComputeCapability,
|
||||
nvmlDeviceGetHandleByIndex,
|
||||
nvmlDeviceGetMemoryInfo,
|
||||
nvmlDeviceGetName,
|
||||
nvmlInit,
|
||||
nvmlShutdown,
|
||||
)
|
||||
|
||||
try:
|
||||
nvmlInit()
|
||||
device_count = nvmlDeviceGetCount()
|
||||
accelerators: list[Accelerator] = []
|
||||
for i in range(device_count):
|
||||
handle = nvmlDeviceGetHandleByIndex(i)
|
||||
name = nvmlDeviceGetName(handle)
|
||||
memory_info = nvmlDeviceGetMemoryInfo(handle)
|
||||
accelerators.append(
|
||||
Accelerator(
|
||||
model=name, memory_size=math.ceil(int(memory_info.total) / 1024**3)
|
||||
)
|
||||
)
|
||||
compute_capability = nvmlDeviceGetCudaComputeCapability(handle)
|
||||
if compute_capability < (7, 5):
|
||||
output(
|
||||
f"GPU {name} with compute capability {compute_capability} "
|
||||
"may not be supported, 7.5 or higher is recommended. check "
|
||||
"https://developer.nvidia.com/cuda-gpus for more information",
|
||||
style="yellow",
|
||||
)
|
||||
nvmlShutdown()
|
||||
return DeploymentTarget(
|
||||
accelerators=accelerators,
|
||||
source="local",
|
||||
platform=platform,
|
||||
)
|
||||
except Exception as e:
|
||||
output(
|
||||
f"Failed to get local GPU info. Ensure nvidia driver is installed to enable local GPU deployment",
|
||||
style="yellow",
|
||||
)
|
||||
output(f"Error: {e}", style="red", level=20)
|
||||
return DeploymentTarget(accelerators=[], source="local", platform=platform)
|
||||
|
||||
|
||||
@functools.lru_cache()
|
||||
def can_run(
|
||||
bento: typing.Union[Resource, BentoInfo],
|
||||
target: typing.Optional[DeploymentTarget] = None,
|
||||
) -> float:
|
||||
"""
|
||||
Calculate if the bento can be deployed on the target.
|
||||
"""
|
||||
if target is None:
|
||||
target = get_local_machine_spec()
|
||||
|
||||
resource_spec = Resource(**(bento.bento_yaml["services"][0]["config"].get("resources", {})))
|
||||
labels = bento.bento_yaml.get("labels", {})
|
||||
platforms = labels.get("platforms", "linux").split(",")
|
||||
|
||||
if target.platform not in platforms:
|
||||
return 0.0
|
||||
|
||||
# no explicit resource requirement; return a moderate score of 0.5
|
||||
if not resource_spec:
|
||||
return 0.5
|
||||
|
||||
if resource_spec.gpu > 0:
|
||||
required_gpu = ACCELERATOR_SPECS[resource_spec.gpu_type]
|
||||
filtered_accelerators = [
|
||||
ac
|
||||
for ac in target.accelerators
|
||||
if ac.memory_size >= required_gpu.memory_size
|
||||
]
|
||||
if resource_spec.gpu > len(filtered_accelerators):
|
||||
return 0.0
|
||||
return (
|
||||
required_gpu.memory_size
|
||||
* resource_spec.gpu
|
||||
/ sum(ac.memory_size for ac in target.accelerators)
|
||||
)
|
||||
if target.accelerators:
|
||||
return 0.01 / sum(ac.memory_size for ac in target.accelerators)
|
||||
return 1.0
|
||||
@@ -1,118 +0,0 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import functools
|
||||
import os
|
||||
import re
|
||||
import time
|
||||
import typing
|
||||
from abc import ABC
|
||||
|
||||
import attr
|
||||
import click
|
||||
import typer
|
||||
import typer.core
|
||||
|
||||
DO_NOT_TRACK = "BENTOML_DO_NOT_TRACK"
|
||||
|
||||
|
||||
class EventMeta(ABC):
|
||||
@property
|
||||
def event_name(self):
|
||||
# camel case to snake case
|
||||
event_name = re.sub(r"(?<!^)(?=[A-Z])", "_", self.__class__.__name__).lower()
|
||||
# remove "_event" suffix
|
||||
suffix_to_remove = "_event"
|
||||
if event_name.endswith(suffix_to_remove):
|
||||
event_name = event_name[: -len(suffix_to_remove)]
|
||||
return event_name
|
||||
|
||||
|
||||
@attr.define
|
||||
class CliEvent(EventMeta):
|
||||
cmd_group: str
|
||||
cmd_name: str
|
||||
duration_in_ms: float = attr.field(default=0)
|
||||
error_type: typing.Optional[str] = attr.field(default=None)
|
||||
return_code: typing.Optional[int] = attr.field(default=None)
|
||||
|
||||
|
||||
@attr.define
|
||||
class OpenllmCliEvent(CliEvent):
|
||||
pass
|
||||
|
||||
|
||||
class OrderedCommands(typer.core.TyperGroup):
|
||||
def list_commands(self, _: click.Context) -> typing.Iterable[str]:
|
||||
return list(self.commands)
|
||||
|
||||
|
||||
class OpenLLMTyper(typer.Typer):
|
||||
def __init__(self, *args: typing.Any, **kwargs: typing.Any):
|
||||
no_args_is_help = kwargs.pop("no_args_is_help", True)
|
||||
context_settings = kwargs.pop("context_settings", {})
|
||||
if "help_option_names" not in context_settings:
|
||||
context_settings["help_option_names"] = ("-h", "--help")
|
||||
if "max_content_width" not in context_settings:
|
||||
context_settings["max_content_width"] = int(
|
||||
os.environ.get("COLUMNS", str(120))
|
||||
)
|
||||
klass = kwargs.pop("cls", OrderedCommands)
|
||||
|
||||
super().__init__(
|
||||
*args,
|
||||
cls=klass,
|
||||
no_args_is_help=no_args_is_help,
|
||||
context_settings=context_settings,
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
def command(self, *args: typing.Any, **kwargs: typing.Any):
|
||||
def decorator(f):
|
||||
@functools.wraps(f)
|
||||
@click.pass_context
|
||||
def wrapped(ctx: click.Context, *args, **kwargs):
|
||||
from bentoml._internal.utils.analytics import track
|
||||
|
||||
do_not_track = (
|
||||
os.environ.get(DO_NOT_TRACK, str(False)).lower() == "true"
|
||||
)
|
||||
|
||||
# so we know that the root program is openllm
|
||||
command_name = ctx.info_name
|
||||
if ctx.parent.parent is not None:
|
||||
# openllm model list
|
||||
command_group = ctx.parent.info_name
|
||||
elif ctx.parent.info_name == ctx.find_root().info_name:
|
||||
# openllm run
|
||||
command_group = "openllm"
|
||||
|
||||
if do_not_track:
|
||||
return f(*args, **kwargs)
|
||||
start_time = time.time_ns()
|
||||
try:
|
||||
return_value = f(*args, **kwargs)
|
||||
duration_in_ns = time.time_ns() - start_time
|
||||
track(
|
||||
OpenllmCliEvent(
|
||||
cmd_group=command_group,
|
||||
cmd_name=command_name,
|
||||
duration_in_ms=duration_in_ns / 1e6,
|
||||
)
|
||||
)
|
||||
return return_value
|
||||
except BaseException as e:
|
||||
duration_in_ns = time.time_ns() - start_time
|
||||
track(
|
||||
OpenllmCliEvent(
|
||||
cmd_group=command_group,
|
||||
cmd_name=command_name,
|
||||
duration_in_ms=duration_in_ns / 1e6,
|
||||
error_type=type(e).__name__,
|
||||
return_code=2 if isinstance(e, KeyboardInterrupt) else 1,
|
||||
)
|
||||
)
|
||||
raise
|
||||
|
||||
return typer.Typer.command(self, *args, **kwargs)(wrapped)
|
||||
|
||||
return decorator
|
||||
@@ -1,75 +0,0 @@
|
||||
import pathlib
|
||||
import shutil
|
||||
|
||||
import questionary
|
||||
|
||||
from openllm_next.analytic import OpenLLMTyper
|
||||
from openllm_next.common import (
|
||||
CONFIG_FILE,
|
||||
REPO_DIR,
|
||||
VENV_DIR,
|
||||
VERBOSE_LEVEL,
|
||||
output,
|
||||
)
|
||||
|
||||
app = OpenLLMTyper(help="clean up and release disk space used by OpenLLM")
|
||||
|
||||
|
||||
HUGGINGFACE_CACHE = pathlib.Path.home() / ".cache" / "huggingface" / "hub"
|
||||
|
||||
|
||||
@app.command(help="Clean up all the cached models from huggingface")
|
||||
def model_cache(verbose: bool = False):
|
||||
if verbose:
|
||||
VERBOSE_LEVEL.set(20)
|
||||
used_space = sum(f.stat().st_size for f in HUGGINGFACE_CACHE.rglob("*"))
|
||||
sure = questionary.confirm(
|
||||
f"This will remove all models cached by Huggingface (~{used_space / 1024 / 1024:.2f}MB), are you sure?"
|
||||
).ask()
|
||||
if not sure:
|
||||
return
|
||||
shutil.rmtree(HUGGINGFACE_CACHE, ignore_errors=True)
|
||||
output("All models cached by Huggingface have been removed", style="green")
|
||||
|
||||
|
||||
@app.command(help="Clean up all the virtual environments created by OpenLLM")
|
||||
def venvs(verbose: bool = False):
|
||||
if verbose:
|
||||
VERBOSE_LEVEL.set(20)
|
||||
used_space = sum(f.stat().st_size for f in VENV_DIR.rglob("*"))
|
||||
sure = questionary.confirm(
|
||||
f"This will remove all virtual environments created by OpenLLM (~{used_space / 1024 / 1024:.2f}MB), are you sure?"
|
||||
).ask()
|
||||
if not sure:
|
||||
return
|
||||
shutil.rmtree(VENV_DIR, ignore_errors=True)
|
||||
output("All virtual environments have been removed", style="green")
|
||||
|
||||
|
||||
@app.command(help="Clean up all the repositories cloned by OpenLLM")
|
||||
def repos(verbose: bool = False):
|
||||
if verbose:
|
||||
VERBOSE_LEVEL.set(20)
|
||||
shutil.rmtree(REPO_DIR, ignore_errors=True)
|
||||
output("All repositories have been removed", style="green")
|
||||
|
||||
|
||||
@app.command(help="Reset configurations to default")
|
||||
def configs(verbose: bool = False):
|
||||
if verbose:
|
||||
VERBOSE_LEVEL.set(20)
|
||||
shutil.rmtree(CONFIG_FILE, ignore_errors=True)
|
||||
output("All configurations have been reset", style="green")
|
||||
|
||||
|
||||
@app.command(
|
||||
name="all",
|
||||
help="Clean up all above and bring OpenLLM to a fresh start",
|
||||
)
|
||||
def all_cache(verbose: bool = False):
|
||||
if verbose:
|
||||
VERBOSE_LEVEL.set(20)
|
||||
repos()
|
||||
venvs()
|
||||
model_cache()
|
||||
configs()
|
||||
@@ -1,174 +0,0 @@
|
||||
import json
|
||||
import os
|
||||
import pathlib
|
||||
import shutil
|
||||
import subprocess
|
||||
import typing
|
||||
|
||||
import typer
|
||||
|
||||
from openllm_next.accelerator_spec import ACCELERATOR_SPECS
|
||||
from openllm_next.analytic import OpenLLMTyper
|
||||
from openllm_next.common import (
|
||||
INTERACTIVE,
|
||||
BentoInfo,
|
||||
DeploymentTarget,
|
||||
output,
|
||||
run_command,
|
||||
)
|
||||
|
||||
app = OpenLLMTyper()
|
||||
|
||||
|
||||
def _get_deploy_cmd(bento: BentoInfo, target: typing.Optional[DeploymentTarget] = None):
|
||||
cmd = ["bentoml", "deploy", bento.bentoml_tag]
|
||||
env = {
|
||||
"BENTOML_HOME": f"{bento.repo.path}/bentoml",
|
||||
}
|
||||
|
||||
required_envs = bento.bento_yaml.get("envs", [])
|
||||
required_env_names = [env["name"] for env in required_envs if "name" in env]
|
||||
if required_env_names:
|
||||
output(
|
||||
f"This model requires the following environment variables to run: {repr(required_env_names)}",
|
||||
style="yellow",
|
||||
)
|
||||
|
||||
for env_info in bento.bento_yaml.get("envs", []):
|
||||
if "name" not in env_info:
|
||||
continue
|
||||
if os.environ.get(env_info["name"]):
|
||||
default = os.environ[env_info["name"]]
|
||||
elif "value" in env_info:
|
||||
default = env_info["value"]
|
||||
else:
|
||||
default = ""
|
||||
|
||||
if INTERACTIVE.get():
|
||||
import questionary
|
||||
|
||||
value = questionary.text(
|
||||
f"{env_info['name']}:",
|
||||
default=default,
|
||||
).ask()
|
||||
else:
|
||||
if default == "":
|
||||
output(
|
||||
f"Environment variable {env_info['name']} is required but not provided",
|
||||
style="red",
|
||||
)
|
||||
raise typer.Exit(1)
|
||||
else:
|
||||
value = default
|
||||
|
||||
if value is None:
|
||||
raise typer.Exit(1)
|
||||
cmd += ["--env", f"{env_info['name']}={value}"]
|
||||
|
||||
if target:
|
||||
cmd += ["--instance-type", target.name]
|
||||
|
||||
assert (pathlib.Path.home() / "bentoml" / ".yatai.yaml").exists()
|
||||
shutil.copy(
|
||||
pathlib.Path.home() / "bentoml" / ".yatai.yaml",
|
||||
bento.repo.path / "bentoml" / ".yatai.yaml",
|
||||
)
|
||||
|
||||
return cmd, env, None
|
||||
|
||||
|
||||
def ensure_cloud_context():
|
||||
import questionary
|
||||
|
||||
cmd = ["bentoml", "cloud", "current-context"]
|
||||
try:
|
||||
result = subprocess.check_output(cmd, stderr=subprocess.DEVNULL)
|
||||
context = json.loads(result)
|
||||
output(f" bentoml already logged in: {context['endpoint']}", style="green")
|
||||
except subprocess.CalledProcessError:
|
||||
output(" bentoml not logged in", style="red")
|
||||
if not INTERACTIVE.get():
|
||||
output(
|
||||
"\n get bentoml logged in by:",
|
||||
)
|
||||
output(
|
||||
" $ bentoml cloud login",
|
||||
style="orange",
|
||||
)
|
||||
output("")
|
||||
output(
|
||||
""" * you may need to visit https://cloud.bentoml.com to get an account. you can also bring your own bentoml cluster (BYOC) to your team from https://bentoml.com/contact""",
|
||||
style="yellow",
|
||||
)
|
||||
raise typer.Exit(1)
|
||||
else:
|
||||
action = questionary.select(
|
||||
"Choose an action:",
|
||||
choices=[
|
||||
"I have a BentoCloud account",
|
||||
"get an account in two minutes",
|
||||
],
|
||||
).ask()
|
||||
if action is None:
|
||||
raise typer.Exit(1)
|
||||
elif action == "get an account in two minutes":
|
||||
output(
|
||||
"Please visit https://cloud.bentoml.com to get your token",
|
||||
style="yellow",
|
||||
)
|
||||
endpoint = questionary.text(
|
||||
"Enter the endpoint: (similar to https://my-org.cloud.bentoml.com)"
|
||||
).ask()
|
||||
if endpoint is None:
|
||||
raise typer.Exit(1)
|
||||
token = questionary.text(
|
||||
"Enter your token: (similar to cniluaxxxxxxxx)"
|
||||
).ask()
|
||||
if token is None:
|
||||
raise typer.Exit(1)
|
||||
cmd = [
|
||||
"bentoml",
|
||||
"cloud",
|
||||
"login",
|
||||
"--api-token",
|
||||
token,
|
||||
"--endpoint",
|
||||
endpoint,
|
||||
]
|
||||
try:
|
||||
result = subprocess.check_output(cmd)
|
||||
output(" Logged in successfully", style="green")
|
||||
except subprocess.CalledProcessError:
|
||||
output(" Failed to login", style="red")
|
||||
raise typer.Exit(1)
|
||||
|
||||
|
||||
def get_cloud_machine_spec():
|
||||
ensure_cloud_context()
|
||||
cmd = ["bentoml", "deployment", "list-instance-types", "-o", "json"]
|
||||
try:
|
||||
result = subprocess.check_output(cmd, stderr=subprocess.DEVNULL)
|
||||
instance_types = json.loads(result)
|
||||
return [
|
||||
DeploymentTarget(
|
||||
source="cloud",
|
||||
name=it["name"],
|
||||
price=it["price"],
|
||||
platform="linux",
|
||||
accelerators=(
|
||||
[ACCELERATOR_SPECS[it["gpu_type"]] for _ in range(int(it["gpu"]))]
|
||||
if it.get("gpu") and it["gpu_type"] in ACCELERATOR_SPECS
|
||||
else []
|
||||
),
|
||||
)
|
||||
for it in instance_types
|
||||
]
|
||||
except (subprocess.CalledProcessError, json.JSONDecodeError):
|
||||
output("Failed to get cloud instance types", style="red")
|
||||
return []
|
||||
|
||||
|
||||
def deploy(bento: BentoInfo, target: DeploymentTarget):
|
||||
ensure_cloud_context()
|
||||
cmd, env, cwd = _get_deploy_cmd(bento, target)
|
||||
run_command(cmd, env=env, cwd=cwd)
|
||||
@@ -1,422 +0,0 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import functools
|
||||
import hashlib
|
||||
import io
|
||||
import json
|
||||
import os
|
||||
import pathlib
|
||||
import signal
|
||||
import subprocess
|
||||
import sys
|
||||
import sysconfig
|
||||
import typing
|
||||
from contextlib import asynccontextmanager, contextmanager
|
||||
from types import SimpleNamespace
|
||||
|
||||
import typer
|
||||
import typer.core
|
||||
|
||||
ERROR_STYLE = "red"
|
||||
SUCCESS_STYLE = "green"
|
||||
|
||||
|
||||
CLLAMA_HOME = pathlib.Path.home() / ".openllm_next"
|
||||
REPO_DIR = CLLAMA_HOME / "repos"
|
||||
TEMP_DIR = CLLAMA_HOME / "temp"
|
||||
VENV_DIR = CLLAMA_HOME / "venv"
|
||||
|
||||
REPO_DIR.mkdir(exist_ok=True, parents=True)
|
||||
TEMP_DIR.mkdir(exist_ok=True, parents=True)
|
||||
VENV_DIR.mkdir(exist_ok=True, parents=True)
|
||||
|
||||
CONFIG_FILE = CLLAMA_HOME / "config.json"
|
||||
|
||||
CHECKED = "☆"
|
||||
|
||||
T = typing.TypeVar("T")
|
||||
|
||||
|
||||
class ContextVar(typing.Generic[T]):
|
||||
def __init__(self, default: T):
|
||||
self._stack: list[T] = []
|
||||
self._default = default
|
||||
|
||||
def get(self) -> T:
|
||||
if self._stack:
|
||||
return self._stack[-1]
|
||||
return self._default
|
||||
|
||||
def set(self, value):
|
||||
self._stack.append(value)
|
||||
|
||||
@contextmanager
|
||||
def patch(self, value):
|
||||
self._stack.append(value)
|
||||
try:
|
||||
yield
|
||||
finally:
|
||||
self._stack.pop()
|
||||
|
||||
|
||||
VERBOSE_LEVEL = ContextVar(10)
|
||||
INTERACTIVE = ContextVar(False)
|
||||
FORCE = ContextVar(False)
|
||||
|
||||
|
||||
def output(content, level=0, style=None, end=None):
|
||||
import questionary
|
||||
|
||||
if level > VERBOSE_LEVEL.get():
|
||||
return
|
||||
|
||||
if not isinstance(content, str):
|
||||
import pyaml
|
||||
|
||||
out = io.StringIO()
|
||||
pyaml.pprint(
|
||||
content,
|
||||
dst=out,
|
||||
sort_dicts=False,
|
||||
sort_keys=False,
|
||||
)
|
||||
questionary.print(out.getvalue(), style=style, end="" if end is None else end)
|
||||
out.close()
|
||||
|
||||
if isinstance(content, str):
|
||||
questionary.print(content, style=style, end="\n" if end is None else end)
|
||||
|
||||
|
||||
class Config(SimpleNamespace):
|
||||
repos: dict[str, str] = {
|
||||
"default": "git+https://github.com/bentoml/openllm-models@main"
|
||||
}
|
||||
default_repo: str = "default"
|
||||
|
||||
def tolist(self):
|
||||
return dict(
|
||||
repos=self.repos,
|
||||
default_repo=self.default_repo,
|
||||
)
|
||||
|
||||
|
||||
def load_config():
|
||||
if CONFIG_FILE.exists():
|
||||
try:
|
||||
with open(CONFIG_FILE) as f:
|
||||
return Config(**json.load(f))
|
||||
except json.JSONDecodeError:
|
||||
return Config()
|
||||
return Config()
|
||||
|
||||
|
||||
def save_config(config):
|
||||
with open(CONFIG_FILE, "w") as f:
|
||||
json.dump(config.tolist(), f, indent=2)
|
||||
|
||||
|
||||
class RepoInfo(SimpleNamespace):
|
||||
name: str
|
||||
path: pathlib.Path
|
||||
url: str
|
||||
server: str
|
||||
owner: str
|
||||
repo: str
|
||||
branch: str
|
||||
|
||||
def tolist(self):
|
||||
if VERBOSE_LEVEL.get() <= 0:
|
||||
return f"{self.name} ({self.url})"
|
||||
if VERBOSE_LEVEL.get() <= 10:
|
||||
return dict(
|
||||
name=self.name,
|
||||
url=self.url,
|
||||
path=str(self.path),
|
||||
)
|
||||
if VERBOSE_LEVEL.get() <= 20:
|
||||
return dict(
|
||||
name=self.name,
|
||||
url=self.url,
|
||||
path=str(self.path),
|
||||
server=self.server,
|
||||
owner=self.owner,
|
||||
repo=self.repo,
|
||||
branch=self.branch,
|
||||
)
|
||||
|
||||
|
||||
class BentoInfo(SimpleNamespace):
|
||||
repo: RepoInfo
|
||||
path: pathlib.Path
|
||||
alias: str = ""
|
||||
|
||||
def __str__(self):
|
||||
if self.repo.name == "default":
|
||||
return f"{self.tag}"
|
||||
else:
|
||||
return f"{self.repo.name}/{self.tag}"
|
||||
|
||||
def __hash__(self):
|
||||
return md5(str(self.path))
|
||||
|
||||
@property
|
||||
def tag(self) -> str:
|
||||
if self.alias:
|
||||
return f"{self.path.parent.name}:{self.alias}"
|
||||
return f"{self.path.parent.name}:{self.path.name}"
|
||||
|
||||
@property
|
||||
def bentoml_tag(self) -> str:
|
||||
return f"{self.path.parent.name}:{self.path.name}"
|
||||
|
||||
@property
|
||||
def name(self) -> str:
|
||||
return self.path.parent.name
|
||||
|
||||
@property
|
||||
def version(self) -> str:
|
||||
return self.path.name
|
||||
|
||||
@property
|
||||
def labels(self) -> dict[str, str]:
|
||||
return self.bento_yaml["labels"]
|
||||
|
||||
@functools.cached_property
|
||||
def bento_yaml(self) -> dict:
|
||||
import yaml
|
||||
|
||||
bento_file = self.path / "bento.yaml"
|
||||
return yaml.safe_load(bento_file.read_text())
|
||||
|
||||
@functools.cached_property
|
||||
def platforms(self) -> list[str]:
|
||||
return self.bento_yaml["labels"].get("platforms", "linux").split(",")
|
||||
|
||||
@functools.cached_property
|
||||
def pretty_yaml(self) -> dict:
|
||||
def _pretty_routes(routes):
|
||||
return {
|
||||
route["route"]: {
|
||||
"input": {
|
||||
k: v["type"] for k, v in route["input"]["properties"].items()
|
||||
},
|
||||
"output": route["output"]["type"],
|
||||
}
|
||||
for route in routes
|
||||
}
|
||||
|
||||
if len(self.bento_yaml["services"]) == 1:
|
||||
pretty_yaml = {
|
||||
"apis": _pretty_routes(self.bento_yaml["schema"]["routes"]),
|
||||
"resources": self.bento_yaml["services"][0]["config"]["resources"],
|
||||
"envs": self.bento_yaml["envs"],
|
||||
"platforms": self.platforms,
|
||||
}
|
||||
return pretty_yaml
|
||||
return self.bento_yaml
|
||||
|
||||
@functools.cached_property
|
||||
def pretty_gpu(self) -> str:
|
||||
from openllm_next.accelerator_spec import ACCELERATOR_SPECS
|
||||
|
||||
try:
|
||||
resources = self.bento_yaml["services"][0]["config"]["resources"]
|
||||
if resources["gpu"] > 1:
|
||||
acc = ACCELERATOR_SPECS[resources["gpu_type"]]
|
||||
return f"{acc.memory_size:.0f}Gx{resources['gpu']}"
|
||||
elif resources["gpu"] > 0:
|
||||
acc = ACCELERATOR_SPECS[resources["gpu_type"]]
|
||||
return f"{acc.memory_size:.0f}G"
|
||||
except KeyError:
|
||||
pass
|
||||
return ""
|
||||
|
||||
def tolist(self):
|
||||
verbose = VERBOSE_LEVEL.get()
|
||||
if verbose <= 0:
|
||||
return str(self)
|
||||
if verbose <= 10:
|
||||
return dict(
|
||||
tag=self.tag,
|
||||
repo=self.repo.tolist(),
|
||||
path=str(self.path),
|
||||
model_card=self.pretty_yaml,
|
||||
)
|
||||
if verbose <= 20:
|
||||
return dict(
|
||||
tag=self.tag,
|
||||
repo=self.repo.tolist(),
|
||||
path=str(self.path),
|
||||
bento_yaml=self.bento_yaml,
|
||||
)
|
||||
|
||||
|
||||
class VenvSpec(SimpleNamespace):
|
||||
python_version: str
|
||||
python_packages: dict[str, str]
|
||||
name_prefix = ""
|
||||
|
||||
def __hash__(self):
|
||||
return md5(
|
||||
# self.python_version,
|
||||
*sorted(self.python_packages),
|
||||
)
|
||||
|
||||
|
||||
class Accelerator(SimpleNamespace):
|
||||
model: str
|
||||
memory_size: float
|
||||
|
||||
def __gt__(self, other):
|
||||
return self.memory_size > other.memory_size
|
||||
|
||||
def __eq__(self, other):
|
||||
return self.memory_size == other.memory_size
|
||||
|
||||
|
||||
class DeploymentTarget(SimpleNamespace):
|
||||
source: str = "local"
|
||||
name: str = "local"
|
||||
price: str = ""
|
||||
platform = "linux"
|
||||
accelerators: list[Accelerator]
|
||||
|
||||
def __hash__(self):
|
||||
return hash(self.source)
|
||||
|
||||
@property
|
||||
def accelerators_repr(self) -> str:
|
||||
accs = {a.model for a in self.accelerators}
|
||||
if len(accs) == 0:
|
||||
return "null"
|
||||
if len(accs) == 1:
|
||||
a = self.accelerators[0]
|
||||
return f"{a.model} x{len(self.accelerators)}"
|
||||
return ", ".join((f"{a.model}" for a in self.accelerators))
|
||||
|
||||
|
||||
def run_command(
|
||||
cmd,
|
||||
cwd=None,
|
||||
env=None,
|
||||
copy_env=True,
|
||||
venv=None,
|
||||
silent=False,
|
||||
) -> subprocess.CompletedProcess:
|
||||
import shlex
|
||||
|
||||
env = env or {}
|
||||
cmd = [str(c) for c in cmd]
|
||||
bin_dir = "Scripts" if os.name == "nt" else "bin"
|
||||
if not silent:
|
||||
output("\n")
|
||||
if cwd:
|
||||
output(f"$ cd {cwd}", style="orange")
|
||||
if env:
|
||||
for k, v in env.items():
|
||||
output(f"$ export {k}={shlex.quote(v)}", style="orange")
|
||||
if venv:
|
||||
output(f"$ source {venv / 'bin' / 'activate'}", style="orange")
|
||||
output(f"$ {' '.join(cmd)}", style="orange")
|
||||
|
||||
if venv:
|
||||
py = venv / bin_dir / f"python{sysconfig.get_config_var('EXE')}"
|
||||
else:
|
||||
py = sys.executable
|
||||
|
||||
if copy_env:
|
||||
env = {**os.environ, **env}
|
||||
|
||||
if cmd and cmd[0] == "bentoml":
|
||||
cmd = [py, "-m", "bentoml"] + cmd[1:]
|
||||
if cmd and cmd[0] == "python":
|
||||
cmd = [py] + cmd[1:]
|
||||
|
||||
try:
|
||||
if silent:
|
||||
return subprocess.run( # type: ignore
|
||||
cmd,
|
||||
cwd=cwd,
|
||||
env=env,
|
||||
stdout=subprocess.DEVNULL,
|
||||
stderr=subprocess.DEVNULL,
|
||||
)
|
||||
else:
|
||||
return subprocess.run(
|
||||
cmd,
|
||||
cwd=cwd,
|
||||
env=env,
|
||||
)
|
||||
except subprocess.CalledProcessError:
|
||||
output("Command failed", style="red")
|
||||
raise typer.Exit(1)
|
||||
|
||||
|
||||
async def stream_command_output(stream, style="gray"):
|
||||
async for line in stream:
|
||||
output(line.decode(), style=style, end="")
|
||||
|
||||
|
||||
@asynccontextmanager
|
||||
async def async_run_command(
|
||||
cmd,
|
||||
cwd=None,
|
||||
env=None,
|
||||
copy_env=True,
|
||||
venv=None,
|
||||
silent=True,
|
||||
):
|
||||
import shlex
|
||||
|
||||
env = env or {}
|
||||
cmd = [str(c) for c in cmd]
|
||||
|
||||
if not silent:
|
||||
output("\n")
|
||||
if cwd:
|
||||
output(f"$ cd {cwd}", style="orange")
|
||||
if env:
|
||||
for k, v in env.items():
|
||||
output(f"$ export {k}={shlex.quote(v)}", style="orange")
|
||||
if venv:
|
||||
output(f"$ source {venv / 'bin' / 'activate'}", style="orange")
|
||||
output(f"$ {' '.join(cmd)}", style="orange")
|
||||
|
||||
if venv:
|
||||
py = venv / "bin" / "python"
|
||||
else:
|
||||
py = sys.executable
|
||||
|
||||
if copy_env:
|
||||
env = {**os.environ, **env}
|
||||
|
||||
if cmd and cmd[0] == "bentoml":
|
||||
cmd = [py, "-m", "bentoml"] + cmd[1:]
|
||||
if cmd and cmd[0] == "python":
|
||||
cmd = [py] + cmd[1:]
|
||||
|
||||
proc = None
|
||||
try:
|
||||
proc = await asyncio.create_subprocess_shell(
|
||||
" ".join(map(str, cmd)),
|
||||
stdout=asyncio.subprocess.PIPE,
|
||||
stderr=asyncio.subprocess.PIPE,
|
||||
cwd=cwd,
|
||||
env=env,
|
||||
)
|
||||
yield proc
|
||||
except subprocess.CalledProcessError:
|
||||
output("Command failed", style="red")
|
||||
raise typer.Exit(1)
|
||||
finally:
|
||||
if proc:
|
||||
proc.send_signal(signal.SIGINT)
|
||||
await proc.wait()
|
||||
|
||||
|
||||
def md5(*strings: str) -> int:
|
||||
m = hashlib.md5()
|
||||
for s in strings:
|
||||
m.update(s.encode())
|
||||
return int(m.hexdigest(), 16)
|
||||
@@ -1,117 +0,0 @@
|
||||
import asyncio
|
||||
import time
|
||||
|
||||
import httpx
|
||||
|
||||
from openllm_next.common import (
|
||||
BentoInfo,
|
||||
async_run_command,
|
||||
output,
|
||||
run_command,
|
||||
stream_command_output,
|
||||
)
|
||||
from openllm_next.venv import ensure_venv
|
||||
|
||||
|
||||
def _get_serve_cmd(bento: BentoInfo, port: int = 3000):
|
||||
cmd = ["bentoml", "serve", bento.bentoml_tag]
|
||||
if port != 3000:
|
||||
cmd += ["--port", str(port)]
|
||||
env = {
|
||||
"BENTOML_HOME": f"{bento.repo.path}/bentoml",
|
||||
}
|
||||
return cmd, env, None
|
||||
|
||||
|
||||
def serve(
|
||||
bento: BentoInfo,
|
||||
port: int = 3000,
|
||||
):
|
||||
venv = ensure_venv(bento)
|
||||
cmd, env, cwd = _get_serve_cmd(bento, port=port)
|
||||
run_command(cmd, env=env, cwd=cwd, venv=venv)
|
||||
|
||||
|
||||
async def _run_model(
|
||||
bento: BentoInfo,
|
||||
port: int = 3000,
|
||||
timeout: int = 600,
|
||||
):
|
||||
venv = ensure_venv(bento)
|
||||
cmd, env, cwd = _get_serve_cmd(bento, port)
|
||||
async with async_run_command(
|
||||
cmd,
|
||||
env=env,
|
||||
cwd=cwd,
|
||||
venv=venv,
|
||||
silent=False,
|
||||
) as server_proc:
|
||||
|
||||
output(f"Model server started {server_proc.pid}")
|
||||
|
||||
stdout_streamer = None
|
||||
stderr_streamer = None
|
||||
start_time = time.time()
|
||||
|
||||
output("Model loading...", style="green")
|
||||
for _ in range(timeout):
|
||||
try:
|
||||
resp = httpx.get(f"http://localhost:{port}/readyz", timeout=3)
|
||||
if resp.status_code == 200:
|
||||
break
|
||||
except httpx.RequestError:
|
||||
if time.time() - start_time > 30:
|
||||
if not stdout_streamer:
|
||||
stdout_streamer = asyncio.create_task(
|
||||
stream_command_output(server_proc.stdout, style="gray")
|
||||
)
|
||||
if not stderr_streamer:
|
||||
stderr_streamer = asyncio.create_task(
|
||||
stream_command_output(server_proc.stderr, style="#BD2D0F")
|
||||
)
|
||||
await asyncio.sleep(1)
|
||||
else:
|
||||
output("Model failed to load", style="red")
|
||||
server_proc.terminate()
|
||||
return
|
||||
|
||||
if stdout_streamer:
|
||||
stdout_streamer.cancel()
|
||||
if stderr_streamer:
|
||||
stderr_streamer.cancel()
|
||||
|
||||
output("Model is ready", style="green")
|
||||
messages: list[dict[str, str]] = []
|
||||
|
||||
from openai import AsyncOpenAI
|
||||
|
||||
client = AsyncOpenAI(base_url=f"http://localhost:{port}/v1", api_key="local")
|
||||
model_id = (await client.models.list()).data[0].id
|
||||
while True:
|
||||
try:
|
||||
message = input("user: ")
|
||||
if message == "":
|
||||
output("empty message, please enter something", style="yellow")
|
||||
continue
|
||||
messages.append(dict(role="user", content=message))
|
||||
output("assistant: ", end="", style="lightgreen")
|
||||
assistant_message = ""
|
||||
stream = await client.chat.completions.create(
|
||||
model=model_id,
|
||||
messages=messages, # type: ignore
|
||||
stream=True,
|
||||
)
|
||||
async for chunk in stream:
|
||||
text = chunk.choices[0].delta.content or ""
|
||||
assistant_message += text
|
||||
output(text, end="", style="lightgreen")
|
||||
messages.append(dict(role="assistant", content=assistant_message))
|
||||
output("")
|
||||
except KeyboardInterrupt:
|
||||
break
|
||||
output("\nStopping model server...", style="green")
|
||||
output("Stopped model server", style="green")
|
||||
|
||||
|
||||
def run(bento: BentoInfo, port: int = 3000, timeout: int = 600):
|
||||
asyncio.run(_run_model(bento, port=port, timeout=timeout))
|
||||
@@ -1,173 +0,0 @@
|
||||
import typing
|
||||
from typing import Optional
|
||||
|
||||
import tabulate
|
||||
import typer
|
||||
|
||||
from openllm_next.accelerator_spec import DeploymentTarget, can_run
|
||||
from openllm_next.analytic import OpenLLMTyper
|
||||
from openllm_next.common import (
|
||||
FORCE,
|
||||
VERBOSE_LEVEL,
|
||||
BentoInfo,
|
||||
load_config,
|
||||
output,
|
||||
)
|
||||
from openllm_next.repo import ensure_repo_updated, parse_repo_url
|
||||
|
||||
app = OpenLLMTyper(help="manage models")
|
||||
|
||||
|
||||
@app.command()
|
||||
def get(
|
||||
tag: str,
|
||||
repo: Optional[str] = None,
|
||||
verbose: bool = False,
|
||||
):
|
||||
if verbose:
|
||||
VERBOSE_LEVEL.set(20)
|
||||
bento_info = ensure_bento(tag, repo_name=repo)
|
||||
if bento_info:
|
||||
output(bento_info)
|
||||
|
||||
|
||||
@app.command(name="list")
|
||||
def list_(
|
||||
tag: Optional[str] = None,
|
||||
repo: Optional[str] = None,
|
||||
verbose: bool = False,
|
||||
):
|
||||
if verbose:
|
||||
VERBOSE_LEVEL.set(20)
|
||||
|
||||
bentos = list_bento(tag=tag, repo_name=repo)
|
||||
bentos.sort(key=lambda x: x.name)
|
||||
|
||||
seen = set()
|
||||
|
||||
def is_seen(value):
|
||||
if value in seen:
|
||||
return True
|
||||
seen.add(value)
|
||||
return False
|
||||
|
||||
table = tabulate.tabulate(
|
||||
[
|
||||
[
|
||||
"" if is_seen(bento.name) else bento.name,
|
||||
bento.tag,
|
||||
bento.repo.name,
|
||||
bento.pretty_gpu,
|
||||
",".join(bento.platforms),
|
||||
]
|
||||
for bento in bentos
|
||||
],
|
||||
headers=["model", "version", "repo", "required VRAM", "platforms"],
|
||||
)
|
||||
output(table)
|
||||
|
||||
|
||||
def ensure_bento(
|
||||
model: str,
|
||||
target: Optional[DeploymentTarget] = None,
|
||||
repo_name: Optional[str] = None,
|
||||
) -> BentoInfo:
|
||||
bentos = list_bento(model, repo_name=repo_name)
|
||||
if len(bentos) == 0:
|
||||
output(f"No model found for {model}", style="red")
|
||||
raise typer.Exit(1)
|
||||
|
||||
if len(bentos) == 1:
|
||||
if FORCE.get():
|
||||
output(f"Found model {bentos[0]}", style="green")
|
||||
return bentos[0]
|
||||
if target is None:
|
||||
return bentos[0]
|
||||
if can_run(bentos[0], target) <= 0:
|
||||
return bentos[0]
|
||||
output(f"Found model {bentos[0]}", style="green")
|
||||
return bentos[0]
|
||||
|
||||
if target is None:
|
||||
output(
|
||||
f"Multiple models match {model}, did you mean one of these?",
|
||||
style="red",
|
||||
)
|
||||
for bento in bentos:
|
||||
output(f" {bento}")
|
||||
raise typer.Exit(1)
|
||||
|
||||
filtered = [bento for bento in bentos if can_run(bento, target) > 0]
|
||||
if len(filtered) == 0:
|
||||
output(f"No deployment target found for {model}", style="red")
|
||||
raise typer.Exit(1)
|
||||
|
||||
|
||||
|
||||
if len(bentos) > 1:
|
||||
output(
|
||||
f"Multiple models match {model}, did you mean one of these?",
|
||||
style="red",
|
||||
)
|
||||
for bento in bentos:
|
||||
output(f" {bento}")
|
||||
raise typer.Exit(1)
|
||||
|
||||
return bentos[0]
|
||||
|
||||
|
||||
def list_bento(
|
||||
tag: typing.Optional[str] = None,
|
||||
repo_name: typing.Optional[str] = None,
|
||||
include_alias: bool = False,
|
||||
) -> typing.List[BentoInfo]:
|
||||
ensure_repo_updated()
|
||||
|
||||
if repo_name is not None:
|
||||
config = load_config()
|
||||
if repo_name not in config.repos:
|
||||
output(f"Repo `{repo_name}` not found, did you mean one of these?")
|
||||
for repo_name in config.repos:
|
||||
output(f" {repo_name}")
|
||||
raise typer.Exit(1)
|
||||
|
||||
if not tag:
|
||||
glob_pattern = "bentoml/bentos/*/*"
|
||||
elif ":" in tag:
|
||||
bento_name, version = tag.split(":")
|
||||
glob_pattern = f"bentoml/bentos/{bento_name}/{version}"
|
||||
else:
|
||||
glob_pattern = f"bentoml/bentos/{tag}/*"
|
||||
|
||||
model_list = []
|
||||
config = load_config()
|
||||
for _repo_name, repo_url in config.repos.items():
|
||||
if repo_name is not None and _repo_name != repo_name:
|
||||
continue
|
||||
repo = parse_repo_url(repo_url, _repo_name)
|
||||
for path in repo.path.glob(glob_pattern):
|
||||
if path.is_dir() and (path / "bento.yaml").exists():
|
||||
model = BentoInfo(repo=repo, path=path)
|
||||
elif path.is_file():
|
||||
with open(path) as f:
|
||||
origin_name = f.read().strip()
|
||||
origin_path = path.parent / origin_name
|
||||
model = BentoInfo(alias=path.name, repo=repo, path=origin_path)
|
||||
else:
|
||||
model = None
|
||||
if model:
|
||||
model_list.append(model)
|
||||
model_list.sort(key=lambda x: x.tag)
|
||||
if not include_alias:
|
||||
seen = set()
|
||||
model_list = [
|
||||
x
|
||||
for x in model_list
|
||||
if not (
|
||||
f"{x.bento_yaml['name']}:{x.bento_yaml['version']}" in seen
|
||||
or seen.add(f"{x.bento_yaml['name']}:{x.bento_yaml['version']}")
|
||||
)
|
||||
]
|
||||
return model_list
|
||||
@@ -1,203 +0,0 @@
|
||||
import datetime
|
||||
import re
|
||||
import shutil
|
||||
|
||||
import pyaml
|
||||
import questionary
|
||||
import typer
|
||||
|
||||
from openllm_next.analytic import OpenLLMTyper
|
||||
from openllm_next.common import (
|
||||
INTERACTIVE,
|
||||
REPO_DIR,
|
||||
VERBOSE_LEVEL,
|
||||
RepoInfo,
|
||||
load_config,
|
||||
output,
|
||||
save_config,
|
||||
)
|
||||
|
||||
UPDATE_INTERVAL = datetime.timedelta(days=3)
|
||||
|
||||
app = OpenLLMTyper(help="manage repos")
|
||||
|
||||
|
||||
@app.command()
|
||||
def list(verbose: bool = False):
|
||||
if verbose:
|
||||
VERBOSE_LEVEL.set(20)
|
||||
config = load_config()
|
||||
pyaml.pprint(
|
||||
[parse_repo_url(repo, name) for name, repo in config.repos.items()],
|
||||
sort_dicts=False,
|
||||
sort_keys=False,
|
||||
)
|
||||
|
||||
|
||||
@app.command()
|
||||
def remove(name: str):
|
||||
config = load_config()
|
||||
if name not in config.repos:
|
||||
output(f"Repo {name} does not exist", style="red")
|
||||
return
|
||||
|
||||
del config.repos[name]
|
||||
save_config(config)
|
||||
output(f"Repo {name} removed", style="green")
|
||||
|
||||
|
||||
def _complete_alias(repo_name: str):
|
||||
from openllm_next.model import list_bento
|
||||
|
||||
for bento in list_bento(repo_name=repo_name):
|
||||
alias = bento.labels.get("openllm_alias", "").strip()
|
||||
if alias:
|
||||
for a in alias.split(","):
|
||||
with open(bento.path.parent / a, "w") as f:
|
||||
f.write(bento.version)
|
||||
|
||||
|
||||
@app.command()
|
||||
def update():
|
||||
import dulwich
|
||||
import dulwich.errors
|
||||
import dulwich.porcelain
|
||||
|
||||
config = load_config()
|
||||
repos_in_use = set()
|
||||
for repo_name, repo in config.repos.items():
|
||||
repo = parse_repo_url(repo, repo_name)
|
||||
repos_in_use.add((repo.server, repo.owner, repo.repo))
|
||||
if repo.path.exists(): # TODO: use update instead of remove and clone
|
||||
shutil.rmtree(repo.path, ignore_errors=True)
|
||||
if not repo.path.exists():
|
||||
repo.path.parent.mkdir(parents=True, exist_ok=True)
|
||||
try:
|
||||
dulwich.porcelain.clone(
|
||||
f"https://{repo.server}/{repo.owner}/{repo.repo}.git",
|
||||
str(repo.path),
|
||||
checkout=True,
|
||||
depth=1,
|
||||
branch=repo.branch,
|
||||
)
|
||||
output("")
|
||||
output(f"Repo `{repo.name}` updated", style="green")
|
||||
except Exception:
|
||||
shutil.rmtree(repo.path, ignore_errors=True)
|
||||
output(f"Failed to clone repo {repo.name}", style="red")
|
||||
else:
|
||||
try:
|
||||
import dulwich.porcelain
|
||||
|
||||
dulwich.porcelain.pull(
|
||||
str(repo.path),
|
||||
f"https://{repo.server}/{repo.owner}/{repo.repo}.git",
|
||||
refspecs=repo.branch,
|
||||
force=True,
|
||||
)
|
||||
dulwich.porcelain.clean(str(repo.path), str(repo.path))
|
||||
output("")
|
||||
output(f"Repo `{repo.name}` updated", style="green")
|
||||
except Exception:
|
||||
shutil.rmtree(repo.path, ignore_errors=True)
|
||||
output(f"Failed to update repo {repo.name}", style="red")
|
||||
for c in REPO_DIR.glob("*/*/*"):
|
||||
repo_spec = tuple(c.parts[-3:])
|
||||
if repo_spec not in repos_in_use:
|
||||
shutil.rmtree(c, ignore_errors=True)
|
||||
output(f"Removed unused repo cache {c}")
|
||||
with open(REPO_DIR / "last_update", "w") as f:
|
||||
f.write(datetime.datetime.now().isoformat())
|
||||
for repo_name in config.repos:
|
||||
_complete_alias(repo_name)
|
||||
|
||||
|
||||
def ensure_repo_updated():
|
||||
last_update_file = REPO_DIR / "last_update"
|
||||
if not last_update_file.exists():
|
||||
if INTERACTIVE.get():
|
||||
choice = questionary.confirm(
|
||||
"The repo cache is never updated, do you want to update it to fetch the latest model list?"
|
||||
).ask()
|
||||
if choice:
|
||||
update()
|
||||
return
|
||||
else:
|
||||
output(
|
||||
"The repo cache is never updated, please run `openllm repo update` to fetch the latest model list",
|
||||
style="red",
|
||||
)
|
||||
raise typer.Exit(1)
|
||||
last_update = datetime.datetime.fromisoformat(last_update_file.read_text().strip())
|
||||
if datetime.datetime.now() - last_update > UPDATE_INTERVAL:
|
||||
if INTERACTIVE.get():
|
||||
choice = questionary.confirm(
|
||||
"The repo cache is outdated, do you want to update it to fetch the latest model list?"
|
||||
).ask()
|
||||
if choice:
|
||||
update()
|
||||
else:
|
||||
output(
|
||||
"The repo cache is outdated, please run `openllm repo update` to fetch the latest model list",
|
||||
style="yellow",
|
||||
)
|
||||
|
||||
|
||||
GIT_REPO_RE = re.compile(
|
||||
r"git\+https://(?P<server>.+)/(?P<owner>.+)/(?P<repo>.+?)(@(?P<branch>.+))?$"
|
||||
)
|
||||
|
||||
|
||||
def parse_repo_url(repo_url, repo_name=None) -> RepoInfo:
|
||||
"""
|
||||
parse the git repo url to server, owner, repo name, branch
|
||||
>>> parse_repo_url("git+https://github.com/bentoml/bentovllm@main")
|
||||
('github.com', 'bentoml', 'bentovllm', 'main')
|
||||
|
||||
>>> parse_repo_url("git+https://github.com/bentoml/bentovllm")
|
||||
('github.com', 'bentoml', 'bentovllm', 'main')
|
||||
"""
|
||||
match = GIT_REPO_RE.match(repo_url)
|
||||
if not match:
|
||||
raise ValueError(f"Invalid git repo url: {repo_url}")
|
||||
server = match.group("server")
|
||||
owner = match.group("owner")
|
||||
repo = match.group("repo")
|
||||
branch = match.group("branch") or "main"
|
||||
path = REPO_DIR / server / owner / repo
|
||||
return RepoInfo(
|
||||
name=repo if repo_name is None else repo_name,
|
||||
url=repo_url,
|
||||
server=server,
|
||||
owner=owner,
|
||||
repo=repo,
|
||||
branch=branch,
|
||||
path=path,
|
||||
)
|
||||
|
||||
|
||||
@app.command()
|
||||
def add(name: str, repo: str):
|
||||
name = name.lower()
|
||||
if not name.isidentifier():
|
||||
output(
|
||||
f"Invalid repo name: {name}, should only contain letters, numbers and underscores",
|
||||
style="red",
|
||||
)
|
||||
return
|
||||
|
||||
config = load_config()
|
||||
if name in config.repos:
|
||||
override = questionary.confirm(
|
||||
f"Repo {name} already exists({config.repos[name]}), override?"
|
||||
).ask()
|
||||
if not override:
|
||||
return
|
||||
|
||||
config.repos[name] = repo
|
||||
save_config(config)
|
||||
output(f"Repo {name} added", style="green")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
app()
|
||||
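The `GIT_REPO_RE` pattern above also decides where a repo is cached on disk: `server/owner/repo` become nested directories under the repo cache (`~/.openllm/repos`, per `REPO_DIR` in the accompanying `common.py`). A rough standalone sketch of that mapping; the `cache_path` helper is hypothetical and only mirrors how `parse_repo_url` builds `RepoInfo.path`:

```python
import pathlib
import re

# regex copied from the removed repo.py above
GIT_REPO_RE = re.compile(r"git\+https://(?P<server>.+)/(?P<owner>.+)/(?P<repo>.+?)(@(?P<branch>.+))?$")

def cache_path(repo_url: str) -> pathlib.Path:
    """Return the local checkout location for a repo URL (illustrative only)."""
    m = GIT_REPO_RE.match(repo_url)
    assert m is not None, f"not a recognized repo url: {repo_url}"
    return pathlib.Path.home() / ".openllm" / "repos" / m["server"] / m["owner"] / m["repo"]

print(cache_path("git+https://github.com/bentoml/openllm-models@main"))
# e.g. /home/user/.openllm/repos/github.com/bentoml/openllm-models
```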
@@ -1,164 +0,0 @@
|
||||
import functools
|
||||
import os
|
||||
import pathlib
|
||||
import shutil
|
||||
import typing
|
||||
from typing import Iterable
|
||||
|
||||
import typer
|
||||
|
||||
from openllm_next.common import (
|
||||
VENV_DIR,
|
||||
VERBOSE_LEVEL,
|
||||
BentoInfo,
|
||||
VenvSpec,
|
||||
output,
|
||||
run_command,
|
||||
)
|
||||
|
||||
|
||||
@functools.lru_cache
|
||||
def _resolve_packages(requirement: typing.Union[pathlib.Path, str]):
|
||||
from pip_requirements_parser import RequirementsFile
|
||||
|
||||
requirements_txt = RequirementsFile.from_file(
|
||||
str(requirement),
|
||||
include_nested=True,
|
||||
)
|
||||
return requirements_txt.requirements
|
||||
|
||||
|
||||
def _filter_preheat_packages(requirements: Iterable) -> list[str]:
|
||||
PREHEAT_PIP_PACKAGES = ["torch", "vllm"]
|
||||
|
||||
deps: list[str] = []
|
||||
for req in requirements:
|
||||
if (
|
||||
req.is_editable
|
||||
or req.is_local_path
|
||||
or req.is_url
|
||||
or req.is_wheel
|
||||
or not req.name
|
||||
or not req.specifier
|
||||
):
|
||||
continue
|
||||
for sp in req.specifier:
|
||||
if sp.operator == "==" and req.name in PREHEAT_PIP_PACKAGES:
|
||||
assert req.line is not None
|
||||
deps.append(req.line)
|
||||
break
|
||||
return deps
|
||||
|
||||
|
||||
@functools.lru_cache
|
||||
def _resolve_bento_env_specs(bento: BentoInfo):
|
||||
ver_file = bento.path / "env" / "python" / "version.txt"
|
||||
assert ver_file.exists(), f"cannot find version file in {bento.path}"
|
||||
|
||||
lock_file = bento.path / "env" / "python" / "requirements.lock.txt"
|
||||
if not lock_file.exists():
|
||||
lock_file = bento.path / "env" / "python" / "requirements.txt"
|
||||
|
||||
reqs = _resolve_packages(lock_file)
|
||||
preheat_packages = _filter_preheat_packages(reqs)
|
||||
ver = ver_file.read_text().strip()
|
||||
return (
|
||||
VenvSpec(
|
||||
python_version=ver,
|
||||
python_packages=preheat_packages,
|
||||
name_prefix=f"{bento.tag.replace(':', '_')}-1-",
|
||||
),
|
||||
VenvSpec(
|
||||
python_version=ver,
|
||||
python_packages=[v.line for v in reqs],
|
||||
name_prefix=f"{bento.tag.replace(':', '_')}-2-",
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
def _get_lib_dir(venv: pathlib.Path) -> pathlib.Path:
|
||||
if os.name == "nt":
|
||||
return venv / "Lib/site-packages"
|
||||
else:
|
||||
return next(venv.glob("lib/python*")) / "site-packages"
|
||||
|
||||
|
||||
def _ensure_venv(
|
||||
env_spec: VenvSpec,
|
||||
parent_venv: typing.Optional[pathlib.Path] = None,
|
||||
) -> pathlib.Path:
|
||||
venv = VENV_DIR / str(hash(env_spec))
|
||||
if venv.exists() and not (venv / "DONE").exists():
|
||||
shutil.rmtree(venv, ignore_errors=True)
|
||||
if not venv.exists():
|
||||
output(f"Installing model dependencies({venv})...", style="green")
|
||||
|
||||
venv_py = (
|
||||
venv / "Scripts" / "python.exe"
|
||||
if os.name == "nt"
|
||||
else venv / "bin" / "python"
|
||||
)
|
||||
try:
|
||||
run_command(
|
||||
["python", "-m", "uv", "venv", venv],
|
||||
silent=VERBOSE_LEVEL.get() < 10,
|
||||
)
|
||||
lib_dir = _get_lib_dir(venv)
|
||||
if parent_venv is not None:
|
||||
parent_lib_dir = _get_lib_dir(parent_venv)
|
||||
with open(lib_dir / f"{parrent_venv.name}.pth", "w+") as f:
|
||||
f.write(str(parent_lib_dir))
|
||||
with open(venv / "requirements.txt", "w") as f:
|
||||
f.write("\n".join(sorted(env_spec.python_packages)))
|
||||
run_command(
|
||||
[
|
||||
"python",
|
||||
"-m",
|
||||
"uv",
|
||||
"pip",
|
||||
"install",
|
||||
"-p",
|
||||
str(venv_py),
|
||||
"-r",
|
||||
venv / "requirements.txt",
|
||||
],
|
||||
silent=VERBOSE_LEVEL.get() < 10,
|
||||
)
|
||||
with open(venv / "DONE", "w") as f:
|
||||
f.write("DONE")
|
||||
except Exception:
|
||||
shutil.rmtree(venv, ignore_errors=True)
|
||||
output(
|
||||
f"Failed to install dependencies to {venv}. Cleaned up.",
|
||||
style="red",
|
||||
)
|
||||
raise typer.Exit(1)
|
||||
output(f"Successfully installed dependencies to {venv}.", style="green")
|
||||
return venv
|
||||
else:
|
||||
return venv
|
||||
|
||||
|
||||
def _ensure_venvs(env_spec_list: Iterable[VenvSpec]) -> pathlib.Path:
|
||||
last_venv = None
|
||||
for env_spec in env_spec_list:
|
||||
last_venv = _ensure_venv(env_spec, last_venv)
|
||||
assert last_venv is not None
|
||||
return last_venv
|
||||
|
||||
|
||||
def ensure_venv(bento: BentoInfo) -> pathlib.Path:
|
||||
return _ensure_venvs(_resolve_bento_env_specs(bento))
|
||||
|
||||
|
||||
def _check_venv(env_spec: VenvSpec) -> bool:
|
||||
venv = VENV_DIR / str(hash(env_spec))
|
||||
if not venv.exists():
|
||||
return False
|
||||
if venv.exists() and not (venv / "DONE").exists():
|
||||
return False
|
||||
return True
|
||||
|
||||
|
||||
def check_venv(bento: BentoInfo) -> bool:
|
||||
return all(_check_venv(env_spec) for env_spec in _resolve_bento_env_specs(bento))
|
||||
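`_resolve_bento_env_specs` and `_ensure_venv` above build two layered virtual environments per bento: a first venv holding only the pinned heavyweight packages (torch, vLLM), and a second that installs the full requirement set while seeing the first through a `.pth` file, so the expensive packages are shared rather than reinstalled. A minimal illustration of the `.pth` chaining trick; the paths are hypothetical and do not reflect the real `VENV_DIR` layout:

```python
import pathlib

def link_parent_site_packages(child_site: pathlib.Path, parent_site: pathlib.Path) -> None:
    """Expose packages installed in `parent_site` to the child venv.

    CPython's `site` module reads every *.pth file found in site-packages and
    appends each line to sys.path, which is what the layering above relies on.
    """
    child_site.mkdir(parents=True, exist_ok=True)
    (child_site / "parent-venv.pth").write_text(str(parent_site) + "\n")

# hypothetical layout, for illustration only
link_parent_site_packages(
    pathlib.Path("/tmp/venvs/full/lib/python3.11/site-packages"),
    pathlib.Path("/tmp/venvs/preheat/lib/python3.11/site-packages"),
)
```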
package.json (57 lines deleted)
@@ -1,57 +0,0 @@
|
||||
{
|
||||
"name": "@bentoml/openllm-monorepo",
|
||||
"version": "0.5.7",
|
||||
"description": "OpenLLM: Operating LLMs in production",
|
||||
"author": "Aaron Pham <29749331+aarnphm@users.noreply.github.com>",
|
||||
"license": "Apache-2.0",
|
||||
"repository": {
|
||||
"type": "git",
|
||||
"url": "git+https://github.com/bentoml/OpenLLM.git"
|
||||
},
|
||||
"bugs": {
|
||||
"url": "https://github.com/bentoml/OpenLLM/issues"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
},
|
||||
"type": "module",
|
||||
"private": true,
|
||||
"scripts": {
|
||||
"lint": "eslint . --cache",
|
||||
"check": "npx prettier . --check",
|
||||
"format": "npx prettier . --write"
|
||||
},
|
||||
"packageManager": "pnpm@8.6.3",
|
||||
"devDependencies": {
|
||||
"@edge-runtime/vm": "3.1.6",
|
||||
"@next/eslint-plugin-next": "13.5.5",
|
||||
"@types/node": "20.8.7",
|
||||
"@typescript-eslint/eslint-plugin": "6.10.0",
|
||||
"@typescript-eslint/parser": "6.8.0",
|
||||
"eslint": "^8.53.0",
|
||||
"eslint-config-prettier": "9.0.0",
|
||||
"eslint-plugin-import": "2.29.0",
|
||||
"eslint-plugin-react": "7.33.2",
|
||||
"eslint-plugin-react-hooks": "4.6.0",
|
||||
"eslint-plugin-tailwindcss": "3.13.0",
|
||||
"eslint-plugin-unicorn": "48.0.1",
|
||||
"prettier": "^3.0.3",
|
||||
"prettier-plugin-pkg": "0.18.0",
|
||||
"prettier-plugin-tailwindcss": "0.5.6",
|
||||
"turbo": "1.10.16",
|
||||
"typescript": "5.2.2"
|
||||
},
|
||||
"pnpm": {
|
||||
"overrides": {
|
||||
"vitest": "^0.27.1",
|
||||
"react": "^18.2.0",
|
||||
"react-dom": "^18.2.0",
|
||||
"@mdx-js/react": "^2.3.0",
|
||||
"@vercel/analytics": "1.0.2",
|
||||
"next-themes": "^0.2.1",
|
||||
"next": "^13.4.8",
|
||||
"@types/react": "^18.0.15",
|
||||
"@types/react-dom": "^18.0.6"
|
||||
}
|
||||
}
|
||||
}
|
||||
pyproject.toml (183)
@@ -1,13 +1,34 @@
|
||||
[build-system]
|
||||
requires = ["setuptools>=42", "wheel"]
|
||||
build-backend = "setuptools.build_meta"
|
||||
|
||||
[project]
|
||||
name = "openllm-next"
|
||||
version = "0.0.1"
|
||||
description = "A description of your package."
|
||||
authors = [{name = "oasiszero", email = "oasis0.com@gmail.com"}]
|
||||
license = {file = "LICENSE"}
|
||||
name = "openllm"
|
||||
description = "OpenLLM: Self-hosting LLMs Made Easy."
|
||||
readme = { file = "README.md", content-type = "text/markdown" }
|
||||
authors = [{ name = "BentoML Team", email = "contact@bentoml.com" }]
|
||||
dynamic = ["version"]
|
||||
classifiers = [
|
||||
"Development Status :: 5 - Production/Stable",
|
||||
"Environment :: GPU :: NVIDIA CUDA",
|
||||
"Environment :: GPU :: NVIDIA CUDA :: 12",
|
||||
"Environment :: GPU :: NVIDIA CUDA :: 11.8",
|
||||
"Environment :: GPU :: NVIDIA CUDA :: 11.7",
|
||||
"License :: OSI Approved :: Apache Software License",
|
||||
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
||||
"Topic :: Software Development :: Libraries",
|
||||
"Operating System :: OS Independent",
|
||||
"Intended Audience :: Developers",
|
||||
"Intended Audience :: Science/Research",
|
||||
"Intended Audience :: System Administrators",
|
||||
"Typing :: Typed",
|
||||
"Programming Language :: Python",
|
||||
"Programming Language :: Python :: 3",
|
||||
"Programming Language :: Python :: 3 :: Only",
|
||||
"Programming Language :: Python :: 3.8",
|
||||
"Programming Language :: Python :: 3.9",
|
||||
"Programming Language :: Python :: 3.10",
|
||||
"Programming Language :: Python :: 3.11",
|
||||
"Programming Language :: Python :: 3.12",
|
||||
"Programming Language :: Python :: Implementation :: CPython",
|
||||
"Programming Language :: Python :: Implementation :: PyPy",
|
||||
]
|
||||
dependencies = [
|
||||
"bentoml",
|
||||
"typer",
|
||||
@@ -22,13 +43,147 @@ dependencies = [
|
||||
"uv",
|
||||
"openai==1.35.9",
|
||||
]
|
||||
keywords = [
|
||||
"MLOps",
|
||||
"AI",
|
||||
"BentoML",
|
||||
"Model Serving",
|
||||
"Model Deployment",
|
||||
"LLMOps",
|
||||
"Falcon",
|
||||
"Vicuna",
|
||||
"Llama 2",
|
||||
"Fine tuning",
|
||||
"Serverless",
|
||||
"Large Language Model",
|
||||
"Generative AI",
|
||||
"StableLM",
|
||||
"Alpaca",
|
||||
"PyTorch",
|
||||
"Mistral",
|
||||
"vLLM",
|
||||
"Transformers",
|
||||
]
|
||||
license = "Apache-2.0"
|
||||
requires-python = ">=3.9"
|
||||
|
||||
[project.scripts]
|
||||
openllm = "openllm_next.__main__:main"
|
||||
openllm = "openllm.__main__:app"
|
||||
|
||||
[project.urls]
|
||||
Blog = "https://modelserving.com"
|
||||
Documentation = "https://github.com/bentoml/OpenLLM#readme"
|
||||
GitHub = "https://github.com/bentoml/OpenLLM"
|
||||
Homepage = "https://bentoml.com"
|
||||
Tracker = "https://github.com/bentoml/OpenLLM/issues"
|
||||
Twitter = "https://twitter.com/bentomlai"
|
||||
|
||||
[tool.typer]
|
||||
src-dir = "openllm_next"
|
||||
src-dir = "src/openllm"
|
||||
|
||||
[tool.isort]
|
||||
multi_line_output = 3
|
||||
include_trailing_comma = true
|
||||
[build-system]
|
||||
requires = [
|
||||
"hatchling==1.25.0",
|
||||
"hatch-vcs==0.4.0",
|
||||
"hatch-fancy-pypi-readme==24.1.0",
|
||||
]
|
||||
build-backend = 'hatchling.build'
|
||||
|
||||
[tool.hatch.version]
|
||||
source = "vcs"
|
||||
fallback-version = "0.0.0"
|
||||
[tool.hatch.build.hooks.vcs]
|
||||
version-file = "src/openllm/_version.py"
|
||||
[tool.hatch.version.raw-options]
|
||||
git_describe_command = [
|
||||
"git",
|
||||
"describe",
|
||||
"--dirty",
|
||||
"--tags",
|
||||
"--long",
|
||||
"--first-parent",
|
||||
]
|
||||
version_scheme = "post-release"
|
||||
fallback_version = "0.0.0"
|
||||
[tool.hatch.metadata]
|
||||
allow-direct-references = true
|
||||
[tool.hatch.build.targets.wheel]
|
||||
only-include = ["src/openllm"]
|
||||
sources = ["src"]
|
||||
[tool.hatch.build.targets.sdist]
|
||||
exclude = ["/.git_archival.txt", "/.python-version-default"]
|
||||
|
||||
[tool.hatch.metadata.hooks.fancy-pypi-readme]
|
||||
content-type = "text/markdown"
|
||||
# PyPI doesn't support the <picture> tag.
|
||||
[[tool.hatch.metadata.hooks.fancy-pypi-readme.fragments]]
|
||||
text = """
|
||||
<p align="center">
|
||||
<a href="https://github.com/bentoml/openllm">
|
||||
<img src="https://raw.githubusercontent.com/bentoml/openllm/main/.github/assets/main-banner.png" alt="Banner for OpenLLM" />
|
||||
</a>
|
||||
</p>
|
||||
"""
|
||||
[[tool.hatch.metadata.hooks.fancy-pypi-readme.fragments]]
|
||||
end-before = "\n<!-- hatch-fancy-pypi-readme intro stop -->"
|
||||
path = "README.md"
|
||||
start-after = "<!-- hatch-fancy-pypi-readme intro start -->\n"
|
||||
[[tool.hatch.metadata.hooks.fancy-pypi-readme.fragments]]
|
||||
text = """
|
||||
<p align="center">
|
||||
<img src="https://raw.githubusercontent.com/bentoml/openllm/main/.github/assets/output.gif" alt="Gif showing OpenLLM Intro" />
|
||||
</p>
|
||||
"""
|
||||
[[tool.hatch.metadata.hooks.fancy-pypi-readme.fragments]]
|
||||
end-before = "\n<!-- hatch-fancy-pypi-readme meta stop -->"
|
||||
path = "README.md"
|
||||
start-after = "<!-- hatch-fancy-pypi-readme meta start -->\n"
|
||||
[[tool.hatch.metadata.hooks.fancy-pypi-readme.fragments]]
|
||||
text = """
|
||||
## Release Information
|
||||
"""
|
||||
[[tool.hatch.metadata.hooks.fancy-pypi-readme.fragments]]
|
||||
path = "CHANGELOG.md"
|
||||
pattern = "\n(###.+?\n)## "
|
||||
start-after = "<!-- towncrier release notes start -->"
|
||||
[[tool.hatch.metadata.hooks.fancy-pypi-readme.fragments]]
|
||||
text = """
|
||||
---
|
||||
[Click me for full changelog](https://github.com/bentoml/openllm/blob/main/CHANGELOG.md)
|
||||
"""
|
||||
|
||||
[tool.towncrier]
|
||||
directory = "changelog.d"
|
||||
filename = "CHANGELOG.md"
|
||||
issue_format = "[#{issue}](https://github.com/bentoml/openllm/issues/{issue})"
|
||||
name = "openllm"
|
||||
start_string = "<!-- towncrier release notes start -->\n"
|
||||
template = "changelog.d/template.md.jinja"
|
||||
title_format = ""
|
||||
underlines = ["", "", ""]
|
||||
[[tool.towncrier.section]]
|
||||
path = ""
|
||||
[[tool.towncrier.type]]
|
||||
directory = "breaking"
|
||||
name = "Backwards-incompatible Changes"
|
||||
showcontent = true
|
||||
[[tool.towncrier.type]]
|
||||
directory = "deprecation"
|
||||
name = "Deprecations"
|
||||
showcontent = true
|
||||
[[tool.towncrier.type]]
|
||||
directory = "change"
|
||||
name = "Changes"
|
||||
showcontent = true
|
||||
[[tool.towncrier.type]]
|
||||
directory = "refactor"
|
||||
name = "Refactor"
|
||||
showcontent = true
|
||||
[[tool.towncrier.type]]
|
||||
directory = "feature"
|
||||
name = "Features"
|
||||
showcontent = true
|
||||
[[tool.towncrier.type]]
|
||||
directory = "fix"
|
||||
name = "Bug fix"
|
||||
showcontent = true
|
||||
|
||||
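With `version` now listed under `dynamic` and sourced from git tags via hatch-vcs, the package no longer hard-codes a version string; the build backend writes it into `src/openllm/_version.py` and the CLI reads it back through `importlib.metadata` (see the `--version` flag in `__main__.py` below). A quick way to check the resolved version of an installed copy, standard library only:

```python
import importlib.metadata

# Prints the version recorded at build time, e.g. "0.6.0".
# Raises importlib.metadata.PackageNotFoundError if openllm is not installed.
print(importlib.metadata.version("openllm"))
```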
src/openllm/__main__.py (new file, 274 lines)
@@ -0,0 +1,274 @@
|
||||
import os
|
||||
import random
|
||||
import sys
|
||||
import platform
|
||||
import importlib.metadata
|
||||
from collections import defaultdict
|
||||
from typing import Annotated, Optional
|
||||
|
||||
import questionary
|
||||
import typer
|
||||
|
||||
from openllm.accelerator_spec import DeploymentTarget, can_run, get_local_machine_spec
|
||||
from openllm.analytic import DO_NOT_TRACK, OpenLLMTyper
|
||||
from openllm.clean import app as clean_app
|
||||
from openllm.cloud import deploy as cloud_deploy
|
||||
from openllm.cloud import ensure_cloud_context, get_cloud_machine_spec
|
||||
from openllm.common import CHECKED, INTERACTIVE, VERBOSE_LEVEL, output
|
||||
from openllm.local import run as local_run
|
||||
from openllm.local import serve as local_serve
|
||||
from openllm.model import app as model_app
|
||||
from openllm.model import ensure_bento, list_bento
|
||||
from openllm.repo import app as repo_app
|
||||
|
||||
app = OpenLLMTyper(
|
||||
help='`openllm hello` to get started. '
|
||||
'OpenLLM is a CLI tool to manage and deploy open source LLMs and'
|
||||
' get an OpenAI API compatible chat server in seconds.'
|
||||
)
|
||||
|
||||
app.add_typer(repo_app, name='repo')
|
||||
app.add_typer(model_app, name='model')
|
||||
app.add_typer(clean_app, name='clean')
|
||||
|
||||
|
||||
def _select_bento_name(models, target):
|
||||
from tabulate import tabulate
|
||||
|
||||
options = []
|
||||
model_infos = [[model.repo.name, model.name, can_run(model, target)] for model in models]
|
||||
model_name_groups = defaultdict(lambda: 0)
|
||||
for repo, name, score in model_infos:
|
||||
model_name_groups[(repo, name)] += score
|
||||
table_data = [[name, repo, CHECKED if score > 0 else ''] for (repo, name), score in model_name_groups.items()]
|
||||
if not table_data:
|
||||
output('No model found', style='red')
|
||||
raise typer.Exit(1)
|
||||
table = tabulate(table_data, headers=['model', 'repo', 'locally runnable']).split('\n')
|
||||
headers = f'{table[0]}\n {table[1]}'
|
||||
|
||||
options.append(questionary.Separator(headers))
|
||||
for table_data, table_line in zip(table_data, table[2:]):
|
||||
options.append(questionary.Choice(table_line, value=table_data[:2]))
|
||||
selected = questionary.select('Select a model', options).ask()
|
||||
if selected is None:
|
||||
raise typer.Exit(1)
|
||||
return selected
|
||||
|
||||
|
||||
def _select_bento_version(models, target, bento_name, repo):
|
||||
from tabulate import tabulate
|
||||
|
||||
model_infos = [
|
||||
[model, can_run(model, target)] for model in models if model.name == bento_name and model.repo.name == repo
|
||||
]
|
||||
|
||||
table_data = [
|
||||
[model.tag, CHECKED if score > 0 else '']
|
||||
for model, score in model_infos
|
||||
if model.name == bento_name and model.repo.name == repo
|
||||
]
|
||||
if not table_data:
|
||||
output(f'No model found for {bento_name} in {repo}', style='red')
|
||||
raise typer.Exit(1)
|
||||
table = tabulate(table_data, headers=['version', 'locally runnable']).split('\n')
|
||||
|
||||
options = []
|
||||
options.append(questionary.Separator(f'{table[0]}\n {table[1]}'))
|
||||
for table_data, table_line in zip(model_infos, table[2:]):
|
||||
options.append(questionary.Choice(table_line, value=table_data))
|
||||
selected = questionary.select('Select a version', options).ask()
|
||||
if selected is None:
|
||||
raise typer.Exit(1)
|
||||
return selected
|
||||
|
||||
|
||||
def _select_target(bento, targets):
|
||||
from tabulate import tabulate
|
||||
|
||||
options = []
|
||||
targets.sort(key=lambda x: can_run(bento, x), reverse=True)
|
||||
if not targets:
|
||||
output('No available instance type, check your bentocloud account', style='red')
|
||||
raise typer.Exit(1)
|
||||
|
||||
table = tabulate(
|
||||
[
|
||||
[
|
||||
target.name,
|
||||
target.accelerators_repr,
|
||||
f'${target.price}',
|
||||
CHECKED if can_run(bento, target) else 'insufficient res.',
|
||||
]
|
||||
for target in targets
|
||||
],
|
||||
headers=['instance type', 'accelerator', 'price/hr', 'deployable'],
|
||||
).split('\n')
|
||||
options.append(questionary.Separator(f'{table[0]}\n {table[1]}'))
|
||||
|
||||
for target, line in zip(targets, table[2:]):
|
||||
options.append(questionary.Choice(f'{line}', value=target))
|
||||
selected = questionary.select('Select an instance type', options).ask()
|
||||
if selected is None:
|
||||
raise typer.Exit(1)
|
||||
return selected
|
||||
|
||||
|
||||
def _select_action(bento, score):
|
||||
if score > 0:
|
||||
options = [
|
||||
questionary.Separator('Available actions'),
|
||||
questionary.Choice('0. Run the model in terminal', value='run', shortcut_key='0'),
|
||||
questionary.Separator(f' $ openllm run {bento}'),
|
||||
questionary.Separator(' '),
|
||||
questionary.Choice('1. Serve the model locally and get a chat server', value='serve', shortcut_key='1'),
|
||||
questionary.Separator(f' $ openllm serve {bento}'),
|
||||
questionary.Separator(' '),
|
||||
questionary.Choice(
|
||||
'2. Deploy the model to bentocloud and get a scalable chat server', value='deploy', shortcut_key='2'
|
||||
),
|
||||
questionary.Separator(f' $ openllm deploy {bento}'),
|
||||
]
|
||||
else:
|
||||
options = [
|
||||
questionary.Separator('Available actions'),
|
||||
questionary.Choice('0. Run the model in terminal', value='run', disabled='insufficient res.', shortcut_key='0'),
|
||||
questionary.Separator(f' $ openllm run {bento}'),
|
||||
questionary.Separator(' '),
|
||||
questionary.Choice(
|
||||
'1. Serve the model locally and get a chat server',
|
||||
value='serve',
|
||||
disabled='insufficient res.',
|
||||
shortcut_key='1',
|
||||
),
|
||||
questionary.Separator(f' $ openllm serve {bento}'),
|
||||
questionary.Separator(' '),
|
||||
questionary.Choice(
|
||||
'2. Deploy the model to bentocloud and get a scalable chat server', value='deploy', shortcut_key='2'
|
||||
),
|
||||
questionary.Separator(f' $ openllm deploy {bento}'),
|
||||
]
|
||||
action = questionary.select('Select an action', options).ask()
|
||||
if action is None:
|
||||
raise typer.Exit(1)
|
||||
if action == 'run':
|
||||
try:
|
||||
local_run(bento)
|
||||
finally:
|
||||
output('\nUse this command to run the action again:', style='green')
|
||||
output(f' $ openllm run {bento}', style='orange')
|
||||
elif action == 'serve':
|
||||
try:
|
||||
local_serve(bento)
|
||||
finally:
|
||||
output('\nUse this command to run the action again:', style='green')
|
||||
output(f' $ openllm serve {bento}', style='orange')
|
||||
elif action == 'deploy':
|
||||
ensure_cloud_context()
|
||||
targets = get_cloud_machine_spec()
|
||||
target = _select_target(bento, targets)
|
||||
try:
|
||||
cloud_deploy(bento, target)
|
||||
finally:
|
||||
output('\nUse this command to run the action again:', style='green')
|
||||
output(f' $ openllm deploy {bento} --instance-type {target.name}', style='orange')
|
||||
|
||||
|
||||
@app.command(help='get started interactively')
|
||||
def hello():
|
||||
INTERACTIVE.set(True)
|
||||
VERBOSE_LEVEL.set(20)
|
||||
|
||||
target = get_local_machine_spec()
|
||||
output(f' Detected Platform: {target.platform}', style='green')
|
||||
if target.accelerators:
|
||||
output(' Detected Accelerators: ', style='green')
|
||||
for a in target.accelerators:
|
||||
output(f' - {a.model} {a.memory_size}GB', style='green')
|
||||
else:
|
||||
output(' Detected Accelerators: None', style='yellow')
|
||||
|
||||
models = list_bento()
|
||||
if not models:
|
||||
output('No model found, you probably need to update the model repo:', style='red')
|
||||
output(' $ openllm repo update', style='orange')
|
||||
raise typer.Exit(1)
|
||||
|
||||
bento_name, repo = _select_bento_name(models, target)
|
||||
bento, score = _select_bento_version(models, target, bento_name, repo)
|
||||
_select_action(bento, score)
|
||||
|
||||
|
||||
@app.command(help='start an OpenAI API compatible chat server and chat in browser')
|
||||
def serve(
|
||||
model: Annotated[str, typer.Argument()] = '', repo: Optional[str] = None, port: int = 3000, verbose: bool = False
|
||||
):
|
||||
if verbose:
|
||||
VERBOSE_LEVEL.set(20)
|
||||
target = get_local_machine_spec()
|
||||
bento = ensure_bento(model, target=target, repo_name=repo)
|
||||
local_serve(bento, port=port)
|
||||
|
||||
|
||||
@app.command(help='run the model and chat in terminal')
|
||||
def run(
|
||||
model: Annotated[str, typer.Argument()] = '',
|
||||
repo: Optional[str] = None,
|
||||
port: Optional[int] = None,
|
||||
timeout: int = 600,
|
||||
verbose: bool = False,
|
||||
):
|
||||
if verbose:
|
||||
VERBOSE_LEVEL.set(20)
|
||||
target = get_local_machine_spec()
|
||||
bento = ensure_bento(model, target=target, repo_name=repo)
|
||||
if port is None:
|
||||
port = random.randint(30000, 40000)
|
||||
local_run(bento, port=port, timeout=timeout)
|
||||
|
||||
|
||||
@app.command(help='deploy a production-ready OpenAI API compatible chat server to BentoCloud ($100 free credit)')
|
||||
def deploy(
|
||||
model: Annotated[str, typer.Argument()] = '',
|
||||
instance_type: Optional[str] = None,
|
||||
repo: Optional[str] = None,
|
||||
verbose: bool = False,
|
||||
):
|
||||
if verbose:
|
||||
VERBOSE_LEVEL.set(20)
|
||||
bento = ensure_bento(model, repo_name=repo)
|
||||
if instance_type is not None:
|
||||
cloud_deploy(bento, DeploymentTarget(name=instance_type))
|
||||
return
|
||||
targets = get_cloud_machine_spec()
|
||||
targets = filter(lambda x: can_run(bento, x) > 0, targets)
|
||||
targets = sorted(targets, key=lambda x: can_run(bento, x), reverse=True)
|
||||
if not targets:
|
||||
output('No available instance type, check your bentocloud account', style='red')
|
||||
raise typer.Exit(1)
|
||||
target = targets[0]
|
||||
output(f'Recommended instance type: {target.name}', style='green')
|
||||
cloud_deploy(bento, target)
|
||||
|
||||
|
||||
@app.callback(invoke_without_command=True)
|
||||
def typer_callback(
|
||||
verbose: int = 0,
|
||||
do_not_track: bool = typer.Option(
|
||||
False, '--do-not-track', help='Whether to disable usage tracking', envvar=DO_NOT_TRACK
|
||||
),
|
||||
version: bool = typer.Option(False, '--version', '-v', help='Show version'),
|
||||
):
|
||||
if verbose:
|
||||
VERBOSE_LEVEL.set(verbose)
|
||||
if version:
|
||||
output(
|
||||
f"openllm, {importlib.metadata.version('openllm')}\nPython ({platform.python_implementation()}) {platform.python_version()}"
|
||||
)
|
||||
sys.exit(0)
|
||||
if do_not_track:
|
||||
os.environ[DO_NOT_TRACK] = str(True)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
app()
|
||||
src/openllm/accelerator_spec.py (new file, 144 lines)
@@ -0,0 +1,144 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import functools
|
||||
import math
|
||||
import typing
|
||||
from types import SimpleNamespace
|
||||
|
||||
import psutil
|
||||
|
||||
from openllm.common import BentoInfo, DeploymentTarget, output
|
||||
|
||||
|
||||
class Accelerator(SimpleNamespace):
|
||||
model: str
|
||||
memory_size: float
|
||||
|
||||
def __gt__(self, other):
|
||||
return self.memory_size > other.memory_size
|
||||
|
||||
def __eq__(self, other):
|
||||
return self.memory_size == other.memory_size
|
||||
|
||||
def __repr__(self):
|
||||
return f'{self.model}({self.memory_size}GB)'
|
||||
|
||||
|
||||
class Resource(SimpleNamespace):
|
||||
cpu: int = 0
|
||||
memory: float
|
||||
gpu: int = 0
|
||||
gpu_type: str = ''
|
||||
|
||||
def __hash__(self):
|
||||
return hash((self.cpu, self.memory, self.gpu, self.gpu_type))
|
||||
|
||||
def __bool__(self):
|
||||
return any(value is not None for value in self.__dict__.values())
|
||||
|
||||
|
||||
ACCELERATOR_SPEC_DICT: dict[str, dict] = {
|
||||
'nvidia-gtx-1650': {'model': 'GTX 1650', 'memory_size': 4.0},
|
||||
'nvidia-gtx-1060': {'model': 'GTX 1060', 'memory_size': 6.0},
|
||||
'nvidia-gtx-1080-ti': {'model': 'GTX 1080 Ti', 'memory_size': 11.0},
|
||||
'nvidia-rtx-3060': {'model': 'RTX 3060', 'memory_size': 12.0},
|
||||
'nvidia-rtx-3060-ti': {'model': 'RTX 3060 Ti', 'memory_size': 8.0},
|
||||
'nvidia-rtx-3070-ti': {'model': 'RTX 3070 Ti', 'memory_size': 8.0},
|
||||
'nvidia-rtx-3080': {'model': 'RTX 3080', 'memory_size': 10.0},
|
||||
'nvidia-rtx-3080-ti': {'model': 'RTX 3080 Ti', 'memory_size': 12.0},
|
||||
'nvidia-rtx-3090': {'model': 'RTX 3090', 'memory_size': 24.0},
|
||||
'nvidia-rtx-4070-ti': {'model': 'RTX 4070 Ti', 'memory_size': 12.0},
|
||||
'nvidia-tesla-p4': {'model': 'P4', 'memory_size': 8.0},
|
||||
'nvidia-tesla-p100': {'model': 'P100', 'memory_size': 16.0},
|
||||
'nvidia-tesla-k80': {'model': 'K80', 'memory_size': 12.0},
|
||||
'nvidia-tesla-t4': {'model': 'T4', 'memory_size': 16.0},
|
||||
'nvidia-tesla-v100': {'model': 'V100', 'memory_size': 16.0},
|
||||
'nvidia-l4': {'model': 'L4', 'memory_size': 24.0},
|
||||
'nvidia-tesla-l4': {'model': 'L4', 'memory_size': 24.0},
|
||||
'nvidia-tesla-a10g': {'model': 'A10G', 'memory_size': 24.0},
|
||||
'nvidia-a100-80g': {'model': 'A100', 'memory_size': 80.0},
|
||||
'nvidia-a100-80gb': {'model': 'A100', 'memory_size': 80.0},
|
||||
'nvidia-tesla-a100': {'model': 'A100', 'memory_size': 40.0},
|
||||
}
|
||||
|
||||
|
||||
ACCELERATOR_SPECS: dict[str, Accelerator] = {key: Accelerator(**value) for key, value in ACCELERATOR_SPEC_DICT.items()}
|
||||
|
||||
|
||||
@functools.lru_cache
|
||||
def get_local_machine_spec():
|
||||
if psutil.MACOS:
|
||||
return DeploymentTarget(accelerators=[], source='local', platform='macos')
|
||||
|
||||
if psutil.WINDOWS:
|
||||
platform = 'windows'
|
||||
elif psutil.LINUX:
|
||||
platform = 'linux'
|
||||
else:
|
||||
raise NotImplementedError('Unsupported platform')
|
||||
|
||||
from pynvml import (
|
||||
nvmlDeviceGetCount,
|
||||
nvmlDeviceGetCudaComputeCapability,
|
||||
nvmlDeviceGetHandleByIndex,
|
||||
nvmlDeviceGetMemoryInfo,
|
||||
nvmlDeviceGetName,
|
||||
nvmlInit,
|
||||
nvmlShutdown,
|
||||
)
|
||||
|
||||
try:
|
||||
nvmlInit()
|
||||
device_count = nvmlDeviceGetCount()
|
||||
accelerators: list[Accelerator] = []
|
||||
for i in range(device_count):
|
||||
handle = nvmlDeviceGetHandleByIndex(i)
|
||||
name = nvmlDeviceGetName(handle)
|
||||
memory_info = nvmlDeviceGetMemoryInfo(handle)
|
||||
accelerators.append(Accelerator(model=name, memory_size=math.ceil(int(memory_info.total) / 1024**3)))
|
||||
compute_capability = nvmlDeviceGetCudaComputeCapability(handle)
|
||||
if compute_capability < (7, 5):
|
||||
output(
|
||||
f'GPU {name} with compute capability {compute_capability} '
|
||||
'may not be supported; 7.5 or higher is recommended. Check '
|
||||
'https://developer.nvidia.com/cuda-gpus for more information',
|
||||
style='yellow',
|
||||
)
|
||||
nvmlShutdown()
|
||||
return DeploymentTarget(accelerators=accelerators, source='local', platform=platform)
|
||||
except Exception as e:
|
||||
output(
|
||||
'Failed to get local GPU info. Ensure the NVIDIA driver is installed to enable local GPU deployment', style='yellow'
|
||||
)
|
||||
output(f'Error: {e}', style='red', level=20)
|
||||
return DeploymentTarget(accelerators=[], source='local', platform=platform)
|
||||
|
||||
|
||||
@functools.lru_cache()
|
||||
def can_run(bento: typing.Union[Resource, BentoInfo], target: typing.Optional[DeploymentTarget] = None) -> float:
|
||||
"""
|
||||
Calculate if the bento can be deployed on the target.
|
||||
"""
|
||||
if target is None:
|
||||
target = get_local_machine_spec()
|
||||
|
||||
resource_spec = Resource(**(bento.bento_yaml['services'][0]['config'].get('resources', {})))
|
||||
labels = bento.bento_yaml.get('labels', {})
|
||||
platforms = labels.get('platforms', 'linux').split(',')
|
||||
|
||||
if target.platform not in platforms:
|
||||
return 0.0
|
||||
|
||||
# no resource requirements specified; return a neutral, non-zero score
|
||||
if not resource_spec:
|
||||
return 0.5
|
||||
|
||||
if resource_spec.gpu > 0:
|
||||
required_gpu = ACCELERATOR_SPECS[resource_spec.gpu_type]
|
||||
filtered_accelerators = [ac for ac in target.accelerators if ac.memory_size >= required_gpu.memory_size]
|
||||
if resource_spec.gpu > len(filtered_accelerators):
|
||||
return 0.0
|
||||
return required_gpu.memory_size * resource_spec.gpu / sum(ac.memory_size for ac in target.accelerators)
|
||||
if target.accelerators:
|
||||
return 0.01 / sum(ac.memory_size for ac in target.accelerators)
|
||||
return 1.0
|
||||
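`can_run` returns 0.0 when the target cannot host the bento at all, a small positive value when the target has GPUs the bento does not ask for, and a fraction that grows the more tightly the requested GPUs fill the target's total GPU memory. The arithmetic in isolation, with made-up numbers standing in for a bento's resource spec and a target's accelerators:

```python
# Illustrative only: mirrors the GPU branch of can_run() without BentoInfo/DeploymentTarget.
def score(required_gpu_mem: float, required_gpus: int, target_gpu_mems: list) -> float:
    fitting = [m for m in target_gpu_mems if m >= required_gpu_mem]
    if required_gpus > len(fitting):
        return 0.0  # not enough sufficiently large GPUs on this target
    # a tighter fit (less wasted memory) scores closer to 1.0
    return required_gpu_mem * required_gpus / sum(target_gpu_mems)

print(score(24.0, 1, [24.0]))  # 1.0 -> exact fit on a single 24G GPU
print(score(24.0, 1, [80.0]))  # 0.3 -> runs, but wastes most of an 80G GPU
print(score(24.0, 2, [24.0]))  # 0.0 -> needs two GPUs, only one available
```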
src/openllm/analytic.py (new file, 102 lines)
@@ -0,0 +1,102 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import functools
|
||||
import os
|
||||
import re
|
||||
import time
|
||||
import typing
|
||||
from abc import ABC
|
||||
|
||||
import attr
|
||||
import click
|
||||
import typer
|
||||
import typer.core
|
||||
|
||||
DO_NOT_TRACK = 'BENTOML_DO_NOT_TRACK'
|
||||
|
||||
|
||||
class EventMeta(ABC):
|
||||
@property
|
||||
def event_name(self):
|
||||
# camel case to snake case
|
||||
event_name = re.sub(r'(?<!^)(?=[A-Z])', '_', self.__class__.__name__).lower()
|
||||
# remove "_event" suffix
|
||||
suffix_to_remove = '_event'
|
||||
if event_name.endswith(suffix_to_remove):
|
||||
event_name = event_name[: -len(suffix_to_remove)]
|
||||
return event_name
|
||||
|
||||
|
||||
@attr.define
|
||||
class CliEvent(EventMeta):
|
||||
cmd_group: str
|
||||
cmd_name: str
|
||||
duration_in_ms: float = attr.field(default=0)
|
||||
error_type: typing.Optional[str] = attr.field(default=None)
|
||||
return_code: typing.Optional[int] = attr.field(default=None)
|
||||
|
||||
|
||||
@attr.define
|
||||
class OpenllmCliEvent(CliEvent):
|
||||
pass
|
||||
|
||||
|
||||
class OrderedCommands(typer.core.TyperGroup):
|
||||
def list_commands(self, _: click.Context) -> typing.Iterable[str]:
|
||||
return list(self.commands)
|
||||
|
||||
|
||||
class OpenLLMTyper(typer.Typer):
|
||||
def __init__(self, *args: typing.Any, **kwargs: typing.Any):
|
||||
no_args_is_help = kwargs.pop('no_args_is_help', True)
|
||||
context_settings = kwargs.pop('context_settings', {})
|
||||
if 'help_option_names' not in context_settings:
|
||||
context_settings['help_option_names'] = ('-h', '--help')
|
||||
if 'max_content_width' not in context_settings:
|
||||
context_settings['max_content_width'] = int(os.environ.get('COLUMNS', str(120)))
|
||||
klass = kwargs.pop('cls', OrderedCommands)
|
||||
|
||||
super().__init__(*args, cls=klass, no_args_is_help=no_args_is_help, context_settings=context_settings, **kwargs)
|
||||
|
||||
def command(self, *args: typing.Any, **kwargs: typing.Any):
|
||||
def decorator(f):
|
||||
@functools.wraps(f)
|
||||
@click.pass_context
|
||||
def wrapped(ctx: click.Context, *args, **kwargs):
|
||||
from bentoml._internal.utils.analytics import track
|
||||
|
||||
do_not_track = os.environ.get(DO_NOT_TRACK, str(False)).lower() == 'true'
|
||||
|
||||
# so we know that the root program is openllm
|
||||
command_name = ctx.info_name
|
||||
if ctx.parent.parent is not None:
|
||||
# openllm model list
|
||||
command_group = ctx.parent.info_name
|
||||
elif ctx.parent.info_name == ctx.find_root().info_name:
|
||||
# openllm run
|
||||
command_group = 'openllm'
|
||||
|
||||
if do_not_track:
|
||||
return f(*args, **kwargs)
|
||||
start_time = time.time_ns()
|
||||
try:
|
||||
return_value = f(*args, **kwargs)
|
||||
duration_in_ns = time.time_ns() - start_time
|
||||
track(OpenllmCliEvent(cmd_group=command_group, cmd_name=command_name, duration_in_ms=duration_in_ns / 1e6))
|
||||
return return_value
|
||||
except BaseException as e:
|
||||
duration_in_ns = time.time_ns() - start_time
|
||||
track(
|
||||
OpenllmCliEvent(
|
||||
cmd_group=command_group,
|
||||
cmd_name=command_name,
|
||||
duration_in_ms=duration_in_ns / 1e6,
|
||||
error_type=type(e).__name__,
|
||||
return_code=2 if isinstance(e, KeyboardInterrupt) else 1,
|
||||
)
|
||||
)
|
||||
raise
|
||||
|
||||
return typer.Typer.command(self, *args, **kwargs)(wrapped)
|
||||
|
||||
return decorator
|
||||
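The `event_name` property converts a CamelCase class name into the snake_case event name reported to BentoML's usage tracker, then strips a trailing `_event` suffix. The transformation is easy to check in isolation:

```python
import re

def event_name(class_name: str) -> str:
    # insert "_" before every capital letter that is not at the start, then lowercase
    name = re.sub(r"(?<!^)(?=[A-Z])", "_", class_name).lower()
    return name[: -len("_event")] if name.endswith("_event") else name

print(event_name("OpenllmCliEvent"))  # openllm_cli
print(event_name("CliEvent"))         # cli
```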
src/openllm/clean.py (new file, 66 lines)
@@ -0,0 +1,66 @@
|
||||
import pathlib
|
||||
import shutil
|
||||
|
||||
import questionary
|
||||
|
||||
from openllm.analytic import OpenLLMTyper
|
||||
from openllm.common import CONFIG_FILE, REPO_DIR, VENV_DIR, VERBOSE_LEVEL, output
|
||||
|
||||
app = OpenLLMTyper(help='clean up and release disk space used by OpenLLM')
|
||||
|
||||
|
||||
HUGGINGFACE_CACHE = pathlib.Path.home() / '.cache' / 'huggingface' / 'hub'
|
||||
|
||||
|
||||
@app.command(help='Clean up all the cached models from Hugging Face')
|
||||
def model_cache(verbose: bool = False):
|
||||
if verbose:
|
||||
VERBOSE_LEVEL.set(20)
|
||||
used_space = sum(f.stat().st_size for f in HUGGINGFACE_CACHE.rglob('*'))
|
||||
sure = questionary.confirm(
|
||||
f'This will remove all models cached by Hugging Face (~{used_space / 1024 / 1024:.2f}MB). Are you sure?'
|
||||
).ask()
|
||||
if not sure:
|
||||
return
|
||||
shutil.rmtree(HUGGINGFACE_CACHE, ignore_errors=True)
|
||||
output('All models cached by Hugging Face have been removed', style='green')
|
||||
|
||||
|
||||
@app.command(help='Clean up all the virtual environments created by OpenLLM')
|
||||
def venvs(verbose: bool = False):
|
||||
if verbose:
|
||||
VERBOSE_LEVEL.set(20)
|
||||
used_space = sum(f.stat().st_size for f in VENV_DIR.rglob('*'))
|
||||
sure = questionary.confirm(
|
||||
f'This will remove all virtual environments created by OpenLLM (~{used_space / 1024 / 1024:.2f}MB), are you sure?'
|
||||
).ask()
|
||||
if not sure:
|
||||
return
|
||||
shutil.rmtree(VENV_DIR, ignore_errors=True)
|
||||
output('All virtual environments have been removed', style='green')
|
||||
|
||||
|
||||
@app.command(help='Clean up all the repositories cloned by OpenLLM')
|
||||
def repos(verbose: bool = False):
|
||||
if verbose:
|
||||
VERBOSE_LEVEL.set(20)
|
||||
shutil.rmtree(REPO_DIR, ignore_errors=True)
|
||||
output('All repositories have been removed', style='green')
|
||||
|
||||
|
||||
@app.command(help='Reset configurations to default')
|
||||
def configs(verbose: bool = False):
|
||||
if verbose:
|
||||
VERBOSE_LEVEL.set(20)
|
||||
shutil.rmtree(CONFIG_FILE, ignore_errors=True)
|
||||
output('All configurations have been reset', style='green')
|
||||
|
||||
|
||||
@app.command(name='all', help='Clean up all of the above and bring OpenLLM back to a fresh start')
|
||||
def all_cache(verbose: bool = False):
|
||||
if verbose:
|
||||
VERBOSE_LEVEL.set(20)
|
||||
repos()
|
||||
venvs()
|
||||
model_cache()
|
||||
configs()
|
||||
src/openllm/cloud.py (new file, 130 lines)
@@ -0,0 +1,130 @@
|
||||
import json
|
||||
import os
|
||||
import pathlib
|
||||
import shutil
|
||||
import subprocess
|
||||
import typing
|
||||
|
||||
import typer
|
||||
|
||||
from openllm.accelerator_spec import ACCELERATOR_SPECS
|
||||
from openllm.analytic import OpenLLMTyper
|
||||
from openllm.common import INTERACTIVE, BentoInfo, DeploymentTarget, output, run_command
|
||||
|
||||
app = OpenLLMTyper()
|
||||
|
||||
|
||||
def _get_deploy_cmd(bento: BentoInfo, target: typing.Optional[DeploymentTarget] = None):
|
||||
cmd = ['bentoml', 'deploy', bento.bentoml_tag]
|
||||
env = {'BENTOML_HOME': f'{bento.repo.path}/bentoml'}
|
||||
|
||||
required_envs = bento.bento_yaml.get('envs', [])
|
||||
required_env_names = [env['name'] for env in required_envs if 'name' in env]
|
||||
if required_env_names:
|
||||
output(f'This model requires the following environment variables to run: {required_env_names!r}', style='yellow')
|
||||
|
||||
for env_info in bento.bento_yaml.get('envs', []):
|
||||
if 'name' not in env_info:
|
||||
continue
|
||||
if os.environ.get(env_info['name']):
|
||||
default = os.environ[env_info['name']]
|
||||
elif 'value' in env_info:
|
||||
default = env_info['value']
|
||||
else:
|
||||
default = ''
|
||||
|
||||
if INTERACTIVE.get():
|
||||
import questionary
|
||||
|
||||
value = questionary.text(f"{env_info['name']}:", default=default).ask()
|
||||
else:
|
||||
if default == '':
|
||||
output(f"Environment variable {env_info['name']} is required but not provided", style='red')
|
||||
raise typer.Exit(1)
|
||||
else:
|
||||
value = default
|
||||
|
||||
if value is None:
|
||||
raise typer.Exit(1)
|
||||
cmd += ['--env', f"{env_info['name']}={value}"]
|
||||
|
||||
if target:
|
||||
cmd += ['--instance-type', target.name]
|
||||
|
||||
assert (pathlib.Path.home() / 'bentoml' / '.yatai.yaml').exists()
|
||||
shutil.copy(pathlib.Path.home() / 'bentoml' / '.yatai.yaml', bento.repo.path / 'bentoml' / '.yatai.yaml')
|
||||
|
||||
return cmd, env, None
|
||||
|
||||
|
||||
def ensure_cloud_context():
|
||||
import questionary
|
||||
|
||||
cmd = ['bentoml', 'cloud', 'current-context']
|
||||
try:
|
||||
result = subprocess.check_output(cmd, stderr=subprocess.DEVNULL)
|
||||
context = json.loads(result)
|
||||
output(f" bentoml already logged in: {context['endpoint']}", style='green')
|
||||
except subprocess.CalledProcessError:
|
||||
output(' bentoml not logged in', style='red')
|
||||
if not INTERACTIVE.get():
|
||||
output('\n log in to BentoML by running:')
|
||||
output(' $ bentoml cloud login', style='orange')
|
||||
output('')
|
||||
output(
|
||||
""" * you may need to visit https://cloud.bentoml.com to get an account. you can also bring your own bentoml cluster (BYOC) to your team from https://bentoml.com/contact""",
|
||||
style='yellow',
|
||||
)
|
||||
raise typer.Exit(1)
|
||||
else:
|
||||
action = questionary.select(
|
||||
'Choose an action:', choices=['I have a BentoCloud account', 'get an account in two minutes']
|
||||
).ask()
|
||||
if action is None:
|
||||
raise typer.Exit(1)
|
||||
elif action == 'get an account in two minutes':
|
||||
output('Please visit https://cloud.bentoml.com to get your token', style='yellow')
|
||||
endpoint = questionary.text('Enter the endpoint: (similar to https://my-org.cloud.bentoml.com)').ask()
|
||||
if endpoint is None:
|
||||
raise typer.Exit(1)
|
||||
token = questionary.text('Enter your token: (similar to cniluaxxxxxxxx)').ask()
|
||||
if token is None:
|
||||
raise typer.Exit(1)
|
||||
cmd = ['bentoml', 'cloud', 'login', '--api-token', token, '--endpoint', endpoint]
|
||||
try:
|
||||
result = subprocess.check_output(cmd)
|
||||
output(' Logged in successfully', style='green')
|
||||
except subprocess.CalledProcessError:
|
||||
output(' Failed to login', style='red')
|
||||
raise typer.Exit(1)
|
||||
|
||||
|
||||
def get_cloud_machine_spec():
|
||||
ensure_cloud_context()
|
||||
cmd = ['bentoml', 'deployment', 'list-instance-types', '-o', 'json']
|
||||
try:
|
||||
result = subprocess.check_output(cmd, stderr=subprocess.DEVNULL)
|
||||
instance_types = json.loads(result)
|
||||
return [
|
||||
DeploymentTarget(
|
||||
source='cloud',
|
||||
name=it['name'],
|
||||
price=it['price'],
|
||||
platform='linux',
|
||||
accelerators=(
|
||||
[ACCELERATOR_SPECS[it['gpu_type']] for _ in range(int(it['gpu']))]
|
||||
if it.get('gpu') and it['gpu_type'] in ACCELERATOR_SPECS
|
||||
else []
|
||||
),
|
||||
)
|
||||
for it in instance_types
|
||||
]
|
||||
except (subprocess.CalledProcessError, json.JSONDecodeError):
|
||||
output('Failed to get cloud instance types', style='red')
|
||||
return []
|
||||
|
||||
|
||||
def deploy(bento: BentoInfo, target: DeploymentTarget):
|
||||
ensure_cloud_context()
|
||||
cmd, env, cwd = _get_deploy_cmd(bento, target)
|
||||
run_command(cmd, env=env, cwd=cwd)
|
||||
src/openllm/common.py (new file, 370 lines)
@@ -0,0 +1,370 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import functools
|
||||
import hashlib
|
||||
import io
|
||||
import json
|
||||
import os
|
||||
import pathlib
|
||||
import signal
|
||||
import subprocess
|
||||
import sys
|
||||
import sysconfig
|
||||
import typing
|
||||
from contextlib import asynccontextmanager, contextmanager
|
||||
from types import SimpleNamespace
|
||||
|
||||
import typer
|
||||
import typer.core
|
||||
|
||||
ERROR_STYLE = 'red'
|
||||
SUCCESS_STYLE = 'green'
|
||||
|
||||
|
||||
CLLAMA_HOME = pathlib.Path.home() / '.openllm'
|
||||
REPO_DIR = CLLAMA_HOME / 'repos'
|
||||
TEMP_DIR = CLLAMA_HOME / 'temp'
|
||||
VENV_DIR = CLLAMA_HOME / 'venv'
|
||||
|
||||
REPO_DIR.mkdir(exist_ok=True, parents=True)
|
||||
TEMP_DIR.mkdir(exist_ok=True, parents=True)
|
||||
VENV_DIR.mkdir(exist_ok=True, parents=True)
|
||||
|
||||
CONFIG_FILE = CLLAMA_HOME / 'config.json'
|
||||
|
||||
CHECKED = '☆'
|
||||
|
||||
T = typing.TypeVar('T')
|
||||
|
||||
|
||||
class ContextVar(typing.Generic[T]):
|
||||
def __init__(self, default: T):
|
||||
self._stack: list[T] = []
|
||||
self._default = default
|
||||
|
||||
def get(self) -> T:
|
||||
if self._stack:
|
||||
return self._stack[-1]
|
||||
return self._default
|
||||
|
||||
def set(self, value):
|
||||
self._stack.append(value)
|
||||
|
||||
@contextmanager
|
||||
def patch(self, value):
|
||||
self._stack.append(value)
|
||||
try:
|
||||
yield
|
||||
finally:
|
||||
self._stack.pop()
|
||||
|
||||
|
||||
VERBOSE_LEVEL = ContextVar(10)
|
||||
INTERACTIVE = ContextVar(False)
|
||||
FORCE = ContextVar(False)
|
||||
|
||||
|
||||
def output(content, level=0, style=None, end=None):
|
||||
import questionary
|
||||
|
||||
if level > VERBOSE_LEVEL.get():
|
||||
return
|
||||
|
||||
if not isinstance(content, str):
|
||||
import pyaml
|
||||
|
||||
out = io.StringIO()
|
||||
pyaml.pprint(content, dst=out, sort_dicts=False, sort_keys=False)
|
||||
questionary.print(out.getvalue(), style=style, end='' if end is None else end)
|
||||
out.close()
|
||||
|
||||
if isinstance(content, str):
|
||||
questionary.print(content, style=style, end='\n' if end is None else end)
|
||||
|
||||
|
||||
class Config(SimpleNamespace):
|
||||
repos: dict[str, str] = {'default': 'git+https://github.com/bentoml/openllm-models@main'}
|
||||
default_repo: str = 'default'
|
||||
|
||||
def tolist(self):
|
||||
return dict(repos=self.repos, default_repo=self.default_repo)
|
||||
|
||||
|
||||
def load_config():
|
||||
if CONFIG_FILE.exists():
|
||||
try:
|
||||
with open(CONFIG_FILE) as f:
|
||||
return Config(**json.load(f))
|
||||
except json.JSONDecodeError:
|
||||
return Config()
|
||||
return Config()
|
||||
|
||||
|
||||
def save_config(config):
|
||||
with open(CONFIG_FILE, 'w') as f:
|
||||
json.dump(config.tolist(), f, indent=2)
|
||||
|
||||
|
||||
class RepoInfo(SimpleNamespace):
|
||||
name: str
|
||||
path: pathlib.Path
|
||||
url: str
|
||||
server: str
|
||||
owner: str
|
||||
repo: str
|
||||
branch: str
|
||||
|
||||
def tolist(self):
|
||||
if VERBOSE_LEVEL.get() <= 0:
|
||||
return f'{self.name} ({self.url})'
|
||||
if VERBOSE_LEVEL.get() <= 10:
|
||||
return dict(name=self.name, url=self.url, path=str(self.path))
|
||||
if VERBOSE_LEVEL.get() <= 20:
|
||||
return dict(
|
||||
name=self.name,
|
||||
url=self.url,
|
||||
path=str(self.path),
|
||||
server=self.server,
|
||||
owner=self.owner,
|
||||
repo=self.repo,
|
||||
branch=self.branch,
|
||||
)
|
||||
|
||||
|
||||
class BentoInfo(SimpleNamespace):
|
||||
repo: RepoInfo
|
||||
path: pathlib.Path
|
||||
alias: str = ''
|
||||
|
||||
def __str__(self):
|
||||
if self.repo.name == 'default':
|
||||
return f'{self.tag}'
|
||||
else:
|
||||
return f'{self.repo.name}/{self.tag}'
|
||||
|
||||
def __hash__(self):
|
||||
return md5(str(self.path))
|
||||
|
||||
@property
|
||||
def tag(self) -> str:
|
||||
if self.alias:
|
||||
return f'{self.path.parent.name}:{self.alias}'
|
||||
return f'{self.path.parent.name}:{self.path.name}'
|
||||
|
||||
@property
|
||||
def bentoml_tag(self) -> str:
|
||||
return f'{self.path.parent.name}:{self.path.name}'
|
||||
|
||||
@property
|
||||
def name(self) -> str:
|
||||
return self.path.parent.name
|
||||
|
||||
@property
|
||||
def version(self) -> str:
|
||||
return self.path.name
|
||||
|
||||
@property
|
||||
def labels(self) -> dict[str, str]:
|
||||
return self.bento_yaml['labels']
|
||||
|
||||
@functools.cached_property
|
||||
def bento_yaml(self) -> dict:
|
||||
import yaml
|
||||
|
||||
bento_file = self.path / 'bento.yaml'
|
||||
return yaml.safe_load(bento_file.read_text())
|
||||
|
||||
@functools.cached_property
|
||||
def platforms(self) -> list[str]:
|
||||
return self.bento_yaml['labels'].get('platforms', 'linux').split(',')
|
||||
|
||||
@functools.cached_property
|
||||
def pretty_yaml(self) -> dict:
|
||||
def _pretty_routes(routes):
|
||||
return {
|
||||
route['route']: {
|
||||
'input': {k: v['type'] for k, v in route['input']['properties'].items()},
|
||||
'output': route['output']['type'],
|
||||
}
|
||||
for route in routes
|
||||
}
|
||||
|
||||
if len(self.bento_yaml['services']) == 1:
|
||||
pretty_yaml = {
|
||||
'apis': _pretty_routes(self.bento_yaml['schema']['routes']),
|
||||
'resources': self.bento_yaml['services'][0]['config']['resources'],
|
||||
'envs': self.bento_yaml['envs'],
|
||||
'platforms': self.platforms,
|
||||
}
|
||||
return pretty_yaml
|
||||
return self.bento_yaml
|
||||
|
||||
@functools.cached_property
|
||||
def pretty_gpu(self) -> str:
|
||||
from openllm.accelerator_spec import ACCELERATOR_SPECS
|
||||
|
||||
try:
|
||||
resources = self.bento_yaml['services'][0]['config']['resources']
|
||||
if resources['gpu'] > 1:
|
||||
acc = ACCELERATOR_SPECS[resources['gpu_type']]
|
||||
return f"{acc.memory_size:.0f}Gx{resources['gpu']}"
|
||||
elif resources['gpu'] > 0:
|
||||
acc = ACCELERATOR_SPECS[resources['gpu_type']]
|
||||
return f'{acc.memory_size:.0f}G'
|
||||
except KeyError:
|
||||
pass
|
||||
return ''
|
||||
|
||||
def tolist(self):
|
||||
verbose = VERBOSE_LEVEL.get()
|
||||
if verbose <= 0:
|
||||
return str(self)
|
||||
if verbose <= 10:
|
||||
return dict(tag=self.tag, repo=self.repo.tolist(), path=str(self.path), model_card=self.pretty_yaml)
|
||||
if verbose <= 20:
|
||||
return dict(tag=self.tag, repo=self.repo.tolist(), path=str(self.path), bento_yaml=self.bento_yaml)
|
||||
|
||||
|
||||
class VenvSpec(SimpleNamespace):
|
||||
python_version: str
|
||||
python_packages: dict[str, str]
|
||||
name_prefix = ''
|
||||
|
||||
def __hash__(self):
|
||||
return md5(
|
||||
# self.python_version,
|
||||
*sorted(self.python_packages)
|
||||
)
|
||||
|
||||
|
||||
class Accelerator(SimpleNamespace):
|
||||
model: str
|
||||
memory_size: float
|
||||
|
||||
def __gt__(self, other):
|
||||
return self.memory_size > other.memory_size
|
||||
|
||||
def __eq__(self, other):
|
||||
return self.memory_size == other.memory_size
|
||||
|
||||
|
||||
class DeploymentTarget(SimpleNamespace):
|
||||
source: str = 'local'
|
||||
name: str = 'local'
|
||||
price: str = ''
|
||||
platform = 'linux'
|
||||
accelerators: list[Accelerator]
|
||||
|
||||
def __hash__(self):
|
||||
return hash(self.source)
|
||||
|
||||
@property
|
||||
def accelerators_repr(self) -> str:
|
||||
accs = {a.model for a in self.accelerators}
|
||||
if len(accs) == 0:
|
||||
return 'null'
|
||||
if len(accs) == 1:
|
||||
a = self.accelerators[0]
|
||||
return f'{a.model} x{len(self.accelerators)}'
|
||||
return ', '.join((f'{a.model}' for a in self.accelerators))
|
||||
|
||||
|
||||
def run_command(cmd, cwd=None, env=None, copy_env=True, venv=None, silent=False) -> subprocess.CompletedProcess:
|
||||
import shlex
|
||||
|
||||
env = env or {}
|
||||
cmd = [str(c) for c in cmd]
|
||||
bin_dir = 'Scripts' if os.name == 'nt' else 'bin'
|
||||
if not silent:
|
||||
output('\n')
|
||||
if cwd:
|
||||
output(f'$ cd {cwd}', style='orange')
|
||||
if env:
|
||||
for k, v in env.items():
|
||||
output(f'$ export {k}={shlex.quote(v)}', style='orange')
|
||||
if venv:
|
||||
output(f"$ source {venv / 'bin' / 'activate'}", style='orange')
|
||||
output(f"$ {' '.join(cmd)}", style='orange')
|
||||
|
||||
if venv:
|
||||
py = venv / bin_dir / f"python{sysconfig.get_config_var('EXE')}"
|
||||
else:
|
||||
py = sys.executable
|
||||
|
||||
if copy_env:
|
||||
env = {**os.environ, **env}
|
||||
|
||||
if cmd and cmd[0] == 'bentoml':
|
||||
cmd = [py, '-m', 'bentoml'] + cmd[1:]
|
||||
if cmd and cmd[0] == 'python':
|
||||
cmd = [py] + cmd[1:]
|
||||
|
||||
try:
|
||||
if silent:
|
||||
return subprocess.run( # type: ignore
|
||||
cmd, cwd=cwd, env=env, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL
|
||||
)
|
||||
else:
|
||||
return subprocess.run(cmd, cwd=cwd, env=env)
|
||||
except subprocess.CalledProcessError:
|
||||
output('Command failed', style='red')
|
||||
raise typer.Exit(1)
|
||||
|
||||
|
||||
async def stream_command_output(stream, style='gray'):
|
||||
async for line in stream:
|
||||
output(line.decode(), style=style, end='')
|
||||
|
||||
|
||||
@asynccontextmanager
|
||||
async def async_run_command(cmd, cwd=None, env=None, copy_env=True, venv=None, silent=True):
|
||||
import shlex
|
||||
|
||||
env = env or {}
|
||||
cmd = [str(c) for c in cmd]
|
||||
|
||||
if not silent:
|
||||
output('\n')
|
||||
if cwd:
|
||||
output(f'$ cd {cwd}', style='orange')
|
||||
if env:
|
||||
for k, v in env.items():
|
||||
output(f'$ export {k}={shlex.quote(v)}', style='orange')
|
||||
if venv:
|
||||
output(f"$ source {venv / 'bin' / 'activate'}", style='orange')
|
||||
output(f"$ {' '.join(cmd)}", style='orange')
|
||||
|
||||
if venv:
|
||||
py = venv / 'bin' / 'python'
|
||||
else:
|
||||
py = sys.executable
|
||||
|
||||
if copy_env:
|
||||
env = {**os.environ, **env}
|
||||
|
||||
if cmd and cmd[0] == 'bentoml':
|
||||
cmd = [py, '-m', 'bentoml'] + cmd[1:]
|
||||
if cmd and cmd[0] == 'python':
|
||||
cmd = [py] + cmd[1:]
|
||||
|
||||
proc = None
|
||||
try:
|
||||
proc = await asyncio.create_subprocess_shell(
|
||||
' '.join(map(str, cmd)), stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE, cwd=cwd, env=env
|
||||
)
|
||||
yield proc
|
||||
except subprocess.CalledProcessError:
|
||||
output('Command failed', style='red')
|
||||
raise typer.Exit(1)
|
||||
finally:
|
||||
if proc:
|
||||
proc.send_signal(signal.SIGINT)
|
||||
await proc.wait()
|
||||
|
||||
|
||||
def md5(*strings: str) -> int:
|
||||
m = hashlib.md5()
|
||||
for s in strings:
|
||||
m.update(s.encode())
|
||||
return int(m.hexdigest(), 16)
|
||||
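The stack-based `ContextVar` defined above (a small helper, not `contextvars.ContextVar` from the standard library) is how the CLI threads flags such as `VERBOSE_LEVEL`, `INTERACTIVE`, and `FORCE` through nested calls; `patch` pushes a value for the duration of a block and pops it afterwards. A trimmed-down copy showing the intended usage, re-declared here so the snippet stands alone:

```python
from contextlib import contextmanager

class ContextVar:
    # condensed copy of the helper above, for illustration
    def __init__(self, default):
        self._stack, self._default = [], default

    def get(self):
        return self._stack[-1] if self._stack else self._default

    def set(self, value):
        self._stack.append(value)

    @contextmanager
    def patch(self, value):
        self._stack.append(value)
        try:
            yield
        finally:
            self._stack.pop()

VERBOSE_LEVEL = ContextVar(10)
with VERBOSE_LEVEL.patch(20):
    print(VERBOSE_LEVEL.get())  # 20, only inside the with-block
print(VERBOSE_LEVEL.get())      # back to the default, 10
```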
src/openllm/local.py (new file, 91 lines)
@@ -0,0 +1,91 @@
|
||||
import asyncio
|
||||
import time
|
||||
|
||||
import httpx
|
||||
|
||||
from openllm.common import BentoInfo, async_run_command, output, run_command, stream_command_output
|
||||
from openllm.venv import ensure_venv
|
||||
|
||||
|
||||
def _get_serve_cmd(bento: BentoInfo, port: int = 3000):
|
||||
cmd = ['bentoml', 'serve', bento.bentoml_tag]
|
||||
if port != 3000:
|
||||
cmd += ['--port', str(port)]
|
||||
env = {'BENTOML_HOME': f'{bento.repo.path}/bentoml'}
|
||||
return cmd, env, None
|
||||
|
||||
|
||||
def serve(bento: BentoInfo, port: int = 3000):
|
||||
venv = ensure_venv(bento)
|
||||
cmd, env, cwd = _get_serve_cmd(bento, port=port)
|
||||
run_command(cmd, env=env, cwd=cwd, venv=venv)
|
||||
|
||||
|
||||
async def _run_model(bento: BentoInfo, port: int = 3000, timeout: int = 600):
|
||||
venv = ensure_venv(bento)
|
||||
cmd, env, cwd = _get_serve_cmd(bento, port)
|
||||
async with async_run_command(cmd, env=env, cwd=cwd, venv=venv, silent=False) as server_proc:
|
||||
output(f'Model server started {server_proc.pid}')
|
||||
|
||||
stdout_streamer = None
|
||||
stderr_streamer = None
|
||||
start_time = time.time()
|
||||
|
||||
output('Model loading...', style='green')
|
||||
for _ in range(timeout):
|
||||
try:
|
||||
resp = httpx.get(f'http://localhost:{port}/readyz', timeout=3)
|
||||
if resp.status_code == 200:
|
||||
break
|
||||
except httpx.RequestError:
|
||||
if time.time() - start_time > 30:
|
||||
if not stdout_streamer:
|
||||
stdout_streamer = asyncio.create_task(stream_command_output(server_proc.stdout, style='gray'))
|
||||
if not stderr_streamer:
|
||||
stderr_streamer = asyncio.create_task(stream_command_output(server_proc.stderr, style='#BD2D0F'))
|
||||
await asyncio.sleep(1)
|
||||
else:
|
||||
output('Model failed to load', style='red')
|
||||
server_proc.terminate()
|
||||
return
|
||||
|
||||
if stdout_streamer:
|
||||
stdout_streamer.cancel()
|
||||
if stderr_streamer:
|
||||
stderr_streamer.cancel()
|
||||
|
||||
output('Model is ready', style='green')
|
||||
messages: list[dict[str, str]] = []
|
||||
|
||||
from openai import AsyncOpenAI
|
||||
|
||||
client = AsyncOpenAI(base_url=f'http://localhost:{port}/v1', api_key='local')
|
||||
model_id = (await client.models.list()).data[0].id
|
||||
while True:
|
||||
try:
|
||||
message = input('user: ')
|
||||
if message == '':
|
||||
output('empty message, please enter something', style='yellow')
|
||||
continue
|
||||
messages.append(dict(role='user', content=message))
|
||||
output('assistant: ', end='', style='lightgreen')
|
||||
assistant_message = ''
|
||||
stream = await client.chat.completions.create(
|
||||
model=model_id,
|
||||
messages=messages, # type: ignore
|
||||
stream=True,
|
||||
)
|
||||
async for chunk in stream:
|
||||
text = chunk.choices[0].delta.content or ''
|
||||
assistant_message += text
|
||||
output(text, end='', style='lightgreen')
|
||||
messages.append(dict(role='assistant', content=assistant_message))
|
||||
output('')
|
||||
except KeyboardInterrupt:
|
||||
break
|
||||
output('\nStopping model server...', style='green')
|
||||
output('Stopped model server', style='green')
|
||||
|
||||
|
||||
def run(bento: BentoInfo, port: int = 3000, timeout: int = 600):
|
||||
asyncio.run(_run_model(bento, port=port, timeout=timeout))
|
||||
147
src/openllm/model.py
Normal file
147
src/openllm/model.py
Normal file
@@ -0,0 +1,147 @@
|
||||
import typing
|
||||
from typing import Optional
|
||||
|
||||
import tabulate
|
||||
import typer
|
||||
|
||||
from openllm.accelerator_spec import DeploymentTarget, can_run
|
||||
from openllm.analytic import OpenLLMTyper
|
||||
from openllm.common import FORCE, VERBOSE_LEVEL, BentoInfo, load_config, output
|
||||
from openllm.repo import ensure_repo_updated, parse_repo_url
|
||||
|
||||
app = OpenLLMTyper(help='manage models')
|
||||
|
||||
|
||||
@app.command()
|
||||
def get(tag: str, repo: Optional[str] = None, verbose: bool = False):
|
||||
if verbose:
|
||||
VERBOSE_LEVEL.set(20)
|
||||
bento_info = ensure_bento(tag, repo_name=repo)
|
||||
if bento_info:
|
||||
output(bento_info)
|
||||
|
||||
|
||||
@app.command(name='list')
|
||||
def list_(tag: Optional[str] = None, repo: Optional[str] = None, verbose: bool = False):
|
||||
if verbose:
|
||||
VERBOSE_LEVEL.set(20)
|
||||
|
||||
bentos = list_bento(tag=tag, repo_name=repo)
|
||||
bentos.sort(key=lambda x: x.name)
|
||||
|
||||
seen = set()
|
||||
|
||||
def is_seen(value):
|
||||
if value in seen:
|
||||
return True
|
||||
seen.add(value)
|
||||
return False
|
||||
|
||||
table = tabulate.tabulate(
|
||||
[
|
||||
[
|
||||
'' if is_seen(bento.name) else bento.name,
|
||||
bento.tag,
|
||||
bento.repo.name,
|
||||
bento.pretty_gpu,
|
||||
','.join(bento.platforms),
|
||||
]
|
||||
for bento in bentos
|
||||
],
|
||||
headers=['model', 'version', 'repo', 'required VRAM', 'platforms'],
|
||||
)
|
||||
output(table)
|
||||
|
||||
|
||||
def ensure_bento(model: str, target: Optional[DeploymentTarget] = None, repo_name: Optional[str] = None) -> BentoInfo:
|
||||
bentos = list_bento(model, repo_name=repo_name)
|
||||
if len(bentos) == 0:
|
||||
output(f'No model found for {model}', style='red')
|
||||
raise typer.Exit(1)
|
||||
|
||||
if len(bentos) == 1:
|
||||
if FORCE.get():
|
||||
output(f'Found model {bentos[0]}', style='green')
|
||||
return bentos[0]
|
||||
if target is None:
|
||||
return bentos[0]
|
||||
if can_run(bentos[0], target) <= 0:
|
||||
return bentos[0]
|
||||
output(f'Found model {bentos[0]}', style='green')
|
||||
return bentos[0]
|
||||
|
||||
if target is None:
|
||||
output(f'Multiple models match {model}, did you mean one of these?', style='red')
|
||||
for bento in bentos:
|
||||
output(f' {bento}')
|
||||
raise typer.Exit(1)
|
||||
|
||||
filtered = [bento for bento in bentos if can_run(bento, target) > 0]
|
||||
if len(filtered) == 0:
|
||||
output(f'No deployment target found for {model}', style='red')
|
||||
raise typer.Exit(1)
|
||||
|
||||
if len(filtered) == 0:
|
||||
output(f'No deployment target found for {model}', style='red')
|
||||
raise typer.Exit(1)
|
||||
|
||||
if len(bentos) > 1:
|
||||
output(f'Multiple models match {model}, did you mean one of these?', style='red')
|
||||
for bento in bentos:
|
||||
output(f' {bento}')
|
||||
raise typer.Exit(1)
|
||||
|
||||
return bentos[0]
|
||||
|
||||
|
||||
def list_bento(
|
||||
tag: typing.Optional[str] = None, repo_name: typing.Optional[str] = None, include_alias: bool = False
|
||||
) -> typing.List[BentoInfo]:
|
||||
ensure_repo_updated()
|
||||
|
||||
if repo_name is not None:
|
||||
config = load_config()
|
||||
if repo_name not in config.repos:
|
||||
output(f'Repo `{repo_name}` not found, did you mean one of these?')
|
||||
for repo_name in config.repos:
|
||||
output(f' {repo_name}')
|
||||
raise typer.Exit(1)
|
||||
|
||||
if not tag:
|
||||
glob_pattern = 'bentoml/bentos/*/*'
|
||||
elif ':' in tag:
|
||||
bento_name, version = tag.split(':')
|
||||
glob_pattern = f'bentoml/bentos/{bento_name}/{version}'
|
||||
else:
|
||||
glob_pattern = f'bentoml/bentos/{tag}/*'
|
||||
|
||||
model_list = []
|
||||
config = load_config()
|
||||
for _repo_name, repo_url in config.repos.items():
|
||||
if repo_name is not None and _repo_name != repo_name:
|
||||
continue
|
||||
repo = parse_repo_url(repo_url, _repo_name)
|
||||
for path in repo.path.glob(glob_pattern):
|
||||
if path.is_dir() and (path / 'bento.yaml').exists():
|
||||
model = BentoInfo(repo=repo, path=path)
|
||||
elif path.is_file():
|
||||
with open(path) as f:
|
||||
origin_name = f.read().strip()
|
||||
origin_path = path.parent / origin_name
|
||||
model = BentoInfo(alias=path.name, repo=repo, path=origin_path)
|
||||
else:
|
||||
model = None
|
||||
if model:
|
||||
model_list.append(model)
|
||||
model_list.sort(key=lambda x: x.tag)
|
||||
if not include_alias:
|
||||
seen = set()
|
||||
model_list = [
|
||||
x
|
||||
for x in model_list
|
||||
if not (
|
||||
f"{x.bento_yaml['name']}:{x.bento_yaml['version']}" in seen
|
||||
or seen.add(f"{x.bento_yaml['name']}:{x.bento_yaml['version']}")
|
||||
)
|
||||
]
|
||||
return model_list
|
||||
179
src/openllm/repo.py
Normal file
179
src/openllm/repo.py
Normal file
@@ -0,0 +1,179 @@
|
||||
import datetime
|
||||
import re
|
||||
import shutil
|
||||
|
||||
import pyaml
|
||||
import questionary
|
||||
import typer
|
||||
|
||||
from openllm.analytic import OpenLLMTyper
|
||||
from openllm.common import INTERACTIVE, REPO_DIR, VERBOSE_LEVEL, RepoInfo, load_config, output, save_config
|
||||
|
||||
UPDATE_INTERVAL = datetime.timedelta(days=3)
|
||||
|
||||
app = OpenLLMTyper(help='manage repos')
|
||||
|
||||
|
||||
@app.command()
|
||||
def list(verbose: bool = False):
|
||||
if verbose:
|
||||
VERBOSE_LEVEL.set(20)
|
||||
config = load_config()
|
||||
pyaml.pprint([parse_repo_url(repo, name) for name, repo in config.repos.items()], sort_dicts=False, sort_keys=False)
|
||||
|
||||
|
||||
@app.command()
|
||||
def remove(name: str):
|
||||
config = load_config()
|
||||
if name not in config.repos:
|
||||
output(f'Repo {name} does not exist', style='red')
|
||||
return
|
||||
|
||||
del config.repos[name]
|
||||
save_config(config)
|
||||
output(f'Repo {name} removed', style='green')
|
||||
|
||||
|
||||
def _complete_alias(repo_name: str):
|
||||
from openllm.model import list_bento
|
||||
|
||||
for bento in list_bento(repo_name=repo_name):
|
||||
alias = bento.labels.get('openllm_alias', '').strip()
|
||||
if alias:
|
||||
for a in alias.split(','):
|
||||
with open(bento.path.parent / a, 'w') as f:
|
||||
f.write(bento.version)
|
||||
|
||||
|
||||
@app.command()
|
||||
def update():
|
||||
import dulwich
|
||||
import dulwich.errors
|
||||
import dulwich.porcelain
|
||||
|
||||
config = load_config()
|
||||
repos_in_use = set()
|
||||
for repo_name, repo in config.repos.items():
|
||||
repo = parse_repo_url(repo, repo_name)
|
||||
repos_in_use.add((repo.server, repo.owner, repo.repo))
|
||||
if repo.path.exists(): # TODO: use update instead of remove and clone
|
||||
shutil.rmtree(repo.path, ignore_errors=True)
|
||||
if not repo.path.exists():
|
||||
repo.path.parent.mkdir(parents=True, exist_ok=True)
|
||||
try:
|
||||
dulwich.porcelain.clone(
|
||||
f'https://{repo.server}/{repo.owner}/{repo.repo}.git',
|
||||
str(repo.path),
|
||||
checkout=True,
|
||||
depth=1,
|
||||
branch=repo.branch,
|
||||
)
|
||||
output('')
|
||||
output(f'Repo `{repo.name}` updated', style='green')
|
||||
except:
|
||||
shutil.rmtree(repo.path, ignore_errors=True)
|
||||
output(f'Failed to clone repo {repo.name}', style='red')
|
||||
else:
|
||||
try:
|
||||
import dulwich.porcelain
|
||||
|
||||
dulwich.porcelain.pull(
|
||||
str(repo.path), f'https://{repo.server}/{repo.owner}/{repo.repo}.git', refspecs=repo.branch, force=True
|
||||
)
|
||||
dulwich.porcelain.clean(str(repo.path), str(repo.path))
|
||||
output('')
|
||||
output(f'Repo `{repo.name}` updated', style='green')
|
||||
except:
|
||||
shutil.rmtree(repo.path, ignore_errors=True)
|
||||
output(f'Failed to update repo {repo.name}', style='red')
|
||||
for c in REPO_DIR.glob('*/*/*'):
|
||||
repo_spec = tuple(c.parts[-3:])
|
||||
if repo_spec not in repos_in_use:
|
||||
shutil.rmtree(c, ignore_errors=True)
|
||||
output(f'Removed unused repo cache {c}')
|
||||
with open(REPO_DIR / 'last_update', 'w') as f:
|
||||
f.write(datetime.datetime.now().isoformat())
|
||||
for repo_name in config.repos:
|
||||
_complete_alias(repo_name)
|
||||
|
||||
|
||||
def ensure_repo_updated():
|
||||
last_update_file = REPO_DIR / 'last_update'
|
||||
if not last_update_file.exists():
|
||||
if INTERACTIVE.get():
|
||||
choice = questionary.confirm(
|
||||
'The repo cache is never updated, do you want to update it to fetch the latest model list?'
|
||||
).ask()
|
||||
if choice:
|
||||
update()
|
||||
return
|
||||
else:
|
||||
output(
|
||||
'The repo cache is never updated, please run `openllm repo update` to fetch the latest model list', style='red'
|
||||
)
|
||||
raise typer.Exit(1)
|
||||
last_update = datetime.datetime.fromisoformat(last_update_file.read_text().strip())
|
||||
if datetime.datetime.now() - last_update > UPDATE_INTERVAL:
|
||||
if INTERACTIVE.get():
|
||||
choice = questionary.confirm(
|
||||
'The repo cache is outdated, do you want to update it to fetch the latest model list?'
|
||||
).ask()
|
||||
if choice:
|
||||
update()
|
||||
else:
|
||||
output(
|
||||
'The repo cache is outdated, please run `openllm repo update` to fetch the latest model list', style='yellow'
|
||||
)
|
||||
|
||||
|
||||
GIT_REPO_RE = re.compile(r'git\+https://(?P<server>.+)/(?P<owner>.+)/(?P<repo>.+?)(@(?P<branch>.+))?$')
|
||||
|
||||
|
||||
def parse_repo_url(repo_url, repo_name=None) -> RepoInfo:
|
||||
"""
|
||||
parse the git repo url to server, owner, repo name, branch
|
||||
>>> parse_repo_url('git+https://github.com/bentoml/bentovllm@main')
|
||||
('github.com', 'bentoml', 'bentovllm', 'main')
|
||||
|
||||
>>> parse_repo_url('git+https://github.com/bentoml/bentovllm')
|
||||
('github.com', 'bentoml', 'bentovllm', 'main')
|
||||
"""
|
||||
match = GIT_REPO_RE.match(repo_url)
|
||||
if not match:
|
||||
raise ValueError(f'Invalid git repo url: {repo_url}')
|
||||
server = match.group('server')
|
||||
owner = match.group('owner')
|
||||
repo = match.group('repo')
|
||||
branch = match.group('branch') or 'main'
|
||||
path = REPO_DIR / server / owner / repo
|
||||
return RepoInfo(
|
||||
name=repo if repo_name is None else repo_name,
|
||||
url=repo_url,
|
||||
server=server,
|
||||
owner=owner,
|
||||
repo=repo,
|
||||
branch=branch,
|
||||
path=path,
|
||||
)
|
||||
|
||||
|
||||
@app.command()
|
||||
def add(name: str, repo: str):
|
||||
name = name.lower()
|
||||
if not name.isidentifier():
|
||||
output(f'Invalid repo name: {name}, should only contain letters, numbers and underscores', style='red')
|
||||
return
|
||||
|
||||
config = load_config()
|
||||
if name in config.repos:
|
||||
override = questionary.confirm(f'Repo {name} already exists({config.repos[name]}), override?').ask()
|
||||
if not override:
|
||||
return
|
||||
|
||||
config.repos[name] = repo
|
||||
save_config(config)
|
||||
output(f'Repo {name} added', style='green')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
app()
|
||||
118
src/openllm/venv.py
Normal file
118
src/openllm/venv.py
Normal file
@@ -0,0 +1,118 @@
|
||||
import functools
|
||||
import os
|
||||
import pathlib
|
||||
import shutil
|
||||
import typing
|
||||
from typing import Iterable
|
||||
|
||||
import typer
|
||||
|
||||
from openllm.common import VENV_DIR, VERBOSE_LEVEL, BentoInfo, VenvSpec, output, run_command
|
||||
|
||||
|
||||
@functools.lru_cache
|
||||
def _resolve_packages(requirement: typing.Union[pathlib.Path, str]):
|
||||
from pip_requirements_parser import RequirementsFile
|
||||
|
||||
requirements_txt = RequirementsFile.from_file(str(requirement), include_nested=True)
|
||||
return requirements_txt.requirements
|
||||
|
||||
|
||||
def _filter_preheat_packages(requirements: Iterable) -> list[str]:
|
||||
PREHEAT_PIP_PACKAGES = ['torch', 'vllm']
|
||||
|
||||
deps: list[str] = []
|
||||
for req in requirements:
|
||||
if req.is_editable or req.is_local_path or req.is_url or req.is_wheel or not req.name or not req.specifier:
|
||||
continue
|
||||
for sp in req.specifier:
|
||||
if sp.operator == '==' and req.name in PREHEAT_PIP_PACKAGES:
|
||||
assert req.line is not None
|
||||
deps.append(req.line)
|
||||
break
|
||||
return deps
|
||||
|
||||
|
||||
@functools.lru_cache
|
||||
def _resolve_bento_env_specs(bento: BentoInfo):
|
||||
ver_file = bento.path / 'env' / 'python' / 'version.txt'
|
||||
assert ver_file.exists(), f'cannot find version file in {bento.path}'
|
||||
|
||||
lock_file = bento.path / 'env' / 'python' / 'requirements.lock.txt'
|
||||
if not lock_file.exists():
|
||||
lock_file = bento.path / 'env' / 'python' / 'requirements.txt'
|
||||
|
||||
reqs = _resolve_packages(lock_file)
|
||||
preheat_packages = _filter_preheat_packages(reqs)
|
||||
ver = ver_file.read_text().strip()
|
||||
return (
|
||||
VenvSpec(python_version=ver, python_packages=preheat_packages, name_prefix=f"{bento.tag.replace(':', '_')}-1-"),
|
||||
VenvSpec(
|
||||
python_version=ver, python_packages=[v.line for v in reqs], name_prefix=f"{bento.tag.replace(':', '_')}-2-"
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
def _get_lib_dir(venv: pathlib.Path) -> pathlib.Path:
|
||||
if os.name == 'nt':
|
||||
return venv / 'Lib/site-packages'
|
||||
else:
|
||||
return next(venv.glob('lib/python*')) / 'site-packages'
|
||||
|
||||
|
||||
def _ensure_venv(env_spec: VenvSpec, parrent_venv: typing.Optional[pathlib.Path] = None) -> pathlib.Path:
|
||||
venv = VENV_DIR / str(hash(env_spec))
|
||||
if venv.exists() and not (venv / 'DONE').exists():
|
||||
shutil.rmtree(venv, ignore_errors=True)
|
||||
if not venv.exists():
|
||||
output(f'Installing model dependencies({venv})...', style='green')
|
||||
|
||||
venv_py = venv / 'Scripts' / 'python.exe' if os.name == 'nt' else venv / 'bin' / 'python'
|
||||
try:
|
||||
run_command(['python', '-m', 'uv', 'venv', venv], silent=VERBOSE_LEVEL.get() < 10)
|
||||
lib_dir = _get_lib_dir(venv)
|
||||
if parrent_venv is not None:
|
||||
parent_lib_dir = _get_lib_dir(parrent_venv)
|
||||
with open(lib_dir / f'{parrent_venv.name}.pth', 'w+') as f:
|
||||
f.write(str(parent_lib_dir))
|
||||
with open(venv / 'requirements.txt', 'w') as f:
|
||||
f.write('\n'.join(sorted(env_spec.python_packages)))
|
||||
run_command(
|
||||
['python', '-m', 'uv', 'pip', 'install', '-p', str(venv_py), '-r', venv / 'requirements.txt'],
|
||||
silent=VERBOSE_LEVEL.get() < 10,
|
||||
)
|
||||
with open(venv / 'DONE', 'w') as f:
|
||||
f.write('DONE')
|
||||
except Exception:
|
||||
shutil.rmtree(venv, ignore_errors=True)
|
||||
output(f'Failed to install dependencies to {venv}. Cleaned up.', style='red')
|
||||
raise typer.Exit(1)
|
||||
output(f'Successfully installed dependencies to {venv}.', style='green')
|
||||
return venv
|
||||
else:
|
||||
return venv
|
||||
|
||||
|
||||
def _ensure_venvs(env_spec_list: Iterable[VenvSpec]) -> pathlib.Path:
|
||||
last_venv = None
|
||||
for env_spec in env_spec_list:
|
||||
last_venv = _ensure_venv(env_spec, last_venv)
|
||||
assert last_venv is not None
|
||||
return last_venv
|
||||
|
||||
|
||||
def ensure_venv(bento: BentoInfo) -> pathlib.Path:
|
||||
return _ensure_venvs(_resolve_bento_env_specs(bento))
|
||||
|
||||
|
||||
def _check_venv(env_spec: VenvSpec) -> bool:
|
||||
venv = VENV_DIR / str(hash(env_spec))
|
||||
if not venv.exists():
|
||||
return False
|
||||
if venv.exists() and not (venv / 'DONE').exists():
|
||||
return False
|
||||
return True
|
||||
|
||||
|
||||
def check_venv(bento: BentoInfo) -> bool:
|
||||
return all(_check_venv(env_spec) for env_spec in _resolve_bento_env_specs(bento))
|
||||
@@ -1,7 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &> /dev/null && pwd)
|
||||
pip install -U mypy==1.7.0 "build<1" hatchling==1.18.0 hatch-vcs==0.3.0 hatch-fancy-pypi-readme==23.1.0 hatch-mypyc==0.16.0
|
||||
HATCH_VERBOSE=3 MYPY_CONFIG_FILE_DIR="${SCRIPT_DIR}" HATCH_BUILD_HOOKS_ENABLE=1 python -m build openllm-client -w -C--global-option=--verbose "$@"
|
||||
HATCH_VERBOSE=3 MYPY_CONFIG_FILE_DIR="${SCRIPT_DIR}" HATCH_BUILD_HOOKS_ENABLE=1 python -m build openllm-core -w -C--global-option=--verbose "$@"
|
||||
HATCH_VERBOSE=3 MYPY_CONFIG_FILE_DIR="${SCRIPT_DIR}" HATCH_BUILD_HOOKS_ENABLE=1 python -m build openllm-python -w -C--global-option=--verbose "$@"
|
||||
hatch clean
|
||||
@@ -1,284 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
from __future__ import annotations
|
||||
import dataclasses
|
||||
import os
|
||||
import typing as t
|
||||
|
||||
import tomlkit
|
||||
from ghapi.all import GhApi
|
||||
|
||||
if t.TYPE_CHECKING:
|
||||
from tomlkit.items import Array, Table
|
||||
|
||||
ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||||
|
||||
_OWNER, _REPO = 'bentoml', 'openllm'
|
||||
|
||||
|
||||
@dataclasses.dataclass(frozen=True)
|
||||
class Classifier:
|
||||
identifier: t.Dict[str, str] = dataclasses.field(
|
||||
default_factory=lambda: {
|
||||
'status': 'Development Status',
|
||||
'environment': 'Environment',
|
||||
'license': 'License',
|
||||
'topic': 'Topic',
|
||||
'os': 'Operating System',
|
||||
'audience': 'Intended Audience',
|
||||
'typing': 'Typing',
|
||||
'language': 'Programming Language',
|
||||
}
|
||||
)
|
||||
joiner: str = ' :: '
|
||||
|
||||
@staticmethod
|
||||
def status() -> dict[int, str]:
|
||||
return {
|
||||
v: status
|
||||
for v, status in zip(
|
||||
range(1, 8),
|
||||
[
|
||||
'1 - Planning',
|
||||
'2 - Pre-Alpha',
|
||||
'3 - Alpha',
|
||||
'4 - Beta',
|
||||
'5 - Production/Stable',
|
||||
'6 - Mature',
|
||||
'7 - Inactive',
|
||||
],
|
||||
)
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
def apache() -> str:
|
||||
return Classifier.create_classifier('license', 'OSI Approved', 'Apache Software License')
|
||||
|
||||
@staticmethod
|
||||
def create_classifier(identifier: str, *decls: t.Any) -> str:
|
||||
cls_ = Classifier()
|
||||
if identifier not in cls_.identifier:
|
||||
raise ValueError(f'{identifier} is not yet supported (supported alias: {Classifier.identifier})')
|
||||
return cls_.joiner.join([cls_.identifier[identifier], *decls])
|
||||
|
||||
@staticmethod
|
||||
def create_python_classifier(
|
||||
implementation: list[str] | None = None, supported_version: list[str] | None = None
|
||||
) -> list[str]:
|
||||
if supported_version is None:
|
||||
supported_version = ['3.8', '3.9', '3.10', '3.11', '3.12']
|
||||
if implementation is None:
|
||||
implementation = ['CPython', 'PyPy']
|
||||
base = [
|
||||
Classifier.create_classifier('language', 'Python'),
|
||||
Classifier.create_classifier('language', 'Python', '3'),
|
||||
]
|
||||
base.append(Classifier.create_classifier('language', 'Python', '3', 'Only'))
|
||||
base.extend([Classifier.create_classifier('language', 'Python', version) for version in supported_version])
|
||||
base.extend([
|
||||
Classifier.create_classifier('language', 'Python', 'Implementation', impl) for impl in implementation
|
||||
])
|
||||
return base
|
||||
|
||||
@staticmethod
|
||||
def create_status_classifier(level: int) -> str:
|
||||
return Classifier.create_classifier('status', Classifier.status()[level])
|
||||
|
||||
|
||||
@dataclasses.dataclass(frozen=True)
|
||||
class Dependencies:
|
||||
name: str
|
||||
git_repo_url: t.Optional[str] = None
|
||||
branch: t.Optional[str] = None
|
||||
extensions: t.Optional[t.List[str]] = None
|
||||
subdirectory: t.Optional[str] = None
|
||||
requires_gpu: bool = False
|
||||
lower_constraint: t.Optional[str] = None
|
||||
upper_constraint: t.Optional[str] = None
|
||||
platform: t.Optional[t.Tuple[t.Literal['Linux', 'Windows', 'Darwin'], t.Literal['eq', 'ne']]] = None
|
||||
|
||||
def with_options(self, **kwargs: t.Any) -> Dependencies:
|
||||
return dataclasses.replace(self, **kwargs)
|
||||
|
||||
@property
|
||||
def has_constraint(self) -> bool:
|
||||
return self.lower_constraint is not None or self.upper_constraint is not None
|
||||
|
||||
@property
|
||||
def pypi_extensions(self) -> str:
|
||||
return '' if self.extensions is None else f"[{','.join(self.extensions)}]"
|
||||
|
||||
@staticmethod
|
||||
def platform_restriction(platform: t.LiteralString, op: t.Literal['eq', 'ne'] = 'eq') -> str:
|
||||
return f'platform_system{"==" if op == "eq" else "!="}"{platform}"'
|
||||
|
||||
def to_str(self) -> str:
|
||||
deps: list[str] = []
|
||||
if self.lower_constraint is not None and self.upper_constraint is not None:
|
||||
dep = f'{self.name}{self.pypi_extensions}>={self.lower_constraint},<{self.upper_constraint}'
|
||||
elif self.lower_constraint is not None:
|
||||
dep = f'{self.name}{self.pypi_extensions}>={self.lower_constraint}'
|
||||
elif self.upper_constraint is not None:
|
||||
dep = f'{self.name}{self.pypi_extensions}<{self.upper_constraint}'
|
||||
elif self.subdirectory is not None:
|
||||
dep = f'{self.name}{self.pypi_extensions} @ git+https://github.com/{self.git_repo_url}.git#subdirectory={self.subdirectory}'
|
||||
elif self.branch is not None:
|
||||
dep = f'{self.name}{self.pypi_extensions} @ git+https://github.com/{self.git_repo_url}.git@{self.branch}'
|
||||
else:
|
||||
dep = f'{self.name}{self.pypi_extensions}'
|
||||
deps.append(dep)
|
||||
if self.platform:
|
||||
deps.append(self.platform_restriction(*self.platform))
|
||||
return ';'.join(deps)
|
||||
|
||||
@classmethod
|
||||
def from_tuple(cls, *decls: t.Any) -> Dependencies:
|
||||
return cls(*decls)
|
||||
|
||||
|
||||
_LOWER_BENTOML_CONSTRAINT = '1.2.16'
|
||||
_BENTOML_EXT = ['io']
|
||||
|
||||
|
||||
fname = f'{os.path.basename(os.path.dirname(__file__))}/{os.path.basename(__file__)}'
|
||||
|
||||
|
||||
def correct_style(it: t.Any) -> t.Any:
|
||||
return it
|
||||
|
||||
|
||||
def create_classifiers() -> Array:
|
||||
arr = correct_style(tomlkit.array())
|
||||
arr.extend([
|
||||
Classifier.create_status_classifier(5),
|
||||
Classifier.create_classifier('environment', 'GPU', 'NVIDIA CUDA'),
|
||||
Classifier.create_classifier('environment', 'GPU', 'NVIDIA CUDA', '12'),
|
||||
Classifier.create_classifier('environment', 'GPU', 'NVIDIA CUDA', '11.8'),
|
||||
Classifier.create_classifier('environment', 'GPU', 'NVIDIA CUDA', '11.7'),
|
||||
Classifier.apache(),
|
||||
Classifier.create_classifier('topic', 'Scientific/Engineering', 'Artificial Intelligence'),
|
||||
Classifier.create_classifier('topic', 'Software Development', 'Libraries'),
|
||||
Classifier.create_classifier('os', 'OS Independent'),
|
||||
Classifier.create_classifier('audience', 'Developers'),
|
||||
Classifier.create_classifier('audience', 'Science/Research'),
|
||||
Classifier.create_classifier('audience', 'System Administrators'),
|
||||
Classifier.create_classifier('typing', 'Typed'),
|
||||
*Classifier.create_python_classifier(),
|
||||
])
|
||||
return arr.multiline(True)
|
||||
|
||||
|
||||
def create_url_table(_info: t.Any) -> Table:
|
||||
table = tomlkit.table()
|
||||
_urls = {
|
||||
'Blog': 'https://modelserving.com',
|
||||
'Chat': 'https://discord.gg/openllm',
|
||||
'Documentation': 'https://github.com/bentoml/openllm#readme',
|
||||
'GitHub': _info.html_url,
|
||||
'History': f'{_info.html_url}/blob/main/CHANGELOG.md',
|
||||
'Homepage': _info.homepage,
|
||||
'Tracker': f'{_info.html_url}/issues',
|
||||
'Twitter': 'https://twitter.com/bentomlai',
|
||||
}
|
||||
table.update({k: v for k, v in sorted(_urls.items())})
|
||||
return table
|
||||
|
||||
|
||||
def build_system() -> Table:
|
||||
table = tomlkit.table()
|
||||
table.add('build-backend', 'hatchling.build')
|
||||
requires_array = correct_style(tomlkit.array())
|
||||
requires_array.extend(['hatchling==1.18.0', 'hatch-vcs==0.3.0', 'hatch-fancy-pypi-readme==23.1.0'])
|
||||
table.add('requires', requires_array.multiline(True))
|
||||
return table
|
||||
|
||||
|
||||
def keywords() -> Array:
|
||||
arr = correct_style(tomlkit.array())
|
||||
arr.extend([
|
||||
'MLOps',
|
||||
'AI',
|
||||
'BentoML',
|
||||
'Model Serving',
|
||||
'Model Deployment',
|
||||
'LLMOps',
|
||||
'Falcon',
|
||||
'Vicuna',
|
||||
'Llama 2',
|
||||
'Fine tuning',
|
||||
'Serverless',
|
||||
'Large Language Model',
|
||||
'Generative AI',
|
||||
'StableLM',
|
||||
'Alpaca',
|
||||
'PyTorch',
|
||||
'Mistral',
|
||||
'vLLM',
|
||||
'Transformers',
|
||||
])
|
||||
return arr.multiline(True)
|
||||
|
||||
|
||||
def build_cli_extensions() -> Table:
|
||||
table = tomlkit.table()
|
||||
table.update({'openllm': '_openllm_tiny._entrypoint:cli'})
|
||||
return table
|
||||
|
||||
|
||||
def main(args) -> int:
|
||||
api = GhApi(owner=_OWNER, repo=_REPO, authenticate=False)
|
||||
_info = api.repos.get()
|
||||
with open(os.path.join(ROOT, 'openllm-python', 'pyproject.toml'), 'r') as f:
|
||||
pyproject = tomlkit.parse(f.read())
|
||||
|
||||
if args.release_version is not None:
|
||||
release_version = args.release_version
|
||||
else:
|
||||
try:
|
||||
release_version = api.repos.get_latest_release()['tag_name'].lstrip('v')
|
||||
except Exception as err:
|
||||
raise err
|
||||
|
||||
_BASE_DEPENDENCIES = [
|
||||
Dependencies(name='bentoml', extensions=_BENTOML_EXT, lower_constraint=_LOWER_BENTOML_CONSTRAINT),
|
||||
Dependencies(name='openllm-client', lower_constraint=release_version),
|
||||
Dependencies(name='openllm-core', lower_constraint=release_version),
|
||||
Dependencies(name='safetensors'),
|
||||
Dependencies(name='vllm', lower_constraint='0.4.3'),
|
||||
Dependencies(name='ghapi'),
|
||||
Dependencies(name='einops'),
|
||||
Dependencies(name='sentencepiece'),
|
||||
Dependencies(name='scipy'),
|
||||
Dependencies(name='click', lower_constraint='8.1.3'),
|
||||
Dependencies(name='cuda-python', platform=('Darwin', 'ne')),
|
||||
]
|
||||
|
||||
dependencies_array = correct_style(tomlkit.array())
|
||||
dependencies_array.extend([v.to_str() for v in _BASE_DEPENDENCIES])
|
||||
# dynamic field
|
||||
dyn_arr = tomlkit.array()
|
||||
dyn_arr.extend(['version', 'readme'])
|
||||
|
||||
pyproject['build-system'] = build_system()
|
||||
pyproject['project']['classifiers'] = create_classifiers()
|
||||
pyproject['project']['dependencies'] = dependencies_array.multiline(True)
|
||||
pyproject['project']['description'] = f'{_info.name}: {_info.description}'
|
||||
pyproject['project']['dynamic'] = dyn_arr
|
||||
pyproject['project']['keywords'] = keywords()
|
||||
pyproject['project']['license'] = _info.license.spdx_id
|
||||
pyproject['project']['name'] = f'{_info.name.lower()}'
|
||||
pyproject['project']['requires-python'] = '>=3.8'
|
||||
|
||||
pyproject['project']['urls'] = create_url_table(_info)
|
||||
pyproject['project']['scripts'] = build_cli_extensions()
|
||||
|
||||
with open(os.path.join(ROOT, 'openllm-python', 'pyproject.toml'), 'w') as f:
|
||||
f.write(tomlkit.dumps(pyproject))
|
||||
return 0
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
import argparse
|
||||
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument('--release-version', type=str, default=None)
|
||||
raise SystemExit(main(parser.parse_args()))
|
||||
@@ -1,53 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
from __future__ import annotations
|
||||
from collections import defaultdict
|
||||
from pathlib import Path
|
||||
|
||||
import orjson
|
||||
from lxml import etree
|
||||
|
||||
ROOT = Path(__file__).resolve().parent.parent
|
||||
|
||||
PACKAGES = {'openllm-python/src/openllm/': 'openllm'}
|
||||
|
||||
|
||||
def main() -> int:
|
||||
coverage_report = ROOT / 'coverage.xml'
|
||||
root = etree.fromstring(coverage_report.read_text())
|
||||
|
||||
raw_package_data: defaultdict[str, dict[str, int]] = defaultdict(lambda: {'hits': 0, 'misses': 0})
|
||||
for package in root.find('packages'):
|
||||
for module in package.find('classes'):
|
||||
filename = module.attrib['filename']
|
||||
for relative_path, package_name in PACKAGES.items():
|
||||
if filename.startswith(relative_path):
|
||||
data = raw_package_data[package_name]
|
||||
break
|
||||
else:
|
||||
message = f'unknown package: {module}'
|
||||
raise ValueError(message)
|
||||
|
||||
for line in module.find('lines'):
|
||||
if line.attrib['hits'] == '1':
|
||||
data['hits'] += 1
|
||||
else:
|
||||
data['misses'] += 1
|
||||
|
||||
total_statements_covered = 0
|
||||
total_statements = 0
|
||||
coverage_data = {}
|
||||
for package_name, data in sorted(raw_package_data.items()):
|
||||
statements_covered = data['hits']
|
||||
statements = statements_covered + data['misses']
|
||||
total_statements_covered += statements_covered
|
||||
total_statements += statements
|
||||
coverage_data[package_name] = {'statements_covered': statements_covered, 'statements': statements}
|
||||
coverage_data['total'] = {'statements_covered': total_statements_covered, 'statements': total_statements}
|
||||
|
||||
coverage_summary = ROOT / 'coverage-summary.json'
|
||||
coverage_summary.write_text(orjson.dumps(coverage_data, option=orjson.OPT_INDENT_2).decode(), encoding='utf-8')
|
||||
return 0
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
raise SystemExit(main())
|
||||
@@ -1,14 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
set -ex
|
||||
|
||||
GIT_ROOT=$(git rev-parse --show-toplevel)
|
||||
|
||||
cd "$GIT_ROOT" || exit 1
|
||||
|
||||
[[ -x "$(command -v docker)" ]] || (
|
||||
echo "docker not found. Make sure to have docker running to run this job."
|
||||
exit 1
|
||||
)
|
||||
|
||||
RATCHET_EXP_KEEP_NEWLINES=true find "${GIT_ROOT}/.github/workflows" -type f -iname '*.yml' -exec docker run -it --rm -v "${PWD}":"${PWD}" -w "${PWD}" -e RATCHET_EXP_KEEP_NEWLINES=true ghcr.io/sethvargo/ratchet:0.4.0 pin {} \;
|
||||
@@ -1,91 +0,0 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import httpx,os,dataclasses,datetime,time,argparse,typing as t
|
||||
if (ENV:=os.getenv("PAPERSPACE_API_KEY")) is None:raise RuntimeError('This script requires setting "PAPERSPACE_API_KEY"')
|
||||
HEADERS=httpx.Headers({'Authorization': f'Bearer {ENV}', 'Accept': 'application/json'})
|
||||
API_URL='https://api.paperspace.com/v1'
|
||||
|
||||
@dataclasses.dataclass
|
||||
class Machine:
|
||||
id:str
|
||||
inner:httpx.Client=dataclasses.field(default_factory=lambda:httpx.Client(headers=HEADERS,base_url=API_URL,timeout=60),repr=False)
|
||||
def close(self):self.inner.close()
|
||||
def __del__(self):self.close()
|
||||
def __enter__(self):return self
|
||||
def __exit__(self, *_: t.Any)->None:self.close()
|
||||
@property
|
||||
def metadata(self)->dict[str,t.Any]:return self.inner.get(f'/machines/{self.id}').json()
|
||||
@property
|
||||
def status(self)->t.Literal['off','ready','stopping','starting']:return self.metadata['state']
|
||||
def start(self)->bool:
|
||||
response=self.inner.patch(f'/machines/{self.id}/start')
|
||||
if response.status_code==400 or self.status=='ready':
|
||||
print('machine is already running')
|
||||
return False
|
||||
elif response.status_code!=200:raise ValueError(f'Error while starting machine: {response.json()}')
|
||||
return True
|
||||
def stop(self)->bool:
|
||||
response=self.inner.patch(f'/machines/{self.id}/stop')
|
||||
if response.status_code==400 or self.status=='off':
|
||||
print('machine is already off')
|
||||
return False
|
||||
elif response.status_code!=200:raise ValueError(f'Error while stopping machine {response.json()}')
|
||||
return True
|
||||
@classmethod
|
||||
def ci(cls,template_id:str)->Machine:
|
||||
client = httpx.Client(headers=HEADERS,base_url=API_URL,timeout=60)
|
||||
machines=client.get('/machines',params=dict(limit=1,name='openllm-ci')).json()
|
||||
if len(machines['items']) == 1:
|
||||
return cls(id=machines['items'][0]['id'],inner=client)
|
||||
response=client.post('/machines',json=dict(
|
||||
name=f'openllm-ci-{datetime.datetime.now().timestamp()}', machineType='A4000',templateId=template_id,
|
||||
networkId=os.getenv("PAPERSPACE_NETWORK_ID"), diskSize=500,region='ny2',publicIpType='dynamic',startOnCreate=True))
|
||||
if response.status_code!=200:raise ValueError(f'Failed while creating a machine: {response.json()}')
|
||||
return cls(id=response.json()['data']['id'],inner=client)
|
||||
def actions(self,ip:bool=False,id:bool=False)->str:
|
||||
if ip:return self.metadata["publicIp"]
|
||||
if id:return self.id
|
||||
raise ValueError('cannot be all false.')
|
||||
def main():
|
||||
parser=argparse.ArgumentParser()
|
||||
group=parser.add_mutually_exclusive_group(required=True)
|
||||
group.add_argument('--start',metavar='ID')
|
||||
group.add_argument('--stop',metavar='ID')
|
||||
group.add_argument('--delete',metavar='ID')
|
||||
group.add_argument('--ci-template',metavar='ID')
|
||||
parser.add_argument('--output-ip',action='store_true')
|
||||
parser.add_argument('--output-id',action='store_true')
|
||||
args=parser.parse_args()
|
||||
|
||||
if args.ci_template:
|
||||
machine=Machine.ci(args.ci_template)
|
||||
while machine.status!='ready':time.sleep(5)
|
||||
print(machine.actions(args.output_ip,args.output_id))
|
||||
machine.close()
|
||||
elif args.delete:
|
||||
with httpx.Client(headers=HEADERS, base_url=API_URL, timeout=60) as client:
|
||||
response=client.delete(f'/machines/{args.delete}')
|
||||
if response.status_code!=200:
|
||||
print('Error while deleting machine', response.json())
|
||||
return 1
|
||||
elif args.start:
|
||||
with Machine(id=args.start) as machine:
|
||||
if machine.start():
|
||||
while machine.status!='ready':
|
||||
print('Waiting for machine to be ready...')
|
||||
time.sleep(5)
|
||||
else:
|
||||
print('Failed to start machine "%s"', machine.id)
|
||||
return 1
|
||||
elif args.stop:
|
||||
with Machine(id=args.stop) as machine:
|
||||
if machine.stop():
|
||||
while machine.status!='ready':
|
||||
print('Waiting for machine to stop...')
|
||||
time.sleep(5)
|
||||
else:
|
||||
print('Failed to stop machine')
|
||||
return 1
|
||||
return 0
|
||||
|
||||
if __name__ == "__main__":raise SystemExit(main())
|
||||
@@ -1,14 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
set -eo pipefail
|
||||
|
||||
GIT_ROOT="$(git rev-parse --show-toplevel)"
|
||||
cd "$GIT_ROOT" || exit 1
|
||||
|
||||
mirror() {
|
||||
cp $1 $2
|
||||
}
|
||||
|
||||
mirror README.md openllm-python/README.md
|
||||
mirror LICENSE.md openllm-python/LICENSE.md
|
||||
mirror CHANGELOG.md openllm-python/CHANGELOG.md
|
||||
@@ -1,6 +0,0 @@
|
||||
ipython
|
||||
jupyter
|
||||
tomlkit
|
||||
ghapi
|
||||
pre-commit
|
||||
orjson
|
||||
@@ -1,148 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
set -e -o pipefail
|
||||
|
||||
# Function to print script usage
|
||||
print_usage() {
|
||||
echo "Usage: $0 [--release <major|minor|patch|alpha>]"
|
||||
}
|
||||
|
||||
# Function to validate release argument
|
||||
validate_release() {
|
||||
local release=$1
|
||||
|
||||
if [[ $release == "major" || $release == "minor" || $release == "patch" || $release == "alpha" ]]; then
|
||||
return 0
|
||||
else
|
||||
return 1
|
||||
fi
|
||||
}
|
||||
|
||||
check_membership() {
|
||||
local org="BentoML"
|
||||
local username=$(gh api user | jq -r '.login')
|
||||
if gh api orgs/$org/members/$username -q '.message' | grep -q "Not Found"; then
|
||||
echo "ERROR: You must be a member of $org to run this script."
|
||||
exit 1
|
||||
fi
|
||||
}
|
||||
|
||||
for cmd in gh jq; do
|
||||
if ! command -v "$cmd" @ >&1 >/dev/null; then
|
||||
echo "ERROR: $cmd not installed. Aborting..."
|
||||
exit 1
|
||||
fi
|
||||
done
|
||||
|
||||
check_membership
|
||||
|
||||
# Check if release flag is provided
|
||||
if [[ $1 == "--release" ]]; then
|
||||
# Check if release argument is provided
|
||||
if [[ -z $2 ]]; then
|
||||
echo "Error: No release argument provided."
|
||||
print_usage
|
||||
exit 1
|
||||
fi
|
||||
|
||||
release=$2
|
||||
|
||||
if ! validate_release "$release"; then
|
||||
echo "Error: Invalid release argument. Only 'major', 'minor', 'patch', or 'alpha' are allowed."
|
||||
print_usage
|
||||
exit 1
|
||||
fi
|
||||
else
|
||||
echo "Error: Unknown option or no option provided."
|
||||
print_usage
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Get the current version and separate the alpha part if it exists
|
||||
version="$(git describe --tags "$(git rev-list --tags --max-count=1)")"
|
||||
VERSION="${version#v}"
|
||||
|
||||
# Initialize variables for alpha versioning
|
||||
ALPHA=""
|
||||
ALPHA_NUM=0
|
||||
|
||||
# Check if current version is an alpha version and split accordingly
|
||||
if [[ $VERSION =~ -alpha ]]; then
|
||||
IFS='-' read -r BASE_VERSION ALPHA <<<"$VERSION"
|
||||
if [[ $ALPHA =~ [.] ]]; then
|
||||
IFS='.' read -r ALPHA ALPHA_NUM <<<"$ALPHA"
|
||||
fi
|
||||
else
|
||||
BASE_VERSION="$VERSION"
|
||||
fi
|
||||
|
||||
# Save the current value of IFS to restore it later and split the base version
|
||||
OLD_IFS=$IFS
|
||||
IFS='.'
|
||||
read -ra VERSION_BITS <<<"$BASE_VERSION"
|
||||
IFS=$OLD_IFS
|
||||
|
||||
# Assign split version numbers
|
||||
VNUM1=${VERSION_BITS[0]}
|
||||
VNUM2=${VERSION_BITS[1]}
|
||||
VNUM3=${VERSION_BITS[2]}
|
||||
|
||||
# Adjust the version numbers based on the release type
|
||||
if [[ $release == 'major' ]]; then
|
||||
VNUM1=$((VNUM1 + 1))
|
||||
VNUM2=0
|
||||
VNUM3=0
|
||||
ALPHA="" # Reset alpha for major release
|
||||
elif [[ $release == 'minor' ]]; then
|
||||
if [[ -n $ALPHA ]]; then
|
||||
ALPHA="" # Remove alpha suffix for minor release from an alpha version
|
||||
else
|
||||
VNUM2=$((VNUM2 + 1))
|
||||
VNUM3=0
|
||||
fi
|
||||
elif [[ $release == 'patch' ]]; then
|
||||
VNUM3=$((VNUM3 + 1))
|
||||
ALPHA="" # Reset alpha for patch release
|
||||
elif [[ $release == 'alpha' ]]; then
|
||||
if [ -n "$ALPHA" ]; then
|
||||
ALPHA_NUM=$((ALPHA_NUM + 1))
|
||||
else
|
||||
VNUM2=$((VNUM2 + 1))
|
||||
VNUM3=0
|
||||
ALPHA="alpha"
|
||||
ALPHA_NUM=0
|
||||
fi
|
||||
fi
|
||||
|
||||
# Construct the new version string
|
||||
if [ -n "$ALPHA" ]; then
|
||||
if ((ALPHA_NUM > 0)); then
|
||||
RELEASE_TAG="v$VNUM1.$VNUM2.$VNUM3-alpha.$ALPHA_NUM"
|
||||
else
|
||||
RELEASE_TAG="v$VNUM1.$VNUM2.$VNUM3-alpha"
|
||||
fi
|
||||
else
|
||||
RELEASE_TAG="v$VNUM1.$VNUM2.$VNUM3"
|
||||
fi
|
||||
|
||||
echo "Releasing version: $RELEASE_TAG"
|
||||
|
||||
if [[ -v DRYRUN ]]; then
|
||||
exit 0
|
||||
fi
|
||||
|
||||
echo "Running release actions (create-releases.yml)..."
|
||||
echo '{"release_type": "'"$release"'"}' | gh workflow run create-releases.yml --repo bentoml/openllm --json
|
||||
|
||||
sleep 20
|
||||
|
||||
set -x
|
||||
|
||||
echo "Waiting for new tags to be released from 'create-releases.yml'"
|
||||
while true; do
|
||||
git pull --autostash --no-edit --gpg-sign --ff origin main
|
||||
if git ls-remote -t --exit-code origin "refs/tags/${RELEASE_TAG}" &>/dev/null; then
|
||||
break
|
||||
fi
|
||||
sleep 10
|
||||
done
|
||||
@@ -1,9 +0,0 @@
|
||||
pytest
|
||||
pytest-cov
|
||||
pytest-mock
|
||||
pytest-randomly
|
||||
pytest-rerunfailures
|
||||
pytest-asyncio>=0.21.0
|
||||
coverage[toml]
|
||||
filelock>=3.7.1
|
||||
pytest-xdist[psutil]
|
||||
@@ -1,27 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
set -e
|
||||
|
||||
DEBUG=${DEBUG:-false}
|
||||
[[ "${DEBUG}" == "true" ]] && set -x
|
||||
|
||||
GIT_ROOT=$(git rev-parse --show-toplevel)
|
||||
|
||||
cd "$GIT_ROOT" || exit 1
|
||||
|
||||
[[ -x "$(command -v docker)" ]] || (
|
||||
echo "docker not found. Make sure to have docker running to run this job."
|
||||
exit 1
|
||||
)
|
||||
|
||||
docker version &>/dev/null || (
|
||||
echo "docker is not healthy. Make sure to have docker running"
|
||||
exit 1
|
||||
)
|
||||
|
||||
[[ -z "${ACTIONS_TOKEN}" ]] && (
|
||||
echo "ACTIONS_TOKEN not found. Make sure to have ACTIONS_TOKEN set to run this job."
|
||||
exit 1
|
||||
)
|
||||
|
||||
find "${GIT_ROOT}/.github/workflows" -type f -iname '*.yml' -exec docker run --rm -v "${PWD}":"${PWD}" -w "${PWD}" -e ACTIONS_TOKEN -e RATCHET_EXP_KEEP_NEWLINES=true ghcr.io/sethvargo/ratchet:0.4.0 update {} \;
|
||||
@@ -1,170 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
from __future__ import annotations
|
||||
import os, sys
|
||||
from pathlib import Path
|
||||
|
||||
# currently we are assuming the indentatio level is 2 for comments
|
||||
START_COMMENT = f'# {os.path.basename(__file__)}: start\n'
|
||||
END_COMMENT = f'# {os.path.basename(__file__)}: stop\n'
|
||||
# Stubs for auto class
|
||||
START_AUTO_STUBS_COMMENT = f'# {os.path.basename(__file__)}: auto stubs start\n'
|
||||
END_AUTO_STUBS_COMMENT = f'# {os.path.basename(__file__)}: auto stubs stop\n'
|
||||
# Stubs for actual imports
|
||||
START_IMPORT_STUBS_COMMENT = f'# {os.path.basename(__file__)}: import stubs start\n'
|
||||
END_IMPORT_STUBS_COMMENT = f'# {os.path.basename(__file__)}: import stubs stop\n'
|
||||
|
||||
ROOT = Path(__file__).parent.parent
|
||||
_TARGET_FILE = ROOT / 'openllm-core' / 'src' / 'openllm_core' / '_configuration.py'
|
||||
_TARGET_AUTO_FILE = ROOT / 'openllm-core' / 'src' / 'openllm_core' / 'config' / 'configuration_auto.py'
|
||||
_TARGET_CORE_INIT_FILE = ROOT / 'openllm-core' / 'src' / 'openllm_core' / 'config' / '__init__.py'
|
||||
_TARGET_INIT_FILE = ROOT / 'openllm-python' / 'src' / 'openllm' / '__init__.pyi'
|
||||
_TARGET_IMPORT_UTILS_FILE = ROOT / 'openllm-core' / 'src' / 'openllm_core' / 'utils' / 'import_utils.pyi'
|
||||
|
||||
sys.path.insert(0, (ROOT / 'openllm-core' / 'src').__fspath__())
|
||||
from openllm_core._configuration import GenerationConfig, ModelSettings
|
||||
from openllm_core.config.configuration_auto import CONFIG_MAPPING_NAMES
|
||||
from openllm_core.utils import codegen, import_utils as iutils
|
||||
# from openllm_core.utils.peft import PeftType
|
||||
|
||||
|
||||
def process_annotations(annotations: str) -> str:
|
||||
if 'NotRequired' in annotations:
|
||||
return annotations[len('NotRequired[') : -1]
|
||||
elif 'Required' in annotations:
|
||||
return annotations[len('Required[') : -1]
|
||||
else:
|
||||
return annotations
|
||||
|
||||
|
||||
_transformed = {'fine_tune_strategies': 't.Dict[AdapterType, FineTuneConfig]'}
|
||||
|
||||
|
||||
def main() -> int:
|
||||
with _TARGET_FILE.open('r') as f:
|
||||
processed = f.readlines()
|
||||
|
||||
start_idx, end_idx = processed.index(' ' * 2 + START_COMMENT), processed.index(' ' * 2 + END_COMMENT)
|
||||
|
||||
# NOTE: inline runtime __getitem__ overload process
|
||||
lines: list[str] = []
|
||||
lines.append(' ' * 2 + '# NOTE: ModelSettings arguments\n')
|
||||
for keys, ForwardRef in codegen.get_annotations(ModelSettings).items():
|
||||
lines.extend([
|
||||
' ' * 2 + line
|
||||
for line in [
|
||||
'@overload\n',
|
||||
f"def __getitem__(self, item: t.Literal['{keys}']) -> {process_annotations(ForwardRef.__forward_arg__)}: ...\n",
|
||||
]
|
||||
])
|
||||
lines.append(' ' * 2 + '# NOTE: GenerationConfig arguments\n')
|
||||
generation_config_anns = codegen.get_annotations(GenerationConfig)
|
||||
for keys, type_pep563 in generation_config_anns.items():
|
||||
lines.extend([
|
||||
' ' * 2 + line
|
||||
for line in ['@overload\n', f"def __getitem__(self, item: t.Literal['{keys}']) -> {type_pep563}: ...\n"]
|
||||
])
|
||||
# backward compatible
|
||||
lines.extend([
|
||||
' ' * 2 + line
|
||||
for line in [
|
||||
'@overload\n',
|
||||
"def __getitem__(self, item: t.Literal['max_new_tokens']) -> int: ...\n",
|
||||
'@overload\n',
|
||||
"def __getitem__(self, item: t.Literal['start_name']) -> str: ...\n",
|
||||
'@overload\n',
|
||||
"def __getitem__(self, item: t.Literal['model_name']) -> str: ...\n",
|
||||
]
|
||||
])
|
||||
|
||||
processed = (
|
||||
processed[:start_idx] + [' ' * 2 + START_COMMENT, *lines, ' ' * 2 + END_COMMENT] + processed[end_idx + 1 :]
|
||||
)
|
||||
with _TARGET_FILE.open('w') as f:
|
||||
f.writelines(processed)
|
||||
|
||||
with _TARGET_AUTO_FILE.open('r') as f:
|
||||
processed = f.readlines()
|
||||
|
||||
start_auto_stubs_idx, end_auto_stubs_idx = (
|
||||
processed.index(' ' * 2 + START_AUTO_STUBS_COMMENT),
|
||||
processed.index(' ' * 2 + END_AUTO_STUBS_COMMENT),
|
||||
)
|
||||
lines = []
|
||||
for model, class_name in CONFIG_MAPPING_NAMES.items():
|
||||
lines.extend([
|
||||
' ' * 2 + line
|
||||
for line in [
|
||||
'@t.overload\n',
|
||||
'@classmethod\n',
|
||||
f"def for_model(cls, model_name: t.Literal['{model}'], **attrs: t.Any) -> openllm_core.config.{class_name}: ...\n",
|
||||
]
|
||||
])
|
||||
lines.extend([
|
||||
' ' * 2 + line
|
||||
for line in [
|
||||
'@t.overload\n',
|
||||
'@classmethod\n',
|
||||
'def for_model(cls, model_name: LiteralString, **attrs: t.Any) -> openllm_core.LLMConfig: ...\n',
|
||||
]
|
||||
])
|
||||
processed = (
|
||||
processed[:start_auto_stubs_idx]
|
||||
+ [' ' * 2 + START_AUTO_STUBS_COMMENT, *lines, ' ' * 2 + END_AUTO_STUBS_COMMENT]
|
||||
+ processed[end_auto_stubs_idx + 1 :]
|
||||
)
|
||||
with _TARGET_AUTO_FILE.open('w') as f:
|
||||
f.writelines(processed)
|
||||
|
||||
with _TARGET_INIT_FILE.open('r') as f:
|
||||
processed = f.readlines()
|
||||
|
||||
start_import_stubs_idx, end_import_stubs_idx = (
|
||||
processed.index(START_IMPORT_STUBS_COMMENT),
|
||||
processed.index(END_IMPORT_STUBS_COMMENT),
|
||||
)
|
||||
mm = {
|
||||
'_configuration': ('GenerationConfig', 'LLMConfig'),
|
||||
'_schemas': ('GenerationInput', 'GenerationOutput', 'MetadataOutput', 'MessageParam'),
|
||||
'utils': ('api',),
|
||||
}
|
||||
lines = [
|
||||
'from openllm_client import AsyncHTTPClient as AsyncHTTPClient, HTTPClient as HTTPClient',
|
||||
f'from openlm_core.config import CONFIG_MAPPING as CONFIG_MAPPING, CONFIG_MAPPING_NAMES as CONFIG_MAPPING_NAMES, AutoConfig as AutoConfig, {", ".join([a+" as "+a for a in CONFIG_MAPPING_NAMES.values()])}',
|
||||
]
|
||||
lines.extend([
|
||||
f'from openllm_core.{module} import {", ".join([a+" as "+a for a in attr])}' for module, attr in mm.items()
|
||||
])
|
||||
processed = (
|
||||
processed[:start_import_stubs_idx]
|
||||
+ [START_IMPORT_STUBS_COMMENT, '\n'.join(lines) + '\n', END_IMPORT_STUBS_COMMENT]
|
||||
+ processed[end_import_stubs_idx + 1 :]
|
||||
)
|
||||
with _TARGET_INIT_FILE.open('w') as f:
|
||||
f.writelines(processed)
|
||||
|
||||
lines = [
|
||||
'# fmt: off\n',
|
||||
f'# AUTOGENERATED BY {os.path.basename(__file__)}. DO NOT EDIT\n',
|
||||
'from .configuration_auto import CONFIG_MAPPING as CONFIG_MAPPING, CONFIG_MAPPING_NAMES as CONFIG_MAPPING_NAMES, AutoConfig as AutoConfig\n',
|
||||
*[f'from .configuration_{k} import {a} as {a}\n' for k, a in CONFIG_MAPPING_NAMES.items()],
|
||||
]
|
||||
with _TARGET_CORE_INIT_FILE.open('w') as f:
|
||||
f.writelines(lines)
|
||||
|
||||
lines = [
|
||||
'# fmt: off\n',
|
||||
f'# AUTOGENERATED BY {os.path.basename(__file__)}. DO NOT EDIT\n',
|
||||
'import typing as t\n',
|
||||
'def is_autoawq_available() -> bool: ...\n',
|
||||
'def is_vllm_available() -> bool: ...\n',
|
||||
*[f'def {k}() -> bool: ...\n' for k in iutils.caller],
|
||||
'ENV_VARS_TRUE_VALUES: t.Set[str] = ...\n',
|
||||
]
|
||||
with _TARGET_IMPORT_UTILS_FILE.open('w') as f:
|
||||
f.writelines(lines)
|
||||
|
||||
return 0
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
raise SystemExit(main())
|
||||
@@ -1,88 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
import concurrent.futures
|
||||
import configparser
|
||||
import os
|
||||
from typing import List
|
||||
|
||||
_MYPY_CONFIG = {
|
||||
'pretty': 'true',
|
||||
'python_version': '3.9',
|
||||
'show_error_codes': 'true',
|
||||
'strict': 'true',
|
||||
'plugins': 'pydantic.mypy',
|
||||
'ignore_missing_imports': 'true',
|
||||
'warn_unreachable': 'true',
|
||||
'explicit_package_bases': 'true',
|
||||
}
|
||||
|
||||
|
||||
# Function to find .pyi files in a given directory
|
||||
def pyi_in_subdir(directory: str, git_root: str) -> List[str]:
|
||||
pyi_files = []
|
||||
for root, _, files in os.walk(directory):
|
||||
for file in files:
|
||||
if file.endswith('.pyi') or file == '_typing_compat.py' or '_openllm_tiny' in file:
|
||||
full_path = os.path.join(root, file)
|
||||
# Convert to relative path with respect to the git root
|
||||
relative_path = os.path.relpath(full_path, git_root)
|
||||
pyi_files.append(relative_path)
|
||||
return pyi_files
|
||||
|
||||
|
||||
def find_pyi_files(git_root: str) -> List[str]:
|
||||
# List all subdirectories
|
||||
subdirectories = [
|
||||
os.path.join(git_root, name)
|
||||
for name in os.listdir(git_root)
|
||||
if os.path.isdir(os.path.join(git_root, name)) and name not in ['venv', '.git', '.venv']
|
||||
]
|
||||
|
||||
# Use a thread pool to execute searches concurrently
|
||||
with concurrent.futures.ThreadPoolExecutor() as executor:
|
||||
# Map of future to subdirectory
|
||||
future_to_subdir = {executor.submit(pyi_in_subdir, subdir, git_root): subdir for subdir in subdirectories}
|
||||
|
||||
all_pyi_files = set()
|
||||
for future in concurrent.futures.as_completed(future_to_subdir):
|
||||
pyi_files = future.result()
|
||||
all_pyi_files.update(pyi_files)
|
||||
|
||||
return list(all_pyi_files)
|
||||
|
||||
|
||||
# Function to update mypy.ini file
|
||||
def update_mypy_ini(pyi_files: List[str], mypy_ini_path: str) -> int:
|
||||
config = configparser.ConfigParser()
|
||||
config.read(mypy_ini_path)
|
||||
|
||||
# Existing files from mypy.ini
|
||||
existing_files = config.get('mypy', 'files', fallback='').split(', ')
|
||||
|
||||
# Add new .pyi files if they are not already in the list
|
||||
updated_files = existing_files + [f for f in pyi_files if f not in existing_files]
|
||||
|
||||
# Update the 'files' entry
|
||||
config['mypy']['files'] = ', '.join(updated_files)
|
||||
for key, value in _MYPY_CONFIG.items():
|
||||
config.set('mypy', key, value)
|
||||
|
||||
# Write changes back to mypy.ini
|
||||
with open(mypy_ini_path, 'w') as configfile:
|
||||
configfile.write(
|
||||
f'# The following is autogenerated by {os.path.join(os.path.basename(os.path.dirname(__file__)), os.path.basename(__file__))}\n'
|
||||
)
|
||||
config.write(configfile)
|
||||
# Remove last newline if exists
|
||||
with open(mypy_ini_path, 'rb+') as file:
|
||||
file.seek(-1, os.SEEK_END)
|
||||
if file.read(1) == b'\n':
|
||||
file.seek(-1, os.SEEK_END)
|
||||
file.truncate()
|
||||
return 0
|
||||
|
||||
|
||||
ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||||
MYPY_CONFIG = os.path.join(ROOT, 'mypy.ini')
|
||||
|
||||
if __name__ == '__main__':
|
||||
raise SystemExit(update_mypy_ini(find_pyi_files(ROOT), MYPY_CONFIG))
|
||||
@@ -1,64 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
import os, shutil, sys, tomlkit
|
||||
|
||||
START_COMMENT = f'<!-- {os.path.basename(__file__)}: start -->\n'
|
||||
END_COMMENT = f'<!-- {os.path.basename(__file__)}: stop -->\n'
|
||||
|
||||
ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||||
sys.path.insert(0, os.path.join(ROOT, 'openllm-core', 'src'))
|
||||
from openllm_core.config import CONFIG_MAPPING
|
||||
from openllm_core.config.configuration_auto import CONFIG_TO_ALIAS_NAMES
|
||||
|
||||
|
||||
def markdown_noteblock(text: str):
|
||||
return ['\n', f'> **Note:** {text}\n']
|
||||
|
||||
|
||||
def main() -> int:
|
||||
with open(os.path.join(ROOT, 'README.md'), 'r') as f:
|
||||
readme = f.readlines()
|
||||
|
||||
start_index, stop_index = readme.index(START_COMMENT), readme.index(END_COMMENT)
|
||||
|
||||
content = []
|
||||
|
||||
for it in CONFIG_MAPPING.values():
|
||||
it = it()
|
||||
architecture_name = it.__class__.__name__[:-6]
|
||||
details_block = ['<details>\n', f'<summary>{architecture_name}</summary>\n\n', '### Quickstart\n']
|
||||
nitem = CONFIG_TO_ALIAS_NAMES[it.__class__.__name__]
|
||||
details_block.extend([
|
||||
f'Run the following command to quickly spin up a {architecture_name} server:\n',
|
||||
f"""\
|
||||
```bash
|
||||
openllm start {it['default_id']}{'' if not it['trust_remote_code'] else ' --trust-remote-code'}
|
||||
```""",
|
||||
'You can run the following code in a different terminal to interact with the server:',
|
||||
"""\
|
||||
```python
|
||||
import openllm_client
|
||||
client = openllm_client.HTTPClient('http://localhost:3000')
|
||||
client.generate('What are large language models?')
|
||||
```""",
|
||||
*markdown_noteblock(
|
||||
f'Any {architecture_name} variants can be deployed with OpenLLM. Visit the [HuggingFace Model Hub](https://huggingface.co/models?sort=trending&search={nitem}) to see more {architecture_name}-compatible models.\n'
|
||||
),
|
||||
'\n### Supported models\n',
|
||||
f'You can specify any of the following {architecture_name} models via `openllm start`:\n\n',
|
||||
])
|
||||
list_ids = [f'- [{model_id}](https://huggingface.co/{model_id})' for model_id in it['model_ids']]
|
||||
details_block.extend(list_ids)
|
||||
details_block.append('\n</details>\n\n')
|
||||
|
||||
content.append('\n'.join(details_block))
|
||||
|
||||
readme = readme[:start_index] + [START_COMMENT] + content + [END_COMMENT] + readme[stop_index + 1 :]
|
||||
with open(os.path.join(ROOT, 'README.md'), 'w') as f:
|
||||
f.writelines(readme)
|
||||
|
||||
shutil.copyfile(os.path.join(ROOT, 'README.md'), os.path.join(ROOT, 'openllm-python', 'README.md'))
|
||||
return 0
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
raise SystemExit(main())
|
||||
@@ -1,46 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
from __future__ import annotations
|
||||
from decimal import ROUND_DOWN, Decimal
|
||||
from pathlib import Path
|
||||
import orjson
|
||||
|
||||
PRECISION = Decimal('.01')
|
||||
|
||||
ROOT = Path(__file__).resolve().parent.parent
|
||||
|
||||
|
||||
def main() -> int:
|
||||
coverage_summary = ROOT / 'coverage-summary.json'
|
||||
|
||||
coverage_data = orjson.loads(coverage_summary.read_text(encoding='utf-8'))
|
||||
total_data = coverage_data.pop('total')
|
||||
|
||||
lines = ['\n', 'Package | Statements\n', '------- | ----------\n']
|
||||
|
||||
for package, data in sorted(coverage_data.items()):
|
||||
statements_covered = data['statements_covered']
|
||||
statements = data['statements']
|
||||
|
||||
rate = Decimal(statements_covered) / Decimal(statements) * 100
|
||||
rate = rate.quantize(PRECISION, rounding=ROUND_DOWN)
|
||||
lines.append(f'{package} | {100 if rate == 100 else rate}% ({statements_covered} / {statements})\n')
|
||||
|
||||
total_statements_covered = total_data['statements_covered']
|
||||
total_statements = total_data['statements']
|
||||
total_rate = Decimal(total_statements_covered) / Decimal(total_statements) * 100
|
||||
total_rate = total_rate.quantize(PRECISION, rounding=ROUND_DOWN)
|
||||
color = 'ok' if float(total_rate) >= 95 else 'critical'
|
||||
lines.insert(0, f'\n')
|
||||
|
||||
lines.append(
|
||||
f'**Summary** | {100 if total_rate == 100 else total_rate}% ({total_statements_covered} / {total_statements})\n'
|
||||
)
|
||||
|
||||
coverage_report = ROOT / 'coverage-report.md'
|
||||
with coverage_report.open('w', encoding='utf-8') as f:
|
||||
f.write(''.join(lines))
|
||||
return 0
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
raise SystemExit(main())
|
||||
26
wheels.sh
26
wheels.sh
@@ -1,26 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
set -eo pipefail
|
||||
|
||||
GIT_ROOT="$(git rev-parse --show-toplevel)"
|
||||
cd "$GIT_ROOT" || exit 1
|
||||
|
||||
mkdir -p dist
|
||||
|
||||
pushd openllm-client &>/dev/null
|
||||
|
||||
python -m build -w && mv dist/* ../dist
|
||||
|
||||
popd &>/dev/null
|
||||
|
||||
pushd openllm-core &>/dev/null
|
||||
|
||||
python -m build -w && mv dist/* ../dist
|
||||
|
||||
popd &>/dev/null
|
||||
|
||||
pushd openllm-python &>/dev/null
|
||||
|
||||
python -m build -w && mv dist/* ../dist
|
||||
|
||||
popd &>/dev/null
|
||||
Reference in New Issue
Block a user