commit dd8b6050b2742138eaf1c1d2eba7e73cb2378a8b
Author: Chaoyu
Date:   Tue Apr 18 17:27:53 2023 -0700

    feat: FLAN-T5 support

    - add infrastructure; still to be implemented: cache, chat history
    - base Runnable implementation that fits the LangChain API
    - add a Prompt descriptor and utils

    feat: license headers, auto factory implementation, and CLI

    Auto-construct CLI args from the pydantic config.
    Add an auto factory for ease of use.
    Only provide `/generate` to streamline the UX.
    Configuration precedence contract: CLI > env var > input.

    fix: serve from a thread
    fix: CLI args
    chore: clean up names and refactor imports

    Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>

diff --git a/.bazelignore b/.bazelignore
new file mode 100644
index 00000000..e3fbd983
--- /dev/null
+++ b/.bazelignore
@@ -0,0 +1,2 @@
+build
+node_modules
diff --git a/.bazelrc b/.bazelrc
new file mode 100644
index 00000000..625f1e43
--- /dev/null
+++ b/.bazelrc
@@ -0,0 +1,3 @@
+# load bazelrc from the legacy location as recommended
+# in https://github.com/bazelbuild/bazel/issues/6319
+import %workspace%/tools/bazel.rc
diff --git a/.bazelversion b/.bazelversion
new file mode 100644
index 00000000..09b254e9
--- /dev/null
+++ b/.bazelversion
@@ -0,0 +1 @@
+6.0.0
diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS
new file mode 100644
index 00000000..aa8a63dc
--- /dev/null
+++ b/.github/CODEOWNERS
@@ -0,0 +1 @@
+* @aarnphm @parano @ssheng
diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml
new file mode 100644
index 00000000..96a6be27
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/bug_report.yml
@@ -0,0 +1,57 @@
+name: 🐛 Bug Report
+description: Create a bug report for OpenLLM.
+title: ''
+labels: ['']
+body:
+  - type: markdown
+    id: exists
+    attributes:
+      value: |
+        Please search to see if an issue already exists for the bug you encountered.
+        See [Searching Issues and Pull Requests](https://docs.github.com/en/search-github/searching-on-github/searching-issues-and-pull-requests) for how to use the GitHub search bar and filters.
+  - type: textarea
+    id: describe-the-bug
+    validations:
+      required: true
+    attributes:
+      label: Describe the bug
+      description: |
+        Please provide a clear and concise description of the problem you ran into.
+      placeholder: This happened when I...
+  - type: textarea
+    id: to-reproduce
+    validations:
+      required: false
+    attributes:
+      label: To reproduce
+      description: |
+        Please provide a code sample or snippet that reproduces the problem. If you have code snippets, error messages, or stack traces, please provide them here as well.

+        **IMPORTANT**: make sure to use [code tags](https://docs.github.com/en/get-started/writing-on-github/working-with-advanced-formatting/creating-and-highlighting-code-blocks#syntax-highlighting) to correctly format your code. Screenshots are helpful, but don't use them for code snippets, as they don't allow others to copy-and-paste your code.

+        To give us more information for diagnosing the issue, it would be great if you could provide a minimal reproducible example!
+      placeholder: |
+        Steps to reproduce the bug:

+        1. Provide '...'
+        2. Run '...'
+        3. See error
+  - type: textarea
+    id: logs
+    attributes:
+      label: Logs
+      description: 'Please include the Python logs if you can.'
+      render: shell
+  - type: textarea
+    id: environment-info
+    attributes:
+      label: Environment
+      description: |
+        Please share your environment with us. Run `bentoml env` and `transformers-cli env`, and paste the results here.
+      placeholder: |
+        bentoml: ...
+        transformers: ...
+        python: ...
+ platform: ... + validations: + required: true diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml new file mode 100644 index 00000000..1980b167 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -0,0 +1,9 @@ +blank_issues_enabled: true +version: 2.1 +contact_links: + - name: Blank issues + url: https://github.com/bentoml/open-llm-server/issues/new + about: To create a blank issue + - name: BentoML Discussions + url: https://github.com/bentoml/BentoML/discussions + about: Please ask general questions here. diff --git a/.github/ISSUE_TEMPLATE/feature_request.yml b/.github/ISSUE_TEMPLATE/feature_request.yml new file mode 100644 index 00000000..a542e870 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request.yml @@ -0,0 +1,35 @@ +name: 🚀 Feature Request +description: Submit a proposal/request for new OpenLLM features. +title: '' +labels: [''] +body: + - type: textarea + id: feature-request + validations: + required: true + attributes: + label: Feature request + description: | + A clear and concise description of the feature request. + placeholder: | + I would like it if... + - type: textarea + id: motivation + validations: + required: false + attributes: + label: Motivation + description: | + Please outline the motivation for this feature request. Is your feature request related to a problem? e.g., I'm always frustrated when [...]. + If this is related to another issue, please link here too. + If you have a current workaround, please also provide it here. + placeholder: | + This feature would solve ... + - type: textarea + id: other + attributes: + label: Other + description: | + Is there any way that you could help, e.g. by submitting a PR? + placeholder: | + I would love to contribute ... diff --git a/.github/actions/setup-repo/action.yml b/.github/actions/setup-repo/action.yml new file mode 100644 index 00000000..625651c2 --- /dev/null +++ b/.github/actions/setup-repo/action.yml @@ -0,0 +1,46 @@ +name: Setup repo +description: Setup repo with all features on CI +inputs: + python-version: + description: 'Python version' + required: true + default: '3.8' + architecture: + description: 'Which architecture to run on' + required: true + default: x64 +runs: + using: composite + steps: + - name: Fetch base reference. 
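+      # Fetch the base ref up front so later CI steps can diff changed files
+      # against origin/$GITHUB_BASE_REF (e.g., for incremental type checks).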
+ shell: bash + run: git fetch origin $GITHUB_BASE_REF + - name: Setup Python + uses: actions/setup-python@v4 + with: + python-version: ${{ inputs.python-version }} + architecture: ${{ inputs.architecture }} + - name: Setup node + uses: actions/setup-node@v3 + with: + node-version: '17' + - name: Get cache key prefix + id: get-cache-key-prefix + shell: bash + run: echo "prefix=${{ runner.os }}-${{ inputs.python-version }}" >> $GITHUB_OUTPUT + - name: Get pip cache dir + id: pip-cache-dir + shell: bash + run: echo "dir=$(pip cache dir)" >> $GITHUB_OUTPUT + - name: Cache pip dependencies + uses: actions/cache@v3 + id: cache-pip + with: + path: ${{ steps.pip-cache-dir.outputs.dir }} + key: ${{ steps.get-cache-key-prefix.outputs.prefix }}-pypi + restore-keys: | + ${{ steps.get-cache-key-prefix.outputs.prefix }}-pypi- + # TODO: setup hatch actions instead + - name: Install dependencies + shell: bash + run: pip install -r requirements/tests.txt diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 00000000..d036e3ef --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,15 @@ +version: 2 +updates: + - package-ecosystem: github-actions + directory: '/' + schedule: + interval: 'weekly' + day: 'monday' + time: '09:00' + # maintain required dependencies + - package-ecosystem: pip + directory: '/' + schedule: + interval: 'daily' + open-pull-requests-limit: 5 + versioning-strategy: increase-if-necessary diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 00000000..b96f3e9c --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,211 @@ +name: ci +on: + push: + branches: [main] + pull_request: + schedule: + - cron: '0 0 * * 1/2' +env: + LINES: 120 + COLUMNS: 120 + BENTOML_DO_NOT_TRACK: True + PYTEST_PLUGINS: bentoml.testing.pytest.plugin +# https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#defaultsrun +defaults: + run: + shell: bash --noprofile --norc -exo pipefail {0} +jobs: + codestyle_check: + runs-on: ubuntu-latest + needs: + - diff + if: ${{ (github.event_name == 'pull_request' && needs.diff.outputs.bentoml == 'true') || github.event_name == 'push' }} + steps: + - uses: actions/checkout@v3 + - name: Setup python + uses: actions/setup-python@v4 + with: + python-version: '3.10.6' + architecture: x64 + - name: Get pip cache dir + id: cache-dir + run: | + echo ::set-output name=dir::$(pip cache dir) + - name: Fetch origin + run: git fetch origin "$GITHUB_BASE_REF" + - name: Setup node + uses: actions/setup-node@v3 + with: + node-version: '17' + - name: Install pyright + run: | + npm install -g npm@^7 pyright + - name: Setup bufbuild/buf + uses: bufbuild/buf-setup-action@v1.17.0 + with: + github_token: ${{ github.token }} + - name: Cache pip dependencies + uses: actions/cache@v3 + id: cache-pip + with: + path: ${{ steps.cache-dir.outputs.dir }} + key: codestyle-${{ hashFiles('requirements/dev-requirements.txt') }} + - name: Install dependencies + run: | + pip install . + pip install -r requirements/dev-requirements.txt + - name: Format check + run: | + black --check src examples tests + black --check --pyi typings + isort --check . 
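+      # Lint rules for ruff are configured under [tool.ruff] in pyproject.toml.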
+ - name: Lint check + run: ruff check src tests examples + - name: Type check + if: ${{ github.event_name == 'pull_request' }} + run: git diff --name-only --diff-filter=AM "origin/$GITHUB_BASE_REF" -z -- '*.py{,i}' | xargs -0 --no-run-if-empty pyright + - name: Proto check + if: ${{ (github.event_name == 'pull_request' && needs.diff.outputs.protos == 'true') || github.event_name == 'push' }} + run: | + buf lint --config "src/bentoml/grpc/buf.yaml" --error-format msvs src + documentation_spelling_check: + runs-on: ubuntu-latest + needs: + - diff + if: ${{ (github.event_name == 'pull_request' && needs.diff.outputs.docs == 'true') || github.event_name == 'push' }} + steps: + - uses: actions/checkout@v3 + with: + fetch-depth: 0 # fetch all tags and branches + - name: Setup python + uses: actions/setup-python@v4 + with: + python-version: 3.9 + - name: Get pip cache dir + id: cache-dir + run: | + echo ::set-output name=dir::$(pip cache dir) + - name: Cache pip dependencies + uses: actions/cache@v3 + id: cache-pip + with: + path: ${{ steps.cache-dir.outputs.dir }} + key: ${{ runner.os }}-docs-${{ hashFiles('requirements/docs-requirements.txt') }} + - name: Install dependencies + run: | + pip install . + pip install -r requirements/docs-requirements.txt + - name: Install libenchant + run: | + sudo apt-get update && sudo apt-get install -y libenchant-2-dev + - name: Run spellcheck script + run: make spellcheck-docs + unit_tests: + needs: + - diff + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest, macos-latest, windows-latest] + python-version: ['3.7', '3.8', '3.9', '3.10'] + if: ${{ (github.event_name == 'pull_request' && needs.diff.outputs.bentoml == 'true') || github.event_name == 'push' }} + name: python${{ matrix.python-version }}_unit_tests (${{ matrix.os }}) + runs-on: ${{ matrix.os }} + steps: + - uses: actions/checkout@v3 + with: + fetch-depth: 0 # fetch all tags and branches + - name: Setup python + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + architecture: x64 + - name: Get pip cache dir + id: cache-dir + run: | + echo ::set-output name=dir::$(pip cache dir) + - name: Cache pip dependencies + uses: actions/cache@v3 + id: cache-pip + with: + path: ${{ steps.cache-dir.outputs.dir }} + key: ${{ runner.os }}-tests-${{ hashFiles('requirements/tests-requirements.txt') }} + - name: Install dependencies + run: | + pip install ".[grpc]" + pip install -r requirements/tests-requirements.txt + - name: Run unit tests + run: | + OPTS=(--cov-config pyproject.toml --cov=src/bentoml --cov-append) + if [ "${{ matrix.os }}" != 'windows-latest' ]; then + # we will use pytest-xdist to improve tests run-time. 
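+            # --dist loadfile keeps all tests from one file on the same worker,
+            # and -n auto sizes the worker pool to the number of CPUs.
+            # (--run-grpc-tests comes from the BentoML pytest plugin loaded via PYTEST_PLUGINS.)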
+ OPTS=(${OPTS[@]} --dist loadfile -n auto --run-grpc-tests) + fi + # Now run the unit tests + coverage run -m pytest tests/unit "${OPTS[@]}" + bento_server_e2e_tests: + needs: + - diff + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest, macos-latest, windows-latest] + python-version: ['3.7', '3.8', '3.9', '3.10'] + server_type: ['http', 'grpc'] + exclude: + - os: windows-latest + server_type: 'grpc' + - os: macos-latest + server_type: 'grpc' + python-version: '3.10' + if: ${{ (github.event_name == 'pull_request' && needs.diff.outputs.bentoml == 'true') || github.event_name == 'push' }} + name: python${{ matrix.python-version }}_${{ matrix.server_type }}_e2e_tests (${{ matrix.os }}) + runs-on: ${{ matrix.os }} + timeout-minutes: 20 + env: + SETUPTOOLS_USE_DISTUTILS: stdlib + BENTOML_BUNDLE_LOCAL_BUILD: True + steps: + - uses: actions/checkout@v3 + with: + fetch-depth: 0 # fetch all tags and branches + - name: Setup python + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + architecture: x64 + - name: Set up QEMU + uses: docker/setup-qemu-action@v2 + if: ${{ matrix.os == 'ubuntu-latest' }} + - name: Set up Docker Buildx + if: ${{ matrix.os == 'ubuntu-latest' }} + id: buildx + uses: docker/setup-buildx-action@v2 + - name: Get pip cache dir + id: cache-dir + run: | + echo ::set-output name=dir::$(pip cache dir) + - name: Cache pip dependencies + uses: actions/cache@v3 + id: cache-pip + with: + path: ${{ steps.cache-dir.outputs.dir }} + key: ${{ runner.os }}-tests-${{ hashFiles('requirements/tests-requirements.txt') }} + - name: Install dependencies for ${{ matrix.server_type }}-based tests. + run: | + pip install -r requirements/tests-requirements.txt + if [ "${{ matrix.server_type }}" == 'grpc' ]; then + pip install -e ".[grpc]" + else + pip install -e . + fi + if [ -f "tests/e2e/bento_server_${{ matrix.server_type }}/requirements.txt" ]; then + pip install -r tests/e2e/bento_server_${{ matrix.server_type }}/requirements.txt + fi + - name: Run ${{ matrix.server_type }} tests and generate coverage report + run: | + OPTS=(--cov-config pyproject.toml --cov=src/bentoml --cov-append) + coverage run -m pytest tests/e2e/bento_server_${{ matrix.server_type }} "${OPTS[@]}" +concurrency: + group: ci-${{ github.event.pull_request.number || github.sha }} + cancel-in-progress: true diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml new file mode 100644 index 00000000..ec55784a --- /dev/null +++ b/.github/workflows/codeql-analysis.yml @@ -0,0 +1,33 @@ +name: 'codeql' +on: + push: + branches: [main] + pull_request: + # The branches below must be a subset of the branches above + branches: [main] + schedule: + - cron: '37 15 * * 2' +jobs: + analyze: + name: Analyze + runs-on: ubuntu-latest + permissions: + actions: read + contents: read + security-events: write + strategy: + fail-fast: false + matrix: + language: ['python'] + steps: + - name: Checkout repository + uses: actions/checkout@v3 + # Initializes the CodeQL tools for scanning. 
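+      # For Python, the Autobuild step below is effectively a no-op, but it
+      # keeps the job uniform if compiled languages are added to the matrix.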
+ - name: Initialize CodeQL + uses: github/codeql-action/init@v2 + with: + languages: ${{ matrix.language }} + - name: Autobuild + uses: github/codeql-action/autobuild@v2 + - name: Perform CodeQL Analysis + uses: github/codeql-action/analyze@v2 diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..28c87b44 --- /dev/null +++ b/.gitignore @@ -0,0 +1,130 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ +bazel-* diff --git a/BUILD.bazel b/BUILD.bazel new file mode 100644 index 00000000..365e7de4 --- /dev/null +++ b/BUILD.bazel @@ -0,0 +1,185 @@ +load("@bazel_skylib//rules:write_file.bzl", "write_file") + +# rules_python +load("@com_github_bazelbuild_buildtools//buildifier:def.bzl", "buildifier", "buildifier_test") +load("//rules/py/vendorred:pypi.bzl", "entry_point") + +package(default_visibility = ["//:__subpackages__"]) + +exports_files([ + "README.md", + "package.json", + "yarn.lock", +]) + +# export default pyproject.toml config +filegroup( + name = "pyproject", + srcs = ["pyproject.toml"], +) + +genrule( + name = "make-tests-requirements", + srcs = ["@tests//:requirements.bzl"], + outs = ["tests.clean.bzl"], + cmd = " | ".join([ + "cat $<", + "cat >$@", + ]), +) + +write_file( + name = "gen-tests-starlark", + out = "update-tests.sh", + content = [ + # This depends on bash, which is not going to work on Windows. 
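+        # `bazel run //:vendor-tests` executes this script from the workspace
+        # root, copying the cleaned .bzl file out of bazel-bin back into the
+        # source tree (see the sh_binary below).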
+ "#!/usr/bin/env bash", + "cd $BUILD_WORKSPACE_DIRECTORY", + "cp -fv bazel-bin/tests.clean.bzl rules/py/vendorred/tests.bzl", + ], +) + +sh_binary( + name = "vendor-tests", + srcs = ["update-tests.sh"], + data = [":make-tests-requirements"], +) + +genrule( + name = "make-tensorflow-requirements", + srcs = ["@tensorflow//:requirements.bzl"], + outs = ["tensorflow.clean.bzl"], + cmd = " | ".join([ + "cat $<", + "cat >$@", + ]), +) + +write_file( + name = "gen-tensorflow-starlark", + out = "update-tensorflow.sh", + content = [ + # This depends on bash, which is not going to work on Windows. + "#!/usr/bin/env bash", + "cd $BUILD_WORKSPACE_DIRECTORY", + "cp -fv bazel-bin/tensorflow.clean.bzl rules/py/vendorred/tensorflow.bzl", + ], +) + +sh_binary( + name = "vendor-tensorflow", + srcs = ["update-tensorflow.sh"], + data = [":make-tensorflow-requirements"], +) + +genrule( + name = "make-pypi-requirements", + srcs = ["//rules/py/vendorred:pypi.bzl"], + outs = ["pypi.clean.bzl"], + cmd = " | ".join([ + "cat $<", + "cat >$@", + ]), +) + +write_file( + name = "gen-pypi-starlark", + out = "update-pypi.sh", + content = [ + # This depends on bash, which is not going to work on Windows. + "#!/usr/bin/env bash", + "cd $BUILD_WORKSPACE_DIRECTORY", + "cp -fv bazel-bin/pypi.clean.bzl rules/py/vendorred/pypi.bzl", + ], +) + +sh_binary( + name = "vendor-pypi", + srcs = ["update-pypi.sh"], + data = [":make-pypi-requirements"], +) + +[ + alias( + name = "{}_proto".format(version), + actual = "//src/bentoml/grpc/{}:service_proto".format(version), + visibility = ["//visibility:public"], + ) + for version in [ + "v1alpha1", + "v1", + ] +] + +[ + alias( + name = "{}_proto_lint".format(version), + actual = "//src/bentoml/grpc/{}:service_proto_lint".format(version), + visibility = ["//visibility:public"], + ) + for version in [ + "v1alpha1", + "v1", + ] +] + +## Expose public entrypoint for Bazel target. +alias( + name = "cli", + actual = "//src/bentoml_cli:cli", + visibility = ["//visibility:public"], +) + +alias( + name = "sdk", + actual = "//src/bentoml:bentoml", + visibility = ["//visibility:public"], +) + +# thirdparty alias +alias( + name = "buildozer", + actual = "@com_github_bazelbuild_buildtools//buildozer", +) + +buildifier( + name = "buildfmt", +) + +buildifier_test( + name = "buildcheck", + srcs = glob([ + "**/*.bzl", + "**/*.bazel", + ]), +) + +alias( + name = "pyright", + actual = "@npm//:node_modules/pyright/index.js", +) + +alias( + name = "sphinx-build", + actual = entry_point( + "sphinx", + script = "sphinx-build", + ), +) + +alias( + name = "sphinx-autobuild", + actual = entry_point("sphinx-autobuild"), +) + +[ + alias( + name = tool, + actual = entry_point(tool), + ) + for tool in [ + "black", + "isort", + "pylint", + ] +] diff --git a/LICENSE b/LICENSE new file mode 100644 index 00000000..ce3d54b2 --- /dev/null +++ b/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. 
For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. 
This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright 2023 Aaron Pham and BentoML Team. All rights reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
diff --git a/README.md b/README.md
new file mode 100644
index 00000000..3ebca145
--- /dev/null
+++ b/README.md
@@ -0,0 +1,7 @@
+<div align="center">
+    <h1 align="center">OpenLLM</h1>
+    <p align="center">
+        REST/gRPC API server for running any Open Large-Language Model - StableLM, Llama, Alpaca, Dolly, Flan-T5, and more<br/>
+        Powered by BentoML 🍱 + HuggingFace 🤗
+    </p>
+</div>
diff --git a/WORKSPACE b/WORKSPACE
new file mode 100644
index 00000000..a49cdbaf
--- /dev/null
+++ b/WORKSPACE
@@ -0,0 +1,53 @@
+# TODO: Migrate to bzlmod once 6.0.0 is released.
+workspace(name = "com_github_bentoml_bentoml")
+
+load("//rules:deps.bzl", "bentoml_dependencies")
+
+bentoml_dependencies()
+
+load("@com_github_bentoml_plugins//rules:deps.bzl", "plugins_dependencies")
+
+plugins_dependencies()
+
+# NOTE: external users who wish to reuse the BentoML workspace setup
+# should always load these in this order.
+load("@com_github_bentoml_plugins//rules:workspace0.bzl", "workspace0")
+
+workspace0()
+
+load("@com_github_bentoml_plugins//rules:workspace1.bzl", "workspace1")
+
+workspace1()
+
+load("@com_github_bentoml_plugins//rules:workspace2.bzl", "workspace2")
+
+workspace2()
+
+load("@rules_python//python:pip.bzl", "pip_parse")
+
+pip_parse(
+    name = "pypi",
+    requirements = "//requirements:bazel-requirements.lock.txt",
+)
+
+pip_parse(
+    name = "tensorflow",
+    requirements = "//requirements:bazel-tensorflow-requirements.lock.txt",
+)
+
+pip_parse(
+    name = "tests",
+    requirements = "//requirements:bazel-tests-requirements.lock.txt",
+)
+
+load("//rules/py/vendorred:pypi.bzl", pypi_deps = "install_deps")
+
+pypi_deps()
+
+load("//rules/py/vendorred:tests.bzl", tests_deps = "install_deps")
+
+tests_deps()
+
+load("//rules/py/vendorred:tensorflow.bzl", tensorflow_deps = "install_deps")
+
+tensorflow_deps()
diff --git a/examples/interaction.py b/examples/interaction.py
new file mode 100644
index 00000000..e69de29b
diff --git a/package.json b/package.json
new file mode 100644
index 00000000..3f1f3259
--- /dev/null
+++ b/package.json
@@ -0,0 +1,14 @@
+{
+  "name": "openllm-tools",
+  "version": "0.0.0",
+  "description": "JS tooling for OpenLLM",
+  "author": "Aaron Pham",
+  "license": "Apache-2.0",
+  "dependencies": {
+    "pyright": "^1.1.279",
+    "@grpc/grpc-js": "^1.7.1",
+    "google-protobuf": "^3.21.0",
+    "grpc-tools": "^1.11.2",
+    "ts-protoc-gen": "^0.15.0"
+  }
+}
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 00000000..29a6234c
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,188 @@
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[project]
+name = "openllm"
+dynamic = ["version"]
+description = 'OpenLLM: REST/gRPC API server for running any open Large-Language Model - StableLM, Llama, Alpaca, Dolly, Flan-T5, Custom'
+readme = "README.md"
+requires-python = ">=3.8"
+license = "Apache-2.0"
+keywords = [
+    "MLOps",
+    "AI",
+    "BentoML",
+    "Model Serving",
+    "Model Deployment",
+    "LLMOps",
+    "Large Language Model",
+    "Generative AI",
+    "Stable Diffusion",
+    "StableLM",
+    "Alpaca",
+    "PyTorch",
+    "Transformers",
+]
+authors = [
+    { name = "Aaron Pham", email = "aarnphm@bentoml.com" },
+    { name = "BentoML Team", email = "contact@bentoml.com" },
+]
+classifiers = [
+    "Development Status :: 4 - Beta",
+    "License :: OSI Approved :: Apache Software License",
+    "Topic :: Scientific/Engineering :: Artificial Intelligence",
+    "Topic :: Software Development :: Libraries",
+    "Operating System :: OS Independent",
+    "Programming Language :: Python",
+    "Programming Language :: Python :: 3",
+    "Programming Language :: Python :: 3 :: Only",
+    "Programming Language :: Python :: 3.8",
+    "Programming Language :: Python :: 3.9",
+    "Programming Language :: Python :: 3.10",
+    "Programming Language :: Python :: 3.11",
+    "Programming Language :: Python :: Implementation :: CPython",
+    "Programming Language :: Python :: Implementation :: PyPy",
+]
+dependencies = [
+    # 
bentoml[io] includes pydantic, PIL, filetype, pandas and numpy + # bentoml[grpc,grpc-reflection] include grpcio, grpcio-reflection + "bentoml[io,grpc,grpc-reflection]>=1.0.19", + # bentoml[torch] includes torch and transformers + "transformers[torch]>=4.28.1", + # Super fast JSON serialization + "orjson", + # clidantic + "clidantic", +] + +[project.urls] +Documentation = "https://github.com/bentoml/open-llm-server#readme" +Issues = "https://github.com/bentoml/open-llm-server/issues" +Source = "https://github.com/bentoml/open-llm-server" + +[project.scripts] +openllm = "openllm.__main__:cli" + +[tool.hatch.version] +path = "src/openllm/__about__.py" + +[tool.hatch.envs.default] +dependencies = [ + "coverage[toml]>=6.5", + "pytest", + "pytest-asyncio>=0.21.0", + "pytest-xdist[psutil]", + "pytest-cov", + "pytest-mock", + "pytest-randomly", + "pytest-rerunfailures", +] +[tool.hatch.envs.default.scripts] +test = "pytest {args:tests}" +test-cov = "coverage run -m pytest {args:tests}" +cov-report = ["- coverage combine", "coverage report"] +cov = ["test-cov", "cov-report"] + +[[tool.hatch.envs.all.matrix]] +python = ["3.8", "3.9", "3.10", "3.11"] + +[tool.hatch.envs.lint] +detached = true +dependencies = ["black>=23.1.0", "mypy>=1.0.0", "ruff>=0.0.243", "pyright"] +[tool.hatch.envs.lint.scripts] +typing = "pyright {args:src/openllm tests}" +style = ["ruff {args:.}", "black --check --diff {args:.}"] +fmt = ["black {args:.}", "ruff --fix {args:.}", "style"] +all = ["style", "typing"] + +[tool.pytest.ini_options] +addopts = ["-rfEX", "-pno:warnings"] +python_files = ["test_*.py", "*_test.py"] +testpaths = ["tests"] + +[tool.black] +target-version = ["py311"] +line-length = 120 +exclude = ''' +( + /( + \.eggs + | \.git + | \.tox + | \.venv + | _build + | .build + | bazel-* + | build + | venv + | lib + | dist + )/ + | src/openllm/__about__.py +) +''' + +[tool.ruff] +target-version = "py311" +line-length = 120 +ignore = [ + # Allow non-abstract empty methods in abstract base classes + "B027", + # Allow boolean positional values in function calls, like `dict.get(... True)` + "FBT003", + # Ignore checks for possible passwords + "S105", + "S106", + "S107", + # Ignore complexity + "C901", + "PLR0911", + "PLR0912", + "PLR0913", + "PLR0915", +] +unfixable = [ + "F401", # Don't touch unused imports, just warn about it. 
+] + +[tool.ruff.pydocstyle] +convention = "google" + +[tool.ruff.isort] +known-first-party = ["openllm", "bentoml"] + +[tool.ruff.flake8-quotes] +inline-quotes = "single" + +[tool.ruff.flake8-tidy-imports] +ban-relative-imports = "all" + +[tool.ruff.per-file-ignores] +# Tests can use magic values, assertions, and relative imports +"tests/**/*" = ["PLR2004", "S101", "TID252"] + +[tool.pyright] +pythonVersion = "3.11" +include = ["src/", "tests/"] +analysis.useLibraryCodeForTypes = true +typeCheckingMode = "strict" +strictListInference = true +strictDictionaryInference = true +strictSetInference = true +strictParameterNoneValue = true +enableTypeIgnoreComments = true + + +[tool.coverage.run] +source_pkgs = ["openllm", "tests"] +branch = true +parallel = true +omit = ["src/openllm/__about__.py"] + +[tool.coverage.paths] +openllm = ["src/openllm", "*/openllm/src/openllm"] +tests = ["tests", "*/openllm/tests"] + +[tool.coverage.report] +exclude_lines = ["no cov", "if __name__ == .__main__.:", "if TYPE_CHECKING:"] diff --git a/rules/BUILD b/rules/BUILD new file mode 100644 index 00000000..c8b1eabb --- /dev/null +++ b/rules/BUILD @@ -0,0 +1,11 @@ +load("@bazel_skylib//:bzl_library.bzl", "bzl_library") + +package(default_visibility = ["//:__pkg__"]) + +[ + bzl_library( + name = "{}.bzl".format(src), + srcs = ["{}.bzl".format(src)], + ) + for src in ["deps"] +] diff --git a/rules/deps.bzl b/rules/deps.bzl new file mode 100644 index 00000000..a1dbfa28 --- /dev/null +++ b/rules/deps.bzl @@ -0,0 +1,190 @@ +load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive") +load("@bazel_tools//tools/build_defs/repo:utils.bzl", "maybe") +load("@bazel_tools//tools/build_defs/repo:git.bzl", "git_repository", "new_git_repository") + +# NOTE: sync with pyproject.toml +GRPC_VERSION = "1.51.1" +GRPC_SHA256 = "b55696fb249669744de3e71acc54a9382bea0dce7cd5ba379b356b12b82d4229" +PROTOBUF_VERSION = "21.11" +PROTOBUF_SHA256 = "b1d6dd2cbb5d87e17af41cadb720322ce7e13af826268707bd8db47e5654770b" + +def bentoml_dependencies(): + # bentoml/plugins + maybe( + git_repository, + name = "com_github_bentoml_plugins", + remote = "https://github.com/bentoml/plugins.git", + branch = "main", + ) + + maybe( + http_archive, + name = "bazel_skylib", + sha256 = "74d544d96f4a5bb630d465ca8bbcfe231e3594e5aae57e1edbf17a6eb3ca2506", + urls = [ + "https://mirror.bazel.build/github.com/bazelbuild/bazel-skylib/releases/download/1.3.0/bazel-skylib-1.3.0.tar.gz", + "https://github.com/bazelbuild/bazel-skylib/releases/download/1.3.0/bazel-skylib-1.3.0.tar.gz", + ], + ) + + maybe( + http_archive, + name = "io_bazel_rules_go", + sha256 = "d6b2513456fe2229811da7eb67a444be7785f5323c6708b38d851d2b51e54d83", + urls = [ + "https://mirror.bazel.build/github.com/bazelbuild/rules_go/releases/download/v0.30.0/rules_go-v0.30.0.zip", + "https://github.com/bazelbuild/rules_go/releases/download/v0.30.0/rules_go-v0.30.0.zip", + ], + ) + + maybe( + http_archive, + name = "io_bazel_rules_docker", + sha256 = "b1e80761a8a8243d03ebca8845e9cc1ba6c82ce7c5179ce2b295cd36f7e394bf", + urls = ["https://github.com/bazelbuild/rules_docker/releases/download/v0.25.0/rules_docker-v0.25.0.tar.gz"], + ) + + maybe( + http_archive, + name = "bazel_gazelle", + sha256 = "de69a09dc70417580aabf20a28619bb3ef60d038470c7cf8442fafcf627c21cb", + urls = [ + "https://mirror.bazel.build/github.com/bazelbuild/bazel-gazelle/releases/download/v0.24.0/bazel-gazelle-v0.24.0.tar.gz", + "https://github.com/bazelbuild/bazel-gazelle/releases/download/v0.24.0/bazel-gazelle-v0.24.0.tar.gz", + ], + ) 
+ + maybe( + http_archive, + name = "rules_proto", + sha256 = "80d3a4ec17354cccc898bfe32118edd934f851b03029d63ef3fc7c8663a7415c", + strip_prefix = "rules_proto-5.3.0-21.5", + urls = [ + "https://github.com/bazelbuild/rules_proto/archive/refs/tags/5.3.0-21.5.tar.gz", + ], + ) + + maybe( + http_archive, + name = "rules_proto_grpc", + strip_prefix = "rules_proto_grpc-4.2.0", + sha256 = "bbe4db93499f5c9414926e46f9e35016999a4e9f6e3522482d3760dc61011070", + urls = ["https://github.com/rules-proto-grpc/rules_proto_grpc/archive/4.2.0.tar.gz"], + ) + + maybe( + http_archive, + name = "com_google_protobuf", + strip_prefix = "protobuf-{}".format(PROTOBUF_VERSION), + sha256 = PROTOBUF_SHA256, + urls = [ + "https://github.com/protocolbuffers/protobuf/archive/v{}.tar.gz".format(PROTOBUF_VERSION), + ], + ) + + maybe( + http_archive, + name = "com_github_grpc_grpc", + strip_prefix = "grpc-{}".format(GRPC_VERSION), + sha256 = GRPC_SHA256, + urls = [ + "https://github.com/grpc/grpc/archive/v{}.tar.gz".format(GRPC_VERSION), + ], + ) + + maybe( + http_archive, + name = "rules_foreign_cc", + sha256 = "2a4d07cd64b0719b39a7c12218a3e507672b82a97b98c6a89d38565894cf7c51", + strip_prefix = "rules_foreign_cc-0.9.0", + url = "https://github.com/bazelbuild/rules_foreign_cc/archive/refs/tags/0.9.0.tar.gz", + ) + + # buildifier + maybe( + http_archive, + name = "com_github_bazelbuild_buildtools", + sha256 = "ae34c344514e08c23e90da0e2d6cb700fcd28e80c02e23e4d5715dddcb42f7b3", + strip_prefix = "buildtools-4.2.2", + urls = [ + "https://github.com/bazelbuild/buildtools/archive/refs/tags/4.2.2.tar.gz", + ], + ) + + # buf rules + maybe( + http_archive, + name = "rules_buf", + sha256 = "523a4e06f0746661e092d083757263a249fedca535bd6dd819a8c50de074731a", + strip_prefix = "rules_buf-0.1.1", + urls = [ + "https://github.com/bufbuild/rules_buf/archive/refs/tags/v0.1.1.zip", + ], + ) + + # python rules + maybe( + http_archive, + name = "rules_python", + sha256 = "8c15896f6686beb5c631a4459a3aa8392daccaab805ea899c9d14215074b60ef", + strip_prefix = "rules_python-0.17.3", + url = "https://github.com/bazelbuild/rules_python/archive/refs/tags/0.17.3.tar.gz", + ) + + # The following library will need to be built from source. + maybe( + new_git_repository, + name = "com_github_microsoft_lightgbm", + init_submodules = True, + recursive_init_submodules = True, + commit = "f1d3181ced9fd01f4b2899054abd99be6773e939", + build_file = Label("//third_party:BUILD.lightgbm"), + remote = "https://github.com/microsoft/LightGBM.git", + shallow_since = "1667710116 -0500", + ) + + # io_grpc_grpc_java is for java_grpc_library and related dependencies. 
+ # Using commit 0cda133c52ed937f9b0a19bcbfc36bf2892c7aa8 + maybe( + http_archive, + name = "rules_jvm_external", + sha256 = "c21ce8b8c4ccac87c809c317def87644cdc3a9dd650c74f41698d761c95175f3", + strip_prefix = "rules_jvm_external-1498ac6ccd3ea9cdb84afed65aa257c57abf3e0a", + url = "https://github.com/bazelbuild/rules_jvm_external/archive/1498ac6ccd3ea9cdb84afed65aa257c57abf3e0a.zip", + ) + maybe( + http_archive, + name = "io_grpc_grpc_java", + sha256 = "35189faf484096c9eb2928c43b39f2457d1ca39046704ba8c65a69482f8ceed5", + strip_prefix = "grpc-java-0cda133c52ed937f9b0a19bcbfc36bf2892c7aa8", + urls = ["https://github.com/grpc/grpc-java/archive/0cda133c52ed937f9b0a19bcbfc36bf2892c7aa8.tar.gz"], + ) + + # rules_kotlin + maybe( + http_archive, + name = "io_bazel_rules_kotlin", + sha256 = "a57591404423a52bd6b18ebba7979e8cd2243534736c5c94d35c89718ea38f94", + urls = ["https://github.com/bazelbuild/rules_kotlin/releases/download/v1.6.0/rules_kotlin_release.tgz"], + ) + maybe( + http_archive, + name = "com_github_grpc_grpc_kotlin", + sha256 = "b1ec1caa5d81f4fa4dca0662f8112711c82d7db6ba89c928ca7baa4de50afbb2", + strip_prefix = "grpc-kotlin-a1659c1b3fb665e01a6854224c7fdcafc8e54d56", + urls = ["https://github.com/grpc/grpc-kotlin/archive/a1659c1b3fb665e01a6854224c7fdcafc8e54d56.tar.gz"], + ) + + # rules_swift and rules_apple + maybe( + http_archive, + name = "build_bazel_rules_swift", + sha256 = "51efdaf85e04e51174de76ef563f255451d5a5cd24c61ad902feeadafc7046d9", + url = "https://github.com/bazelbuild/rules_swift/releases/download/1.2.0/rules_swift.1.2.0.tar.gz", + ) + maybe( + http_archive, + name = "build_bazel_apple_support", + sha256 = "2e3dc4d0000e8c2f5782ea7bb53162f37c485b5d8dc62bb3d7d7fc7c276f0d00", + url = "https://github.com/bazelbuild/apple_support/releases/download/1.3.2/apple_support.1.3.2.tar.gz", + ) diff --git a/src/openllm/__about__.py b/src/openllm/__about__.py new file mode 100644 index 00000000..3ab2e17e --- /dev/null +++ b/src/openllm/__about__.py @@ -0,0 +1,14 @@ +# Copyright 2023 BentoML Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +__version__ = "0.0.2" diff --git a/src/openllm/__init__.py b/src/openllm/__init__.py new file mode 100644 index 00000000..dcaae940 --- /dev/null +++ b/src/openllm/__init__.py @@ -0,0 +1,228 @@ +# Copyright 2023 BentoML Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+""" +OpenLLM +======= + +OpenLLM: Your one stop-and-go-solution for serving any Open Large-Language Model + +- StableLM, Llama, Alpaca, Dolly, Flan-T5, and more +- Powered by BentoML 🍱 + HuggingFace 🤗 +""" +from __future__ import annotations + +import typing as t + +from .__about__ import __version__ as __version__ +from .exceptions import MissingDependencyError +from .utils import LazyModule as _LazyModule +from .utils import import_utils_shim as imports + +_import_structure = { + "build_utils": [], + # TODO: implement + # "cache": [], + "cli": [], + "configuration_utils": ["LLMConfig"], + "exceptions": [], + "prompts": ["Prompt"], + "runner_utils": ["LLMRunner", "LLMRunnable", "generate_tokenizer_runner"], + "schema": ["PromptTemplate"], + "server_utils": ["start"], + "types": [], + "utils": [ + "get_pretrained_env", + "get_working_dir", + "FRAMEWORK_ENV_VAR", + "generate_service_name", + "generate_tag_from_model_name", + ], + "utils.logging": [], + "models": [], + # NOTE: models + "models.auto": [ + "Config", + "CONFIG_MAPPING", + "Tokenizer", + "TOKENIZER_MAPPING", + "TOKENIZER_MAPPING_NAMES", + ], + "models.flan_t5": ["FlanT5Config", "START_FLAN_T5_COMMAND_DOCSTRING"], +} + +try: + if not imports.is_torch_available(): + raise MissingDependencyError +except MissingDependencyError: + pass +else: + _import_structure["models.flan_t5"].extend(["FlanT5", "FlanT5WithTokenizer", "FlanT5Tokenizer"]) + _import_structure["models.auto"].extend( + [ + "LLM", + "LLMWithTokenizer", + "MODEL_MAPPING_NAMES", + "MODEL_WITH_TOKENIZER_MAPPING_NAMES", + "MODEL_MAPPING", + "MODEL_WITH_TOKENIZER_MAPPING", + ] + ) + +try: + if not imports.is_flax_available(): + raise MissingDependencyError +except MissingDependencyError: + pass +else: + _import_structure["models.flan_t5"].extend(["FlaxFlanT5", "FlaxFlanT5WithTokenizer"]) + _import_structure["models.auto"].extend( + [ + "FlaxLLM", + "FlaxLLMWithTokenizer", + "MODEL_FLAX_MAPPING_NAMES", + "MODEL_FLAX_WITH_TOKENIZER_MAPPING_NAMES", + "MODEL_FLAX_MAPPING", + "MODEL_FLAX_WITH_TOKENIZER_MAPPING", + ] + ) + +try: + if not imports.is_tf_available(): + raise MissingDependencyError +except MissingDependencyError: + pass +else: + _import_structure["models.flan_t5"].extend(["TFFlanT5", "TFFlanT5WithTokenizer"]) + _import_structure["models.auto"].extend( + [ + "TFLLM", + "TFLLMWithTokenizer", + "MODEL_TF_MAPPING_NAMES", + "MODEL_TF_WITH_TOKENIZER_MAPPING_NAMES", + "MODEL_TF_MAPPING", + "MODEL_TF_WITH_TOKENIZER_MAPPING", + ] + ) + + +# declaration for OpenLLM-related modules +if t.TYPE_CHECKING: + from . import build_utils as build_utils + from . import cli as cli + from . import configuration_utils as configuration_utils + from . import exceptions as exceptions + from . import models as models + from . import prompts as prompts + from . import runner_utils as runner_utils + from . import schema as schema + from . import server_utils as server_utils + from . import types as types + from . 
import utils as utils + # Specific types import + from .configuration_utils import LLMConfig as LLMConfig + from .models.auto import CONFIG_MAPPING as CONFIG_MAPPING + from .models.auto import TOKENIZER_MAPPING as TOKENIZER_MAPPING + from .models.auto import TOKENIZER_MAPPING_NAMES as TOKENIZER_MAPPING_NAMES + from .models.auto import Config as Config + from .models.auto import Tokenizer as Tokenizer + from .models.flan_t5 import \ + START_FLAN_T5_COMMAND_DOCSTRING as START_FLAN_T5_COMMAND_DOCSTRING + from .models.flan_t5 import FlanT5Config as FlanT5Config + from .prompts import Prompt as Prompt + from .runner_utils import LLMRunnable as LLMRunnable + from .runner_utils import LLMRunner as LLMRunner + from .runner_utils import \ + generate_tokenizer_runner as generate_tokenizer_runner + from .schema import PromptTemplate as PromptTemplate + from .server_utils import start as start + from .utils import FRAMEWORK_ENV_VAR as FRAMEWORK_ENV_VAR + from .utils import generate_service_name as generate_service_name + from .utils import \ + generate_tag_from_model_name as generate_tag_from_model_name + from .utils import get_pretrained_env as get_pretrained_env + from .utils import get_working_dir as get_working_dir + + try: + if not imports.is_torch_available(): + raise MissingDependencyError + except MissingDependencyError: + pass + else: + from .models.auto import LLM as LLM + from .models.auto import MODEL_MAPPING as MODEL_MAPPING + from .models.auto import MODEL_MAPPING_NAMES as MODEL_MAPPING_NAMES + from .models.auto import \ + MODEL_WITH_TOKENIZER_MAPPING as MODEL_WITH_TOKENIZER_MAPPING + from .models.auto import \ + MODEL_WITH_TOKENIZER_MAPPING_NAMES as \ + MODEL_WITH_TOKENIZER_MAPPING_NAMES + from .models.auto import LLMWithTokenizer as LLMWithTokenizer + from .models.flan_t5 import FlanT5 as FlanT5 + from .models.flan_t5 import FlanT5Tokenizer as FlanT5Tokenizer + from .models.flan_t5 import FlanT5WithTokenizer as FlanT5WithTokenizer + + try: + if not imports.is_flax_available(): + raise MissingDependencyError + except MissingDependencyError: + pass + else: + from .models.auto import MODEL_FLAX_MAPPING as MODEL_FLAX_MAPPING + from .models.auto import \ + MODEL_FLAX_MAPPING_NAMES as MODEL_FLAX_MAPPING_NAMES + from .models.auto import \ + MODEL_FLAX_WITH_TOKENIZER_MAPPING as \ + MODEL_FLAX_WITH_TOKENIZER_MAPPING + from .models.auto import \ + MODEL_FLAX_WITH_TOKENIZER_MAPPING_NAMES as \ + MODEL_FLAX_WITH_TOKENIZER_MAPPING_NAMES + from .models.auto import FlaxLLM as FlaxLLM + from .models.auto import FlaxLLMWithTokenizer as FlaxLLMWithTokenizer + from .models.flan_t5 import FlaxFlanT5 as FlaxFlanT5 + from .models.flan_t5 import \ + FlaxFlanT5WithTokenizer as FlaxFlanT5WithTokenizer + + try: + if not imports.is_tf_available(): + raise MissingDependencyError + except MissingDependencyError: + pass + else: + from .models.auto import MODEL_TF_MAPPING as MODEL_TF_MAPPING + from .models.auto import \ + MODEL_TF_MAPPING_NAMES as MODEL_TF_MAPPING_NAMES + from .models.auto import \ + MODEL_TF_WITH_TOKENIZER_MAPPING as MODEL_TF_WITH_TOKENIZER_MAPPING + from .models.auto import \ + MODEL_TF_WITH_TOKENIZER_MAPPING_NAMES as \ + MODEL_TF_WITH_TOKENIZER_MAPPING_NAMES + from .models.auto import TFLLM as TFLLM + from .models.auto import TFLLMWithTokenizer as TFLLMWithTokenizer + from .models.flan_t5 import TFFlanT5 as TFFlanT5 + from .models.flan_t5 import \ + TFFlanT5WithTokenizer as TFFlanT5WithTokenizer + +else: + import sys + + sys.modules[__name__] = _LazyModule( + __name__, + globals()["__file__"], + 
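+        # mapping of submodule -> exported names built above; the actual
+        # imports only happen on first attribute access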
_import_structure,
+        module_spec=__spec__,
+        extra_objects={"__version__": __version__},
+    )
+    del sys, _LazyModule
+
+del imports, t, _import_structure, MissingDependencyError
diff --git a/src/openllm/__main__.py b/src/openllm/__main__.py
new file mode 100644
index 00000000..e9c17f26
--- /dev/null
+++ b/src/openllm/__main__.py
@@ -0,0 +1,104 @@
+# Copyright 2023 BentoML Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+CLI entrypoint for OpenLLM.
+
+Usage:
+    openllm --help
+
+To start any OpenLLM model:
+    openllm start <model_name> --options ...
+"""
+from __future__ import annotations
+
+import typing as t
+
+import click
+
+import openllm
+
+_CONTEXT_SETTINGS = {"help_option_names": ["-h", "--help"]}
+
+
+@click.group(cls=openllm.cli.OpenLLMCommandGroup, context_settings=_CONTEXT_SETTINGS)
+@click.version_option(openllm.__version__, "-v", "--version")
+def cli():
+    """
+    \b
+    ██████╗ ██████╗ ███████╗███╗   ██╗██╗     ██╗     ███╗   ███╗
+    ██╔═══██╗██╔══██╗██╔════╝████╗  ██║██║     ██║     ████╗ ████║
+    ██║   ██║██████╔╝█████╗  ██╔██╗ ██║██║     ██║     ██╔████╔██║
+    ██║   ██║██╔═══╝ ██╔══╝  ██║╚██╗██║██║     ██║     ██║╚██╔╝██║
+    ╚██████╔╝██║     ███████╗██║ ╚████║███████╗███████╗██║ ╚═╝ ██║
+     ╚═════╝ ╚═╝     ╚══════╝╚═╝  ╚═══╝╚══════╝╚══════╝╚═╝     ╚═╝
+
+    \b
+    OpenLLM: Your one stop-and-go-solution for serving any Open Large-Language Model
+
+    - StableLM, Llama, Alpaca, Dolly, Flan-T5, and more
+
+    \b
+    - Powered by BentoML 🍱 + HuggingFace 🤗
+    """
+
+
+@cli.group(cls=openllm.cli.StartCommand, context_settings=_CONTEXT_SETTINGS)
+def start():
+    """
+    Start any LLM as a REST server.
+
+    $ openllm start <model_name> -- ...
+    """
+
+
+@cli.group(cls=openllm.cli.StartCommand, context_settings=_CONTEXT_SETTINGS, _serve_grpc=True, name="start-grpc")
+def start_grpc():
+    """
+    Start any LLM as a gRPC server.
+
+    $ openllm start-grpc <model_name> -- ...
+    """
+
+
+@cli.command(aliases=["bundle"])
+def build():
+    """
+    Package a given model.
+
+    If the given format is `container`, also package the bundle into a container.
+    """
+
+
+@cli.command(hidden=True)
+def deploy():
+    """
+    Deploy a model to a target platform.
+
+    Deployment options:
+    - BentoCloud
+    - Self-hosted Yatai
+    - SageMaker, ECR, EC2
+    """
+
+
+@cli.command(name="supported-models")
+def supported_models():
+    """
+    List all supported models.
+    """
+    click.secho(f"\nSupported LLM: {', '.join(openllm.CONFIG_MAPPING.keys())}", fg="blue")
+
+
+if __name__ == "__main__":
+    cli()
diff --git a/src/openllm/build_utils.py b/src/openllm/build_utils.py
new file mode 100644
index 00000000..a06ae784
--- /dev/null
+++ b/src/openllm/build_utils.py
@@ -0,0 +1,17 @@
+# Copyright 2023 BentoML Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+Any build-related utilities. This is used for CI.
+"""
+from __future__ import annotations
diff --git a/src/openllm/cache/__init__.py b/src/openllm/cache/__init__.py
new file mode 100644
index 00000000..aa6319c7
--- /dev/null
+++ b/src/openllm/cache/__init__.py
@@ -0,0 +1,17 @@
+# Copyright 2023 BentoML Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+Cache utilities for OpenLLM client.
+"""
diff --git a/src/openllm/cache/inmemory.py b/src/openllm/cache/inmemory.py
new file mode 100644
index 00000000..e69de29b
diff --git a/src/openllm/cli.py b/src/openllm/cli.py
new file mode 100644
index 00000000..aa9f6607
--- /dev/null
+++ b/src/openllm/cli.py
@@ -0,0 +1,244 @@
+# Copyright 2023 BentoML Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+CLI utilities for OpenLLM.
+
+This extends clidantic and BentoML's internal CLI CommandGroup.
+"""
+from __future__ import annotations
+
+import difflib
+import functools
+import inspect
+import logging
+import typing as t
+
+import click
+from click_option_group import optgroup
+
+import openllm
+
+if t.TYPE_CHECKING:
+    from openllm.types import F, P
+
+    class ClickFunctionProtocol(t.Protocol[P]):
+        __name__: str
+        __click_params__: list[click.Option]
+
+        def __call__(self, *args: P.args, **kwargs: P.kwargs) -> t.Any:
+            ...
+
+    ServeCommand = t.Literal["serve", "serve-grpc", "start-http-server", "start-grpc-server", "start-runner-server"]
+
+
+logger = logging.getLogger(__name__)
+
+
+class OpenLLMCommandGroup(click.Group):
+    NUM_COMMON_PARAMS = 2
+
+    @staticmethod
+    def common_params(f: F[P]) -> ClickFunctionProtocol[t.Any]:
+        # The following logic is similar to that of BentoMLCommandGroup
+
+        from bentoml._internal.configuration import (DEBUG_ENV_VAR,
+                                                     QUIET_ENV_VAR,
+                                                     set_debug_mode,
+                                                     set_quiet_mode)
+        from bentoml._internal.log import configure_logging
+
+        @click.option("-q", "--quiet", envvar=QUIET_ENV_VAR, is_flag=True, default=False, help="Suppress all output.")
+        @click.option(
+            "--debug", "--verbose", envvar=DEBUG_ENV_VAR, is_flag=True, default=False, help="Print out debug logs."
+        )
+        @functools.wraps(f)
+        def wrapper(quiet: bool, debug: bool, *args: P.args, **kwargs: P.kwargs) -> t.Any:
+            if quiet:
+                set_quiet_mode(True)
+                if debug:
+                    logger.warning("'--quiet' passed; ignoring '--verbose/--debug'")
+            elif debug:
+                set_debug_mode(True)
+
+            configure_logging()
+
+            return f(*args, **kwargs)
+
+        return wrapper
+
+    def __init__(self, *args: t.Any, **kwargs: t.Any) -> None:
+        super(OpenLLMCommandGroup, self).__init__(*args, **kwargs)
+        # these two dictionaries will store known aliases for commands and groups
+        self._commands: dict[str, list[str]] = {}
+        self._aliases: dict[str, str] = {}
+
+    # ported from bentoml_cli.utils.BentoMLCommandGroup to handle aliases and command suggestions via difflib.
+    def resolve_alias(self, cmd_name: str):
+        return self._aliases[cmd_name] if cmd_name in self._aliases else cmd_name
+
+    def get_command(self, ctx: click.Context, cmd_name: str) -> click.Command | None:
+        cmd_name = self.resolve_alias(cmd_name)
+        return super(OpenLLMCommandGroup, self).get_command(ctx, cmd_name)
+
+    @staticmethod
+    def common_chain(f: F[P]) -> ClickFunctionProtocol[t.Any]:
+        # Wrap implementation with common parameters
+        wrapped = OpenLLMCommandGroup.common_params(f)
+        # TODO: Tracking
+        # TODO: Handling exception, using ExceptionGroup and Rich
+
+        # move common parameters to the end of the parameters list
+        wrapped.__click_params__ = (
+            wrapped.__click_params__[-OpenLLMCommandGroup.NUM_COMMON_PARAMS :]
+            + wrapped.__click_params__[: -OpenLLMCommandGroup.NUM_COMMON_PARAMS]
+        )
+        return wrapped
+
+    def command(self, *args: t.Any, **kwargs: t.Any) -> t.Callable[[F[P]], click.Command]:
+        if "context_settings" not in kwargs:
+            kwargs["context_settings"] = {}
+        kwargs["context_settings"]["max_content_width"] = 119
+        aliases = kwargs.pop("aliases", None)
+
+        def wrapper(f: F[P]) -> click.Command:
+            name = f.__name__.lower().replace("_", "-")
+            kwargs.setdefault("help", inspect.getdoc(f))
+            kwargs.setdefault("name", name)
+
+            cmd = super(OpenLLMCommandGroup, self).command(*args, **kwargs)(OpenLLMCommandGroup.common_chain(f))
+            # add aliases to a given command if specified.
+            if aliases is not None:
+                assert cmd.name
+                self._commands[cmd.name] = aliases
+                self._aliases.update({alias: cmd.name for alias in aliases})
+
+            return cmd
+
+        return wrapper
+
+    def format_commands(self, ctx: click.Context, formatter: click.HelpFormatter) -> None:
+        rows: list[tuple[str, str]] = []
+        sub_commands = self.list_commands(ctx)
+
+        max_len = max(len(cmd) for cmd in sub_commands)
+        limit = formatter.width - 6 - max_len
+
+        for sub_command in sub_commands:
+            cmd = self.get_command(ctx, sub_command)
+            if cmd is None:
+                continue
+            # If the command is hidden, then we skip it.
+            if hasattr(cmd, "hidden") and cmd.hidden:
+                continue
+            if sub_command in self._commands:
+                aliases = ",".join(sorted(self._commands[sub_command]))
+                sub_command = "%s (%s)" % (sub_command, aliases)
+            # this cmd_help is available since click>=7;
+            # BentoML requires click>=7.
+            cmd_help = cmd.get_short_help_str(limit)
+            rows.append((sub_command, cmd_help))
+        if rows:
+            with formatter.section("Commands"):
+                formatter.write_dl(rows)
+
+    def resolve_command(
+        self, ctx: click.Context, args: list[str]
+    ) -> tuple[str | None, click.Command | None, list[str]]:
+        try:
+            return super(OpenLLMCommandGroup, self).resolve_command(ctx, args)
+        except click.exceptions.UsageError as e:
+            error_msg = str(e)
+            original_cmd_name = click.utils.make_str(args[0])
+            matches = difflib.get_close_matches(original_cmd_name, self.list_commands(ctx), 3, 0.5)
+            if matches:
+                fmt_matches = "\n ".join(matches)
+                error_msg += "\n\n"
+                error_msg += f"Did you mean?\n {fmt_matches}"
+            raise click.exceptions.UsageError(error_msg, e.ctx)
+
+
+def start_model_command(
+    model_name: str, _context_settings: dict[str, t.Any] | None = None, _serve_grpc: bool = False
+) -> click.Command:
+    _context_settings = _context_settings or {}
+    config = openllm.Config.for_model(model_name)
+
+    def decorator(f: F[P]) -> click.Command:
+        f = openllm.configuration_utils.LLMConfig.generate_click_options(config)(f)
+        f = parse_serve_args(_serve_grpc)(f)
+        return click.command(
+            model_name,
+            short_help=f"Start an LLMServer for '{model_name}' ('--help' for more details)",
+            context_settings=_context_settings,
+            help=getattr(openllm, f"START_{openllm.utils.kebab_to_snake_case(model_name).upper()}_COMMAND_DOCSTRING"),
+        )(openllm.cli.OpenLLMCommandGroup.common_chain(f))
+
+    # The actual `start <model_name>` implementation
+    def model_start(**attrs: t.Any):
+        llm_config_args = {k: attrs[k] for k in config.__fields__ if k in attrs}
+        # The rest should be server-related args
+        server_args = {k: v for k, v in attrs.items() if k not in list(llm_config_args.keys())}
+
+        openllm.start(model_name, server_args=server_args, serve_grpc=_serve_grpc, **llm_config_args)
+
+    return decorator(model_start)
+
+
+class StartCommand(click.MultiCommand):
+    def __init__(self, *args: t.Any, **kwargs: t.Any):
+        self._serve_grpc = kwargs.pop("_serve_grpc", False)
+        super(StartCommand, self).__init__(*args, **kwargs)
+        self._cached_command: dict[str, click.Command] = {}
+
+    def list_commands(self, ctx: click.Context):
+        return openllm.CONFIG_MAPPING.keys()
+
+    def get_command(self, ctx: click.Context, cmd_name: str) -> click.Command:
+        if cmd_name not in self._cached_command:
+            self._cached_command[cmd_name] = start_model_command(cmd_name, _serve_grpc=self._serve_grpc)
+        return self._cached_command[cmd_name]
+
+
+def parse_serve_args(serve_grpc: bool = False) -> F[P]:
+    """Parse the options of `bentoml serve|serve-grpc` so they can be reused by `openllm start`."""
+    from bentoml_cli.cli import cli
+
+    command = "serve-http" if not serve_grpc else "serve-grpc"
+    group = optgroup.group(
+        f"{'HTTP' if not serve_grpc else 'gRPC'} server options",
+        help=f"Related to serving the model [synonymous to `bentoml {command}`]",
+    )
+
+    def decorator(f: F[P]) -> F[P]:
+        _, serve_command, _ = cli.resolve_command(click.get_current_context(), [command])
+        # The first variable is the argument bento
+        # and the last three are shared defaults, which we don't need.
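+        # Hence the ``[1:-3]`` slice below keeps only the serve options worth re-exposing.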
+        serve_options = serve_command.params[1:-3]
+        for options in reversed(serve_options):
+            attrs = options.to_info_dict()
+            # we don't need param_type_name, since it should all be options
+            attrs.pop("param_type_name")
+            # name is not a valid arg
+            name = attrs.pop("name")
+            # type can be determined from the default value
+            attrs.pop("type")
+            param_decls = (*attrs.pop("opts"), *attrs.pop("secondary_opts"))
+            # NOTE: User shouldn't set '--working-dir', as OpenLLM will set this up.
+            # NOTE: production is also deprecated
+            if name not in ("working_dir", "production"):
+                f = optgroup.option(*param_decls, **attrs)(f)
+
+        return group(f)
+
+    return decorator
diff --git a/src/openllm/configuration_utils.py b/src/openllm/configuration_utils.py
new file mode 100644
index 00000000..8f675e10
--- /dev/null
+++ b/src/openllm/configuration_utils.py
@@ -0,0 +1,101 @@
+# Copyright 2023 BentoML Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+Configuration utilities for OpenLLM. All model configurations inherit from ``openllm.configuration_utils.LLMConfig``.
+
+Note that ``openllm.configuration_utils.LLMConfig`` is a subclass of ``pydantic.BaseModel``. It also provides
+``generate_click_options``, which returns a list of Click-compatible options for the model. Such options will then be
+passed to ``openllm.__main__.cli``.
+"""
+from __future__ import annotations
+
+import typing as t
+from abc import ABC
+
+import click
+import pydantic
+import yaml
+from bentoml._internal.models.model import ModelSignature
+from click_option_group import optgroup
+from clidantic.click import (allows_multiple, parse_default, parse_type,
+                             should_show_default)
+from clidantic.convert import param_from_field
+from pydantic.utils import lenient_issubclass
+
+import openllm
+
+if t.TYPE_CHECKING:
+    from openllm.types import F, P
+
+
+__all__ = ["LLMConfig", "ModelSignature"]
+
+delimiter = "."
+internal_delimiter = "__"
+
+
+def models_to_options(
+    model: type[LLMConfig], parent_path: tuple[str, ...]
= tuple()
+) -> t.Generator[t.Callable[[F[P]], F[P]], None, None]:
+    # The following logic is adapted from clidantic.convert
+    for field in model.__fields__.values():
+        kebab_name = field.name.replace("_", "-")
+        assert internal_delimiter not in kebab_name, f"Field name {kebab_name} contains internal delimiter"
+        if lenient_issubclass(field.outer_type_, pydantic.BaseModel):
+            yield from models_to_options(field.outer_type_, parent_path=parent_path + (kebab_name,))
+            continue
+
+        params = param_from_field(field, kebab_name, delimiter, internal_delimiter, parent_path)
+        yield optgroup.option(
+            *params,
+            type=parse_type(field.outer_type_),
+            required=field.required,
+            default=parse_default(field.default, field.outer_type_),
+            show_default=should_show_default(field.default, field.outer_type_),
+            multiple=allows_multiple(field.outer_type_),
+            help=field.field_info.description,
+            show_envvar=True,
+            envvar=f"OPENLLM_{model.__name__.partition('Config')[0].upper()}_{field.name.upper()}",
+        )
+
+
+class BaseConfig(pydantic.BaseModel, ABC):
+    class Config:
+        extra = "forbid"
+        underscore_attrs_are_private = True
+
+    def with_options(self, **kwargs: t.Any) -> BaseConfig:
+        return self.copy(update=kwargs)
+
+    @classmethod
+    def from_yaml(cls, path: str, ctx: str | None = None) -> BaseConfig:
+        with open(openllm.utils.resolve_user_filepath(path, ctx=ctx), "rb") as f:
+            content = yaml.safe_load(f)
+        return cls(**content)
+
+    def to_yaml(self):
+        return yaml.safe_dump(self.dict(), sort_keys=False)
+
+
+class LLMConfig(BaseConfig):
+    @staticmethod
+    def generate_click_options(config: LLMConfig) -> t.Callable[[t.Callable[..., t.Any]], click.Command]:
+        klass = config.__class__
+        group = optgroup.group(f"{klass.__name__} options", help=f"[Auto-generated from '{klass}']")
+
+        def wrapper(f: t.Callable[..., t.Any]) -> click.Command:
+            for option in reversed(list(models_to_options(klass))):
+                f = option(f)
+            return group(f)
+
+        return wrapper
diff --git a/src/openllm/exceptions.py b/src/openllm/exceptions.py
new file mode 100644
index 00000000..2a27e631
--- /dev/null
+++ b/src/openllm/exceptions.py
@@ -0,0 +1,31 @@
+# Copyright 2023 BentoML Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+Base exceptions for OpenLLM. This extends BentoML exceptions.
+"""
+from __future__ import annotations
+
+import bentoml
+
+
+class OpenLLMException(bentoml.exceptions.BentoMLException):
+    """Base class for all OpenLLM exceptions. This extends BentoMLException."""
+
+
+class ForbiddenAttributeError(OpenLLMException):
+    """Raised when using an _internal field."""
+
+
+class MissingDependencyError(BaseException):
+    """Raised when a dependency is missing."""
diff --git a/src/openllm/models/__init__.py b/src/openllm/models/__init__.py
new file mode 100644
index 00000000..b32b98e0
--- /dev/null
+++ b/src/openllm/models/__init__.py
@@ -0,0 +1,16 @@
+# Copyright 2023 BentoML Team. All rights reserved.
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from . import auto as auto +from . import flan_t5 as flan_t5 diff --git a/src/openllm/models/auto/__init__.py b/src/openllm/models/auto/__init__.py new file mode 100644 index 00000000..b41f1f5f --- /dev/null +++ b/src/openllm/models/auto/__init__.py @@ -0,0 +1,141 @@ +# Copyright 2023 BentoML Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""This module is derived from HuggingFace's AutoConfig, Tokenizer, AutoModel, etc.""" + +from __future__ import annotations + +import typing as t + +import openllm + +from ...utils import LazyModule +from ...utils import import_utils_shim as imports + +_import_structure = { + "configuration_auto": ["Config", "CONFIG_MAPPING", "CONFIG_MAPPING_NAMES"], + "tokenization_auto": ["Tokenizer", "TOKENIZER_MAPPING", "TOKENIZER_MAPPING_NAMES"], +} + +try: + if not imports.is_torch_available(): + raise openllm.exceptions.MissingDependencyError +except openllm.exceptions.MissingDependencyError: + pass +else: + _import_structure["modeling_auto"] = [ + "LLM", + "LLMWithTokenizer", + "MODEL_MAPPING_NAMES", + "MODEL_WITH_TOKENIZER_MAPPING_NAMES", + "MODEL_MAPPING", + "MODEL_WITH_TOKENIZER_MAPPING", + ] + +try: + if not imports.is_flax_available(): + raise openllm.exceptions.MissingDependencyError +except openllm.exceptions.MissingDependencyError: + pass +else: + _import_structure["modeling_flax_auto"] = [ + "FlaxLLM", + "FlaxLLMWithTokenizer", + "MODEL_FLAX_MAPPING_NAMES", + "MODEL_FLAX_WITH_TOKENIZER_MAPPING_NAMES", + "MODEL_FLAX_MAPPING", + "MODEL_FLAX_WITH_TOKENIZER_MAPPING", + ] + +try: + if not imports.is_tf_available(): + raise openllm.exceptions.MissingDependencyError +except openllm.exceptions.MissingDependencyError: + pass +else: + _import_structure["modeling_tf_auto"] = [ + "TFLLM", + "TFLLMWithTokenizer", + "MODEL_TF_MAPPING_NAMES", + "MODEL_TF_WITH_TOKENIZER_MAPPING_NAMES", + "MODEL_TF_MAPPING", + "MODEL_TF_WITH_TOKENIZER_MAPPING", + ] + +if t.TYPE_CHECKING: + from .configuration_auto import CONFIG_MAPPING as CONFIG_MAPPING + from .configuration_auto import \ + CONFIG_MAPPING_NAMES as CONFIG_MAPPING_NAMES + from .configuration_auto import Config as Config + from .tokenization_auto import TOKENIZER_MAPPING as TOKENIZER_MAPPING + from .tokenization_auto import \ + TOKENIZER_MAPPING_NAMES as TOKENIZER_MAPPING_NAMES + from .tokenization_auto import Tokenizer as Tokenizer + + try: + if not imports.is_torch_available(): + raise openllm.exceptions.MissingDependencyError + except 
openllm.exceptions.MissingDependencyError: + pass + else: + from .modeling_auto import LLM as LLM + from .modeling_auto import MODEL_MAPPING as MODEL_MAPPING + from .modeling_auto import MODEL_MAPPING_NAMES as MODEL_MAPPING_NAMES + from .modeling_auto import \ + MODEL_WITH_TOKENIZER_MAPPING as MODEL_WITH_TOKENIZER_MAPPING + from .modeling_auto import \ + MODEL_WITH_TOKENIZER_MAPPING_NAMES as \ + MODEL_WITH_TOKENIZER_MAPPING_NAMES + from .modeling_auto import LLMWithTokenizer as LLMWithTokenizer + + try: + if not imports.is_flax_available(): + raise openllm.exceptions.MissingDependencyError + except openllm.exceptions.MissingDependencyError: + pass + else: + from .modeling_flax_auto import \ + MODEL_FLAX_MAPPING as MODEL_FLAX_MAPPING + from .modeling_flax_auto import \ + MODEL_FLAX_MAPPING_NAMES as MODEL_FLAX_MAPPING_NAMES + from .modeling_flax_auto import \ + MODEL_FLAX_WITH_TOKENIZER_MAPPING as \ + MODEL_FLAX_WITH_TOKENIZER_MAPPING + from .modeling_flax_auto import \ + MODEL_FLAX_WITH_TOKENIZER_MAPPING_NAMES as \ + MODEL_FLAX_WITH_TOKENIZER_MAPPING_NAMES + from .modeling_flax_auto import FlaxLLM as FlaxLLM + from .modeling_flax_auto import \ + FlaxLLMWithTokenizer as FlaxLLMWithTokenizer + + try: + if not imports.is_tf_available(): + raise openllm.exceptions.MissingDependencyError + except openllm.exceptions.MissingDependencyError: + pass + else: + from .modeling_tf_auto import MODEL_TF_MAPPING as MODEL_TF_MAPPING + from .modeling_tf_auto import \ + MODEL_TF_MAPPING_NAMES as MODEL_TF_MAPPING_NAMES + from .modeling_tf_auto import \ + MODEL_TF_WITH_TOKENIZER_MAPPING as MODEL_TF_WITH_TOKENIZER_MAPPING + from .modeling_tf_auto import \ + MODEL_TF_WITH_TOKENIZER_MAPPING_NAMES as \ + MODEL_TF_WITH_TOKENIZER_MAPPING_NAMES + from .modeling_tf_auto import TFLLM as TFLLM + from .modeling_tf_auto import TFLLMWithTokenizer as TFLLMWithTokenizer +else: + import sys + + sys.modules[__name__] = LazyModule(__name__, globals()["__file__"], _import_structure, module_spec=__spec__) diff --git a/src/openllm/models/auto/configuration_auto.py b/src/openllm/models/auto/configuration_auto.py new file mode 100644 index 00000000..5e366137 --- /dev/null +++ b/src/openllm/models/auto/configuration_auto.py @@ -0,0 +1,88 @@ +# Copyright 2023 BentoML Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
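+
+# NOTE: a hedged usage sketch of the mapping machinery defined below; illustrative
+# only. ``Config.for_model`` resolves a model name to its config class through
+# ``CONFIG_MAPPING`` and forwards any overrides to the pydantic constructor:
+#
+#   >>> import openllm
+#   >>> config = openllm.Config.for_model("flan-t5", temperature=0.5)
+#   >>> type(config).__name__
+#   'FlanT5Config'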
+
+from __future__ import annotations
+
+import types
+import typing as t
+from collections import OrderedDict
+
+import openllm
+
+# NOTE: This is the entrypoint when adding new model config
+CONFIG_MAPPING_NAMES = OrderedDict([("flan_t5", "FlanT5Config")])
+
+
+class _LazyConfigMapping(OrderedDict[str, str]):
+    def __init__(self, mapping: OrderedDict[str, str]):
+        self._mapping = mapping
+        self._extra_content: dict[str, t.Any] = {}
+        self._modules: dict[str, types.ModuleType] = {}
+
+    def __getitem__(self, key: str):
+        if key in self._extra_content:
+            return self._extra_content[key]
+        if key not in self._mapping:
+            raise KeyError(key)
+        value = self._mapping[key]
+        module_name = openllm.utils.kebab_to_snake_case(key)
+        if module_name not in self._modules:
+            self._modules[module_name] = openllm.utils.get_lazy_module(module_name)
+        if hasattr(self._modules[module_name], value):
+            return getattr(self._modules[module_name], value)
+
+        # Some of the mappings have entries model_type -> config of another model type. In that case we try to grab the
+        # object at the top level.
+        return getattr(openllm, value)
+
+    def keys(self):
+        return list(self._mapping.keys()) + list(self._extra_content.keys())
+
+    def values(self):
+        return [self[k] for k in self._mapping.keys()] + list(self._extra_content.values())
+
+    def items(self):
+        return [(k, self[k]) for k in self._mapping.keys()] + list(self._extra_content.items())
+
+    def __iter__(self):
+        return iter(list(self._mapping.keys()) + list(self._extra_content.keys()))
+
+    def __contains__(self, item: t.Any):
+        return item in self._mapping or item in self._extra_content
+
+    def register(self, key: str, value: t.Any):
+        """
+        Register a new configuration in this mapping.
+        """
+        if key in self._mapping.keys():
+            raise ValueError(f"'{key}' is already used by an OpenLLM config, pick another name.")
+        self._extra_content[key] = value
+
+
+CONFIG_MAPPING = _LazyConfigMapping(CONFIG_MAPPING_NAMES)
+
+
+class Config:
+    def __init__(self, *args: t.Any, **kwargs: t.Any):
+        raise EnvironmentError("Cannot instantiate Config. Please use `Config.for_model(model_name)` instead.")
+
+    @classmethod
+    def for_model(cls, model_name: str, *args: t.Any, **kwargs: t.Any):
+        model_name = openllm.utils.kebab_to_snake_case(model_name)
+        if model_name in CONFIG_MAPPING:
+            return CONFIG_MAPPING[model_name](*args, **kwargs)
+        raise ValueError(
+            f"Unrecognized configuration class for {model_name}. "
+            f"Model name should be one of {', '.join(CONFIG_MAPPING.keys())}."
+        )
diff --git a/src/openllm/models/auto/factory.py b/src/openllm/models/auto/factory.py
new file mode 100644
index 00000000..29362af0
--- /dev/null
+++ b/src/openllm/models/auto/factory.py
@@ -0,0 +1,211 @@
+# Copyright 2023 BentoML Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
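+
+# NOTE: a minimal usage sketch of the auto-factories built on top of this module;
+# illustrative only, assuming the PyTorch FLAN-T5 runnable is importable:
+#
+#   >>> import openllm
+#   >>> runner = openllm.LLM.create_runner("flan-t5")
+#
+# Third-party models can hook into the same machinery by registering a pair of
+# classes, e.g. ``openllm.LLM.register(MyConfig, MyRunnable)``, where ``MyConfig``
+# and ``MyRunnable`` are hypothetical user-defined subclasses.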
+
+from __future__ import annotations
+
+import importlib
+import types
+import typing as t
+from collections import OrderedDict
+
+import openllm
+
+from .configuration_auto import Config
+
+M = t.TypeVar("M")
+K = t.TypeVar("K")
+V = t.TypeVar("V")
+
+
+def _get_runnable_class(
+    config: openllm.LLMConfig, runnable_mapping: _LazyAutoMapping[t.Any, t.Any]
+) -> type[openllm.LLMRunnable[t.Any, t.Any]]:
+    supported_runnables = runnable_mapping[type(config)]
+    if not isinstance(supported_runnables, (list, tuple)):
+        return supported_runnables
+    return supported_runnables[0]
+
+
+class _BaseAutoRunnerFactory(t.Generic[M]):
+    _model_mapping: _LazyAutoMapping[type[openllm.LLMConfig], type[openllm.LLMRunnable[M, t.Any]]]
+
+    def __init__(self, *args: t.Any, **kwargs: t.Any):
+        raise EnvironmentError(
+            f"Cannot instantiate {self.__class__.__name__} directly. "
+            f"Please use '{self.__class__.__name__}.create_runner(model_name)' instead."
+        )
+
+    @classmethod
+    def create_runner(cls, model_name: str, pretrained_or_path: str | None = None, **kwargs: t.Any):
+        config = kwargs.pop("config", None)
+
+        runner_kwargs_name = [
+            "runner_name",
+            "models",
+            "max_batch_size",
+            "max_latency_ms",
+            "method_configs",
+            "embedded",
+            "import_model_kwargs",
+            "import_tokenizer_kwargs",
+            "import_config_kwargs",
+        ]
+        create_runner_kwargs = {k: kwargs.pop(k) for k in runner_kwargs_name if k in kwargs}
+
+        if not isinstance(config, openllm.LLMConfig):
+            # The rest of kwargs is now passed to config
+            config = Config.for_model(model_name, **kwargs)
+        if type(config) in cls._model_mapping.keys():
+            runnable_class = _get_runnable_class(config, cls._model_mapping)
+            if pretrained_or_path is None:
+                pretrained_or_path = openllm.utils.get_pretrained_env(runnable_class.start_model_name)
+            return runnable_class.create_runner(
+                pretrained_or_path=pretrained_or_path, config=config, **create_runner_kwargs
+            )
+        raise ValueError(
+            f"Unrecognized configuration class {config.__class__} for this kind of AutoRunner: {cls.__name__}.\n"
+            f"Runnable type should be one of {', '.join(c.__name__ for c in cls._model_mapping.keys())}."
+        )
+
+    @classmethod
+    def register(cls, config_class: type[openllm.LLMConfig], runnable_class: type[openllm.LLMRunnable[t.Any, t.Any]]):
+        """
+        Register a new model for this class.
+
+        Args:
+            config_class: The configuration corresponding to the model to register.
+            runnable_class: The runnable to register.
+        """
+        if hasattr(runnable_class, "config_class") and runnable_class.config_class != config_class:
+            raise ValueError(
+                "The model class you are passing has a `config_class` attribute that is not consistent with the "
+                f"config class you passed (model has {runnable_class.config_class} and you passed {config_class}). "
+                "Fix one of those so they match!"
+            )
+        cls._model_mapping.register(config_class, runnable_class)
+
+
+def getattribute_from_module(module: types.ModuleType, attr: t.Any) -> t.Any:
+    if attr is None:
+        return None
+    if isinstance(attr, tuple):
+        return tuple(getattribute_from_module(module, a) for a in attr)
+    if hasattr(module, attr):
+        return getattr(module, attr)
+    # Some of the mappings have entries model_type -> object of another model type. In that case we try to grab the
+    # object at the top level.
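+    # For instance, a mapping may name "FlanT5Config" while the module being
+    # inspected is ``openllm.models.flan_t5.modeling_flan_t5``; the lookup then
+    # falls back to the top-level ``openllm`` package below.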
+    openllm_module = importlib.import_module("openllm")
+
+    if module != openllm_module:
+        try:
+            return getattribute_from_module(openllm_module, attr)
+        except ValueError:
+            raise ValueError(f"Could not find {attr} in either {module} or {openllm_module}!")
+    else:
+        raise ValueError(f"Could not find {attr} in {openllm_module}!")
+
+
+class _LazyAutoMapping(OrderedDict[K, V]):
+    """Based on transformers.models.auto.configuration_auto._LazyAutoMapping"""
+
+    def __init__(self, config_mapping: OrderedDict[str, str], model_mapping: OrderedDict[str, str]):
+        self._config_mapping = config_mapping
+        self._reverse_config_mapping = {v: k for k, v in config_mapping.items()}
+        self._model_mapping = model_mapping
+        self._extra_content: dict[K, V] = {}
+        self._modules: dict[str, types.ModuleType] = {}
+
+    def __len__(self):
+        common_keys = set(self._config_mapping.keys()).intersection(self._model_mapping.keys())
+        return len(common_keys) + len(self._extra_content)
+
+    def __getitem__(self, key: K) -> V:
+        if key in self._extra_content:
+            return self._extra_content[key]
+        model_type = self._reverse_config_mapping[key.__name__]
+        if model_type in self._model_mapping:
+            model_name = self._model_mapping[model_type]
+            return self._load_attr_from_module(model_type, model_name)
+
+        # Maybe there were several model types associated with this config.
+        model_types = [k for k, v in self._config_mapping.items() if v == key.__name__]
+        for mtype in model_types:
+            if mtype in self._model_mapping:
+                model_name = self._model_mapping[mtype]
+                return self._load_attr_from_module(mtype, model_name)
+        raise KeyError(key)
+
+    def _load_attr_from_module(self, model_type: str, attr: str) -> t.Any:
+        module_name = openllm.utils.kebab_to_snake_case(model_type)
+        if module_name not in self._modules:
+            self._modules[module_name] = openllm.utils.get_lazy_module(module_name)
+        return getattribute_from_module(self._modules[module_name], attr)
+
+    def keys(self):
+        mapping_keys = [
+            self._load_attr_from_module(key, name)
+            for key, name in self._config_mapping.items()
+            if key in self._model_mapping.keys()
+        ]
+        return mapping_keys + list(self._extra_content.keys())
+
+    def get(self, key: K, default: t.Any):
+        try:
+            return self.__getitem__(key)
+        except KeyError:
+            return default
+
+    def __bool__(self):
+        return bool(self.keys())
+
+    def values(self):
+        mapping_values = [
+            self._load_attr_from_module(key, name)
+            for key, name in self._model_mapping.items()
+            if key in self._config_mapping.keys()
+        ]
+        return mapping_values + list(self._extra_content.values())
+
+    def items(self):
+        mapping_items = [
+            (
+                self._load_attr_from_module(key, self._config_mapping[key]),
+                self._load_attr_from_module(key, self._model_mapping[key]),
+            )
+            for key in self._model_mapping.keys()
+            if key in self._config_mapping.keys()
+        ]
+        return mapping_items + list(self._extra_content.items())
+
+    def __iter__(self):
+        return iter(self.keys())
+
+    def __contains__(self, item: t.Any):
+        if item in self._extra_content:
+            return True
+        if not hasattr(item, "__name__") or item.__name__ not in self._reverse_config_mapping:
+            return False
+        model_type = self._reverse_config_mapping[item.__name__]
+        return model_type in self._model_mapping
+
+    def register(self, key: K, value: V):
+        """
+        Register a new model in this mapping.
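+
+        Example (illustrative only; ``MyConfig`` and ``MyRunnable`` are
+        hypothetical user-defined classes):
+
+        >>> MODEL_MAPPING.register(MyConfig, MyRunnable)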
+ """ + if hasattr(key, "__name__") and key.__name__ in self._reverse_config_mapping: + model_type = self._reverse_config_mapping[key.__name__] + if model_type in self._model_mapping.keys(): + raise ValueError(f"'{key}' is already used by a OpenLLM model.") + + self._extra_content[key] = value diff --git a/src/openllm/models/auto/modeling_auto.py b/src/openllm/models/auto/modeling_auto.py new file mode 100644 index 00000000..a555630c --- /dev/null +++ b/src/openllm/models/auto/modeling_auto.py @@ -0,0 +1,43 @@ +# Copyright 2023 BentoML Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import annotations + +import typing as t +from collections import OrderedDict + +import openllm + +from .configuration_auto import CONFIG_MAPPING_NAMES +from .factory import _BaseAutoRunnerFactory, _LazyAutoMapping + +MODEL_MAPPING_NAMES = OrderedDict([("flan_t5", "FlanT5")]) + +MODEL_WITH_TOKENIZER_MAPPING_NAMES = OrderedDict([("flan_t5", "FlanT5WithTokenizer")]) + +MODEL_MAPPING = _LazyAutoMapping[type[openllm.LLMConfig], type[openllm.LLMRunnable[t.Any, t.Any]]]( + CONFIG_MAPPING_NAMES, MODEL_MAPPING_NAMES +) + +MODEL_WITH_TOKENIZER_MAPPING = _LazyAutoMapping[type[openllm.LLMConfig], type[openllm.LLMRunnable[t.Any, t.Any]]]( + CONFIG_MAPPING_NAMES, MODEL_WITH_TOKENIZER_MAPPING_NAMES +) + + +class LLM(_BaseAutoRunnerFactory[type[openllm.LLMRunnable[t.Any, t.Any]]]): + _model_mapping = MODEL_MAPPING + + +class LLMWithTokenizer(_BaseAutoRunnerFactory[type[openllm.LLMRunnable[t.Any, t.Any]]]): + _model_mapping = MODEL_WITH_TOKENIZER_MAPPING diff --git a/src/openllm/models/auto/modeling_flax_auto.py b/src/openllm/models/auto/modeling_flax_auto.py new file mode 100644 index 00000000..4f3fac7a --- /dev/null +++ b/src/openllm/models/auto/modeling_flax_auto.py @@ -0,0 +1,43 @@ +# Copyright 2023 BentoML Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+from __future__ import annotations
+
+import typing as t
+from collections import OrderedDict
+
+import openllm
+
+from .configuration_auto import CONFIG_MAPPING_NAMES
+from .factory import _BaseAutoRunnerFactory, _LazyAutoMapping
+
+MODEL_FLAX_MAPPING_NAMES = OrderedDict([("flan_t5", "FlaxFlanT5")])
+
+MODEL_FLAX_WITH_TOKENIZER_MAPPING_NAMES = OrderedDict([("flan_t5", "FlaxFlanT5WithTokenizer")])
+
+MODEL_FLAX_MAPPING = _LazyAutoMapping[type[openllm.LLMConfig], type[openllm.LLMRunnable[t.Any, t.Any]]](
+    CONFIG_MAPPING_NAMES, MODEL_FLAX_MAPPING_NAMES
+)
+
+MODEL_FLAX_WITH_TOKENIZER_MAPPING = _LazyAutoMapping[type[openllm.LLMConfig], type[openllm.LLMRunnable[t.Any, t.Any]]](
+    CONFIG_MAPPING_NAMES, MODEL_FLAX_WITH_TOKENIZER_MAPPING_NAMES
+)
+
+
+class FlaxLLM(_BaseAutoRunnerFactory[type[openllm.LLMRunnable[t.Any, t.Any]]]):
+    _model_mapping = MODEL_FLAX_MAPPING
+
+
+class FlaxLLMWithTokenizer(_BaseAutoRunnerFactory[type[openllm.LLMRunnable[t.Any, t.Any]]]):
+    _model_mapping = MODEL_FLAX_WITH_TOKENIZER_MAPPING
diff --git a/src/openllm/models/auto/modeling_tf_auto.py b/src/openllm/models/auto/modeling_tf_auto.py
new file mode 100644
index 00000000..14599f90
--- /dev/null
+++ b/src/openllm/models/auto/modeling_tf_auto.py
@@ -0,0 +1,43 @@
+# Copyright 2023 BentoML Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import annotations
+
+import typing as t
+from collections import OrderedDict
+
+import openllm
+
+from .configuration_auto import CONFIG_MAPPING_NAMES
+from .factory import _BaseAutoRunnerFactory, _LazyAutoMapping
+
+MODEL_TF_MAPPING_NAMES = OrderedDict([("flan_t5", "TFFlanT5")])
+
+MODEL_TF_WITH_TOKENIZER_MAPPING_NAMES = OrderedDict([("flan_t5", "TFFlanT5WithTokenizer")])
+
+MODEL_TF_MAPPING = _LazyAutoMapping[type[openllm.LLMConfig], type[openllm.LLMRunnable[t.Any, t.Any]]](
+    CONFIG_MAPPING_NAMES, MODEL_TF_MAPPING_NAMES
+)
+
+MODEL_TF_WITH_TOKENIZER_MAPPING = _LazyAutoMapping[type[openllm.LLMConfig], type[openllm.LLMRunnable[t.Any, t.Any]]](
+    CONFIG_MAPPING_NAMES, MODEL_TF_WITH_TOKENIZER_MAPPING_NAMES
+)
+
+
+class TFLLM(_BaseAutoRunnerFactory[type[openllm.LLMRunnable[t.Any, t.Any]]]):
+    _model_mapping = MODEL_TF_MAPPING
+
+
+class TFLLMWithTokenizer(_BaseAutoRunnerFactory[type[openllm.LLMRunnable[t.Any, t.Any]]]):
+    _model_mapping = MODEL_TF_WITH_TOKENIZER_MAPPING
diff --git a/src/openllm/models/auto/tokenization_auto.py b/src/openllm/models/auto/tokenization_auto.py
new file mode 100644
index 00000000..7e3917f6
--- /dev/null
+++ b/src/openllm/models/auto/tokenization_auto.py
@@ -0,0 +1,46 @@
+# Copyright 2023 BentoML Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import annotations
+
+import typing as t
+from collections import OrderedDict
+
+import openllm
+
+from .configuration_auto import _LazyConfigMapping
+
+TOKENIZER_MAPPING_NAMES = OrderedDict([("flan_t5", "FlanT5Tokenizer")])
+
+TOKENIZER_MAPPING = _LazyConfigMapping(TOKENIZER_MAPPING_NAMES)
+
+
+class Tokenizer:
+    def __init__(self):
+        raise EnvironmentError(
+            "This class should not be instantiated directly. Use 'Tokenizer.create_runner' instead."
+        )
+
+    @classmethod
+    def create_runner(cls, model_name: str, pretrained_or_path: str | None = None, **kwargs: t.Any):
+        model_name = openllm.utils.kebab_to_snake_case(model_name)
+        if model_name in TOKENIZER_MAPPING:
+            tokenizer_class = TOKENIZER_MAPPING[model_name]
+            if pretrained_or_path is None:
+                pretrained_or_path = openllm.utils.get_pretrained_env(tokenizer_class.start_model_name)
+            return tokenizer_class(pretrained_or_path, **kwargs)
+        raise ValueError(
+            f"Unrecognized model {model_name} to build a Tokenizer.\n"
+            f"Model type should be one of {', '.join(TOKENIZER_MAPPING.keys())}."
+        )
diff --git a/src/openllm/models/flan_t5/__init__.py b/src/openllm/models/flan_t5/__init__.py
new file mode 100644
index 00000000..eaebaaaf
--- /dev/null
+++ b/src/openllm/models/flan_t5/__init__.py
@@ -0,0 +1,95 @@
+# Copyright 2023 BentoML Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
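+
+# NOTE: a hedged sketch of how the lazy layout below behaves at runtime;
+# illustrative only, assuming torch is installed so the PyTorch branch of
+# ``_import_structure`` is active:
+#
+#   >>> from openllm.models import flan_t5
+#   >>> flan_t5.FlanT5Config(top_p=0.9)  # resolved lazily on first attribute access
+#   >>> flan_t5.DEFAULT_PROMPT_TEMPLATE.format(context="...", question="...")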
+
+from __future__ import annotations
+
+import typing as t
+
+import openllm
+from openllm.utils import import_utils_shim as imports
+
+_import_structure = {
+    "prompts_flan_t5": ["DEFAULT_PROMPT_TEMPLATE"],
+    "configuration_flan_t5": ["FlanT5Config", "START_FLAN_T5_COMMAND_DOCSTRING"],
+    "service_flan_t5": ["svc", "model_runner", "tokenizer_runner", "generate"],
+}
+
+try:
+    if not imports.is_torch_available():
+        raise openllm.exceptions.MissingDependencyError
+except openllm.exceptions.MissingDependencyError:
+    pass
+else:
+    _import_structure["modeling_flan_t5"] = ["FlanT5", "FlanT5WithTokenizer", "FlanT5Tokenizer"]
+
+try:
+    if not imports.is_flax_available():
+        raise openllm.exceptions.MissingDependencyError
+except openllm.exceptions.MissingDependencyError:
+    pass
+else:
+    _import_structure["modeling_flax_flan_t5"] = ["FlaxFlanT5", "FlaxFlanT5WithTokenizer"]
+
+try:
+    if not imports.is_tf_available():
+        raise openllm.exceptions.MissingDependencyError
+except openllm.exceptions.MissingDependencyError:
+    pass
+else:
+    _import_structure["modeling_tf_flan_t5"] = ["TFFlanT5", "TFFlanT5WithTokenizer"]
+
+
+if t.TYPE_CHECKING:
+    from .configuration_flan_t5 import \
+        START_FLAN_T5_COMMAND_DOCSTRING as START_FLAN_T5_COMMAND_DOCSTRING
+    from .configuration_flan_t5 import FlanT5Config as FlanT5Config
+    from .prompts_flan_t5 import \
+        DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE
+
+    try:
+        if not imports.is_torch_available():
+            raise openllm.exceptions.MissingDependencyError
+    except openllm.exceptions.MissingDependencyError:
+        pass
+    else:
+        from .modeling_flan_t5 import FlanT5 as FlanT5
+        from .modeling_flan_t5 import FlanT5Tokenizer as FlanT5Tokenizer
+        from .modeling_flan_t5 import \
+            FlanT5WithTokenizer as FlanT5WithTokenizer
+
+    try:
+        if not imports.is_flax_available():
+            raise openllm.exceptions.MissingDependencyError
+    except openllm.exceptions.MissingDependencyError:
+        pass
+    else:
+        from .modeling_flax_flan_t5 import FlaxFlanT5 as FlaxFlanT5
+        from .modeling_flax_flan_t5 import \
+            FlaxFlanT5WithTokenizer as FlaxFlanT5WithTokenizer
+
+    try:
+        if not imports.is_tf_available():
+            raise openllm.exceptions.MissingDependencyError
+    except openllm.exceptions.MissingDependencyError:
+        pass
+    else:
+        from .modeling_tf_flan_t5 import TFFlanT5 as TFFlanT5
+        from .modeling_tf_flan_t5 import \
+            TFFlanT5WithTokenizer as TFFlanT5WithTokenizer
+else:
+    import sys
+
+    sys.modules[__name__] = openllm.utils.LazyModule(
+        __name__, globals()["__file__"], _import_structure, module_spec=__spec__
+    )
diff --git a/src/openllm/models/flan_t5/configuration_flan_t5.py b/src/openllm/models/flan_t5/configuration_flan_t5.py
new file mode 100644
index 00000000..b98ab793
--- /dev/null
+++ b/src/openllm/models/flan_t5/configuration_flan_t5.py
@@ -0,0 +1,52 @@
+# Copyright 2023 BentoML Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
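+
+# NOTE: an illustrative sketch of how this config is meant to be used; the YAML
+# round-trip comes from ``BaseConfig`` in ``configuration_utils``:
+#
+#   >>> from openllm.models.flan_t5 import FlanT5Config
+#   >>> config = FlanT5Config(temperature=0.9, max_length=256)
+#   >>> print(config.to_yaml())
+#
+# Each field below is also auto-generated as a ``--temperature``-style option on
+# ``openllm start flan-t5`` and as an ``OPENLLM_<MODEL>_<FIELD>`` env var (see
+# ``models_to_options`` in ``configuration_utils``).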
+from __future__ import annotations
+
+import pydantic
+
+from ...configuration_utils import LLMConfig
+
+START_FLAN_T5_COMMAND_DOCSTRING = """\
+Run an LLMServer for FLAN-T5 models.
+
+\b
+> See more information about FLAN-T5 at [huggingface/transformers](https://huggingface.co/docs/transformers/model_doc/flan-t5)
+
+\b
+## Usage
+
+By default, this model will use the PyTorch model for inference. However, this model supports both Flax and TensorFlow.
+
+\b
+- To use Flax, set the environment variable ``OPENLLM_FLAN_T5_FRAMEWORK="flax"``
+
+\b
+- To use TensorFlow, set the environment variable ``OPENLLM_FLAN_T5_FRAMEWORK="tf"``
+
+\b
+The FLAN-T5 Runner will use google/flan-t5-large as the default model. To use any other pretrained or
+fine-tuned FLAN-T5 checkpoint, set ``OPENLLM_FLAN_T5_PRETRAINED='google/flan-t5-xxl'``
+"""
+
+
+class FlanT5Config(LLMConfig):
+    """Configuration for the FLAN-T5 model."""
+
+    temperature: float = pydantic.Field(0.75, ge=0.01, le=5, description="Determines how random the generation should be.")
+    max_length: int = pydantic.Field(
+        3000, ge=1, description="Maximum number of tokens to generate. A word is around 2-3 tokens."
+    )
+    top_k: int = pydantic.Field(1, description="Number of highest-probability tokens to consider at each step.")
+    top_p: float = pydantic.Field(0.25, description="Total probability mass of tokens to consider at each step.")
+    repetition_penalty: float = pydantic.Field(1.2, description="Penalizes repeated tokens according to frequency.")
diff --git a/src/openllm/models/flan_t5/modeling_flan_t5.py b/src/openllm/models/flan_t5/modeling_flan_t5.py
new file mode 100644
index 00000000..c7229974
--- /dev/null
+++ b/src/openllm/models/flan_t5/modeling_flan_t5.py
@@ -0,0 +1,145 @@
+# Copyright 2023 BentoML Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from __future__ import annotations
+
+import typing as t
+
+import openllm
+
+from ...runner_utils import (LLMRunnable, assign_start_model_name,
+                             generate_tokenizer_runner)
+from .configuration_flan_t5 import FlanT5Config
+
+if t.TYPE_CHECKING:
+    import bentoml
+    import torch
+    import transformers
+else:
+    torch = openllm.utils.LazyLoader("torch", globals(), "torch")
+    transformers = openllm.utils.LazyLoader("transformers", globals(), "transformers")
+    bentoml = openllm.utils.LazyLoader("bentoml", globals(), "bentoml")
+
+
+def import_model(
+    pretrained_or_path: str,
+    model_kwargs: dict[str, t.Any] | None = None,
+    tokenizer_kwargs: dict[str, t.Any] | None = None,
+    config_kwargs: dict[str, t.Any] | None = None,
+) -> bentoml.Model:
+    """Import any PyTorch Flan-T5 pretrained model weights into the model store.
+
+    Args:
+        pretrained_or_path: The name or path of the pretrained model to import.
+        model_kwargs: Additional kwargs to pass to the ``transformers.AutoModelForSeq2SeqLM`` constructor.
+        tokenizer_kwargs: Additional kwargs to pass to the ``transformers.AutoTokenizer`` constructor.
+        config_kwargs: Additional kwargs to pass to the ``transformers.AutoConfig`` constructor to determine the model tag.
+
+    Returns:
+        a ``bentoml.Model`` instance.
+    """
+    model_kwargs = model_kwargs or {}
+    tokenizer_kwargs = tokenizer_kwargs or {}
+    config_kwargs = config_kwargs or {}
+
+    tag = openllm.utils.generate_tag_from_model_name(pretrained_or_path, **config_kwargs)
+
+    try:
+        return bentoml.transformers.get(tag)
+    except bentoml.exceptions.NotFound:
+        model = transformers.AutoModelForSeq2SeqLM.from_pretrained(pretrained_or_path, **model_kwargs)
+        tokenizer = transformers.AutoTokenizer.from_pretrained(pretrained_or_path, **tokenizer_kwargs)
+        return bentoml.transformers.save_model(str(tag), model, custom_objects={"tokenizer": tokenizer})
+
+
+def _FlanT5Tokenizer(
+    pretrained_or_path: str | None = None, embedded: bool = True, **kwargs: t.Any
+) -> openllm.types.TokenizerRunner:
+    """Get the runner for the tokenizer.
+
+    Args:
+        pretrained_or_path: The name or path of the FLAN-T5 model to import.
+        embedded: Whether to use the embedded runner or not.
+        **kwargs: Additional kwargs to pass to the ``transformers.AutoTokenizer`` constructor.
+
+    Returns:
+        The runner for the tokenizer.
+    """
+    if pretrained_or_path is None:
+        pretrained_or_path = FlanT5.default_model
+
+    return generate_tokenizer_runner(
+        import_model(pretrained_or_path, **kwargs).custom_objects["tokenizer"], embedded=embedded
+    )
+
+
+FlanT5Tokenizer = assign_start_model_name("flan-t5")(_FlanT5Tokenizer)
+
+
+class FlanT5(
+    LLMRunnable[transformers.T5ForConditionalGeneration, transformers.T5TokenizerFast], start_model_name="flan-t5"
+):
+    default_model: str = "google/flan-t5-large"
+    config_class = FlanT5Config
+
+    ATTACH_TOKENIZER = False
+
+    _llm_config: FlanT5Config
+
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+    @staticmethod
+    def pretrained_models() -> list[str]:
+        return [
+            "google/flan-t5-small",
+            "google/flan-t5-base",
+            "google/flan-t5-large",
+            "google/flan-t5-xl",
+            "google/flan-t5-xxl",
+        ]
+
+    def _generate(
+        self,
+        input_ids: torch.Tensor,
+        max_length: int | None = None,
+        do_sample: bool = True,
+        temperature: float | None = None,
+        top_k: int | None = None,
+        top_p: float | None = None,
+        repetition_penalty: float | None = None,
+        **kwargs: t.Any,
+    ) -> torch.Tensor:
+        return self.model.generate(
+            input_ids,
+            max_length=max_length if max_length is not None else self._llm_config.max_length,
+            do_sample=do_sample,
+            temperature=temperature if temperature is not None else self._llm_config.temperature,
+            top_k=top_k if top_k is not None else self._llm_config.top_k,
+            top_p=top_p if top_p is not None else self._llm_config.top_p,
+            repetition_penalty=repetition_penalty
+            if repetition_penalty is not None
+            else self._llm_config.repetition_penalty,
+            **kwargs,
+        )
+
+
+class FlanT5WithTokenizer(FlanT5, start_model_name="flan-t5"):
+    default_model: str = "google/flan-t5-large"
+
+    ATTACH_TOKENIZER = True
+
+    def _generate(self, prompt: str, **kwargs: t.Any) -> list[str]:
+        input_ids: torch.Tensor = self.tokenizer(prompt, return_tensors="pt").input_ids
+        input_ids = input_ids.to(self.device)
+        outputs = super()._generate(input_ids, **kwargs)
+        return self.tokenizer.batch_decode(outputs, skip_special_tokens=True)
diff --git a/src/openllm/models/flan_t5/modeling_flax_flan_t5.py b/src/openllm/models/flan_t5/modeling_flax_flan_t5.py
new file mode 100644
index 00000000..ddd10091
--- /dev/null
+++ b/src/openllm/models/flan_t5/modeling_flax_flan_t5.py
@@ -0,0 +1,119 @@
+# Copyright 2023 BentoML Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from __future__ import annotations
+
+import typing as t
+
+import openllm
+
+from ...runner_utils import LLMRunnable
+from .configuration_flan_t5 import FlanT5Config as FlanT5Config
+
+if t.TYPE_CHECKING:
+    import bentoml
+    import jax.numpy as jnp
+    import transformers
+
+else:
+    jnp = openllm.utils.LazyLoader("jnp", globals(), "jax.numpy")
+    bentoml = openllm.utils.LazyLoader("bentoml", globals(), "bentoml")
+    transformers = openllm.utils.LazyLoader("transformers", globals(), "transformers")
+
+
+def import_model(
+    pretrained_or_path: str,
+    model_kwargs: dict[str, t.Any] | None = None,
+    tokenizer_kwargs: dict[str, t.Any] | None = None,
+    config_kwargs: dict[str, t.Any] | None = None,
+) -> bentoml.Model:
+    """Import any Flax Flan-T5 pretrained model weights into the model store.
+
+    Args:
+        pretrained_or_path: The name or path of the pretrained model to import.
+        model_kwargs: Additional kwargs to pass to the ``transformers.AutoModelForSeq2SeqLM`` constructor.
+        tokenizer_kwargs: Additional kwargs to pass to the ``transformers.AutoTokenizer`` constructor.
+        config_kwargs: Additional kwargs to pass to the ``transformers.AutoConfig`` constructor to determine the model tag.
+
+    Returns:
+        a ``bentoml.Model`` instance.
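+
+    Example (illustrative only; downloads the weights on first call):
+
+        >>> import_model("google/flan-t5-small")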
+ """ + model_kwargs = model_kwargs or {} + tokenizer_kwargs = tokenizer_kwargs or {} + config_kwargs = config_kwargs or {} + + tag = openllm.utils.generate_tag_from_model_name(pretrained_or_path, prefix="flax", **config_kwargs) + try: + return bentoml.transformers.get(tag) + except bentoml.exceptions.NotFound: + model = transformers.FlaxT5ForConditionalGeneration.from_pretrained(pretrained_or_path, **model_kwargs) + tokenizer = transformers.AutoTokenizer.from_pretrained(pretrained_or_path, **tokenizer_kwargs) + return bentoml.transformers.save_model(str(tag), model, custom_objects={"tokenizer": tokenizer}) + + +class FlaxFlanT5( + LLMRunnable[transformers.FlaxT5ForConditionalGeneration, transformers.T5TokenizerFast], start_model_name="flan-t5" +): + default_model: str = "google/flan-t5-large" + config_class = FlanT5Config + + ATTACH_TOKENIZER = False + + _llm_config: FlanT5Config + + @staticmethod + def pretrained_models() -> list[str]: + return [ + "google/flan-t5-small", + "google/flan-t5-base", + "google/flan-t5-large", + "google/flan-t5-xl", + "google/flan-t5-xxl", + ] + + def _generate( + self, + input_ids: jnp.ndarray, + max_length: int | None = None, + do_sample: bool = True, + temperature: float | None = None, + top_k: float | None = None, + top_p: float | None = None, + repetition_penalty: float | None = None, + **kwargs: t.Any, + ) -> jnp.ndarray: + return self.model.generate( + input_ids, + max_length=max_length if max_length is not None else self._llm_config.max_length, + do_sample=do_sample, + temperature=temperature if temperature is not None else self._llm_config.temperature, + top_k=top_k if top_k is not None else self._llm_config.top_k, + top_p=top_p if top_p is not None else self._llm_config.top_p, + repetition_penalty=repetition_penalty + if repetition_penalty is not None + else self._llm_config.repetition_penalty, + **kwargs, + ) + + +class FlaxFlanT5WithTokenizer(FlaxFlanT5, start_model_name="flan-t5"): + default_model: str = "google/flan-t5-large" + + ATTACH_TOKENIZER = True + + def _generate(self, prompt: str, **kwargs: t.Any) -> list[str]: + input_ids = self.tokenizer(prompt, return_tensors="np")["input_ids"] + outputs = super()._generate(input_ids, **kwargs) + return self.tokenizer.batch_decode( + outputs.sequences, skip_special_tokens=True, clean_up_tokenization_spaces=True + ) diff --git a/src/openllm/models/flan_t5/modeling_tf_flan_t5.py b/src/openllm/models/flan_t5/modeling_tf_flan_t5.py new file mode 100644 index 00000000..9f489f7a --- /dev/null +++ b/src/openllm/models/flan_t5/modeling_tf_flan_t5.py @@ -0,0 +1,116 @@ +# Copyright 2023 BentoML Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+from __future__ import annotations
+
+import typing as t
+
+import openllm
+
+from ...runner_utils import LLMRunnable
+from .configuration_flan_t5 import FlanT5Config
+
+if t.TYPE_CHECKING:
+    import bentoml
+    import tensorflow as tf
+    import transformers
+else:
+    tf = openllm.utils.LazyLoader("tf", globals(), "tensorflow")
+    transformers = openllm.utils.LazyLoader("transformers", globals(), "transformers")
+    bentoml = openllm.utils.LazyLoader("bentoml", globals(), "bentoml")
+
+
+def import_model(
+    pretrained_or_path: str,
+    model_kwargs: dict[str, t.Any] | None = None,
+    tokenizer_kwargs: dict[str, t.Any] | None = None,
+    config_kwargs: dict[str, t.Any] | None = None,
+) -> bentoml.Model:
+    """Import any TensorFlow Flan-T5 pretrained model weights into the model store.
+
+    Args:
+        pretrained_or_path: The name or path of the pretrained model to import.
+        model_kwargs: Additional kwargs to pass to the ``transformers.AutoModelForSeq2SeqLM`` constructor.
+        tokenizer_kwargs: Additional kwargs to pass to the ``transformers.AutoTokenizer`` constructor.
+        config_kwargs: Additional kwargs to pass to the ``transformers.AutoConfig`` constructor to determine the model tag.
+
+    Returns:
+        a ``bentoml.Model`` instance.
+    """
+    model_kwargs = model_kwargs or {}
+    tokenizer_kwargs = tokenizer_kwargs or {}
+    config_kwargs = config_kwargs or {}
+
+    tag = openllm.utils.generate_tag_from_model_name(pretrained_or_path, prefix="tf", **config_kwargs)
+    try:
+        return bentoml.transformers.get(tag)
+    except bentoml.exceptions.NotFound:
+        model = transformers.TFT5ForConditionalGeneration.from_pretrained(pretrained_or_path, **model_kwargs)
+        tokenizer = transformers.AutoTokenizer.from_pretrained(pretrained_or_path, **tokenizer_kwargs)
+        return bentoml.transformers.save_model(str(tag), model, custom_objects={"tokenizer": tokenizer})
+
+
+class TFFlanT5(
+    LLMRunnable[transformers.TFT5ForConditionalGeneration, transformers.T5TokenizerFast], start_model_name="flan-t5"
+):
+    default_model: str = "google/flan-t5-large"
+    config_class = FlanT5Config
+
+    ATTACH_TOKENIZER = False
+
+    _llm_config: FlanT5Config
+
+    @staticmethod
+    def pretrained_models() -> list[str]:
+        return [
+            "google/flan-t5-small",
+            "google/flan-t5-base",
+            "google/flan-t5-large",
+            "google/flan-t5-xl",
+            "google/flan-t5-xxl",
+        ]
+
+    def _generate(
+        self,
+        input_ids: tf.Tensor,
+        max_length: int | None = None,
+        do_sample: bool = True,
+        temperature: float | None = None,
+        top_k: int | None = None,
+        top_p: float | None = None,
+        repetition_penalty: float | None = None,
+        **kwargs: t.Any,
+    ) -> tf.Tensor:
+        return self.model.generate(
+            input_ids,
+            max_length=max_length if max_length is not None else self._llm_config.max_length,
+            do_sample=do_sample,
+            temperature=temperature if temperature is not None else self._llm_config.temperature,
+            top_k=top_k if top_k is not None else self._llm_config.top_k,
+            top_p=top_p if top_p is not None else self._llm_config.top_p,
+            repetition_penalty=repetition_penalty
+            if repetition_penalty is not None
+            else self._llm_config.repetition_penalty,
+            **kwargs,
+        )
+
+
+class TFFlanT5WithTokenizer(TFFlanT5, start_model_name="flan-t5"):
+    default_model: str = "google/flan-t5-large"
+
+    ATTACH_TOKENIZER = True
+
+    def _generate(self, prompt: str, **kwargs: t.Any) -> list[str]:
+        input_ids: tf.Tensor = self.tokenizer(prompt, return_tensors="tf").input_ids
+        outputs = super()._generate(input_ids, **kwargs)
+        return self.tokenizer.batch_decode(outputs, skip_special_tokens=True)
diff --git a/src/openllm/models/flan_t5/prompts_flan_t5.py b/src/openllm/models/flan_t5/prompts_flan_t5.py
b/src/openllm/models/flan_t5/prompts_flan_t5.py new file mode 100644 index 00000000..90193956 --- /dev/null +++ b/src/openllm/models/flan_t5/prompts_flan_t5.py @@ -0,0 +1,21 @@ +# Copyright 2023 BentoML Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +DEFAULT_PROMPT_TEMPLATE = """\ +Please use the following piece of context to answer the question at the end. + +{context} + +Question: {question} +Answer:""" diff --git a/src/openllm/models/flan_t5/service_flan_t5.py b/src/openllm/models/flan_t5/service_flan_t5.py new file mode 100644 index 00000000..3e4d66c7 --- /dev/null +++ b/src/openllm/models/flan_t5/service_flan_t5.py @@ -0,0 +1,59 @@ +# Copyright 2023 BentoML Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
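(Aside: the DEFAULT_PROMPT_TEMPLATE defined above plugs into the PromptFormatter introduced later in this diff; a minimal sketch of the intended flow, with made-up context and question values:)

from openllm.models.flan_t5.prompts_flan_t5 import DEFAULT_PROMPT_TEMPLATE
from openllm.prompts import default_formatter

# The formatter can report which `{}`-style variables a template requires...
assert default_formatter.extract_template_variables(DEFAULT_PROMPT_TEMPLATE) == ["context", "question"]

# ...and render the final prompt once the caller supplies them.
prompt = default_formatter.format(
    DEFAULT_PROMPT_TEMPLATE,
    context="BentoML is a framework for building ML-powered services.",
    question="What is BentoML?",
)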
+
+from __future__ import annotations
+
+import typing as t
+
+import bentoml
+
+import openllm
+
+framework = openllm.utils.get_framework_env("flan-t5")
+if framework == "flax":
+    klass = openllm.FlaxLLM
+elif framework == "pt":
+    klass = openllm.LLM
+elif framework == "tf":
+    klass = openllm.TFLLM
+else:
+    raise ValueError(f"Invalid framework {framework}")
+
+model_runner = klass.create_runner("flan-t5")
+tokenizer_runner = openllm.Tokenizer.create_runner("flan-t5")
+
+svc = bentoml.Service(name=openllm.utils.generate_service_name(model_runner), runners=[model_runner, tokenizer_runner])
+
+
+@svc.api(input=openllm.Prompt(default="flan-t5"), output=openllm.Prompt(default="flan-t5"))
+async def generate(qa: openllm.schema.PromptInput) -> openllm.schema.PromptOutput:
+    """Returns the generated text from the given prompt."""
+    llm_config = model_runner.llm_config.dict()
+
+    return_tensors = "np" if framework == "flax" else framework
+    input_tensor = await tokenizer_runner.async_run(qa.prompt, return_tensors=return_tensors)
+    if framework == "flax":
+        outputs = await model_runner.generate.async_run(input_tensor["input_ids"], **llm_config)
+        responses = await tokenizer_runner.batch_decode.async_run(
+            outputs.sequences, skip_special_tokens=True, clean_up_tokenization_spaces=True
+        )
+    else:
+        outputs = await model_runner.generate.async_run(input_tensor.input_ids, **llm_config)
+        responses = await tokenizer_runner.batch_decode.async_run(outputs, skip_special_tokens=True)
+    return openllm.schema.PromptOutput(responses=responses, configuration=llm_config)
+
+
+@svc.api(input=bentoml.io.JSON(pydantic_model=openllm.FlanT5Config), output=bentoml.io.JSON())
+def update_llm_config(llm_config: openllm.FlanT5Config) -> dict[str, t.Any]:
+    """Update the LLM configuration and return the applied values."""
+    object.__setattr__(model_runner, "llm_config", llm_config)
+    return model_runner.llm_config.dict()
diff --git a/src/openllm/prompts/__init__.py b/src/openllm/prompts/__init__.py
new file mode 100644
index 00000000..1e41c2fc
--- /dev/null
+++ b/src/openllm/prompts/__init__.py
@@ -0,0 +1,26 @@
+# Copyright 2023 BentoML Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+Prompt interface for OpenLLM.
+
+This module exposes the `Prompt` descriptor, which is used to create prompts as a ``bentoml.io.IODescriptor``.
+This ``Prompt`` can also be used to interact with the client and provides an interface similar to ``langchain.PromptTemplate``.
+
+Example:
+    >>> from openllm.prompts import Prompt
+    >>> prompt = Prompt.from_template("Use the following as context: {context}!")
+"""
+
+from .descriptors import Prompt as Prompt
+from .formatter import default_formatter as default_formatter
diff --git a/src/openllm/prompts/descriptors.py b/src/openllm/prompts/descriptors.py
new file mode 100644
index 00000000..f01bbc39
--- /dev/null
+++ b/src/openllm/prompts/descriptors.py
@@ -0,0 +1,286 @@
+# Copyright 2023 BentoML Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+Descriptor definition for OpenLLM. ``Prompt`` essentially extends the functionality of ``bentoml.io.JSON``.
+
+However, it uses ``orjson`` instead of ``json`` for faster serialization and deserialization.
+
+One distinct difference is that ``Prompt`` is meant to be initialized via ``Prompt.from_template``.
+
+Example:
+    >>> from openllm.prompts import Prompt
+    >>> prompt = Prompt.from_template("Use the following as context: {context}!")
+    >>> prompt = Prompt("Use the following as context: {context}\nQuestion: {question}\nAnswer:")
+
+It also adds ``from_http`` and ``to_http`` methods, which wrap around ``from_http_request`` and ``to_http_response``.
+
+TODO:
+- Add support for ``langchain`` PromptTemplate under ``template``.
+
+The prompt template can also be set via the ``OPENLLM_PROMPT_TEMPLATE`` environment variable.
+"""
+from __future__ import annotations
+
+import logging
+import os
+import typing as t
+
+import bentoml
+import orjson
+import pydantic
+import pydantic.schema
+from bentoml._internal.io_descriptors.json import parse_dict_to_proto
+from bentoml._internal.utils.http import set_cookies
+from starlette.requests import Request
+from starlette.responses import Response
+
+import openllm
+
+if t.TYPE_CHECKING:
+    from google.protobuf import struct_pb2
+
+    from openllm.types import OpenAPIResponse
+else:
+    struct_pb2 = openllm.utils.LazyLoader("struct_pb2", globals(), "google.protobuf.struct_pb2")
+
+logger = logging.getLogger(__name__)
+
+
+class Prompt(
+    bentoml.io.IODescriptor[pydantic.BaseModel],
+    descriptor_id="openllm.prompts.descriptors.Prompt",
+    proto_fields=("json",),
+):
+    _mime_type = "application/json"
+
+    # TODO: support langchain PromptTemplate under template
+    prompt_template: openllm.schema.PromptTemplate
+
+    input_schema = openllm.schema.PromptInput
+    output_schema = openllm.schema.PromptOutput
+
+    def __setattr__(self, attr_name: str, value: t.Any) -> None:
+        if attr_name in ("input_schema", "output_schema"):
+            raise openllm.exceptions.OpenLLMException(f"{attr_name} is frozen as custom schema is not yet supported.")
+        super().__setattr__(attr_name, value)
+
+    def __init__(
+        self,
+        template: openllm.schema.PromptTemplate | openllm.schema.PromptInput | str | None = None,
+        default: str | None = None,
+    ) -> None:
+        if default:
+            default = openllm.utils.kebab_to_snake_case(default)
+        template_from_env = os.environ.get("OPENLLM_PROMPT_TEMPLATE", None)
+        if template is None and template_from_env is None:
+            if default:
+                if default not in openllm.CONFIG_MAPPING.keys():
+                    raise ValueError(
+                        "Invalid default prompt template. Please choose from: "
+                        + ", ".join(openllm.CONFIG_MAPPING.keys())
+                    )
+                self.prompt_template = openllm.schema.PromptTemplate.from_default(default)
+                return
+            else:
+                raise ValueError(
+                    "Prompt template is not set. Please set it via the 'OPENLLM_PROMPT_TEMPLATE' environment variable or pass it to 'Prompt'."
+                )
+
+        # This logic ensures that we always respect the template from the environment if set, even if `template` is passed
+        template = template_from_env if template_from_env is not None else template
+
+        if isinstance(template, str):
+            self.prompt_template = openllm.schema.PromptTemplate.from_template(template)
+        elif isinstance(template, openllm.schema.PromptInput):
+            self.prompt_template = openllm.schema.PromptTemplate.from_template(template.prompt)
+        elif isinstance(template, openllm.schema.PromptTemplate):
+            self.prompt_template = template
+        else:
+            raise openllm.exceptions.OpenLLMException(
+                f"Invalid sample type: {type(template)}. Sample must be one of: {openllm.schema.PromptInput}, {openllm.schema.PromptTemplate} or str."
+            )
+
+    @classmethod
+    def from_template(
+        cls,
+        template: openllm.schema.PromptTemplate | openllm.schema.PromptInput | str | None = None,
+        default: str | None = None,
+    ) -> Prompt:
+        return cls(template=template, default=default)
+
+    @property
+    def template(self) -> str:
+        assert self.prompt_template, "Make sure to initialize Prompt with 'from_template' first."
+        return self.prompt_template.template
+
+    @property
+    def input_variables(self) -> t.Sequence[str]:
+        assert self.prompt_template, "Make sure to initialize Prompt with 'from_template' first."
+        return self.prompt_template.input_variables
+
+    async def from_http(self, request: Request) -> openllm.schema.PromptInput:
+        json_str = await request.body()
+        try:
+            parsed = orjson.loads(json_str)
+        except orjson.JSONDecodeError as e:
+            raise bentoml.exceptions.InvalidArgument(f"Invalid JSON: {e}")
+
+        if "prompt" in parsed:
+            # In this branch, the user passed in a full prompt.
+            # We need to parse it and extract the input variables.
+            prompt = parsed.pop("prompt")
+            prompt_template = openllm.schema.PromptTemplate.from_template(prompt)
+            return self.input_schema(prompt=prompt_template.to_str(**parsed), inputs=parsed)
+        # In this branch, users only pass in the input variables for the configured prompt
+        # (either the default or one set via environment variables)
+        return self.input_schema(prompt=self.prompt_template.to_str(**parsed), inputs=parsed)
+
+    async def from_proto(self, field: struct_pb2.Value | bytes) -> openllm.schema.PromptInput:
+        from google.protobuf.json_format import MessageToDict
+
+        if isinstance(field, bytes):
+            content = field
+            try:
+                parsed = orjson.loads(content)
+            except orjson.JSONDecodeError as e:
+                raise bentoml.exceptions.BadInput(f"Invalid JSON: {e}")
+        else:
+            assert isinstance(field, struct_pb2.Value)
+            parsed = MessageToDict(field, preserving_proto_field_name=True)
+
+        if "prompt" in parsed:
+            # In this branch, the user passed in a full prompt.
+            # We need to parse it and extract the input variables.
+            prompt = parsed.pop("prompt")
+            prompt_template = openllm.schema.PromptTemplate.from_template(prompt)
+            return self.input_schema(prompt=prompt_template.to_str(**parsed), inputs=parsed)
+        # In this branch, users only pass in the input variables for the configured prompt
+        # (either the default or one set via environment variables)
+        return self.input_schema(prompt=self.prompt_template.to_str(**parsed), inputs=parsed)
+
+    async def to_http(self, obj: pydantic.BaseModel, ctx: bentoml.Context | None = None):
+        if not isinstance(obj, openllm.schema.PromptOutput):
+            raise bentoml.exceptions.BadInput(f"Expected output of type {openllm.schema.PromptOutput}, got {type(obj)}")
+        logger.debug("Converting '%s' to '%s'", obj, self.output_schema)
+
+        json_str = orjson.dumps(self.output_schema(**obj.dict()).dict())
+        if ctx is not None:
+            res = Response(
+                json_str,
+                media_type=self._mime_type,
+                headers=ctx.response.metadata,
+                status_code=ctx.response.status_code,
+            )
+            set_cookies(res, ctx.response.cookies)
+        else:
+            res = Response(json_str, media_type=self._mime_type)
+
+        return res
+
+    async def to_proto(self, obj: pydantic.BaseModel) -> struct_pb2.Value:
+        if not isinstance(obj, openllm.schema.PromptOutput):
+            raise bentoml.exceptions.BadInput(f"Expected output of type {openllm.schema.PromptOutput}, got {type(obj)}")
+        logger.debug("Converting '%s' to '%s'", obj, self.output_schema)
+
+        json_ = self.output_schema(**obj.dict()).dict()
+
+        msg = struct_pb2.Value()
+        return parse_dict_to_proto(json_, msg)
+
+    # NOTE: OpenAPI specification for Prompt as a bentoml.io.IODescriptor
+    def input_type(self) -> type[pydantic.BaseModel]:
+        return pydantic.BaseModel
+
+    def to_spec(self) -> dict[str, t.Any]:
+        return {
+            "id": self.descriptor_id,
+            "args": {"template": self.prompt_template.template},
+        }
+
+    @classmethod
+    def from_spec(cls, spec: dict[str, t.Any]) -> t.Self:
+        if "args" not in spec:
+            raise bentoml.exceptions.InvalidArgument(f"Missing 'args' key in JSON spec: {spec}")
+        return cls.from_template(**spec["args"])
+
+    def openapi_schema(self) -> t.Any:
+        # NOTE: not used
+        return {"type": "object"}
+
+    @property
+    def _model_name_map(self) -> dict[pydantic.schema.TypeModelOrEnum, str]:
+        return pydantic.schema.get_model_name_map(
+            pydantic.schema.get_flat_models_from_models({self.input_schema, self.output_schema})
+        )
+
+    def openapi_input_schema(self) -> dict[str, t.Any]:
+        return pydantic.schema.model_process_schema(
+            self.input_schema, model_name_map=self._model_name_map, ref_prefix="#/components/schemas/"
+        )[0]
+
+    def openapi_output_schema(self) -> dict[str, t.Any]:
+        return pydantic.schema.model_process_schema(
+            self.output_schema, model_name_map=self._model_name_map, ref_prefix="#/components/schemas/"
+        )[0]
+
+    def openapi_components(self) -> dict[str, t.Any] | None:
+        # TODO: Support custom input and output schema in BentoML OpenAPI components parsing.
+        return
+
+    def openapi_example(self) -> dict[str, t.Any]:
+        return {k: "" for k in self.input_variables}
+
+    def openapi_input_example(self) -> dict[str, t.Any]:
+        return self.openapi_example()
+
+    def openapi_output_example(self) -> dict[str, t.Any]:
+        return {}
+
+    def openapi_request_body(self) -> dict[str, t.Any]:
+        return {
+            "content": {
+                self._mime_type: {
+                    "schema": self.openapi_input_schema(),
+                    "example": self.openapi_input_example(),
+                }
+            },
+            "required": True,
+            "x-bentoml-io-descriptor": self.to_spec(),
+        }
+
+    def openapi_responses(self) -> OpenAPIResponse:
+        return {
+            "description": "Prompt received successfully!",
+            "content": {
+                self._mime_type: {
+                    "schema": self.openapi_output_schema(),
+                    "example": self.openapi_output_example(),  # TODO: Support output example
+                }
+            },
+            "x-bentoml-io-descriptor": self.to_spec(),
+        }
+
+    # NOTE: The overrides below fulfill the loose contract from bentoml.io.IODescriptor
+    @classmethod
+    def from_sample(cls, sample: openllm.schema.PromptInput | str) -> Prompt:
+        return cls.from_template(sample)
+
+    def _from_sample(self, sample: t.Any) -> pydantic.BaseModel:
+        return sample
+
+    async def from_http_request(self, request: Request) -> openllm.schema.PromptInput:
+        return await self.from_http(request)
+
+    async def to_http_response(self, obj: pydantic.BaseModel, ctx: bentoml.Context | None = None):
+        return await self.to_http(obj, ctx)
diff --git a/src/openllm/prompts/formatter.py b/src/openllm/prompts/formatter.py
new file mode 100644
index 00000000..1a4a5919
--- /dev/null
+++ b/src/openllm/prompts/formatter.py
@@ -0,0 +1,47 @@
+# Copyright 2023 BentoML Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+Formatter for prompts. Currently a strict wrapper around ``string.Formatter`` that processes f-string-style templates.
+
+Inspired by langchain.formatting
+
+TODO:
+- Support jinja2 templates, Go templates, and possibly other prompt template engines.
+""" +from __future__ import annotations + +import string +import typing as t + + +class PromptFormatter(string.Formatter): + def vformat(self, format_string: str, args: t.Sequence[t.Any], kwargs: t.Mapping[str, t.Any]) -> str: + if len(args) > 0: + raise ValueError("Positional arguments are not supported") + return super().vformat(format_string, args, kwargs) + + def check_unused_args( + self, used_args: set[int | str], args: t.Sequence[t.Any], kwargs: t.Mapping[str, t.Any] + ) -> None: + """Check if extra params is passed.""" + extras = set(kwargs).difference(used_args) + if extras: + raise KeyError(f"Extra params passed: {extras}") + + def extract_template_variables(self, template: str) -> t.Sequence[str]: + """Extract template variables from a template string.""" + return [field[1] for field in self.parse(template) if field[1] is not None] + + +default_formatter = PromptFormatter() diff --git a/src/openllm/py.typed b/src/openllm/py.typed new file mode 100644 index 00000000..e69de29b diff --git a/src/openllm/runner_utils.py b/src/openllm/runner_utils.py new file mode 100644 index 00000000..dd8cd560 --- /dev/null +++ b/src/openllm/runner_utils.py @@ -0,0 +1,381 @@ +# Copyright 2023 BentoML Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +OpenLLM Runnable interface. This define a loose contract for LLMRunnable, which can then be implemented per any model. + +LLMRunnable also follow a loose API definition from LangChain's LLM, therefore it can also be used in conjunction with LangChain. +""" +from __future__ import annotations + +import logging +import typing as t +from abc import ABC, abstractmethod + +import attr +import bentoml + +import openllm + +from .configuration_utils import LLMConfig, ModelSignature +from .utils import _object_setattr + +if t.TYPE_CHECKING: + import transformers + + from openllm.types import (InferenceConfig, LLMModuleType, + ModelSignatureDict, ModelSignaturesType, + TokenizerRunner) + +else: + InferenceConfig = ModelSignatureDict = dict + + transformers = openllm.utils.LazyLoader("transformers", globals(), "transformers") + +logger = logging.getLogger(__name__) + +M = t.TypeVar("M") +T = t.TypeVar("T") + + +def assign_start_model_name(start_model_name: str) -> t.Callable[..., t.Any]: + def wrapper(fn: t.Callable[..., t.Any]) -> t.Callable[..., t.Any]: + _object_setattr(fn, "start_model_name", start_model_name) + return fn + + return wrapper + + +def generate_tokenizer_runner( + tokenizer: transformers.PreTrainedTokenizer + | transformers.PreTrainedTokenizerBase + | transformers.PreTrainedTokenizerFast, + embedded: bool = False, +) -> TokenizerRunner: + """Generate a runner from any given transformers.AutoTokenizer. + + Args: + tokenizer: The tokenizer to generate the runner from. + """ + + # NOTE: I'm going to maintain this function from bentoml side, so internal imports here. 
+    from bentoml._internal.frameworks.transformers import make_default_signatures
+
+    signatures: ModelSignaturesType = make_default_signatures(tokenizer)
+
+    def __init_runnable__(self: bentoml.Runnable):
+        # keep a reference object to the tokenizer
+        self._tokenizer = tokenizer
+
+        self._predict_fns = {}
+        for method_name in signatures:
+            self._predict_fns[method_name] = getattr(tokenizer, method_name)
+
+    def add_method(cls: type[bentoml.Runnable], method_name: str, options: ModelSignature):
+        def fn(self: bentoml.Runnable, *args: t.Any, **kwargs: t.Any) -> t.Any:
+            try:
+                return self._predict_fns[method_name](*args, **kwargs)
+            except KeyError:
+                raise bentoml.exceptions.BadInput(f"Method {method_name} is not supported by this tokenizer.")
+
+        cls.add_method(
+            fn,
+            method_name,
+            batchable=options.batchable,
+            batch_dim=options.batch_dim,
+            input_spec=options.input_spec,
+            output_spec=options.output_spec,
+        )
+
+    RunnableCls: type[bentoml.Runnable] = type(
+        f"{tokenizer.__class__.__qualname__}Runnable",
+        (bentoml.Runnable,),
+        {
+            "SUPPORTED_RESOURCES": ("cpu",),
+            "SUPPORTS_CPU_MULTI_THREADING": True,
+            "__init__": __init_runnable__,
+        },
+    )
+
+    for method_name, options in signatures.items():
+        add_method(RunnableCls, method_name, options)
+
+    return t.cast(
+        "TokenizerRunner",
+        bentoml.Runner(RunnableCls, name=f"{tokenizer.__class__.__qualname__.lower()}-runner", embedded=embedded),
+    )
+
+
+class BaseLLMRunnable(bentoml.Runnable, ABC):
+    SUPPORTED_RESOURCES = ("nvidia.com/gpu", "cpu")
+    SUPPORTS_CPU_MULTI_THREADING = True
+
+    # NOTE: A loose contract for LLMRunnable implementations.
+    ATTACH_TOKENIZER: bool = True
+    """This boolean determines whether to attach a tokenizer to this runnable.
+    By default, it is True for _generate."""
+
+    start_model_name: str
+    """The default model to use when using ``openllm start``."""
+
+    default_model: str | None = None
+    """Return the default model to use when using ``openllm start``.
+    This could be one of the keys in self.pretrained_models() or a custom user model."""
+
+    inference_config: InferenceConfig = InferenceConfig(generate=ModelSignatureDict(batchable=False))
+    """The inference config for the two endpoints of this model.
+    This is the equivalent of args that are passed into ``bentoml.Runnable.method``.
+    """
+
+    config_class: type[LLMConfig] = LLMConfig
+    """The config class for any given LLMRunnable implementation."""
+
+    @staticmethod
+    @abstractmethod
+    def pretrained_models() -> list[str]:
+        """A list of supported pretrained model tags for this given runnable.
+
+        For example:
+            For the FLAN-T5 impl, this would be ["google/flan-t5-small", "google/flan-t5-base",
+            "google/flan-t5-large", "google/flan-t5-xl", "google/flan-t5-xxl"]
+        """
+
+        raise NotImplementedError
+
+    @abstractmethod
+    def _generate(self, *args: t.Any, **kwargs: t.Any) -> t.Any:
+        """This method should be implemented to provide a generation interface for the given model."""
+        raise NotImplementedError
+
+
+# TODO: Add support for model validation
+class LLMRunnable(BaseLLMRunnable, t.Generic[M, T]):
+    # The section below defines a loose contract with langchain's LLM interface.
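+    # (For reference: LangChain's LLM exposes `_llm_type` and `_identifying_params`
+    # with the same shapes, so an implementation of this contract can be adapted
+    # into a LangChain LLM by delegating LangChain's `_call` to `_generate`; a
+    # loose mapping, since the exact LangChain surface varies across versions.)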
+    @property
+    def _llm_type(self) -> str:
+        assert self.default_model is not None
+        return openllm.utils.convert_transformers_model_name(self.default_model)
+
+    @property
+    def _identifying_params(self) -> dict[str, t.Any]:
+        return {
+            "configuration": self._llm_config.dict(),
+            "inference_config": self.inference_config,
+            "pretrained": self.pretrained_models(),
+        }
+
+    # XXX: INTERNAL
+    _module: LLMModuleType
+    _model: M | None = None
+    _tokenizer: T | None = None
+
+    def __setattr__(self, attr_name: str, value: t.Any) -> None:
+        if attr_name in ("ATTACH_TOKENIZER",):
+            raise openllm.exceptions.OpenLLMException(
+                f"{attr_name} should not be set at runtime, as it determines how the runner is created. "
+                f"Please create a separate Runnable that extends '{self}' instead."
+            )
+        super().__setattr__(attr_name, value)
+
+    def __init_subclass__(cls, *, start_model_name: str, module: str | None = None):
+        cls.start_model_name = start_model_name
+
+        if module is None:
+            module = cls.__module__
+        cls._module = t.cast("LLMModuleType", openllm.utils.LazyLoader(module, globals(), module))
+
+    def __init__(
+        self,
+        /,
+        *,
+        model_name: str | None = None,
+        _bentomodel: bentoml.Model | None = None,
+        _llm_config: LLMConfig | None = None,
+        _internal: bool = False,
+        _dummy: bool = False,
+        **kwargs: t.Any,
+    ):
+        self._dummy = _dummy
+        if _dummy:
+            if not _internal:
+                raise openllm.exceptions.ForbiddenAttributeError(
+                    "'_dummy' should only be used internally, not in public-facing code."
+                )
+            return
+
+        if _bentomodel is not None:
+            if not _internal:
+                raise openllm.exceptions.ForbiddenAttributeError(
+                    "'_bentomodel' should only be used internally, not in public-facing code."
+                )
+            self._bentomodel = _bentomodel
+        else:
+            import_model_kwargs = kwargs.pop("import_model_kwargs", {})
+            import_tokenizer_kwargs = kwargs.pop("import_tokenizer_kwargs", {})
+            import_config_kwargs = kwargs.pop("import_config_kwargs", {})
+
+            assert self.default_model is not None
+            model_name = self.default_model if model_name is None else model_name
+
+            self._bentomodel = self._module.import_model(
+                model_name,
+                model_kwargs=import_model_kwargs,
+                tokenizer_kwargs=import_tokenizer_kwargs,
+                config_kwargs=import_config_kwargs,
+            )
+
+        if _llm_config is not None:
+            if not _internal:
+                raise openllm.exceptions.ForbiddenAttributeError(
+                    "'_llm_config' should only be used internally, not in public-facing code."
+                )
+            self._llm_config = _llm_config
+        else:
+            assert self.config_class is not None, "'config_class' is required."
+            self._llm_config = self.config_class(**kwargs)
+
+    def __getattribute__(self, item: t.Any):
+        is_dummy = object.__getattribute__(self, "_dummy")
+        if is_dummy and item in ("model", "tokenizer", "create_runner", "bentoml_runnable_methods__", "add_method"):
+            logger.warning("Accessing '%s' on a dummy object. Returning None.", item)
+            return
+        return super().__getattribute__(item)
+
+    @classmethod
+    def dummy_object(cls) -> LLMRunnable[M, T]:
+        return cls(_dummy=True, _internal=True)
+
+    @property
+    def model(self) -> M:
+        # NOTE: should we have support for nested runners here?
+        if self._model is None:
+            self._model = self._bentomodel.load_model()
+        return self._model
+
+    @property
+    def tokenizer(self) -> T:
+        # The tokenizer attached to the bento model's custom objects. This can
+        # then be used for the implementation of _generate.
+        if self._tokenizer is None:
+            if not self.ATTACH_TOKENIZER:
+                raise openllm.exceptions.OpenLLMException(
+                    "Tokenizer is not attached. Please set ATTACH_TOKENIZER=True when creating this runnable."
+                )
+            self._tokenizer = self._bentomodel.custom_objects["tokenizer"]
+        return self._tokenizer
+
+    @classmethod
+    def create_runner(
+        cls,
+        pretrained_or_path: str | None = None,
+        config: LLMConfig | None = None,
+        runner_name: str | None = None,
+        models: list[bentoml.Model] | None = None,
+        max_batch_size: int | None = None,
+        max_latency_ms: int | None = None,
+        method_configs: ModelSignaturesType | None = None,
+        embedded: bool = False,
+        import_model_kwargs: dict[str, t.Any] | None = None,
+        import_tokenizer_kwargs: dict[str, t.Any] | None = None,
+        import_config_kwargs: dict[str, t.Any] | None = None,
+        **kwargs: t.Any,
+    ) -> LLMRunner:
+        """Convert this LLMRunnable to a bentoml.Runner.
+
+        Args:
+            pretrained_or_path: The name of or path to the pretrained model to generate the runner from.
+            config: An optional ``LLMConfig`` to use for this runner.
+            runner_name: The name of the runner to generate. Optional, as this will be generated based on the model name.
+            models: Any additional ``bentoml.Model`` to include with this runner. By default, this will be determined from the model name.
+            max_batch_size: The maximum batch size for the runner.
+            max_latency_ms: The maximum latency for the runner.
+            method_configs: The method configs for the runner.
+            embedded: Whether to run this runner in embedded mode.
+            import_model_kwargs: To pass to model_kwargs in ``import_model``.
+            import_tokenizer_kwargs: To pass to tokenizer_kwargs in ``import_model``.
+            import_config_kwargs: To pass to config_kwargs in ``import_model``.
+            The rest of the kwargs will then be passed into ``self.config_class`` (which is of type ``LLMConfig``).
+
+        Returns:
+            A bentoml.Runner instance.
+        """
+        if pretrained_or_path is None:
+            if cls.default_model is None:
+                raise openllm.exceptions.OpenLLMException(
+                    "A default model is required for any LLMRunnable. Make sure to specify a default_model or pass in a model name."
+                )
+            pretrained_or_path = cls.default_model
+        if pretrained_or_path not in cls.pretrained_models():
+            logger.debug("Creating runner for custom model '%s'", pretrained_or_path)
+
+        models = models or []
+        bentomodel = cls._module.import_model(
+            pretrained_or_path,
+            model_kwargs=import_model_kwargs,
+            tokenizer_kwargs=import_tokenizer_kwargs,
+            config_kwargs=import_config_kwargs,
+        )
+        models.append(bentomodel)
+
+        if runner_name is None:
+            runner_name = f"llm-{cls.start_model_name}-runner"
+
+        _bento_runnable_methods_map = {"generate": cls._generate}
+
+        for method_name, method_config in cls.inference_config.items():
+            signature = ModelSignature.from_dict(t.cast(ModelSignatureDict, method_config))
+            cls.add_method(
+                _bento_runnable_methods_map[method_name],
+                method_name,
+                batchable=signature.batchable,
+                batch_dim=signature.batch_dim,
+                input_spec=signature.input_spec,
+                output_spec=signature.output_spec,
+            )
+
+        # The rest of the kwargs can then be passed to LLMConfig
+        if config is not None:
+            llm_config = config.with_options(**kwargs)
+        else:
+            llm_config = cls.config_class(**kwargs)
+
+        return LLMRunner(
+            cls,
+            llm_config=llm_config,
+            runnable_init_params={
+                "model_name": pretrained_or_path,
+                "_bentomodel": bentomodel,
+                "_llm_config": llm_config,
+                "_internal": True,
+            },
+            name=runner_name,
+            models=models,
+            max_batch_size=max_batch_size,
+            max_latency_ms=max_latency_ms,
+            method_configs=method_configs,
+            embedded=embedded,
+        )
+
+
+class LLMRunner(bentoml.Runner):
+    llm_config: LLMConfig = attr.field(factory=lambda: LLMConfig())
+
+    def __init__(
+        self,
+        runnable_class: type[LLMRunnable[t.Any, t.Any]],
+        llm_config: LLMConfig,
+        **kwargs: t.Any,
+    ):
+        super().__init__(runnable_class, **kwargs)
+        # A hack around frozen attributes.
+        _object_setattr(self, "llm_config", llm_config)
diff --git a/src/openllm/schema.py b/src/openllm/schema.py
new file mode 100644
index 00000000..57ade728
--- /dev/null
+++ b/src/openllm/schema.py
@@ -0,0 +1,73 @@
+# Copyright 2023 BentoML Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+Schema definition for OpenLLM. This can be used for client interaction.
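+
+Example (an illustrative exchange with the ``/generate`` endpoint; the values shown are made up):
+
+    request:  {"prompt": "Question: {q}\nAnswer:", "q": "What is BentoML?"}
+    response: {"responses": ["..."], "configuration": {"max_length": ...}}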
+""" +from __future__ import annotations + +import typing as t +from abc import ABC + +import pydantic + +import openllm + + +class PromptTemplate(pydantic.BaseModel): + template: str + input_variables: t.Sequence[str] + + class Config: + extra = "forbid" + + def to_str(self, **kwargs: str) -> str: + """Generate a prompt from the template and input variables""" + if not kwargs: + raise ValueError("Keyword arguments are required") + if not all(k in kwargs for k in self.input_variables): + raise ValueError(f"Missing required input variables: {self.input_variables}") + return openllm.prompts.default_formatter.format(self.template, **kwargs) + + @classmethod + def from_template(cls, template: str) -> PromptTemplate: + input_variables = openllm.prompts.default_formatter.extract_template_variables(template) + return cls(template=template, input_variables=input_variables) + + @classmethod + def from_default(cls, model: str) -> PromptTemplate: + template = getattr(openllm.utils.get_lazy_module(model), "DEFAULT_PROMPT_TEMPLATE") + if template is None: + raise ValueError(f"Model {model} does not have a default prompt template.") + return cls.from_template(template) + + +class BaseIO(pydantic.BaseModel, ABC): + class Config: + extra = "forbid" + + +class PromptInput(BaseIO): + prompt: str + """The prompt to be sent to system.""" + + inputs: t.Dict[str, str] + """A mapping of given inputs value. Should be use for cache.""" + + +class PromptOutput(BaseIO): + responses: t.List[str] + """A list of responses from the system.""" + + configuration: t.Dict[str, t.Any] + """A mapping of configuration values for given system.""" diff --git a/src/openllm/server_utils.py b/src/openllm/server_utils.py new file mode 100644 index 00000000..183ce407 --- /dev/null +++ b/src/openllm/server_utils.py @@ -0,0 +1,85 @@ +# Copyright 2023 BentoML Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Server utilities for OpenLLM. This extends bentoml.Server. + +It independently manage processes and threads for runners and servers separately. +This is an experimental feature and can also be merged to upstream BentoML. +""" +from __future__ import annotations + +import logging +import subprocess +import typing as t +from io import StringIO + +import bentoml + +import openllm + +logger = logging.getLogger(__name__) + + +def start( + model_name: str, + framework: t.Literal["flax", "tf", "pt"] | None = None, + server_args: dict[str, t.Any] | None = None, + serve_grpc: bool = False, + **llm_config_args: t.Any, +): + # NOTE: We need the below imports so that the client can use the custom IO Descriptor. 
+    from openllm.prompts import Prompt as Prompt
+
+    if framework is None:
+        framework = openllm.utils.get_framework_env(model_name)
+
+    start_env = {
+        openllm.utils.FRAMEWORK_ENV_VAR(model_name): framework,
+    }
+
+    server_args = server_args or {}
+    server_args.update(
+        {
+            "working_dir": openllm.utils.get_working_dir(model_name),
+            "bento": f'service_{model_name.replace("-", "_")}:svc',
+        }
+    )
+    # NOTE: currently, there are no development args in bentoml.Server. To be fixed upstream.
+    development = server_args.pop("development", False)
+    server_args.setdefault("production", not development)
+    server = getattr(bentoml, "HTTPServer" if not serve_grpc else "GrpcServer")(**server_args)
+    server.timeout = 90
+
+    server.start(env=start_env, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
+    assert server.process is not None
+    client = server.get_client()
+    if llm_config_args:
+        res = client.update_llm_config(llm_config_args)
+        assert res
+
+    logger.info("Server for running '%s' can now be accessed at %s", model_name, client.server_url)
+    # TODO: Add generated instructions for using the client in JS, Python and Go here.
+
+    def log_output(pipe: t.TextIO):
+        for line in iter(pipe.readline, ""):  # newline-separated lines; the pipe is decoded text
+            logger.info(line)
+
+    try:
+        stdout, _ = server.process.communicate()
+        log_output(StringIO(stdout))
+    except Exception as err:
+        logger.error("Exception occurred while running '%s':\n", model_name)
+        logger.error(err)
+        raise
diff --git a/src/openllm/types.py b/src/openllm/types.py
new file mode 100644
index 00000000..999ed5ec
--- /dev/null
+++ b/src/openllm/types.py
@@ -0,0 +1,68 @@
+# Copyright 2023 BentoML Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+Type definitions for OpenLLM.
+
+Note that this module SHOULD NOT BE IMPORTED AT RUNTIME, as it serves only typing purposes.
+"""
+from __future__ import annotations
+
+import typing as t
+
+if not t.TYPE_CHECKING:
+    raise RuntimeError(f"{__name__} should not be imported at runtime")
+
+import bentoml
+from bentoml._internal.io_descriptors.base import OpenAPIResponse
+from bentoml._internal.models.model import ModelSignaturesType as ModelSignaturesType
+from bentoml.types import ModelSignatureDict
+
+from openllm.configuration_utils import LLMConfig
+from openllm.runner_utils import LLMRunnable
+from openllm.utils import LazyLoader
+
+P = t.ParamSpec("P")
+
+F = t.Callable[P, t.Any]
+
+
+class InferenceConfig(t.TypedDict):
+    generate: ModelSignatureDict
+
+
+class LLMModuleType(LazyLoader):
+    @staticmethod
+    def import_model(
+        model_name: str,
+        model_kwargs: dict[str, t.Any] | None = None,
+        tokenizer_kwargs: dict[str, t.Any] | None = None,
+        config_kwargs: dict[str, t.Any] | None = None,
+    ) -> bentoml.Model:
+        ...
+
+    class LLMConfigImpl(LLMConfig):
+        ...
+
+    class LLMRunnableImpl(LLMRunnable[t.Any, t.Any], start_model_name="dummy"):
+        ...
+
+    @staticmethod
+    def RunnableNameTokenizer(model_name: str | None = None, embedded: bool = True, **kwargs: t.Any) -> TokenizerRunner:
+        ...
+
+
+# The following type definitions are extensions of bentoml.Runner
+class TokenizerRunner(bentoml.Runner):
+    ...
diff --git a/src/openllm/utils/__init__.py b/src/openllm/utils/__init__.py
new file mode 100644
index 00000000..25b4648a
--- /dev/null
+++ b/src/openllm/utils/__init__.py
@@ -0,0 +1,198 @@
+# Copyright 2023 BentoML Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+Utility functions for OpenLLM. Users can import these functions for convenience,
+but we won't ensure backward compatibility for them, so use with caution.
+"""
+from __future__ import annotations
+
+import importlib
+import importlib.machinery
+import itertools
+import logging
+import os
+import re
+import types
+import typing as t
+
+import bentoml
+
+# NOTE: The following exports useful utils from bentoml
+from bentoml._internal.utils import LazyLoader as LazyLoader
+from bentoml._internal.utils import pkg as packaging_utils
+from bentoml._internal.utils import reserve_free_port as reserve_free_port
+from bentoml._internal.utils import resolve_user_filepath as resolve_user_filepath
+
+if t.TYPE_CHECKING:
+    import transformers
+    import transformers.utils as import_utils_shim
+
+    from openllm.runner_utils import LLMRunner
+
+else:
+    transformers = LazyLoader("transformers", globals(), "transformers")
+
+    if packaging_utils.pkg_version_info("transformers")[:2] >= (4, 18):
+        import_utils_shim = LazyLoader("import_utils_shim", globals(), "transformers.utils")
+    else:
+        # NOTE: The logic below handles pre-4.18 compatibility for checking tf, jax, and torch availability.
+        import_utils_shim = LazyLoader(
+            "import_utils_shim", globals(), "bentoml._internal.frameworks.utils.transformers"
+        )
+
+logger = logging.getLogger(__name__)
+
+_object_setattr = object.__setattr__
+
+
+def kebab_to_snake_case(name: str) -> str:
+    """Convert a given kebab-case name to snake_case."""
+    return re.sub(r"-", "_", name)
+
+
+def get_pretrained_env(model_name: str) -> str | None:
+    """Convert a given runnable start model name (kebab-case) to its snake_case ENV variable
+    (e.g. OPENLLM_FLAN_T5_PRETRAINED) and return that variable's value, if set."""
+    return os.environ.get(f"OPENLLM_{kebab_to_snake_case(model_name.upper())}_PRETRAINED", None)
+
+
+def get_lazy_module(model_name: str) -> LazyLoader:
+    snaked_model_name = kebab_to_snake_case(model_name)
+    return LazyLoader(snaked_model_name, globals(), f"openllm.models.{snaked_model_name}")
+
+
+def get_working_dir(model_name: str) -> str:
+    """Get the working directory for a given model name."""
+    return os.path.dirname(importlib.import_module(f".{kebab_to_snake_case(model_name)}", "openllm.models").__file__)
+
+
+def FRAMEWORK_ENV_VAR(model_name: str) -> str:
+    return f"OPENLLM_{kebab_to_snake_case(model_name.upper())}_FRAMEWORK"
+
+
+def get_framework_env(model_name: str) -> str:
+    envvar = os.environ.get(FRAMEWORK_ENV_VAR(model_name), "pt")
+    if envvar not in ("pt", "tf", "flax"):
+        raise ValueError(f"Invalid framework implementation {envvar}, must be one of 'pt', 'tf', 'flax'")
+    return envvar
+
+
+def generate_service_name(runner: LLMRunner) -> str:
+    dummy_object = runner.runnable_class.dummy_object()
+    return f"llm-{dummy_object.start_model_name}-service"
+
+
+def convert_transformers_model_name(name: str) -> str:
+    return re.sub("[^a-zA-Z0-9]+", "-", name)
+
+
+def generate_tag_from_model_name(model_name: str, prefix: str | None = None, **kwargs: t.Any) -> bentoml.Tag:
+    """Generate a ``bentoml.Tag`` from a given transformers model name.
+
+    Note that this depends on your model having a config class available.
+
+    Args:
+        model_name: The transformers model name.
+        prefix: An optional prefix to prepend to the generated tag.
+        **kwargs: Additional kwargs to pass to the ``transformers.AutoConfig`` constructor.
+            If you pass ``return_unused_kwargs=True``, it will be ignored.
+    """
+    if "return_unused_kwargs" in kwargs:
+        logger.debug("Ignoring 'return_unused_kwargs' in 'generate_tag_from_model_name'.")
+        kwargs.pop("return_unused_kwargs")
+    config = transformers.AutoConfig.from_pretrained(model_name, **kwargs)
+    commit_hash = getattr(config, "_commit_hash", None)
+    if commit_hash is None:
+        logger.warning(
+            "Given %s from '%s' doesn't contain a commit hash. We will generate the tag without a specific version.",
+            config.__class__,
+            model_name,
+        )
+    tag_str = (
+        convert_transformers_model_name(model_name)
+        if commit_hash is None
+        else f"{convert_transformers_model_name(model_name)}:{commit_hash}"
+    )
+    return bentoml.Tag.from_taglike((f"{prefix}-" if prefix is not None else "") + tag_str)
+
+
+class LazyModule(types.ModuleType):
+    """
+    Module class that surfaces all objects but only performs associated imports when the objects are requested.
+    This is a direct port of transformers.utils.import_utils._LazyModule for backwards compatibility with transformers <4.18.
+
+    This is an extension of the more powerful LazyLoader.
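+
+    Example (illustrative; the import structure shown is hypothetical):
+
+        import sys
+        _import_structure = {"schema": ["PromptTemplate", "PromptInput", "PromptOutput"]}
+        sys.modules[__name__] = LazyModule(
+            __name__, globals()["__file__"], _import_structure, module_spec=__spec__
+        )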
+ """ + + # Very heavily inspired by optuna.integration._IntegrationModule + # https://github.com/optuna/optuna/blob/master/optuna/integration/__init__.py + def __init__( + self, + name: str, + module_file: str, + import_structure: dict[str, list[str]], + module_spec: importlib.machinery.ModuleSpec | None = None, + extra_objects: dict[str, t.Any] | None = None, + ): + super().__init__(name) + self._modules = set(import_structure.keys()) + self._class_to_module: dict[str, str] = {} + for key, values in import_structure.items(): + for value in values: + self._class_to_module[value] = key + # Needed for autocompletion in an IDE + self.__all__ = list(import_structure.keys()) + list(itertools.chain(*import_structure.values())) + self.__file__ = module_file + self.__spec__ = module_spec + self.__path__ = [os.path.dirname(module_file)] + self._objects = {} if extra_objects is None else extra_objects + self._name = name + self._import_structure = import_structure + + # Needed for autocompletion in an IDE + def __dir__(self): + result = t.cast("list[str]", super().__dir__()) + # The elements of self.__all__ that are submodules may or + # may not be in the dir already, depending on whether + # they have been accessed or not. So we only add the + # elements of self.__all__ that are not already in the dir. + for attr in self.__all__: + if attr not in result: + result.append(attr) + return result + + def __getattr__(self, name: str) -> t.Any: + if name in self._objects: + return self._objects[name] + if name in self._modules: + value = self._get_module(name) + elif name in self._class_to_module.keys(): + module = self._get_module(self._class_to_module[name]) + value = getattr(module, name) + else: + raise AttributeError(f"module {self.__name__} has no attribute {name}") + + setattr(self, name, value) + return value + + def _get_module(self, module_name: str): + try: + return importlib.import_module("." + module_name, self.__name__) + except Exception as e: + raise RuntimeError( + f"Failed to import {self.__name__}.{module_name} because of the following error (look up to see its" + f" traceback):\n{e}" + ) from e + + def __reduce__(self): + return (self.__class__, (self._name, self.__file__, self._import_structure)) diff --git a/src/openllm/utils/logging.py b/src/openllm/utils/logging.py new file mode 100644 index 00000000..e69de29b diff --git a/src/openllm_client/__init__.py b/src/openllm_client/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 00000000..e69de29b diff --git a/tools/bazel b/tools/bazel new file mode 100755 index 00000000..4295d6c8 --- /dev/null +++ b/tools/bazel @@ -0,0 +1,65 @@ +#!/bin/bash + +set -e + +# courtesy of https://github.com/grpc/grpc +GIT_ROOT="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &> /dev/null && pwd)/.." 
+ +# DISABLE_BAZEL_WRAPPER can be set to eliminate the wrapper logic +if [ "${DISABLE_BAZEL_WRAPPER}" != "" ] && [ "${OVERRIDE_BAZEL_VERSION}" == "" ]; then + if [ "${BAZEL_REAL}" != "" ]; then + # use BAZEL_REAL as set by + # https://github.com/bazelbuild/bazel/blob/master/scripts/packages/bazel.sh + # that originally invoked this script (this is what happens when you + # run "bazel" in our workspace) + exec -a "$0" "${BAZEL_REAL}" "$@" + else + # if BAZEL_REAL is not set, just invoke the default system bazel + exec bazel "$@" + fi +fi + +VERSION=${OVERRIDE_BAZEL_VERSION:-$(< "$GIT_ROOT/.bazelversion")} +echo "INFO: Running bazel wrapper (see //tools/bazel for details), bazel version $VERSION will be used instead of system-wide bazel installation." >&2 + +BASEURL_MIRROR="https://storage.googleapis.com/grpc-bazel-mirror/github.com/bazelbuild/bazel/releases/download" +BASEURL="https://github.com/bazelbuild/bazel/releases/download" +pushd "$(dirname "$0")" > /dev/null +# bazel binary will be downloaded to GIT_ROOT/tools directory by default +DOWNLOAD_DIR=${OVERRIDE_BAZEL_WRAPPER_DOWNLOAD_DIR:-$GIT_ROOT/tools} + +case $(uname -sm) in + "Linux x86_64") + suffix=linux-x86_64 + ;; + "Linux aarch64") + suffix=linux-arm64 + ;; + "Darwin x86_64") + suffix=darwin-x86_64 + ;; + "Darwin arm64") + suffix=darwin-arm64 + ;; + "MINGW"* | "MSYS_NT"*) + suffix=windows-x86_64.exe + ;; + *) + echo "Unsupported architecture: $(uname -sm)" >&2 + exit 1 + ;; +esac + +filename="bazel-$VERSION-$suffix" +filename_abs="${DOWNLOAD_DIR}/${filename}" + +if [ ! -x "${filename_abs}" ]; then + # first try to download using mirror, fallback to download from github + echo "Downloading bazel, will try URLs: ${BASEURL_MIRROR}/${VERSION}/${filename} ${BASEURL}/${VERSION}/${filename}" >&2 + curl --fail -L --output "${filename_abs}" "${BASEURL_MIRROR}/${VERSION}/${filename}" || curl --fail -L --output "${filename_abs}" "${BASEURL}/${VERSION}/${filename}" + chmod a+x "${filename_abs}" +fi + +popd > /dev/null + +exec "${filename_abs}" "$@" diff --git a/tools/bazel.rc b/tools/bazel.rc new file mode 100644 index 00000000..98348ae8 --- /dev/null +++ b/tools/bazel.rc @@ -0,0 +1,58 @@ +# Coloring for error messages. +common --color=yes + +# Work around https://github.com/bazelbuild/bazel/issues/6293 by setting a dummy lcov. +coverage --combined_report=lcov --coverage_report_generator=@bazel_tools//tools/test:lcov_merger + +# prevent creation of empty __init__.py +# see: https://github.com/bazelbuild/bazel/issues/10076, https://github.com/bazelbuild/bazel/issues/7386 +build --incompatible_default_to_explicit_init_py +test --incompatible_default_to_explicit_init_py + +# Windows requires enable_runfiles +build --enable_runfiles +startup --windows_enable_symlinks + +## Build ## + +# Turn off legacy external runfiles +# This prevents accidentally depending on this feature, which Bazel will remove. 
+build --nolegacy_external_runfiles
+# Use clang as our compiler instead of gcc
+build --client_env=CC=clang
+build --cxxopt=-std=c++14 --host_cxxopt=-std=c++14
+# specify fast_cpp_protos for protobuf
+build --define=use_fast_cpp_protos=true
+# don't zip, see: https://github.com/bazelbuild/bazel/issues/8981
+build --build_python_zip=false
+
+build --verbose_failures
+build --worker_verbose
+
+# --config=opt
+build:opt --compilation_mode=opt
+build:opt --copt=-Wframe-larger-than=16384
+
+# --config=dbg
+build:dbg --compilation_mode=dbg
+build:dbg --copt=-Werror=return-stack-address
+
+# Dynamic linking causes issues like: `dyld: malformed mach-o: load commands size (59272) > 32768`
+# https://github.com/bazelbuild/bazel/issues/9190
+build:macos --dynamic_mode=off
+# Address https://github.com/bazelbuild/rules_swift/issues/776
+build:macos --host_swiftcopt=-wmo --swiftcopt=-wmo
+# The default strategy is worker, which has sandboxing disabled by default
+# and can hide non-hermetic build bugs.
+build:macos --strategy=SwiftCompile=sandboxed

## Test ##

# --test_output=errors: print out test errors
test --test_output=errors

# with dbg mode we will stream the output
test:dbg --test_output=streamed

# nix config
build:nix --copt="-I$(nix eval --impure --raw --expr 'let pkgs = import { }; in pkgs.llvmPackages.openmp.out')/include -L$(nix eval --impure --raw --expr 'let pkgs = import { }; in pkgs.llvmPackages.openmp.out')/lib"
diff --git a/tools/dependencies b/tools/dependencies
new file mode 100755
index 00000000..d2c4af24
--- /dev/null
+++ b/tools/dependencies
@@ -0,0 +1,15 @@
+#!/bin/bash
+
+set -e
+
+GIT_ROOT=$(git rev-parse --show-toplevel)
+
+cd "$GIT_ROOT" || exit 1
+
+# NOTE that we are using local bazel instead of system bazel.
+bazel run //requirements:pypi.update
+bazel run //:vendor-pypi
+bazel run //requirements:tensorflow.update
+bazel run //:vendor-tensorflow
+bazel run //requirements:tests.update
+bazel run //:vendor-tests
diff --git a/tools/dev.Dockerfile b/tools/dev.Dockerfile
new file mode 100644
index 00000000..bbb09747
--- /dev/null
+++ b/tools/dev.Dockerfile
@@ -0,0 +1,187 @@
+# syntax=docker/dockerfile-upstream:master
+
+FROM python:3.10-slim as base
+
+ENV DEBIAN_FRONTEND=noninteractive
+
+WORKDIR /workspace
+
+RUN --mount=type=cache,target=/var/lib/apt \
+    --mount=type=cache,target=/var/cache/apt \
+    apt-get update && \
+    apt-get install -q -y --no-install-recommends --allow-remove-essential \
+    bash build-essential ca-certificates git tree
+
+FROM base as protobuf-3
+
+COPY <<-EOT requirements.txt
+    protobuf>=3.5.0,<4.0dev
+    grpcio-tools
+    mypy-protobuf
+EOT
+
+RUN --mount=type=cache,target=/root/.cache/pip pip install -r requirements.txt
+
+FROM base as protobuf-4
+
+COPY <<-EOT requirements.txt
+    protobuf>=4.0,<5.0dev
+    grpcio-tools
+    mypy-protobuf
+EOT
+
+RUN --mount=type=cache,target=/root/.cache/pip pip install -r requirements.txt
+
+############################################
+
+# BentoML gRPC protobuf 3 generation
+
+FROM protobuf-3 as run-grpcio-tools-3
+
+ARG PROTOCOL_VERSION
+ARG GENERATED_PB3_DIR
+
+RUN mkdir -p /result/${GENERATED_PB3_DIR}
+
+RUN --mount=type=bind,target=.,rw <
+BASEDIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]:-$0}")" &> /dev/null && pwd 2> /dev/null)"
+GIT_ROOT=$(git rev-parse --show-toplevel)
+
+cd "$GIT_ROOT" || exit 1
+
+# NOTE that we are using local bazel instead of system bazel.
+bazel run //:buildcheck +bazel run //:black -- --check "$GIT_ROOT/src" +bazel run //:isort -- --check "$GIT_ROOT/src" +# linting protos +bazel run //:v1_proto_lint +bazel run //:v1alpha1_proto_lint + +# Running ruff for whole codebase. +ruff src examples docs tests diff --git a/tools/style b/tools/style new file mode 100755 index 00000000..4e0b953b --- /dev/null +++ b/tools/style @@ -0,0 +1,25 @@ +#!/bin/bash + +set -e + +BASEDIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]:-$0}")" &> /dev/null && pwd 2> /dev/null)" + +GIT_ROOT=$(git rev-parse --show-toplevel) + +cd "$GIT_ROOT" || exit 1 + +bazel run //:buildfmt +bazel run //:black -- "$GIT_ROOT/src" +bazel run //:black -- --pyi "$GIT_ROOT/typings" "$GIT_ROOT/src/bentoml/metrics.pyi" +bazel run //:isort -- "$GIT_ROOT/src" + +# Running ruff for whole codebase. +ruff --fix src examples docs tests + +if command -v buf > /dev/null 2>&1; then + buf format --config "$GIT_ROOT/src/bentoml/grpc/buf.yaml" -w src/bentoml/grpc +else + if command -v docker > /dev/null 2>&1; then + docker run --init --rm --volume "$GIT_ROOT/src":/workspace --workdir /workspace bufbuild/buf format --config "/workspace/bentoml/grpc/buf.yaml" -w bentoml/grpc + fi +fi
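(Usage note: both helper scripts above resolve the repository root via git rev-parse, so they can be invoked from any subdirectory; the check-only script runs the same formatters in verification mode for CI, while tools/style rewrites files in place, e.g.:)

./tools/style   # buildfmt, black, isort, ruff --fix, and buf format in one pass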