diff --git a/.gitattributes b/.gitattributes
index dc274c61..77abb8ba 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -7,12 +7,12 @@ openllm-python/tests/models/__snapshots__/* linguist-generated=true
 openllm-python/src/openllm/utils/dummy_*.py linguist-generated=true
 openllm-python/src/openllm/models/__init__.py linguist-generated=true
 openllm-python/README.md linguist-generated=true
+openllm-python/CHANGELOG.md linguist-generated=true
 # Others
 typings/**/*.pyi linguist-generated=true
 Formula/openllm.rb linguist-generated=true
-
 * text=auto eol=lf
 # Needed for setuptools-scm-git-archive
 .git_archival.txt export-subst
diff --git a/.github/actions/release.sh b/.github/actions/release.sh
index 0cfa8aa0..e89617dc 100755
--- a/.github/actions/release.sh
+++ b/.github/actions/release.sh
@@ -56,7 +56,8 @@ release_package() {
   jq --arg release_version "${version}" '.version = $release_version' < package.json > package.json.tmp && mv package.json.tmp package.json
   popd &>/dev/null
   towncrier build --yes --version "${version}"
-  git add CHANGELOG.md changelog.d package.json openllm-node/package.json contrib/clojure/package.json
+  cp CHANGELOG.md openllm-python/CHANGELOG.md
+  git add CHANGELOG.md openllm-python/CHANGELOG.md changelog.d package.json openllm-node/package.json contrib/clojure/package.json
   git commit -S -sm "infra: prepare for release ${version} [generated] [skip ci]"
   git push origin main
   echo "Releasing tag ${version}..." && git tag -a "v${version}" -sm "Release ${version} [generated by GitHub Actions]"
diff --git a/openllm-python/CHANGELOG.md b/openllm-python/CHANGELOG.md
deleted file mode 120000
index 04c99a55..00000000
--- a/openllm-python/CHANGELOG.md
+++ /dev/null
@@ -1 +0,0 @@
-../CHANGELOG.md
\ No newline at end of file
diff --git a/openllm-python/CHANGELOG.md b/openllm-python/CHANGELOG.md
new file mode 100644
index 00000000..2f8917c0
--- /dev/null
+++ b/openllm-python/CHANGELOG.md
@@ -0,0 +1,658 @@
+# Changelog
+
+We are following [semantic versioning](https://semver.org/) with a strict
+backward-compatibility policy.
+
+You can find our backwards-compatibility policy
+[here](https://github.com/bentoml/openllm/blob/main/.github/SECURITY.md).
+
+Changes for the upcoming release can be found in the
+['changelog.d' directory](https://github.com/bentoml/openllm/tree/main/changelog.d)
+in our repository.
+
+
+
+
+## [0.2.24](https://github.com/bentoml/openllm/tree/v0.2.24)
+No significant changes.
+
+
+## [0.2.23](https://github.com/bentoml/openllm/tree/v0.2.23)
+
+### Features
+
+- Added all compiled wheels for all supported Python versions for Linux and macOS
+  [#201](https://github.com/bentoml/openllm/issues/201)
+
+
+## [0.2.22](https://github.com/bentoml/openllm/tree/v0.2.22)
+No significant changes.
+
+
+## [0.2.21](https://github.com/bentoml/openllm/tree/v0.2.21)
+
+### Changes
+
+- Added lazy evaluation for compiled modules, which should speed up overall import time
+  [#200](https://github.com/bentoml/openllm/issues/200)
+
+
+### Bug fix
+
+- Fixes compiled wheels ignoring client libraries
+  [#197](https://github.com/bentoml/openllm/issues/197)
+
+
+## [0.2.20](https://github.com/bentoml/openllm/tree/v0.2.20)
+No significant changes.
+
+
+## [0.2.19](https://github.com/bentoml/openllm/tree/v0.2.19)
+No significant changes.
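Editor's note: the lazy evaluation of compiled modules mentioned in 0.2.21 above is commonly done with a module-level `__getattr__` (PEP 562) that defers the real import until an attribute is first touched. A minimal illustrative sketch of that pattern follows; the module names used here are hypothetical and this is not necessarily OpenLLM's exact internal implementation.

```python
# lazy_compiled.py -- illustrative only; submodule names are assumptions.
import importlib
import types

_SUBMODULES = {"_quantize_impl", "_generation_impl"}  # assumed compiled submodules

def __getattr__(name: str) -> types.ModuleType:
    # Import the compiled submodule only the first time it is requested,
    # so `import lazy_compiled` itself stays cheap.
    if name in _SUBMODULES:
        module = importlib.import_module(f".{name}", __name__)
        globals()[name] = module  # cache so later lookups bypass __getattr__
        return module
    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
```

With this pattern `import lazy_compiled` does almost no work; the cost of loading a compiled extension is only paid on first use, which is the kind of import-time speedup the entry above describes.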
+
+
+## [0.2.18](https://github.com/bentoml/openllm/tree/v0.2.18)
+
+### Changes
+
+- Runner servers will now always spawn one instance, regardless of the workers-per-resource configuration
+
+  i.e. if CUDA_VISIBLE_DEVICES=0,1,2 and `--workers-per-resource=0.5`, then the runner will only use GPU indices `0,1`
+  [#189](https://github.com/bentoml/openllm/issues/189)
+
+
+### Features
+
+- OpenLLM can now also be installed via brew tap:
+  ```bash
+  brew tap bentoml/openllm https://github.com/bentoml/openllm
+
+  brew install openllm
+  ```
+  [#190](https://github.com/bentoml/openllm/issues/190)
+
+
+## [0.2.17](https://github.com/bentoml/openllm/tree/v0.2.17)
+
+### Changes
+
+- Updated the loading logic for PyTorch and vLLM to check for initialized parameters after placing the model on the correct devices
+
+  Added xformers to the base container as a requirement for the vLLM-based container
+  [#185](https://github.com/bentoml/openllm/issues/185)
+
+
+### Features
+
+- Importing models will no longer load them into memory if the model is a remote ID. Note that for GPTQ and local models the behaviour is unchanged.
+
+  Fixes that when there is one GPU, we ensure to call `to('cuda')` to place the model onto GPU memory. Note that the GPU must have
+  enough VRAM to offload this model onto the GPU.
+  [#183](https://github.com/bentoml/openllm/issues/183)
+
+
+## [0.2.16](https://github.com/bentoml/openllm/tree/v0.2.16)
+No significant changes.
+
+
+## [0.2.15](https://github.com/bentoml/openllm/tree/v0.2.15)
+No significant changes.
+
+
+## [0.2.14](https://github.com/bentoml/openllm/tree/v0.2.14)
+
+### Bug fix
+
+- Fixes a bug with `EnvVarMixin` where it didn't respect environment variables for specific fields
+
+  This inherently provided confusing behaviour with `--model-id`. This has now been addressed on main
+
+  The base docker image will now also include an installation of xformers built from source, locked at a given hash, since the latest release of xformers
+  is too old and would fail with vLLM when running within k8s
+  [#181](https://github.com/bentoml/openllm/issues/181)
+
+
+## [0.2.13](https://github.com/bentoml/openllm/tree/v0.2.13)
+No significant changes.
+
+
+## [0.2.12](https://github.com/bentoml/openllm/tree/v0.2.12)
+
+### Features
+
+- Added support for a base container for OpenLLM. The base container contains all necessary requirements
+  to run OpenLLM. Currently it includes compiled versions of FlashAttention v2, vLLM, AutoGPTQ and triton.
+
+  This will now be the base image for all future BentoLLMs. The image will also be published to the public GHCR.
+
+  To extend and use this image in your bento, simply specify ``base_image`` under ``bentofile.yaml``:
+
+  ```yaml
+  docker:
+    base_image: ghcr.io/bentoml/openllm:
+  ```
+
+  The release strategy would include:
+  - versioning of ``ghcr.io/bentoml/openllm:sha-`` for every commit to main, ``ghcr.io/bentoml/openllm:0.2.11`` for specific release versions
+  - alias ``latest`` will be managed with docker/build-push-action (discouraged)
+
+  Note that all these images include compiled kernels that have been tested on Ampere GPUs with CUDA 11.8.
+
+  To quickly run the image, do the following:
+
+  ```bash
+  docker run --rm --gpus all -it -v /home/ubuntu/.local/share/bentoml:/tmp/bentoml -e BENTOML_HOME=/tmp/bentoml \
+      -e OPENLLM_USE_LOCAL_LATEST=True -e OPENLLM_LLAMA_FRAMEWORK=vllm ghcr.io/bentoml/openllm:2b5e96f90ad314f54e07b5b31e386e7d688d9bb2 start llama --model-id meta-llama/Llama-2-7b-chat-hf --workers-per-resource conserved --debug
+  ```
+
+  In conjunction with this, OpenLLM now also has a set of small CLI utilities via ``openllm ext`` for ease of use
+
+  General fixes around codebase bytecode optimization
+
+  Fixes logs output to filter the correct level based on ``--debug`` and ``--quiet``
+
+  ``openllm build`` will now run the model check locally by default. To skip it, pass in ``--fast`` (previously this was the default behaviour, but ``--no-fast`` as the default makes more sense here, since ``openllm build`` should also be able to run standalone)
+
+  All ``LlaMA`` namespaces have been renamed to ``Llama`` (internal change and shouldn't affect end users)
+
+  ``openllm.AutoModel.for_model`` will now always return the instance. Runner kwargs will be handled via create_runner
+  [#142](https://github.com/bentoml/openllm/issues/142)
+- All OpenLLM base containers are now scanned for security vulnerabilities using
+  trivy (both SBOM mode and CVE)
+  [#169](https://github.com/bentoml/openllm/issues/169)
+
+
+## [0.2.11](https://github.com/bentoml/openllm/tree/v0.2.11)
+
+### Features
+
+- Added embeddings support for T5 and ChatGLM
+  [#153](https://github.com/bentoml/openllm/issues/153)
+
+
+## [0.2.10](https://github.com/bentoml/openllm/tree/v0.2.10)
+
+### Features
+
+- Added support for installing with git-archival
+
+  ```bash
+  pip install "https://github.com/bentoml/openllm/archive/main.tar.gz"
+  ```
+  [#143](https://github.com/bentoml/openllm/issues/143)
+- Users can now call ``client.embed`` to get the embeddings from the running LLMServer
+
+  ```python
+  client = openllm.client.HTTPClient("http://localhost:3000")
+
+  client.embed("Hello World")
+  client.embed(["Hello", "World"])
+  ```
+
+  > **Note:** ``client.embed`` is currently only implemented for ``openllm.client.HTTPClient`` and ``openllm.client.AsyncHTTPClient``
+
+  Users can also query embeddings directly from the CLI, via ``openllm embed``:
+
+  ```bash
+  $ openllm embed --endpoint localhost:3000 "Hello World" "My name is Susan"
+
+  [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]]
+  ```
+  [#146](https://github.com/bentoml/openllm/issues/146)
+
+
+### Bug fix
+
+- Fixes model location resolution to work correctly when running within the BentoContainer
+
+  This makes sure that the tags and model path are inferred correctly, based on BENTO_PATH and /.dockerenv
+  [#141](https://github.com/bentoml/openllm/issues/141)
+
+
+## [0.2.9](https://github.com/bentoml/openllm/tree/v0.2.9)
+No significant changes.
+
+
+## [0.2.8](https://github.com/bentoml/openllm/tree/v0.2.8)
+
+### Features
+
+- APIs for LLMService are now provisional based on the capabilities of the LLM.
+
+  The following APIs are considered provisional:
+
+  - `/v1/embeddings`: This will be available if the LLM supports embeddings (i.e. ``LLM.embeddings`` is implemented; example model: ``llama``)
+  - `/hf/agent`: This will be available if the LLM supports running HF agents (i.e. ``LLM.generate_one`` is implemented; example models: ``starcoder``, ``falcon``)
+  - `POST /v1/adapters` and `GET /v1/adapters`: These will be available if the server is running with LoRA weights
+
+  ``openllm.LLMRunner`` now includes three additional booleans:
+  - `runner.supports_embeddings`: whether this runner supports embeddings
+  - `runner.supports_hf_agent`: whether this runner supports HF agents
+  - `runner.has_adapters`: whether this runner is loaded with LoRA adapters
+
+  Optimized ``openllm.models``'s bytecode performance
+  [#133](https://github.com/bentoml/openllm/issues/133)
+
+
+## [0.2.7](https://github.com/bentoml/openllm/tree/v0.2.7)
+No significant changes.
+
+
+## [0.2.6](https://github.com/bentoml/openllm/tree/v0.2.6)
+
+### Backwards-incompatible Changes
+
+- Updated the signatures of `load_model` and `load_tokenizer` to no longer accept a tag.
+  The tag can be accessed via `llm.tag`, or, if using `openllm.serialisation` or `bentoml.transformers`, via `self._bentomodel`
+
+  Updated shared serialisation logic to reduce the call stack when saving by three call traces.
+  [#132](https://github.com/bentoml/openllm/issues/132)
+
+
+## [0.2.5](https://github.com/bentoml/openllm/tree/v0.2.5)
+
+### Features
+
+- Added support for sending generation arguments via the CLI.
+
+  ```bash
+  openllm query --endpoint localhost:3000 "What is the difference between noun and pronoun?" --sampling-params temperature 0.84
+  ```
+
+  Fixed the llama2 qlora training script to save unquantized weights
+  [#130](https://github.com/bentoml/openllm/issues/130)
+
+
+## [0.2.4](https://github.com/bentoml/openllm/tree/v0.2.4)
+No significant changes.
+
+
+## [0.2.3](https://github.com/bentoml/openllm/tree/v0.2.3)
+No significant changes.
+
+
+## [0.2.2](https://github.com/bentoml/openllm/tree/v0.2.2)
+No significant changes.
+
+
+## [0.2.1](https://github.com/bentoml/openllm/tree/v0.2.1)
+No significant changes.
+
+
+## [0.2.0](https://github.com/bentoml/openllm/tree/v0.2.0)
+
+### Features
+
+- Added support for GPTNeoX models. All variants of GPTNeoX, including Dolly-V2
+  and StableLM, can now also use `openllm start gpt-neox`
+
+  `openllm models -o json` now returns CPU and GPU fields. `openllm models` now
+  shows a table that mimics the one from README.md
+
+  Added scripts to automatically add model imports to `__init__.py`
+
+  `--workers-per-resource` now accepts the following strategies:
+
+  - `round_robin`: Similar behaviour to setting `--workers-per-resource 1`. This
+    is useful for smaller models.
+  - `conserved`: This will determine the number of available GPU resources and
+    only assign one worker for the LLMRunner with all available GPU resources. For
+    example, if there are 4 GPUs available, then `conserved` is equivalent to
+    `--workers-per-resource 0.25`.
+  [#106](https://github.com/bentoml/openllm/issues/106)
+- Added support for [Baichuan](https://github.com/baichuan-inc/Baichuan-7B) model
+  generation, contributed by @hetaoBackend
+
+  Fixes how we handle the model loader auto class for trust_remote_code in
+  transformers
+  [#115](https://github.com/bentoml/openllm/issues/115)
+
+
+### Bug fix
+
+- Fixes relative model_id handling when running LLMs within the container.
+
+  Added support for building a container directly with `openllm build`.
+  Users can now do `openllm build --format=container`:
+
+  ```bash
+  openllm build flan-t5 --format=container
+  ```
+
+  This is equivalent to:
+
+  ```bash
+  openllm build flan-t5 && bentoml containerize google-flan-t5-large-service
+  ```
+
+  Added snapshot testing and more robust edge cases for model testing
+
+  General improvement in `openllm.LLM.import_model`, which will now parse sanitised
+  parameters automatically.
+
+  Fixes `openllm start` to use the correct `model_id`, ignoring `--model-id`
+  (the correct behaviour)
+
+  Fixes `--workers-per-resource conserved` to respect `--device`
+
+  Added an initial interface for `LLM.embeddings`
+  [#107](https://github.com/bentoml/openllm/issues/107)
+- Fixes resources to correctly follow the CUDA_VISIBLE_DEVICES spec
+
+  OpenLLM now contains a standalone parser that mimics the `torch.cuda` parser for setting
+  GPU devices. This parser will be used to parse both AMD and NVIDIA GPUs.
+
+  `openllm` should now be able to parse `GPU-` and `MIG-` UUIDs from both
+  configuration and spec.
+  [#114](https://github.com/bentoml/openllm/issues/114)
+
+
+## [0.1.20](https://github.com/bentoml/openllm/tree/v0.1.20)
+
+### Features
+
+- ### Fine-tuning support for Falcon
+
+  Added support for fine-tuning Falcon models with QLoRA
+
+  OpenLLM now brings an `openllm playground`, which creates a Jupyter notebook for
+  easy fine-tuning scripts
+
+  Currently, it supports fine-tuning OPT and Falcon, with more to come.
+
+  `openllm.LLM` now provides a `prepare_for_training` helper to easily set up LoRA
+  and related configuration for fine-tuning
+  [#98](https://github.com/bentoml/openllm/issues/98)
+
+
+### Bug fix
+
+- Fixes loading MPT config on CPU
+
+  Fixes runner StopIteration on GET for the Starlette App
+  [#92](https://github.com/bentoml/openllm/issues/92)
+- `openllm.LLM` now generates tags based on the given `model_id` and optional
+  `model_version`.
+
+  If the given `model_id` is a custom path, the name would be the basename of the
+  directory, and the version would be the hash of the last modified time.
+
+  `openllm start` now provides a `--runtime` option, allowing setup of different runtimes.
+  Currently it defaults to `transformers`. GGML is a work in progress.
+
+  Fixes miscellaneous items when saving models with quantized weights.
+  [#102](https://github.com/bentoml/openllm/issues/102)
+
+
+## [0.1.19](https://github.com/bentoml/openllm/tree/v0.1.19)
+No significant changes.
+
+
+## [0.1.18](https://github.com/bentoml/openllm/tree/v0.1.18)
+
+### Features
+
+- `openllm.LLMConfig` now supports the `dict()` protocol
+
+  ```python
+  config = openllm.LLMConfig.for_model("opt")
+
+  print(config.items())
+  print(config.values())
+  print(config.keys())
+  print(dict(config))
+  ```
+  [#85](https://github.com/bentoml/openllm/issues/85)
+- Added support for MPT to OpenLLM
+
+  Fixes LLMConfig to only parse the environment when it is available
+  [#91](https://github.com/bentoml/openllm/issues/91)
+
+
+## [0.1.17](https://github.com/bentoml/openllm/tree/v0.1.17)
+
+### Bug fix
+
+- Fixes loading logic from a custom path. If a model path is given, OpenLLM
+  won't try to import it into the local store.
+
+  OpenLLM now only imports and fixes the models to load correctly within the
+  bento; see the generated service for more information.
+
+  Fixes the service not being ready when serving within a container or on BentoCloud. This
+  has to do with how we previously loaded the model into the bento.
+
+  Falcon loading logic has been reimplemented to fix this major bug.
+  Make sure to delete all previously saved weights for falcon with `openllm prune`
+
+  `openllm start` now supports bentos
+
+  ```bash
+  openllm start llm-bento --help
+  ```
+  [#80](https://github.com/bentoml/openllm/issues/80)
+
+
+## [0.1.16](https://github.com/bentoml/openllm/tree/v0.1.16)
+No significant changes.
+
+
+## [0.1.15](https://github.com/bentoml/openllm/tree/v0.1.15)
+
+### Features
+
+- `openllm.Runner` now supports AMD GPUs, addressing #65.
+
+  It also respects CUDA_VISIBLE_DEVICES when set correctly, allowing the GPU to be disabled so the model
+  runs on CPU only.
+  [#72](https://github.com/bentoml/openllm/issues/72)
+
+
+## [0.1.14](https://github.com/bentoml/openllm/tree/v0.1.14)
+
+### Features
+
+- Added support for standalone binary distribution. Currently works on Linux and
+  Windows:
+
+  The following are supported:
+
+  - aarch64-unknown-linux-gnu
+  - x86_64-unknown-linux-gnu
+  - x86_64-unknown-linux-musl
+  - i686-unknown-linux-gnu
+  - powerpc64le-unknown-linux-gnu
+  - x86_64-pc-windows-msvc
+  - i686-pc-windows-msvc
+
+  Reverted matrix expansion for CI to all Python versions. Now leveraging Hatch
+  env matrices
+  [#66](https://github.com/bentoml/openllm/issues/66)
+
+
+### Bug fix
+
+- Moved the implementation of dolly-v2 and falcon serialization to save a PreTrainedModel instead of a pipeline.
+
+  Saving dolly-v2 now saves the actual model instead of the pipeline abstraction. If you have a Dolly-V2
+  model available locally, we kindly ask you to run `openllm prune` to have the new implementation available.
+
+  Dolly-v2 and falcon now implement some memory optimizations to help with loading on lower-resource systems
+
+  Configuration removed field: 'use_pipeline'
+  [#60](https://github.com/bentoml/openllm/issues/60)
+- Removed the duplicated class instance of `generation_config`, as it should be set via
+  instance attributes.
+
+  Fixes test flakiness and one broken case for parsing env
+  [#64](https://github.com/bentoml/openllm/issues/64)
+
+
+## [0.1.13](https://github.com/bentoml/openllm/tree/v0.1.13)
+No significant changes.
+
+
+## [0.1.12](https://github.com/bentoml/openllm/tree/v0.1.12)
+
+### Features
+
+- Serving LLMs with fine-tuned LoRA and QLoRA adapter layers
+
+  The given fine-tuning weights can then be served with the model via
+  `openllm start`:
+
+  ```bash
+  openllm start opt --model-id facebook/opt-6.7b --adapter-id /path/to/adapters
+  ```
+
+  If you just wish to try some pretrained adapter checkpoints, you can use
+  `--adapter-id`:
+
+  ```bash
+  openllm start opt --model-id facebook/opt-6.7b --adapter-id aarnphm/opt-6.7b-lora
+  ```
+
+  To use multiple adapters, use the following format:
+
+  ```bash
+  openllm start opt --model-id facebook/opt-6.7b --adapter-id aarnphm/opt-6.7b-lora --adapter-id aarnphm/opt-6.7b-lora:french_lora
+  ```
+
+  By default, the first `adapter-id` will be the default LoRA layer, but
+  users can optionally change which LoRA layer to use for inference via
+  `/v1/adapters`:
+
+  ```bash
+  curl -X POST http://localhost:3000/v1/adapters --json '{"adapter_name": "vn_lora"}'
+  ```
+
+  > Note that for multiple `adapter-name` and `adapter-id` pairs, it is recommended to
+  > update to use the default adapter before sending the inference request, to avoid any
+  > performance degradation
+
+  To include this in the Bento, one can also provide `--adapter-id` to
+  `openllm build`:
+
+  ```bash
+  openllm build opt --model-id facebook/opt-6.7b --adapter-id ...
+  ```
+
+  Separated out the configuration builder, to make it more flexible for future
+  configuration generation.
+  [#52](https://github.com/bentoml/openllm/issues/52)
+
+
+### Bug fix
+
+- Fixes how `llm.ensure_model_id_exists` parses `openllm download` correctly
+
+  Renamed `openllm.utils.ModelEnv` to `openllm.utils.EnvVarMixin`
+  [#58](https://github.com/bentoml/openllm/issues/58)
+
+
+## [0.1.11](https://github.com/bentoml/openllm/tree/v0.1.11)
+No significant changes.
+
+
+## [0.1.10](https://github.com/bentoml/openllm/tree/v0.1.10)
+No significant changes.
+
+
+## [0.1.9](https://github.com/bentoml/openllm/tree/v0.1.9)
+
+### Changes
+
+- Fixes setting logs for agents to info instead of a logger object.
+  [#37](https://github.com/bentoml/openllm/issues/37)
+
+
+## [0.1.8](https://github.com/bentoml/openllm/tree/v0.1.8)
+No significant changes.
+
+
+## [0.1.7](https://github.com/bentoml/openllm/tree/v0.1.7)
+
+### Features
+
+- OpenLLM now seamlessly integrates with HuggingFace Agents.
+  Replace the HfAgent endpoint with a running remote server.
+
+  ```python
+  import transformers
+
+  agent = transformers.HfAgent("http://localhost:3000/hf/agent")  # URL that runs the OpenLLM server
+
+  agent.run("Is the following `text` positive or negative?", text="I don't like how this models is generate inputs")
+  ```
+
+  Note that only `starcoder` is currently supported for the agent feature.
+
+  To use it from `openllm.client`, do:
+
+  ```python
+  import openllm
+
+  client = openllm.client.HTTPClient("http://123.23.21.1:3000")
+
+  client.ask_agent(
+      task="Is the following `text` positive or negative?",
+      text="What are you thinking about?",
+      agent_type="hf",
+  )
+  ```
+
+  Fixes an asyncio exception by increasing the timeout
+  [#29](https://github.com/bentoml/openllm/issues/29)
+
+
+## [0.1.6](https://github.com/bentoml/openllm/tree/v0.1.6)
+
+### Changes
+
+- `--quantize` now takes `int8, int4` instead of `8bit, 4bit` to be consistent
+  with bitsandbytes concepts.
+
+  The `openllm` CLI now caches all available model commands, allowing faster startup time.
+
+  Fixes `openllm start model-id --debug` to filter out debug message logs from
+  `bentoml.Server`.
+
+  `--model-id` from `openllm start` now supports choices for easier selection.
+
+  Updated the `ModelConfig` implementation with `__getitem__` and auto-generated values.
+
+  Cleaned up the CLI and improved loading time; `openllm start` should be 'blazingly
+  fast'.
+  [#28](https://github.com/bentoml/openllm/issues/28)
+
+
+### Features
+
+- Added support for quantization during serving time.
+
+  `openllm start` now supports `--quantize int8` and `--quantize int4`. `GPTQ`
+  quantization support is on the roadmap and currently being worked on.
+
+  `openllm start` now also supports `--bettertransformer` to use
+  `BetterTransformer` for serving.
+
+  Refactored `openllm.LLMConfig` to be usable with `__getitem__`:
+  `openllm.DollyV2Config()['requirements']`.
+
+  The access order being:
+  `__openllm_*__ > self. > __openllm_generation_class__ > __openllm_extras__`.
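Editor's note: a rough sketch of what that `__getitem__` lookup chain can look like is shown below. It is illustrative only; apart from the attribute names listed in the entry above, everything here is an assumption rather than OpenLLM's actual internals.

```python
class LLMConfigSketch:
    """Resolve config["key"] by probing several sources in a fixed priority order."""

    def __getitem__(self, item: str):
        # 1. Class-level dunder settings, e.g. config["timeout"] -> __openllm_timeout__
        dunder = f"__openllm_{item}__"
        if hasattr(self.__class__, dunder):
            return getattr(self.__class__, dunder)
        # 2. Plain attributes set on the config instance itself
        if item in vars(self):
            return vars(self)[item]
        # 3. Fields owned by the generation config class
        generation = getattr(self, "__openllm_generation_class__", None)
        if generation is not None and hasattr(generation, item):
            return getattr(generation, item)
        # 4. Extra, unstructured values captured at construction time
        extras = getattr(self, "__openllm_extras__", {})
        if item in extras:
            return extras[item]
        raise KeyError(item)
```

Each lookup falls through to the next source only when the previous one does not know the key, which matches the `__openllm_*__ > self. > __openllm_generation_class__ > __openllm_extras__` precedence described above.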
+ + Added `towncrier` workflow to easily generate changelog entries + + Added `use_pipeline`, `bettertransformer` flag into ModelSettings + + `LLMConfig` now supported `__dataclass_transform__` protocol to help with + type-checking + + `openllm download-models` now becomes `openllm download` + [#27](https://github.com/bentoml/openllm/issues/27) diff --git a/openllm-python/pyproject.toml b/openllm-python/pyproject.toml index 07e16595..19bb03a3 100644 --- a/openllm-python/pyproject.toml +++ b/openllm-python/pyproject.toml @@ -156,7 +156,7 @@ allow-direct-references = true only-include = ["src/openllm"] sources = ["src"] [tool.hatch.build.targets.sdist] -exclude = ["/.git_archival.txt"] +exclude = ["/.git_archival.txt", "tests"] [tool.hatch.build.targets.wheel.hooks.mypyc] dependencies = [ "hatch-mypyc==0.16.0", diff --git a/openllm-python/src/openllm/bundle/oci/Dockerfile b/openllm-python/src/openllm/bundle/oci/Dockerfile index 6da9e17e..8944f7a8 100644 --- a/openllm-python/src/openllm/bundle/oci/Dockerfile +++ b/openllm-python/src/openllm/bundle/oci/Dockerfile @@ -63,7 +63,7 @@ RUN /opt/conda/bin/conda install -c "nvidia/label/cuda-11.8.0" cuda==11.8 && \ # NOTE: Build vllm CUDA kernels FROM kernel-builder as vllm-builder -ENV COMMIT_HASH e8ddc08ec85495e5faca31bdf9129e0bf59a4fac +ENV COMMIT_HASH d1744376ae9fdbfa6a2dc763e1c67309e138fa3d ARG COMMIT_HASH=${COMMIT_HASH} WORKDIR /usr/src diff --git a/pyproject.toml b/pyproject.toml index c0aee534..704eb2e3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,7 +36,7 @@ classifiers = [ "Programming Language :: Python :: Implementation :: CPython", "Programming Language :: Python :: Implementation :: PyPy", ] -description = "OpenLLM: Operating LLMs in production" +description = "OpenLLM monorepo" dynamic = ["version", "readme", "dependencies"] keywords = [ "MLOps", @@ -58,7 +58,7 @@ keywords = [ "Transformers", ] license = "Apache-2.0" -name = "openllm" +name = "openllm-monorepo" requires-python = ">=3.8" [project.urls] Blog = "https://modelserving.com"