mirror of
https://github.com/bentoml/OpenLLM.git
synced 2026-03-09 02:32:51 -04:00
feat: add dbrx support
Signed-off-by: Aaron Pham <29749331+aarnphm@users.noreply.github.com>
This commit is contained in:
42
openllm-python/README.md
generated
42
openllm-python/README.md
generated
@@ -290,6 +290,48 @@ You can specify any of the following ChatGLM models via `openllm start`:
|
||||
|
||||
<details>
|
||||
|
||||
<summary>Dbrx</summary>
|
||||
|
||||
|
||||
### Quickstart
|
||||
|
||||
|
||||
|
||||
> **Note:** Dbrx requires installing OpenLLM with:
|
||||
> ```bash
|
||||
> pip install "openllm[dbrx]"
|
||||
> ```
|
||||
|
||||
|
||||
Run the following command to quickly spin up a Dbrx server:
|
||||
|
||||
```bash
|
||||
TRUST_REMOTE_CODE=True openllm start databricks/dbrx-instruct
|
||||
```
|
||||
In a different terminal, run the following command to interact with the server:
|
||||
|
||||
```bash
|
||||
export OPENLLM_ENDPOINT=http://localhost:3000
|
||||
openllm query 'What are large language models?'
|
||||
```
|
||||
|
||||
|
||||
> **Note:** Any Dbrx variants can be deployed with OpenLLM. Visit the [HuggingFace Model Hub](https://huggingface.co/models?sort=trending&search=dbrx) to see more Dbrx-compatible models.
|
||||
|
||||
|
||||
|
||||
### Supported models
|
||||
|
||||
You can specify any of the following Dbrx models via `openllm start`:
|
||||
|
||||
|
||||
- [databricks/dbrx-instruct](https://huggingface.co/databricks/dbrx-instruct)
|
||||
- [databricks/dbrx-base](https://huggingface.co/databricks/dbrx-base)
|
||||
|
||||
</details>
|
||||
|
||||
<details>
|
||||
|
||||
<summary>DollyV2</summary>
|
||||
|
||||
|
||||
|
||||
@@ -43,6 +43,7 @@ dependencies = [
|
||||
"openllm-client>=0.5.0-alpha.2",
|
||||
"openllm-core>=0.5.0-alpha.2",
|
||||
"safetensors",
|
||||
"vllm>=0.4.0",
|
||||
"optimum>=1.12.0",
|
||||
"accelerate",
|
||||
"ghapi",
|
||||
@@ -99,12 +100,13 @@ all = ["openllm[full]"]
|
||||
awq = ["autoawq"]
|
||||
baichuan = ["cpm-kernels"]
|
||||
chatglm = ["cpm-kernels"]
|
||||
dbrx = ["cpm-kernels"]
|
||||
dolly-v2 = ["cpm-kernels"]
|
||||
falcon = ["xformers"]
|
||||
fine-tune = ["peft>=0.6.0", "datasets", "trl", "huggingface-hub"]
|
||||
flan-t5 = ["xformers"]
|
||||
full = [
|
||||
"openllm[agents,awq,baichuan,chatglm,dolly-v2,falcon,fine-tune,flan-t5,gemma,ggml,gpt-neox,gptq,grpc,llama,mistral,mixtral,mpt,openai,opt,phi,playground,qwen,stablelm,starcoder,vllm,yi]",
|
||||
"openllm[agents,awq,baichuan,chatglm,dbrx,dolly-v2,falcon,fine-tune,flan-t5,gemma,ggml,gpt-neox,gptq,grpc,llama,mistral,mixtral,mpt,openai,opt,phi,playground,qwen,stablelm,starcoder,vllm,yi]",
|
||||
]
|
||||
gemma = ["xformers"]
|
||||
ggml = ["ctransformers"]
|
||||
@@ -122,7 +124,7 @@ playground = ["jupyter", "notebook", "ipython", "jupytext", "nbformat"]
|
||||
qwen = ["cpm-kernels", "tiktoken"]
|
||||
stablelm = ["cpm-kernels", "tiktoken"]
|
||||
starcoder = ["bitsandbytes"]
|
||||
vllm = ["vllm==0.3.2"]
|
||||
vllm = ["vllm==0.4.0"]
|
||||
yi = ["bitsandbytes"]
|
||||
|
||||
[tool.hatch.version]
|
||||
|
||||
@@ -78,7 +78,7 @@ def parse_device_callback(
|
||||
'--version',
|
||||
'-v',
|
||||
package_name=_PACKAGE_NAME,
|
||||
message=f'{_PACKAGE_NAME}, %(version)s (compiled: {openllm.COMPILED})\nPython ({platform.python_implementation()}) {platform.python_version()}',
|
||||
message=f'{_PACKAGE_NAME}, %(version)s\nPython ({platform.python_implementation()}) {platform.python_version()}',
|
||||
)
|
||||
def cli() -> None:
|
||||
"""\b
|
||||
|
||||
@@ -13,7 +13,7 @@ Fine-tune, serve, deploy, and monitor any LLMs with ease.
|
||||
# fmt: off
|
||||
# update-config-stubs.py: import stubs start
|
||||
from openllm_client import AsyncHTTPClient as AsyncHTTPClient, HTTPClient as HTTPClient
|
||||
from openllm_core.config import CONFIG_MAPPING as CONFIG_MAPPING, CONFIG_MAPPING_NAMES as CONFIG_MAPPING_NAMES, AutoConfig as AutoConfig, BaichuanConfig as BaichuanConfig, ChatGLMConfig as ChatGLMConfig, DollyV2Config as DollyV2Config, FalconConfig as FalconConfig, FlanT5Config as FlanT5Config, GemmaConfig as GemmaConfig, GPTNeoXConfig as GPTNeoXConfig, LlamaConfig as LlamaConfig, MistralConfig as MistralConfig, MixtralConfig as MixtralConfig, MPTConfig as MPTConfig, OPTConfig as OPTConfig, PhiConfig as PhiConfig, QwenConfig as QwenConfig, StableLMConfig as StableLMConfig, StarCoderConfig as StarCoderConfig, YiConfig as YiConfig
|
||||
from openllm_core.config import CONFIG_MAPPING as CONFIG_MAPPING, CONFIG_MAPPING_NAMES as CONFIG_MAPPING_NAMES, AutoConfig as AutoConfig, BaichuanConfig as BaichuanConfig, ChatGLMConfig as ChatGLMConfig, DbrxConfig as DbrxConfig, DollyV2Config as DollyV2Config, FalconConfig as FalconConfig, FlanT5Config as FlanT5Config, GemmaConfig as GemmaConfig, GPTNeoXConfig as GPTNeoXConfig, LlamaConfig as LlamaConfig, MistralConfig as MistralConfig, MixtralConfig as MixtralConfig, MPTConfig as MPTConfig, OPTConfig as OPTConfig, PhiConfig as PhiConfig, QwenConfig as QwenConfig, StableLMConfig as StableLMConfig, StarCoderConfig as StarCoderConfig, YiConfig as YiConfig
|
||||
from openllm_core._configuration import GenerationConfig as GenerationConfig, LLMConfig as LLMConfig
|
||||
from openllm_core._schemas import GenerationInput as GenerationInput, GenerationOutput as GenerationOutput, MetadataOutput as MetadataOutput, MessageParam as MessageParam
|
||||
from openllm_core.utils import api as api
|
||||
|
||||
Reference in New Issue
Block a user