mirror of
https://github.com/bentoml/OpenLLM.git
synced 2026-03-09 02:32:51 -04:00
feat: add dbrx support
Signed-off-by: Aaron Pham <29749331+aarnphm@users.noreply.github.com>
This commit is contained in:
42
openllm-python/README.md
generated
42
openllm-python/README.md
generated
@@ -290,6 +290,48 @@ You can specify any of the following ChatGLM models via `openllm start`:
|
||||
|
||||
<details>
|
||||
|
||||
<summary>Dbrx</summary>
|
||||
|
||||
|
||||
### Quickstart
|
||||
|
||||
|
||||
|
||||
> **Note:** Dbrx requires installing OpenLLM with:
|
||||
> ```bash
|
||||
> pip install "openllm[dbrx]"
|
||||
> ```
|
||||
|
||||
|
||||
Run the following command to quickly spin up a Dbrx server:
|
||||
|
||||
```bash
|
||||
TRUST_REMOTE_CODE=True openllm start databricks/dbrx-instruct
|
||||
```
|
||||
In a different terminal, run the following command to interact with the server:
|
||||
|
||||
```bash
|
||||
export OPENLLM_ENDPOINT=http://localhost:3000
|
||||
openllm query 'What are large language models?'
|
||||
```
|
||||
|
||||
|
||||
> **Note:** Any Dbrx variants can be deployed with OpenLLM. Visit the [HuggingFace Model Hub](https://huggingface.co/models?sort=trending&search=dbrx) to see more Dbrx-compatible models.
|
||||
|
||||
|
||||
|
||||
### Supported models
|
||||
|
||||
You can specify any of the following Dbrx models via `openllm start`:
|
||||
|
||||
|
||||
- [databricks/dbrx-instruct](https://huggingface.co/databricks/dbrx-instruct)
|
||||
- [databricks/dbrx-base](https://huggingface.co/databricks/dbrx-base)
|
||||
|
||||
</details>
|
||||
|
||||
<details>
|
||||
|
||||
<summary>DollyV2</summary>
|
||||
|
||||
|
||||
|
||||
@@ -43,6 +43,7 @@ dependencies = [
|
||||
"openllm-client>=0.5.0-alpha.2",
|
||||
"openllm-core>=0.5.0-alpha.2",
|
||||
"safetensors",
|
||||
"vllm>=0.4.0",
|
||||
"optimum>=1.12.0",
|
||||
"accelerate",
|
||||
"ghapi",
|
||||
@@ -99,12 +100,13 @@ all = ["openllm[full]"]
|
||||
awq = ["autoawq"]
|
||||
baichuan = ["cpm-kernels"]
|
||||
chatglm = ["cpm-kernels"]
|
||||
dbrx = ["cpm-kernels"]
|
||||
dolly-v2 = ["cpm-kernels"]
|
||||
falcon = ["xformers"]
|
||||
fine-tune = ["peft>=0.6.0", "datasets", "trl", "huggingface-hub"]
|
||||
flan-t5 = ["xformers"]
|
||||
full = [
|
||||
"openllm[agents,awq,baichuan,chatglm,dolly-v2,falcon,fine-tune,flan-t5,gemma,ggml,gpt-neox,gptq,grpc,llama,mistral,mixtral,mpt,openai,opt,phi,playground,qwen,stablelm,starcoder,vllm,yi]",
|
||||
"openllm[agents,awq,baichuan,chatglm,dbrx,dolly-v2,falcon,fine-tune,flan-t5,gemma,ggml,gpt-neox,gptq,grpc,llama,mistral,mixtral,mpt,openai,opt,phi,playground,qwen,stablelm,starcoder,vllm,yi]",
|
||||
]
|
||||
gemma = ["xformers"]
|
||||
ggml = ["ctransformers"]
|
||||
@@ -122,7 +124,7 @@ playground = ["jupyter", "notebook", "ipython", "jupytext", "nbformat"]
|
||||
qwen = ["cpm-kernels", "tiktoken"]
|
||||
stablelm = ["cpm-kernels", "tiktoken"]
|
||||
starcoder = ["bitsandbytes"]
|
||||
vllm = ["vllm==0.3.2"]
|
||||
vllm = ["vllm==0.4.0"]
|
||||
yi = ["bitsandbytes"]
|
||||
|
||||
[tool.hatch.version]
|
||||
|
||||
@@ -78,7 +78,7 @@ def parse_device_callback(
|
||||
'--version',
|
||||
'-v',
|
||||
package_name=_PACKAGE_NAME,
|
||||
message=f'{_PACKAGE_NAME}, %(version)s (compiled: {openllm.COMPILED})\nPython ({platform.python_implementation()}) {platform.python_version()}',
|
||||
message=f'{_PACKAGE_NAME}, %(version)s\nPython ({platform.python_implementation()}) {platform.python_version()}',
|
||||
)
|
||||
def cli() -> None:
|
||||
"""\b
|
||||
|
||||
@@ -13,7 +13,7 @@ Fine-tune, serve, deploy, and monitor any LLMs with ease.
|
||||
# fmt: off
|
||||
# update-config-stubs.py: import stubs start
|
||||
from openllm_client import AsyncHTTPClient as AsyncHTTPClient, HTTPClient as HTTPClient
|
||||
from openllm_core.config import CONFIG_MAPPING as CONFIG_MAPPING, CONFIG_MAPPING_NAMES as CONFIG_MAPPING_NAMES, AutoConfig as AutoConfig, BaichuanConfig as BaichuanConfig, ChatGLMConfig as ChatGLMConfig, DollyV2Config as DollyV2Config, FalconConfig as FalconConfig, FlanT5Config as FlanT5Config, GemmaConfig as GemmaConfig, GPTNeoXConfig as GPTNeoXConfig, LlamaConfig as LlamaConfig, MistralConfig as MistralConfig, MixtralConfig as MixtralConfig, MPTConfig as MPTConfig, OPTConfig as OPTConfig, PhiConfig as PhiConfig, QwenConfig as QwenConfig, StableLMConfig as StableLMConfig, StarCoderConfig as StarCoderConfig, YiConfig as YiConfig
|
||||
from openllm_core.config import CONFIG_MAPPING as CONFIG_MAPPING, CONFIG_MAPPING_NAMES as CONFIG_MAPPING_NAMES, AutoConfig as AutoConfig, BaichuanConfig as BaichuanConfig, ChatGLMConfig as ChatGLMConfig, DbrxConfig as DbrxConfig, DollyV2Config as DollyV2Config, FalconConfig as FalconConfig, FlanT5Config as FlanT5Config, GemmaConfig as GemmaConfig, GPTNeoXConfig as GPTNeoXConfig, LlamaConfig as LlamaConfig, MistralConfig as MistralConfig, MixtralConfig as MixtralConfig, MPTConfig as MPTConfig, OPTConfig as OPTConfig, PhiConfig as PhiConfig, QwenConfig as QwenConfig, StableLMConfig as StableLMConfig, StarCoderConfig as StarCoderConfig, YiConfig as YiConfig
|
||||
from openllm_core._configuration import GenerationConfig as GenerationConfig, LLMConfig as LLMConfig
|
||||
from openllm_core._schemas import GenerationInput as GenerationInput, GenerationOutput as GenerationOutput, MetadataOutput as MetadataOutput, MessageParam as MessageParam
|
||||
from openllm_core.utils import api as api
|
||||
|
||||
Reference in New Issue
Block a user