feat: mixtral support (#770)

Signed-off-by: Aaron Pham <29749331+aarnphm@users.noreply.github.com>
This commit is contained in:
Aaron Pham
2023-12-12 01:33:13 -05:00
committed by GitHub
parent b9260a8df3
commit d3328343d7
6 changed files with 14 additions and 12 deletions

View File

@@ -1,7 +1,6 @@
from __future__ import annotations
import logging, typing as t
import _service_vars as svars
import bentoml, openllm
import bentoml, openllm, _service_vars as svars
from openllm_core._schemas import MessageParam
from bentoml.io import JSON, Text
@@ -16,23 +15,23 @@ llm_model_class = openllm.GenerationInput.from_llm_config(llm.config)
# Non-streaming generation endpoint, registered on `svc` at route '/v1/generate'.
# Both the input and the output descriptors are JSON, built from sample payloads.
# NOTE(review): the one-line `input=..., output=...` form and the two split lines that
# follow it look like the before/after sides of the same diff hunk -- confirm which one
# is live; a real call cannot repeat the `input`/`output` keywords.
@svc.api(
route='/v1/generate',
input=JSON.from_sample(llm_model_class.examples()), output=JSON.from_sample(openllm.GenerationOutput.examples()), #
input=JSON.from_sample(llm_model_class.examples()),
output=JSON.from_sample(openllm.GenerationOutput.examples()),
)
# Builds an `llm_model_class` instance from the request dict, passes its dumped fields as
# keyword arguments to `llm.generate`, awaits the result and returns that result's model_dump().
async def generate_v1(input_dict: dict[str, t.Any]) -> dict[str, t.Any]: return (await llm.generate(**llm_model_class(**input_dict).model_dump())).model_dump()
# Streaming generation endpoint, registered on `svc` at route '/v1/generate_stream'.
# Input is JSON (same sample as the non-streaming route); output is text with
# content type 'text/event-stream'.
# NOTE(review): the one-line `input=..., output=...` form and the two split lines that
# follow it look like the before/after sides of the same diff hunk -- confirm which one is live.
@svc.api(
route='/v1/generate_stream',
input=JSON.from_sample(llm_model_class.examples()), output=Text(content_type='text/event-stream'), #
input=JSON.from_sample(llm_model_class.examples()),
output=Text(content_type='text/event-stream'),
)
# Async generator: for every item produced by `llm.generate_iterator` it yields one
# 'data: <item as JSON>\n\n' frame, then a final 'data: [DONE]\n\n' frame.
async def generate_stream_v1(input_dict: dict[str, t.Any]) -> t.AsyncGenerator[str, None]:
# NOTE(review): the two-line loop and the one-line loop below are the same statement in two
# layouts (apparently the removed and added sides of the diff); only one should exist in the file,
# otherwise generation would run twice per request.
async for it in llm.generate_iterator(**llm_model_class(**input_dict).model_dump()):
yield f'data: {it.model_dump_json()}\n\n'
async for it in llm.generate_iterator(**llm_model_class(**input_dict).model_dump()): yield f'data: {it.model_dump_json()}\n\n'
# End-of-stream marker sent after the iterator is exhausted.
yield 'data: [DONE]\n\n'
# Metadata object assembled from `llm.config` and attributes of `llm`; its model_dump()
# is used as the JSON sample for the '/v1/metadata' route declared right after it.
# NOTE(review): `backend=`/`model_id=`/`configuration=` appear twice (split over two lines, then
# joined on one line) -- these look like the before/after sides of the diff; confirm which is live.
_Metadata = openllm.MetadataOutput(
timeout=llm.config['timeout'], model_name=llm.config['model_name'], #
backend=llm.__llm_backend__, model_id=llm.model_id, #
# `.decode()` suggests model_dump_json() returns bytes here -- TODO confirm.
configuration=llm.config.model_dump_json().decode(),
backend=llm.__llm_backend__, model_id=llm.model_id, configuration=llm.config.model_dump_json().decode(), #
)
@svc.api(route='/v1/metadata', input=Text(), output=JSON.from_sample(_Metadata.model_dump()))

View File

@@ -31,7 +31,7 @@ def build_editable(path, package='openllm'):
raise RuntimeError('Please install OpenLLM from PyPI or built it from Git source.')
def construct_python_options(llm, llm_fs, extra_dependencies=None, adapter_map=None):
from . import RefResolver
packages = ['scipy', 'bentoml[tracing]>=1.1.10', 'vllm==0.2.4', 'ray==2.6.0', f'openllm>={RefResolver.from_strategy("release").version}'] # apparently bnb misses this one
packages = ['scipy', 'bentoml[tracing]>=1.1.10', f'openllm[vllm]>={RefResolver.from_strategy("release").version}'] # apparently bnb misses this one
if adapter_map is not None: packages += ['openllm[fine-tune]']
if extra_dependencies is not None: packages += [f'openllm[{k}]' for k in extra_dependencies]
if llm.config['requirements'] is not None: packages.extend(llm.config['requirements'])
@@ -50,7 +50,7 @@ def construct_docker_options(llm, _, quantize, adapter_map, dockerfile_template,
environ['OPENLLM_CONFIG'] = f"'{environ['OPENLLM_CONFIG']}'"
environ.pop('BENTOML_HOME', None) # NOTE: irrelevant in container
environ['NVIDIA_DRIVER_CAPABILITIES'] = 'compute,utility'
return DockerOptions(cuda_version='12.1', env=environ, dockerfile_template=dockerfile_template)
return DockerOptions(cuda_version='12.1', python_version='3.11', env=environ, dockerfile_template=dockerfile_template)
@inject
def create_bento(
bento_tag, llm_fs, llm, #

View File

@@ -73,6 +73,7 @@ requestBody:
stream: false
chat_template: __chat_template__
add_generation_prompt: __add_generation_prompt__
echo: false
streaming:
summary: Streaming input example
value:
@@ -92,6 +93,7 @@ requestBody:
- "<|endoftext|>"
chat_template: __chat_template__
add_generation_prompt: __add_generation_prompt__
echo: false
schema:
$ref: '#/components/schemas/ChatCompletionRequest'
responses:

View File

@@ -57,6 +57,7 @@ class ChatCompletionRequest:
max_tokens: t.Optional[int] = attr.field(default=None)
presence_penalty: t.Optional[float] = attr.field(default=None)
frequency_penalty: t.Optional[float] = attr.field(default=None)
echo: t.Optional[bool] = attr.field(default=False)
logit_bias: t.Optional[t.Dict[str, float]] = attr.field(default=None)
user: t.Optional[str] = attr.field(default=None)
# supported by vLLM and us