mirror of
https://github.com/bentoml/OpenLLM.git
synced 2026-03-10 02:57:37 -04:00
Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com> Signed-off-by: Aaron Pham <29749331+aarnphm@users.noreply.github.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
16 lines
518 B
Python
16 lines
518 B
Python
from typing import Dict, Optional, Any
|
|
from openllm_core._typing_compat import LiteralSerialisation, LiteralQuantise, LiteralString
|
|
from _openllm_tiny._llm import Dtype
|
|
|
|
# Type declarations for the module-level service variables.
# Every value is elided with `...`; only the names and annotations are
# declared here, not the concrete values.
# NOTE(review): the per-variable notes below are inferred from the names and
# annotations alone -- confirm against the code that populates this module.

# presumably identify which model is served and under which tag/version
model_id: str = ...
model_name: LiteralString = ...
model_tag: Optional[str] = ...  # may be None
model_version: Optional[str] = ...  # may be None

# presumably control how weights are quantised, serialised and typed
quantise: LiteralQuantise = ...
serialisation: LiteralSerialisation = ...
dtype: Dtype = ...

trust_remote_code: bool = ...
max_model_len: Optional[int] = ...  # may be None

# NOTE(review): previously annotated as `int`. The name matches vLLM's
# `gpu_memory_utilization` engine argument, which is a fraction (e.g. 0.9),
# so `float` is used here; type checkers still accept `int` values where
# `float` is expected. Confirm against the code that sets this value.
gpu_memory_utilization: float = ...

services_config: Dict[str, Any] = ...
|