mirror of
https://github.com/bentoml/OpenLLM.git
synced 2026-01-26 08:17:52 -05:00
14 lines
425 B
Python
14 lines
425 B
Python
from typing import Dict, Optional, Any
|
|
from openllm_core._typing_compat import LiteralSerialisation, LiteralQuantise
|
|
from _openllm_tiny._llm import Dtype
|
|
|
|
# Typed declarations of the module-level service settings. Every value is
# elided with ``...``; only the names and their types are declared here.

# NOTE(review): presumably the identifier or path of the model to serve, and
# the revision of that model to load -- confirm against the loader.
model_id: str = ...
revision: str = ...

# Constrained to the literal choices defined in openllm_core._typing_compat.
quantise: LiteralQuantise = ...
serialisation: LiteralSerialisation = ...

# Uses the Dtype alias exported by _openllm_tiny._llm.
dtype: Dtype = ...

trust_remote_code: bool = ...

# None is an accepted value for this setting.
max_model_len: Optional[int] = ...

# NOTE(review): declared as float rather than int. This setting is presumably
# a fraction of GPU memory (as with vLLM's engine argument of the same name),
# which an int annotation cannot represent -- confirm against the runtime
# definition.
gpu_memory_utilization: float = ...

# String-keyed mapping with arbitrary values.
services_config: Dict[str, Any] = ...
|