mirror of
https://github.com/bentoml/OpenLLM.git
synced 2026-05-04 05:42:44 -04:00
This is to help with working on CPU machine Signed-off-by: aarnphm-ec2-dev <29749331+aarnphm@users.noreply.github.com>
14 lines
499 B
Python
Generated
14 lines
499 B
Python
Generated
from _typeshed import Incomplete
|
|
from fastapi import Request as Request
|
|
from fastapi.responses import Response
|
|
from vllm.engine.arg_utils import AsyncEngineArgs as AsyncEngineArgs
|
|
from vllm.engine.async_llm_engine import AsyncLLMEngine as AsyncLLMEngine
|
|
from vllm.sampling_params import SamplingParams as SamplingParams
|
|
from vllm.utils import random_uuid as random_uuid
|
|
|
|
# Module-level integer; this stub declares only the type, not the value.
# NOTE(review): name suggests a server keep-alive timeout (units not shown
# here) — confirm against the runtime module.
TIMEOUT_KEEP_ALIVE: int
|
|
# Module-level integer; this stub declares only the type, not the value.
# NOTE(review): name suggests a timeout used to avoid a deadlock (units and
# usage not shown here) — confirm against the runtime module.
TIMEOUT_TO_PREVENT_DEADLOCK: int
|
|
# Typed as `Incomplete` (imported from `_typeshed`), i.e. the precise type is
# not spelled out in this stub.
# NOTE(review): presumably the FastAPI application object, given the fastapi
# imports in this file — confirm and tighten the annotation if so.
app: Incomplete
|
|
|
|
async def generate(request: Request) -> Response:
    """Type-only declaration of the asynchronous ``generate`` callable.

    Takes a single ``Request`` (imported from ``fastapi``) and, when awaited,
    yields a ``Response`` (imported from ``fastapi.responses``). This stub
    carries no implementation.

    NOTE(review): presumably the HTTP generation endpoint served by ``app``;
    confirm against the runtime module.
    """
    ...
|