mirror of
https://github.com/bentoml/OpenLLM.git
synced 2026-01-19 21:08:22 -05:00
fix
This commit is contained in:
2
make.py
2
make.py
@@ -58,7 +58,7 @@ if __name__ == "__main__":
|
||||
if ALIAS_PATH.exists():
|
||||
continue
|
||||
with open(ALIAS_PATH, "w") as f:
|
||||
f.write(model_name)
|
||||
f.write(model_version)
|
||||
else: # bentoml currently only support latest alias, copy to other alias
|
||||
shutil.copytree(
|
||||
BENTOML_HOME / "bentos" / model_repo / model_version,
|
||||
|
||||
@@ -59,14 +59,12 @@ class VLLM:
|
||||
Le(ENGINE_CONFIG["max_model_len"]),
|
||||
] = ENGINE_CONFIG["max_model_len"],
|
||||
stop: list[str] = [],
|
||||
**kwargs,
|
||||
) -> AsyncGenerator[str, None]:
|
||||
from vllm import SamplingParams
|
||||
|
||||
SAMPLING_PARAM = SamplingParams(
|
||||
max_tokens=max_tokens,
|
||||
stop=stop,
|
||||
**kwargs,
|
||||
)
|
||||
stream = await self.engine.add_request(uuid.uuid4().hex, prompt, SAMPLING_PARAM)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user