This commit is contained in:
bojiang
2024-05-23 02:00:45 +08:00
parent 5db1819527
commit 193d480b09
2 changed files with 1 addition and 3 deletions

View File

@@ -58,7 +58,7 @@ if __name__ == "__main__":
if ALIAS_PATH.exists():
continue
with open(ALIAS_PATH, "w") as f:
f.write(model_name)
f.write(model_version)
else: # bentoml currently only support latest alias, copy to other alias
shutil.copytree(
BENTOML_HOME / "bentos" / model_repo / model_version,

View File

@@ -59,14 +59,12 @@ class VLLM:
Le(ENGINE_CONFIG["max_model_len"]),
] = ENGINE_CONFIG["max_model_len"],
stop: list[str] = [],
**kwargs,
) -> AsyncGenerator[str, None]:
from vllm import SamplingParams
SAMPLING_PARAM = SamplingParams(
max_tokens=max_tokens,
stop=stop,
**kwargs,
)
stream = await self.engine.add_request(uuid.uuid4().hex, prompt, SAMPLING_PARAM)