diff --git a/make.py b/make.py index 8acac29f..689825df 100644 --- a/make.py +++ b/make.py @@ -58,7 +58,7 @@ if __name__ == "__main__": if ALIAS_PATH.exists(): continue with open(ALIAS_PATH, "w") as f: - f.write(model_name) + f.write(model_version) else: # bentoml currently only support latest alias, copy to other alias shutil.copytree( BENTOML_HOME / "bentos" / model_repo / model_version, diff --git a/vllm-chat/service.py b/vllm-chat/service.py index ca15990b..44c23626 100644 --- a/vllm-chat/service.py +++ b/vllm-chat/service.py @@ -59,14 +59,12 @@ class VLLM: Le(ENGINE_CONFIG["max_model_len"]), ] = ENGINE_CONFIG["max_model_len"], stop: list[str] = [], - **kwargs, ) -> AsyncGenerator[str, None]: from vllm import SamplingParams SAMPLING_PARAM = SamplingParams( max_tokens=max_tokens, stop=stop, - **kwargs, ) stream = await self.engine.add_request(uuid.uuid4().hex, prompt, SAMPLING_PARAM)