diff --git a/make.py b/make.py
index 8acac29f..689825df 100644
--- a/make.py
+++ b/make.py
@@ -58,7 +58,7 @@ if __name__ == "__main__":
                     if ALIAS_PATH.exists():
                         continue
                     with open(ALIAS_PATH, "w") as f:
-                        f.write(model_name)
+                        f.write(model_version)
                 else:  # bentoml currently only support latest alias, copy to other alias
                     shutil.copytree(
                         BENTOML_HOME / "bentos" / model_repo / model_version,
diff --git a/vllm-chat/service.py b/vllm-chat/service.py
index ca15990b..44c23626 100644
--- a/vllm-chat/service.py
+++ b/vllm-chat/service.py
@@ -59,14 +59,12 @@ class VLLM:
             Le(ENGINE_CONFIG["max_model_len"]),
         ] = ENGINE_CONFIG["max_model_len"],
         stop: list[str] = [],
-        **kwargs,
     ) -> AsyncGenerator[str, None]:
         from vllm import SamplingParams
 
         SAMPLING_PARAM = SamplingParams(
             max_tokens=max_tokens,
             stop=stop,
-            **kwargs,
         )
         stream = await self.engine.add_request(uuid.uuid4().hex, prompt, SAMPLING_PARAM)