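fix: write model_version (not model_name) to the alias file; drop **kwargs pass-through to SamplingParams in VLLM service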

2026-01-19 21:08:22 -05:00 · 2024-05-23 02:00:45 +08:00
parent 5db1819527
commit 193d480b09
2 changed files with 1 addition and 3 deletions
--- a/make.py
+++ b/make.py
@@ -58,7 +58,7 @@ if __name__ == "__main__":
                    if ALIAS_PATH.exists():
                        continue
                    with open(ALIAS_PATH, "w") as f:
-                        f.write(model_name)
+                        f.write(model_version)
                else:  # bentoml currently only support latest alias, copy to other alias
                    shutil.copytree(
                        BENTOML_HOME / "bentos" / model_repo / model_version,
--- a/vllm-chat/service.py
+++ b/vllm-chat/service.py
@@ -59,14 +59,12 @@ class VLLM:
            Le(ENGINE_CONFIG["max_model_len"]),
        ] = ENGINE_CONFIG["max_model_len"],
        stop: list[str] = [],
-        **kwargs,
    ) -> AsyncGenerator[str, None]:
        from vllm import SamplingParams

        SAMPLING_PARAM = SamplingParams(
            max_tokens=max_tokens,
            stop=stop,
-            **kwargs,
        )
        stream = await self.engine.add_request(uuid.uuid4().hex, prompt, SAMPLING_PARAM)