diff --git a/openllm-python/src/_openllm_tiny/_llm.py b/openllm-python/src/_openllm_tiny/_llm.py
index 7df3a2e4..895d6e30 100644
--- a/openllm-python/src/_openllm_tiny/_llm.py
+++ b/openllm-python/src/_openllm_tiny/_llm.py
@@ -179,7 +179,7 @@ class LLM:
           k: config.__getitem__(k) for k in set(inspect.signature(SamplingParams).parameters.keys())
         }),
         request_id=request_id,
-        prompt_token_ids=prompt_token_ids,
+        prompt_token_ids=prompt_token_ids if prompt_token_ids else None,
       ):
         yield generations
     except Exception as err:
diff --git a/openllm-python/src/_openllm_tiny/_service.py b/openllm-python/src/_openllm_tiny/_service.py
index b4574c8b..2cb8a1f0 100644
--- a/openllm-python/src/_openllm_tiny/_service.py
+++ b/openllm-python/src/_openllm_tiny/_service.py
@@ -106,7 +106,10 @@ class LLMService:
     finish_reason_sent = [False] * _config['n']
 
     async for generations in self.llm.generate_iterator(
-      prompt=prompt, prompt_token_ids=prompt_token_ids, request_id=request_id, **llm_config
+      prompt=prompt,
+      prompt_token_ids=prompt_token_ids,
+      request_id=request_id,
+      **core.utils.dict_filter_none(llm_config),
     ):
       for output in generations.outputs:
         i = output.index
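
A quick note on intent: both hunks keep explicit `None` (or empty) values from being forwarded to the generation call. In `_service.py`, `core.utils.dict_filter_none` strips `None`-valued entries from `llm_config` before it is splatted into `generate_iterator`; in `_llm.py`, an empty `prompt_token_ids` list is coerced to `None`. Below is a minimal sketch of the assumed helper behavior; the real `dict_filter_none` lives in openllm-core and is not part of this diff, so this reconstruction is an assumption based on its name and call site.

```python
from typing import Any


def dict_filter_none(d: dict[str, Any]) -> dict[str, Any]:
  # Assumed behavior: keep only keys whose value is not None, so unset
  # options fall back to the callee's defaults instead of overriding
  # them with an explicit None.
  return {k: v for k, v in d.items() if v is not None}


# Hypothetical config illustrating the effect at the _service.py call site.
llm_config = {'temperature': 0.7, 'top_p': None, 'max_tokens': None}
assert dict_filter_none(llm_config) == {'temperature': 0.7}

# The _llm.py hunk applies the same idea to prompt_token_ids: an empty
# list is normalized to None before being handed to the engine.
prompt_token_ids: list[int] = []
assert (prompt_token_ids if prompt_token_ids else None) is None
```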