diff --git a/openllm-python/src/_openllm_tiny/_llm.py b/openllm-python/src/_openllm_tiny/_llm.py
index 7df3a2e4..895d6e30 100644
--- a/openllm-python/src/_openllm_tiny/_llm.py
+++ b/openllm-python/src/_openllm_tiny/_llm.py
@@ -179,7 +179,7 @@ class LLM:
           k: config.__getitem__(k) for k in set(inspect.signature(SamplingParams).parameters.keys())
         }),
         request_id=request_id,
-        prompt_token_ids=prompt_token_ids,
+        prompt_token_ids=prompt_token_ids if prompt_token_ids else None,
       ):
         yield generations
     except Exception as err:
diff --git a/openllm-python/src/_openllm_tiny/_service.py b/openllm-python/src/_openllm_tiny/_service.py
index b4574c8b..2cb8a1f0 100644
--- a/openllm-python/src/_openllm_tiny/_service.py
+++ b/openllm-python/src/_openllm_tiny/_service.py
@@ -106,7 +106,10 @@ class LLMService:
     finish_reason_sent = [False] * _config['n']
 
     async for generations in self.llm.generate_iterator(
-      prompt=prompt, prompt_token_ids=prompt_token_ids, request_id=request_id, **llm_config
+      prompt=prompt,
+      prompt_token_ids=prompt_token_ids,
+      request_id=request_id,
+      **core.utils.dict_filter_none(llm_config),
     ):
       for output in generations.outputs:
         i = output.index
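
A quick note on intent: both hunks keep explicit `None` (or empty) values from being forwarded to the generation call. In `_service.py`, `core.utils.dict_filter_none` strips `None`-valued entries from `llm_config` before it is splatted into `generate_iterator`; in `_llm.py`, an empty `prompt_token_ids` list is coerced to `None`. Below is a minimal sketch of the assumed helper behavior; the real `dict_filter_none` lives in openllm-core and is not part of this diff, so this reconstruction is an assumption based on its name and call site.

```python
from typing import Any


def dict_filter_none(d: dict[str, Any]) -> dict[str, Any]:
  # Assumed behavior: keep only keys whose value is not None, so unset
  # options fall back to the callee's defaults instead of overriding
  # them with an explicit None.
  return {k: v for k, v in d.items() if v is not None}


# Hypothetical config illustrating the effect at the _service.py call site.
llm_config = {'temperature': 0.7, 'top_p': None, 'max_tokens': None}
assert dict_filter_none(llm_config) == {'temperature': 0.7}

# The _llm.py hunk applies the same idea to prompt_token_ids: an empty
# list is normalized to None before being handed to the engine.
prompt_token_ids: list[int] = []
assert (prompt_token_ids if prompt_token_ids else None) is None
```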