diff --git a/openllm-python/src/openllm/_service.py b/openllm-python/src/openllm/_service.py
index 0263f21b..8ebe0e6a 100644
--- a/openllm-python/src/openllm/_service.py
+++ b/openllm-python/src/openllm/_service.py
@@ -38,7 +38,7 @@ _JsonInput = bentoml.io.JSON.from_sample({'prompt': '', 'llm_config': llm_config
 @svc.api(route='/v1/generate', input=_JsonInput, output=bentoml.io.JSON.from_sample({'responses': [], 'configuration': llm_config.model_dump(flatten=True)}))
 async def generate_v1(input_dict: dict[str, t.Any]) -> openllm.GenerateOutput:
   echo = input_dict.pop('echo', False)
-  qa_inputs = openllm.GenerationInput.from_llm_config(llm_config)(**input_dict)
+  qa_inputs = openllm.GenerateInput.from_llm_config(llm_config)(**input_dict)
   config = qa_inputs.llm_config.model_dump()
   if runner.backend == 'vllm':
     async for output in runner.vllm_generate.async_stream(qa_inputs.prompt, adapter_name=qa_inputs.adapter_name, echo=echo, request_id=openllm_core.utils.gen_random_uuid(), **config):
@@ -51,7 +51,7 @@ async def generate_v1(input_dict: dict[str, t.Any]) -> openllm.GenerateOutput:
 @svc.api(route='/v1/generate_stream', input=_JsonInput, output=bentoml.io.Text(content_type='text/event-stream'))
 async def generate_stream_v1(input_dict: dict[str, t.Any]) -> t.AsyncGenerator[str, None]:
   echo = input_dict.pop('echo', False)
-  qa_inputs = openllm.GenerationInput.from_llm_config(llm_config)(**input_dict)
+  qa_inputs = openllm.GenerateInput.from_llm_config(llm_config)(**input_dict)
   if runner.backend == 'vllm':
     return runner.vllm_generate_iterator.async_stream(qa_inputs.prompt, adapter_name=qa_inputs.adapter_name,
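
For reference, a minimal client sketch against the /v1/generate endpoint touched by this diff. Only the route, the 'prompt'/'llm_config'/'echo' request keys, and the 'responses'/'configuration' response keys come from the service code above; the host/port, example prompt, and use of the requests library are illustrative assumptions, not part of the change.

import requests  # assumption: any HTTP client works; requests is used here for brevity

payload = {
  'prompt': 'What is the capital of France?',  # illustrative prompt
  'llm_config': {},                            # optional per-request generation-config overrides
  'echo': False,                               # popped server-side before GenerateInput is built
}
resp = requests.post('http://localhost:3000/v1/generate', json=payload, timeout=60)  # host/port assumed
resp.raise_for_status()
out = resp.json()
print(out['responses'])      # list of generated responses
print(out['configuration'])  # flattened llm_config used for this request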