chore: ignore peft and fix adapter loading issue (#255)

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Author: Aaron Pham
Date: 2023-08-25 04:36:35 -04:00
Committed by: GitHub
Parent: 3b40a83817
Commit: 08dc6ed2ba
5 changed files with 17 additions and 5 deletions

@@ -28,7 +28,7 @@ generic_embedding_runner = bentoml.Runner(
 runners: list[AbstractRunner] = [runner]
 if not runner.supports_embeddings: runners.append(generic_embedding_runner)
 svc = bentoml.Service(name=f"llm-{llm_config['start_name']}-service", runners=runners)
-_JsonInput = bentoml.io.JSON.from_sample({'prompt': '', 'llm_config': llm_config.model_dump(flatten=True), 'adapter_name': ''})
+_JsonInput = bentoml.io.JSON.from_sample({'prompt': '', 'llm_config': llm_config.model_dump(flatten=True), 'adapter_name': None})
 @svc.api(route='/v1/generate', input=_JsonInput, output=bentoml.io.JSON.from_sample({'responses': [], 'configuration': llm_config.model_dump(flatten=True)}))
 async def generate_v1(input_dict: dict[str, t.Any]) -> openllm.GenerationOutput:
   qa_inputs = openllm.GenerationInput.from_llm_config(llm_config)(**input_dict)
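
For context, a minimal client sketch of calling the endpoint above. This is not part of the commit, and the host, port, and example values are assumptions (port 3000 is BentoML's default serving port). The one-character change matters because the from_sample payload doubles as the documented request schema: with None, "no adapter" is an explicit absence, whereas an empty string would be forwarded downstream as the name of an adapter that was never loaded.

import requests

# Hypothetical client call; the route and payload keys mirror the
# bentoml.io.JSON samples in the diff above.
payload = {
    'prompt': 'What is the capital of France?',
    'llm_config': {},      # assumed to fall back to the service's default generation config
    'adapter_name': None,  # None selects the base model; a string would select a loaded LoRA adapter
}
resp = requests.post('http://localhost:3000/v1/generate', json=payload)
resp.raise_for_status()
print(resp.json()['responses'])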