mirror of
https://github.com/bentoml/OpenLLM.git
synced 2026-03-05 07:36:15 -05:00
refactor: use DEBUG env-var instead of OPENLLMDEVDEBUG (#647)
Signed-off-by: Aaron Pham <29749331+aarnphm@users.noreply.github.com>
This commit is contained in:
@@ -213,7 +213,7 @@ def build_container(
|
||||
tags['image_sha'] = outputs.decode('utf-8').strip()
|
||||
except Exception as err:
|
||||
raise openllm.exceptions.OpenLLMException(
|
||||
f'Failed to containerize base container images (Scroll up to see error above, or set OPENLLMDEVDEBUG=True for more traceback):\n{err}'
|
||||
f'Failed to containerize base container images (Scroll up to see error above, or set DEBUG=5 for more traceback):\n{err}'
|
||||
) from err
|
||||
return tags
|
||||
|
||||
|
||||
@@ -147,8 +147,8 @@ async def cohere_generate(req: Request, llm: openllm.LLM[M, T]) -> Response:
|
||||
return StreamingResponse(generate_stream_generator(), media_type='text/event-stream')
|
||||
# Non-streaming case
|
||||
final_result: GenerationOutput | None = None
|
||||
texts: list[list[str]] = [[]] * config['num_generations']
|
||||
token_ids: list[list[int]] = [[]] * config['num_generations']
|
||||
texts: list[list[str]] = [[]] * config['n']
|
||||
token_ids: list[list[int]] = [[]] * config['n']
|
||||
async for res in result_generator:
|
||||
if await req.is_disconnected():
|
||||
return error_response(HTTPStatus.BAD_REQUEST, 'Client disconnected.')
|
||||
|
||||
@@ -8,7 +8,6 @@ import typing as t
|
||||
import torch
|
||||
import transformers
|
||||
|
||||
# import openllm here for OPENLLMDEVDEBUG
|
||||
import openllm
|
||||
|
||||
# Make sure to have at least one GPU to run this script
|
||||
|
||||
@@ -8,7 +8,6 @@ import typing as t
|
||||
import torch
|
||||
import transformers
|
||||
|
||||
# import openllm here for OPENLLMDEVDEBUG
|
||||
import openllm
|
||||
|
||||
if t.TYPE_CHECKING:
|
||||
|
||||
@@ -7,7 +7,6 @@ import typing as t
|
||||
|
||||
import transformers
|
||||
|
||||
# import openllm here for OPENLLMDEVDEBUG
|
||||
import openllm
|
||||
|
||||
# Make sure to have at least one GPU to run this script
|
||||
|
||||
Reference in New Issue
Block a user