mirror of
https://github.com/bentoml/OpenLLM.git
synced 2026-04-25 09:32:37 -04:00
fix: make sure the cpu resource count is a number
Signed-off-by: paperspace <29749331+aarnphm@users.noreply.github.com>
This commit is contained in:
@@ -3,7 +3,17 @@ import abc, inspect, logging, os, typing as t
|
||||
import inflection, orjson, pydantic
|
||||
from deepmerge.merger import Merger
|
||||
|
||||
from ._typing_compat import DictStrAny, ListStr, LiteralSerialisation, NotRequired, Required, Self, TypedDict, overload
|
||||
from ._typing_compat import (
|
||||
DictStrAny,
|
||||
ListStr,
|
||||
LiteralSerialisation,
|
||||
NotRequired,
|
||||
Required,
|
||||
Self,
|
||||
TypedDict,
|
||||
overload,
|
||||
Annotated,
|
||||
)
|
||||
from .exceptions import ForbiddenAttributeError, MissingDependencyError
|
||||
from .utils import field_env_key, first_not_none, is_vllm_available, is_transformers_available
|
||||
|
||||
@@ -223,6 +233,9 @@ class GenerationConfig(pydantic.BaseModel):
|
||||
None, description='Number of log probabilities to return per output token.'
|
||||
)
|
||||
detokenize: bool = pydantic.Field(True, description='Whether to detokenize the output.')
|
||||
truncate_prompt_tokens: t.Optional[Annotated[int, pydantic.Field(ge=1)]] = pydantic.Field(
|
||||
None, description='Truncate the prompt tokens.'
|
||||
)
|
||||
prompt_logprobs: t.Optional[int] = pydantic.Field(
|
||||
None, description='Number of log probabilities to return per input token.'
|
||||
)
|
||||
|
||||
@@ -426,7 +426,7 @@ def build_command(
|
||||
labels = {'library': 'vllm'}
|
||||
service_config = dict(
|
||||
resources={
|
||||
'gpu' if device else 'cpu': len(device) if device else 'cpu_count',
|
||||
'gpu' if device else 'cpu': len(device) if device else '1',
|
||||
'gpu_type': recommended_instance_type(model_id, bentomodel),
|
||||
},
|
||||
traffic=dict(timeout=timeout),
|
||||
|
||||
Reference in New Issue
Block a user