fix: make sure the cpu resource is set to a number instead of 'cpu_count'

Signed-off-by: paperspace <29749331+aarnphm@users.noreply.github.com>
This commit is contained in:
paperspace
2024-05-09 00:06:10 +00:00
parent d02f267fc7
commit 6726f6ae3e
2 changed files with 15 additions and 2 deletions

View File

@@ -3,7 +3,17 @@ import abc, inspect, logging, os, typing as t
import inflection, orjson, pydantic
from deepmerge.merger import Merger
from ._typing_compat import DictStrAny, ListStr, LiteralSerialisation, NotRequired, Required, Self, TypedDict, overload
from ._typing_compat import (
DictStrAny,
ListStr,
LiteralSerialisation,
NotRequired,
Required,
Self,
TypedDict,
overload,
Annotated,
)
from .exceptions import ForbiddenAttributeError, MissingDependencyError
from .utils import field_env_key, first_not_none, is_vllm_available, is_transformers_available
@@ -223,6 +233,9 @@ class GenerationConfig(pydantic.BaseModel):
None, description='Number of log probabilities to return per output token.'
)
detokenize: bool = pydantic.Field(True, description='Whether to detokenize the output.')
truncate_prompt_tokens: t.Optional[Annotated[int, pydantic.Field(ge=1)]] = pydantic.Field(
None, description='Truncate the prompt tokens.'
)
prompt_logprobs: t.Optional[int] = pydantic.Field(
None, description='Number of log probabilities to return per input token.'
)

View File

@@ -426,7 +426,7 @@ def build_command(
labels = {'library': 'vllm'}
service_config = dict(
resources={
'gpu' if device else 'cpu': len(device) if device else 'cpu_count',
'gpu' if device else 'cpu': len(device) if device else '1',
'gpu_type': recommended_instance_type(model_id, bentomodel),
},
traffic=dict(timeout=timeout),