infra: using ruff formatter (#594)

Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>
Aaron Pham authored on 2023-11-09 12:44:05 -05:00, committed by GitHub
parent 021fd453b9
commit ac377fe490
102 changed files with 5577 additions and 2540 deletions
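
The commit moves the repository's Python formatting over to ruff's formatter. As a rough sketch of what such a setup usually involves (the exact configuration and line length used by this project are not visible in this diff, so the values below are assumptions), the formatter is configured under [tool.ruff] in pyproject.toml and then run across the tree:

    # illustrative pyproject.toml fragment; assumed values, not copied from this commit
    [tool.ruff]
    line-length = 119
    [tool.ruff.format]
    quote-style = "single"  # the reformatted files keep single-quoted strings

    # reformat the repository, and verify formatting in CI
    ruff format .
    ruff format --check .

The hunks below are the result of that reformatting: one-line compound statements are split onto separate lines, long call signatures are wrapped one argument per line, and trailing commas inside single-line collections are dropped.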


@@ -4,6 +4,8 @@ import os
from hypothesis import HealthCheck
from hypothesis import settings
settings.register_profile('CI', settings(suppress_health_check=[HealthCheck.too_slow]), deadline=None)
if 'CI' in os.environ: settings.load_profile('CI')
if 'CI' in os.environ:
settings.load_profile('CI')


@@ -8,30 +8,34 @@ import openllm
from openllm_core._configuration import ModelSettings
logger = logging.getLogger(__name__)
@st.composite
def model_settings(draw: st.DrawFn):
"""Strategy for generating ModelSettings objects."""
kwargs: dict[str, t.Any] = {
'default_id': st.text(min_size=1),
'model_ids': st.lists(st.text(), min_size=1),
'architecture': st.text(min_size=1),
'url': st.text(),
'trust_remote_code': st.booleans(),
'requirements': st.none() | st.lists(st.text(), min_size=1),
'model_type': st.sampled_from(['causal_lm', 'seq2seq_lm']),
'name_type': st.sampled_from(['dasherize', 'lowercase']),
'timeout': st.integers(min_value=3600),
'workers_per_resource': st.one_of(st.integers(min_value=1), st.floats(min_value=0.1, max_value=1.0)),
}
return draw(st.builds(ModelSettings, **kwargs))
def make_llm_config(cls_name: str,
dunder_config: dict[str, t.Any] | ModelSettings,
fields: tuple[tuple[t.LiteralString, str, t.Any], ...] | None = None,
generation_fields: tuple[tuple[t.LiteralString, t.Any], ...] | None = None,
) -> type[openllm.LLMConfig]:
def make_llm_config(
cls_name: str,
dunder_config: dict[str, t.Any] | ModelSettings,
fields: tuple[tuple[t.LiteralString, str, t.Any], ...] | None = None,
generation_fields: tuple[tuple[t.LiteralString, t.Any], ...] | None = None,
) -> type[openllm.LLMConfig]:
globs: dict[str, t.Any] = {'openllm': openllm}
_config_args: list[str] = []
lines: list[str] = [f'class {cls_name}Config(openllm.LLMConfig):']


@@ -23,30 +23,44 @@ from openllm_core._configuration import field_env_key
from ._strategies._configuration import make_llm_config
from ._strategies._configuration import model_settings
# XXX: @aarnphm fixes TypedDict behaviour in 3.11
@pytest.mark.skipif(sys.version_info[:2] == (3, 11), reason='TypedDict in 3.11 behaves differently, so we need to fix this')
@pytest.mark.skipif(
sys.version_info[:2] == (3, 11), reason='TypedDict in 3.11 behaves differently, so we need to fix this'
)
def test_missing_default():
with pytest.raises(ValueError, match='Missing required fields *'):
make_llm_config('MissingDefaultId', {'name_type': 'lowercase', 'requirements': ['bentoml']})
with pytest.raises(ValueError, match='Missing required fields *'):
make_llm_config('MissingModelId', {'default_id': 'huggingface/t5-tiny-testing', 'requirements': ['bentoml']})
with pytest.raises(ValueError, match='Missing required fields *'):
make_llm_config('MissingArchitecture', {'default_id': 'huggingface/t5-tiny-testing', 'model_ids': ['huggingface/t5-tiny-testing'], 'requirements': ['bentoml'],},)
make_llm_config(
'MissingArchitecture',
{
'default_id': 'huggingface/t5-tiny-testing',
'model_ids': ['huggingface/t5-tiny-testing'],
'requirements': ['bentoml'],
},
)
def test_forbidden_access():
cl_ = make_llm_config('ForbiddenAccess', {
cl_ = make_llm_config(
'ForbiddenAccess',
{
'default_id': 'huggingface/t5-tiny-testing',
'model_ids': ['huggingface/t5-tiny-testing', 'bentoml/t5-tiny-testing'],
'architecture': 'PreTrainedModel',
'requirements': ['bentoml'],
},
)
assert pytest.raises(openllm.exceptions.ForbiddenAttributeError, cl_.__getattribute__, cl_(), '__config__',)
assert pytest.raises(openllm.exceptions.ForbiddenAttributeError, cl_.__getattribute__, cl_(), 'GenerationConfig',)
assert pytest.raises(openllm.exceptions.ForbiddenAttributeError, cl_.__getattribute__, cl_(), 'SamplingParams',)
assert pytest.raises(openllm.exceptions.ForbiddenAttributeError, cl_.__getattribute__, cl_(), '__config__')
assert pytest.raises(openllm.exceptions.ForbiddenAttributeError, cl_.__getattribute__, cl_(), 'GenerationConfig')
assert pytest.raises(openllm.exceptions.ForbiddenAttributeError, cl_.__getattribute__, cl_(), 'SamplingParams')
assert openllm.utils.lenient_issubclass(cl_.__openllm_generation_class__, GenerationConfig)
@given(model_settings())
def test_class_normal_gen(gen_settings: ModelSettings):
assume(gen_settings['default_id'] and all(i for i in gen_settings['model_ids']))
@@ -55,25 +69,42 @@ def test_class_normal_gen(gen_settings: ModelSettings):
for key in gen_settings:
assert object.__getattribute__(cl_, f'__openllm_{key}__') == gen_settings.__getitem__(key)
@given(model_settings(), st.integers())
def test_simple_struct_dump(gen_settings: ModelSettings, field1: int):
cl_ = make_llm_config('IdempotentLLM', gen_settings, fields=(('field1', 'float', field1),))
assert cl_().model_dump()['field1'] == field1
@given(model_settings(), st.integers())
def test_config_derivation(gen_settings: ModelSettings, field1: int):
cl_ = make_llm_config('IdempotentLLM', gen_settings, fields=(('field1', 'float', field1),))
new_cls = cl_.model_derivate('DerivedLLM', default_id='asdfasdf')
assert new_cls.__openllm_default_id__ == 'asdfasdf'
@given(model_settings())
def test_config_derived_follow_attrs_protocol(gen_settings: ModelSettings):
cl_ = make_llm_config('AttrsProtocolLLM', gen_settings)
assert attr.has(cl_)
@given(model_settings(), st.integers(max_value=283473), st.floats(min_value=0.0, max_value=1.0), st.integers(max_value=283473), st.floats(min_value=0.0, max_value=1.0),)
def test_complex_struct_dump(gen_settings: ModelSettings, field1: int, temperature: float, input_field1: int, input_temperature: float):
cl_ = make_llm_config('ComplexLLM', gen_settings, fields=(('field1', 'float', field1),), generation_fields=(('temperature', temperature),),)
@given(
model_settings(),
st.integers(max_value=283473),
st.floats(min_value=0.0, max_value=1.0),
st.integers(max_value=283473),
st.floats(min_value=0.0, max_value=1.0),
)
def test_complex_struct_dump(
gen_settings: ModelSettings, field1: int, temperature: float, input_field1: int, input_temperature: float
):
cl_ = make_llm_config(
'ComplexLLM',
gen_settings,
fields=(('field1', 'float', field1),),
generation_fields=(('temperature', temperature),),
)
sent = cl_()
assert sent.model_dump()['field1'] == field1
assert sent.model_dump()['generation_config']['temperature'] == temperature
@@ -90,16 +121,18 @@ def test_complex_struct_dump(gen_settings: ModelSettings, field1: int, temperatu
assert pas_nested.model_dump()['field1'] == input_field1
assert pas_nested.model_dump()['generation_config']['temperature'] == input_temperature
@contextlib.contextmanager
def patch_env(**attrs: t.Any):
with mock.patch.dict(os.environ, attrs, clear=True):
yield
def test_struct_envvar():
with patch_env(**{field_env_key('field1'): '4', field_env_key('temperature', suffix='generation'): '0.2',}):
with patch_env(**{field_env_key('field1'): '4', field_env_key('temperature', suffix='generation'): '0.2'}):
class EnvLLM(openllm.LLMConfig):
__config__ = {'default_id': 'asdfasdf', 'model_ids': ['asdf', 'asdfasdfads'], 'architecture': 'PreTrainedModel',}
__config__ = {'default_id': 'asdfasdf', 'model_ids': ['asdf', 'asdfasdfads'], 'architecture': 'PreTrainedModel'}
field1: int = 2
class GenerationConfig:
@@ -113,9 +146,10 @@ def test_struct_envvar():
assert overwrite_default.field1 == 4
assert overwrite_default['temperature'] == 0.2
def test_struct_provided_fields():
class EnvLLM(openllm.LLMConfig):
__config__ = {'default_id': 'asdfasdf', 'model_ids': ['asdf', 'asdfasdfads'], 'architecture': 'PreTrainedModel',}
__config__ = {'default_id': 'asdfasdf', 'model_ids': ['asdf', 'asdfasdfads'], 'architecture': 'PreTrainedModel'}
field1: int = 2
class GenerationConfig:
@@ -125,26 +159,27 @@ def test_struct_provided_fields():
assert sent.field1 == 20
assert sent.generation_config.temperature == 0.4
def test_struct_envvar_with_overwrite_provided_env(monkeypatch: pytest.MonkeyPatch):
with monkeypatch.context() as mk:
mk.setenv(field_env_key('field1'), str(4.0))
mk.setenv(field_env_key('temperature', suffix='generation'), str(0.2))
sent = make_llm_config('OverwriteWithEnvAvailable', {
'default_id': 'asdfasdf',
'model_ids': ['asdf', 'asdfasdfads'],
'architecture': 'PreTrainedModel'
},
fields=(('field1', 'float', 3.0),),
).model_construct_env(field1=20.0, temperature=0.4)
sent = make_llm_config(
'OverwriteWithEnvAvailable',
{'default_id': 'asdfasdf', 'model_ids': ['asdf', 'asdfasdfads'], 'architecture': 'PreTrainedModel'},
fields=(('field1', 'float', 3.0),),
).model_construct_env(field1=20.0, temperature=0.4)
assert sent.generation_config.temperature == 0.4
assert sent.field1 == 20.0
@given(model_settings())
@pytest.mark.parametrize(('return_dict', 'typ'), [(True, dict), (False, transformers.GenerationConfig)])
def test_conversion_to_transformers(return_dict: bool, typ: type[t.Any], gen_settings: ModelSettings):
cl_ = make_llm_config('ConversionLLM', gen_settings)
assert isinstance(cl_().to_generation_config(return_as_dict=return_dict), typ)
@given(model_settings())
def test_click_conversion(gen_settings: ModelSettings):
# currently our conversion omits Union types.
@@ -157,6 +192,7 @@ def test_click_conversion(gen_settings: ModelSettings):
click_options_filtered = [i for i in wrapped.__click_params__ if i.name and not i.name.startswith('fake_')]
assert len(filtered) == len(click_options_filtered)
@pytest.mark.parametrize('model_name', openllm.CONFIG_MAPPING.keys())
def test_configuration_dict_protocol(model_name: str):
config = openllm.AutoConfig.for_model(model_name)


@@ -7,23 +7,44 @@ import pytest
import openllm
if t.TYPE_CHECKING:
from openllm_core._typing_compat import LiteralBackend
_MODELING_MAPPING = {'flan_t5': 'google/flan-t5-small', 'opt': 'facebook/opt-125m', 'baichuan': 'baichuan-inc/Baichuan-7B'}
_PROMPT_MAPPING = {'qa': 'Answer the following yes/no question by reasoning step-by-step. Can you write a whole Haiku in a single tweet?'}
_MODELING_MAPPING = {
'flan_t5': 'google/flan-t5-small',
'opt': 'facebook/opt-125m',
'baichuan': 'baichuan-inc/Baichuan-7B',
}
_PROMPT_MAPPING = {
'qa': 'Answer the following yes/no question by reasoning step-by-step. Can you write a whole Haiku in a single tweet?'
}
def parametrise_local_llm(model: str) -> t.Generator[tuple[str, openllm.LLMRunner[t.Any, t.Any] | openllm.LLM[t.Any, t.Any]], None, None]:
if model not in _MODELING_MAPPING: pytest.skip(f"'{model}' is not yet supported in framework testing.")
def parametrise_local_llm(
model: str
) -> t.Generator[tuple[str, openllm.LLMRunner[t.Any, t.Any] | openllm.LLM[t.Any, t.Any]], None, None]:
if model not in _MODELING_MAPPING:
pytest.skip(f"'{model}' is not yet supported in framework testing.")
backends: tuple[LiteralBackend, ...] = ('pt',)
for backend, prompt in itertools.product(backends, _PROMPT_MAPPING.keys()):
yield prompt, openllm.Runner(model, model_id=_MODELING_MAPPING[model], ensure_available=True, backend=backend, init_local=True)
yield (
prompt,
openllm.Runner(
model, model_id=_MODELING_MAPPING[model], ensure_available=True, backend=backend, init_local=True
),
)
def pytest_generate_tests(metafunc: pytest.Metafunc) -> None:
if os.getenv('GITHUB_ACTIONS') is None:
if 'prompt' in metafunc.fixturenames and 'llm' in metafunc.fixturenames:
metafunc.parametrize('prompt,llm', [(p, llm) for p, llm in parametrise_local_llm(metafunc.function.__name__[5:-15])])
metafunc.parametrize(
'prompt,llm', [(p, llm) for p, llm in parametrise_local_llm(metafunc.function.__name__[5:-15])]
)
def pytest_sessionfinish(session: pytest.Session, exitstatus: int):
# If no tests are collected, pytest exits with code 5, which makes the CI fail.
if exitstatus == 5: session.exitstatus = 0
if exitstatus == 5:
session.exitstatus = 0


@@ -27,6 +27,7 @@ from openllm_core._typing_compat import DictStrAny
from openllm_core._typing_compat import ListAny
from openllm_core._typing_compat import LiteralQuantise
logger = logging.getLogger(__name__)
if t.TYPE_CHECKING:
@@ -40,8 +41,11 @@ if t.TYPE_CHECKING:
from openllm.client import BaseAsyncClient
class ResponseComparator(JSONSnapshotExtension):
def serialize(self, data: SerializableData, *, exclude: PropertyFilter | None = None, matcher: PropertyMatcher | None = None,) -> SerializedData:
def serialize(
self, data: SerializableData, *, exclude: PropertyFilter | None = None, matcher: PropertyMatcher | None = None
) -> SerializedData:
if LazyType(ListAny).isinstance(data):
data = [d.unmarshaled for d in data]
else:
@@ -73,12 +77,16 @@ class ResponseComparator(JSONSnapshotExtension):
def eq_output(s: openllm.GenerationOutput, t: openllm.GenerationOutput) -> bool:
return len(s.outputs) == len(t.outputs)
return len(serialized_data) == len(snapshot_data) and all([eq_output(s, t) for s, t in zip(serialized_data, snapshot_data)])
return len(serialized_data) == len(snapshot_data) and all(
[eq_output(s, t) for s, t in zip(serialized_data, snapshot_data)]
)
@pytest.fixture()
def response_snapshot(snapshot: SnapshotAssertion):
return snapshot.use_extension(ResponseComparator)
@attr.define(init=False)
class _Handle(ABC):
port: int
@@ -88,8 +96,7 @@ class _Handle(ABC):
if t.TYPE_CHECKING:
def __attrs_init__(self, *args: t.Any, **attrs: t.Any):
...
def __attrs_init__(self, *args: t.Any, **attrs: t.Any): ...
def __attrs_post_init__(self):
self.client = openllm.client.AsyncHTTPClient(f'http://localhost:{self.port}')
@@ -111,42 +118,65 @@ class _Handle(ABC):
time.sleep(1)
raise RuntimeError(f'Handle failed to initialise within {timeout} seconds.')
@attr.define(init=False)
class LocalHandle(_Handle):
process: subprocess.Popen[bytes]
def __init__(self, process: subprocess.Popen[bytes], port: int, deployment_mode: t.Literal['container', 'local'],):
def __init__(self, process: subprocess.Popen[bytes], port: int, deployment_mode: t.Literal['container', 'local']):
self.__attrs_init__(port, deployment_mode, process)
def status(self) -> bool:
return self.process.poll() is None
class HandleProtocol(t.Protocol):
@contextlib.contextmanager
def __call__(*, model: str, model_id: str, image_tag: str, quantize: t.AnyStr | None = None,) -> t.Generator[_Handle, None, None]:
...
def __call__(
*, model: str, model_id: str, image_tag: str, quantize: t.AnyStr | None = None
) -> t.Generator[_Handle, None, None]: ...
@attr.define(init=False)
class DockerHandle(_Handle):
container_name: str
docker_client: docker.DockerClient
def __init__(self, docker_client: docker.DockerClient, container_name: str, port: int, deployment_mode: t.Literal['container', 'local'],):
def __init__(
self,
docker_client: docker.DockerClient,
container_name: str,
port: int,
deployment_mode: t.Literal['container', 'local'],
):
self.__attrs_init__(port, deployment_mode, container_name, docker_client)
def status(self) -> bool:
container = self.docker_client.containers.get(self.container_name)
return container.status in ['running', 'created']
@contextlib.contextmanager
def _local_handle(model: str, model_id: str, image_tag: str, deployment_mode: t.Literal['container', 'local'], quantize: LiteralQuantise | None = None, *, _serve_grpc: bool = False):
def _local_handle(
model: str,
model_id: str,
image_tag: str,
deployment_mode: t.Literal['container', 'local'],
quantize: LiteralQuantise | None = None,
*,
_serve_grpc: bool = False,
):
with openllm.utils.reserve_free_port() as port:
pass
if not _serve_grpc:
proc = openllm.start(model, model_id=model_id, quantize=quantize, additional_args=['--port', str(port)], __test__=True)
proc = openllm.start(
model, model_id=model_id, quantize=quantize, additional_args=['--port', str(port)], __test__=True
)
else:
proc = openllm.start_grpc(model, model_id=model_id, quantize=quantize, additional_args=['--port', str(port)], __test__=True)
proc = openllm.start_grpc(
model, model_id=model_id, quantize=quantize, additional_args=['--port', str(port)], __test__=True
)
yield LocalHandle(proc, port, deployment_mode)
proc.terminate()
@@ -159,8 +189,17 @@ def _local_handle(model: str, model_id: str, image_tag: str, deployment_mode: t.
if proc.stderr:
proc.stderr.close()
@contextlib.contextmanager
def _container_handle(model: str, model_id: str, image_tag: str, deployment_mode: t.Literal['container', 'local'], quantize: LiteralQuantise | None = None, *, _serve_grpc: bool = False):
def _container_handle(
model: str,
model_id: str,
image_tag: str,
deployment_mode: t.Literal['container', 'local'],
quantize: LiteralQuantise | None = None,
*,
_serve_grpc: bool = False,
):
with openllm.utils.reserve_free_port() as port, openllm.utils.reserve_free_port() as prom_port:
pass
container_name = f'openllm-{model}-{self(model_id)}'.replace('-', '_')
@@ -177,22 +216,22 @@ def _container_handle(model: str, model_id: str, image_tag: str, deployment_mode
env: DictStrAny = {}
if quantize is not None: env['OPENLLM_QUANTIZE'] = quantize
if quantize is not None:
env['OPENLLM_QUANTIZE'] = quantize
gpus = openllm.utils.device_count() or -1
devs = [docker.types.DeviceRequest(count=gpus, capabilities=[['gpu']])] if gpus > 0 else None
container = client.containers.run(image_tag,
command=args,
name=container_name,
environment=env,
auto_remove=False,
detach=True,
device_requests=devs,
ports={
'3000/tcp': port,
'3001/tcp': prom_port
})
container = client.containers.run(
image_tag,
command=args,
name=container_name,
environment=env,
auto_remove=False,
detach=True,
device_requests=devs,
ports={'3000/tcp': port, '3001/tcp': prom_port},
)
yield DockerHandle(client, container.name, port, deployment_mode)
@@ -207,22 +246,26 @@ def _container_handle(model: str, model_id: str, image_tag: str, deployment_mode
container.remove()
@pytest.fixture(scope='session', autouse=True)
def clean_context() -> t.Generator[contextlib.ExitStack, None, None]:
stack = contextlib.ExitStack()
yield stack
stack.close()
@pytest.fixture(scope='module')
def el() -> t.Generator[asyncio.AbstractEventLoop, None, None]:
loop = asyncio.get_event_loop()
yield loop
loop.close()
@pytest.fixture(params=['container', 'local'], scope='session')
def deployment_mode(request: pytest.FixtureRequest) -> str:
return request.param
@pytest.fixture(scope='module')
def handler(el: asyncio.AbstractEventLoop, deployment_mode: t.Literal['container', 'local']):
if deployment_mode == 'container':


@@ -5,6 +5,7 @@ import pytest
import openllm
if t.TYPE_CHECKING:
import contextlib
@@ -15,17 +16,24 @@ if t.TYPE_CHECKING:
model = 'flan_t5'
model_id = 'google/flan-t5-small'
@pytest.fixture(scope='module')
def flan_t5_handle(handler: HandleProtocol, deployment_mode: t.Literal['container', 'local'], clean_context: contextlib.ExitStack,):
with openllm.testing.prepare(model, model_id=model_id, deployment_mode=deployment_mode, clean_context=clean_context) as image_tag:
def flan_t5_handle(
handler: HandleProtocol, deployment_mode: t.Literal['container', 'local'], clean_context: contextlib.ExitStack
):
with openllm.testing.prepare(
model, model_id=model_id, deployment_mode=deployment_mode, clean_context=clean_context
) as image_tag:
with handler(model=model, model_id=model_id, image_tag=image_tag) as handle:
yield handle
@pytest.fixture(scope='module')
async def flan_t5(flan_t5_handle: _Handle):
await flan_t5_handle.health(240)
return flan_t5_handle.client
@pytest.mark.asyncio()
async def test_flan_t5(flan_t5: t.Awaitable[openllm.client.AsyncHTTPClient], response_snapshot: ResponseComparator):
client = await flan_t5


@@ -5,6 +5,7 @@ import pytest
import openllm
if t.TYPE_CHECKING:
import contextlib
@@ -15,17 +16,24 @@ if t.TYPE_CHECKING:
model = 'opt'
model_id = 'facebook/opt-125m'
@pytest.fixture(scope='module')
def opt_125m_handle(handler: HandleProtocol, deployment_mode: t.Literal['container', 'local'], clean_context: contextlib.ExitStack,):
with openllm.testing.prepare(model, model_id=model_id, deployment_mode=deployment_mode, clean_context=clean_context) as image_tag:
def opt_125m_handle(
handler: HandleProtocol, deployment_mode: t.Literal['container', 'local'], clean_context: contextlib.ExitStack
):
with openllm.testing.prepare(
model, model_id=model_id, deployment_mode=deployment_mode, clean_context=clean_context
) as image_tag:
with handler(model=model, model_id=model_id, image_tag=image_tag) as handle:
yield handle
@pytest.fixture(scope='module')
async def opt_125m(opt_125m_handle: _Handle):
await opt_125m_handle.health(240)
return opt_125m_handle.client
@pytest.mark.asyncio()
async def test_opt_125m(opt_125m: t.Awaitable[openllm.client.AsyncHTTPClient], response_snapshot: ResponseComparator):
client = await opt_125m


@@ -4,21 +4,25 @@ import typing as t
import pytest
if t.TYPE_CHECKING:
import openllm
@pytest.mark.skipif(os.getenv('GITHUB_ACTIONS') is not None, reason='Model is too large for CI')
def test_flan_t5_implementation(prompt: str, llm: openllm.LLM[t.Any, t.Any]):
assert llm(prompt)
assert llm(prompt, temperature=0.8, top_p=0.23)
@pytest.mark.skipif(os.getenv('GITHUB_ACTIONS') is not None, reason='Model is too large for CI')
def test_opt_implementation(prompt: str, llm: openllm.LLM[t.Any, t.Any]):
assert llm(prompt)
assert llm(prompt, temperature=0.9, top_k=8)
@pytest.mark.skipif(os.getenv('GITHUB_ACTIONS') is not None, reason='Model is too large for CI')
def test_baichuan_implementation(prompt: str, llm: openllm.LLM[t.Any, t.Any]):
assert llm(prompt)


@@ -9,15 +9,18 @@ import openllm
from bentoml._internal.configuration.containers import BentoMLContainer
if t.TYPE_CHECKING:
from pathlib import Path
HF_INTERNAL_T5_TESTING = 'hf-internal-testing/tiny-random-t5'
actions_xfail = functools.partial(pytest.mark.xfail,
condition=os.getenv('GITHUB_ACTIONS') is not None,
reason='Marking GitHub Actions to xfail due to flakiness and building environment not isolated.',
)
actions_xfail = functools.partial(
pytest.mark.xfail,
condition=os.getenv('GITHUB_ACTIONS') is not None,
reason='Marking GitHub Actions to xfail due to flakiness and building environment not isolated.',
)
@actions_xfail
def test_general_build_with_internal_testing():
@@ -32,6 +35,7 @@ def test_general_build_with_internal_testing():
bento = openllm.build('flan-t5', model_id=HF_INTERNAL_T5_TESTING)
assert len(bento_store.list(bento.tag)) == 1
@actions_xfail
def test_general_build_from_local(tmp_path_factory: pytest.TempPathFactory):
local_path = tmp_path_factory.mktemp('local_t5')
@@ -42,12 +46,16 @@ def test_general_build_from_local(tmp_path_factory: pytest.TempPathFactory):
assert openllm.build('flan-t5', model_id=local_path.resolve().__fspath__(), model_version='local')
@pytest.fixture()
def dockerfile_template(tmp_path_factory: pytest.TempPathFactory):
file = tmp_path_factory.mktemp('dockerfiles') / 'Dockerfile.template'
file.write_text("{% extends bento_base_template %}\n{% block SETUP_BENTO_ENTRYPOINT %}\n{{ super() }}\nRUN echo 'sanity from custom dockerfile'\n{% endblock %}")
file.write_text(
"{% extends bento_base_template %}\n{% block SETUP_BENTO_ENTRYPOINT %}\n{{ super() }}\nRUN echo 'sanity from custom dockerfile'\n{% endblock %}"
)
return file
@pytest.mark.usefixtures('dockerfile_template')
@actions_xfail
def test_build_with_custom_dockerfile(dockerfile_template: Path):


@@ -11,9 +11,11 @@ from openllm._strategies import CascadingResourceStrategy
from openllm._strategies import NvidiaGpuResource
from openllm._strategies import get_resource
if t.TYPE_CHECKING:
from _pytest.monkeypatch import MonkeyPatch
def test_nvidia_gpu_resource_from_env(monkeypatch: pytest.MonkeyPatch):
with monkeypatch.context() as mcls:
mcls.setenv('CUDA_VISIBLE_DEVICES', '0,1')
@@ -22,6 +24,7 @@ def test_nvidia_gpu_resource_from_env(monkeypatch: pytest.MonkeyPatch):
assert resource == ['0', '1']
mcls.delenv('CUDA_VISIBLE_DEVICES')
def test_nvidia_gpu_cutoff_minus(monkeypatch: pytest.MonkeyPatch):
with monkeypatch.context() as mcls:
mcls.setenv('CUDA_VISIBLE_DEVICES', '0,2,-1,1')
@@ -30,6 +33,7 @@ def test_nvidia_gpu_cutoff_minus(monkeypatch: pytest.MonkeyPatch):
assert resource == ['0', '2']
mcls.delenv('CUDA_VISIBLE_DEVICES')
def test_nvidia_gpu_neg_val(monkeypatch: pytest.MonkeyPatch):
with monkeypatch.context() as mcls:
mcls.setenv('CUDA_VISIBLE_DEVICES', '-1')
@@ -38,6 +42,7 @@ def test_nvidia_gpu_neg_val(monkeypatch: pytest.MonkeyPatch):
assert resource == []
mcls.delenv('CUDA_VISIBLE_DEVICES')
def test_nvidia_gpu_parse_literal(monkeypatch: pytest.MonkeyPatch):
with monkeypatch.context() as mcls:
mcls.setenv('CUDA_VISIBLE_DEVICES', 'GPU-5ebe9f43-ac33420d4628')
@@ -64,6 +69,7 @@ def test_nvidia_gpu_parse_literal(monkeypatch: pytest.MonkeyPatch):
assert resource == ['MIG-GPU-5ebe9f43-ac33420d4628']
mcls.delenv('CUDA_VISIBLE_DEVICES')
@pytest.mark.skipif(os.getenv('GITHUB_ACTIONS') is not None, reason='skip GPUs test on CI')
def test_nvidia_gpu_validate(monkeypatch: pytest.MonkeyPatch):
with monkeypatch.context() as mcls:
@@ -71,9 +77,14 @@ def test_nvidia_gpu_validate(monkeypatch: pytest.MonkeyPatch):
mcls.setenv('CUDA_VISIBLE_DEVICES', '')
assert len(NvidiaGpuResource.from_system()) >= 0 # TODO: real from_system tests
assert pytest.raises(ValueError, NvidiaGpuResource.validate, [*NvidiaGpuResource.from_system(), 1],).match('Input list should be all string type.')
assert pytest.raises(ValueError, NvidiaGpuResource.validate, [*NvidiaGpuResource.from_system(), 1]).match(
'Input list should be all string type.'
)
assert pytest.raises(ValueError, NvidiaGpuResource.validate, [-2]).match('Input list should be all string type.')
assert pytest.raises(ValueError, NvidiaGpuResource.validate, ['GPU-5ebe9f43', 'GPU-ac33420d4628']).match('Failed to parse available GPUs UUID')
assert pytest.raises(ValueError, NvidiaGpuResource.validate, ['GPU-5ebe9f43', 'GPU-ac33420d4628']).match(
'Failed to parse available GPUs UUID'
)
def test_nvidia_gpu_from_spec(monkeypatch: pytest.MonkeyPatch):
with monkeypatch.context() as mcls:
@@ -102,12 +113,15 @@ def test_nvidia_gpu_from_spec(monkeypatch: pytest.MonkeyPatch):
with pytest.raises(ValueError):
assert NvidiaGpuResource.from_spec(-2)
class GPURunnable(bentoml.Runnable):
SUPPORTED_RESOURCES = ('nvidia.com/gpu', 'amd.com/gpu')
def unvalidated_get_resource(x: dict[str, t.Any], y: str, validate: bool = False):
return get_resource(x, y, validate=validate)
@pytest.mark.parametrize('gpu_type', ['nvidia.com/gpu', 'amd.com/gpu'])
def test_cascade_strategy_worker_count(monkeypatch: MonkeyPatch, gpu_type: str):
monkeypatch.setattr(strategy, 'get_resource', unvalidated_get_resource)
@@ -119,6 +133,7 @@ def test_cascade_strategy_worker_count(monkeypatch: MonkeyPatch, gpu_type: str):
assert CascadingResourceStrategy.get_worker_count(GPURunnable, {gpu_type: [2, 7, 8, 9]}, 0.5) == 1
assert CascadingResourceStrategy.get_worker_count(GPURunnable, {gpu_type: [2, 5, 7, 8, 9]}, 0.4) == 1
@pytest.mark.parametrize('gpu_type', ['nvidia.com/gpu', 'amd.com/gpu'])
def test_cascade_strategy_worker_env(monkeypatch: MonkeyPatch, gpu_type: str):
monkeypatch.setattr(strategy, 'get_resource', unvalidated_get_resource)
@@ -158,6 +173,7 @@ def test_cascade_strategy_worker_env(monkeypatch: MonkeyPatch, gpu_type: str):
envs = CascadingResourceStrategy.get_worker_env(GPURunnable, {gpu_type: [2, 6, 7, 8, 9]}, 0.4, 2)
assert envs.get('CUDA_VISIBLE_DEVICES') == '9'
@pytest.mark.parametrize('gpu_type', ['nvidia.com/gpu', 'amd.com/gpu'])
def test_cascade_strategy_disabled_via_env(monkeypatch: MonkeyPatch, gpu_type: str):
monkeypatch.setattr(strategy, 'get_resource', unvalidated_get_resource)