tests: add additional basic testing (#982)

* chore: update rebase tests

Signed-off-by: paperspace <29749331+aarnphm@users.noreply.github.com>

* chore: update partial clients before removing

Signed-off-by: paperspace <29749331+aarnphm@users.noreply.github.com>

* fix: update clients parsing logic to work with 0.5

Signed-off-by: paperspace <29749331+aarnphm@users.noreply.github.com>

* chore: ignore CI runs so as to run them locally

Signed-off-by: paperspace <29749331+aarnphm@users.noreply.github.com>

* chore: update async client tests

Signed-off-by: paperspace <29749331+aarnphm@users.noreply.github.com>

* chore: update pre-commit

Signed-off-by: paperspace <29749331+aarnphm@users.noreply.github.com>

---------

Signed-off-by: paperspace <29749331+aarnphm@users.noreply.github.com>
This commit is contained in:
Aaron Pham
2024-05-23 10:02:23 -04:00
committed by GitHub
parent 5cb5203eea
commit 97d76eec85
28 changed files with 498 additions and 978 deletions

View File

@@ -27,7 +27,7 @@ OpenLLM helps developers **run any open-source LLMs**, such as Llama 2 and Mistr
- 🚂 Support a wide range of open-source LLMs including LLMs fine-tuned with your own data
- ⛓️ OpenAI compatible API endpoints for seamless transition from your LLM app to open-source LLMs
- 🔥 State-of-the-art serving and inference performance
- 🎯 Simplified cloud deployment via [BentoML](www.bentoml.com)
- 🎯 Simplified cloud deployment via [BentoML](https://www.bentoml.com)
<!-- hatch-fancy-pypi-readme intro stop -->

View File

@@ -150,45 +150,6 @@ only-include = ["src/openllm", "src/openllm_cli", "src/_openllm_tiny"]
sources = ["src"]
[tool.hatch.build.targets.sdist]
exclude = ["/.git_archival.txt", "tests", "/.python-version-default"]
[tool.hatch.build.targets.wheel.hooks.mypyc]
dependencies = [
"hatch-mypyc==0.16.0",
"mypy==1.7.0",
# avoid https://github.com/pallets/click/issues/2558
"click==8.1.3",
"bentoml==1.1.9",
"transformers>=4.32.1",
"pandas-stubs",
"types-psutil",
"types-tabulate",
"types-PyYAML",
"types-protobuf",
]
enable-by-default = false
exclude = ["src/_openllm_tiny/_service.py", "src/openllm/utils/__init__.py"]
include = [
"src/openllm/__init__.py",
"src/openllm/_quantisation.py",
"src/openllm/_generation.py",
"src/openllm/exceptions.py",
"src/openllm/testing.py",
"src/openllm/utils",
]
# NOTE: This is consistent with pyproject.toml
mypy-args = [
"--strict",
# this is because all transient library doesn't have types
"--follow-imports=skip",
"--allow-subclassing-any",
"--check-untyped-defs",
"--ignore-missing-imports",
"--no-warn-return-any",
"--warn-unreachable",
"--no-warn-no-return",
"--no-warn-unused-ignores",
]
options = { verbose = true, strip_asserts = true, debug_level = "2", opt_level = "3", include_runtime_files = true }
require-runtime-dependencies = true
[tool.hatch.metadata.hooks.fancy-pypi-readme]
content-type = "text/markdown"
# PyPI doesn't support the <picture> tag.

View File

@@ -1,4 +1,4 @@
if __name__ == '__main__':
from openllm_cli.entrypoint import cli
from _openllm_tiny._entrypoint import cli
cli()

View File

@@ -70,7 +70,7 @@ def error_response(status_code, message):
)
async def check_model(request, model):
async def check_model(request, model): # noqa
if request.model == model:
return None
return error_response(

View File

@@ -1,9 +0,0 @@
from __future__ import annotations
import os
from hypothesis import HealthCheck, settings

# Register a relaxed Hypothesis profile for CI: tolerate slow examples and
# disable the per-example deadline (the second positional arg is the parent
# settings object; `deadline=None` is an extra settings override).
settings.register_profile('CI', settings(suppress_health_check=[HealthCheck.too_slow]), deadline=None)
# Only activate the relaxed profile when the CI environment variable is set.
if 'CI' in os.environ:
    settings.load_profile('CI')

View File

@@ -0,0 +1,78 @@
from __future__ import annotations
import typing as t
from openllm_core._typing_compat import TypedDict
from datasets import load_dataset
if t.TYPE_CHECKING:
from transformers import PreTrainedTokenizerBase
# Fixed completion budget applied when a maximum output length is requested.
FIXED_OUTPUT_LENGTH = 128

# One ShareGPT exchange: the human prompt and the assistant ("gpt") reply.
DatasetEntry = TypedDict('DatasetEntry', {'human': str, 'gpt': str})
# A benchmark sample: prompt text plus its token length and the target output length.
SampledRequest = TypedDict('SampledRequest', {'prompt': str, 'prompt_length': int, 'output_length': int})
def prepare_sharegpt_request(
    num_requests: int, tokenizer: PreTrainedTokenizerBase, max_output_length: int | None = None
) -> list[SampledRequest]:
    """Sample up to ``num_requests`` prompt/length entries from the ShareGPT dataset.

    NOTE(review): only the None-ness of ``max_output_length`` is consulted below —
    when it is not None every entry gets FIXED_OUTPUT_LENGTH and the actual value is
    ignored; confirm whether the intent was to cap at ``max_output_length``.
    """

    def transform(examples) -> DatasetEntry:
        # Keep only the first human/gpt turn of each conversation (batched map).
        human, gpt = [], []
        for example in examples['conversations']:
            human.append(example[0]['value'])
            gpt.append(example[1]['value'])
        return {'human': human, 'gpt': gpt}

    def process(examples, tokenizer, max_output_length: t.Optional[int]):
        # Tokenize the 'human' and 'gpt' values in batches
        prompt_token_ids = tokenizer(examples['human']).input_ids
        completion_token_ids = tokenizer(examples['gpt']).input_ids
        # Create the transformed entries
        return {
            'prompt': examples['human'],
            'prompt_length': [len(ids) for ids in prompt_token_ids],
            'output_length': [
                len(ids) if max_output_length is None else FIXED_OUTPUT_LENGTH for ids in completion_token_ids
            ],
        }

    def filter_length(examples) -> list[bool]:
        # Drop degenerate (<4 tokens) and oversized (>1024 prompt / >2048 total) samples.
        result = []
        for prompt_length, output_length in zip(examples['prompt_length'], examples['output_length']):
            if prompt_length < 4 or output_length < 4:
                result.append(False)
            elif prompt_length > 1024 or prompt_length + output_length > 2048:
                result.append(False)
            else:
                result.append(True)
        return result

    # The walrus binds `dataset` before `.column_names` is evaluated in the later
    # `.map(...)` call, so the reference is valid despite appearing mid-chain.
    return (
        (
            dataset := load_dataset(
                'anon8231489123/ShareGPT_Vicuna_unfiltered',
                data_files='ShareGPT_V3_unfiltered_cleaned_split.json',
                split='train',
            )
        )
        .filter(lambda example: len(example['conversations']) >= 2, num_proc=8)
        .map(transform, remove_columns=dataset.column_names, batched=True)
        .map(
            process,
            fn_kwargs={'tokenizer': tokenizer, 'max_output_length': max_output_length},
            remove_columns=['human', 'gpt'],
            batched=True,
        )
        .filter(filter_length, batched=True)
        .shuffle(seed=42)
        .to_list()[:num_requests]
    )

View File

@@ -1,60 +0,0 @@
from __future__ import annotations
import logging
import typing as t
from hypothesis import strategies as st
import openllm
from openllm_core._configuration import ModelSettings
logger = logging.getLogger(__name__)
@st.composite
def model_settings(draw: st.DrawFn):
    """Strategy for generating ModelSettings objects."""
    # Each value is itself a strategy; st.builds draws from all of them at once
    # and passes the drawn values to ModelSettings as keyword arguments.
    kwargs: dict[str, t.Any] = {
        'default_id': st.text(min_size=1),
        'model_ids': st.lists(st.text(), min_size=1),
        'architecture': st.text(min_size=1),
        'url': st.text(),
        'trust_remote_code': st.booleans(),
        'requirements': st.none() | st.lists(st.text(), min_size=1),
        'model_type': st.sampled_from(['causal_lm', 'seq2seq_lm']),
        'name_type': st.sampled_from(['dasherize', 'lowercase']),
        'timeout': st.integers(min_value=3600),
        # workers per resource may be a whole count or a fraction of a device
        'workers_per_resource': st.one_of(st.integers(min_value=1), st.floats(min_value=0.1, max_value=1.0)),
    }
    return draw(st.builds(ModelSettings, **kwargs))
def make_llm_config(
    cls_name: str,
    dunder_config: dict[str, t.Any] | ModelSettings,
    fields: tuple[tuple[t.LiteralString, str, t.Any], ...] | None = None,
    generation_fields: tuple[tuple[t.LiteralString, t.Any], ...] | None = None,
) -> type[openllm.LLMConfig]:
    """Dynamically generate an ``openllm.LLMConfig`` subclass for tests.

    ``dunder_config`` populates ``__config__``; each ``fields`` triple becomes a
    class-level Field; ``generation_fields`` become attributes of a nested
    ``GenerationConfig`` class. Returns the generated class object.
    """
    globs: dict[str, t.Any] = {'openllm': openllm}
    _config_args: list[str] = []
    lines: list[str] = [f'class {cls_name}Config(openllm.LLMConfig):']
    for attr, value in dunder_config.items():
        # Values are injected via globals under the name-mangled key so the class
        # body can reference them as `__attr_<name>`.
        _config_args.append(f'"{attr}": __attr_{attr}')
        globs[f'_{cls_name}Config__attr_{attr}'] = value
    lines.append(f' __config__ = {{ {", ".join(_config_args)} }}')
    if fields is not None:
        for field, type_, default in fields:
            lines.append(f' {field}: {type_} = openllm.LLMConfig.Field({default!r})')
    if generation_fields is not None:
        generation_lines = ['class GenerationConfig:']
        for field, default in generation_fields:
            generation_lines.append(f' {field} = {default!r}')
        lines.extend((' ' + line for line in generation_lines))
    script = '\n'.join(lines)
    if openllm.utils.DEBUG:
        logger.info('Generated class %s:\n%s', cls_name, script)
    # exec of generated source; all inputs are test-controlled, not untrusted.
    eval(compile(script, 'name', 'exec'), globs)
    return globs[f'{cls_name}Config']

View File

@@ -1,152 +0,0 @@
from __future__ import annotations
import contextlib
import os
import typing as t
from unittest import mock
import attr
import pytest
from hypothesis import assume, given, strategies as st
import openllm
from openllm_core._configuration import GenerationConfig, ModelSettings, field_env_key
from ._strategies._configuration import make_llm_config, model_settings
def test_forbidden_access():
    """Reading internal attributes off an LLMConfig instance must raise ForbiddenAttributeError."""
    cl_ = make_llm_config(
        'ForbiddenAccess',
        {
            'default_id': 'huggingface/t5-tiny-testing',
            'model_ids': ['huggingface/t5-tiny-testing', 'bentoml/t5-tiny-testing'],
            'architecture': 'PreTrainedModel',
            'requirements': ['bentoml'],
        },
    )
    # pytest.raises in call form returns an ExceptionInfo (truthy) when the error fires.
    assert pytest.raises(openllm.exceptions.ForbiddenAttributeError, cl_.__getattribute__, cl_(), '__config__')
    assert pytest.raises(openllm.exceptions.ForbiddenAttributeError, cl_.__getattribute__, cl_(), 'GenerationConfig')
    assert pytest.raises(openllm.exceptions.ForbiddenAttributeError, cl_.__getattribute__, cl_(), 'SamplingParams')
    assert openllm.utils.lenient_issubclass(cl_.__openllm_generation_class__, GenerationConfig)
@given(model_settings())
def test_class_normal_gen(gen_settings: ModelSettings):
    """Every generated setting must surface as a `__openllm_<key>__` class attribute."""
    assume(gen_settings['default_id'] and all(i for i in gen_settings['model_ids']))
    cl_: type[openllm.LLMConfig] = make_llm_config('NotFullLLM', gen_settings)
    assert issubclass(cl_, openllm.LLMConfig)
    for key in gen_settings:
        assert object.__getattribute__(cl_, f'__openllm_{key}__') == gen_settings.__getitem__(key)
@given(model_settings(), st.integers())
def test_simple_struct_dump(gen_settings: ModelSettings, field1: int):
    """A declared Field default must round-trip through model_dump()."""
    cl_ = make_llm_config('IdempotentLLM', gen_settings, fields=(('field1', 'float', field1),))
    assert cl_().model_dump()['field1'] == field1
@given(model_settings(), st.integers())
def test_config_derivation(gen_settings: ModelSettings, field1: int):
    """model_derivate must produce a new class with the overridden default_id."""
    cl_ = make_llm_config('IdempotentLLM', gen_settings, fields=(('field1', 'float', field1),))
    new_cls = cl_.model_derivate('DerivedLLM', default_id='asdfasdf')
    assert new_cls.__openllm_default_id__ == 'asdfasdf'
@given(model_settings())
def test_config_derived_follow_attrs_protocol(gen_settings: ModelSettings):
    """Generated config classes must be attrs classes (attr.has)."""
    cl_ = make_llm_config('AttrsProtocolLLM', gen_settings)
    assert attr.has(cl_)
@given(
    model_settings(),
    st.integers(max_value=283473),
    st.floats(min_value=0.0, max_value=1.0),
    st.integers(max_value=283473),
    st.floats(min_value=0.0, max_value=1.0),
)
def test_complex_struct_dump(
    gen_settings: ModelSettings, field1: int, temperature: float, input_field1: int, input_temperature: float
):
    """model_dump must report defaults, constructor overrides, and nested generation_config
    consistently, in both nested and flatten=True forms."""
    cl_ = make_llm_config(
        'ComplexLLM',
        gen_settings,
        fields=(('field1', 'float', field1),),
        generation_fields=(('temperature', temperature),),
    )
    # defaults
    sent = cl_()
    assert sent.model_dump()['field1'] == field1
    assert sent.model_dump()['generation_config']['temperature'] == temperature
    assert sent.model_dump(flatten=True)['field1'] == field1
    assert sent.model_dump(flatten=True)['temperature'] == temperature
    # constructor overrides (flat kwargs)
    passed = cl_(field1=input_field1, temperature=input_temperature)
    assert passed.model_dump()['field1'] == input_field1
    assert passed.model_dump()['generation_config']['temperature'] == input_temperature
    assert passed.model_dump(flatten=True)['field1'] == input_field1
    assert passed.model_dump(flatten=True)['temperature'] == input_temperature
    # constructor overrides (nested generation_config dict)
    pas_nested = cl_(generation_config={'temperature': input_temperature}, field1=input_field1)
    assert pas_nested.model_dump()['field1'] == input_field1
    assert pas_nested.model_dump()['generation_config']['temperature'] == input_temperature
@contextlib.contextmanager
def patch_env(**attrs: t.Any):
    """Temporarily replace the whole process environment with exactly *attrs*."""
    patcher = mock.patch.dict(os.environ, attrs, clear=True)
    patcher.start()
    try:
        yield
    finally:
        # Always restore the original environment, even if the body raised.
        patcher.stop()
def test_struct_envvar():
    """Environment variables must override class-level defaults for fields and generation config."""
    with patch_env(**{field_env_key('field1'): '4', field_env_key('temperature', suffix='generation'): '0.2'}):

        class EnvLLM(openllm.LLMConfig):
            __config__ = {'default_id': 'asdfasdf', 'model_ids': ['asdf', 'asdfasdfads'], 'architecture': 'PreTrainedModel'}
            field1: int = 2

            class GenerationConfig:
                temperature: float = 0.8

        # model_construct_env reads the patched environment
        sent = EnvLLM.model_construct_env()
        assert sent.field1 == 4
        assert sent['temperature'] == 0.2
        # plain construction also picks up the env overrides
        overwrite_default = EnvLLM()
        assert overwrite_default.field1 == 4
        assert overwrite_default['temperature'] == 0.2
def test_struct_provided_fields():
    """Explicit kwargs to model_construct_env must override class defaults."""

    class EnvLLM(openllm.LLMConfig):
        __config__ = {'default_id': 'asdfasdf', 'model_ids': ['asdf', 'asdfasdfads'], 'architecture': 'PreTrainedModel'}
        field1: int = 2

        class GenerationConfig:
            temperature: float = 0.8

    sent = EnvLLM.model_construct_env(field1=20, temperature=0.4)
    assert sent.field1 == 20
    assert sent.generation_config.temperature == 0.4
def test_struct_envvar_with_overwrite_provided_env(monkeypatch: pytest.MonkeyPatch):
    """Explicit kwargs must win over environment-variable overrides."""
    with monkeypatch.context() as mk:
        mk.setenv(field_env_key('field1'), str(4.0))
        mk.setenv(field_env_key('temperature', suffix='generation'), str(0.2))
        sent = make_llm_config(
            'OverwriteWithEnvAvailable',
            {'default_id': 'asdfasdf', 'model_ids': ['asdf', 'asdfasdfads'], 'architecture': 'PreTrainedModel'},
            fields=(('field1', 'float', 3.0),),
        ).model_construct_env(field1=20.0, temperature=0.4)
        assert sent.generation_config.temperature == 0.4
        assert sent.field1 == 20.0
@pytest.mark.parametrize('model_name', openllm.CONFIG_MAPPING.keys())
def test_configuration_dict_protocol(model_name: str):
    """Every registered config must expose list-returning items/keys/values and be dict-convertible."""
    config = openllm.AutoConfig.for_model(model_name)
    assert isinstance(config.items(), list)
    assert isinstance(config.keys(), list)
    assert isinstance(config.values(), list)
    assert isinstance(dict(config), dict)

View File

@@ -1,42 +1,16 @@
from __future__ import annotations
import itertools
import os
import typing as t
import pytest
import openllm
if t.TYPE_CHECKING:
from openllm_core._typing_compat import LiteralBackend
# Config model names mapped to small checkpoints used for local framework tests.
_MODELING_MAPPING = {
    'flan_t5': 'google/flan-t5-small',
    'opt': 'facebook/opt-125m',
    'baichuan': 'baichuan-inc/Baichuan-7B',
}
# Named prompts fed to each locally-parametrised model.
_PROMPT_MAPPING = {
    'qa': 'Answer the following yes/no question by reasoning step-by-step. Can you write a whole Haiku in a single tweet?'
}
import pytest, typing as t
def parametrise_local_llm(model: str) -> t.Generator[tuple[str, openllm.LLM[t.Any, t.Any]], None, None]:
    """Yield (prompt, LLM) pairs for every backend/prompt combination of *model*.

    Skips the whole parametrisation when the model has no test checkpoint.
    """
    if model not in _MODELING_MAPPING:
        pytest.skip(f"'{model}' is not yet supported in framework testing.")
    backends: tuple[LiteralBackend, ...] = ('pt',)
    for backend, prompt in itertools.product(backends, _PROMPT_MAPPING.keys()):
        yield prompt, openllm.LLM(_MODELING_MAPPING[model], backend=backend)
def pytest_generate_tests(metafunc: pytest.Metafunc) -> None:
    """Parametrise (prompt, llm) fixtures for local runs only (skipped on GitHub Actions)."""
    if os.getenv('GITHUB_ACTIONS') is None:
        if 'prompt' in metafunc.fixturenames and 'llm' in metafunc.fixturenames:
            # __name__[5:-15] strips the 'test_' prefix and '_implementation' suffix,
            # leaving the model key (e.g. test_flan_t5_implementation -> flan_t5).
            metafunc.parametrize(
                'prompt,llm', [(p, llm) for p, llm in parametrise_local_llm(metafunc.function.__name__[5:-15])]
            )
def pytest_sessionfinish(session: pytest.Session, exitstatus: int):
    """Treat an empty test collection as success instead of failing CI."""
    # pytest exits with code 5 when no tests were collected; CI should not fail on that.
    if exitstatus != 5:
        return
    session.exitstatus = 0
@pytest.fixture(
    scope='function',
    name='model_id',
    # Use an ordered tuple rather than a set: set iteration order varies between
    # runs, which made parametrisation order and test IDs nondeterministic.
    params=(
        'meta-llama/Meta-Llama-3-8B-Instruct',
        'casperhansen/llama-3-70b-instruct-awq',
        'TheBloke/Nous-Hermes-2-Mixtral-8x7B-DPO-AWQ',
    ),
)
def fixture_model_id(request) -> t.Generator[str, None, None]:
    """Yield each candidate model id to tests that request the `model_id` fixture."""
    yield request.param

View File

@@ -1,29 +0,0 @@
from __future__ import annotations
import os
import typing as t
import pytest
if t.TYPE_CHECKING:
import openllm
@pytest.mark.skipif(os.getenv('GITHUB_ACTIONS') is not None, reason='Model is too large for CI')
def test_flan_t5_implementation(prompt: str, llm: openllm.LLM[t.Any, t.Any]):
    """flan-t5 smoke test: generation succeeds with defaults and with sampling overrides."""
    assert llm.generate(prompt)
    assert llm.generate(prompt, temperature=0.8, top_p=0.23)
@pytest.mark.skipif(os.getenv('GITHUB_ACTIONS') is not None, reason='Model is too large for CI')
def test_opt_implementation(prompt: str, llm: openllm.LLM[t.Any, t.Any]):
    """OPT smoke test: generation succeeds with defaults and with top-k sampling."""
    assert llm.generate(prompt)
    assert llm.generate(prompt, temperature=0.9, top_k=8)
@pytest.mark.skipif(os.getenv('GITHUB_ACTIONS') is not None, reason='Model is too large for CI')
def test_baichuan_implementation(prompt: str, llm: openllm.LLM[t.Any, t.Any]):
    """Baichuan smoke test: generation succeeds with defaults and a high temperature."""
    assert llm.generate(prompt)
    assert llm.generate(prompt, temperature=0.95)

View File

@@ -1,60 +0,0 @@
from __future__ import annotations
import functools
import os
import typing as t
import pytest
import openllm
from bentoml._internal.configuration.containers import BentoMLContainer
if t.TYPE_CHECKING:
from pathlib import Path
# Tiny seq2seq checkpoint small enough for local build tests.
HF_INTERNAL_T5_TESTING = 'hf-internal-testing/tiny-random-t5'
# NOTE(review): applied bare as @actions_xfail, this partial passes the test
# function positionally to pytest.mark.xfail together with the condition/reason
# kwargs — verify it decorates as intended on the pinned pytest version.
actions_xfail = functools.partial(
    pytest.mark.xfail,
    condition=os.getenv('GITHUB_ACTIONS') is not None,
    reason='Marking GitHub Actions to xfail due to flakiness and building environment not isolated.',
)
@actions_xfail
def test_general_build_with_internal_testing():
    """Building the same bento twice must label it correctly and not duplicate store entries."""
    bento_store = BentoMLContainer.bento_store.get()
    llm = openllm.LLM(model_id=HF_INTERNAL_T5_TESTING, serialisation='legacy')
    bento = openllm.build('flan-t5', model_id=HF_INTERNAL_T5_TESTING)
    assert llm.llm_type == bento.info.labels['_type']
    assert llm.__llm_backend__ == bento.info.labels['_framework']
    # A second build with identical inputs must reuse the existing bento.
    bento = openllm.build('flan-t5', model_id=HF_INTERNAL_T5_TESTING)
    assert len(bento_store.list(bento.tag)) == 1
@actions_xfail
def test_general_build_from_local(tmp_path_factory: pytest.TempPathFactory):
    """Building from a locally saved checkpoint directory must succeed."""
    local_path = tmp_path_factory.mktemp('local_t5')
    llm = openllm.LLM(model_id=HF_INTERNAL_T5_TESTING, serialisation='legacy')
    llm.model.save_pretrained(str(local_path))
    # NOTE(review): reaches into the private `_tokenizer` attribute — confirm no
    # public accessor exists on this openllm version.
    llm._tokenizer.save_pretrained(str(local_path))
    assert openllm.build('flan-t5', model_id=local_path.resolve().__fspath__(), model_version='local')
@pytest.fixture()
def dockerfile_template(tmp_path_factory: pytest.TempPathFactory):
    """Write a minimal custom Dockerfile template extending the bento base template."""
    file = tmp_path_factory.mktemp('dockerfiles') / 'Dockerfile.template'
    file.write_text(
        "{% extends bento_base_template %}\n{% block SETUP_BENTO_ENTRYPOINT %}\n{{ super() }}\nRUN echo 'sanity from custom dockerfile'\n{% endblock %}"
    )
    return file
@pytest.mark.usefixtures('dockerfile_template')
@actions_xfail
def test_build_with_custom_dockerfile(dockerfile_template: Path):
    """Building with a custom dockerfile template must succeed."""
    assert openllm.build('flan-t5', model_id=HF_INTERNAL_T5_TESTING, dockerfile_template=str(dockerfile_template))

View File

@@ -0,0 +1,56 @@
from __future__ import annotations
import pytest, subprocess, sys, openllm, bentoml, asyncio
from openai import AsyncOpenAI
from openai.types.chat import ChatCompletionSystemMessageParam, ChatCompletionUserMessageParam
SERVER_PORT = 53822
@pytest.mark.asyncio
async def test_openai_compatible(model_id: str):
    """Start a server subprocess and exercise the OpenAI-compatible chat endpoint."""
    server = subprocess.Popen([sys.executable, '-m', 'openllm', 'start', model_id, '--port', str(SERVER_PORT)])
    # Everything after the spawn runs under try/finally: previously the readiness
    # check sat outside the try, so a failed startup leaked the subprocess.
    try:
        await asyncio.sleep(5)  # give the server a head start before polling
        with bentoml.SyncHTTPClient(f'http://127.0.0.1:{SERVER_PORT}', server_ready_timeout=90) as client:
            assert client.is_ready(30)
        client = AsyncOpenAI(api_key='na', base_url=f'http://127.0.0.1:{SERVER_PORT}/v1')
        serve_model = (await client.models.list()).data[0].id
        assert serve_model == openllm.utils.normalise_model_name(model_id)
        streamable = await client.chat.completions.create(
            model=serve_model,
            max_tokens=512,
            stream=False,
            messages=[
                ChatCompletionSystemMessageParam(
                    role='system', content='You will be the writing assistant that assume the tone of Ernest Hemmingway.'
                ),
                ChatCompletionUserMessageParam(
                    role='user', content='Comment on why Camus thinks we should revolt against life absurdity.'
                ),
            ],
        )
        assert streamable is not None
    finally:
        server.terminate()
        server.wait()  # reap the child so no zombie process lingers
@pytest.mark.asyncio
async def test_generate_endpoint(model_id: str):
    """Start a server subprocess and exercise the native /v1 generate endpoint."""
    server = subprocess.Popen([sys.executable, '-m', 'openllm', 'start', model_id, '--port', str(SERVER_PORT)])
    # try/finally from the moment of spawn: previously a failure during the
    # readiness check leaked the server subprocess.
    try:
        await asyncio.sleep(5)  # give the server a head start before polling
        with bentoml.SyncHTTPClient(f'http://127.0.0.1:{SERVER_PORT}', server_ready_timeout=90) as client:
            assert client.is_ready(30)
        client = openllm.AsyncHTTPClient(f'http://127.0.0.1:{SERVER_PORT}', api_version='v1')
        assert await client.health()
        response = await client.generate(
            'Tell me more about Apple as a company', stop='technology', llm_config={'temperature': 0.5, 'top_p': 0.2}
        )
        assert response is not None
    finally:
        server.terminate()
        server.wait()  # reap the child so no zombie process lingers

View File

@@ -1,185 +0,0 @@
from __future__ import annotations
import os
import typing as t
import pytest
import bentoml
from openllm import _strategies as strategy
from openllm._strategies import CascadingResourceStrategy, NvidiaGpuResource, get_resource
if t.TYPE_CHECKING:
from _pytest.monkeypatch import MonkeyPatch
def test_nvidia_gpu_resource_from_env(monkeypatch: pytest.MonkeyPatch):
    """CUDA_VISIBLE_DEVICES with plain indices yields those indices as strings."""
    with monkeypatch.context() as mcls:
        mcls.setenv('CUDA_VISIBLE_DEVICES', '0,1')
        resource = NvidiaGpuResource.from_system()
        assert len(resource) == 2
        assert resource == ['0', '1']
        mcls.delenv('CUDA_VISIBLE_DEVICES')
def test_nvidia_gpu_cutoff_minus(monkeypatch: pytest.MonkeyPatch):
    """A -1 entry truncates the device list: everything after it is ignored."""
    with monkeypatch.context() as mcls:
        mcls.setenv('CUDA_VISIBLE_DEVICES', '0,2,-1,1')
        resource = NvidiaGpuResource.from_system()
        assert len(resource) == 2
        assert resource == ['0', '2']
        mcls.delenv('CUDA_VISIBLE_DEVICES')
def test_nvidia_gpu_neg_val(monkeypatch: pytest.MonkeyPatch):
    """CUDA_VISIBLE_DEVICES=-1 disables all GPUs, producing an empty list."""
    with monkeypatch.context() as mcls:
        mcls.setenv('CUDA_VISIBLE_DEVICES', '-1')
        resource = NvidiaGpuResource.from_system()
        assert len(resource) == 0
        assert resource == []
        mcls.delenv('CUDA_VISIBLE_DEVICES')
def test_nvidia_gpu_parse_literal(monkeypatch: pytest.MonkeyPatch):
    """GPU-/MIG-GPU- UUID literals are parsed verbatim, with -1 still truncating."""
    with monkeypatch.context() as mcls:
        mcls.setenv('CUDA_VISIBLE_DEVICES', 'GPU-5ebe9f43-ac33420d4628')
        resource = NvidiaGpuResource.from_system()
        assert len(resource) == 1
        assert resource == ['GPU-5ebe9f43-ac33420d4628']
        mcls.delenv('CUDA_VISIBLE_DEVICES')
    with monkeypatch.context() as mcls:
        mcls.setenv('CUDA_VISIBLE_DEVICES', 'GPU-5ebe9f43,GPU-ac33420d4628')
        resource = NvidiaGpuResource.from_system()
        assert len(resource) == 2
        assert resource == ['GPU-5ebe9f43', 'GPU-ac33420d4628']
        mcls.delenv('CUDA_VISIBLE_DEVICES')
    with monkeypatch.context() as mcls:
        # -1 in the middle truncates: only UUIDs before it survive
        mcls.setenv('CUDA_VISIBLE_DEVICES', 'GPU-5ebe9f43,-1,GPU-ac33420d4628')
        resource = NvidiaGpuResource.from_system()
        assert len(resource) == 1
        assert resource == ['GPU-5ebe9f43']
        mcls.delenv('CUDA_VISIBLE_DEVICES')
    with monkeypatch.context() as mcls:
        mcls.setenv('CUDA_VISIBLE_DEVICES', 'MIG-GPU-5ebe9f43-ac33420d4628')
        resource = NvidiaGpuResource.from_system()
        assert len(resource) == 1
        assert resource == ['MIG-GPU-5ebe9f43-ac33420d4628']
        mcls.delenv('CUDA_VISIBLE_DEVICES')
@pytest.mark.skipif(os.getenv('GITHUB_ACTIONS') is not None, reason='skip GPUs test on CI')
def test_nvidia_gpu_validate(monkeypatch: pytest.MonkeyPatch):
    """validate must reject non-string entries and unparseable UUID lists."""
    with monkeypatch.context() as mcls:
        # to make this tests works with system that has GPU
        mcls.setenv('CUDA_VISIBLE_DEVICES', '')
        assert len(NvidiaGpuResource.from_system()) >= 0  # TODO: real from_system tests
        assert pytest.raises(ValueError, NvidiaGpuResource.validate, [*NvidiaGpuResource.from_system(), 1]).match(
            'Input list should be all string type.'
        )
        assert pytest.raises(ValueError, NvidiaGpuResource.validate, [-2]).match('Input list should be all string type.')
        assert pytest.raises(ValueError, NvidiaGpuResource.validate, ['GPU-5ebe9f43', 'GPU-ac33420d4628']).match(
            'Failed to parse available GPUs UUID'
        )
def test_nvidia_gpu_from_spec(monkeypatch: pytest.MonkeyPatch):
    """from_spec accepts counts, index lists, UUID strings, and disabling values."""
    with monkeypatch.context() as mcls:
        # to make this tests works with system that has GPU
        mcls.setenv('CUDA_VISIBLE_DEVICES', '')
        # integer / numeric-string counts expand to index lists
        assert NvidiaGpuResource.from_spec(1) == ['0']
        assert NvidiaGpuResource.from_spec('5') == ['0', '1', '2', '3', '4']
        assert NvidiaGpuResource.from_spec(1) == ['0']
        assert NvidiaGpuResource.from_spec(2) == ['0', '1']
        assert NvidiaGpuResource.from_spec('3') == ['0', '1', '2']
        # explicit index lists pass through as strings
        assert NvidiaGpuResource.from_spec([1, 3]) == ['1', '3']
        assert NvidiaGpuResource.from_spec(['1', '3']) == ['1', '3']
        # disabling values produce an empty list
        assert NvidiaGpuResource.from_spec(-1) == []
        assert NvidiaGpuResource.from_spec('-1') == []
        assert NvidiaGpuResource.from_spec('') == []
        assert NvidiaGpuResource.from_spec('-2') == []
        # UUID literals, with -1 truncating the remainder
        assert NvidiaGpuResource.from_spec('GPU-288347ab') == ['GPU-288347ab']
        assert NvidiaGpuResource.from_spec('GPU-288347ab,-1,GPU-ac33420d4628') == ['GPU-288347ab']
        assert NvidiaGpuResource.from_spec('GPU-288347ab,GPU-ac33420d4628') == ['GPU-288347ab', 'GPU-ac33420d4628']
        assert NvidiaGpuResource.from_spec('MIG-GPU-288347ab') == ['MIG-GPU-288347ab']
        # unsupported spec types are rejected
        with pytest.raises(TypeError):
            NvidiaGpuResource.from_spec((1, 2, 3))
        with pytest.raises(TypeError):
            NvidiaGpuResource.from_spec(1.5)
        with pytest.raises(ValueError):
            assert NvidiaGpuResource.from_spec(-2)
class GPURunnable(bentoml.Runnable):
    """Minimal Runnable advertising GPU resource support for strategy tests."""

    SUPPORTED_RESOURCES = ('nvidia.com/gpu', 'amd.com/gpu')
def unvalidated_get_resource(x: dict[str, t.Any], y: str, validate: bool = False):
    # Shim monkeypatched over strategy.get_resource to skip real device validation.
    return get_resource(x, y, validate=validate)
@pytest.mark.parametrize('gpu_type', ['nvidia.com/gpu', 'amd.com/gpu'])
def test_cascade_strategy_worker_count(monkeypatch: MonkeyPatch, gpu_type: str):
    """Worker count stays 1 across GPU counts and fractional workers_per_resource."""
    monkeypatch.setattr(strategy, 'get_resource', unvalidated_get_resource)
    assert CascadingResourceStrategy.get_worker_count(GPURunnable, {gpu_type: 2}, 1) == 1
    assert CascadingResourceStrategy.get_worker_count(GPURunnable, {gpu_type: [2, 7]}, 1) == 1
    assert CascadingResourceStrategy.get_worker_count(GPURunnable, {gpu_type: [2, 7]}, 0.5) == 1
    assert CascadingResourceStrategy.get_worker_count(GPURunnable, {gpu_type: [2, 7, 9]}, 0.5) == 1
    assert CascadingResourceStrategy.get_worker_count(GPURunnable, {gpu_type: [2, 7, 8, 9]}, 0.5) == 1
    assert CascadingResourceStrategy.get_worker_count(GPURunnable, {gpu_type: [2, 5, 7, 8, 9]}, 0.4) == 1
@pytest.mark.parametrize('gpu_type', ['nvidia.com/gpu', 'amd.com/gpu'])
def test_cascade_strategy_worker_env(monkeypatch: MonkeyPatch, gpu_type: str):
    """CUDA_VISIBLE_DEVICES assignment per worker: integer counts, explicit device
    lists, and fractional workers_per_resource that span multiple devices."""
    monkeypatch.setattr(strategy, 'get_resource', unvalidated_get_resource)
    # one worker per device: worker index maps directly to a device index
    envs = CascadingResourceStrategy.get_worker_env(GPURunnable, {gpu_type: 2}, 1, 0)
    assert envs.get('CUDA_VISIBLE_DEVICES') == '0'
    envs = CascadingResourceStrategy.get_worker_env(GPURunnable, {gpu_type: 2}, 1, 1)
    assert envs.get('CUDA_VISIBLE_DEVICES') == '1'
    envs = CascadingResourceStrategy.get_worker_env(GPURunnable, {gpu_type: [2, 7]}, 1, 1)
    assert envs.get('CUDA_VISIBLE_DEVICES') == '7'
    # two workers per device: consecutive workers share a device
    envs = CascadingResourceStrategy.get_worker_env(GPURunnable, {gpu_type: 2}, 2, 0)
    assert envs.get('CUDA_VISIBLE_DEVICES') == '0'
    envs = CascadingResourceStrategy.get_worker_env(GPURunnable, {gpu_type: 2}, 2, 1)
    assert envs.get('CUDA_VISIBLE_DEVICES') == '0'
    envs = CascadingResourceStrategy.get_worker_env(GPURunnable, {gpu_type: 2}, 2, 2)
    assert envs.get('CUDA_VISIBLE_DEVICES') == '1'
    envs = CascadingResourceStrategy.get_worker_env(GPURunnable, {gpu_type: [2, 7]}, 2, 1)
    assert envs.get('CUDA_VISIBLE_DEVICES') == '2'
    envs = CascadingResourceStrategy.get_worker_env(GPURunnable, {gpu_type: [2, 7]}, 2, 2)
    assert envs.get('CUDA_VISIBLE_DEVICES') == '7'
    # fractional workers_per_resource: each worker gets multiple devices
    envs = CascadingResourceStrategy.get_worker_env(GPURunnable, {gpu_type: [2, 7]}, 0.5, 0)
    assert envs.get('CUDA_VISIBLE_DEVICES') == '2,7'
    envs = CascadingResourceStrategy.get_worker_env(GPURunnable, {gpu_type: [2, 7, 8, 9]}, 0.5, 0)
    assert envs.get('CUDA_VISIBLE_DEVICES') == '2,7'
    envs = CascadingResourceStrategy.get_worker_env(GPURunnable, {gpu_type: [2, 7, 8, 9]}, 0.5, 1)
    assert envs.get('CUDA_VISIBLE_DEVICES') == '8,9'
    envs = CascadingResourceStrategy.get_worker_env(GPURunnable, {gpu_type: [2, 7, 8, 9]}, 0.25, 0)
    assert envs.get('CUDA_VISIBLE_DEVICES') == '2,7,8,9'
    envs = CascadingResourceStrategy.get_worker_env(GPURunnable, {gpu_type: [2, 6, 7, 8, 9]}, 0.4, 0)
    assert envs.get('CUDA_VISIBLE_DEVICES') == '2,6'
    envs = CascadingResourceStrategy.get_worker_env(GPURunnable, {gpu_type: [2, 6, 7, 8, 9]}, 0.4, 1)
    assert envs.get('CUDA_VISIBLE_DEVICES') == '7,8'
    envs = CascadingResourceStrategy.get_worker_env(GPURunnable, {gpu_type: [2, 6, 7, 8, 9]}, 0.4, 2)
    assert envs.get('CUDA_VISIBLE_DEVICES') == '9'
@pytest.mark.parametrize('gpu_type', ['nvidia.com/gpu', 'amd.com/gpu'])
def test_cascade_strategy_disabled_via_env(monkeypatch: MonkeyPatch, gpu_type: str):
    """An explicit '' or '-1' CUDA_VISIBLE_DEVICES passes through unchanged (GPUs disabled)."""
    monkeypatch.setattr(strategy, 'get_resource', unvalidated_get_resource)
    monkeypatch.setenv('CUDA_VISIBLE_DEVICES', '')
    envs = CascadingResourceStrategy.get_worker_env(GPURunnable, {gpu_type: 2}, 1, 0)
    assert envs.get('CUDA_VISIBLE_DEVICES') == ''
    monkeypatch.delenv('CUDA_VISIBLE_DEVICES')
    monkeypatch.setenv('CUDA_VISIBLE_DEVICES', '-1')
    envs = CascadingResourceStrategy.get_worker_env(GPURunnable, {gpu_type: 2}, 1, 1)
    assert envs.get('CUDA_VISIBLE_DEVICES') == '-1'
    monkeypatch.delenv('CUDA_VISIBLE_DEVICES')