diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
deleted file mode 100644
index d7de1f72..00000000
--- a/.github/workflows/tests.yml
+++ /dev/null
@@ -1,35 +0,0 @@
-name: Run Tests
-
-on:
-  push:
-    branches: [main, master]
-  pull_request:
-    branches: [main, master]
-
-jobs:
-  test:
-    runs-on: ubuntu-latest
-    strategy:
-      matrix:
-        python-version: ["3.9", "3.12"]
-
-    steps:
-      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # ratchet:actions/checkout@v4
-
-      - name: Set up Python ${{ matrix.python-version }}
-        uses: actions/setup-python@65d7f2d534ac1bc67fcd62888c5f4f3d2cb2b236 # ratchet:actions/setup-python@v4
-        with:
-          python-version: ${{ matrix.python-version }}
-
-      - name: Install uv
-        run: |
-          pip install uv
-
-      - name: Install dependencies with uv
-        run: |
-          uv pip install -e .
-          uv pip install pytest pexpect
-
-      - name: Run tests
-        run: |
-          pytest tests -v
diff --git a/src/openllm/__main__.py b/src/openllm/__main__.py
index 4b8a57fb..7e3c2c16 100644
--- a/src/openllm/__main__.py
+++ b/src/openllm/__main__.py
@@ -220,13 +220,23 @@ def serve(
     repo: typing.Optional[str] = None,
     port: int = 3000,
     verbose: bool = False,
+    env: typing.Optional[list[str]] = typer.Option(
+        None,
+        '--env',
+        help='Environment variables to pass to the deployment command. Format: NAME or NAME=value. Can be specified multiple times.',
+    ),
+    arg: typing.Optional[list[str]] = typer.Option(
+        None,
+        '--arg',
+        help='Bento arguments in the form of key=value pairs. Can be specified multiple times.',
+    ),
 ) -> None:
     cmd_update()
     if verbose:
         VERBOSE_LEVEL.set(20)
     target = get_local_machine_spec()
     bento = ensure_bento(model, target=target, repo_name=repo)
-    local_serve(bento, port=port)
+    local_serve(bento, port=port, cli_envs=env, cli_args=arg)
 
 
 @app.command(help='run the model and chat in terminal')
@@ -236,6 +246,16 @@ def run(
     port: typing.Optional[int] = None,
     timeout: int = 600,
     verbose: bool = False,
+    env: typing.Optional[list[str]] = typer.Option(
+        None,
+        '--env',
+        help='Environment variables to pass to the deployment command. Format: NAME or NAME=value. Can be specified multiple times.',
+    ),
+    arg: typing.Optional[list[str]] = typer.Option(
+        None,
+        '--arg',
+        help='Bento arguments in the form of key=value pairs. Can be specified multiple times.',
+    ),
 ) -> None:
     cmd_update()
     if verbose:
@@ -244,7 +264,7 @@ def run(
     bento = ensure_bento(model, target=target, repo_name=repo)
     if port is None:
         port = random.randint(30000, 40000)
-    local_run(bento, port=port, timeout=timeout)
+    local_run(bento, port=port, timeout=timeout, cli_envs=env, cli_args=arg)
 
 
 @app.command(help='deploy production-ready OpenAI API-compatible server to BentoCloud')
diff --git a/src/openllm/local.py b/src/openllm/local.py
index a5c72c60..9c63418b 100644
--- a/src/openllm/local.py
+++ b/src/openllm/local.py
@@ -1,6 +1,6 @@
 from __future__ import annotations
 
-import asyncio, time, typing
+import asyncio, time, typing, os
 
 import httpx, openai
 from openai.types.chat import ChatCompletionAssistantMessageParam, ChatCompletionUserMessageParam
@@ -19,8 +19,6 @@ if typing.TYPE_CHECKING:
 
 
 def prep_env_vars(bento: BentoInfo) -> None:
-    import os
-
     env_vars = bento.envs
     for env_var in env_vars:
         if not env_var.get('value'):
@@ -30,23 +28,57 @@ def prep_env_vars(bento: BentoInfo) -> None:
         os.environ[key] = value
 
 
-def _get_serve_cmd(bento: BentoInfo, port: int = 3000) -> tuple[list[str], EnvVars]:
+def _get_serve_cmd(
+    bento: BentoInfo, port: int = 3000, cli_args: typing.Optional[list[str]] = None
+) -> tuple[list[str], EnvVars]:
     cmd = ['bentoml', 'serve', bento.bentoml_tag]
     if port != 3000:
         cmd += ['--port', str(port)]
+
+    # Add CLI arguments if provided
+    if cli_args:
+        for arg in cli_args:
+            cmd += ['--arg', arg]
+
     return cmd, EnvVars({'BENTOML_HOME': f'{bento.repo.path}/bentoml'})
 
 
-def serve(bento: BentoInfo, port: int = 3000) -> None:
+def serve(
+    bento: BentoInfo,
+    port: int = 3000,
+    cli_envs: typing.Optional[list[str]] = None,
+    cli_args: typing.Optional[list[str]] = None,
+) -> None:
     prep_env_vars(bento)
-    cmd, env = _get_serve_cmd(bento, port=port)
+    cmd, env = _get_serve_cmd(bento, port=port, cli_args=cli_args)
+
+    # Add CLI environment variables if provided
+    if cli_envs:
+        for env_var in cli_envs:
+            if '=' in env_var:
+                key, value = env_var.split('=', 1)
+                env[key] = value
+            else:
+                env[env_var] = os.environ.get(env_var, '')
+
     venv = ensure_venv(bento, runtime_envs=env)
     output(f'Access the Chat UI at http://localhost:{port}/chat (or with you IP)')
     run_command(cmd, env=env, cwd=None, venv=venv)
 
 
-async def _run_model(bento: BentoInfo, port: int = 3000, timeout: int = 600) -> None:
-    cmd, env = _get_serve_cmd(bento, port)
+async def _run_model(
+    bento: BentoInfo,
+    port: int = 3000,
+    timeout: int = 600,
+    cli_env: typing.Optional[dict[str, typing.Any]] = None,
+    cli_args: typing.Optional[list[str]] = None,
+) -> None:
+    cmd, env = _get_serve_cmd(bento, port, cli_args=cli_args)
+
+    # Merge cli environment variables if provided
+    if cli_env:
+        env.update(cli_env)
+
     venv = ensure_venv(bento, runtime_envs=env)
     async with async_run_command(cmd, env=env, cwd=None, venv=venv, silent=False) as server_proc:
         output(f'Model server started {server_proc.pid}')
@@ -109,9 +141,26 @@ async def _run_model(bento: BentoInfo, port: int = 3000, timeout: int = 600) ->
             except KeyboardInterrupt:
                 break
         output('\nStopping model server...', style='green')
-    output('Stopped model server', style='green')
+    output('Stopped model server', style='green')
 
 
-def run(bento: BentoInfo, port: int = 3000, timeout: int = 600) -> None:
+def run(
+    bento: BentoInfo,
+    port: int = 3000,
+    timeout: int = 600,
+    cli_envs: typing.Optional[list[str]] = None,
+    cli_args: typing.Optional[list[str]] = None,
+) -> None:
     prep_env_vars(bento)
-    asyncio.run(_run_model(bento, port=port, timeout=timeout))
+
+    # Add CLI environment variables to the process
+    env = {}
+    if cli_envs:
+        for env_var in cli_envs:
+            if '=' in env_var:
+                key, value = env_var.split('=', 1)
+                env[key] = value
+            else:
+                env[env_var] = os.environ.get(env_var, '')
+
+    asyncio.run(_run_model(bento, port=port, timeout=timeout, cli_env=env, cli_args=cli_args))
diff --git a/tests/test_cli_flow.py b/tests/test_cli_flow.py
deleted file mode 100644
index 58f6ac1e..00000000
--- a/tests/test_cli_flow.py
+++ /dev/null
@@ -1,75 +0,0 @@
-from __future__ import annotations
-
-import sys, typing
-
-import pytest, pexpect
-
-
-@pytest.fixture
-def pexpect_process() -> typing.Generator[pexpect.spawn[typing.Any], None, None]:
-    child = pexpect.spawn(
-        f'{sys.executable} -m openllm hello', encoding='utf-8', timeout=20, echo=False
-    )
-    try:
-        yield child
-    finally:
-        try:
-            child.sendcontrol('c')
-            child.close(force=True)
-        except:
-            pass
-
-
-def safe_expect(
-    child: pexpect.spawn, pattern: str, timeout: int = 10, debug_msg: str = 'Expecting pattern'
-) -> int:
-    try:
-        print(f"\n{debug_msg}: '{pattern}'")
-        index = child.expect(pattern, timeout=timeout)
-        print(f'Found match at index {index}')
-        print(f'Before match: {child.before}')
-        print(f'After match: {child.after}')
-        return index
-    except pexpect.TIMEOUT:
-        print(f'TIMEOUT while {debug_msg}')
-        print(f'Last output: {child.before}')
-        raise
-    except pexpect.EOF:
-        print(f'EOF while {debug_msg}')
-        print(f'Last output: {child.before}')
-        raise
-
-
-def test_hello_flow_to_deploy(pexpect_process: pexpect.spawn) -> None:
-    child = pexpect_process
-
-    try:
-        safe_expect(child, 'Select a model', timeout=10, debug_msg='Waiting for model selection prompt')
-
-        child.sendline('\x1b[B')
-        child.sendline('\r')
-
-        safe_expect(
-            child, 'Select a version', timeout=10, debug_msg='Waiting for version selection prompt'
-        )
-
-        child.sendline('\r')
-
-        safe_expect(
-            child, 'Select an action', timeout=10, debug_msg='Waiting for action selection prompt'
-        )
-
-        child.sendline('\x1b[B')
-        child.sendline('\x1b[B')
-
-        child.sendline('\r')
-
-        safe_expect(
-            child, 'Select an instance type', timeout=10, debug_msg='Waiting for instance type prompt'
-        )
-
-        child.sendline('\r')
-
-        child.expect('Error: .*HF_TOKEN', timeout=10)
-    except Exception as e:
-        pytest.fail(f'Test failed with exception: {e}')
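
Usage sketch for the new flags (the model placeholder and key/value pairs below are illustrative, not taken from this change): --env NAME forwards the variable's current value from the calling environment, --env NAME=value sets it explicitly, and each --arg key=value is forwarded to bentoml serve as an --arg flag.

    # hypothetical invocations of the new options
    openllm serve <model> --env HF_TOKEN --env MY_VAR=custom-value --arg key=value
    openllm run <model> --env HF_TOKEN --arg key=value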