feat: add support for --arg (#1174)

* feat: add support for --arg

Signed-off-by: Aaron Pham <contact@aarnphm.xyz>

* chore: remove tests

Signed-off-by: Aaron Pham <contact@aarnphm.xyz>

---------

Signed-off-by: Aaron Pham <contact@aarnphm.xyz>
This commit is contained in:
Aaron Pham
2025-04-11 02:41:31 -04:00
committed by GitHub
parent d8fb4ae4a5
commit 8a75c99a46
4 changed files with 82 additions and 123 deletions

View File

@@ -1,35 +0,0 @@
# CI workflow: installs the package with uv and runs the pytest suite on
# pushes and pull requests that target main/master.
name: Run Tests
on:
  push:
    branches: [main, master]
  pull_request:
    branches: [main, master]
jobs:
  test:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        # One job per listed interpreter version.
        python-version: ["3.9", "3.12"]
    steps:
      # Actions are pinned to commit SHAs; the trailing "ratchet:" comments
      # record the tag each SHA was resolved from.
      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # ratchet:actions/checkout@v4
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@65d7f2d534ac1bc67fcd62888c5f4f3d2cb2b236 # ratchet:actions/setup-python@v4
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install uv
        run: |
          pip install uv
      # NOTE(review): `uv pip install` normally expects a virtual environment
      # or `--system`; confirm these commands succeed on the hosted runner.
      - name: Install dependencies with uv
        run: |
          uv pip install -e .
          uv pip install pytest pexpect
      - name: Run tests
        run: |
          pytest tests -v

View File

@@ -220,13 +220,23 @@ def serve(
repo: typing.Optional[str] = None,
port: int = 3000,
verbose: bool = False,
env: typing.Optional[list[str]] = typer.Option(
None,
'--env',
help='Environment variables to pass to the deployment command. Format: NAME or NAME=value. Can be specified multiple times.',
),
arg: typing.Optional[list[str]] = typer.Option(
None,
'--arg',
help='Bento arguments in the form of key=value pairs. Can be specified multiple times.',
),
) -> None:
cmd_update()
if verbose:
VERBOSE_LEVEL.set(20)
target = get_local_machine_spec()
bento = ensure_bento(model, target=target, repo_name=repo)
local_serve(bento, port=port)
local_serve(bento, port=port, cli_envs=env, cli_args=arg)
@app.command(help='run the model and chat in terminal')
@@ -236,6 +246,16 @@ def run(
port: typing.Optional[int] = None,
timeout: int = 600,
verbose: bool = False,
env: typing.Optional[list[str]] = typer.Option(
None,
'--env',
help='Environment variables to pass to the deployment command. Format: NAME or NAME=value. Can be specified multiple times.',
),
arg: typing.Optional[list[str]] = typer.Option(
None,
'--arg',
help='Bento arguments in the form of key=value pairs. Can be specified multiple times.',
),
) -> None:
cmd_update()
if verbose:
@@ -244,7 +264,7 @@ def run(
bento = ensure_bento(model, target=target, repo_name=repo)
if port is None:
port = random.randint(30000, 40000)
local_run(bento, port=port, timeout=timeout)
local_run(bento, port=port, timeout=timeout, cli_envs=env, cli_args=arg)
@app.command(help='deploy production-ready OpenAI API-compatible server to BentoCloud')

View File

@@ -1,6 +1,6 @@
from __future__ import annotations
import asyncio, time, typing
import asyncio, time, typing, os
import httpx, openai
from openai.types.chat import ChatCompletionAssistantMessageParam, ChatCompletionUserMessageParam
@@ -19,8 +19,6 @@ if typing.TYPE_CHECKING:
def prep_env_vars(bento: BentoInfo) -> None:
import os
env_vars = bento.envs
for env_var in env_vars:
if not env_var.get('value'):
@@ -30,23 +28,57 @@ def prep_env_vars(bento: BentoInfo) -> None:
os.environ[key] = value
def _get_serve_cmd(
    bento: BentoInfo, port: int = 3000, cli_args: typing.Optional[list[str]] = None
) -> tuple[list[str], EnvVars]:
    """Build the ``bentoml serve`` command line and its base environment.

    Args:
        bento: The bento to serve; its ``bentoml_tag`` and ``repo.path`` are read.
        port: Port to serve on. ``--port`` is only added when this is not 3000.
        cli_args: Optional raw argument strings; each one is forwarded as its
            own ``--arg <value>`` pair, in the order given.

    Returns:
        A ``(cmd, env)`` pair: the argument list to execute and an ``EnvVars``
        holding ``BENTOML_HOME`` pointing at ``<repo path>/bentoml``.
    """
    cmd = ['bentoml', 'serve', bento.bentoml_tag]
    if port != 3000:
        cmd += ['--port', str(port)]
    # ``None`` and an empty list both mean "no extra arguments".
    for arg in cli_args or ():
        cmd += ['--arg', arg]
    return cmd, EnvVars({'BENTOML_HOME': f'{bento.repo.path}/bentoml'})
def serve(
    bento: BentoInfo,
    port: int = 3000,
    cli_envs: typing.Optional[list[str]] = None,
    cli_args: typing.Optional[list[str]] = None,
) -> None:
    """Serve ``bento`` by running its ``bentoml serve`` command.

    Args:
        bento: The bento to serve.
        port: Port passed through to the serve command.
        cli_envs: Optional environment entries, each either ``NAME=value`` or
            a bare ``NAME``. A bare ``NAME`` takes the value from the current
            process environment, or an empty string when it is not set.
        cli_args: Optional argument strings forwarded to ``_get_serve_cmd``.
    """
    prep_env_vars(bento)
    cmd, env = _get_serve_cmd(bento, port=port, cli_args=cli_args)
    # CLI entries are written after the base environment is built, so they
    # override any key that is already present.
    for env_var in cli_envs or ():
        # Split on the first '=' only, so values may themselves contain '='.
        name, has_value, value = env_var.partition('=')
        env[name] = value if has_value else os.environ.get(name, '')
    venv = ensure_venv(bento, runtime_envs=env)
    output(f'Access the Chat UI at http://localhost:{port}/chat (or with your IP)')
    run_command(cmd, env=env, cwd=None, venv=venv)
async def _run_model(bento: BentoInfo, port: int = 3000, timeout: int = 600) -> None:
cmd, env = _get_serve_cmd(bento, port)
async def _run_model(
bento: BentoInfo,
port: int = 3000,
timeout: int = 600,
cli_env: typing.Optional[dict[str, typing.Any]] = None,
cli_args: typing.Optional[list[str]] = None,
) -> None:
cmd, env = _get_serve_cmd(bento, port, cli_args=cli_args)
# Merge cli environment variables if provided
if cli_env:
env.update(cli_env)
venv = ensure_venv(bento, runtime_envs=env)
async with async_run_command(cmd, env=env, cwd=None, venv=venv, silent=False) as server_proc:
output(f'Model server started {server_proc.pid}')
@@ -109,9 +141,26 @@ async def _run_model(bento: BentoInfo, port: int = 3000, timeout: int = 600) ->
except KeyboardInterrupt:
break
output('\nStopping model server...', style='green')
output('Stopped model server', style='green')
output('Stopped model server', style='green')
def run(
    bento: BentoInfo,
    port: int = 3000,
    timeout: int = 600,
    cli_envs: typing.Optional[list[str]] = None,
    cli_args: typing.Optional[list[str]] = None,
) -> None:
    """Run ``bento`` through ``_run_model`` inside a fresh asyncio event loop.

    Args:
        bento: The bento to run.
        port: Port passed through to ``_run_model``.
        timeout: Timeout value passed through to ``_run_model``.
        cli_envs: Optional environment entries, each either ``NAME=value`` or
            a bare ``NAME``. A bare ``NAME`` takes the value from the current
            process environment, or an empty string when it is not set.
        cli_args: Optional argument strings passed through to ``_run_model``.
    """
    prep_env_vars(bento)
    # Collect the CLI entries into a plain mapping; ``_run_model`` receives it
    # as ``cli_env`` (an empty dict when nothing was supplied).
    env: dict[str, str] = {}
    for env_var in cli_envs or ():
        # Split on the first '=' only, so values may themselves contain '='.
        name, has_value, value = env_var.partition('=')
        env[name] = value if has_value else os.environ.get(name, '')
    # The model is started exactly once, with the CLI environment and arguments.
    asyncio.run(_run_model(bento, port=port, timeout=timeout, cli_env=env, cli_args=cli_args))

View File

@@ -1,75 +0,0 @@
from __future__ import annotations
import sys, typing
import pytest, pexpect
@pytest.fixture
def pexpect_process() -> typing.Generator[pexpect.spawn[typing.Any], None, None]:
    """Spawn ``<python> -m openllm hello`` and yield the pexpect child.

    The child is started with UTF-8 decoding, a 20 second default timeout and
    terminal echo disabled. After the test finishes the fixture sends Ctrl-C
    and force-closes the child.
    """
    child = pexpect.spawn(
        f'{sys.executable} -m openllm hello', encoding='utf-8', timeout=20, echo=False
    )
    try:
        yield child
    finally:
        # Best-effort teardown: a failure to interrupt or close the child must
        # not mask the test outcome. ``Exception`` (rather than a bare
        # ``except``) keeps KeyboardInterrupt and SystemExit propagating.
        try:
            child.sendcontrol('c')
            child.close(force=True)
        except Exception:
            pass
def safe_expect(
    child: pexpect.spawn, pattern: str, timeout: int = 10, debug_msg: str = 'Expecting pattern'
) -> int:
    """Wait for ``pattern`` on ``child`` and print diagnostics along the way.

    Args:
        child: Object whose ``expect`` method is called and whose ``before`` /
            ``after`` attributes are printed.
        pattern: Pattern handed to ``child.expect``.
        timeout: Timeout handed to ``child.expect``.
        debug_msg: Label used in the printed diagnostics.

    Returns:
        The index returned by ``child.expect``.

    Raises:
        pexpect.TIMEOUT, pexpect.EOF: Re-raised after the last output is printed.
    """
    print(f"\n{debug_msg}: '{pattern}'")
    try:
        matched = child.expect(pattern, timeout=timeout)
    except pexpect.TIMEOUT:
        print(f'TIMEOUT while {debug_msg}')
        print(f'Last output: {child.before}')
        raise
    except pexpect.EOF:
        print(f'EOF while {debug_msg}')
        print(f'Last output: {child.before}')
        raise
    else:
        # Success path: report where the match landed and the text around it.
        print(f'Found match at index {matched}')
        print(f'Before match: {child.before}')
        print(f'After match: {child.after}')
        return matched
def test_hello_flow_to_deploy(pexpect_process: pexpect.spawn) -> None:
    """Walk the ``openllm hello`` prompts and expect the HF_TOKEN error at the end.

    Each step waits for a prompt, then sends the listed keystrokes with
    ``sendline``. Any exception raised along the way is reported through
    ``pytest.fail``.
    """
    arrow_down = '\x1b[B'
    enter = '\r'
    # (prompt to wait for, diagnostic label, keystrokes to send afterwards)
    steps = (
        ('Select a model', 'Waiting for model selection prompt', (arrow_down, enter)),
        ('Select a version', 'Waiting for version selection prompt', (enter,)),
        (
            'Select an action',
            'Waiting for action selection prompt',
            (arrow_down, arrow_down, enter),
        ),
        ('Select an instance type', 'Waiting for instance type prompt', (enter,)),
    )
    session = pexpect_process
    try:
        for prompt, waiting_for, keystrokes in steps:
            safe_expect(session, prompt, timeout=10, debug_msg=waiting_for)
            for key in keystrokes:
                session.sendline(key)
        session.expect('Error: .*HF_TOKEN', timeout=10)
    except Exception as exc:
        pytest.fail(f'Test failed with exception: {exc}')