feat: add support for --arg (#1174)

* feat: add support for --arg

Signed-off-by: Aaron Pham <contact@aarnphm.xyz>

* chore: remove tests

Signed-off-by: Aaron Pham <contact@aarnphm.xyz>

---------

Signed-off-by: Aaron Pham <contact@aarnphm.xyz>
2025-12-23 23:57:46 -05:00 · 2025-04-11 02:41:31 -04:00
parent d8fb4ae4a5
commit 8a75c99a46
4 changed files with 82 additions and 123 deletions
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -1,35 +0,0 @@
-name: Run Tests
-
-on:
-  push:
-    branches: [main, master]
-  pull_request:
-    branches: [main, master]
-
-jobs:
-  test:
-    runs-on: ubuntu-latest
-    strategy:
-      matrix:
-        python-version: ["3.9", "3.12"]
-
-    steps:
-      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # ratchet:actions/checkout@v4
-
-      - name: Set up Python ${{ matrix.python-version }}
-        uses: actions/setup-python@65d7f2d534ac1bc67fcd62888c5f4f3d2cb2b236 # ratchet:actions/setup-python@v4
-        with:
-          python-version: ${{ matrix.python-version }}
-
-      - name: Install uv
-        run: |
-          pip install uv
-
-      - name: Install dependencies with uv
-        run: |
-          uv pip install -e .
-          uv pip install pytest pexpect
-
-      - name: Run tests
-        run: |
-          pytest tests -v
--- a/src/openllm/main.py
+++ b/src/openllm/main.py
@@ -220,13 +220,23 @@ def serve(
  repo: typing.Optional[str] = None,
  port: int = 3000,
  verbose: bool = False,
+  env: typing.Optional[list[str]] = typer.Option(
+    None,
+    '--env',
+    help='Environment variables to pass to the deployment command. Format: NAME or NAME=value. Can be specified multiple times.',
+  ),
+  arg: typing.Optional[list[str]] = typer.Option(
+    None,
+    '--arg',
+    help='Bento arguments in the form of key=value pairs. Can be specified multiple times.',
+  ),
 ) -> None:
  cmd_update()
  if verbose:
    VERBOSE_LEVEL.set(20)
  target = get_local_machine_spec()
  bento = ensure_bento(model, target=target, repo_name=repo)
-  local_serve(bento, port=port)
+  local_serve(bento, port=port, cli_envs=env, cli_args=arg)


@app.command(help='run the model and chat in terminal')
@@ -236,6 +246,16 @@ def run(
  port: typing.Optional[int] = None,
  timeout: int = 600,
  verbose: bool = False,
+  env: typing.Optional[list[str]] = typer.Option(
+    None,
+    '--env',
+    help='Environment variables to pass to the deployment command. Format: NAME or NAME=value. Can be specified multiple times.',
+  ),
+  arg: typing.Optional[list[str]] = typer.Option(
+    None,
+    '--arg',
+    help='Bento arguments in the form of key=value pairs. Can be specified multiple times.',
+  ),
 ) -> None:
  cmd_update()
  if verbose:
@@ -244,7 +264,7 @@ def run(
  bento = ensure_bento(model, target=target, repo_name=repo)
  if port is None:
    port = random.randint(30000, 40000)
-  local_run(bento, port=port, timeout=timeout)
+  local_run(bento, port=port, timeout=timeout, cli_envs=env, cli_args=arg)


@app.command(help='deploy production-ready OpenAI API-compatible server to BentoCloud')
--- a/src/openllm/local.py
+++ b/src/openllm/local.py
@@ -1,6 +1,6 @@
 from __future__ import annotations

-import asyncio, time, typing
+import asyncio, time, typing, os
 import httpx, openai

 from openai.types.chat import ChatCompletionAssistantMessageParam, ChatCompletionUserMessageParam
@@ -19,8 +19,6 @@ if typing.TYPE_CHECKING:


 def prep_env_vars(bento: BentoInfo) -> None:
-  import os
-
  env_vars = bento.envs
  for env_var in env_vars:
    if not env_var.get('value'):
@@ -30,23 +28,57 @@ def prep_env_vars(bento: BentoInfo) -> None:
    os.environ[key] = value


-def _get_serve_cmd(bento: BentoInfo, port: int = 3000) -> tuple[list[str], EnvVars]:
+def _get_serve_cmd(
+  bento: BentoInfo, port: int = 3000, cli_args: typing.Optional[list[str]] = None
+) -> tuple[list[str], EnvVars]:
  cmd = ['bentoml', 'serve', bento.bentoml_tag]
  if port != 3000:
    cmd += ['--port', str(port)]
+
+  # Add CLI arguments if provided
+  if cli_args:
+    for arg in cli_args:
+      cmd += ['--arg', arg]
+
  return cmd, EnvVars({'BENTOML_HOME': f'{bento.repo.path}/bentoml'})


-def serve(bento: BentoInfo, port: int = 3000) -> None:
+def serve(
+  bento: BentoInfo,
+  port: int = 3000,
+  cli_envs: typing.Optional[list[str]] = None,
+  cli_args: typing.Optional[list[str]] = None,
+) -> None:
  prep_env_vars(bento)
-  cmd, env = _get_serve_cmd(bento, port=port)
+  cmd, env = _get_serve_cmd(bento, port=port, cli_args=cli_args)
+
+  # Add CLI environment variables if provided
+  if cli_envs:
+    for env_var in cli_envs:
+      if '=' in env_var:
+        key, value = env_var.split('=', 1)
+        env[key] = value
+      else:
+        env[env_var] = os.environ.get(env_var, '')
+
  venv = ensure_venv(bento, runtime_envs=env)
  output(f'Access the Chat UI at http://localhost:{port}/chat (or with you IP)')
  run_command(cmd, env=env, cwd=None, venv=venv)


-async def _run_model(bento: BentoInfo, port: int = 3000, timeout: int = 600) -> None:
-  cmd, env = _get_serve_cmd(bento, port)
+async def _run_model(
+  bento: BentoInfo,
+  port: int = 3000,
+  timeout: int = 600,
+  cli_env: typing.Optional[dict[str, typing.Any]] = None,
+  cli_args: typing.Optional[list[str]] = None,
+) -> None:
+  cmd, env = _get_serve_cmd(bento, port, cli_args=cli_args)
+
+  # Merge cli environment variables if provided
+  if cli_env:
+    env.update(cli_env)
+
  venv = ensure_venv(bento, runtime_envs=env)
  async with async_run_command(cmd, env=env, cwd=None, venv=venv, silent=False) as server_proc:
    output(f'Model server started {server_proc.pid}')
@@ -109,9 +141,26 @@ async def _run_model(bento: BentoInfo, port: int = 3000, timeout: int = 600) ->
      except KeyboardInterrupt:
        break
    output('\nStopping model server...', style='green')
-  output('Stopped model server', style='green')
+    output('Stopped model server', style='green')


-def run(bento: BentoInfo, port: int = 3000, timeout: int = 600) -> None:
+def run(
+  bento: BentoInfo,
+  port: int = 3000,
+  timeout: int = 600,
+  cli_envs: typing.Optional[list[str]] = None,
+  cli_args: typing.Optional[list[str]] = None,
+) -> None:
  prep_env_vars(bento)
-  asyncio.run(_run_model(bento, port=port, timeout=timeout))
+
+  # Add CLI environment variables to the process
+  env = {}
+  if cli_envs:
+    for env_var in cli_envs:
+      if '=' in env_var:
+        key, value = env_var.split('=', 1)
+        env[key] = value
+      else:
+        env[env_var] = os.environ.get(env_var, '')
+
+  asyncio.run(_run_model(bento, port=port, timeout=timeout, cli_env=env, cli_args=cli_args))
--- a/tests/test_cli_flow.py
+++ b/tests/test_cli_flow.py
@@ -1,75 +0,0 @@
-from __future__ import annotations
-
-import sys, typing
-
-import pytest, pexpect
-
-
-@pytest.fixture
-def pexpect_process() -> typing.Generator[pexpect.spawn[typing.Any], None, None]:
-  child = pexpect.spawn(
-    f'{sys.executable} -m openllm hello', encoding='utf-8', timeout=20, echo=False
-  )
-  try:
-    yield child
-  finally:
-    try:
-      child.sendcontrol('c')
-      child.close(force=True)
-    except:
-      pass
-
-
-def safe_expect(
-  child: pexpect.spawn, pattern: str, timeout: int = 10, debug_msg: str = 'Expecting pattern'
-) -> int:
-  try:
-    print(f"\n{debug_msg}: '{pattern}'")
-    index = child.expect(pattern, timeout=timeout)
-    print(f'Found match at index {index}')
-    print(f'Before match: {child.before}')
-    print(f'After match: {child.after}')
-    return index
-  except pexpect.TIMEOUT:
-    print(f'TIMEOUT while {debug_msg}')
-    print(f'Last output: {child.before}')
-    raise
-  except pexpect.EOF:
-    print(f'EOF while {debug_msg}')
-    print(f'Last output: {child.before}')
-    raise
-
-
-def test_hello_flow_to_deploy(pexpect_process: pexpect.spawn) -> None:
-  child = pexpect_process
-
-  try:
-    safe_expect(child, 'Select a model', timeout=10, debug_msg='Waiting for model selection prompt')
-
-    child.sendline('\x1b[B')
-    child.sendline('\r')
-
-    safe_expect(
-      child, 'Select a version', timeout=10, debug_msg='Waiting for version selection prompt'
-    )
-
-    child.sendline('\r')
-
-    safe_expect(
-      child, 'Select an action', timeout=10, debug_msg='Waiting for action selection prompt'
-    )
-
-    child.sendline('\x1b[B')
-    child.sendline('\x1b[B')
-
-    child.sendline('\r')
-
-    safe_expect(
-      child, 'Select an instance type', timeout=10, debug_msg='Waiting for instance type prompt'
-    )
-
-    child.sendline('\r')
-
-    child.expect('Error: .*HF_TOKEN', timeout=10)
-  except Exception as e:
-    pytest.fail(f'Test failed with exception: {e}')