diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
deleted file mode 100644
index d7de1f72..00000000
--- a/.github/workflows/tests.yml
+++ /dev/null
@@ -1,35 +0,0 @@
-name: Run Tests
-
-on:
-  push:
-    branches: [main, master]
-  pull_request:
-    branches: [main, master]
-
-jobs:
-  test:
-    runs-on: ubuntu-latest
-    strategy:
-      matrix:
-        python-version: ["3.9", "3.12"]
-
-    steps:
-      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # ratchet:actions/checkout@v4
-
-      - name: Set up Python ${{ matrix.python-version }}
-        uses: actions/setup-python@65d7f2d534ac1bc67fcd62888c5f4f3d2cb2b236 # ratchet:actions/setup-python@v4
-        with:
-          python-version: ${{ matrix.python-version }}
-
-      - name: Install uv
-        run: |
-          pip install uv
-
-      - name: Install dependencies with uv
-        run: |
-          uv pip install -e .
-          uv pip install pytest pexpect
-
-      - name: Run tests
-        run: |
-          pytest tests -v
diff --git a/src/openllm/__main__.py b/src/openllm/__main__.py
index 4b8a57fb..7e3c2c16 100644
--- a/src/openllm/__main__.py
+++ b/src/openllm/__main__.py
@@ -220,13 +220,23 @@ def serve(
     repo: typing.Optional[str] = None,
     port: int = 3000,
     verbose: bool = False,
+    env: typing.Optional[list[str]] = typer.Option(
+        None,
+        '--env',
+        help='Environment variables to pass to the deployment command. Format: NAME or NAME=value. Can be specified multiple times.',
+    ),
+    arg: typing.Optional[list[str]] = typer.Option(
+        None,
+        '--arg',
+        help='Bento arguments in the form of key=value pairs. Can be specified multiple times.',
+    ),
 ) -> None:
     cmd_update()
     if verbose:
         VERBOSE_LEVEL.set(20)
     target = get_local_machine_spec()
     bento = ensure_bento(model, target=target, repo_name=repo)
-    local_serve(bento, port=port)
+    local_serve(bento, port=port, cli_envs=env, cli_args=arg)
 
 
 @app.command(help='run the model and chat in terminal')
@@ -236,6 +246,16 @@ def run(
     port: typing.Optional[int] = None,
     timeout: int = 600,
     verbose: bool = False,
+    env: typing.Optional[list[str]] = typer.Option(
+        None,
+        '--env',
+        help='Environment variables to pass to the deployment command. Format: NAME or NAME=value. Can be specified multiple times.',
+    ),
+    arg: typing.Optional[list[str]] = typer.Option(
+        None,
+        '--arg',
+        help='Bento arguments in the form of key=value pairs. Can be specified multiple times.',
+    ),
 ) -> None:
     cmd_update()
     if verbose:
@@ -244,7 +264,7 @@ def run(
     bento = ensure_bento(model, target=target, repo_name=repo)
     if port is None:
         port = random.randint(30000, 40000)
-    local_run(bento, port=port, timeout=timeout)
+    local_run(bento, port=port, timeout=timeout, cli_envs=env, cli_args=arg)
 
 
 @app.command(help='deploy production-ready OpenAI API-compatible server to BentoCloud')
diff --git a/src/openllm/local.py b/src/openllm/local.py
index a5c72c60..9c63418b 100644
--- a/src/openllm/local.py
+++ b/src/openllm/local.py
@@ -1,6 +1,6 @@
 from __future__ import annotations
 
-import asyncio, time, typing
+import asyncio, time, typing, os
 
 import httpx, openai
 from openai.types.chat import ChatCompletionAssistantMessageParam, ChatCompletionUserMessageParam
@@ -19,8 +19,6 @@ if typing.TYPE_CHECKING:
 
 
 def prep_env_vars(bento: BentoInfo) -> None:
-    import os
-
     env_vars = bento.envs
     for env_var in env_vars:
         if not env_var.get('value'):
@@ -30,23 +28,57 @@ def prep_env_vars(bento: BentoInfo) -> None:
         os.environ[key] = value
 
 
-def _get_serve_cmd(bento: BentoInfo, port: int = 3000) -> tuple[list[str], EnvVars]:
+def _get_serve_cmd(
+    bento: BentoInfo, port: int = 3000, cli_args: typing.Optional[list[str]] = None
+) -> tuple[list[str], EnvVars]:
     cmd = ['bentoml', 'serve', bento.bentoml_tag]
     if port != 3000:
         cmd += ['--port', str(port)]
+
+    # Add CLI arguments if provided
+    if cli_args:
+        for arg in cli_args:
+            cmd += ['--arg', arg]
+
     return cmd, EnvVars({'BENTOML_HOME': f'{bento.repo.path}/bentoml'})
 
 
-def serve(bento: BentoInfo, port: int = 3000) -> None:
+def serve(
+    bento: BentoInfo,
+    port: int = 3000,
+    cli_envs: typing.Optional[list[str]] = None,
+    cli_args: typing.Optional[list[str]] = None,
+) -> None:
     prep_env_vars(bento)
-    cmd, env = _get_serve_cmd(bento, port=port)
+    cmd, env = _get_serve_cmd(bento, port=port, cli_args=cli_args)
+
+    # Add CLI environment variables if provided
+    if cli_envs:
+        for env_var in cli_envs:
+            if '=' in env_var:
+                key, value = env_var.split('=', 1)
+                env[key] = value
+            else:
+                env[env_var] = os.environ.get(env_var, '')
+
     venv = ensure_venv(bento, runtime_envs=env)
     output(f'Access the Chat UI at http://localhost:{port}/chat (or with you IP)')
     run_command(cmd, env=env, cwd=None, venv=venv)
 
 
-async def _run_model(bento: BentoInfo, port: int = 3000, timeout: int = 600) -> None:
-    cmd, env = _get_serve_cmd(bento, port)
+async def _run_model(
+    bento: BentoInfo,
+    port: int = 3000,
+    timeout: int = 600,
+    cli_env: typing.Optional[dict[str, typing.Any]] = None,
+    cli_args: typing.Optional[list[str]] = None,
+) -> None:
+    cmd, env = _get_serve_cmd(bento, port, cli_args=cli_args)
+
+    # Merge cli environment variables if provided
+    if cli_env:
+        env.update(cli_env)
+
     venv = ensure_venv(bento, runtime_envs=env)
     async with async_run_command(cmd, env=env, cwd=None, venv=venv, silent=False) as server_proc:
         output(f'Model server started {server_proc.pid}')
@@ -109,9 +141,26 @@ async def _run_model(bento: BentoInfo, port: int = 3000, timeout: int = 600) ->
             except KeyboardInterrupt:
                 break
         output('\nStopping model server...', style='green')
-    output('Stopped model server', style='green')
+    output('Stopped model server', style='green')
 
 
-def run(bento: BentoInfo, port: int = 3000, timeout: int = 600) -> None:
+def run(
+    bento: BentoInfo,
+    port: int = 3000,
+    timeout: int = 600,
+    cli_envs: typing.Optional[list[str]] = None,
+    cli_args: typing.Optional[list[str]] = None,
+) -> None:
     prep_env_vars(bento)
-    asyncio.run(_run_model(bento, port=port, timeout=timeout))
+
+    # Add CLI environment variables to the process
+    env = {}
+    if cli_envs:
+        for env_var in cli_envs:
+            if '=' in env_var:
+                key, value = env_var.split('=', 1)
+                env[key] = value
+            else:
+                env[env_var] = os.environ.get(env_var, '')
+
+    asyncio.run(_run_model(bento, port=port, timeout=timeout, cli_env=env, cli_args=cli_args))
diff --git a/tests/test_cli_flow.py b/tests/test_cli_flow.py
deleted file mode 100644
index 58f6ac1e..00000000
--- a/tests/test_cli_flow.py
+++ /dev/null
@@ -1,75 +0,0 @@
-from __future__ import annotations
-
-import sys, typing
-
-import pytest, pexpect
-
-
-@pytest.fixture
-def pexpect_process() -> typing.Generator[pexpect.spawn[typing.Any], None, None]:
-    child = pexpect.spawn(
-        f'{sys.executable} -m openllm hello', encoding='utf-8', timeout=20, echo=False
-    )
-    try:
-        yield child
-    finally:
-        try:
-            child.sendcontrol('c')
-            child.close(force=True)
-        except:
-            pass
-
-
-def safe_expect(
-    child: pexpect.spawn, pattern: str, timeout: int = 10, debug_msg: str = 'Expecting pattern'
-) -> int:
-    try:
-        print(f"\n{debug_msg}: '{pattern}'")
-        index = child.expect(pattern, timeout=timeout)
-        print(f'Found match at index {index}')
-        print(f'Before match: {child.before}')
-        print(f'After match: {child.after}')
-        return index
-    except pexpect.TIMEOUT:
-        print(f'TIMEOUT while {debug_msg}')
-        print(f'Last output: {child.before}')
-        raise
-    except pexpect.EOF:
-        print(f'EOF while {debug_msg}')
-        print(f'Last output: {child.before}')
-        raise
-
-
-def test_hello_flow_to_deploy(pexpect_process: pexpect.spawn) -> None:
-    child = pexpect_process
-
-    try:
-        safe_expect(child, 'Select a model', timeout=10, debug_msg='Waiting for model selection prompt')
-
-        child.sendline('\x1b[B')
-        child.sendline('\r')
-
-        safe_expect(
-            child, 'Select a version', timeout=10, debug_msg='Waiting for version selection prompt'
-        )
-
-        child.sendline('\r')
-
-        safe_expect(
-            child, 'Select an action', timeout=10, debug_msg='Waiting for action selection prompt'
-        )
-
-        child.sendline('\x1b[B')
-        child.sendline('\x1b[B')
-
-        child.sendline('\r')
-
-        safe_expect(
-            child, 'Select an instance type', timeout=10, debug_msg='Waiting for instance type prompt'
-        )
-
-        child.sendline('\r')
-
-        child.expect('Error: .*HF_TOKEN', timeout=10)
-    except Exception as e:
-        pytest.fail(f'Test failed with exception: {e}')
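
Usage sketch for the new flags (the model placeholder and key/value pairs below are illustrative, not taken from this change): --env NAME forwards the variable's current value from the calling environment, --env NAME=value sets it explicitly, and each --arg key=value is forwarded to bentoml serve as an --arg flag.

    # hypothetical invocations of the new options
    openllm serve <model> --env HF_TOKEN --env MY_VAR=custom-value --arg key=value
    openllm run <model> --env HF_TOKEN --arg key=value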