mirror of
https://github.com/bentoml/OpenLLM.git
synced 2025-12-23 23:57:46 -05:00
feat: add support for --arg (#1174)
* feat: add support for --arg

  Signed-off-by: Aaron Pham <contact@aarnphm.xyz>

* chore: remove tests

  Signed-off-by: Aaron Pham <contact@aarnphm.xyz>

---------

Signed-off-by: Aaron Pham <contact@aarnphm.xyz>
This commit is contained in:
35
.github/workflows/tests.yml
vendored
35
.github/workflows/tests.yml
vendored
@@ -1,35 +0,0 @@
|
||||
# CI workflow: installs the package with uv and runs the pytest suite on
# pushes and pull requests that target main/master.
name: Run Tests

on:
  push:
    branches: [main, master]
  pull_request:
    branches: [main, master]

jobs:
  test:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        # One job per listed interpreter version.
        python-version: ["3.9", "3.12"]

    steps:
      # Actions are pinned to commit SHAs; the trailing ratchet comment records the tag.
      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # ratchet:actions/checkout@v4

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@65d7f2d534ac1bc67fcd62888c5f4f3d2cb2b236 # ratchet:actions/setup-python@v4
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install uv
        run: |
          pip install uv

      - name: Install dependencies with uv
        run: |
          uv pip install -e .
          uv pip install pytest pexpect

      - name: Run tests
        run: |
          pytest tests -v
|
||||
@@ -220,13 +220,23 @@ def serve(
|
||||
repo: typing.Optional[str] = None,
|
||||
port: int = 3000,
|
||||
verbose: bool = False,
|
||||
env: typing.Optional[list[str]] = typer.Option(
|
||||
None,
|
||||
'--env',
|
||||
help='Environment variables to pass to the deployment command. Format: NAME or NAME=value. Can be specified multiple times.',
|
||||
),
|
||||
arg: typing.Optional[list[str]] = typer.Option(
|
||||
None,
|
||||
'--arg',
|
||||
help='Bento arguments in the form of key=value pairs. Can be specified multiple times.',
|
||||
),
|
||||
) -> None:
|
||||
cmd_update()
|
||||
if verbose:
|
||||
VERBOSE_LEVEL.set(20)
|
||||
target = get_local_machine_spec()
|
||||
bento = ensure_bento(model, target=target, repo_name=repo)
|
||||
local_serve(bento, port=port)
|
||||
local_serve(bento, port=port, cli_envs=env, cli_args=arg)
|
||||
|
||||
|
||||
@app.command(help='run the model and chat in terminal')
|
||||
@@ -236,6 +246,16 @@ def run(
|
||||
port: typing.Optional[int] = None,
|
||||
timeout: int = 600,
|
||||
verbose: bool = False,
|
||||
env: typing.Optional[list[str]] = typer.Option(
|
||||
None,
|
||||
'--env',
|
||||
help='Environment variables to pass to the deployment command. Format: NAME or NAME=value. Can be specified multiple times.',
|
||||
),
|
||||
arg: typing.Optional[list[str]] = typer.Option(
|
||||
None,
|
||||
'--arg',
|
||||
help='Bento arguments in the form of key=value pairs. Can be specified multiple times.',
|
||||
),
|
||||
) -> None:
|
||||
cmd_update()
|
||||
if verbose:
|
||||
@@ -244,7 +264,7 @@ def run(
|
||||
bento = ensure_bento(model, target=target, repo_name=repo)
|
||||
if port is None:
|
||||
port = random.randint(30000, 40000)
|
||||
local_run(bento, port=port, timeout=timeout)
|
||||
local_run(bento, port=port, timeout=timeout, cli_envs=env, cli_args=arg)
|
||||
|
||||
|
||||
@app.command(help='deploy production-ready OpenAI API-compatible server to BentoCloud')
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio, time, typing
|
||||
import asyncio, time, typing, os
|
||||
import httpx, openai
|
||||
|
||||
from openai.types.chat import ChatCompletionAssistantMessageParam, ChatCompletionUserMessageParam
|
||||
@@ -19,8 +19,6 @@ if typing.TYPE_CHECKING:
|
||||
|
||||
|
||||
def prep_env_vars(bento: BentoInfo) -> None:
|
||||
import os
|
||||
|
||||
env_vars = bento.envs
|
||||
for env_var in env_vars:
|
||||
if not env_var.get('value'):
|
||||
@@ -30,23 +28,57 @@ def prep_env_vars(bento: BentoInfo) -> None:
|
||||
os.environ[key] = value
|
||||
|
||||
|
||||
def _get_serve_cmd(
    bento: BentoInfo, port: int = 3000, cli_args: typing.Optional[list[str]] = None
) -> tuple[list[str], EnvVars]:
    """Build the ``bentoml serve`` command line and its base environment.

    Args:
        bento: Bento to serve. Its ``bentoml_tag`` is the serve target and
            ``repo.path`` is used to locate the BentoML home directory.
        port: Port to serve on. ``--port`` is only appended when the value
            differs from 3000.
        cli_args: Optional values, each forwarded as its own ``--arg <value>``
            pair, in the order given. ``None`` or an empty list adds nothing.

    Returns:
        A ``(cmd, env)`` tuple: the argv list and an ``EnvVars`` whose
        ``BENTOML_HOME`` is ``<bento.repo.path>/bentoml``.
    """
    cmd = ['bentoml', 'serve', bento.bentoml_tag]
    if port != 3000:
        cmd += ['--port', str(port)]

    # Each CLI argument becomes a separate ``--arg`` flag on the serve command.
    for arg in cli_args or ():
        cmd += ['--arg', arg]

    return cmd, EnvVars({'BENTOML_HOME': f'{bento.repo.path}/bentoml'})
|
||||
|
||||
|
||||
def serve(
    bento: BentoInfo,
    port: int = 3000,
    cli_envs: typing.Optional[list[str]] = None,
    cli_args: typing.Optional[list[str]] = None,
) -> None:
    """Serve a bento locally in the foreground.

    Args:
        bento: Bento to serve.
        port: Port passed through to the serve command.
        cli_envs: Optional environment entries, each either ``NAME=value`` or
            a bare ``NAME``. A bare name takes its value from the current
            process environment, falling back to an empty string when unset.
        cli_args: Optional values forwarded to the serve command as ``--arg``
            flags.
    """
    prep_env_vars(bento)
    cmd, env = _get_serve_cmd(bento, port=port, cli_args=cli_args)

    # Layer CLI-provided variables on top of the base serve environment.
    # Only the first '=' splits, so values may themselves contain '='.
    for env_var in cli_envs or ():
        name, has_value, value = env_var.partition('=')
        env[name] = value if has_value else os.environ.get(name, '')

    venv = ensure_venv(bento, runtime_envs=env)
    output(f'Access the Chat UI at http://localhost:{port}/chat (or with your IP)')
    run_command(cmd, env=env, cwd=None, venv=venv)
|
||||
|
||||
|
||||
async def _run_model(bento: BentoInfo, port: int = 3000, timeout: int = 600) -> None:
|
||||
cmd, env = _get_serve_cmd(bento, port)
|
||||
async def _run_model(
|
||||
bento: BentoInfo,
|
||||
port: int = 3000,
|
||||
timeout: int = 600,
|
||||
cli_env: typing.Optional[dict[str, typing.Any]] = None,
|
||||
cli_args: typing.Optional[list[str]] = None,
|
||||
) -> None:
|
||||
cmd, env = _get_serve_cmd(bento, port, cli_args=cli_args)
|
||||
|
||||
# Merge cli environment variables if provided
|
||||
if cli_env:
|
||||
env.update(cli_env)
|
||||
|
||||
venv = ensure_venv(bento, runtime_envs=env)
|
||||
async with async_run_command(cmd, env=env, cwd=None, venv=venv, silent=False) as server_proc:
|
||||
output(f'Model server started {server_proc.pid}')
|
||||
@@ -109,9 +141,26 @@ async def _run_model(bento: BentoInfo, port: int = 3000, timeout: int = 600) ->
|
||||
except KeyboardInterrupt:
|
||||
break
|
||||
output('\nStopping model server...', style='green')
|
||||
output('Stopped model server', style='green')
|
||||
output('Stopped model server', style='green')
|
||||
|
||||
|
||||
def run(
    bento: BentoInfo,
    port: int = 3000,
    timeout: int = 600,
    cli_envs: typing.Optional[list[str]] = None,
    cli_args: typing.Optional[list[str]] = None,
) -> None:
    """Run the model through ``_run_model`` and block until it finishes.

    Args:
        bento: Bento to run.
        port: Port passed through to ``_run_model``.
        timeout: Timeout passed through to ``_run_model``.
        cli_envs: Optional environment entries, each either ``NAME=value`` or
            a bare ``NAME``. A bare name takes its value from the current
            process environment, falling back to an empty string when unset.
        cli_args: Optional values passed through to ``_run_model`` as
            ``cli_args``.
    """
    prep_env_vars(bento)

    # Parse CLI-provided variables into a mapping that ``_run_model`` merges
    # into the serve environment. Only the first '=' splits.
    env = {}
    for env_var in cli_envs or ():
        name, has_value, value = env_var.partition('=')
        env[name] = value if has_value else os.environ.get(name, '')

    asyncio.run(_run_model(bento, port=port, timeout=timeout, cli_env=env, cli_args=cli_args))
|
||||
|
||||
@@ -1,75 +0,0 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import sys, typing
|
||||
|
||||
import pytest, pexpect
|
||||
|
||||
|
||||
@pytest.fixture
def pexpect_process() -> typing.Generator[pexpect.spawn[typing.Any], None, None]:
    """Spawn ``python -m openllm hello`` under pexpect and yield the child.

    The child is started with the current interpreter, UTF-8 decoding, a
    20-second default timeout and echo disabled. On teardown it is sent
    Ctrl-C and force-closed.
    """
    child = pexpect.spawn(
        f'{sys.executable} -m openllm hello', encoding='utf-8', timeout=20, echo=False
    )
    try:
        yield child
    finally:
        # Best-effort cleanup: the process may already have exited, so errors
        # here are ignored. Catch Exception rather than using a bare except so
        # KeyboardInterrupt and SystemExit still propagate.
        try:
            child.sendcontrol('c')
            child.close(force=True)
        except Exception:
            pass
|
||||
|
||||
|
||||
def safe_expect(
    child: pexpect.spawn, pattern: str, timeout: int = 10, debug_msg: str = 'Expecting pattern'
) -> int:
    """Wait for ``pattern`` in the child's output, printing diagnostics.

    Args:
        child: Spawned process to read from.
        pattern: Pattern passed to ``child.expect``.
        timeout: Timeout passed to ``child.expect``.
        debug_msg: Label used in the printed diagnostics.

    Returns:
        The index returned by ``child.expect``.

    Raises:
        pexpect.TIMEOUT: Re-raised after printing the last output seen.
        pexpect.EOF: Re-raised after printing the last output seen.
    """
    try:
        print(f"\n{debug_msg}: '{pattern}'")
        index = child.expect(pattern, timeout=timeout)
        print(f'Found match at index {index}')
        print(f'Before match: {child.before}')
        print(f'After match: {child.after}')
        return index
    except pexpect.TIMEOUT:
        print(f'TIMEOUT while {debug_msg}')
        print(f'Last output: {child.before}')
        raise
    except pexpect.EOF:
        print(f'EOF while {debug_msg}')
        print(f'Last output: {child.before}')
        raise
|
||||
|
||||
|
||||
def test_hello_flow_to_deploy(pexpect_process: pexpect.spawn) -> None:
    """Drive the interactive ``openllm hello`` prompts through to the HF_TOKEN error.

    The test answers the model, version, action and instance-type prompts in
    turn and then expects output matching ``Error: .*HF_TOKEN``. Any exception
    along the way is reported through ``pytest.fail``.
    """
    child = pexpect_process

    try:
        safe_expect(child, 'Select a model', timeout=10, debug_msg='Waiting for model selection prompt')

        # '\x1b[B' is the ANSI escape sequence for the Down arrow key.
        child.sendline('\x1b[B')
        child.sendline('\r')

        safe_expect(
            child, 'Select a version', timeout=10, debug_msg='Waiting for version selection prompt'
        )

        child.sendline('\r')

        safe_expect(
            child, 'Select an action', timeout=10, debug_msg='Waiting for action selection prompt'
        )

        # Two Down presses before confirming; presumably this lands on the
        # deploy action — confirm against the prompt's menu order.
        child.sendline('\x1b[B')
        child.sendline('\x1b[B')

        child.sendline('\r')

        safe_expect(
            child, 'Select an instance type', timeout=10, debug_msg='Waiting for instance type prompt'
        )

        child.sendline('\r')

        child.expect('Error: .*HF_TOKEN', timeout=10)
    except Exception as e:
        pytest.fail(f'Test failed with exception: {e}')
|
||||
Reference in New Issue
Block a user