mirror of
https://github.com/bentoml/OpenLLM.git
synced 2026-01-25 07:47:49 -05:00
feat(openai): chat templates and complete control of prompt generation (#725)
* feat(openai): chat templates and complete control of prompt generation Signed-off-by: Aaron Pham <29749331+aarnphm@users.noreply.github.com> * fix: correctly use base chat templates Signed-off-by: Aaron Pham <29749331+aarnphm@users.noreply.github.com> * fix: remove symlink Signed-off-by: Aaron Pham <29749331+aarnphm@users.noreply.github.com> --------- Signed-off-by: Aaron Pham <29749331+aarnphm@users.noreply.github.com>
This commit is contained in: (branch/tag list lost in extraction)
@@ -1334,8 +1334,7 @@ def query_command(
|
||||
$ openllm query --endpoint http://12.323.2.1:3000 "What is the meaning of life?"
|
||||
```
|
||||
'''
|
||||
if server_type == 'grpc':
|
||||
raise click.ClickException("'grpc' is currently disabled.")
|
||||
if server_type == 'grpc': raise click.ClickException("'grpc' is currently disabled.")
|
||||
_memoized = {k: orjson.loads(v[0]) for k, v in _memoized.items() if v}
|
||||
# TODO: grpc support
|
||||
client = openllm.HTTPClient(address=endpoint, timeout=timeout)
|
||||
@@ -1343,7 +1342,7 @@ def query_command(
|
||||
|
||||
if stream:
|
||||
stream_res: t.Iterator[StreamingResponse] = client.generate_stream(prompt, **_memoized)
|
||||
termui.echo(prompt, fg=input_fg, nl=False)
|
||||
termui.echo(prompt, fg=input_fg, nl=False if stream else True)
|
||||
for it in stream_res:
|
||||
termui.echo(it.text, fg=generated_fg, nl=False)
|
||||
else:
|
||||
|
||||
Reference in New Issue
Block a user