mirror of
https://github.com/exo-explore/exo.git
synced 2025-12-23 22:27:50 -05:00
* 8000 -> 52415 * dont grab the api port for placement --------- Co-authored-by: rltakashige <rl.takashige@gmail.com>
87 lines
2.5 KiB
Python
87 lines
2.5 KiB
Python
#!/usr/bin/env python3
|
|
import argparse
|
|
import json
|
|
import sys
|
|
|
|
import requests
|
|
|
|
|
|
def stream_chat(
    host: str,
    query: str,
    *,
    model: str = "mlx-community/Llama-3.2-1B-Instruct-4bit",
    port: int = 52415,
) -> None:
    """Stream a chat completion from ``host`` and print the reply as it arrives.

    Sends ``query`` as a single user message to
    ``http://{host}:{port}/v1/chat/completions`` with ``"stream": True`` and
    writes every ``delta.content`` fragment of the server-sent-event response
    to stdout, unbuffered, followed by a final newline.

    Args:
        host: Hostname without protocol, e.g. ``localhost``.
        query: Text sent as the content of the user message.
        model: Model identifier placed in the request payload, e.g.
            ``mlx-community/Llama-3_3-Nemotron-Super-49B-v1_5-mlx-4Bit``.
        port: TCP port of the API server.

    Raises:
        SystemExit: With exit code 1 when the request fails (connection
            error, HTTP error status, or a broken stream); the error is
            reported on stderr first.
    """
    url = f"http://{host}:{port}/v1/chat/completions"
    headers = {"Content-Type": "application/json"}
    payload = {
        "model": model,
        "stream": True,
        "messages": [{"role": "user", "content": query}],
    }

    try:
        with requests.post(url, headers=headers, json=payload, stream=True) as resp:
            resp.raise_for_status()

            # Iterate raw byte lines and decode each one as UTF-8 ourselves.
            # With decode_unicode=True requests decodes using the encoding it
            # infers from the headers, which is ISO-8859-1 for a text/*
            # response that declares no charset. That garbles non-ASCII text
            # and lets str.splitlines() break a line on decoded bytes such as
            # 0x85, truncating the JSON payload so the chunk is dropped.
            for raw_line in resp.iter_lines():
                if not raw_line:
                    continue

                line = raw_line.decode("utf-8", errors="replace")

                # SSE lines look like: "data: {...}" or "data: [DONE]"
                if not line.startswith("data:"):
                    continue

                data = line[len("data:") :].strip()
                if data == "[DONE]":
                    break

                try:
                    obj = json.loads(data)
                except json.JSONDecodeError:
                    # Best effort: skip events that are not valid JSON.
                    continue

                # Valid JSON that is not an object carries no choices.
                if not isinstance(obj, dict):
                    continue

                # "or []" also covers an explicit `"choices": null`.
                for choice in obj.get("choices") or []:
                    delta = choice.get("delta") or {}
                    content = delta.get("content")
                    if content:
                        print(content, end="", flush=True)
    except requests.RequestException as e:
        print(f"Request failed: {e}", file=sys.stderr)
        sys.exit(1)

    # Terminate the streamed output with a newline.
    print()
|
|
|
|
|
|
def main() -> None:
    """Parse the command line and stream a chat completion for the query.

    The query comes from the file given with ``-f/--file`` when that option
    is set; otherwise the positional query words are joined with single
    spaces. With neither, argparse reports a usage error and exits. A file
    that cannot be read is reported on stderr and the process exits with
    status 1.
    """
    cli = argparse.ArgumentParser(
        description="Stream chat completions from a local server."
    )
    cli.add_argument("host", help="Hostname (without protocol), e.g. localhost")
    cli.add_argument(
        "-f",
        "--file",
        help="Path to a text file whose contents will be used as the query",
    )
    cli.add_argument(
        "query",
        nargs="*",
        help="Query text (if not using -f/--file). All remaining arguments are joined with spaces.",
    )
    opts = cli.parse_args()

    # Guard clause: cli.error() prints the usage message and exits.
    if not opts.file and not opts.query:
        cli.error("You must provide either a query or a file (-f/--file).")

    if opts.file:
        # The file takes precedence over any positional query words.
        try:
            with open(opts.file, "r", encoding="utf-8") as handle:
                prompt = handle.read().strip()
        except OSError as err:
            print(f"Error reading file {opts.file}: {err}", file=sys.stderr)
            sys.exit(1)
    else:
        prompt = " ".join(opts.query)

    stream_chat(opts.host, prompt)
|
|
|
|
|
|
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
|