Worker refactor

Co-authored-by: rltakashige <rl.takashige@gmail.com>
Co-authored-by: Alex Cheema <alexcheema123@gmail.com>
This commit is contained in:
Evan Quiney
2025-11-10 23:31:53 +00:00
committed by GitHub
parent 9058b117c0
commit aa519b8c03
47 changed files with 1767 additions and 2349 deletions

View File

@@ -13,7 +13,7 @@ QUERY="$*"
 curl -sN -X POST "http://$HOST:8000/v1/chat/completions" \
 -H "Content-Type: application/json" \
 -d "{
-\"model\": \"mlx-community/DeepSeek-V3.1-8bit\",
+\"model\": \"mlx-community/Llama-3.3-70B-Instruct-8bit\",
 \"stream\": true,
 \"messages\": [{ \"role\": \"user\", \"content\": \"$QUERY\" }]
 }" |
@@ -21,4 +21,4 @@ curl -sN -X POST "http://$HOST:8000/v1/chat/completions" \
 grep --line-buffered -v 'data: \[DONE\]' |
 cut -d' ' -f2- |
 jq -r --unbuffered '.choices[].delta.content // empty' |
-awk '{ORS=""; print; fflush()} END {print "\n"}'
+awk '{ORS=""; print; fflush()} END {print "\n"}'