feat: continuous batching with vLLM (#349)

Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com> * feat: continuous batching Signed-off-by: paperspace <29749331+aarnphm@users.noreply.github.com> * chore: add changelog Signed-off-by: paperspace <29749331+aarnphm@users.noreply.github.com> * chore: add one shot generation Signed-off-by: paperspace <29749331+aarnphm@users.noreply.github.com> --------- Signed-off-by: paperspace <29749331+aarnphm@users.noreply.github.com> Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>
2026-05-24 16:44:39 -04:00 · 2023-09-14 03:09:36 -04:00
parent e35e143093
commit ad9107958d
22 changed files with 336 additions and 232 deletions
--- a/openllm-python/tests/_strategies/_configuration.py
+++ b/openllm-python/tests/_strategies/_configuration.py
@@ -20,7 +20,6 @@ def model_settings(draw: st.DrawFn):
      'model_ids': st.lists(st.text(), min_size=1),
      'architecture': st.text(min_size=1),
      'url': st.text(),
-      'requires_gpu': st.booleans(),
      'trust_remote_code': st.booleans(),
      'requirements': st.none() | st.lists(st.text(), min_size=1),
      'default_backend': st.dictionaries(st.sampled_from(['cpu', 'nvidia.com/gpu']), st.sampled_from(['vllm', 'pt', 'tf', 'flax'])),