mirror of
https://github.com/bentoml/OpenLLM.git
synced 2026-04-24 00:47:44 -04:00
feat: continuous batching with vLLM (#349)
Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com> * feat: continuous batching Signed-off-by: paperspace <29749331+aarnphm@users.noreply.github.com> * chore: add changelog Signed-off-by: paperspace <29749331+aarnphm@users.noreply.github.com> * chore: add one shot generation Signed-off-by: paperspace <29749331+aarnphm@users.noreply.github.com> --------- Signed-off-by: paperspace <29749331+aarnphm@users.noreply.github.com> Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>
This commit is contained in:
@@ -20,7 +20,6 @@ def model_settings(draw: st.DrawFn):
|
||||
'model_ids': st.lists(st.text(), min_size=1),
|
||||
'architecture': st.text(min_size=1),
|
||||
'url': st.text(),
|
||||
'requires_gpu': st.booleans(),
|
||||
'trust_remote_code': st.booleans(),
|
||||
'requirements': st.none() | st.lists(st.text(), min_size=1),
|
||||
'default_backend': st.dictionaries(st.sampled_from(['cpu', 'nvidia.com/gpu']), st.sampled_from(['vllm', 'pt', 'tf', 'flax'])),
|
||||
|
||||
Reference in New Issue
Block a user