diff --git a/src/openllm/_package.py b/src/openllm/_package.py
index 893d5b83..2b438299 100644
--- a/src/openllm/_package.py
+++ b/src/openllm/_package.py
@@ -168,7 +168,7 @@ def build(model_name: str, *, __cli__: bool = False, **attrs: t.Any) -> tuple[be
     current_model_envvar = os.environ.pop("OPENLLM_MODEL", None)
     current_model_id_envvar = os.environ.pop("OPENLLM_MODEL_ID", None)
     _previously_built = False
-    workers = attrs.pop("_workers", None)
+    workers_per_resource = attrs.pop("_workers_per_resource", None)
     model_id: str = attrs.pop("model_id", None)
 
     llm_config = openllm.AutoConfig.for_model(model_name)
@@ -193,7 +193,9 @@ def build(model_name: str, *, __cli__: bool = False, **attrs: t.Any) -> tuple[be
         labels = dict(llm.identifying_params)
         labels.update({"_type": llm.llm_type, "_framework": to_use_framework})
         service_name = f"generated_{llm.config.__openllm_model_name__}_service.py"
-        workers_per_resource = utils.first_not_none(workers, default=llm.config.__openllm_workers_per_resource__)
+        workers_per_resource = utils.first_not_none(
+            workers_per_resource, default=llm.config.__openllm_workers_per_resource__
+        )
 
         with fs.open_fs(f"temp://llm_{llm.config.__openllm_model_name__}") as llm_fs:
             # add service.py definition to this temporary folder
diff --git a/src/openllm/cli.py b/src/openllm/cli.py
index ffa56017..22b4b272 100644
--- a/src/openllm/cli.py
+++ b/src/openllm/cli.py
@@ -162,11 +162,11 @@ Available model_id(s): {llm_config.__openllm_model_ids__} [default: {llm_config.
         help=f"Assign GPU devices (if available) for {model_name}.",
         show_envvar=True,
     )
-    @workers_option(cog.optgroup)
+    @workers_per_resource_option(cog.optgroup)
     def model_start(
         server_timeout: int | None,
         model_id: str | None,
-        workers: float | None,
+        workers_per_resource: float | None,
         device: tuple[str, ...] | None,
         **attrs: t.Any,
     ) -> openllm.LLMConfig:
@@ -201,7 +201,7 @@ Available model_id(s): {llm_config.__openllm_model_ids__} [default: {llm_config.
             ]
         )
 
-        workers_per_resource = first_not_none(workers, default=llm.config.__openllm_workers_per_resource__)
+        workers_per_resource = first_not_none(workers_per_resource, default=llm.config.__openllm_workers_per_resource__)
         server_timeout = first_not_none(server_timeout, default=llm.config.__openllm_timeout__)
 
         num_workers = int(1 / workers_per_resource)
@@ -543,10 +543,30 @@ class NargsOptions(cog.GroupedOption):
         return retval
 
 
-def parse_device_callback(_: click.Context, params: click.Parameter, value: tuple[str, ...] | None) -> t.Any:
+def parse_device_callback(
+    _: click.Context, params: click.Parameter, value: tuple[str, ...] | t.Literal["all"] | None
+) -> t.Any:
     if value is None:
         return value
 
+    # NOTE: --device all is a special case
+    if isinstance(value, str):
+        if value != "all":
+            raise RuntimeError(f"{params} parameter only accept 'all' as a string value.")
+        import pynvml  # transitive dependencies of BentoML
+
+        try:
+            pynvml.nvmlInit()
+            return tuple(range(pynvml.nvmlDeviceGetCount()))
+        except (pynvml.nvml.NVMLError, OSError):
+            logger.debug("GPU not detected. Unable to initialize pynvml lib.")
+            return ()
+        finally:
+            try:
+                pynvml.nvmlShutdown()
+            except Exception:
+                pass
+
     if not LazyType(TupleStrAny).isinstance(value):
         raise RuntimeError(f"{params} only accept multiple values.")
     parsed: tuple[str, ...] = tuple()
@@ -608,7 +628,7 @@ def model_id_option(factory: t.Any, model_env: ModelEnv | None = None):
     )
 
 
-def workers_option(factory: t.Any, build: bool = False):
+def workers_per_resource_option(factory: t.Any, build: bool = False):
     help_str = """Number of workers per resource assigned. See
     https://docs.bentoml.org/en/latest/guides/scheduling.html#resource-scheduling-strategy
     for more information. By default, this is set to 1."""
@@ -618,7 +638,7 @@ def workers_option(factory: t.Any, build: bool = False):
         be provisioned in Kubernetes as well as in standalone container. This will ensure it has the same
         effect with 'openllm start --workers ...'"""
     return factory.option(
-        "--workers",
+        "--workers-per-resource",
         default=None,
         type=click.FLOAT,
         help=help_str,
@@ -673,8 +693,14 @@ def cli_factory() -> click.Group:
     @model_id_option(click)
     @output_option
     @click.option("--overwrite", is_flag=True, help="Overwrite existing Bento for given LLM if it already exists.")
-    @workers_option(click, build=True)
-    def build(model_name: str, model_id: str | None, overwrite: bool, output: OutputLiteral, workers: float | None):
+    @workers_per_resource_option(click, build=True)
+    def build(
+        model_name: str,
+        model_id: str | None,
+        overwrite: bool,
+        output: OutputLiteral,
+        workers_per_resource: float | None,
+    ):
         """Package a given models into a Bento.
 
         $ openllm build flan-t5
@@ -695,7 +721,7 @@ def cli_factory() -> click.Group:
             model_name,
            __cli__=True,
            model_id=model_id,
-            _workers=workers,
+            _workers_per_resource=workers_per_resource,
            _overwrite_existing_bento=overwrite,
        )
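
A few notes on the behaviour behind this rename. The CLI resolves the flag value against the config default and then derives the worker count as num_workers = int(1 / workers_per_resource), so a fractional value fans out into several workers. The helper below is a hypothetical standalone sketch of that arithmetic (resolve_num_workers is not a name from the patch):

    from __future__ import annotations

    def resolve_num_workers(workers_per_resource: float | None, config_default: float = 1.0) -> int:
        # Mirrors first_not_none(workers_per_resource, default=...): prefer the
        # CLI value when given, else fall back to the LLMConfig default.
        wpr = workers_per_resource if workers_per_resource is not None else config_default
        # Same computation as model_start: a fraction below 1 yields multiple workers.
        return int(1 / wpr)

    assert resolve_num_workers(None) == 1   # config default of 1.0 -> one worker
    assert resolve_num_workers(0.5) == 2    # two workers share one resource
    assert resolve_num_workers(0.25) == 4   # four workers share one resource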
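The new --device all branch in parse_device_callback enumerates every visible NVIDIA GPU through pynvml (pulled in transitively by BentoML, per the diff's own comment) and degrades to an empty tuple when no GPU or driver is present. A self-contained sketch of the same pattern, with a hypothetical function name:

    import pynvml  # arrives via BentoML's dependency tree

    def all_gpu_indices() -> tuple[int, ...]:
        # Hypothetical helper mirroring the '--device all' branch above.
        try:
            pynvml.nvmlInit()
            return tuple(range(pynvml.nvmlDeviceGetCount()))
        except (pynvml.NVMLError, OSError):
            # No driver or no GPU: return an empty tuple instead of failing startup.
            return ()
        finally:
            try:
                pynvml.nvmlShutdown()
            except Exception:
                # Shutdown can raise if nvmlInit never succeeded; safe to ignore.
                pass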
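Finally, renaming --workers to --workers-per-resource keeps the flag and the function parameter textually aligned, because click converts dashes in an option name to underscores when binding it. A minimal, hypothetical command demonstrating that mapping (not part of the patch):

    from __future__ import annotations

    import click

    @click.command()
    @click.option("--workers-per-resource", default=None, type=click.FLOAT)
    def start(workers_per_resource: float | None) -> None:
        # click binds '--workers-per-resource' to the 'workers_per_resource' parameter.
        click.echo(f"workers_per_resource={workers_per_resource}")

    if __name__ == "__main__":
        start()  # e.g. python start.py --workers-per-resource 0.5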