diff --git a/openllm-core/src/openllm_core/config/configuration_phi.py b/openllm-core/src/openllm_core/config/configuration_phi.py
index 7ce8e41e..3bfb0221 100644
--- a/openllm-core/src/openllm_core/config/configuration_phi.py
+++ b/openllm-core/src/openllm_core/config/configuration_phi.py
@@ -22,11 +22,18 @@ class PhiConfig(openllm_core.LLMConfig):
   metadata_config: ModelSettings = pydantic.Field(
     default={
       'url': 'https://arxiv.org/abs/2309.05463',
-      'architecture': 'PhiForCausalLM',
+      'architecture': 'Phi3ForCausalLM',
       'trust_remote_code': True,
-      'default_id': 'microsoft/phi-2',
+      'default_id': 'microsoft/Phi-3-mini-4k-instruct',
       'serialisation': 'safetensors',
-      'model_ids': ['microsoft/phi-2', 'microsoft/phi-1_5'],
+      'model_ids': [
+        'microsoft/Phi-3-mini-4k-instruct',
+        'microsoft/Phi-3-mini-128k-instruct',
+        'microsoft/Phi-3-small-8k-instruct',
+        'microsoft/Phi-3-small-128k-instruct',
+        'microsoft/Phi-3-medium-4k-instruct',
+        'microsoft/Phi-3-medium-128k-instruct',
+      ],
       'fine_tune_strategies': (
         {'adapter_type': 'lora', 'r': 64, 'lora_alpha': 16, 'lora_dropout': 0.1, 'bias': 'none'},
       ),
diff --git a/openllm-python/src/_openllm_tiny/_entrypoint.py b/openllm-python/src/_openllm_tiny/_entrypoint.py
index fe379f46..4b3faaa3 100644
--- a/openllm-python/src/_openllm_tiny/_entrypoint.py
+++ b/openllm-python/src/_openllm_tiny/_entrypoint.py
@@ -436,7 +436,7 @@ def build_command(
   service_config = dict(
     resources={
       'gpu' if device else 'cpu': len(device) if device else '1',
-      'gpu_type': recommended_instance_type(model_id, bentomodel),
+      'gpu_type': recommended_instance_type(model_id, bentomodel, serialisation),
     },
     traffic=dict(timeout=timeout),
   )
diff --git a/openllm-python/src/_openllm_tiny/_helpers.py b/openllm-python/src/_openllm_tiny/_helpers.py
index ae60fa63..5d968fcb 100644
--- a/openllm-python/src/_openllm_tiny/_helpers.py
+++ b/openllm-python/src/_openllm_tiny/_helpers.py
@@ -18,8 +18,9 @@ from openllm_core.protocol.openai import (
   LogProbs,
   UsageInfo,
 )
+from openllm_core._typing_compat import LiteralSerialisation
 from starlette.requests import Request
-from huggingface_hub import scan_cache_dir
+from huggingface_hub import HfApi
 
 if t.TYPE_CHECKING:
   import bentoml
@@ -328,12 +329,21 @@ class OpenAI:
 RECOMMENDED_MAPPING = {'nvidia-l4': 24e9, 'nvidia-a10g': 24e9, 'nvidia-tesla-a100': 40e9, 'nvidia-a100-80gb': 80e9}
 
 
-def recommended_instance_type(model_id: str, bentomodel: bentoml.Model | None = None):
+def recommended_instance_type(
+  model_id: str, bentomodel: bentoml.Model | None = None, serialisation: LiteralSerialisation = 'safetensors'
+):
+  extensions = 'safetensors' if serialisation == 'safetensors' else 'pt'
+  api = HfApi()
+
   if bentomodel is not None:
     size = sum(f.stat().st_size for f in pathlib.Path(resolve_filepath(model_id)).glob('**/*') if f.is_file())
   else:
-    info = next(filter(lambda repo: repo.repo_id == model_id, scan_cache_dir().repos))
-    size = info.size_on_disk
+    size = sum(
+      i.size
+      for i in api.get_paths_info(
+        model_id, list(filter(lambda x: x.endswith(f'.{extensions}'), api.list_repo_files(model_id)))
+      )
+    )
   # find the first occurence of the gpu_type in the recommended mapping such that "size" should be less than or equal to 70% of the recommended size
   for gpu, max_size in RECOMMENDED_MAPPING.items():
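
For context, a minimal standalone sketch of the sizing heuristic this diff moves to: instead of scanning the local Hugging Face cache with scan_cache_dir, the remote repo is queried with HfApi so only the weight files matching the chosen serialisation are counted. The remote_weight_size helper name is made up for illustration, and the `size <= max_size * 0.7` comparison is inferred from the comment in the diff (the loop body is cut off in the hunk above); HfApi.list_repo_files and HfApi.get_paths_info are the real huggingface_hub calls the diff uses.

# Sketch only; assumes a public Hugging Face repo and huggingface_hub installed.
from huggingface_hub import HfApi

RECOMMENDED_MAPPING = {'nvidia-l4': 24e9, 'nvidia-a10g': 24e9, 'nvidia-tesla-a100': 40e9, 'nvidia-a100-80gb': 80e9}


def remote_weight_size(model_id: str, serialisation: str = 'safetensors') -> int:
  # Hypothetical helper mirroring the else-branch of recommended_instance_type.
  ext = 'safetensors' if serialisation == 'safetensors' else 'pt'
  api = HfApi()
  # Sum only the weight shards for the chosen format, rather than the whole
  # on-disk cache entry as the old scan_cache_dir path did.
  weight_files = [f for f in api.list_repo_files(model_id) if f.endswith(f'.{ext}')]
  return sum(info.size for info in api.get_paths_info(model_id, weight_files))


size = remote_weight_size('microsoft/Phi-3-mini-4k-instruct')
# Dict order matters: smaller GPUs are listed first, so the first fit wins.
for gpu, max_size in RECOMMENDED_MAPPING.items():
  if size <= max_size * 0.7:  # inferred from the diff's comment: leave ~30% headroom
    print(gpu)
    break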