diff --git a/.gitattributes b/.gitattributes index 41ef37f2..1d6c3384 100644 --- a/.gitattributes +++ b/.gitattributes @@ -6,6 +6,8 @@ openllm-python/tests/models/__snapshots__/* linguist-generated=true openllm-python/README.md linguist-generated=true openllm-python/CHANGELOG.md linguist-generated=true +openllm-core/src/openllm_core/config/__init__.py linguist-generated=true +openllm-core/src/openllm_core/utils/import_utils.pyi linguist-generated=true # Others Formula/openllm.rb linguist-generated=true diff --git a/.github/INFRA.md b/.github/INFRA.md index 147c854b..93c744ea 100644 --- a/.github/INFRA.md +++ b/.github/INFRA.md @@ -22,7 +22,7 @@ The folder structure of this are as follow: ├── build.yml # Self-hosted EC2 runners ├── ci.yml # CI workflow ├── cleanup.yml # Cache cleanup - ├── compile-pypi.yml # Compile PyPI packages + ├── build-pypi.yml # Build PyPI packages ├── create-releases.yml # Create GitHub releases ├── cron.yml # Cron jobs └── release-notes.yml # Generate release notes @@ -53,7 +53,7 @@ There are a few ways to trigger this workflow: ### Wheel compilation -The workflow for wheel compilation is located in [compile-pypi.yml](/.github/workflows/compile-pypi.yml). +The workflow for wheel compilation is located in [build-pypi.yml](/.github/workflows/build-pypi.yml). To speed up CI, opt in to the following label `02 - Wheel Build` on pull request or add `[wheel build]` to commit message. 
diff --git a/.github/workflows/build-pypi.yml b/.github/workflows/build-pypi.yml new file mode 100644 index 00000000..0742a94d --- /dev/null +++ b/.github/workflows/build-pypi.yml @@ -0,0 +1,148 @@ +name: Compile PyPI package +on: + workflow_dispatch: + inputs: + tags: + description: 'Tag for releases' + required: true + type: string + workflow_call: + inputs: + tags: + required: true + type: string + outputs: + success: + description: 'Whether the build is successful or not' + value: ${{ jobs.sucess-push.outputs.success }} + push: + branches: [main] + paths-ignore: + - 'docs/**' + - 'bazel/**' + - 'typings/**' + - '*.md' + - 'changelog.d/**' + - 'assets/**' + pull_request: + branches: [main] + paths-ignore: + - 'docs/**' + - 'bazel/**' + - 'typings/**' + - '*.md' + - 'changelog.d/**' + - 'assets/**' +env: + LINES: 120 + COLUMNS: 120 + HATCH_VERBOSE: 2 + CIBW_BUILD_FRONTEND: build + CIBW_ENVIRONMENT_PASS_LINUX: > + HATCH_BUILD_HOOKS_ENABLE MYPYPATH + +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }} + cancel-in-progress: true +jobs: + get_commit_message: + name: Get commit message + runs-on: ubuntu-latest + if: "github.repository == 'bentoml/OpenLLM'" # Don't run on fork repository + outputs: + message: ${{ steps.commit_message.outputs.message }} + steps: + - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # ratchet:actions/checkout@v4.1.1 + # Gets the correct commit message for pull request + with: + ref: ${{ github.event.pull_request.head.sha }} + - name: Get commit message + id: commit_message + run: | + set -xe + COMMIT_MSG=$(git log --no-merges -1 --oneline) + echo "message=$COMMIT_MSG" >> $GITHUB_OUTPUT + echo github.ref ${{ github.ref }} + pure-wheels-sdist: + name: Pure wheels and sdist distribution (${{ matrix.directory }}) + runs-on: ubuntu-latest + needs: get_commit_message + if: >- + contains(needs.get_commit_message.outputs.message, '[wheel build]') || github.event_name ==
'workflow_dispatch' || github.event_name == 'workflow_call' || (github.event_name == 'pull_request' && contains(github.event.pull_request.labels.*.name, '02 - Wheel Build')) || (github.event_name == 'push' && (startsWith(github.ref, 'refs/tags/v') || startsWith(github.ref, 'refs/heads/main'))) + strategy: + fail-fast: false + matrix: + directory: ['openllm-core', 'openllm-python', 'openllm-client'] + steps: + - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # ratchet:actions/checkout@v4.1.1 + with: + fetch-depth: 0 + ref: '${{ inputs.tags }}' + - uses: bentoml/setup-bentoml-action@862aa8fa0e0c3793fcca4bfe7a62717a497417e4 # ratchet:bentoml/setup-bentoml-action@v1 + with: + bentoml-version: 'main' + python-version-file: .python-version-default + - name: Build + run: hatch build + working-directory: ${{ matrix.directory }} + - name: Upload artifacts + uses: actions/upload-artifact@a8a3f3ad30e3422c9c7b888a15615d19a852ae32 # ratchet:actions/upload-artifact@v3 + with: + name: python-artefacts + path: ${{ matrix.directory }}/dist/* + if-no-files-found: error + check-download-artefacts: + name: dry-run for downloading artefacts + if: github.event_name == 'pull_request' + needs: [pure-wheels-sdist] + runs-on: ubuntu-latest + steps: + - name: Download Python artifacts + uses: actions/download-artifact@9bc31d5ccc31df68ecc42ccf4149144866c47d8a # ratchet:actions/download-artifact@v3 + with: + name: python-artefacts + path: dist + - name: dry ls + run: ls -rthlaR + push-nightly: + name: Push nightly wheels + if: ${{ !github.event.repository.fork && github.event_name == 'push' }} + runs-on: ubuntu-latest + permissions: + id-token: write + # needs: [pure-wheels-sdist, mypyc] + needs: [pure-wheels-sdist] + steps: + - name: Download Python artifacts + uses: actions/download-artifact@9bc31d5ccc31df68ecc42ccf4149144866c47d8a # ratchet:actions/download-artifact@v3 + with: + name: python-artefacts + path: dist + - name: Publish nightly wheels to test.pypi.org + uses: 
pypa/gh-action-pypi-publish@2f6f737ca5f74c637829c0f5c3acd0e29ea5e8bf # ratchet:pypa/gh-action-pypi-publish@release/v1 + with: + repository-url: https://test.pypi.org/legacy/ + sucess-push: # https://github.com/marketplace/actions/alls-green#why + if: github.event_name == 'push' + needs: [push-nightly, pure-wheels-sdist] + runs-on: ubuntu-latest + outputs: + success: ${{ steps.evergreen.outputs.success }} + steps: + - name: Decide whether the needed jobs succeeded or failed + uses: re-actors/alls-green@05ac9388f0aebcb5727afa17fcccfecd6f8ec5fe # ratchet:re-actors/alls-green@release/v1 + id: evergreen + with: + jobs: ${{ toJSON(needs) }} + sucess-pr: # https://github.com/marketplace/actions/alls-green#why + if: github.event_name == 'pull_request' + needs: [pure-wheels-sdist] + runs-on: ubuntu-latest + outputs: + success: ${{ steps.evergreen.outputs.success }} + steps: + - name: Decide whether the needed jobs succeeded or failed + uses: re-actors/alls-green@05ac9388f0aebcb5727afa17fcccfecd6f8ec5fe # ratchet:re-actors/alls-green@release/v1 + id: evergreen + with: + jobs: ${{ toJSON(needs) }} diff --git a/.github/workflows/create-releases.yml b/.github/workflows/create-releases.yml index 538f3cdb..a708840e 100644 --- a/.github/workflows/create-releases.yml +++ b/.github/workflows/create-releases.yml @@ -66,17 +66,17 @@ jobs: version="$(git describe --tags "$(git rev-list --tags --max-count=1)")" echo "Release version: $version" echo "version=$version" >> $GITHUB_OUTPUT - compile-pypi: + build-pypi: if: github.repository_owner == 'bentoml' needs: - release name: Compile PyPI distribution for OpenLLM - uses: bentoml/OpenLLM/.github/workflows/compile-pypi.yml@main # ratchet:exclude + uses: bentoml/OpenLLM/.github/workflows/build-pypi.yml@main # ratchet:exclude with: tags: ${{ needs.release.outputs.version }} publish-python: needs: - - compile-pypi + - build-pypi runs-on: ubuntu-latest permissions: id-token: write @@ -85,32 +85,12 @@ jobs: with: ref: '${{
needs.release.outputs.version }}' token: ${{ secrets.OPENLLM_PAT }} - # NOTE: Keep this section in sync with compile-pypi.yml + # NOTE: Keep this section in sync with build-pypi.yml - name: Download Python artifacts uses: actions/download-artifact@9bc31d5ccc31df68ecc42ccf4149144866c47d8a # ratchet:actions/download-artifact@v3 with: name: python-artefacts path: dist - # - name: Download Linux x86_64 compiled artifacts - # uses: actions/download-artifact@7a1cd3216ca9260cd8022db641d960b1db4d1be4 # ratchet:actions/download-artifact@v3 - # with: - # name: linux-x86_64-mypyc-wheels - # path: dist - # - name: Download MacOS x86_64 compiled artifacts - # uses: actions/download-artifact@7a1cd3216ca9260cd8022db641d960b1db4d1be4 # ratchet:actions/download-artifact@v3 - # with: - # name: macos-x86_64-mypyc-wheels - # path: dist - # - name: Download MacOS arm64 compiled artifacts - # uses: actions/download-artifact@7a1cd3216ca9260cd8022db641d960b1db4d1be4 # ratchet:actions/download-artifact@v3 - # with: - # name: macos-arm64-mypyc-wheels - # path: dist - # - name: Download MacOS universal2 compiled artifacts - # uses: actions/download-artifact@7a1cd3216ca9260cd8022db641d960b1db4d1be4 # ratchet:actions/download-artifact@v3 - # with: - # name: macos-universal2-mypyc-wheels - # path: dist - name: Smoke test compiled artefacts run: ls -R dist - name: Publish to PyPI @@ -176,7 +156,7 @@ jobs: git push origin HEAD:main binary-distribution: if: github.repository_owner == 'bentoml' - needs: compile-pypi + needs: build-pypi name: Create binary/wheels distribution uses: bentoml/OpenLLM/.github/workflows/binary-releases.yml@main # ratchet:exclude release-notes: diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index b0437b2a..ba3d8cd9 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -13,7 +13,11 @@ repos: - id: ruff alias: r verbose: true - args: [--exit-non-zero-on-fix, --show-fixes] + args: [--exit-non-zero-on-fix, --show-fixes, --fix] + types_or: 
[python, pyi, jupyter] + - id: ruff-format + alias: rf + verbose: true types_or: [python, pyi, jupyter] - repo: local hooks: diff --git a/.ruff.toml b/.ruff.toml index 7cc1e070..deecc257 100644 --- a/.ruff.toml +++ b/.ruff.toml @@ -1,6 +1,21 @@ exclude = ["tools", "examples", "openllm-python/src/openllm_cli/playground/"] extend-include = ["*.ipynb"] preview = true +line-length = 119 +indent-width = 2 + +[format] +preview = true +quote-style = "single" +indent-style = "space" +skip-magic-trailing-comma = true + +[lint] +typing-modules = [ + "openllm_core._typing_compat", + "openllm_client._typing_compat", +] +ignore = ["RUF012"] select = [ "F", "G", # flake8-logging-format @@ -26,19 +41,6 @@ select = [ "W293", "UP039", # unnecessary-class-parentheses ] -ignore = ["RUF012"] -line-length = 150 -indent-width = 2 -typing-modules = [ - "openllm_core._typing_compat", - "openllm_client._typing_compat", -] - -[format] -preview = true -quote-style = "single" -indent-style = "space" -skip-magic-trailing-comma = true [lint.pydocstyle] convention = "google" diff --git a/ADDING_NEW_MODEL.md b/ADDING_NEW_MODEL.md index 15996e19..b8a846d5 100644 --- a/ADDING_NEW_MODEL.md +++ b/ADDING_NEW_MODEL.md @@ -8,14 +8,14 @@ environment by referring to our ## Procedure All the relevant code for incorporating a new model resides within -[`$GIT_ROOT/openllm-core/src/openllm_core/config`](../openllm-core/src/openllm_core/config/) `model_name` in snake_case. +[`$GIT_ROOT/openllm-core/src/openllm_core/config`](/openllm-core/src/openllm_core/config/) `model_name` in snake_case. 
Here's your roadmap: - [ ] Generate model configuration file: `$GIT_ROOT/openllm-core/src/openllm_core/config/configuration_{model_name}.py` - [ ] Update `$GIT_ROOT/openllm-core/src/openllm_core/config/__init__.py` to import the new model - [ ] Add your new model entry in `$GIT_ROOT/openllm-core/src/openllm_core/config/configuration_auto.py` with a tuple of the `model_name` alongside with the `ModelConfig` -- [ ] Run `./tools/update-config-stubs.py` and `./tools/update-readme.py` +- [ ] Run `bash all.sh` > [!NOTE] > diff --git a/README.md b/README.md index 59a46eb4..9cf84650 100644 --- a/README.md +++ b/README.md @@ -1004,7 +1004,7 @@ openllm start facebook/opt-125m --backend pt Run the following command to quickly spin up a Phi server: ```bash -TRUST_REMOTE_CODE=True openllm start microsoft/phi-1_5 +TRUST_REMOTE_CODE=True openllm start microsoft/phi-2 ``` In a different terminal, run the following command to interact with the server: @@ -1023,6 +1023,7 @@ openllm query 'What are large language models?' You can specify any of the following Phi models via `openllm start`: +- [microsoft/phi-2](https://huggingface.co/microsoft/phi-2) - [microsoft/phi-1_5](https://huggingface.co/microsoft/phi-1_5) ### Supported backends @@ -1041,7 +1042,7 @@ OpenLLM will support vLLM and PyTorch as default backend. 
By default, it will us To install vLLM, run `pip install "openllm[vllm]"` ```bash -TRUST_REMOTE_CODE=True openllm start microsoft/phi-1_5 --backend vllm +TRUST_REMOTE_CODE=True openllm start microsoft/phi-2 --backend vllm ``` @@ -1056,7 +1057,7 @@ TRUST_REMOTE_CODE=True openllm start microsoft/phi-1_5 --backend vllm ```bash -TRUST_REMOTE_CODE=True openllm start microsoft/phi-1_5 --backend pt +TRUST_REMOTE_CODE=True openllm start microsoft/phi-2 --backend pt ``` diff --git a/cz.py b/cz.py index 2d7e38af..f8b6ee8c 100755 --- a/cz.py +++ b/cz.py @@ -22,13 +22,19 @@ def run_cz(args): with tokenize.open(filepath) as file_: tokens = [t for t in tokenize.generate_tokens(file_.readline) if t.type in TOKEN_WHITELIST] token_count, line_count = len(tokens), len(set([t.start[0] for t in tokens])) - table.append([filepath.replace(os.path.join(args.dir, 'src'), ''), line_count, token_count / line_count if line_count != 0 else 0]) + table.append([ + filepath.replace(os.path.join(args.dir, 'src'), ''), + line_count, + token_count / line_count if line_count != 0 else 0, + ]) print(tabulate([headers, *sorted(table, key=lambda x: -x[1])], headers='firstrow', floatfmt='.1f') + '\n') print( tabulate( [ (dir_name, sum([x[1] for x in group])) - for dir_name, group in itertools.groupby(sorted([(x[0].rsplit('/', 1)[0], x[1]) for x in table]), key=lambda x: x[0]) + for dir_name, group in itertools.groupby( + sorted([(x[0].rsplit('/', 1)[0], x[1]) for x in table]), key=lambda x: x[0] + ) ], headers=['Directory', 'LOC'], floatfmt='.1f', @@ -46,6 +52,10 @@ if __name__ == '__main__': parser = argparse.ArgumentParser() parser.add_argument( - '--dir', choices=['openllm-python', 'openllm-core', 'openllm-client'], help='directory to check', default='openllm-python', required=False + '--dir', + choices=['openllm-python', 'openllm-core', 'openllm-client'], + help='directory to check', + default='openllm-python', + required=False, ) raise SystemExit(run_cz(parser.parse_args())) diff --git a/local.sh 
b/local.sh index 8ce8bb45..42ab8ddb 100755 --- a/local.sh +++ b/local.sh @@ -98,9 +98,19 @@ else EXTENSIONS_STR=${EXTENSIONS_STR// /,} # Replace spaces with commas fi -pip install -e "$GIT_ROOT/openllm-core" -pip install -e "$GIT_ROOT/openllm-client" -pip install -e "$GIT_ROOT/openllm-python$EXTENSIONS_STR" -v +# check if uv is installed +if ! command -v uv > /dev/null 2>&1; then + echo "Installing uv..." + curl -LsSf https://astral.sh/uv/install.sh | sh +fi + +# check if there is a $GIT_ROOT/.venv directory, if not, create it +if [ ! -d "$GIT_ROOT/.venv" ]; then + uv venv +fi + +uv pip install --editable "$GIT_ROOT/openllm-python$EXTENSIONS_STR" +uv pip install --editable "$GIT_ROOT/openllm-core" +uv pip install --editable "$GIT_ROOT/openllm-client" -# running all script bash "$GIT_ROOT/all.sh"