infra: update scripts to run update readme automatically (#658)

* infra: update scripts to run update readme automatically
  Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>
* chore: cleanup mirror
  Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>
* chore(dropdown): correctly format noteblock and important block
  Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>
* fix: whitespace aware
  Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>

---------

Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>
2026-08-01 10:38:50 -04:00 · 2023-11-15 02:22:49 -05:00
parent 2ea2f3fd4f
commit 034e08cf08
7 changed files with 1463 additions and 823 deletions
--- a/ADDING_NEW_MODEL.md
+++ b/ADDING_NEW_MODEL.md
@@ -15,7 +15,7 @@ Here's your roadmap:
      `$GIT_ROOT/openllm-core/src/openllm_core/config/configuration_{model_name}.py`
 - [ ] Update `$GIT_ROOT/openllm-core/src/openllm_core/config/__init__.py` to import the new model
 - [ ] Add your new model entry in `$GIT_ROOT/openllm-core/src/openllm_core/config/configuration_auto.py` with a tuple of the `model_name` alongside with the `ModelConfig`
- [ ] Run `./tools/update-config-stubs.py`
+- [ ] Run `./tools/update-config-stubs.py` and `./tools/update-readme.py`

 > [!NOTE]
 >
@@ -40,8 +40,3 @@ the `__init__` file, and the tuple under `CONFIG_MAPPING_NAMES` in [openllm-core
 Once you have completed the checklist above, raise a PR and the OpenLLMs
 maintainer will review it ASAP. Once the PR is merged, you should be able to see
 your model in the next release! 🎉 🎊
-
-### Updating README.md
-
-After a model is added, just ping OpenLLM's maintainers to update the README.md file
-with the new model.
--- a/README.md
+++ b/README.md
--- a/cz.py
+++ b/cz.py
@@ -1,5 +1,4 @@
 #!/usr/bin/env python3
-from __future__ import annotations
 import itertools
 import os
 import token
@@ -9,13 +8,15 @@ from tabulate import tabulate

 TOKEN_WHITELIST = [token.OP, token.NAME, token.NUMBER, token.STRING]

+_ignored = ['_version.py']

-def run_cz(dir: str, package: str):
+
+def run_cz(dir, package):
  headers = ['Name', 'Lines', 'Tokens/Line']
  table = []
  for path, _, files in os.walk(os.path.join(dir, 'src', package)):
    for name in files:
-      if not name.endswith('.py'):
+      if not name.endswith('.py') or name in _ignored:
        continue
      filepath = os.path.join(path, name)
      with tokenize.open(filepath) as file_:
@@ -28,7 +29,6 @@ def run_cz(dir: str, package: str):
            token_count / line_count if line_count != 0 else 0,
          ]
        )
-  print(f'\n{"=" * 80}\n')
  print(tabulate([headers, *sorted(table, key=lambda x: -x[1])], headers='firstrow', floatfmt='.1f') + '\n')
  print(
    tabulate(
@@ -43,15 +43,8 @@ def run_cz(dir: str, package: str):
    )
  )
  print(f'total line count for {package}: {sum([x[1] for x in table])}\n')
-
-
-def main() -> int:
-  run_cz('openllm-python', 'openllm')
-  run_cz('openllm-python', 'openllm_cli')
-  run_cz('openllm-core', 'openllm_core')
-  run_cz('openllm-client', 'openllm_client')
  return 0


 if __name__ == '__main__':
-  raise SystemExit(main())
+  raise SystemExit((lambda: run_cz('openllm-python', 'openllm') or 0)())
--- a/openllm-core/src/openllm_core/config/configuration_auto.py
+++ b/openllm-core/src/openllm_core/config/configuration_auto.py
@@ -28,21 +28,23 @@ else:

 # NOTE: This is the entrypoint when adding new model config
 CONFIG_MAPPING_NAMES = OrderedDict(
-  [
-    ('chatglm', 'ChatGLMConfig'),
-    ('dolly_v2', 'DollyV2Config'),
-    ('falcon', 'FalconConfig'),
-    ('flan_t5', 'FlanT5Config'),
-    ('gpt_neox', 'GPTNeoXConfig'),
-    ('llama', 'LlamaConfig'),
-    ('mpt', 'MPTConfig'),
-    ('opt', 'OPTConfig'),
-    ('stablelm', 'StableLMConfig'),
-    ('starcoder', 'StarCoderConfig'),
-    ('mistral', 'MistralConfig'),
-    ('yi', 'YiConfig'),
-    ('baichuan', 'BaichuanConfig'),
-  ]
+  sorted(
+    [
+      ('chatglm', 'ChatGLMConfig'),
+      ('dolly_v2', 'DollyV2Config'),
+      ('falcon', 'FalconConfig'),
+      ('flan_t5', 'FlanT5Config'),
+      ('gpt_neox', 'GPTNeoXConfig'),
+      ('llama', 'LlamaConfig'),
+      ('mpt', 'MPTConfig'),
+      ('opt', 'OPTConfig'),
+      ('stablelm', 'StableLMConfig'),
+      ('starcoder', 'StarCoderConfig'),
+      ('mistral', 'MistralConfig'),
+      ('yi', 'YiConfig'),
+      ('baichuan', 'BaichuanConfig'),
+    ]
+  )
 )


--- a/openllm-core/src/openllm_core/config/configuration_mistral.py
+++ b/openllm-core/src/openllm_core/config/configuration_mistral.py
@@ -34,7 +34,12 @@ class MistralConfig(openllm_core.LLMConfig):
    'serialisation': 'safetensors',
    # NOTE: see https://docs.mistral.ai/usage/guardrailing/
    # and https://docs.mistral.ai/llm/mistral-instruct-v0.1
-    'model_ids': ['mistralai/Mistral-7B-Instruct-v0.1', 'mistralai/Mistral-7B-v0.1'],
+    'model_ids': [
+      'HuggingFaceH4/zephyr-7b-alpha',
+      'HuggingFaceH4/zephyr-7b-beta',
+      'mistralai/Mistral-7B-Instruct-v0.1',
+      'mistralai/Mistral-7B-v0.1',
+    ],
    'fine_tune_strategies': (
      {'adapter_type': 'lora', 'r': 64, 'lora_alpha': 16, 'lora_dropout': 0.1, 'bias': 'none'},
    ),
--- a/openllm-python/README.md
+++ b/openllm-python/README.md
--- a/tools/update-readme.py
+++ b/tools/update-readme.py
@@ -0,0 +1,109 @@
+#!/usr/bin/env python3
+import os
+import shutil
+import sys
+
+import tomlkit
+
+# Marker lines (HTML comments carrying this script's file name) that delimit the generated
+# README section; main() replaces whatever sits between them.
+START_COMMENT = f'<!-- {os.path.basename(__file__)}: start -->\n'
+END_COMMENT = f'<!-- {os.path.basename(__file__)}: stop -->\n'
+
+# Repository root: the parent of the directory that contains this script.
+ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+# Put the in-tree openllm-core sources first on sys.path so the import below can find them.
+sys.path.insert(0, os.path.join(ROOT, 'openllm-core', 'src'))
+from openllm_core.config import CONFIG_MAPPING
+
+
+def markdown_noteblock(text: str):
+  return ['\n', f'> **Note:** {text}\n']
+
+
+def markdown_importantblock(text: str):
+  return ['\n', f'> **Important:** {text}\n']
+
+
+def main() -> int:
+  with open(os.path.join(ROOT, 'openllm-python', 'pyproject.toml'), 'r') as f:
+    deps = tomlkit.parse(f.read()).value['project']['optional-dependencies']
+  with open(os.path.join(ROOT, 'README.md'), 'r') as f:
+    readme = f.readlines()
+
+  start_index, stop_index = readme.index(START_COMMENT), readme.index(END_COMMENT)
+
+  content = []
+
+  for it in CONFIG_MAPPING.values():
+    it = it()
+    details_block = ['<details>\n']
+    architecture_name = it.__class__.__name__[:-6]
+    details_block.extend(
+      [
+        f'<summary>{architecture_name}</summary>\n\n',
+        '### Quickstart\n',
+        f'Run the following command to quickly spin up a {architecture_name} server:\n',
+        f"""\
+```bash
+{'' if not it['trust_remote_code'] else 'TRUST_REMOTE_CODE=True '}openllm start {it['default_id']}
+```""",
+        'In a different terminal, run the following command to interact with the server:\n',
+        """\
+```bash
+export OPENLLM_ENDPOINT=http://localhost:3000
+openllm query 'What are large language models?'
+```""",
+        *markdown_noteblock(
+          f"Any {architecture_name} variants can be deployed with OpenLLM. Visit the [HuggingFace Model Hub](https://huggingface.co/models?sort=trending&search={it['model_name']}) to see more {architecture_name}-compatible models.\n"
+        ),
+        '\n### Supported models\n',
+        f'You can specify any of the following {architecture_name} models via `openllm start`:\n\n',
+      ]
+    )
+    list_ids = [f'- [{model_id}](https://huggingface.co/{model_id})' for model_id in it['model_ids']]
+    details_block.extend(list_ids)
+    details_block.extend(
+      [
+        '### Supported backends\n',
+        'OpenLLM will support vLLM and PyTorch as default backend. By default, it will use vLLM if vLLM is available, otherwise fallback to PyTorch.\n',
+        *markdown_importantblock(
+          'We recommend user to explicitly specify `--backend` to choose the desired backend to run the model. If you have access to a GPU, always use `--backend vllm`.\n'
+        ),
+      ]
+    )
+    if 'vllm' in it['backend']:
+      details_block.extend(
+        [
+          '\n- vLLM (Recommended):\n\n',
+          'To install vLLM, run `pip install "openllm[vllm]"`\n',
+          f"""\
+```bash
+openllm start {it['model_ids'][0]} --backend vllm
+```""",
+          *markdown_importantblock(
+            'Using vLLM requires a GPU that has architecture newer than 8.0 to get the best performance for serving. It is recommended that for all serving usecase in production, you should choose vLLM for serving.'
+          ),
+          *markdown_noteblock('Currently, adapters are yet to be supported with vLLM.'),
+        ]
+      )
+    if 'pt' in it['backend']:
+      details_block.extend(
+        [
+          '\n- PyTorch:\n\n',
+          f"""\
+```bash
+openllm start {it['model_ids'][0]} --backend pt
+```""",
+        ]
+      )
+
+    details_block.append('\n</details>\n\n')
+    content.append('\n'.join(details_block))
+
+  readme = readme[:start_index] + [START_COMMENT] + content + [END_COMMENT] + readme[stop_index + 1 :]
+  with open(os.path.join(ROOT, 'README.md'), 'w') as f:
+    f.writelines(readme)
+
+  shutil.copyfile(os.path.join(ROOT, 'README.md'), os.path.join(ROOT, 'openllm-python', 'README.md'))
+  return 0
+
+
+# When run as a script, use main()'s integer return value as the process exit status.
+if __name__ == '__main__':
+  raise SystemExit(main())