diff --git a/openllm-core/src/openllm_core/_configuration.py b/openllm-core/src/openllm_core/_configuration.py
index fbbf2cf7..c7ca53b9 100644
--- a/openllm-core/src/openllm_core/_configuration.py
+++ b/openllm-core/src/openllm_core/_configuration.py
@@ -69,49 +69,29 @@ _object_setattr = object.__setattr__
 
 @attr.frozen(slots=True, repr=False, init=False)
 class GenerationConfig(ReprMixin):
-  '''GenerationConfig is the attrs-compatible version of ``transformers.GenerationConfig``, with some additional validation and environment constructor.
-
-  Note that we always set `do_sample=True`. This class is not designed to be used directly, rather
-  to be used conjunction with LLMConfig. The instance of the generation config can then be accessed
-  via ``LLMConfig.generation_config``.
-  '''
-
-  max_new_tokens: int = dantic.Field(
-    20, ge=0, description='The maximum numbers of tokens to generate, ignoring the number of tokens in the prompt.'
-  )
+  max_new_tokens: int = dantic.Field(20, ge=0, description='The maximum numbers of tokens to generate, ignoring the number of tokens in the prompt.')
   min_length: int = dantic.Field(
-    0,
-    ge=0,
+    0, ge=0,  #
     description='The minimum length of the sequence to be generated. Corresponds to the length of the input prompt + `min_new_tokens`. Its effect is overridden by `min_new_tokens`, if also set.',
   )
-  min_new_tokens: int = dantic.Field(
-    description='The minimum numbers of tokens to generate, ignoring the number of tokens in the prompt.'
-  )
+  min_new_tokens: int = dantic.Field(description='The minimum numbers of tokens to generate, ignoring the number of tokens in the prompt.')
   early_stopping: bool = dantic.Field(
     False,
-    description="""Controls the stopping condition for beam-based methods, like beam-search. It accepts the following values: `True`, where the generation stops as soon as there are `num_beams` complete candidates; `False`, where an heuristic is applied and the generation stops when is it very unlikely to find better candidates; `"never"`, where the beam search procedure only stops when there cannot be better candidates (canonical beam search algorithm) """,
-  )
-  max_time: float = dantic.Field(
-    description='The maximum amount of time you allow the computation to run for in seconds. generation will still finish the current pass after allocated time has been passed.'
+    description="Controls the stopping condition for beam-based methods, like beam-search. It accepts the following values: `True`, where the generation stops as soon as there are `num_beams` complete candidates; `False`, where an heuristic is applied and the generation stops when is it very unlikely to find better candidates; `'never'`, where the beam search procedure only stops when there cannot be better candidates (canonical beam search algorithm) ",
   )
+  max_time: float = dantic.Field(description='The maximum amount of time you allow the computation to run for in seconds. generation will still finish the current pass after allocated time has been passed.')
   num_beams: int = dantic.Field(1, description='Number of beams for beam search. 1 means no beam search.')
   num_beam_groups: int = dantic.Field(
     1,
     description='Number of groups to divide `num_beams` into in order to ensure diversity among different groups of beams. [this paper](https://arxiv.org/pdf/1610.02424.pdf) for more details.',
   )
-  penalty_alpha: float = dantic.Field(
-    description='The values balance the model confidence and the degeneration penalty in contrastive search decoding.'
-  )
+  penalty_alpha: float = dantic.Field(description='The values balance the model confidence and the degeneration penalty in contrastive search decoding.')
   use_cache: bool = dantic.Field(
     True,
     description='Whether or not the model should use the past last key/values attentions (if applicable to the model) to speed up decoding.',
   )
-  temperature: float = dantic.Field(
-    1.0, ge=0.0, le=1.0, description='The value used to modulate the next token probabilities.'
-  )
-  top_k: int = dantic.Field(
-    50, description='The number of highest probability vocabulary tokens to keep for top-k-filtering.'
-  )
+  temperature: float = dantic.Field(1.0, ge=0.0, le=1.0, description='The value used to modulate the next token probabilities.')
+  top_k: int = dantic.Field(50, description='The number of highest probability vocabulary tokens to keep for top-k-filtering.')
   top_p: float = dantic.Field(
     1.0,
     description='If set to float < 1, only the smallest set of most probable tokens with probabilities that add up to `top_p` or higher are kept for generation.',
@@ -196,44 +176,29 @@ class GenerationConfig(ReprMixin):
   )
   pad_token_id: int = dantic.Field(description='The id of the *padding* token.')
   bos_token_id: int = dantic.Field(description='The id of the *beginning-of-sequence* token.')
-  eos_token_id: t.Union[int, t.List[int]] = dantic.Field(
-    description='The id of the *end-of-sequence* token. Optionally, use a list to set multiple *end-of-sequence* tokens.'
-  )
+  eos_token_id: t.Union[int, t.List[int]] = dantic.Field(description='The id of the *end-of-sequence* token. Optionally, use a list to set multiple *end-of-sequence* tokens.')
   encoder_no_repeat_ngram_size: int = dantic.Field(
     0,
     description='If set to int > 0, all ngrams of that size that occur in the `encoder_input_ids` cannot occur in the `decoder_input_ids`.',
   )
-  decoder_start_token_id: int = dantic.Field(
-    description='If an encoder-decoder model starts decoding with a different token than *bos*, the id of that token.'
-  )
+  decoder_start_token_id: int = dantic.Field(description='If an encoder-decoder model starts decoding with a different token than *bos*, the id of that token.')
   # NOTE: This is now implemented and supported for both PyTorch and vLLM
   logprobs: int = dantic.Field(0, description='Number of log probabilities to return per output token.')
   prompt_logprobs: int = dantic.Field(0, description='Number of log probabilities to return per input token.')
-
   def __init__(self, *, _internal: bool = False, **attrs: t.Any):
-    if not _internal:
-      raise RuntimeError(
-        'GenerationConfig is not meant to be used directly, but you can access this via a LLMConfig.generation_config'
-      )
+    if not _internal: raise RuntimeError('GenerationConfig is not meant to be used directly, but you can access this via a LLMConfig.generation_config')
     self.__attrs_init__(**attrs)
-
   def __getitem__(self, item: str) -> t.Any:
-    if hasattr(self, item):
-      return getattr(self, item)
+    if hasattr(self, item): return getattr(self, item)
     raise KeyError(f"'{self.__class__.__name__}' has no attribute {item}.")
-
   @property
-  def __repr_keys__(self) -> set[str]:
-    return {i.name for i in attr.fields(self.__class__)}
+  def __repr_keys__(self) -> set[str]: return {i.name for i in attr.fields(self.__class__)}
 
 converter.register_unstructure_hook_factory(
   lambda cls: attr.has(cls) and lenient_issubclass(cls, GenerationConfig),
   lambda cls: make_dict_unstructure_fn(
-    cls,
-    converter,
-    _cattrs_omit_if_default=False,
-    _cattrs_use_linecache=True,
+    cls, converter,  #
     **{k: override(omit=True) for k, v in attr.fields_dict(cls).items() if v.default in (None, attr.NOTHING)},
   ),
 )
@@ -394,15 +359,6 @@ _object_getattribute = object.__getattribute__
 
 
 class ModelSettings(t.TypedDict, total=False):
-  '''ModelSettings serve only for typing purposes as this is transcribed into LLMConfig.__config__.
-
-  Note that all fields from this dictionary will then be converted to __openllm_*__ fields in LLMConfig.
-
-  If the field below changes, make sure to run ./tools/update-config-stubs.py to generate correct __getitem__
-  stubs for type-checking purposes.
-  '''
-
-  # NOTE: These required fields should be at the top, as it will be kw_only
   default_id: Required[str]
   model_ids: Required[ListStr]
   architecture: Required[str]
@@ -435,15 +391,13 @@ _transformed_type: DictStrAny = {'fine_tune_strategies': t.Dict[AdapterType, Fin
 
 
 @attr.define(
-  frozen=False,
-  slots=True,
+  frozen=False, slots=True,  #
   field_transformer=lambda _, __: [
     attr.Attribute.from_counting_attr(
       k,
       dantic.Field(
         kw_only=False if t.get_origin(ann) is not Required else True,
-        auto_default=True,
-        use_default_converter=False,
+        auto_default=True, use_default_converter=False,  #
         type=_transformed_type.get(k, ann),
         metadata={'target': f'__openllm_{k}__'},
         description=f'ModelSettings field for {k}.',
@@ -454,11 +408,10 @@ _transformed_type: DictStrAny = {'fine_tune_strategies': t.Dict[AdapterType, Fin
 )
 class _ModelSettingsAttr:
   def __getitem__(self, key: str) -> t.Any:
-    if key in codegen.get_annotations(ModelSettings):
-      return _object_getattribute(self, key)
+    if key in codegen.get_annotations(ModelSettings): return _object_getattribute(self, key)
     raise KeyError(key)
-
-  # NOTE: The below are dynamically generated by the field_transformer
+  @classmethod
+  def from_settings(cls, settings: ModelSettings) -> _ModelSettingsAttr: return cls(**settings)
   if t.TYPE_CHECKING:
     # update-config-stubs.py: attrs start
     default_id: str
@@ -479,22 +432,15 @@ class _ModelSettingsAttr:
     fine_tune_strategies: t.Dict[AdapterType, FineTuneConfig]
     # update-config-stubs.py: attrs stop
 
-
-_DEFAULT = _ModelSettingsAttr(
-  **ModelSettings(
-    default_id='__default__',
-    model_ids=['__default__'],
-    architecture='PreTrainedModel',
-    serialisation='legacy',
+_DEFAULT = _ModelSettingsAttr.from_settings(
+  ModelSettings(
+    name_type='dasherize', url='',  #
     backend=('pt', 'vllm', 'ctranslate'),
-    name_type='dasherize',
-    url='',
-    model_type='causal_lm',
-    trust_remote_code=False,
-    requirements=None,
-    timeout=int(36e6),
-    service_name='',
-    workers_per_resource=1.0,
+    timeout=int(36e6), service_name='',  #
+    model_type='causal_lm', requirements=None,  #
+    trust_remote_code=False, workers_per_resource=1.0,  #
+    default_id='__default__', model_ids=['__default__'],  #
+    architecture='PreTrainedModel', serialisation='legacy',  #
   )
 )
 
@@ -504,62 +450,39 @@ def structure_settings(cls: type[LLMConfig], _: type[_ModelSettingsAttr]) -> _Mo
   has_custom_name = all(i in cls.__config__ for i in {'model_name', 'start_name'})
   _config = attr.evolve(_DEFAULT, **cls.__config__)
   _attr = {}
-  if not has_custom_name:
-    _attr['model_name'] = inflection.underscore(_cl_name) if _config['name_type'] == 'dasherize' else _cl_name.lower()
-    _attr['start_name'] = (
-      inflection.dasherize(_attr['model_name']) if _config['name_type'] == 'dasherize' else _attr['model_name']
-    )
-  model_name = _attr['model_name'] if 'model_name' in _attr else _config.model_name
-
+  if _config['name_type'] == 'dasherize':
+    _attr['model_name'] = inflection.underscore(_cl_name)
+    _attr['start_name'] = inflection.dasherize(_attr['model_name'])
+  else:
+    _attr['model_name'] = _cl_name.lower()
+    _attr['start_name'] = _attr['model_name']
   _attr.update(
     {
-      'service_name': f'generated_{model_name}_service.py',
+      'service_name': f'generated_{_attr["model_name"] if "model_name" in _attr else _config.model_name}_service.py',
      'fine_tune_strategies': {
-        ft_config.get('adapter_type', 'lora'): FineTuneConfig.from_config(ft_config, cls)
-        for ft_config in _config.fine_tune_strategies  # ft_config is a dict here before transformer
-      }
-      if _config.fine_tune_strategies
-      else {},
+        ft_config.get('adapter_type', 'lora'): FineTuneConfig.from_config(ft_config, cls) for ft_config in _config.fine_tune_strategies
+      } if _config.fine_tune_strategies else {},
     }
   )
-
   return attr.evolve(_config, **_attr)
 
 converter.register_structure_hook(_ModelSettingsAttr, structure_settings)
 
-def _setattr_class(attr_name: str, value_var: t.Any) -> str:
-  return f"setattr(cls, '{attr_name}', {value_var})"
-
-
-def _make_assignment_script(
-  cls: type[LLMConfig], attributes: attr.AttrsInstance, _prefix: LiteralString = 'openllm'
-) -> t.Callable[..., None]:
+_reserved_namespace = {'__config__', 'GenerationConfig', 'SamplingParams'}
+def _setattr_class(attr_name: str, value_var: t.Any) -> str: return f"setattr(cls, '{attr_name}', {value_var})"
+def _make_assignment_script(cls: type[LLMConfig], attributes: attr.AttrsInstance) -> t.Callable[[type[LLMConfig]], None]:
   '''Generate the assignment script with prefix attributes __openllm___.'''
-  args: ListStr = []
-  globs: DictStrAny = {
-    'cls': cls,
-    '_cached_attribute': attributes,
-    '_cached_getattribute_get': _object_getattribute.__get__,
-  }
-  annotations: DictStrAny = {'return': None}
-
-  lines: ListStr = []
+  args, lines, annotations = [], [], {'return': None}
+  globs = {'cls': cls, '_cached_attribute': attributes}
   for attr_name, field in attr.fields_dict(attributes.__class__).items():
-    arg_name = field.metadata.get('target', f'__{_prefix}_{inflection.underscore(attr_name)}__')
+    arg_name = field.metadata.get('target', f'__openllm_{inflection.underscore(attr_name)}__')
     args.append(f"{attr_name}=getattr(_cached_attribute, '{attr_name}')")
     lines.append(_setattr_class(arg_name, attr_name))
     annotations[attr_name] = field.type
-
-  return codegen.generate_function(
-    cls, '__assign_attr', lines, args=('cls', *args), globs=globs, annotations=annotations
-  )
-
-
-_reserved_namespace = {'__config__', 'GenerationConfig', 'SamplingParams'}
-
+  return codegen.generate_function(cls, '__assign_attr', lines, ('cls', *args), globs, annotations)
 
 @attr.define(slots=True)
 class _ConfigAttr(t.Generic[_GenerationConfigT, _SamplingParamsT]):
@@ -958,8 +881,7 @@ class LLMConfig(_ConfigAttr[GenerationConfig, SamplingParams]):
       logger.warning("LLMConfig subclass should end with 'Config'. Updating to %sConfig", cls.__name__)
       cls.__name__ = f'{cls.__name__}Config'
 
-    if not hasattr(cls, '__config__'):
-      raise RuntimeError("Given LLMConfig must have '__config__' that is not None defined.")
+    if not hasattr(cls, '__config__'): raise RuntimeError("Given LLMConfig must have '__config__' that is not None defined.")
     # auto assignment attributes generated from __config__ after create the new slot class.
     _make_assignment_script(cls, converter.structure(cls, _ModelSettingsAttr))(cls)
diff --git a/openllm-core/src/openllm_core/_typing_compat.py b/openllm-core/src/openllm_core/_typing_compat.py
index 5210f4b6..9dc392a1 100644
--- a/openllm-core/src/openllm_core/_typing_compat.py
+++ b/openllm-core/src/openllm_core/_typing_compat.py
@@ -1,46 +1,27 @@
 from __future__ import annotations
+from typing import Callable, Dict, Tuple, List, Literal, Any, TypeVar
 import sys
-import typing as t
 
 import attr
 
-if t.TYPE_CHECKING:
-  from ctranslate2 import Generator, Translator
-  from peft.peft_model import PeftModel
-  from transformers import PreTrainedModel, PreTrainedTokenizer, PreTrainedTokenizerBase, PreTrainedTokenizerFast
+M = TypeVar('M')
+T = TypeVar('T')
 
-  from .utils.lazy import VersionInfo
-else:
-  # NOTE: t.Any is also a type
-  PeftModel = (
-    PreTrainedModel
-  ) = PreTrainedTokenizer = PreTrainedTokenizerBase = PreTrainedTokenizerFast = Generator = Translator = t.Any
-  # NOTE: that VersionInfo is from openllm.utils.lazy.VersionInfo
-  VersionInfo = t.Any
+def get_literal_args(typ: Any) -> Tuple[str, ...]: return getattr(typ, '__args__', tuple())
+AnyCallable = Callable[..., Any]
+DictStrAny = Dict[str, Any]
+ListStr = List[str]
+At = TypeVar('At', bound=attr.AttrsInstance)
+LiteralDtype = Literal['float16', 'float32', 'bfloat16', 'int8', 'int16']
+LiteralSerialisation = Literal['safetensors', 'legacy']
+LiteralQuantise = Literal['int8', 'int4', 'gptq', 'awq', 'squeezellm']
+LiteralBackend = Literal['pt', 'vllm', 'ctranslate', 'triton']  # TODO: ggml
+AdapterType = Literal['lora', 'adalora', 'adaption_prompt', 'prefix_tuning', 'p_tuning', 'prompt_tuning', 'ia3', 'loha', 'lokr']
+LiteralVersionStrategy = Literal['release', 'nightly', 'latest', 'custom']
 
-M = t.TypeVar('M', bound=t.Union[PreTrainedModel, PeftModel, Generator, Translator])
-T = t.TypeVar('T', bound=t.Union[PreTrainedTokenizerFast, PreTrainedTokenizer, PreTrainedTokenizerBase])
+class AdapterTuple(Tuple[Any, ...]): adapter_id: str; name: str; config: DictStrAny
 
-
-def get_literal_args(typ: t.Any) -> tuple[str, ...]:
-  return getattr(typ, '__args__', tuple())
-
-
-AnyCallable = t.Callable[..., t.Any]
-DictStrAny = t.Dict[str, t.Any]
-ListAny = t.List[t.Any]
-ListStr = t.List[str]
-TupleAny = t.Tuple[t.Any, ...]
-At = t.TypeVar('At', bound=attr.AttrsInstance)
-
-LiteralDtype = t.Literal['float16', 'float32', 'bfloat16', 'int8', 'int16']
-LiteralSerialisation = t.Literal['safetensors', 'legacy']
-LiteralQuantise = t.Literal['int8', 'int4', 'gptq', 'awq', 'squeezellm']
-LiteralBackend = t.Literal['pt', 'vllm', 'ctranslate', 'triton']  # TODO: ggml
-AdapterType = t.Literal[
-  'lora', 'adalora', 'adaption_prompt', 'prefix_tuning', 'p_tuning', 'prompt_tuning', 'ia3', 'loha', 'lokr'
-]
-LiteralVersionStrategy = t.Literal['release', 'nightly', 'latest', 'custom']
+AdapterMap = Dict[AdapterType, Tuple[AdapterTuple, ...]]
 
 if sys.version_info[:2] >= (3, 11):
   from typing import (
@@ -75,12 +56,3 @@ if sys.version_info[:2] >= (3, 9):
   from typing import Annotated as Annotated
 else:
   from typing_extensions import Annotated as Annotated
-
-
-class AdapterTuple(TupleAny):
-  adapter_id: str
-  name: str
-  config: DictStrAny
-
-
-AdapterMap = t.Dict[AdapterType, t.Tuple[AdapterTuple, ...]]
diff --git a/openllm-core/src/openllm_core/utils/codegen.py b/openllm-core/src/openllm_core/utils/codegen.py
index be3d2697..939ef750 100644
--- a/openllm-core/src/openllm_core/utils/codegen.py
+++ b/openllm-core/src/openllm_core/utils/codegen.py
@@ -11,9 +11,7 @@ import orjson
 if t.TYPE_CHECKING:
   import openllm_core
-  from openllm_core._typing_compat import AnyCallable, DictStrAny, ListStr, LiteralString
-
-  PartialAny = functools.partial[t.Any]
+  from openllm_core._typing_compat import AnyCallable, DictStrAny, LiteralString
 
 _T = t.TypeVar('_T', bound=t.Callable[..., t.Any])
 logger = logging.getLogger(__name__)
@@ -21,24 +19,17 @@ logger = logging.getLogger(__name__)
 # sentinel object for unequivocal object() getattr
 _sentinel = object()
 
-
 def has_own_attribute(cls: type[t.Any], attrib_name: t.Any) -> bool:
-  """Check whether *cls* defines *attrib_name* (and doesn't just inherit it)."""
   attr = getattr(cls, attrib_name, _sentinel)
-  if attr is _sentinel:
-    return False
+  if attr is _sentinel: return False
   for base_cls in cls.__mro__[1:]:
     a = getattr(base_cls, attrib_name, None)
-    if attr is a:
-      return False
+    if attr is a: return False
   return True
-
 def get_annotations(cls: type[t.Any]) -> DictStrAny:
-  if has_own_attribute(cls, '__annotations__'):
-    return cls.__annotations__
-  return t.cast('DictStrAny', {})
-
+  if has_own_attribute(cls, '__annotations__'): return cls.__annotations__
+  return {}
 def is_class_var(annot: str | t.Any) -> bool:
   annot = str(annot)
@@ -47,7 +38,6 @@ def is_class_var(annot: str | t.Any) -> bool:
     annot = annot[1:-1]
   return annot.startswith(('typing.ClassVar', 't.ClassVar', 'ClassVar', 'typing_extensions.ClassVar'))
 
-
 def add_method_dunders(cls: type[t.Any], method_or_cls: _T, _overwrite_doc: str | None = None) -> _T:
   try:
     method_or_cls.__module__ = cls.__module__
@@ -63,13 +53,9 @@ def add_method_dunders(cls: type[t.Any], method_or_cls: _T, _overwrite_doc: str
     pass
   return method_or_cls
 
-
-def _compile_and_eval(script: str, globs: DictStrAny, locs: t.Any = None, filename: str = '') -> None:
-  eval(compile(script, filename, 'exec'), globs, locs)
-
-
+def _compile_and_eval(script: str, globs: DictStrAny, locs: t.Any = None, filename: str = '') -> None: eval(compile(script, filename, 'exec'), globs, locs)
 def _make_method(name: str, script: str, filename: str, globs: DictStrAny) -> AnyCallable:
-  locs: DictStrAny = {}
+  locs: dict[str, t.Any | AnyCallable] = {}
   # In order of debuggers like PDB being able to step through the code, we add a fake linecache entry.
   count = 1
   base_filename = filename
@@ -84,18 +70,16 @@ def _make_method(name: str, script: str, filename: str, globs: DictStrAny) -> An
   _compile_and_eval(script, globs, locs, filename)
   return locs[name]
 
-
 def make_attr_tuple_class(cls_name: str, attr_names: t.Sequence[str]) -> type[t.Any]:
   '''Create a tuple subclass to hold class attributes.
 
   The subclass is a bare tuple with properties for names.
 
   class MyClassAttributes(tuple):
-    __slots__ = ()
-    x = property(itemgetter(0))
+      __slots__ = ()
+      x = property(itemgetter(0))
   '''
   from . import SHOW_CODEGEN
-
   attr_class_name = f'{cls_name}Attributes'
   attr_class_template = [f'class {attr_class_name}(tuple):', '  __slots__ = ()']
   if attr_names:
@@ -103,113 +87,73 @@ def make_attr_tuple_class(cls_name: str, attr_names: t.Sequence[str]) -> type[t.
     attr_class_template.append(f'  {attr_name} = _attrs_property(_attrs_itemgetter({i}))')
   else:
     attr_class_template.append('  pass')
-  globs: DictStrAny = {'_attrs_itemgetter': itemgetter, '_attrs_property': property}
-  if SHOW_CODEGEN:
-    print(f'Generated class for {attr_class_name}:\n\n', '\n'.join(attr_class_template))
+  globs = {'_attrs_itemgetter': itemgetter, '_attrs_property': property}
+  if SHOW_CODEGEN: print(f'Generated class for {attr_class_name}:\n\n', '\n'.join(attr_class_template))
   _compile_and_eval('\n'.join(attr_class_template), globs)
   return globs[attr_class_name]
 
-
-def generate_unique_filename(cls: type[t.Any], func_name: str) -> str:
-  return f"<{cls.__name__} generated {func_name} {cls.__module__}.{getattr(cls, '__qualname__', cls.__name__)}>"
-
+def generate_unique_filename(cls: type[t.Any], func_name: str) -> str: return f"<{cls.__name__} generated {func_name} {cls.__module__}.{getattr(cls, '__qualname__', cls.__name__)}>"
 def generate_function(
-  typ: type[t.Any],
-  func_name: str,
+  typ: type[t.Any], func_name: str,  #
   lines: list[str] | None,
   args: tuple[str, ...] | None,
   globs: dict[str, t.Any],
   annotations: dict[str, t.Any] | None = None,
 ) -> AnyCallable:
   from openllm_core.utils import SHOW_CODEGEN
-
-  script = 'def %s(%s):\n  %s\n' % (
-    func_name,
-    ', '.join(args) if args is not None else '',
-    '\n  '.join(lines) if lines else 'pass',
-  )
+  script = 'def %s(%s):\n  %s\n' % (func_name, ', '.join(args) if args is not None else '', '\n  '.join(lines) if lines else 'pass')
   meth = _make_method(func_name, script, generate_unique_filename(typ, func_name), globs)
-  if annotations:
-    meth.__annotations__ = annotations
-  if SHOW_CODEGEN:
-    print(f'Generated script for {typ}:\n\n', script)
+  if annotations: meth.__annotations__ = annotations
+  if SHOW_CODEGEN: print(f'Generated script for {typ}:\n\n', script)
   return meth
 
-
 def make_env_transformer(
-  cls: type[openllm_core.LLMConfig],
-  model_name: str,
+  cls: type[openllm_core.LLMConfig], model_name: str,  #
   suffix: LiteralString | None = None,
   default_callback: t.Callable[[str, t.Any], t.Any] | None = None,
   globs: DictStrAny | None = None,
 ) -> AnyCallable:
   from openllm_core.utils import dantic, field_env_key
 
-  def identity(_: str, x_value: t.Any) -> t.Any:
-    return x_value
+  def identity(_: str, x_value: t.Any) -> t.Any: return x_value
 
-  default_callback = identity if default_callback is None else default_callback
   globs = {} if globs is None else globs
   globs.update(
     {
-      '__populate_env': dantic.env_converter,
-      '__default_callback': default_callback,
-      '__field_env': field_env_key,
-      '__suffix': suffix or '',
-      '__model_name': model_name,
+      '__populate_env': dantic.env_converter, '__field_env': field_env_key,  #
+      '__suffix': suffix or '', '__model_name': model_name,  #
+      '__default_callback': identity if default_callback is None else default_callback,
     }
   )
-  lines: ListStr = [
-    '__env=lambda field_name:__field_env(field_name,__suffix)',
-    "return [f.evolve(default=__populate_env(__default_callback(f.name,f.default),__env(f.name)),metadata={'env':f.metadata.get('env',__env(f.name)),'description':f.metadata.get('description', '(not provided)')}) for f in fields]",
-  ]
   fields_ann = 'list[attr.Attribute[t.Any]]'
   return generate_function(
-    cls,
-    '__auto_env',
-    lines,
-    args=('_', 'fields'),
-    globs=globs,
-    annotations={'_': 'type[LLMConfig]', 'fields': fields_ann, 'return': fields_ann},
+    cls, '__auto_env',  #
+    ['__env=lambda field_name:__field_env(field_name,__suffix)', "return [f.evolve(default=__populate_env(__default_callback(f.name,f.default),__env(f.name)),metadata={'env':f.metadata.get('env',__env(f.name)),'description':f.metadata.get('description', '(not provided)')}) for f in fields]"],
    ('_', 'fields'), globs, {'_': 'type[LLMConfig]', 'fields': fields_ann, 'return': fields_ann},  #
   )
 
 def gen_sdk(func: _T, name: str | None = None, **attrs: t.Any) -> _T:
   from .representation import ReprMixin
-
-  if name is None:
-    name = func.__name__.strip('_')
+  if name is None: name = func.__name__.strip('_')
   _signatures = inspect.signature(func).parameters
-
-  def _repr(self: ReprMixin) -> str:
-    return f''
-
-  def _repr_args(self: ReprMixin) -> t.Iterator[t.Tuple[str, t.Any]]:
-    return ((k, _signatures[k].annotation) for k in self.__repr_keys__)
-
-  if func.__doc__ is None:
-    doc = f'Generated SDK for {func.__name__}'
-  else:
-    doc = func.__doc__
-  return t.cast(
-    _T,
-    functools.update_wrapper(
-      types.new_class(
-        name,
-        (t.cast('PartialAny', functools.partial), ReprMixin),
-        exec_body=lambda ns: ns.update(
-          {
-            '__repr_keys__': property(lambda _: [i for i in _signatures.keys() if not i.startswith('_')]),
-            '__repr_args__': _repr_args,
-            '__repr__': _repr,
-            '__doc__': inspect.cleandoc(doc),
-            '__module__': 'openllm',
-          }
-        ),
-      )(func, **attrs),
-      func,
-    ),
+  def _repr(self: ReprMixin) -> str: return f''
+  def _repr_args(self: ReprMixin) -> t.Iterator[t.Tuple[str, t.Any]]: return ((k, _signatures[k].annotation) for k in self.__repr_keys__)
+  return functools.update_wrapper(
+    types.new_class(
+      name,
+      (functools.partial, ReprMixin),
+      exec_body=lambda ns: ns.update(
+        {
+          '__repr_keys__': property(lambda _: [i for i in _signatures.keys() if not i.startswith('_')]),
+          '__repr_args__': _repr_args, '__repr__': _repr,  #
+          '__doc__': inspect.cleandoc(f'Generated SDK for {func.__name__}' if func.__doc__ is None else func.__doc__),
+          '__module__': 'openllm',
+        }
+      ),
+    )(func, **attrs),
+    func,
   )
diff --git a/openllm-core/src/openllm_core/utils/representation.py b/openllm-core/src/openllm_core/utils/representation.py
index 20f5c5d3..1ee36df0 100644
--- a/openllm-core/src/openllm_core/utils/representation.py
+++ b/openllm-core/src/openllm_core/utils/representation.py
@@ -1,35 +1,17 @@
 from __future__ import annotations
 import typing as t
 from abc import abstractmethod
-
-import attr
-import orjson
-
+import attr, orjson
 from openllm_core import utils
 
-if t.TYPE_CHECKING:
-  from openllm_core._typing_compat import TypeAlias
-
+if t.TYPE_CHECKING: from openllm_core._typing_compat import TypeAlias
 ReprArgs: TypeAlias = t.Generator[t.Tuple[t.Optional[str], t.Any], None, None]
-
-
 class ReprMixin:
   @property
   @abstractmethod
-  def __repr_keys__(self) -> set[str]:
-    raise NotImplementedError
-
-  def __repr__(self):
-    return f'{self.__class__.__name__} {orjson.dumps({k: utils.converter.unstructure(v) if attr.has(v) else v for k, v in self.__repr_args__()}, option=orjson.OPT_INDENT_2).decode()}'
-
-  def __str__(self):
-    return self.__repr_str__(' ')
-
-  def __repr_name__(self) -> str:
-    return self.__class__.__name__
-
-  def __repr_str__(self, join_str: str):
-    return join_str.join(repr(v) if a is None else f'{a}={v!r}' for a, v in self.__repr_args__())
-
-  def __repr_args__(self) -> ReprArgs:
-    return ((k, getattr(self, k)) for k in self.__repr_keys__)
+  def __repr_keys__(self) -> set[str]: raise NotImplementedError
+  def __repr__(self) -> str: return f'{self.__class__.__name__} {orjson.dumps({k: utils.converter.unstructure(v) if attr.has(v) else v for k, v in self.__repr_args__()}, option=orjson.OPT_INDENT_2).decode()}'
+  def __str__(self) -> str: return self.__repr_str__(' ')
+  def __repr_name__(self) -> str: return self.__class__.__name__
+  def __repr_str__(self, join_str: str) -> str: return join_str.join(repr(v) if a is None else f'{a}={v!r}' for a, v in self.__repr_args__())
+  def __repr_args__(self) -> ReprArgs: return ((k, getattr(self, k)) for k in self.__repr_keys__)
diff --git a/openllm-python/src/openllm/_llm.py b/openllm-python/src/openllm/_llm.py
index 0c3bd37a..4b303ee1 100644
--- a/openllm-python/src/openllm/_llm.py
+++ b/openllm-python/src/openllm/_llm.py
@@ -6,19 +6,16 @@ from openllm_core._typing_compat import (
   AdapterMap,
   AdapterTuple,
   AdapterType,
-  DictStrAny,
   LiteralBackend,
   LiteralDtype,
   LiteralQuantise,
   LiteralSerialisation,
   M,
   T,
-  TupleAny,
 )
 from openllm_core.exceptions import MissingDependencyError
 from openllm_core.utils import (
   DEBUG,
-  ReprMixin,
   apply,
   check_bool_env,
   codegen,
@@ -49,7 +46,7 @@ ResolvedAdapterMap = t.Dict[AdapterType, t.Dict[str, t.Tuple['PeftConfig', str]]
 
 
 @attr.define(slots=True, repr=False, init=False)
-class LLM(t.Generic[M, T], ReprMixin):
+class LLM(t.Generic[M, T]):
   async def generate(
     self, prompt, prompt_token_ids=None, stop=None, stop_token_ids=None, request_id=None, adapter_name=None, **attrs
   ) -> GenerationOutput:
@@ -90,13 +87,12 @@ class LLM(t.Generic[M, T], ReprMixin):
     self.runner.init_local(quiet=True)
 
     config = self.config.model_construct_env(**attrs)
-    if stop_token_ids is None:
-      stop_token_ids = []
+    if stop_token_ids is None: stop_token_ids = []
     eos_token_id = attrs.get('eos_token_id', config['eos_token_id'])
     if eos_token_id is not None:
-      if not isinstance(eos_token_id, list):
-        eos_token_id = [eos_token_id]
+      if not isinstance(eos_token_id, list): eos_token_id = [eos_token_id]
       stop_token_ids.extend(eos_token_id)
+    if config['eos_token_id'] and config['eos_token_id'] not in stop_token_ids: stop_token_ids.append(config['eos_token_id'])
     if self.tokenizer.eos_token_id not in stop_token_ids:
       stop_token_ids.append(self.tokenizer.eos_token_id)
     if stop is None:
@@ -142,9 +138,9 @@ class LLM(t.Generic[M, T], ReprMixin):
   _revision: t.Optional[str]
   _quantization_config: t.Optional[t.Union[transformers.BitsAndBytesConfig, transformers.GPTQConfig, transformers.AwqConfig]]
   _quantise: t.Optional[LiteralQuantise]
-  _model_decls: TupleAny
-  __model_attrs: DictStrAny
-  __tokenizer_attrs: DictStrAny
+  _model_decls: t.Tuple[t.Any, ...]
+  __model_attrs: t.Dict[str, t.Any]
+  __tokenizer_attrs: t.Dict[str, t.Any]
   _tag: bentoml.Tag
   _adapter_map: t.Optional[AdapterMap]
   _serialisation: LiteralSerialisation
@@ -308,9 +304,8 @@ class LLM(t.Generic[M, T], ReprMixin):
       del self.__llm_model__, self.__llm_tokenizer__, self.__llm_adapter_map__
     except AttributeError:
       pass
-  @property
-  def __repr_keys__(self): return {'model_id', 'revision', 'backend', 'type'}
   def __repr_args__(self): yield from (('model_id', self._model_id if not self._local else self.tag.name), ('revision', self._revision if self._revision else self.tag.version), ('backend', self.__llm_backend__), ('type', self.llm_type))
+  def __repr__(self) -> str: return f'{self.__class__.__name__} {orjson.dumps({k: v for k, v in self.__repr_args__()}, option=orjson.OPT_INDENT_2).decode()}'
   @property
   def import_kwargs(self): return {'device_map': 'auto' if self._has_gpus else None, 'torch_dtype': self._torch_dtype}, {'padding_side': 'left', 'truncation_side': 'left'}
   @property
diff --git a/openllm-python/src/openllm/_llm.pyi b/openllm-python/src/openllm/_llm.pyi
index af88d171..d38632e6 100644
--- a/openllm-python/src/openllm/_llm.pyi
+++ b/openllm-python/src/openllm/_llm.pyi
@@ -1,4 +1,4 @@
-from typing import Any, AsyncGenerator, Dict, Generic, Iterable, List, Literal, Optional, Set, Tuple, TypedDict, Union
+from typing import Any, AsyncGenerator, Dict, Generic, Iterable, List, Literal, Optional, Tuple, TypedDict, Union
 
 import attr
 import torch
@@ -18,7 +18,6 @@ from openllm_core._typing_compat import (
   M,
   T,
 )
-from openllm_core.utils.representation import ReprArgs
 
 from ._quantisation import QuantizationConfig
 from ._runners import Runner
@@ -59,13 +58,7 @@ class LLM(Generic[M, T]):
   __llm_adapter_map__: Optional[ResolvedAdapterMap] = ...
   __llm_trust_remote_code__: bool = ...
 
-  @property
-  def __repr_keys__(self) -> Set[str]: ...
   def __repr__(self) -> str: ...
-  def __str__(self) -> str: ...
-  def __repr_name__(self) -> str: ...
-  def __repr_str__(self, join_str: str) -> str: ...
-  def __repr_args__(self) -> ReprArgs: ...
   def __init__(
     self,
     model_id: str,