diff --git a/openllm-core/src/openllm_core/_configuration.py b/openllm-core/src/openllm_core/_configuration.py index aee7a6af..f9c7e299 100644 --- a/openllm-core/src/openllm_core/_configuration.py +++ b/openllm-core/src/openllm_core/_configuration.py @@ -17,7 +17,6 @@ from deepmerge.merger import Merger import openllm_core -from ._conversation import Conversation, SeparatorStyle from ._typing_compat import ( AdapterType, AnyCallable, @@ -398,14 +397,10 @@ class ModelSettings(t.TypedDict, total=False): fine_tune_strategies: t.Tuple[t.Dict[str, t.Any], ...] # Chat models related configuration - conversation: t.Optional[t.Dict[str, t.Any]] add_generation_prompt: bool -_transformed_type: DictStrAny = { - 'fine_tune_strategies': t.Dict[AdapterType, FineTuneConfig], - 'conversation': Conversation, -} +_transformed_type: DictStrAny = {'fine_tune_strategies': t.Dict[AdapterType, FineTuneConfig]} @attr.define( @@ -434,26 +429,27 @@ class _ModelSettingsAttr: # NOTE: The below are dynamically generated by the field_transformer if t.TYPE_CHECKING: + # fmt: off # update-config-stubs.py: attrs start - default_id: str - model_ids: ListStr - architecture: str - url: str - serialisation: LiteralSerialisation - trust_remote_code: bool - service_name: str - requirements: t.Optional[ListStr] - model_type: t.Literal['causal_lm', 'seq2seq_lm'] - name_type: t.Optional[t.Literal['dasherize', 'lowercase']] - backend: t.Tuple[LiteralBackend, ...] - model_name: str - start_name: str - timeout: int - workers_per_resource: t.Union[int, float] - fine_tune_strategies: t.Dict[AdapterType, FineTuneConfig] - conversation: Conversation - add_generation_prompt: bool + default_id:str + model_ids:ListStr + architecture:str + url:str + serialisation:LiteralSerialisation + trust_remote_code:bool + service_name:str + requirements:t.Optional[ListStr] + model_type:t.Literal['causal_lm', 'seq2seq_lm'] + name_type:t.Optional[t.Literal['dasherize', 'lowercase']] + backend:t.Tuple[LiteralBackend, ...] + model_name:str + start_name:str + timeout:int + workers_per_resource:t.Union[int, float] + fine_tune_strategies:t.Dict[AdapterType, FineTuneConfig] + add_generation_prompt:bool # update-config-stubs.py: attrs stop + # fmt: on _DEFAULT = _ModelSettingsAttr( @@ -468,7 +464,6 @@ _DEFAULT = _ModelSettingsAttr( model_type='causal_lm', trust_remote_code=False, requirements=None, - conversation=dict(system_message='', roles=('', ''), sep_style=SeparatorStyle.NO_COLON_SINGLE, sep=''), add_generation_prompt=False, timeout=int(36e6), service_name='', @@ -493,7 +488,6 @@ def structure_settings(cls: type[LLMConfig], _: type[_ModelSettingsAttr]) -> _Mo _attr.update( { 'service_name': f'generated_{model_name}_service.py', - 'conversation': Conversation(name=model_name, **_config.conversation), 'fine_tune_strategies': { ft_config.get('adapter_type', 'lora'): FineTuneConfig.from_config(ft_config, cls) for ft_config in _config.fine_tune_strategies # ft_config is a dict here before transformer @@ -617,24 +611,25 @@ class _ConfigAttr(t.Generic[_GenerationConfigT, _SamplingParamsT]): # considered to be public API. Users can also access these via self[key] # To update the docstring for these field, update it through tools/update-config-stubs.py + # fmt: off # update-config-stubs.py: special start - __openllm_default_id__: str = Field(None) - """Return the default model to use when using 'openllm start '. + __openllm_default_id__:str=Field(None) + '''Return the default model to use when using 'openllm start '. 
This could be one of the keys in 'self.model_ids' or custom users model. This field is required when defining under '__config__'. - """ - __openllm_model_ids__: ListStr = Field(None) - """A list of supported pretrained models tag for this given runnable. + ''' + __openllm_model_ids__:ListStr=Field(None) + '''A list of supported pretrained models tag for this given runnable. For example: For FLAN-T5 impl, this would be ["google/flan-t5-small", "google/flan-t5-base", "google/flan-t5-large", "google/flan-t5-xl", "google/flan-t5-xxl"] This field is required when defining under '__config__'. - """ - __openllm_architecture__: str = Field(None) - """The model architecture that is supported by this LLM. + ''' + __openllm_architecture__:str=Field(None) + '''The model architecture that is supported by this LLM. Note that any model weights within this architecture generation can always be run and supported by this LLM. @@ -643,33 +638,33 @@ class _ConfigAttr(t.Generic[_GenerationConfigT, _SamplingParamsT]): ```bash openllm start gpt-neox --model-id stabilityai/stablelm-tuned-alpha-3b - ```""" - __openllm_url__: str = Field(None) - """The resolved url for this LLMConfig.""" - __openllm_serialisation__: LiteralSerialisation = Field(None) - """Default serialisation format for different models. Some will default to use the legacy 'bin'. """ - __openllm_trust_remote_code__: bool = Field(None) - """Whether to always trust remote code""" - __openllm_service_name__: str = Field(None) + ```''' + __openllm_url__:str=Field(None) + '''The resolved url for this LLMConfig.''' + __openllm_serialisation__:LiteralSerialisation=Field(None) + '''Default serialisation format for different models. Some will default to use the legacy 'bin'. ''' + __openllm_trust_remote_code__:bool=Field(None) + '''Whether to always trust remote code''' + __openllm_service_name__:str=Field(None) '''Generated service name for this LLMConfig. By default, it is "generated_{model_name}_service.py"''' - __openllm_requirements__: t.Optional[ListStr] = Field(None) - """The default PyPI requirements needed to run this given LLM. By default, we will depend on bentoml, torch, transformers.""" - __openllm_model_type__: t.Literal['causal_lm', 'seq2seq_lm'] = Field(None) + __openllm_requirements__:t.Optional[ListStr]=Field(None) + '''The default PyPI requirements needed to run this given LLM. By default, we will depend on bentoml, torch, transformers.''' + __openllm_model_type__:t.Literal['causal_lm', 'seq2seq_lm']=Field(None) '''The model type for this given LLM. By default, it should be causal language modeling. Currently supported "causal_lm" or "seq2seq_lm"''' - __openllm_name_type__: t.Optional[t.Literal['dasherize', 'lowercase']] = Field(None) - """The default name typed for this model. "dasherize" will convert the name to lowercase and + __openllm_name_type__:t.Optional[t.Literal['dasherize', 'lowercase']]=Field(None) + '''The default name typed for this model. "dasherize" will convert the name to lowercase and replace spaces with dashes. "lowercase" will convert the name to lowercase. If this is not set, then both - `model_name` and `start_name` must be specified.""" - __openllm_backend__: t.Tuple[LiteralBackend, ...] = Field(None) - """List of supported backend for this given LLM class. 
Currently, we support "pt" and "vllm".""" - __openllm_model_name__: str = Field(None) - """The normalized version of __openllm_start_name__, determined by __openllm_name_type__""" - __openllm_start_name__: str = Field(None) - """Default name to be used with `openllm start`""" - __openllm_timeout__: int = Field(None) - """The default timeout to be set for this given LLM.""" - __openllm_workers_per_resource__: t.Union[int, float] = Field(None) - """The number of workers per resource. This is used to determine the number of workers to use for this model. + `model_name` and `start_name` must be specified.''' + __openllm_backend__:t.Tuple[LiteralBackend, ...]=Field(None) + '''List of supported backend for this given LLM class. Currently, we support "pt" and "vllm".''' + __openllm_model_name__:str=Field(None) + '''The normalized version of __openllm_start_name__, determined by __openllm_name_type__''' + __openllm_start_name__:str=Field(None) + '''Default name to be used with `openllm start`''' + __openllm_timeout__:int=Field(None) + '''The default timeout to be set for this given LLM.''' + __openllm_workers_per_resource__:t.Union[int, float]=Field(None) + '''The number of workers per resource. This is used to determine the number of workers to use for this model. For example, if this is set to 0.5, then OpenLLM will use 1 worker per 2 resources. If this is set to 1, then OpenLLM will use 1 worker per resource. If this is set to 2, then OpenLLM will use 2 workers per resource. @@ -677,14 +672,13 @@ class _ConfigAttr(t.Generic[_GenerationConfigT, _SamplingParamsT]): https://docs.bentoml.org/en/latest/guides/scheduling.html#resource-scheduling-strategy for more details. By default, it is set to 1. - """ - __openllm_fine_tune_strategies__: t.Dict[AdapterType, FineTuneConfig] = Field(None) - """The fine-tune strategies for this given LLM.""" - __openllm_conversation__: Conversation = Field(None) - """The conversation class for this given LLM to determine its chat templates.""" - __openllm_add_generation_prompt__: bool = Field(None) - """Whether to add generation prompt token for formatting chat templates. This arguments will be used for chat-based models.""" + ''' + __openllm_fine_tune_strategies__:t.Dict[AdapterType, FineTuneConfig]=Field(None) + '''The fine-tune strategies for this given LLM.''' + __openllm_add_generation_prompt__:bool=Field(None) + '''Whether to add generation prompt token for formatting chat templates. This arguments will be used for chat-based models.''' # update-config-stubs.py: special stop + # fmt: on class _ConfigBuilder: @@ -1096,167 +1090,165 @@ class LLMConfig(_ConfigAttr[t.Any, t.Any]): # update-config-stubs.py: start # NOTE: ModelSettings arguments @overload - def __getitem__(self, item: t.Literal['default_id']) -> str: ... + def __getitem__(self,item:t.Literal['default_id'])->str:... @overload - def __getitem__(self, item: t.Literal['model_ids']) -> ListStr: ... + def __getitem__(self,item:t.Literal['model_ids'])->ListStr:... @overload - def __getitem__(self, item: t.Literal['architecture']) -> str: ... + def __getitem__(self,item:t.Literal['architecture'])->str:... @overload - def __getitem__(self, item: t.Literal['url']) -> str: ... + def __getitem__(self,item:t.Literal['url'])->str:... @overload - def __getitem__(self, item: t.Literal['serialisation']) -> LiteralSerialisation: ... + def __getitem__(self,item:t.Literal['serialisation'])->LiteralSerialisation:... @overload - def __getitem__(self, item: t.Literal['trust_remote_code']) -> bool: ... 
+ def __getitem__(self,item:t.Literal['trust_remote_code'])->bool:... @overload - def __getitem__(self, item: t.Literal['service_name']) -> str: ... + def __getitem__(self,item:t.Literal['service_name'])->str:... @overload - def __getitem__(self, item: t.Literal['requirements']) -> t.Optional[ListStr]: ... + def __getitem__(self,item:t.Literal['requirements'])->t.Optional[ListStr]:... @overload - def __getitem__(self, item: t.Literal['model_type']) -> t.Literal['causal_lm', 'seq2seq_lm']: ... + def __getitem__(self,item:t.Literal['model_type'])->t.Literal['causal_lm', 'seq2seq_lm']:... @overload - def __getitem__(self, item: t.Literal['name_type']) -> t.Optional[t.Literal['dasherize', 'lowercase']]: ... + def __getitem__(self,item:t.Literal['name_type'])->t.Optional[t.Literal['dasherize', 'lowercase']]:... @overload - def __getitem__(self, item: t.Literal['backend']) -> t.Tuple[LiteralBackend, ...]: ... + def __getitem__(self,item:t.Literal['backend'])->t.Tuple[LiteralBackend, ...]:... @overload - def __getitem__(self, item: t.Literal['model_name']) -> str: ... + def __getitem__(self,item:t.Literal['model_name'])->str:... @overload - def __getitem__(self, item: t.Literal['start_name']) -> str: ... + def __getitem__(self,item:t.Literal['start_name'])->str:... @overload - def __getitem__(self, item: t.Literal['timeout']) -> int: ... + def __getitem__(self,item:t.Literal['timeout'])->int:... @overload - def __getitem__(self, item: t.Literal['workers_per_resource']) -> t.Union[int, float]: ... + def __getitem__(self,item:t.Literal['workers_per_resource'])->t.Union[int, float]:... @overload - def __getitem__(self, item: t.Literal['fine_tune_strategies']) -> t.Dict[AdapterType, FineTuneConfig]: ... + def __getitem__(self,item:t.Literal['fine_tune_strategies'])->t.Dict[AdapterType, FineTuneConfig]:... @overload - def __getitem__(self, item: t.Literal['conversation']) -> Conversation: ... - @overload - def __getitem__(self, item: t.Literal['add_generation_prompt']) -> bool: ... + def __getitem__(self,item:t.Literal['add_generation_prompt'])->bool:... # NOTE: generation_class, sampling_class and extras arguments @overload - def __getitem__(self, item: t.Literal['generation_class']) -> t.Type[openllm_core.GenerationConfig]: ... + def __getitem__(self,item:t.Literal['generation_class'])->t.Type[openllm_core.GenerationConfig]:... @overload - def __getitem__(self, item: t.Literal['sampling_class']) -> t.Type[openllm_core.SamplingParams]: ... + def __getitem__(self,item:t.Literal['sampling_class'])->t.Type[openllm_core.SamplingParams]:... @overload - def __getitem__(self, item: t.Literal['extras']) -> t.Dict[str, t.Any]: ... + def __getitem__(self,item:t.Literal['extras'])->t.Dict[str, t.Any]:... # NOTE: GenerationConfig arguments @overload - def __getitem__(self, item: t.Literal['max_new_tokens']) -> int: ... + def __getitem__(self,item:t.Literal['max_new_tokens'])->int:... @overload - def __getitem__(self, item: t.Literal['min_length']) -> int: ... + def __getitem__(self,item:t.Literal['min_length'])->int:... @overload - def __getitem__(self, item: t.Literal['min_new_tokens']) -> int: ... + def __getitem__(self,item:t.Literal['min_new_tokens'])->int:... @overload - def __getitem__(self, item: t.Literal['early_stopping']) -> bool: ... + def __getitem__(self,item:t.Literal['early_stopping'])->bool:... @overload - def __getitem__(self, item: t.Literal['max_time']) -> float: ... + def __getitem__(self,item:t.Literal['max_time'])->float:... 
@overload - def __getitem__(self, item: t.Literal['num_beams']) -> int: ... + def __getitem__(self,item:t.Literal['num_beams'])->int:... @overload - def __getitem__(self, item: t.Literal['num_beam_groups']) -> int: ... + def __getitem__(self,item:t.Literal['num_beam_groups'])->int:... @overload - def __getitem__(self, item: t.Literal['penalty_alpha']) -> float: ... + def __getitem__(self,item:t.Literal['penalty_alpha'])->float:... @overload - def __getitem__(self, item: t.Literal['use_cache']) -> bool: ... + def __getitem__(self,item:t.Literal['use_cache'])->bool:... @overload - def __getitem__(self, item: t.Literal['temperature']) -> float: ... + def __getitem__(self,item:t.Literal['temperature'])->float:... @overload - def __getitem__(self, item: t.Literal['top_k']) -> int: ... + def __getitem__(self,item:t.Literal['top_k'])->int:... @overload - def __getitem__(self, item: t.Literal['top_p']) -> float: ... + def __getitem__(self,item:t.Literal['top_p'])->float:... @overload - def __getitem__(self, item: t.Literal['typical_p']) -> float: ... + def __getitem__(self,item:t.Literal['typical_p'])->float:... @overload - def __getitem__(self, item: t.Literal['epsilon_cutoff']) -> float: ... + def __getitem__(self,item:t.Literal['epsilon_cutoff'])->float:... @overload - def __getitem__(self, item: t.Literal['eta_cutoff']) -> float: ... + def __getitem__(self,item:t.Literal['eta_cutoff'])->float:... @overload - def __getitem__(self, item: t.Literal['diversity_penalty']) -> float: ... + def __getitem__(self,item:t.Literal['diversity_penalty'])->float:... @overload - def __getitem__(self, item: t.Literal['repetition_penalty']) -> float: ... + def __getitem__(self,item:t.Literal['repetition_penalty'])->float:... @overload - def __getitem__(self, item: t.Literal['encoder_repetition_penalty']) -> float: ... + def __getitem__(self,item:t.Literal['encoder_repetition_penalty'])->float:... @overload - def __getitem__(self, item: t.Literal['length_penalty']) -> float: ... + def __getitem__(self,item:t.Literal['length_penalty'])->float:... @overload - def __getitem__(self, item: t.Literal['no_repeat_ngram_size']) -> int: ... + def __getitem__(self,item:t.Literal['no_repeat_ngram_size'])->int:... @overload - def __getitem__(self, item: t.Literal['bad_words_ids']) -> t.List[t.List[int]]: ... + def __getitem__(self,item:t.Literal['bad_words_ids'])->t.List[t.List[int]]:... @overload - def __getitem__(self, item: t.Literal['force_words_ids']) -> t.Union[t.List[t.List[int]], t.List[t.List[t.List[int]]]]: ... + def __getitem__(self,item:t.Literal['force_words_ids'])->t.Union[t.List[t.List[int]], t.List[t.List[t.List[int]]]]:... @overload - def __getitem__(self, item: t.Literal['renormalize_logits']) -> bool: ... + def __getitem__(self,item:t.Literal['renormalize_logits'])->bool:... @overload - def __getitem__(self, item: t.Literal['forced_bos_token_id']) -> int: ... + def __getitem__(self,item:t.Literal['forced_bos_token_id'])->int:... @overload - def __getitem__(self, item: t.Literal['forced_eos_token_id']) -> t.Union[int, t.List[int]]: ... + def __getitem__(self,item:t.Literal['forced_eos_token_id'])->t.Union[int, t.List[int]]:... @overload - def __getitem__(self, item: t.Literal['remove_invalid_values']) -> bool: ... + def __getitem__(self,item:t.Literal['remove_invalid_values'])->bool:... @overload - def __getitem__(self, item: t.Literal['exponential_decay_length_penalty']) -> t.Tuple[int, float]: ... + def __getitem__(self,item:t.Literal['exponential_decay_length_penalty'])->t.Tuple[int, float]:... 
@overload - def __getitem__(self, item: t.Literal['suppress_tokens']) -> t.List[int]: ... + def __getitem__(self,item:t.Literal['suppress_tokens'])->t.List[int]:... @overload - def __getitem__(self, item: t.Literal['begin_suppress_tokens']) -> t.List[int]: ... + def __getitem__(self,item:t.Literal['begin_suppress_tokens'])->t.List[int]:... @overload - def __getitem__(self, item: t.Literal['forced_decoder_ids']) -> t.List[t.List[int]]: ... + def __getitem__(self,item:t.Literal['forced_decoder_ids'])->t.List[t.List[int]]:... @overload - def __getitem__(self, item: t.Literal['num_return_sequences']) -> int: ... + def __getitem__(self,item:t.Literal['num_return_sequences'])->int:... @overload - def __getitem__(self, item: t.Literal['output_attentions']) -> bool: ... + def __getitem__(self,item:t.Literal['output_attentions'])->bool:... @overload - def __getitem__(self, item: t.Literal['output_hidden_states']) -> bool: ... + def __getitem__(self,item:t.Literal['output_hidden_states'])->bool:... @overload - def __getitem__(self, item: t.Literal['output_scores']) -> bool: ... + def __getitem__(self,item:t.Literal['output_scores'])->bool:... @overload - def __getitem__(self, item: t.Literal['pad_token_id']) -> int: ... + def __getitem__(self,item:t.Literal['pad_token_id'])->int:... @overload - def __getitem__(self, item: t.Literal['bos_token_id']) -> int: ... + def __getitem__(self,item:t.Literal['bos_token_id'])->int:... @overload - def __getitem__(self, item: t.Literal['eos_token_id']) -> t.Union[int, t.List[int]]: ... + def __getitem__(self,item:t.Literal['eos_token_id'])->t.Union[int, t.List[int]]:... @overload - def __getitem__(self, item: t.Literal['encoder_no_repeat_ngram_size']) -> int: ... + def __getitem__(self,item:t.Literal['encoder_no_repeat_ngram_size'])->int:... @overload - def __getitem__(self, item: t.Literal['decoder_start_token_id']) -> int: ... + def __getitem__(self,item:t.Literal['decoder_start_token_id'])->int:... # NOTE: SamplingParams arguments @overload - def __getitem__(self, item: t.Literal['n']) -> int: ... + def __getitem__(self,item:t.Literal['n'])->int:... @overload - def __getitem__(self, item: t.Literal['best_of']) -> int: ... + def __getitem__(self,item:t.Literal['best_of'])->int:... @overload - def __getitem__(self, item: t.Literal['presence_penalty']) -> float: ... + def __getitem__(self,item:t.Literal['presence_penalty'])->float:... @overload - def __getitem__(self, item: t.Literal['frequency_penalty']) -> float: ... + def __getitem__(self,item:t.Literal['frequency_penalty'])->float:... @overload - def __getitem__(self, item: t.Literal['use_beam_search']) -> bool: ... + def __getitem__(self,item:t.Literal['use_beam_search'])->bool:... @overload - def __getitem__(self, item: t.Literal['ignore_eos']) -> bool: ... + def __getitem__(self,item:t.Literal['ignore_eos'])->bool:... @overload - def __getitem__(self, item: t.Literal['logprobs']) -> int: ... + def __getitem__(self,item:t.Literal['logprobs'])->int:... @overload - def __getitem__(self, item: t.Literal['prompt_logprobs']) -> t.Optional[int]: ... + def __getitem__(self,item:t.Literal['prompt_logprobs'])->t.Optional[int]:... @overload - def __getitem__(self, item: t.Literal['skip_special_tokens']) -> bool: ... + def __getitem__(self,item:t.Literal['skip_special_tokens'])->bool:... # NOTE: PeftType arguments @overload - def __getitem__(self, item: t.Literal['prompt_tuning']) -> dict[str, t.Any]: ... + def __getitem__(self,item:t.Literal['prompt_tuning'])->t.Dict[str, t.Any]:... 
@overload - def __getitem__(self, item: t.Literal['multitask_prompt_tuning']) -> dict[str, t.Any]: ... + def __getitem__(self,item:t.Literal['multitask_prompt_tuning'])->t.Dict[str, t.Any]:... @overload - def __getitem__(self, item: t.Literal['p_tuning']) -> dict[str, t.Any]: ... + def __getitem__(self,item:t.Literal['p_tuning'])->t.Dict[str, t.Any]:... @overload - def __getitem__(self, item: t.Literal['prefix_tuning']) -> dict[str, t.Any]: ... + def __getitem__(self,item:t.Literal['prefix_tuning'])->t.Dict[str, t.Any]:... @overload - def __getitem__(self, item: t.Literal['lora']) -> dict[str, t.Any]: ... + def __getitem__(self,item:t.Literal['lora'])->t.Dict[str, t.Any]:... @overload - def __getitem__(self, item: t.Literal['adalora']) -> dict[str, t.Any]: ... + def __getitem__(self,item:t.Literal['adalora'])->t.Dict[str, t.Any]:... @overload - def __getitem__(self, item: t.Literal['adaption_prompt']) -> dict[str, t.Any]: ... + def __getitem__(self,item:t.Literal['adaption_prompt'])->t.Dict[str, t.Any]:... @overload - def __getitem__(self, item: t.Literal['ia3']) -> dict[str, t.Any]: ... + def __getitem__(self,item:t.Literal['ia3'])->t.Dict[str, t.Any]:... @overload - def __getitem__(self, item: t.Literal['loha']) -> dict[str, t.Any]: ... + def __getitem__(self,item:t.Literal['loha'])->t.Dict[str, t.Any]:... @overload - def __getitem__(self, item: t.Literal['lokr']) -> dict[str, t.Any]: ... + def __getitem__(self,item:t.Literal['lokr'])->t.Dict[str, t.Any]:... # update-config-stubs.py: stop # fmt: on @@ -1452,12 +1444,6 @@ class LLMConfig(_ConfigAttr[t.Any, t.Any]): key_to_remove.append(k) return self.model_construct_env(**llm_config_attrs), {k: v for k, v in attrs.items() if k not in key_to_remove} - def get_conversation_template(self) -> Conversation: - template = self['conversation'].copy() - if hasattr(self, 'default_system_message'): - template.set_system_message(self.default_system_message) - return template - def make_fine_tune_config(self, adapter_type: AdapterType, **attrs: t.Any) -> FineTuneConfig: return FineTuneConfig(adapter_type=adapter_type, llm_config_class=self.__class__).with_config(**attrs) diff --git a/openllm-core/src/openllm_core/config/configuration_auto.py b/openllm-core/src/openllm_core/config/configuration_auto.py index 3073bfb9..6c9b460d 100644 --- a/openllm-core/src/openllm_core/config/configuration_auto.py +++ b/openllm-core/src/openllm_core/config/configuration_auto.py @@ -114,6 +114,47 @@ class AutoConfig: 'Cannot instantiate AutoConfig directly. Please use `AutoConfig.for_model(model_name)` instead.' ) + # fmt: off + # update-config-stubs.py: auto stubs start + @t.overload + @classmethod + def for_model(cls,model_name:t.Literal['chatglm'],**attrs:t.Any)->openllm_core.config.ChatGLMConfig:... + @t.overload + @classmethod + def for_model(cls,model_name:t.Literal['dolly_v2'],**attrs:t.Any)->openllm_core.config.DollyV2Config:... + @t.overload + @classmethod + def for_model(cls,model_name:t.Literal['falcon'],**attrs:t.Any)->openllm_core.config.FalconConfig:... + @t.overload + @classmethod + def for_model(cls,model_name:t.Literal['flan_t5'],**attrs:t.Any)->openllm_core.config.FlanT5Config:... + @t.overload + @classmethod + def for_model(cls,model_name:t.Literal['gpt_neox'],**attrs:t.Any)->openllm_core.config.GPTNeoXConfig:... + @t.overload + @classmethod + def for_model(cls,model_name:t.Literal['llama'],**attrs:t.Any)->openllm_core.config.LlamaConfig:... 
+ @t.overload + @classmethod + def for_model(cls,model_name:t.Literal['mpt'],**attrs:t.Any)->openllm_core.config.MPTConfig:... + @t.overload + @classmethod + def for_model(cls,model_name:t.Literal['opt'],**attrs:t.Any)->openllm_core.config.OPTConfig:... + @t.overload + @classmethod + def for_model(cls,model_name:t.Literal['stablelm'],**attrs:t.Any)->openllm_core.config.StableLMConfig:... + @t.overload + @classmethod + def for_model(cls,model_name:t.Literal['starcoder'],**attrs:t.Any)->openllm_core.config.StarCoderConfig:... + @t.overload + @classmethod + def for_model(cls,model_name:t.Literal['mistral'],**attrs:t.Any)->openllm_core.config.MistralConfig:... + @t.overload + @classmethod + def for_model(cls,model_name:t.Literal['baichuan'],**attrs:t.Any)->openllm_core.config.BaichuanConfig:... + # update-config-stubs.py: auto stubs stop + # fmt: on + @classmethod def for_model(cls, model_name: str, **attrs: t.Any) -> openllm_core.LLMConfig: model_name = inflection.underscore(model_name) diff --git a/openllm-core/src/openllm_core/config/configuration_baichuan.py b/openllm-core/src/openllm_core/config/configuration_baichuan.py index 4b6a6727..a8d04840 100644 --- a/openllm-core/src/openllm_core/config/configuration_baichuan.py +++ b/openllm-core/src/openllm_core/config/configuration_baichuan.py @@ -2,7 +2,6 @@ from __future__ import annotations import typing as t import openllm_core -from openllm_core._conversation import SeparatorStyle from openllm_core.prompts import PromptTemplate START_BAICHUAN_COMMAND_DOCSTRING = """\ @@ -50,7 +49,6 @@ class BaichuanConfig(openllm_core.LLMConfig): # https://huggingface.co/baichuan-inc/Baichuan-13B-Chat/blob/19ef51ba5bad8935b03acd20ff04a269210983bc/modeling_baichuan.py#L555 # https://huggingface.co/baichuan-inc/Baichuan-13B-Chat/blob/main/generation_config.json # https://github.com/baichuan-inc/Baichuan-13B/issues/25 - 'conversation': dict(roles=('', ''), sep_style=SeparatorStyle.NO_COLON_SINGLE, sep=''), 'default_id': 'baichuan-inc/baichuan-7b', 'model_ids': [ 'baichuan-inc/baichuan-7b', diff --git a/openllm-core/src/openllm_core/config/configuration_chatglm.py b/openllm-core/src/openllm_core/config/configuration_chatglm.py index 967da5ae..f3dbaa08 100644 --- a/openllm-core/src/openllm_core/config/configuration_chatglm.py +++ b/openllm-core/src/openllm_core/config/configuration_chatglm.py @@ -2,7 +2,6 @@ from __future__ import annotations import typing as t import openllm_core -from openllm_core._conversation import SeparatorStyle from openllm_core.utils import dantic if t.TYPE_CHECKING: @@ -51,7 +50,6 @@ class ChatGLMConfig(openllm_core.LLMConfig): 'timeout': 3600000, 'backend': ('pt',), 'url': 'https://github.com/THUDM/ChatGLM-6B', - 'conversation': dict(roles=('问', '答'), sep_style=SeparatorStyle.CHATGLM, sep='\n'), 'requirements': ['cpm-kernels', 'sentencepiece'], 'architecture': 'ChatGLMModel', 'default_id': 'thudm/chatglm-6b', diff --git a/openllm-core/src/openllm_core/config/configuration_dolly_v2.py b/openllm-core/src/openllm_core/config/configuration_dolly_v2.py index 1848b4b7..e0d771cf 100644 --- a/openllm-core/src/openllm_core/config/configuration_dolly_v2.py +++ b/openllm-core/src/openllm_core/config/configuration_dolly_v2.py @@ -2,7 +2,6 @@ from __future__ import annotations import typing as t import openllm_core -from openllm_core._conversation import SeparatorStyle from openllm_core.prompts import PromptTemplate, process_prompt from openllm_core.utils import dantic @@ -84,13 +83,6 @@ class DollyV2Config(openllm_core.LLMConfig): 'url': 
'https://github.com/databrickslabs/dolly', 'architecture': 'GPTNeoXForCausalLM', 'default_id': 'databricks/dolly-v2-3b', - 'conversation': dict( - system_message='Below is an instruction that describes a task. Write a response that appropriately completes the request.\n\n', - roles=('### Instruction', '### Response'), - sep_style=SeparatorStyle.DOLLY, - sep='\n\n', - sep2='### End', - ), 'model_ids': ['databricks/dolly-v2-3b', 'databricks/dolly-v2-7b', 'databricks/dolly-v2-12b'], } return_full_text: bool = dantic.Field(False, description='Whether to return the full prompt to the users.') diff --git a/openllm-core/src/openllm_core/config/configuration_falcon.py b/openllm-core/src/openllm_core/config/configuration_falcon.py index c5bfb46e..4eedda13 100644 --- a/openllm-core/src/openllm_core/config/configuration_falcon.py +++ b/openllm-core/src/openllm_core/config/configuration_falcon.py @@ -2,7 +2,6 @@ from __future__ import annotations import typing as t import openllm_core -from openllm_core._conversation import SeparatorStyle from openllm_core.prompts import PromptTemplate, process_prompt START_FALCON_COMMAND_DOCSTRING = """\ @@ -48,9 +47,6 @@ class FalconConfig(openllm_core.LLMConfig): 'requirements': ['einops', 'xformers'], 'architecture': 'FalconForCausalLM', # NOTE: See https://huggingface.co/tiiuae/falcon-7b-instruct/discussions/1 - 'conversation': dict( - roles=('User', 'Assistant'), messages=[], sep_style=SeparatorStyle.ADD_COLON_SINGLE, sep='\n' - ), # No space after colon 'default_id': 'tiiuae/falcon-7b', 'model_ids': ['tiiuae/falcon-7b', 'tiiuae/falcon-40b', 'tiiuae/falcon-7b-instruct', 'tiiuae/falcon-40b-instruct'], 'fine_tune_strategies': ( diff --git a/openllm-core/src/openllm_core/config/configuration_flan_t5.py b/openllm-core/src/openllm_core/config/configuration_flan_t5.py index 97632d38..dfbe6191 100644 --- a/openllm-core/src/openllm_core/config/configuration_flan_t5.py +++ b/openllm-core/src/openllm_core/config/configuration_flan_t5.py @@ -2,7 +2,6 @@ from __future__ import annotations import typing as t import openllm_core -from openllm_core._conversation import SeparatorStyle from openllm_core.prompts import PromptTemplate, process_prompt START_FLAN_T5_COMMAND_DOCSTRING = """\ @@ -41,9 +40,6 @@ class FlanT5Config(openllm_core.LLMConfig): 'model_type': 'seq2seq_lm', 'backend': ('pt',), # NOTE: See https://www.philschmid.de/fine-tune-flan-t5. 
No specific template found, but seems to have the same dialogue style - 'conversation': dict( - system_message='', roles=('User', 'Assistant'), sep_style=SeparatorStyle.ADD_COLON_SINGLE, sep='\n' - ), 'default_id': 'google/flan-t5-large', 'model_ids': [ 'google/flan-t5-small', diff --git a/openllm-core/src/openllm_core/config/configuration_gpt_neox.py b/openllm-core/src/openllm_core/config/configuration_gpt_neox.py index fd8f77a4..4eb5b27e 100644 --- a/openllm-core/src/openllm_core/config/configuration_gpt_neox.py +++ b/openllm-core/src/openllm_core/config/configuration_gpt_neox.py @@ -2,7 +2,6 @@ from __future__ import annotations import typing as t import openllm_core -from openllm_core._conversation import SeparatorStyle from openllm_core.prompts import PromptTemplate, process_prompt from openllm_core.utils import dantic @@ -49,9 +48,6 @@ class GPTNeoXConfig(openllm_core.LLMConfig): 'start_name': 'gpt-neox', 'architecture': 'GPTNeoXForCausalLM', # NOTE: See https://huggingface.co/togethercomputer/GPT-NeoXT-Chat-Base-20B - 'conversation': dict( - system_message='', roles=('', ''), sep_style=SeparatorStyle.ADD_COLON_SPACE_SINGLE, sep='\n' - ), 'url': 'https://github.com/EleutherAI/gpt-neox', 'default_id': 'eleutherai/gpt-neox-20b', 'model_ids': ['eleutherai/gpt-neox-20b'], diff --git a/openllm-core/src/openllm_core/config/configuration_llama.py b/openllm-core/src/openllm_core/config/configuration_llama.py index 22c2bd54..46dde799 100644 --- a/openllm-core/src/openllm_core/config/configuration_llama.py +++ b/openllm-core/src/openllm_core/config/configuration_llama.py @@ -2,7 +2,6 @@ from __future__ import annotations import typing as t import openllm_core -from openllm_core._conversation import SeparatorStyle from openllm_core.prompts import PromptTemplate START_LLAMA_COMMAND_DOCSTRING = """\ @@ -84,14 +83,6 @@ class LlamaConfig(openllm_core.LLMConfig): 'requirements': ['fairscale', 'sentencepiece', 'scipy'], 'default_id': 'NousResearch/llama-2-7b-hf', 'serialisation': 'safetensors', - # NOTE: see https://huggingface.co/blog/codellama#conversational-instructions - 'conversation': dict( - system_template='[INST] <<SYS>>\n{system_message}\n<</SYS>>\n\n', - roles=('[INST]', '[/INST]'), - sep_style=SeparatorStyle.LLAMA, - sep=' ', - sep2=' </s><s>', - ), 'model_ids': [ 'meta-llama/Llama-2-70b-chat-hf', 'meta-llama/Llama-2-13b-chat-hf', diff --git a/openllm-core/src/openllm_core/config/configuration_mpt.py b/openllm-core/src/openllm_core/config/configuration_mpt.py index eebb0056..b3f4445a 100644 --- a/openllm-core/src/openllm_core/config/configuration_mpt.py +++ b/openllm-core/src/openllm_core/config/configuration_mpt.py @@ -2,7 +2,6 @@ from __future__ import annotations import typing as t import openllm_core -from openllm_core._conversation import SeparatorStyle from openllm_core.prompts import PromptTemplate, process_prompt from openllm_core.utils import dantic @@ -83,7 +82,6 @@ class MPTConfig(openllm_core.LLMConfig): 'requirements': ['triton', 'einops'], 'architecture': 'MPTForCausalLM', # NOTE: See https://huggingface.co/TheBloke/mpt-30B-chat-GGML/discussions/4 - 'conversation': dict(roles=('user', 'assistant'), messages=[], sep_style=SeparatorStyle.MPT, sep='\n'), 'default_id': 'mosaicml/mpt-7b-instruct', 'model_ids': [ 'mosaicml/mpt-7b', diff --git a/openllm-core/src/openllm_core/config/configuration_opt.py b/openllm-core/src/openllm_core/config/configuration_opt.py index e5a0eb08..94c20fc7 100644 --- a/openllm-core/src/openllm_core/config/configuration_opt.py +++
b/openllm-core/src/openllm_core/config/configuration_opt.py @@ -2,7 +2,6 @@ from __future__ import annotations import typing as t import openllm_core -from openllm_core._conversation import SeparatorStyle from openllm_core.prompts import process_prompt if t.TYPE_CHECKING: @@ -50,9 +49,6 @@ class OPTConfig(openllm_core.LLMConfig): 'url': 'https://huggingface.co/docs/transformers/model_doc/opt', 'default_id': 'facebook/opt-1.3b', 'architecture': 'OPTForCausalLM', - 'conversation': dict( - roles=('User', 'Assistant'), messages=[], sep_style=SeparatorStyle.ADD_COLON_SINGLE, sep='\n' - ), 'model_ids': [ 'facebook/opt-125m', 'facebook/opt-350m', diff --git a/openllm-core/src/openllm_core/config/configuration_stablelm.py b/openllm-core/src/openllm_core/config/configuration_stablelm.py index ca696d6b..1a3afe44 100644 --- a/openllm-core/src/openllm_core/config/configuration_stablelm.py +++ b/openllm-core/src/openllm_core/config/configuration_stablelm.py @@ -2,7 +2,6 @@ from __future__ import annotations import typing as t import openllm_core -from openllm_core._conversation import SeparatorStyle from openllm_core.prompts import PromptTemplate, process_prompt START_STABLELM_COMMAND_DOCSTRING = """\ @@ -51,19 +50,6 @@ class StableLMConfig(openllm_core.LLMConfig): __config__ = { 'name_type': 'lowercase', 'url': 'https://github.com/Stability-AI/StableLM', - 'conversation': dict( - system_template='<|SYSTEM|>{system_message}', - system_message="""# StableLM Tuned (Alpha version) -- StableLM is a helpful and harmless open-source AI language model developed by StabilityAI. -- StableLM is excited to be able to help the user, but will refuse to do anything that could be considered harmful to the user. -- StableLM is more than just an information source, StableLM is also able to write poetry, short stories, and make jokes. -- StableLM will refuse to participate in anything that could harm a human. 
-""", - roles=('<|USER|>', '<|ASSISTANT|>'), - sep_style=SeparatorStyle.NO_COLON_SINGLE, - sep='', - stop_token_ids=[50278, 50279, 50277, 1, 0], - ), 'architecture': 'GPTNeoXForCausalLM', 'default_id': 'stabilityai/stablelm-tuned-alpha-3b', 'model_ids': [ diff --git a/openllm-core/src/openllm_core/config/configuration_starcoder.py b/openllm-core/src/openllm_core/config/configuration_starcoder.py index 633299ac..8eaa5bea 100644 --- a/openllm-core/src/openllm_core/config/configuration_starcoder.py +++ b/openllm-core/src/openllm_core/config/configuration_starcoder.py @@ -2,7 +2,6 @@ from __future__ import annotations import typing as t import openllm_core -from openllm_core._conversation import SeparatorStyle if t.TYPE_CHECKING: from openllm_core.prompts import PromptTemplate @@ -51,9 +50,6 @@ class StarCoderConfig(openllm_core.LLMConfig): 'name_type': 'lowercase', 'url': 'https://github.com/bigcode-project/starcoder', 'architecture': 'GPTBigCodeForCausalLM', - 'conversation': dict( - system_message='', roles=('<|user|>', '<|assistant|>'), sep_style=SeparatorStyle.STARCODER, sep='\n' - ), 'requirements': ['bitsandbytes'], 'default_id': 'bigcode/starcoder', 'model_ids': ['bigcode/starcoder', 'bigcode/starcoderbase'], diff --git a/openllm-python/src/openllm/protocol/openai.py b/openllm-python/src/openllm/protocol/openai.py index c9ea216c..740f69da 100644 --- a/openllm-python/src/openllm/protocol/openai.py +++ b/openllm-python/src/openllm/protocol/openai.py @@ -173,20 +173,3 @@ class ModelCard: class ModelList: object: str = 'list' data: t.List[ModelCard] = attr.field(factory=list) - - -async def get_conversation_prompt(request: ChatCompletionRequest, llm_config: openllm_core.LLMConfig) -> str: - conv = llm_config.get_conversation_template() - for message in request.messages: - msg_role = message['role'] - if msg_role == 'system': - conv.set_system_message(message['content']) - elif msg_role == 'user': - conv.append_message(conv.roles[0], message['content']) - elif msg_role == 'assistant': - conv.append_message(conv.roles[1], message['content']) - else: - raise ValueError(f'Unknown role: {msg_role}') - # Add a blank message for the assistant. 
- conv.append_message(conv.roles[1], '') - return conv.get_prompt() diff --git a/ruff.toml b/ruff.toml index 69126131..88a9883b 100644 --- a/ruff.toml +++ b/ruff.toml @@ -120,4 +120,4 @@ docstring-quotes = "double" "openllm-python/tests/**/*" = ["S101", "TID252", "PT011", "S307"] "openllm-python/src/openllm/_llm.py" = ["F811"] "openllm-core/src/openllm_core/utils/import_utils.py" = ["PLW0603", "F811"] -"openllm-core/src/openllm_core/_configuration.py" = ["F811"] +"openllm-core/src/openllm_core/_configuration.py" = ["F811", "Q001"] diff --git a/tools/update-config-stubs.py b/tools/update-config-stubs.py index 1d9c5259..9340b83e 100755 --- a/tools/update-config-stubs.py +++ b/tools/update-config-stubs.py @@ -2,7 +2,6 @@ from __future__ import annotations import os import sys - from pathlib import Path # currently we are assuming the indentatio level is 2 for comments @@ -12,16 +11,19 @@ START_SPECIAL_COMMENT = f'# {os.path.basename(__file__)}: special start\n' END_SPECIAL_COMMENT = f'# {os.path.basename(__file__)}: special stop\n' START_ATTRS_COMMENT = f'# {os.path.basename(__file__)}: attrs start\n' END_ATTRS_COMMENT = f'# {os.path.basename(__file__)}: attrs stop\n' +# Stubs for auto class +START_AUTO_STUBS_COMMENT = f'# {os.path.basename(__file__)}: auto stubs start\n' +END_AUTO_STUBS_COMMENT = f'# {os.path.basename(__file__)}: auto stubs stop\n' ROOT = Path(__file__).parent.parent _TARGET_FILE = ROOT / 'openllm-core' / 'src' / 'openllm_core' / '_configuration.py' +_TARGET_AUTO_FILE = ROOT / 'openllm-core' / 'src' / 'openllm_core' / 'config' / 'configuration_auto.py' sys.path.insert(0, (ROOT / 'openllm-core' / 'src').__fspath__()) -from openllm_core._configuration import GenerationConfig -from openllm_core._configuration import ModelSettings -from openllm_core._configuration import PeftType -from openllm_core._configuration import SamplingParams +from openllm_core._configuration import GenerationConfig, ModelSettings, SamplingParams +from openllm_core.config.configuration_auto import CONFIG_MAPPING_NAMES from openllm_core.utils import codegen +from openllm_core.utils.peft import PeftType def process_annotations(annotations: str) -> str: @@ -57,7 +59,6 @@ _value_docstring = { ```bash openllm start gpt-neox --model-id stabilityai/stablelm-tuned-alpha-3b ```""", - 'conversation': """The conversation class for this given LLM to determine its chat templates.""", 'add_generation_prompt': """Whether to add generation prompt token for formatting chat templates. This arguments will be used for chat-based models.""", 'backend': """List of supported backend for this given LLM class. Currently, we support "pt" and "vllm".""", 'serialisation': """Default serialisation format for different models. Some will default to use the legacy 'bin'. 
""", @@ -84,7 +85,7 @@ _value_docstring = { 'fine_tune_strategies': 'The fine-tune strategies for this given LLM.', } -_transformed = {'fine_tune_strategies': 't.Dict[AdapterType, FineTuneConfig]', 'conversation': 'Conversation'} +_transformed = {'fine_tune_strategies': 't.Dict[AdapterType, FineTuneConfig]'} def main() -> int: @@ -105,7 +106,7 @@ def main() -> int: special_attrs_lines: list[str] = [] for keys, ForwardRef in codegen.get_annotations(ModelSettings).items(): special_attrs_lines.append( - f"{' ' * 4}{keys}: {_transformed.get(keys, process_annotations(ForwardRef.__forward_arg__))}\n" + f"{' ' * 4}{keys}:{_transformed.get(keys, process_annotations(ForwardRef.__forward_arg__))}\n" ) # NOTE: inline stubs for _ConfigAttr type stubs config_attr_lines: list[str] = [] @@ -114,7 +115,7 @@ def main() -> int: [ ' ' * 4 + line for line in [ - f'__openllm_{keys}__: {_transformed.get(keys, process_annotations(ForwardRef.__forward_arg__))} = Field(None)\n', + f'__openllm_{keys}__:{_transformed.get(keys, process_annotations(ForwardRef.__forward_arg__))}=Field(None)\n', f"'''{_value_docstring[keys]}'''\n", ] ] @@ -128,7 +129,7 @@ def main() -> int: ' ' * 2 + line for line in [ '@overload\n', - f"def __getitem__(self, item: t.Literal['{keys}']) -> {_transformed.get(keys, process_annotations(ForwardRef.__forward_arg__))}: ...\n", + f"def __getitem__(self,item:t.Literal['{keys}'])->{_transformed.get(keys, process_annotations(ForwardRef.__forward_arg__))}:...\n", ] ] ) @@ -139,11 +140,11 @@ def main() -> int: ' ' * 2 + line for line in [ '@overload\n', - "def __getitem__(self, item: t.Literal['generation_class']) -> t.Type[openllm_core.GenerationConfig]: ...\n", + "def __getitem__(self,item:t.Literal['generation_class'])->t.Type[openllm_core.GenerationConfig]:...\n", '@overload\n', - "def __getitem__(self, item: t.Literal['sampling_class']) -> t.Type[openllm_core.SamplingParams]: ...\n", + "def __getitem__(self,item:t.Literal['sampling_class'])->t.Type[openllm_core.SamplingParams]:...\n", '@overload\n', - "def __getitem__(self, item: t.Literal['extras']) -> t.Dict[str, t.Any]: ...\n", + "def __getitem__(self,item:t.Literal['extras'])->t.Dict[str, t.Any]:...\n", ] ] ) @@ -153,7 +154,7 @@ def main() -> int: lines.extend( [ ' ' * 2 + line - for line in ['@overload\n', f"def __getitem__(self, item: t.Literal['{keys}']) -> {type_pep563}: ...\n"] + for line in ['@overload\n', f"def __getitem__(self,item:t.Literal['{keys}'])->{type_pep563}:...\n"] ] ) lines.append(' ' * 2 + '# NOTE: SamplingParams arguments\n') @@ -162,7 +163,7 @@ def main() -> int: lines.extend( [ ' ' * 2 + line - for line in ['@overload\n', f"def __getitem__(self, item: t.Literal['{keys}']) -> {type_pep563}: ...\n"] + for line in ['@overload\n', f"def __getitem__(self,item:t.Literal['{keys}'])->{type_pep563}:...\n"] ] ) lines.append(' ' * 2 + '# NOTE: PeftType arguments\n') @@ -172,7 +173,7 @@ def main() -> int: ' ' * 2 + line for line in [ '@overload\n', - f"def __getitem__(self, item: t.Literal['{keys.lower()}']) -> dict[str, t.Any]: ...\n", + f"def __getitem__(self,item:t.Literal['{keys.lower()}'])->t.Dict[str, t.Any]:...\n", ] ] ) @@ -188,6 +189,33 @@ def main() -> int: ) with _TARGET_FILE.open('w') as f: f.writelines(processed) + + with _TARGET_AUTO_FILE.open('r') as f: + processed = f.readlines() + + start_auto_stubs_idx, end_auto_stubs_idx = ( + processed.index(' ' * 2 + START_AUTO_STUBS_COMMENT), + processed.index(' ' * 2 + END_AUTO_STUBS_COMMENT), + ) + lines = [] + for model, class_name in CONFIG_MAPPING_NAMES.items(): + 
lines.extend( + [ + ' ' * 2 + line + for line in [ + '@t.overload\n', + '@classmethod\n', + f"def for_model(cls,model_name:t.Literal['{model}'],**attrs:t.Any)->openllm_core.config.{class_name}:...\n", + ] + ] + ) + processed = ( + processed[:start_auto_stubs_idx] + + [' ' * 2 + START_AUTO_STUBS_COMMENT, *lines, ' ' * 2 + END_AUTO_STUBS_COMMENT] + + processed[end_auto_stubs_idx + 1 :] + ) + with _TARGET_AUTO_FILE.open('w') as f: + f.writelines(processed) return 0
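
A quick usage sketch of what the regenerated stubs above buy callers (this example is not part of the patch; it assumes `openllm_core` is importable and that `AutoConfig.for_model` forwards extra keyword arguments such as `max_new_tokens` into the config's generation settings):

```python
# Minimal sketch: the generated @t.overload stubs let type checkers narrow
# AutoConfig.for_model's return type from the literal model name, and the
# regenerated __getitem__ overloads keep literal-key access on LLMConfig typed.
from __future__ import annotations

import typing as t

from openllm_core.config.configuration_auto import AutoConfig

# Statically resolves to openllm_core.config.LlamaConfig via the 'llama' overload;
# at runtime it still dispatches through the config mapping by model name.
config = AutoConfig.for_model('llama', max_new_tokens=256)

if t.TYPE_CHECKING:
    reveal_type(config)  # type checkers report: openllm_core.config.LlamaConfig

print(config['max_new_tokens'])  # typed as int by the __getitem__ overload
print(config['model_ids'])       # typed as the list of supported model tags
```

After editing `CONFIG_MAPPING_NAMES`, the overload block between the `auto stubs start`/`auto stubs stop` markers is presumably refreshed by re-running `tools/update-config-stubs.py`, the same way the `_configuration.py` stubs are regenerated.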